├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── README.md ├── crash-recover-test ├── CMakeLists.txt ├── test_crash_client.cc └── test_crash_server.cc ├── documents └── fast23_FUSEE_Extended_Version.pdf ├── micro-test ├── CMakeLists.txt ├── gen-micro-workload.py ├── latency_test.cc ├── latency_test.h ├── latency_test_client.cc ├── latency_test_client_cr.cc ├── micro_test.cc ├── micro_test.h ├── micro_test_multi_client.cc └── micro_test_multi_client_cr.cc ├── setup ├── download_gdrive.py ├── download_workload.sh └── setup-env.sh ├── src ├── CMakeLists.txt ├── client.cc ├── client.h ├── client_cr.cc ├── client_cr.h ├── client_mm.cc ├── client_mm.h ├── hashtable.cc ├── hashtable.h ├── ib.cc ├── ib.h ├── init.cc ├── kv_debug.h ├── kv_utils.cc ├── kv_utils.h ├── nm.cc ├── nm.h ├── server.cc ├── server.h ├── server_mm.cc ├── server_mm.h └── spinlock.h ├── tests ├── CMakeLists.txt ├── client_config.json ├── client_kv_shell.cc ├── ddckv_test.cc ├── ddckv_test.h ├── server_config.json ├── test_client.h ├── test_client_client.cc ├── test_client_server.cc ├── test_conf.json ├── test_kv_utils.cc ├── test_mm.cc ├── test_mm.h ├── test_nm.cc ├── test_nm.h ├── test_remote_nm.cc ├── test_remote_nm.h ├── test_server.cc └── test_server.h └── ycsb-test ├── CMakeLists.txt ├── gen-ycsb-workload.py ├── merge-ycsb-lat.py ├── split-workload.py ├── ycsb_multi_client_cont_tpt.cc ├── ycsb_server_crash_multi_client.cc ├── ycsb_test.cc ├── ycsb_test.h ├── ycsb_test_client.cc ├── ycsb_test_multi_client.cc ├── ycsb_test_server.cc ├── ycsb_wl_loader.cc └── ycsb_wl_worker.cc /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .vscode 3 | ycsb-test/workloads/* 4 | micro-test/micro-workloads/* 5 | ycsb-test/upd-workloads/* 6 | *.ipynb 7 | workloads.tgz 8 | micro-workloads.tgz 9 | upd-workload.tgz 10 | setup/workloads 11 | setup/install/ 12 | setup/micro-workloads/ 13 | setup/upd-workloads.tgz 14 | setup/upd-workloads/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ycsb-test/YCSB-C"] 2 | path = ycsb-test/YCSB-C 3 | url = https://gitee.com/bernardshen/YCSB-C.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16 FATAL_ERROR) 2 | 3 | project(DDCKV LANGUAGES CXX) 4 | set(CMAKE_CXX_STANDARD 11) 5 | 6 | find_package(Boost REQUIRED) 7 | 8 | include_directories(src) 9 | set(CMAKE_BUILD_TYPE Release) 10 | 11 | add_subdirectory(src) 12 | 13 | add_subdirectory(ycsb-test) 14 | add_subdirectory(crash-recover-test) 15 | add_subdirectory(micro-test) 16 | 17 | enable_testing() 18 | add_subdirectory(tests) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FUSEE: A Fully Memory-Disaggregated Key-Value Store 2 | 3 | 4 | This is the implementation repository of our FAST'23 paper: **FUSEE: A Fully Memory-Disaggregated Key-Value Store**. 5 | 6 | 7 | 8 | ## Description 9 | 10 | We proposes ***FUSEE***, a FUlly memory-diSaggrEgated KV Stor***E*** that brings disaggregation to metadata management. 
*FUSEE* replicates metadata, *i.e.*, the index and memory management information, on memory nodes, manages them directly on the client side, and handles complex failures under the DM architecture. To scalably replicate the index on clients, *FUSEE* proposes a client-centric replication protocol that allows clients to concurrently access and modify the replicated index. To efficiently manage disaggregated memory, *FUSEE* adopts a two-level memory management scheme that splits the memory management duty among clients and memory nodes. Finally, to handle metadata corruption under client failures, *FUSEE* leverages an embedded operation log scheme to repair metadata with low log maintenance overhead. 11 | 12 | 13 | ## Environment 14 | 15 | * For hardware, each machine should be equipped with one **8-core Intel processor** (*e.g.*, Intel Xeon E5-2450), **16GB DRAM**, and one **RDMA NIC** (*e.g.*, Mellanox ConnectX-3). Each RNIC should be connected to an **Infiniband or Ethernet switch** (*e.g.*, Mellanox SX6036G). All machines are separated into memory nodes and compute nodes. At most 5 memory nodes and 17 compute nodes are used for the experiments in our paper. If you do not have such a testbed, consider using [CloudLab](https://www.cloudlab.us/). 16 | 17 | * For software, **Ubuntu 18.04** is recommended for each machine. In our experiments, **7168 HugePages** of 2MB size need to be allocated on each memory node and **2048** on each compute node. You can set this up with `echo 7168 > /proc/sys/vm/nr_hugepages` on memory nodes and `echo 2048 > /proc/sys/vm/nr_hugepages` on compute nodes. 18 | 19 | 20 | 21 | ## Configurations 22 | 23 | Configuration files for servers and clients should be provided to the program. Two example configuration files are shown below. 24 | 25 | #### 1. Servers configuration 26 | 27 | For each memory node, you should provide a configuration file `server_config.json` where you can flexibly configure the server: 28 | 29 | ```json 30 | { 31 | "role": "SERVER", 32 | "conn_type": "IB", 33 | "server_id": 0, 34 | "udp_port": 2333, 35 | "memory_num": 3, 36 | "memory_ips": [ 37 | "10.10.10.1", 38 | "10.10.10.2", 39 | "10.10.10.3" 40 | ], 41 | "ib_dev_id": 0, 42 | "ib_port_id": 1, 43 | "ib_gid_idx": 0, 44 | 45 | "server_base_addr": "0x10000000", 46 | "server_data_len": 15032385536, 47 | "block_size": 67108864, 48 | "subblock_size": 256, 49 | "client_local_size": 1073741824, 50 | 51 | "num_replication": 3, 52 | 53 | "main_core_id": 0, 54 | "poll_core_id": 1, 55 | "bg_core_id": 2, 56 | "gc_core_id": 3 57 | } 58 | ``` 59 | 60 | For brevity, we refer to each memory node as "server `i`" (`i` = 0, 1, ...). 61 |
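The layout parameters above determine how each memory node's registered region is carved up: `server_data_len` bytes starting at `server_base_addr` are divided into coarse-grained blocks of `block_size` bytes, which are further split into `subblock_size`-byte allocation units, and each KV pair is replicated on `num_replication` memory nodes. As a quick sanity check, here is a small standalone sketch (not part of this repository; the constants simply mirror the example `server_config.json` above) that computes what the example values imply:

```cpp
// Sketch only: relates the example server_config.json values to the memory layout.
#include <cstdio>

int main() {
    // Values copied from the example configuration above.
    const unsigned long long server_base_addr = 0x10000000ULL;
    const unsigned long long server_data_len  = 15032385536ULL; // 14 GiB of KV space per memory node
    const unsigned long long block_size       = 67108864ULL;    // 64 MiB coarse-grained blocks
    const unsigned long long subblock_size    = 256ULL;         // fine-grained allocation unit
    const int num_replication                 = 3;              // copies of each KV pair

    printf("blocks per memory node: %llu\n", server_data_len / block_size); // 224
    printf("subblocks per block:    %llu\n", block_size / subblock_size);   // 262144
    printf("registered region:      [0x%llx, 0x%llx)\n",
           server_base_addr, server_base_addr + server_data_len);
    printf("replicas per KV pair:   %d\n", num_replication);
    return 0;
}
```

With the example values, each memory node exposes 14 GiB of KV space as 224 blocks of 64 MiB, and each block is further divided into 262,144 subblocks of 256 bytes.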
62 | #### 2. Clients configuration 63 | 64 | For each compute node, you should provide a configuration file `client_config.json` where you can flexibly configure the client: 65 | 66 | ```json 67 | { 68 | "role": "CLIENT", 69 | "conn_type": "IB", 70 | "server_id": 2, 71 | "udp_port": 2333, 72 | "memory_num": 2, 73 | "memory_ips": [ 74 | "128.110.96.102", 75 | "128.110.96.81" 76 | ], 77 | "ib_dev_id": 0, 78 | "ib_port_id": 1, 79 | "ib_gid_idx": 0, 80 | 81 | "server_base_addr": "0x10000000", 82 | "server_data_len": 15032385536, 83 | "block_size": 67108864, 84 | "subblock_size": 1024, 85 | "client_local_size": 1073741824, 86 | 87 | "num_replication": 2, 88 | "num_idx_rep": 1, 89 | "num_coroutines": 10, 90 | "miss_rate_threash": 0.1, 91 | "workload_run_time": 10, 92 | "micro_workload_num": 10000, 93 | 94 | "main_core_id": 0, 95 | "poll_core_id": 1, 96 | "bg_core_id": 2, 97 | "gc_core_id": 3 98 | } 99 | ``` 100 | 101 | For brevity, we refer to each compute node as "client `i`" (`i` = 0, 1, 2, ...). 102 | 103 | Note that the `server_id` parameter of client `i` should be set to `2+i*8`. For example, the `server_id` values of the first three clients are 2, 10, and 18, respectively. 104 | 105 | 106 | 107 | ## Experiments 108 | 109 | On each node, execute the following commands to compile the entire program: 110 | 111 | ```shell 112 | mkdir build && cd build 113 | cmake .. 114 | make -j 115 | ``` 116 | 117 | We test *FUSEE* with a **micro-benchmark** and the **YCSB benchmarks**. For each experiment, you should put `server_config.json` in the `./build` directory, and then use the following command on the memory nodes to set up the servers: 118 | 119 | ```shell 120 | numactl -N 0 -m 0 ./ycsb-test/ycsb_test_server [SERVER_NUM] 121 | ``` 122 | 123 | `[SERVER_NUM]` should be the serial number of this memory node, counting from 0. 124 | 125 | 126 | 127 | #### 1. Micro-benchmark 128 | 129 | * **Latency** 130 | 131 | To evaluate the latency of each operation, we use a single client to iteratively execute each operation (**INSERT**, **DELETE**, **UPDATE**, and **SEARCH**) 10,000 times. 132 | 133 | Enter `./build/micro-test` and use the following command on client `0`: 134 | 135 | ```shell 136 | numactl -N 0 -m 0 ./latency_test_client [PATH_TO_CLIENT_CONFIG] 137 | ``` 138 | 139 | Test results will be saved in `./build/micro-test/results`. 140 | 141 | * **Throughput** 142 | 143 | To evaluate the throughput of each operation, each client first iteratively INSERTs different keys for 0.5 seconds. UPDATE and SEARCH operations are then executed on these keys for 10 seconds. Finally, each client executes DELETE for 0.5 seconds. 144 | 145 | Enter `./build/micro-test` and execute the following command on all client nodes at the same time: 146 | 147 | ```shell 148 | numactl -N 0 -m 0 ./micro_test_multi_client [PATH_TO_CLIENT_CONFIG] 8 149 | ``` 150 | 151 | The number `8` indicates that there are 8 client threads on each client node. You will need to send a keystroke (*e.g.*, press Enter) to every client node simultaneously so that all nodes start each operation test synchronously. 152 | 153 | Test results will be displayed on each client terminal. 154 | 155 | 156 | 157 | #### 2. YCSB benchmarks 158 | 159 | * **Workload preparation** 160 | 161 | First, download all the test workloads by running `sh download_workload.sh` in the `./setup` directory, then unpack the workloads you want into `./build/ycsb-test/workloads`.
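The micro-workload traces generated by `gen-micro-workload.py` use a simple `OP usertable KEY` line format (*e.g.*, `READ usertable 42`); assuming the downloaded YCSB traces follow the same convention, the hypothetical snippet below (not part of the repository) sketches how one trace line maps onto the client's request types, with `READ` corresponding to a SEARCH request:

```cpp
// Sketch only: parse an "OP usertable KEY" trace line into a request type and key.
#include <cstdio>
#include <cstring>
#include <string>

enum ReqType { REQ_INSERT, REQ_UPDATE, REQ_SEARCH, REQ_DELETE, REQ_UNKNOWN };

static ReqType parse_trace_line(const char * line, std::string & key_out) {
    char op[16], table[32], key[128];
    if (sscanf(line, "%15s %31s %127s", op, table, key) != 3)
        return REQ_UNKNOWN;
    key_out = key;
    if (strcmp(op, "INSERT") == 0) return REQ_INSERT;
    if (strcmp(op, "UPDATE") == 0) return REQ_UPDATE;
    if (strcmp(op, "READ")   == 0) return REQ_SEARCH;   // READ maps to a SEARCH request
    if (strcmp(op, "DELETE") == 0) return REQ_DELETE;
    return REQ_UNKNOWN;
}

int main() {
    std::string key;
    ReqType type = parse_trace_line("READ usertable 42", key);
    printf("type=%d key=%s\n", (int)type, key.c_str());
    return 0;
}
```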
162 | 163 | Here is the description of the YCSB workloads: 164 | 165 | | Workload | SEARCH | UPDATE | INSERT | 166 | | -------- | ------ | ------ | ------ | 167 | | A | 0.5 | 0.5 | 0 | 168 | | B | 0.95 | 0.05 | 0 | 169 | | C | 1 | 0 | 0 | 170 | | D | 0.95 | 0 | 0.05 | 171 | | upd[X] | 1-[X]% | [X]% | 0 | 172 | 173 | Then, you should execute the following command in `./build/ycsb-test` to split the workloads into N parts (N is the total number of client threads): 174 | 175 | ```shell 176 | python split-workload.py [N] 177 | ``` 178 | 179 | Then we can start testing *FUSEE* using the YCSB benchmarks. 180 | 181 | * **Throughput** 182 | 183 | To show the **scalability** of *FUSEE*, we can test its throughput with different numbers of client nodes. We can also evaluate the **read-write performance** of *FUSEE* by measuring its throughput on workloads with different search-update ratios `X`. Here is the command for testing the throughput of *FUSEE*: 184 | 185 | ```shell 186 | numactl -N 0 -m 0 ./ycsb_test_multi_client [PATH_TO_CLIENT_CONFIG] [WORKLOAD-NAME] 8 187 | ``` 188 | 189 | Execute the command on all the client nodes at the same time. `[WORKLOAD-NAME]` can be chosen from `workloada ~ workloadd` or `workloadudp0 ~ workloadudp100` (indicating different search-update ratios). The number `8` indicates that there are 8 client threads on each client node. You will need to send a keystroke (*e.g.*, press Enter) to every client node simultaneously so that all nodes start the test synchronously. 190 | 191 | Test results will be displayed on each client terminal. 192 | 193 | 194 | -------------------------------------------------------------------------------- /crash-recover-test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(test_crash_client test_crash_client.cc) 2 | 3 | target_link_libraries(test_crash_client 4 | libddckv 5 | pthread 6 | ibverbs 7 | ) -------------------------------------------------------------------------------- /crash-recover-test/test_crash_client.cc: -------------------------------------------------------------------------------- 1 | #include "client.h" 2 | 3 | #define INSERT_NUM 1000 4 | #define UPDATE_NUM 1000 5 | 6 | const char * key = "test-12345-k"; 7 | const char * key_template = "test-%d-k"; 8 | const char * value_insert_template = "test-%d-v-insert"; 9 | const char * value_update_template = "test-12345-v-update-%d"; 10 | const char * value_template = "test-12345-v-%s"; 11 | 12 | KVReqCtx * prepare_ctx(Client & client, const char * key, const char * value, int req_type) { 13 | uint64_t client_input_buf = (uint64_t)client.get_input_buf(); 14 | uint32_t client_input_buf_lkey = client.get_input_buf_lkey(); 15 | 16 | memcpy((void *)(client_input_buf + sizeof(KVLogHeader)), key, strlen(key)); 17 | memcpy((void *)(client_input_buf + sizeof(KVLogHeader) + strlen(key)), value, strlen(value)); 18 | KVLogHeader * header = (KVLogHeader *)client_input_buf; 19 | header->is_valid = true; 20 | header->key_length = strlen(key); 21 | header->value_length = strlen(value); 22 | 23 | KVInfo * kv_info = (KVInfo *)malloc(sizeof(KVInfo)); 24 | kv_info->l_addr = (void *)client_input_buf; 25 | kv_info->key_len = strlen(key); 26 | kv_info->value_len = strlen(value); 27 | kv_info->lkey = client_input_buf_lkey; 28 | 29 | KVReqCtx * ctx = new KVReqCtx; 30 | ctx->req_type = req_type; 31 | ctx->use_cache = true; 32 | ctx->kv_info = kv_info; 33 | ctx->lkey = client.get_local_buf_mr()->lkey; 34 | 35 | int
num_idx_rep = client.get_num_idx_rep(); 36 | int num_replication = client.get_num_rep(); 37 | ctx->kv_modify_pr_cas_list.resize(1); 38 | ctx->kv_modify_bk_0_cas_list.resize(num_idx_rep - 1); 39 | ctx->kv_modify_bk_1_cas_list.resize(num_idx_rep - 1); 40 | ctx->log_commit_addr_list.resize(num_replication); 41 | char key_buf[128] = {0}; 42 | memcpy(key_buf, (void *)((uint64_t)ctx->kv_info->l_addr + sizeof(KVLogHeader)), ctx->kv_info->key_len); 43 | ctx->key_str = std::string(key_buf); 44 | return ctx; 45 | } 46 | 47 | void init_insert_ctx(Client & client, KVReqCtx * ctx) { 48 | uint64_t client_local_buf = (uint64_t)client.get_local_buf_mr()->addr; 49 | uint32_t client_local_buf_lkey = client.get_local_buf_mr()->lkey; 50 | 51 | ctx->local_bucket_addr = (RaceHashBucket *)client_local_buf; 52 | ctx->local_cas_target_value_addr = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket)); 53 | ctx->local_cas_return_value_addr = (void *)((uint64_t)ctx->local_cas_target_value_addr + sizeof(uint64_t)); 54 | ctx->op_laddr = (void *)((uint64_t)ctx->local_cas_return_value_addr + sizeof(uint64_t) * MAX_REP_NUM); 55 | ctx->lkey = client_local_buf_lkey; 56 | 57 | KVLogHeader * header = (KVLogHeader *)ctx->kv_info->l_addr; 58 | KVLogTail * tail = (KVLogTail *)((uint64_t)ctx->kv_info->l_addr 59 | + sizeof(KVLogHeader) + header->key_length + header->value_length); 60 | tail->op = KV_OP_INSERT; 61 | } 62 | 63 | void init_update_ctx(Client & client, KVReqCtx * ctx) { 64 | uint64_t client_local_buf = (uint64_t)client.get_local_buf_mr()->addr; 65 | uint32_t client_local_buf_lkey = (uint64_t)client.get_local_buf_mr()->rkey; 66 | 67 | ctx->local_bucket_addr = (RaceHashBucket *)client_local_buf; 68 | ctx->local_kv_addr = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket)); 69 | ctx->local_cas_target_value_addr = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket)); 70 | ctx->local_cas_return_value_addr = (void *)((uint64_t)ctx->local_cas_target_value_addr + sizeof(uint64_t)); 71 | ctx->op_laddr = (void *)((uint64_t)ctx->local_cas_target_value_addr + sizeof(uint64_t) * MAX_REP_NUM); 72 | ctx->local_cache_addr = (void *)((uint64_t)ctx->op_laddr + 2048); 73 | ctx->lkey = client_local_buf_lkey; 74 | 75 | KVLogHeader * header = (KVLogHeader *)ctx->kv_info->l_addr; 76 | KVLogTail * tail = (KVLogTail *)((uint64_t)ctx->kv_info->l_addr 77 | + sizeof(KVLogHeader) + header->key_length + header->value_length); 78 | tail->op = KV_OP_UPDATE; 79 | } 80 | 81 | void init_search_ctx(Client & client, KVReqCtx * ctx) { 82 | uint64_t client_local_buf = (uint64_t)client.get_local_buf_mr()->addr; 83 | uint32_t client_local_buf_lkey = (uint64_t)client.get_local_buf_mr()->rkey; 84 | 85 | ctx->local_bucket_addr = (RaceHashBucket *)client_local_buf; 86 | ctx->local_cache_addr = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket)); 87 | ctx->local_kv_addr = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket)); 88 | ctx->lkey = client_local_buf_lkey; 89 | } 90 | 91 | void test_crash_update_prepare(Client & client, int crash_point) { 92 | int ret = 0; 93 | // insert a kv 94 | char value_buf[128]; 95 | bool should_stop = false; 96 | 97 | sprintf(value_buf, value_insert_template, INSERT_NUM); 98 | KVReqCtx * insert_ctx = prepare_ctx(client, key, value_buf, KV_REQ_INSERT); 99 | init_insert_ctx(client, insert_ctx); 100 | insert_ctx->should_stop = &should_stop; 101 | insert_ctx->coro_id = 100; 102 | ret = client.kv_insert_sync(insert_ctx); 103 | assert(ret == 0); 104 | 105 | // update the 
kv and crash 106 | for (int i = 0; i < UPDATE_NUM - 1; i ++) { 107 | sprintf(value_buf, value_update_template, i); 108 | KVReqCtx * update_ctx = prepare_ctx(client, key, value_buf, KV_REQ_UPDATE); 109 | init_update_ctx(client, update_ctx); 110 | ret = client.kv_update_sync(update_ctx); 111 | } 112 | sprintf(value_buf, value_update_template, UPDATE_NUM); 113 | KVReqCtx * update_ctx = prepare_ctx(client, key, value_buf, KV_REQ_UPDATE); 114 | init_update_ctx(client, update_ctx); 115 | ret = client.kv_update_w_crash(update_ctx, crash_point); 116 | assert(ret == -1); 117 | } 118 | 119 | void test_crash_insert_prepare(Client & client, int crash_point) { 120 | int ret = 0; 121 | char key_buf[128]; 122 | char value_buf[128]; 123 | // insert 1000 kv 124 | for (int i = 0; i < INSERT_NUM; i ++) { 125 | sprintf(key_buf, key_template, i); 126 | sprintf(value_buf, value_insert_template, i); 127 | KVReqCtx * insert_ctx = prepare_ctx(client, key_buf, value_buf, KV_REQ_INSERT); 128 | init_insert_ctx(client, insert_ctx); 129 | bool should_stop = false; 130 | insert_ctx->should_stop = &should_stop; 131 | insert_ctx->coro_id = 100; 132 | ret = client.kv_insert_sync(insert_ctx); 133 | if (ret != 0) { 134 | printf("[%s] insert error\n", __FUNCTION__); 135 | exit(1); 136 | } 137 | } 138 | 139 | sprintf(value_buf, value_insert_template, INSERT_NUM); 140 | KVReqCtx * insert_ctx = prepare_ctx(client, key, value_buf, KV_REQ_INSERT); 141 | init_insert_ctx(client, insert_ctx); 142 | bool should_stop = false; 143 | insert_ctx->should_stop = &should_stop; 144 | insert_ctx->coro_id = 100; 145 | ret = client.kv_insert_w_crash(insert_ctx, crash_point); 146 | if (ret != -1) { 147 | printf("[%s] failed to crash\n", __FUNCTION__); 148 | exit(1); 149 | } 150 | } 151 | 152 | void test_crash_recover(Client & client) { 153 | int ret = 0; 154 | void * search_ret; 155 | char new_value_buf[256]; 156 | sprintf(new_value_buf, value_template, "update-after-crash"); 157 | // recover 158 | KVReqCtx * update_ctx = prepare_ctx(client, key, new_value_buf, KV_REQ_UPDATE); 159 | init_update_ctx(client, update_ctx); 160 | ret = client.kv_update_sync(update_ctx); 161 | if (ret != 0) { 162 | printf("[%s] error update %d\n", __FUNCTION__, ret); 163 | } 164 | // assert(ret == 0); 165 | 166 | KVReqCtx * search_ctx = prepare_ctx(client, key, new_value_buf, KV_REQ_SEARCH); 167 | init_search_ctx(client, search_ctx); 168 | search_ret = client.kv_search_sync(search_ctx); 169 | if (memcmp(search_ret, new_value_buf, strlen(new_value_buf)) != 0) { 170 | printf("recover failed\n"); 171 | } else { 172 | printf("recover success!\n"); 173 | } 174 | } 175 | 176 | int main(int argc, char ** argv) { 177 | if (argc != 2) { 178 | printf("Usage: %s path-to-config-file\n", argv[0]); 179 | } 180 | int ret = 0; 181 | GlobalConfig config; 182 | ret = load_config(argv[1], &config); 183 | assert(ret == 0); 184 | 185 | config.num_coroutines = 1; 186 | config.is_recovery = false; 187 | Client client(&config); 188 | // pthread_t pollint_tid = client.start_polling_thread(); 189 | client.start_gc_fiber(); 190 | 191 | if (config.is_recovery == false) { 192 | test_crash_insert_prepare(client, KV_CRASH_UNCOMMITTED_BK_CONSENSUS_0); 193 | printf("crashed\n"); 194 | } 195 | client.stop_gc_fiber(); 196 | 197 | config.is_recovery = true; 198 | std::vector recover_time_bd; 199 | struct timeval st, et; 200 | gettimeofday(&st, NULL); 201 | Client clientr(&config); 202 | clientr.start_gc_fiber(); 203 | gettimeofday(&et, NULL); 204 | clientr.get_recover_time(recover_time_bd); 205 | 
test_crash_recover(clientr); 206 | 207 | clientr.stop_gc_fiber(); 208 | 209 | uint64_t connection_recover_time_us = time_spent_us(&recover_time_bd[0], &recover_time_bd[1]); 210 | uint64_t local_recover_space_reg_time_us = time_spent_us(&recover_time_bd[1], &recover_time_bd[2]); 211 | uint64_t get_meta_addr_time_us = time_spent_us(&recover_time_bd[2], &recover_time_bd[3]); 212 | uint64_t traverse_log_time_us = time_spent_us(&recover_time_bd[3], &recover_time_bd[4]); 213 | uint64_t mm_recover_time_us = time_spent_us(&recover_time_bd[4], &recover_time_bd[5]); 214 | uint64_t local_mr_reg_time_us = time_spent_us(&recover_time_bd[5], &recover_time_bd[6]); 215 | uint64_t kv_ops_recover_time_us = time_spent_us(&recover_time_bd[6], &recover_time_bd[7]); 216 | 217 | printf("0. conn rec: %ld us\n", connection_recover_time_us); 218 | printf("1. rec space reg: %ld us\n", local_recover_space_reg_time_us); 219 | printf("2. get meta addr: %ld us\n", get_meta_addr_time_us); 220 | printf("3. taverse log: %ld us\n", traverse_log_time_us); 221 | printf("4. mm rec: %ld us\n", mm_recover_time_us); 222 | printf("5. local mr reg time: %ld us\n", local_mr_reg_time_us); 223 | printf("6. ops rec: %ld us\n", kv_ops_recover_time_us); 224 | printf("total:%ld us\n", time_spent_us(&st, &et)); 225 | } -------------------------------------------------------------------------------- /crash-recover-test/test_crash_server.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "server.h" 7 | 8 | int main(int argc, char ** argv) { 9 | if (argc != 2) { 10 | printf("Usage: %s [server_id]\n", argv[0]); 11 | return -1; 12 | } 13 | 14 | int32_t server_id = atoi(argv[1]); 15 | int32_t ret = 0; 16 | struct GlobalConfig server_conf; 17 | ret = load_config("./server_config.json", &server_conf); 18 | assert(ret == 0); 19 | server_conf.server_id = server_id; 20 | 21 | printf("===== Starting Server %d =====\n", server_conf.server_id); 22 | Server * server = new Server(&server_conf); 23 | pthread_t server_tid; 24 | pthread_create(&server_tid, NULL, server_main, (void *)server); 25 | 26 | printf("press to exit\n"); 27 | getchar(); 28 | printf("===== Ending Server %d =====\n", server_conf.server_id); 29 | 30 | server->stop(); 31 | return 0; 32 | } -------------------------------------------------------------------------------- /documents/fast23_FUSEE_Extended_Version.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmemsys/FUSEE/d1e9932a0aad3deffb446511811911cc0f7e82f7/documents/fast23_FUSEE_Extended_Version.pdf -------------------------------------------------------------------------------- /micro-test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(latency_test latency_test.cc) 2 | add_library(micro_test micro_test.cc) 3 | add_executable(latency_test_client latency_test_client.cc) 4 | add_executable(latency_test_client_cr latency_test_client_cr.cc) 5 | add_executable(micro_test_multi_client micro_test_multi_client.cc) 6 | add_executable(micro_test_multi_client_cr micro_test_multi_client_cr.cc) 7 | 8 | target_link_libraries(latency_test 9 | libddckv 10 | ycsb_test 11 | pthread 12 | ibverbs 13 | ) 14 | 15 | target_link_libraries(latency_test_client 16 | latency_test 17 | libddckv 18 | pthread 19 | ibverbs 20 | ) 21 | 22 | target_link_libraries(latency_test_client_cr 23 | latency_test 24 | libddckv 25 | pthread 
26 | ibverbs 27 | ) 28 | 29 | target_link_libraries(micro_test 30 | libddckv 31 | pthread 32 | ibverbs 33 | ) 34 | 35 | target_link_libraries(micro_test_multi_client 36 | micro_test 37 | libddckv 38 | pthread 39 | ibverbs 40 | ) 41 | 42 | target_link_libraries(micro_test_multi_client_cr 43 | micro_test 44 | libddckv 45 | pthread 46 | ibverbs 47 | ) -------------------------------------------------------------------------------- /micro-test/gen-micro-workload.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | insertTemplate = "INSERT usertable {}\n" 4 | updateTemplate = "UPDATE usertable {}\n" 5 | searchTemplate = "READ usertable {}\n" 6 | deleteTemplate = "DELETE usertable {}\n" 7 | 8 | workloadNameTemplate = "workload{}.spec_trans" 9 | workloadNameList = ["ins", "upd", "rea", "del"] 10 | templateDict = { 11 | "ins": insertTemplate, 12 | "upd": updateTemplate, 13 | "rea": searchTemplate, 14 | "del": deleteTemplate 15 | } 16 | 17 | workloadSize = int(sys.argv[1]) 18 | 19 | for wl in workloadNameList: 20 | wlName = "micro-workloads/" + workloadNameTemplate.format(wl) 21 | lineTemplate = templateDict[wl] 22 | lineList = [] 23 | for key in range(workloadSize): 24 | line = lineTemplate.format(key) 25 | lineList.append(line) 26 | of = open(wlName, "w") 27 | of.writelines(lineList) 28 | of.close() -------------------------------------------------------------------------------- /micro-test/latency_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "client.h" 5 | 6 | #include "latency_test.h" 7 | 8 | #define WORKLOAD_ALL (-1) 9 | // #define WORKLOAD_NUM WORKLOAD_ALL 10 | #define WORKLOAD_NUM 100000 11 | 12 | 13 | static int test_lat(Client & client, char * op_type, const char * out_fname) { 14 | int ret = 0; 15 | ret = client.load_seq_kv_requests(WORKLOAD_NUM, op_type); 16 | assert(ret == 0); 17 | 18 | printf("lat test %s\n", op_type); 19 | uint64_t * lat_list = (uint64_t *)malloc(sizeof(uint64_t) * client.num_local_operations_); 20 | memset(lat_list, 0, sizeof(uint64_t) * client.num_local_operations_); 21 | 22 | uint32_t num_failed = 0; 23 | void * search_addr; 24 | struct timeval st, et; 25 | bool should_stop = false; 26 | client.init_kvreq_space(0, 0, client.num_local_operations_); 27 | for (int i = 0; i < client.num_local_operations_; i ++) { 28 | KVReqCtx * ctx = &client.kv_req_ctx_list_[i]; 29 | ctx->coro_id = 0; 30 | ctx->should_stop = &should_stop; 31 | // ctx->use_cache = false; 32 | 33 | switch (ctx->req_type) { 34 | case KV_REQ_SEARCH: 35 | gettimeofday(&st, NULL); 36 | search_addr = client.kv_search_sync(ctx); 37 | gettimeofday(&et, NULL); 38 | if (search_addr == NULL) { 39 | num_failed ++; 40 | } 41 | break; 42 | case KV_REQ_INSERT: 43 | gettimeofday(&st, NULL); 44 | ret = client.kv_insert_sync(ctx); 45 | gettimeofday(&et, NULL); 46 | if (ret == KV_OPS_FAIL_RETURN) { 47 | num_failed ++; 48 | } 49 | break; 50 | case KV_REQ_UPDATE: 51 | gettimeofday(&st, NULL); 52 | ret = client.kv_update_sync(ctx); 53 | if (ret == KV_OPS_FAIL_RETURN) { 54 | num_failed ++; 55 | } 56 | gettimeofday(&et, NULL); 57 | break; 58 | case KV_REQ_DELETE: 59 | gettimeofday(&st, NULL); 60 | ret = client.kv_delete_sync(ctx); 61 | if (ret == KV_OPS_FAIL_RETURN) { 62 | num_failed ++; 63 | } 64 | gettimeofday(&et, NULL); 65 | break; 66 | default: 67 | assert(0); 68 | break; 69 | } 70 | 71 | lat_list[i] = (et.tv_sec - st.tv_sec) * 1000000 + (et.tv_usec - st.tv_usec); 72 | } 73 | 
printf("Failed: %d\n", num_failed); 74 | 75 | FILE * lat_fp = fopen(out_fname, "w"); 76 | assert(lat_fp != NULL); 77 | for (int i = 0; i < client.num_local_operations_; i ++) { 78 | fprintf(lat_fp, "%ld\n", lat_list[i]); 79 | } 80 | fclose(lat_fp); 81 | return 0; 82 | } 83 | 84 | static int test_lat(ClientCR & client, char * op_type, const char * out_fname) { 85 | int ret = 0; 86 | ret = client.load_seq_kv_requests(WORKLOAD_NUM, op_type); 87 | assert(ret == 0); 88 | 89 | printf("lat test %s\n", op_type); 90 | uint64_t * lat_list = (uint64_t *)malloc(sizeof(uint64_t) * client.num_local_operations_); 91 | memset(lat_list, 0, sizeof(uint64_t) * client.num_local_operations_); 92 | 93 | uint32_t num_failed = 0; 94 | void * search_addr; 95 | struct timeval st, et; 96 | bool should_stop = false; 97 | client.init_kvreq_space(0, 0, client.num_local_operations_); 98 | for (int i = 0; i < client.num_local_operations_; i ++) { 99 | KVReqCtx * ctx = &client.kv_req_ctx_list_[i]; 100 | ctx->coro_id = 0; 101 | ctx->should_stop = &should_stop; 102 | 103 | switch (ctx->req_type) { 104 | case KV_REQ_SEARCH: 105 | gettimeofday(&st, NULL); 106 | search_addr = client.kv_search_sync(ctx); 107 | gettimeofday(&et, NULL); 108 | if (search_addr == NULL) { 109 | num_failed ++; 110 | } 111 | break; 112 | case KV_REQ_INSERT: 113 | gettimeofday(&st, NULL); 114 | ret = client.kv_insert_sync(ctx); 115 | gettimeofday(&et, NULL); 116 | if (ret == KV_OPS_FAIL_REDO || ret == KV_OPS_FAIL_RETURN) { 117 | num_failed ++; 118 | } 119 | break; 120 | case KV_REQ_UPDATE: 121 | gettimeofday(&st, NULL); 122 | ret = client.kv_update_sync(ctx); 123 | gettimeofday(&et, NULL); 124 | break; 125 | case KV_REQ_DELETE: 126 | gettimeofday(&st, NULL); 127 | ret = client.kv_delete_sync(ctx); 128 | gettimeofday(&et, NULL); 129 | break; 130 | default: 131 | assert(0); 132 | break; 133 | } 134 | 135 | lat_list[i] = (et.tv_sec - st.tv_sec) * 1000000 + (et.tv_usec - st.tv_usec); 136 | } 137 | printf("Failed: %d\n", num_failed); 138 | 139 | FILE * lat_fp = fopen(out_fname, "w"); 140 | assert(lat_fp != NULL); 141 | for (int i = 0; i < client.num_local_operations_; i ++) { 142 | fprintf(lat_fp, "%ld\n", lat_list[i]); 143 | } 144 | fclose(lat_fp); 145 | return 0; 146 | } 147 | 148 | int test_insert_lat(Client & client) { 149 | char out_fname[128]; 150 | int num_rep = client.get_num_rep(); 151 | sprintf(out_fname, "results/insert_lat-%drp.txt", num_rep); 152 | return test_lat(client, "INSERT", out_fname); 153 | } 154 | 155 | int test_search_lat(Client & client) { 156 | char out_fname[128]; 157 | int num_rep = client.get_num_rep(); 158 | sprintf(out_fname, "results/search_lat-%drp.txt", num_rep); 159 | return test_lat(client, "READ", out_fname); 160 | } 161 | 162 | int test_update_lat(Client & client) { 163 | char out_fname[128]; 164 | int num_rep = client.get_num_rep(); 165 | sprintf(out_fname, "results/update_lat-%drp.txt", num_rep); 166 | return test_lat(client, "UPDATE", out_fname); 167 | } 168 | 169 | int test_delete_lat(Client & client) { 170 | char out_fname[128]; 171 | int num_rep = client.get_num_rep(); 172 | sprintf(out_fname, "results/delete_lat-%drp.txt", num_rep); 173 | return test_lat(client, "DELETE", out_fname); 174 | } 175 | 176 | int test_insert_lat(ClientCR & client) { 177 | char out_fname[128]; 178 | int num_rep = client.get_num_rep(); 179 | sprintf(out_fname, "results/insert_cr_lat-%drp.txt", num_rep); 180 | return test_lat(client, "INSERT", out_fname); 181 | } 182 | 183 | int test_search_lat(ClientCR & client) { 184 | char 
out_fname[128]; 185 | int num_rep = client.get_num_rep(); 186 | sprintf(out_fname, "results/search_cr_lat-%drp.txt", num_rep); 187 | return test_lat(client, "READ", out_fname); 188 | } 189 | 190 | int test_update_lat(ClientCR & client) { 191 | char out_fname[128]; 192 | int num_rep = client.get_num_rep(); 193 | sprintf(out_fname, "results/update_cr_lat-%drp.txt", num_rep); 194 | return test_lat(client, "UPDATE", out_fname); 195 | } 196 | 197 | int test_delete_lat(ClientCR & client) { 198 | char out_fname[128]; 199 | int num_rep = client.get_num_rep(); 200 | sprintf(out_fname, "results/delete_cr_lat-%drp.txt", num_rep); 201 | return test_lat(client, "DELETE", out_fname); 202 | } -------------------------------------------------------------------------------- /micro-test/latency_test.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_LATENCY_TEST_H_ 2 | #define DDCKV_LATENCY_TEST_H_ 3 | 4 | #include "client.h" 5 | #include "client_cr.h" 6 | 7 | int test_insert_lat(Client & client); 8 | int test_search_lat(Client & client); 9 | int test_update_lat(Client & client); 10 | int test_delete_lat(Client & client); 11 | 12 | int test_insert_lat(ClientCR & client); 13 | int test_search_lat(ClientCR & client); 14 | int test_update_lat(ClientCR & client); 15 | int test_delete_lat(ClientCR & client); 16 | 17 | #endif -------------------------------------------------------------------------------- /micro-test/latency_test_client.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "latency_test.h" 5 | 6 | int main(int argc, char ** argv) { 7 | if (argc != 2) { 8 | printf("Usage: %s path-to-config-file\n", argv[0]); 9 | return 1; 10 | } 11 | 12 | int ret = 0; 13 | GlobalConfig config; 14 | ret = load_config(argv[1], &config); 15 | assert(ret == 0); 16 | 17 | cpu_set_t cpuset; 18 | CPU_ZERO(&cpuset); 19 | CPU_SET(config.main_core_id, &cpuset); 20 | ret = sched_setaffinity(0, sizeof(cpuset), &cpuset); 21 | assert(ret == 0); 22 | ret = sched_getaffinity(0, sizeof(cpuset), &cpuset); 23 | assert(ret == 0); 24 | for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) { 25 | if (CPU_ISSET(i, &cpuset)) { 26 | printf("main process running on core: %d\n", i); 27 | } 28 | } 29 | 30 | Client client(&config); 31 | 32 | ret = test_insert_lat(client); 33 | assert(ret == 0); 34 | 35 | ret = test_search_lat(client); 36 | assert(ret == 0); 37 | 38 | ret = test_update_lat(client); 39 | assert(ret == 0); 40 | 41 | ret = test_delete_lat(client); 42 | assert(ret == 0); 43 | } -------------------------------------------------------------------------------- /micro-test/latency_test_client_cr.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "latency_test.h" 5 | 6 | int main(int argc, char ** argv) { 7 | if (argc != 2) { 8 | printf("Usage: %s path-to-config-file\n", argv[0]); 9 | return 1; 10 | } 11 | 12 | int ret = 0; 13 | GlobalConfig config; 14 | ret = load_config(argv[1], &config); 15 | assert(ret == 0); 16 | 17 | cpu_set_t cpuset; 18 | CPU_ZERO(&cpuset); 19 | CPU_SET(config.main_core_id, &cpuset); 20 | ret = sched_setaffinity(0, sizeof(cpuset), &cpuset); 21 | assert(ret == 0); 22 | ret = sched_getaffinity(0, sizeof(cpuset), &cpuset); 23 | assert(ret == 0); 24 | for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) { 25 | if (CPU_ISSET(i, &cpuset)) { 26 | printf("main process running on core: %d\n", i); 27 | } 28 | } 29 | 
30 | ClientCR client(&config); 31 | 32 | ret = test_insert_lat(client); 33 | assert(ret == 0); 34 | 35 | ret = test_search_lat(client); 36 | assert(ret == 0); 37 | 38 | ret = test_update_lat(client); 39 | assert(ret == 0); 40 | 41 | ret = test_delete_lat(client); 42 | assert(ret == 0); 43 | } -------------------------------------------------------------------------------- /micro-test/micro_test.cc: -------------------------------------------------------------------------------- 1 | #include "micro_test.h" 2 | #include "client.h" 3 | #include "client_cr.h" 4 | 5 | static void timer_fb_func(volatile bool * should_stop, int seconds) { 6 | boost::this_fiber::sleep_for(std::chrono::seconds(seconds)); 7 | *should_stop = true; 8 | // printf("stopped!\n"); 9 | } 10 | 11 | static void timer_fb_func_ms(volatile bool * should_stop, int milliseconds) { 12 | boost::this_fiber::sleep_for(std::chrono::milliseconds(milliseconds)); 13 | *should_stop = true; 14 | // printf("stopped!\n"); 15 | } 16 | 17 | static int micro_test_tpt(Client & client, MicroRunClientArgs * args) { 18 | int ret = 0; 19 | ret = client.load_seq_kv_requests(client.micro_workload_num_, args->op_type); 20 | assert(ret == 0); 21 | 22 | printf("Test phase start\n"); 23 | boost::fibers::barrier global_barrier(client.num_coroutines_ + 1); 24 | ClientFiberArgs * fb_args_list = (ClientFiberArgs *)malloc(sizeof(ClientFiberArgs) * client.num_local_operations_); 25 | uint32_t coro_num_ops = client.num_local_operations_ / client.num_coroutines_; 26 | for (int i = 0; i < client.num_coroutines_; i ++) { 27 | fb_args_list[i].client = &client; 28 | fb_args_list[i].coro_id = i; 29 | fb_args_list[i].ops_num = coro_num_ops; 30 | fb_args_list[i].ops_st_idx = coro_num_ops * i; 31 | fb_args_list[i].num_failed = 0; 32 | fb_args_list[i].b = &global_barrier; 33 | fb_args_list[i].should_stop = args->should_stop; 34 | } 35 | fb_args_list[client.num_coroutines_ - 1].ops_num += client.num_local_operations_ % client.num_coroutines_; 36 | 37 | boost::fibers::fiber fb_list[client.num_coroutines_]; 38 | for (int i = 0; i < client.num_coroutines_; i ++) { 39 | boost::fibers::fiber fb(client_ops_fb_cnt_ops_micro, &fb_args_list[i]); 40 | fb_list[i] = std::move(fb); 41 | } 42 | 43 | global_barrier.wait(); 44 | boost::fibers::fiber timer_fb; 45 | if (args->thread_id == 0) { 46 | printf("%d initializes timer\n", args->thread_id); 47 | pthread_barrier_wait(args->timer_barrier); 48 | boost::fibers::fiber fb(timer_fb_func_ms, args->should_stop, client.workload_run_time_); 49 | timer_fb = std::move(fb); 50 | } else { 51 | printf("%d wait for timer\n", args->thread_id); 52 | pthread_barrier_wait(args->timer_barrier); 53 | } 54 | 55 | printf("%d passed barrier\n", args->thread_id); 56 | if (args->thread_id == 0) { 57 | timer_fb.join(); 58 | } 59 | uint32_t ops_cnt = 0; 60 | uint32_t num_failed = 0; 61 | for (int i = 0; i < client.num_coroutines_; i ++) { 62 | fb_list[i].join(); 63 | ops_cnt += fb_args_list[i].ops_cnt; 64 | num_failed += fb_args_list[i].num_failed; 65 | printf("fb%d finished\n", fb_args_list[i].coro_id); 66 | } 67 | printf("thread: %d %d ops/s\n", args->thread_id, ops_cnt / 10); 68 | printf("%d failed\n", num_failed); 69 | 70 | // update counter 71 | if (strcmp(args->op_type, "INSERT") == 0) { 72 | args->ret_num_insert_ops = ops_cnt; 73 | args->ret_fail_insert_num = num_failed; 74 | } else if (strcmp(args->op_type, "UPDATE") == 0) { 75 | args->ret_num_update_ops = ops_cnt; 76 | args->ret_fail_update_num = num_failed; 77 | } else if (strcmp(args->op_type, "READ") 
== 0) { 78 | args->ret_num_search_ops = ops_cnt; 79 | args->ret_fail_search_num = num_failed; 80 | } else { 81 | assert(strcmp(args->op_type, "DELETE") == 0); 82 | args->ret_num_delete_ops = ops_cnt; 83 | args->ret_fail_delete_num = num_failed; 84 | } 85 | free(fb_args_list); 86 | return 0; 87 | } 88 | 89 | static int micro_test_tpt(ClientCR & client, MicroRunClientArgs * args) { 90 | int ret = 0; 91 | ret = client.load_seq_kv_requests(client.micro_workload_num_, args->op_type); 92 | assert(ret == 0); 93 | 94 | printf("Test phase start\n"); 95 | boost::fibers::barrier global_barrier(client.num_coroutines_ + 1); 96 | ClientFiberArgs * fb_args_list = (ClientFiberArgs *)malloc(sizeof(ClientFiberArgs) * client.num_local_operations_); 97 | uint32_t coro_num_ops = client.num_local_operations_ / client.num_coroutines_; 98 | for (int i = 0; i < client.num_coroutines_; i ++) { 99 | fb_args_list[i].client_cr = &client; 100 | fb_args_list[i].coro_id = i; 101 | fb_args_list[i].ops_num = coro_num_ops; 102 | fb_args_list[i].ops_st_idx = coro_num_ops * i; 103 | fb_args_list[i].num_failed = 0; 104 | fb_args_list[i].b = &global_barrier; 105 | fb_args_list[i].should_stop = args->should_stop; 106 | } 107 | fb_args_list[client.num_coroutines_ - 1].ops_num += client.num_local_operations_ % client.num_coroutines_; 108 | 109 | boost::fibers::fiber fb_list[client.num_coroutines_]; 110 | for (int i = 0; i < client.num_coroutines_; i ++) { 111 | boost::fibers::fiber fb(client_cr_ops_fb_cnt_ops_micro, &fb_args_list[i]); 112 | fb_list[i] = std::move(fb); 113 | } 114 | 115 | global_barrier.wait(); 116 | boost::fibers::fiber timer_fb; 117 | if (args->thread_id == 0) { 118 | printf("%d initializes timer\n", args->thread_id); 119 | pthread_barrier_wait(args->timer_barrier); 120 | boost::fibers::fiber fb(timer_fb_func, args->should_stop, client.workload_run_time_); 121 | timer_fb = std::move(fb); 122 | } else { 123 | printf("%d wait for timer\n", args->thread_id); 124 | pthread_barrier_wait(args->timer_barrier); 125 | } 126 | 127 | printf("%d passed barrier\n", args->thread_id); 128 | if (args->thread_id == 0) { 129 | timer_fb.join(); 130 | } 131 | uint32_t ops_cnt = 0; 132 | uint32_t num_failed = 0; 133 | for (int i = 0; i < client.num_coroutines_; i ++) { 134 | fb_list[i].join(); 135 | ops_cnt += fb_args_list[i].ops_cnt; 136 | num_failed += fb_args_list[i].num_failed; 137 | printf("fb%d finished\n", fb_args_list[i].coro_id); 138 | } 139 | printf("thread: %d %d ops/s\n", args->thread_id, ops_cnt / 10); 140 | printf("%d failed\n", num_failed); 141 | 142 | // update counter 143 | if (strcmp(args->op_type, "INSERT") == 0) { 144 | args->ret_num_insert_ops = ops_cnt; 145 | args->ret_fail_insert_num = num_failed; 146 | } else if (strcmp(args->op_type, "UPDATE") == 0) { 147 | args->ret_num_update_ops = ops_cnt; 148 | args->ret_fail_update_num = num_failed; 149 | } else if (strcmp(args->op_type, "READ") == 0) { 150 | args->ret_num_search_ops = ops_cnt; 151 | args->ret_fail_search_num = num_failed; 152 | } else { 153 | assert(strcmp(args->op_type, "DELETE") == 0); 154 | args->ret_num_delete_ops = ops_cnt; 155 | args->ret_fail_delete_num = num_failed; 156 | } 157 | free(fb_args_list); 158 | return 0; 159 | } 160 | 161 | void * run_client(void * _args) { 162 | MicroRunClientArgs * args = (MicroRunClientArgs *)_args; 163 | 164 | int ret = 0; 165 | GlobalConfig config; 166 | ret = load_config(args->config_file, &config); 167 | assert(ret == 0); 168 | 169 | config.main_core_id = args->main_core_id; 170 | config.poll_core_id = 
args->poll_core_id; 171 | config.server_id += args->thread_id; 172 | 173 | cpu_set_t cpuset; 174 | CPU_ZERO(&cpuset); 175 | CPU_SET(config.main_core_id, &cpuset); 176 | pthread_t this_tid = pthread_self(); 177 | ret = pthread_setaffinity_np(this_tid, sizeof(cpuset), &cpuset); 178 | // assert(ret == 0); 179 | ret = pthread_getaffinity_np(this_tid, sizeof(cpuset), &cpuset); 180 | for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) { 181 | if (CPU_ISSET(i, &cpuset)) { 182 | printf("client %d main process running on core: %d\n", args->thread_id, i); 183 | } 184 | } 185 | 186 | Client client(&config); 187 | 188 | pthread_t polling_tid = client.start_polling_thread(); 189 | 190 | args->op_type = "INSERT"; 191 | client.workload_run_time_ = 500; 192 | if (args->thread_id == 0) { 193 | printf("press to sync start %s\n", args->op_type); 194 | getchar(); 195 | } 196 | pthread_barrier_wait(args->insert_start_barrier); 197 | 198 | // insert 199 | printf("%d start %s\n", args->thread_id, args->op_type); 200 | ret = micro_test_tpt(client, args); 201 | assert(ret == 0); 202 | printf("%d %s finished\n", args->thread_id, args->op_type); 203 | pthread_barrier_wait(args->insert_finish_barrier); 204 | 205 | args->op_type = "READ"; 206 | client.workload_run_time_ = 5000; 207 | if (args->thread_id == 0) { 208 | pthread_barrier_init(args->timer_barrier, NULL, args->num_threads); 209 | *args->should_stop = false; 210 | printf("press to sync start %s\n", args->op_type); 211 | getchar(); 212 | } 213 | pthread_barrier_wait(args->search_start_barrier); 214 | 215 | printf("%d start %s\n", args->thread_id, args->op_type); 216 | ret = micro_test_tpt(client, args); 217 | assert(ret == 0); 218 | printf("%d %s finished\n", args->thread_id, args->op_type); 219 | pthread_barrier_wait(args->search_finish_barrier); 220 | 221 | args->op_type = "UPDATE"; 222 | client.workload_run_time_ = 5000; 223 | if (args->thread_id == 0) { 224 | pthread_barrier_init(args->timer_barrier, NULL, args->num_threads); 225 | *args->should_stop = false; 226 | printf("press to sync start %s\n", args->op_type); 227 | getchar(); 228 | } 229 | pthread_barrier_wait(args->update_start_barrier); 230 | 231 | printf("%d start %s\n", args->thread_id, args->op_type); 232 | ret = micro_test_tpt(client, args); 233 | assert(ret == 0); 234 | printf("%d %s finished\n", args->thread_id, args->op_type); 235 | pthread_barrier_wait(args->update_finish_barrier); 236 | 237 | args->op_type = "DELETE"; 238 | client.workload_run_time_ = 500; 239 | if (args->thread_id == 0) { 240 | pthread_barrier_init(args->timer_barrier, NULL, args->num_threads); 241 | *args->should_stop = false; 242 | printf("press to sync start %s\n", args->op_type); 243 | getchar(); 244 | } 245 | pthread_barrier_wait(args->delete_start_barrier); 246 | 247 | printf("%d start %s\n", args->thread_id, args->op_type); 248 | ret = micro_test_tpt(client, args); 249 | assert(ret == 0); 250 | printf("%d %s finished\n", args->thread_id, args->op_type); 251 | pthread_barrier_wait(args->delete_finish_barrier); 252 | 253 | client.stop_polling_thread(); 254 | pthread_join(polling_tid, NULL); 255 | return 0; 256 | } 257 | 258 | void * run_client_cr(void * _args) { 259 | MicroRunClientArgs * args = (MicroRunClientArgs *)_args; 260 | 261 | int ret = 0; 262 | GlobalConfig config; 263 | ret = load_config(args->config_file, &config); 264 | assert(ret == 0); 265 | 266 | config.main_core_id = args->main_core_id; 267 | config.poll_core_id = args->poll_core_id; 268 | config.server_id += args->thread_id; 269 | 270 | 
cpu_set_t cpuset; 271 | CPU_ZERO(&cpuset); 272 | CPU_SET(config.main_core_id, &cpuset); 273 | pthread_t this_tid = pthread_self(); 274 | ret = pthread_setaffinity_np(this_tid, sizeof(cpuset), &cpuset); 275 | // assert(ret == 0); 276 | ret = pthread_getaffinity_np(this_tid, sizeof(cpuset), &cpuset); 277 | for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) { 278 | if (CPU_ISSET(i, &cpuset)) { 279 | printf("client %d main process running on core: %d\n", args->thread_id, i); 280 | } 281 | } 282 | 283 | ClientCR client(&config); 284 | 285 | pthread_t polling_tid = client.start_polling_thread(); 286 | 287 | args->op_type = "INSERT"; 288 | if (args->thread_id == 0) { 289 | printf("press to sync start %s\n", args->op_type); 290 | getchar(); 291 | } 292 | pthread_barrier_wait(args->insert_start_barrier); 293 | 294 | // insert 295 | printf("%d start %s\n", args->thread_id, args->op_type); 296 | ret = micro_test_tpt(client, args); 297 | assert(ret == 0); 298 | printf("%d %s finished\n", args->thread_id, args->op_type); 299 | pthread_barrier_wait(args->insert_finish_barrier); 300 | 301 | args->op_type = "UPDATE"; 302 | if (args->thread_id == 0) { 303 | pthread_barrier_init(args->timer_barrier, NULL, args->num_threads); 304 | *args->should_stop = false; 305 | printf("press to sync start %s\n", args->op_type); 306 | getchar(); 307 | } 308 | pthread_barrier_wait(args->update_start_barrier); 309 | 310 | printf("%d start %s\n", args->thread_id, args->op_type); 311 | ret = micro_test_tpt(client, args); 312 | assert(ret == 0); 313 | printf("%d %s finished\n", args->thread_id, args->op_type); 314 | pthread_barrier_wait(args->update_finish_barrier); 315 | 316 | args->op_type = "READ"; 317 | if (args->thread_id == 0) { 318 | pthread_barrier_init(args->timer_barrier, NULL, args->num_threads); 319 | *args->should_stop = false; 320 | printf("press to sync start %s\n", args->op_type); 321 | getchar(); 322 | } 323 | pthread_barrier_wait(args->search_start_barrier); 324 | 325 | printf("%d start %s\n", args->thread_id, args->op_type); 326 | ret = micro_test_tpt(client, args); 327 | assert(ret == 0); 328 | printf("%d %s finished\n", args->thread_id, args->op_type); 329 | pthread_barrier_wait(args->search_finish_barrier); 330 | 331 | args->op_type = "DELETE"; 332 | if (args->thread_id == 0) { 333 | pthread_barrier_init(args->timer_barrier, NULL, args->num_threads); 334 | *args->should_stop = false; 335 | printf("press to sync start %s\n", args->op_type); 336 | getchar(); 337 | } 338 | pthread_barrier_wait(args->delete_start_barrier); 339 | 340 | printf("%d start %s\n", args->thread_id, args->op_type); 341 | ret = micro_test_tpt(client, args); 342 | assert(ret == 0); 343 | printf("%d %s finished\n", args->thread_id, args->op_type); 344 | pthread_barrier_wait(args->delete_finish_barrier); 345 | 346 | client.stop_polling_thread(); 347 | pthread_join(polling_tid, NULL); 348 | return 0; 349 | } -------------------------------------------------------------------------------- /micro-test/micro_test.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_MICRO_TEST_H_ 2 | #define DDCKV_MICRO_TEST_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "client.h" 8 | 9 | typedef struct TagMicroRunClientArgs { 10 | int thread_id; 11 | int main_core_id; 12 | int poll_core_id; 13 | char * workload_name; 14 | char * config_file; 15 | pthread_barrier_t * insert_start_barrier; 16 | pthread_barrier_t * insert_finish_barrier; 17 | pthread_barrier_t * update_start_barrier; 18 | 
pthread_barrier_t * update_finish_barrier; 19 | pthread_barrier_t * search_start_barrier; 20 | pthread_barrier_t * search_finish_barrier; 21 | pthread_barrier_t * delete_start_barrier; 22 | pthread_barrier_t * delete_finish_barrier; 23 | volatile bool * should_stop; 24 | // bool * timer_is_ready; 25 | pthread_barrier_t * timer_barrier; 26 | 27 | uint32_t ret_num_insert_ops; 28 | uint32_t ret_num_update_ops; 29 | uint32_t ret_num_search_ops; 30 | uint32_t ret_num_delete_ops; 31 | uint32_t ret_fail_insert_num; 32 | uint32_t ret_fail_update_num; 33 | uint32_t ret_fail_search_num; 34 | uint32_t ret_fail_delete_num; 35 | 36 | uint32_t client_id; 37 | uint32_t num_threads; 38 | char * op_type; 39 | Client * client; 40 | } MicroRunClientArgs; 41 | 42 | void * run_client(void * _args); 43 | void * run_client_cr(void * _args); 44 | 45 | #endif -------------------------------------------------------------------------------- /micro-test/micro_test_multi_client.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "client.h" 10 | #include "micro_test.h" 11 | 12 | static void start_client_threads(char * op_type, int num_clients, GlobalConfig * config, 13 | char * config_fname) { 14 | MicroRunClientArgs * client_args_list = (MicroRunClientArgs *)malloc(sizeof(MicroRunClientArgs) * num_clients); 15 | pthread_barrier_t insert_start_barrier; 16 | pthread_barrier_t insert_finish_barrier; 17 | pthread_barrier_t update_start_barrier; 18 | pthread_barrier_t update_finish_barrier; 19 | pthread_barrier_t search_start_barrier; 20 | pthread_barrier_t search_finish_barrier; 21 | pthread_barrier_t delete_start_barrier; 22 | pthread_barrier_t delete_finish_barrier; 23 | pthread_barrier_t global_timer_barrier; 24 | pthread_barrier_init(&insert_start_barrier, NULL, num_clients); 25 | pthread_barrier_init(&insert_finish_barrier, NULL, num_clients); 26 | pthread_barrier_init(&update_start_barrier, NULL, num_clients); 27 | pthread_barrier_init(&update_finish_barrier, NULL, num_clients); 28 | pthread_barrier_init(&search_start_barrier, NULL, num_clients); 29 | pthread_barrier_init(&search_finish_barrier, NULL, num_clients); 30 | pthread_barrier_init(&delete_start_barrier, NULL, num_clients); 31 | pthread_barrier_init(&delete_finish_barrier, NULL, num_clients); 32 | pthread_barrier_init(&global_timer_barrier, NULL, num_clients); 33 | volatile bool should_stop = false; 34 | 35 | pthread_t tid_list[num_clients]; 36 | for (int i = 0; i < num_clients; i ++) { 37 | client_args_list[i].client_id = config->server_id - config->memory_num; 38 | client_args_list[i].thread_id = i; 39 | client_args_list[i].num_threads = num_clients; 40 | client_args_list[i].main_core_id = config->main_core_id + i * 2; 41 | client_args_list[i].poll_core_id = config->poll_core_id + i * 2; 42 | client_args_list[i].config_file = config_fname; 43 | client_args_list[i].insert_start_barrier= &insert_start_barrier; 44 | client_args_list[i].insert_finish_barrier= &insert_finish_barrier; 45 | client_args_list[i].update_start_barrier= &update_start_barrier; 46 | client_args_list[i].update_finish_barrier= &update_finish_barrier; 47 | client_args_list[i].search_start_barrier= &search_start_barrier; 48 | client_args_list[i].search_finish_barrier= &search_finish_barrier; 49 | client_args_list[i].delete_start_barrier= &delete_start_barrier; 50 | client_args_list[i].delete_finish_barrier= &delete_finish_barrier; 51 | client_args_list[i].timer_barrier 
= &global_timer_barrier; 52 | client_args_list[i].should_stop = &should_stop; 53 | client_args_list[i].ret_num_insert_ops = 0; 54 | client_args_list[i].ret_num_update_ops = 0; 55 | client_args_list[i].ret_num_search_ops = 0; 56 | client_args_list[i].ret_num_delete_ops = 0; 57 | client_args_list[i].ret_fail_insert_num = 0; 58 | client_args_list[i].ret_fail_update_num = 0; 59 | client_args_list[i].ret_fail_search_num = 0; 60 | client_args_list[i].ret_fail_delete_num = 0; 61 | client_args_list[i].op_type = op_type; 62 | pthread_t tid; 63 | pthread_create(&tid, NULL, run_client, &client_args_list[i]); 64 | tid_list[i] = tid; 65 | } 66 | 67 | uint32_t total_insert_tpt = 0; 68 | uint32_t total_insert_failed = 0; 69 | uint32_t total_update_tpt = 0; 70 | uint32_t total_update_failed = 0; 71 | uint32_t total_search_tpt = 0; 72 | uint32_t total_search_failed = 0; 73 | uint32_t total_delete_tpt = 0; 74 | uint32_t total_delete_failed = 0; 75 | for (int i = 0; i < num_clients; i ++) { 76 | pthread_join(tid_list[i], NULL); 77 | total_insert_tpt += client_args_list[i].ret_num_insert_ops; 78 | total_update_tpt += client_args_list[i].ret_num_update_ops; 79 | total_search_tpt += client_args_list[i].ret_num_search_ops; 80 | total_delete_tpt += client_args_list[i].ret_num_delete_ops; 81 | total_insert_failed += client_args_list[i].ret_fail_insert_num; 82 | total_update_failed += client_args_list[i].ret_fail_update_num; 83 | total_search_failed += client_args_list[i].ret_fail_search_num; 84 | total_delete_failed += client_args_list[i].ret_fail_delete_num; 85 | } 86 | printf("insert total: %d ops\n", total_insert_tpt); 87 | printf("insert failed: %d ops\n", total_insert_failed); 88 | printf("insert tpt: %d ops/s\n", (total_insert_tpt - total_insert_failed) * 1000 / 500); 89 | printf("update total: %d ops\n", total_update_tpt); 90 | printf("update failed: %d ops\n", total_update_failed); 91 | printf("update tpt: %d ops/s\n", (total_update_tpt - total_update_failed) * 1000 / 5000); 92 | printf("search total: %d ops\n", total_search_tpt); 93 | printf("search failed: %d ops\n", total_search_failed); 94 | printf("search tpt: %d ops/s\n", (total_search_tpt - total_search_failed) * 1000 / 5000); 95 | printf("delete total: %d ops\n", total_delete_tpt); 96 | printf("delete failed: %d ops\n", total_delete_failed); 97 | printf("delete tpt: %d ops/s\n", (total_delete_tpt - total_delete_failed) * 1000 / 500); 98 | free(client_args_list); 99 | } 100 | 101 | int main(int argc, char ** argv) { 102 | if (argc != 3) { 103 | printf("Usage: %s path-to-config-file num-clients\n", argv[0]); 104 | return 1; 105 | } 106 | 107 | int num_clients = atoi(argv[2]); 108 | 109 | GlobalConfig config; 110 | int ret = load_config(argv[1], &config); 111 | assert(ret == 0); 112 | 113 | start_client_threads("INSERT", num_clients, &config, argv[1]); 114 | } -------------------------------------------------------------------------------- /micro-test/micro_test_multi_client_cr.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "client.h" 10 | #include "micro_test.h" 11 | 12 | static void start_client_threads(char * op_type, int num_clients, GlobalConfig * config, 13 | char * config_fname) { 14 | MicroRunClientArgs * client_args_list = (MicroRunClientArgs *)malloc(sizeof(MicroRunClientArgs) * num_clients); 15 | pthread_barrier_t insert_start_barrier; 16 | pthread_barrier_t insert_finish_barrier; 17 | pthread_barrier_t 
update_start_barrier; 18 | pthread_barrier_t update_finish_barrier; 19 | pthread_barrier_t search_start_barrier; 20 | pthread_barrier_t search_finish_barrier; 21 | pthread_barrier_t delete_start_barrier; 22 | pthread_barrier_t delete_finish_barrier; 23 | pthread_barrier_t global_timer_barrier; 24 | pthread_barrier_init(&insert_start_barrier, NULL, num_clients); 25 | pthread_barrier_init(&insert_finish_barrier, NULL, num_clients); 26 | pthread_barrier_init(&update_start_barrier, NULL, num_clients); 27 | pthread_barrier_init(&update_finish_barrier, NULL, num_clients); 28 | pthread_barrier_init(&search_start_barrier, NULL, num_clients); 29 | pthread_barrier_init(&search_finish_barrier, NULL, num_clients); 30 | pthread_barrier_init(&delete_start_barrier, NULL, num_clients); 31 | pthread_barrier_init(&delete_finish_barrier, NULL, num_clients); 32 | pthread_barrier_init(&global_timer_barrier, NULL, num_clients); 33 | volatile bool should_stop = false; 34 | 35 | pthread_t tid_list[num_clients]; 36 | for (int i = 0; i < num_clients; i ++) { 37 | client_args_list[i].client_id = config->server_id - config->memory_num; 38 | client_args_list[i].thread_id = i; 39 | client_args_list[i].num_threads = num_clients; 40 | client_args_list[i].main_core_id = config->main_core_id + i * 2; 41 | client_args_list[i].poll_core_id = config->poll_core_id + i * 2; 42 | client_args_list[i].config_file = config_fname; 43 | client_args_list[i].insert_start_barrier= &insert_start_barrier; 44 | client_args_list[i].insert_finish_barrier= &insert_finish_barrier; 45 | client_args_list[i].update_start_barrier= &update_start_barrier; 46 | client_args_list[i].update_finish_barrier= &update_finish_barrier; 47 | client_args_list[i].search_start_barrier= &search_start_barrier; 48 | client_args_list[i].search_finish_barrier= &search_finish_barrier; 49 | client_args_list[i].delete_start_barrier= &delete_start_barrier; 50 | client_args_list[i].delete_finish_barrier= &delete_finish_barrier; 51 | client_args_list[i].timer_barrier = &global_timer_barrier; 52 | client_args_list[i].should_stop = &should_stop; 53 | client_args_list[i].ret_num_insert_ops = 0; 54 | client_args_list[i].ret_num_update_ops = 0; 55 | client_args_list[i].ret_num_search_ops = 0; 56 | client_args_list[i].ret_num_delete_ops = 0; 57 | client_args_list[i].ret_fail_insert_num = 0; 58 | client_args_list[i].ret_fail_update_num = 0; 59 | client_args_list[i].ret_fail_search_num = 0; 60 | client_args_list[i].ret_fail_delete_num = 0; 61 | client_args_list[i].op_type = op_type; 62 | pthread_t tid; 63 | pthread_create(&tid, NULL, run_client_cr, &client_args_list[i]); 64 | tid_list[i] = tid; 65 | } 66 | 67 | uint32_t total_insert_tpt = 0; 68 | uint32_t total_insert_failed = 0; 69 | uint32_t total_update_tpt = 0; 70 | uint32_t total_update_failed = 0; 71 | uint32_t total_search_tpt = 0; 72 | uint32_t total_search_failed = 0; 73 | uint32_t total_delete_tpt = 0; 74 | uint32_t total_delete_failed = 0; 75 | for (int i = 0; i < num_clients; i ++) { 76 | pthread_join(tid_list[i], NULL); 77 | total_insert_tpt += client_args_list[i].ret_num_insert_ops; 78 | total_update_tpt += client_args_list[i].ret_num_update_ops; 79 | total_search_tpt += client_args_list[i].ret_num_search_ops; 80 | total_delete_tpt += client_args_list[i].ret_num_delete_ops; 81 | total_insert_failed += client_args_list[i].ret_fail_insert_num; 82 | total_update_failed += client_args_list[i].ret_fail_update_num; 83 | total_search_failed += client_args_list[i].ret_fail_search_num; 84 | total_delete_failed += 
client_args_list[i].ret_fail_delete_num; 85 | } 86 | printf("insert total: %d ops\n", total_insert_tpt); 87 | printf("insert failed: %d ops\n", total_insert_failed); 88 | printf("insert tpt: %d ops/s\n", (total_insert_tpt - total_insert_failed) / config->workload_run_time); 89 | printf("update total: %d ops\n", total_update_tpt); 90 | printf("update failed: %d ops\n", total_update_failed); 91 | printf("update tpt: %d ops/s\n", (total_update_tpt - total_update_failed) / config->workload_run_time); 92 | printf("search total: %d ops\n", total_search_tpt); 93 | printf("search failed: %d ops\n", total_search_failed); 94 | printf("search tpt: %d ops/s\n", (total_search_tpt - total_search_failed) / config->workload_run_time); 95 | printf("delete total: %d ops\n", total_delete_tpt); 96 | printf("delete failed: %d ops\n", total_delete_failed); 97 | printf("delete tpt: %d ops/s\n", (total_delete_tpt - total_delete_failed) / config->workload_run_time); 98 | free(client_args_list); 99 | } 100 | 101 | int main(int argc, char ** argv) { 102 | if (argc != 3) { 103 | printf("Usage: %s path-to-config-file num-clients\n", argv[0]); 104 | return 1; 105 | } 106 | 107 | int num_clients = atoi(argv[2]); 108 | 109 | GlobalConfig config; 110 | int ret = load_config(argv[1], &config); 111 | assert(ret == 0); 112 | 113 | start_client_threads("INSERT", num_clients, &config, argv[1]); 114 | } -------------------------------------------------------------------------------- /setup/download_gdrive.py: -------------------------------------------------------------------------------- 1 | import gdown 2 | import sys 3 | 4 | fid = sys.argv[1] 5 | output = sys.argv[2] 6 | 7 | url = "https://drive.google.com/uc?id={}&export=download".format(fid) 8 | 9 | gdown.download(url, output, quiet=False) -------------------------------------------------------------------------------- /setup/download_workload.sh: -------------------------------------------------------------------------------- 1 | # install python and gdown 2 | sudo apt install python3-pip -y 3 | pip3 install gdown 4 | 5 | # download workload 6 | echo "downloading workloads.tgz" 7 | if [ ! -d "./workloads.tgz" ]; then 8 | python3 ./download_gdrive.py 1Ifd8AwQ5e6EMcm3l9yYn8tgI3qMwhRpb workloads.tgz 9 | fi 10 | 11 | echo "downloading micro-workloads.tgz" 12 | if [ ! -d "./micro-workloads.tgz" ]; then 13 | python3 ./download_gdrive.py 1727S-g5j568BEgqMjc4zghT2_pz0EZhf micro-workloads.tgz 14 | fi 15 | 16 | # decompress upd-workload 17 | echo "downloading upd-workloads" 18 | if [ ! -d "./upd-workloads" ]; then 19 | python3 ./download_gdrive.py 1CJjkswX08XqoF2RaxXBiKgWapjyMrXdi upd-workloads.tgz 20 | fi 21 | 22 | # decompress workload 23 | echo "decompressing workload files" 24 | if [ ! -d "./workloads" ]; then 25 | tar zxvf workloads.tgz 26 | fi 27 | 28 | if [ ! -d "./micro-workloads" ]; then 29 | tar zxvf micro-workloads.tgz 30 | fi 31 | 32 | if [ ! 
-d "./upd-workloads" ]; then 33 | tar zxvf upd-workloads.tgz 34 | fi -------------------------------------------------------------------------------- /setup/setup-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #!/bin/bash 3 | 4 | mode="$1" 5 | ubuntu_version=$(lsb_release -r -s) 6 | 7 | if [ $ubuntu_version == "18.04" ]; then 8 | wget https://content.mellanox.com/ofed/MLNX_OFED-4.9-5.1.0.0/MLNX_OFED_LINUX-4.9-5.1.0.0-ubuntu18.04-x86_64.tgz 9 | mv MLNX_OFED_LINUX-4.9-5.1.0.0-ubuntu18.04-x86_64.tgz ofed.tgz 10 | elif [ $ubuntu_version == "20.04" ]; then 11 | wget https://content.mellanox.com/ofed/MLNX_OFED-4.9-5.1.0.0/MLNX_OFED_LINUX-4.9-5.1.0.0-ubuntu20.04-x86_64.tgz 12 | mv MLNX_OFED_LINUX-4.9-5.1.0.0-ubuntu20.04-x86_64.tgz ofed.tgz 13 | else 14 | echo "Wrong ubuntu distribution for $mode!" 15 | exit 0 16 | fi 17 | echo $mode $ubuntu_version $ofed_fid 18 | 19 | sudo apt update -y 20 | 21 | # install anaconda 22 | mkdir install 23 | mv ofed.tgz install 24 | 25 | cd install 26 | if [ ! -f "./anaconda-install.sh" ]; then 27 | wget https://repo.anaconda.com/archive/Anaconda3-2022.05-Linux-x86_64.sh -O anaconda-install.sh 28 | fi 29 | if [ ! -d "$HOME/anaconda3" ]; then 30 | chmod +x anaconda-install.sh 31 | ./anaconda-install.sh -b 32 | export PATH=$PATH:$HOME/anaconda3/bin 33 | # add conda to path 34 | echo PATH=$PATH:$HOME/anaconda3/bin >> $HOME/.bashrc 35 | conda init 36 | source ~/.bashrc 37 | # activate base 38 | fi 39 | conda activate base 40 | cd .. 41 | 42 | pip install gdown 43 | sudo apt install memcached -y 44 | sudo apt install libtbb-dev libboost-all-dev -y 45 | 46 | # install ofed 47 | cd install 48 | if [ ! -d "./ofed" ]; then 49 | tar zxf ofed.tgz 50 | mv MLNX* ofed 51 | fi 52 | cd ofed 53 | sudo ./mlnxofedinstall --force 54 | if [ $mode == "scalestore" ]; then 55 | sudo /etc/init.d/openibd restart 56 | fi 57 | cd .. 58 | 59 | # install cmake 60 | cd install 61 | if [ ! -f cmake-3.16.8.tar.gz ]; then 62 | wget https://cmake.org/files/v3.16/cmake-3.16.8.tar.gz 63 | fi 64 | if [ ! -d "./cmake-3.16.8" ]; then 65 | tar zxf cmake-3.16.8.tar.gz 66 | cd cmake-3.16.8 && ./configure && make -j 4 && sudo make install 67 | fi 68 | cd .. 69 | 70 | # install gtest 71 | if [ ! -d "/usr/src/gtest" ]; then 72 | sudo apt install -y libgtest-dev 73 | fi 74 | cd /usr/src/gtest 75 | sudo cmake . 
76 | sudo make 77 | sudo make install -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | list(APPEND source_ddckv 2 | nm.cc 3 | ib.cc 4 | server_mm.cc 5 | server.cc 6 | kv_utils.cc 7 | hashtable.cc 8 | client_mm.cc 9 | client.cc 10 | client_cr.cc) 11 | 12 | add_library(libddckv STATIC ${source_ddckv}) 13 | 14 | target_compile_options( 15 | libddckv 16 | PRIVATE 17 | "-O2" 18 | # ${CMAKE_CXX_FLAGS_DEBUG} 19 | "-g" 20 | # "-D_DEBUG" 21 | ) 22 | 23 | target_link_libraries(libddckv ${Boost_LIBRARIES} boost_context boost_fiber tbb) 24 | 25 | -------------------------------------------------------------------------------- /src/client_cr.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_CLIENT_CR_H_ 2 | #define DDCKV_CLIENT_CR_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "client_mm.h" 15 | #include "nm.h" 16 | #include "kv_utils.h" 17 | #include "hashtable.h" 18 | #include "ib.h" 19 | #include "kv_debug.h" 20 | #include "client.h" 21 | 22 | class ClientCR { 23 | private: 24 | ClientMM * mm_; 25 | UDPNetworkManager * nm_; 26 | 27 | uint32_t my_server_id_; 28 | uint32_t num_replication_; 29 | uint32_t num_memory_; 30 | uint32_t num_idx_rep_; 31 | 32 | uint8_t pr_log_server_id_; 33 | uint64_t pr_log_head_; 34 | uint64_t pr_log_tail_; 35 | 36 | uint64_t remote_global_meta_addr_; 37 | uint64_t remote_meta_addr_; 38 | uint64_t remote_gc_addr_; 39 | uint64_t remote_root_addr_; 40 | 41 | uint64_t server_st_addr_; 42 | uint64_t server_data_len_; 43 | 44 | float miss_rate_threash_; 45 | 46 | RaceHashRoot * race_root_; 47 | struct ibv_mr * race_root_mr_; 48 | 49 | void * local_buf_; 50 | struct ibv_mr * local_buf_mr_; 51 | 52 | void * input_buf_; 53 | struct ibv_mr * input_buf_mr_; 54 | 55 | uint64_t * coro_local_addr_list_; 56 | 57 | std::map addr_cache_; 58 | std::map server_mr_info_map_; 59 | 60 | // core bind information 61 | uint32_t main_core_id_; 62 | uint32_t poll_core_id_; 63 | uint32_t bg_core_id_; 64 | uint32_t gc_core_id_; 65 | 66 | // crash testing information 67 | std::map server_crash_map_; 68 | std::vector meta_addr_info_; 69 | 70 | // private inline methods 71 | private: 72 | inline int get_race_root() { 73 | int ret = nm_->nm_rdma_read_from_sid((void *)race_root_, race_root_mr_->lkey, sizeof(RaceHashRoot), 74 | remote_root_addr_, server_mr_info_map_[0]->rkey, 0); 75 | // assert(ret == 0); 76 | return 0; 77 | } 78 | 79 | inline int write_race_root() { 80 | int ret = 0; 81 | for (int i = 0; i < num_replication_; i ++) { 82 | ret = nm_->nm_rdma_write_to_sid((void *)race_root_, race_root_mr_->lkey, sizeof(RaceHashRoot), 83 | remote_root_addr_, server_mr_info_map_[i]->rkey, i); 84 | // assert(ret == 0); 85 | } 86 | return 0; 87 | } 88 | 89 | inline char * get_key(KVInfo * kv_info) { 90 | return (char *)((uint64_t)kv_info->l_addr + sizeof(KVLogHeader)); 91 | } 92 | 93 | inline char * get_value(KVInfo * kv_info) { 94 | return (char *)((uint64_t)kv_info->l_addr + sizeof(KVLogHeader) + kv_info->key_len); 95 | } 96 | 97 | inline KVLogHeader * get_header(KVInfo * kv_info) { 98 | return (KVLogHeader *)kv_info->l_addr; 99 | } 100 | 101 | inline void update_cache(std::string key_str, RaceHashSlot * slot_info, uint64_t * r_slot_addr_list) { 102 | // char key_buf[128] = {0}; 103 | // memcpy(key_buf, get_key(kv_info), kv_info->key_len); 104 
| // std::string tmp_key(key_buf); 105 | 106 | std::map::iterator it = addr_cache_.find(key_str); 107 | if (it != addr_cache_.end()) { 108 | LocalCacheEntry * entry = it->second; 109 | // check if is miss 110 | if (*(uint64_t *)(&entry->l_slot_ptr) != *(uint64_t *)slot_info) { 111 | entry->miss_cnt ++; 112 | memcpy(&entry->l_slot_ptr, slot_info, sizeof(RaceHashSlot)); 113 | for (int i = 0; i < num_idx_rep_; i ++) { 114 | entry->r_slot_addr[i] = r_slot_addr_list[i]; 115 | } 116 | } 117 | // update access cnt 118 | entry->acc_cnt ++; 119 | return; 120 | } 121 | 122 | LocalCacheEntry * tmp_value = (LocalCacheEntry *)malloc(sizeof(LocalCacheEntry)); 123 | memcpy(&tmp_value->l_slot_ptr, slot_info, sizeof(RaceHashSlot)); 124 | tmp_value->acc_cnt = 1; 125 | tmp_value->miss_cnt = 0; 126 | 127 | for (int i = 0; i < num_idx_rep_; i ++) { 128 | tmp_value->r_slot_addr[i] = r_slot_addr_list[i]; 129 | } 130 | 131 | addr_cache_[key_str] = tmp_value; 132 | // print_log(DEBUG, "\t[%s] %s->slot(%lx) kv(%lx)", __FUNCTION__, key_buf, r_slot_addr_list[0], HashIndexConvert40To64Bits(tmp_value->l_slot_ptr.pointer)); 133 | } 134 | 135 | inline LocalCacheEntry * check_cache(std::string key_str) { 136 | // char key_buf[128] = {0}; 137 | // memcpy(key_buf, get_key(kv_info), kv_info->key_len); 138 | // std::string tmp_key(key_buf); 139 | 140 | std::map::iterator it = addr_cache_.find(key_str); 141 | if (it == addr_cache_.end()) { 142 | // print_log(DEBUG, "\t\t[%s] cache miss", __FUNCTION__); 143 | return NULL; 144 | } 145 | if (HashIndexConvert40To64Bits(it->second->l_slot_ptr.pointer) == 0) { 146 | free(it->second); 147 | addr_cache_.erase(it); 148 | // print_log(DEBUG, "\t\t[%s] cache empty pointer miss", __FUNCTION__); 149 | return NULL; 150 | } 151 | 152 | float miss_rate = ((float)it->second->miss_cnt / it->second->acc_cnt); 153 | if (miss_rate > miss_rate_threash_) { 154 | return NULL; 155 | } 156 | // print_log(DEBUG, "\t\t[%s] cache hit", __FUNCTION__); 157 | return it->second; 158 | } 159 | 160 | inline void remove_cache(std::string key_str) { 161 | std::map::iterator it = addr_cache_.find(key_str); 162 | if (it != addr_cache_.end()) { 163 | addr_cache_.erase(it); 164 | } 165 | } 166 | 167 | inline bool delete_cache(KVInfo * kv_info) { 168 | char key_buf[256]; 169 | memset(key_buf, 0, 256); 170 | memcpy(key_buf, get_key(kv_info), kv_info->key_len); 171 | std::string tmp_key(key_buf); 172 | 173 | return addr_cache_.erase(tmp_key); 174 | } 175 | 176 | inline bool check_key(KVLogHeader * log_header, KVInfo * kv_info) { 177 | uint64_t r_key_addr = (uint64_t)log_header + sizeof(log_header); 178 | uint64_t l_key_addr = (uint64_t)kv_info->l_addr + sizeof(KVLogHeader); 179 | return CheckKey((void *)r_key_addr, log_header->key_length, (void *)l_key_addr, kv_info->key_len); 180 | } 181 | 182 | inline int poll_completion(std::map & wait_wrid_wc_map) { 183 | int ret = 0; 184 | while (ib_is_all_wrid_finished(wait_wrid_wc_map) == false) { 185 | // print_log(DEBUG, "\t\t[%s] fiber: %ld yielding", __FUNCTION__, boost::this_fiber::get_id()); 186 | // boost::this_fiber::yield(); 187 | boost::this_fiber::sleep_for(std::chrono::microseconds(10)); 188 | ret = nm_->nm_check_completion(wait_wrid_wc_map); 189 | // kv_assert(ret == 0); 190 | } 191 | return ret; 192 | } 193 | 194 | inline int poll_completion(std::map & wait_wrid_wc_map, volatile bool * should_stop) { 195 | int ret = 0; 196 | while (ib_is_all_wrid_finished(wait_wrid_wc_map) == false && (*should_stop) == false) { 197 | // print_log(DEBUG, "\t\t[%s] fiber: %ld 
yielding", __FUNCTION__, boost::this_fiber::get_id()); 198 | if (*(should_stop)) { 199 | return ret; 200 | } 201 | boost::this_fiber::yield(); 202 | ret = nm_->nm_check_completion(wait_wrid_wc_map); 203 | // kv_assert(ret == 0); 204 | } 205 | return ret; 206 | } 207 | 208 | // private methods 209 | private: 210 | bool init_is_finished(); 211 | int sync_init_finish(); 212 | int connect_ib_qps(); 213 | int write_client_meta_info(); 214 | 215 | void get_kv_addr_info(KVHashInfo * a_kv_hash_info, __OUT KVTableAddrInfo * a_kv_addr_info); 216 | void get_kv_hash_info(KVInfo * a_kv_info, __OUT KVHashInfo * a_kv_hash_info); 217 | void fill_slot(ClientMMAllocCtx * mm_alloc_ctx, KVHashInfo * a_kv_hash_info, 218 | __OUT RaceHashSlot * local_slot); 219 | void fill_cas_addr(KVTableAddrInfo * addr_info, uint64_t remote_slot_addr, RaceHashSlot * old_local_slot_addr, RaceHashSlot * new_local_slot_addr, 220 | __OUT KVCASAddr * pr_cas_addr, __OUT KVCASAddr * bk_cas_addr); 221 | void fill_cas_addr(KVReqCtx * ctx, uint64_t * remote_slot_addr, RaceHashSlot * old_local_slot_addr, RaceHashSlot * new_local_slot_addr); 222 | void fill_heartbeat_addr(uint8_t server_id, __OUT KVRWAddr * hb_addr); 223 | void fill_invalid_addr(KVReqCtx * ctx, RaceHashSlot * local_slot); 224 | 225 | IbvSrList * gen_read_bucket_sr_lists(KVReqCtx * ctx, __OUT uint32_t * num_sr_lists); 226 | void free_read_bucket_sr_lists(IbvSrList * sr_list); 227 | IbvSrList * gen_read_all_bucket_sr_lists(KVReqCtx * ctx, __OUT uint32_t * num_sr_lists); 228 | void free_read_all_bucket_sr_lists(IbvSrList * sr_list); 229 | IbvSrList * gen_read_bucket_sr_lists_on_crash(KVReqCtx * ctx, __OUT uint32_t * num_sr_lists); 230 | void free_read_bucket_sr_lists_on_crash(IbvSrList * sr_lists, int num_sr_lists); 231 | IbvSrList * gen_write_kv_sr_lists(uint32_t coro_id, KVInfo * a_kv_info, ClientMMAllocCtx * r_mm_info, __OUT uint32_t * num_sr_lists); 232 | void free_write_kv_sr_lists(IbvSrList * sr_list); 233 | IbvSrList * gen_write_del_log_sr_lists(uint32_t coro_id, KVInfo * a_kv_info, 234 | ClientMMAllocCtx * r_mm_info, __OUT uint32_t * num_sr_lists); 235 | void free_write_del_log_sr_lists(IbvSrList * sr_list); 236 | IbvSrList * gen_read_kv_sr_lists(uint32_t coro_id, const std::vector & r_addr_list, __OUT uint32_t * num_sr_lists); 237 | void free_read_kv_sr_lists(IbvSrList * sr_lists, int num_sr_lists); 238 | IbvSrList * gen_cas_sr_lists(uint32_t coro_id, const std::vector & cas_addr_list, __OUT uint32_t * num_sr_lists); 239 | void free_cas_sr_lists(IbvSrList * sr_lists, int num_sr_lists); 240 | IbvSrList * gen_invalid_sr_lists(uint32_t coro_id, KVRWAddr * r_addr, uint64_t local_data_addr); 241 | void free_invalid_sr_lists(IbvSrList * sr_list); 242 | IbvSrList * gen_read_cache_kv_sr_lists(uint32_t coro_id, RaceHashSlot * local_slot_ptr, uint64_t local_addr); 243 | void free_read_cache_kv_sr_lists(IbvSrList * sr_lists); 244 | IbvSrList * gen_write_hb_sr_lists(uint32_t coro_id, std::vector & rw_addr_list, __OUT uint32_t * num_sr_lists); 245 | void free_write_hb_sr_lists(IbvSrList * sr_lists, int num_sr_lists); 246 | IbvSrList * gen_log_commit_sr_lists(uint32_t coro_id, void * local_addr, uint32_t size, 247 | std::vector & rw_addr_list, __OUT uint32_t * num_sr_lists); 248 | void free_log_commit_sr_lists(IbvSrList * sr_lists, int num_sr_lists); 249 | 250 | void prepare_request(KVReqCtx * ctx); 251 | void prepare_log_commit_addrs(KVReqCtx * ctx); 252 | void find_kv_in_buckets(KVReqCtx * ctx); 253 | void find_kv_in_buckets_on_crash(KVReqCtx * ctx); 254 | void 
find_empty_slot(KVReqCtx * ctx); 255 | int32_t find_match_kv_idx(KVReqCtx * ctx); 256 | void get_local_bucket_info(KVReqCtx * ctx); 257 | void modify_primary_idx(KVReqCtx * ctx); 258 | void modify_primary_idx_sync(KVReqCtx * ctx); 259 | void kv_log_commit(KVReqCtx * ctx); 260 | void kv_log_commit_sync(KVReqCtx * ctx); 261 | void check_recover_need_cas_pr(KVReqCtx * ctx); 262 | void recover_modified_slots(KVReqCtx * ctx); 263 | void check_failed_index(KVReqCtx * ctx); 264 | void check_failed_data(KVReqCtx * ctx); 265 | RaceHashSlot * check_failed_cache(LocalCacheEntry * local_cache_entry); 266 | int32_t find_healthy_idx(uint8_t target_server, uint64_t target_addr); 267 | ClientMetaAddrInfo * find_corresponding_addr_info(uint8_t target_server, uint64_t target_addr); 268 | 269 | void kv_search_read_buckets(KVReqCtx * ctx); 270 | void kv_search_read_buckets_sync(KVReqCtx * ctx); 271 | void kv_search_read_kv(KVReqCtx * ctx); 272 | void kv_search_read_kv_sync(KVReqCtx * ctx); 273 | void kv_search_check_kv(KVReqCtx * ctx); 274 | void kv_search_read_all_healthy_index(KVReqCtx * ctx); 275 | void kv_search_read_failed_kv(KVReqCtx * ctx); 276 | 277 | void kv_insert_read_buckets_and_write_kv(KVReqCtx * ctx); 278 | void kv_insert_read_buckets_and_write_kv_sync(KVReqCtx * ctx); 279 | void kv_insert_backup_consensus_0(KVReqCtx * ctx); 280 | void kv_insert_backup_consensus_0_sync(KVReqCtx * ctx); 281 | void kv_insert_commit_log(KVReqCtx * ctx); 282 | void kv_insert_commit_log_sync(KVReqCtx * ctx); 283 | void kv_insert_cas_primary(KVReqCtx * ctx); 284 | void kv_insert_cas_primary_sync(KVReqCtx * ctx); 285 | 286 | void kv_update_read_buckets_and_write_kv(KVReqCtx * ctx); 287 | void kv_update_read_buckets_and_write_kv_sync(KVReqCtx * ctx); 288 | void kv_update_read_kv(KVReqCtx * ctx); 289 | void kv_update_read_kv_sync(KVReqCtx * ctx); 290 | void kv_update_backup_consensus_0(KVReqCtx * ctx); 291 | void kv_update_backup_consensus_0_sync(KVReqCtx * ctx); 292 | void kv_update_commit_log(KVReqCtx * ctx); 293 | void kv_update_commit_log_sync(KVReqCtx * ctx); 294 | void kv_update_cas_primary(KVReqCtx * ctx); 295 | void kv_update_cas_primary_sync(KVReqCtx * ctx); 296 | void kv_update_bg_operations(KVReqCtx * ctx); 297 | 298 | void kv_delete_read_buckets_write_log(KVReqCtx * ctx); 299 | void kv_delete_read_buckets_write_log_sync(KVReqCtx * ctx); 300 | void kv_delete_read_kv(KVReqCtx * ctx); 301 | void kv_delete_read_kv_sync(KVReqCtx * ctx); 302 | void kv_delete_backup_consensus_0(KVReqCtx * ctx); 303 | void kv_delete_backup_consensus_0_sync(KVReqCtx * ctx); 304 | void kv_delete_commit_log(KVReqCtx * ctx); 305 | void kv_delete_commit_log_sync(KVReqCtx * ctx); 306 | void kv_delete_cas_primary(KVReqCtx * ctx); 307 | void kv_delete_cas_primary_sync(KVReqCtx * ctx); 308 | void kv_delete_bg_operations(KVReqCtx * ctx); 309 | 310 | int post_sr_lists_and_yield_wait(IbvSrList * sr_lists, uint32_t sr_lists_num); 311 | int post_sr_list_batch_and_yield_wait(std::vector sr_list_batch, 312 | std::vector sr_list_batch_num); 313 | 314 | void init_kv_req_ctx(KVReqCtx * req_ctx, KVInfo * kv_info, char * operation); 315 | void update_log_tail(KVLogTail * kv_header, ClientMMAllocCtx * alloc_ctx); 316 | 317 | int client_recovery(); 318 | void init_recover_req_ctx(KVInfo * kv_info, __OUT KVReqCtx * rec_ctx); 319 | 320 | void iteratively_cas_sync(IbvSrList * sr_lists, uint32_t sr_lists_num); 321 | void iteratively_cas_async(IbvSrList * sr_lists, uint32_t sr_lsits_num); 322 | int init_hash_table(); 323 | 324 | // inline methods 325 | public: 
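    // Typical call sequence for this public interface, as a minimal sketch inferred from the
    // declarations below (workload_path and num_ops are placeholders, not symbols from this codebase):
    //
    //   GlobalConfig conf;
    //   load_config("client_config.json", &conf);            // defined in kv_utils.cc
    //   ClientCR client(&conf);
    //   client.load_kv_requests(workload_path, 0, num_ops);   // populates kv_info_list_ / kv_req_ctx_list_
    //   client.init_kvreq_space(0, 0, client.num_local_operations_);
    //   pthread_t poller = client.start_polling_thread();
    //   client.kv_insert(&client.kv_info_list_[0]);           // likewise kv_update / kv_search / kv_delete
    //   client.stop_polling_thread();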
326 | inline void * get_input_buf() { 327 | return input_buf_; 328 | } 329 | 330 | inline uint32_t get_input_buf_lkey() { 331 | return input_buf_mr_->lkey; 332 | } 333 | 334 | inline struct ibv_mr * get_local_buf_mr() { 335 | return local_buf_mr_; 336 | } 337 | 338 | // public methods 339 | public: 340 | ClientCR(const struct GlobalConfig * conf); 341 | ~ClientCR(); 342 | 343 | KVInfo * kv_info_list_; 344 | KVReqCtx * kv_req_ctx_list_; 345 | uint32_t num_total_operations_; 346 | uint32_t num_local_operations_; 347 | uint32_t num_coroutines_; 348 | int workload_run_time_; 349 | int micro_workload_num_; 350 | 351 | int kv_update(KVInfo * kv_info); 352 | int kv_update(KVReqCtx * ctx); 353 | int kv_update_w_cache(KVInfo * kv_info); 354 | int kv_update_w_crash(KVReqCtx * ctx, int crash_point); 355 | int kv_update_sync(KVReqCtx * ctx); 356 | 357 | int kv_insert(KVInfo * kv_info); 358 | int kv_insert(KVReqCtx * ctx); 359 | int kv_insert_w_cache(KVInfo * kv_info); 360 | int kv_insert_w_crash(KVReqCtx * ctx, int crash_point); 361 | int kv_insert_sync(KVReqCtx * ctx); 362 | 363 | void * kv_search(KVInfo * kv_info); 364 | void * kv_search(KVReqCtx * ctx); 365 | void * kv_search_w_cache(KVInfo * kv_info); 366 | void * kv_search_on_crash(KVReqCtx * ctx); 367 | void * kv_search_sync(KVReqCtx * ctx); 368 | 369 | int kv_delete(KVInfo * kv_info); 370 | int kv_delete(KVReqCtx * ctx); 371 | int kv_delete_w_cache(KVInfo * kv_info); 372 | int kv_delete_sync(KVReqCtx * ctx); 373 | 374 | 375 | pthread_t start_polling_thread(); 376 | boost::fibers::fiber start_polling_fiber(); 377 | void stop_polling_thread(); 378 | 379 | void init_kvreq_space(uint32_t coro_id, uint32_t kv_req_st_idx, uint32_t num_ops); 380 | void init_kv_insert_space(void * coro_local_addr, uint32_t kv_req_idx); 381 | void init_kv_insert_space(void * coro_local_addr, KVReqCtx * kv_req_ctx); 382 | void init_kv_search_space(void * coro_local_addr, uint32_t kv_req_idx); 383 | void init_kv_search_space(void * coro_local_addr, KVReqCtx * kv_req_ctx); 384 | void init_kv_update_space(void * coro_local_addr, uint32_t kv_req_idx); 385 | void init_kv_update_space(void * coro_local_addr, KVReqCtx * kv_req_ctx); 386 | void init_kv_delete_space(void * coro_local_addr, uint32_t kv_req_idx); 387 | void init_kv_delete_space(void * coro_local_addr, KVReqCtx * kv_req_ctx); 388 | 389 | void crash_server(const std::vector & fail_server_list); 390 | void get_addr_translate_table(const std::vector & server_id_list); 391 | 392 | void dump_cache(); 393 | void load_cache(); 394 | int load_seq_kv_requests(uint32_t num_keys, char * op_type); 395 | int load_kv_requests(const char * fname, uint32_t st_idx, int32_t num_ops); 396 | 397 | int get_num_rep(); 398 | 399 | // for testing 400 | public: 401 | int test_get_root(__OUT RaceHashRoot * race_root); 402 | int test_get_log_meta_info(__OUT ClientLogMetaInfo * remote_log_meta_info_list, 403 | __OUT ClientLogMetaInfo * local_meta); 404 | int test_get_pr_log_meta_info(__OUT ClientLogMetaInfo * pr_log_meta_info); 405 | int test_get_remote_log_header(uint8_t server_id, uint64_t raddr, uint32_t buf_size, 406 | __OUT void * buf); 407 | int test_get_local_mm_blocks(__OUT ClientMMBlock * mm_block_list, __OUT uint64_t * list_len); 408 | ClientMetaAddrInfo ** test_get_meta_addr_info(__OUT uint64_t * list_len); 409 | 410 | inline ClientMM * get_mm() { 411 | return mm_; 412 | } 413 | 414 | inline UDPNetworkManager * get_nm() { 415 | return nm_; 416 | } 417 | }; 418 | 419 | void * client_cr_ops_fb_cnt_time(void * arg); 420 | void * 
client_cr_ops_fb_cnt_ops(void * arg); 421 | void * client_cr_ops_fb_cnt_ops_micro(void * arg); 422 | void * client_cr_ops_fb_on_crash(void * arg); 423 | 424 | #endif -------------------------------------------------------------------------------- /src/client_mm.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_CLIENT_MM_H_ 2 | #define DDCKV_CLIENT_MM_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "kv_utils.h" 15 | #include "nm.h" 16 | #include "spinlock.h" 17 | #include "hashtable.h" 18 | 19 | #define MAX_NUM_SUBBLOCKS 4 20 | #define MAX_WATER_MARK 0.7 21 | 22 | typedef struct TagClientMMBlock { 23 | struct MrInfo mr_info_list[MAX_REP_NUM]; 24 | uint8_t server_id_list[MAX_REP_NUM]; 25 | bool * bmap; 26 | uint32_t num_allocated; 27 | int32_t prev_free_subblock_idx; 28 | int32_t next_free_subblock_idx; 29 | int32_t next_free_subblock_cnt; 30 | 31 | uint64_t next_mmblock_addr[MAX_REP_NUM]; 32 | } ClientMMBlock; 33 | 34 | enum ClientAllocType { 35 | TYPE_SUBTABLE = 1, 36 | TYPE_KVBLOCK = 2, 37 | }; 38 | 39 | typedef struct TagClientMMAllocCtx { 40 | uint8_t server_id_list[MAX_REP_NUM]; 41 | uint64_t addr_list[MAX_REP_NUM]; 42 | uint64_t prev_addr_list[MAX_REP_NUM]; 43 | uint64_t next_addr_list[MAX_REP_NUM]; 44 | uint32_t rkey_list[MAX_REP_NUM]; 45 | uint32_t prev_rkey_list[MAX_REP_NUM]; 46 | uint32_t next_rkey_list[MAX_REP_NUM]; 47 | 48 | uint32_t num_subblocks; 49 | bool need_change_prev; 50 | } ClientMMAllocCtx; 51 | 52 | typedef struct TagClientMMAllocSubtableCtx { 53 | uint8_t server_id; 54 | uint64_t addr; 55 | } ClientMMAllocSubtableCtx; 56 | 57 | typedef struct TagRecoverLogInfo { 58 | KVLogTail * local_tail_addr; 59 | uint32_t key_len; 60 | uint32_t val_len; 61 | uint64_t remote_addr; 62 | uint8_t server_id; 63 | } RecoverLogInfo; 64 | 65 | typedef struct TagSubblockInfo { 66 | uint64_t addr_list[MAX_REP_NUM]; 67 | uint32_t rkey_list[MAX_REP_NUM]; 68 | uint8_t server_id_list[MAX_REP_NUM]; 69 | } SubblockInfo; 70 | 71 | class ClientMM { 72 | private: 73 | uint32_t num_replication_; 74 | uint32_t num_idx_rep_; 75 | uint32_t num_memory_; 76 | 77 | std::vector mm_blocks_; 78 | spinlock_t mm_blocks_lock_; 79 | uint32_t cur_mm_block_idx_; 80 | 81 | uint32_t subblock_num_; 82 | uint32_t last_allocated_; 83 | 84 | uint32_t bmap_block_num_; 85 | 86 | uint8_t pr_log_server_id_; 87 | uint64_t pr_log_head_; 88 | 89 | uint64_t client_meta_addr_; 90 | uint64_t client_gc_addr_; 91 | 92 | uint64_t server_limit_addr_; 93 | uint64_t server_kv_area_off_; 94 | uint64_t server_kv_area_addr_; 95 | uint64_t server_num_blocks_; 96 | 97 | std::mutex alloc_new_block_lock_; 98 | bool is_allocing_new_block_; 99 | 100 | // for recovery 101 | void * recover_buf_; 102 | struct ibv_mr * recover_mr_; 103 | std::vector recover_log_info_list_; 104 | std::unordered_map recover_addr_is_allocated_map_; 105 | KVLogTail * log_tail_st_ptr_; 106 | void * tmp_buf_; 107 | 108 | // modification 109 | std::deque subblock_free_queue_; 110 | SubblockInfo last_allocated_info_; 111 | 112 | 113 | 114 | // std::map> allocated_subblock_key_map_; 115 | 116 | struct timeval local_recover_space_et_; 117 | struct timeval get_addr_meta_et_; 118 | struct timeval traverse_log_et_; 119 | 120 | // private methods 121 | private: 122 | int init_get_new_block_from_server(UDPNetworkManager * nm); 123 | int init_reg_space(struct MrInfo mr_inf_list[][MAX_REP_NUM], uint8_t server_id_list[][MAX_REP_NUM], 124 | 
UDPNetworkManager * nm, int reg_type); 125 | int dyn_get_new_block_from_server(UDPNetworkManager * nm); 126 | int get_new_block_from_server(UDPNetworkManager * nm); 127 | int local_reg_blocks(const struct MrInfo * mr_info_list, const uint8_t * server_id_list); 128 | int reg_new_space(const struct MrInfo * mr_info_list, const uint8_t * server_id_list, 129 | UDPNetworkManager * nm, int reg_type); 130 | int dyn_reg_new_space(const struct MrInfo * mr_info_list, const uint8_t * server_id_list, 131 | UDPNetworkManager * nm, int reg_type); 132 | int32_t alloc_from_sid(uint32_t server_id, UDPNetworkManager * nm, int alloc_type, 133 | __OUT struct MrInfo * mr_info); 134 | void update_mm_block_next(ClientMMBlock * mm_block); 135 | int remote_write_meta_addr(UDPNetworkManager * nm); 136 | 137 | int mm_recovery(UDPNetworkManager * nm); 138 | int mm_recover_prepare_space(UDPNetworkManager * nm); 139 | int get_remote_log_header(UDPNetworkManager * nm, uint8_t server_id, uint64_t r_addr, 140 | KVLogHeader * local_addr); 141 | int mm_traverse_log(UDPNetworkManager * nm); 142 | int mm_get_addr_meta(UDPNetworkManager * nm); 143 | int mm_recover_mm_blocks(UDPNetworkManager * nm); 144 | 145 | uint32_t get_subblock_idx(uint64_t addr, ClientMMBlock * cur_block); 146 | ClientMMBlock * get_new_mmblock(); 147 | 148 | void gen_subblock_info(ClientMMBlock * mm_block, uint32_t subblock_idx, __OUT SubblockInfo * subblock_info); 149 | 150 | void get_block_map(); 151 | 152 | // inline private methods 153 | private: 154 | inline uint32_t get_alloc_hint_rr() { 155 | #ifndef SERVER_MM 156 | return last_allocated_ ++; 157 | #else 158 | // last_allocated_ ++; 159 | // return last_allocated_ / 65536; 160 | return last_allocated_ ++; 161 | #endif 162 | } 163 | 164 | inline float get_water_mark() { 165 | float num_used = 0; 166 | for (size_t i = 0; i < mm_blocks_.size(); i ++) { 167 | num_used += mm_blocks_[i]->num_allocated; 168 | } 169 | return num_used / (mm_blocks_.size() * subblock_num_); 170 | } 171 | 172 | // public methods 173 | public: 174 | uint64_t mm_block_sz_; 175 | uint64_t subblock_sz_; 176 | // block_mapping 177 | std::unordered_map > alloc_block_map_; 178 | std::unordered_map > total_block_map_; 179 | 180 | // for free 181 | std::unordered_map free_faa_map_; 182 | 183 | ClientMM(const struct GlobalConfig * conf, 184 | UDPNetworkManager * nm); 185 | ~ClientMM(); 186 | 187 | void get_log_head(__OUT uint64_t * pr_log_head, __OUT uint64_t * bk_log_head); 188 | 189 | void mm_alloc(size_t size, UDPNetworkManager * nm, __OUT ClientMMAllocCtx * ctx); 190 | void mm_alloc(size_t size, UDPNetworkManager * nm, std::string key, __OUT ClientMMAllocCtx * ctx); 191 | void mm_alloc_log_info(RecoverLogInfo * log_info, __OUT ClientMMAllocCtx * ctx); 192 | 193 | void mm_free_cur(const ClientMMAllocCtx * ctx); 194 | void mm_free(uint64_t orig_slot_value); 195 | 196 | void mm_alloc_subtable(UDPNetworkManager * nm, __OUT ClientMMAllocSubtableCtx * ctx); 197 | 198 | int get_last_log_recover_info(__OUT RecoverLogInfo * recover_log_info); 199 | void free_recover_buf(); 200 | 201 | void get_time_bread_down(std::vector & time_vec); 202 | 203 | // inline public methods 204 | public: 205 | inline uint64_t get_remote_meta_ptr() { 206 | return client_meta_addr_; 207 | } 208 | 209 | inline uint32_t get_num_mm_blocks() { 210 | return mm_blocks_.size(); 211 | } 212 | 213 | inline bool should_alloc_new() { 214 | float water_mark = get_water_mark(); 215 | return water_mark > MAX_WATER_MARK; 216 | } 217 | 218 | inline bool should_start_gc() { 219 | 
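        // As the return statement below shows, GC is suggested once the block currently used for
        // allocation has a free-subblock counter below MAX_NUM_SUBBLOCKS (4). Reading that counter
        // as "free subblocks remaining in this block" is inferred from the field name
        // next_free_subblock_cnt rather than stated elsewhere in this header.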
ClientMMBlock * cur_mmblock = mm_blocks_[cur_mm_block_idx_]; 220 | return cur_mmblock->next_free_subblock_cnt < MAX_NUM_SUBBLOCKS; 221 | } 222 | 223 | inline ClientMMBlock * get_cur_mm_block() { 224 | return mm_blocks_[cur_mm_block_idx_]; 225 | } 226 | 227 | inline void get_log_head(__OUT uint8_t * pr_log_server_id, __OUT uint64_t * pr_log_head) { 228 | *pr_log_server_id = pr_log_server_id_; 229 | *pr_log_head = pr_log_head_; 230 | } 231 | 232 | inline size_t get_aligned_size(size_t size) { 233 | if ((size % subblock_sz_) == 0) { 234 | return size; 235 | } 236 | size_t aligned = ((size / subblock_sz_) + 1) * subblock_sz_; 237 | return aligned; 238 | } 239 | }; 240 | 241 | #endif -------------------------------------------------------------------------------- /src/hashtable.cc: -------------------------------------------------------------------------------- 1 | #include "hashtable.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "kv_debug.h" 7 | 8 | #define NUMBER64_1 11400714785074694791ULL 9 | #define NUMBER64_2 14029467366897019727ULL 10 | #define NUMBER64_3 1609587929392839161ULL 11 | #define NUMBER64_4 9650029242287828579ULL 12 | #define NUMBER64_5 2870177450012600261ULL 13 | 14 | #define hash_get64bits(x) hash_read64_align(x, align) 15 | #define hash_get32bits(x) hash_read32_align(x, align) 16 | #define shifting_hash(x, r) ((x << r) | (x >> (64 - r))) 17 | #define TO64(x) (((U64_INT *)(x))->v) 18 | #define TO32(x) (((U32_INT *)(x))->v) 19 | 20 | typedef struct U64_INT { 21 | uint64_t v; 22 | } U64_INT; 23 | 24 | typedef struct U32_INT { 25 | uint32_t v; 26 | } U32_INT; 27 | 28 | static uint64_t hash_read64_align(const void * ptr, uint32_t align) { 29 | if (align == 0) { 30 | return TO64(ptr); 31 | } 32 | return *(uint64_t *)ptr; 33 | } 34 | 35 | static uint32_t hash_read32_align(const void * ptr, uint32_t align) { 36 | if (align == 0) { 37 | return TO32(ptr); 38 | } 39 | return *(uint32_t *)ptr; 40 | } 41 | 42 | static uint64_t string_key_hash_computation(const void * data, uint64_t length, 43 | uint64_t seed, uint32_t align) { 44 | const uint8_t * p = (const uint8_t *)data; 45 | const uint8_t * end = p + length; 46 | uint64_t hash; 47 | 48 | if (length >= 32) { 49 | const uint8_t * const limitation = end - 32; 50 | uint64_t v1 = seed + NUMBER64_1 + NUMBER64_2; 51 | uint64_t v2 = seed + NUMBER64_2; 52 | uint64_t v3 = seed + 0; 53 | uint64_t v4 = seed - NUMBER64_1; 54 | 55 | do { 56 | v1 += hash_get64bits(p) * NUMBER64_2; 57 | p += 8; 58 | v1 = shifting_hash(v1, 31); 59 | v1 *= NUMBER64_1; 60 | v2 += hash_get64bits(p) * NUMBER64_2; 61 | p += 8; 62 | v2 = shifting_hash(v2, 31); 63 | v2 *= NUMBER64_1; 64 | v3 += hash_get64bits(p) * NUMBER64_2; 65 | p += 8; 66 | v3 = shifting_hash(v3, 31); 67 | v3 *= NUMBER64_1; 68 | v4 += hash_get64bits(p) * NUMBER64_2; 69 | p += 8; 70 | v4 = shifting_hash(v4, 31); 71 | v4 *= NUMBER64_1; 72 | } while (p <= limitation); 73 | 74 | hash = shifting_hash(v1, 1) + shifting_hash(v2, 7) + shifting_hash(v3, 12) + shifting_hash(v4, 18); 75 | 76 | v1 *= NUMBER64_2; 77 | v1 = shifting_hash(v1, 31); 78 | v1 *= NUMBER64_1; 79 | hash ^= v1; 80 | hash = hash * NUMBER64_1 + NUMBER64_4; 81 | 82 | v2 *= NUMBER64_2; 83 | v2 = shifting_hash(v2, 31); 84 | v2 *= NUMBER64_1; 85 | hash ^= v2; 86 | hash = hash * NUMBER64_1 + NUMBER64_4; 87 | 88 | v3 *= NUMBER64_2; 89 | v3 = shifting_hash(v3, 31); 90 | v3 *= NUMBER64_1; 91 | hash ^= v3; 92 | hash = hash * NUMBER64_1 + NUMBER64_4; 93 | 94 | v4 *= NUMBER64_2; 95 | v4 = shifting_hash(v4, 31); 96 | v4 *= NUMBER64_1; 97 | hash ^= v4; 
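        // The multiply-add below completes the v4 fold, mirroring the v1-v3 folds above; the rest of
        // the function then adds the input length, consumes any remaining tail in 8-/4-/1-byte steps,
        // and applies the final avalanche shifts. The NUMBER64_1..NUMBER64_5 constants are the
        // xxHash64 primes, so this routine follows the xxHash64 structure.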
98 | hash = hash * NUMBER64_1 + NUMBER64_4; 99 | } else { 100 | hash = seed + NUMBER64_5; 101 | } 102 | 103 | hash += (uint64_t)length; 104 | 105 | while (p + 8 <= end) { 106 | uint64_t k1 = hash_get64bits(p); 107 | k1 *= NUMBER64_2; 108 | k1 = shifting_hash(k1, 31); 109 | k1 *= NUMBER64_1; 110 | hash ^= k1; 111 | hash = shifting_hash(hash, 27) * NUMBER64_1 + NUMBER64_4; 112 | p += 8; 113 | } 114 | 115 | if (p + 4 <= end) { 116 | hash ^= (uint64_t)(hash_get32bits(p)) * NUMBER64_1; 117 | hash = shifting_hash(hash, 23) * NUMBER64_2 + NUMBER64_3; 118 | p += 4; 119 | } 120 | 121 | while (p < end) { 122 | hash ^= (*p) * NUMBER64_5; 123 | hash = shifting_hash(hash, 11) * NUMBER64_1; 124 | p ++; 125 | } 126 | 127 | hash ^= hash >> 33; 128 | hash *= NUMBER64_2; 129 | hash ^= hash >> 29; 130 | hash *= NUMBER64_3; 131 | hash ^= hash >> 32; 132 | 133 | return hash; 134 | } 135 | 136 | uint64_t VariableLengthHash(const void * data, uint64_t length, uint64_t seed) { 137 | if ((((uint64_t)data) & 7) == 0) { 138 | return string_key_hash_computation(data, length, seed, 1); 139 | } 140 | return string_key_hash_computation(data, length, seed, 0); 141 | } 142 | 143 | uint32_t GetFreeSlotNum(RaceHashBucket * bucket, __OUT uint32_t * free_idx) { 144 | *free_idx = RACE_HASH_ASSOC_NUM; 145 | uint32_t free_num = 0; 146 | for (int i = 0; i < RACE_HASH_ASSOC_NUM; i++) { 147 | if (bucket->slots[i].fp == 0 && bucket->slots[i].kv_len == 0 && 148 | IsEmptyPointer(bucket->slots[i].pointer, 5)) { 149 | // free_idx_list[free_num] = i; 150 | free_num ++; 151 | *free_idx = i; 152 | } 153 | } 154 | return free_num; 155 | } 156 | 157 | bool IsEmptyPointer(uint8_t * pointer, uint32_t num) { 158 | for (int i = 0; i < num; i ++) { 159 | if (pointer[i] != 0) { 160 | return false; 161 | } 162 | } 163 | return true; 164 | } 165 | 166 | uint8_t HashIndexComputeFp(uint64_t hash) { 167 | uint8_t fp = 0; 168 | hash >>= 48; 169 | fp ^= hash; 170 | hash >>= 8; 171 | fp ^= hash; 172 | return fp; 173 | } 174 | 175 | bool CheckKey(void * r_key_addr, uint32_t r_key_len, void * l_key_addr, uint32_t l_key_len) { 176 | // TODO: delete the following code 177 | // char local_key_buf[256] = {0}; 178 | // char remote_key_buf[256] = {0}; 179 | // memset(local_key_buf, 0, 256); 180 | // memset(remote_key_buf, 0, 256); 181 | // printf("%d %d\n", r_key_len, l_key_len); 182 | // memcpy(local_key_buf, l_key_addr, l_key_len); 183 | // memcpy(remote_key_buf, r_key_addr, r_key_len); 184 | // printf("%s %s\n", remote_key_buf, local_key_buf); 185 | // print_log(DEBUG, " [%s] comparing %s %s %d %d", __FUNCTION__, local_key_buf, remote_key_buf, l_key_len, r_key_len); 186 | 187 | if (r_key_len != l_key_len) 188 | return false; 189 | 190 | uint64_t r_hash_value = VariableLengthHash(r_key_addr, r_key_len, 0); 191 | uint64_t l_hash_value = VariableLengthHash(l_key_addr, l_key_len, 0); 192 | 193 | if (r_hash_value != l_hash_value) 194 | return false; 195 | 196 | return memcmp(r_key_addr, l_key_addr, r_key_len) == 0; 197 | } -------------------------------------------------------------------------------- /src/hashtable.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_HASH_TABLE_H_ 2 | #define DDCKV_HASH_TABLE_H_ 3 | 4 | #include 5 | #include "kv_utils.h" 6 | 7 | #define RACE_HASH_GLOBAL_DEPTH (5) 8 | #define RACE_HASH_INIT_LOCAL_DEPTH (5) 9 | #define RACE_HASH_SUBTABLE_NUM (1 << RACE_HASH_GLOBAL_DEPTH) 10 | #define RACE_HASH_INIT_SUBTABLE_NUM (1 << RACE_HASH_INIT_LOCAL_DEPTH) 11 | #define RACE_HASH_MAX_GLOBAL_DEPTH 
(5) 12 | #define RACE_HASH_MAX_SUBTABLE_NUM (1 << RACE_HASH_MAX_GLOBAL_DEPTH) 13 | #define RACE_HASH_ADDRESSABLE_BUCKET_NUM (34000ULL) 14 | #define RACE_HASH_SUBTABLE_BUCKET_NUM (RACE_HASH_ADDRESSABLE_BUCKET_NUM * 3 / 2) 15 | #define RACE_HASH_ASSOC_NUM (7) 16 | #define RACE_HASH_RESERVED_MAX_KV_NUM (1024ULL * 1024 * 10) 17 | #define RACE_HASH_KVOFFSET_RING_NUM (1024ULL * 1024 * 16) 18 | #define RACE_HASH_KV_BLOCK_LENGTH (64ULL) 19 | #define SUBTABLE_USED_HASH_BIT_NUM (32) 20 | #define RACE_HASH_MASK(n) ((1 << n) - 1) 21 | 22 | #define ROOT_RES_LEN (sizeof(RaceHashRoot)) 23 | #define SUBTABLE_LEN (RACE_HASH_ADDRESSABLE_BUCKET_NUM * sizeof(RaceHashBucket)) 24 | #define SUBTABLE_RES_LEN (RACE_HASH_MAX_SUBTABLE_NUM * SUBTABLE_LEN) 25 | #define KV_RES_LEN (RACE_HASH_RESERVED_MAX_KV_NUM * RACE_HASH_KV_BLOCK_LENGTH) 26 | // #define META_AREA_LEN (128 * 1024 * 1024) 27 | #define META_AREA_LEN (256 * 1024 * 1024) 28 | // #define GC_AREA_LEN (128 * 1024 * 1024) 29 | #define GC_AREA_LEN (0) 30 | #define HASH_AREA_LEN (128 * 1024 * 1024) 31 | #define CLIENT_META_LEN (1 * 1024 * 1024) 32 | #define CLIENT_GC_LEN (1 * 1024 * 1024) 33 | // #define RACE_HASH_MAX_NUM_REP (10) 34 | 35 | 36 | typedef struct __attribute__((__packed__)) TagRaceHashSlot { 37 | uint8_t fp; 38 | uint8_t kv_len; 39 | uint8_t server_id; 40 | uint8_t pointer[5]; 41 | } RaceHashSlot; 42 | 43 | typedef struct __attribute__((__packed__)) TagRacsHashBucket { 44 | uint32_t local_depth; 45 | uint32_t prefix; 46 | RaceHashSlot slots[RACE_HASH_ASSOC_NUM]; 47 | } RaceHashBucket; 48 | 49 | typedef struct TagRaceHashSubtableEntry { 50 | uint8_t lock; 51 | uint8_t local_depth; 52 | uint8_t server_id; 53 | uint8_t pointer[5]; 54 | } RaceHashSubtableEntry; 55 | 56 | typedef struct TagRaceHashRoot { 57 | uint64_t global_depth; 58 | uint64_t init_local_depth; 59 | uint64_t max_global_depth; 60 | uint64_t prefix_num; 61 | uint64_t subtable_res_num; 62 | uint64_t subtable_init_num; 63 | uint64_t subtable_hash_num; 64 | uint64_t subtable_hash_range; 65 | uint64_t subtable_bucket_num; 66 | uint64_t seed; 67 | 68 | uint64_t mem_id; 69 | uint64_t root_offset; 70 | uint64_t subtable_offset; 71 | uint64_t kv_offset; 72 | uint64_t kv_len; 73 | 74 | uint64_t lock; 75 | RaceHashSubtableEntry subtable_entry[RACE_HASH_MAX_SUBTABLE_NUM][MAX_REP_NUM]; 76 | } RaceHashRoot; 77 | 78 | typedef struct TagRaceHashSearchContext { 79 | int32_t result; 80 | int32_t no_back; 81 | uint64_t hash_value; 82 | uint8_t fp; // fingerprint 83 | // HashIndexSearchReq * req; 84 | uint64_t f_com_bucket_addr; 85 | uint64_t s_com_bucket_addr; 86 | uint64_t read_kv_addr; 87 | 88 | uint64_t f_remote_com_bucket_offset; 89 | uint64_t s_remote_com_bucket_offset; 90 | 91 | uint64_t read_kv_offset; 92 | uint32_t read_kv_len; 93 | 94 | RaceHashRoot * local_root; 95 | 96 | void * key; 97 | uint32_t key_len; 98 | uint32_t value_len; 99 | 100 | bool sync_root_done; 101 | bool is_resizing; 102 | } RaceHashSearchContext; 103 | 104 | typedef struct TagKVTableAddrInfo { 105 | uint8_t server_id_list[MAX_REP_NUM]; 106 | uint64_t f_bucket_addr[MAX_REP_NUM]; 107 | uint64_t s_bucket_addr[MAX_REP_NUM]; 108 | uint32_t f_bucket_addr_rkey[MAX_REP_NUM]; 109 | uint32_t s_bucket_addr_rkey[MAX_REP_NUM]; 110 | uint32_t f_main_idx; 111 | uint32_t s_main_idx; 112 | uint32_t f_idx; 113 | uint32_t s_idx; 114 | } KVTableAddrInfo; 115 | 116 | typedef struct TagKVHashInfo { 117 | uint64_t hash_value; 118 | uint64_t prefix; 119 | uint8_t fp; 120 | uint8_t local_depth; 121 | } KVHashInfo; 122 | 123 | typedef struct 
TagKVInfo { 124 | void * l_addr; 125 | uint32_t lkey; 126 | uint32_t key_len; 127 | uint32_t value_len; 128 | } KVInfo; 129 | 130 | typedef struct TagKVRWAddr { 131 | uint8_t server_id; 132 | uint64_t r_kv_addr; 133 | uint64_t l_kv_addr; 134 | uint32_t rkey; 135 | uint32_t lkey; 136 | uint32_t length; 137 | } KVRWAddr; 138 | 139 | typedef struct TagKVCASAddr { 140 | uint8_t server_id; 141 | uint64_t r_kv_addr; 142 | uint64_t l_kv_addr; 143 | uint32_t rkey; 144 | uint32_t lkey; 145 | uint64_t orig_value; 146 | uint64_t swap_value; 147 | } KVCASAddr; 148 | 149 | typedef struct TagLocalCacheEntry { 150 | uint64_t r_slot_addr[MAX_REP_NUM]; 151 | RaceHashSlot l_slot_ptr; 152 | uint32_t miss_cnt; 153 | uint32_t acc_cnt; 154 | } LocalCacheEntry; 155 | 156 | static inline uint64_t SubtableFirstIndex(uint64_t hash_value, uint64_t capacity) { 157 | return hash_value % (capacity / 2); 158 | } 159 | 160 | static inline uint64_t SubtableSecondIndex(uint64_t hash_value, uint64_t f_index, uint64_t capacity) { 161 | uint32_t hash = hash_value; 162 | uint16_t partial = (uint16_t)(hash >> 16); 163 | uint16_t non_sero_tag = (partial >> 1 << 1) + 1; 164 | uint64_t hash_of_tag = (uint64_t)(non_sero_tag * 0xc6a4a7935bd1e995); 165 | return (uint64_t)(((uint64_t)(f_index) ^ hash_of_tag) % (capacity / 2) + capacity / 2); 166 | } 167 | 168 | static inline uint64_t HashIndexConvert40To64Bits(uint8_t * addr) { 169 | uint64_t ret = 0; 170 | return ret | ((uint64_t)addr[0] << 40) | ((uint64_t)addr[1] << 32) 171 | | ((uint64_t)addr[2] << 24) | ((uint64_t)addr[3] << 16) 172 | | ((uint64_t)addr[4] << 8); 173 | } 174 | 175 | static inline void HashIndexConvert64To40Bits(uint64_t addr, __OUT uint8_t * o_addr) { 176 | o_addr[0] = (uint8_t)((addr >> 40) & 0xFF); 177 | o_addr[1] = (uint8_t)((addr >> 32) & 0xFF); 178 | o_addr[2] = (uint8_t)((addr >> 24) & 0xFF); 179 | o_addr[3] = (uint8_t)((addr >> 16) & 0xFF); 180 | o_addr[4] = (uint8_t)((addr >> 8) & 0xFF); 181 | } 182 | 183 | static inline void ConvertSlotToAddr(RaceHashSlot * slot, __OUT KVRWAddr * kv_addr) { 184 | kv_addr->server_id = slot->server_id; 185 | kv_addr->r_kv_addr = HashIndexConvert40To64Bits(slot->pointer); 186 | } 187 | 188 | static inline uint64_t ConvertSlotToInt(RaceHashSlot * slot) { 189 | return *(uint64_t *)slot; 190 | } 191 | 192 | uint64_t VariableLengthHash(const void * data, uint64_t length, uint64_t seed); 193 | uint8_t HashIndexComputeFp(uint64_t hash); 194 | uint32_t GetFreeSlotNum(RaceHashBucket * bucekt, uint32_t * free_idx); 195 | bool IsEmptyPointer(uint8_t * pointer, uint32_t num); 196 | bool CheckKey(void * r_key_addr, uint32_t r_key_len, void * l_key_addr, uint32_t l_key_len); 197 | 198 | #endif -------------------------------------------------------------------------------- /src/ib.cc: -------------------------------------------------------------------------------- 1 | #include "ib.h" 2 | 3 | #include 4 | 5 | static int modify_qp_to_init(struct ibv_qp * qp, const struct QpInfo * local_qp_info) { 6 | struct ibv_qp_attr attr; 7 | int attr_mask; 8 | int rc; 9 | memset(&attr, 0, sizeof(struct ibv_qp_attr)); 10 | attr.qp_state = IBV_QPS_INIT; 11 | attr.port_num = local_qp_info->port_num; 12 | attr.pkey_index = 0; 13 | attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | 14 | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; 15 | attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; 16 | rc = ibv_modify_qp(qp, &attr, attr_mask); 17 | // assert(rc == 0); 18 | return 0; 19 | } 20 | 21 | static int 
modify_qp_to_rtr(struct ibv_qp * local_qp, 22 | const struct QpInfo * local_qp_info, 23 | const struct QpInfo * remote_qp_info, 24 | uint8_t conn_type) { 25 | struct ibv_qp_attr attr; 26 | int attr_mask; 27 | int rc; 28 | memset(&attr, 0, sizeof(struct ibv_qp_attr)); 29 | attr.qp_state = IBV_QPS_RTR; 30 | attr.path_mtu = IBV_MTU_1024; 31 | attr.dest_qp_num = remote_qp_info->qp_num; 32 | attr.rq_psn = 0; 33 | attr.max_dest_rd_atomic = 16; 34 | attr.min_rnr_timer = 0x12; 35 | attr.ah_attr.is_global = 0; 36 | attr.ah_attr.dlid = remote_qp_info->lid; 37 | attr.ah_attr.sl = 0; 38 | attr.ah_attr.src_path_bits = 0; 39 | attr.ah_attr.port_num = local_qp_info->port_num; 40 | if (conn_type == ROCE) { 41 | attr.ah_attr.is_global = 1; 42 | attr.ah_attr.port_num = local_qp_info->port_num; 43 | memcpy(&attr.ah_attr.grh.dgid, remote_qp_info->gid, 16); 44 | attr.ah_attr.grh.flow_label = 0; 45 | attr.ah_attr.grh.hop_limit = 1; 46 | attr.ah_attr.grh.sgid_index = local_qp_info->gid_idx; 47 | attr.ah_attr.grh.traffic_class = 0; 48 | } 49 | attr_mask = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | 50 | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; 51 | rc = ibv_modify_qp(local_qp, &attr, attr_mask); 52 | // assert(rc == 0); 53 | return 0; 54 | } 55 | 56 | static int modify_qp_to_rts(struct ibv_qp * local_qp) { 57 | struct ibv_qp_attr attr; 58 | int attr_mask; 59 | int rc; 60 | memset(&attr, 0, sizeof(struct ibv_qp_attr)); 61 | attr.qp_state = IBV_QPS_RTS; 62 | attr.timeout = 0x12; 63 | attr.retry_cnt = 6; 64 | attr.rnr_retry = 0; 65 | attr.sq_psn = 0; 66 | attr.max_rd_atomic = 16; 67 | attr_mask = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | 68 | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC; 69 | rc = ibv_modify_qp(local_qp, &attr, attr_mask); 70 | // assert(rc == 0); 71 | return 0; 72 | } 73 | 74 | struct ibv_context * ib_get_ctx(uint32_t dev_id, uint32_t port_id) { 75 | struct ibv_device ** ib_dev_list; 76 | struct ibv_device * ib_dev; 77 | int num_device; 78 | 79 | ib_dev_list = ibv_get_device_list(&num_device); 80 | // assert(ib_dev_list != NULL && num_device > dev_id); 81 | ib_dev = ib_dev_list[dev_id]; 82 | 83 | struct ibv_context * ret = ibv_open_device(ib_dev); 84 | // assert(ret != NULL); 85 | ibv_free_device_list(ib_dev_list); 86 | return ret; 87 | } 88 | 89 | struct ibv_qp * ib_create_rc_qp(struct ibv_pd * ib_pd, 90 | struct ibv_qp_init_attr * qp_init_attr) { 91 | return ibv_create_qp(ib_pd, qp_init_attr); 92 | } 93 | 94 | int ib_connect_qp(struct ibv_qp * local_qp, 95 | const struct QpInfo * local_qp_info, 96 | const struct QpInfo * remote_qp_info, 97 | uint8_t conn_type, uint8_t role) { 98 | int rc = 0; 99 | rc = modify_qp_to_init(local_qp, local_qp_info); 100 | // assert(rc == 0); 101 | 102 | rc = modify_qp_to_rtr(local_qp, local_qp_info, remote_qp_info, conn_type); 103 | // assert(rc == 0); 104 | 105 | if (role == SERVER) { 106 | return 0; 107 | } 108 | 109 | // assert(role == CLIENT); 110 | rc = modify_qp_to_rts(local_qp); 111 | // assert(rc == 0); 112 | return 0; 113 | } 114 | 115 | struct ibv_send_wr * ib_merge_sr_lists_unsignaled(std::vector sr_lists) { 116 | struct ibv_send_wr * ret_sr_head = sr_lists[0]->sr_list; 117 | for (size_t i = 1; i < sr_lists.size(); i ++) { 118 | uint32_t pre_num_sr = sr_lists[i - 1]->num_sr; 119 | sr_lists[i - 1]->sr_list[pre_num_sr - 1].next = sr_lists[i]->sr_list; 120 | } 121 | 122 | size_t last_idx = sr_lists.size() - 1; 123 | uint32_t num_sr = sr_lists[last_idx]->num_sr; 124 | 
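    // Terminate the merged send-request chain at the last WR. Unlike ib_merge_sr_lists() right below,
    // this variant does not force IBV_SEND_SIGNALED onto the final WR, whereas the signaled variant
    // sets that flag and also returns the final wr_id so the caller can poll for its completion.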
sr_lists[last_idx]->sr_list[num_sr - 1].next = NULL; 125 | 126 | return ret_sr_head; 127 | } 128 | 129 | struct ibv_send_wr * ib_merge_sr_lists(std::vector sr_lists, __OUT uint64_t * last_wr_id) { 130 | struct ibv_send_wr * ret_sr_head = sr_lists[0]->sr_list; 131 | for (size_t i = 1; i < sr_lists.size(); i ++) { 132 | uint32_t pre_num_sr = sr_lists[i - 1]->num_sr; 133 | sr_lists[i - 1]->sr_list[pre_num_sr - 1].next = sr_lists[i]->sr_list; 134 | } 135 | 136 | size_t last_idx = sr_lists.size() - 1; 137 | uint32_t num_sr = sr_lists[last_idx]->num_sr; 138 | sr_lists[last_idx]->sr_list[num_sr - 1].next = NULL; 139 | sr_lists[last_idx]->sr_list[num_sr - 1].send_flags |= IBV_SEND_SIGNALED; 140 | 141 | *last_wr_id = sr_lists[last_idx]->sr_list[num_sr - 1].wr_id; 142 | 143 | return ret_sr_head; 144 | } 145 | 146 | void ib_free_sr_lists(IbvSrList * sr_lists, uint32_t num_sr_list) { 147 | // TODO: finish this 148 | return; 149 | // free(sr_lists[0].sr_list->sg_list); 150 | // free(sr_lists[0].sr_list); 151 | // free(sr_lists); 152 | } 153 | 154 | void ib_free_sr_lists_batch(std::vector & sr_lists_batch, std::vector & sr_list_num_batch) { 155 | for (int i = 0; i < sr_lists_batch.size(); i ++) { 156 | ib_free_sr_lists(sr_lists_batch[i], sr_list_num_batch[i]); 157 | } 158 | } 159 | 160 | inline uint64_t gen_wr_id(uint8_t server_id, uint64_t wr_id) { 161 | return server_id * 1000 + wr_id; 162 | } 163 | 164 | inline uint64_t wr_id_to_server_wr_id(uint64_t wr_id) { 165 | return wr_id % 1000; 166 | } 167 | 168 | inline uint8_t wr_id_to_server_id(uint64_t wr_id) { 169 | return (uint8_t)(wr_id / 1000); 170 | } -------------------------------------------------------------------------------- /src/ib.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_IB_H_ 2 | #define DDCKV_IB_H_ 3 | 4 | #include "kv_utils.h" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | typedef struct TagIbvSrList { 14 | struct ibv_send_wr * sr_list; 15 | uint32_t num_sr; 16 | uint32_t server_id; 17 | } IbvSrList; 18 | 19 | struct ibv_context * ib_get_ctx(uint32_t dev_id, uint32_t port_id); 20 | struct ibv_qp * ib_create_rc_qp(struct ibv_pd * ib_pd, struct ibv_qp_init_attr * qp_init_attr); 21 | 22 | int ib_connect_qp(struct ibv_qp * local_qp, 23 | const struct QpInfo * local_qp_info, 24 | const struct QpInfo * remote_qp_info, uint8_t conn_type, uint8_t role); 25 | 26 | // merge wr_lists and set the last wr to be signaled 27 | struct ibv_send_wr * ib_merge_sr_lists_unsignaled(std::vector sr_lists); 28 | struct ibv_send_wr * ib_merge_sr_lists(std::vector sr_lists, __OUT uint64_t * last_wr_id); 29 | void ib_free_sr_lists(IbvSrList * sr_lists, uint32_t num_sr_list); 30 | void ib_free_sr_lists_batch(std::vector & sr_lists_batch, std::vector & sr_list_num_batch); 31 | void ib_free_sr_list(IbvSrList * sr_list); 32 | 33 | inline bool ib_is_all_wrid_finished(const std::map & wait_wrid_wc_map) { 34 | std::map::const_iterator it; 35 | for (it = wait_wrid_wc_map.begin(); it != wait_wrid_wc_map.end(); it ++) { 36 | if (it->second == NULL) { 37 | return false; 38 | } 39 | } 40 | return true; 41 | } 42 | 43 | inline uint64_t ib_gen_wr_id(uint32_t coro_id, uint8_t dst_server_id, uint32_t req_type_st, uint32_t req_seq) { 44 | return (((uint64_t)coro_id << 8) + dst_server_id) * 1000 + req_type_st + req_seq; 45 | } 46 | 47 | inline uint32_t wrid_to_fiber_id(uint64_t wr_id) { 48 | return (uint32_t)((wr_id / 1000) >> 8); 49 | } 50 | 51 | inline uint8_t wrid_to_dst_sid(uint64_t 
wr_id) { 52 | return (uint8_t)((wr_id / 1000) & 0xFF); 53 | } 54 | 55 | inline uint32_t wrid_to_req_seq(uint64_t wr_id) { 56 | return (uint32_t)(wr_id % 1000); 57 | } 58 | 59 | inline uint64_t wr_id_to_server_wr_id(uint64_t wr_id); 60 | inline uint8_t wr_id_to_server_id(uint64_t wr_id); 61 | 62 | 63 | #endif -------------------------------------------------------------------------------- /src/init.cc: -------------------------------------------------------------------------------- 1 | #include "kv_utils.h" 2 | 3 | #include 4 | 5 | int run_server(struct GlobalConfig * conf); 6 | int run_client(struct GlobalConfig * conf); 7 | 8 | int main(int argc, char ** argv) { 9 | // assert(argc == 2); 10 | char * conf_file_name = argv[1]; 11 | struct GlobalConfig conf; 12 | int ret = 0; 13 | 14 | ret = load_config(conf_file_name, &conf); 15 | if (ret != 0) { 16 | return 1; 17 | } 18 | 19 | if (conf.role == SERVER) { 20 | ret = run_server(&conf); 21 | } else { 22 | // assert(conf.role == CLIENT); 23 | ret = run_client(&conf); 24 | } 25 | } -------------------------------------------------------------------------------- /src/kv_debug.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_DEBUG_H_ 2 | #define DDCKV_DEBUG_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include "kv_utils.h" 14 | #include "hashtable.h" 15 | #include "ib.h" 16 | 17 | enum { 18 | INFO = 0, 19 | DEBUG 20 | }; 21 | 22 | static const char * str_prefix[] = {"[INFO]", "[DEBUG]"}; 23 | 24 | static inline void kv_assert(bool value) { 25 | #ifdef _DEBUG 26 | assert(value); 27 | #endif 28 | } 29 | 30 | static inline void print_log(int log_level, const char * fmt, ...) { 31 | if (log_level == DEBUG) { 32 | #ifdef _DEBUG 33 | int fmt_len = strlen(fmt); 34 | char * new_fmt_buf = (char *)malloc(fmt_len + 10); 35 | sprintf(new_fmt_buf, "%s %s\n", str_prefix[log_level], fmt); 36 | va_list args; 37 | va_start(args, fmt); 38 | vprintf(new_fmt_buf, args); 39 | va_end(args); 40 | #endif 41 | } else { 42 | int fmt_len = strlen(fmt); 43 | char * new_fmt_buf = (char *)malloc(fmt_len + 10); 44 | sprintf(new_fmt_buf, "%s %s\n", str_prefix[log_level], fmt); 45 | va_list args; 46 | va_start(args, fmt); 47 | vprintf(new_fmt_buf, args); 48 | va_end(args); 49 | } 50 | } 51 | 52 | static inline void print_sr_list(struct ibv_send_wr * sr_list) { 53 | struct ibv_send_wr * p; 54 | for (p = sr_list; p != NULL; p = p->next) { 55 | // print_log(DEBUG, "wr_id(%ld) raddr(%lx) rkey(%x)", p->wr_id, p->wr.rdma.remote_addr, 56 | // p->wr.rdma.rkey); 57 | } 58 | } 59 | 60 | static inline void print_sr_lists(std::vector & sr_list_batch, 61 | std::vector & sr_list_num_batch) { 62 | for (size_t i = 0; i < sr_list_batch.size(); i ++) { 63 | uint8_t server_id; 64 | for (int j = 0; j < sr_list_num_batch[i]; j ++) { 65 | server_id = sr_list_batch[i][j].server_id; 66 | // print_log(DEBUG, "server_id(%d)", server_id); 67 | print_sr_list(sr_list_batch[i][j].sr_list); 68 | } 69 | } 70 | } 71 | 72 | static inline void print_key(char * key_addr, uint32_t key_len) { 73 | char keystr[256]; 74 | memset(keystr, 0, 256); 75 | memcpy(keystr, key_addr, key_len); 76 | printf("%s", keystr); 77 | } 78 | 79 | #endif -------------------------------------------------------------------------------- /src/kv_utils.cc: -------------------------------------------------------------------------------- 1 | #include "kv_utils.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 
#include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | inline static uint64_t htonll(uint64_t val) { 17 | return (((uint64_t) htonl(val)) << 32) + htonl(val >> 32); 18 | } 19 | 20 | inline static uint64_t ntohll(uint64_t val) { 21 | return (((uint64_t) ntohl(val)) << 32) + ntohl(val >> 32); 22 | } 23 | 24 | void serialize_kvmsg(__OUT struct KVMsg * kvmsg) { 25 | switch (kvmsg->type) { 26 | case REQ_CONNECT: 27 | case REP_CONNECT: 28 | serialize_conn_info(&kvmsg->body.conn_info); 29 | break; 30 | case REQ_ALLOC: 31 | case REP_ALLOC: 32 | case REQ_ALLOC_SUBTABLE: 33 | case REP_ALLOC_SUBTABLE: 34 | serialize_mr_info(&kvmsg->body.mr_info); 35 | break; 36 | default: 37 | break; 38 | } 39 | kvmsg->type = htons(kvmsg->type); 40 | kvmsg->id = htons(kvmsg->id); 41 | } 42 | 43 | void deserialize_kvmsg(__OUT struct KVMsg * kvmsg) { 44 | kvmsg->type = ntohs(kvmsg->type); 45 | kvmsg->id = ntohs(kvmsg->id); 46 | switch (kvmsg->type) { 47 | case REQ_CONNECT: 48 | case REP_CONNECT: 49 | deserialize_conn_info(&kvmsg->body.conn_info); 50 | break; 51 | case REQ_ALLOC: 52 | case REP_ALLOC: 53 | case REQ_ALLOC_SUBTABLE: 54 | case REP_ALLOC_SUBTABLE: 55 | deserialize_mr_info(&kvmsg->body.mr_info); 56 | break; 57 | default: 58 | break; 59 | } 60 | } 61 | 62 | void serialize_qp_info(__OUT struct QpInfo * qp_info) { 63 | qp_info->qp_num = htonl(qp_info->qp_num); 64 | qp_info->lid = htons(qp_info->lid); 65 | } 66 | 67 | void deserialize_qp_info(__OUT struct QpInfo * qp_info) { 68 | qp_info->qp_num = ntohl(qp_info->qp_num); 69 | qp_info->lid = ntohs(qp_info->lid); 70 | } 71 | 72 | void serialize_mr_info(__OUT struct MrInfo * mr_info) { 73 | mr_info->addr = htonll(mr_info->addr); 74 | mr_info->rkey = htonl(mr_info->rkey); 75 | } 76 | 77 | void deserialize_mr_info(__OUT struct MrInfo * mr_info) { 78 | mr_info->addr = ntohll(mr_info->addr); 79 | mr_info->rkey = ntohl(mr_info->rkey); 80 | } 81 | 82 | void serialize_conn_info(__OUT struct ConnInfo * conn_info) { 83 | serialize_qp_info(&conn_info->qp_info); 84 | serialize_mr_info(&conn_info->gc_info); 85 | } 86 | 87 | void deserialize_conn_info(__OUT struct ConnInfo * conn_info) { 88 | deserialize_qp_info(&conn_info->qp_info); 89 | deserialize_mr_info(&conn_info->gc_info); 90 | } 91 | 92 | int load_config(const char * fname, __OUT struct GlobalConfig * config) { 93 | std::fstream config_fs(fname); 94 | // assert(config_fs.is_open()); 95 | 96 | boost::property_tree::ptree pt; 97 | try { 98 | boost::property_tree::read_json(config_fs, pt); 99 | } catch (boost::property_tree::ptree_error & e) { 100 | return -1; 101 | } 102 | 103 | try { 104 | std::string role_str = pt.get("role"); 105 | if (role_str == std::string("SERVER")) { 106 | config->role = SERVER; 107 | } else { 108 | // assert(role_str == std::string("CLIENT")); 109 | config->role = CLIENT; 110 | } 111 | 112 | std::string conn_type_str = pt.get("conn_type"); 113 | if (conn_type_str == std::string("IB")) { 114 | config->conn_type = IB; 115 | } else { 116 | // assert(conn_type_str == std::string("ROCE")); 117 | config->conn_type = ROCE; 118 | } 119 | 120 | config->server_id = pt.get("server_id"); 121 | config->udp_port = pt.get("udp_port"); 122 | config->memory_num = pt.get("memory_num"); 123 | 124 | int i = 0; 125 | BOOST_FOREACH(boost::property_tree::ptree::value_type & v, pt.get_child("memory_ips")) { 126 | // assert(v.first.empty()); 127 | std::string ip = v.second.get(""); 128 | // assert(ip.length() > 0 && ip.length() < 16); 129 | strcpy(config->memory_ips[i], 
ip.c_str()); 130 | i ++; 131 | } 132 | // assert(i == config->memory_num); 133 | 134 | config->ib_dev_id = pt.get("ib_dev_id"); 135 | config->ib_port_id = pt.get("ib_port_id"); 136 | config->ib_gid_idx = pt.get("ib_gid_idx", -1); 137 | 138 | std::string server_base_addr_str = pt.get("server_base_addr"); 139 | sscanf(server_base_addr_str.c_str(), "0x%lx", &config->server_base_addr); 140 | 141 | config->server_data_len = pt.get("server_data_len"); 142 | config->block_size = pt.get("block_size"); 143 | config->subblock_size = pt.get("subblock_size"); 144 | config->client_local_size = pt.get("client_local_size"); 145 | 146 | config->num_replication = pt.get("num_replication"); 147 | config->num_coroutines = pt.get("num_coroutines", 1); 148 | 149 | config->main_core_id = pt.get("main_core_id", 0); 150 | config->poll_core_id = pt.get("poll_core_id", 0); 151 | config->bg_core_id = pt.get("bg_core_id", 0); 152 | config->gc_core_id = pt.get("gc_core_id", 0); 153 | 154 | config->is_recovery = pt.get("is_recovery", 0); 155 | 156 | config->num_idx_rep = pt.get("num_idx_rep", 1); 157 | config->miss_rate_threash = pt.get("miss_rate_threash", 0.1); 158 | config->workload_run_time = pt.get("workload_run_time", 10); 159 | config->micro_workload_num = pt.get("micro_workload_num", 10000); 160 | } catch (boost::property_tree::ptree_error & e) { 161 | return -1; 162 | } 163 | return 0; 164 | } 165 | 166 | void encode_gc_slot(DecodedClientGCSlot * d_gc_slot, __OUT uint64_t * e_gc_slot) { 167 | uint64_t masked_block_off = (d_gc_slot->pr_addr >> 8) & BLOCK_OFF_BMASK; 168 | uint64_t masked_pr_addr = (d_gc_slot->pr_addr >> 26) & BLOCK_ADDR_BMASK; 169 | uint64_t masked_bk_addr = (d_gc_slot->bk_addr >> 26) & BLOCK_ADDR_BMASK; 170 | uint64_t masked_num_subblock = d_gc_slot->num_subblocks & SUBBLOCK_NUM_BMASK; 171 | *(e_gc_slot) = (masked_block_off << 46) | (masked_pr_addr << 25) 172 | | (masked_bk_addr << 4) | (masked_num_subblock); 173 | } 174 | 175 | void decode_gc_slot(uint64_t e_gc_slot, __OUT DecodedClientGCSlot * d_gc_slot) { 176 | uint64_t block_offset = e_gc_slot >> 46; 177 | uint64_t pr_block_addr = (e_gc_slot >> 25) & BLOCK_ADDR_BMASK; 178 | uint64_t bk_block_addr = (e_gc_slot >> 4) & BLOCK_ADDR_BMASK; 179 | uint8_t num_subblocks = e_gc_slot & SUBBLOCK_NUM_BMASK; 180 | d_gc_slot->pr_addr = (pr_block_addr << 26) | (block_offset << 8); 181 | d_gc_slot->bk_addr = (bk_block_addr << 26) | (block_offset << 8); 182 | d_gc_slot->num_subblocks = num_subblocks; 183 | } 184 | 185 | int stick_this_thread_to_core(int core_id) { 186 | int num_cores = sysconf(_SC_NPROCESSORS_CONF); 187 | if (core_id < 0 || core_id >= num_cores) { 188 | return -1; 189 | } 190 | 191 | cpu_set_t cpuset; 192 | CPU_ZERO(&cpuset); 193 | CPU_SET(core_id, &cpuset); 194 | 195 | pthread_t current_thread = pthread_self(); 196 | return pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); 197 | } 198 | 199 | uint64_t current_time_us() { 200 | struct timeval now; 201 | gettimeofday(&now, NULL); 202 | return now.tv_usec; 203 | } 204 | 205 | void dump_lat_file(char * fname, const std::vector & lat_vec) { 206 | if (lat_vec.size() == 0) { 207 | return; 208 | } 209 | FILE * out_fp = fopen(fname, "w"); 210 | for (size_t i = 0; i < lat_vec.size(); i ++) { 211 | fprintf(out_fp, "%ld\n", lat_vec[i]); 212 | } 213 | } -------------------------------------------------------------------------------- /src/kv_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_KV_UTILS_H 2 | #define 
DDCKV_KV_UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #define __OUT 12 | #define DDCKV_MAX_SERVER 64 13 | 14 | #define SERVER_ID_BMASK 0x3F 15 | #define BLOCK_ADDR_BMASK 0x1FFFFFULL 16 | #define BLOCK_OFF_BMASK 0x3FFFFULL 17 | #define SUBBLOCK_NUM_BMASK 0xF 18 | #define MAX_REP_NUM 10 19 | 20 | // #define YCSB_10M 21 | // #define SERVER_MM 22 | 23 | enum ConnType { 24 | IB, 25 | ROCE, 26 | }; 27 | 28 | enum Role { 29 | CLIENT, 30 | SERVER 31 | }; 32 | 33 | // enum KVLogState { 34 | // KV_LOG_VALID = 1, 35 | // KV_LOG_COMMITTED = 1 << 1, 36 | // KV_LOG_GC = 1 << 2, 37 | // KV_LOG_INSERT = 1 << 3, 38 | // }; 39 | 40 | enum KVLogOp { 41 | KV_OP_INSERT = 1, 42 | KV_OP_UPDATE, 43 | KV_OP_DELETE, 44 | KV_OP_FINISH 45 | }; 46 | 47 | struct GlobalConfig { 48 | uint8_t role; 49 | uint8_t conn_type; 50 | uint32_t server_id; 51 | uint16_t udp_port; 52 | uint32_t memory_num; // 0 ~ memory_num -1 is the server id 53 | char memory_ips[16][16]; 54 | 55 | uint32_t ib_dev_id; 56 | uint32_t ib_port_id; 57 | int32_t ib_gid_idx; 58 | 59 | uint64_t server_base_addr; 60 | uint64_t server_data_len; 61 | uint64_t block_size; 62 | uint64_t subblock_size; 63 | uint64_t client_local_size; 64 | 65 | uint32_t num_replication; // 0 ~ num_replication_ - 1 is the meta replication server 66 | uint32_t num_idx_rep; 67 | uint32_t num_coroutines; 68 | 69 | uint32_t main_core_id; 70 | uint32_t poll_core_id; 71 | uint32_t bg_core_id; 72 | uint32_t gc_core_id; 73 | 74 | int is_recovery; 75 | 76 | // for master 77 | uint16_t master_port; 78 | char master_ip[16]; 79 | float miss_rate_threash; 80 | int workload_run_time; 81 | int micro_workload_num; 82 | }; 83 | 84 | struct GlobalInfo { 85 | int local_id; 86 | int num_clients; 87 | int num_memories; 88 | 89 | struct ibv_context * ctx; 90 | int port_index; 91 | int device_id; 92 | int dev_port_id; 93 | int numa_node_id; 94 | 95 | struct ibv_pd * pd; 96 | struct ibv_qp * ud_qp; 97 | 98 | int role; 99 | pthread_mutex_t lock; 100 | }; 101 | 102 | enum KVMsgType { 103 | REQ_CONNECT, 104 | REQ_ALLOC, 105 | REQ_ALLOC_SUBTABLE, 106 | REP_CONNECT, 107 | REP_ALLOC, 108 | REP_ALLOC_SUBTABLE, 109 | REQ_REGISTER, 110 | REP_REGISTER, 111 | REQ_RECOVER, 112 | REP_RECOVER, 113 | REQ_HEARTBEAT, 114 | REP_HEAETBEAT 115 | }; 116 | 117 | struct QpInfo { 118 | uint32_t qp_num; 119 | uint16_t lid; 120 | uint8_t port_num; 121 | uint8_t gid[16]; 122 | uint8_t gid_idx; 123 | }; 124 | 125 | struct MrInfo { 126 | uint64_t addr; 127 | uint32_t rkey; 128 | }; 129 | 130 | struct IbInfo { 131 | uint8_t conn_type; 132 | struct ibv_context * ib_ctx; 133 | struct ibv_pd * ib_pd; 134 | struct ibv_cq * ib_cq; 135 | struct ibv_port_attr * ib_port_attr; 136 | union ibv_gid * ib_gid; 137 | }; 138 | 139 | struct ConnInfo { 140 | struct QpInfo qp_info; 141 | struct MrInfo gc_info; 142 | }; 143 | 144 | struct KVMsg { 145 | uint16_t type; 146 | uint16_t id; 147 | union { 148 | struct ConnInfo conn_info; 149 | struct MrInfo mr_info; 150 | } body; 151 | }; 152 | 153 | enum MMBlockRole { 154 | PRIMARY, 155 | BACKUP 156 | }; 157 | 158 | struct KVLogHeader { 159 | uint8_t is_valid; 160 | uint16_t key_length; 161 | uint32_t value_length; 162 | }; 163 | 164 | struct KVLogTail { 165 | uint8_t next_addr[6]; 166 | uint8_t prev_addr[6]; 167 | uint64_t old_value; 168 | uint8_t crc; 169 | uint8_t op; 170 | }; 171 | 172 | typedef struct TagClientLogMetaInfo { 173 | uint8_t pr_server_id; 174 | uint64_t pr_log_head; 175 | uint64_t pr_log_tail; 176 | } ClientLogMetaInfo; 177 | 178 | typedef 
struct TagEncodedClientGCSlot { 179 | // off: 18bit 180 | // block addr: 21 * 2 181 | // len: 4bit 182 | uint64_t meta_gc_addr; 183 | } EncodedClientGCSlot; 184 | 185 | typedef struct TagClientMetaAddrInfo { 186 | uint8_t meta_info_type; 187 | uint8_t server_id_list[MAX_REP_NUM]; 188 | uint64_t addr_list[MAX_REP_NUM]; 189 | } ClientMetaAddrInfo; 190 | 191 | typedef struct TagDecodedClientGCSlot { 192 | uint64_t pr_addr; 193 | uint64_t bk_addr; 194 | uint8_t num_subblocks; 195 | } DecodedClientGCSlot; 196 | 197 | static inline uint64_t roundup_256(uint64_t len) { 198 | if (len % 256 == 0) { 199 | return len; 200 | } 201 | return (len / 256 + 1) * 256; 202 | } 203 | 204 | static inline bool log_is_valid(KVLogHeader * head) { 205 | return head->is_valid == true; 206 | } 207 | 208 | static inline bool log_is_committed(KVLogTail * tail) { 209 | return tail->old_value != 0; 210 | } 211 | 212 | static inline bool log_is_insert(KVLogTail * tail) { 213 | return tail->op == KV_OP_INSERT; 214 | } 215 | 216 | static inline uint64_t time_spent_us(struct timeval * st, struct timeval * et) { 217 | return (et->tv_sec - st->tv_sec) * 1000000 + (et->tv_usec - st->tv_usec); 218 | } 219 | 220 | static inline uint64_t round_up(uint64_t addr, uint32_t align) { 221 | return ((addr) + align - 1) - ((addr + align - 1) % align); 222 | } 223 | 224 | void serialize_kvmsg(__OUT struct KVMsg * kvmsg); 225 | void deserialize_kvmsg(__OUT struct KVMsg * kvmsg); 226 | void serialize_qp_info(__OUT struct QpInfo * qp_info); 227 | void deserialize_qp_info(__OUT struct QpInfo * qp_info); 228 | void serialize_mr_info(__OUT struct MrInfo * mr_info); 229 | void deserialize_mr_info(__OUT struct MrInfo * mr_info); 230 | void serialize_conn_info(__OUT struct ConnInfo * conn_info); 231 | void deserialize_conn_info(__OUT struct ConnInfo * conn_info); 232 | 233 | int load_config(const char * fname, __OUT struct GlobalConfig * config); 234 | 235 | void encode_gc_slot(DecodedClientGCSlot * d_gc_slot, __OUT uint64_t * e_gc_slot); 236 | void decode_gc_slot(uint64_t e_gc_slot, __OUT DecodedClientGCSlot * d_gc_slot); 237 | 238 | int stick_this_thread_to_core(int core_id); 239 | 240 | uint64_t current_time_us(); 241 | 242 | void dump_lat_file(char * fname, const std::vector & lat_vec); 243 | 244 | #endif -------------------------------------------------------------------------------- /src/nm.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_NM_H 2 | #define DDCKV_NM_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "kv_utils.h" 16 | #include "ib.h" 17 | 18 | class UDPNetworkManager { 19 | private: 20 | uint32_t udp_sock_; 21 | uint16_t udp_port_; 22 | uint8_t role_; 23 | uint8_t conn_type_; 24 | struct sockaddr_in * server_addr_list_; 25 | uint32_t num_server_; 26 | uint32_t server_id_; 27 | 28 | struct ibv_context * ib_ctx_; 29 | struct ibv_pd * ib_pd_; 30 | struct ibv_cq * ib_cq_; 31 | uint8_t ib_port_num_; 32 | struct ibv_port_attr ib_port_attr_; 33 | struct ibv_device_attr ib_device_attr_; 34 | union ibv_gid * ib_gid_; 35 | std::vector rc_qp_list_; 36 | std::vector mr_info_list_; 37 | 38 | tbb::concurrent_hash_map wrid_wc_map_; 39 | 40 | volatile bool stop_polling_; 41 | 42 | 43 | int UDPCMInitClient(const struct GlobalConfig * conf); 44 | int UDPCMInitServer(const struct GlobalConfig * conf); 45 | 46 | // private methods 47 | private: 48 | struct ibv_qp * server_create_rc_qp(); 49 | 
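// Editor's note (inferred from the surrounding members, not original documentation):
// server_create_rc_qp() above and client_create_rc_qp() below each build a
// reliable-connection QP on the manager's ib_pd_/ib_cq_; get_qp_info() then
// extracts the qp_num, lid and gid that are exchanged with the peer inside a
// QpInfo UDP message before the QPs are moved to the connected state.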
struct ibv_qp * client_create_rc_qp(); 50 | int get_qp_info(struct ibv_qp * qp, __OUT struct QpInfo * qp_info); 51 | bool is_all_complete(const std::map & wr_id_comp_map); 52 | 53 | // inline public functions 54 | public: 55 | inline uint32_t get_one_server_id(uint32_t hint) { 56 | return hint % num_server_; 57 | } 58 | 59 | inline uint32_t get_server_rkey(uint8_t server_id) { 60 | return mr_info_list_[server_id]->rkey; 61 | } 62 | 63 | inline uint32_t get_server_id() { 64 | return server_id_; 65 | } 66 | 67 | inline uint32_t get_num_servers() { 68 | return num_server_; 69 | } 70 | 71 | public: 72 | UDPNetworkManager(const struct GlobalConfig * conf); 73 | ~UDPNetworkManager(); 74 | 75 | // common udp functions 76 | int nm_recv_udp_msg(__OUT struct KVMsg * kvmsg, 77 | __OUT struct sockaddr_in * src_addr, __OUT socklen_t * src_addr_len); 78 | int nm_send_udp_msg(struct KVMsg * kvmsg, struct sockaddr_in * dest_addr, 79 | socklen_t dest_addr_len); 80 | int nm_send_udp_msg_to_server(struct KVMsg * kvmsg, uint32_t server_id); 81 | void close_udp_sock(); 82 | 83 | // common ib functions 84 | void get_ib_info(__OUT struct IbInfo * ib_info); 85 | int rdma_post_send_batch_async(uint32_t server_id, struct ibv_send_wr * wr_list); 86 | int rdma_post_send_batch_sync(uint32_t server_id, struct ibv_send_wr * wr_list); 87 | int rdma_post_sr_lists_async_unsignaled(IbvSrList * sr_lists, 88 | uint32_t num_sr_lists); 89 | int rdma_post_sr_lists_sync_unsignaled(IbvSrList * sr_lists, 90 | uint32_t num_sr_lists); 91 | int rdma_post_sr_lists_sync(IbvSrList * sr_lists, uint32_t num_sr_lists, 92 | __OUT struct ibv_wc * wc); 93 | int rdma_post_sr_lists_async(IbvSrList * sr_lists, uint32_t num_sr_lists, 94 | __OUT std::map & wait_wrid_wc_map); 95 | int rdma_post_sr_list_batch_sync(std::vector & sr_list_batch, 96 | std::vector & sr_list_num_batch, __OUT struct ibv_wc * wc); 97 | int rdma_post_sr_list_batch_async(std::vector & sr_list_batch, 98 | std::vector & sr_list_num_batch, __OUT std::map & wait_wrid_wc_map); 99 | 100 | int rdma_poll_one_completion(struct ibv_wc * wc); 101 | int nm_check_completion(std::map & wrid_wc_map); 102 | int nm_poll_completion_sync(std::map & wrid_wc_map); 103 | int nm_rdma_write_inl_to_sid(void * data, uint32_t size, uint64_t remote_addr, 104 | uint32_t remote_rkey, uint32_t server_id); 105 | int nm_rdma_write_inl_to_sid_sync(void * data, uint32_t size, uint64_t remote_addr, 106 | uint32_t remote_rkey, uint32_t server_id); 107 | int nm_rdma_read_from_sid_sync(void * local_addr, uint32_t local_lkey, 108 | uint32_t size, uint64_t remote_addr, uint32_t remote_rkey, uint32_t server_id); 109 | int nm_rdma_read_from_sid(void * local_addr, uint32_t local_lkey, 110 | uint32_t size, uint64_t remote_addr, uint32_t remote_rkey, uint32_t server_id); 111 | int nm_rdma_write_to_sid(void * local_addr, uint32_t local_lkey, 112 | uint32_t size, uint64_t remote_addr, uint32_t remote_rkey, uint32_t server_id); 113 | 114 | // for server 115 | int nm_on_connect_new_qp(const struct KVMsg * request, __OUT struct QpInfo * qp_info); 116 | int nm_on_connect_connect_qp(uint32_t client_id, 117 | const struct QpInfo * local_qp_info, 118 | const struct QpInfo * remote_qp_info); 119 | 120 | // for client 121 | int client_connect_all_rc_qp(); 122 | int client_connect_one_rc_qp(uint32_t server_id, __OUT struct MrInfo * mr_info); 123 | int client_connect_one_rc_qp(uint32_t server_id); 124 | 125 | // for polling thread 126 | void nm_fiber_polling(); 127 | void nm_thread_polling(); 128 | void stop_polling(); 129 | }; 130 | 
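// Editor's sketch (not part of the original source): minimal client-side use of
// UDPNetworkManager, mirroring the pattern exercised in tests/test_nm.cc.
// Assumptions: `conf` is a GlobalConfig filled by load_config(), memory node 0
// is running, and the remote address/rkey come from the MrInfo returned during
// the QP handshake.
//
//   UDPNetworkManager nm(&conf);                   // UDP socket + RDMA context
//   struct MrInfo mr_info;
//   nm.client_connect_one_rc_qp(0, &mr_info);      // QP info exchange over UDP
//
//   uint64_t val = 42;                             // small payload: inline write
//   nm.nm_rdma_write_inl_to_sid_sync(&val, sizeof(val),
//                                    mr_info.addr, mr_info.rkey, /*server_id=*/0);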
131 | typedef struct TagNMPollingThreadArgs { 132 | UDPNetworkManager * nm; 133 | int core_id; 134 | } NMPollingThreadArgs; 135 | 136 | void * nm_polling_thread(void * args); 137 | void * nm_polling_fiber(void * args); 138 | 139 | #endif -------------------------------------------------------------------------------- /src/server.cc: -------------------------------------------------------------------------------- 1 | #include "server.h" 2 | 3 | #include 4 | 5 | #include "kv_utils.h" 6 | #include "kv_debug.h" 7 | 8 | void * server_main(void * server_main_args) { 9 | ServerMainArgs * args = (ServerMainArgs *)server_main_args; 10 | Server * server_instance = args->server; 11 | 12 | // stick to a core 13 | int ret = stick_this_thread_to_core(args->core_id); 14 | // assert(ret == 0); 15 | // print_log(DEBUG, "server is running on core: %d", args->core_id); 16 | 17 | // start working 18 | return server_instance->thread_main(); 19 | } 20 | 21 | Server::Server(const struct GlobalConfig * conf) { 22 | server_id_ = conf->server_id; 23 | need_stop_ = 0; 24 | 25 | nm_ = new UDPNetworkManager(conf); 26 | 27 | struct IbInfo ib_info; 28 | nm_->get_ib_info(&ib_info); 29 | mm_ = new ServerMM(conf->server_base_addr, conf->server_data_len, 30 | conf->block_size, &ib_info, conf); 31 | } 32 | 33 | Server::~Server() { 34 | delete mm_; 35 | delete nm_; 36 | } 37 | 38 | int Server::server_on_connect(const struct KVMsg * request, 39 | struct sockaddr_in * src_addr, 40 | socklen_t src_addr_len) { 41 | int rc = 0; 42 | struct KVMsg reply; 43 | memset(&reply, 0, sizeof(struct KVMsg)); 44 | 45 | reply.id = server_id_; 46 | reply.type = REP_CONNECT; 47 | rc = nm_->nm_on_connect_new_qp(request, &reply.body.conn_info.qp_info); 48 | // assert(rc == 0); 49 | 50 | rc = mm_->get_mr_info(&reply.body.conn_info.gc_info); 51 | // assert(rc == 0); 52 | 53 | serialize_kvmsg(&reply); 54 | 55 | rc = nm_->nm_send_udp_msg(&reply, src_addr, src_addr_len); 56 | // assert(rc == 0); 57 | 58 | deserialize_kvmsg(&reply); 59 | rc = nm_->nm_on_connect_connect_qp(request->id, &reply.body.conn_info.qp_info, &request->body.conn_info.qp_info); 60 | // assert(rc == 0); 61 | return 0; 62 | } 63 | 64 | int Server::server_on_alloc(const struct KVMsg * request, struct sockaddr_in * src_addr, 65 | socklen_t src_addr_len) { 66 | uint64_t alloc_addr = mm_->mm_alloc(); 67 | // assert(mmblock != NULL); 68 | // print_log(DEBUG, "allocated addr: %lx", mmblock->addr); 69 | // assert((mmblock->addr & 0x3FFFFFF) == 0); 70 | 71 | struct KVMsg reply; 72 | memset(&reply, 0, sizeof(struct KVMsg)); 73 | reply.type = REP_ALLOC; 74 | reply.id = nm_->get_server_id(); 75 | reply.body.mr_info.rkey = mm_->get_rkey(); 76 | if (alloc_addr != 0) { 77 | reply.body.mr_info.addr = alloc_addr; 78 | } else { 79 | printf("server no space\n"); 80 | reply.body.mr_info.addr = 0; 81 | } 82 | serialize_kvmsg(&reply); 83 | 84 | int ret = nm_->nm_send_udp_msg(&reply, src_addr, src_addr_len); 85 | // assert(ret == 0); 86 | 87 | return 0; 88 | } 89 | 90 | int Server::server_on_alloc_subtable(const struct KVMsg * request, struct sockaddr_in * src_addr, 91 | socklen_t src_addr_len) { 92 | uint64_t subtable_addr = mm_->mm_alloc_subtable(); 93 | // assert(subtable_addr != 0); 94 | // print_log(DEBUG, "alloc subtable: %lx", subtable_addr); 95 | // assert((subtable_addr & 0xFF) == 0); 96 | 97 | struct KVMsg reply; 98 | memset(&reply, 0, sizeof(struct KVMsg)); 99 | reply.type = REP_ALLOC_SUBTABLE; 100 | reply.id = nm_->get_server_id(); 101 | reply.body.mr_info.addr = subtable_addr; 102 | 
reply.body.mr_info.rkey = mm_->get_rkey(); 103 | serialize_kvmsg(&reply); 104 | int ret = nm_->nm_send_udp_msg(&reply, src_addr, src_addr_len); 105 | // assert(ret == 0); 106 | return 0; 107 | } 108 | 109 | void * Server::thread_main() { 110 | struct sockaddr_in client_addr; 111 | socklen_t client_addr_len = sizeof(struct sockaddr_in); 112 | struct KVMsg request; 113 | int rc = 0; 114 | while (!need_stop_) { 115 | rc = nm_->nm_recv_udp_msg(&request, &client_addr, &client_addr_len); 116 | if (rc && need_stop_) { 117 | break; 118 | } else if (rc) { 119 | continue; 120 | } 121 | // assert(rc == 0); 122 | deserialize_kvmsg(&request); 123 | 124 | if (request.type == REQ_CONNECT) { 125 | rc = server_on_connect(&request, &client_addr, client_addr_len); 126 | // assert(rc == 0); 127 | } else if (request.type == REQ_ALLOC_SUBTABLE) { 128 | rc = server_on_alloc_subtable(&request, &client_addr, client_addr_len); 129 | // assert(rc == 0); 130 | } else { 131 | // assert(request.type == REQ_ALLOC); 132 | rc = server_on_alloc(&request, &client_addr, client_addr_len); 133 | // assert(rc == 0); 134 | } 135 | } 136 | return NULL; 137 | } 138 | 139 | void Server::stop() { 140 | need_stop_ = 1; 141 | } 142 | 143 | uint64_t Server::get_kv_area_addr() { 144 | return mm_->get_kv_area_addr(); 145 | } 146 | 147 | uint64_t Server::get_subtable_st_addr() { 148 | return mm_->get_subtable_st_addr(); 149 | } -------------------------------------------------------------------------------- /src/server.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_SERVER_H_ 2 | #define DDCKV_SERVER_H_ 3 | 4 | #include "nm.h" 5 | #include "server_mm.h" 6 | #include "kv_utils.h" 7 | 8 | class Server { 9 | uint32_t server_id_; 10 | volatile uint8_t need_stop_; 11 | UDPNetworkManager * nm_; 12 | ServerMM * mm_; 13 | 14 | public: 15 | Server(const struct GlobalConfig * conf); 16 | ~Server(); 17 | 18 | int server_on_connect(const struct KVMsg * request, 19 | struct sockaddr_in * src_addr, socklen_t src_addr_len); 20 | int server_on_alloc(const struct KVMsg * request, 21 | struct sockaddr_in * src_addr, socklen_t src_addr_len); 22 | int server_on_alloc_subtable(const struct KVMsg * request, 23 | struct sockaddr_in * src_addr, socklen_t src_addr_len); 24 | 25 | void * thread_main(); 26 | 27 | void stop(); 28 | 29 | // for testing 30 | uint64_t get_kv_area_addr(); 31 | uint64_t get_subtable_st_addr(); 32 | }; 33 | 34 | typedef struct TagServerMainArgs { 35 | Server * server; 36 | int core_id; 37 | } ServerMainArgs; 38 | 39 | void * server_main(void * server_main_args); 40 | 41 | #endif -------------------------------------------------------------------------------- /src/server_mm.cc: -------------------------------------------------------------------------------- 1 | #include "server_mm.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "kv_debug.h" 8 | 9 | #define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) 10 | #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) 11 | 12 | ServerMM::ServerMM(uint64_t server_base_addr, uint64_t base_len, 13 | uint32_t block_size, const struct IbInfo * ib_info, 14 | const struct GlobalConfig * conf) { 15 | this->block_size_ = block_size; 16 | this->base_addr_ = server_base_addr; 17 | this->base_len_ = base_len; 18 | int port_flag = PROT_READ | PROT_WRITE; 19 | int mm_flag = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB | MAP_HUGE_2MB; 20 | this->data_ = mmap((void *)this->base_addr_, this->base_len_, port_flag, mm_flag, -1, 0); 21 | // 
assert((uint64_t)this->data_ == this->base_addr_); 22 | 23 | client_meta_area_off_ = 0; 24 | client_meta_area_len_ = META_AREA_LEN; 25 | client_gc_area_off_ = this->client_meta_area_len_; 26 | client_gc_area_len_ = GC_AREA_LEN; 27 | client_hash_area_off_ = this->client_gc_area_off_ + this->client_gc_area_len_; 28 | client_hash_area_len_ = HASH_AREA_LEN; 29 | client_kv_area_off_ = this->client_hash_area_off_ + this->client_hash_area_len_; 30 | client_kv_area_off_ = round_up(client_kv_area_off_, block_size_); 31 | client_kv_area_len_ = base_len_ - client_kv_area_off_; 32 | client_kv_area_limit_ = base_len_ + base_addr_; 33 | printf("kv_area_addr: %lx, block_size: %x\n", client_kv_area_off_, block_size_); 34 | 35 | //init hash index 36 | init_hashtable(); 37 | 38 | int access_flag = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | 39 | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; 40 | this->mr_ = ibv_reg_mr(ib_info->ib_pd, this->data_, this->base_len_, access_flag); 41 | // print_log(DEBUG, "addr %lx rkey %x", mr_->addr, mr_->rkey); 42 | 43 | num_memory_ = conf->memory_num; 44 | num_replication_ = conf->num_replication; 45 | my_sid_ = conf->server_id; 46 | printf("my_sid_: %d, num_memory_: %d\n", my_sid_, num_memory_); 47 | 48 | // init blocks 49 | num_blocks_ = client_kv_area_len_ / block_size_; 50 | get_allocable_blocks(); 51 | } 52 | 53 | ServerMM::~ServerMM() { 54 | munmap(data_, this->base_len_); 55 | } 56 | 57 | void ServerMM::get_allocable_blocks() { 58 | uint64_t kv_area_addr = base_addr_ + client_kv_area_off_; 59 | std::vector mn_addr_ptr; 60 | for (int i = 0; i < num_memory_; i ++) 61 | mn_addr_ptr.push_back(kv_area_addr); 62 | 63 | uint32_t num_rep_blocks = (num_blocks_ * num_memory_) / num_replication_; 64 | printf("num_rep_blocks: %d, num_blocks: %d, limit: %lx\n", 65 | num_rep_blocks, num_blocks_, client_kv_area_limit_); 66 | 67 | uint32_t block_cnt = 0; 68 | while (block_cnt < num_rep_blocks) { 69 | uint32_t st_sid = block_cnt % num_memory_; 70 | while (mn_addr_ptr[st_sid] == client_kv_area_limit_) 71 | st_sid = (st_sid + 1) % num_memory_; 72 | 73 | uint64_t addr_list[num_replication_]; 74 | for (int i = 0; i < num_replication_; i ++) { 75 | uint8_t sid = (st_sid + i) % num_memory_; 76 | 77 | if (mn_addr_ptr[sid] >= client_kv_area_limit_) { 78 | printf("Error addr map %d %d %d\n", block_cnt, sid, st_sid); 79 | for (int j = 0; j < num_memory_; j ++) 80 | printf("server: %lx\n", mn_addr_ptr[j]); 81 | exit(1); 82 | } 83 | if (mn_addr_ptr[sid] & 0xFF != 0) { 84 | printf("Error addr map addr\n"); 85 | exit(1); 86 | } 87 | 88 | addr_list[i] = mn_addr_ptr[sid]; 89 | mn_addr_ptr[sid] += block_size_; 90 | } 91 | if (st_sid == my_sid_) { 92 | allocable_blocks_.push(addr_list[0]); 93 | } 94 | block_cnt ++; 95 | } 96 | } 97 | 98 | uint64_t ServerMM::mm_alloc() { 99 | if (allocable_blocks_.size() == 0) { 100 | return 0; 101 | } 102 | 103 | uint64_t ret_addr = allocable_blocks_.front(); 104 | allocable_blocks_.pop(); 105 | allocated_blocks_[ret_addr] = true; 106 | return ret_addr; 107 | } 108 | 109 | int ServerMM::mm_free(uint64_t st_addr) { 110 | if (allocated_blocks_[st_addr] != true) 111 | return -1; 112 | 113 | allocated_blocks_[st_addr] = false; 114 | allocable_blocks_.push(st_addr); 115 | return 0; 116 | } 117 | 118 | uint64_t ServerMM::mm_alloc_subtable() { 119 | int ret = 0; 120 | uint64_t subtable_st_addr = base_addr_ + client_hash_area_off_ + roundup_256(ROOT_RES_LEN); 121 | for (size_t i = 0; i < subtable_alloc_map_.size(); i ++) { 122 | if (subtable_alloc_map_[i] == 0) { 
123 | subtable_alloc_map_[i] = 1; 124 | return subtable_st_addr + i * roundup_256(SUBTABLE_LEN); 125 | } 126 | } 127 | return 0; 128 | } 129 | 130 | uint32_t ServerMM::get_rkey() { 131 | return this->mr_->rkey; 132 | } 133 | 134 | int ServerMM::get_client_gc_info(uint32_t client_id, __OUT struct MrInfo * mr_info) { 135 | uint64_t single_gc_len = 1024 * 1024; 136 | uint64_t client_gc_off = client_id * single_gc_len; 137 | if (client_gc_off + single_gc_len >= this->client_gc_area_len_) { 138 | return -1; 139 | } 140 | mr_info->addr = this->client_gc_area_off_ + client_gc_off + this->base_addr_; 141 | mr_info->rkey = this->mr_->rkey; 142 | return 0; 143 | } 144 | 145 | int ServerMM::get_mr_info(__OUT struct MrInfo * mr_info) { 146 | mr_info->addr = this->base_addr_; 147 | mr_info->rkey = this->mr_->rkey; 148 | return 0; 149 | } 150 | 151 | int ServerMM::init_root(void * root_addr) { 152 | RaceHashRoot * root = (RaceHashRoot *)root_addr; 153 | root->global_depth = RACE_HASH_GLOBAL_DEPTH; 154 | root->init_local_depth = RACE_HASH_INIT_LOCAL_DEPTH; 155 | root->max_global_depth = RACE_HASH_MAX_GLOBAL_DEPTH; 156 | root->prefix_num = 1 << RACE_HASH_MAX_GLOBAL_DEPTH; 157 | root->subtable_res_num = root->prefix_num; 158 | root->subtable_init_num = RACE_HASH_INIT_SUBTABLE_NUM; 159 | root->subtable_hash_range = RACE_HASH_ADDRESSABLE_BUCKET_NUM; 160 | root->subtable_bucket_num = RACE_HASH_SUBTABLE_BUCKET_NUM; 161 | root->seed = rand(); 162 | root->root_offset = client_hash_area_off_; 163 | root->subtable_offset = root->root_offset + roundup_256(ROOT_RES_LEN); 164 | root->kv_offset = client_kv_area_off_; 165 | root->kv_len = client_kv_area_len_; 166 | root->lock = 0; 167 | 168 | return 0; 169 | } 170 | 171 | int ServerMM::init_subtable(void * subtable_addr) { 172 | // RaceHashBucket * bucket = (RaceHashBucket *)subtable_addr; 173 | uint64_t max_subtables = (base_addr_ + client_hash_area_off_ + client_hash_area_len_ - (uint64_t)subtable_addr) / roundup_256(SUBTABLE_LEN); 174 | 175 | subtable_alloc_map_.resize(max_subtables); 176 | for (int i = 0; i < max_subtables; i ++) { 177 | uint64_t cur_subtable_addr = (uint64_t)subtable_addr + i * roundup_256(SUBTABLE_LEN); 178 | subtable_alloc_map_[i] = 0; 179 | for (int j = 0; j < RACE_HASH_ADDRESSABLE_BUCKET_NUM; j ++) { 180 | RaceHashBucket * bucket = (RaceHashBucket *)cur_subtable_addr + j; 181 | bucket->local_depth = RACE_HASH_INIT_LOCAL_DEPTH; 182 | bucket->prefix = i; 183 | bucket ++; 184 | } 185 | } 186 | 187 | return 0; 188 | } 189 | 190 | int ServerMM::init_hashtable() { 191 | uint64_t root_addr = base_addr_ + client_hash_area_off_; 192 | uint64_t subtable_st_addr = get_subtable_st_addr(); 193 | init_root((void *)(root_addr)); 194 | init_subtable((void *)(subtable_st_addr)); 195 | return 0; 196 | } 197 | 198 | uint64_t ServerMM::get_kv_area_addr() { 199 | return client_kv_area_off_ + base_addr_; 200 | } 201 | 202 | uint64_t ServerMM::get_subtable_st_addr() { 203 | return client_hash_area_off_ + base_addr_ + roundup_256(ROOT_RES_LEN); 204 | } -------------------------------------------------------------------------------- /src/server_mm.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_SERVER_MM_ 2 | #define DDCKV_SERVER_MM_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "kv_utils.h" 12 | #include "hashtable.h" 13 | 14 | class ServerMM { 15 | private: 16 | uint16_t my_sid_; 17 | 18 | uint64_t base_addr_; 19 | uint64_t base_len_; 20 | uint64_t 
client_meta_area_off_; 21 | uint64_t client_meta_area_len_; 22 | uint64_t client_gc_area_off_; 23 | uint64_t client_gc_area_len_; 24 | uint64_t client_hash_area_off_; 25 | uint64_t client_hash_area_len_; 26 | uint64_t client_kv_area_off_; 27 | uint64_t client_kv_area_len_; 28 | uint64_t client_kv_area_limit_; 29 | 30 | uint32_t num_memory_; 31 | uint32_t num_replication_; 32 | 33 | uint32_t block_size_; 34 | uint32_t num_blocks_; 35 | struct ibv_mr * mr_; 36 | #ifdef SERVER_MM 37 | uint64_t next_free_block_addr_; 38 | #endif 39 | 40 | std::vector subtable_alloc_map_; 41 | std::queue allocable_blocks_; 42 | std::unordered_map allocated_blocks_; 43 | 44 | void * data_; 45 | 46 | // private methods 47 | private: 48 | //init hash table index stored at client_hash_area_off_ 49 | int init_root(void * root_addr); 50 | int init_subtable(void * subtable_addr); 51 | int init_hashtable(); 52 | void get_allocable_blocks(); 53 | 54 | public: 55 | ServerMM(uint64_t server_base_addr, uint64_t base_len, 56 | uint32_t block_size, const struct IbInfo * ib_info, 57 | const struct GlobalConfig * conf); 58 | ~ServerMM(); 59 | 60 | uint64_t mm_alloc(); 61 | 62 | int mm_free(uint64_t st_addr); 63 | 64 | uint64_t mm_alloc_subtable(); 65 | 66 | uint32_t get_rkey(); 67 | 68 | int get_client_gc_info(uint32_t client_id, __OUT struct MrInfo * mr_info); 69 | int get_mr_info(__OUT struct MrInfo * mr_info); 70 | 71 | uint64_t get_kv_area_addr(); 72 | uint64_t get_subtable_st_addr(); 73 | }; 74 | 75 | #endif -------------------------------------------------------------------------------- /src/spinlock.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_SPINLOCK_H_ 2 | #define DDCKV_SPINLOCK_H_ 3 | 4 | #define barrier() asm volatile("": : :"memory") 5 | 6 | #define _cpu_relax() asm volatile("pause\n": : :"memory") 7 | 8 | static inline unsigned short xchg_8(void * ptr, unsigned char x) { 9 | __asm__ __volatile__("xchgb %0,%1" 10 | :"=r" (x) 11 | :"m" (*(volatile unsigned char *)ptr), "0" (x) 12 | :"memory"); 13 | return x; 14 | } 15 | 16 | #define BUSY 1 17 | typedef unsigned char spinlock_t; 18 | 19 | #define SPINLOCK_INITIALIZER 0 20 | 21 | static inline void spin_lock(spinlock_t * lock) { 22 | while (1) { 23 | if (!xchg_8(lock, BUSY)) 24 | return; 25 | 26 | while (*lock) _cpu_relax(); 27 | } 28 | } 29 | 30 | static inline void spin_unlock(spinlock_t * lock) { 31 | barrier(); 32 | *lock = 0; 33 | } 34 | 35 | static inline int spin_trylock(spinlock_t * lock) { 36 | return xchg_8(lock, BUSY); 37 | } 38 | 39 | #endif -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(GTest REQUIRED) 2 | 3 | include_directories(${GTEST_INCLUDE_DIRS}) 4 | 5 | add_executable(test_nm test_nm.cc ddckv_test.cc) 6 | add_executable(test_kv_utils test_kv_utils.cc ddckv_test.cc) 7 | # add_executable(test_server test_server.cc ddckv_test.cc) 8 | add_executable(test_client_server test_client_server.cc) 9 | add_executable(test_client_client test_client_client.cc) 10 | add_executable(test_remote_nm test_remote_nm.cc ddckv_test.cc) 11 | add_executable(client_kv_shell client_kv_shell.cc) 12 | add_executable(test_mm test_mm.cc ddckv_test.cc) 13 | 14 | target_link_libraries(test_remote_nm 15 | ${GTEST_BOTH_LIBRARIES} 16 | libddckv 17 | pthread 18 | ibverbs 19 | ) 20 | 21 | target_link_libraries(test_kv_utils 22 | ${GTEST_BOTH_LIBRARIES} 23 | libddckv 24 | 
pthread 25 | ) 26 | 27 | target_link_libraries(test_nm 28 | ${GTEST_BOTH_LIBRARIES} 29 | libddckv 30 | pthread 31 | ibverbs 32 | ) 33 | 34 | # target_link_libraries(test_server 35 | # ${GTEST_BOTH_LIBRARIES} 36 | # libddckv 37 | # pthread 38 | # ibverbs 39 | # ) 40 | 41 | target_link_libraries(test_client_server 42 | libddckv 43 | pthread 44 | ibverbs 45 | ) 46 | 47 | target_compile_options( 48 | test_client_client 49 | PRIVATE 50 | # ${CMAKE_CXX_FLAGS_DEBUG} 51 | # "-g" 52 | # "-D_DEBUG" 53 | ) 54 | 55 | target_link_libraries(test_client_client 56 | ${GTEST_BOTH_LIBRARIES} 57 | libddckv 58 | pthread 59 | ibverbs 60 | ) 61 | 62 | target_link_libraries(client_kv_shell 63 | libddckv 64 | pthread 65 | ibverbs 66 | ) 67 | 68 | target_link_libraries(test_mm 69 | ${GTEST_BOTH_LIBRARIES} 70 | libddckv 71 | pthread 72 | ibverbs 73 | ) 74 | 75 | gtest_discover_tests( 76 | test_nm 77 | test_remote_nm 78 | test_kv_utils 79 | # test_server 80 | test_client_client 81 | test_mm 82 | ) -------------------------------------------------------------------------------- /tests/client_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "role": "CLIENT", 3 | "conn_type": "IB", 4 | "server_id": 3, 5 | "udp_port": 2333, 6 | "memory_num": 3, 7 | "memory_ips": [ 8 | "10.10.10.1", 9 | "10.10.10.2", 10 | "10.10.10.3" 11 | ], 12 | "ib_dev_id": 0, 13 | "ib_port_id": 1, 14 | "ib_gid_idx": 0, 15 | 16 | "server_base_addr": "0x10000000", 17 | "server_data_len": 2147483648, 18 | "block_size": 67108864, 19 | "subblock_size": 256, 20 | "client_local_size": 1073741824, 21 | 22 | "num_replication": 3, 23 | "num_idx_rep": 1, 24 | "num_coroutines": 8, 25 | "miss_rate_threash": 0.1, 26 | 27 | "main_core_id": 0, 28 | "poll_core_id": 1, 29 | "bg_core_id": 2, 30 | "gc_core_id": 3 31 | } 32 | -------------------------------------------------------------------------------- /tests/client_kv_shell.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "client.h" 7 | 8 | #define KV_KEYLEN_LIMIT 64 9 | 10 | enum ReqType { 11 | INSERT, 12 | SEARCH, 13 | UPDATE, 14 | DELETE 15 | }; 16 | 17 | typedef struct TagClientCmd { 18 | ReqType cmdType; 19 | char key[KV_KEYLEN_LIMIT]; 20 | uint64_t klen; 21 | int64_t value; 22 | uint64_t vlen; 23 | } ClientCmd; 24 | 25 | static int checkNumber(char * str) { 26 | for (int i = 0; i < strlen(str); i++) { 27 | if (str[i] < '0' || str[i] > '9') { 28 | return -1; 29 | } 30 | } 31 | return 0; 32 | } 33 | 34 | static void usage() { 35 | printf("==== Usage ====\n"); 36 | printf("put key value\n"); 37 | printf("get key\n"); 38 | printf("del key\n"); 39 | printf("===============\n"); 40 | } 41 | 42 | int parseInput(char * buf, __OUT ClientCmd * cmd) { 43 | int ret = -1; 44 | char * p = strtok(buf, " "); 45 | char * parsed[3]; // parsed[0]: cmdName, parsed[1]: key, parsed[2]: value 46 | // fetch key and value to the parsed 47 | for (int i = 0; i < 3; i++) { 48 | parsed[i] = p; 49 | p = strtok(NULL, " "); 50 | } 51 | 52 | // parse commands 53 | if (!strcmp(parsed[0], "search") || !strcmp(parsed[0], "SEARCH")) { 54 | // check if parsed[1] exists 55 | if (parsed[1] == NULL) { 56 | printf("Usage: %s key\n", parsed[0]); 57 | return -1; 58 | } 59 | // check if the length of the key exceeds the limit 60 | int klen = strlen(parsed[1]); 61 | if (klen > KV_KEYLEN_LIMIT) { 62 | printf("Error: key should be less than %d characters\n", KV_KEYLEN_LIMIT); 63 | return -1; 64 | } 65 
| // copy the key to the ClientCmd 66 | memcpy(cmd->key, parsed[1], klen); 67 | // set other arguments 68 | cmd->cmdType = SEARCH; 69 | cmd->klen = klen; 70 | cmd->vlen = 0; 71 | return 0; // return success here 72 | } else if (!strcmp(parsed[0], "insert") || !strcmp(parsed[0], "INSERT")) { 73 | if (parsed[1] == NULL || parsed[2] == NULL) { 74 | printf("Usage: %s key value\n", parsed[0]); 75 | return -1; 76 | } 77 | // check if the length of the key exceeds the limit 78 | int klen = strlen(parsed[1]); 79 | if (klen > KV_KEYLEN_LIMIT) { 80 | printf("Error: key should be less than %d characters\n", KV_KEYLEN_LIMIT); 81 | return -1; 82 | } 83 | // check if the second argument is a number 84 | ret = checkNumber(parsed[2]); 85 | if (ret < 0) { 86 | printf("Error: value should be an integer number\n"); 87 | return -1; 88 | } 89 | // set cmd 90 | cmd->cmdType = INSERT; 91 | memcpy(cmd->key, parsed[1], klen); 92 | cmd->klen = klen; 93 | cmd->value = atoll(parsed[2]); 94 | cmd->vlen = sizeof(int64_t); 95 | return 0; 96 | } else if (!strcmp(parsed[0], "update") || !strcmp(parsed[0], "UPDATE")) { 97 | if (parsed[1] == NULL || parsed[2] == NULL) { 98 | printf("Usage: %s key value\n", parsed[0]); 99 | return -1; 100 | } 101 | // check if the length of the key exceeds the limit 102 | int klen = strlen(parsed[1]); 103 | if (klen > KV_KEYLEN_LIMIT) { 104 | printf("Error: key should be less than %d characters\n", KV_KEYLEN_LIMIT); 105 | return -1; 106 | } 107 | // check if the second argument is a number 108 | ret = checkNumber(parsed[2]); 109 | if (ret < 0) { 110 | printf("Error: value should be an integer number\n"); 111 | return -1; 112 | } 113 | // set cmd 114 | cmd->cmdType = UPDATE; 115 | memcpy(cmd->key, parsed[1], klen); 116 | cmd->klen = klen; 117 | cmd->value = atoll(parsed[2]); 118 | cmd->vlen = sizeof(int64_t); 119 | return 0; 120 | } else if (!strcmp(parsed[0], "delete") || !strcmp(parsed[0], "DELETE")) { 121 | // check if the key exists 122 | if (parsed[1] == NULL) { 123 | printf("Usage: %s key\n", parsed[0]); 124 | return -1; 125 | } 126 | // check if the length of the key exceeds the limit 127 | int klen = strlen(parsed[1]); 128 | if (klen > KV_KEYLEN_LIMIT) { 129 | printf("Error: key should be less than %d characters\n", KV_KEYLEN_LIMIT); 130 | return -1; 131 | } 132 | // set cmd 133 | cmd->cmdType = DELETE; 134 | memcpy(cmd->key, parsed[1], klen); 135 | cmd->klen = klen; 136 | return 0; // return success here 137 | } else if (!strcmp(parsed[0], "quit") || !strcmp(parsed[0], "q") || !strcmp(parsed[0], "exit")) { 138 | exit(0); 139 | } else if (!strcmp(parsed[0], "help")) { 140 | usage(); 141 | return 0; 142 | } 143 | else { 144 | // no match cmd 145 | printf("Error: command not supported\n"); 146 | return -1; 147 | } 148 | return -1; 149 | } 150 | 151 | typedef struct TagRetVal { 152 | union { 153 | int ret_code; 154 | void * val_addr; 155 | } ret_val; 156 | } RetVal; 157 | 158 | static RetVal * clientShellExe(Client * client, ClientCmd * cmd) { 159 | RetVal * ret_val = (RetVal *)malloc(sizeof(RetVal)); 160 | char buf[17] = {0}; 161 | 162 | void * client_local_addr = client->get_local_buf_mr()->addr; 163 | uint64_t client_input_addr = (uint64_t)client->get_input_buf(); 164 | 165 | KVReqCtx ctx; 166 | KVInfo kv_info; 167 | memset((void *)client_input_addr, 0, 1024); 168 | memcpy((void *)(client_input_addr + sizeof(KVLogHeader)), cmd->key, cmd->klen); 169 | memcpy((void *)(client_input_addr + sizeof(KVLogHeader) + cmd->klen), &cmd->value, cmd->vlen); 170 | KVLogHeader * header = (KVLogHeader 
*)client_input_addr; 171 | header->key_length = cmd->klen; 172 | header->value_length = cmd->vlen; 173 | header->is_valid = true; 174 | 175 | KVLogTail * tail = (KVLogTail *)((uint64_t)client_input_addr 176 | + sizeof(KVLogHeader) + header->key_length + header->value_length); 177 | 178 | kv_info.key_len = cmd->klen; 179 | kv_info.l_addr = (void *)client_input_addr; 180 | kv_info.lkey = client->get_input_buf_lkey(); 181 | kv_info.value_len = cmd->vlen; 182 | 183 | ctx.kv_info = &kv_info; 184 | ctx.coro_id = 0; 185 | ctx.use_cache = true; 186 | ctx.lkey = client->get_local_buf_mr()->lkey; 187 | 188 | switch (cmd->cmdType) { 189 | case SEARCH: 190 | ctx.req_type = KV_REQ_SEARCH; 191 | printf("searching\n"); 192 | client->init_kv_search_space(client_local_addr, &ctx); 193 | ret_val->ret_val.val_addr = client->kv_search(&ctx); 194 | break; 195 | case UPDATE: 196 | ctx.req_type = KV_REQ_UPDATE; 197 | tail->op = KV_OP_UPDATE; 198 | client->init_kv_update_space(client_local_addr, &ctx); 199 | ret_val->ret_val.ret_code = client->kv_update(&ctx); 200 | break; 201 | case DELETE: 202 | ctx.req_type = KV_REQ_DELETE; 203 | client->init_kv_delete_space(client_local_addr, &ctx); 204 | ret_val->ret_val.ret_code = client->kv_delete(&ctx); 205 | break; 206 | case INSERT: 207 | ctx.req_type = KV_REQ_INSERT; 208 | tail->op = KV_OP_INSERT; 209 | printf("inserting\n"); 210 | client->init_kv_insert_space(client_local_addr, &ctx); 211 | ret_val->ret_val.ret_code = client->kv_insert(&ctx); 212 | break; 213 | default: 214 | ret_val->ret_val.ret_code = -1; 215 | } 216 | return ret_val; 217 | } 218 | 219 | int main(int argc, char ** argv) { 220 | int ret = 0; 221 | if (argc != 2) { 222 | printf("Usage: %s path-to-config\n", argv[0]); 223 | return 1; 224 | } 225 | GlobalConfig config; 226 | ret = load_config(argv[1], &config); 227 | // assert(ret == 0); 228 | RetVal * ret_val = NULL; 229 | 230 | Client client(&config); 231 | 232 | boost::fibers::fiber polling_fb = client.start_polling_fiber(); 233 | 234 | while (1) { 235 | char buf[256]; 236 | // cmdline hint 237 | printf("mykv >> "); 238 | 239 | // get input 240 | fgets(buf, 256, stdin); 241 | printf("buf: %s\n", buf); 242 | buf[strlen(buf) - 1] = '\0'; 243 | 244 | // parse command 245 | ClientCmd cmd; 246 | ret = parseInput(buf, &cmd); 247 | if (ret < 0) { 248 | printf("parse failed\n"); 249 | continue; 250 | } 251 | 252 | // execute command 253 | ret_val = clientShellExe(&client, &cmd); 254 | if (ret < 0) { 255 | printf("%s failed\n", buf); 256 | continue; 257 | } 258 | 259 | // print result 260 | if (cmd.cmdType == SEARCH) { 261 | if (ret_val->ret_val.val_addr != NULL) 262 | printf("%ld\n", *(uint64_t *)ret_val->ret_val.val_addr); 263 | else 264 | printf("key not found\n"); 265 | } else { 266 | printf("%d\n", ret_val->ret_val.ret_code); 267 | } 268 | free(ret_val); 269 | } 270 | polling_fb.join(); 271 | } -------------------------------------------------------------------------------- /tests/ddckv_test.cc: -------------------------------------------------------------------------------- 1 | #include "ddckv_test.h" 2 | 3 | void DDCKVTest::setup_server_conf() { 4 | strcpy(server_conf_.memory_ips[0], "127.0.0.1"); 5 | server_conf_.role = SERVER; 6 | server_conf_.conn_type = IB; 7 | server_conf_.server_id = 0; 8 | server_conf_.udp_port = 2333; 9 | server_conf_.memory_num = 1; 10 | server_conf_.ib_dev_id = 0; 11 | server_conf_.ib_port_id = 1; 12 | server_conf_.ib_gid_idx = -1; 13 | server_conf_.server_base_addr = 0x10000000; 14 | server_conf_.server_data_len = 2ll * GB; 
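// Editor's note: with the 2 GB data area above and the 64 MB block size set on
// the next line, the test server exposes at most 2 GB / 64 MB = 32 KV blocks;
// the meta/GC/hash areas that ServerMM carves out at the front of the region
// reduce that number slightly.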
15 | server_conf_.block_size = 64ll * MB; 16 | } 17 | 18 | void DDCKVTest::setup_client_conf() { 19 | strcpy(client_conf_.memory_ips[0], "127.0.0.1"); 20 | client_conf_.role = CLIENT; 21 | client_conf_.conn_type = IB; 22 | client_conf_.server_id = 1; 23 | client_conf_.udp_port = 2333; 24 | client_conf_.memory_num = 1; 25 | client_conf_.ib_dev_id = 0; 26 | client_conf_.ib_port_id = 1; 27 | client_conf_.ib_gid_idx = -1; 28 | client_conf_.num_replication = 2; 29 | } -------------------------------------------------------------------------------- /tests/ddckv_test.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_TEST_H_ 2 | #define DDCKV_TEST_H_ 3 | 4 | #include 5 | 6 | #include "kv_utils.h" 7 | 8 | #define GB (1024ll * 1024 * 1024) 9 | #define MB (1024ll * 1024) 10 | #define KB (1024ll) 11 | 12 | class DDCKVTest : public ::testing::Test { 13 | protected: 14 | void setup_server_conf(); 15 | void setup_client_conf(); 16 | 17 | public: 18 | struct GlobalConfig server_conf_; 19 | struct GlobalConfig client_conf_; 20 | }; 21 | 22 | #endif -------------------------------------------------------------------------------- /tests/server_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "role": "SERVER", 3 | "conn_type": "IB", 4 | "server_id": 0, 5 | "udp_port": 2333, 6 | "memory_num": 3, 7 | "memory_ips": [ 8 | "10.10.10.1", 9 | "10.10.10.2", 10 | "10.10.10.3" 11 | ], 12 | "ib_dev_id": 0, 13 | "ib_port_id": 1, 14 | "ib_gid_idx": 0, 15 | 16 | "server_base_addr": "0x10000000", 17 | "server_data_len": 2147483648, 18 | "block_size": 67108864, 19 | "subblock_size": 256, 20 | "client_local_size": 1073741824, 21 | 22 | "num_replication": 3, 23 | 24 | "main_core_id": 0, 25 | "poll_core_id": 1, 26 | "bg_core_id": 2, 27 | "gc_core_id": 3 28 | } 29 | -------------------------------------------------------------------------------- /tests/test_client.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_TEST_CLIENT_H_ 2 | #define DDCKV_TEST_CLIENT_H_ 3 | 4 | #include 5 | 6 | #include "client.h" 7 | #include "kv_utils.h" 8 | 9 | class ClientTest : public ::testing::Test { 10 | protected: 11 | void SetUp() override; 12 | void TearDown() override; 13 | 14 | public: 15 | struct GlobalConfig client_conf_; 16 | Client * client_; 17 | }; 18 | 19 | #endif -------------------------------------------------------------------------------- /tests/test_client_server.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "server.h" 7 | 8 | int main(int argc, char ** argv) { 9 | if (argc != 2) { 10 | printf("Usage: %s [server_id]\n", argv[0]); 11 | return -1; 12 | } 13 | 14 | int32_t server_id = atoi(argv[1]); 15 | int32_t ret = 0; 16 | struct GlobalConfig server_conf; 17 | ret = load_config("./server_config.json", &server_conf); 18 | // assert(ret == 0); 19 | server_conf.server_id = server_id; 20 | 21 | printf("===== Starting Server %d =====\n", server_conf.server_id); 22 | Server * server = new Server(&server_conf); 23 | pthread_t server_tid; 24 | pthread_create(&server_tid, NULL, server_main, (void *)server); 25 | 26 | printf("press to exit\n"); 27 | getchar(); 28 | printf("===== Ending Server %d =====\n", server_conf.server_id); 29 | 30 | server->stop(); 31 | return 0; 32 | } -------------------------------------------------------------------------------- /tests/test_conf.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "role": "SERVER", 3 | "conn_type": "IB", 4 | "server_id": 0, 5 | "udp_port": 2333, 6 | "memory_num": 2, 7 | "memory_ips": [ 8 | "10.10.10.1", 9 | "10.10.10.2" 10 | ], 11 | 12 | "ib_dev_id": 0, 13 | "ib_port_id": 1, 14 | 15 | "server_base_addr": "0x100000", 16 | "server_data_len": 2147483648, 17 | "block_size": 67108864, 18 | "subblock_size": 256, 19 | "client_local_size": 1073741824, 20 | 21 | "num_replication": 3 22 | } -------------------------------------------------------------------------------- /tests/test_kv_utils.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "kv_utils.h" 7 | 8 | TEST(test_kv_utils, kv_msg_conn_info) { 9 | struct KVMsg orig_kvmsg; 10 | struct KVMsg sent_kvmsg; 11 | 12 | std::default_random_engine e; 13 | 14 | orig_kvmsg.id = (uint16_t)e(); 15 | orig_kvmsg.type = REQ_CONNECT; 16 | 17 | for (int i = 0; i < 16; i ++) { 18 | orig_kvmsg.body.conn_info.qp_info.gid[i] = (uint8_t)e(); 19 | } 20 | 21 | orig_kvmsg.body.conn_info.qp_info.gid_idx = 0; 22 | orig_kvmsg.body.conn_info.qp_info.lid = (uint16_t)e(); 23 | orig_kvmsg.body.conn_info.qp_info.port_num = (uint8_t)e(); 24 | orig_kvmsg.body.conn_info.qp_info.qp_num = (uint32_t)e(); 25 | orig_kvmsg.body.conn_info.gc_info.addr = (uint64_t)e(); 26 | orig_kvmsg.body.conn_info.gc_info.rkey = (uint32_t)e(); 27 | 28 | memcpy(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg)); 29 | 30 | serialize_kvmsg(&sent_kvmsg); 31 | deserialize_kvmsg(&sent_kvmsg); 32 | 33 | int rc = memcmp(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg)); 34 | ASSERT_EQ(rc, 0); 35 | } 36 | 37 | TEST(test_kv_utils, kv_msg_alloc_info) { 38 | struct KVMsg orig_kvmsg; 39 | struct KVMsg sent_kvmsg; 40 | 41 | std::default_random_engine e; 42 | orig_kvmsg.id = e(); 43 | orig_kvmsg.type = REQ_ALLOC; 44 | orig_kvmsg.body.mr_info.addr = (uint64_t)e(); 45 | orig_kvmsg.body.mr_info.rkey = (uint32_t)e(); 46 | 47 | memcpy(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg)); 48 | 49 | serialize_kvmsg(&sent_kvmsg); 50 | deserialize_kvmsg(&sent_kvmsg); 51 | 52 | int rc = memcmp(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg)); 53 | ASSERT_EQ(rc, 0); 54 | } 55 | 56 | TEST(test_kv_utils, kv_msg_alloc_subtable_info) { 57 | struct KVMsg orig_kvmsg; 58 | struct KVMsg sent_kvmsg; 59 | 60 | std::default_random_engine e; 61 | orig_kvmsg.id = e(); 62 | orig_kvmsg.type = REQ_ALLOC_SUBTABLE; 63 | orig_kvmsg.body.mr_info.addr = (uint64_t)e(); 64 | orig_kvmsg.body.mr_info.rkey = (uint32_t)e(); 65 | 66 | memcpy(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg)); 67 | 68 | serialize_kvmsg(&sent_kvmsg); 69 | deserialize_kvmsg(&sent_kvmsg); 70 | 71 | int rc = memcmp(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg)); 72 | ASSERT_EQ(rc, 0); 73 | } 74 | 75 | TEST(test_kv_utils, load_config) { 76 | const char * config_file_name = "./test_conf.json"; 77 | struct GlobalConfig conf; 78 | int ret = load_config(config_file_name, &conf); 79 | ASSERT_TRUE(ret == 0); 80 | 81 | ASSERT_TRUE(conf.role == SERVER); 82 | ASSERT_TRUE(conf.conn_type == IB); 83 | ASSERT_TRUE(conf.server_id == 0); 84 | ASSERT_TRUE(conf.udp_port == 2333); 85 | ASSERT_TRUE(conf.memory_num == 2); 86 | ASSERT_EQ(strcmp(conf.memory_ips[0], "10.10.10.1"), 0); 87 | ASSERT_EQ(strcmp(conf.memory_ips[1], "10.10.10.2"), 0); 88 | 89 | ASSERT_TRUE(conf.ib_dev_id == 0); 90 | ASSERT_TRUE(conf.ib_port_id == 1); 91 | ASSERT_TRUE(conf.ib_gid_idx == -1); 92 | 93 | 
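// Editor's note: the assertions below restate ./test_conf.json in bytes:
// server_base_addr 0x100000, 2147483648 B = 2 GB of server data,
// 67108864 B = 64 MB blocks, 256 B subblocks, 1073741824 B = 1 GB of
// client-local buffer, and num_replication = 3.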
ASSERT_TRUE(conf.server_base_addr == 0x100000); 94 | ASSERT_TRUE(conf.server_data_len == 2147483648); 95 | ASSERT_TRUE(conf.block_size == 64 * 1024 * 1024); 96 | ASSERT_TRUE(conf.subblock_size == 256); 97 | ASSERT_TRUE(conf.client_local_size == 1024 * 1024 * 1024); 98 | 99 | ASSERT_TRUE(conf.num_replication == 3); 100 | } 101 | 102 | TEST(test_kv_utils, encode_gc_slot) { 103 | size_t buf_sz = 64 * 1024 * 1024; 104 | void * buf_pr = mmap((void *)0x10000000, buf_sz, PROT_READ | PROT_WRITE, 105 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1, 0); 106 | void * buf_bk = mmap((void *)0x20000000, buf_sz, PROT_READ | PROT_WRITE, 107 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1, 0); 108 | 109 | ASSERT_TRUE((uint64_t)buf_pr == 0x10000000); 110 | ASSERT_TRUE((uint64_t)buf_bk == 0x20000000); 111 | 112 | DecodedClientGCSlot orig; 113 | uint64_t blockoff = 5 * 256; 114 | orig.bk_addr = (uint64_t)buf_bk + blockoff; 115 | orig.pr_addr = (uint64_t)buf_pr + blockoff; 116 | orig.num_subblocks = 5; 117 | 118 | EncodedClientGCSlot e; 119 | encode_gc_slot(&orig, &e.meta_gc_addr); 120 | 121 | DecodedClientGCSlot d; 122 | decode_gc_slot(e.meta_gc_addr, &d); 123 | ASSERT_TRUE(d.pr_addr == orig.pr_addr); 124 | ASSERT_TRUE(d.bk_addr == orig.bk_addr); 125 | ASSERT_TRUE(d.num_subblocks == orig.num_subblocks); 126 | } -------------------------------------------------------------------------------- /tests/test_mm.cc: -------------------------------------------------------------------------------- 1 | #include "test_mm.h" 2 | 3 | #include 4 | #include 5 | 6 | void MMTest::SetUp() { 7 | int ret = 0; 8 | 9 | setup_server_conf(); 10 | setup_client_conf(); 11 | client_conf_.num_replication = 1; 12 | client_conf_.server_base_addr = 0x10000000; 13 | client_conf_.server_data_len = 2147483648; 14 | client_conf_.block_size = 67108864; 15 | client_conf_.subblock_size = 256; 16 | client_conf_.client_local_size = 1073741824; 17 | client_conf_.num_coroutines = 8; 18 | 19 | server_ = new Server(&server_conf_); 20 | pthread_create(&server_tid_, NULL, server_main, server_); 21 | 22 | client_ = new Client(&client_conf_); 23 | polling_tid_ = client_->start_polling_thread(); 24 | 25 | client_nm_ = client_->get_nm(); 26 | client_mm_ = client_->get_mm(); 27 | printf("===== Initialization finished ====\n"); 28 | } 29 | 30 | void MMTest::TearDown() { 31 | server_->stop(); 32 | pthread_join(server_tid_, NULL); 33 | client_->stop_polling_thread(); 34 | pthread_join(polling_tid_, NULL); 35 | delete server_; 36 | delete client_nm_; 37 | delete client_mm_; 38 | } 39 | 40 | TEST_F(MMTest, initialization) { 41 | ASSERT_TRUE(true); 42 | } 43 | 44 | TEST_F(MMTest, mmalloc) { 45 | ClientMMAllocCtx alloc_ctx; 46 | client_mm_->mm_alloc(1024 * 1024 * 32, client_nm_, &alloc_ctx); 47 | printf("%lx\n", alloc_ctx.addr_list[0]); 48 | } 49 | 50 | TEST_F(MMTest, mmalloc_multi) { 51 | for (int i = 0; i < 10; i ++) { 52 | ClientMMAllocCtx ctx; 53 | client_mm_->mm_alloc(1024 * 1024 * 32, client_nm_, &ctx); 54 | printf("%lx\n", ctx.addr_list[0]); 55 | } 56 | } 57 | 58 | TEST_F(MMTest, mmalloc_multi_fiber) { 59 | boost::fibers::fiber fiber_list[8]; 60 | for (int i = 0; i < 8; i ++) { 61 | boost::fibers::fiber fb([&](int coro_id) { 62 | for (int i = 0; i < 2; i ++) { 63 | ClientMMAllocCtx ctx; 64 | printf("%d: start alloc\n", coro_id); 65 | client_mm_->mm_alloc(1024 * 1024 * 32, client_nm_, &ctx); 66 | printf("%d: %lx\n", coro_id, ctx.addr_list[0]); 67 | boost::this_fiber::yield(); 68 | } 69 | }, i); 70 | fiber_list[i] = std::move(fb); 71 | } 
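// Editor's note: every fiber launched above must be joined below before the
// test body returns; destroying a still-joinable boost::fibers::fiber would
// terminate the process, and TearDown() should only stop the server and the
// client polling thread after all allocations have completed.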
72 | for (int i = 0; i < 8; i ++) { 73 | fiber_list[i].join(); 74 | } 75 | } -------------------------------------------------------------------------------- /tests/test_mm.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_TEST_MM_H_ 2 | #define DDCKV_TEST_MM_H_ 3 | 4 | #include 5 | 6 | #include 7 | 8 | #include "ddckv_test.h" 9 | #include "kv_utils.h" 10 | #include "client.h" 11 | #include "server.h" 12 | #include "nm.h" 13 | 14 | class MMTest : public DDCKVTest { 15 | protected: 16 | void SetUp() override; 17 | void TearDown() override; 18 | 19 | public: 20 | Server * server_; 21 | Client * client_; 22 | ClientMM * client_mm_; 23 | UDPNetworkManager * client_nm_; 24 | 25 | pthread_t server_tid_; 26 | pthread_t polling_tid_; 27 | 28 | int ib_connect(struct MrInfo * mr_info); 29 | }; 30 | 31 | #endif -------------------------------------------------------------------------------- /tests/test_nm.cc: -------------------------------------------------------------------------------- 1 | #include "test_nm.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "nm.h" 10 | #include "server.h" 11 | #include "kv_utils.h" 12 | 13 | void NMTest::SetUp() { 14 | setup_server_conf(); 15 | setup_client_conf(); 16 | server_nm_ = new UDPNetworkManager(&server_conf_); 17 | client_nm_ = new UDPNetworkManager(&client_conf_); 18 | } 19 | 20 | void NMTest::TearDown() { 21 | delete server_nm_; 22 | delete client_nm_; 23 | } 24 | 25 | int NMTest::ib_connect(struct MrInfo * mr_info) { 26 | pthread_t server_tid; 27 | int ret; 28 | pthread_create(&server_tid, NULL, ib_connect_server, server_nm_); 29 | 30 | ret = client_nm_->client_connect_one_rc_qp(0, mr_info); 31 | assert(ret == 0); 32 | 33 | pthread_join(server_tid, NULL); 34 | return 0; 35 | } 36 | 37 | SrReqCtx * NMTest::gen_sr_reqs(struct MrInfo * mr_info) { 38 | SrReqCtx * ret_ctx = (SrReqCtx *)malloc(sizeof(SrReqCtx)); 39 | for (int i = 0; i < 4; i ++) { 40 | test_source_data_[i] = (123 * i) ^ i; 41 | } 42 | 43 | for (int i = 0; i < 4; i ++) { 44 | if (i < 2) { 45 | ret_ctx->sg_list_1[i].addr = (uint64_t)&test_source_data_[i]; 46 | ret_ctx->sg_list_1[i].length = 8; 47 | ret_ctx->sg_list_1[i].lkey = 0; 48 | } else { 49 | ret_ctx->sg_list_2[i - 2].addr = (uint64_t)&test_source_data_[i]; 50 | ret_ctx->sg_list_2[i - 2].length = 8; 51 | ret_ctx->sg_list_2[i - 2].lkey = 0; 52 | } 53 | } 54 | 55 | for (int i = 0; i < 4; i ++) { 56 | if (i < 2) { 57 | ret_ctx->sr_list_1[i].wr_id = i; 58 | ret_ctx->sr_list_1[i].sg_list = &ret_ctx->sg_list_1[i]; 59 | ret_ctx->sr_list_1[i].num_sge = 1; 60 | ret_ctx->sr_list_1[i].opcode = IBV_WR_RDMA_WRITE; 61 | ret_ctx->sr_list_1[i].send_flags = IBV_SEND_INLINE; 62 | ret_ctx->sr_list_1[i].wr.rdma.remote_addr = mr_info->addr + i * sizeof(uint64_t); 63 | ret_ctx->sr_list_1[i].wr.rdma.rkey = mr_info->rkey; 64 | ret_ctx->sr_list_1[i].next = NULL; 65 | } else { 66 | ret_ctx->sr_list_2[i - 2].wr_id = i; 67 | ret_ctx->sr_list_2[i - 2].sg_list = &ret_ctx->sg_list_2[i - 2]; 68 | ret_ctx->sr_list_2[i - 2].num_sge = 1; 69 | ret_ctx->sr_list_2[i - 2].opcode = IBV_WR_RDMA_WRITE; 70 | ret_ctx->sr_list_2[i - 2].send_flags = IBV_SEND_INLINE; 71 | ret_ctx->sr_list_2[i - 2].wr.rdma.remote_addr = mr_info->addr + i * sizeof(uint64_t); 72 | ret_ctx->sr_list_2[i - 2].wr.rdma.rkey = mr_info->rkey; 73 | ret_ctx->sr_list_2[i - 2].next = NULL; 74 | } 75 | } 76 | ret_ctx->sr_list_1[0].next = &ret_ctx->sr_list_1[1]; 77 | ret_ctx->sr_list_2[0].next = &ret_ctx->sr_list_2[1]; 78 | 79 | 
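// Editor's note: the two chained ibv_send_wr lists built above are exported in
// two forms below: m_srl[0..1] wraps them as a single two-entry IbvSrList
// array, while srl1/srl2 expose the same work requests as two standalone
// descriptors; the rdma_post_sr_lists_* and rdma_post_sr_list_batch_* tests
// exercise both forms against the same four remote slots.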
ret_ctx->m_srl[0].num_sr = 2; 80 | ret_ctx->m_srl[0].server_id = 0; 81 | ret_ctx->m_srl[0].sr_list = ret_ctx->sr_list_1; 82 | ret_ctx->m_srl[1].num_sr = 2; 83 | ret_ctx->m_srl[1].server_id = 0; 84 | ret_ctx->m_srl[1].sr_list = ret_ctx->sr_list_2; 85 | 86 | ret_ctx->srl1.num_sr = 2; 87 | ret_ctx->srl1.server_id = 0; 88 | ret_ctx->srl1.sr_list = ret_ctx->sr_list_1; 89 | ret_ctx->srl2.num_sr = 2; 90 | ret_ctx->srl2.server_id = 0; 91 | ret_ctx->srl2.sr_list = ret_ctx->sr_list_2; 92 | 93 | return ret_ctx; 94 | } 95 | 96 | void * udp_send_recv_server(void * args) { 97 | UDPNetworkManager * nm = (UDPNetworkManager *)args; 98 | KVMsg request; 99 | struct sockaddr_in src_addr; 100 | socklen_t src_addr_len = sizeof(struct sockaddr_in); 101 | int ret = nm->nm_recv_udp_msg(&request, &src_addr, &src_addr_len); 102 | assert(ret == 0); 103 | deserialize_kvmsg(&request); 104 | assert(request.type == REQ_ALLOC); 105 | assert(request.id == 1); 106 | KVMsg reply; 107 | reply.type = REP_ALLOC; 108 | reply.id = nm->get_server_id(); 109 | serialize_kvmsg(&reply); 110 | ret = nm->nm_send_udp_msg(&reply, &src_addr, src_addr_len); 111 | assert(ret == 0); 112 | return NULL; 113 | } 114 | 115 | void * udp_send_recv_client(void * args) { 116 | UDPNetworkManager * nm = (UDPNetworkManager *)args; 117 | struct KVMsg request; 118 | request.type = REQ_ALLOC; 119 | request.id = nm->get_server_id(); 120 | serialize_kvmsg(&request); 121 | int ret = nm->nm_send_udp_msg_to_server(&request, 0); 122 | assert(ret == 0); 123 | struct KVMsg reply; 124 | ret = nm->nm_recv_udp_msg(&reply, NULL, NULL); 125 | assert(ret == 0); 126 | deserialize_kvmsg(&reply); 127 | assert(reply.id == 0); 128 | assert(reply.type == REP_ALLOC); 129 | return NULL; 130 | } 131 | 132 | void * ib_connect_server(void * args) { 133 | UDPNetworkManager * nm = (UDPNetworkManager *)args; 134 | struct KVMsg request; 135 | struct sockaddr_in client_addr; 136 | socklen_t client_addr_len = sizeof(struct sockaddr_in); 137 | int rc = nm->nm_recv_udp_msg(&request, &client_addr, &client_addr_len); 138 | assert(rc == 0); 139 | deserialize_kvmsg(&request); 140 | 141 | assert(request.type == REQ_CONNECT); 142 | assert(request.id == 1); 143 | struct KVMsg reply; 144 | reply.id = nm->get_server_id(); 145 | reply.type = REP_CONNECT; 146 | rc = nm->nm_on_connect_new_qp(&request, &reply.body.conn_info.qp_info); 147 | assert(rc == 0); 148 | 149 | struct IbInfo ib_info; 150 | nm->get_ib_info(&ib_info); 151 | void * buf = malloc(1024); 152 | struct ibv_mr * mr = ibv_reg_mr(ib_info.ib_pd, buf, 128, 153 | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ); 154 | reply.body.conn_info.gc_info.addr = (uint64_t)buf; 155 | reply.body.conn_info.gc_info.rkey = mr->rkey; 156 | serialize_kvmsg(&reply); 157 | 158 | rc = nm->nm_send_udp_msg(&reply, &client_addr, client_addr_len); 159 | assert(rc == 0); 160 | deserialize_kvmsg(&reply); 161 | rc = nm->nm_on_connect_connect_qp(request.id, &reply.body.conn_info.qp_info, 162 | &request.body.conn_info.qp_info); 163 | assert(rc == 0); 164 | return NULL; 165 | } 166 | 167 | TEST_F(NMTest, udp_send_recv) { 168 | pthread_t server_tid, client_tid; 169 | pthread_create(&server_tid, NULL, 170 | udp_send_recv_server, (void *)server_nm_); 171 | pthread_create(&client_tid, NULL, 172 | udp_send_recv_client, (void *)client_nm_); 173 | pthread_join(server_tid, NULL); 174 | pthread_join(client_tid, NULL); 175 | } 176 | 177 | TEST_F(NMTest, ib_connect) { 178 | // create server process 179 | pthread_t server_tid; 180 | int ret; 181 | 
pthread_create(&server_tid, NULL, ib_connect_server, (void *)server_nm_); 182 | 183 | ret = client_nm_->client_connect_all_rc_qp(); 184 | ASSERT_TRUE(ret == 0); 185 | 186 | pthread_join(server_tid, NULL); 187 | 188 | server_nm_->close_udp_sock(); 189 | client_nm_->close_udp_sock(); 190 | } 191 | 192 | TEST_F(NMTest, nm_utils) { 193 | uint32_t server_id = server_nm_->get_server_id(); 194 | uint32_t client_id = client_nm_->get_server_id(); 195 | ASSERT_TRUE(server_id == server_conf_.server_id); 196 | ASSERT_TRUE(client_id == client_conf_.server_id); 197 | } 198 | 199 | TEST_F(NMTest, ib_write_read) { 200 | struct MrInfo mr_info; 201 | int ret = ib_connect(&mr_info); 202 | ASSERT_TRUE(ret == 0); 203 | ASSERT_TRUE(mr_info.addr != 0); 204 | memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4); 205 | 206 | uint64_t test_data = 100; 207 | ret = client_nm_->nm_rdma_write_inl_to_sid(&test_data, sizeof(uint64_t), 208 | mr_info.addr, mr_info.rkey, 0); 209 | ASSERT_TRUE(ret == 0); 210 | ASSERT_TRUE(test_data == *(uint64_t *)mr_info.addr); 211 | 212 | test_data = 10101; 213 | struct IbInfo client_ib_info; 214 | client_nm_->get_ib_info(&client_ib_info); 215 | struct ibv_mr * tmp_mr = ibv_reg_mr(client_ib_info.ib_pd, 216 | &test_data, sizeof(uint64_t), IBV_ACCESS_LOCAL_WRITE); 217 | ASSERT_TRUE(tmp_mr != NULL); 218 | ret = client_nm_->nm_rdma_write_to_sid(&test_data, tmp_mr->lkey, sizeof(uint64_t), mr_info.addr, mr_info.rkey, 0); 219 | 220 | 221 | uint64_t read_data = 0; 222 | tmp_mr = ibv_reg_mr(client_ib_info.ib_pd, &read_data, sizeof(uint64_t), IBV_ACCESS_LOCAL_WRITE); 223 | ASSERT_TRUE(tmp_mr != NULL); 224 | ASSERT_EQ((uint64_t)&read_data, (uint64_t)tmp_mr->addr); 225 | 226 | ret = client_nm_->nm_rdma_read_from_sid(&read_data, 227 | tmp_mr->lkey, sizeof(uint64_t), mr_info.addr, mr_info.rkey, 0); 228 | ASSERT_TRUE(ret == 0); 229 | ASSERT_TRUE(test_data == read_data); 230 | } 231 | 232 | TEST_F(NMTest, rdma_post_sr_lists_sync_0) { 233 | struct MrInfo mr_info; 234 | int ret = ib_connect(&mr_info); 235 | ASSERT_TRUE(ret == 0); 236 | ASSERT_TRUE(mr_info.addr != 0); 237 | memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4); 238 | 239 | SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info); 240 | ASSERT_TRUE(sr_ctx != NULL); 241 | 242 | ret = client_nm_->rdma_post_sr_lists_sync(&sr_ctx->srl1, 1, NULL); 243 | ASSERT_TRUE(ret == 0); 244 | ret = client_nm_->rdma_post_sr_lists_sync(&sr_ctx->srl2, 1, NULL); 245 | ASSERT_TRUE(ret == 0); 246 | 247 | uint64_t * tar_addr = (uint64_t *)mr_info.addr; 248 | for (int i = 0; i < 4; i ++) { 249 | ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << " src: " 250 | << test_source_data_[i] << std::endl; 251 | } 252 | } 253 | 254 | TEST_F(NMTest, rdma_post_sr_lists_sync_1) { 255 | struct MrInfo mr_info; 256 | int ret = ib_connect(&mr_info); 257 | ASSERT_TRUE(ret == 0); 258 | ASSERT_TRUE(mr_info.addr != 0); 259 | memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4); 260 | 261 | SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info); 262 | ASSERT_TRUE(sr_ctx != NULL); 263 | 264 | ret = client_nm_->rdma_post_sr_lists_sync(sr_ctx->m_srl, 2, NULL); 265 | ASSERT_TRUE(ret == 0); 266 | 267 | uint64_t * tar_addr = (uint64_t *)mr_info.addr; 268 | for (int i = 0; i < 4; i ++) { 269 | ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << " src: " 270 | << test_source_data_[i] << std::endl; 271 | } 272 | } 273 | 274 | TEST_F(NMTest, poll_local) { 275 | struct MrInfo mr_info; 276 | int ret = ib_connect(&mr_info); 277 | ASSERT_TRUE(ret == 0); 278 | 
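    // The rest of this test posts both WR chains asynchronously, marks the
    // last WR of each chain as IBV_SEND_SIGNALED, and waits for wr_id 1 and
    // wr_id 3 via the background nm_polling_thread and nm_check_completion().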
ASSERT_TRUE(mr_info.addr != 0); 279 | memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4); 280 | 281 | pthread_t polling_tid; 282 | pthread_create(&polling_tid, NULL, nm_polling_thread, client_nm_); 283 | 284 | SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info); 285 | ASSERT_TRUE(sr_ctx != NULL); 286 | 287 | sr_ctx->sr_list_1[1].send_flags |= IBV_SEND_SIGNALED; 288 | sr_ctx->sr_list_2[1].send_flags |= IBV_SEND_SIGNALED; 289 | 290 | ret = client_nm_->rdma_post_send_batch_async(0, sr_ctx->sr_list_1); 291 | ASSERT_TRUE(ret == 0); 292 | ret = client_nm_->rdma_post_send_batch_async(0, sr_ctx->sr_list_2); 293 | ASSERT_TRUE(ret == 0); 294 | 295 | std::map l_wait_wc_map; 296 | l_wait_wc_map[1] = NULL; 297 | l_wait_wc_map[3] = NULL; 298 | while (1) { 299 | ret = client_nm_->nm_check_completion(l_wait_wc_map); 300 | ASSERT_TRUE(ret == 0); 301 | if (ib_is_all_wrid_finished(l_wait_wc_map)) { 302 | break; 303 | } 304 | } 305 | 306 | uint64_t * tar_addr = (uint64_t *)mr_info.addr; 307 | for (int i = 0; i < 4; i ++) { 308 | ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << " src: " 309 | << test_source_data_[i] << std::endl; 310 | } 311 | 312 | client_nm_->stop_polling(); 313 | pthread_join(polling_tid, NULL); 314 | } 315 | 316 | TEST_F(NMTest, rdma_post_sr_list_batch_sync_0) { 317 | struct MrInfo mr_info; 318 | int ret = ib_connect(&mr_info); 319 | ASSERT_TRUE(ret == 0); 320 | ASSERT_TRUE(mr_info.addr != 0); 321 | memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4); 322 | 323 | SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info); 324 | ASSERT_TRUE(sr_ctx != NULL); 325 | 326 | std::vector test_batch; 327 | std::vector test_num_batch; 328 | test_batch.push_back(&sr_ctx->srl1); 329 | test_batch.push_back(&sr_ctx->srl2); 330 | test_num_batch.push_back(1); 331 | test_num_batch.push_back(1); 332 | 333 | ret = client_nm_->rdma_post_sr_list_batch_sync(test_batch, test_num_batch, NULL); 334 | ASSERT_TRUE(ret == 0); 335 | 336 | uint64_t * tar_addr = (uint64_t *)mr_info.addr; 337 | for (int i = 0; i < 4; i ++) { 338 | ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << " src: " 339 | << test_source_data_[i] << std::endl; 340 | } 341 | } 342 | 343 | TEST_F(NMTest, rdma_post_sr_list_batch_sync_1) { 344 | struct MrInfo mr_info; 345 | int ret = ib_connect(&mr_info); 346 | ASSERT_TRUE(ret == 0); 347 | ASSERT_TRUE(mr_info.addr != 0); 348 | memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4); 349 | 350 | SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info); 351 | ASSERT_TRUE(sr_ctx != NULL); 352 | 353 | std::vector test_batch; 354 | std::vector test_num_batch; 355 | test_batch.push_back(sr_ctx->m_srl); 356 | test_num_batch.push_back(2); 357 | 358 | ret = client_nm_->rdma_post_sr_list_batch_sync(test_batch, test_num_batch, NULL); 359 | ASSERT_TRUE(ret == 0); 360 | 361 | uint64_t * tar_addr = (uint64_t *)mr_info.addr; 362 | for (int i = 0; i < 4; i ++) { 363 | ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << " src: " 364 | << test_source_data_[i] << std::endl; 365 | } 366 | } -------------------------------------------------------------------------------- /tests/test_nm.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_TEST_NM_H_ 2 | #define DDCKV_TEST_NM_H_ 3 | 4 | #include 5 | 6 | #include "ddckv_test.h" 7 | #include "kv_utils.h" 8 | #include "nm.h" 9 | 10 | typedef struct TagSrReqCtx { 11 | struct ibv_send_wr sr_list_1[2]; 12 | struct ibv_send_wr sr_list_2[2]; 13 | struct ibv_sge sg_list_1[2]; 14 | struct 
ibv_sge sg_list_2[2]; 15 | 16 | IbvSrList srl1; 17 | IbvSrList srl2; 18 | IbvSrList m_srl[2]; 19 | } SrReqCtx; 20 | 21 | class NMTest : public DDCKVTest { 22 | protected: 23 | void SetUp() override; 24 | void TearDown() override; 25 | 26 | public: 27 | UDPNetworkManager * server_nm_; 28 | UDPNetworkManager * client_nm_; 29 | 30 | uint64_t test_source_data_[4]; 31 | 32 | int ib_connect(struct MrInfo * mr_info); 33 | SrReqCtx * gen_sr_reqs(struct MrInfo * mr_info); 34 | }; 35 | 36 | void * udp_send_recv_server(void * args); 37 | void * udp_send_recv_client(void * args); 38 | 39 | void * ib_connect_server(void * args); 40 | 41 | #endif -------------------------------------------------------------------------------- /tests/test_remote_nm.cc: -------------------------------------------------------------------------------- 1 | #include "test_remote_nm.h" 2 | 3 | #include 4 | 5 | #include "nm.h" 6 | #include "kv_utils.h" 7 | 8 | void NMRemoteTest::SetUp() { 9 | int ret = 0; 10 | ret = load_config("./client_config.json", &global_conf_); 11 | ASSERT_TRUE(ret == 0); 12 | 13 | client_nm_ = new UDPNetworkManager(&global_conf_); 14 | 15 | for (int i = 0; i < 2; i ++) { 16 | ret = client_nm_->client_connect_one_rc_qp(i, &mr_info_[i]); 17 | ASSERT_TRUE(ret == 0); 18 | } 19 | } 20 | 21 | void NMRemoteTest::TearDown() { 22 | delete client_nm_; 23 | } 24 | 25 | SrReqCtx * NMRemoteTest::gen_sr_reqs() { 26 | SrReqCtx * ret_ctx = (SrReqCtx *)malloc(sizeof(SrReqCtx)); 27 | for (int i = 0; i < 4; i ++) { 28 | test_source_data_[i] = (123 * i) ^ i; 29 | } 30 | 31 | for (int i = 0; i < 4; i ++) { 32 | if (i < 2) { 33 | ret_ctx->sg_list_1[i].addr = (uint64_t)&test_source_data_[i]; 34 | ret_ctx->sg_list_1[i].length = 8; 35 | ret_ctx->sg_list_1[i].lkey = 0; 36 | } else { 37 | ret_ctx->sg_list_2[i - 2].addr = (uint64_t)&test_source_data_[i]; 38 | ret_ctx->sg_list_2[i - 2].length = 8; 39 | ret_ctx->sg_list_2[i - 2].lkey = 0; 40 | } 41 | } 42 | 43 | for (int i = 0; i < 4; i ++) { 44 | if (i < 2) { 45 | ret_ctx->sr_list_1[i].wr_id = i; 46 | ret_ctx->sr_list_1[i].sg_list = &ret_ctx->sg_list_1[i]; 47 | ret_ctx->sr_list_1[i].num_sge = 1; 48 | ret_ctx->sr_list_1[i].opcode = IBV_WR_RDMA_WRITE; 49 | ret_ctx->sr_list_1[i].send_flags = IBV_SEND_INLINE; 50 | ret_ctx->sr_list_1[i].wr.rdma.remote_addr = mr_info_[0].addr + i * sizeof(uint64_t); 51 | ret_ctx->sr_list_1[i].wr.rdma.rkey = mr_info_[0].rkey; 52 | ret_ctx->sr_list_1[i].next = NULL; 53 | } else { 54 | ret_ctx->sr_list_2[i - 2].wr_id = i; 55 | ret_ctx->sr_list_2[i - 2].sg_list = &ret_ctx->sg_list_2[i - 2]; 56 | ret_ctx->sr_list_2[i - 2].num_sge = 1; 57 | ret_ctx->sr_list_2[i - 2].opcode = IBV_WR_RDMA_WRITE; 58 | ret_ctx->sr_list_2[i - 2].send_flags = IBV_SEND_INLINE; 59 | ret_ctx->sr_list_2[i - 2].wr.rdma.remote_addr = mr_info_[1].addr + i * sizeof(uint64_t); 60 | ret_ctx->sr_list_2[i - 2].wr.rdma.rkey = mr_info_[1].rkey; 61 | ret_ctx->sr_list_2[i - 2].next = NULL; 62 | } 63 | } 64 | ret_ctx->sr_list_1[0].next = &ret_ctx->sr_list_1[1]; 65 | ret_ctx->sr_list_2[0].next = &ret_ctx->sr_list_2[1]; 66 | 67 | ret_ctx->m_srl[0].num_sr = 2; 68 | ret_ctx->m_srl[0].server_id = 0; 69 | ret_ctx->m_srl[0].sr_list = ret_ctx->sr_list_1; 70 | ret_ctx->m_srl[1].num_sr = 2; 71 | ret_ctx->m_srl[1].server_id = 1; 72 | ret_ctx->m_srl[1].sr_list = ret_ctx->sr_list_2; 73 | 74 | ret_ctx->srl1.num_sr = 2; 75 | ret_ctx->srl1.server_id = 0; 76 | ret_ctx->srl1.sr_list = ret_ctx->sr_list_1; 77 | ret_ctx->srl2.num_sr = 2; 78 | ret_ctx->srl2.server_id = 1; 79 | ret_ctx->srl2.sr_list = ret_ctx->sr_list_2; 
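    // Unlike the single-node NMTest variant, the two lists here target
    // different memory nodes: srl1/m_srl[0] write to server 0 and srl2/m_srl[1]
    // write to server 1, using the MR info obtained for each QP in SetUp().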
80 | 81 | return ret_ctx; 82 | } 83 | 84 | TEST_F(NMRemoteTest, remote_basic) { 85 | int ret = 0; 86 | uint64_t test_data = 1231241; 87 | ret = client_nm_->nm_rdma_write_inl_to_sid(&test_data, sizeof(uint64_t), 88 | mr_info_[0].addr, mr_info_[0].rkey, 0); 89 | ASSERT_TRUE(ret == 0); 90 | 91 | uint64_t read_data = 0; 92 | struct IbInfo client_ib_info; 93 | client_nm_->get_ib_info(&client_ib_info); 94 | struct ibv_mr * tmp_mr = ibv_reg_mr(client_ib_info.ib_pd, &read_data, 95 | sizeof(uint64_t), IBV_ACCESS_LOCAL_WRITE); 96 | ASSERT_TRUE(tmp_mr != NULL); 97 | 98 | ret = client_nm_->nm_rdma_read_from_sid(&read_data, tmp_mr->lkey, 99 | sizeof(uint64_t), mr_info_[0].addr, mr_info_[0].rkey, 0); 100 | ASSERT_TRUE(ret == 0); 101 | ASSERT_TRUE(test_data == read_data); 102 | } 103 | 104 | TEST_F(NMRemoteTest, rdma_post_sr_lists_async) { 105 | int ret = 0; 106 | SrReqCtx * sr_ctx = gen_sr_reqs(); 107 | ASSERT_TRUE(sr_ctx != NULL); 108 | 109 | pthread_t polling_tid; 110 | pthread_create(&polling_tid, NULL, nm_polling_thread, client_nm_); 111 | 112 | std::map l_wait_wc_map; 113 | ret = client_nm_->rdma_post_sr_lists_async(sr_ctx->m_srl, 2, l_wait_wc_map); 114 | ASSERT_TRUE(ret == 0); 115 | 116 | std::map::iterator it = l_wait_wc_map.begin(); 117 | ASSERT_TRUE(l_wait_wc_map.size() == 2); 118 | 119 | while (1) { 120 | ret = client_nm_->nm_check_completion(l_wait_wc_map); 121 | ASSERT_TRUE(ret == 0); 122 | if (ib_is_all_wrid_finished(l_wait_wc_map)) { 123 | break; 124 | } 125 | } 126 | 127 | client_nm_->stop_polling(); 128 | pthread_join(polling_tid, NULL); 129 | } 130 | 131 | TEST_F(NMRemoteTest, rdma_post_sr_list_batch_async) { 132 | int ret = 0; 133 | SrReqCtx * sr_ctx = gen_sr_reqs(); 134 | ASSERT_TRUE(sr_ctx != NULL); 135 | 136 | pthread_t polling_tid; 137 | pthread_create(&polling_tid, NULL, nm_polling_thread, client_nm_); 138 | 139 | std::vector test_batch; 140 | std::vector test_num_batch; 141 | test_batch.push_back(&sr_ctx->srl1); 142 | test_batch.push_back(&sr_ctx->srl2); 143 | test_num_batch.push_back(1); 144 | test_num_batch.push_back(1); 145 | 146 | std::map l_wait_wc_map; 147 | ret = client_nm_->rdma_post_sr_list_batch_async(test_batch, test_num_batch, l_wait_wc_map); 148 | ASSERT_TRUE(ret == 0); 149 | 150 | std::map::iterator it = l_wait_wc_map.begin(); 151 | ASSERT_TRUE(l_wait_wc_map.size() == 2); 152 | 153 | while (1) { 154 | ret = client_nm_->nm_check_completion(l_wait_wc_map); 155 | ASSERT_TRUE(ret == 0); 156 | if (ib_is_all_wrid_finished(l_wait_wc_map)) { 157 | break; 158 | } 159 | } 160 | 161 | client_nm_->stop_polling(); 162 | pthread_join(polling_tid, NULL); 163 | } -------------------------------------------------------------------------------- /tests/test_remote_nm.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_TEST_REMOTE_NM_H_ 2 | #define DDCKV_TEST_REMOTE_NM_H_ 3 | 4 | #include 5 | 6 | #include "ddckv_test.h" 7 | #include "kv_utils.h" 8 | #include "nm.h" 9 | 10 | typedef struct TagSrReqCtx { 11 | struct ibv_send_wr sr_list_1[2]; 12 | struct ibv_send_wr sr_list_2[2]; 13 | struct ibv_sge sg_list_1[2]; 14 | struct ibv_sge sg_list_2[2]; 15 | 16 | IbvSrList srl1; 17 | IbvSrList srl2; 18 | IbvSrList m_srl[2]; 19 | } SrReqCtx; 20 | 21 | class NMRemoteTest : public DDCKVTest { 22 | protected: 23 | void SetUp() override; 24 | void TearDown() override; 25 | 26 | public: 27 | struct GlobalConfig global_conf_; 28 | struct MrInfo mr_info_[2]; 29 | UDPNetworkManager * client_nm_; 30 | 31 | uint64_t test_source_data_[4]; 32 | 33 | SrReqCtx 
* gen_sr_reqs(); 34 | }; 35 | 36 | void * udp_send_recv_server(void * args); 37 | void * udp_send_recv_client(void * args); 38 | 39 | void * ib_connect_server(void * args); 40 | 41 | #endif -------------------------------------------------------------------------------- /tests/test_server.cc: -------------------------------------------------------------------------------- 1 | #include "test_server.h" 2 | 3 | void ServerTest::SetUp() { 4 | setup_server_conf(); 5 | setup_client_conf(); 6 | server_ = new Server(&server_conf_); 7 | client_nm_ = new UDPNetworkManager(&client_conf_); 8 | } 9 | 10 | void ServerTest::TearDown() { 11 | delete server_; 12 | } 13 | 14 | TEST_F(ServerTest, ib_connect) { 15 | pthread_t server_tid; 16 | int ret; 17 | ret = pthread_create(&server_tid, NULL, server_main, server_); 18 | ASSERT_TRUE(ret == 0); 19 | 20 | struct MrInfo mr_info; 21 | ret = client_nm_->client_connect_one_rc_qp(0, &mr_info); 22 | ASSERT_TRUE(ret == 0); 23 | ASSERT_TRUE(mr_info.addr == server_conf_.server_base_addr); 24 | 25 | uint64_t client_msg[2]; 26 | client_msg[0] = 10086; 27 | client_msg[1] = 9527; 28 | ret = client_nm_->nm_rdma_write_inl_to_sid(client_msg, sizeof(uint64_t) * 2, 29 | mr_info.addr, mr_info.rkey, 0); 30 | ASSERT_TRUE(ret == 0); 31 | 32 | ASSERT_TRUE(((uint64_t *)(mr_info.addr))[0] == client_msg[0]); 33 | ASSERT_TRUE(((uint64_t *)(mr_info.addr))[1] == client_msg[1]); 34 | 35 | server_->stop(); 36 | pthread_join(server_tid, NULL); 37 | ASSERT_TRUE(ret == 0); 38 | ASSERT_TRUE(1); 39 | } 40 | 41 | TEST_F(ServerTest, rdma_connect) { 42 | pthread_t server_tid; 43 | int ret; 44 | ret = pthread_create(&server_tid, NULL, server_main, server_); 45 | ASSERT_TRUE(ret == 0); 46 | 47 | struct MrInfo gc_info; 48 | ret = client_nm_->client_connect_one_rc_qp(0, &gc_info); 49 | ASSERT_TRUE(ret == 0); 50 | 51 | uint64_t msg = 10086; 52 | struct ibv_send_wr test_wr; 53 | struct ibv_sge test_sge; 54 | memset(&test_wr, 0, sizeof(struct ibv_send_wr)); 55 | memset(&test_sge, 0, sizeof(struct ibv_sge)); 56 | test_sge.addr = (uint64_t)&msg; 57 | test_sge.length = sizeof(uint64_t); 58 | test_sge.lkey = 0; 59 | test_wr.sg_list = &test_sge; 60 | test_wr.num_sge = 1; 61 | test_wr.next = NULL; 62 | test_wr.opcode = IBV_WR_RDMA_WRITE; 63 | test_wr.send_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED; 64 | test_wr.wr.rdma.remote_addr = gc_info.addr; 65 | test_wr.wr.rdma.rkey = gc_info.rkey; 66 | test_wr.wr_id = 10000; 67 | ret = client_nm_->rdma_post_send_batch_async(0, &test_wr); 68 | ASSERT_TRUE(ret == 0); 69 | 70 | struct ibv_wc wc; 71 | ret = client_nm_->rdma_poll_one_completion(&wc); 72 | ASSERT_TRUE(ret == 0); 73 | ASSERT_TRUE(wc.status == IBV_WC_SUCCESS); 74 | ASSERT_TRUE(wc.wr_id == 10000); 75 | 76 | msg = *(uint64_t *)(gc_info.addr); 77 | ASSERT_TRUE(msg == 10086); 78 | 79 | server_->stop(); 80 | pthread_join(server_tid, NULL); 81 | ASSERT_TRUE(ret == 0); 82 | ASSERT_TRUE(1); 83 | } 84 | 85 | TEST_F(ServerTest, alloc) { 86 | pthread_t server_tid; 87 | int ret; 88 | ret = pthread_create(&server_tid, NULL, server_main, server_); 89 | ASSERT_TRUE(ret == 0); 90 | 91 | struct MrInfo addr_info; 92 | ret = client_nm_->client_connect_one_rc_qp(0, &addr_info); 93 | ASSERT_TRUE(ret == 0); 94 | 95 | for (int i = 0; i < 10; i ++) { 96 | struct KVMsg alloc_req; 97 | memset(&alloc_req, 0, sizeof(struct KVMsg)); 98 | alloc_req.type = REQ_ALLOC; 99 | alloc_req.id = client_nm_->get_server_id(); 100 | serialize_kvmsg(&alloc_req); 101 | ret = client_nm_->nm_send_udp_msg_to_server(&alloc_req, 0); 102 | ASSERT_TRUE(ret == 
0); 103 | 104 | struct KVMsg alloc_rep; 105 | ret = client_nm_->nm_recv_udp_msg(&alloc_rep, NULL, NULL); 106 | ASSERT_TRUE(ret == 0); 107 | deserialize_kvmsg(&alloc_rep); 108 | 109 | ASSERT_TRUE(alloc_rep.body.mr_info.addr == server_->get_kv_area_addr() + i * server_conf_.block_size) << "ret_addr: 0x" << std::hex << alloc_rep.body.mr_info.addr 110 | << " kv_area_off: 0x" << std::hex << server_->get_kv_area_addr() + i * server_conf_.block_size; 111 | } 112 | 113 | server_->stop(); 114 | pthread_join(server_tid, NULL); 115 | } 116 | 117 | TEST_F(ServerTest, alloc_subtable) { 118 | pthread_t server_tid; 119 | int ret; 120 | ret = pthread_create(&server_tid, NULL, server_main, server_); 121 | ASSERT_TRUE(ret == 0); 122 | 123 | struct MrInfo addr_info; 124 | ret = client_nm_->client_connect_one_rc_qp(0, &addr_info); 125 | ASSERT_TRUE(ret == 0); 126 | 127 | for (int i = 0; i < 32; i ++) { 128 | struct KVMsg alloc_req; 129 | memset(&alloc_req, 0, sizeof(struct KVMsg)); 130 | alloc_req.type = REQ_ALLOC_SUBTABLE; 131 | alloc_req.id = client_nm_->get_server_id(); 132 | serialize_kvmsg(&alloc_req); 133 | ret = client_nm_->nm_send_udp_msg_to_server(&alloc_req, 0); 134 | ASSERT_TRUE(ret == 0); 135 | 136 | struct KVMsg alloc_rep; 137 | ret = client_nm_->nm_recv_udp_msg(&alloc_rep, NULL, NULL); 138 | ASSERT_TRUE(ret == 0); 139 | deserialize_kvmsg(&alloc_rep); 140 | 141 | ASSERT_TRUE(alloc_rep.body.mr_info.addr == server_->get_subtable_st_addr() + i * roundup_256(SUBTABLE_LEN)) << "ret_addr: 0x" << std::hex << alloc_rep.body.mr_info.addr 142 | << " expected: 0x" << std::hex << server_->get_subtable_st_addr() + i * roundup_256(SUBTABLE_LEN); 143 | } 144 | server_->stop(); 145 | pthread_join(server_tid, NULL); 146 | } -------------------------------------------------------------------------------- /tests/test_server.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_TEST_SERVER_H_ 2 | #define DDCKV_TEST_SERVER_H_ 3 | 4 | #include 5 | 6 | #include "ddckv_test.h" 7 | #include "server.h" 8 | #include "nm.h" 9 | 10 | class ServerTest : public DDCKVTest { 11 | protected: 12 | void SetUp() override; 13 | void TearDown() override; 14 | 15 | public: 16 | Server * server_; 17 | UDPNetworkManager * client_nm_; 18 | }; 19 | 20 | #endif -------------------------------------------------------------------------------- /ycsb-test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(ycsb_test ycsb_test.cc) 2 | add_executable(ycsb_test_client ycsb_test_client.cc) 3 | add_executable(ycsb_test_server ycsb_test_server.cc) 4 | add_executable(ycsb_wl_loader ycsb_wl_loader.cc) 5 | add_executable(ycsb_wl_worker ycsb_wl_worker.cc) 6 | add_executable(ycsb_test_multi_client ycsb_test_multi_client.cc) 7 | add_executable(ycsb_multi_client_cont_tpt ycsb_multi_client_cont_tpt.cc) 8 | add_executable(ycsb_server_crash_multi_client ycsb_server_crash_multi_client.cc) 9 | 10 | target_compile_options(ycsb_test_client 11 | PRIVATE 12 | ${CMAKE_CXX_FLAGS_DEBUG} 13 | "-g" 14 | ) 15 | 16 | target_compile_options(ycsb_test_multi_client 17 | PRIVATE 18 | "-g" 19 | ) 20 | 21 | target_link_libraries(ycsb_test 22 | libddckv 23 | pthread 24 | ibverbs 25 | ) 26 | 27 | target_link_libraries(ycsb_test_multi_client 28 | libddckv 29 | ycsb_test 30 | ibverbs 31 | ) 32 | 33 | target_link_libraries(ycsb_test_client 34 | libddckv 35 | ycsb_test 36 | pthread 37 | ibverbs 38 | ) 39 | 40 | target_link_libraries(ycsb_test_server 41 | libddckv 42 | ycsb_test 
43 | pthread 44 | ibverbs 45 | ) 46 | 47 | 48 | target_link_libraries(ycsb_wl_worker 49 | libddckv 50 | ycsb_test 51 | pthread 52 | ibverbs 53 | ) 54 | 55 | target_link_libraries(ycsb_wl_loader 56 | libddckv 57 | ycsb_test 58 | pthread 59 | ibverbs 60 | ) 61 | 62 | target_link_libraries(ycsb_multi_client_cont_tpt 63 | ycsb_test 64 | libddckv 65 | pthread 66 | ibverbs 67 | ) 68 | 69 | target_link_libraries(ycsb_server_crash_multi_client 70 | ycsb_test 71 | libddckv 72 | pthread 73 | ibverbs 74 | ) -------------------------------------------------------------------------------- /ycsb-test/gen-ycsb-workload.py: -------------------------------------------------------------------------------- 1 | from genericpath import isdir 2 | import json 3 | import os 4 | 5 | def get_workload_names(path): 6 | workload_name_list = [] 7 | for i in os.listdir(path): 8 | file_path = os.path.join(path, i) 9 | assert(os.path.isdir(file_path) == False) 10 | print(file_path) 11 | if "upd" in file_path: 12 | workload_name_list.append(file_path) 13 | return workload_name_list 14 | 15 | def mv_files(s_name, d_name): 16 | cmd = "mv {} {}".format(s_name, d_name) 17 | os.system(cmd) 18 | 19 | def gen_workloads(workload_name_list): 20 | cmd = "./ycsb-c/ycsbc -db basic_file -threads 1 -P {}" 21 | for workload in workload_name_list: 22 | run_cmd = cmd.format(workload) 23 | print("Running: {}".format(run_cmd)) 24 | os.system(run_cmd) 25 | 26 | # copy generated files 27 | d_name = workload.split("/")[-1] + "_load" 28 | mv_files("workload_load", "./upd-workloads/{}".format(d_name)) 29 | d_name = workload.split("/")[-1] + "_trans" 30 | mv_files("workload_trans", "./upd-workloads/{}".format(d_name)) 31 | 32 | wllist = get_workload_names("./ycsb-c/workloads") 33 | gen_workloads(wllist) 34 | -------------------------------------------------------------------------------- /ycsb-test/merge-ycsb-lat.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | worklaodName = sys.argv[1] 4 | threadIDList = [i for i in range(8)] 5 | coroIDList = [i for i in range(10)] 6 | workloadOpsDict = { 7 | "workloada": ["search", "update"], 8 | "workloadb": ["search", "update"], 9 | "workloadc": ["search"], 10 | "workloadd": ["search", "insert"] 11 | } 12 | fnameTemplate = 'results/ycsb_{}_lat_{}_{}.txt' 13 | outFnameTemplate = 'results/{}_{}_lat.txt' 14 | 15 | opsList = workloadOpsDict[worklaodName] 16 | for op in opsList: 17 | mergedLines = [] 18 | for tid in threadIDList: 19 | for cid in coroIDList: 20 | fname = fnameTemplate.format(op, tid, cid) 21 | tmpFile = open(fname, "r") 22 | lines = tmpFile.readlines() 23 | mergedLines += lines 24 | tmpFile.close() 25 | outFname = outFnameTemplate.format(worklaodName, op) 26 | print("merging {}".format(outFname)) 27 | outF = open(outFname, "w") 28 | outF.writelines(mergedLines) 29 | outF.close() -------------------------------------------------------------------------------- /ycsb-test/split-workload.py: -------------------------------------------------------------------------------- 1 | from os import close 2 | import sys 3 | 4 | wlNameList = ['a', 'b', 'c', 'd'] 5 | # wlTemplateList = ["./workloads/workload{}.spec_trans", "./ycsb-small/workload{}_small.trans"] 6 | wlTemplateList = ["./ycsb-small/workload{}_small.trans"] 7 | splitNum = int(sys.argv[1]) 8 | 9 | for n in wlNameList: 10 | for tplate in wlTemplateList: 11 | fname = tplate.format(n) 12 | wlFile = open(fname, "r") 13 | lines = wlFile.readlines() 14 | lineNum = len(lines) 15 | splitSize = lineNum 
/ splitNum 16 | for i in range(splitNum): 17 | print(i * splitSize, (i + 1) * splitSize) 18 | slines = lines[int(i * splitSize): int((i + 1) * splitSize)] 19 | splitFname = fname + str(i) 20 | outFile = open(splitFname, "w") 21 | outFile.writelines(slines) 22 | outFile.close() 23 | wlFile.close() -------------------------------------------------------------------------------- /ycsb-test/ycsb_multi_client_cont_tpt.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "client.h" 7 | #include "ycsb_test.h" 8 | 9 | int main(int argc, char ** argv) { 10 | if (argc != 4) { 11 | printf("Usage: %s path-to-config-file workload-name num-clients\n", argv[0]); 12 | return 1; 13 | } 14 | 15 | WorkloadFileName * workload_fnames = get_workload_fname(argv[2]); 16 | int num_clients = atoi(argv[3]); 17 | 18 | GlobalConfig config; 19 | int ret = load_config(argv[1], &config); 20 | assert(ret == 0); 21 | 22 | // bind this process to main core 23 | // run client args 24 | RunClientArgs * client_args_list = (RunClientArgs *)malloc(sizeof(RunClientArgs) * num_clients); 25 | pthread_barrier_t global_load_barrier; 26 | pthread_barrier_init(&global_load_barrier, NULL, num_clients); 27 | pthread_barrier_t global_timer_barrier; 28 | pthread_barrier_init(&global_timer_barrier, NULL, num_clients); 29 | volatile bool should_stop = false; 30 | 31 | pthread_t tid_list[num_clients]; 32 | for (int i = 0; i < num_clients; i ++) { 33 | client_args_list[i].client_id = config.server_id - config.memory_num; 34 | client_args_list[i].thread_id = i; 35 | client_args_list[i].main_core_id = config.main_core_id + i * 2; 36 | client_args_list[i].poll_core_id = config.poll_core_id + i * 2; 37 | client_args_list[i].workload_name = argv[2]; 38 | client_args_list[i].config_file = argv[1]; 39 | client_args_list[i].load_barrier = &global_load_barrier; 40 | client_args_list[i].should_stop = &should_stop; 41 | client_args_list[i].timer_barrier = &global_timer_barrier; 42 | client_args_list[i].ret_num_ops = 0; 43 | client_args_list[i].ret_faile_num = 0; 44 | client_args_list[i].num_threads = num_clients; 45 | pthread_t tid; 46 | pthread_create(&tid, NULL, run_client_cont_tpt, &client_args_list[i]); 47 | tid_list[i] = tid; 48 | } 49 | 50 | for (int i = 0; i < num_clients; i ++) { 51 | pthread_join(tid_list[i], NULL); 52 | printf("thread %d finished\n", i); 53 | } 54 | } -------------------------------------------------------------------------------- /ycsb-test/ycsb_server_crash_multi_client.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "client.h" 4 | #include "ycsb_test.h" 5 | 6 | int main(int argc, char ** argv) { 7 | if (argc != 4) { 8 | printf("Usage: %s path-to-config-file workload-name num-clients\n", argv[0]); 9 | return 1; 10 | } 11 | 12 | WorkloadFileName * workload_fnames = get_workload_fname(argv[2]); 13 | int num_clients = atoi(argv[3]); 14 | 15 | GlobalConfig config; 16 | int ret = load_config(argv[1], &config); 17 | assert(ret == 0); 18 | 19 | RunClientArgs * client_args_list = (RunClientArgs *)malloc(sizeof(RunClientArgs) * num_clients); 20 | pthread_barrier_t global_load_barrier; 21 | pthread_barrier_init(&global_load_barrier, NULL, num_clients); 22 | pthread_barrier_t global_timer_barrier; 23 | pthread_barrier_init(&global_timer_barrier, NULL, num_clients); 24 | volatile bool should_stop = false; 25 | 26 | pthread_t tid_list[num_clients]; 27 | for (int i = 0; i < 
num_clients; i ++) { 28 | client_args_list[i].client_id = config.server_id - config.memory_num; 29 | client_args_list[i].thread_id = i; 30 | client_args_list[i].main_core_id = config.main_core_id + i * 2; 31 | client_args_list[i].poll_core_id = config.poll_core_id + i * 2; 32 | client_args_list[i].workload_name = argv[2]; 33 | client_args_list[i].config_file = argv[1]; 34 | client_args_list[i].load_barrier = &global_load_barrier; 35 | client_args_list[i].should_stop = &should_stop; 36 | client_args_list[i].timer_barrier = &global_timer_barrier; 37 | client_args_list[i].ret_num_ops = 0; 38 | client_args_list[i].ret_faile_num = 0; 39 | client_args_list[i].num_threads = num_clients; 40 | pthread_t tid; 41 | pthread_create(&tid, NULL, run_client_on_crash_cont_tpt, &client_args_list[i]); 42 | tid_list[i] = tid; 43 | } 44 | 45 | for (int i = 0; i < num_clients; i ++) { 46 | pthread_join(tid_list[i], NULL); 47 | printf("thread %d finished\n", i); 48 | } 49 | } -------------------------------------------------------------------------------- /ycsb-test/ycsb_test.h: -------------------------------------------------------------------------------- 1 | #ifndef DDCKV_YCSB_TEST_H_ 2 | #define DDCKV_YCSB_TEST_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "client.h" 10 | #include "client_cr.h" 11 | 12 | typedef struct TagWorkloadFileName { 13 | char load_fname[64]; 14 | char trans_fname[64]; 15 | } WorkloadFileName; 16 | 17 | typedef struct TagRunClientArgs { 18 | int thread_id; 19 | int main_core_id; 20 | int poll_core_id; 21 | char * workload_name; 22 | char * config_file; 23 | pthread_barrier_t * load_barrier; 24 | volatile bool * should_stop; 25 | // bool * timer_is_ready; 26 | pthread_barrier_t * timer_barrier; 27 | 28 | uint32_t ret_num_ops; 29 | uint32_t ret_faile_num; 30 | 31 | uint32_t client_id; 32 | uint32_t num_threads; 33 | } RunClientArgs; 34 | 35 | int is_valid_workload(char * workload_name); 36 | WorkloadFileName * get_workload_fname(char * workload_name); 37 | WorkloadFileName * get_workload_fname(char * workload_name, int thread_id); 38 | int get_num_failed(ClientFiberArgs * fb_args_list, int num_coro); 39 | uint64_t get_time_spent(ClientFiberArgs * fb_args_list, int num_coro); 40 | bool time_is_less_than(struct timeval * t1, struct timeval * t2); 41 | 42 | int load_workload(Client & client, WorkloadFileName * workload_fnames); 43 | int load_workload(ClientCR & client, WorkloadFileName * workload_fnames); 44 | int load_workload_sync(Client & client, WorkloadFileName * workload_fnames); 45 | int load_workload_sync(ClientCR & client, WorkloadFileName * workload_fnames); 46 | int load_workload_1coro(Client & client, WorkloadFileName * workload_fnames); 47 | int load_workload_1coro(Client & client, WorkloadFileName * workload_fnames, int st, int ed); 48 | 49 | int load_test_cnt_time(Client & client, WorkloadFileName * workload_fnames); 50 | int load_test_cnt_ops(Client & client, WorkloadFileName * workload_fnames); 51 | int load_test_cnt_ops(ClientCR & client, WorkloadFileName * workloadfnames); 52 | int load_test_cnt_ops_mt(Client & client, WorkloadFileName * workload_fnames, RunClientArgs * args); 53 | int load_test_cnt_ops_mt(ClientCR & client, WorkloadFileName * workload_fnames, RunClientArgs * arg); 54 | int load_test_cnt_ops_on_crash(Client & client, WorkloadFileName * workload_fnames); 55 | int load_test_cnt_ops_mt_on_crash_cont_sample(Client & client, WorkloadFileName * workload_fnames); 56 | 57 | int load_test_lat_mt(Client & client, WorkloadFileName * 
workload_fnames, RunClientArgs * args, const char * out_fname); 58 | int load_test_lat_mt(ClientCR & client, WorkloadFileName * workload_fnames, RunClientArgs * args, const char * out_fname); 59 | int load_test_lat(Client & client, WorkloadFileName * get_workload_fname, const char * out_fname); 60 | int load_test_lat(ClientCR & client, WorkloadFileName * get_workload_fname, const char * out_fname); 61 | 62 | void conf_reassign_cores(GlobalConfig * conf, int new_client_id); 63 | 64 | void timer_fb_func(volatile bool * should_stop, int seconds); 65 | void timer_fb_func_ms(volatile bool * should_stop, int milliseconds); 66 | 67 | void * run_client(void * _args); 68 | void * run_client_cr(void * _args); 69 | void * run_client_lat(void *_args); 70 | void * run_client_cr_lat(void * _args); 71 | void * run_client_cont_tpt(void * _args); 72 | void * run_client_on_crash_cont_tpt(void * _args); 73 | 74 | #endif -------------------------------------------------------------------------------- /ycsb-test/ycsb_test_client.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "client.h" 4 | #include "ycsb_test.h" 5 | 6 | int main(int argc, char ** argv) { 7 | if (argc != 3) { 8 | printf("Usage: %s path-to-config-file workload-name\n", argv[0]); 9 | return 1; 10 | } 11 | 12 | WorkloadFileName * workload_fnames = get_workload_fname(argv[2]); 13 | 14 | int ret = 0; 15 | GlobalConfig config; 16 | ret = load_config(argv[1], &config); 17 | assert(ret == 0); 18 | printf("running with %d coros\n", config.num_coroutines); 19 | 20 | // bind this process to main core 21 | cpu_set_t cpuset; 22 | CPU_ZERO(&cpuset); 23 | CPU_SET(config.main_core_id, &cpuset); 24 | ret = sched_setaffinity(0, sizeof(cpuset), &cpuset); 25 | assert(ret == 0); 26 | ret = sched_getaffinity(0, sizeof(cpuset), &cpuset); 27 | for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) { 28 | if (CPU_ISSET(i, &cpuset)) { 29 | printf("main process running on core: %d\n", i); 30 | } 31 | } 32 | 33 | Client client(&config); 34 | 35 | // start polling thread 36 | pthread_t polling_tid = client.start_polling_thread(); 37 | 38 | // 1. load workload load 39 | ret = load_workload(client, workload_fnames); 40 | assert(ret == 0); 41 | 42 | // 2. 
load test workload 43 | // ret = load_test_cnt_time(client, workload_fnames); 44 | bool should_stop = false; 45 | ret = load_test_cnt_ops(client, workload_fnames); 46 | assert(ret == 0); 47 | 48 | client.stop_polling_thread(); 49 | pthread_join(polling_tid, NULL); 50 | return 0; 51 | } -------------------------------------------------------------------------------- /ycsb-test/ycsb_test_multi_client.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "client.h" 7 | #include "ycsb_test.h" 8 | 9 | int main(int argc, char ** argv) { 10 | if (argc != 4) { 11 | printf("Usage: %s path-to-config-file workload-name num-clients\n", argv[0]); 12 | return 1; 13 | } 14 | 15 | WorkloadFileName * workload_fnames = get_workload_fname(argv[2]); 16 | int num_clients = atoi(argv[3]); 17 | 18 | GlobalConfig config; 19 | int ret = load_config(argv[1], &config); 20 | assert(ret == 0); 21 | 22 | // bind this process to main core 23 | // run client args 24 | RunClientArgs * client_args_list = (RunClientArgs *)malloc(sizeof(RunClientArgs) * num_clients); 25 | pthread_barrier_t global_load_barrier; 26 | pthread_barrier_init(&global_load_barrier, NULL, num_clients); 27 | pthread_barrier_t global_timer_barrier; 28 | pthread_barrier_init(&global_timer_barrier, NULL, num_clients); 29 | volatile bool should_stop = false; 30 | 31 | pthread_t tid_list[num_clients]; 32 | for (int i = 0; i < num_clients; i ++) { 33 | client_args_list[i].client_id = config.server_id - config.memory_num; 34 | client_args_list[i].thread_id = i; 35 | client_args_list[i].main_core_id = config.main_core_id + i * 2; 36 | client_args_list[i].poll_core_id = config.poll_core_id + i * 2; 37 | client_args_list[i].workload_name = argv[2]; 38 | client_args_list[i].config_file = argv[1]; 39 | client_args_list[i].load_barrier = &global_load_barrier; 40 | client_args_list[i].should_stop = &should_stop; 41 | client_args_list[i].timer_barrier = &global_timer_barrier; 42 | client_args_list[i].ret_num_ops = 0; 43 | client_args_list[i].ret_faile_num = 0; 44 | client_args_list[i].num_threads = num_clients; 45 | pthread_t tid; 46 | pthread_create(&tid, NULL, run_client, &client_args_list[i]); 47 | tid_list[i] = tid; 48 | } 49 | 50 | uint32_t total_tpt = 0; 51 | uint32_t total_failed = 0; 52 | for (int i = 0; i < num_clients; i ++) { 53 | pthread_join(tid_list[i], NULL); 54 | total_tpt += client_args_list[i].ret_num_ops; 55 | total_failed += client_args_list[i].ret_faile_num; 56 | } 57 | printf("total: %d ops\n", total_tpt); 58 | printf("failed: %d ops\n", total_failed); 59 | printf("tpt: %d ops/s\n", (total_tpt - total_failed) / config.workload_run_time); 60 | } -------------------------------------------------------------------------------- /ycsb-test/ycsb_test_server.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "server.h" 7 | 8 | int main(int argc, char ** argv) { 9 | if (argc != 2) { 10 | printf("Usage: %s [server_id]\n", argv[0]); 11 | return -1; 12 | } 13 | 14 | int32_t server_id = atoi(argv[1]); 15 | int32_t ret = 0; 16 | struct GlobalConfig server_conf; 17 | ret = load_config("./server_config.json", &server_conf); 18 | assert(ret == 0); 19 | server_conf.server_id = server_id; 20 | 21 | printf("===== Starting Server %d =====\n", server_conf.server_id); 22 | Server * server = new Server(&server_conf); 23 | ServerMainArgs server_main_args; 24 | 
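    // ServerMainArgs bundles the Server instance with the core id
    // (main_core_id from server_config.json) handed to the server thread;
    // both fields are filled in just below before starting server_main.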
server_main_args.server = server; 25 | server_main_args.core_id = server_conf.main_core_id; 26 | 27 | pthread_t server_tid; 28 | pthread_create(&server_tid, NULL, server_main, (void *)&server_main_args); 29 | 30 | printf("press to exit\n"); 31 | // getchar(); 32 | printf("===== Ending Server %d =====\n", server_conf.server_id); 33 | sleep(100000000ll); 34 | 35 | server->stop(); 36 | return 0; 37 | } -------------------------------------------------------------------------------- /ycsb-test/ycsb_wl_loader.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "client.h" 4 | #include "ycsb_test.h" 5 | 6 | int main(int argc, char ** argv) { 7 | if (argc != 3) { 8 | printf("Usage: %s path-to-config-file workload-name\n", argv[0]); 9 | return 1; 10 | } 11 | 12 | WorkloadFileName * workload_fnames = get_workload_fname(argv[2]); 13 | 14 | int ret = 0; 15 | GlobalConfig config; 16 | ret = load_config(argv[1], &config); 17 | assert(ret == 0); 18 | // config.num_coroutines = 1; 19 | 20 | // bind this process to main core 21 | cpu_set_t cpuset; 22 | CPU_ZERO(&cpuset); 23 | CPU_SET(config.main_core_id, &cpuset); 24 | ret = sched_setaffinity(0, sizeof(cpuset), &cpuset); 25 | assert(ret == 0); 26 | 27 | // check if affinity is successfully set 28 | CPU_ZERO(&cpuset); 29 | ret = sched_getaffinity(0, sizeof(cpuset), &cpuset); 30 | for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) { 31 | if (CPU_ISSET(i, &cpuset)) { 32 | printf("main process running on core: %d\n", i); 33 | } 34 | } 35 | 36 | Client client(&config); 37 | 38 | // start polling_tid 39 | pthread_t polling_tid = client.start_polling_thread(); 40 | 41 | // load workload 42 | ret = load_workload(client, workload_fnames); 43 | assert(ret == 0); 44 | 45 | client.stop_polling_thread(); 46 | pthread_join(polling_tid, NULL); 47 | 48 | client.dump_cache(); 49 | return 0; 50 | } -------------------------------------------------------------------------------- /ycsb-test/ycsb_wl_worker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "client.h" 5 | #include "ycsb_test.h" 6 | 7 | int main(int argc, char ** argv) { 8 | if (argc != 4) { 9 | printf("Usage: %s client-id config-file workload-name\n", argv[0]); 10 | return 1; 11 | } 12 | 13 | WorkloadFileName * workload_fnames = get_workload_fname(argv[3]); 14 | 15 | int ret = 0; 16 | GlobalConfig config; 17 | ret = load_config(argv[2], &config); 18 | assert(ret == 0); 19 | 20 | // assign client id and core id 21 | int client_id = atoi(argv[1]); 22 | assert(client_id > config.num_replication); 23 | config.server_id = client_id; 24 | conf_reassign_cores(&config, client_id); 25 | 26 | cpu_set_t cpuset; 27 | CPU_ZERO(&cpuset); 28 | CPU_SET(config.main_core_id, &cpuset); 29 | ret = sched_setaffinity(0, sizeof(cpuset), &cpuset); 30 | assert(ret == 0); 31 | 32 | // check if affinity is successfully set 33 | CPU_ZERO(&cpuset); 34 | ret = sched_getaffinity(0, sizeof(cpuset), &cpuset); 35 | for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) { 36 | if (CPU_ISSET(i, &cpuset)) { 37 | printf("main process running on core: %d\n", i); 38 | } 39 | } 40 | 41 | Client client(&config); 42 | client.load_cache(); 43 | 44 | pthread_t polling_tid = client.start_polling_thread(); 45 | 46 | ret = load_test_cnt_time(client, workload_fnames); 47 | assert(ret == 0); 48 | client.stop_polling_thread(); 49 | pthread_join(polling_tid, NULL); 50 | return 0; 51 | } 
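// Invocation sketch for this worker (argument values are illustrative only):
//   ./ycsb_wl_worker <client-id> <config-file> <workload-name>
//   e.g. ./ycsb_wl_worker 4 ./client_config.json workloada
// The client id must exceed num_replication (checked by the assert above) and
// is presumably also what conf_reassign_cores() uses to pick this worker's
// main/poll cores before the CPU affinity is set.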
--------------------------------------------------------------------------------