├── .gitignore ├── Apache-LICENSE2.0 ├── CMakeLists.txt ├── README.md ├── build.sh ├── compute_pool ├── CMakeLists.txt ├── run │ ├── CMakeLists.txt │ ├── run.cc │ └── run_micro.cc └── worker │ ├── CMakeLists.txt │ ├── handler.cc │ ├── handler.h │ ├── worker.cc │ └── worker.h ├── config ├── compute_node_config.json ├── memory_node_config.json ├── micro_config.json ├── smallbank_config.json ├── tatp_config.json └── tpcc_config.json ├── core ├── CMakeLists.txt ├── allocator │ ├── buffer_allocator.h │ ├── log_allocator.h │ └── region_allocator.h ├── base │ └── common.h ├── cache │ ├── addr_cache.h │ ├── lock_status.h │ └── version_status.h ├── connection │ ├── meta_manager.cc │ ├── meta_manager.h │ ├── qp_manager.cc │ └── qp_manager.h ├── dtx │ ├── doorbell.cc │ ├── doorbell.h │ ├── dtx.cc │ ├── dtx.h │ ├── dtx_check.cc │ ├── dtx_check_ro.cc │ ├── dtx_check_rw.cc │ ├── dtx_compare.cc │ ├── dtx_compare_check.cc │ ├── dtx_compare_issue.cc │ ├── dtx_exe_commit.cc │ ├── dtx_issue.cc │ ├── dtx_local_meta.cc │ └── structs.h ├── flags.h ├── memstore │ ├── data_item.h │ ├── hash_store.h │ └── mem_store.h ├── scheduler │ ├── corotine_scheduler.cc │ ├── corotine_scheduler.h │ └── coroutine.h └── util │ ├── ct.h │ ├── debug.h │ ├── fast_random.h │ ├── hash.h │ ├── hazard_pointer.h │ ├── json_config.h │ ├── latency.h │ ├── seqlock.h │ ├── spinlock.h │ ├── thread_pool.h │ ├── timer.h │ └── zipf.h ├── memory_pool ├── CMakeLists.txt └── server │ ├── CMakeLists.txt │ ├── server.cc │ └── server.h ├── thirdparty ├── rapidjson │ ├── allocators.h │ ├── document.h │ ├── encodedstream.h │ ├── encodings.h │ ├── error │ │ ├── en.h │ │ └── error.h │ ├── filereadstream.h │ ├── filewritestream.h │ ├── internal │ │ ├── biginteger.h │ │ ├── diyfp.h │ │ ├── dtoa.h │ │ ├── ieee754.h │ │ ├── itoa.h │ │ ├── meta.h │ │ ├── pow10.h │ │ ├── stack.h │ │ ├── strfunc.h │ │ ├── strtod.h │ │ └── swap.h │ ├── memorybuffer.h │ ├── memorystream.h │ ├── msinttypes │ │ ├── inttypes.h │ │ └── stdint.h │ ├── pointer.h │ ├── prettywriter.h │ ├── rapidjson.h │ ├── reader.h │ ├── stringbuffer.h │ └── writer.h └── rlib │ ├── CMakeLists.txt │ ├── common.hpp │ ├── logging.hpp │ ├── mr.hpp │ ├── msg_interface.hpp │ ├── pre_connector.hpp │ ├── qp.hpp │ ├── qp_impl.hpp │ ├── rdma_ctrl.hpp │ ├── rdma_ctrl_impl.hpp │ └── rnic.hpp └── workload ├── CMakeLists.txt ├── config └── table_type.h ├── micro ├── CMakeLists.txt ├── micro_db.cc ├── micro_db.h ├── micro_tables │ └── micro.json ├── micro_txn.cc └── micro_txn.h ├── smallbank ├── CMakeLists.txt ├── smallbank_db.cc ├── smallbank_db.h ├── smallbank_tables │ ├── checking.json │ └── savings.json ├── smallbank_txn.cc └── smallbank_txn.h ├── tatp ├── CMakeLists.txt ├── tatp_db.cc ├── tatp_db.h ├── tatp_tables │ ├── README.md │ ├── access_info.json │ ├── call_forwarding.json │ ├── sec_subscriber.json │ ├── special_facility.json │ └── subscriber.json ├── tatp_txn.cc └── tatp_txn.h └── tpcc ├── CMakeLists.txt ├── tpcc_db.cc ├── tpcc_db.h ├── tpcc_tables ├── customer.json ├── district.json ├── item.json ├── stock.json └── warehouse.json ├── tpcc_tables_1G ├── customer.json ├── district.json ├── item.json ├── stock.json └── warehouse.json ├── tpcc_tables_8G ├── customer.json ├── district.json ├── item.json ├── stock.json └── warehouse.json ├── tpcc_tables_normal ├── customer.json ├── district.json ├── item.json ├── stock.json └── warehouse.json ├── tpcc_txn.cc └── tpcc_txn.h /.gitignore: -------------------------------------------------------------------------------- 1 | cmake-build-debug 2 | cmake-build-release 3 | build 4 | .idea 5 | .vscode -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | cmake_minimum_required(VERSION 3.3) 5 | 6 | project(FORD) 7 | 8 | set(CMAKE_CXX_STANDARD 11) 9 | 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-result -fPIC") 11 | 12 | if(CMAKE_BUILD_TYPE STREQUAL "Release") 13 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG") 14 | else() 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -DDEBUG -g") 16 | endif() 17 | 18 | include_directories( 19 | core 20 | workload 21 | thirdparty 22 | compute_pool 23 | ) 24 | 25 | add_subdirectory(thirdparty/rlib) 26 | 27 | add_subdirectory(core) 28 | 29 | add_subdirectory(workload) 30 | 31 | add_subdirectory(compute_pool) # Dep list: rlib->ford->workload_db+_txn->worker 32 | 33 | add_subdirectory(memory_pool) # Dep list: rlib->workload_db->server -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FORD 2 | This is an open source repository for our papers in [FAST 2022](https://www.usenix.org/conference/fast22) and [ACM Transactions on Storage](https://dl.acm.org/journal/tos). 3 | 4 | > **Ming Zhang**, Yu Hua, Pengfei Zuo, and Lurong Liu. "FORD: Fast One-sided RDMA-based Distributed Transactions for Disaggregated Persistent Memory". In 20th USENIX Conference on File and Storage Technologies, FAST 2022, Santa Clara, California, USA, February 22 - 24, 2022, pages 51-68. USENIX Association, 2022. 5 | > 6 | > **Ming Zhang**, Yu Hua, Pengfei Zuo, and Lurong Liu. "Localized Validation Accelerates Distributed Transactions on Disaggregated Persistent Memory". ACM Transactions on Storage (TOS), Vol. 19, No. 3, Article 21, pages 1-35, 2023. 7 | 8 | # Brief Introduction 9 | Persistent memory (PM) disaggregation improves the resource utilization and failure isolation to build a scalable and cost-effective remote memory pool. However, due to offering limited computing power and overlooking the persistence and bandwidth properties of real PMs, existing distributed transaction schemes, which are designed for legacy DRAM-based monolithic servers, fail to efficiently work on the disaggregated PM architecture. 10 | 11 | We propose FORD, a **F**ast **O**ne-sided **R**DMA-based **D**istributed transaction system. FORD thoroughly leverages one-sided RDMA to handle transactions for bypassing the remote CPU in PM pool. To reduce the round trips, FORD batches the read and lock operations into one request to eliminate extra locking and validations. To accelerate the transaction commit, FORD updates all the remote replicas in a single round trip with parallel undo logging and data visibility control. Moreover, considering the limited PM bandwidth, FORD enables the backup replicas to be read to alleviate the load on the primary replicas, thus improving the throughput. To efficiently guarantee the remote data persistency in the PM pool, FORD selectively flushes data to the backup replicas to mitigate the network overheads. FORD further leverages a localized validation scheme to transfer the validation operations for the read-only data from remote to local as much as possible to reduce the round trips. Experimental results demonstrate that FORD improves the transaction throughput and reduces the latency. To learn more, please read our papers. 12 | 13 | # Framework 14 | We implement a coroutine-enabled framework that runs FORD and its counterparts in the same manner when processing distributed transactions: 1) Issue one-sided RDMA requests. 2) Yield CPU to another coroutine. 3) Check all the RDMA ACKs and replies. This is in fact an interleaved execution model that aims to saturate the CPUs in the compute pool to improve the throughput. 15 | 16 | # Prerequisites to Build 17 | - Hardware 18 | - Intel Optane DC Persistent Memory 19 | - Mellanox InfiniBand NIC (e.g., ConnectX-5) that supports RDMA 20 | - Mellanox InfiniBand Switch 21 | - Software 22 | - Operating System: Ubuntu 18.04 LTS or CentOS 7 23 | - Programming Language: C++ 11 24 | - Compiler: g++ 7.5.0 (at least) 25 | - Libraries: ibverbs, pthread, boost_coroutine, boost_context, boost_system 26 | - Machines 27 | - 3 machines, one acts as the compute pool and other two act as the memory pool to maintain a primary-backup replication 28 | 29 | 30 | # Configure 31 | - Configure all the options in ```compute_node_config.json``` and ```memory_node_config.json``` in ```config/``` as you need, e.g., machine_num, machine_id, ip, port, and PM path, etc. 32 | - Configure the options in ```core/flags.h```, e.g., ```MAX_ITEM_SIZE```, etc. 33 | - Configure the number of backup replicas in ```core/base/common.h```, i.e., BACKUP_DEGREE. 34 | 35 | # Build 36 | The codes are constructed by CMake (version >= 3.3). We prepare a shell script for easy building 37 | 38 | ```sh 39 | $ git clone https://github.com/minghust/ford.git 40 | $ cd ford 41 | ``` 42 | 43 | - For each machine in the memory pool: 44 | 45 | ```sh 46 | $ ./build.sh -s 47 | ``` 48 | 49 | - For each machine in the compute pool (boost is required): 50 | 51 | ```sh 52 | $ ./build.sh 53 | ``` 54 | 55 | Note that the Release version is the default option for better performance. However, if you need a Debug version, just add ```-d``` option, e.g., ```./build.sh -s -d``` for the memory pool, and ```./build.sh -d``` for the compute pool. 56 | 57 | After running the ```build.sh``` script, cmake will automatically generate a ```build/``` directory in which all the compiled libraries and executable files are stored. 58 | 59 | 60 | # Run 61 | - For each machine in the memory pool: Start server to load tables. Due to using PM in *devdax* mode, you may need ```sudo``` if you are not a root user. 62 | ```sh 63 | $ cd ford 64 | $ cd ./build/memory_pool/server 65 | $ sudo ./zm_mem_pool 66 | ``` 67 | 68 | - For each machine in the compute pool: After loading database tables in the memory pool, we run a benchmark, e.g., TPCC. 69 | ```sh 70 | $ cd ford 71 | $ cd ./build/compute_pool/run 72 | $ ./run tpcc ford 16 8 # run ford with 16 threads and each thread spawns 8 coroutines 73 | ``` 74 | Now, the memory nodes are in a disaggregated mode, i.e., the CPUs are not used for any computation tasks in transaction processing. 75 | 76 | # Results 77 | After running, we automatically generate a ```bench_results``` dir to record the results. The summarized attempted and committed throughputs (K txn/sec) and the average 50th and 99th percentile latencies are recorded in ```bench_results/tpcc/result.txt```. Moreover, the detailed results of each thread are recorded in ```bench_results/tpcc/detail_result.txt``` 78 | 79 | # Acknowledgments 80 | 81 | We sincerely thank the following open source repos (in the ```thirdparty/``` directory) that help us shorten the developing process 82 | 83 | - [rlib](https://github.com/wxdwfc/rlib): We use rlib to do RDMA connections. This is a convinient and easy-to-understand library to finish RDMA connections. Moreover, we have modified rlib : 1) Fix a bug in en/decoding the QP id. 2) Change the QP connections from the active mode to the passive mode in the server side. In this way, all the QP connections are completed without explict ```connect``` usages in the server-side code. This is beneficial for the case in which the server does not know how many clients will issue the connect requests. 84 | 85 | - [rapidjson](https://github.com/Tencent/rapidjson): We use rapidjson to read configurations from json files. This is an easy-to-use library that accelerate configurations. 86 | 87 | # LICENSE 88 | 89 | ```text 90 | Copyright [2022] [Ming Zhang] 91 | 92 | Licensed under the Apache License, Version 2.0 (the "License"); 93 | you may not use this file except in compliance with the License. 94 | You may obtain a copy of the License at 95 | 96 | http://www.apache.org/licenses/LICENSE-2.0 97 | 98 | Unless required by applicable law or agreed to in writing, software 99 | distributed under the License is distributed on an "AS IS" BASIS, 100 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 101 | See the License for the specific language governing permissions and 102 | limitations under the License. 103 | ``` 104 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Author: Ming Zhang 4 | # Copyright (c) 2022 5 | 6 | BUILD_TARGET=client 7 | BUILD_TYPE=Release 8 | 9 | while getopts "sd" arg 10 | do 11 | case $arg in 12 | s) 13 | echo "building server"; 14 | BUILD_TARGET="server"; 15 | ;; 16 | d) 17 | BUILD_TYPE=Debug; 18 | ;; 19 | ?) 20 | echo "unkonw argument" 21 | exit 1 22 | ;; 23 | esac 24 | done 25 | 26 | if [[ -d build ]]; then 27 | echo "Build directory exists"; 28 | else 29 | echo "Create build directory"; 30 | mkdir build 31 | fi 32 | 33 | CMAKE_CMD="cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ../" 34 | echo ${CMAKE_CMD} 35 | cd ./build 36 | ${CMAKE_CMD} 37 | 38 | if [ "${BUILD_TARGET}" == "server" ];then 39 | echo "------------------- building server ------------------" 40 | make zm_mem_pool -j32 41 | else 42 | echo "------------------- building client + server ------------------" 43 | make -j32 44 | fi 45 | echo "-------------------- build finish ----------------------" 46 | -------------------------------------------------------------------------------- /compute_pool/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | # run relies on worker 5 | 6 | add_subdirectory(worker) 7 | 8 | add_subdirectory(run) -------------------------------------------------------------------------------- /compute_pool/run/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | set(RUN_SRC run.cc) 5 | add_executable(run ${RUN_SRC}) 6 | target_link_libraries(run worker) 7 | -------------------------------------------------------------------------------- /compute_pool/run/run.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "worker/handler.h" 5 | 6 | // Entrance to run threads that spawn coroutines as coordinators to run distributed transactions 7 | int main(int argc, char* argv[]) { 8 | if (argc < 3) { 9 | std::cerr << "./run (optional) (optional). E.g., ./run tatp ford 16 8" << std::endl; 10 | return 0; 11 | } 12 | 13 | Handler* handler = new Handler(); 14 | handler->ConfigureComputeNode(argc, argv); 15 | handler->GenThreads(std::string(argv[1])); 16 | handler->OutputResult(std::string(argv[1]), std::string(argv[2])); 17 | } 18 | -------------------------------------------------------------------------------- /compute_pool/run/run_micro.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "worker/handler.h" 5 | 6 | // Entrance to run threads that spawn coroutines as coordinators to run distributed transactions 7 | int main(int argc, char* argv[]) { 8 | // e.g. ./run_micro s-100 means run FORD with skewed access and write ratio 100% 9 | Handler* handler = new Handler(); 10 | handler->ConfigureComputeNodeForMICRO(argc, argv); 11 | handler->GenThreadsForMICRO(); 12 | handler->OutputResult("MICRO", "FORD"); 13 | } 14 | -------------------------------------------------------------------------------- /compute_pool/worker/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | set(WORKER_SRC 5 | handler.cc 6 | worker.cc) 7 | 8 | add_library(worker STATIC 9 | ${WORKER_SRC} 10 | ) 11 | 12 | set_target_properties(worker PROPERTIES LINKER_LANGUAGE CXX) 13 | 14 | target_link_libraries(worker ford tatp_db tatp_txn smallbank_db smallbank_txn tpcc_db tpcc_txn micro_db micro_txn) 15 | -------------------------------------------------------------------------------- /compute_pool/worker/handler.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | class Handler { 17 | public: 18 | Handler() {} 19 | // For macro-benchmark 20 | void ConfigureComputeNode(int argc, char* argv[]); 21 | void GenThreads(std::string bench_name); 22 | void OutputResult(std::string bench_name, std::string system_name); 23 | 24 | // For micro-benchmark 25 | void ConfigureComputeNodeForMICRO(int argc, char* argv[]); 26 | void GenThreadsForMICRO(); 27 | }; -------------------------------------------------------------------------------- /compute_pool/worker/worker.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "allocator/region_allocator.h" 7 | #include "base/common.h" 8 | #include "cache/lock_status.h" 9 | #include "cache/version_status.h" 10 | #include "connection/meta_manager.h" 11 | 12 | #include "tatp/tatp_db.h" 13 | #include "smallbank/smallbank_db.h" 14 | #include "tpcc/tpcc_db.h" 15 | #include "micro/micro_db.h" 16 | 17 | struct thread_params { 18 | t_id_t thread_local_id; 19 | t_id_t thread_global_id; 20 | t_id_t thread_num_per_machine; 21 | t_id_t total_thread_num; 22 | MetaManager* global_meta_man; 23 | VersionCache* global_status; 24 | LockCache* global_lcache; 25 | RDMARegionAllocator* global_rdma_region; 26 | int coro_num; 27 | std::string bench_name; 28 | }; 29 | 30 | void run_thread(thread_params* params, 31 | TATP* tatp_client, 32 | SmallBank* smallbank_client, 33 | TPCC* tpcc_client); -------------------------------------------------------------------------------- /config/compute_node_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "local_compute_node": { 3 | "machine_num": 1, 4 | "machine_id": 0, 5 | "thread_num_per_machine": 16, 6 | "coroutine_num": 8, 7 | "local_port": 12345, 8 | "txn_system": 2, 9 | "comment": "0 is farm, 1 is drtmh, 2 is ford" 10 | }, 11 | "remote_pm_nodes": { 12 | "remote_ips": [ 13 | "10.0.0.1", 14 | "10.0.0.3", 15 | "10.0.0.5" 16 | ], 17 | "remote_ports": [ 18 | 12346, 19 | 12346, 20 | 12346 21 | ], 22 | "remote_meta_ports": [ 23 | 12347, 24 | 12347, 25 | 12347 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- /config/memory_node_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "local_memory_node": { 3 | "machine_num": 2, 4 | "machine_id": 0, 5 | "local_port": 12346, 6 | "local_meta_port": 12347, 7 | "use_pm": 1, 8 | "mem_size_GB": 8, 9 | "log_buf_size_GB": 1, 10 | "pm_root": "/dev/dax0.1", 11 | "workload": "TPCC" 12 | }, 13 | "remote_compute_nodes": { 14 | "compute_node_ips": ["10.0.0.7"], 15 | "compute_node_ports": [12345] 16 | } 17 | } -------------------------------------------------------------------------------- /config/micro_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "micro": { 3 | "num_keys": 1000000, 4 | "is_skewed": true, 5 | "zipf_theta": 0.99, 6 | "data_set_size": 1, 7 | "write_ratio": 25, 8 | "attempted_num": 100000 9 | } 10 | } -------------------------------------------------------------------------------- /config/smallbank_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "smallbank": { 3 | "num_accounts": 100000, 4 | "num_hot_accounts": 4000, 5 | "attempted_num": 1000000 6 | } 7 | } -------------------------------------------------------------------------------- /config/tatp_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "tatp": { 3 | "num_subscriber": 100000, 4 | "attempted_num": 1000000 5 | } 6 | } -------------------------------------------------------------------------------- /config/tpcc_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "tpcc": { 3 | "attempted_num": 50000 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /core/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | set(CONNECTION_SRC 5 | connection/meta_manager.cc 6 | connection/qp_manager.cc 7 | ) 8 | 9 | set(DTX_SRC 10 | dtx/doorbell.cc 11 | dtx/dtx_check.cc 12 | dtx/dtx_check_ro.cc 13 | dtx/dtx_check_rw.cc 14 | dtx/dtx_issue.cc 15 | dtx/dtx.cc 16 | dtx/dtx_exe_commit.cc 17 | dtx/dtx_local_meta.cc 18 | dtx/dtx_compare.cc 19 | dtx/dtx_compare_issue.cc 20 | dtx/dtx_compare_check.cc 21 | ) 22 | 23 | set(SCHEDULER_SRC 24 | scheduler/corotine_scheduler.cc 25 | ) 26 | 27 | add_library(ford STATIC 28 | ${CONNECTION_SRC} 29 | ${DTX_SRC} 30 | ${SCHEDULER_SRC} 31 | ) 32 | 33 | set_target_properties(ford PROPERTIES LINKER_LANGUAGE CXX) 34 | 35 | target_link_libraries(ford rlib pthread boost_coroutine boost_context boost_system) -------------------------------------------------------------------------------- /core/allocator/buffer_allocator.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "allocator/region_allocator.h" 7 | #include "base/common.h" 8 | 9 | // Alloc registered RDMA buffer for each thread 10 | class RDMABufferAllocator { 11 | public: 12 | RDMABufferAllocator(char* s, char* e) : start(s), end(e), cur_offset(0) {} 13 | 14 | ALWAYS_INLINE 15 | char* Alloc(size_t size) { 16 | // When the thread local region is exhausted, the region 17 | // can be re-used (i.e., overwritten) at the front offset, i.e., 0. This is almost always true, 18 | // because the local region is typically GB-scale, and hence the front 19 | // allocated buffer has already finished serving for RDMA requests and replies, or has already aborted. 20 | // As such, our Allocator is extremely fast due to simply moving the pointer. 21 | // If anyone relies on a more reliable allocator, you can just re-implement this Alloc interface 22 | // using other standard allocators, e.g., ptmalloc/jemalloc/tcmalloc. 23 | 24 | if (unlikely(start + cur_offset + size > end)) { 25 | cur_offset = 0; 26 | } 27 | char* ret = start + cur_offset; 28 | cur_offset += size; 29 | return ret; 30 | } 31 | 32 | ALWAYS_INLINE 33 | void Free(void* p) { 34 | // As the memory region can be safely reused, we do not need to 35 | // explicitly deallocate the previously allocated memory region buffer. 36 | } 37 | 38 | private: 39 | // Each thread has a local RDMA region to temporarily alloc a small buffer. 40 | // This local region has an address range: [start, end) 41 | char* start; 42 | char* end; 43 | uint64_t cur_offset; 44 | }; 45 | -------------------------------------------------------------------------------- /core/allocator/log_allocator.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "base/common.h" 7 | 8 | const offset_t LOG_BUFFER_SIZE = 1024 * 1024 * 1024; 9 | const node_id_t NUM_MEMORY_NODES = BACKUP_DEGREE + 1; 10 | 11 | // Remote offset to write log 12 | class LogOffsetAllocator { 13 | public: 14 | LogOffsetAllocator(t_id_t tid, t_id_t num_thread) { 15 | auto per_thread_remote_log_buffer_size = LOG_BUFFER_SIZE / num_thread; 16 | for (node_id_t i = 0; i < NUM_MEMORY_NODES; i++) { 17 | start_log_offsets[i] = tid * per_thread_remote_log_buffer_size; 18 | end_log_offsets[i] = (tid + 1) * per_thread_remote_log_buffer_size; 19 | current_log_offsets[i] = 0; 20 | } 21 | } 22 | 23 | offset_t GetNextLogOffset(node_id_t node_id, size_t log_entry_size) { 24 | if (unlikely(start_log_offsets[node_id] + current_log_offsets[node_id] + log_entry_size > end_log_offsets[node_id])) { 25 | current_log_offsets[node_id] = 0; 26 | } 27 | offset_t offset = start_log_offsets[node_id] + current_log_offsets[node_id]; 28 | current_log_offsets[node_id] += log_entry_size; 29 | return offset; 30 | } 31 | 32 | private: 33 | offset_t start_log_offsets[NUM_MEMORY_NODES]; 34 | offset_t end_log_offsets[NUM_MEMORY_NODES]; 35 | offset_t current_log_offsets[NUM_MEMORY_NODES]; 36 | }; -------------------------------------------------------------------------------- /core/allocator/region_allocator.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "connection/meta_manager.h" 7 | 8 | const uint64_t PER_THREAD_ALLOC_SIZE = (size_t)500 * 1024 * 1024; 9 | 10 | // This allocator is a global one which manages all the RDMA regions in this machine 11 | 12 | // | | <- t1 start 13 | // | | 14 | // | | 15 | // | | 16 | // | | <- t1 end. t2 start 17 | // | | 18 | // | | 19 | // | | 20 | // | | <- t2 end. t3 start 21 | 22 | class RDMARegionAllocator { 23 | public: 24 | RDMARegionAllocator(MetaManager* global_meta_man, t_id_t thread_num_per_machine) { 25 | size_t global_mr_size = (size_t)thread_num_per_machine * PER_THREAD_ALLOC_SIZE; 26 | // Register a buffer to the previous opened device. It's DRAM in compute pools 27 | global_mr = (char*)malloc(global_mr_size); 28 | thread_num = thread_num_per_machine; 29 | memset(global_mr, 0, global_mr_size); 30 | RDMA_ASSERT(global_meta_man->global_rdma_ctrl->register_memory(CLIENT_MR_ID, global_mr, global_mr_size, global_meta_man->opened_rnic)); 31 | } 32 | 33 | ~RDMARegionAllocator() { 34 | if (global_mr) free(global_mr); 35 | } 36 | 37 | ALWAYS_INLINE 38 | std::pair GetThreadLocalRegion(t_id_t tid) { 39 | assert(tid < thread_num); 40 | return std::make_pair(global_mr + tid * PER_THREAD_ALLOC_SIZE, global_mr + (tid + 1) * PER_THREAD_ALLOC_SIZE); 41 | } 42 | 43 | private: 44 | char* global_mr; // memory region 45 | t_id_t thread_num; 46 | size_t log_buf_size; 47 | }; 48 | -------------------------------------------------------------------------------- /core/base/common.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include // For size_t 7 | #include // For uintxx_t 8 | 9 | #include "flags.h" 10 | 11 | // Global specification 12 | using tx_id_t = uint64_t; // Transaction id type 13 | using t_id_t = uint32_t; // Thread id type 14 | using coro_id_t = int; // Coroutine id type 15 | using node_id_t = int; // Machine id type 16 | using mr_id_t = int; // Memory region id type 17 | using table_id_t = uint64_t; // Table id type 18 | using itemkey_t = uint64_t; // Data item key type, used in DB tables 19 | using offset_t = int64_t; // Offset type. Usually used in remote offset for RDMA 20 | using version_t = uint64_t; // Version type, used in version checking 21 | using lock_t = uint64_t; // Lock type, used in remote locking 22 | 23 | // Memory region ids for server's hash store buffer and undo log buffer 24 | const mr_id_t SERVER_HASH_BUFF_ID = 97; 25 | const mr_id_t SERVER_LOG_BUFF_ID = 98; 26 | 27 | // Memory region ids for client's local_mr 28 | const mr_id_t CLIENT_MR_ID = 100; 29 | 30 | // Indicating that memory store metas have been transmitted 31 | const uint64_t MEM_STORE_META_END = 0xE0FF0E0F; 32 | 33 | // Node and thread conf 34 | #define BACKUP_DEGREE 2 // Backup memory node number. MUST **NOT** BE SET TO 0 35 | #define MAX_REMOTE_NODE_NUM 100 // Max remote memory node number 36 | #define MAX_DB_TABLE_NUM 15 // Max DB tables 37 | 38 | // Data state 39 | #define STATE_INVISIBLE 0x8000000000000000 // Data cannot be read 40 | #define STATE_LOCKED 1 // Data cannot be written. Used for serializing transactions 41 | #define STATE_CLEAN 0 42 | 43 | // Alias 44 | #define Aligned8 __attribute__((aligned(8))) 45 | #define ALWAYS_INLINE inline __attribute__((always_inline)) 46 | #define TID (std::this_thread::get_id()) 47 | 48 | // Helpful for improving condition prediction hit rate 49 | #define unlikely(x) __builtin_expect(!!(x), 0) 50 | #define likely(x) __builtin_expect(!!(x), 1) 51 | -------------------------------------------------------------------------------- /core/cache/addr_cache.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include "base/common.h" 10 | 11 | const offset_t NOT_FOUND = -1; 12 | 13 | // For fast remote address lookup 14 | class AddrCache { 15 | public: 16 | void Insert(node_id_t remote_node_id, table_id_t table_id, itemkey_t key, offset_t remote_offset) { 17 | auto node_search = addr_map.find(remote_node_id); 18 | if (node_search == addr_map.end()) { 19 | // There is no such node. Init the node and table 20 | addr_map[remote_node_id] = std::unordered_map>(); 21 | addr_map[remote_node_id][table_id] = std::unordered_map(); 22 | } else if (node_search->second.find(table_id) == node_search->second.end()) { 23 | // The node exists, but the table does not exist. Init the table 24 | addr_map[remote_node_id][table_id] = std::unordered_map(); 25 | } 26 | 27 | // The node and table both exist, then insert/update the pair 28 | addr_map[remote_node_id][table_id][key] = remote_offset; 29 | } 30 | 31 | // We know which node to read, but we do not konw whether it is cached before 32 | offset_t Search(node_id_t remote_node_id, table_id_t table_id, itemkey_t key) { 33 | auto node_search = addr_map.find(remote_node_id); 34 | if (node_search == addr_map.end()) return NOT_FOUND; 35 | auto table_search = node_search->second.find(table_id); 36 | if (table_search == node_search->second.end()) return NOT_FOUND; 37 | auto offset_search = table_search->second.find(key); 38 | return offset_search == table_search->second.end() ? NOT_FOUND : offset_search->second; 39 | } 40 | 41 | // If we have read this record, we do not read it from another node 42 | void Search(table_id_t query_table_id, itemkey_t query_key, node_id_t& remote_node_id, offset_t& remote_offset) { 43 | // look up node first 44 | for (auto it = addr_map.begin(); it != addr_map.end(); it++) { 45 | auto table_search = it->second.find(query_table_id); 46 | if (table_search == it->second.end()) { 47 | continue; 48 | } 49 | 50 | auto offset_search = table_search->second.find(query_key); 51 | if (offset_search == table_search->second.end()) { 52 | // No such key. Change to hash read 53 | return; 54 | } 55 | 56 | // Tableid and key match. Get the cached remote node id and remote offset 57 | remote_node_id = it->first; 58 | remote_offset = offset_search->second; 59 | return; 60 | } 61 | } 62 | 63 | size_t TotalAddrSize() { 64 | size_t total_size = 0; 65 | for (auto it = addr_map.begin(); it != addr_map.end(); it++) { 66 | total_size += sizeof(node_id_t); 67 | for (auto it2 = it->second.begin(); it2 != it->second.end(); it2++) { 68 | total_size += sizeof(table_id_t); 69 | for (auto it3 = it2->second.begin(); it3 != it2->second.end(); it3++) { 70 | total_size += (sizeof(itemkey_t) + sizeof(offset_t)); 71 | } 72 | } 73 | } 74 | 75 | return total_size; 76 | } 77 | 78 | private: 79 | std::unordered_map>> addr_map; 80 | }; -------------------------------------------------------------------------------- /core/cache/lock_status.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include "flags.h" 10 | #include "dtx/structs.h" 11 | 12 | struct LockBkt { 13 | std::atomic key; 14 | std::atomic lock; 15 | }; 16 | 17 | // Unfortunately, we find that in a high concurrency environment, substantial CPU CAS operations 18 | // need to frequently retry to find empty slots due to the high collision rates in a small hash table. 19 | // This will in turn cause the latency of local locking to be an order of magnitude higher than the 20 | // remote locking, i.e., by using the microsecond-latency RDMA CAS. Due to this reason, we disable 21 | // the use of local cache in FORD. 22 | 23 | class LockCache { 24 | public: 25 | LockCache() { 26 | #if LOCAL_LOCK 27 | RDMA_LOG(INFO) << "Initializing local lock tables"; 28 | for (int i = 0; i < MAX_TABLE_NUM; i++) { 29 | total_slot = (size_t)(SLOT_PER_BKT * NUM_BKT); 30 | auto* table = new LockBkt[total_slot]; 31 | size_t sz = sizeof(LockBkt) * total_slot; 32 | memset(table, 0, sz); 33 | RDMA_LOG(INFO) << "Initializing table " << i << " " << sz / 1024 / 1024 << " MB"; 34 | status_table.push_back(table); 35 | } 36 | #endif 37 | } 38 | 39 | ~LockCache() { 40 | for (auto* table : status_table) { 41 | if (table) delete[] table; 42 | } 43 | } 44 | 45 | bool TryLock(std::vector& read_write_set) { 46 | for (auto& item : read_write_set) { 47 | itemkey_t my_key = item.item_ptr->key; 48 | table_id_t table_id = item.item_ptr->table_id; 49 | auto* table = status_table[table_id]; 50 | 51 | for (uint64_t bkt_id = GetBktId(my_key);; bkt_id++) { 52 | bkt_id = bkt_id % total_slot; 53 | 54 | uint64_t probed_key = table[bkt_id].key.load(std::memory_order_relaxed); 55 | 56 | if (probed_key == my_key) { 57 | lock_t expect_lock = 0; 58 | bool exchanged = table[bkt_id].lock.compare_exchange_strong(expect_lock, STATE_LOCKED); 59 | if (!exchanged) return false; // This key is locked by another coordinator 60 | // I successfully lock this key 61 | item.bkt_idx = (int64_t)bkt_id; 62 | break; 63 | } else { 64 | // Another key occupies 65 | if (probed_key != 0) continue; 66 | 67 | // An empty slot 68 | uint64_t expect_key = 0; 69 | bool exchanged = table[bkt_id].key.compare_exchange_strong(expect_key, my_key); 70 | if (exchanged) { 71 | // We cannot just use store here because another thread may cas succ in `if (probed_key == my_key)' above after 72 | // we fill the key. So we need to do cas instead of pure store. Only cas succ we can get the lock 73 | lock_t expect_lock = 0; 74 | bool exchanged = table[bkt_id].lock.compare_exchange_strong(expect_lock, STATE_LOCKED); 75 | if (!exchanged) return false; // This key is locked by another coordinator 76 | // I successfully lock this key 77 | item.bkt_idx = (int64_t)bkt_id; 78 | break; 79 | } else if (!exchanged && expect_key == my_key) { 80 | // Another thread locks the same key, I abort 81 | return false; 82 | } else if (!exchanged && expect_key != my_key) { 83 | // Another thread writes a different key, I keep probe 84 | continue; 85 | } 86 | } 87 | } 88 | } 89 | 90 | return true; 91 | } 92 | 93 | void Unlock(std::vector& read_write_set) { 94 | for (auto& item : read_write_set) { 95 | if (item.bkt_idx == -1) continue; 96 | table_id_t table_id = item.item_ptr->table_id; 97 | auto* table = status_table[table_id]; 98 | table[item.bkt_idx].lock.store(STATE_CLEAN, std::memory_order_relaxed); 99 | } 100 | } 101 | 102 | private: 103 | uint64_t GetBktId(itemkey_t k) { 104 | // return std_hash(k); 105 | k ^= k >> 33; 106 | k *= 0xff51afd7ed558ccd; 107 | k ^= k >> 33; 108 | k *= 0xc4ceb9fe1a85ec53; 109 | k ^= k >> 33; 110 | return k; 111 | } 112 | 113 | std::hash std_hash; 114 | std::vector status_table; 115 | size_t total_slot; 116 | int CONFLICT_COUNT = 0; 117 | }; -------------------------------------------------------------------------------- /core/connection/meta_manager.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include "base/common.h" 10 | #include "memstore/hash_store.h" 11 | #include "rlib/rdma_ctrl.hpp" 12 | 13 | using namespace rdmaio; 14 | 15 | // const size_t LOG_BUFFER_SIZE = 1024 * 1024 * 512; 16 | 17 | struct RemoteNode { 18 | node_id_t node_id; 19 | std::string ip; 20 | int port; 21 | }; 22 | 23 | class MetaManager { 24 | public: 25 | MetaManager(); 26 | 27 | node_id_t GetMemStoreMeta(std::string& remote_ip, int remote_port); 28 | 29 | void GetMRMeta(const RemoteNode& node); 30 | 31 | /*** Memory Store Metadata ***/ 32 | ALWAYS_INLINE 33 | const HashMeta& GetPrimaryHashMetaWithTableID(const table_id_t table_id) const { 34 | auto search = primary_hash_metas.find(table_id); 35 | assert(search != primary_hash_metas.end()); 36 | return search->second; 37 | } 38 | 39 | ALWAYS_INLINE 40 | const std::vector* GetBackupHashMetasWithTableID(const table_id_t table_id) const { 41 | // if (backup_hash_metas.empty()) { 42 | // return nullptr; 43 | // } 44 | // auto search = backup_hash_metas.find(table_id); 45 | // assert(search != backup_hash_metas.end()); 46 | // return &(search->second); 47 | return &(backup_hash_metas[table_id]); 48 | } 49 | 50 | /*** Node ID Metadata ***/ 51 | ALWAYS_INLINE 52 | node_id_t GetPrimaryNodeID(const table_id_t table_id) const { 53 | auto search = primary_table_nodes.find(table_id); 54 | assert(search != primary_table_nodes.end()); 55 | return search->second; 56 | } 57 | 58 | ALWAYS_INLINE 59 | const std::vector* GetBackupNodeID(const table_id_t table_id) { 60 | // if (backup_table_nodes.empty()) { 61 | // return nullptr; 62 | // } 63 | // auto search = backup_table_nodes.find(table_id); 64 | // assert(search != backup_table_nodes.end()); 65 | // return &(search->second); 66 | return &(backup_table_nodes[table_id]); 67 | } 68 | 69 | ALWAYS_INLINE 70 | const MemoryAttr& GetRemoteLogMR(const node_id_t node_id) const { 71 | auto mrsearch = remote_log_mrs.find(node_id); 72 | assert(mrsearch != remote_log_mrs.end()); 73 | return mrsearch->second; 74 | } 75 | 76 | /*** RDMA Memory Region Metadata ***/ 77 | ALWAYS_INLINE 78 | const MemoryAttr& GetRemoteHashMR(const node_id_t node_id) const { 79 | auto mrsearch = remote_hash_mrs.find(node_id); 80 | assert(mrsearch != remote_hash_mrs.end()); 81 | return mrsearch->second; 82 | } 83 | 84 | private: 85 | std::unordered_map primary_hash_metas; 86 | 87 | // std::unordered_map> backup_hash_metas; 88 | 89 | std::vector backup_hash_metas[MAX_DB_TABLE_NUM]; 90 | 91 | std::unordered_map primary_table_nodes; 92 | 93 | // std::unordered_map> backup_table_nodes; 94 | 95 | std::vector backup_table_nodes[MAX_DB_TABLE_NUM]; 96 | 97 | std::unordered_map remote_hash_mrs; 98 | 99 | std::unordered_map remote_log_mrs; 100 | 101 | node_id_t local_machine_id; 102 | 103 | public: 104 | // Used by QP manager and RDMA Region 105 | RdmaCtrlPtr global_rdma_ctrl; 106 | 107 | std::vector remote_nodes; 108 | 109 | RNicHandler* opened_rnic; 110 | 111 | // Below are some parameteres from json file 112 | int64_t txn_system; 113 | }; 114 | -------------------------------------------------------------------------------- /core/connection/qp_manager.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "connection/qp_manager.h" 5 | 6 | void QPManager::BuildQPConnection(MetaManager* meta_man) { 7 | for (const auto& remote_node : meta_man->remote_nodes) { 8 | // Note that each remote machine has one MemStore mr and one Log mr 9 | MemoryAttr remote_hash_mr = meta_man->GetRemoteHashMR(remote_node.node_id); 10 | MemoryAttr remote_log_mr = meta_man->GetRemoteLogMR(remote_node.node_id); 11 | 12 | // Build QPs with one remote machine (this machine can be a primary or a backup) 13 | // Create the thread local queue pair 14 | MemoryAttr local_mr = meta_man->global_rdma_ctrl->get_local_mr(CLIENT_MR_ID); 15 | RCQP* data_qp = meta_man->global_rdma_ctrl->create_rc_qp(create_rc_idx(remote_node.node_id, (int)global_tid * 2), 16 | meta_man->opened_rnic, 17 | &local_mr); 18 | 19 | RCQP* log_qp = meta_man->global_rdma_ctrl->create_rc_qp(create_rc_idx(remote_node.node_id, (int)global_tid * 2 + 1), 20 | meta_man->opened_rnic, 21 | &local_mr); 22 | 23 | // Queue pair connection, exchange queue pair info via TCP 24 | ConnStatus rc; 25 | do { 26 | rc = data_qp->connect(remote_node.ip, remote_node.port); 27 | if (rc == SUCC) { 28 | data_qp->bind_remote_mr(remote_hash_mr); // Bind the hash mr as the default remote mr for convenient parameter passing 29 | data_qps[remote_node.node_id] = data_qp; 30 | // RDMA_LOG(INFO) << "Thread " << global_tid << ": Data QP connected! with remote node: " << remote_node.node_id << " ip: " << remote_node.ip; 31 | } 32 | usleep(2000); 33 | } while (rc != SUCC); 34 | 35 | do { 36 | rc = log_qp->connect(remote_node.ip, remote_node.port); 37 | if (rc == SUCC) { 38 | log_qp->bind_remote_mr(remote_log_mr); // Bind the log mr as the default remote mr for convenient parameter passing 39 | log_qps[remote_node.node_id] = log_qp; 40 | // RDMA_LOG(INFO) << "Thread " << global_tid << ": Log QP connected! with remote node: " << remote_node.node_id << " ip: " << remote_node.ip; 41 | } 42 | usleep(2000); 43 | } while (rc != SUCC); 44 | } 45 | } -------------------------------------------------------------------------------- /core/connection/qp_manager.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "connection/meta_manager.h" 7 | 8 | // This QPManager builds qp connections (compute node <-> memory node) for each txn thread in each compute node 9 | class QPManager { 10 | public: 11 | QPManager(t_id_t global_tid) : global_tid(global_tid) {} 12 | 13 | void BuildQPConnection(MetaManager* meta_man); 14 | 15 | ALWAYS_INLINE 16 | RCQP* GetRemoteDataQPWithNodeID(const node_id_t node_id) const { 17 | return data_qps[node_id]; 18 | } 19 | 20 | ALWAYS_INLINE 21 | void GetRemoteDataQPsWithNodeIDs(const std::vector* node_ids, std::vector& qps) { 22 | for (node_id_t node_id : *node_ids) { 23 | RCQP* qp = data_qps[node_id]; 24 | if (qp) { 25 | qps.push_back(qp); 26 | } 27 | } 28 | } 29 | 30 | ALWAYS_INLINE 31 | RCQP* GetRemoteLogQPWithNodeID(const node_id_t node_id) const { 32 | return log_qps[node_id]; 33 | } 34 | 35 | private: 36 | RCQP* data_qps[MAX_REMOTE_NODE_NUM]{nullptr}; 37 | 38 | RCQP* log_qps[MAX_REMOTE_NODE_NUM]{nullptr}; 39 | 40 | t_id_t global_tid; 41 | }; 42 | -------------------------------------------------------------------------------- /core/dtx/doorbell.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "base/common.h" 7 | #include "rlib/rdma_ctrl.hpp" 8 | #include "scheduler/corotine_scheduler.h" 9 | 10 | using namespace rdmaio; 11 | 12 | // Two RDMA requests are sent to the QP in a doorbelled (or batched) way. 13 | // These requests are executed within one round trip 14 | // Target: improve performance 15 | 16 | class DoorbellBatch { 17 | public: 18 | DoorbellBatch() { 19 | // The key of doorbell: set the pointer to link two requests 20 | sr[0].num_sge = 1; 21 | sr[0].sg_list = &sge[0]; 22 | sr[0].send_flags = 0; 23 | sr[0].next = &sr[1]; 24 | sr[1].num_sge = 1; 25 | sr[1].sg_list = &sge[1]; 26 | sr[1].send_flags = IBV_SEND_SIGNALED; 27 | sr[1].next = NULL; 28 | } 29 | 30 | struct ibv_send_wr sr[2]; 31 | 32 | struct ibv_sge sge[2]; 33 | 34 | struct ibv_send_wr* bad_sr; 35 | }; 36 | 37 | class LockReadBatch : public DoorbellBatch { 38 | public: 39 | LockReadBatch() : DoorbellBatch() {} 40 | 41 | // SetLockReq and SetReadReq are a doorbelled group 42 | // First lock, then read 43 | void SetLockReq(char* local_addr, uint64_t remote_off, uint64_t compare, uint64_t swap); 44 | 45 | void SetReadReq(char* local_addr, uint64_t remote_off, size_t size); 46 | 47 | // Send doorbelled requests to the queue pair 48 | bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id); 49 | 50 | // Fill the parameters 51 | bool FillParams(RCQP* qp); 52 | }; 53 | 54 | class WriteUnlockBatch : public DoorbellBatch { 55 | public: 56 | WriteUnlockBatch() : DoorbellBatch() {} 57 | 58 | // SetWritePrimaryReq and SetUnLockReq are a doorbelled group 59 | // First write, then unlock 60 | void SetWritePrimaryReq(char* local_addr, uint64_t remote_off, size_t size); 61 | 62 | void SetUnLockReq(char* local_addr, uint64_t remote_off); 63 | 64 | void SetUnLockReq(char* local_addr, uint64_t remote_off, uint64_t compare, uint64_t swap); 65 | 66 | // Send doorbelled requests to the queue pair 67 | bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas); 68 | }; 69 | 70 | class InvisibleWriteBatch : public DoorbellBatch { 71 | public: 72 | InvisibleWriteBatch() : DoorbellBatch() {} 73 | 74 | // SetInvisibleReq and SetWriteRemoteReq are a doorbelled group 75 | // First lock, then write 76 | void SetInvisibleReq(char* local_addr, uint64_t remote_off, uint64_t compare, uint64_t swap); 77 | 78 | void SetInvisibleReq(char* local_addr, uint64_t remote_off); 79 | 80 | void SetWriteRemoteReq(char* local_addr, uint64_t remote_off, size_t size); 81 | 82 | // Send doorbelled requests to the queue pair 83 | bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas); 84 | 85 | bool SendReqsSync(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas); 86 | }; 87 | 88 | class WriteFlushBatch : public DoorbellBatch { 89 | public: 90 | WriteFlushBatch() : DoorbellBatch() {} 91 | 92 | void SetWriteRemoteReq(char* local_addr, uint64_t remote_off, size_t size); 93 | 94 | void SetReadRemoteReq(char* local_addr, uint64_t remote_off, size_t size); 95 | // Send doorbelled requests to the queue pair 96 | bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, MemoryAttr& remote_mr); 97 | }; 98 | 99 | class InvisibleWriteFlushBatch { 100 | public: 101 | InvisibleWriteFlushBatch() { 102 | // The key of doorbell: set the pointer to link two requests 103 | sr[0].num_sge = 1; 104 | sr[0].sg_list = &sge[0]; 105 | sr[0].send_flags = 0; 106 | sr[0].next = &sr[1]; 107 | 108 | sr[1].num_sge = 1; 109 | sr[1].sg_list = &sge[1]; 110 | sr[1].send_flags = 0; 111 | sr[1].next = &sr[2]; 112 | 113 | sr[2].num_sge = 1; 114 | sr[2].sg_list = &sge[2]; 115 | sr[2].send_flags = IBV_SEND_SIGNALED; 116 | sr[2].next = NULL; 117 | } 118 | 119 | void SetInvisibleReq(char* local_addr, uint64_t remote_off); 120 | 121 | void SetWriteRemoteReq(char* local_addr, uint64_t remote_off, size_t size); 122 | 123 | void SetReadRemoteReq(char* local_addr, uint64_t remote_off, size_t size); 124 | 125 | // Send doorbelled requests to the queue pair 126 | bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas); 127 | 128 | private: 129 | struct ibv_send_wr sr[3]; 130 | 131 | struct ibv_sge sge[3]; 132 | 133 | struct ibv_send_wr* bad_sr; 134 | }; 135 | 136 | class ComparatorUpdateRemote { 137 | public: 138 | ComparatorUpdateRemote() { 139 | sr[0].num_sge = 1; 140 | sr[0].sg_list = &sge[0]; 141 | sr[0].send_flags = 0; 142 | sr[0].next = &sr[1]; 143 | 144 | sr[1].num_sge = 1; 145 | sr[1].sg_list = &sge[1]; 146 | sr[1].send_flags = 0; 147 | sr[1].next = &sr[2]; 148 | 149 | sr[2].num_sge = 1; 150 | sr[2].sg_list = &sge[2]; 151 | sr[2].send_flags = IBV_SEND_SIGNALED; 152 | sr[2].next = NULL; 153 | } 154 | 155 | void SetInvisibleReq(char* local_addr, uint64_t remote_off); 156 | 157 | void SetWriteRemoteReq(char* local_addr, uint64_t remote_off, size_t size); 158 | 159 | void SetReleaseReq(char* local_addr, uint64_t remote_off); 160 | 161 | // Send doorbelled requests to the queue pair 162 | bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas); 163 | 164 | private: 165 | struct ibv_send_wr sr[3]; 166 | 167 | struct ibv_sge sge[3]; 168 | 169 | struct ibv_send_wr* bad_sr; 170 | }; -------------------------------------------------------------------------------- /core/dtx/dtx_check.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "dtx/dtx.h" 5 | #include "util/timer.h" 6 | 7 | bool DTX::CheckReadRO(std::vector& pending_direct_ro, 8 | std::vector& pending_hash_ro, 9 | std::list& pending_invisible_ro, 10 | std::list& pending_next_hash_ro, 11 | coro_yield_t& yield) { 12 | if (!CheckDirectRO(pending_direct_ro, pending_invisible_ro, pending_next_hash_ro)) return false; 13 | if (!CheckHashRO(pending_hash_ro, pending_invisible_ro, pending_next_hash_ro)) return false; 14 | 15 | // During results checking, we may re-read data due to invisibility and hash collisions 16 | while (!pending_invisible_ro.empty() || !pending_next_hash_ro.empty()) { 17 | coro_sched->Yield(yield, coro_id); 18 | if (!CheckInvisibleRO(pending_invisible_ro)) return false; 19 | if (!CheckNextHashRO(pending_invisible_ro, pending_next_hash_ro)) return false; 20 | } 21 | return true; 22 | } 23 | 24 | bool DTX::CheckReadRORW(std::vector& pending_direct_ro, 25 | std::vector& pending_hash_ro, 26 | std::vector& pending_hash_rw, 27 | std::vector& pending_insert_off_rw, 28 | std::vector& pending_cas_rw, 29 | std::list& pending_invisible_ro, 30 | std::list& pending_next_hash_ro, 31 | std::list& pending_next_hash_rw, 32 | std::list& pending_next_off_rw, 33 | coro_yield_t& yield) { 34 | // check read-only results 35 | if (!CheckDirectRO(pending_direct_ro, pending_invisible_ro, pending_next_hash_ro)) return false; 36 | if (!CheckHashRO(pending_hash_ro, pending_invisible_ro, pending_next_hash_ro)) return false; 37 | // The reason to use separate CheckHashRO and CheckHashRW: We need to compare txid with the fetched id in read-write txn 38 | // check read-write results 39 | if (!CheckCasRW(pending_cas_rw, pending_next_hash_rw, pending_next_off_rw)) return false; 40 | if (!CheckHashRW(pending_hash_rw, pending_invisible_ro, pending_next_hash_rw)) return false; 41 | if (!CheckInsertOffRW(pending_insert_off_rw, pending_invisible_ro, pending_next_off_rw)) return false; 42 | 43 | // During results checking, we may re-read data due to invisibility and hash collisions 44 | while (!pending_invisible_ro.empty() || !pending_next_hash_ro.empty() || !pending_next_hash_rw.empty() || !pending_next_off_rw.empty()) { 45 | coro_sched->Yield(yield, coro_id); 46 | 47 | // Recheck read-only replies 48 | if (!CheckInvisibleRO(pending_invisible_ro)) return false; 49 | if (!CheckNextHashRO(pending_invisible_ro, pending_next_hash_ro)) return false; 50 | 51 | // Recheck read-write replies 52 | if (!CheckNextHashRW(pending_invisible_ro, pending_next_hash_rw)) return false; 53 | if (!CheckNextOffRW(pending_invisible_ro, pending_next_off_rw)) return false; 54 | } 55 | return true; 56 | } 57 | 58 | bool DTX::CheckValidate(std::vector& pending_validate) { 59 | // Check version 60 | for (auto& re : pending_validate) { 61 | auto it = re.item->item_ptr; 62 | if (re.has_lock_in_validate) { 63 | #if LOCK_WAIT 64 | if (*((lock_t*)re.cas_buf) != STATE_CLEAN) { 65 | // Re-read the slot until it becomes unlocked 66 | // FOR TEST ONLY 67 | 68 | auto remote_data_addr = re.item->item_ptr->remote_offset; 69 | auto remote_lock_addr = re.item->item_ptr->GetRemoteLockAddr(remote_data_addr); 70 | auto remote_version_addr = re.item->item_ptr->GetRemoteVersionAddr(remote_data_addr); 71 | 72 | while (*((lock_t*)re.cas_buf) != STATE_CLEAN) { 73 | // timing 74 | Timer timer; 75 | timer.Start(); 76 | 77 | auto rc = re.qp->post_cas(re.cas_buf, remote_lock_addr, STATE_CLEAN, STATE_LOCKED, IBV_SEND_SIGNALED); 78 | if (rc != SUCC) { 79 | TLOG(ERROR, t_id) << "client: post cas fail. rc=" << rc; 80 | exit(-1); 81 | } 82 | 83 | ibv_wc wc{}; 84 | rc = re.qp->poll_till_completion(wc, no_timeout); 85 | if (rc != SUCC) { 86 | TLOG(ERROR, t_id) << "client: poll cas fail. rc=" << rc; 87 | exit(-1); 88 | } 89 | 90 | timer.Stop(); 91 | lock_durations.emplace_back(timer.Duration_us()); 92 | } 93 | 94 | auto rc = re.qp->post_send(IBV_WR_RDMA_READ, re.version_buf, sizeof(version_t), remote_version_addr, IBV_SEND_SIGNALED); 95 | 96 | if (rc != SUCC) { 97 | TLOG(ERROR, t_id) << "client: post read fail. rc=" << rc; 98 | exit(-1); 99 | } 100 | // Note: Now the coordinator gets the lock. It can read the data 101 | 102 | ibv_wc wc{}; 103 | rc = re.qp->poll_till_completion(wc, no_timeout); 104 | if (rc != SUCC) { 105 | TLOG(ERROR, t_id) << "client: poll read fail. rc=" << rc; 106 | exit(-1); 107 | } 108 | } 109 | #else 110 | if (*((lock_t*)re.cas_buf) != STATE_CLEAN) { 111 | // it->Debug(); 112 | // RDMA_LOG(DBG) << "remote lock not clean " << std::hex << *((lock_t*)re.cas_buf); 113 | return false; 114 | } 115 | #endif 116 | version_t my_version = it->version; 117 | if (it->user_insert) { 118 | // If it is an insertion, we need to compare the the fetched version with 119 | // the old version, instead of the new version stored in item 120 | for (auto& old_version : old_version_for_insert) { 121 | if (old_version.table_id == it->table_id && old_version.key == it->key) { 122 | my_version = old_version.version; 123 | break; 124 | } 125 | } 126 | } 127 | // Compare version 128 | if (my_version != *((version_t*)re.version_buf)) { 129 | // it->Debug(); 130 | // RDMA_LOG(DBG) << "MY VERSION " << it->version; 131 | // RDMA_LOG(DBG) << "version_buf " << *((version_t*)re.version_buf); 132 | return false; 133 | } 134 | } else { 135 | // Compare version 136 | if (it->version != *((version_t*)re.version_buf)) { 137 | // it->Debug(); 138 | // RDMA_LOG(DBG) << "MY VERSION " << it->version; 139 | // RDMA_LOG(DBG) << "version_buf " << *((version_t*)re.version_buf); 140 | return false; 141 | } 142 | } 143 | } 144 | return true; 145 | } 146 | 147 | bool DTX::CheckCommitAll(std::vector& pending_commit_write, char* cas_buf) { 148 | // Release: set visible and unlock remote data 149 | for (auto& re : pending_commit_write) { 150 | auto* qp = thread_qp_man->GetRemoteDataQPWithNodeID(re.node_id); 151 | qp->post_send(IBV_WR_RDMA_WRITE, cas_buf, sizeof(lock_t), re.lock_off, 0); // Release 152 | } 153 | return true; 154 | } -------------------------------------------------------------------------------- /core/dtx/dtx_compare.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "dtx/dtx.h" 5 | 6 | bool DTX::CompareExeRO(coro_yield_t& yield) { 7 | std::vector pending_direct_ro; 8 | std::vector pending_hash_ro; 9 | 10 | // Issue reads 11 | if (!CompareIssueReadRO(pending_direct_ro, pending_hash_ro)) return false; 12 | 13 | // Yield to other coroutines when waiting for network replies 14 | coro_sched->Yield(yield, coro_id); 15 | 16 | // Receive data 17 | std::list pending_next_hash_ro; 18 | std::list pending_invisible_ro; 19 | auto res = CheckReadRO(pending_direct_ro, pending_hash_ro, pending_invisible_ro, pending_next_hash_ro, yield); 20 | return res; 21 | } 22 | 23 | bool DTX::CompareExeRW(coro_yield_t& yield) { 24 | std::vector pending_direct_ro; 25 | std::vector pending_direct_rw; 26 | 27 | std::vector pending_hash_ro; 28 | std::vector pending_hash_rw; 29 | 30 | std::list pending_next_hash_ro; 31 | std::list pending_next_hash_rw; 32 | 33 | std::vector pending_insert_off_rw; 34 | std::list pending_next_off_rw; 35 | 36 | std::list pending_invisible_ro; 37 | 38 | if (!CompareIssueReadRO(pending_direct_ro, pending_hash_ro)) return false; 39 | if (!CompareIssueReadRW(pending_direct_rw, pending_hash_rw, pending_insert_off_rw)) return false; 40 | 41 | // Yield to other coroutines when waiting for network replies 42 | coro_sched->Yield(yield, coro_id); 43 | 44 | auto res = CompareCheckReadRORW(pending_direct_ro, 45 | pending_direct_rw, 46 | pending_hash_ro, 47 | pending_hash_rw, 48 | pending_next_hash_ro, 49 | pending_next_hash_rw, 50 | pending_insert_off_rw, 51 | pending_next_off_rw, 52 | pending_invisible_ro, 53 | yield); 54 | 55 | if (global_meta_man->txn_system == DTX_SYS::LOCAL) { 56 | ParallelUndoLog(); 57 | } 58 | return res; 59 | } 60 | 61 | bool DTX::CompareLocking(coro_yield_t& yield) { 62 | std::vector pending_lock; 63 | if (!CompareIssueLocking(pending_lock)) return false; 64 | 65 | coro_sched->Yield(yield, coro_id); 66 | 67 | auto res = CompareCheckLocking(pending_lock); 68 | return res; 69 | } 70 | 71 | bool DTX::CompareValidation(coro_yield_t& yield) { 72 | std::vector pending_version_read; 73 | if (!CompareIssueValidation(pending_version_read)) return false; 74 | 75 | coro_sched->Yield(yield, coro_id); 76 | 77 | auto res = CompareCheckValidation(pending_version_read); 78 | return res; 79 | } 80 | 81 | bool DTX::CompareLockingValidation(coro_yield_t& yield) { 82 | // This is the same with our validation scheme, i.e., lock+read write set, read read set 83 | std::vector pending_validate; 84 | if (!CompareIssueLockValidation(pending_validate)) return false; 85 | 86 | coro_sched->Yield(yield, coro_id); 87 | 88 | auto res = CheckValidate(pending_validate); 89 | return res; 90 | } 91 | 92 | bool DTX::CompareCommitBackup(coro_yield_t& yield) { 93 | tx_status = TXStatus::TX_COMMIT; 94 | 95 | #if RFLUSH == 0 96 | if (!CompareIssueCommitBackup()) return false; 97 | #elif RFLUSH == 1 98 | if (!CompareIssueCommitBackupFullFlush()) return false; 99 | #elif RFLUSH == 2 100 | if (!CompareIssueCommitBackupSelectiveFlush()) return false; 101 | #endif 102 | 103 | coro_sched->Yield(yield, coro_id); 104 | 105 | return true; 106 | } 107 | 108 | bool DTX::CompareCommitPrimary(coro_yield_t& yield) { 109 | if (!CompareIssueCommitPrimary()) { 110 | return false; 111 | } 112 | coro_sched->Yield(yield, coro_id); 113 | return true; 114 | } 115 | 116 | bool DTX::CompareTruncateAsync(coro_yield_t& yield) { 117 | // Truncate: Update backup's data region in an async manner 118 | if (!CompareIssueTruncate()) { 119 | return false; 120 | } 121 | // No yield, not waiting for ack 122 | return true; 123 | } -------------------------------------------------------------------------------- /core/dtx/dtx_exe_commit.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "dtx/dtx.h" 5 | 6 | bool DTX::TxExe(coro_yield_t& yield, bool fail_abort) { 7 | // Start executing transaction 8 | tx_status = TXStatus::TX_EXE; 9 | if (read_write_set.empty() && read_only_set.empty()) { 10 | return true; 11 | } 12 | 13 | if (global_meta_man->txn_system == DTX_SYS::FORD) { 14 | // Run our system 15 | if (read_write_set.empty()) { 16 | if (ExeRO(yield)) 17 | return true; 18 | else { 19 | goto ABORT; 20 | } 21 | } else { 22 | if (ExeRW(yield)) 23 | return true; 24 | else { 25 | goto ABORT; 26 | } 27 | } 28 | } else if (global_meta_man->txn_system == DTX_SYS::FaRM || global_meta_man->txn_system == DTX_SYS::DrTMH || global_meta_man->txn_system == DTX_SYS::LOCAL) { 29 | if (read_write_set.empty()) { 30 | if (CompareExeRO(yield)) 31 | return true; 32 | else 33 | goto ABORT; 34 | } else { 35 | if (CompareExeRW(yield)) 36 | return true; 37 | else 38 | goto ABORT; 39 | } 40 | } else { 41 | RDMA_LOG(FATAL) << "NOT SUPPORT SYSTEM ID: " << global_meta_man->txn_system; 42 | } 43 | 44 | return true; 45 | 46 | ABORT: 47 | if (fail_abort) Abort(); 48 | return false; 49 | } 50 | 51 | bool DTX::TxCommit(coro_yield_t& yield) { 52 | // Only read one item 53 | if (read_write_set.empty() && read_only_set.size() == 1) { 54 | return true; 55 | } 56 | 57 | bool commit_stat; 58 | 59 | /*! 60 | FORD's commit protocol 61 | */ 62 | 63 | if (global_meta_man->txn_system == DTX_SYS::FORD) { 64 | if (!Validate(yield)) { 65 | goto ABORT; 66 | } 67 | 68 | // Next step. If read-write txns, we need to commit the updates to remote replicas 69 | if (!read_write_set.empty()) { 70 | // Write back for read-write tx 71 | #if COMMIT_TOGETHER 72 | commit_stat = CoalescentCommit(yield); 73 | if (commit_stat) { 74 | return true; 75 | } else { 76 | goto ABORT; 77 | } 78 | #else 79 | commit_stat = CompareCommitBackup(yield); 80 | if (!commit_stat) { 81 | goto ABORT; 82 | } 83 | commit_stat = CompareCommitPrimary(yield); 84 | if (!commit_stat) { 85 | goto ABORT; 86 | } 87 | commit_stat = CompareTruncateAsync(yield); 88 | if (commit_stat) { 89 | return true; 90 | } else { 91 | goto ABORT; 92 | } 93 | #endif 94 | } 95 | } 96 | 97 | if (global_meta_man->txn_system == DTX_SYS::LOCAL) { 98 | if (!read_write_set.empty()) { 99 | // For read-write txn 100 | if (!LocalLock()) return false; 101 | if (!LocalValidate()) return false; 102 | commit_stat = CoalescentCommit(yield); 103 | if (commit_stat) { 104 | return true; 105 | } else { 106 | abort(); 107 | } 108 | LocalUnlock(); 109 | } else { 110 | // For read-only txn 111 | if (!LocalValidate()) return false; 112 | } 113 | } 114 | 115 | /*! 116 | DrTM+H's commit protocol 117 | */ 118 | 119 | if (global_meta_man->txn_system == DTX_SYS::DrTMH) { 120 | // Lock and Validation are batched 121 | if (!CompareLockingValidation(yield)) { 122 | goto ABORT; 123 | } 124 | 125 | // Seperately commit backup and primary 126 | if (!read_write_set.empty()) { 127 | commit_stat = CompareCommitBackup(yield); 128 | if (!commit_stat) { 129 | goto ABORT; 130 | } 131 | commit_stat = CompareCommitPrimary(yield); 132 | if (!commit_stat) { 133 | goto ABORT; 134 | } 135 | commit_stat = CompareTruncateAsync(yield); 136 | if (commit_stat) { 137 | return true; 138 | } else { 139 | goto ABORT; 140 | } 141 | } 142 | } 143 | 144 | /*! 145 | FaRM's commit protocol 146 | */ 147 | 148 | if (global_meta_man->txn_system == DTX_SYS::FaRM) { 149 | if (!CompareLocking(yield)) { 150 | goto ABORT; 151 | } 152 | if (!CompareValidation(yield)) { 153 | goto ABORT; 154 | } 155 | 156 | // Seperately commit backup and primary 157 | if (!read_write_set.empty()) { 158 | commit_stat = CompareCommitBackup(yield); 159 | if (!commit_stat) { 160 | goto ABORT; 161 | } 162 | commit_stat = CompareCommitPrimary(yield); 163 | if (!commit_stat) { 164 | goto ABORT; 165 | } 166 | commit_stat = CompareTruncateAsync(yield); 167 | if (commit_stat) { 168 | return true; 169 | } else { 170 | goto ABORT; 171 | } 172 | } 173 | } 174 | 175 | return true; 176 | ABORT: 177 | Abort(); 178 | return false; 179 | } 180 | -------------------------------------------------------------------------------- /core/dtx/dtx_local_meta.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "dtx/dtx.h" 5 | 6 | bool DTX::LocalLock() { 7 | auto res = global_lcache->TryLock(read_write_set); 8 | if (!res) { 9 | global_lcache->Unlock(read_write_set); 10 | return false; 11 | } 12 | return true; 13 | } 14 | 15 | void DTX::LocalUnlock() { 16 | global_lcache->Unlock(read_write_set); 17 | } 18 | 19 | bool DTX::LocalValidate() { 20 | auto res = global_vcache->CheckVersion(read_only_set, tx_id); 21 | if (res == VersionStatus::VERSION_CHANGED) { 22 | global_lcache->Unlock(read_write_set); 23 | return false; 24 | } 25 | return true; 26 | } -------------------------------------------------------------------------------- /core/dtx/structs.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "memstore/hash_store.h" 7 | #include "rlib/rdma_ctrl.hpp" 8 | 9 | enum DTX_SYS : int { 10 | FaRM = 0, 11 | DrTMH = 1, 12 | FORD = 2, 13 | LOCAL = 3 // FORD with localized metadata including locks and versions 14 | }; 15 | 16 | enum TXStatus : int { 17 | TX_INIT = 0, // Transaction initialization 18 | TX_EXE, // Transaction execution, read only 19 | TX_LOCK, // Transaction execution, read+lock 20 | TX_VAL, // Transaction validate 21 | TX_COMMIT, // Commit primary and backups 22 | TX_ABORT // Aborted transaction 23 | }; 24 | 25 | enum ValStatus : int { 26 | RDMA_ERROR = -1, // Validation network error 27 | NO_NEED_VAL = 0, // Do not need validation, i.e., the coroutine does not need to yield CPU 28 | NEED_VAL = 1, // Need validation, i.e., the coroutine needs to yield CPU 29 | MUST_ABORT = 2 // The data version must be changed and hence no validation is needed 30 | }; 31 | 32 | // Following are stuctures for maintaining coroutine's state, similar to context switch 33 | 34 | struct DataSetItem { 35 | DataItemPtr item_ptr; 36 | bool is_fetched; 37 | bool is_logged; 38 | node_id_t read_which_node; // From which node this data item is read. This is a node id, e.g., 0, 1, 2... 39 | int64_t bkt_idx; // The bkt idx of local lock table 40 | }; 41 | 42 | struct OldVersionForInsert { 43 | table_id_t table_id; 44 | itemkey_t key; 45 | version_t version; 46 | }; 47 | 48 | struct LockAddr { 49 | node_id_t node_id; 50 | uint64_t lock_addr; 51 | }; 52 | 53 | // For coroutines 54 | struct DirectRead { 55 | RCQP* qp; 56 | DataSetItem* item; 57 | char* buf; 58 | node_id_t remote_node; 59 | }; 60 | 61 | struct HashRead { 62 | RCQP* qp; 63 | DataSetItem* item; 64 | char* buf; 65 | node_id_t remote_node; 66 | const HashMeta meta; 67 | }; 68 | 69 | struct InvisibleRead { 70 | RCQP* qp; 71 | char* buf; 72 | uint64_t off; 73 | }; 74 | 75 | struct CasRead { 76 | RCQP* qp; 77 | DataSetItem* item; 78 | char* cas_buf; 79 | char* data_buf; 80 | node_id_t primary_node_id; 81 | }; 82 | 83 | struct InsertOffRead { 84 | RCQP* qp; 85 | DataSetItem* item; 86 | char* buf; 87 | node_id_t remote_node; 88 | const HashMeta meta; 89 | offset_t node_off; 90 | }; 91 | 92 | struct ValidateRead { 93 | RCQP* qp; 94 | DataSetItem* item; 95 | char* cas_buf; 96 | char* version_buf; 97 | bool has_lock_in_validate; 98 | }; 99 | 100 | struct Lock { 101 | RCQP* qp; 102 | DataSetItem* item; 103 | char* cas_buf; 104 | uint64_t lock_off; 105 | }; 106 | 107 | struct Unlock { 108 | char* cas_buf; 109 | }; 110 | 111 | struct Version { 112 | DataSetItem* item; 113 | char* version_buf; 114 | }; 115 | 116 | struct CommitWrite { 117 | node_id_t node_id; 118 | uint64_t lock_off; 119 | }; 120 | -------------------------------------------------------------------------------- /core/flags.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | /*********************** For common **********************/ 7 | // Max data item size. 8 | // 8: smallbank 9 | // 40: tatp 10 | // 664: tpcc 11 | // 40: micro-benchmark 12 | 13 | const size_t MAX_ITEM_SIZE = 664; 14 | 15 | /*********************** For FORD **********************/ 16 | // 0: Read rw data without lock 17 | // 1: Read+lock rw data 18 | #define READ_LOCK 1 19 | 20 | // 0: Seperately commit remote replicas 21 | // 1: Coalescently commit remote replicas 22 | #define COMMIT_TOGETHER 1 23 | 24 | // 0: Disable reading read-only data from backups 25 | // 1: Enable reading read-only data from backups 26 | #define READ_BACKUP 0 27 | 28 | // 0: No remote persistency guarantee 29 | // 1: Full flush 30 | // 2: Selective flush 31 | #define RFLUSH 2 32 | 33 | // 0: Wait if invisible 34 | // 1: Abort if invisible 35 | #define INV_ABORT 1 36 | 37 | /*********************** For Localized opt **********************/ 38 | // Below are only for FORD with coalescent commit 39 | // 0: Disable local lock 40 | // 1: Enable locl lock 41 | #define LOCAL_LOCK 0 42 | 43 | // 0: Remote validation for RO set 44 | // 1: Cache versions in local 45 | #define LOCAL_VALIDATION 0 46 | 47 | // Hash table parameters for localized validation 48 | // For tatp 49 | // 5 50 | // 4 51 | // 10000000 52 | 53 | // For smallbank 54 | // 2 55 | // 1 56 | // 100000 57 | 58 | // For tpcc 59 | // 11 60 | // 72 61 | // 100000 62 | 63 | #define MAX_TABLE_NUM 11 64 | #define SLOT_PER_BKT 72 65 | #define NUM_BKT 100000 66 | 67 | /*********************** For counterparts **********************/ 68 | // 0: Do not cache addrs in local. Default for FaRM 69 | // 1: Cache addrs in local. Default for DrTM+h, Optmized for FaRM 70 | #define USE_LOCAL_ADDR_CACHE 0 71 | 72 | // 1: Locks block reads 73 | // 0: Use FORD's machenism, i.e., visibility control to enable read locked data but not invisible data 74 | // This is an **opposite** scheme compared with our visibility control, i.e., open this will close visibility, and close this will open visibility 75 | #define LOCK_REFUSE_READ_RO 0 76 | #define LOCK_REFUSE_READ_RW 0 77 | 78 | /*********************** For micro-benchmarks **********************/ 79 | // 0: Does not wait lock, just abort (For end-to-end tests) 80 | // 1: wait lock until resuming execution (For lock duration tests, remember set coroutine num as 2) 81 | #define LOCK_WAIT 0 82 | 83 | // 0: Does not busily wait the data to be visible, e.g., yield to another coroutine to execute the next tx (For end-to-end tests) 84 | // 1: Busily wait the data to be visible (For visibility tests, remember set coroutine num as 2) 85 | #define INV_BUSY_WAIT 0 86 | -------------------------------------------------------------------------------- /core/memstore/data_item.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "base/common.h" 13 | #include "util/debug.h" 14 | 15 | struct DataItem { 16 | table_id_t table_id; 17 | size_t value_size; // The length of uint8* value 18 | itemkey_t key; 19 | // remote_offset records this item's offset in the remote memory region 20 | // it's helpful for addressing each filed in DataItem 21 | offset_t remote_offset; 22 | version_t version; 23 | lock_t lock; 24 | uint8_t value[MAX_ITEM_SIZE]; 25 | uint8_t valid; // 1: Not deleted, 0: Deleted 26 | uint8_t user_insert; // 1: User insert operation, 0: Not user insert operation 27 | 28 | DataItem() {} 29 | // Build an empty item for fetching data from remote 30 | DataItem(table_id_t t, itemkey_t k) 31 | : table_id(t), value_size(0), key(k), remote_offset(0), version(0), lock(0), valid(1), user_insert(0) {} 32 | 33 | // For user insert item 34 | DataItem(table_id_t t, size_t s, itemkey_t k, version_t v, uint8_t ins) 35 | : table_id(t), value_size(s), key(k), remote_offset(0), version(v), lock(0), valid(1), user_insert(ins) {} 36 | 37 | // For server load data 38 | DataItem(table_id_t t, size_t s, itemkey_t k, uint8_t* d) : table_id(t), value_size(s), key(k), remote_offset(0), version(0), lock(0), valid(1), user_insert(0) { 39 | memcpy(value, d, s); 40 | } 41 | 42 | ALWAYS_INLINE 43 | size_t GetSerializeSize() const { 44 | return sizeof(*this); 45 | } 46 | 47 | ALWAYS_INLINE 48 | void Serialize(char* undo_buffer) { 49 | memcpy(undo_buffer, (char*)this, sizeof(*this)); 50 | } 51 | 52 | ALWAYS_INLINE 53 | uint64_t GetRemoteLockAddr() { 54 | return remote_offset + sizeof(table_id) + sizeof(value_size) + sizeof(key) + sizeof(remote_offset) + sizeof(version); 55 | } 56 | 57 | ALWAYS_INLINE 58 | uint64_t GetRemoteLockAddr(offset_t remote_item_off) { 59 | return remote_item_off + sizeof(table_id) + sizeof(value_size) + sizeof(key) + sizeof(remote_offset) + sizeof(version); 60 | } 61 | 62 | ALWAYS_INLINE 63 | uint64_t GetRemoteVersionAddr() { 64 | return remote_offset + sizeof(table_id) + sizeof(value_size) + sizeof(key) + sizeof(remote_offset); 65 | } 66 | 67 | ALWAYS_INLINE 68 | uint64_t GetRemoteVersionAddr(offset_t remote_item_off) { 69 | return remote_item_off + sizeof(table_id) + sizeof(value_size) + sizeof(key) + sizeof(remote_offset); 70 | } 71 | 72 | ALWAYS_INLINE 73 | void Debug(t_id_t tid) const { 74 | // For debug usage 75 | TLOG(INFO, tid) << "[Item debug] table id: " << this->table_id << ", value size: " << this->value_size 76 | << ", key: " << this->key 77 | << ", remote offset: " << this->remote_offset << ", version: " << this->version 78 | << ", lock: " 79 | << (int)this->lock << ", valid: " << (int)this->valid << ", user insert: " 80 | << (int)this->user_insert << std::endl; 81 | // TLOG(INFO, tid) << "Contents: 0x"; 82 | // int i = 0; 83 | // for (; i < MAX_ITEM_SIZE - 1; i++) { 84 | // TLOG(INFO, tid) << (int) value[i] << " "; 85 | // } 86 | // TLOG(INFO, tid) << (int) value[i] << " END\n\n"; 87 | } 88 | ALWAYS_INLINE 89 | void Debug() const { 90 | // For debug usage 91 | RDMA_LOG(DBG) << "[Item debug] table id: " << this->table_id << ", value size: " << this->value_size 92 | << ", key: " << this->key 93 | << ", remote offset: " << this->remote_offset << ", version: " << this->version 94 | << ", lock: " << std::hex << "0x" 95 | << this->lock << ", valid: " << std::dec << (int)this->valid << ", user insert: " 96 | << (int)this->user_insert; 97 | } 98 | } Aligned8; // Size: 560B in X86 arch. 99 | 100 | const size_t DataItemSize = sizeof(DataItem); 101 | 102 | const size_t RFlushReadSize = 1; // The size of RDMA read, that is after write to emulate rdma flush 103 | 104 | using DataItemPtr = std::shared_ptr; -------------------------------------------------------------------------------- /core/memstore/hash_store.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang, Lurong Liu 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include "memstore/data_item.h" 9 | #include "memstore/mem_store.h" 10 | #include "util/hash.h" 11 | 12 | #define OFFSET_NOT_FOUND -1 13 | #define OFFSET_FOUND 0 14 | #define VERSION_TOO_OLD -2 // The new version < old version 15 | 16 | #define SLOT_NOT_FOUND -1 17 | #define SLOT_INV -2 18 | #define SLOT_LOCKED -3 19 | #define SLOT_FOUND 0 20 | 21 | const int ITEM_NUM_PER_NODE = 22; 22 | 23 | struct HashMeta { 24 | // To which table this hash store belongs 25 | table_id_t table_id; 26 | 27 | // Virtual address of the table, used to calculate the distance 28 | // between some HashNodes with the table for traversing 29 | // the linked list 30 | uint64_t data_ptr; 31 | 32 | // Offset of the table, relative to the RDMA local_mr 33 | offset_t base_off; 34 | 35 | // Total hash buckets 36 | uint64_t bucket_num; 37 | 38 | // Size of hash node 39 | size_t node_size; 40 | 41 | HashMeta(table_id_t table_id, 42 | uint64_t data_ptr, 43 | uint64_t bucket_num, 44 | size_t node_size, 45 | offset_t base_off) : table_id(table_id), 46 | data_ptr(data_ptr), 47 | base_off(base_off), 48 | bucket_num(bucket_num), 49 | node_size(node_size) {} 50 | HashMeta() {} 51 | } Aligned8; 52 | 53 | // A hashnode is a bucket 54 | struct HashNode { 55 | // A dataitem is a slot 56 | DataItem data_items[ITEM_NUM_PER_NODE]; 57 | HashNode* next; 58 | } Aligned8; 59 | 60 | class HashStore { 61 | public: 62 | HashStore(table_id_t table_id, uint64_t bucket_num, MemStoreAllocParam* param) 63 | : table_id(table_id), base_off(0), bucket_num(bucket_num), data_ptr(nullptr), node_num(0) { 64 | assert(bucket_num > 0); 65 | table_size = (bucket_num) * sizeof(HashNode); 66 | region_start_ptr = param->mem_region_start; 67 | assert((uint64_t)param->mem_store_start + param->mem_store_alloc_offset + table_size <= (uint64_t)param->mem_store_reserve); 68 | data_ptr = param->mem_store_start + param->mem_store_alloc_offset; 69 | param->mem_store_alloc_offset += table_size; 70 | 71 | base_off = (uint64_t)data_ptr - (uint64_t)region_start_ptr; 72 | assert(base_off >= 0); 73 | 74 | RDMA_LOG(INFO) << "Table " << table_id << " size: " << table_size / 1024 / 1024 75 | << " MB. Start address: " << std::hex << "0x" << (uint64_t)data_ptr 76 | << ", base_off: 0x" << base_off << ", bucket_size: " << std::dec << ITEM_NUM_PER_NODE * DataItemSize << " B"; 77 | assert(data_ptr != nullptr); 78 | memset(data_ptr, 0, table_size); 79 | } 80 | 81 | table_id_t GetTableID() const { 82 | return table_id; 83 | } 84 | 85 | offset_t GetBaseOff() const { 86 | return base_off; 87 | } 88 | 89 | uint64_t GetHashNodeSize() const { 90 | return sizeof(HashNode); 91 | } 92 | 93 | uint64_t GetBucketNum() const { 94 | return bucket_num; 95 | } 96 | 97 | char* GetDataPtr() const { 98 | return data_ptr; 99 | } 100 | 101 | offset_t GetItemRemoteOffset(const void* item_ptr) const { 102 | return (uint64_t)item_ptr - (uint64_t)region_start_ptr; 103 | } 104 | 105 | uint64_t TableSize() const { 106 | return table_size; 107 | } 108 | 109 | uint64_t GetHash(itemkey_t key) { 110 | return MurmurHash64A(key, 0xdeadbeef) % bucket_num; 111 | } 112 | 113 | DataItem* LocalGet(itemkey_t key); 114 | 115 | DataItem* LocalInsert(itemkey_t key, const DataItem& data_item, MemStoreReserveParam* param); 116 | 117 | DataItem* LocalPut(itemkey_t key, const DataItem& data_item, MemStoreReserveParam* param); 118 | 119 | bool LocalDelete(itemkey_t key); 120 | 121 | private: 122 | // To which table this hash store belongs 123 | table_id_t table_id; 124 | 125 | // The offset in the RDMA region 126 | offset_t base_off; 127 | 128 | // Total hash buckets 129 | uint64_t bucket_num; 130 | 131 | // The point to value in the table 132 | char* data_ptr; 133 | 134 | // Total hash node nums 135 | uint64_t node_num; 136 | 137 | // The size of the entire hash table 138 | size_t table_size; 139 | 140 | // Start of the memory region address, for installing remote offset for data item 141 | char* region_start_ptr; 142 | }; 143 | 144 | ALWAYS_INLINE 145 | DataItem* HashStore::LocalGet(itemkey_t key) { 146 | uint64_t hash = GetHash(key); 147 | auto* node = (HashNode*)(hash * sizeof(HashNode) + data_ptr); 148 | while (node) { 149 | for (auto& data_item : node->data_items) { 150 | if (data_item.valid && data_item.key == key) { 151 | return &data_item; 152 | } 153 | } 154 | node = node->next; 155 | } 156 | return nullptr; // failed to found one 157 | } 158 | 159 | ALWAYS_INLINE 160 | DataItem* HashStore::LocalInsert(itemkey_t key, const DataItem& data_item, MemStoreReserveParam* param) { 161 | uint64_t hash = GetHash(key); 162 | auto* node = (HashNode*)(hash * sizeof(HashNode) + data_ptr); 163 | 164 | // Find 165 | while (node) { 166 | for (auto& item : node->data_items) { 167 | if (!item.valid) { 168 | item = data_item; 169 | item.valid = 1; 170 | return &item; 171 | } 172 | } 173 | if (!node->next) break; 174 | node = node->next; 175 | } 176 | 177 | // Allocate 178 | RDMA_LOG(INFO) << "Table " << table_id << " alloc a new bucket for key: " << key << ". Current slotnum/bucket: " << ITEM_NUM_PER_NODE; 179 | assert((uint64_t)param->mem_store_reserve + param->mem_store_reserve_offset <= (uint64_t)param->mem_store_end); 180 | auto* new_node = (HashNode*)(param->mem_store_reserve + param->mem_store_reserve_offset); 181 | param->mem_store_reserve_offset += sizeof(HashNode); 182 | memset(new_node, 0, sizeof(HashNode)); 183 | new_node->data_items[0] = data_item; 184 | new_node->data_items[0].valid = 1; 185 | new_node->next = nullptr; 186 | node->next = new_node; 187 | node_num++; 188 | return &(new_node->data_items[0]); 189 | } 190 | 191 | ALWAYS_INLINE 192 | DataItem* HashStore::LocalPut(itemkey_t key, const DataItem& data_item, MemStoreReserveParam* param) { 193 | DataItem* res; 194 | if ((res = LocalGet(key)) != nullptr) { 195 | // KV pair has already exist, then update 196 | *res = data_item; 197 | return res; 198 | } 199 | // Insert 200 | return LocalInsert(key, data_item, param); 201 | } 202 | 203 | ALWAYS_INLINE 204 | bool HashStore::LocalDelete(itemkey_t key) { 205 | uint64_t hash = GetHash(key); 206 | auto* node = (HashNode*)(hash * sizeof(HashNode) + data_ptr); 207 | for (auto& data_item : node->data_items) { 208 | if (data_item.valid && data_item.key == key) { 209 | data_item.valid = 0; 210 | return true; 211 | } 212 | } 213 | node = node->next; 214 | while (node) { 215 | for (auto& data_item : node->data_items) { 216 | if (data_item.valid && data_item.key == key) { 217 | data_item.valid = 0; 218 | return true; 219 | } 220 | } 221 | node = node->next; 222 | } 223 | return false; // Failed to find one to be deleted 224 | } 225 | -------------------------------------------------------------------------------- /core/memstore/mem_store.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include "base/common.h" 9 | 10 | enum class MemStoreType { 11 | kHash = 0, 12 | kBPlusTree, 13 | }; 14 | 15 | struct MemStoreAllocParam { 16 | // The start of the registered memory region for storing memory stores 17 | char* mem_region_start; 18 | 19 | // The start of the whole memory store space (e.g., Hash Store Space) 20 | char* mem_store_start; 21 | 22 | // The start offset of each memory store instance 23 | offset_t mem_store_alloc_offset; 24 | 25 | // The start address of the whole reserved space (e.g., for insert in hash conflict). Here for overflow check 26 | char* mem_store_reserve; 27 | 28 | MemStoreAllocParam(char* region_start, char* store_start, offset_t start_off, char* reserve_start) 29 | : mem_region_start(region_start), 30 | mem_store_start(store_start), 31 | mem_store_alloc_offset(start_off), 32 | mem_store_reserve(reserve_start) {} 33 | }; 34 | 35 | struct MemStoreReserveParam { 36 | // The start address of the whole reserved space (e.g., for insert in hash conflict). 37 | char* mem_store_reserve; 38 | 39 | // For allocation in case of memory store (e.g., HashStore) conflict 40 | offset_t mem_store_reserve_offset; 41 | 42 | // The end address of the memory store space. Here for overflow check 43 | char* mem_store_end; 44 | 45 | MemStoreReserveParam(char* reserve_start, offset_t reserve_off, char* end) 46 | : mem_store_reserve(reserve_start), mem_store_reserve_offset(reserve_off), mem_store_end(end) {} 47 | }; -------------------------------------------------------------------------------- /core/scheduler/corotine_scheduler.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "scheduler/corotine_scheduler.h" 5 | 6 | #include 7 | 8 | #include "util/debug.h" 9 | 10 | void CoroutineScheduler::PollRegularCompletion() { 11 | for (auto it = pending_qps.begin(); it != pending_qps.end();) { 12 | RCQP* qp = *it; 13 | struct ibv_wc wc; 14 | auto poll_result = qp->poll_send_completion(wc); // The qp polls its own wc 15 | if (poll_result == 0) { 16 | it++; 17 | continue; 18 | } 19 | if (unlikely(wc.status != IBV_WC_SUCCESS)) { 20 | RDMA_LOG(EMPH) << "Bad completion status: " << wc.status << " with error " << ibv_wc_status_str(wc.status) << ";@ node " << qp->idx_.node_id; 21 | if (wc.status != IBV_WC_RETRY_EXC_ERR) { 22 | RDMA_LOG(EMPH) << "completion status != IBV_WC_RETRY_EXC_ERR. abort()"; 23 | abort(); 24 | } else { 25 | it++; 26 | continue; 27 | } 28 | } 29 | auto coro_id = wc.wr_id; 30 | if (coro_id == 0) continue; 31 | assert(pending_counts[coro_id] > 0); 32 | pending_counts[coro_id] -= 1; 33 | if (pending_counts[coro_id] == 0) { 34 | AppendCoroutine(&coro_array[coro_id]); 35 | } 36 | it = pending_qps.erase(it); 37 | } 38 | } 39 | 40 | void CoroutineScheduler::PollLogCompletion() { 41 | for (auto it = pending_log_qps.begin(); it != pending_log_qps.end();) { 42 | RCQP* qp = *it; 43 | struct ibv_wc wc; 44 | auto poll_result = qp->poll_send_completion(wc); 45 | if (poll_result == 0) { 46 | it++; 47 | continue; 48 | } 49 | if (unlikely(wc.status != IBV_WC_SUCCESS)) { 50 | RDMA_LOG(EMPH) << "Bad completion status: " << wc.status << " with error " << ibv_wc_status_str(wc.status) << ";@ node " << qp->idx_.node_id; 51 | if (wc.status != IBV_WC_RETRY_EXC_ERR) { 52 | RDMA_LOG(EMPH) << "completion status != IBV_WC_RETRY_EXC_ERR. abort()"; 53 | abort(); 54 | } else { 55 | it++; 56 | continue; 57 | } 58 | } 59 | auto coro_id = wc.wr_id; 60 | if (coro_id == 0) continue; 61 | assert(pending_log_counts[coro_id] > 0); 62 | pending_log_counts[coro_id] -= 1; 63 | it = pending_log_qps.erase(it); 64 | } 65 | } 66 | 67 | void CoroutineScheduler::PollCompletion() { 68 | PollRegularCompletion(); 69 | PollLogCompletion(); 70 | } 71 | 72 | bool CoroutineScheduler::CheckLogAck(coro_id_t c_id) { 73 | if (pending_log_counts[c_id] == 0) { 74 | return true; 75 | } 76 | PollLogCompletion(); 77 | return pending_log_counts[c_id] == 0; 78 | } -------------------------------------------------------------------------------- /core/scheduler/coroutine.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | // Use symmetric_coroutine from boost::coroutine, not asymmetric_coroutine from boost::coroutine2 7 | // symmetric_coroutine meets transaction processing, in which each coroutine can freely yield to another 8 | #define BOOST_COROUTINES_NO_DEPRECATION_WARNING 9 | 10 | #include 11 | 12 | #include "base/common.h" 13 | 14 | using coro_call_t = boost::coroutines::symmetric_coroutine::call_type; 15 | 16 | using coro_yield_t = boost::coroutines::symmetric_coroutine::yield_type; 17 | 18 | // For coroutine scheduling 19 | struct Coroutine { 20 | Coroutine() : is_wait_poll(false) {} 21 | 22 | // Wether I am waiting for polling network replies. If true, I leave the yield-able coroutine list 23 | bool is_wait_poll; 24 | 25 | // My coroutine ID 26 | coro_id_t coro_id; 27 | 28 | // Registered coroutine function 29 | coro_call_t func; 30 | 31 | // Use pointer to accelerate yield. Otherwise, one needs a while loop 32 | // to yield the next coroutine that does not wait for network replies 33 | Coroutine* prev_coro; 34 | 35 | Coroutine* next_coro; 36 | }; -------------------------------------------------------------------------------- /core/util/debug.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "base/common.h" 15 | #include "rlib/logging.hpp" 16 | 17 | using namespace rdmaio; 18 | 19 | #define ASSERT(condition) \ 20 | if (unlikely(!(condition))) \ 21 | ::rdmaio::MessageLogger((char*)__FILE__, __LINE__, ::rdmaio::FATAL + 1).stream() << "Assertion! " 22 | 23 | #define TLOG(n, tid) \ 24 | if (n >= RDMA_LOG_LEVEL) \ 25 | LogicalThreadLogger((char*)__FILE__, __LINE__, n, tid).stream() 26 | 27 | // Use the logical thread ID 28 | class LogicalThreadLogger { 29 | public: 30 | LogicalThreadLogger(const char* file, int line, int level, t_id_t tid) : level_(level), tid_(tid) { 31 | if (level_ < RDMA_LOG_LEVEL) 32 | return; 33 | stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] "; 34 | } 35 | 36 | ~LogicalThreadLogger() { 37 | if (level_ >= RDMA_LOG_LEVEL) { 38 | std::ofstream fout; 39 | std::string log_file_name = "./" + std::to_string(tid_) + "_log.txt"; 40 | fout.open(log_file_name, std::ios::app); 41 | fout << stream_.str() << std::endl; 42 | fout.close(); 43 | if (level_ >= ::rdmaio::FATAL) 44 | abort(); 45 | } 46 | } 47 | 48 | // Return the stream associated with the logger object. 49 | std::stringstream& stream() { return stream_; } 50 | 51 | private: 52 | std::stringstream stream_; 53 | int level_; 54 | t_id_t tid_; 55 | 56 | static std::string StripBasename(const std::string& full_path) { 57 | const char kSeparator = '/'; 58 | size_t pos = full_path.rfind(kSeparator); 59 | if (pos != std::string::npos) { 60 | return full_path.substr(pos + 1, std::string::npos); 61 | } else { 62 | return full_path; 63 | } 64 | } 65 | }; 66 | 67 | // Use the physical thread ID 68 | class PhysicalThreadLogger { 69 | public: 70 | PhysicalThreadLogger(const char* file, int line, int level, std::thread::id tid) : level_(level), tid_(tid) { 71 | if (level_ < RDMA_LOG_LEVEL) 72 | return; 73 | stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] "; 74 | } 75 | 76 | ~PhysicalThreadLogger() { 77 | if (level_ >= RDMA_LOG_LEVEL) { 78 | std::ofstream fout; 79 | 80 | std::ostringstream oss; 81 | oss << tid_; 82 | std::string stid = oss.str(); 83 | std::string log_file_name = "./" + stid + "_log.txt"; 84 | fout.open(log_file_name, std::ios::app); 85 | fout << stream_.str() << std::endl; 86 | fout.close(); 87 | if (level_ >= ::rdmaio::FATAL) 88 | abort(); 89 | } 90 | } 91 | 92 | // Return the stream associated with the logger object. 93 | std::stringstream& stream() { return stream_; } 94 | 95 | private: 96 | std::stringstream stream_; 97 | int level_; 98 | std::thread::id tid_; 99 | 100 | static std::string StripBasename(const std::string& full_path) { 101 | const char kSeparator = '/'; 102 | size_t pos = full_path.rfind(kSeparator); 103 | if (pos != std::string::npos) { 104 | return full_path.substr(pos + 1, std::string::npos); 105 | } else { 106 | return full_path; 107 | } 108 | } 109 | }; 110 | 111 | // https://panthema.net/2008/0901-stacktrace-demangled/ 112 | static void PrintStackTrace(FILE* out = stderr, unsigned int max_frames = 63) { 113 | fprintf(out, "stack trace:\n"); 114 | 115 | // storage array for stack trace address data 116 | void* addrlist[max_frames + 1]; 117 | 118 | // retrieve current stack addresses 119 | int addrlen = backtrace(addrlist, sizeof(addrlist) / sizeof(void*)); 120 | 121 | if (addrlen == 0) { 122 | fprintf(out, " \n"); 123 | return; 124 | } 125 | 126 | // resolve addresses into strings containing "filename(function+address)", 127 | // this array must be free()-ed 128 | char** symbollist = backtrace_symbols(addrlist, addrlen); 129 | 130 | // allocate string which will be filled with the demangled function name 131 | size_t funcnamesize = 256; 132 | char* funcname = (char*)malloc(funcnamesize); 133 | 134 | // iterate over the returned symbol lines. skip the first, it is the 135 | // address of this function. 136 | for (int i = 1; i < addrlen; i++) { 137 | char *begin_name = 0, *begin_offset = 0, *end_offset = 0; 138 | 139 | // find parentheses and +address offset surrounding the mangled name: 140 | // ./module(function+0x15c) [0x8048a6d] 141 | for (char* p = symbollist[i]; *p; ++p) { 142 | if (*p == '(') 143 | begin_name = p; 144 | else if (*p == '+') 145 | begin_offset = p; 146 | else if (*p == ')' && begin_offset) { 147 | end_offset = p; 148 | break; 149 | } 150 | } 151 | 152 | if (begin_name && begin_offset && end_offset && begin_name < begin_offset) { 153 | *begin_name++ = '\0'; 154 | *begin_offset++ = '\0'; 155 | *end_offset = '\0'; 156 | 157 | // mangled name is now in [begin_name, begin_offset) and caller 158 | // offset in [begin_offset, end_offset). now apply 159 | // __cxa_demangle(): 160 | 161 | int status; 162 | char* ret = abi::__cxa_demangle(begin_name, 163 | funcname, &funcnamesize, &status); 164 | if (status == 0) { 165 | funcname = ret; // use possibly realloc()-ed string 166 | fprintf(out, " %s : %s+%s\n", 167 | symbollist[i], funcname, begin_offset); 168 | } else { 169 | // demangling failed. Output function name as a C function with 170 | // no arguments. 171 | fprintf(out, " %s : %s()+%s\n", 172 | symbollist[i], begin_name, begin_offset); 173 | } 174 | } else { 175 | // couldn't parse the line? print the whole line. 176 | fprintf(out, " %s\n", symbollist[i]); 177 | } 178 | } 179 | 180 | free(funcname); 181 | free(symbollist); 182 | } 183 | -------------------------------------------------------------------------------- /core/util/fast_random.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Adapted from mica 3 | // Copyright (c) 2022 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "base/common.h" 10 | 11 | class Rand { 12 | public: 13 | explicit Rand() : state_(0) {} 14 | explicit Rand(uint64_t seed) : state_(seed) { assert(seed < (1UL << 48)); } 15 | Rand(const Rand& o) : state_(o.state_) {} 16 | Rand& operator=(const Rand& o) { 17 | state_ = o.state_; 18 | return *this; 19 | } 20 | 21 | uint32_t next_u32() { 22 | // same as Java's 23 | state_ = (state_ * 0x5deece66dUL + 0xbUL) & ((1UL << 48) - 1); 24 | return (uint32_t)(state_ >> (48 - 32)); 25 | } 26 | 27 | double next_f64() { 28 | // caution: this is maybe too non-random 29 | state_ = (state_ * 0x5deece66dUL + 0xbUL) & ((1UL << 48) - 1); 30 | return (double)state_ / (double)((1UL << 48) - 1); 31 | } 32 | 33 | private: 34 | uint64_t state_; 35 | }; 36 | 37 | // Generate random number for workload testing 38 | static ALWAYS_INLINE 39 | uint32_t FastRand(uint64_t* seed) { 40 | *seed = *seed * 1103515245 + 12345; 41 | return (uint32_t)(*seed >> 32); 42 | } 43 | 44 | // not thread-safe 45 | // 46 | // taken from java: 47 | // http://developer.classpath.org/doc/java/util/Random-source.html 48 | class FastRandom { 49 | public: 50 | FastRandom(unsigned long sed) 51 | : seed(0) { 52 | SetSeed0(sed); 53 | } 54 | 55 | FastRandom() : seed(0) { 56 | SetSeed0(seed); 57 | } 58 | 59 | inline unsigned long 60 | Next() { 61 | return ((unsigned long)Next(32) << 32) + Next(32); 62 | } 63 | 64 | inline uint32_t 65 | NextU32() { 66 | return Next(32); 67 | } 68 | 69 | inline uint16_t 70 | NextU16() { 71 | return Next(16); 72 | } 73 | 74 | /** [0.0, 1.0) */ 75 | inline double 76 | NextUniform() { 77 | return (((unsigned long)Next(26) << 27) + Next(27)) / (double)(1L << 53); 78 | } 79 | 80 | inline char 81 | NextChar() { 82 | return Next(8) % 256; 83 | } 84 | 85 | inline std::string 86 | NextString(size_t len) { 87 | std::string s(len, 0); 88 | for (size_t i = 0; i < len; i++) 89 | s[i] = NextChar(); 90 | return s; 91 | } 92 | 93 | inline unsigned long 94 | GetSeed() { 95 | return seed; 96 | } 97 | 98 | inline void 99 | SetSeed(unsigned long sed) { 100 | this->seed = sed; 101 | } 102 | 103 | inline void 104 | SetSeed0(unsigned long sed) { 105 | this->seed = (sed ^ 0x5DEECE66DL) & ((1L << 48) - 1); 106 | } 107 | 108 | inline uint64_t RandNumber(int min, int max) { 109 | return CheckBetweenInclusive((uint64_t)(NextUniform() * (max - min + 1) + min), min, max); 110 | } 111 | 112 | inline uint64_t CheckBetweenInclusive(uint64_t v, uint64_t min, uint64_t max) { 113 | assert(v >= min); 114 | assert(v <= max); 115 | return v; 116 | } 117 | 118 | private: 119 | inline unsigned long 120 | Next(unsigned int bits) { 121 | seed = (seed * 0x5DEECE66DL + 0xBL) & ((1L << 48) - 1); 122 | return (unsigned long)(seed >> (48 - bits)); 123 | } 124 | 125 | unsigned long seed; 126 | }; 127 | -------------------------------------------------------------------------------- /core/util/hash.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "base/common.h" 7 | 8 | // 64-bit hash for 64-bit platforms 9 | static ALWAYS_INLINE 10 | uint64_t MurmurHash64A(uint64_t key, unsigned int seed) { 11 | const uint64_t m = 0xc6a4a7935bd1e995; 12 | const int r = 47; 13 | uint64_t h = seed ^ (8 * m); 14 | const uint64_t* data = &key; 15 | const uint64_t* end = data + 1; 16 | 17 | while (data != end) { 18 | uint64_t k = *data++; 19 | k *= m; 20 | k ^= k >> r; 21 | k *= m; 22 | h ^= k; 23 | h *= m; 24 | } 25 | 26 | // const unsigned char* data2 = (const unsigned char*)data; 27 | 28 | // switch (8 & 7) { 29 | // case 7: 30 | // h ^= uint64_t(data2[6]) << 48; 31 | // case 6: 32 | // h ^= uint64_t(data2[5]) << 40; 33 | // case 5: 34 | // h ^= uint64_t(data2[4]) << 32; 35 | // case 4: 36 | // h ^= uint64_t(data2[3]) << 24; 37 | // case 3: 38 | // h ^= uint64_t(data2[2]) << 16; 39 | // case 2: 40 | // h ^= uint64_t(data2[1]) << 8; 41 | // case 1: 42 | // h ^= uint64_t(data2[0]); 43 | // h *= m; 44 | // }; 45 | 46 | h ^= h >> r; 47 | h *= m; 48 | h ^= h >> r; 49 | 50 | return h; 51 | } 52 | 53 | static ALWAYS_INLINE 54 | uint64_t MurmurHash64ALen(const char* key, uint32_t len, uint64_t seed) { 55 | const uint64_t m = 0xc6a4a7935bd1e995; 56 | const int r = 47; 57 | 58 | uint64_t h = seed ^ (len * m); 59 | 60 | const uint64_t* data = (const uint64_t*)key; 61 | const uint64_t* end = data + (len / 8); 62 | 63 | while (data != end) { 64 | uint64_t k = *data++; 65 | 66 | k *= m; 67 | k ^= k >> r; 68 | k *= m; 69 | 70 | h ^= k; 71 | h *= m; 72 | } 73 | 74 | const unsigned char* data2 = (const unsigned char*)data; 75 | 76 | switch (len & 7) { 77 | case 7: 78 | h ^= (uint64_t)((uint64_t)data2[6] << (uint64_t)48); 79 | case 6: 80 | h ^= (uint64_t)((uint64_t)data2[5] << (uint64_t)40); 81 | case 5: 82 | h ^= (uint64_t)((uint64_t)data2[4] << (uint64_t)32); 83 | case 4: 84 | h ^= (uint64_t)((uint64_t)data2[3] << (uint64_t)24); 85 | case 3: 86 | h ^= (uint64_t)((uint64_t)data2[2] << (uint64_t)16); 87 | case 2: 88 | h ^= (uint64_t)((uint64_t)data2[1] << (uint64_t)8); 89 | case 1: 90 | h ^= (uint64_t)((uint64_t)data2[0]); 91 | h *= m; 92 | }; 93 | 94 | h ^= h >> r; 95 | h *= m; 96 | h ^= h >> r; 97 | 98 | return h; 99 | } 100 | -------------------------------------------------------------------------------- /core/util/latency.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Adapted from mica 3 | // Copyright (c) 2022 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | // Test ibv_poll_cq 11 | static inline unsigned long GetCPUCycle() { 12 | unsigned a, d; 13 | __asm __volatile("rdtsc" 14 | : "=a"(a), "=d"(d)); 15 | return ((unsigned long)a) | (((unsigned long)d) << 32); 16 | } 17 | 18 | class Latency { 19 | public: 20 | Latency() { reset(); } 21 | 22 | void reset() { memset(reinterpret_cast(this), 0, sizeof(Latency)); } 23 | 24 | void update(uint64_t us) { 25 | if (us < 128) 26 | bin0_[us]++; 27 | else if (us < 384) 28 | bin1_[(us - 128) / 2]++; 29 | else if (us < 896) 30 | bin2_[(us - 384) / 4]++; 31 | else if (us < 1920) 32 | bin3_[(us - 896) / 8]++; 33 | else if (us < 3968) 34 | bin4_[(us - 1920) / 16]++; 35 | else 36 | bin5_++; 37 | } 38 | 39 | Latency& operator+=(const Latency& o) { 40 | uint64_t i; 41 | for (i = 0; i < 128; i++) bin0_[i] += o.bin0_[i]; 42 | for (i = 0; i < 128; i++) bin1_[i] += o.bin1_[i]; 43 | for (i = 0; i < 128; i++) bin2_[i] += o.bin2_[i]; 44 | for (i = 0; i < 128; i++) bin3_[i] += o.bin3_[i]; 45 | for (i = 0; i < 128; i++) bin4_[i] += o.bin4_[i]; 46 | bin5_ += o.bin5_; 47 | return *this; 48 | } 49 | 50 | uint64_t count() const { 51 | uint64_t count = 0; 52 | uint64_t i; 53 | for (i = 0; i < 128; i++) count += bin0_[i]; 54 | for (i = 0; i < 128; i++) count += bin1_[i]; 55 | for (i = 0; i < 128; i++) count += bin2_[i]; 56 | for (i = 0; i < 128; i++) count += bin3_[i]; 57 | for (i = 0; i < 128; i++) count += bin4_[i]; 58 | count += bin5_; 59 | return count; 60 | } 61 | 62 | uint64_t sum() const { 63 | uint64_t sum = 0; 64 | uint64_t i; 65 | for (i = 0; i < 128; i++) sum += bin0_[i] * (0 + i * 1); 66 | for (i = 0; i < 128; i++) sum += bin1_[i] * (128 + i * 2); 67 | for (i = 0; i < 128; i++) sum += bin2_[i] * (384 + i * 4); 68 | for (i = 0; i < 128; i++) sum += bin3_[i] * (896 + i * 8); 69 | for (i = 0; i < 128; i++) sum += bin4_[i] * (1920 + i * 16); 70 | sum += bin5_ * 3968; 71 | return sum; 72 | } 73 | 74 | uint64_t avg() const { return sum() / std::max(uint64_t(1), count()); } 75 | 76 | uint64_t min() const { 77 | uint64_t i; 78 | for (i = 0; i < 128; i++) 79 | if (bin0_[i] != 0) return 0 + i * 1; 80 | for (i = 0; i < 128; i++) 81 | if (bin1_[i] != 0) return 128 + i * 2; 82 | for (i = 0; i < 128; i++) 83 | if (bin2_[i] != 0) return 384 + i * 4; 84 | for (i = 0; i < 128; i++) 85 | if (bin3_[i] != 0) return 896 + i * 8; 86 | for (i = 0; i < 128; i++) 87 | if (bin4_[i] != 0) return 1920 + i * 16; 88 | // if (bin5_ != 0) return 3968; 89 | return 3968; 90 | } 91 | 92 | uint64_t max() const { 93 | int64_t i; 94 | if (bin5_ != 0) return 3968; 95 | for (i = 127; i >= 0; i--) 96 | if (bin4_[i] != 0) return 1920 + static_cast(i) * 16; 97 | for (i = 127; i >= 0; i--) 98 | if (bin3_[i] != 0) return 896 + static_cast(i) * 8; 99 | for (i = 127; i >= 0; i--) 100 | if (bin2_[i] != 0) return 384 + static_cast(i) * 4; 101 | for (i = 127; i >= 0; i--) 102 | if (bin1_[i] != 0) return 128 + static_cast(i) * 2; 103 | for (i = 127; i >= 0; i--) 104 | if (bin0_[i] != 0) return 0 + static_cast(i) * 1; 105 | return 0; 106 | } 107 | 108 | // Return the (p * 100) percentile latency 109 | uint64_t perc(double p) const { 110 | assert(p >= 0.0 && p <= 1.00); 111 | 112 | uint64_t i; 113 | int64_t thres = static_cast(p * static_cast(count())); 114 | for (i = 0; i < 128; i++) 115 | if ((thres -= static_cast(bin0_[i])) < 0) return 0 + i * 1; 116 | for (i = 0; i < 128; i++) 117 | if ((thres -= static_cast(bin1_[i])) < 0) return 128 + i * 2; 118 | for (i = 0; i < 128; i++) 119 | if ((thres -= static_cast(bin2_[i])) < 0) return 384 + i * 4; 120 | for (i = 0; i < 128; i++) 121 | if ((thres -= static_cast(bin3_[i])) < 0) return 896 + i * 8; 122 | for (i = 0; i < 128; i++) 123 | if ((thres -= static_cast(bin4_[i])) < 0) return 1920 + i * 16; 124 | return 3968; 125 | } 126 | 127 | void print(FILE* fp) const { 128 | uint64_t i; 129 | for (i = 0; i < 128; i++) 130 | if (bin0_[i] != 0) 131 | fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 0 + i * 1, bin0_[i]); 132 | for (i = 0; i < 128; i++) 133 | if (bin1_[i] != 0) 134 | fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 128 + i * 2, bin1_[i]); 135 | for (i = 0; i < 128; i++) 136 | if (bin2_[i] != 0) 137 | fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 384 + i * 4, bin2_[i]); 138 | for (i = 0; i < 128; i++) 139 | if (bin3_[i] != 0) 140 | fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 896 + i * 8, bin3_[i]); 141 | for (i = 0; i < 128; i++) 142 | if (bin4_[i] != 0) 143 | fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 1920 + i * 16, bin4_[i]); 144 | if (bin5_ != 0) fprintf(fp, "%4d %6" PRIu64 "\n", 3968, bin5_); 145 | } 146 | 147 | private: 148 | // [0, 128) us 149 | uint64_t bin0_[128]; 150 | // [128, 384) us 151 | uint64_t bin1_[128]; 152 | // [384, 896) us 153 | uint64_t bin2_[128]; 154 | // [896, 1920) us 155 | uint64_t bin3_[128]; 156 | // [1920, 3968) us 157 | uint64_t bin4_[128]; 158 | // [3968, inf) us 159 | uint64_t bin5_; 160 | }; -------------------------------------------------------------------------------- /core/util/seqlock.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include "util/spinlock.h" 7 | 8 | // Sequence lock 9 | class SeqLock { 10 | public: 11 | SeqLock() { 12 | spin_lock = new SpinLock(); 13 | } 14 | 15 | void BeginWrite() { 16 | spin_lock->Lock(); 17 | } 18 | 19 | void EndWrite() { 20 | spin_lock->Unlock(); 21 | } 22 | 23 | void BeginRead() { 24 | // Wait the writer 25 | while (IsWriting()) 26 | ; 27 | } 28 | 29 | void EndRead() { 30 | // Read again if a writer locks 31 | // if (IsWriting()) BeginRead(); 32 | } 33 | 34 | private: 35 | SpinLock* spin_lock; 36 | bool IsWriting() { 37 | return spin_lock->Counter() % 2 == 1; 38 | } 39 | }; -------------------------------------------------------------------------------- /core/util/spinlock.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | class SpinLock { 9 | public: 10 | SpinLock() { 11 | counter.store(0, std::memory_order_release); 12 | } 13 | 14 | void Lock() { 15 | int locked = 1; 16 | int unlocked = 0; 17 | 18 | // Wait for unlock 19 | while (counter.compare_exchange_strong(locked, unlocked, std::memory_order_acq_rel)) 20 | ; 21 | } 22 | 23 | void Unlock() { 24 | int unlocked = 0; 25 | counter.exchange(unlocked, std::memory_order_acq_rel); 26 | } 27 | 28 | int Counter() { 29 | return counter.load(std::memory_order_acquire); 30 | } 31 | 32 | private: 33 | std::atomic_int counter; 34 | }; -------------------------------------------------------------------------------- /core/util/thread_pool.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "base/common.h" 17 | 18 | class ThreadPool { 19 | public: 20 | ThreadPool(size_t); 21 | template 22 | auto Enqueue(F&& f, Args&&... args) -> std::future; 23 | ~ThreadPool(); 24 | 25 | private: 26 | std::vector workers; 27 | std::queue> tasks; 28 | std::mutex queue_mutex; 29 | std::condition_variable condition; 30 | bool stop; 31 | }; 32 | 33 | ALWAYS_INLINE 34 | ThreadPool::ThreadPool(size_t threads) : stop(false) { 35 | for (size_t i = 0; i < threads; ++i) { 36 | workers.emplace_back([this] { 37 | for (;;) { 38 | std::function task; 39 | { 40 | std::unique_lock lock(this->queue_mutex); 41 | this->condition.wait(lock, [this] { return this->stop || !this->tasks.empty(); }); 42 | if (this->stop && this->tasks.empty()) return; 43 | task = std::move(this->tasks.front()); 44 | this->tasks.pop(); 45 | } 46 | task(); // Execute the enqueued task 47 | } 48 | }); 49 | } 50 | } 51 | 52 | // Add a task to the thread pool 53 | template 54 | ALWAYS_INLINE 55 | auto ThreadPool::Enqueue(F&& f, Args&&... args) -> std::future { 56 | auto task = std::make_shared>( 57 | std::bind(std::forward(f), std::forward(args)...)); 58 | { 59 | std::unique_lock lock(queue_mutex); 60 | if (stop) throw std::runtime_error("Enqueue on stopped ThreadPool"); 61 | tasks.emplace([task]() { (*task)(); }); 62 | } 63 | condition.notify_one(); 64 | return task->get_future(); // Return the results of the task 65 | } 66 | 67 | ALWAYS_INLINE 68 | ThreadPool::~ThreadPool() { 69 | { 70 | std::unique_lock lock(queue_mutex); 71 | stop = true; 72 | } 73 | condition.notify_all(); 74 | for (std::thread& worker : workers) { 75 | if (worker.joinable()) { 76 | worker.join(); 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /core/util/timer.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | using namespace std::chrono; 9 | 10 | // Records one event's duration 11 | class Timer { 12 | public: 13 | Timer() {} 14 | void Start() { start = high_resolution_clock::now(); } 15 | void Stop() { end = high_resolution_clock::now(); } 16 | 17 | double Duration_s() { 18 | return duration_cast>(end - start).count(); 19 | } 20 | 21 | uint64_t Duration_ns() { 22 | return duration_cast(end - start).count(); 23 | } 24 | 25 | uint64_t Duration_us() { 26 | return duration_cast(end - start).count(); 27 | } 28 | 29 | uint64_t Duration_ms() { 30 | return duration_cast(end - start).count(); 31 | } 32 | 33 | private: 34 | high_resolution_clock::time_point start; 35 | high_resolution_clock::time_point end; 36 | }; 37 | -------------------------------------------------------------------------------- /core/util/zipf.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Adapted from mica 3 | // Copyright (c) 2022 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "util/fast_random.h" 12 | 13 | class ZipfGen { 14 | public: 15 | ZipfGen(uint64_t n, double theta, uint64_t rand_seed) { 16 | assert(n > 0); 17 | if (theta > 0.992 && theta < 1) 18 | fprintf(stderr, "warning: theta > 0.992 will be inaccurate due to approximation\n"); 19 | if (theta >= 1. && theta < 40.) { 20 | fprintf(stderr, "error: theta in [1., 40.) is not supported\n"); 21 | assert(false); 22 | theta_ = 0; // unused 23 | alpha_ = 0; // unused 24 | thres_ = 0; // unused 25 | return; 26 | } 27 | assert(theta == -1. || (theta >= 0. && theta < 1.) || theta >= 40.); 28 | n_ = n; 29 | theta_ = theta; 30 | if (theta == -1.) { 31 | seq_ = rand_seed % n; 32 | alpha_ = 0; // unused 33 | thres_ = 0; // unused 34 | } else if (theta > 0. && theta < 1.) { 35 | seq_ = 0; // unused 36 | alpha_ = 1. / (1. - theta); 37 | thres_ = 1. + pow_approx(0.5, theta); 38 | } else { 39 | seq_ = 0; // unused 40 | alpha_ = 0.; // unused 41 | thres_ = 0.; // unused 42 | } 43 | last_n_ = 0; 44 | zetan_ = 0.; 45 | eta_ = 0; 46 | // rand_state_[0] = (unsigned short)(rand_seed >> 0); 47 | // rand_state_[1] = (unsigned short)(rand_seed >> 16); 48 | // rand_state_[2] = (unsigned short)(rand_seed >> 32); 49 | rand_ = Rand(rand_seed); 50 | } 51 | 52 | ZipfGen(const ZipfGen& src) { 53 | n_ = src.n_; 54 | theta_ = src.theta_; 55 | alpha_ = src.alpha_; 56 | thres_ = src.thres_; 57 | last_n_ = src.last_n_; 58 | dbl_n_ = src.dbl_n_; 59 | zetan_ = src.zetan_; 60 | eta_ = src.eta_; 61 | seq_ = src.seq_; 62 | rand_ = src.rand_; 63 | } 64 | 65 | ZipfGen(const ZipfGen& src, uint64_t rand_seed) { 66 | n_ = src.n_; 67 | theta_ = src.theta_; 68 | alpha_ = src.alpha_; 69 | thres_ = src.thres_; 70 | last_n_ = src.last_n_; 71 | dbl_n_ = src.dbl_n_; 72 | zetan_ = src.zetan_; 73 | eta_ = src.eta_; 74 | seq_ = src.seq_; 75 | rand_ = Rand(rand_seed); 76 | } 77 | 78 | ZipfGen& operator=(const ZipfGen& src) { 79 | n_ = src.n_; 80 | theta_ = src.theta_; 81 | alpha_ = src.alpha_; 82 | thres_ = src.thres_; 83 | last_n_ = src.last_n_; 84 | dbl_n_ = src.dbl_n_; 85 | zetan_ = src.zetan_; 86 | eta_ = src.eta_; 87 | seq_ = src.seq_; 88 | rand_ = src.rand_; 89 | return *this; 90 | } 91 | 92 | void change_n(uint64_t n) { n_ = n; } 93 | 94 | uint64_t next() { 95 | if (last_n_ != n_) { 96 | if (theta_ > 0. && theta_ < 1.) { 97 | zetan_ = zeta(last_n_, zetan_, n_, theta_); 98 | eta_ = (1. - pow_approx(2. / (double)n_, 1. - theta_)) / 99 | (1. - zeta(0, 0., 2, theta_) / zetan_); 100 | } 101 | last_n_ = n_; 102 | dbl_n_ = (double)n_; 103 | } 104 | 105 | if (theta_ == -1.) { 106 | uint64_t v = seq_; 107 | if (++seq_ >= n_) seq_ = 0; 108 | return v; 109 | } else if (theta_ == 0.) { 110 | double u = rand_.next_f64(); 111 | return (uint64_t)(dbl_n_ * u); 112 | } else if (theta_ >= 40.) { 113 | return 0UL; 114 | } else { 115 | // from J. Gray et al. Quickly generating billion-record synthetic 116 | // databases. In SIGMOD, 1994. 117 | 118 | // double u = erand48(rand_state_); 119 | double u = rand_.next_f64(); 120 | double uz = u * zetan_; 121 | if (uz < 1.) 122 | return 0UL; 123 | else if (uz < thres_) 124 | return 1UL; 125 | else { 126 | uint64_t v = 127 | (uint64_t)(dbl_n_ * pow_approx(eta_ * (u - 1.) + 1., alpha_)); 128 | if (v >= n_) v = n_ - 1; 129 | return v; 130 | } 131 | } 132 | } 133 | 134 | static void test(double theta) { 135 | double zetan = 0.; 136 | const uint64_t n = 1000000UL; 137 | uint64_t i; 138 | 139 | for (i = 0; i < n; i++) zetan += 1. / pow((double)i + 1., theta); 140 | 141 | if (theta < 1. || theta >= 40.) { 142 | ZipfGen zg(n, theta, 0); 143 | 144 | uint64_t num_key0 = 0; 145 | const uint64_t num_samples = 10000000UL; 146 | if (theta < 1. || theta >= 40.) { 147 | for (i = 0; i < num_samples; i++) 148 | if (zg.next() == 0) num_key0++; 149 | } 150 | 151 | printf("theta = %lf; using pow(): %.10lf", theta, 1. / zetan); 152 | if (theta < 1. || theta >= 40.) 153 | printf(", using approx-pow(): %.10lf", 154 | (double)num_key0 / (double)num_samples); 155 | printf("\n"); 156 | } 157 | } 158 | 159 | private: 160 | static double pow_approx(double a, double b) { 161 | // from 162 | // http://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/ 163 | 164 | // calculate approximation with fraction of the exponent 165 | int e = (int)b; 166 | union { 167 | double d; 168 | int x[2]; 169 | } u = {a}; 170 | u.x[1] = (int)((b - (double)e) * (double)(u.x[1] - 1072632447) + 1072632447.); 171 | u.x[0] = 0; 172 | 173 | // exponentiation by squaring with the exponent's integer part 174 | // double r = u.d makes everything much slower, not sure why 175 | // TODO: use popcount? 176 | double r = 1.; 177 | while (e) { 178 | if (e & 1) r *= a; 179 | a *= a; 180 | e >>= 1; 181 | } 182 | 183 | return r * u.d; 184 | } 185 | 186 | static double zeta(uint64_t last_n, double last_sum, uint64_t n, double theta) { 187 | if (last_n > n) { 188 | last_n = 0; 189 | last_sum = 0.; 190 | } 191 | while (last_n < n) { 192 | last_sum += 1. / pow_approx((double)last_n + 1., theta); 193 | last_n++; 194 | } 195 | return last_sum; 196 | } 197 | 198 | private: 199 | // number of items (input) 200 | uint64_t n_; 201 | 202 | // skewness (input) in (0, 1); or, 0 = uniform, 1 = always zero 203 | double theta_; 204 | 205 | // only depends on theta 206 | double alpha_; 207 | 208 | // only depends on theta 209 | double thres_; 210 | 211 | // last n used to calculate the following 212 | uint64_t last_n_; 213 | 214 | double dbl_n_; 215 | 216 | double zetan_; 217 | 218 | double eta_; 219 | 220 | // for sequential number generation 221 | uint64_t seq_; 222 | 223 | Rand rand_; 224 | } __attribute__((aligned(128))); // To prevent false sharing caused by adjacent cacheline prefetching -------------------------------------------------------------------------------- /memory_pool/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | add_subdirectory(server) 5 | -------------------------------------------------------------------------------- /memory_pool/server/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | set(SERVER_SOURCE server.cc) 5 | add_executable(zm_mem_pool ${SERVER_SOURCE}) 6 | target_link_libraries(zm_mem_pool tatp_db smallbank_db tpcc_db micro_db rlib) 7 | -------------------------------------------------------------------------------- /memory_pool/server/server.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "memstore/data_item.h" 13 | #include "memstore/hash_store.h" 14 | #include "rlib/rdma_ctrl.hpp" 15 | 16 | // Load DB 17 | #include "micro/micro_db.h" 18 | #include "smallbank/smallbank_db.h" 19 | #include "tatp/tatp_db.h" 20 | #include "tpcc/tpcc_db.h" 21 | 22 | using namespace rdmaio; 23 | 24 | class Server { 25 | public: 26 | Server(int nid, int local_port, int local_meta_port, size_t hash_buf_size, size_t log_buf_size, int use_pm, std::string& pm_file, size_t pm_size) 27 | : server_node_id(nid), 28 | local_port(local_port), 29 | local_meta_port(local_meta_port), 30 | hash_buf_size(hash_buf_size), 31 | log_buf_size(log_buf_size), 32 | use_pm(use_pm), 33 | pm_file(pm_file), 34 | pm_size(pm_size), 35 | hash_buffer(nullptr), 36 | log_buffer(nullptr) {} 37 | 38 | ~Server() { 39 | RDMA_LOG(INFO) << "Do server cleaning..."; 40 | if (tatp_server) { 41 | delete tatp_server; 42 | RDMA_LOG(INFO) << "delete tatp tables"; 43 | } 44 | 45 | if (smallbank_server) { 46 | delete smallbank_server; 47 | RDMA_LOG(INFO) << "delete smallbank tables"; 48 | } 49 | 50 | if (tpcc_server) { 51 | delete tpcc_server; 52 | RDMA_LOG(INFO) << "delete tpcc tables"; 53 | } 54 | 55 | if (micro_server) { 56 | delete micro_server; 57 | RDMA_LOG(INFO) << "delete micro tables"; 58 | } 59 | 60 | if (use_pm) { 61 | munmap(hash_buffer, pm_size); 62 | close(pm_file_fd); 63 | RDMA_LOG(INFO) << "munmap hash buffer"; 64 | } else { 65 | if (hash_buffer) { 66 | free(hash_buffer); 67 | RDMA_LOG(INFO) << "Free hash buffer"; 68 | } 69 | } 70 | 71 | if (log_buffer) { 72 | free(log_buffer); 73 | RDMA_LOG(INFO) << "free log buffer"; 74 | } 75 | } 76 | 77 | void AllocMem(); 78 | 79 | void InitMem(); 80 | 81 | void InitRDMA(); 82 | 83 | void LoadData(node_id_t machine_id, node_id_t machine_num, std::string& workload); 84 | 85 | void SendMeta(node_id_t machine_id, std::string& workload, size_t compute_node_num); 86 | 87 | void PrepareHashMeta(node_id_t machine_id, std::string& workload, char** hash_meta_buffer, size_t& total_meta_size); 88 | 89 | void SendHashMeta(char* hash_meta_buffer, size_t& total_meta_size); 90 | 91 | void CleanTable(); 92 | 93 | void CleanQP(); 94 | 95 | bool Run(); 96 | 97 | private: 98 | const int server_node_id; 99 | 100 | const int local_port; 101 | 102 | const int local_meta_port; 103 | 104 | const size_t hash_buf_size; 105 | 106 | const size_t log_buf_size; 107 | 108 | const int use_pm; 109 | 110 | const std::string pm_file; 111 | 112 | const size_t pm_size; 113 | 114 | int pm_file_fd; 115 | 116 | RdmaCtrlPtr rdma_ctrl; 117 | 118 | // The start address of the whole hash store space 119 | char* hash_buffer; 120 | 121 | // The start address of the reserved space in hash store. For insertion in case of conflict in a full bucket 122 | char* hash_reserve_buffer; 123 | 124 | char* log_buffer; 125 | 126 | // For server-side workload 127 | TATP* tatp_server = nullptr; 128 | 129 | SmallBank* smallbank_server = nullptr; 130 | 131 | TPCC* tpcc_server = nullptr; 132 | 133 | MICRO* micro_server = nullptr; 134 | }; 135 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/error/en.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_ERROR_EN_H__ 16 | #define RAPIDJSON_ERROR_EN_H__ 17 | 18 | #include "error.h" 19 | 20 | 21 | RAPIDJSON_NAMESPACE_BEGIN 22 | 23 | //! Maps error code of parsing into error message. 24 | /*! 25 | \ingroup RAPIDJSON_ERRORS 26 | \param parseErrorCode Error code obtained in parsing. 27 | \return the error message. 28 | \note User can make a copy of this function for localization. 29 | Using switch-case is safer for future modification of error codes. 30 | */ 31 | inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErrorCode) { 32 | switch (parseErrorCode) { 33 | case kParseErrorNone: return RAPIDJSON_ERROR_STRING("No error."); 34 | 35 | case kParseErrorDocumentEmpty: return RAPIDJSON_ERROR_STRING("The document is empty."); 36 | case kParseErrorDocumentRootNotSingular: return RAPIDJSON_ERROR_STRING("The document root must not follow by other values."); 37 | 38 | case kParseErrorValueInvalid: return RAPIDJSON_ERROR_STRING("Invalid value."); 39 | 40 | case kParseErrorObjectMissName: return RAPIDJSON_ERROR_STRING("Missing a name for object member."); 41 | case kParseErrorObjectMissColon: return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member."); 42 | case kParseErrorObjectMissCommaOrCurlyBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member."); 43 | 44 | case kParseErrorArrayMissCommaOrSquareBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element."); 45 | 46 | case kParseErrorStringUnicodeEscapeInvalidHex: return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string."); 47 | case kParseErrorStringUnicodeSurrogateInvalid: return RAPIDJSON_ERROR_STRING("The surrogate pair in string is invalid."); 48 | case kParseErrorStringEscapeInvalid: return RAPIDJSON_ERROR_STRING("Invalid escape character in string."); 49 | case kParseErrorStringMissQuotationMark: return RAPIDJSON_ERROR_STRING("Missing a closing quotation mark in string."); 50 | case kParseErrorStringInvalidEncoding: return RAPIDJSON_ERROR_STRING("Invalid encoding in string."); 51 | 52 | case kParseErrorNumberTooBig: return RAPIDJSON_ERROR_STRING("Number too big to be stored in double."); 53 | case kParseErrorNumberMissFraction: return RAPIDJSON_ERROR_STRING("Miss fraction part in number."); 54 | case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number."); 55 | 56 | case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error."); 57 | case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); 58 | 59 | default:return RAPIDJSON_ERROR_STRING("Unknown error."); 60 | } 61 | } 62 | 63 | RAPIDJSON_NAMESPACE_END 64 | 65 | #endif // RAPIDJSON_ERROR_EN_H__ 66 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/error/error.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_ERROR_ERROR_H__ 16 | #define RAPIDJSON_ERROR_ERROR_H__ 17 | 18 | #include "../rapidjson.h" 19 | 20 | /*! \file error.h */ 21 | 22 | /*! \defgroup RAPIDJSON_ERRORS RapidJSON error handling */ 23 | 24 | /////////////////////////////////////////////////////////////////////////////// 25 | // RAPIDJSON_ERROR_CHARTYPE 26 | 27 | //! Character type of error messages. 28 | /*! \ingroup RAPIDJSON_ERRORS 29 | The default character type is \c char. 30 | On Windows, user can define this macro as \c TCHAR for supporting both 31 | unicode/non-unicode settings. 32 | */ 33 | #ifndef RAPIDJSON_ERROR_CHARTYPE 34 | #define RAPIDJSON_ERROR_CHARTYPE char 35 | #endif 36 | 37 | /////////////////////////////////////////////////////////////////////////////// 38 | // RAPIDJSON_ERROR_STRING 39 | 40 | //! Macro for converting string literial to \ref RAPIDJSON_ERROR_CHARTYPE[]. 41 | /*! \ingroup RAPIDJSON_ERRORS 42 | By default this conversion macro does nothing. 43 | On Windows, user can define this macro as \c _T(x) for supporting both 44 | unicode/non-unicode settings. 45 | */ 46 | #ifndef RAPIDJSON_ERROR_STRING 47 | #define RAPIDJSON_ERROR_STRING(x) x 48 | #endif 49 | 50 | RAPIDJSON_NAMESPACE_BEGIN 51 | 52 | /////////////////////////////////////////////////////////////////////////////// 53 | // ParseErrorCode 54 | 55 | //! Error code of parsing. 56 | /*! \ingroup RAPIDJSON_ERRORS 57 | \see GenericReader::Parse, GenericReader::GetParseErrorCode 58 | */ 59 | enum ParseErrorCode { 60 | kParseErrorNone = 0, //!< No error. 61 | 62 | kParseErrorDocumentEmpty, //!< The document is empty. 63 | kParseErrorDocumentRootNotSingular, //!< The document root must not follow by other values. 64 | 65 | kParseErrorValueInvalid, //!< Invalid value. 66 | 67 | kParseErrorObjectMissName, //!< Missing a name for object member. 68 | kParseErrorObjectMissColon, //!< Missing a colon after a name of object member. 69 | kParseErrorObjectMissCommaOrCurlyBracket, //!< Missing a comma or '}' after an object member. 70 | 71 | kParseErrorArrayMissCommaOrSquareBracket, //!< Missing a comma or ']' after an array element. 72 | 73 | kParseErrorStringUnicodeEscapeInvalidHex, //!< Incorrect hex digit after \\u escape in string. 74 | kParseErrorStringUnicodeSurrogateInvalid, //!< The surrogate pair in string is invalid. 75 | kParseErrorStringEscapeInvalid, //!< Invalid escape character in string. 76 | kParseErrorStringMissQuotationMark, //!< Missing a closing quotation mark in string. 77 | kParseErrorStringInvalidEncoding, //!< Invalid encoding in string. 78 | 79 | kParseErrorNumberTooBig, //!< Number too big to be stored in double. 80 | kParseErrorNumberMissFraction, //!< Miss fraction part in number. 81 | kParseErrorNumberMissExponent, //!< Miss exponent in number. 82 | 83 | kParseErrorTermination, //!< Parsing was terminated. 84 | kParseErrorUnspecificSyntaxError //!< Unspecific syntax error. 85 | }; 86 | 87 | //! Result of parsing (wraps ParseErrorCode) 88 | /*! 89 | \ingroup RAPIDJSON_ERRORS 90 | \code 91 | Document doc; 92 | ParseResult ok = doc.Parse("[42]"); 93 | if (!ok) { 94 | fprintf(stderr, "JSON parse error: %s (%u)", 95 | GetParseError_En(ok.Code()), ok.Offset()); 96 | exit(EXIT_FAILURE); 97 | } 98 | \endcode 99 | \see GenericReader::Parse, GenericDocument::Parse 100 | */ 101 | struct ParseResult { 102 | 103 | //! Default constructor, no error. 104 | ParseResult() : code_(kParseErrorNone), offset_(0) {} 105 | //! Constructor to set an error. 106 | ParseResult(ParseErrorCode code, size_t offset) : code_(code), offset_(offset) {} 107 | 108 | //! Get the error code. 109 | ParseErrorCode Code() const { return code_; } 110 | //! Get the error offset, if \ref IsError(), 0 otherwise. 111 | size_t Offset() const { return offset_; } 112 | 113 | //! Conversion to \c bool, returns \c true, iff !\ref IsError(). 114 | operator bool() const { return !IsError(); } 115 | //! Whether the result is an error. 116 | bool IsError() const { return code_ != kParseErrorNone; } 117 | 118 | bool operator==(const ParseResult& that) const { return code_ == that.code_; } 119 | bool operator==(ParseErrorCode code) const { return code_ == code; } 120 | friend bool operator==(ParseErrorCode code, const ParseResult& err) { return code == err.code_; } 121 | 122 | //! Reset error code. 123 | void Clear() { Set(kParseErrorNone); } 124 | //! Update error code and offset. 125 | void Set(ParseErrorCode code, size_t offset = 0) { 126 | code_ = code; 127 | offset_ = offset; 128 | } 129 | 130 | private: 131 | ParseErrorCode code_; 132 | size_t offset_; 133 | }; 134 | 135 | //! Function pointer type of GetParseError(). 136 | /*! \ingroup RAPIDJSON_ERRORS 137 | 138 | This is the prototype for \c GetParseError_X(), where \c X is a locale. 139 | User can dynamically change locale in runtime, e.g.: 140 | \code 141 | GetParseErrorFunc GetParseError = GetParseError_En; // or whatever 142 | const RAPIDJSON_ERROR_CHARTYPE* s = GetParseError(document.GetParseErrorCode()); 143 | \endcode 144 | */ 145 | typedef const RAPIDJSON_ERROR_CHARTYPE* (* GetParseErrorFunc)(ParseErrorCode); 146 | 147 | RAPIDJSON_NAMESPACE_END 148 | 149 | #endif // RAPIDJSON_ERROR_ERROR_H__ 150 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/filereadstream.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_FILEREADSTREAM_H_ 16 | #define RAPIDJSON_FILEREADSTREAM_H_ 17 | 18 | #include "rapidjson.h" 19 | #include 20 | 21 | 22 | RAPIDJSON_NAMESPACE_BEGIN 23 | 24 | //! File byte stream for input using fread(). 25 | /*! 26 | \note implements Stream concept 27 | */ 28 | class FileReadStream { 29 | public: 30 | typedef char Ch; //!< Character type (byte). 31 | 32 | //! Constructor. 33 | /*! 34 | \param fp File pointer opened for read. 35 | \param buffer user-supplied buffer. 36 | \param bufferSize size of buffer in bytes. Must >=4 bytes. 37 | */ 38 | FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { 39 | RAPIDJSON_ASSERT(fp_ != 0); 40 | RAPIDJSON_ASSERT(bufferSize >= 4); 41 | Read(); 42 | } 43 | 44 | Ch Peek() const { return *current_; } 45 | Ch Take() { 46 | Ch c = *current_; 47 | Read(); 48 | return c; 49 | } 50 | size_t Tell() const { return count_ + static_cast(current_ - buffer_); } 51 | 52 | // Not implemented 53 | void Put(Ch) { RAPIDJSON_ASSERT(false); } 54 | void Flush() { RAPIDJSON_ASSERT(false); } 55 | Ch* PutBegin() { 56 | RAPIDJSON_ASSERT(false); 57 | return 0; 58 | } 59 | size_t PutEnd(Ch*) { 60 | RAPIDJSON_ASSERT(false); 61 | return 0; 62 | } 63 | 64 | // For encoding detection only. 65 | const Ch* Peek4() const { 66 | return (current_ + 4 <= bufferLast_) ? current_ : 0; 67 | } 68 | 69 | private: 70 | void Read() { 71 | if (current_ < bufferLast_) 72 | ++current_; 73 | else if (!eof_) { 74 | count_ += readCount_; 75 | readCount_ = fread(buffer_, 1, bufferSize_, fp_); 76 | bufferLast_ = buffer_ + readCount_ - 1; 77 | current_ = buffer_; 78 | 79 | if (readCount_ < bufferSize_) { 80 | buffer_[readCount_] = '\0'; 81 | ++bufferLast_; 82 | eof_ = true; 83 | } 84 | } 85 | } 86 | 87 | std::FILE* fp_; 88 | Ch* buffer_; 89 | size_t bufferSize_; 90 | Ch* bufferLast_; 91 | Ch* current_; 92 | size_t readCount_; 93 | size_t count_; //!< Number of characters read 94 | bool eof_; 95 | }; 96 | 97 | RAPIDJSON_NAMESPACE_END 98 | 99 | #endif // RAPIDJSON_FILESTREAM_H_ 100 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/filewritestream.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_FILEWRITESTREAM_H_ 16 | #define RAPIDJSON_FILEWRITESTREAM_H_ 17 | 18 | #include "rapidjson.h" 19 | #include 20 | 21 | 22 | RAPIDJSON_NAMESPACE_BEGIN 23 | 24 | //! Wrapper of C file stream for input using fread(). 25 | /*! 26 | \note implements Stream concept 27 | */ 28 | class FileWriteStream { 29 | public: 30 | typedef char Ch; //!< Character type. Only support char. 31 | 32 | FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_) { 33 | RAPIDJSON_ASSERT(fp_ != 0); 34 | } 35 | 36 | void Put(char c) { 37 | if (current_ >= bufferEnd_) 38 | Flush(); 39 | 40 | *current_++ = c; 41 | } 42 | 43 | void PutN(char c, size_t n) { 44 | size_t avail = static_cast(bufferEnd_ - current_); 45 | while (n > avail) { 46 | std::memset(current_, c, avail); 47 | current_ += avail; 48 | Flush(); 49 | n -= avail; 50 | avail = static_cast(bufferEnd_ - current_); 51 | } 52 | 53 | if (n > 0) { 54 | std::memset(current_, c, n); 55 | current_ += n; 56 | } 57 | } 58 | 59 | void Flush() { 60 | if (current_ != buffer_) { 61 | size_t result = fwrite(buffer_, 1, static_cast(current_ - buffer_), fp_); 62 | if (result < static_cast(current_ - buffer_)) { 63 | // failure deliberately ignored at this time 64 | // added to avoid warn_unused_result build errors 65 | } 66 | current_ = buffer_; 67 | } 68 | } 69 | 70 | // Not implemented 71 | char Peek() const { 72 | RAPIDJSON_ASSERT(false); 73 | return 0; 74 | } 75 | char Take() { 76 | RAPIDJSON_ASSERT(false); 77 | return 0; 78 | } 79 | size_t Tell() const { 80 | RAPIDJSON_ASSERT(false); 81 | return 0; 82 | } 83 | char* PutBegin() { 84 | RAPIDJSON_ASSERT(false); 85 | return 0; 86 | } 87 | size_t PutEnd(char*) { 88 | RAPIDJSON_ASSERT(false); 89 | return 0; 90 | } 91 | 92 | private: 93 | // Prohibit copy constructor & assignment operator. 94 | FileWriteStream(const FileWriteStream&); 95 | FileWriteStream& operator=(const FileWriteStream&); 96 | 97 | std::FILE* fp_; 98 | char* buffer_; 99 | char* bufferEnd_; 100 | char* current_; 101 | }; 102 | 103 | //! Implement specialized version of PutN() with memset() for better performance. 104 | template <> 105 | inline void PutN(FileWriteStream& stream, char c, size_t n) { 106 | stream.PutN(c, n); 107 | } 108 | 109 | RAPIDJSON_NAMESPACE_END 110 | 111 | #endif // RAPIDJSON_FILESTREAM_H_ 112 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/internal/ieee754.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_IEEE754_ 16 | #define RAPIDJSON_IEEE754_ 17 | 18 | #include "../rapidjson.h" 19 | 20 | 21 | RAPIDJSON_NAMESPACE_BEGIN 22 | namespace internal { 23 | 24 | class Double { 25 | public: 26 | Double() {} 27 | Double(double d) : d_(d) {} 28 | Double(uint64_t u) : u_(u) {} 29 | 30 | double Value() const { return d_; } 31 | uint64_t Uint64Value() const { return u_; } 32 | 33 | double NextPositiveDouble() const { 34 | RAPIDJSON_ASSERT(!Sign()); 35 | return Double(u_ + 1).Value(); 36 | } 37 | 38 | bool Sign() const { return (u_ & kSignMask) != 0; } 39 | uint64_t Significand() const { return u_ & kSignificandMask; } 40 | int Exponent() const { return static_cast(((u_ & kExponentMask) >> kSignificandSize) - kExponentBias); } 41 | 42 | bool IsNan() const { return (u_ & kExponentMask) == kExponentMask && Significand() != 0; } 43 | bool IsInf() const { return (u_ & kExponentMask) == kExponentMask && Significand() == 0; } 44 | bool IsNormal() const { return (u_ & kExponentMask) != 0 || Significand() == 0; } 45 | bool IsZero() const { return (u_ & (kExponentMask | kSignificandMask)) == 0; } 46 | 47 | uint64_t IntegerSignificand() const { return IsNormal() ? Significand() | kHiddenBit : Significand(); } 48 | int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; } 49 | uint64_t ToBias() const { return (u_ & kSignMask) ? ~u_ + 1 : u_ | kSignMask; } 50 | 51 | static unsigned EffectiveSignificandSize(int order) { 52 | if (order >= -1021) 53 | return 53; 54 | else if (order <= -1074) 55 | return 0; 56 | else 57 | return (unsigned) order + 1074; 58 | } 59 | 60 | private: 61 | static const int kSignificandSize = 52; 62 | static const int kExponentBias = 0x3FF; 63 | static const int kDenormalExponent = 1 - kExponentBias; 64 | static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000); 65 | static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); 66 | static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); 67 | static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); 68 | 69 | union { 70 | double d_; 71 | uint64_t u_; 72 | }; 73 | }; 74 | 75 | } // namespace internal 76 | RAPIDJSON_NAMESPACE_END 77 | 78 | #endif // RAPIDJSON_IEEE754_ 79 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/internal/pow10.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_POW10_ 16 | #define RAPIDJSON_POW10_ 17 | 18 | #include "../rapidjson.h" 19 | 20 | 21 | RAPIDJSON_NAMESPACE_BEGIN 22 | namespace internal { 23 | 24 | //! Computes integer powers of 10 in double (10.0^n). 25 | /*! This function uses lookup table for fast and accurate results. 26 | \param n non-negative exponent. Must <= 308. 27 | \return 10.0^n 28 | */ 29 | inline double Pow10(int n) { 30 | static const double e[] = { // 1e-0...1e308: 309 * 8 bytes = 2472 bytes 31 | 1e+0, 32 | 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 33 | 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 34 | 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 35 | 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, 36 | 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, 37 | 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 1e+119, 1e+120, 38 | 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 39 | 1e+141, 1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 40 | 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179, 1e+180, 41 | 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188, 1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, 1e+198, 1e+199, 1e+200, 42 | 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, 1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 43 | 1e+221, 1e+222, 1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 44 | 1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260, 45 | 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269, 1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, 1e+279, 1e+280, 46 | 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, 1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, 1e+297, 1e+298, 1e+299, 1e+300, 47 | 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, 1e+306, 1e+307, 1e+308 48 | }; 49 | RAPIDJSON_ASSERT(n >= 0 && n <= 308); 50 | return e[n]; 51 | } 52 | 53 | } // namespace internal 54 | RAPIDJSON_NAMESPACE_END 55 | 56 | #endif // RAPIDJSON_POW10_ 57 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/internal/stack.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_INTERNAL_STACK_H_ 16 | #define RAPIDJSON_INTERNAL_STACK_H_ 17 | 18 | #include "../rapidjson.h" 19 | #include "swap.h" 20 | 21 | 22 | RAPIDJSON_NAMESPACE_BEGIN 23 | namespace internal { 24 | 25 | /////////////////////////////////////////////////////////////////////////////// 26 | // Stack 27 | 28 | //! A type-unsafe stack for storing different types of data. 29 | /*! \tparam Allocator Allocator for allocating stack memory. 30 | */ 31 | template 32 | class Stack { 33 | public: 34 | // Optimization note: Do not allocate memory for stack_ in constructor. 35 | // Do it lazily when first Push() -> Expand() -> Resize(). 36 | Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) { 37 | RAPIDJSON_ASSERT(stackCapacity > 0); 38 | } 39 | 40 | #if RAPIDJSON_HAS_CXX11_RVALUE_REFS 41 | Stack(Stack&& rhs) 42 | : allocator_(rhs.allocator_), 43 | ownAllocator_(rhs.ownAllocator_), 44 | stack_(rhs.stack_), 45 | stackTop_(rhs.stackTop_), 46 | stackEnd_(rhs.stackEnd_), 47 | initialCapacity_(rhs.initialCapacity_) { 48 | rhs.allocator_ = 0; 49 | rhs.ownAllocator_ = 0; 50 | rhs.stack_ = 0; 51 | rhs.stackTop_ = 0; 52 | rhs.stackEnd_ = 0; 53 | rhs.initialCapacity_ = 0; 54 | } 55 | #endif 56 | 57 | ~Stack() { 58 | Destroy(); 59 | } 60 | 61 | #if RAPIDJSON_HAS_CXX11_RVALUE_REFS 62 | Stack& operator=(Stack&& rhs) { 63 | if (&rhs != this) { 64 | Destroy(); 65 | 66 | allocator_ = rhs.allocator_; 67 | ownAllocator_ = rhs.ownAllocator_; 68 | stack_ = rhs.stack_; 69 | stackTop_ = rhs.stackTop_; 70 | stackEnd_ = rhs.stackEnd_; 71 | initialCapacity_ = rhs.initialCapacity_; 72 | 73 | rhs.allocator_ = 0; 74 | rhs.ownAllocator_ = 0; 75 | rhs.stack_ = 0; 76 | rhs.stackTop_ = 0; 77 | rhs.stackEnd_ = 0; 78 | rhs.initialCapacity_ = 0; 79 | } 80 | return *this; 81 | } 82 | #endif 83 | 84 | void Swap(Stack& rhs) RAPIDJSON_NOEXCEPT { 85 | internal::Swap(allocator_, rhs.allocator_); 86 | internal::Swap(ownAllocator_, rhs.ownAllocator_); 87 | internal::Swap(stack_, rhs.stack_); 88 | internal::Swap(stackTop_, rhs.stackTop_); 89 | internal::Swap(stackEnd_, rhs.stackEnd_); 90 | internal::Swap(initialCapacity_, rhs.initialCapacity_); 91 | } 92 | 93 | void Clear() { stackTop_ = stack_; } 94 | 95 | void ShrinkToFit() { 96 | if (Empty()) { 97 | // If the stack is empty, completely deallocate the memory. 98 | Allocator::Free(stack_); 99 | stack_ = 0; 100 | stackTop_ = 0; 101 | stackEnd_ = 0; 102 | } else 103 | Resize(GetSize()); 104 | } 105 | 106 | // Optimization note: try to minimize the size of this function for force inline. 107 | // Expansion is run very infrequently, so it is moved to another (probably non-inline) function. 108 | template 109 | RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) { 110 | // Expand the stack if needed 111 | if (stackTop_ + sizeof(T) * count >= stackEnd_) 112 | Expand(count); 113 | 114 | T* ret = reinterpret_cast(stackTop_); 115 | stackTop_ += sizeof(T) * count; 116 | return ret; 117 | } 118 | 119 | template 120 | T* Pop(size_t count) { 121 | RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T)); 122 | stackTop_ -= count * sizeof(T); 123 | return reinterpret_cast(stackTop_); 124 | } 125 | 126 | template 127 | T* Top() { 128 | RAPIDJSON_ASSERT(GetSize() >= sizeof(T)); 129 | return reinterpret_cast(stackTop_ - sizeof(T)); 130 | } 131 | 132 | template 133 | T* Bottom() { return (T*) stack_; } 134 | 135 | bool HasAllocator() const { 136 | return allocator_ != 0; 137 | } 138 | 139 | Allocator& GetAllocator() { 140 | RAPIDJSON_ASSERT(allocator_); 141 | return *allocator_; 142 | } 143 | bool Empty() const { return stackTop_ == stack_; } 144 | size_t GetSize() const { return static_cast(stackTop_ - stack_); } 145 | size_t GetCapacity() const { return static_cast(stackEnd_ - stack_); } 146 | 147 | private: 148 | template 149 | void Expand(size_t count) { 150 | // Only expand the capacity if the current stack exists. Otherwise just create a stack with initial capacity. 151 | size_t newCapacity; 152 | if (stack_ == 0) { 153 | if (!allocator_) 154 | ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); 155 | newCapacity = initialCapacity_; 156 | } else { 157 | newCapacity = GetCapacity(); 158 | newCapacity += (newCapacity + 1) / 2; 159 | } 160 | size_t newSize = GetSize() + sizeof(T) * count; 161 | if (newCapacity < newSize) 162 | newCapacity = newSize; 163 | 164 | Resize(newCapacity); 165 | } 166 | 167 | void Resize(size_t newCapacity) { 168 | const size_t size = GetSize(); // Backup the current size 169 | stack_ = (char*) allocator_->Realloc(stack_, GetCapacity(), newCapacity); 170 | stackTop_ = stack_ + size; 171 | stackEnd_ = stack_ + newCapacity; 172 | } 173 | 174 | void Destroy() { 175 | Allocator::Free(stack_); 176 | RAPIDJSON_DELETE(ownAllocator_); // Only delete if it is owned by the stack 177 | } 178 | 179 | // Prohibit copy constructor & assignment operator. 180 | Stack(const Stack&); 181 | Stack& operator=(const Stack&); 182 | 183 | Allocator* allocator_; 184 | Allocator* ownAllocator_; 185 | char* stack_; 186 | char* stackTop_; 187 | char* stackEnd_; 188 | size_t initialCapacity_; 189 | }; 190 | 191 | } // namespace internal 192 | RAPIDJSON_NAMESPACE_END 193 | 194 | #endif // RAPIDJSON_STACK_H_ 195 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/internal/strfunc.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_INTERNAL_STRFUNC_H_ 16 | #define RAPIDJSON_INTERNAL_STRFUNC_H_ 17 | 18 | #include "../rapidjson.h" 19 | 20 | 21 | RAPIDJSON_NAMESPACE_BEGIN 22 | namespace internal { 23 | 24 | //! Custom strlen() which works on different character types. 25 | /*! \tparam Ch Character type (e.g. char, wchar_t, short) 26 | \param s Null-terminated input string. 27 | \return Number of characters in the string. 28 | \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints. 29 | */ 30 | template 31 | inline SizeType StrLen(const Ch* s) { 32 | const Ch* p = s; 33 | while (*p) ++p; 34 | return SizeType(p - s); 35 | } 36 | 37 | } // namespace internal 38 | RAPIDJSON_NAMESPACE_END 39 | 40 | #endif // RAPIDJSON_INTERNAL_STRFUNC_H_ 41 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/internal/swap.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_INTERNAL_SWAP_H_ 16 | #define RAPIDJSON_INTERNAL_SWAP_H_ 17 | 18 | #include "../rapidjson.h" 19 | 20 | 21 | RAPIDJSON_NAMESPACE_BEGIN 22 | namespace internal { 23 | 24 | //! Custom swap() to avoid dependency on C++ header 25 | /*! \tparam T Type of the arguments to swap, should be instantiated with primitive C++ types only. 26 | \note This has the same semantics as std::swap(). 27 | */ 28 | template 29 | inline void Swap(T& a, T& b) RAPIDJSON_NOEXCEPT { 30 | T tmp = a; 31 | a = b; 32 | b = tmp; 33 | } 34 | 35 | } // namespace internal 36 | RAPIDJSON_NAMESPACE_END 37 | 38 | #endif // RAPIDJSON_INTERNAL_SWAP_H_ 39 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/memorybuffer.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_MEMORYBUFFER_H_ 16 | #define RAPIDJSON_MEMORYBUFFER_H_ 17 | 18 | #include "rapidjson.h" 19 | #include "internal/stack.h" 20 | 21 | 22 | RAPIDJSON_NAMESPACE_BEGIN 23 | 24 | //! Represents an in-memory output byte stream. 25 | /*! 26 | This class is mainly for being wrapped by EncodedOutputStream or AutoUTFOutputStream. 27 | 28 | It is similar to FileWriteBuffer but the destination is an in-memory buffer instead of a file. 29 | 30 | Differences between MemoryBuffer and StringBuffer: 31 | 1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer. 32 | 2. StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns a buffer without terminator. 33 | 34 | \tparam Allocator type for allocating memory buffer. 35 | \note implements Stream concept 36 | */ 37 | template 38 | struct GenericMemoryBuffer { 39 | typedef char Ch; // byte 40 | 41 | GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} 42 | 43 | void Put(Ch c) { *stack_.template Push() = c; } 44 | void Flush() {} 45 | 46 | void Clear() { stack_.Clear(); } 47 | void ShrinkToFit() { stack_.ShrinkToFit(); } 48 | Ch* Push(size_t count) { return stack_.template Push(count); } 49 | void Pop(size_t count) { stack_.template Pop(count); } 50 | 51 | const Ch* GetBuffer() const { 52 | return stack_.template Bottom(); 53 | } 54 | 55 | size_t GetSize() const { return stack_.GetSize(); } 56 | 57 | static const size_t kDefaultCapacity = 256; 58 | mutable internal::Stack stack_; 59 | }; 60 | 61 | typedef GenericMemoryBuffer<> MemoryBuffer; 62 | 63 | //! Implement specialized version of PutN() with memset() for better performance. 64 | template <> 65 | inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n) { 66 | std::memset(memoryBuffer.stack_.Push(n), c, n * sizeof(c)); 67 | } 68 | 69 | RAPIDJSON_NAMESPACE_END 70 | 71 | #endif // RAPIDJSON_MEMORYBUFFER_H_ 72 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/memorystream.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_MEMORYSTREAM_H_ 16 | #define RAPIDJSON_MEMORYSTREAM_H_ 17 | 18 | #include "rapidjson.h" 19 | 20 | 21 | RAPIDJSON_NAMESPACE_BEGIN 22 | 23 | //! Represents an in-memory input byte stream. 24 | /*! 25 | This class is mainly for being wrapped by EncodedInputStream or AutoUTFInputStream. 26 | 27 | It is similar to FileReadBuffer but the source is an in-memory buffer instead of a file. 28 | 29 | Differences between MemoryStream and StringStream: 30 | 1. StringStream has encoding but MemoryStream is a byte stream. 31 | 2. MemoryStream needs size of the source buffer and the buffer don't need to be null terminated. StringStream assume null-terminated string as source. 32 | 3. MemoryStream supports Peek4() for encoding detection. StringStream is specified with an encoding so it should not have Peek4(). 33 | \note implements Stream concept 34 | */ 35 | struct MemoryStream { 36 | typedef char Ch; // byte 37 | 38 | MemoryStream(const Ch* src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {} 39 | 40 | Ch Peek() const { return (src_ == end_) ? '\0' : *src_; } 41 | Ch Take() { return (src_ == end_) ? '\0' : *src_++; } 42 | size_t Tell() const { return static_cast(src_ - begin_); } 43 | 44 | Ch* PutBegin() { 45 | RAPIDJSON_ASSERT(false); 46 | return 0; 47 | } 48 | void Put(Ch) { RAPIDJSON_ASSERT(false); } 49 | void Flush() { RAPIDJSON_ASSERT(false); } 50 | size_t PutEnd(Ch*) { 51 | RAPIDJSON_ASSERT(false); 52 | return 0; 53 | } 54 | 55 | // For encoding detection only. 56 | const Ch* Peek4() const { 57 | return Tell() + 4 <= size_ ? src_ : 0; 58 | } 59 | 60 | const Ch* src_; //!< Current read position. 61 | const Ch* begin_; //!< Original head of the string. 62 | const Ch* end_; //!< End of stream. 63 | size_t size_; //!< Size of the stream. 64 | }; 65 | 66 | RAPIDJSON_NAMESPACE_END 67 | 68 | #endif // RAPIDJSON_MEMORYBUFFER_H_ 69 | -------------------------------------------------------------------------------- /thirdparty/rapidjson/stringbuffer.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making RapidJSON available. 2 | // 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 | // 5 | // Licensed under the MIT License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // http://opensource.org/licenses/MIT 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef RAPIDJSON_STRINGBUFFER_H_ 16 | #define RAPIDJSON_STRINGBUFFER_H_ 17 | 18 | #include "rapidjson.h" 19 | 20 | 21 | #if RAPIDJSON_HAS_CXX11_RVALUE_REFS 22 | 23 | #include // std::move 24 | 25 | 26 | #endif 27 | 28 | #include "internal/stack.h" 29 | 30 | 31 | RAPIDJSON_NAMESPACE_BEGIN 32 | 33 | //! Represents an in-memory output stream. 34 | /*! 35 | \tparam Encoding Encoding of the stream. 36 | \tparam Allocator type for allocating memory buffer. 37 | \note implements Stream concept 38 | */ 39 | template 40 | class GenericStringBuffer { 41 | public: 42 | typedef typename Encoding::Ch Ch; 43 | 44 | GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} 45 | 46 | #if RAPIDJSON_HAS_CXX11_RVALUE_REFS 47 | GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {} 48 | GenericStringBuffer& operator=(GenericStringBuffer&& rhs) { 49 | if (&rhs != this) 50 | stack_ = std::move(rhs.stack_); 51 | return *this; 52 | } 53 | #endif 54 | 55 | void Put(Ch c) { *stack_.template Push() = c; } 56 | void Flush() {} 57 | 58 | void Clear() { stack_.Clear(); } 59 | void ShrinkToFit() { 60 | // Push and pop a null terminator. This is safe. 61 | *stack_.template Push() = '\0'; 62 | stack_.ShrinkToFit(); 63 | stack_.template Pop(1); 64 | } 65 | Ch* Push(size_t count) { return stack_.template Push(count); } 66 | void Pop(size_t count) { stack_.template Pop(count); } 67 | 68 | const Ch* GetString() const { 69 | // Push and pop a null terminator. This is safe. 70 | *stack_.template Push() = '\0'; 71 | stack_.template Pop(1); 72 | 73 | return stack_.template Bottom(); 74 | } 75 | 76 | size_t GetSize() const { return stack_.GetSize(); } 77 | 78 | static const size_t kDefaultCapacity = 256; 79 | mutable internal::Stack stack_; 80 | 81 | private: 82 | // Prohibit copy constructor & assignment operator. 83 | GenericStringBuffer(const GenericStringBuffer&); 84 | GenericStringBuffer& operator=(const GenericStringBuffer&); 85 | }; 86 | 87 | //! String buffer with UTF8 encoding 88 | typedef GenericStringBuffer > StringBuffer; 89 | 90 | //! Implement specialized version of PutN() with memset() for better performance. 91 | template <> 92 | inline void PutN(GenericStringBuffer >& stream, char c, size_t n) { 93 | std::memset(stream.stack_.Push(n), c, n * sizeof(c)); 94 | } 95 | 96 | RAPIDJSON_NAMESPACE_END 97 | 98 | #endif // RAPIDJSON_STRINGBUFFER_H_ 99 | -------------------------------------------------------------------------------- /thirdparty/rlib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | file(GLOB SOURCES "*.hpp") 5 | 6 | add_library(rlib STATIC ${SOURCES}) 7 | set_target_properties(rlib PROPERTIES LINKER_LANGUAGE CXX) 8 | target_link_libraries(rlib ibverbs pthread) -------------------------------------------------------------------------------- /thirdparty/rlib/common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "logging.hpp" 6 | #include "mr.hpp" 7 | #include "rnic.hpp" 8 | 9 | 10 | namespace rdmaio { 11 | 12 | // connection status 13 | enum ConnStatus { 14 | SUCC = 0, 15 | TIMEOUT = 1, 16 | WRONG_ARG = 2, 17 | ERR = 3, 18 | NOT_READY = 4, 19 | UNKNOWN = 5 20 | }; 21 | 22 | /** 23 | * The connection information exchanged between different QPs. 24 | * RC/UC QPs uses lid & addr to conncet to remote QPs, while qpn is used upon send requests. 25 | * local_node_id & port_id is used for UD QP to create addresses. 26 | */ 27 | struct QPAttr { 28 | address_t addr; 29 | uint16_t lid; 30 | uint32_t qpn; 31 | uint32_t psn; 32 | uint16_t node_id; 33 | uint16_t port_id; 34 | }; 35 | 36 | /** 37 | * The QP connection requests sent to remote. 38 | * from_node & from_worker identifies which QP it shall connect to 39 | */ 40 | struct QPConnArg { 41 | uint16_t from_node; 42 | uint32_t from_worker; 43 | uint8_t qp_type; // RC QP or UD QP 44 | QPAttr qp_attr; 45 | }; 46 | 47 | /** 48 | * The MR connection requests sent to remote. 49 | */ 50 | struct MRConnArg { 51 | uint64_t mr_id; 52 | }; 53 | 54 | struct ConnArg { 55 | enum { 56 | MR, 57 | QP 58 | } type; 59 | union { 60 | QPConnArg qp; 61 | MRConnArg mr; 62 | } payload; 63 | }; 64 | 65 | struct ConnReply { 66 | ConnStatus ack; 67 | union { 68 | QPAttr qp; 69 | MemoryAttr mr; 70 | } payload; 71 | }; 72 | 73 | inline int convert_mtu(ibv_mtu type) { 74 | int mtu = 0; 75 | switch (type) { 76 | case IBV_MTU_256:mtu = 256; 77 | break; 78 | case IBV_MTU_512:mtu = 512; 79 | break; 80 | case IBV_MTU_1024:mtu = 1024; 81 | break; 82 | case IBV_MTU_2048:mtu = 2048; 83 | break; 84 | case IBV_MTU_4096:mtu = 4096; 85 | break; 86 | } 87 | return mtu; 88 | } 89 | 90 | // The structure used to configure UDQP 91 | typedef struct { 92 | int max_send_size; 93 | int max_recv_size; 94 | int qkey; 95 | int psn; 96 | } UDConfig; 97 | 98 | typedef struct { 99 | int access_flags; 100 | int max_rd_atomic; 101 | int max_dest_rd_atomic; 102 | int rq_psn; 103 | int sq_psn; 104 | int timeout; 105 | } RCConfig; 106 | 107 | } // namespace rdmaio 108 | -------------------------------------------------------------------------------- /thirdparty/rlib/logging.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * The logging utilities used in libRDMA. 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | 11 | namespace rdmaio { 12 | 13 | /** 14 | * \def FATAL 15 | * Used for fatal and probably irrecoverable conditions 16 | * \def ERROR 17 | * Used for errors which are recoverable within the scope of the function 18 | * \def WARNING 19 | * Logs interesting conditions which are probably not fatal 20 | * \def EMPH 21 | * Outputs as INFO, but in WARNING colors. Useful for 22 | * outputting information you want to emphasize. 23 | * \def INFO 24 | * Used for providing general useful information 25 | * \def DEBUG 26 | * Debugging purposes only 27 | * \def EVERYTHING 28 | * Log everything 29 | */ 30 | 31 | enum loglevel { 32 | NONE = 7, 33 | FATAL = 6, 34 | ERROR = 5, 35 | WARNING = 4, 36 | EMPH = 3, 37 | INFO = 2, 38 | DBG = 1, 39 | EVERYTHING = 0 40 | }; 41 | 42 | #define unlikely(x) __builtin_expect(!!(x), 0) 43 | 44 | #ifndef RDMA_LOG_LEVEL 45 | #define RDMA_LOG_LEVEL ::rdmaio::DBG 46 | #endif 47 | 48 | // logging macro definiations 49 | // default log 50 | #define RDMA_LOG(n) \ 51 | if (n >= RDMA_LOG_LEVEL) \ 52 | ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream() 53 | 54 | // #define RDMA_LOG(n) \ 55 | // if (n != ::rdmaio::INFO && n >= RDMA_LOG_LEVEL) \ 56 | // ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream() 57 | 58 | // #define RDMA_LOG(n) \ 59 | // if (false) \ 60 | // ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream() 61 | 62 | 63 | // log with tag 64 | #define RDMA_TLOG(n, t) \ 65 | if (n >= RDMA_LOG_LEVEL) \ 66 | ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream() \ 67 | << "[" << (t) << "]" 68 | 69 | #define RDMA_LOG_IF(n, condition) \ 70 | if (n >= RDMA_LOG_LEVEL && (condition)) \ 71 | ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream() 72 | 73 | #define RDMA_ASSERT(condition) \ 74 | if (unlikely(!(condition))) \ 75 | ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, ::rdmaio::FATAL + 1).stream() << "Assertion! " 76 | 77 | #define RDMA_VERIFY(n, condition) RDMA_LOG_IF(n, (!(condition))) 78 | 79 | class MessageLogger { 80 | public: 81 | MessageLogger(const char* file, int line, int level) : level_(level) { 82 | if (level_ < RDMA_LOG_LEVEL) 83 | return; 84 | stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] "; 85 | } 86 | 87 | ~MessageLogger() { 88 | if (level_ >= RDMA_LOG_LEVEL) { 89 | stream_ << "\n"; 90 | std::cout << "\033[" << RDMA_DEBUG_LEVEL_COLOR[std::min(level_, 6)] << "m" 91 | << stream_.str() << EndcolorFlag(); 92 | if (level_ >= ::rdmaio::FATAL) 93 | abort(); 94 | } 95 | } 96 | 97 | // Return the stream associated with the logger object. 98 | std::stringstream& stream() { return stream_; } 99 | 100 | private: 101 | std::stringstream stream_; 102 | int level_; 103 | 104 | // control flags for color 105 | #define R_BLACK 39 106 | #define R_RED 31 107 | #define R_GREEN 32 108 | #define R_YELLOW 33 109 | #define R_BLUE 34 110 | #define R_MAGENTA 35 111 | #define R_CYAN 36 112 | #define R_WHITE 37 113 | 114 | const int RDMA_DEBUG_LEVEL_COLOR[7] = {R_BLACK, R_BLACK, R_YELLOW, R_GREEN, R_MAGENTA, R_RED, R_RED}; 115 | 116 | static std::string StripBasename(const std::string& full_path) { 117 | const char kSeparator = '/'; 118 | size_t pos = full_path.rfind(kSeparator); 119 | if (pos != std::string::npos) { 120 | return full_path.substr(pos + 1, std::string::npos); 121 | } else { 122 | return full_path; 123 | } 124 | } 125 | 126 | static std::string EndcolorFlag() { 127 | char flag[7]; 128 | snprintf(flag, 7, "%c[0m", 0x1B); 129 | return std::string(flag); 130 | } 131 | }; 132 | 133 | }; // namespace rdmaio 134 | -------------------------------------------------------------------------------- /thirdparty/rlib/mr.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "logging.hpp" 6 | 7 | 8 | namespace rdmaio { 9 | 10 | struct MemoryAttr { 11 | uintptr_t buf; 12 | uint32_t key; 13 | }; 14 | 15 | class Memory { 16 | public: 17 | /** 18 | * The default protection flag of a memory region. 19 | * In default, the memory can be read/write by local and remote RNIC operations. 20 | */ 21 | static const int DEFAULT_PROTECTION_FLAG = (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | 22 | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC); 23 | 24 | Memory(const char* addr, uint64_t len, ibv_pd* pd, int flag) : addr(addr), 25 | len(len), 26 | mr(ibv_reg_mr(pd, (void*) addr, len, flag)) { 27 | if (mr == nullptr) { 28 | RDMA_LOG(WARNING) << "failed to register local_mr, for addr " << addr << "; len " << len; 29 | } else { 30 | rattr.buf = (uintptr_t) addr; 31 | rattr.key = mr->rkey; 32 | } 33 | } 34 | 35 | ~Memory() { 36 | if (mr != nullptr) { 37 | int rc = ibv_dereg_mr(mr); 38 | RDMA_LOG_IF(ERROR, rc != 0) << "dereg local_mr error: " << strerror(errno); 39 | } 40 | } 41 | 42 | bool valid() { 43 | return mr != nullptr; 44 | } 45 | 46 | const char* addr; 47 | uint64_t len; 48 | 49 | MemoryAttr rattr; // RDMA registered attr 50 | ibv_mr* mr = nullptr; // local_mr in the driver 51 | }; 52 | 53 | }; // namespace rdmaio 54 | -------------------------------------------------------------------------------- /thirdparty/rlib/msg_interface.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "common.hpp" 8 | 9 | 10 | namespace rdmaio { 11 | 12 | typedef std::function msg_callback_t_; 13 | 14 | /** 15 | * An abstract message interface 16 | * Assumption: one per thread 17 | */ 18 | class MsgAdapter { 19 | public: 20 | MsgAdapter(msg_callback_t_ callback) 21 | : callback_(callback) { 22 | } 23 | 24 | MsgAdapter() { 25 | } 26 | 27 | void set_callback(msg_callback_t_ callback) { 28 | callback_ = callback; 29 | } 30 | 31 | virtual ConnStatus connect(std::string ip, int port) = 0; 32 | 33 | /** 34 | * Basic send interfaces 35 | */ 36 | virtual ConnStatus send_to(int node_id, const char* msg, int len) = 0; 37 | 38 | virtual ConnStatus send_to(int node_id, int tid, const char* msg, int len) { 39 | return send_to(node_id, msg, len); 40 | } 41 | 42 | /** 43 | * Interfaces which allow batching at the sender's side 44 | */ 45 | virtual void prepare_pending() { 46 | } 47 | 48 | virtual ConnStatus send_pending(int node_id, const char* msg, int len) { 49 | RDMA_ASSERT(false); // not implemented 50 | } 51 | 52 | virtual ConnStatus send_pending(int node_id, int tid, const char* msg, int len) { 53 | return send_pending(node_id, msg, len); 54 | } 55 | 56 | /** 57 | * Flush all the currently pended message 58 | */ 59 | virtual ConnStatus flush_pending() { 60 | return SUCC; 61 | } 62 | 63 | /** 64 | * Examples to use batching at the sender side 65 | * Broadcast the message to a set of servers 66 | */ 67 | virtual ConnStatus broadcast_to(const std::set& nodes, const char* msg, int len) { 68 | prepare_pending(); 69 | for (auto it = nodes.begin(); it != nodes.end(); ++it) { 70 | send_pending(*it, msg, len); 71 | } 72 | flush_pending(); 73 | return SUCC; // TODO 74 | } 75 | 76 | virtual ConnStatus broadcast_to(int* nodes, int num, const char* msg, int len) { 77 | prepare_pending(); 78 | for (int i = 0; i < num; ++i) { 79 | send_pending(nodes[i], msg, len); 80 | } 81 | flush_pending(); 82 | return SUCC; // TODO 83 | } 84 | 85 | /** 86 | * The receive function 87 | */ 88 | virtual void poll_comps() = 0; 89 | 90 | /** 91 | * The size of meta value used by the MsgAdapter for each message 92 | */ 93 | virtual int msg_meta_len() { 94 | return 0; 95 | } 96 | 97 | protected: 98 | msg_callback_t_ callback_; 99 | }; 100 | 101 | }; // namespace rdmaio 102 | -------------------------------------------------------------------------------- /thirdparty/rlib/pre_connector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include //hostent 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include "logging.hpp" 14 | 15 | 16 | namespace rdmaio { 17 | 18 | constexpr struct timeval default_timeout = {0, 8000}; 19 | constexpr struct timeval no_timeout = {0, 0}; // it means forever 20 | 21 | inline __attribute__((always_inline)) // inline to avoid multiple-definiations 22 | int64_t 23 | diff_time(const struct timeval& end, const struct timeval& start) { 24 | int64_t diff = (end.tv_sec > start.tv_sec) ? (end.tv_sec - start.tv_sec) * 1000 : 0; 25 | if (end.tv_usec > start.tv_usec) { 26 | diff += (end.tv_usec - start.tv_usec); 27 | } else { 28 | diff -= (start.tv_usec - end.tv_usec); 29 | } 30 | return diff; 31 | } 32 | 33 | class PreConnector { // helper class used to exchange QP information using TCP/IP 34 | public: 35 | static int get_listen_socket(const std::string& addr, int port) { 36 | struct sockaddr_in serv_addr; 37 | auto sockfd = socket(AF_INET, SOCK_STREAM, 0); 38 | RDMA_ASSERT(sockfd >= 0) << "ERROR opening listen socket: " << strerror(errno); 39 | 40 | /* setup the host_addr structure for use in bind call */ 41 | // server byte order 42 | serv_addr.sin_family = AF_INET; 43 | 44 | serv_addr.sin_addr.s_addr = INADDR_ANY; 45 | 46 | // port 47 | serv_addr.sin_port = htons(port); 48 | int on = 1; 49 | setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); 50 | RDMA_ASSERT(bind(sockfd, (struct sockaddr*) &serv_addr, 51 | sizeof(serv_addr)) == 0) 52 | << "ERROR on binding: " << strerror(errno); 53 | return sockfd; 54 | } 55 | 56 | static int get_send_socket(const std::string& addr, int port, struct timeval timeout = default_timeout) { 57 | int sockfd; 58 | struct sockaddr_in serv_addr; 59 | 60 | RDMA_ASSERT((sockfd = socket(AF_INET, SOCK_STREAM, 0)) >= 0) << "Error open socket for send!"; 61 | fcntl(sockfd, F_SETFL, O_NONBLOCK); 62 | 63 | serv_addr.sin_family = AF_INET; 64 | serv_addr.sin_port = htons(port); 65 | 66 | auto ip = host_to_ip(addr); 67 | if (ip == "") { 68 | close(sockfd); 69 | return -1; 70 | } 71 | 72 | serv_addr.sin_addr.s_addr = inet_addr(ip.c_str()); 73 | 74 | if (connect(sockfd, (struct sockaddr*) &serv_addr, sizeof(serv_addr)) == -1) { 75 | if (errno == EINPROGRESS) { 76 | goto PROGRESS; 77 | } 78 | close(sockfd); 79 | return -1; 80 | } 81 | PROGRESS: 82 | // check return status 83 | fd_set fdset; 84 | FD_ZERO(&fdset); 85 | FD_SET(sockfd, &fdset); 86 | 87 | if (select(sockfd + 1, NULL, &fdset, NULL, &timeout) == 1) { 88 | int so_error; 89 | socklen_t len = sizeof so_error; 90 | 91 | getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &so_error, &len); 92 | 93 | if (so_error == 0) { 94 | // success 95 | } else { 96 | close(sockfd); 97 | return -1; 98 | } 99 | } 100 | 101 | return sockfd; 102 | } 103 | 104 | // timeout in microsend 105 | static bool wait_recv(int socket, uint32_t timeout = 2000) { 106 | while (true) { 107 | fd_set rfds; 108 | FD_ZERO(&rfds); 109 | FD_SET(socket, &rfds); 110 | 111 | struct timeval s_timeout = {0, timeout}; 112 | int ready = select(socket + 1, &rfds, NULL, NULL, &s_timeout); 113 | RDMA_ASSERT(ready != -1); 114 | 115 | if (ready == 0) { // no file descriptor found 116 | continue; 117 | } 118 | 119 | if (ready < 0) { // error case 120 | RDMA_ASSERT(false) << "select error " << strerror(errno); 121 | } 122 | 123 | if (FD_ISSET(socket, &rfds)) { 124 | break; // ready 125 | } 126 | } 127 | return true; 128 | } 129 | 130 | static void wait_close(int socket) { 131 | shutdown(socket, SHUT_WR); 132 | char buf[2]; 133 | 134 | struct timeval timeout = {1, 0}; 135 | auto ret = setsockopt(socket, SOL_SOCKET, SO_RCVTIMEO, (const char*) &timeout, sizeof(timeout)); 136 | RDMA_ASSERT(ret == 0); 137 | 138 | recv(socket, buf, 2, 0); 139 | close(socket); 140 | } 141 | 142 | static int send_to(int fd, char* usrbuf, size_t n) { 143 | size_t nleft = n; 144 | ssize_t nwritten; 145 | char* bufp = usrbuf; 146 | 147 | while (nleft > 0) { 148 | if ((nwritten = write(fd, bufp, nleft)) <= 0) { 149 | if (errno == EINTR) /* Interrupted by sig handler return */ 150 | nwritten = 0; /* and call write() again */ 151 | else 152 | return -1; /* errno set by write() */ 153 | } 154 | nleft -= nwritten; 155 | bufp += nwritten; 156 | } 157 | return n; 158 | } 159 | 160 | typedef std::map ipmap_t; 161 | static ipmap_t& local_ip_cache() { 162 | static __thread ipmap_t cache; 163 | return cache; 164 | } 165 | 166 | static std::string host_to_ip(const std::string& host) { 167 | ipmap_t cache = local_ip_cache(); 168 | if (cache.find(host) != cache.end()) 169 | return cache[host]; 170 | 171 | std::string res = ""; 172 | 173 | struct addrinfo hints, * infoptr; 174 | memset(&hints, 0, sizeof hints); 175 | hints.ai_family = AF_INET; // AF_INET means IPv4 only addresses 176 | 177 | int result = getaddrinfo(host.c_str(), NULL, &hints, &infoptr); 178 | if (result) { 179 | fprintf(stderr, "getaddrinfo: %s at %s\n", gai_strerror(result), host.c_str()); 180 | return ""; 181 | } 182 | char ip[64]; 183 | memset(ip, 0, sizeof(ip)); 184 | 185 | for (struct addrinfo* p = infoptr; p != NULL; p = p->ai_next) { 186 | getnameinfo(p->ai_addr, p->ai_addrlen, ip, sizeof(ip), NULL, 0, NI_NUMERICHOST); 187 | } 188 | 189 | res = std::string(ip); 190 | if (res != "") 191 | cache.insert(std::make_pair(host, res)); 192 | return res; 193 | } 194 | }; 195 | 196 | }; // namespace rdmaio 197 | -------------------------------------------------------------------------------- /thirdparty/rlib/rdma_ctrl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // add this to pass compile 4 | #include 5 | 6 | #include "qp.hpp" 7 | 8 | 9 | namespace rdmaio { 10 | 11 | const int MAX_SERVER_SUPPORTED = 16; 12 | typedef RUDQP UDQP; 13 | typedef RRCQP RCQP; 14 | 15 | typedef std::function connection_callback_t; 16 | 17 | class RdmaCtrl { 18 | public: 19 | typedef struct { 20 | int dev_id; 21 | int port_id; 22 | } DevIdx; 23 | 24 | RdmaCtrl( 25 | int node_id, int tcp_base_port, 26 | connection_callback_t callback = [](const QPConnArg&) { 27 | // the default callback does nothing 28 | }, 29 | std::string ip = "localhost"); 30 | 31 | ~RdmaCtrl(); 32 | 33 | int current_node_id(); 34 | int listening_port(); 35 | 36 | /** 37 | * Query devices info on this machine, 38 | * if there is a previous call, return previous results unless clear_dev_info has been called 39 | */ 40 | std::vector query_devs(); 41 | 42 | static std::vector query_devs_helper(); 43 | 44 | // clear the cached infos by RdmaCtrl; 45 | void clear_dev_info(); 46 | 47 | /** 48 | * Open device handlers. 49 | * RdmaCtrl opens a device for each thread. 50 | * The get_device returns previously opened device of this thread, if it is already opened 51 | */ 52 | RNicHandler* open_thread_local_device(DevIdx idx); 53 | 54 | RNicHandler* open_device(DevIdx idx); 55 | 56 | RNicHandler* get_device(); 57 | 58 | /** 59 | * The *callback* is called once a QP connection request is sent to this server 60 | */ 61 | void register_qp_callback(connection_callback_t callback); 62 | 63 | void close_device(); 64 | 65 | void close_device(RNicHandler*); 66 | 67 | /** 68 | * Each RDMA NIC has multiple ports, so we use two-dimeson index to locate the target port. 69 | * convert_port_idx provides a way to translate the one-dimeson index to the two-dimeson 70 | */ 71 | DevIdx convert_port_idx(int idx); 72 | 73 | /** 74 | * Register memory to a specific RNIC handler 75 | */ 76 | bool register_memory(int id, const char* buf, uint64_t size, RNicHandler* rnic, 77 | int flag = Memory::DEFAULT_PROTECTION_FLAG); 78 | 79 | /** 80 | * Get the local registered memory 81 | * undefined if local_mr_id has been registered 82 | */ 83 | MemoryAttr get_local_mr(int mr_id); 84 | 85 | /** 86 | * Return an arbitrary registered MR 87 | * return -1 if no MR is registered to RdmaCtrl 88 | * return the first local_mr index, if found one 89 | */ 90 | int get_default_mr(MemoryAttr& attr); 91 | 92 | /** 93 | * Create and query QPs 94 | * For create, an optional local_attr can be provided to bind to this QP 95 | * A local MR is passed as the default local local_mr for this QP. 96 | * If local_attr = nullptr, then this QP is unbind to any MR. 97 | */ 98 | RCQP* create_rc_qp(QPIdx idx, RNicHandler* dev, MemoryAttr* local_attr = NULL); 99 | UDQP* create_ud_qp(QPIdx idx, RNicHandler* dev, MemoryAttr* local_attr = NULL); 100 | 101 | void destroy_rc_qp(); 102 | 103 | RCQP* get_rc_qp(QPIdx idx); 104 | UDQP* get_ud_qp(QPIdx idx); 105 | 106 | /** 107 | * Some helper functions (example usage of RdmaCtrl) 108 | * Fully link the QP in a symmetric way, for this thread. 109 | * For example, node 0 can connect to node 1, while node 1 connect to node 0. 110 | */ 111 | bool link_symmetric_rcqps(const std::vector& cluster, 112 | int l_mrid, int mr_id, int wid, int idx = 0); 113 | 114 | private: 115 | class RdmaCtrlImpl; 116 | 117 | std::unique_ptr impl_; 118 | }; 119 | 120 | using RdmaCtrlPtr = std::shared_ptr; 121 | 122 | } // namespace rdmaio 123 | 124 | #include "rdma_ctrl_impl.hpp" // real implemeatation here 125 | -------------------------------------------------------------------------------- /thirdparty/rlib/rnic.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "logging.hpp" 8 | 9 | 10 | namespace rdmaio { 11 | 12 | // The name of the particular port on the RNIC. 13 | typedef struct { 14 | uint64_t subnet_prefix; 15 | uint64_t interface_id; 16 | uint32_t local_id; 17 | } address_t; 18 | 19 | struct RNicInfo { 20 | typedef struct { 21 | uint port_id; 22 | std::string link_layer; 23 | } PortInfo; 24 | 25 | RNicInfo(const char* name, int id, ibv_context* ctx) : dev_id(id), 26 | dev_name(name) { 27 | query_port_infos(ctx); 28 | query_active_gids(ctx); 29 | } 30 | 31 | bool query_dev_attribute(ibv_context* ctx, ibv_device_attr& attr) { 32 | int rc = ibv_query_device(ctx, &attr); 33 | if (rc != 0) { 34 | RDMA_LOG(ERROR) << "query device attribute error: " << strerror(errno); 35 | return false; 36 | } 37 | return true; 38 | } 39 | 40 | // fill in the active_ports 41 | void query_port_infos(ibv_context* ctx) { 42 | ibv_device_attr attr; 43 | if (!query_dev_attribute(ctx, attr)) 44 | return; 45 | 46 | // query port info 47 | for (uint port_id = 1; port_id <= attr.phys_port_cnt; ++port_id) { 48 | struct ibv_port_attr port_attr; 49 | int rc = ibv_query_port(ctx, port_id, &port_attr); 50 | if (rc != 0) { 51 | RDMA_LOG(ERROR) << "query port_id " << port_id << " on device " << dev_id << "error."; 52 | continue; 53 | } 54 | 55 | // check port status 56 | if (port_attr.phys_state != IBV_PORT_ACTIVE && port_attr.phys_state != IBV_PORT_ACTIVE_DEFER) { 57 | RDMA_LOG(WARNING) << "query port_id " << port_id << " on device " << dev_id << " not active."; 58 | continue; 59 | } 60 | 61 | std::string link_layer = ""; 62 | switch (port_attr.link_layer) { 63 | case IBV_LINK_LAYER_ETHERNET:link_layer = "RoCE"; 64 | break; 65 | case IBV_LINK_LAYER_INFINIBAND:link_layer = "Infiniband"; 66 | break; 67 | default:RDMA_LOG(WARNING) << "unknown link layer at this port: " << port_attr.link_layer; 68 | link_layer = "Unknown"; 69 | }; 70 | active_ports.push_back({port_id, link_layer}); 71 | } 72 | } 73 | 74 | /** 75 | * I assume that the active gid is the same in the RNIC 76 | */ 77 | void query_active_gids(ibv_context* ctx) { 78 | if (active_ports.size() == 0) 79 | return; 80 | 81 | int port_id = active_ports[0].port_id; 82 | struct ibv_port_attr port_attr; 83 | int rc = ibv_query_port(ctx, port_id, &port_attr); 84 | 85 | if (rc != 0) { 86 | RDMA_LOG(WARNING) << "query port attribute at dev " << dev_name << ",port " << port_id 87 | << "; w error: " << strerror(errno); 88 | return; 89 | } 90 | 91 | for (uint i = 0; i < port_attr.gid_tbl_len; ++i) { 92 | ibv_gid gid = {}; 93 | auto rc = ibv_query_gid(ctx, port_id, i, &gid); 94 | if (gid.global.interface_id) { 95 | active_gids.push_back(i); 96 | } 97 | } 98 | } 99 | 100 | void print() const { 101 | RDMA_LOG(3) << to_string(); 102 | } 103 | 104 | std::string to_string() const { 105 | std::ostringstream oss; 106 | 107 | oss << "device " << dev_name << " has " << active_ports.size() << " active ports."; 108 | for (auto i : active_ports) { 109 | oss << "port " << i.port_id << " w link layer " << i.link_layer << "."; 110 | } 111 | for (uint i = 0; i < active_gids.size(); ++i) { 112 | oss << "active gid: " << active_gids[i] << "."; 113 | } 114 | return oss.str(); 115 | } 116 | 117 | // members 118 | int dev_id; 119 | std::string dev_name; 120 | std::vector active_ports; 121 | std::vector active_gids; 122 | }; 123 | 124 | class RdmaCtrl; 125 | 126 | struct RNicHandler { 127 | RNicHandler(int dev_id, int port_id, ibv_context* ctx, ibv_pd* pd, int lid, int gid = 0) : dev_id(dev_id), 128 | port_id(port_id), 129 | ctx(ctx), 130 | pd(pd), 131 | lid(lid), 132 | gid(gid) { 133 | } 134 | 135 | address_t query_addr() { 136 | return query_addr(gid); 137 | } 138 | 139 | address_t query_addr(uint8_t gid_index) { 140 | ibv_gid gid; 141 | ibv_query_gid(ctx, port_id, gid_index, &gid); 142 | 143 | address_t addr{ 144 | .subnet_prefix = gid.global.subnet_prefix, 145 | .interface_id = gid.global.interface_id, 146 | .local_id = gid_index}; 147 | return addr; 148 | } 149 | 150 | friend class RdmaCtrl; 151 | 152 | ~RNicHandler() { 153 | // delete ctx & pd 154 | RDMA_VERIFY(INFO, ibv_close_device(ctx) == 0) << "failed to close device " << dev_id; 155 | RDMA_VERIFY(INFO, ibv_dealloc_pd(pd) == 0) << "failed to dealloc pd at device " << dev_id 156 | << "; w error " << strerror(errno); 157 | } 158 | 159 | public: 160 | uint16_t dev_id; // which RNIC 161 | uint16_t port_id; // which port 162 | 163 | struct ibv_context* ctx; 164 | struct ibv_pd* pd; 165 | uint16_t lid; 166 | uint16_t gid; 167 | }; 168 | 169 | } // namespace rdmaio 170 | -------------------------------------------------------------------------------- /workload/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | add_subdirectory(tpcc) 5 | 6 | add_subdirectory(tatp) 7 | 8 | add_subdirectory(smallbank) 9 | 10 | add_subdirectory(micro) -------------------------------------------------------------------------------- /workload/config/table_type.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | // Global table identifier in a single machine (type: table_id_t) 7 | #define TABLE_TATP 0 8 | #define TABLE_TPCC 0 9 | #define TABLE_SMALLBANK 0 10 | #define TABLE_MICRO 0 -------------------------------------------------------------------------------- /workload/micro/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | set(DB_SOURCES micro_db.cc) 5 | set(TXN_SOURCES micro_txn.cc) 6 | 7 | add_library(micro_db STATIC ${DB_SOURCES}) 8 | add_library(micro_txn STATIC ${TXN_SOURCES}) 9 | 10 | set_target_properties(micro_db PROPERTIES LINKER_LANGUAGE CXX) 11 | set_target_properties(micro_txn PROPERTIES LINKER_LANGUAGE CXX) 12 | 13 | target_link_libraries(micro_txn ford) -------------------------------------------------------------------------------- /workload/micro/micro_db.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "micro/micro_db.h" 5 | #include "unistd.h" 6 | #include "util/json_config.h" 7 | 8 | /* Called by main. Only initialize here. The worker threads will populate. */ 9 | void MICRO::LoadTable(node_id_t node_id, 10 | node_id_t num_server, 11 | MemStoreAllocParam* mem_store_alloc_param, 12 | MemStoreReserveParam* mem_store_reserve_param) { 13 | // Initiate + Populate table for primary role 14 | if ((node_id_t)MicroTableType::kMicroTable % num_server == node_id) { 15 | printf("Primary: Initializing MICRO table\n"); 16 | std::string config_filepath = "../../../workload/micro/micro_tables/micro.json"; 17 | auto json_config = JsonConfig::load_file(config_filepath); 18 | auto table_config = json_config.get("table"); 19 | micro_table = new HashStore((table_id_t)MicroTableType::kMicroTable, 20 | table_config.get("bkt_num").get_uint64(), 21 | mem_store_alloc_param); 22 | PopulateMicroTable(mem_store_reserve_param); 23 | primary_table_ptrs.push_back(micro_table); 24 | } 25 | 26 | // Initiate + Populate table for backup role 27 | if (BACKUP_DEGREE < num_server) { 28 | for (node_id_t i = 1; i <= BACKUP_DEGREE; i++) { 29 | if ((node_id_t)MicroTableType::kMicroTable % num_server == (node_id - i + num_server) % num_server) { 30 | printf("Backup: Initializing MICRO table\n"); 31 | std::string config_filepath = "../../../workload/micro/micro_tables/micro.json"; 32 | auto json_config = JsonConfig::load_file(config_filepath); 33 | auto table_config = json_config.get("table"); 34 | micro_table = new HashStore((table_id_t)MicroTableType::kMicroTable, 35 | table_config.get("bkt_num").get_uint64(), 36 | mem_store_alloc_param); 37 | PopulateMicroTable(mem_store_reserve_param); 38 | backup_table_ptrs.push_back(micro_table); 39 | } 40 | } 41 | } 42 | } 43 | 44 | void MICRO::PopulateMicroTable(MemStoreReserveParam* mem_store_reserve_param) { 45 | /* All threads must execute the loop below deterministically */ 46 | RDMA_LOG(DBG) << "NUM KEYS TOTAL: " << num_keys_global; 47 | /* Populate the tables */ 48 | for (uint64_t id = 0; id < num_keys_global; id++) { 49 | micro_key_t micro_key; 50 | micro_key.micro_id = (uint64_t)id; 51 | 52 | micro_val_t micro_val; 53 | for (int i = 0; i < 5; i++) { 54 | micro_val.magic[i] = micro_magic + i; 55 | } 56 | 57 | LoadRecord(micro_table, micro_key.item_key, 58 | (void*)µ_val, sizeof(micro_val_t), 59 | (table_id_t)MicroTableType::kMicroTable, 60 | mem_store_reserve_param); 61 | } 62 | } 63 | 64 | int MICRO::LoadRecord(HashStore* table, 65 | itemkey_t item_key, 66 | void* val_ptr, 67 | size_t val_size, 68 | table_id_t table_id, 69 | MemStoreReserveParam* mem_store_reserve_param) { 70 | assert(val_size <= MAX_ITEM_SIZE); 71 | /* Insert into HashStore */ 72 | DataItem item_to_be_inserted(table_id, val_size, item_key, (uint8_t*)val_ptr); 73 | DataItem* inserted_item = table->LocalInsert(item_key, item_to_be_inserted, mem_store_reserve_param); 74 | inserted_item->remote_offset = table->GetItemRemoteOffset(inserted_item); 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /workload/micro/micro_db.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "config/table_type.h" 11 | #include "memstore/hash_store.h" 12 | #include "util/fast_random.h" 13 | #include "util/json_config.h" 14 | 15 | union micro_key_t { 16 | uint64_t micro_id; 17 | uint64_t item_key; 18 | 19 | micro_key_t() { 20 | item_key = 0; 21 | } 22 | }; 23 | 24 | static_assert(sizeof(micro_key_t) == sizeof(uint64_t), ""); 25 | 26 | struct micro_val_t { 27 | // 40 bytes, consistent with FaSST 28 | uint64_t magic[5]; 29 | }; 30 | static_assert(sizeof(micro_val_t) == 40, ""); 31 | 32 | // Magic numbers for debugging. These are unused in the spec. 33 | #define Micro_MAGIC 97 /* Some magic number <= 255 */ 34 | #define micro_magic (Micro_MAGIC) 35 | 36 | // Helpers for generating workload 37 | enum class MicroTxType : int { 38 | kLockContention, 39 | }; 40 | 41 | // Table id 42 | enum class MicroTableType : uint64_t { 43 | kMicroTable = TABLE_MICRO, 44 | }; 45 | 46 | static ALWAYS_INLINE 47 | uint64_t align_pow2(uint64_t v) { 48 | v--; 49 | v |= v >> 1; 50 | v |= v >> 2; 51 | v |= v >> 4; 52 | v |= v >> 8; 53 | v |= v >> 16; 54 | v |= v >> 32; 55 | return v + 1; 56 | } 57 | 58 | class MICRO { 59 | public: 60 | std::string bench_name; 61 | 62 | uint64_t num_keys_global; 63 | 64 | /* Tables */ 65 | HashStore* micro_table; 66 | 67 | std::vector primary_table_ptrs; 68 | 69 | std::vector backup_table_ptrs; 70 | 71 | // For server usage: Provide interfaces to servers for loading tables 72 | // Also for client usage: Provide interfaces to clients for generating ids during tests 73 | MICRO() { 74 | bench_name = "MICRO"; 75 | std::string config_filepath = "../../../config/micro_config.json"; 76 | auto json_config = JsonConfig::load_file(config_filepath); 77 | auto conf = json_config.get("micro"); 78 | auto num_keys = conf.get("num_keys").get_int64(); 79 | num_keys_global = align_pow2(num_keys); 80 | micro_table = nullptr; 81 | } 82 | 83 | ~MICRO() { 84 | if (micro_table) delete micro_table; 85 | } 86 | 87 | void LoadTable(node_id_t node_id, 88 | node_id_t num_server, 89 | MemStoreAllocParam* mem_store_alloc_param, 90 | MemStoreReserveParam* mem_store_reserve_param); 91 | 92 | void PopulateMicroTable(MemStoreReserveParam* mem_store_reserve_param); 93 | 94 | int LoadRecord(HashStore* table, 95 | itemkey_t item_key, 96 | void* val_ptr, 97 | size_t val_size, 98 | table_id_t table_id, 99 | MemStoreReserveParam* mem_store_reserve_param); 100 | 101 | ALWAYS_INLINE 102 | std::vector GetPrimaryHashStore() { 103 | return primary_table_ptrs; 104 | } 105 | 106 | ALWAYS_INLINE 107 | std::vector GetBackupHashStore() { 108 | return backup_table_ptrs; 109 | } 110 | }; 111 | -------------------------------------------------------------------------------- /workload/micro/micro_tables/micro.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "MICRO", 4 | "bkt_num": 200000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/micro/micro_txn.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include "dtx/dtx.h" 9 | #include "micro/micro_db.h" 10 | #include "util/zipf.h" 11 | 12 | /******************** The business logic (Transaction) start ********************/ 13 | 14 | struct DataItemDuplicate { 15 | DataItemPtr data_item_ptr; 16 | bool is_dup; 17 | }; 18 | 19 | bool TxTestCachedAddr(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio); 20 | bool TxLockContention(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio); 21 | bool TxReadBackup(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio); 22 | bool TxReadOnly(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio); 23 | bool TxRFlush1(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio); 24 | bool TxRFlush2(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio); 25 | /******************** The business logic (Transaction) end ********************/ -------------------------------------------------------------------------------- /workload/smallbank/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | set(DB_SOURCES smallbank_db.cc) 5 | set(TXN_SOURCES smallbank_txn.cc) 6 | 7 | add_library(smallbank_db STATIC ${DB_SOURCES}) 8 | add_library(smallbank_txn STATIC ${TXN_SOURCES}) 9 | 10 | set_target_properties(smallbank_db PROPERTIES LINKER_LANGUAGE CXX) 11 | set_target_properties(smallbank_txn PROPERTIES LINKER_LANGUAGE CXX) 12 | 13 | target_link_libraries(smallbank_txn ford) -------------------------------------------------------------------------------- /workload/smallbank/smallbank_db.cc: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #include "smallbank_db.h" 5 | 6 | #include "unistd.h" 7 | #include "util/json_config.h" 8 | 9 | /* Called by main. Only initialize here. The worker threads will populate. */ 10 | void SmallBank::LoadTable(node_id_t node_id, 11 | node_id_t num_server, 12 | MemStoreAllocParam* mem_store_alloc_param, 13 | MemStoreReserveParam* mem_store_reserve_param) { 14 | // Initiate + Populate table for primary role 15 | if ((node_id_t)SmallBankTableType::kSavingsTable % num_server == node_id) { 16 | printf("Primary: Initializing SAVINGS table\n"); 17 | std::string config_filepath = "../../../workload/smallbank/smallbank_tables/savings.json"; 18 | auto json_config = JsonConfig::load_file(config_filepath); 19 | auto table_config = json_config.get("table"); 20 | savings_table = new HashStore((table_id_t)SmallBankTableType::kSavingsTable, 21 | table_config.get("bkt_num").get_uint64(), 22 | mem_store_alloc_param); 23 | PopulateSavingsTable(mem_store_reserve_param); 24 | primary_table_ptrs.push_back(savings_table); 25 | } 26 | if ((node_id_t)SmallBankTableType::kCheckingTable % num_server == node_id) { 27 | printf("Primary: Initializing CHECKING table\n"); 28 | std::string config_filepath = "../../../workload/smallbank/smallbank_tables/checking.json"; 29 | auto json_config = JsonConfig::load_file(config_filepath); 30 | auto table_config = json_config.get("table"); 31 | checking_table = new HashStore((table_id_t)SmallBankTableType::kCheckingTable, 32 | table_config.get("bkt_num").get_uint64(), 33 | mem_store_alloc_param); 34 | PopulateCheckingTable(mem_store_reserve_param); 35 | primary_table_ptrs.push_back(checking_table); 36 | } 37 | 38 | // Initiate + Populate table for backup role 39 | if (BACKUP_DEGREE < num_server) { 40 | for (node_id_t i = 1; i <= BACKUP_DEGREE; i++) { 41 | if ((node_id_t)SmallBankTableType::kSavingsTable % num_server == (node_id - i + num_server) % num_server) { 42 | printf("Backup: Initializing SAVINGS table\n"); 43 | std::string config_filepath = "../../../workload/smallbank/smallbank_tables/savings.json"; 44 | auto json_config = JsonConfig::load_file(config_filepath); 45 | auto table_config = json_config.get("table"); 46 | savings_table = new HashStore((table_id_t)SmallBankTableType::kSavingsTable, 47 | table_config.get("bkt_num").get_uint64(), 48 | mem_store_alloc_param); 49 | PopulateSavingsTable(mem_store_reserve_param); 50 | backup_table_ptrs.push_back(savings_table); 51 | } 52 | if ((node_id_t)SmallBankTableType::kCheckingTable % num_server == (node_id - i + num_server) % num_server) { 53 | printf("Backup: Initializing CHECKING table\n"); 54 | std::string config_filepath = "../../../workload/smallbank/smallbank_tables/checking.json"; 55 | auto json_config = JsonConfig::load_file(config_filepath); 56 | auto table_config = json_config.get("table"); 57 | checking_table = new HashStore((table_id_t)SmallBankTableType::kCheckingTable, 58 | table_config.get("bkt_num").get_uint64(), 59 | mem_store_alloc_param); 60 | PopulateCheckingTable(mem_store_reserve_param); 61 | backup_table_ptrs.push_back(checking_table); 62 | } 63 | } 64 | } 65 | } 66 | 67 | int SmallBank::LoadRecord(HashStore* table, 68 | itemkey_t item_key, 69 | void* val_ptr, 70 | size_t val_size, 71 | table_id_t table_id, 72 | MemStoreReserveParam* mem_store_reserve_param) { 73 | assert(val_size <= MAX_ITEM_SIZE); 74 | /* Insert into HashStore */ 75 | DataItem item_to_be_inserted(table_id, val_size, item_key, (uint8_t*)val_ptr); 76 | DataItem* inserted_item = table->LocalInsert(item_key, item_to_be_inserted, mem_store_reserve_param); 77 | inserted_item->remote_offset = table->GetItemRemoteOffset(inserted_item); 78 | return 1; 79 | } 80 | 81 | void SmallBank::PopulateSavingsTable(MemStoreReserveParam* mem_store_reserve_param) { 82 | /* All threads must execute the loop below deterministically */ 83 | 84 | /* Populate the tables */ 85 | for (uint32_t acct_id = 0; acct_id < num_accounts_global; acct_id++) { 86 | // Savings 87 | smallbank_savings_key_t savings_key; 88 | savings_key.acct_id = (uint64_t)acct_id; 89 | 90 | smallbank_savings_val_t savings_val; 91 | savings_val.magic = smallbank_savings_magic; 92 | savings_val.bal = 1000000000ull; 93 | 94 | LoadRecord(savings_table, savings_key.item_key, 95 | (void*)&savings_val, sizeof(smallbank_savings_val_t), 96 | (table_id_t)SmallBankTableType::kSavingsTable, 97 | mem_store_reserve_param); 98 | } 99 | } 100 | 101 | void SmallBank::PopulateCheckingTable(MemStoreReserveParam* mem_store_reserve_param) { 102 | /* All threads must execute the loop below deterministically */ 103 | 104 | /* Populate the tables */ 105 | for (uint32_t acct_id = 0; acct_id < num_accounts_global; acct_id++) { 106 | // Checking 107 | smallbank_checking_key_t checking_key; 108 | checking_key.acct_id = (uint64_t)acct_id; 109 | 110 | smallbank_checking_val_t checking_val; 111 | checking_val.magic = smallbank_checking_magic; 112 | checking_val.bal = 1000000000ull; 113 | 114 | LoadRecord(checking_table, checking_key.item_key, 115 | (void*)&checking_val, sizeof(smallbank_checking_val_t), 116 | (table_id_t)SmallBankTableType::kCheckingTable, 117 | mem_store_reserve_param); 118 | } 119 | } -------------------------------------------------------------------------------- /workload/smallbank/smallbank_db.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "config/table_type.h" 10 | #include "memstore/hash_store.h" 11 | #include "util/fast_random.h" 12 | #include "util/json_config.h" 13 | 14 | /* STORED PROCEDURE EXECUTION FREQUENCIES (0-100) */ 15 | #define FREQUENCY_AMALGAMATE 15 16 | #define FREQUENCY_BALANCE 15 17 | #define FREQUENCY_DEPOSIT_CHECKING 15 18 | #define FREQUENCY_SEND_PAYMENT 25 19 | #define FREQUENCY_TRANSACT_SAVINGS 15 20 | #define FREQUENCY_WRITE_CHECK 15 21 | 22 | #define TX_HOT 90 /* Percentage of txns that use accounts from hotspot */ 23 | 24 | // Smallbank table keys and values 25 | // All keys have been sized to 8 bytes 26 | // All values have been sized to the next multiple of 8 bytes 27 | 28 | /* 29 | * SAVINGS table. 30 | */ 31 | union smallbank_savings_key_t { 32 | uint64_t acct_id; 33 | uint64_t item_key; 34 | 35 | smallbank_savings_key_t() { 36 | item_key = 0; 37 | } 38 | }; 39 | 40 | static_assert(sizeof(smallbank_savings_key_t) == sizeof(uint64_t), ""); 41 | 42 | struct smallbank_savings_val_t { 43 | uint32_t magic; 44 | float bal; 45 | }; 46 | static_assert(sizeof(smallbank_savings_val_t) == sizeof(uint64_t), ""); 47 | 48 | /* 49 | * CHECKING table 50 | */ 51 | union smallbank_checking_key_t { 52 | uint64_t acct_id; 53 | uint64_t item_key; 54 | 55 | smallbank_checking_key_t() { 56 | item_key = 0; 57 | } 58 | }; 59 | 60 | static_assert(sizeof(smallbank_checking_key_t) == sizeof(uint64_t), ""); 61 | 62 | struct smallbank_checking_val_t { 63 | uint32_t magic; 64 | float bal; 65 | }; 66 | static_assert(sizeof(smallbank_checking_val_t) == sizeof(uint64_t), ""); 67 | 68 | // Magic numbers for debugging. These are unused in the spec. 69 | #define SmallBank_MAGIC 97 /* Some magic number <= 255 */ 70 | #define smallbank_savings_magic (SmallBank_MAGIC) 71 | #define smallbank_checking_magic (SmallBank_MAGIC + 1) 72 | 73 | // Helpers for generating workload 74 | #define SmallBank_TX_TYPES 6 75 | enum class SmallBankTxType : int { 76 | kAmalgamate, 77 | kBalance, 78 | kDepositChecking, 79 | kSendPayment, 80 | kTransactSaving, 81 | kWriteCheck, 82 | }; 83 | 84 | 85 | const std::string SmallBank_TX_NAME[SmallBank_TX_TYPES] = {"Amalgamate", "Balance", "DepositChecking", \ 86 | "SendPayment", "TransactSaving", "WriteCheck"}; 87 | 88 | // Table id 89 | enum class SmallBankTableType : uint64_t { 90 | kSavingsTable = TABLE_SMALLBANK, 91 | kCheckingTable, 92 | }; 93 | 94 | class SmallBank { 95 | public: 96 | std::string bench_name; 97 | 98 | uint32_t total_thread_num; 99 | 100 | uint32_t num_accounts_global, num_hot_global; 101 | 102 | /* Tables */ 103 | HashStore* savings_table; 104 | 105 | HashStore* checking_table; 106 | 107 | std::vector primary_table_ptrs; 108 | 109 | std::vector backup_table_ptrs; 110 | 111 | // For server usage: Provide interfaces to servers for loading tables 112 | // Also for client usage: Provide interfaces to clients for generating ids during tests 113 | SmallBank() { 114 | bench_name = "SmallBank"; 115 | // Used for populate table (line num) and get account 116 | std::string config_filepath = "../../../config/smallbank_config.json"; 117 | auto json_config = JsonConfig::load_file(config_filepath); 118 | auto conf = json_config.get("smallbank"); 119 | num_accounts_global = conf.get("num_accounts").get_uint64(); 120 | num_hot_global = conf.get("num_hot_accounts").get_uint64(); 121 | 122 | /* Up to 2 billion accounts */ 123 | assert(num_accounts_global <= 2ull * 1024 * 1024 * 1024); 124 | 125 | savings_table = nullptr; 126 | checking_table = nullptr; 127 | } 128 | 129 | ~SmallBank() { 130 | if (savings_table) delete savings_table; 131 | if (checking_table) delete checking_table; 132 | } 133 | 134 | SmallBankTxType* CreateWorkgenArray() { 135 | SmallBankTxType* workgen_arr = new SmallBankTxType[100]; 136 | 137 | int i = 0, j = 0; 138 | 139 | j += FREQUENCY_AMALGAMATE; 140 | for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kAmalgamate; 141 | 142 | j += FREQUENCY_BALANCE; 143 | for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kBalance; 144 | 145 | j += FREQUENCY_DEPOSIT_CHECKING; 146 | for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kDepositChecking; 147 | 148 | j += FREQUENCY_SEND_PAYMENT; 149 | for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kSendPayment; 150 | 151 | j += FREQUENCY_TRANSACT_SAVINGS; 152 | for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kTransactSaving; 153 | 154 | j += FREQUENCY_WRITE_CHECK; 155 | for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kWriteCheck; 156 | 157 | assert(i == 100 && j == 100); 158 | return workgen_arr; 159 | } 160 | 161 | /* 162 | * Generators for new account IDs. Called once per transaction because 163 | * we need to decide hot-or-not per transaction, not per account. 164 | */ 165 | inline void get_account(uint64_t* seed, uint64_t* acct_id) const { 166 | if (FastRand(seed) % 100 < TX_HOT) { 167 | *acct_id = FastRand(seed) % num_hot_global; 168 | } else { 169 | *acct_id = FastRand(seed) % num_accounts_global; 170 | } 171 | } 172 | 173 | inline void get_two_accounts(uint64_t* seed, uint64_t* acct_id_0, uint64_t* acct_id_1) const { 174 | if (FastRand(seed) % 100 < TX_HOT) { 175 | *acct_id_0 = FastRand(seed) % num_hot_global; 176 | *acct_id_1 = FastRand(seed) % num_hot_global; 177 | while (*acct_id_1 == *acct_id_0) { 178 | *acct_id_1 = FastRand(seed) % num_hot_global; 179 | } 180 | } else { 181 | *acct_id_0 = FastRand(seed) % num_accounts_global; 182 | *acct_id_1 = FastRand(seed) % num_accounts_global; 183 | while (*acct_id_1 == *acct_id_0) { 184 | *acct_id_1 = FastRand(seed) % num_accounts_global; 185 | } 186 | } 187 | } 188 | 189 | void LoadTable(node_id_t node_id, 190 | node_id_t num_server, 191 | MemStoreAllocParam* mem_store_alloc_param, 192 | MemStoreReserveParam* mem_store_reserve_param); 193 | 194 | void PopulateSavingsTable(MemStoreReserveParam* mem_store_reserve_param); 195 | 196 | void PopulateCheckingTable(MemStoreReserveParam* mem_store_reserve_param); 197 | 198 | int LoadRecord(HashStore* table, 199 | itemkey_t item_key, 200 | void* val_ptr, 201 | size_t val_size, 202 | table_id_t table_id, 203 | MemStoreReserveParam* mem_store_reserve_param); 204 | 205 | ALWAYS_INLINE 206 | std::vector GetPrimaryHashStore() { 207 | return primary_table_ptrs; 208 | } 209 | 210 | ALWAYS_INLINE 211 | std::vector GetBackupHashStore() { 212 | return backup_table_ptrs; 213 | } 214 | }; 215 | -------------------------------------------------------------------------------- /workload/smallbank/smallbank_tables/checking.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "CHECKING", 4 | "bkt_num": 200000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/smallbank/smallbank_tables/savings.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "SAVINGS", 4 | "bkt_num": 200000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/smallbank/smallbank_txn.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include "dtx/dtx.h" 9 | #include "smallbank/smallbank_db.h" 10 | 11 | /******************** The business logic (Transaction) start ********************/ 12 | 13 | bool TxAmalgamate(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 14 | /* Calculate the sum of saving and checking kBalance */ 15 | bool TxBalance(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 16 | /* Add $1.3 to acct_id's checking account */ 17 | bool TxDepositChecking(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 18 | /* Send $5 from acct_id_0's checking account to acct_id_1's checking account */ 19 | bool TxSendPayment(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 20 | /* Add $20 to acct_id's saving's account */ 21 | bool TxTransactSaving(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 22 | /* Read saving and checking kBalance + update checking kBalance unconditionally */ 23 | bool TxWriteCheck(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 24 | /******************** The business logic (Transaction) end ********************/ -------------------------------------------------------------------------------- /workload/tatp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | set(DB_SOURCES tatp_db.cc) 5 | set(TXN_SOURCES tatp_txn.cc) 6 | 7 | add_library(tatp_db STATIC ${DB_SOURCES}) 8 | add_library(tatp_txn STATIC ${TXN_SOURCES}) 9 | 10 | set_target_properties(tatp_db PROPERTIES LINKER_LANGUAGE CXX) 11 | set_target_properties(tatp_txn PROPERTIES LINKER_LANGUAGE CXX) 12 | 13 | target_link_libraries(tatp_txn ford) -------------------------------------------------------------------------------- /workload/tatp/tatp_tables/README.md: -------------------------------------------------------------------------------- 1 | # Table bucket capacities 2 | 3 | * On average, every worker adds `SUBSCRIBERS_PER_MACHINE` (1 M) subscribers to its `SUBSCRIBER` table partition. 4 | 5 | * There are 2.5 `ACCESS_INFO` records per subscriber, so 2.5 M `ACCESS_INFO` 6 | records in total. Similarly, there are 2.5 M `SPECIAL_FACILITY` records. 7 | 8 | * There are 1.25 `CALL_FORWARDING` records per `SPECIAL_FACILITY` record, so around 3.2 M `CALL_FORWARDING` records in 9 | total. 10 | 11 | ## Index sizing 12 | 13 | We allocate 25% extra space in the index for all records: 14 | 15 | * `SUBSCRIBER`: 1.25 M 16 | * Secondary `SUBSCRIBER` table: 1.25 M 17 | * `ACCESS_INFO`: 3.2 M -> Does not work so make it 4 M 18 | * `SPECIAL_FACILITY`: 3.2 M -> Does not work so make it 4 M 19 | * `CALL_FORWARDING`: 4 M -> Does not work so make it 5 M 20 | 21 | # Additional info 22 | 23 | ## Table key-value sizes 24 | 25 | * All key sizes are fixed at 8 bytes. Value sizes are padded to next multiple of 8 bytes 26 | * `SUBSCRIBER`: 40 bytes 27 | * Secondary `SUBSCRIBER` table: 8 bytes 28 | * `ACCESS_INFO`: 16 bytes 29 | * `SPECIAL_FACILITY`: 8 bytes 30 | * `CALL_FORWARDING`: 24 bytes 31 | 32 | ## Pool sizing (irrelevant for FixedTable) 33 | 34 | On top of an (assumed) 32-byte per-entry pool overhead, allocate 0% extra pool space. Also add the 8-byte HoTS object 35 | header to make the total header size = 40 bytes. 36 | 37 | * `SUBSCRIBER`: `(40 + 40) * .125 = 10 MB` 38 | * Secondary `SUBSCRIBER` table: `(40 + 8) * .125 = 6 MB` 39 | * `ACCESS_INFO`: `(40 + 16) * .32 = 18 MB` 40 | * `SPECIAL_FACILITY`: `(40 + 8) * .32 = 16 MB` 41 | * `CALL_FORWARDING`: `(40 + 24) * .4 = 26 MB` 42 | -------------------------------------------------------------------------------- /workload/tatp/tatp_tables/access_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "ACCESS INFO", 4 | "bkt_num": 40000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tatp/tatp_tables/call_forwarding.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "CALL FORWARDING", 4 | "bkt_num": 50000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tatp/tatp_tables/sec_subscriber.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "SECONDARY SUBSCRIBER", 4 | "bkt_num": 12500 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tatp/tatp_tables/special_facility.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "SPECIAL FACILITY", 4 | "bkt_num": 40000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tatp/tatp_tables/subscriber.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "SUBSCRIBER", 4 | "bkt_num": 12500 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tatp/tatp_txn.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include "dtx/dtx.h" 9 | #include "tatp/tatp_db.h" 10 | 11 | /******************** The business logic (Transaction) start ********************/ 12 | 13 | // Read 1 SUBSCRIBER row 14 | bool TxGetSubsciberData(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 15 | 16 | // 1. Read 1 SPECIAL_FACILITY row 17 | // 2. Read up to 3 CALL_FORWARDING rows 18 | // 3. Validate up to 4 rows 19 | bool TxGetNewDestination(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 20 | 21 | // Read 1 ACCESS_INFO row 22 | bool TxGetAccessData(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 23 | 24 | // Update 1 SUBSCRIBER row and 1 SPECIAL_FACILTY row 25 | bool TxUpdateSubscriberData(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 26 | 27 | // 1. Read a SECONDARY_SUBSCRIBER row 28 | // 2. Update a SUBSCRIBER row 29 | bool TxUpdateLocation(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 30 | 31 | // 1. Read a SECONDARY_SUBSCRIBER row 32 | // 2. Read a SPECIAL_FACILTY row 33 | // 3. Insert a CALL_FORWARDING row 34 | bool TxInsertCallForwarding(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 35 | 36 | // 1. Read a SECONDARY_SUBSCRIBER row 37 | // 2. Delete a CALL_FORWARDING row 38 | bool TxDeleteCallForwarding(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 39 | 40 | /******************** The business logic (Transaction) end ********************/ -------------------------------------------------------------------------------- /workload/tpcc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Author: Ming Zhang 2 | # Copyright (c) 2022 3 | 4 | set(DB_SOURCES tpcc_db.cc) 5 | set(TXN_SOURCES tpcc_txn.cc) 6 | 7 | add_library(tpcc_db STATIC ${DB_SOURCES}) 8 | add_library(tpcc_txn STATIC ${TXN_SOURCES}) 9 | 10 | set_target_properties(tpcc_db PROPERTIES LINKER_LANGUAGE CXX) 11 | set_target_properties(tpcc_txn PROPERTIES LINKER_LANGUAGE CXX) 12 | 13 | target_link_libraries(tpcc_txn ford) -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables/customer.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "CUSTOMER", 4 | "bkt_num": 300 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables/district.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "DISTRICT", 4 | "bkt_num": 10 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables/item.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "ITEM", 4 | "bkt_num": 10000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables/stock.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "STOCK", 4 | "bkt_num": 10000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables/warehouse.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "WAREHOUSE", 4 | "bkt_num": 8 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_1G/customer.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "CUSTOMER", 4 | "bkt_num": 30 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_1G/district.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "DISTRICT", 4 | "bkt_num": 10 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_1G/item.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "ITEM", 4 | "bkt_num": 1000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_1G/stock.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "STOCK", 4 | "bkt_num": 1000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_1G/warehouse.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "WAREHOUSE", 4 | "bkt_num": 50 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_8G/customer.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "CUSTOMER", 4 | "bkt_num": 300 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_8G/district.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "DISTRICT", 4 | "bkt_num": 10 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_8G/item.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "ITEM", 4 | "bkt_num": 10000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_8G/stock.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "STOCK", 4 | "bkt_num": 10000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_8G/warehouse.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "WAREHOUSE", 4 | "bkt_num": 8 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_normal/customer.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "CUSTOMER", 4 | "bkt_num": 300 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_normal/district.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "DISTRICT", 4 | "bkt_num": 10 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_normal/item.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "ITEM", 4 | "bkt_num": 10000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_normal/stock.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "STOCK", 4 | "bkt_num": 10000 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_tables_normal/warehouse.json: -------------------------------------------------------------------------------- 1 | { 2 | "table": { 3 | "name": "WAREHOUSE", 4 | "bkt_num": 30 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /workload/tpcc/tpcc_txn.h: -------------------------------------------------------------------------------- 1 | // Author: Ming Zhang 2 | // Copyright (c) 2022 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include "dtx/dtx.h" 9 | #include "tpcc/tpcc_db.h" 10 | 11 | /******************** The business logic (Transaction) start ********************/ 12 | 13 | // The following transaction business logics are referred to the standard TPCC specification. 14 | 15 | /* TPC BENCHMARK™ C 16 | ** Standard Specification 17 | ** Revision 5.11 18 | ** February 2010 19 | ** url: http://tpc.org/tpc_documents_current_versions/pdf/tpc-c_v5.11.0.pdf 20 | */ 21 | 22 | // Note: Remote hash slot limits the insertion number. For a 20-slot bucket, the uppper bound is 44744 new order. 23 | bool TxNewOrder(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 24 | bool TxPayment(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 25 | bool TxDelivery(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 26 | bool TxOrderStatus(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 27 | bool TxStockLevel(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx); 28 | /******************** The business logic (Transaction) end ********************/ --------------------------------------------------------------------------------