├── .gitignore
├── Apache-LICENSE2.0
├── CMakeLists.txt
├── README.md
├── build.sh
├── compute_pool
    ├── CMakeLists.txt
    ├── run
    │   ├── CMakeLists.txt
    │   ├── run.cc
    │   └── run_micro.cc
    └── worker
    │   ├── CMakeLists.txt
    │   ├── handler.cc
    │   ├── handler.h
    │   ├── worker.cc
    │   └── worker.h
├── config
    ├── compute_node_config.json
    ├── memory_node_config.json
    ├── micro_config.json
    ├── smallbank_config.json
    ├── tatp_config.json
    └── tpcc_config.json
├── core
    ├── CMakeLists.txt
    ├── allocator
    │   ├── buffer_allocator.h
    │   ├── log_allocator.h
    │   └── region_allocator.h
    ├── base
    │   └── common.h
    ├── cache
    │   ├── addr_cache.h
    │   ├── lock_status.h
    │   └── version_status.h
    ├── connection
    │   ├── meta_manager.cc
    │   ├── meta_manager.h
    │   ├── qp_manager.cc
    │   └── qp_manager.h
    ├── dtx
    │   ├── doorbell.cc
    │   ├── doorbell.h
    │   ├── dtx.cc
    │   ├── dtx.h
    │   ├── dtx_check.cc
    │   ├── dtx_check_ro.cc
    │   ├── dtx_check_rw.cc
    │   ├── dtx_compare.cc
    │   ├── dtx_compare_check.cc
    │   ├── dtx_compare_issue.cc
    │   ├── dtx_exe_commit.cc
    │   ├── dtx_issue.cc
    │   ├── dtx_local_meta.cc
    │   └── structs.h
    ├── flags.h
    ├── memstore
    │   ├── data_item.h
    │   ├── hash_store.h
    │   └── mem_store.h
    ├── scheduler
    │   ├── corotine_scheduler.cc
    │   ├── corotine_scheduler.h
    │   └── coroutine.h
    └── util
    │   ├── ct.h
    │   ├── debug.h
    │   ├── fast_random.h
    │   ├── hash.h
    │   ├── hazard_pointer.h
    │   ├── json_config.h
    │   ├── latency.h
    │   ├── seqlock.h
    │   ├── spinlock.h
    │   ├── thread_pool.h
    │   ├── timer.h
    │   └── zipf.h
├── memory_pool
    ├── CMakeLists.txt
    └── server
    │   ├── CMakeLists.txt
    │   ├── server.cc
    │   └── server.h
├── thirdparty
    ├── rapidjson
    │   ├── allocators.h
    │   ├── document.h
    │   ├── encodedstream.h
    │   ├── encodings.h
    │   ├── error
    │   │   ├── en.h
    │   │   └── error.h
    │   ├── filereadstream.h
    │   ├── filewritestream.h
    │   ├── internal
    │   │   ├── biginteger.h
    │   │   ├── diyfp.h
    │   │   ├── dtoa.h
    │   │   ├── ieee754.h
    │   │   ├── itoa.h
    │   │   ├── meta.h
    │   │   ├── pow10.h
    │   │   ├── stack.h
    │   │   ├── strfunc.h
    │   │   ├── strtod.h
    │   │   └── swap.h
    │   ├── memorybuffer.h
    │   ├── memorystream.h
    │   ├── msinttypes
    │   │   ├── inttypes.h
    │   │   └── stdint.h
    │   ├── pointer.h
    │   ├── prettywriter.h
    │   ├── rapidjson.h
    │   ├── reader.h
    │   ├── stringbuffer.h
    │   └── writer.h
    └── rlib
    │   ├── CMakeLists.txt
    │   ├── common.hpp
    │   ├── logging.hpp
    │   ├── mr.hpp
    │   ├── msg_interface.hpp
    │   ├── pre_connector.hpp
    │   ├── qp.hpp
    │   ├── qp_impl.hpp
    │   ├── rdma_ctrl.hpp
    │   ├── rdma_ctrl_impl.hpp
    │   └── rnic.hpp
└── workload
    ├── CMakeLists.txt
    ├── config
        └── table_type.h
    ├── micro
        ├── CMakeLists.txt
        ├── micro_db.cc
        ├── micro_db.h
        ├── micro_tables
        │   └── micro.json
        ├── micro_txn.cc
        └── micro_txn.h
    ├── smallbank
        ├── CMakeLists.txt
        ├── smallbank_db.cc
        ├── smallbank_db.h
        ├── smallbank_tables
        │   ├── checking.json
        │   └── savings.json
        ├── smallbank_txn.cc
        └── smallbank_txn.h
    ├── tatp
        ├── CMakeLists.txt
        ├── tatp_db.cc
        ├── tatp_db.h
        ├── tatp_tables
        │   ├── README.md
        │   ├── access_info.json
        │   ├── call_forwarding.json
        │   ├── sec_subscriber.json
        │   ├── special_facility.json
        │   └── subscriber.json
        ├── tatp_txn.cc
        └── tatp_txn.h
    └── tpcc
        ├── CMakeLists.txt
        ├── tpcc_db.cc
        ├── tpcc_db.h
        ├── tpcc_tables
            ├── customer.json
            ├── district.json
            ├── item.json
            ├── stock.json
            └── warehouse.json
        ├── tpcc_tables_1G
            ├── customer.json
            ├── district.json
            ├── item.json
            ├── stock.json
            └── warehouse.json
        ├── tpcc_tables_8G
            ├── customer.json
            ├── district.json
            ├── item.json
            ├── stock.json
            └── warehouse.json
        ├── tpcc_tables_normal
            ├── customer.json
            ├── district.json
            ├── item.json
            ├── stock.json
            └── warehouse.json
        ├── tpcc_txn.cc
        └── tpcc_txn.h


/.gitignore:
--------------------------------------------------------------------------------
1 | cmake-build-debug
2 | cmake-build-release
3 | build
4 | .idea
5 | .vscode


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author: Ming Zhang
 2 | # Copyright (c) 2022
 3 | 
 4 | cmake_minimum_required(VERSION 3.3)
 5 | 
 6 | project(FORD)
 7 | 
 8 | # The code base targets C++11. Require it outright so that an unsupported
 9 | # compiler fails at configure time instead of silently decaying to an
10 | # older standard.
11 | set(CMAKE_CXX_STANDARD 11)
12 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
13 | 
14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-result -fPIC")
15 | 
16 | # Release gets full optimization; every other build type (including an unset
17 | # one) is built with debug flags. The variable is expanded inside quotes so
18 | # the comparison is always a plain string comparison.
19 | if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
20 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG")
21 | else()
22 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -DDEBUG -g")
23 | endif()
24 | 
25 | # Header search roots shared by every target below (paths are relative to
26 | # this directory).
27 | include_directories(
28 |     core
29 |     workload
30 |     thirdparty
31 |     compute_pool
32 | )
33 | 
34 | add_subdirectory(thirdparty/rlib)
35 | 
36 | add_subdirectory(core)
37 | 
38 | add_subdirectory(workload)
39 | 
40 | add_subdirectory(compute_pool) # Dep list: rlib->ford->workload_db+_txn->worker
41 | 
42 | add_subdirectory(memory_pool) # Dep list: rlib->workload_db->server


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # FORD
  2 | This is an open source repository for our papers in [FAST 2022](https://www.usenix.org/conference/fast22) and [ACM Transactions on Storage](https://dl.acm.org/journal/tos).
  3 | 
  4 | > **Ming Zhang**, Yu Hua, Pengfei Zuo, and Lurong Liu. "FORD: Fast One-sided RDMA-based Distributed Transactions for Disaggregated Persistent Memory". In 20th USENIX Conference on File and Storage Technologies, FAST 2022, Santa Clara, California, USA, February 22 - 24, 2022, pages 51-68. USENIX Association, 2022.
  5 | >
  6 | > **Ming Zhang**, Yu Hua, Pengfei Zuo, and Lurong Liu. "Localized Validation Accelerates Distributed Transactions on Disaggregated Persistent Memory". ACM Transactions on Storage (TOS), Vol. 19, No. 3, Article 21, pages 1-35, 2023.
  7 | 
  8 | # Brief Introduction
  9 | Persistent memory (PM) disaggregation improves the resource utilization and failure isolation to build a scalable and cost-effective remote memory pool. However, due to offering limited computing power and overlooking the persistence and bandwidth properties of real PMs, existing distributed transaction schemes, which are designed for legacy DRAM-based monolithic servers, fail to efficiently work on the disaggregated PM architecture.
 10 | 
 11 | We propose FORD, a **F**ast **O**ne-sided **R**DMA-based **D**istributed transaction system. FORD thoroughly leverages one-sided RDMA to handle transactions for bypassing the remote CPU in PM pool. To reduce the round trips, FORD batches the read and lock operations into one request to eliminate extra locking and validations. To accelerate the transaction commit, FORD updates all the remote replicas in a single round trip with parallel undo logging and data visibility control. Moreover, considering the limited PM bandwidth, FORD enables the backup replicas to be read to alleviate the load on the primary replicas, thus improving the throughput. To efficiently guarantee the remote data persistency in the PM pool, FORD selectively flushes data to the backup replicas to mitigate the network overheads. FORD further leverages a localized validation scheme to transfer the validation operations for the read-only data from remote to local as much as possible to reduce the round trips. Experimental results demonstrate that FORD improves the transaction throughput and reduces the latency. To learn more, please read our papers.
 12 | 
 13 | # Framework
 14 | We implement a coroutine-enabled framework that runs FORD and its counterparts in the same manner when processing distributed transactions: 1) Issue one-sided RDMA requests. 2) Yield CPU to another coroutine. 3) Check all the RDMA ACKs and replies. This is in fact an interleaved execution model that aims to saturate the CPUs in the compute pool to improve the throughput.
 15 | 
 16 | # Prerequisites to Build
 17 | - Hardware
 18 |   - Intel Optane DC Persistent Memory
 19 |   - Mellanox InfiniBand NIC (e.g., ConnectX-5) that supports RDMA
 20 |   - Mellanox InfiniBand Switch
 21 | - Software
 22 |   - Operating System: Ubuntu 18.04 LTS or CentOS 7
 23 |   - Programming Language: C++ 11
 24 |   - Compiler: g++ 7.5.0 (at least)
 25 |   - Libraries: ibverbs, pthread, boost_coroutine, boost_context, boost_system
 26 | - Machines
 27 |   - 3 machines, one acts as the compute pool and other two act as the memory pool to maintain a primary-backup replication
 28 | 
 29 | 
 30 | # Configure
 31 | - Configure all the options in ```compute_node_config.json``` and ```memory_node_config.json``` in ```config/``` as you need, e.g., machine_num, machine_id, ip, port, and PM path, etc.
 32 | - Configure the options in ```core/flags.h```, e.g., ```MAX_ITEM_SIZE```, etc.
 33 | - Configure the number of backup replicas in ```core/base/common.h```, i.e., BACKUP_DEGREE.
 34 | 
 35 | # Build
 36 | The code is built with CMake (version >= 3.3). We provide a shell script, ```build.sh```, for easy building:
 37 | 
 38 | ```sh
 39 | $ git clone https://github.com/minghust/ford.git
 40 | $ cd ford
 41 | ```
 42 | 
 43 | - For each machine in the memory pool: 
 44 | 
 45 | ```sh 
 46 | $ ./build.sh -s
 47 | ```
 48 | 
 49 | - For each machine in the compute pool (boost is required):
 50 | 
 51 | ```sh 
 52 | $ ./build.sh
 53 | ```
 54 | 
 55 | Note that the Release version is the default option for better performance. However, if you need a Debug version, just add ```-d``` option, e.g., ```./build.sh -s -d``` for the memory pool, and ```./build.sh -d``` for the compute pool.
 56 | 
 57 | After running the ```build.sh``` script, cmake will automatically generate a ```build/``` directory in which all the compiled libraries and executable files are stored.
 58 | 
 59 | 
 60 | # Run
 61 | - For each machine in the memory pool: Start server to load tables. Due to using PM in *devdax* mode, you may need ```sudo``` if you are not a root user.
 62 | ```sh
 63 | $ cd ford
 64 | $ cd ./build/memory_pool/server
 65 | $ sudo ./zm_mem_pool
 66 | ```
 67 | 
 68 | - For each machine in the compute pool: After loading database tables in the memory pool, we run a benchmark, e.g., TPCC.
 69 | ```sh
 70 | $ cd ford
 71 | $ cd ./build/compute_pool/run
 72 | $ ./run tpcc ford 16 8 # run ford with 16 threads and each thread spawns 8 coroutines
 73 | ```
 74 | Now, the memory nodes are in a disaggregated mode, i.e., the CPUs are not used for any computation tasks in transaction processing.
 75 | 
 76 | # Results
 77 | After a run finishes, a ```bench_results``` directory is generated automatically to record the results. The summarized attempted and committed throughputs (K txn/sec) and the average 50th and 99th percentile latencies are recorded in ```bench_results/tpcc/result.txt```. Moreover, the detailed results of each thread are recorded in ```bench_results/tpcc/detail_result.txt```.
 78 | 
 79 | # Acknowledgments
 80 | 
 81 | We sincerely thank the following open source repos (in the ```thirdparty/``` directory) that help us shorten the developing process
 82 | 
 83 | - [rlib](https://github.com/wxdwfc/rlib): We use rlib to establish RDMA connections. It is a convenient and easy-to-understand library for setting up RDMA connections. Moreover, we have modified rlib: 1) Fix a bug in en/decoding the QP id. 2) Change the QP connections from the active mode to the passive mode on the server side. In this way, all the QP connections are completed without explicit ```connect``` usages in the server-side code. This is beneficial for the case in which the server does not know how many clients will issue the connect requests.
 84 | 
 85 | - [rapidjson](https://github.com/Tencent/rapidjson): We use rapidjson to read configurations from json files. It is an easy-to-use library that simplifies configuration handling.
 86 | 
 87 | # LICENSE
 88 | 
 89 | ```text
 90 | Copyright [2022] [Ming Zhang]
 91 | 
 92 | Licensed under the Apache License, Version 2.0 (the "License");
 93 | you may not use this file except in compliance with the License.
 94 | You may obtain a copy of the License at
 95 | 
 96 |     http://www.apache.org/licenses/LICENSE-2.0
 97 | 
 98 | Unless required by applicable law or agreed to in writing, software
 99 | distributed under the License is distributed on an "AS IS" BASIS,
100 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
101 | See the License for the specific language governing permissions and
102 | limitations under the License.
103 | ```
104 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Author: Ming Zhang
 4 | # Copyright (c) 2022
 5 | 
 6 | # Usage: ./build.sh [-s] [-d]
 7 | #   -s  build only the memory-pool server (zm_mem_pool)
 8 | #   -d  configure a Debug build instead of the default Release build
 9 | # The script exits with a non-zero status as soon as any step fails.
10 | BUILD_TARGET=client
11 | BUILD_TYPE=Release
12 | 
13 | while getopts "sd" arg
14 | do
15 |   case $arg in
16 |     s)
17 |       echo "building server";
18 |       BUILD_TARGET="server";
19 |       ;;
20 |     d)
21 |       BUILD_TYPE=Debug;
22 |       ;;
23 |     ?)
24 |       echo "unknown argument"
25 |       exit 1
26 |       ;;
27 |   esac
28 | done
29 | 
30 | if [[ -d build ]]; then
31 |   echo "Build directory exists";
32 | else
33 |   echo "Create build directory";
34 |   mkdir build || exit 1
35 | fi
36 | 
37 | CMAKE_CMD="cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ../"
38 | echo "${CMAKE_CMD}"
39 | # Without the checks below a failed cd would run cmake/make in the source
40 | # tree, and a failed configure would still fall through to make.
41 | cd ./build || exit 1
42 | # Deliberately unquoted: the string must be split into command + arguments.
43 | ${CMAKE_CMD} || exit 1
44 | 
45 | if [ "${BUILD_TARGET}" == "server" ];then
46 |   echo "------------------- building server ------------------"
47 |   make zm_mem_pool -j32 || exit 1
48 | else
49 |   echo "------------------- building client + server ------------------"
50 |   make -j32 || exit 1
51 | fi
52 | # Only reached when every step above succeeded.
53 | echo "-------------------- build finish ----------------------"
54 | 


--------------------------------------------------------------------------------
/compute_pool/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Author: Ming Zhang
2 | # Copyright (c) 2022
3 | 
4 | # The run executable links against the worker library (see run/CMakeLists.txt),
5 | # so the worker subdirectory is added before run.
6 | add_subdirectory(worker)
7 | 
8 | add_subdirectory(run)


--------------------------------------------------------------------------------
/compute_pool/run/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Author: Ming Zhang
2 | # Copyright (c) 2022
3 | 
4 | # Benchmark driver: a single translation unit linked against the worker library.
5 | add_executable(run run.cc)
6 | target_link_libraries(run worker)
7 | 


--------------------------------------------------------------------------------
/compute_pool/run/run.cc:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #include "worker/handler.h"
 5 | 
 6 | // Entry point: configures the compute node, runs the benchmark threads (whose coroutines act as transaction coordinators) and writes the results.
 7 | int main(int argc, char* argv[]) {
 8 |   if (argc < 3) {  // benchmark name and system name are mandatory
 9 |     std::cerr << "./run <benchmark_name> <system_name> <thread_num>(optional) <coroutine_num>(optional). E.g., ./run tatp ford 16 8" << std::endl;
10 |     return 1;  // non-zero so that calling scripts can detect the usage error
11 |   }
12 | 
13 |   Handler handler;  // automatic storage: nothing to delete, nothing leaked
14 |   handler.ConfigureComputeNode(argc, argv);
15 |   handler.GenThreads(std::string(argv[1]));
16 |   handler.OutputResult(std::string(argv[1]), std::string(argv[2]));
17 | }
18 | 


--------------------------------------------------------------------------------
/compute_pool/run/run_micro.cc:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #include "worker/handler.h"
 5 | 
 6 | // Entry point of the micro-benchmark: configures the compute node, runs the benchmark threads and writes the results under the fixed names "MICRO"/"FORD".
 7 | int main(int argc, char* argv[]) {
 8 |   // e.g. ./run_micro s-100 means run FORD with skewed access and write ratio 100%
 9 |   Handler handler;  // automatic storage: nothing to delete, nothing leaked
10 |   handler.ConfigureComputeNodeForMICRO(argc, argv);
11 |   handler.GenThreadsForMICRO();
12 |   handler.OutputResult("MICRO", "FORD");
13 | }
14 | 


--------------------------------------------------------------------------------
/compute_pool/worker/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author: Ming Zhang
 2 | # Copyright (c) 2022
 3 | 
 4 | # Static library with the benchmark handler and the worker-thread code that
 5 | # the run executable links against.
 6 | add_library(worker STATIC
 7 |         handler.cc
 8 |         worker.cc
 9 |         )
10 | 
11 | set_target_properties(worker PROPERTIES LINKER_LANGUAGE CXX)
12 | 
13 | # FORD core plus the database/transaction libraries of every workload.
14 | target_link_libraries(worker ford tatp_db tatp_txn smallbank_db smallbank_txn tpcc_db tpcc_txn micro_db micro_txn)
15 | 


--------------------------------------------------------------------------------
/compute_pool/worker/handler.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <unistd.h>
 7 | 
 8 | #include <algorithm>
 9 | #include <atomic>
10 | #include <fstream>
11 | #include <iostream>
12 | #include <mutex>
13 | #include <string>
14 | #include <vector>
15 | 
16 | class Handler {  // Benchmark driver used by the run.cc / run_micro.cc entry points; methods are only declared here
17 |  public:
18 |   Handler() {}
19 |   // For macro-benchmark. run.cc calls these three in the order listed.
20 |   void ConfigureComputeNode(int argc, char* argv[]);  // receives main()'s argc/argv unchanged
21 |   void GenThreads(std::string bench_name);  // bench_name: benchmark name (argv[1] in run.cc)
22 |   void OutputResult(std::string bench_name, std::string system_name);  // run.cc passes argv[1] and argv[2]; run_micro.cc passes "MICRO" and "FORD"
23 | 
24 |   // For micro-benchmark. run_micro.cc calls these two, then OutputResult.
25 |   void ConfigureComputeNodeForMICRO(int argc, char* argv[]);  // receives main()'s argc/argv unchanged
26 |   void GenThreadsForMICRO();
27 | };


--------------------------------------------------------------------------------
/compute_pool/worker/worker.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "allocator/region_allocator.h"
 7 | #include "base/common.h"
 8 | #include "cache/lock_status.h"
 9 | #include "cache/version_status.h"
10 | #include "connection/meta_manager.h"
11 | 
12 | #include "tatp/tatp_db.h"
13 | #include "smallbank/smallbank_db.h"
14 | #include "tpcc/tpcc_db.h"
15 | #include "micro/micro_db.h"
16 | 
17 | struct thread_params {  // Argument bundle; run_thread() receives a pointer to one of these
18 |   t_id_t thread_local_id;  // presumably the thread's index within this machine — TODO confirm
19 |   t_id_t thread_global_id;  // presumably the thread's index across all machines — TODO confirm
20 |   t_id_t thread_num_per_machine;
21 |   t_id_t total_thread_num;
22 |   MetaManager* global_meta_man;  // ownership/lifetime not visible here — verify against the code that fills this struct
23 |   VersionCache* global_status;
24 |   LockCache* global_lcache;
25 |   RDMARegionAllocator* global_rdma_region;  // its GetThreadLocalRegion(tid) yields one thread's buffer range
26 |   int coro_num;  // presumably coroutines per thread (cf. "coroutine_num" in compute_node_config.json) — confirm
27 |   std::string bench_name;
28 | };
29 | // Worker entry point (declared here only): takes the thread's parameter bundle and one client object per macro-benchmark (TATP, SmallBank, TPCC).
30 | void run_thread(thread_params* params,
31 |                 TATP* tatp_client,
32 |                 SmallBank* smallbank_client,
33 |                 TPCC* tpcc_client);


--------------------------------------------------------------------------------
/config/compute_node_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "local_compute_node": {
 3 |     "machine_num": 1,
 4 |     "machine_id": 0,
 5 |     "thread_num_per_machine": 16,
 6 |     "coroutine_num": 8,
 7 |     "local_port": 12345,
 8 |     "txn_system": 2,
 9 |     "comment": "0 is farm, 1 is drtmh, 2 is ford"
10 |   },
11 |   "remote_pm_nodes": {
12 |     "remote_ips": [
13 |       "10.0.0.1",
14 |       "10.0.0.3",
15 |       "10.0.0.5"
16 |     ],
17 |     "remote_ports": [
18 |       12346,
19 |       12346,
20 |       12346
21 |     ],
22 |     "remote_meta_ports": [
23 |       12347,
24 |       12347,
25 |       12347
26 |     ]
27 |   }
28 | }


--------------------------------------------------------------------------------
/config/memory_node_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "local_memory_node": {
 3 |     "machine_num": 2,
 4 |     "machine_id": 0,
 5 |     "local_port": 12346,
 6 |     "local_meta_port": 12347,
 7 |     "use_pm": 1,
 8 |     "mem_size_GB": 8,
 9 |     "log_buf_size_GB": 1,
10 |     "pm_root": "/dev/dax0.1",
11 |     "workload": "TPCC"
12 |   },
13 |   "remote_compute_nodes": {
14 |     "compute_node_ips": ["10.0.0.7"],
15 |     "compute_node_ports": [12345]
16 |   }
17 | }


--------------------------------------------------------------------------------
/config/micro_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "micro": {
 3 |     "num_keys": 1000000,
 4 |     "is_skewed": true,
 5 |     "zipf_theta": 0.99,
 6 |     "data_set_size": 1,
 7 |     "write_ratio": 25,
 8 |     "attempted_num": 100000
 9 |   }
10 | }


--------------------------------------------------------------------------------
/config/smallbank_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "smallbank": {
3 |     "num_accounts": 100000,
4 |     "num_hot_accounts": 4000,
5 |     "attempted_num": 1000000
6 |   }
7 | }


--------------------------------------------------------------------------------
/config/tatp_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "tatp": {
3 |     "num_subscriber": 100000,
4 |     "attempted_num": 1000000
5 |   }
6 | }


--------------------------------------------------------------------------------
/config/tpcc_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "tpcc": {
3 |     "attempted_num": 50000
4 |   }
5 | }
6 | 


--------------------------------------------------------------------------------
/core/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author: Ming Zhang
 2 | # Copyright (c) 2022
 3 | 
 4 | # FORD core: connection management, distributed-transaction (dtx) logic and
 5 | # the coroutine scheduler, bundled into one static library.
 6 | add_library(ford STATIC
 7 |         # connection/
 8 |         connection/meta_manager.cc
 9 |         connection/qp_manager.cc
10 |         # dtx/
11 |         dtx/doorbell.cc
12 |         dtx/dtx_check.cc
13 |         dtx/dtx_check_ro.cc
14 |         dtx/dtx_check_rw.cc
15 |         dtx/dtx_issue.cc
16 |         dtx/dtx.cc
17 |         dtx/dtx_exe_commit.cc
18 |         dtx/dtx_local_meta.cc
19 |         dtx/dtx_compare.cc
20 |         dtx/dtx_compare_issue.cc
21 |         dtx/dtx_compare_check.cc
22 |         # scheduler/
23 |         scheduler/corotine_scheduler.cc
24 |         )
25 | 
26 | set_target_properties(ford PROPERTIES LINKER_LANGUAGE CXX)
27 | 
28 | # rlib is the bundled RDMA library; pthread and the Boost libraries are
29 | # linked by name and must be installed on the build machine.
30 | target_link_libraries(ford rlib pthread boost_coroutine boost_context boost_system)


--------------------------------------------------------------------------------
/core/allocator/buffer_allocator.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "allocator/region_allocator.h"
 7 | #include "base/common.h"
 8 | 
 9 | // Per-thread bump allocator that carves temporary buffers out of a registered RDMA region.
10 | class RDMABufferAllocator {
11 |  public:
12 |   // s and e delimit the thread-local region [s, e); the cursor starts at s.
13 |   RDMABufferAllocator(char* s, char* e) : start(s), end(e), cur_offset(0) {}
14 | 
15 |   // Returns a pointer to `size` bytes inside the region and advances the cursor.
16 |   //
17 |   // When the request does not fit behind the cursor, the cursor wraps to the
18 |   // front of the region and earlier allocations get overwritten. The design
19 |   // assumes this is safe because the region is typically GB-scale, so buffers
20 |   // at the front have already finished serving their RDMA requests/replies (or
21 |   // their transaction aborted). Allocation is therefore just pointer arithmetic.
22 |   // Callers needing stricter guarantees can re-implement this interface on top
23 |   // of a general-purpose allocator such as ptmalloc/jemalloc/tcmalloc.
24 |   ALWAYS_INLINE
25 |   char* Alloc(size_t size) {
26 |     const bool exhausted = start + cur_offset + size > end;
27 |     if (unlikely(exhausted)) cur_offset = 0;
28 | 
29 |     const uint64_t begin = cur_offset;
30 |     cur_offset = begin + size;
31 |     return start + begin;
32 |   }
33 | 
34 |   // Intentionally a no-op: space is reclaimed implicitly when the cursor wraps.
35 |   ALWAYS_INLINE
36 |   void Free(void* p) {}
37 | 
38 |  private:
39 |   // Bounds of this thread's local RDMA region: [start, end).
40 |   char* start;
41 |   char* end;
42 |   // Byte offset from `start` at which the next allocation begins.
43 |   uint64_t cur_offset;
44 | };
45 | 


--------------------------------------------------------------------------------
/core/allocator/log_allocator.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "base/common.h"
 7 | 
 8 | const offset_t LOG_BUFFER_SIZE = 1024 * 1024 * 1024;
 9 | const node_id_t NUM_MEMORY_NODES = BACKUP_DEGREE + 1;
10 | 
11 | // Hands out remote offsets at which one thread writes its log entries.
12 | class LogOffsetAllocator {
13 |  public:
14 |   // Splits LOG_BUFFER_SIZE evenly among num_thread threads; thread `tid` gets
15 |   // the slice [tid * slice, (tid + 1) * slice), identically for every memory node.
16 |   LogOffsetAllocator(t_id_t tid, t_id_t num_thread) {
17 |     const auto slice = LOG_BUFFER_SIZE / num_thread;
18 |     const auto slice_begin = tid * slice;
19 |     const auto slice_end = (tid + 1) * slice;
20 |     for (node_id_t node = 0; node < NUM_MEMORY_NODES; ++node) {
21 |       start_log_offsets[node] = slice_begin;
22 |       end_log_offsets[node] = slice_end;
23 |       current_log_offsets[node] = 0;
24 |     }
25 |   }
26 | 
27 |   // Returns the offset for the next log entry of `log_entry_size` bytes on
28 |   // node `node_id` and advances that node's cursor. When the entry would run
29 |   // past the end of the slice, the cursor wraps to the start of the slice.
30 |   offset_t GetNextLogOffset(node_id_t node_id, size_t log_entry_size) {
31 |     offset_t& cursor = current_log_offsets[node_id];
32 |     const offset_t base = start_log_offsets[node_id];
33 |     if (unlikely(base + cursor + log_entry_size > end_log_offsets[node_id])) {
34 |       cursor = 0;
35 |     }
36 |     const offset_t offset = base + cursor;
37 |     cursor += log_entry_size;
38 |     return offset;
39 |   }
40 | 
41 |  private:
42 |   // Per memory node: slice start, slice end (exclusive), and the cursor
43 |   // relative to the slice start.
44 |   offset_t start_log_offsets[NUM_MEMORY_NODES];
45 |   offset_t end_log_offsets[NUM_MEMORY_NODES];
46 |   offset_t current_log_offsets[NUM_MEMORY_NODES];
47 | };


--------------------------------------------------------------------------------
/core/allocator/region_allocator.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "connection/meta_manager.h"
 7 | 
 8 | const uint64_t PER_THREAD_ALLOC_SIZE = (size_t)500 * 1024 * 1024;
 9 | 
10 | // This allocator is a global one which manages all the RDMA regions in this machine
11 | 
12 | // |                   | <- t1 start
13 | // |                   |
14 | // |                   |
15 | // |                   |
16 | // |                   | <- t1 end. t2 start
17 | // |                   |
18 | // |                   |
19 | // |                   |
20 | // |                   | <- t2 end. t3 start
21 | 
22 | // Owns one contiguous buffer, registered for RDMA under CLIENT_MR_ID, and
23 | // hands out equally sized per-thread slices of PER_THREAD_ALLOC_SIZE bytes.
24 | class RDMARegionAllocator {
25 |  public:
26 |   // Allocates thread_num_per_machine * PER_THREAD_ALLOC_SIZE zeroed bytes and
27 |   // registers them on the NIC opened by global_meta_man.
28 |   RDMARegionAllocator(MetaManager* global_meta_man, t_id_t thread_num_per_machine)
29 |       : global_mr(nullptr), thread_num(thread_num_per_machine), log_buf_size(0) {
30 |     size_t global_mr_size = (size_t)thread_num_per_machine * PER_THREAD_ALLOC_SIZE;
31 |     // Register a buffer to the previous opened device. It's DRAM in compute pools
32 |     global_mr = (char*)malloc(global_mr_size);
33 |     // The region is hundreds of MB per thread, so the allocation can realistically
34 |     // fail; stop here instead of crashing inside memset() on a null pointer.
35 |     RDMA_ASSERT(global_mr != nullptr);
36 |     memset(global_mr, 0, global_mr_size);
37 |     RDMA_ASSERT(global_meta_man->global_rdma_ctrl->register_memory(CLIENT_MR_ID, global_mr, global_mr_size, global_meta_man->opened_rnic));
38 |   }
39 | 
40 |   ~RDMARegionAllocator() {
41 |     if (global_mr) free(global_mr);
42 |   }
43 | 
44 |   // Returns the [begin, end) address range of thread `tid`'s slice.
45 |   // `tid` must be smaller than the thread count given to the constructor.
46 |   ALWAYS_INLINE
47 |   std::pair<char*, char*> GetThreadLocalRegion(t_id_t tid) {
48 |     assert(tid < thread_num);
49 |     return std::make_pair(global_mr + tid * PER_THREAD_ALLOC_SIZE, global_mr + (tid + 1) * PER_THREAD_ALLOC_SIZE);
50 |   }
51 | 
52 |  private:
53 |   char* global_mr;  // memory region
54 |   t_id_t thread_num;
55 |   size_t log_buf_size;  // not read or written by any method of this class; zero-initialized to avoid an indeterminate value
56 | };
57 | 


--------------------------------------------------------------------------------
/core/base/common.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <cstddef>  // For size_t
 7 | #include <cstdint>  // For uintxx_t
 8 | 
 9 | #include "flags.h"
10 | 
11 | // Global specification
12 | using tx_id_t = uint64_t;     // Transaction id type
13 | using t_id_t = uint32_t;      // Thread id type
14 | using coro_id_t = int;        // Coroutine id type
15 | using node_id_t = int;        // Machine id type
16 | using mr_id_t = int;          // Memory region id type
17 | using table_id_t = uint64_t;  // Table id type
18 | using itemkey_t = uint64_t;   // Data item key type, used in DB tables
19 | using offset_t = int64_t;     // Offset type. Usually used in remote offset for RDMA
20 | using version_t = uint64_t;   // Version type, used in version checking
21 | using lock_t = uint64_t;      // Lock type, used in remote locking
22 | 
23 | // Memory region ids for server's hash store buffer and undo log buffer
24 | const mr_id_t SERVER_HASH_BUFF_ID = 97;
25 | const mr_id_t SERVER_LOG_BUFF_ID = 98;
26 | 
27 | // Memory region ids for client's local_mr
28 | const mr_id_t CLIENT_MR_ID = 100;
29 | 
30 | // Indicating that memory store metas have been transmitted
31 | const uint64_t MEM_STORE_META_END = 0xE0FF0E0F;
32 | 
33 | // Node and thread conf
34 | #define BACKUP_DEGREE 2          // Backup memory node number. MUST **NOT** BE SET TO 0. log_allocator.h derives NUM_MEMORY_NODES = BACKUP_DEGREE + 1 from it
35 | #define MAX_REMOTE_NODE_NUM 100  // Max remote memory node number
36 | #define MAX_DB_TABLE_NUM 15      // Max DB tables
37 | 
38 | // Data state
39 | #define STATE_INVISIBLE 0x8000000000000000  // Data cannot be read. Only the most significant bit of a 64-bit word is set
40 | #define STATE_LOCKED 1                      // Data cannot be written. Used for serializing transactions
41 | #define STATE_CLEAN 0
42 | 
43 | // Alias
44 | #define Aligned8 __attribute__((aligned(8)))
45 | #define ALWAYS_INLINE inline __attribute__((always_inline))
46 | #define TID (std::this_thread::get_id())  // NOTE(review): this header does not include <thread> itself; users of TID must make it available
47 | 
48 | // Helpful for improving condition prediction hit rate
49 | // (!!(x) turns any truthy value into exactly 1 before it is handed to __builtin_expect)
50 | #define unlikely(x) __builtin_expect(!!(x), 0)
51 | 


--------------------------------------------------------------------------------
/core/cache/addr_cache.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <map>
 7 | #include <unordered_map>
 8 | 
 9 | #include "base/common.h"
10 | 
11 | const offset_t NOT_FOUND = -1;
12 | 
13 | // For fast remote address lookup
14 | class AddrCache {
15 |  public:
16 |   void Insert(node_id_t remote_node_id, table_id_t table_id, itemkey_t key, offset_t remote_offset) {
17 |     // Cache (or overwrite) the remote offset of `key` in table `table_id` on
18 |     // node `remote_node_id`.
19 |     //
20 |     // operator[] creates a missing mapped value on first access, so the node
21 |     // and table levels come into existence on demand; no explicit find() and
22 |     // initialization of empty inner maps is required.
23 | 
24 |     // Level 1: per-node map of tables.
25 |     auto& tables_of_node = addr_map[remote_node_id];
26 |     // Level 2: per-table map of <key, offset> pairs.
27 |     auto& offsets_of_table = tables_of_node[table_id];
28 |     offsets_of_table[key] = remote_offset;
29 |   }
30 | 
31 |   // We know which node to read, but we do not konw whether it is cached before
32 |   offset_t Search(node_id_t remote_node_id, table_id_t table_id, itemkey_t key) {
33 |     auto node_search = addr_map.find(remote_node_id);
34 |     if (node_search == addr_map.end()) return NOT_FOUND;
35 |     auto table_search = node_search->second.find(table_id);
36 |     if (table_search == node_search->second.end()) return NOT_FOUND;
37 |     auto offset_search = table_search->second.find(key);
38 |     return offset_search == table_search->second.end() ? NOT_FOUND : offset_search->second;
39 |   }
40 | 
41 |   // If we have read this record, we do not read it from another node
42 |   void Search(table_id_t query_table_id, itemkey_t query_key, node_id_t& remote_node_id, offset_t& remote_offset) {
43 |     // look up node first
44 |     for (auto it = addr_map.begin(); it != addr_map.end(); it++) {
45 |       auto table_search = it->second.find(query_table_id);
46 |       if (table_search == it->second.end()) {
47 |         continue;
48 |       }
49 | 
50 |       auto offset_search = table_search->second.find(query_key);
51 |       if (offset_search == table_search->second.end()) {
52 |         // No such key. Change to hash read
53 |         return;
54 |       }
55 | 
56 |       // Tableid and key match. Get the cached remote node id and remote offset
57 |       remote_node_id = it->first;
58 |       remote_offset = offset_search->second;
59 |       return;
60 |     }
61 |   }
62 | 
63 |   size_t TotalAddrSize() {
64 |     size_t total_size = 0;
65 |     for (auto it = addr_map.begin(); it != addr_map.end(); it++) {
66 |       total_size += sizeof(node_id_t);
67 |       for (auto it2 = it->second.begin(); it2 != it->second.end(); it2++) {
68 |         total_size += sizeof(table_id_t);
69 |         for (auto it3 = it2->second.begin(); it3 != it2->second.end(); it3++) {
70 |           total_size += (sizeof(itemkey_t) + sizeof(offset_t));
71 |         }
72 |       }
73 |     }
74 | 
75 |     return total_size;
76 |   }
77 | 
78 |  private:
79 |   std::unordered_map<node_id_t, std::unordered_map<table_id_t, std::unordered_map<itemkey_t, offset_t>>> addr_map;
80 | };


--------------------------------------------------------------------------------
/core/cache/lock_status.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include <functional>
  7 | #include <iostream>
  8 | 
  9 | #include "flags.h"
 10 | #include "dtx/structs.h"
 11 | 
 12 | struct LockBkt {  // One slot of a local lock table
 13 |   std::atomic<uint64_t> key;   // Item key that owns this slot. 0 means the slot is empty
 14 |   std::atomic<uint64_t> lock;  // Lock word of that key: STATE_CLEAN (0) or STATE_LOCKED
 15 | };
 16 | 
 17 | // Unfortunately, we find that in a high concurrency environment, substantial CPU CAS operations
 18 | // need to frequently retry to find empty slots due to the high collision rates in a small hash table. 
 19 | // This will in turn cause the latency of local locking to be an order of magnitude higher than the 
 20 | // remote locking, i.e., by using the microsecond-latency RDMA CAS. Due to this reason, we disable 
 21 | // the use of local cache in FORD.
 22 | 
 23 | // Local lock table: one linear-probing hash table of LockBkt slots per DB table.
 24 | // A slot is claimed by CAS-ing its key from 0 to the item key; the item is then locked by CAS-ing
 25 | // the slot's lock word from 0 to STATE_LOCKED. Claimed slots are never freed, and an item key of 0
 26 | // cannot be told apart from an empty slot.
 27 | class LockCache {
 28 |  public:
 29 |   // Allocates and zeroes one slot array per table when LOCAL_LOCK is enabled. When it is disabled,
 30 |   // status_table stays empty and TryLock/Unlock must not be called
 31 |   LockCache() {
 32 | #if LOCAL_LOCK
 33 |     RDMA_LOG(INFO) << "Initializing local lock tables";
 34 |     total_slot = (size_t)(SLOT_PER_BKT * NUM_BKT);
 35 |     const size_t sz = sizeof(LockBkt) * total_slot;
 36 |     for (int i = 0; i < MAX_TABLE_NUM; i++) {
 37 |       auto* table = new LockBkt[total_slot];
 38 |       memset(table, 0, sz);
 39 |       RDMA_LOG(INFO) << "Initializing table " << i << " " << sz / 1024 / 1024 << " MB";
 40 |       status_table.push_back(table);
 41 |     }
 42 | #endif
 43 |   }
 44 | 
 45 |   ~LockCache() {
 46 |     for (auto* table : status_table) delete[] table;  // delete[] on a null pointer is a no-op
 47 |   }
 48 | 
 49 |   // Tries to lock every item of read_write_set, in order, recording the slot index in item.bkt_idx.
 50 |   // Returns false as soon as one item is already locked by another coordinator, or its table has
 51 |   // no slot left for a new key. Locks taken for earlier items stay held in that case; the caller
 52 |   // has to release them via Unlock()
 53 |   bool TryLock(std::vector<DataSetItem>& read_write_set) {
 54 |     for (auto& item : read_write_set) {
 55 |       itemkey_t my_key = item.item_ptr->key;
 56 |       table_id_t table_id = item.item_ptr->table_id;
 57 |       auto* table = status_table[table_id];
 58 | 
 59 |       size_t probes = 0;
 60 |       for (uint64_t bkt_id = GetBktId(my_key);; bkt_id++) {
 61 |         // Every slot has been visited and holds another key: give up instead of probing forever
 62 |         if (probes++ == total_slot) return false;
 63 |         bkt_id = bkt_id % total_slot;
 64 | 
 65 |         uint64_t probed_key = table[bkt_id].key.load(std::memory_order_relaxed);
 66 | 
 67 |         if (probed_key != my_key) {
 68 |           // Another key occupies
 69 |           if (probed_key != 0) continue;
 70 | 
 71 |           // An empty slot. Try to claim it for my key
 72 |           uint64_t expect_key = 0;
 73 |           if (!table[bkt_id].key.compare_exchange_strong(expect_key, my_key)) {
 74 |             // Lost the race for this slot. expect_key now holds the key that won it
 75 |             if (expect_key == my_key) return false;  // Another thread locks the same key, I abort
 76 |             continue;                                // Another thread writes a different key, I keep probe
 77 |           }
 78 |         }
 79 | 
 80 |         // The slot holds my key, either found or just claimed. We cannot just use store here, because
 81 |         // another thread may find my key in this slot and cas the lock right after we fill the key.
 82 |         // Only if the cas succeeds do we get the lock
 83 |         lock_t expect_lock = 0;
 84 |         if (!table[bkt_id].lock.compare_exchange_strong(expect_lock, STATE_LOCKED)) {
 85 |           return false;  // This key is locked by another coordinator
 86 |         }
 87 |         // I successfully lock this key
 88 |         item.bkt_idx = (int64_t)bkt_id;
 89 |         break;
 90 |       }
 91 |     }
 92 | 
 93 |     return true;
 94 |   }
 95 | 
 96 |   // Releases every lock recorded in read_write_set by TryLock and clears the recorded slot indices.
 97 |   // Items whose bkt_idx is -1 hold no local lock and are skipped
 98 |   void Unlock(std::vector<DataSetItem>& read_write_set) {
 99 |     for (auto& item : read_write_set) {
100 |       if (item.bkt_idx == -1) continue;
101 |       auto* table = status_table[item.item_ptr->table_id];
102 |       // Release ordering: what was done while holding the lock is visible to the next thread whose
103 |       // cas acquires it. A relaxed store does not give that guarantee
104 |       table[item.bkt_idx].lock.store(STATE_CLEAN, std::memory_order_release);
105 |       // Forget the slot, so that a repeated Unlock cannot release a lock that another thread took meanwhile
106 |       item.bkt_idx = -1;
107 |     }
108 |   }
109 | 
110 |  private:
111 |   // Mixes the key bits (the 64-bit finalizer of MurmurHash3). The caller reduces it modulo total_slot
112 |   uint64_t GetBktId(itemkey_t k) {
113 |     // return std_hash(k);
114 |     k ^= k >> 33;
115 |     k *= 0xff51afd7ed558ccd;
116 |     k ^= k >> 33;
117 |     k *= 0xc4ceb9fe1a85ec53;
118 |     k ^= k >> 33;
119 |     return k;
120 |   }
121 | 
122 |   std::hash<itemkey_t> std_hash;
123 |   std::vector<LockBkt*> status_table;  // One slot array per table, indexed by table id
124 |   size_t total_slot = 0;               // Number of slots in each slot array. 0 until the tables are allocated
125 |   int CONFLICT_COUNT = 0;
126 | };


--------------------------------------------------------------------------------
/core/connection/meta_manager.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include <atomic>
  7 | #include <unordered_map>
  8 | 
  9 | #include "base/common.h"
 10 | #include "memstore/hash_store.h"
 11 | #include "rlib/rdma_ctrl.hpp"
 12 | 
 13 | using namespace rdmaio;
 14 | 
 15 | // const size_t LOG_BUFFER_SIZE = 1024 * 1024 * 512;
 16 | 
 17 | struct RemoteNode {  // Identity and network endpoint of one remote node
 18 |   node_id_t node_id;  // Id of the node. Used as the key/index of the per-node MR maps and QP arrays
 19 |   std::string ip;     // Address the queue pairs connect to
 20 |   int port;           // Port the queue pairs connect to
 21 | };
 22 | 
 23 | // Holds the metadata a compute node keeps about the memory nodes: per-table hash metas and node ids
 24 | // (primary and backups), per-node remote memory regions, and the RDMA handles shared with QPManager.
 25 | class MetaManager {
 26 |  public:
 27 |   MetaManager();
 28 | 
 29 |   node_id_t GetMemStoreMeta(std::string& remote_ip, int remote_port);
 30 | 
 31 |   void GetMRMeta(const RemoteNode& node);
 32 | 
 33 |   /*** Memory Store Metadata ***/
 34 |   // Returns the primary hash meta of table_id. The table must be known (asserted)
 35 |   ALWAYS_INLINE
 36 |   const HashMeta& GetPrimaryHashMetaWithTableID(const table_id_t table_id) const {
 37 |     auto search = primary_hash_metas.find(table_id);
 38 |     assert(search != primary_hash_metas.end());
 39 |     return search->second;
 40 |   }
 41 | 
 42 |   // Returns the backup hash metas of table_id. The result is never null, but may be empty
 43 |   ALWAYS_INLINE
 44 |   const std::vector<HashMeta>* GetBackupHashMetasWithTableID(const table_id_t table_id) const {
 45 |     assert(table_id < MAX_DB_TABLE_NUM);  // The metas live in a fixed-size array indexed by table id
 46 |     return &(backup_hash_metas[table_id]);
 47 |   }
 48 | 
 49 |   /*** Node ID Metadata ***/
 50 |   // Returns the id of the primary node of table_id. The table must be known (asserted)
 51 |   ALWAYS_INLINE
 52 |   node_id_t GetPrimaryNodeID(const table_id_t table_id) const {
 53 |     auto search = primary_table_nodes.find(table_id);
 54 |     assert(search != primary_table_nodes.end());
 55 |     return search->second;
 56 |   }
 57 | 
 58 |   // Returns the ids of the backup nodes of table_id. The result is never null, but may be empty
 59 |   ALWAYS_INLINE
 60 |   const std::vector<node_id_t>* GetBackupNodeID(const table_id_t table_id) const {
 61 |     assert(table_id < MAX_DB_TABLE_NUM);  // The ids live in a fixed-size array indexed by table id
 62 |     return &(backup_table_nodes[table_id]);
 63 |   }
 64 | 
 65 |   /*** RDMA Memory Region Metadata ***/
 66 |   // Returns the log memory region of node_id. The node must be known (asserted)
 67 |   ALWAYS_INLINE
 68 |   const MemoryAttr& GetRemoteLogMR(const node_id_t node_id) const {
 69 |     auto mrsearch = remote_log_mrs.find(node_id);
 70 |     assert(mrsearch != remote_log_mrs.end());
 71 |     return mrsearch->second;
 72 |   }
 73 | 
 74 |   // Returns the hash store memory region of node_id. The node must be known (asserted)
 75 |   ALWAYS_INLINE
 76 |   const MemoryAttr& GetRemoteHashMR(const node_id_t node_id) const {
 77 |     auto mrsearch = remote_hash_mrs.find(node_id);
 78 |     assert(mrsearch != remote_hash_mrs.end());
 79 |     return mrsearch->second;
 80 |   }
 81 | 
 82 |  private:
 83 |   // table id -> hash meta of the primary
 84 |   std::unordered_map<table_id_t, HashMeta> primary_hash_metas;
 85 | 
 86 |   // Hash metas of the backups, indexed by table id
 87 |   std::vector<HashMeta> backup_hash_metas[MAX_DB_TABLE_NUM];
 88 | 
 89 |   // table id -> node id of the primary
 90 |   std::unordered_map<table_id_t, node_id_t> primary_table_nodes;
 91 | 
 92 |   // Node ids of the backups, indexed by table id
 93 |   std::vector<node_id_t> backup_table_nodes[MAX_DB_TABLE_NUM];
 94 | 
 95 |   // node id -> remote hash store memory region
 96 |   std::unordered_map<node_id_t, MemoryAttr> remote_hash_mrs;
 97 | 
 98 |   // node id -> remote log memory region
 99 |   std::unordered_map<node_id_t, MemoryAttr> remote_log_mrs;
100 | 
101 |   node_id_t local_machine_id;
102 | 
103 |  public:
104 |   // Used by QP manager and RDMA Region
105 |   RdmaCtrlPtr global_rdma_ctrl;
106 | 
107 |   std::vector<RemoteNode> remote_nodes;
108 | 
109 |   RNicHandler* opened_rnic;
110 | 
111 |   // Below are some parameters from json file
112 |   int64_t txn_system;  // Compared against the DTX_SYS values to select system-specific behavior
113 | };
114 | 


--------------------------------------------------------------------------------
/core/connection/qp_manager.cc:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #include "connection/qp_manager.h"
 5 | 
 6 | // Creates and connects two RC queue pairs (data and log) from this thread to every remote node in
 7 | // meta_man->remote_nodes. Blocks until every queue pair is connected, retrying every 2 ms.
 8 | void QPManager::BuildQPConnection(MetaManager* meta_man) {
 9 |   // Connects `qp` to `node`, retrying until it succeeds, then binds `remote_mr` as the default
10 |   // remote mr of the queue pair for convenient parameter passing
11 |   auto connect_and_bind = [](RCQP* qp, const RemoteNode& node, MemoryAttr remote_mr) {
12 |     while (qp->connect(node.ip, node.port) != SUCC) {
13 |       usleep(2000);  // Back off only after a failed attempt. A successful connect does not sleep
14 |     }
15 |     qp->bind_remote_mr(remote_mr);
16 |   };
17 | 
18 |   for (const auto& remote_node : meta_man->remote_nodes) {
19 |     // data_qps and log_qps are fixed-size arrays indexed by node id
20 |     assert(remote_node.node_id < MAX_REMOTE_NODE_NUM);
21 | 
22 |     // Note that each remote machine has one MemStore mr and one Log mr
23 |     MemoryAttr remote_hash_mr = meta_man->GetRemoteHashMR(remote_node.node_id);
24 |     MemoryAttr remote_log_mr = meta_man->GetRemoteLogMR(remote_node.node_id);
25 | 
26 |     // Build QPs with one remote machine (this machine can be a primary or a backup)
27 |     // Create the thread local queue pairs: index 2 * global_tid for data, 2 * global_tid + 1 for log
28 |     MemoryAttr local_mr = meta_man->global_rdma_ctrl->get_local_mr(CLIENT_MR_ID);
29 |     RCQP* data_qp = meta_man->global_rdma_ctrl->create_rc_qp(create_rc_idx(remote_node.node_id, (int)global_tid * 2),
30 |                                                              meta_man->opened_rnic,
31 |                                                              &local_mr);
32 | 
33 |     RCQP* log_qp = meta_man->global_rdma_ctrl->create_rc_qp(create_rc_idx(remote_node.node_id, (int)global_tid * 2 + 1),
34 |                                                             meta_man->opened_rnic,
35 |                                                             &local_mr);
36 | 
37 |     // Queue pair connection, exchange queue pair info via TCP
38 |     connect_and_bind(data_qp, remote_node, remote_hash_mr);  // The hash mr is the default remote mr of the data QP
39 |     data_qps[remote_node.node_id] = data_qp;
40 | 
41 |     connect_and_bind(log_qp, remote_node, remote_log_mr);  // The log mr is the default remote mr of the log QP
42 |     log_qps[remote_node.node_id] = log_qp;
43 |   }
44 | }


--------------------------------------------------------------------------------
/core/connection/qp_manager.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "connection/meta_manager.h"
 7 | 
 8 | // Holds the qp connections (compute node <-> memory node) of one txn thread.
 9 | // Each txn thread in each compute node builds its own connections through BuildQPConnection
10 | class QPManager {
11 |  public:
12 |   QPManager(t_id_t global_tid) : global_tid(global_tid) {}
13 | 
14 |   void BuildQPConnection(MetaManager* meta_man);
15 | 
16 |   // Data QP stored for node_id. nullptr if none has been stored
17 |   ALWAYS_INLINE
18 |   RCQP* GetRemoteDataQPWithNodeID(const node_id_t node_id) const {
19 |     RCQP* const data_qp = data_qps[node_id];
20 |     return data_qp;
21 |   }
22 | 
23 |   // Appends to `qps` the data QP of every listed node that has one. Nodes without a QP are skipped
24 |   ALWAYS_INLINE
25 |   void GetRemoteDataQPsWithNodeIDs(const std::vector<node_id_t>* node_ids, std::vector<RCQP*>& qps) {
26 |     for (auto id_it = node_ids->begin(); id_it != node_ids->end(); ++id_it) {
27 |       RCQP* data_qp = data_qps[*id_it];
28 |       if (data_qp == nullptr) continue;
29 |       qps.push_back(data_qp);
30 |     }
31 |   }
32 | 
33 |   // Log QP stored for node_id. nullptr if none has been stored
34 |   ALWAYS_INLINE
35 |   RCQP* GetRemoteLogQPWithNodeID(const node_id_t node_id) const {
36 |     RCQP* const log_qp = log_qps[node_id];
37 |     return log_qp;
38 |   }
39 | 
40 |  private:
41 |   // Data QPs indexed by remote node id. All entries start as nullptr
42 |   RCQP* data_qps[MAX_REMOTE_NODE_NUM] = {};
43 | 
44 |   // Log QPs indexed by remote node id. All entries start as nullptr
45 |   RCQP* log_qps[MAX_REMOTE_NODE_NUM] = {};
46 | 
47 |   t_id_t global_tid;
48 | };
42 | 


--------------------------------------------------------------------------------
/core/dtx/doorbell.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include "base/common.h"
  7 | #include "rlib/rdma_ctrl.hpp"
  8 | #include "scheduler/corotine_scheduler.h"
  9 | 
 10 | using namespace rdmaio;
 11 | 
 12 | // Two RDMA requests are sent to the QP in a doorbelled (or batched) way.
 13 | // These requests are executed within one round trip
 14 | // Target: improve performance
 15 | 
 16 | // Base of the two-request doorbell batches: two send work requests, each with one
 17 | // scatter/gather entry, chained so that they can be posted together.
 18 | class DoorbellBatch {
 19 |  public:
 20 |   DoorbellBatch() {
 21 |     // The key of doorbell: set the pointer to link two requests.
 22 |     // Only the last request is flagged IBV_SEND_SIGNALED
 23 |     sr[0].num_sge = 1;
 24 |     sr[0].sg_list = &sge[0];
 25 |     sr[0].send_flags = 0;
 26 |     sr[0].next = &sr[1];
 27 |     sr[1].num_sge = 1;
 28 |     sr[1].sg_list = &sge[1];
 29 |     sr[1].send_flags = IBV_SEND_SIGNALED;
 30 |     sr[1].next = NULL;
 31 |   }
 32 | 
 33 |   // Zero-initialized, so that no field of a work request is indeterminate when it is posted,
 34 |   // whichever fields the Set*Req functions fill in
 35 |   struct ibv_send_wr sr[2] = {};
 36 | 
 37 |   struct ibv_sge sge[2] = {};
 38 | 
 39 |   // Passed to the post call to receive the first failed request. nullptr until then
 40 |   struct ibv_send_wr* bad_sr = nullptr;
 41 | };
 36 | 
 37 | class LockReadBatch : public DoorbellBatch {  // Doorbell batch of a lock request followed by a read request
 38 |  public:
 39 |   LockReadBatch() : DoorbellBatch() {}
 40 | 
 41 |   // SetLockReq and SetReadReq are a doorbelled group
 42 |   // First lock (presumably a CAS, given the compare/swap arguments), then read
 43 |   void SetLockReq(char* local_addr, uint64_t remote_off, uint64_t compare, uint64_t swap);
 44 | 
 45 |   void SetReadReq(char* local_addr, uint64_t remote_off, size_t size);
 46 | 
 47 |   // Send doorbelled requests to the queue pair
 48 |   bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id);
 49 | 
 50 |   // Fill the parameters
 51 |   bool FillParams(RCQP* qp);
 52 | };
 53 | 
 54 | class WriteUnlockBatch : public DoorbellBatch {  // Doorbell batch of a write request followed by an unlock request
 55 |  public:
 56 |   WriteUnlockBatch() : DoorbellBatch() {}
 57 | 
 58 |   // SetWritePrimaryReq and SetUnLockReq are a doorbelled group
 59 |   // First write, then unlock
 60 |   void SetWritePrimaryReq(char* local_addr, uint64_t remote_off, size_t size);
 61 | 
 62 |   void SetUnLockReq(char* local_addr, uint64_t remote_off);  // Unlock variant without compare/swap values
 63 | 
 64 |   void SetUnLockReq(char* local_addr, uint64_t remote_off, uint64_t compare, uint64_t swap);  // Unlock variant with compare/swap values
 65 | 
 66 |   // Send doorbelled requests to the queue pair. use_cas presumably tells which SetUnLockReq variant was used (TODO confirm)
 67 |   bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas);
 68 | };
 69 | 
 70 | class InvisibleWriteBatch : public DoorbellBatch {  // Doorbell batch of an invisible request followed by a write request
 71 |  public:
 72 |   InvisibleWriteBatch() : DoorbellBatch() {}
 73 | 
 74 |   // SetInvisibleReq and SetWriteRemoteReq are a doorbelled group
 75 |   // First lock, then write
 76 |   void SetInvisibleReq(char* local_addr, uint64_t remote_off, uint64_t compare, uint64_t swap);  // Variant with compare/swap values
 77 | 
 78 |   void SetInvisibleReq(char* local_addr, uint64_t remote_off);  // Variant without compare/swap values
 79 | 
 80 |   void SetWriteRemoteReq(char* local_addr, uint64_t remote_off, size_t size);
 81 | 
 82 |   // Send doorbelled requests to the queue pair. use_cas presumably tells which SetInvisibleReq variant was used (TODO confirm)
 83 |   bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas);
 84 | 
 85 |   bool SendReqsSync(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas);  // Presumably the blocking counterpart of SendReqs (TODO confirm)
 86 | };
 87 | 
 88 | class WriteFlushBatch : public DoorbellBatch {  // Doorbell batch of a remote write request and a remote read request
 89 |  public:
 90 |   WriteFlushBatch() : DoorbellBatch() {}
 91 | 
 92 |   void SetWriteRemoteReq(char* local_addr, uint64_t remote_off, size_t size);
 93 | 
 94 |   void SetReadRemoteReq(char* local_addr, uint64_t remote_off, size_t size);  // Presumably the read that flushes the preceding write (TODO confirm)
 95 |   // Send doorbelled requests to the queue pair. Unlike the other batches, the remote mr is passed explicitly
 96 |   bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, MemoryAttr& remote_mr);
 97 | };
 98 | 
 99 | // Doorbell batch of three chained requests, set up through SetInvisibleReq, SetWriteRemoteReq
100 | // and SetReadRemoteReq.
101 | class InvisibleWriteFlushBatch {
102 |  public:
103 |   InvisibleWriteFlushBatch() {
104 |     // The key of doorbell: set the pointers to link the three requests.
105 |     // Only the last request is flagged IBV_SEND_SIGNALED
106 |     sr[0].num_sge = 1;
107 |     sr[0].sg_list = &sge[0];
108 |     sr[0].send_flags = 0;
109 |     sr[0].next = &sr[1];
110 | 
111 |     sr[1].num_sge = 1;
112 |     sr[1].sg_list = &sge[1];
113 |     sr[1].send_flags = 0;
114 |     sr[1].next = &sr[2];
115 | 
116 |     sr[2].num_sge = 1;
117 |     sr[2].sg_list = &sge[2];
118 |     sr[2].send_flags = IBV_SEND_SIGNALED;
119 |     sr[2].next = NULL;
120 |   }
121 | 
122 |   void SetInvisibleReq(char* local_addr, uint64_t remote_off);
123 | 
124 |   void SetWriteRemoteReq(char* local_addr, uint64_t remote_off, size_t size);
125 | 
126 |   void SetReadRemoteReq(char* local_addr, uint64_t remote_off, size_t size);
127 | 
128 |   // Send doorbelled requests to the queue pair
129 |   bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas);
130 | 
131 |  private:
132 |   // Zero-initialized, so that no field of a work request is indeterminate when it is posted
133 |   struct ibv_send_wr sr[3] = {};
134 | 
135 |   struct ibv_sge sge[3] = {};
136 | 
137 |   struct ibv_send_wr* bad_sr = nullptr;
138 | };
135 | 
136 | // Doorbell batch of three chained requests, set up through SetInvisibleReq, SetWriteRemoteReq
137 | // and SetReleaseReq.
138 | class ComparatorUpdateRemote {
139 |  public:
140 |   ComparatorUpdateRemote() {
141 |     // Link the three requests. Only the last request is flagged IBV_SEND_SIGNALED
142 |     sr[0].num_sge = 1;
143 |     sr[0].sg_list = &sge[0];
144 |     sr[0].send_flags = 0;
145 |     sr[0].next = &sr[1];
146 | 
147 |     sr[1].num_sge = 1;
148 |     sr[1].sg_list = &sge[1];
149 |     sr[1].send_flags = 0;
150 |     sr[1].next = &sr[2];
151 | 
152 |     sr[2].num_sge = 1;
153 |     sr[2].sg_list = &sge[2];
154 |     sr[2].send_flags = IBV_SEND_SIGNALED;
155 |     sr[2].next = NULL;
156 |   }
157 | 
158 |   void SetInvisibleReq(char* local_addr, uint64_t remote_off);
159 | 
160 |   void SetWriteRemoteReq(char* local_addr, uint64_t remote_off, size_t size);
161 | 
162 |   void SetReleaseReq(char* local_addr, uint64_t remote_off);
163 | 
164 |   // Send doorbelled requests to the queue pair
165 |   bool SendReqs(CoroutineScheduler* coro_sched, RCQP* qp, coro_id_t coro_id, int use_cas);
166 | 
167 |  private:
168 |   // Zero-initialized, so that no field of a work request is indeterminate when it is posted
169 |   struct ibv_send_wr sr[3] = {};
170 | 
171 |   struct ibv_sge sge[3] = {};
172 | 
173 |   struct ibv_send_wr* bad_sr = nullptr;
174 | };


--------------------------------------------------------------------------------
/core/dtx/dtx_check.cc:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #include "dtx/dtx.h"
  5 | #include "util/timer.h"
  6 | 
  7 | // Checks the replies of the read-only reads. Returns false as soon as one check fails, and true
  8 | // once no re-read is pending any more.
  9 | bool DTX::CheckReadRO(std::vector<DirectRead>& pending_direct_ro,
 10 |                       std::vector<HashRead>& pending_hash_ro,
 11 |                       std::list<InvisibleRead>& pending_invisible_ro,
 12 |                       std::list<HashRead>& pending_next_hash_ro,
 13 |                       coro_yield_t& yield) {
 14 |   // First pass over the direct and the hash reads. The hash check only runs if the direct check passed
 15 |   const bool first_pass_ok = CheckDirectRO(pending_direct_ro, pending_invisible_ro, pending_next_hash_ro) &&
 16 |                              CheckHashRO(pending_hash_ro, pending_invisible_ro, pending_next_hash_ro);
 17 |   if (!first_pass_ok) return false;
 18 | 
 19 |   // The checks may have scheduled re-reads because of invisible data or hash collisions.
 20 |   // Wait for their replies and check again until nothing is left
 21 |   while (!(pending_invisible_ro.empty() && pending_next_hash_ro.empty())) {
 22 |     coro_sched->Yield(yield, coro_id);
 23 |     const bool recheck_ok =
 24 |         CheckInvisibleRO(pending_invisible_ro) && CheckNextHashRO(pending_invisible_ro, pending_next_hash_ro);
 25 |     if (!recheck_ok) return false;
 26 |   }
 27 |   return true;
 28 | }
 23 | 
 24 | // Checks the replies of both the read-only and the read-write reads. Returns false as soon as one
 25 | // check fails, and true once no re-read is pending any more.
 26 | bool DTX::CheckReadRORW(std::vector<DirectRead>& pending_direct_ro,
 27 |                         std::vector<HashRead>& pending_hash_ro,
 28 |                         std::vector<HashRead>& pending_hash_rw,
 29 |                         std::vector<InsertOffRead>& pending_insert_off_rw,
 30 |                         std::vector<CasRead>& pending_cas_rw,
 31 |                         std::list<InvisibleRead>& pending_invisible_ro,
 32 |                         std::list<HashRead>& pending_next_hash_ro,
 33 |                         std::list<HashRead>& pending_next_hash_rw,
 34 |                         std::list<InsertOffRead>& pending_next_off_rw,
 35 |                         coro_yield_t& yield) {
 36 |   // Read-only results first
 37 |   const bool ro_ok = CheckDirectRO(pending_direct_ro, pending_invisible_ro, pending_next_hash_ro) &&
 38 |                      CheckHashRO(pending_hash_ro, pending_invisible_ro, pending_next_hash_ro);
 39 |   if (!ro_ok) return false;
 40 | 
 41 |   // Then the read-write results. CheckHashRO and CheckHashRW are separate, because a read-write txn
 42 |   // needs to compare txid with the fetched id
 43 |   const bool rw_ok = CheckCasRW(pending_cas_rw, pending_next_hash_rw, pending_next_off_rw) &&
 44 |                      CheckHashRW(pending_hash_rw, pending_invisible_ro, pending_next_hash_rw) &&
 45 |                      CheckInsertOffRW(pending_insert_off_rw, pending_invisible_ro, pending_next_off_rw);
 46 |   if (!rw_ok) return false;
 47 | 
 48 |   // The checks may have scheduled re-reads because of invisible data or hash collisions
 49 |   for (;;) {
 50 |     const bool nothing_pending = pending_invisible_ro.empty() && pending_next_hash_ro.empty() &&
 51 |                                  pending_next_hash_rw.empty() && pending_next_off_rw.empty();
 52 |     if (nothing_pending) break;
 53 | 
 54 |     coro_sched->Yield(yield, coro_id);
 55 | 
 56 |     // Recheck the read-only replies, then the read-write replies. The first failure stops the chain
 57 |     const bool recheck_ok = CheckInvisibleRO(pending_invisible_ro) &&
 58 |                             CheckNextHashRO(pending_invisible_ro, pending_next_hash_ro) &&
 59 |                             CheckNextHashRW(pending_invisible_ro, pending_next_hash_rw) &&
 60 |                             CheckNextOffRW(pending_invisible_ro, pending_next_off_rw);
 61 |     if (!recheck_ok) return false;
 62 |   }
 63 |   return true;
 64 | }
 57 | 
 58 | bool DTX::CheckValidate(std::vector<ValidateRead>& pending_validate) {
 59 |   // Check every validated item: its lock word (if one was fetched) and its version. Returns false on the first failure
 60 |   for (auto& re : pending_validate) {
 61 |     auto it = re.item->item_ptr;
 62 |     if (re.has_lock_in_validate) {  // re.cas_buf holds the lock word fetched for this item
 63 | #if LOCK_WAIT
 64 |       if (*((lock_t*)re.cas_buf) != STATE_CLEAN) {
 65 |         // The fetched lock word is not clean. Retry the CAS until the fetched lock word is clean
 66 |         // FOR TEST ONLY. Note that this loop polls the QP and does not yield to other coroutines
 67 | 
 68 |         auto remote_data_addr = re.item->item_ptr->remote_offset;
 69 |         auto remote_lock_addr = re.item->item_ptr->GetRemoteLockAddr(remote_data_addr);
 70 |         auto remote_version_addr = re.item->item_ptr->GetRemoteVersionAddr(remote_data_addr);
 71 | 
 72 |         while (*((lock_t*)re.cas_buf) != STATE_CLEAN) {
 73 |           // Time each CAS round trip (post + poll). The durations are collected in lock_durations
 74 |           Timer timer;
 75 |           timer.Start();
 76 | 
 77 |           auto rc = re.qp->post_cas(re.cas_buf, remote_lock_addr, STATE_CLEAN, STATE_LOCKED, IBV_SEND_SIGNALED);
 78 |           if (rc != SUCC) {
 79 |             TLOG(ERROR, t_id) << "client: post cas fail. rc=" << rc;
 80 |             exit(-1);
 81 |           }
 82 | 
 83 |           ibv_wc wc{};
 84 |           rc = re.qp->poll_till_completion(wc, no_timeout);
 85 |           if (rc != SUCC) {
 86 |             TLOG(ERROR, t_id) << "client: poll cas fail. rc=" << rc;
 87 |             exit(-1);
 88 |           }
 89 | 
 90 |           timer.Stop();
 91 |           lock_durations.emplace_back(timer.Duration_us());
 92 |         }
 93 | 
 94 |         auto rc = re.qp->post_send(IBV_WR_RDMA_READ, re.version_buf, sizeof(version_t), remote_version_addr, IBV_SEND_SIGNALED);
 95 | 
 96 |         if (rc != SUCC) {
 97 |           TLOG(ERROR, t_id) << "client: post read fail. rc=" << rc;
 98 |           exit(-1);
 99 |         }
100 |         // Note: Now the coordinator gets the lock. It can read the data (the version is re-read into re.version_buf)
101 | 
102 |         ibv_wc wc{};
103 |         rc = re.qp->poll_till_completion(wc, no_timeout);
104 |         if (rc != SUCC) {
105 |           TLOG(ERROR, t_id) << "client: poll read fail. rc=" << rc;
106 |           exit(-1);
107 |         }
108 |       }
109 | #else
110 |       if (*((lock_t*)re.cas_buf) != STATE_CLEAN) {
111 |         // it->Debug();
112 |         // RDMA_LOG(DBG) << "remote lock not clean " << std::hex << *((lock_t*)re.cas_buf);
113 |         return false;  // The fetched lock word is not clean: validation fails without waiting
114 |       }
115 | #endif
116 |       version_t my_version = it->version;
117 |       if (it->user_insert) {
118 |         // If it is an insertion, we need to compare the fetched version with
119 |         // the old version, instead of the new version stored in item
120 |         for (auto& old_version : old_version_for_insert) {
121 |           if (old_version.table_id == it->table_id && old_version.key == it->key) {
122 |             my_version = old_version.version;
123 |             break;
124 |           }
125 |         }
126 |       }
127 |       // Compare the expected version with the version fetched from remote
128 |       if (my_version != *((version_t*)re.version_buf)) {
129 |         // it->Debug();
130 |         // RDMA_LOG(DBG) << "MY VERSION " << it->version;
131 |         // RDMA_LOG(DBG) << "version_buf " << *((version_t*)re.version_buf);
132 |         return false;
133 |       }
134 |     } else {
135 |       // No lock word to check for this item. Compare the item's version with the version fetched from remote
136 |       if (it->version != *((version_t*)re.version_buf)) {
137 |         // it->Debug();
138 |         // RDMA_LOG(DBG) << "MY VERSION " << it->version;
139 |         // RDMA_LOG(DBG) << "version_buf " << *((version_t*)re.version_buf);
140 |         return false;
141 |       }
142 |     }
143 |   }
144 |   return true;
145 | }
146 | 
147 | // Posts one unsignaled RDMA write per committed item, copying sizeof(lock_t) bytes from cas_buf to
148 | // the item's remote lock offset. The writes are not waited for. Always returns true; a write that
149 | // cannot be posted is logged and the remaining items are still released.
150 | bool DTX::CheckCommitAll(std::vector<CommitWrite>& pending_commit_write, char* cas_buf) {
151 |   // Release: set visible and unlock remote data
152 |   for (auto& re : pending_commit_write) {
153 |     auto* qp = thread_qp_man->GetRemoteDataQPWithNodeID(re.node_id);
154 |     auto rc = qp->post_send(IBV_WR_RDMA_WRITE, cas_buf, sizeof(lock_t), re.lock_off, 0);  // Release
155 |     if (rc != SUCC) {
156 |       // Do not drop the error silently: a release that was never posted presumably leaves the remote item locked
157 |       TLOG(ERROR, t_id) << "client: post release write fail. rc=" << rc;
158 |     }
159 |   }
160 |   return true;
161 | }


--------------------------------------------------------------------------------
/core/dtx/dtx_compare.cc:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #include "dtx/dtx.h"
  5 | 
  6 | // Execution of a read-only transaction: issue the reads, yield while the replies are in flight,
  7 | // then check the replies. Returns false if issuing or checking fails.
  8 | bool DTX::CompareExeRO(coro_yield_t& yield) {
  9 |   // Reads issued in the first round trip
 10 |   std::vector<DirectRead> direct_reads;
 11 |   std::vector<HashRead> hash_reads;
 12 |   // Re-reads that the checks may add
 13 |   std::list<HashRead> next_hash_reads;
 14 |   std::list<InvisibleRead> invisible_reads;
 15 | 
 16 |   if (!CompareIssueReadRO(direct_reads, hash_reads)) {
 17 |     return false;
 18 |   }
 19 | 
 20 |   // Yield to other coroutines when waiting for network replies
 21 |   coro_sched->Yield(yield, coro_id);
 22 | 
 23 |   // Receive data
 24 |   return CheckReadRO(direct_reads, hash_reads, invisible_reads, next_hash_reads, yield);
 25 | }
 22 | 
 23 | // Execution of a read-write transaction: issue the read-only and the read-write reads, yield
 24 | // while the replies are in flight, then check the replies. Returns false if issuing fails,
 25 | // otherwise the result of the check.
 26 | bool DTX::CompareExeRW(coro_yield_t& yield) {
 27 |   // Reads issued in the first round trip
 28 |   std::vector<DirectRead> direct_ro, direct_rw;
 29 |   std::vector<HashRead> hash_ro, hash_rw;
 30 |   std::vector<InsertOffRead> insert_off_rw;
 31 | 
 32 |   // Re-reads that the checks may add
 33 |   std::list<HashRead> next_hash_ro, next_hash_rw;
 34 |   std::list<InsertOffRead> next_off_rw;
 35 |   std::list<InvisibleRead> invisible_ro;
 36 | 
 37 |   // The read-write reads are only issued if the read-only reads were issued successfully
 38 |   const bool issued = CompareIssueReadRO(direct_ro, hash_ro) && CompareIssueReadRW(direct_rw, hash_rw, insert_off_rw);
 39 |   if (!issued) return false;
 40 | 
 41 |   // Yield to other coroutines when waiting for network replies
 42 |   coro_sched->Yield(yield, coro_id);
 43 | 
 44 |   const bool checked = CompareCheckReadRORW(direct_ro, direct_rw, hash_ro, hash_rw, next_hash_ro, next_hash_rw,
 45 |                                             insert_off_rw, next_off_rw, invisible_ro, yield);
 46 | 
 47 |   // ParallelUndoLog() runs for the LOCAL system whether or not the check passed
 48 |   if (global_meta_man->txn_system == DTX_SYS::LOCAL) ParallelUndoLog();
 49 | 
 50 |   return checked;
 51 | }
 60 | 
 61 | // Issues the lock requests, yields while they are in flight, then checks the replies.
 62 | bool DTX::CompareLocking(coro_yield_t& yield) {
 63 |   std::vector<Lock> lock_reqs;
 64 |   const bool issued = CompareIssueLocking(lock_reqs);
 65 |   if (!issued) return false;
 66 | 
 67 |   coro_sched->Yield(yield, coro_id);
 68 | 
 69 |   return CompareCheckLocking(lock_reqs);
 70 | }
 70 | 
 71 | // Issues the version reads, yields while they are in flight, then checks the replies.
 72 | bool DTX::CompareValidation(coro_yield_t& yield) {
 73 |   std::vector<Version> version_reads;
 74 |   const bool issued = CompareIssueValidation(version_reads);
 75 |   if (!issued) return false;
 76 | 
 77 |   coro_sched->Yield(yield, coro_id);
 78 | 
 79 |   return CompareCheckValidation(version_reads);
 80 | }
 80 | 
 81 | bool DTX::CompareLockingValidation(coro_yield_t& yield) {
 82 |   // This is the same with our validation scheme, i.e., lock+read write set, read read set
 83 |   std::vector<ValidateRead> pending_validate;
 84 |   if (!CompareIssueLockValidation(pending_validate)) return false;
 85 | 
 86 |   coro_sched->Yield(yield, coro_id);
 87 | 
 88 |   auto res = CheckValidate(pending_validate);
 89 |   return res;
 90 | }
 91 | 
 92 | bool DTX::CompareCommitBackup(coro_yield_t& yield) {
 93 |   tx_status = TXStatus::TX_COMMIT;
 94 | 
 95 | #if RFLUSH == 0
 96 |   if (!CompareIssueCommitBackup()) return false;
 97 | #elif RFLUSH == 1
 98 |   if (!CompareIssueCommitBackupFullFlush()) return false;
 99 | #elif RFLUSH == 2
100 |   if (!CompareIssueCommitBackupSelectiveFlush()) return false;
101 | #endif
102 | 
103 |   coro_sched->Yield(yield, coro_id);
104 | 
105 |   return true;
106 | }
107 | 
108 | bool DTX::CompareCommitPrimary(coro_yield_t& yield) {
109 |   if (!CompareIssueCommitPrimary()) {
110 |     return false;
111 |   }
112 |   coro_sched->Yield(yield, coro_id);
113 |   return true;
114 | }
115 | 
116 | bool DTX::CompareTruncateAsync(coro_yield_t& yield) {
117 |   // Truncate: Update backup's data region in an async manner
118 |   if (!CompareIssueTruncate()) {
119 |     return false;
120 |   }
121 |   // No yield, not waiting for ack
122 |   return true;
123 | }


--------------------------------------------------------------------------------
/core/dtx/dtx_exe_commit.cc:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #include "dtx/dtx.h"
  5 | 
  6 | bool DTX::TxExe(coro_yield_t& yield, bool fail_abort) {
  7 |   // Start executing transaction
  8 |   tx_status = TXStatus::TX_EXE;
  9 |   if (read_write_set.empty() && read_only_set.empty()) {
 10 |     return true;
 11 |   }
 12 | 
 13 |   if (global_meta_man->txn_system == DTX_SYS::FORD) {
 14 |     // Run our system
 15 |     if (read_write_set.empty()) {
 16 |       if (ExeRO(yield))
 17 |         return true;
 18 |       else {
 19 |         goto ABORT;
 20 |       }
 21 |     } else {
 22 |       if (ExeRW(yield))
 23 |         return true;
 24 |       else {
 25 |         goto ABORT;
 26 |       }
 27 |     }
 28 |   } else if (global_meta_man->txn_system == DTX_SYS::FaRM || global_meta_man->txn_system == DTX_SYS::DrTMH || global_meta_man->txn_system == DTX_SYS::LOCAL) {
 29 |     if (read_write_set.empty()) {
 30 |       if (CompareExeRO(yield))
 31 |         return true;
 32 |       else
 33 |         goto ABORT;
 34 |     } else {
 35 |       if (CompareExeRW(yield))
 36 |         return true;
 37 |       else
 38 |         goto ABORT;
 39 |     }
 40 |   } else {
 41 |     RDMA_LOG(FATAL) << "NOT SUPPORT SYSTEM ID: " << global_meta_man->txn_system;
 42 |   }
 43 | 
 44 |   return true;
 45 | 
 46 | ABORT:
 47 |   if (fail_abort) Abort();
 48 |   return false;
 49 | }
 50 | 
 51 | bool DTX::TxCommit(coro_yield_t& yield) {
 52 |   // Only read one item
 53 |   if (read_write_set.empty() && read_only_set.size() == 1) {
 54 |     return true;
 55 |   }
 56 | 
 57 |   bool commit_stat;
 58 | 
 59 |   /*!
 60 |     FORD's commit protocol
 61 |     */
 62 | 
 63 |   if (global_meta_man->txn_system == DTX_SYS::FORD) {
 64 |     if (!Validate(yield)) {
 65 |       goto ABORT;
 66 |     }
 67 | 
 68 |     // Next step. If read-write txns, we need to commit the updates to remote replicas
 69 |     if (!read_write_set.empty()) {
 70 |       // Write back for read-write tx
 71 | #if COMMIT_TOGETHER
 72 |       commit_stat = CoalescentCommit(yield);
 73 |       if (commit_stat) {
 74 |         return true;
 75 |       } else {
 76 |         goto ABORT;
 77 |       }
 78 | #else
 79 |       commit_stat = CompareCommitBackup(yield);
 80 |       if (!commit_stat) {
 81 |         goto ABORT;
 82 |       }
 83 |       commit_stat = CompareCommitPrimary(yield);
 84 |       if (!commit_stat) {
 85 |         goto ABORT;
 86 |       }
 87 |       commit_stat = CompareTruncateAsync(yield);
 88 |       if (commit_stat) {
 89 |         return true;
 90 |       } else {
 91 |         goto ABORT;
 92 |       }
 93 | #endif
 94 |     }
 95 |   }
 96 | 
 97 |   if (global_meta_man->txn_system == DTX_SYS::LOCAL) {
 98 |     if (!read_write_set.empty()) {
 99 |       // For read-write txn
100 |       if (!LocalLock()) return false;
101 |       if (!LocalValidate()) return false;
102 |       commit_stat = CoalescentCommit(yield);
103 |       if (commit_stat) {
104 |         return true;
105 |       } else {
106 |         abort();
107 |       }
108 |       LocalUnlock();
109 |     } else {
110 |       // For read-only txn
111 |       if (!LocalValidate()) return false;
112 |     }
113 |   }
114 | 
115 |   /*!
116 |     DrTM+H's commit protocol
117 |     */
118 | 
119 |   if (global_meta_man->txn_system == DTX_SYS::DrTMH) {
120 |     // Lock and Validation are batched
121 |     if (!CompareLockingValidation(yield)) {
122 |       goto ABORT;
123 |     }
124 | 
125 |     // Seperately commit backup and primary
126 |     if (!read_write_set.empty()) {
127 |       commit_stat = CompareCommitBackup(yield);
128 |       if (!commit_stat) {
129 |         goto ABORT;
130 |       }
131 |       commit_stat = CompareCommitPrimary(yield);
132 |       if (!commit_stat) {
133 |         goto ABORT;
134 |       }
135 |       commit_stat = CompareTruncateAsync(yield);
136 |       if (commit_stat) {
137 |         return true;
138 |       } else {
139 |         goto ABORT;
140 |       }
141 |     }
142 |   }
143 | 
144 |   /*!
145 |     FaRM's commit protocol
146 |     */
147 | 
148 |   if (global_meta_man->txn_system == DTX_SYS::FaRM) {
149 |     if (!CompareLocking(yield)) {
150 |       goto ABORT;
151 |     }
152 |     if (!CompareValidation(yield)) {
153 |       goto ABORT;
154 |     }
155 | 
156 |     // Seperately commit backup and primary
157 |     if (!read_write_set.empty()) {
158 |       commit_stat = CompareCommitBackup(yield);
159 |       if (!commit_stat) {
160 |         goto ABORT;
161 |       }
162 |       commit_stat = CompareCommitPrimary(yield);
163 |       if (!commit_stat) {
164 |         goto ABORT;
165 |       }
166 |       commit_stat = CompareTruncateAsync(yield);
167 |       if (commit_stat) {
168 |         return true;
169 |       } else {
170 |         goto ABORT;
171 |       }
172 |     }
173 |   }
174 | 
175 |   return true;
176 | ABORT:
177 |   Abort();
178 |   return false;
179 | }
180 | 


--------------------------------------------------------------------------------
/core/dtx/dtx_local_meta.cc:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #include "dtx/dtx.h"
 5 | 
 6 | bool DTX::LocalLock() {
 7 |   auto res = global_lcache->TryLock(read_write_set);
 8 |   if (!res) {
 9 |     global_lcache->Unlock(read_write_set);
10 |     return false;
11 |   }
12 |   return true;
13 | }
14 | 
15 | void DTX::LocalUnlock() {
16 |   global_lcache->Unlock(read_write_set);
17 | }
18 | 
19 | bool DTX::LocalValidate() {
20 |   auto res = global_vcache->CheckVersion(read_only_set, tx_id);
21 |   if (res == VersionStatus::VERSION_CHANGED) {
22 |     global_lcache->Unlock(read_write_set);
23 |     return false;
24 |   }
25 |   return true;
26 | }


--------------------------------------------------------------------------------
/core/dtx/structs.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include "memstore/hash_store.h"
  7 | #include "rlib/rdma_ctrl.hpp"
  8 | 
// Identifies which transaction protocol to run. The numeric values are explicit
// (presumably because the id comes from outside the program, e.g. a config file;
// confirm against where txn_system is set).
enum DTX_SYS : int {
  FaRM = 0,
  DrTMH = 1,
  FORD = 2,
  LOCAL = 3 // FORD with localized metadata including locks and versions
};
 15 | 
// Phase a transaction is currently in. Values are consecutive starting at 0.
enum TXStatus : int {
  TX_INIT = 0,  // Transaction initialization
  TX_EXE,       // Transaction execution, read only
  TX_LOCK,      // Transaction execution, read+lock
  TX_VAL,       // Transaction validate
  TX_COMMIT,    // Commit primary and backups
  TX_ABORT      // Aborted transaction
};
 24 | 
// Outcome of issuing validation; tells the caller whether it has to yield and wait.
enum ValStatus : int {
  RDMA_ERROR = -1,  // Validation network error
  NO_NEED_VAL = 0,  // Do not need validation, i.e., the coroutine does not need to yield CPU
  NEED_VAL = 1,     // Need validation, i.e., the coroutine needs to yield CPU
  MUST_ABORT = 2    // The data version must be changed and hence no validation is needed
};
 31 | 
// The following are structures for maintaining a coroutine's state, similar to a context switch

// One entry of a transaction's read-only or read-write set.
struct DataSetItem {
  DataItemPtr item_ptr;       // Shared pointer to the data item itself
  bool is_fetched;            // Presumably set once the item has been read from remote; confirm in the issue/check code
  bool is_logged;             // Presumably set once the item has been written to the log; confirm in the logging code
  node_id_t read_which_node;  // From which node this data item is read. This is a node id, e.g., 0, 1, 2...
  int64_t bkt_idx; // The bkt idx of local lock table
};
 41 | 
// Records the version associated with (table_id, key) for an insert.
// NOTE(review): the name suggests this is the pre-insert version kept for later use;
// confirm against the code that fills it.
struct OldVersionForInsert {
  table_id_t table_id;
  itemkey_t key;
  version_t version;
};
 47 | 
// Location of a lock: the node that holds it plus an address on that node.
// NOTE(review): whether lock_addr is an absolute address or a region offset is not
// visible here; confirm at the use site.
struct LockAddr {
  node_id_t node_id;
  uint64_t lock_addr;
};
 52 | 
// For coroutines: the records below describe requests that have been issued and are
// checked after the coroutine resumes.

// A pending read of a data item (presumably at an already-known remote address; confirm).
struct DirectRead {
  RCQP* qp;               // Queue pair the request was posted on
  DataSetItem* item;      // Data-set entry this read belongs to (not owned)
  char* buf;              // Local buffer receiving the reply
  node_id_t remote_node;  // Node the request was sent to
};
 60 | 
// A pending read that goes through the remote hash table (presumably a bucket read; confirm).
// The const member makes this struct copy/move-constructible but not assignable.
struct HashRead {
  RCQP* qp;               // Queue pair the request was posted on
  DataSetItem* item;      // Data-set entry this read belongs to (not owned)
  char* buf;              // Local buffer receiving the reply
  node_id_t remote_node;  // Node the request was sent to
  const HashMeta meta;    // Metadata of the hash table being read, stored by value
};
 68 | 
// A pending re-read at a remote offset.
// NOTE(review): presumably used to re-check data that was found invisible; confirm.
struct InvisibleRead {
  RCQP* qp;      // Queue pair the request was posted on
  char* buf;     // Local buffer receiving the reply
  uint64_t off;  // Remote offset that is read
};
 74 | 
// A pending compare-and-swap combined with a data read, with one buffer for each result.
struct CasRead {
  RCQP* qp;                   // Queue pair the requests were posted on
  DataSetItem* item;          // Data-set entry this request belongs to (not owned)
  char* cas_buf;              // Local buffer for the CAS result
  char* data_buf;             // Local buffer for the data that is read
  node_id_t primary_node_id;  // Id of the primary node
};
 82 | 
// A pending hash-table read issued on behalf of an insert (presumably to find the
// slot/offset to insert at; confirm).
// The const member makes this struct copy/move-constructible but not assignable.
struct InsertOffRead {
  RCQP* qp;               // Queue pair the request was posted on
  DataSetItem* item;      // Data-set entry this read belongs to (not owned)
  char* buf;              // Local buffer receiving the reply
  node_id_t remote_node;  // Node the request was sent to
  const HashMeta meta;    // Metadata of the hash table being read, stored by value
  offset_t node_off;      // Offset of the hash node that is read
};
 91 | 
// A pending validation request: a version read, optionally paired with a lock CAS.
struct ValidateRead {
  RCQP* qp;                   // Queue pair the requests were posted on
  DataSetItem* item;          // Data-set entry being validated (not owned)
  char* cas_buf;              // Local buffer for the lock CAS result
  char* version_buf;          // Local buffer for the version that is read
  bool has_lock_in_validate;  // Whether a lock request was issued together with this validation
};
 99 | 
// A pending lock request.
struct Lock {
  RCQP* qp;           // Queue pair the request was posted on
  DataSetItem* item;  // Data-set entry being locked (not owned)
  char* cas_buf;      // Local buffer for the CAS result
  uint64_t lock_off;  // Remote offset of the lock
};
106 | 
// A pending unlock request; only the local CAS buffer is tracked.
struct Unlock {
  char* cas_buf;
};
110 | 
// A pending version read used for validation.
struct Version {
  DataSetItem* item;  // Data-set entry being validated (not owned)
  char* version_buf;  // Local buffer for the version that is read
};
115 | 
// Identifies a lock touched by a commit write: the node and the lock's offset on it.
struct CommitWrite {
  node_id_t node_id;
  uint64_t lock_off;
};
120 | 


--------------------------------------------------------------------------------
/core/flags.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
/*********************** For common **********************/
// Max data item size in bytes; it sizes the value array of every DataItem.
// Pick the value that matches the workload being run:
// 8: smallbank
// 40: tatp
// 664: tpcc
// 40: micro-benchmark

const size_t MAX_ITEM_SIZE = 664;
14 | 
/*********************** For FORD **********************/
// 0: Read rw data without lock
// 1: Read+lock rw data
#define READ_LOCK 1

// 0: Separately commit remote replicas
// 1: Coalescently commit remote replicas
#define COMMIT_TOGETHER 1

// 0: Disable reading read-only data from backups
// 1: Enable reading read-only data from backups
#define READ_BACKUP 0

// 0: No remote persistency guarantee
// 1: Full flush
// 2: Selective flush
#define RFLUSH 2

// 0: Wait if invisible
// 1: Abort if invisible
#define INV_ABORT 1
36 | 
/*********************** For Localized opt **********************/
// Below are only for FORD with coalescent commit
// 0: Disable local lock
// 1: Enable local lock
#define LOCAL_LOCK 0

// 0: Remote validation for RO set
// 1: Cache versions in local
#define LOCAL_VALIDATION 0

// Hash table parameters for localized validation.
// Each group below lists, in order: MAX_TABLE_NUM, SLOT_PER_BKT, NUM_BKT
// For tatp
// 5
// 4
// 10000000

// For smallbank
// 2
// 1
// 100000

// For tpcc
// 11
// 72
// 100000

#define MAX_TABLE_NUM 11
#define SLOT_PER_BKT 72
#define NUM_BKT 100000
66 | 
/*********************** For counterparts **********************/
// 0: Do not cache addrs in local. Default for FaRM
// 1: Cache addrs in local. Default for DrTM+H, optimized for FaRM
#define USE_LOCAL_ADDR_CACHE 0

// 1: Locks block reads
// 0: Use FORD's mechanism, i.e., visibility control, which allows reading locked data but not invisible data
// This is the **opposite** of our visibility control: enabling this disables visibility control, and disabling this enables it
#define LOCK_REFUSE_READ_RO 0
#define LOCK_REFUSE_READ_RW 0
77 | 
/*********************** For micro-benchmarks **********************/
// 0: Do not wait for the lock, just abort (for end-to-end tests)
// 1: Wait for the lock until execution resumes (for lock duration tests; remember to set the coroutine num to 2)
#define LOCK_WAIT 0

// 0: Do not busily wait for the data to become visible, e.g., yield to another coroutine to execute the next tx (for end-to-end tests)
// 1: Busily wait for the data to become visible (for visibility tests; remember to set the coroutine num to 2)
#define INV_BUSY_WAIT 0
86 | 


--------------------------------------------------------------------------------
/core/memstore/data_item.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include <cstring>
  7 | #include <iostream>
  8 | #include <memory>
  9 | #include <sstream>
 10 | #include <string>
 11 | 
 12 | #include "base/common.h"
 13 | #include "util/debug.h"
 14 | 
 15 | struct DataItem {
 16 |   table_id_t table_id;
 17 |   size_t value_size;  // The length of uint8* value
 18 |   itemkey_t key;
 19 |   // remote_offset records this item's offset in the remote memory region
 20 |   // it's helpful for addressing each filed in DataItem
 21 |   offset_t remote_offset;
 22 |   version_t version;
 23 |   lock_t lock;
 24 |   uint8_t value[MAX_ITEM_SIZE];
 25 |   uint8_t valid;        // 1: Not deleted, 0: Deleted
 26 |   uint8_t user_insert;  // 1: User insert operation, 0: Not user insert operation
 27 | 
 28 |   DataItem() {}
 29 |   // Build an empty item for fetching data from remote
 30 |   DataItem(table_id_t t, itemkey_t k)
 31 |       : table_id(t), value_size(0), key(k), remote_offset(0), version(0), lock(0), valid(1), user_insert(0) {}
 32 | 
 33 |   // For user insert item
 34 |   DataItem(table_id_t t, size_t s, itemkey_t k, version_t v, uint8_t ins)
 35 |       : table_id(t), value_size(s), key(k), remote_offset(0), version(v), lock(0), valid(1), user_insert(ins) {}
 36 | 
 37 |   // For server load data
 38 |   DataItem(table_id_t t, size_t s, itemkey_t k, uint8_t* d) : table_id(t), value_size(s), key(k), remote_offset(0), version(0), lock(0), valid(1), user_insert(0) {
 39 |     memcpy(value, d, s);
 40 |   }
 41 | 
 42 |   ALWAYS_INLINE
 43 |   size_t GetSerializeSize() const {
 44 |     return sizeof(*this);
 45 |   }
 46 | 
 47 |   ALWAYS_INLINE
 48 |   void Serialize(char* undo_buffer) {
 49 |     memcpy(undo_buffer, (char*)this, sizeof(*this));
 50 |   }
 51 | 
 52 |   ALWAYS_INLINE
 53 |   uint64_t GetRemoteLockAddr() {
 54 |     return remote_offset + sizeof(table_id) + sizeof(value_size) + sizeof(key) + sizeof(remote_offset) + sizeof(version);
 55 |   }
 56 | 
 57 |   ALWAYS_INLINE
 58 |   uint64_t GetRemoteLockAddr(offset_t remote_item_off) {
 59 |     return remote_item_off + sizeof(table_id) + sizeof(value_size) + sizeof(key) + sizeof(remote_offset) + sizeof(version);
 60 |   }
 61 | 
 62 |   ALWAYS_INLINE
 63 |   uint64_t GetRemoteVersionAddr() {
 64 |     return remote_offset + sizeof(table_id) + sizeof(value_size) + sizeof(key) + sizeof(remote_offset);
 65 |   }
 66 | 
 67 |   ALWAYS_INLINE
 68 |   uint64_t GetRemoteVersionAddr(offset_t remote_item_off) {
 69 |     return remote_item_off + sizeof(table_id) + sizeof(value_size) + sizeof(key) + sizeof(remote_offset);
 70 |   }
 71 | 
 72 |   ALWAYS_INLINE
 73 |   void Debug(t_id_t tid) const {
 74 |     // For debug usage
 75 |     TLOG(INFO, tid) << "[Item debug] table id: " << this->table_id << ", value size: " << this->value_size
 76 |                     << ", key: " << this->key
 77 |                     << ", remote offset: " << this->remote_offset << ", version: " << this->version
 78 |                     << ", lock: "
 79 |                     << (int)this->lock << ", valid: " << (int)this->valid << ", user insert: "
 80 |                     << (int)this->user_insert << std::endl;
 81 |     //        TLOG(INFO, tid) << "Contents: 0x";
 82 |     //        int i = 0;
 83 |     //        for (; i < MAX_ITEM_SIZE - 1; i++) {
 84 |     //            TLOG(INFO, tid) << (int) value[i] << " ";
 85 |     //        }
 86 |     //        TLOG(INFO, tid) << (int) value[i] << " END\n\n";
 87 |   }
 88 |   ALWAYS_INLINE
 89 |   void Debug() const {
 90 |     // For debug usage
 91 |     RDMA_LOG(DBG) << "[Item debug] table id: " << this->table_id << ", value size: " << this->value_size
 92 |                   << ", key: " << this->key
 93 |                   << ", remote offset: " << this->remote_offset << ", version: " << this->version
 94 |                   << ", lock: " << std::hex << "0x"
 95 |                   << this->lock << ", valid: " << std::dec << (int)this->valid << ", user insert: "
 96 |                   << (int)this->user_insert;
 97 |   }
 98 | } Aligned8;  // Size: 560B in X86 arch.
 99 | 
// Size in bytes of one DataItem, including its full MAX_ITEM_SIZE value array
const size_t DataItemSize = sizeof(DataItem);

const size_t RFlushReadSize = 1;  // Size in bytes of the RDMA read issued after a write to emulate an RDMA flush

// Shared-ownership handle to a DataItem
using DataItemPtr = std::shared_ptr<DataItem>;


--------------------------------------------------------------------------------
/core/memstore/hash_store.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang, Lurong Liu
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include <cassert>
  7 | 
  8 | #include "memstore/data_item.h"
  9 | #include "memstore/mem_store.h"
 10 | #include "util/hash.h"
 11 | 
// Status codes for offset lookups
#define OFFSET_NOT_FOUND -1
#define OFFSET_FOUND 0
#define VERSION_TOO_OLD -2  // The new version < old version

// Status codes for slot lookups
#define SLOT_NOT_FOUND -1
#define SLOT_INV -2
#define SLOT_LOCKED -3
#define SLOT_FOUND 0

// Number of DataItem slots in one HashNode (bucket)
const int ITEM_NUM_PER_NODE = 22;
 22 | 
// Describes one hash table: which table it is, where it lives and how it is shaped.
struct HashMeta {
  // To which table this hash store belongs
  table_id_t table_id;

  // Virtual address of the table, used to calculate the distance
  // between some HashNodes with the table for traversing
  // the linked list
  uint64_t data_ptr;

  // Offset of the table, relative to the RDMA local_mr
  offset_t base_off;

  // Total hash buckets
  uint64_t bucket_num;

  // Size of hash node
  size_t node_size;

  // Note: the parameter order (…, bucket_num, node_size, base_off) differs from the
  // member declaration order; base_off is the LAST argument.
  HashMeta(table_id_t table_id,
           uint64_t data_ptr,
           uint64_t bucket_num,
           size_t node_size,
           offset_t base_off) : table_id(table_id),
                                data_ptr(data_ptr),
                                base_off(base_off),
                                bucket_num(bucket_num),
                                node_size(node_size) {}
  // Leaves every field uninitialized
  HashMeta() {}
} Aligned8;
 52 | 
// A hashnode is a bucket: a fixed array of slots plus a link to an overflow bucket
struct HashNode {
  // A dataitem is a slot
  DataItem data_items[ITEM_NUM_PER_NODE];
  // Next bucket in the chain, or nullptr at the end of the chain
  HashNode* next;
} Aligned8;
 59 | 
// A chained hash table laid out inside a pre-allocated memory region.
// The table proper is an array of bucket_num HashNodes; overflow buckets are
// allocated by LocalInsert from a separate reserve area.
class HashStore {
 public:
  // Carve bucket_num buckets out of the store space described by `param` and zero them.
  // Advances param->mem_store_alloc_offset by the table size, so `param` is shared
  // state between successive HashStore constructions.
  // Asserts that bucket_num > 0 and that the table ends before the reserve area starts.
  HashStore(table_id_t table_id, uint64_t bucket_num, MemStoreAllocParam* param)
      : table_id(table_id), base_off(0), bucket_num(bucket_num), data_ptr(nullptr), node_num(0) {
    assert(bucket_num > 0);
    table_size = (bucket_num) * sizeof(HashNode);
    region_start_ptr = param->mem_region_start;
    assert((uint64_t)param->mem_store_start + param->mem_store_alloc_offset + table_size <= (uint64_t)param->mem_store_reserve);
    data_ptr = param->mem_store_start + param->mem_store_alloc_offset;
    param->mem_store_alloc_offset += table_size;

    // Offset of the table relative to the start of the memory region
    base_off = (uint64_t)data_ptr - (uint64_t)region_start_ptr;
    assert(base_off >= 0);

    RDMA_LOG(INFO) << "Table " << table_id << " size: " << table_size / 1024 / 1024
                   << " MB. Start address: " << std::hex << "0x" << (uint64_t)data_ptr
                   << ", base_off: 0x" << base_off << ", bucket_size: " << std::dec << ITEM_NUM_PER_NODE * DataItemSize << " B";
    assert(data_ptr != nullptr);
    // An all-zero slot has valid == 0, i.e. every slot starts out empty
    memset(data_ptr, 0, table_size);
  }

  table_id_t GetTableID() const {
    return table_id;
  }

  // Offset of the table relative to the start of the memory region
  offset_t GetBaseOff() const {
    return base_off;
  }

  uint64_t GetHashNodeSize() const {
    return sizeof(HashNode);
  }

  uint64_t GetBucketNum() const {
    return bucket_num;
  }

  // Address of the first bucket
  char* GetDataPtr() const {
    return data_ptr;
  }

  // Offset of item_ptr relative to the start of the memory region
  offset_t GetItemRemoteOffset(const void* item_ptr) const {
    return (uint64_t)item_ptr - (uint64_t)region_start_ptr;
  }

  // Size in bytes of the bucket array (overflow buckets are not included)
  uint64_t TableSize() const {
    return table_size;
  }

  // Index of the bucket that `key` maps to
  uint64_t GetHash(itemkey_t key) {
    return MurmurHash64A(key, 0xdeadbeef) % bucket_num;
  }

  // Find the valid item with `key`; returns nullptr if there is none
  DataItem* LocalGet(itemkey_t key);

  // Store data_item in the first free slot of the key's bucket chain, allocating an
  // overflow bucket from `param` if the chain is full
  DataItem* LocalInsert(itemkey_t key, const DataItem& data_item, MemStoreReserveParam* param);

  // Overwrite the item with `key` if it exists, otherwise insert it
  DataItem* LocalPut(itemkey_t key, const DataItem& data_item, MemStoreReserveParam* param);

  // Mark the item with `key` as deleted; returns false if it was not found
  bool LocalDelete(itemkey_t key);

 private:
  // To which table this hash store belongs
  table_id_t table_id;

  // The offset in the RDMA region
  offset_t base_off;

  // Total hash buckets
  uint64_t bucket_num;

  // The point to value in the table
  char* data_ptr;

  // Number of overflow hash nodes allocated by LocalInsert
  uint64_t node_num;

  // The size of the entire hash table
  size_t table_size;

  // Start of the memory region address, for installing remote offset for data item
  char* region_start_ptr;
};
143 | 
144 | ALWAYS_INLINE
145 | DataItem* HashStore::LocalGet(itemkey_t key) {
146 |   uint64_t hash = GetHash(key);
147 |   auto* node = (HashNode*)(hash * sizeof(HashNode) + data_ptr);
148 |   while (node) {
149 |     for (auto& data_item : node->data_items) {
150 |       if (data_item.valid && data_item.key == key) {
151 |         return &data_item;
152 |       }
153 |     }
154 |     node = node->next;
155 |   }
156 |   return nullptr;  // failed to found one
157 | }
158 | 
159 | ALWAYS_INLINE
160 | DataItem* HashStore::LocalInsert(itemkey_t key, const DataItem& data_item, MemStoreReserveParam* param) {
161 |   uint64_t hash = GetHash(key);
162 |   auto* node = (HashNode*)(hash * sizeof(HashNode) + data_ptr);
163 | 
164 |   // Find
165 |   while (node) {
166 |     for (auto& item : node->data_items) {
167 |       if (!item.valid) {
168 |         item = data_item;
169 |         item.valid = 1;
170 |         return &item;
171 |       }
172 |     }
173 |     if (!node->next) break;
174 |     node = node->next;
175 |   }
176 | 
177 |   // Allocate
178 |   RDMA_LOG(INFO) << "Table " << table_id << " alloc a new bucket for key: " << key << ". Current slotnum/bucket: " << ITEM_NUM_PER_NODE;
179 |   assert((uint64_t)param->mem_store_reserve + param->mem_store_reserve_offset <= (uint64_t)param->mem_store_end);
180 |   auto* new_node = (HashNode*)(param->mem_store_reserve + param->mem_store_reserve_offset);
181 |   param->mem_store_reserve_offset += sizeof(HashNode);
182 |   memset(new_node, 0, sizeof(HashNode));
183 |   new_node->data_items[0] = data_item;
184 |   new_node->data_items[0].valid = 1;
185 |   new_node->next = nullptr;
186 |   node->next = new_node;
187 |   node_num++;
188 |   return &(new_node->data_items[0]);
189 | }
190 | 
191 | ALWAYS_INLINE
192 | DataItem* HashStore::LocalPut(itemkey_t key, const DataItem& data_item, MemStoreReserveParam* param) {
193 |   DataItem* res;
194 |   if ((res = LocalGet(key)) != nullptr) {
195 |     // KV pair has already exist, then update
196 |     *res = data_item;
197 |     return res;
198 |   }
199 |   // Insert
200 |   return LocalInsert(key, data_item, param);
201 | }
202 | 
203 | ALWAYS_INLINE
204 | bool HashStore::LocalDelete(itemkey_t key) {
205 |   uint64_t hash = GetHash(key);
206 |   auto* node = (HashNode*)(hash * sizeof(HashNode) + data_ptr);
207 |   for (auto& data_item : node->data_items) {
208 |     if (data_item.valid && data_item.key == key) {
209 |       data_item.valid = 0;
210 |       return true;
211 |     }
212 |   }
213 |   node = node->next;
214 |   while (node) {
215 |     for (auto& data_item : node->data_items) {
216 |       if (data_item.valid && data_item.key == key) {
217 |         data_item.valid = 0;
218 |         return true;
219 |       }
220 |     }
221 |     node = node->next;
222 |   }
223 |   return false;  // Failed to find one to be deleted
224 | }
225 | 


--------------------------------------------------------------------------------
/core/memstore/mem_store.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <string>
 7 | 
 8 | #include "base/common.h"
 9 | 
// Kinds of memory store
enum class MemStoreType {
  kHash = 0,
  kBPlusTree,
};
14 | 
// Allocation state handed to a memory store when it is constructed.
// The store advances mem_store_alloc_offset by the space it takes, so one instance
// is passed to successive stores to place them one after another.
struct MemStoreAllocParam {
  // The start of the registered memory region for storing memory stores
  char* mem_region_start;

  // The start of the whole memory store space (e.g., Hash Store Space)
  char* mem_store_start;

  // The start offset of each memory store instance
  offset_t mem_store_alloc_offset;

  // The start address of the whole reserved space (e.g., for insert in hash conflict). Here for overflow check
  char* mem_store_reserve;

  MemStoreAllocParam(char* region_start, char* store_start, offset_t start_off, char* reserve_start)
      : mem_region_start(region_start),
        mem_store_start(store_start),
        mem_store_alloc_offset(start_off),
        mem_store_reserve(reserve_start) {}
};
34 | 
// Allocation state for the reserved space that overflow allocations are taken from.
// The allocating store advances mem_store_reserve_offset by the space it takes.
struct MemStoreReserveParam {
  // The start address of the whole reserved space (e.g., for insert in hash conflict).
  char* mem_store_reserve;

  // For allocation in case of memory store (e.g., HashStore) conflict
  offset_t mem_store_reserve_offset;
  
  // The end address of the memory store space. Here for overflow check
  char* mem_store_end;

  MemStoreReserveParam(char* reserve_start, offset_t reserve_off, char* end)
      : mem_store_reserve(reserve_start), mem_store_reserve_offset(reserve_off), mem_store_end(end) {}
};


--------------------------------------------------------------------------------
/core/scheduler/corotine_scheduler.cc:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #include "scheduler/corotine_scheduler.h"
 5 | 
 6 | #include <cassert>
 7 | 
 8 | #include "util/debug.h"
 9 | 
// Poll every QP in pending_qps once for a send completion.
// For each successful completion, the pending count of the coroutine named by
// wc.wr_id is decremented, the coroutine is appended to the runnable list when its
// count reaches zero, and the QP entry is removed from pending_qps.
void CoroutineScheduler::PollRegularCompletion() {
  for (auto it = pending_qps.begin(); it != pending_qps.end();) {
    RCQP* qp = *it;
    struct ibv_wc wc;
    auto poll_result = qp->poll_send_completion(wc);  // The qp polls its own wc
    if (poll_result == 0) {
      // Nothing completed on this QP yet; leave it in the list
      it++;
      continue;
    }
    if (unlikely(wc.status != IBV_WC_SUCCESS)) {
      RDMA_LOG(EMPH) << "Bad completion status: " << wc.status << " with error " << ibv_wc_status_str(wc.status) << ";@ node " << qp->idx_.node_id;
      if (wc.status != IBV_WC_RETRY_EXC_ERR) {
        RDMA_LOG(EMPH) << "completion status != IBV_WC_RETRY_EXC_ERR. abort()";
        abort();
      } else {
        // Retry-exceeded is tolerated: the QP entry stays in the list and the
        // coroutine's pending count is not decremented.
        // NOTE(review): this completion has been consumed, so the entry only goes
        // away if another completion arrives on the QP; confirm that is intended.
        it++;
        continue;
      }
    }
    auto coro_id = wc.wr_id;
    // NOTE(review): for wr_id 0 the iterator is neither advanced nor erased, so the
    // same QP is polled again on the next pass of the loop. Confirm that completions
    // with wr_id 0 are meant to be dropped this way.
    if (coro_id == 0) continue;
    assert(pending_counts[coro_id] > 0);
    pending_counts[coro_id] -= 1;
    if (pending_counts[coro_id] == 0) {
      // All outstanding requests of this coroutine have completed: make it runnable
      AppendCoroutine(&coro_array[coro_id]);
    }
    it = pending_qps.erase(it);
  }
}
39 | 
// Poll every QP in pending_log_qps once for a send completion.
// For each successful completion, the pending log count of the coroutine named by
// wc.wr_id is decremented and the QP entry is removed from pending_log_qps.
// Unlike PollRegularCompletion, no coroutine is made runnable here; callers check
// the count through CheckLogAck.
void CoroutineScheduler::PollLogCompletion() {
  for (auto it = pending_log_qps.begin(); it != pending_log_qps.end();) {
    RCQP* qp = *it;
    struct ibv_wc wc;
    auto poll_result = qp->poll_send_completion(wc);
    if (poll_result == 0) {
      // Nothing completed on this QP yet; leave it in the list
      it++;
      continue;
    }
    if (unlikely(wc.status != IBV_WC_SUCCESS)) {
      RDMA_LOG(EMPH) << "Bad completion status: " << wc.status << " with error " << ibv_wc_status_str(wc.status) << ";@ node " << qp->idx_.node_id;
      if (wc.status != IBV_WC_RETRY_EXC_ERR) {
        RDMA_LOG(EMPH) << "completion status != IBV_WC_RETRY_EXC_ERR. abort()";
        abort();
      } else {
        // Retry-exceeded is tolerated: the QP entry stays in the list and the
        // coroutine's pending log count is not decremented.
        it++;
        continue;
      }
    }
    auto coro_id = wc.wr_id;
    // NOTE(review): for wr_id 0 the iterator is neither advanced nor erased, so the
    // same QP is polled again on the next pass of the loop.
    if (coro_id == 0) continue;
    assert(pending_log_counts[coro_id] > 0);
    pending_log_counts[coro_id] -= 1;
    it = pending_log_qps.erase(it);
  }
}
66 | 
67 | void CoroutineScheduler::PollCompletion() {
68 |   PollRegularCompletion();
69 |   PollLogCompletion();
70 | }
71 | 
72 | bool CoroutineScheduler::CheckLogAck(coro_id_t c_id) {
73 |   if (pending_log_counts[c_id] == 0) {
74 |     return true;
75 |   }
76 |   PollLogCompletion();
77 |   return pending_log_counts[c_id] == 0;
78 | }


--------------------------------------------------------------------------------
/core/scheduler/coroutine.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | // Use symmetric_coroutine from boost::coroutine, not asymmetric_coroutine from boost::coroutine2
 7 | // symmetric_coroutine meets transaction processing, in which each coroutine can freely yield to another
 8 | #define BOOST_COROUTINES_NO_DEPRECATION_WARNING
 9 | 
10 | #include <boost/coroutine/all.hpp>
11 | 
12 | #include "base/common.h"
13 | 
// Handle used to start or resume a coroutine
using coro_call_t = boost::coroutines::symmetric_coroutine<void>::call_type;

// Handle a running coroutine uses to transfer control to another coroutine
using coro_yield_t = boost::coroutines::symmetric_coroutine<void>::yield_type;
17 | 
// For coroutine scheduling: one node of the scheduler's doubly linked coroutine list
struct Coroutine {
  // Only is_wait_poll is initialized here; coro_id and the list pointers are left
  // uninitialized and must be set by whoever builds the list
  Coroutine() : is_wait_poll(false) {}

  // Whether I am waiting for polling network replies. If true, I leave the yield-able coroutine list
  bool is_wait_poll;

  // My coroutine ID
  coro_id_t coro_id;

  // Registered coroutine function
  coro_call_t func;

  // Use pointer to accelerate yield. Otherwise, one needs a while loop
  // to yield the next coroutine that does not wait for network replies
  Coroutine* prev_coro;

  Coroutine* next_coro;
};


--------------------------------------------------------------------------------
/core/util/debug.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include <cxxabi.h>
  7 | #include <execinfo.h>
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | 
 11 | #include <fstream>
 12 | #include <thread>
 13 | 
 14 | #include "base/common.h"
 15 | #include "rlib/logging.hpp"
 16 | 
 17 | using namespace rdmaio;
 18 | 
 19 | #define ASSERT(condition)     \
 20 |   if (unlikely(!(condition))) \
 21 |   ::rdmaio::MessageLogger((char*)__FILE__, __LINE__, ::rdmaio::FATAL + 1).stream() << "Assertion! "
 22 | 
 23 | #define TLOG(n, tid)       \
 24 |   if (n >= RDMA_LOG_LEVEL) \
 25 |   LogicalThreadLogger((char*)__FILE__, __LINE__, n, tid).stream()
 26 | 
 27 | // Use the logical thread ID
 28 | class LogicalThreadLogger {
 29 |  public:
 30 |   // Starts a log record; the "[file:line] " prefix is only written for enabled levels.
 31 |   LogicalThreadLogger(const char* file, int line, int level, t_id_t tid) : level_(level), tid_(tid) {
 32 |     if (level_ >= RDMA_LOG_LEVEL) {
 33 |       stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] ";
 34 |     }
 35 |   }
 36 | 
 37 |   // Appends the record to ./<tid>_log.txt; levels at or above FATAL abort afterwards.
 38 |   ~LogicalThreadLogger() {
 39 |     if (level_ < RDMA_LOG_LEVEL) return;
 40 |     const std::string path = "./" + std::to_string(tid_) + "_log.txt";
 41 |     std::ofstream sink(path, std::ios::app);
 42 |     sink << stream_.str() << std::endl;
 43 |     sink.close();
 44 |     if (level_ >= ::rdmaio::FATAL) abort();
 45 |   }
 46 | 
 47 |   // Stream that collects the text of this record.
 48 |   std::stringstream& stream() { return stream_; }
 49 | 
 50 |  private:
 51 |   std::stringstream stream_;
 52 |   int level_;
 53 |   t_id_t tid_;
 54 | 
 55 |   // Returns what follows the last '/' of full_path, or full_path itself when it has none.
 56 |   static std::string StripBasename(const std::string& full_path) {
 57 |     const size_t slash = full_path.rfind('/');
 58 |     return slash == std::string::npos ? full_path : full_path.substr(slash + 1);
 59 |   }
 60 | };
 66 | 
 67 | // Use the physical thread ID
 68 | class PhysicalThreadLogger {
 69 |  public:
 70 |   // Starts a log record; the "[file:line] " prefix is only written for enabled levels.
 71 |   PhysicalThreadLogger(const char* file, int line, int level, std::thread::id tid) : level_(level), tid_(tid) {
 72 |     if (level_ >= RDMA_LOG_LEVEL) {
 73 |       stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] ";
 74 |     }
 75 |   }
 76 | 
 77 |   // Appends the record to ./<thread id>_log.txt; levels at or above FATAL abort afterwards.
 78 |   ~PhysicalThreadLogger() {
 79 |     if (level_ < RDMA_LOG_LEVEL) return;
 80 | 
 81 |     // The thread id is turned into text through its stream insertion operator
 82 |     std::ostringstream id_text;
 83 |     id_text << tid_;
 84 | 
 85 |     std::ofstream sink("./" + id_text.str() + "_log.txt", std::ios::app);
 86 |     sink << stream_.str() << std::endl;
 87 |     sink.close();
 88 |     if (level_ >= ::rdmaio::FATAL) abort();
 89 |   }
 90 | 
 91 |   // Stream that collects the text of this record.
 92 |   std::stringstream& stream() { return stream_; }
 93 | 
 94 |  private:
 95 |   std::stringstream stream_;
 96 |   int level_;
 97 |   std::thread::id tid_;
 98 | 
 99 |   // Returns what follows the last '/' of full_path, or full_path itself when it has none.
100 |   static std::string StripBasename(const std::string& full_path) {
101 |     const size_t slash = full_path.rfind('/');
102 |     return slash == std::string::npos ? full_path : full_path.substr(slash + 1);
103 |   }
104 | };
110 | 
111 | // Writes a demangled stack trace of the caller to `out`; at most max_frames frames are printed.
112 | // Adapted from https://panthema.net/2008/0901-stacktrace-demangled/
113 | static void PrintStackTrace(FILE* out = stderr, unsigned int max_frames = 63) {
114 |   fprintf(out, "stack trace:\n");
115 | 
116 |   // storage array for stack trace address data (+1 for this function's own frame, which is
117 |   // skipped below). Heap-allocated: a runtime-sized stack array is not standard C++.
118 |   const size_t capacity = (size_t)max_frames + 1;
119 |   void** addrlist = (void**)malloc(capacity * sizeof(void*));
120 |   if (addrlist == NULL) {
121 |     fprintf(out, "  <out of memory>\n");
122 |     return;
123 |   }
124 | 
125 |   // retrieve current stack addresses
126 |   int addrlen = backtrace(addrlist, (int)capacity);
127 | 
128 |   if (addrlen <= 0) {
129 |     fprintf(out, "  <empty, possibly corrupt>\n");
130 |     free(addrlist);
131 |     return;
132 |   }
133 | 
134 |   // resolve addresses into strings containing "filename(function+address)",
135 |   // this array must be free()-ed
136 |   char** symbollist = backtrace_symbols(addrlist, addrlen);
137 |   free(addrlist);  // the symbol strings do not refer back to the address array
138 |   if (symbollist == NULL) {
139 |     // backtrace_symbols() reports allocation failure by returning NULL
140 |     fprintf(out, "  <out of memory>\n");
141 |     return;
142 |   }
143 | 
144 |   // allocate string which will be filled with the demangled function name
145 |   size_t funcnamesize = 256;
146 |   char* funcname = (char*)malloc(funcnamesize);
147 | 
148 |   // iterate over the returned symbol lines. skip the first, it is the
149 |   // address of this function.
150 |   for (int i = 1; i < addrlen; i++) {
151 |     char *begin_name = 0, *begin_offset = 0, *end_offset = 0;
152 | 
153 |     // find parentheses and +address offset surrounding the mangled name:
154 |     // ./module(function+0x15c) [0x8048a6d]
155 |     for (char* p = symbollist[i]; *p; ++p) {
156 |       if (*p == '(')
157 |         begin_name = p;
158 |       else if (*p == '+')
159 |         begin_offset = p;
160 |       else if (*p == ')' && begin_offset) {
161 |         end_offset = p;
162 |         break;
163 |       }
164 |     }
165 | 
166 |     if (begin_name && begin_offset && end_offset && begin_name < begin_offset) {
167 |       *begin_name++ = '\0';
168 |       *begin_offset++ = '\0';
169 |       *end_offset = '\0';
170 | 
171 |       // mangled name is now in [begin_name, begin_offset) and caller
172 |       // offset in [begin_offset, end_offset). now apply
173 |       // __cxa_demangle():
174 | 
175 |       int status;
176 |       char* ret = abi::__cxa_demangle(begin_name,
177 |                                       funcname, &funcnamesize, &status);
178 |       if (status == 0) {
179 |         funcname = ret;  // use possibly realloc()-ed string
180 |         fprintf(out, "  %s : %s+%s\n",
181 |                 symbollist[i], funcname, begin_offset);
182 |       } else {
183 |         // demangling failed. Output function name as a C function with
184 |         // no arguments.
185 |         fprintf(out, "  %s : %s()+%s\n",
186 |                 symbollist[i], begin_name, begin_offset);
187 |       }
188 |     } else {
189 |       // couldn't parse the line? print the whole line.
190 |       fprintf(out, "  %s\n", symbollist[i]);
191 |     }
192 |   }
193 | 
194 |   free(funcname);
195 |   free(symbollist);
196 | }
183 | 


--------------------------------------------------------------------------------
/core/util/fast_random.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Adapted from mica
  3 | // Copyright (c) 2022
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <string>
  8 | 
  9 | #include "base/common.h"
 10 | 
 11 | class Rand {
 12 |  public:
 13 |   explicit Rand() : state_(0) {}
 14 |   explicit Rand(uint64_t seed) : state_(seed) { assert(seed < (1UL << 48)); }
 15 |   Rand(const Rand& o) : state_(o.state_) {}
 16 |   Rand& operator=(const Rand& o) {
 17 |     state_ = o.state_;
 18 |     return *this;
 19 |   }
 20 | 
 21 |   // Upper 32 bits of the freshly advanced 48-bit state
 22 |   uint32_t next_u32() { return (uint32_t)(Advance() >> (48 - 32)); }
 23 | 
 24 |   // Freshly advanced state scaled by 2^48 - 1, so the result lies in [0, 1] (1 included).
 25 |   // caution: this is maybe too non-random
 26 |   double next_f64() { return (double)Advance() / (double)((1UL << 48) - 1); }
 27 | 
 28 |  private:
 29 |   // One step of the 48-bit linear congruential generator (same as Java's); returns the new state
 30 |   uint64_t Advance() {
 31 |     state_ = (state_ * 0x5deece66dUL + 0xbUL) & ((1UL << 48) - 1);
 32 |     return state_;
 33 |   }
 34 | 
 35 |   uint64_t state_;
 36 | };
 36 | 
 37 | // Generate random number for workload testing
 38 | static ALWAYS_INLINE 
 39 | uint32_t FastRand(uint64_t* seed) {
 40 |   // One linear-congruential step written back to *seed; the result is the upper 32 bits of the new state
 41 |   const uint64_t advanced = (*seed) * 1103515245 + 12345;
 42 |   *seed = advanced;
 43 |   return (uint32_t)(advanced >> 32);
 44 | }
 43 | 
 44 | // not thread-safe
 45 | //
 46 | // taken from java:
 47 | //   http://developer.classpath.org/doc/java/util/Random-source.html
 48 | class FastRandom {
 49 |  public:
 50 |   // Seeds the generator; the seed is scrambled by SetSeed0()
 51 |   FastRandom(unsigned long sed)
 52 |       : seed(0) {
 53 |     SetSeed0(sed);
 54 |   }
 55 | 
 56 |   FastRandom() : seed(0) {
 57 |     SetSeed0(seed);
 58 |   }
 59 | 
 60 |   // 64 random bits. The two 32-bit draws are taken in separate statements (upper half first):
 61 |   // inside a single `a + b` expression the order of the two calls is unspecified, which made
 62 |   // the produced sequence depend on the compiler.
 63 |   inline unsigned long
 64 |   Next() {
 65 |     const unsigned long hi = Next(32);
 66 |     const unsigned long lo = Next(32);
 67 |     return (hi << 32) + lo;
 68 |   }
 69 | 
 70 |   inline uint32_t
 71 |   NextU32() {
 72 |     return Next(32);
 73 |   }
 74 | 
 75 |   inline uint16_t
 76 |   NextU16() {
 77 |     return Next(16);
 78 |   }
 79 | 
 80 |   /** [0.0, 1.0) */
 81 |   inline double
 82 |   NextUniform() {
 83 |     // 26 + 27 = 53 random bits, upper part drawn first (sequenced for the same reason as Next())
 84 |     const unsigned long hi = Next(26);
 85 |     const unsigned long lo = Next(27);
 86 |     return ((hi << 27) + lo) / (double)(1L << 53);
 87 |   }
 88 | 
 89 |   inline char
 90 |   NextChar() {
 91 |     return Next(8) % 256;
 92 |   }
 93 | 
 94 |   // String of len characters, each taken from NextChar()
 95 |   inline std::string
 96 |   NextString(size_t len) {
 97 |     std::string s(len, 0);
 98 |     for (size_t i = 0; i < len; i++)
 99 |       s[i] = NextChar();
100 |     return s;
101 |   }
102 | 
103 |   inline unsigned long
104 |   GetSeed() {
105 |     return seed;
106 |   }
107 | 
108 |   // Installs sed as the raw generator state (no scrambling)
109 |   inline void
110 |   SetSeed(unsigned long sed) {
111 |     this->seed = sed;
112 |   }
113 | 
114 |   // Installs the scrambled seed: XOR with the multiplier, truncated to 48 bits
115 |   inline void
116 |   SetSeed0(unsigned long sed) {
117 |     this->seed = (sed ^ 0x5DEECE66DL) & ((1L << 48) - 1);
118 |   }
119 | 
120 |   // Uniform integer in [min, max]
121 |   inline uint64_t RandNumber(int min, int max) {
122 |     return CheckBetweenInclusive((uint64_t)(NextUniform() * (max - min + 1) + min), min, max);
123 |   }
124 | 
125 |   // Returns v after asserting min <= v <= max
126 |   inline uint64_t CheckBetweenInclusive(uint64_t v, uint64_t min, uint64_t max) {
127 |     assert(v >= min);
128 |     assert(v <= max);
129 |     return v;
130 |   }
131 | 
132 |  private:
133 |   // Advances the 48-bit state by one step and returns its top `bits` bits
134 |   inline unsigned long
135 |   Next(unsigned int bits) {
136 |     seed = (seed * 0x5DEECE66DL + 0xBL) & ((1L << 48) - 1);
137 |     return (unsigned long)(seed >> (48 - bits));
138 |   }
139 | 
140 |   unsigned long seed;
141 | };
127 | 


--------------------------------------------------------------------------------
/core/util/hash.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include <cstring>
  7 | 
  8 | #include "base/common.h"
  7 | 
  8 | // 64-bit hash for 64-bit platforms
  9 | static ALWAYS_INLINE
 10 | uint64_t MurmurHash64A(uint64_t key, unsigned int seed) {
 11 |   // The input is a single 8-byte key, so there is exactly one mixing round and no tail bytes
 12 |   const uint64_t kMul = 0xc6a4a7935bd1e995;
 13 |   const int kShift = 47;
 14 | 
 15 |   // mix the key
 16 |   uint64_t block = key;
 17 |   block *= kMul;
 18 |   block ^= block >> kShift;
 19 |   block *= kMul;
 20 | 
 21 |   // fold it into the seeded state (8 is the key length in bytes)
 22 |   uint64_t hash = seed ^ (8 * kMul);
 23 |   hash ^= block;
 24 |   hash *= kMul;
 25 | 
 26 |   // final mix
 27 |   hash ^= hash >> kShift;
 28 |   hash *= kMul;
 29 |   hash ^= hash >> kShift;
 30 | 
 31 |   return hash;
 32 | }
 52 | 
 53 | // Hashes the len bytes starting at key with the given seed.
 54 | static ALWAYS_INLINE
 55 | uint64_t MurmurHash64ALen(const char* key, uint32_t len, uint64_t seed) {
 56 |   const uint64_t m = 0xc6a4a7935bd1e995;
 57 |   const int r = 47;
 58 | 
 59 |   uint64_t h = seed ^ (len * m);
 60 | 
 61 |   // Walk the key as raw bytes and copy each 8-byte word out with memcpy: key carries no
 62 |   // alignment guarantee, so reading it through a uint64_t pointer is undefined behavior.
 63 |   const unsigned char* data = (const unsigned char*)key;
 64 |   const unsigned char* end = data + (len / 8) * 8;
 65 | 
 66 |   while (data != end) {
 67 |     uint64_t k;
 68 |     std::memcpy(&k, data, sizeof(k));
 69 |     data += sizeof(k);
 70 | 
 71 |     k *= m;
 72 |     k ^= k >> r;
 73 |     k *= m;
 74 | 
 75 |     h ^= k;
 76 |     h *= m;
 77 |   }
 78 | 
 79 |   // data now points at the 0..7 trailing bytes
 80 |   const unsigned char* data2 = data;
 81 | 
 82 |   // every case deliberately falls through to the ones below it
 83 |   switch (len & 7) {
 84 |     case 7:
 85 |       h ^= (uint64_t)((uint64_t)data2[6] << (uint64_t)48);
 86 |       // fallthrough
 87 |     case 6:
 88 |       h ^= (uint64_t)((uint64_t)data2[5] << (uint64_t)40);
 89 |       // fallthrough
 90 |     case 5:
 91 |       h ^= (uint64_t)((uint64_t)data2[4] << (uint64_t)32);
 92 |       // fallthrough
 93 |     case 4:
 94 |       h ^= (uint64_t)((uint64_t)data2[3] << (uint64_t)24);
 95 |       // fallthrough
 96 |     case 3:
 97 |       h ^= (uint64_t)((uint64_t)data2[2] << (uint64_t)16);
 98 |       // fallthrough
 99 |     case 2:
100 |       h ^= (uint64_t)((uint64_t)data2[1] << (uint64_t)8);
101 |       // fallthrough
102 |     case 1:
103 |       h ^= (uint64_t)((uint64_t)data2[0]);
104 |       h *= m;
105 |   };
106 | 
107 |   h ^= h >> r;
108 |   h *= m;
109 |   h ^= h >> r;
110 | 
111 |   return h;
112 | }
100 | 


--------------------------------------------------------------------------------
/core/util/latency.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Adapted from mica
  3 | // Copyright (c) 2022
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <algorithm>
  8 | #include <cstdio>
  9 | 
 10 | // Test ibv_poll_cq
 11 | static inline unsigned long GetCPUCycle() {
 12 |   // rdtsc delivers the low 32 bits through the "a" register and the high 32 bits through "d"
 13 |   unsigned lo, hi;
 14 |   __asm __volatile("rdtsc"
 15 |                    : "=a"(lo), "=d"(hi));
 16 |   return (((unsigned long)hi) << 32) | ((unsigned long)lo);
 17 | }
 17 | 
 18 | // Latency histogram in microseconds. Bucket width doubles from 1 us to 16 us across five
 19 | // ranges of 128 buckets each; everything from 3968 us upwards lands in one overflow counter.
 20 | class Latency {
 21 |  public:
 22 |   Latency() { reset(); }
 23 | 
 24 |   // Zeroes every bucket
 25 |   void reset() { memset(reinterpret_cast<void*>(this), 0, sizeof(Latency)); }
 26 | 
 27 |   // Records one sample of `us` microseconds
 28 |   void update(uint64_t us) {
 29 |     if (us < 128)
 30 |       bin0_[us]++;
 31 |     else if (us < 384)
 32 |       bin1_[(us - 128) / 2]++;
 33 |     else if (us < 896)
 34 |       bin2_[(us - 384) / 4]++;
 35 |     else if (us < 1920)
 36 |       bin3_[(us - 896) / 8]++;
 37 |     else if (us < 3968)
 38 |       bin4_[(us - 1920) / 16]++;
 39 |     else
 40 |       bin5_++;
 41 |   }
 42 | 
 43 |   // Adds the samples of another histogram to this one
 44 |   Latency& operator+=(const Latency& o) {
 45 |     uint64_t i;
 46 |     for (i = 0; i < 128; i++) bin0_[i] += o.bin0_[i];
 47 |     for (i = 0; i < 128; i++) bin1_[i] += o.bin1_[i];
 48 |     for (i = 0; i < 128; i++) bin2_[i] += o.bin2_[i];
 49 |     for (i = 0; i < 128; i++) bin3_[i] += o.bin3_[i];
 50 |     for (i = 0; i < 128; i++) bin4_[i] += o.bin4_[i];
 51 |     bin5_ += o.bin5_;
 52 |     return *this;
 53 |   }
 54 | 
 55 |   // Number of recorded samples
 56 |   uint64_t count() const {
 57 |     uint64_t count = 0;
 58 |     uint64_t i;
 59 |     for (i = 0; i < 128; i++) count += bin0_[i];
 60 |     for (i = 0; i < 128; i++) count += bin1_[i];
 61 |     for (i = 0; i < 128; i++) count += bin2_[i];
 62 |     for (i = 0; i < 128; i++) count += bin3_[i];
 63 |     for (i = 0; i < 128; i++) count += bin4_[i];
 64 |     count += bin5_;
 65 |     return count;
 66 |   }
 67 | 
 68 |   // Sum of all samples, each counted at the lower bound of its bucket
 69 |   uint64_t sum() const {
 70 |     uint64_t sum = 0;
 71 |     uint64_t i;
 72 |     for (i = 0; i < 128; i++) sum += bin0_[i] * (0 + i * 1);
 73 |     for (i = 0; i < 128; i++) sum += bin1_[i] * (128 + i * 2);
 74 |     for (i = 0; i < 128; i++) sum += bin2_[i] * (384 + i * 4);
 75 |     for (i = 0; i < 128; i++) sum += bin3_[i] * (896 + i * 8);
 76 |     for (i = 0; i < 128; i++) sum += bin4_[i] * (1920 + i * 16);
 77 |     sum += bin5_ * 3968;
 78 |     return sum;
 79 |   }
 80 | 
 81 |   // sum() / count(); 0 for an empty histogram
 82 |   uint64_t avg() const { return sum() / std::max(uint64_t(1), count()); }
 83 | 
 84 |   // Lower bound of the smallest non-empty bucket; 3968 when nothing below 3968 us was recorded
 85 |   uint64_t min() const {
 86 |     uint64_t i;
 87 |     for (i = 0; i < 128; i++)
 88 |       if (bin0_[i] != 0) return 0 + i * 1;
 89 |     for (i = 0; i < 128; i++)
 90 |       if (bin1_[i] != 0) return 128 + i * 2;
 91 |     for (i = 0; i < 128; i++)
 92 |       if (bin2_[i] != 0) return 384 + i * 4;
 93 |     for (i = 0; i < 128; i++)
 94 |       if (bin3_[i] != 0) return 896 + i * 8;
 95 |     for (i = 0; i < 128; i++)
 96 |       if (bin4_[i] != 0) return 1920 + i * 16;
 97 |     // if (bin5_ != 0) return 3968;
 98 |     return 3968;
 99 |   }
100 | 
101 |   // Lower bound of the largest non-empty bucket; 0 for an empty histogram
102 |   uint64_t max() const {
103 |     int64_t i;
104 |     if (bin5_ != 0) return 3968;
105 |     for (i = 127; i >= 0; i--)
106 |       if (bin4_[i] != 0) return 1920 + static_cast<uint64_t>(i) * 16;
107 |     for (i = 127; i >= 0; i--)
108 |       if (bin3_[i] != 0) return 896 + static_cast<uint64_t>(i) * 8;
109 |     for (i = 127; i >= 0; i--)
110 |       if (bin2_[i] != 0) return 384 + static_cast<uint64_t>(i) * 4;
111 |     for (i = 127; i >= 0; i--)
112 |       if (bin1_[i] != 0) return 128 + static_cast<uint64_t>(i) * 2;
113 |     for (i = 127; i >= 0; i--)
114 |       if (bin0_[i] != 0) return 0 + static_cast<uint64_t>(i) * 1;
115 |     return 0;
116 |   }
117 | 
118 |   // Return the (p * 100) percentile latency
119 |   uint64_t perc(double p) const {
120 |     assert(p >= 0.0 && p <= 1.00);
121 | 
122 |     uint64_t i;
123 |     const int64_t total = static_cast<int64_t>(count());
124 |     int64_t thres = static_cast<int64_t>(p * static_cast<double>(total));
125 |     // p == 1.0 would ask for the sample after the last one, which used to fall through to the
126 |     // 3968 overflow value even when every sample was small. Clamp to the last recorded sample
127 |     // so that perc(1.0) == max(). An empty histogram is left as it was.
128 |     if (total > 0 && thres >= total) thres = total - 1;
129 |     for (i = 0; i < 128; i++)
130 |       if ((thres -= static_cast<int64_t>(bin0_[i])) < 0) return 0 + i * 1;
131 |     for (i = 0; i < 128; i++)
132 |       if ((thres -= static_cast<int64_t>(bin1_[i])) < 0) return 128 + i * 2;
133 |     for (i = 0; i < 128; i++)
134 |       if ((thres -= static_cast<int64_t>(bin2_[i])) < 0) return 384 + i * 4;
135 |     for (i = 0; i < 128; i++)
136 |       if ((thres -= static_cast<int64_t>(bin3_[i])) < 0) return 896 + i * 8;
137 |     for (i = 0; i < 128; i++)
138 |       if ((thres -= static_cast<int64_t>(bin4_[i])) < 0) return 1920 + i * 16;
139 |     return 3968;
140 |   }
141 | 
142 |   // Writes one "<bucket lower bound> <count>" line per non-empty bucket
143 |   void print(FILE* fp) const {
144 |     uint64_t i;
145 |     for (i = 0; i < 128; i++)
146 |       if (bin0_[i] != 0)
147 |         fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 0 + i * 1, bin0_[i]);
148 |     for (i = 0; i < 128; i++)
149 |       if (bin1_[i] != 0)
150 |         fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 128 + i * 2, bin1_[i]);
151 |     for (i = 0; i < 128; i++)
152 |       if (bin2_[i] != 0)
153 |         fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 384 + i * 4, bin2_[i]);
154 |     for (i = 0; i < 128; i++)
155 |       if (bin3_[i] != 0)
156 |         fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 896 + i * 8, bin3_[i]);
157 |     for (i = 0; i < 128; i++)
158 |       if (bin4_[i] != 0)
159 |         fprintf(fp, "%4" PRIu64 " %6" PRIu64 "\n", 1920 + i * 16, bin4_[i]);
160 |     if (bin5_ != 0) fprintf(fp, "%4d %6" PRIu64 "\n", 3968, bin5_);
161 |   }
162 | 
163 |  private:
164 |   // [0, 128) us
165 |   uint64_t bin0_[128];
166 |   // [128, 384) us
167 |   uint64_t bin1_[128];
168 |   // [384, 896) us
169 |   uint64_t bin2_[128];
170 |   // [896, 1920) us
171 |   uint64_t bin3_[128];
172 |   // [1920, 3968) us
173 |   uint64_t bin4_[128];
174 |   // [3968, inf) us
175 |   uint64_t bin5_;
176 | };


--------------------------------------------------------------------------------
/core/util/seqlock.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "util/spinlock.h"
 7 | 
 8 | // Sequence lock
 9 | class SeqLock {
10 |  public:
11 |   SeqLock() : spin_lock(new SpinLock()) {}
12 | 
13 |   // The SpinLock is owned by this object and used to leak: release it here.
14 |   ~SeqLock() { delete spin_lock; }
15 | 
16 |   // A copy would share the raw pointer and delete the same SpinLock twice.
17 |   SeqLock(const SeqLock&) = delete;
18 |   SeqLock& operator=(const SeqLock&) = delete;
19 | 
20 |   // Writer side: take the underlying spin lock
21 |   void BeginWrite() {
22 |     spin_lock->Lock();
23 |   }
24 | 
25 |   // Writer side: release the underlying spin lock
26 |   void EndWrite() {
27 |     spin_lock->Unlock();
28 |   }
29 | 
30 |   void BeginRead() {
31 |     // Wait the writer
32 |     while (IsWriting())
33 |       ;
34 |   }
35 | 
36 |   // Currently a no-op: the re-check after reading is disabled
37 |   void EndRead() {
38 |     // Read again if a writer locks
39 |     // if (IsWriting()) BeginRead();
40 |   }
41 | 
42 |  private:
43 |   SpinLock* spin_lock;
44 | 
45 |   // An odd lock counter is treated as "a writer is active"
46 |   bool IsWriting() {
47 |     return spin_lock->Counter() % 2 == 1;
48 |   }
49 | };


--------------------------------------------------------------------------------
/core/util/spinlock.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <atomic>
 7 | 
 8 | // Minimal test-and-set spin lock: counter is 0 when free and 1 while held.
 9 | class SpinLock {
10 |  public:
11 |   SpinLock() : counter(0) {}
12 | 
13 |   // Spins until the counter could be switched from 0 (free) to 1 (held).
14 |   // The previous code had the two values swapped (expected 1, stored 0), so it never
15 |   // acquired anything and could even release a lock held by another thread.
16 |   void Lock() {
17 |     int expected = 0;
18 |     while (!counter.compare_exchange_weak(expected, 1, std::memory_order_acquire,
19 |                                           std::memory_order_relaxed)) {
20 |       // a failed compare-exchange overwrites `expected` with the observed value
21 |       expected = 0;
22 |     }
23 |   }
24 | 
25 |   // Marks the lock as free again
26 |   void Unlock() {
27 |     counter.store(0, std::memory_order_release);
28 |   }
29 | 
30 |   // Current counter value: 1 while the lock is held, 0 otherwise
31 |   int Counter() const {
32 |     return counter.load(std::memory_order_acquire);
33 |   }
34 | 
35 |  private:
36 |   std::atomic_int counter;
37 | };


--------------------------------------------------------------------------------
/core/util/thread_pool.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <condition_variable>
 7 | #include <functional>
 8 | #include <future>
 9 | #include <memory>
10 | #include <mutex>
11 | #include <queue>
12 | #include <stdexcept>
13 | #include <thread>
14 | #include <vector>
15 | 
16 | #include "base/common.h"
17 | 
18 | class ThreadPool {  // Fixed set of worker threads that run queued tasks
19 |  public:
20 |   ThreadPool(size_t);  // starts that many worker threads
21 |   template <class F, class... Args>
22 |   auto Enqueue(F&& f, Args&&... args) -> std::future<decltype(f(args...))>;  // queues f(args...); the future yields its result
23 |   ~ThreadPool();  // sets stop, wakes every worker and joins them
24 | 
25 |  private:
26 |   std::vector<std::thread> workers;         // worker threads, joined by the destructor
27 |   std::queue<std::function<void()>> tasks;  // pending tasks, taken from the front
28 |   std::mutex queue_mutex;                   // held while tasks or stop is accessed
29 |   std::condition_variable condition;        // notified when a task is queued or the pool stops
30 |   bool stop;                                // once true, Enqueue throws and workers exit when the queue is empty
31 | };
32 | 
33 | ALWAYS_INLINE
34 | ThreadPool::ThreadPool(size_t threads) : stop(false) {
35 |   // A worker waits for work, runs it outside the lock, and leaves once stop is set and the queue is empty
36 |   auto worker_loop = [this] {
37 |     while (true) {
38 |       std::function<void()> job;
39 |       {
40 |         std::unique_lock<std::mutex> guard(queue_mutex);
41 |         condition.wait(guard, [this] { return stop || !tasks.empty(); });
42 |         if (stop && tasks.empty()) return;
43 |         job = std::move(tasks.front());
44 |         tasks.pop();
45 |       }
46 |       job();  // Execute the dequeued task
47 |     }
48 |   };
49 |   for (size_t n = 0; n < threads; ++n) workers.emplace_back(worker_loop);
50 | }
51 | 
52 | // Add a task to the thread pool
53 | template <class F, class... Args>
54 | ALWAYS_INLINE 
55 | auto ThreadPool::Enqueue(F&& f, Args&&... args) -> std::future<decltype(f(args...))> {
56 |   // return_type was used without ever being declared; it is the result type of f(args...)
57 |   using return_type = decltype(f(args...));
58 |   auto task = std::make_shared<std::packaged_task<return_type()>>(
59 |       std::bind(std::forward<F>(f), std::forward<Args>(args)...));
60 |   // Fetch the future before the task becomes visible to the workers, so the packaged_task is
61 |   // never touched from this thread while a worker may already be running it
62 |   std::future<return_type> result = task->get_future();
63 |   {
64 |     std::unique_lock<std::mutex> lock(queue_mutex);
65 |     if (stop) throw std::runtime_error("Enqueue on stopped ThreadPool");
66 |     tasks.emplace([task]() { (*task)(); });
67 |   }
68 |   condition.notify_one();
69 |   return result;  // Return the results of the task
70 | }
66 | 
67 | ALWAYS_INLINE
68 | ThreadPool::~ThreadPool() {
69 |   // Raise the stop flag under the mutex, wake every worker, then wait for all of them
70 |   {
71 |     std::lock_guard<std::mutex> guard(queue_mutex);
72 |     stop = true;
73 |   }
74 |   condition.notify_all();
75 |   for (auto& t : workers) {
76 |     if (!t.joinable()) continue;
77 |     t.join();
78 |   }
79 | }


--------------------------------------------------------------------------------
/core/util/timer.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <chrono>
 7 | 
 8 | using namespace std::chrono;
 9 | 
10 | // Records one event's duration
11 | class Timer {
12 |  public:
13 |   Timer() {}
14 |   // steady_clock never runs backwards, so Stop() - Start() cannot turn negative (and wrap in the
15 |   // unsigned getters) when the wall clock is adjusted
16 |   void Start() { start = std::chrono::steady_clock::now(); }
17 |   void Stop() { end = std::chrono::steady_clock::now(); }
18 | 
19 |   // Time between the latest Start() and Stop(), in seconds
20 |   double Duration_s() const {
21 |     return std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
22 |   }
23 | 
24 |   // Same interval in whole nanoseconds
25 |   uint64_t Duration_ns() const {
26 |     return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
27 |   }
28 | 
29 |   // Same interval in whole microseconds
30 |   uint64_t Duration_us() const {
31 |     return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
32 |   }
33 | 
34 |   // Same interval in whole milliseconds
35 |   uint64_t Duration_ms() const {
36 |     return std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
37 |   }
38 | 
39 |  private:
40 |   std::chrono::steady_clock::time_point start;
41 |   std::chrono::steady_clock::time_point end;
42 | };
37 | 


--------------------------------------------------------------------------------
/core/util/zipf.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Adapted from mica
  3 | // Copyright (c) 2022
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <cassert>
  8 | #include <cmath>
  9 | #include <cstdio>
 10 | 
 11 | #include "util/fast_random.h"
 12 | 
 13 | // Generates item indices in [0, n) that are sequential, uniform, Zipf-distributed or constant,
 14 | // depending on theta.
 15 | class ZipfGen {
 16 |  public:
 17 |   // n: number of items (> 0). theta: -1 = sequential, 0 = uniform, (0, 1) = Zipfian skew,
 18 |   // >= 40 = always item 0; values in [1, 40) are rejected. rand_seed seeds the random source
 19 |   // and, in sequential mode, selects the first item.
 20 |   ZipfGen(uint64_t n, double theta, uint64_t rand_seed) {
 21 |     assert(n > 0);
 22 |     // Give every member a defined value first: the rejection path below returns early, and
 23 |     // dbl_n_ is otherwise only written by next() although the copy operations read it.
 24 |     n_ = n;
 25 |     theta_ = 0;
 26 |     alpha_ = 0;
 27 |     thres_ = 0;
 28 |     last_n_ = 0;
 29 |     dbl_n_ = 0.;
 30 |     zetan_ = 0.;
 31 |     eta_ = 0;
 32 |     seq_ = 0;
 33 |     if (theta > 0.992 && theta < 1)
 34 |       fprintf(stderr, "warning: theta > 0.992 will be inaccurate due to approximation\n");
 35 |     if (theta >= 1. && theta < 40.) {
 36 |       fprintf(stderr, "error: theta in [1., 40.) is not supported\n");
 37 |       assert(false);
 38 |       return;  // without assertions the generator is left with theta_ == 0
 39 |     }
 40 |     assert(theta == -1. || (theta >= 0. && theta < 1.) || theta >= 40.);
 41 |     theta_ = theta;
 42 |     if (theta == -1.) {
 43 |       seq_ = rand_seed % n;
 44 |     } else if (theta > 0. && theta < 1.) {
 45 |       alpha_ = 1. / (1. - theta);
 46 |       thres_ = 1. + pow_approx(0.5, theta);
 47 |     }
 48 |     // rand_state_[0] = (unsigned short)(rand_seed >> 0);
 49 |     // rand_state_[1] = (unsigned short)(rand_seed >> 16);
 50 |     // rand_state_[2] = (unsigned short)(rand_seed >> 32);
 51 |     rand_ = Rand(rand_seed);
 52 |   }
 53 | 
 54 |   ZipfGen(const ZipfGen& src) {
 55 |     n_ = src.n_;
 56 |     theta_ = src.theta_;
 57 |     alpha_ = src.alpha_;
 58 |     thres_ = src.thres_;
 59 |     last_n_ = src.last_n_;
 60 |     dbl_n_ = src.dbl_n_;
 61 |     zetan_ = src.zetan_;
 62 |     eta_ = src.eta_;
 63 |     seq_ = src.seq_;
 64 |     rand_ = src.rand_;
 65 |   }
 66 | 
 67 |   // Copies src but restarts the random source from rand_seed
 68 |   ZipfGen(const ZipfGen& src, uint64_t rand_seed) {
 69 |     n_ = src.n_;
 70 |     theta_ = src.theta_;
 71 |     alpha_ = src.alpha_;
 72 |     thres_ = src.thres_;
 73 |     last_n_ = src.last_n_;
 74 |     dbl_n_ = src.dbl_n_;
 75 |     zetan_ = src.zetan_;
 76 |     eta_ = src.eta_;
 77 |     seq_ = src.seq_;
 78 |     rand_ = Rand(rand_seed);
 79 |   }
 80 | 
 81 |   ZipfGen& operator=(const ZipfGen& src) {
 82 |     n_ = src.n_;
 83 |     theta_ = src.theta_;
 84 |     alpha_ = src.alpha_;
 85 |     thres_ = src.thres_;
 86 |     last_n_ = src.last_n_;
 87 |     dbl_n_ = src.dbl_n_;
 88 |     zetan_ = src.zetan_;
 89 |     eta_ = src.eta_;
 90 |     seq_ = src.seq_;
 91 |     rand_ = src.rand_;
 92 |     return *this;
 93 |   }
 94 | 
 95 |   // Changes the number of items; the derived values are refreshed by the next call to next()
 96 |   void change_n(uint64_t n) { n_ = n; }
 97 | 
 98 |   // Returns the next item index
 99 |   uint64_t next() {
100 |     if (last_n_ != n_) {
101 |       if (theta_ > 0. && theta_ < 1.) {
102 |         zetan_ = zeta(last_n_, zetan_, n_, theta_);
103 |         eta_ = (1. - pow_approx(2. / (double)n_, 1. - theta_)) /
104 |                (1. - zeta(0, 0., 2, theta_) / zetan_);
105 |       }
106 |       last_n_ = n_;
107 |       dbl_n_ = (double)n_;
108 |     }
109 | 
110 |     if (theta_ == -1.) {
111 |       uint64_t v = seq_;
112 |       if (++seq_ >= n_) seq_ = 0;
113 |       return v;
114 |     } else if (theta_ == 0.) {
115 |       double u = rand_.next_f64();
116 |       uint64_t v = (uint64_t)(dbl_n_ * u);
117 |       // next_f64() can return exactly 1.0, which would produce the out-of-range index n_
118 |       if (v >= n_ && n_ > 0) v = n_ - 1;
119 |       return v;
120 |     } else if (theta_ >= 40.) {
121 |       return 0UL;
122 |     } else {
123 |       // from J. Gray et al. Quickly generating billion-record synthetic
124 |       // databases. In SIGMOD, 1994.
125 | 
126 |       // double u = erand48(rand_state_);
127 |       double u = rand_.next_f64();
128 |       double uz = u * zetan_;
129 |       if (uz < 1.)
130 |         return 0UL;
131 |       else if (uz < thres_)
132 |         return 1UL;
133 |       else {
134 |         uint64_t v =
135 |             (uint64_t)(dbl_n_ * pow_approx(eta_ * (u - 1.) + 1., alpha_));
136 |         if (v >= n_) v = n_ - 1;
137 |         return v;
138 |       }
139 |     }
140 |   }
141 | 
142 |   // Prints the probability of item 0 computed with pow() next to the share of item 0 among
143 |   // 10M generated samples. Nothing is printed for theta in [1, 40).
144 |   static void test(double theta) {
145 |     double zetan = 0.;
146 |     const uint64_t n = 1000000UL;
147 |     uint64_t i;
148 | 
149 |     for (i = 0; i < n; i++) zetan += 1. / pow((double)i + 1., theta);
150 | 
151 |     if (theta < 1. || theta >= 40.) {
152 |       ZipfGen zg(n, theta, 0);
153 | 
154 |       uint64_t num_key0 = 0;
155 |       const uint64_t num_samples = 10000000UL;
156 |       for (i = 0; i < num_samples; i++)
157 |         if (zg.next() == 0) num_key0++;
158 | 
159 |       printf("theta = %lf; using pow(): %.10lf", theta, 1. / zetan);
160 |       printf(", using approx-pow(): %.10lf",
161 |              (double)num_key0 / (double)num_samples);
162 |       printf("\n");
163 |     }
164 |   }
165 | 
166 |  private:
167 |   // Approximation of pow(a, b)
168 |   static double pow_approx(double a, double b) {
169 |     // from
170 |     // http://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/
171 | 
172 |     // calculate approximation with fraction of the exponent
173 |     int e = (int)b;
174 |     union {
175 |       double d;
176 |       int x[2];
177 |     } u = {a};
178 |     u.x[1] = (int)((b - (double)e) * (double)(u.x[1] - 1072632447) + 1072632447.);
179 |     u.x[0] = 0;
180 | 
181 |     // exponentiation by squaring with the exponent's integer part
182 |     // double r = u.d makes everything much slower, not sure why
183 |     // TODO: use popcount?
184 |     double r = 1.;
185 |     while (e) {
186 |       if (e & 1) r *= a;
187 |       a *= a;
188 |       e >>= 1;
189 |     }
190 | 
191 |     return r * u.d;
192 |   }
193 | 
194 |   // Extends the partial sum of 1 / i^theta from last_n terms (last_sum) to n terms; the sum
195 |   // is restarted from zero when n is smaller than last_n
196 |   static double zeta(uint64_t last_n, double last_sum, uint64_t n, double theta) {
197 |     if (last_n > n) {
198 |       last_n = 0;
199 |       last_sum = 0.;
200 |     }
201 |     while (last_n < n) {
202 |       last_sum += 1. / pow_approx((double)last_n + 1., theta);
203 |       last_n++;
204 |     }
205 |     return last_sum;
206 |   }
207 | 
208 |  private:
209 |   // number of items (input)
210 |   uint64_t n_;
211 | 
212 |   // skewness (input) in (0, 1); or, -1 = sequential, 0 = uniform, >= 40 = always zero
213 |   double theta_;
214 | 
215 |   // only depends on theta
216 |   double alpha_;
217 | 
218 |   // only depends on theta
219 |   double thres_;
220 | 
221 |   // last n used to calculate the following
222 |   uint64_t last_n_;
223 | 
224 |   double dbl_n_;
225 | 
226 |   double zetan_;
227 | 
228 |   double eta_;
229 | 
230 |   // for sequential number generation
231 |   uint64_t seq_;
232 | 
233 |   Rand rand_;
234 | } __attribute__((aligned(128)));  // To prevent false sharing caused by adjacent cacheline prefetching


--------------------------------------------------------------------------------
/memory_pool/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Author: Ming Zhang
2 | # Copyright (c) 2022
3 | 
4 | add_subdirectory(server)  # server/CMakeLists.txt defines the zm_mem_pool executable
5 | 


--------------------------------------------------------------------------------
/memory_pool/server/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Author: Ming Zhang
2 | # Copyright (c) 2022
3 | 
4 | # Memory-pool server executable; links the workload DB libraries and rlib.
5 | add_executable(zm_mem_pool server.cc)
6 | target_link_libraries(zm_mem_pool PRIVATE tatp_db smallbank_db tpcc_db micro_db rlib)
7 | 


--------------------------------------------------------------------------------
/memory_pool/server/server.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include <sys/mman.h>
  7 | 
  8 | #include <cstdio>
  9 | #include <cstring>
 10 | #include <string>
 11 | 
 12 | #include "memstore/data_item.h"
 13 | #include "memstore/hash_store.h"
 14 | #include "rlib/rdma_ctrl.hpp"
 15 | 
 16 | // Load DB
 17 | #include "micro/micro_db.h"
 18 | #include "smallbank/smallbank_db.h"
 19 | #include "tatp/tatp_db.h"
 20 | #include "tpcc/tpcc_db.h"
 21 | 
 22 | using namespace rdmaio;
 23 | 
 24 | // Memory-node server state: configuration, the hash-store and log buffers, and the
 25 | // per-workload table objects. The destructor releases whatever is still held.
 26 | class Server {
 27 |  public:
 28 |   // Records the configuration only. Buffers start out null and the PM file descriptor
 29 |   // stays at -1 until they are set elsewhere (presumably by AllocMem() -- see server.cc).
 30 |   Server(int nid, int local_port, int local_meta_port, size_t hash_buf_size, size_t log_buf_size, int use_pm, const std::string& pm_file, size_t pm_size)
 31 |       : server_node_id(nid),
 32 |         local_port(local_port),
 33 |         local_meta_port(local_meta_port),
 34 |         hash_buf_size(hash_buf_size),
 35 |         log_buf_size(log_buf_size),
 36 |         use_pm(use_pm),
 37 |         pm_file(pm_file),
 38 |         pm_size(pm_size),
 39 |         hash_buffer(nullptr),
 40 |         log_buffer(nullptr) {}
 41 | 
 42 |   // Deletes the workload tables, then releases the hash buffer (munmap + close when
 43 |   // use_pm is set, free otherwise) and the log buffer. Every release is guarded, so
 44 |   // destroying a Server whose buffers were never allocated is safe.
 45 |   ~Server() {
 46 |     RDMA_LOG(INFO) << "Do server cleaning...";
 47 |     if (tatp_server) {
 48 |       delete tatp_server;
 49 |       RDMA_LOG(INFO) << "delete tatp tables";
 50 |     }
 51 | 
 52 |     if (smallbank_server) {
 53 |       delete smallbank_server;
 54 |       RDMA_LOG(INFO) << "delete smallbank tables";
 55 |     }
 56 | 
 57 |     if (tpcc_server) {
 58 |       delete tpcc_server;
 59 |       RDMA_LOG(INFO) << "delete tpcc tables";
 60 |     }
 61 | 
 62 |     if (micro_server) {
 63 |       delete micro_server;
 64 |       RDMA_LOG(INFO) << "delete micro tables";
 65 |     }
 66 | 
 67 |     if (use_pm) {
 68 |       // Only unmap a mapping that actually exists; mmap reports failure as MAP_FAILED.
 69 |       if (hash_buffer && hash_buffer != MAP_FAILED) {
 70 |         munmap(hash_buffer, pm_size);
 71 |         RDMA_LOG(INFO) << "munmap hash buffer";
 72 |       }
 73 |       // -1 means the PM file was never opened; closing it would hit an unrelated fd.
 74 |       if (pm_file_fd >= 0) close(pm_file_fd);
 75 |     } else if (hash_buffer) {
 76 |       free(hash_buffer);
 77 |       RDMA_LOG(INFO) << "Free hash buffer";
 78 |     }
 79 | 
 80 |     if (log_buffer) {
 81 |       free(log_buffer);
 82 |       RDMA_LOG(INFO) << "free log buffer";
 83 |     }
 84 |   }
 85 | 
 86 |   // The member functions below are declared here and implemented out of line.
 87 | 
 88 |   void AllocMem();
 89 | 
 90 |   void InitMem();
 91 | 
 92 |   void InitRDMA();
 93 | 
 94 |   void LoadData(node_id_t machine_id, node_id_t machine_num, std::string& workload);
 95 | 
 96 |   void SendMeta(node_id_t machine_id, std::string& workload, size_t compute_node_num);
 97 | 
 98 |   void PrepareHashMeta(node_id_t machine_id, std::string& workload, char** hash_meta_buffer, size_t& total_meta_size);
 99 | 
100 |   void SendHashMeta(char* hash_meta_buffer, size_t& total_meta_size);
101 | 
102 |   void CleanTable();
103 | 
104 |   void CleanQP();
105 | 
106 |   bool Run();
107 | 
108 |  private:
109 |   const int server_node_id;
110 | 
111 |   const int local_port;
112 | 
113 |   const int local_meta_port;
114 | 
115 |   const size_t hash_buf_size;
116 | 
117 |   const size_t log_buf_size;
118 | 
119 |   // Non-zero selects the mmap/munmap path for the hash buffer instead of malloc/free
120 |   const int use_pm;
121 | 
122 |   const std::string pm_file;
123 | 
124 |   // Length passed to munmap() when use_pm is set
125 |   const size_t pm_size;
126 | 
127 |   // File descriptor closed in the destructor when use_pm is set; -1 while not open
128 |   int pm_file_fd = -1;
129 | 
130 |   RdmaCtrlPtr rdma_ctrl;
131 | 
132 |   // The start address of the whole hash store space
133 |   char* hash_buffer;
134 | 
135 |   // The start address of the reserved space in hash store. For insertion in case of conflict in a full bucket
136 |   char* hash_reserve_buffer = nullptr;
137 | 
138 |   char* log_buffer;
139 | 
140 |   // For server-side workload
141 |   TATP* tatp_server = nullptr;
142 | 
143 |   SmallBank* smallbank_server = nullptr;
144 | 
145 |   TPCC* tpcc_server = nullptr;
146 |   
147 |   MICRO* micro_server = nullptr;
148 | };
135 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/error/en.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making RapidJSON available.
 2 | // 
 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 4 | //
 5 | // Licensed under the MIT License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // http://opensource.org/licenses/MIT
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed 
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef RAPIDJSON_ERROR_EN_H__
16 | #define RAPIDJSON_ERROR_EN_H__
17 | 
18 | #include "error.h"
19 | 
20 | 
21 | RAPIDJSON_NAMESPACE_BEGIN
22 | 
23 | //! Maps error code of parsing into error message.
24 | /*!
25 |     \ingroup RAPIDJSON_ERRORS
26 |     \param parseErrorCode Error code obtained in parsing.
27 |     \return the error message.
28 |     \note User can make a copy of this function for localization.
29 |         Using switch-case is safer for future modification of error codes.
30 | */
31 | inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErrorCode) {
32 |   switch (parseErrorCode) {
33 |     case kParseErrorNone: return RAPIDJSON_ERROR_STRING("No error.");
34 | 
35 |     case kParseErrorDocumentEmpty: return RAPIDJSON_ERROR_STRING("The document is empty.");
36 |     case kParseErrorDocumentRootNotSingular: return RAPIDJSON_ERROR_STRING("The document root must not follow by other values.");
37 | 
38 |     case kParseErrorValueInvalid: return RAPIDJSON_ERROR_STRING("Invalid value.");
39 | 
40 |     case kParseErrorObjectMissName: return RAPIDJSON_ERROR_STRING("Missing a name for object member.");
41 |     case kParseErrorObjectMissColon: return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member.");
42 |     case kParseErrorObjectMissCommaOrCurlyBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member.");
43 | 
44 |     case kParseErrorArrayMissCommaOrSquareBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element.");
45 | 
46 |     case kParseErrorStringUnicodeEscapeInvalidHex: return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string.");
47 |     case kParseErrorStringUnicodeSurrogateInvalid: return RAPIDJSON_ERROR_STRING("The surrogate pair in string is invalid.");
48 |     case kParseErrorStringEscapeInvalid: return RAPIDJSON_ERROR_STRING("Invalid escape character in string.");
49 |     case kParseErrorStringMissQuotationMark: return RAPIDJSON_ERROR_STRING("Missing a closing quotation mark in string.");
50 |     case kParseErrorStringInvalidEncoding: return RAPIDJSON_ERROR_STRING("Invalid encoding in string.");
51 | 
52 |     case kParseErrorNumberTooBig: return RAPIDJSON_ERROR_STRING("Number too big to be stored in double.");
53 |     case kParseErrorNumberMissFraction: return RAPIDJSON_ERROR_STRING("Miss fraction part in number.");
54 |     case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number.");
55 | 
56 |     case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
57 |     case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
58 | 
59 |     default:return RAPIDJSON_ERROR_STRING("Unknown error.");
60 |   }
61 | }
62 | 
63 | RAPIDJSON_NAMESPACE_END
64 | 
65 | #endif // RAPIDJSON_ERROR_EN_H__
66 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/error/error.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making RapidJSON available.
  2 | // 
  3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4 | //
  5 | // Licensed under the MIT License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // http://opensource.org/licenses/MIT
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed 
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef RAPIDJSON_ERROR_ERROR_H__
 16 | #define RAPIDJSON_ERROR_ERROR_H__
 17 | 
 18 | #include "../rapidjson.h"
 19 | 
 20 | /*! \file error.h */
 21 | 
 22 | /*! \defgroup RAPIDJSON_ERRORS RapidJSON error handling */
 23 | 
 24 | ///////////////////////////////////////////////////////////////////////////////
 25 | // RAPIDJSON_ERROR_CHARTYPE
 26 | 
 27 | //! Character type of error messages.
 28 | /*! \ingroup RAPIDJSON_ERRORS
 29 |     The default character type is \c char.
 30 |     On Windows, user can define this macro as \c TCHAR for supporting both
 31 |     unicode/non-unicode settings.
 32 | */
 33 | #ifndef RAPIDJSON_ERROR_CHARTYPE
 34 | #define RAPIDJSON_ERROR_CHARTYPE char
 35 | #endif
 36 | 
 37 | ///////////////////////////////////////////////////////////////////////////////
 38 | // RAPIDJSON_ERROR_STRING
 39 | 
 40 | //! Macro for converting string literal to \ref RAPIDJSON_ERROR_CHARTYPE[].
 41 | /*! \ingroup RAPIDJSON_ERRORS
 42 |     By default this conversion macro does nothing.
 43 |     On Windows, user can define this macro as \c _T(x) for supporting both
 44 |     unicode/non-unicode settings.
 45 | */
 46 | #ifndef RAPIDJSON_ERROR_STRING
 47 | #define RAPIDJSON_ERROR_STRING(x) x
 48 | #endif
 49 | 
 50 | RAPIDJSON_NAMESPACE_BEGIN
 51 | 
 52 | ///////////////////////////////////////////////////////////////////////////////
 53 | // ParseErrorCode
 54 | 
 55 | //! Error code of parsing.
 56 | /*! \ingroup RAPIDJSON_ERRORS
 57 |     \see GenericReader::Parse, GenericReader::GetParseErrorCode
 58 | */
 59 | enum ParseErrorCode {
 60 |   kParseErrorNone = 0,                        //!< No error.
 61 | 
 62 |   kParseErrorDocumentEmpty,                   //!< The document is empty.
 63 |   kParseErrorDocumentRootNotSingular,         //!< The document root must not be followed by other values.
 64 | 
 65 |   kParseErrorValueInvalid,                    //!< Invalid value.
 66 | 
 67 |   kParseErrorObjectMissName,                  //!< Missing a name for object member.
 68 |   kParseErrorObjectMissColon,                 //!< Missing a colon after a name of object member.
 69 |   kParseErrorObjectMissCommaOrCurlyBracket,   //!< Missing a comma or '}' after an object member.
 70 | 
 71 |   kParseErrorArrayMissCommaOrSquareBracket,   //!< Missing a comma or ']' after an array element.
 72 | 
 73 |   kParseErrorStringUnicodeEscapeInvalidHex,   //!< Incorrect hex digit after \\u escape in string.
 74 |   kParseErrorStringUnicodeSurrogateInvalid,   //!< The surrogate pair in string is invalid.
 75 |   kParseErrorStringEscapeInvalid,             //!< Invalid escape character in string.
 76 |   kParseErrorStringMissQuotationMark,         //!< Missing a closing quotation mark in string.
 77 |   kParseErrorStringInvalidEncoding,           //!< Invalid encoding in string.
 78 | 
 79 |   kParseErrorNumberTooBig,                    //!< Number too big to be stored in double.
 80 |   kParseErrorNumberMissFraction,              //!< Missing fraction part in number.
 81 |   kParseErrorNumberMissExponent,              //!< Missing exponent in number.
 82 | 
 83 |   kParseErrorTermination,                     //!< Parsing was terminated.
 84 |   kParseErrorUnspecificSyntaxError            //!< Unspecific syntax error.
 85 | };
 86 | 
 87 | //! Result of parsing (wraps ParseErrorCode)
 88 | /*!
 89 |     \ingroup RAPIDJSON_ERRORS
 90 |     \code
 91 |         Document doc;
 92 |         ParseResult ok = doc.Parse("[42]");
 93 |         if (!ok) {
 94 |             fprintf(stderr, "JSON parse error: %s (%u)",
 95 |                     GetParseError_En(ok.Code()), ok.Offset());
 96 |             exit(EXIT_FAILURE);
 97 |         }
 98 |     \endcode
 99 |     \see GenericReader::Parse, GenericDocument::Parse
100 | */
101 | struct ParseResult {
102 | 
103 |   //! Default constructor, no error.
104 |   ParseResult() : code_(kParseErrorNone), offset_(0) {}
105 |   //! Constructor to set an error.
106 |   ParseResult(ParseErrorCode code, size_t offset) : code_(code), offset_(offset) {}
107 | 
108 |   //! Get the error code.
109 |   ParseErrorCode Code() const { return code_; }
110 |   //! Get the error offset, if \ref IsError(), 0 otherwise.
111 |   size_t Offset() const { return offset_; }
112 | 
113 |   //! Conversion to \c bool, returns \c true, iff !\ref IsError().
114 |   operator bool() const { return !IsError(); }
115 |   //! Whether the result is an error.
116 |   bool IsError() const { return code_ != kParseErrorNone; }
117 | 
118 |   //! Equality compares the error code only; the offset is not taken into account.
119 |   bool operator==(const ParseResult& that) const { return code_ == that.code_; }
120 |   bool operator==(ParseErrorCode code) const { return code_ == code; }
121 |   friend bool operator==(ParseErrorCode code, const ParseResult& err) { return code == err.code_; }
122 | 
123 |   //! Inequality, the exact negation of the equality operators above.
124 |   /*! Without these, \c result != code would silently convert \c result through
125 |       \c operator \c bool and compare that boolean against the enumerator's value.
126 |   */
127 |   bool operator!=(const ParseResult& that) const { return !(*this == that); }
128 |   bool operator!=(ParseErrorCode code) const { return !(*this == code); }
129 |   friend bool operator!=(ParseErrorCode code, const ParseResult& err) { return !(err == code); }
130 | 
131 |   //! Reset error code.
132 |   void Clear() { Set(kParseErrorNone); }
133 |   //! Update error code and offset.
134 |   void Set(ParseErrorCode code, size_t offset = 0) {
135 |     code_ = code;
136 |     offset_ = offset;
137 |   }
138 | 
139 |  private:
140 |   ParseErrorCode code_;  //!< kParseErrorNone when there is no error
141 |   size_t offset_;        //!< Offset supplied together with the code; 0 after Clear()
142 | };
134 | 
135 | //! Function pointer type of GetParseError().
136 | /*! \ingroup RAPIDJSON_ERRORS
137 | 
138 |     This is the prototype for \c GetParseError_X(), where \c X is a locale.
139 |     The user can switch the locale dynamically at runtime, e.g.:
140 | \code
141 |     GetParseErrorFunc GetParseError = GetParseError_En; // or whatever
142 |     const RAPIDJSON_ERROR_CHARTYPE* s = GetParseError(document.GetParseErrorCode());
143 | \endcode
144 | */
145 | typedef const RAPIDJSON_ERROR_CHARTYPE* (* GetParseErrorFunc)(ParseErrorCode);
146 | 
147 | RAPIDJSON_NAMESPACE_END
148 | 
149 | #endif // RAPIDJSON_ERROR_ERROR_H__
150 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/filereadstream.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making RapidJSON available.
  2 | // 
  3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4 | //
  5 | // Licensed under the MIT License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // http://opensource.org/licenses/MIT
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed 
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef RAPIDJSON_FILEREADSTREAM_H_
 16 | #define RAPIDJSON_FILEREADSTREAM_H_
 17 | 
 18 | #include "rapidjson.h"
 19 | #include <cstdio>
 20 | 
 21 | 
 22 | RAPIDJSON_NAMESPACE_BEGIN
 23 | 
 24 | //! File byte stream for input using fread().
 25 | /*!
 26 |     Serves bytes out of a caller-supplied buffer and refills it with fread() once it is
 27 |     exhausted. When the file ends, a '\0' terminator is appended and returned from then on.
 28 |     \note implements Stream concept
 29 | */
 30 | class FileReadStream {
 31 |  public:
 32 |   typedef char Ch;    //!< Character type (byte).
 33 | 
 34 |   //! Constructor.
 35 |   /*!
 36 |       \param fp File pointer opened for read.
 37 |       \param buffer user-supplied buffer.
 38 |       \param bufferSize size of buffer in bytes. Must >=4 bytes.
 39 |   */
 40 |   FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) {
 41 |     RAPIDJSON_ASSERT(fp_ != 0);
 42 |     RAPIDJSON_ASSERT(bufferSize >= 4);
 43 |     Read();  // prime the buffer so Peek() is valid immediately
 44 |   }
 45 | 
 46 |   //! Returns the current character without consuming it.
 47 |   Ch Peek() const { return *current_; }
 48 |   //! Returns the current character and advances to the next one.
 49 |   Ch Take() {
 50 |     Ch c = *current_;
 51 |     Read();
 52 |     return c;
 53 |   }
 54 |   //! Number of characters consumed so far (earlier buffers plus offset in the current one).
 55 |   size_t Tell() const { return count_ + static_cast<size_t>(current_ - buffer_); }
 56 | 
 57 |   // Not implemented
 58 |   void Put(Ch) { RAPIDJSON_ASSERT(false); }
 59 |   void Flush() { RAPIDJSON_ASSERT(false); }
 60 |   Ch* PutBegin() {
 61 |     RAPIDJSON_ASSERT(false);
 62 |     return 0;
 63 |   }
 64 |   size_t PutEnd(Ch*) {
 65 |     RAPIDJSON_ASSERT(false);
 66 |     return 0;
 67 |   }
 68 | 
 69 |   // For encoding detection only.
 70 |   //! Returns the current position if at least 4 file bytes are buffered from it, else 0.
 71 |   const Ch* Peek4() const {
 72 |     // While more data may follow, bufferLast_ addresses the last byte read, so four
 73 |     // bytes are available when current_ + 3 <= bufferLast_. Once eof_ is set,
 74 |     // bufferLast_ addresses the appended '\0' one past the last byte read, so the
 75 |     // bound becomes current_ + 4 <= bufferLast_.
 76 |     return (current_ + 4 - (eof_ ? 0 : 1) <= bufferLast_) ? current_ : 0;
 77 |   }
 78 | 
 79 |  private:
 80 |   //! Advances by one character, refilling the buffer from the file when it is exhausted.
 81 |   void Read() {
 82 |     if (current_ < bufferLast_)
 83 |       ++current_;
 84 |     else if (!eof_) {
 85 |       count_ += readCount_;
 86 |       readCount_ = fread(buffer_, 1, bufferSize_, fp_);
 87 |       bufferLast_ = buffer_ + readCount_ - 1;
 88 |       current_ = buffer_;
 89 | 
 90 |       // A short read marks the end of the file: append the terminator and let
 91 |       // bufferLast_ cover it so that it is the final character served.
 92 |       if (readCount_ < bufferSize_) {
 93 |         buffer_[readCount_] = '\0';
 94 |         ++bufferLast_;
 95 |         eof_ = true;
 96 |       }
 97 |     }
 98 |   }
 99 | 
100 |   std::FILE* fp_;
101 |   Ch* buffer_;
102 |   size_t bufferSize_;
103 |   Ch* bufferLast_;    //!< Last servable character in buffer_ (see Peek4() for the EOF case)
104 |   Ch* current_;
105 |   size_t readCount_;  //!< Bytes returned by the most recent fread()
106 |   size_t count_;  //!< Number of characters read
107 |   bool eof_;
108 | };
 96 | 
 97 | RAPIDJSON_NAMESPACE_END
 98 | 
 99 | #endif // RAPIDJSON_FILEREADSTREAM_H_
100 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/filewritestream.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making RapidJSON available.
  2 | // 
  3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4 | //
  5 | // Licensed under the MIT License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // http://opensource.org/licenses/MIT
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed 
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef RAPIDJSON_FILEWRITESTREAM_H_
 16 | #define RAPIDJSON_FILEWRITESTREAM_H_
 17 | 
 18 | #include "rapidjson.h"
 19 | #include <cstdio>
 20 | 
 21 | 
 22 | RAPIDJSON_NAMESPACE_BEGIN
 23 | 
 24 | //! Wrapper of C file stream for output using fwrite().
 25 | /*!
 26 |     \note implements Stream concept
 27 | */
 28 | class FileWriteStream {
 29 |  public:
 30 |   typedef char Ch;    //!< Character type. Only support char.
 31 | 
 32 |   //! Binds the stream to an open FILE and a caller-supplied staging buffer.
 33 |   FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize)
 34 |       : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_) {
 35 |     RAPIDJSON_ASSERT(fp_ != 0);
 36 |   }
 37 | 
 38 |   //! Appends one character, flushing first when the buffer is full.
 39 |   void Put(char c) {
 40 |     if (!(current_ < bufferEnd_))
 41 |       Flush();
 42 | 
 43 |     *current_ = c;
 44 |     ++current_;
 45 |   }
 46 | 
 47 |   //! Appends \c n copies of \c c, flushing every time the buffer fills up.
 48 |   void PutN(char c, size_t n) {
 49 |     for (;;) {
 50 |       const size_t room = static_cast<size_t>(bufferEnd_ - current_);
 51 |       if (n <= room)
 52 |         break;
 53 | 
 54 |       // Fill what is left of the buffer, write it out, and carry on with the rest.
 55 |       std::memset(current_, c, room);
 56 |       current_ += room;
 57 |       Flush();
 58 |       n -= room;
 59 |     }
 60 | 
 61 |     if (n > 0) {
 62 |       std::memset(current_, c, n);
 63 |       current_ += n;
 64 |     }
 65 |   }
 66 | 
 67 |   //! Writes the buffered characters to the file and rewinds the buffer.
 68 |   void Flush() {
 69 |     if (current_ == buffer_)
 70 |       return;
 71 | 
 72 |     const size_t pending = static_cast<size_t>(current_ - buffer_);
 73 |     const size_t written = fwrite(buffer_, 1, pending, fp_);
 74 |     // A short write is deliberately ignored at this time; the result is captured
 75 |     // only to avoid warn_unused_result build errors.
 76 |     (void)written;
 77 |     current_ = buffer_;
 78 |   }
 79 | 
 80 |   // The read-side and in-situ members of the stream interface are not implemented;
 81 |   // each one asserts when called.
 82 |   char Peek() const {
 83 |     RAPIDJSON_ASSERT(false);
 84 |     return 0;
 85 |   }
 86 |   char Take() {
 87 |     RAPIDJSON_ASSERT(false);
 88 |     return 0;
 89 |   }
 90 |   size_t Tell() const {
 91 |     RAPIDJSON_ASSERT(false);
 92 |     return 0;
 93 |   }
 94 |   char* PutBegin() {
 95 |     RAPIDJSON_ASSERT(false);
 96 |     return 0;
 97 |   }
 98 |   size_t PutEnd(char*) {
 99 |     RAPIDJSON_ASSERT(false);
100 |     return 0;
101 |   }
102 | 
103 |  private:
104 |   // Copying is prohibited: declared but never defined.
105 |   FileWriteStream(const FileWriteStream&);
106 |   FileWriteStream& operator=(const FileWriteStream&);
107 | 
108 |   std::FILE* fp_;    // destination file
109 |   char* buffer_;     // start of the staging buffer
110 |   char* bufferEnd_;  // one past the end of the staging buffer
111 |   char* current_;    // next free position in the staging buffer
112 | };
102 | 
103 | //! Specialization of PutN() for FileWriteStream; forwards to FileWriteStream::PutN(), which fills via memset().
104 | template <>
105 | inline void PutN(FileWriteStream& stream, char c, size_t n) {
106 |   stream.PutN(c, n);
107 | }
108 | 
109 | RAPIDJSON_NAMESPACE_END
110 | 
111 | #endif // RAPIDJSON_FILEWRITESTREAM_H_
112 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/internal/ieee754.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making RapidJSON available.
 2 | // 
 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 4 | //
 5 | // Licensed under the MIT License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // http://opensource.org/licenses/MIT
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed 
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef RAPIDJSON_IEEE754_
16 | #define RAPIDJSON_IEEE754_
17 | 
18 | #include "../rapidjson.h"
19 | 
20 | 
21 | RAPIDJSON_NAMESPACE_BEGIN
22 | namespace internal {
23 | 
24 | //! Bit-level view of a double: 1 sign bit, 11 exponent bits (bias 0x3FF), 52 significand bits.
25 | class Double {
26 |  public:
27 |   Double() {}
28 |   Double(double d) : d_(d) {}
29 |   Double(uint64_t u) : u_(u) {}
30 | 
31 |   double Value() const { return d_; }
32 |   uint64_t Uint64Value() const { return u_; }
33 | 
34 |   //! The double whose bit pattern is one greater; only valid for non-negative values.
35 |   double NextPositiveDouble() const {
36 |     RAPIDJSON_ASSERT(!Sign());
37 |     const Double successor(u_ + 1);
38 |     return successor.Value();
39 |   }
40 | 
41 |   bool Sign() const { return 0 != (u_ & kSignMask); }
42 |   uint64_t Significand() const { return u_ & kSignificandMask; }
43 |   //! Unbiased exponent taken from the exponent field.
44 |   int Exponent() const {
45 |     const uint64_t biased = (u_ & kExponentMask) >> kSignificandSize;
46 |     return static_cast<int>(biased - kExponentBias);
47 |   }
48 | 
49 |   bool IsNan() const {
50 |     if ((u_ & kExponentMask) != kExponentMask)
51 |       return false;
52 |     return Significand() != 0;
53 |   }
54 |   bool IsInf() const {
55 |     if ((u_ & kExponentMask) != kExponentMask)
56 |       return false;
57 |     return Significand() == 0;
58 |   }
59 |   //! True unless the exponent field is zero while the significand is non-zero.
60 |   bool IsNormal() const {
61 |     if ((u_ & kExponentMask) != 0)
62 |       return true;
63 |     return Significand() == 0;
64 |   }
65 |   bool IsZero() const { return 0 == (u_ & (kExponentMask | kSignificandMask)); }
66 | 
67 |   //! Significand with the hidden bit added when IsNormal() holds.
68 |   uint64_t IntegerSignificand() const {
69 |     const uint64_t fraction = Significand();
70 |     if (IsNormal())
71 |       return fraction | kHiddenBit;
72 |     return fraction;
73 |   }
74 |   //! Exponent that pairs with IntegerSignificand().
75 |   int IntegerExponent() const {
76 |     int exponent = kDenormalExponent;
77 |     if (IsNormal())
78 |       exponent = Exponent();
79 |     return exponent - kSignificandSize;
80 |   }
81 |   //! Two's-complement negation for sign-bit-set patterns, otherwise sets the sign bit.
82 |   uint64_t ToBias() const {
83 |     if (u_ & kSignMask)
84 |       return ~u_ + 1;
85 |     return u_ | kSignMask;
86 |   }
87 | 
88 |   //! 53 for order >= -1021, 0 for order <= -1074, and order + 1074 in between.
89 |   static unsigned EffectiveSignificandSize(int order) {
90 |     if (order >= -1021)
91 |       return 53;
92 |     if (order <= -1074)
93 |       return 0;
94 |     return static_cast<unsigned>(order) + 1074;
95 |   }
96 | 
97 |  private:
98 |   static const int kSignificandSize = 52;
99 |   static const int kExponentBias = 0x3FF;
100 |   static const int kDenormalExponent = 1 - kExponentBias;
101 |   static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000);
102 |   static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
103 |   static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
104 |   static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
105 | 
106 |   // The same 64 bits viewed either as a double or as an unsigned integer.
107 |   union {
108 |     double d_;
109 |     uint64_t u_;
110 |   };
111 | };
74 | 
75 | } // namespace internal
76 | RAPIDJSON_NAMESPACE_END
77 | 
78 | #endif // RAPIDJSON_IEEE754_
79 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/internal/pow10.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making RapidJSON available.
 2 | // 
 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 4 | //
 5 | // Licensed under the MIT License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // http://opensource.org/licenses/MIT
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed 
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef RAPIDJSON_POW10_
16 | #define RAPIDJSON_POW10_
17 | 
18 | #include "../rapidjson.h"
19 | 
20 | 
21 | RAPIDJSON_NAMESPACE_BEGIN
22 | namespace internal {
23 | 
24 | //! Computes integer powers of 10 in double (10.0^n).
25 | /*! This function uses a lookup table for fast and accurate results.
26 |     \param n non-negative exponent. Must be <= 308 (checked with RAPIDJSON_ASSERT).
27 |     \return 10.0^n
28 | */
29 | inline double Pow10(int n) {
30 |   static const double e[] = { // 1e+0...1e+308: 309 entries * 8 bytes = 2472 bytes
31 |     1e+0,
32 |     1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20,
33 |     1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
34 |     1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
35 |     1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
36 |     1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
37 |     1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 1e+119, 1e+120,
38 |     1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140,
39 |     1e+141, 1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160,
40 |     1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179, 1e+180,
41 |     1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188, 1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, 1e+198, 1e+199, 1e+200,
42 |     1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, 1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219, 1e+220,
43 |     1e+221, 1e+222, 1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240,
44 |     1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260,
45 |     1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269, 1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, 1e+279, 1e+280,
46 |     1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, 1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, 1e+297, 1e+298, 1e+299, 1e+300,
47 |     1e+301, 1e+302, 1e+303, 1e+304, 1e+305, 1e+306, 1e+307, 1e+308
48 |   };
49 |   RAPIDJSON_ASSERT(n >= 0 && n <= 308);
50 |   return e[n];
51 | }
52 | 
53 | } // namespace internal
54 | RAPIDJSON_NAMESPACE_END
55 | 
56 | #endif // RAPIDJSON_POW10_
57 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/internal/stack.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making RapidJSON available.
  2 | // 
  3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4 | //
  5 | // Licensed under the MIT License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // http://opensource.org/licenses/MIT
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed 
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef RAPIDJSON_INTERNAL_STACK_H_
 16 | #define RAPIDJSON_INTERNAL_STACK_H_
 17 | 
 18 | #include "../rapidjson.h"
 19 | #include "swap.h"
 20 | 
 21 | 
 22 | RAPIDJSON_NAMESPACE_BEGIN
 23 | namespace internal {
 24 | 
 25 | ///////////////////////////////////////////////////////////////////////////////
 26 | // Stack
 27 | 
 28 | //! A type-unsafe stack for storing different types of data.
 29 | /*! \tparam Allocator Allocator for allocating stack memory.
 30 | */
 31 | template <typename Allocator>
 32 | class Stack {
 33 |  public:
 34 |   // Optimization note: Do not allocate memory for stack_ in constructor.
 35 |   // Do it lazily when first Push() -> Expand() -> Resize().
 36 |   Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) {
 37 |     RAPIDJSON_ASSERT(stackCapacity > 0);
 38 |   }
 39 | 
 40 | #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
 41 |   Stack(Stack&& rhs)
 42 |     : allocator_(rhs.allocator_),
 43 |       ownAllocator_(rhs.ownAllocator_),
 44 |       stack_(rhs.stack_),
 45 |       stackTop_(rhs.stackTop_),
 46 |       stackEnd_(rhs.stackEnd_),
 47 |       initialCapacity_(rhs.initialCapacity_) {
 48 |     rhs.allocator_ = 0;
 49 |     rhs.ownAllocator_ = 0;
 50 |     rhs.stack_ = 0;
 51 |     rhs.stackTop_ = 0;
 52 |     rhs.stackEnd_ = 0;
 53 |     rhs.initialCapacity_ = 0;
 54 |   }
 55 | #endif
 56 | 
 57 |   ~Stack() {
 58 |     Destroy();
 59 |   }
 60 | 
 61 | #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
 62 |   Stack& operator=(Stack&& rhs) {
 63 |     if (&rhs != this) {
 64 |       Destroy();
 65 | 
 66 |       allocator_ = rhs.allocator_;
 67 |       ownAllocator_ = rhs.ownAllocator_;
 68 |       stack_ = rhs.stack_;
 69 |       stackTop_ = rhs.stackTop_;
 70 |       stackEnd_ = rhs.stackEnd_;
 71 |       initialCapacity_ = rhs.initialCapacity_;
 72 | 
 73 |       rhs.allocator_ = 0;
 74 |       rhs.ownAllocator_ = 0;
 75 |       rhs.stack_ = 0;
 76 |       rhs.stackTop_ = 0;
 77 |       rhs.stackEnd_ = 0;
 78 |       rhs.initialCapacity_ = 0;
 79 |     }
 80 |     return *this;
 81 |   }
 82 | #endif
 83 | 
 84 |   void Swap(Stack& rhs) RAPIDJSON_NOEXCEPT {
 85 |     internal::Swap(allocator_, rhs.allocator_);
 86 |     internal::Swap(ownAllocator_, rhs.ownAllocator_);
 87 |     internal::Swap(stack_, rhs.stack_);
 88 |     internal::Swap(stackTop_, rhs.stackTop_);
 89 |     internal::Swap(stackEnd_, rhs.stackEnd_);
 90 |     internal::Swap(initialCapacity_, rhs.initialCapacity_);
 91 |   }
 92 | 
 93 |   void Clear() { stackTop_ = stack_; }
 94 | 
 95 |   void ShrinkToFit() {
 96 |     if (Empty()) {
 97 |       // If the stack is empty, completely deallocate the memory.
 98 |       Allocator::Free(stack_);
 99 |       stack_ = 0;
100 |       stackTop_ = 0;
101 |       stackEnd_ = 0;
102 |     } else
103 |       Resize(GetSize());
104 |   }
105 | 
106 |   // Optimization note: try to minimize the size of this function for force inline.
107 |   // Expansion is run very infrequently, so it is moved to another (probably non-inline) function.
108 |   template <typename T>
109 |   RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) {
110 |     // Expand the stack if needed
111 |     if (stackTop_ + sizeof(T) * count >= stackEnd_)
112 |       Expand<T>(count);
113 | 
114 |     T* ret = reinterpret_cast<T*>(stackTop_);
115 |     stackTop_ += sizeof(T) * count;
116 |     return ret;
117 |   }
118 | 
119 |   template <typename T>
120 |   T* Pop(size_t count) {
121 |     RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T));
122 |     stackTop_ -= count * sizeof(T);
123 |     return reinterpret_cast<T*>(stackTop_);
124 |   }
125 | 
126 |   template <typename T>
127 |   T* Top() {
128 |     RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
129 |     return reinterpret_cast<T*>(stackTop_ - sizeof(T));
130 |   }
131 | 
132 |   template <typename T>
133 |   T* Bottom() { return (T*) stack_; }
134 | 
135 |   bool HasAllocator() const {
136 |     return allocator_ != 0;
137 |   }
138 | 
139 |   Allocator& GetAllocator() {
140 |     RAPIDJSON_ASSERT(allocator_);
141 |     return *allocator_;
142 |   }
143 |   bool Empty() const { return stackTop_ == stack_; }
144 |   size_t GetSize() const { return static_cast<size_t>(stackTop_ - stack_); }
145 |   size_t GetCapacity() const { return static_cast<size_t>(stackEnd_ - stack_); }
146 | 
147 |  private:
148 |   template <typename T>
149 |   void Expand(size_t count) {
150 |     // Only expand the capacity if the current stack exists. Otherwise just create a stack with initial capacity.
151 |     size_t newCapacity;
152 |     if (stack_ == 0) {
153 |       if (!allocator_)
154 |         ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
155 |       newCapacity = initialCapacity_;
156 |     } else {
157 |       newCapacity = GetCapacity();
158 |       newCapacity += (newCapacity + 1) / 2;
159 |     }
160 |     size_t newSize = GetSize() + sizeof(T) * count;
161 |     if (newCapacity < newSize)
162 |       newCapacity = newSize;
163 | 
164 |     Resize(newCapacity);
165 |   }
166 | 
167 |   void Resize(size_t newCapacity) {
168 |     const size_t size = GetSize();  // Backup the current size
169 |     stack_ = (char*) allocator_->Realloc(stack_, GetCapacity(), newCapacity);
170 |     stackTop_ = stack_ + size;
171 |     stackEnd_ = stack_ + newCapacity;
172 |   }
173 | 
174 |   void Destroy() {
175 |     Allocator::Free(stack_);
176 |     RAPIDJSON_DELETE(ownAllocator_); // Only delete if it is owned by the stack
177 |   }
178 | 
179 |   // Prohibit copy constructor & assignment operator.
180 |   Stack(const Stack&);
181 |   Stack& operator=(const Stack&);
182 | 
183 |   Allocator* allocator_;
184 |   Allocator* ownAllocator_;
185 |   char* stack_;
186 |   char* stackTop_;
187 |   char* stackEnd_;
188 |   size_t initialCapacity_;
189 | };
190 | 
191 | } // namespace internal
192 | RAPIDJSON_NAMESPACE_END
193 | 
194 | #endif // RAPIDJSON_INTERNAL_STACK_H_
195 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/internal/strfunc.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making RapidJSON available.
 2 | // 
 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 4 | //
 5 | // Licensed under the MIT License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // http://opensource.org/licenses/MIT
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed 
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef RAPIDJSON_INTERNAL_STRFUNC_H_
16 | #define RAPIDJSON_INTERNAL_STRFUNC_H_
17 | 
18 | #include "../rapidjson.h"
19 | 
20 | 
21 | RAPIDJSON_NAMESPACE_BEGIN
22 | namespace internal {
23 | 
24 | //! Custom strlen() which works on different character types.
25 | /*! \tparam Ch Character type (e.g. char, wchar_t, short)
26 |     \param s Null-terminated input string.
27 |     \return Number of characters in the string. 
28 |     \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints.
29 | */
30 | template <typename Ch>
31 | inline SizeType StrLen(const Ch* s) {
32 |   const Ch* p = s;
33 |   while (*p) ++p;
34 |   return SizeType(p - s);
35 | }
36 | 
37 | } // namespace internal
38 | RAPIDJSON_NAMESPACE_END
39 | 
40 | #endif // RAPIDJSON_INTERNAL_STRFUNC_H_
41 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/internal/swap.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making RapidJSON available.
 2 | //
 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 4 | //
 5 | // Licensed under the MIT License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // http://opensource.org/licenses/MIT
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef RAPIDJSON_INTERNAL_SWAP_H_
16 | #define RAPIDJSON_INTERNAL_SWAP_H_
17 | 
18 | #include "../rapidjson.h"
19 | 
20 | 
21 | RAPIDJSON_NAMESPACE_BEGIN
22 | namespace internal {
23 | 
24 | //! Custom swap() to avoid dependency on C++ <algorithm> header
25 | /*! \tparam T Type of the arguments to swap, should be instantiated with primitive C++ types only.
26 |     \note This has the same semantics as std::swap().
27 | */
28 | template <typename T>
29 | inline void Swap(T& a, T& b) RAPIDJSON_NOEXCEPT {
30 |   T tmp = a;
31 |   a = b;
32 |   b = tmp;
33 | }
34 | 
35 | } // namespace internal
36 | RAPIDJSON_NAMESPACE_END
37 | 
38 | #endif // RAPIDJSON_INTERNAL_SWAP_H_
39 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/memorybuffer.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making RapidJSON available.
 2 | // 
 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 4 | //
 5 | // Licensed under the MIT License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // http://opensource.org/licenses/MIT
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed 
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef RAPIDJSON_MEMORYBUFFER_H_
16 | #define RAPIDJSON_MEMORYBUFFER_H_
17 | 
18 | #include "rapidjson.h"
19 | #include "internal/stack.h"
20 | 
21 | 
22 | RAPIDJSON_NAMESPACE_BEGIN
23 | 
24 | //! Represents an in-memory output byte stream.
25 | /*!
26 |     This class is mainly for being wrapped by EncodedOutputStream or AutoUTFOutputStream.
27 | 
28 |     It is similar to FileWriteBuffer but the destination is an in-memory buffer instead of a file.
29 | 
30 |     Differences between MemoryBuffer and StringBuffer:
31 |     1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer. 
32 |     2. StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns a buffer without terminator.
33 | 
34 |     \tparam Allocator type for allocating memory buffer.
35 |     \note implements Stream concept
36 | */
37 | template <typename Allocator = CrtAllocator>
38 | struct GenericMemoryBuffer {
39 |   typedef char Ch; // byte
40 | 
41 |   GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
42 | 
43 |   void Put(Ch c) { *stack_.template Push<Ch>() = c; }
44 |   void Flush() {}
45 | 
46 |   void Clear() { stack_.Clear(); }
47 |   void ShrinkToFit() { stack_.ShrinkToFit(); }
48 |   Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
49 |   void Pop(size_t count) { stack_.template Pop<Ch>(count); }
50 | 
51 |   const Ch* GetBuffer() const {
52 |     return stack_.template Bottom<Ch>();
53 |   }
54 | 
55 |   size_t GetSize() const { return stack_.GetSize(); }
56 | 
57 |   static const size_t kDefaultCapacity = 256;
58 |   mutable internal::Stack<Allocator> stack_;
59 | };
60 | 
61 | typedef GenericMemoryBuffer<> MemoryBuffer;
62 | 
63 | //! Implement specialized version of PutN() with memset() for better performance.
64 | template <>
65 | inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n) {
66 |   std::memset(memoryBuffer.stack_.Push<char>(n), c, n * sizeof(c));
67 | }
68 | 
69 | RAPIDJSON_NAMESPACE_END
70 | 
71 | #endif // RAPIDJSON_MEMORYBUFFER_H_
72 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/memorystream.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making RapidJSON available.
 2 | // 
 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 4 | //
 5 | // Licensed under the MIT License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // http://opensource.org/licenses/MIT
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed 
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef RAPIDJSON_MEMORYSTREAM_H_
16 | #define RAPIDJSON_MEMORYSTREAM_H_
17 | 
18 | #include "rapidjson.h"
19 | 
20 | 
21 | RAPIDJSON_NAMESPACE_BEGIN
22 | 
23 | //! Represents an in-memory input byte stream.
24 | /*!
25 |     This class is mainly for being wrapped by EncodedInputStream or AutoUTFInputStream.
26 | 
27 |     It is similar to FileReadBuffer but the source is an in-memory buffer instead of a file.
28 | 
29 |     Differences between MemoryStream and StringStream:
30 |     1. StringStream has encoding but MemoryStream is a byte stream.
31 |     2. MemoryStream needs size of the source buffer and the buffer don't need to be null terminated. StringStream assume null-terminated string as source.
32 |     3. MemoryStream supports Peek4() for encoding detection. StringStream is specified with an encoding so it should not have Peek4().
33 |     \note implements Stream concept
34 | */
35 | struct MemoryStream {
36 |   typedef char Ch; // byte
37 | 
38 |   MemoryStream(const Ch* src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {}
39 | 
40 |   Ch Peek() const { return (src_ == end_) ? '\0' : *src_; }
41 |   Ch Take() { return (src_ == end_) ? '\0' : *src_++; }
42 |   size_t Tell() const { return static_cast<size_t>(src_ - begin_); }
43 | 
44 |   Ch* PutBegin() {
45 |     RAPIDJSON_ASSERT(false);
46 |     return 0;
47 |   }
48 |   void Put(Ch) { RAPIDJSON_ASSERT(false); }
49 |   void Flush() { RAPIDJSON_ASSERT(false); }
50 |   size_t PutEnd(Ch*) {
51 |     RAPIDJSON_ASSERT(false);
52 |     return 0;
53 |   }
54 | 
55 |   // For encoding detection only.
56 |   const Ch* Peek4() const {
57 |     return Tell() + 4 <= size_ ? src_ : 0;
58 |   }
59 | 
60 |   const Ch* src_;     //!< Current read position.
61 |   const Ch* begin_;   //!< Original head of the string.
62 |   const Ch* end_;     //!< End of stream.
63 |   size_t size_;       //!< Size of the stream.
64 | };
65 | 
66 | RAPIDJSON_NAMESPACE_END
67 | 
68 | #endif // RAPIDJSON_MEMORYSTREAM_H_
69 | 


--------------------------------------------------------------------------------
/thirdparty/rapidjson/stringbuffer.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making RapidJSON available.
 2 | // 
 3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 4 | //
 5 | // Licensed under the MIT License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // http://opensource.org/licenses/MIT
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed 
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef RAPIDJSON_STRINGBUFFER_H_
16 | #define RAPIDJSON_STRINGBUFFER_H_
17 | 
18 | #include "rapidjson.h"
19 | 
20 | 
21 | #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
22 | 
23 | #include <utility> // std::move
24 | 
25 | 
26 | #endif
27 | 
28 | #include "internal/stack.h"
29 | 
30 | 
31 | RAPIDJSON_NAMESPACE_BEGIN
32 | 
33 | //! Represents an in-memory output stream.
34 | /*!
35 |     \tparam Encoding Encoding of the stream.
36 |     \tparam Allocator type for allocating memory buffer.
37 |     \note implements Stream concept
38 | */
39 | template <typename Encoding, typename Allocator = CrtAllocator>
40 | class GenericStringBuffer {
41 |  public:
42 |   typedef typename Encoding::Ch Ch;
43 | 
44 |   GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
45 | 
46 | #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
47 |   GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {}
48 |   GenericStringBuffer& operator=(GenericStringBuffer&& rhs) {
49 |     if (&rhs != this)
50 |       stack_ = std::move(rhs.stack_);
51 |     return *this;
52 |   }
53 | #endif
54 | 
55 |   void Put(Ch c) { *stack_.template Push<Ch>() = c; }
56 |   void Flush() {}
57 | 
58 |   void Clear() { stack_.Clear(); }
59 |   void ShrinkToFit() {
60 |     // Push and pop a null terminator. This is safe.
61 |     *stack_.template Push<Ch>() = '\0';
62 |     stack_.ShrinkToFit();
63 |     stack_.template Pop<Ch>(1);
64 |   }
65 |   Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
66 |   void Pop(size_t count) { stack_.template Pop<Ch>(count); }
67 | 
68 |   const Ch* GetString() const {
69 |     // Push and pop a null terminator. This is safe.
70 |     *stack_.template Push<Ch>() = '\0';
71 |     stack_.template Pop<Ch>(1);
72 | 
73 |     return stack_.template Bottom<Ch>();
74 |   }
75 | 
76 |   size_t GetSize() const { return stack_.GetSize(); }
77 | 
78 |   static const size_t kDefaultCapacity = 256;
79 |   mutable internal::Stack<Allocator> stack_;
80 | 
81 |  private:
82 |   // Prohibit copy constructor & assignment operator.
83 |   GenericStringBuffer(const GenericStringBuffer&);
84 |   GenericStringBuffer& operator=(const GenericStringBuffer&);
85 | };
86 | 
87 | //! String buffer with UTF8 encoding
88 | typedef GenericStringBuffer<UTF8<> > StringBuffer;
89 | 
90 | //! Implement specialized version of PutN() with memset() for better performance.
91 | template <>
92 | inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) {
93 |   std::memset(stream.stack_.Push<char>(n), c, n * sizeof(c));
94 | }
95 | 
96 | RAPIDJSON_NAMESPACE_END
97 | 
98 | #endif // RAPIDJSON_STRINGBUFFER_H_
99 | 


--------------------------------------------------------------------------------
/thirdparty/rlib/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Author: Ming Zhang
2 | # Copyright (c) 2022
3 | 
# rlib consists only of .hpp headers; it is still declared as a STATIC library
# so that dependents can simply link against the `rlib` target.

# Provides the Threads::Threads imported target (portable replacement for a raw `pthread`).
find_package(Threads REQUIRED)

# NOTE: headers are picked up by glob; re-run CMake after adding a new .hpp file.
file(GLOB SOURCES "*.hpp")

add_library(rlib STATIC ${SOURCES})
# There are no .cc/.cpp sources, so the linker language must be stated explicitly.
set_target_properties(rlib PROPERTIES LINKER_LANGUAGE CXX)
# PUBLIC: the headers use libibverbs and threads, so consumers need them as well.
target_link_libraries(rlib PUBLIC ibverbs Threads::Threads)


--------------------------------------------------------------------------------
/thirdparty/rlib/common.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <cstdint>
  4 | 
  5 | #include "logging.hpp"
  6 | #include "mr.hpp"
  7 | #include "rnic.hpp"
  8 | 
  9 | 
 10 | namespace rdmaio {
 11 | 
// connection status
// Status code used by the connection helpers; it is also the `ack` field of
// ConnReply and the return type of the MsgAdapter send/connect interfaces.
enum ConnStatus {
  SUCC = 0,
  TIMEOUT = 1,
  WRONG_ARG = 2,
  ERR = 3,
  NOT_READY = 4,
  UNKNOWN = 5
};
 21 | 
/**
 * The connection information exchanged between different QPs.
 * RC/UC QPs use lid & addr to connect to remote QPs, while qpn is used upon send requests.
 * node_id & port_id are used for UD QPs to create addresses.
 */
struct QPAttr {
  address_t addr;
  uint16_t lid;
  uint32_t qpn;
  uint32_t psn;
  uint16_t node_id;
  uint16_t port_id;
};
 35 | 
/**
 * The QP connection request sent to the remote side.
 * from_node & from_worker identify which QP it shall connect to.
 */
struct QPConnArg {
  uint16_t from_node;
  uint32_t from_worker;
  uint8_t qp_type;  // RC QP or UD QP
  QPAttr qp_attr;   // attributes of the requesting QP (presumably the sender's own; confirm against callers)
};
 46 | 
/**
 * The MR connection request sent to the remote side.
 */
struct MRConnArg {
  uint64_t mr_id;  // identifier of the requested memory region
};
 53 | 
// A connection request: `type` tags which member of `payload` is meaningful
// (MR -> payload.mr, QP -> payload.qp).
struct ConnArg {
  enum {
    MR,
    QP
  } type;
  union {
    QPConnArg qp;
    MRConnArg mr;
  } payload;
};
 64 | 
// Reply to a ConnArg request: a status code plus either QP attributes or
// memory-region attributes. The struct itself carries no tag for the union;
// NOTE(review): presumably the receiver picks the member based on the request it sent.
struct ConnReply {
  ConnStatus ack;
  union {
    QPAttr qp;
    MemoryAttr mr;
  } payload;
};
 72 | 
 73 | inline int convert_mtu(ibv_mtu type) {
 74 |   int mtu = 0;
 75 |   switch (type) {
 76 |     case IBV_MTU_256:mtu = 256;
 77 |       break;
 78 |     case IBV_MTU_512:mtu = 512;
 79 |       break;
 80 |     case IBV_MTU_1024:mtu = 1024;
 81 |       break;
 82 |     case IBV_MTU_2048:mtu = 2048;
 83 |       break;
 84 |     case IBV_MTU_4096:mtu = 4096;
 85 |       break;
 86 |   }
 87 |   return mtu;
 88 | }
 89 | 
// The structure used to configure a UD QP.
typedef struct {
  int max_send_size;
  int max_recv_size;
  int qkey;
  int psn;
} UDConfig;
 97 | 
// The structure used to configure an RC QP (counterpart of UDConfig).
// NOTE(review): field names mirror ibv_qp_attr members; confirm how each is applied in the QP setup code.
typedef struct {
  int access_flags;
  int max_rd_atomic;
  int max_dest_rd_atomic;
  int rq_psn;
  int sq_psn;
  int timeout;
} RCConfig;
106 | 
107 | }  // namespace rdmaio
108 | 


--------------------------------------------------------------------------------
/thirdparty/rlib/logging.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The logging utilities used in libRDMA.
  3 |  */
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <iostream>
  8 | #include <sstream>
  9 | 
 10 | 
 11 | namespace rdmaio {
 12 | 
/**
 * Log severities, ordered from least (EVERYTHING = 0) to most severe (NONE = 7).
 * The logging macros emit a message when its level is >= RDMA_LOG_LEVEL.
 *
 * \def FATAL
 *   Used for fatal and probably irrecoverable conditions
 * \def ERROR
 *   Used for errors which are recoverable within the scope of the function
 * \def WARNING
 *   Logs interesting conditions which are probably not fatal
 * \def EMPH
 *   Outputs as INFO, but in WARNING colors. Useful for
 *   outputting information you want to emphasize.
 * \def INFO
 *   Used for providing general useful information
 * \def DBG
 *   Debugging purposes only
 * \def EVERYTHING
 *   Log everything
 */

enum loglevel {
  NONE = 7,
  FATAL = 6,
  ERROR = 5,
  WARNING = 4,
  EMPH = 3,
  INFO = 2,
  DBG = 1,
  EVERYTHING = 0
};
 41 | 
 42 | #define unlikely(x) __builtin_expect(!!(x), 0)
 43 | 
 44 | #ifndef RDMA_LOG_LEVEL
 45 | #define RDMA_LOG_LEVEL ::rdmaio::DBG
 46 | #endif
 47 | 
 48 | // logging macro definiations
 49 | // default log
 50 | #define RDMA_LOG(n)        \
 51 |   if (n >= RDMA_LOG_LEVEL) \
 52 |   ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream()
 53 | 
 54 | // #define RDMA_LOG(n)        \
 55 | //   if (n != ::rdmaio::INFO && n >= RDMA_LOG_LEVEL) \
 56 | //   ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream()
 57 | 
 58 | // #define RDMA_LOG(n)        \
 59 | //   if (false) \
 60 | //   ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream()
 61 | 
 62 | 
 63 | // log with tag
 64 | #define RDMA_TLOG(n, t)                                           \
 65 |   if (n >= RDMA_LOG_LEVEL)                                        \
 66 |   ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream() \
 67 |       << "[" << (t) << "]"
 68 | 
 69 | #define RDMA_LOG_IF(n, condition)         \
 70 |   if (n >= RDMA_LOG_LEVEL && (condition)) \
 71 |   ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, n).stream()
 72 | 
 73 | #define RDMA_ASSERT(condition) \
 74 |   if (unlikely(!(condition)))  \
 75 |   ::rdmaio::MessageLogger((char *)__FILE__, __LINE__, ::rdmaio::FATAL + 1).stream() << "Assertion! "
 76 | 
 77 | #define RDMA_VERIFY(n, condition) RDMA_LOG_IF(n, (!(condition)))
 78 | 
 79 | class MessageLogger {
 80 |  public:
 81 |   MessageLogger(const char* file, int line, int level) : level_(level) {
 82 |     if (level_ < RDMA_LOG_LEVEL)
 83 |       return;
 84 |     stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] ";
 85 |   }
 86 | 
 87 |   ~MessageLogger() {
 88 |     if (level_ >= RDMA_LOG_LEVEL) {
 89 |       stream_ << "\n";
 90 |       std::cout << "\033[" << RDMA_DEBUG_LEVEL_COLOR[std::min(level_, 6)] << "m"
 91 |                 << stream_.str() << EndcolorFlag();
 92 |       if (level_ >= ::rdmaio::FATAL)
 93 |         abort();
 94 |     }
 95 |   }
 96 | 
 97 |   // Return the stream associated with the logger object.
 98 |   std::stringstream& stream() { return stream_; }
 99 | 
100 |  private:
101 |   std::stringstream stream_;
102 |   int level_;
103 | 
104 |   // control flags for color
105 | #define R_BLACK 39
106 | #define R_RED 31
107 | #define R_GREEN 32
108 | #define R_YELLOW 33
109 | #define R_BLUE 34
110 | #define R_MAGENTA 35
111 | #define R_CYAN 36
112 | #define R_WHITE 37
113 | 
114 |   const int RDMA_DEBUG_LEVEL_COLOR[7] = {R_BLACK, R_BLACK, R_YELLOW, R_GREEN, R_MAGENTA, R_RED, R_RED};
115 | 
116 |   static std::string StripBasename(const std::string& full_path) {
117 |     const char kSeparator = '/';
118 |     size_t pos = full_path.rfind(kSeparator);
119 |     if (pos != std::string::npos) {
120 |       return full_path.substr(pos + 1, std::string::npos);
121 |     } else {
122 |       return full_path;
123 |     }
124 |   }
125 | 
126 |   static std::string EndcolorFlag() {
127 |     char flag[7];
128 |     snprintf(flag, 7, "%c[0m", 0x1B);
129 |     return std::string(flag);
130 |   }
131 | };
132 | 
133 | };  // namespace rdmaio
134 | 


--------------------------------------------------------------------------------
/thirdparty/rlib/mr.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <infiniband/verbs.h>
 4 | 
 5 | #include "logging.hpp"
 6 | 
 7 | 
 8 | namespace rdmaio {
 9 | 
// Attributes describing a registered memory region; Memory fills it with the
// buffer address and the region's rkey.
struct MemoryAttr {
  uintptr_t buf;  // address of the buffer
  uint32_t key;   // key of the region (Memory stores mr->rkey here)
};
14 | 
15 | class Memory {
16 |  public:
17 |   /**
18 |      * The default protection flag of a memory region.
19 |      * In default, the memory can be read/write by local and remote RNIC operations.
20 |      */
21 |   static const int DEFAULT_PROTECTION_FLAG = (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ |
22 |     IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC);
23 | 
24 |   Memory(const char* addr, uint64_t len, ibv_pd* pd, int flag) : addr(addr),
25 |                                                                  len(len),
26 |                                                                  mr(ibv_reg_mr(pd, (void*) addr, len, flag)) {
27 |     if (mr == nullptr) {
28 |       RDMA_LOG(WARNING) << "failed to register local_mr, for addr " << addr << "; len " << len;
29 |     } else {
30 |       rattr.buf = (uintptr_t) addr;
31 |       rattr.key = mr->rkey;
32 |     }
33 |   }
34 | 
35 |   ~Memory() {
36 |     if (mr != nullptr) {
37 |       int rc = ibv_dereg_mr(mr);
38 |       RDMA_LOG_IF(ERROR, rc != 0) << "dereg local_mr error: " << strerror(errno);
39 |     }
40 |   }
41 | 
42 |   bool valid() {
43 |     return mr != nullptr;
44 |   }
45 | 
46 |   const char* addr;
47 |   uint64_t len;
48 | 
49 |   MemoryAttr rattr;      // RDMA registered attr
50 |   ibv_mr* mr = nullptr;  // local_mr in the driver
51 | };
52 | 
53 | };  // namespace rdmaio
54 | 


--------------------------------------------------------------------------------
/thirdparty/rlib/msg_interface.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <functional>
  4 | #include <set>
  5 | #include <string>
  6 | 
  7 | #include "common.hpp"
  8 | 
  9 | 
 10 | namespace rdmaio {
 11 | 
 12 | typedef std::function<void(const char*, int, int)> msg_callback_t_;
 13 | 
 14 | /**
 15 |  * An abstract message interface
 16 |  * Assumption: one per thread
 17 |  */
 18 | class MsgAdapter {
 19 |  public:
 20 |   MsgAdapter(msg_callback_t_ callback)
 21 |     : callback_(callback) {
 22 |   }
 23 | 
 24 |   MsgAdapter() {
 25 |   }
 26 | 
 27 |   void set_callback(msg_callback_t_ callback) {
 28 |     callback_ = callback;
 29 |   }
 30 | 
 31 |   virtual ConnStatus connect(std::string ip, int port) = 0;
 32 | 
 33 |   /**
 34 |    * Basic send interfaces
 35 |    */
 36 |   virtual ConnStatus send_to(int node_id, const char* msg, int len) = 0;
 37 | 
 38 |   virtual ConnStatus send_to(int node_id, int tid, const char* msg, int len) {
 39 |     return send_to(node_id, msg, len);
 40 |   }
 41 | 
 42 |   /**
 43 |    * Interfaces which allow batching at the sender's side
 44 |    */
 45 |   virtual void prepare_pending() {
 46 |   }
 47 | 
 48 |   virtual ConnStatus send_pending(int node_id, const char* msg, int len) {
 49 |     RDMA_ASSERT(false);  // not implemented
 50 |   }
 51 | 
 52 |   virtual ConnStatus send_pending(int node_id, int tid, const char* msg, int len) {
 53 |     return send_pending(node_id, msg, len);
 54 |   }
 55 | 
 56 |   /**
 57 |    * Flush all the currently pended message
 58 |    */
 59 |   virtual ConnStatus flush_pending() {
 60 |     return SUCC;
 61 |   }
 62 | 
 63 |   /**
 64 |    * Examples to use batching at the sender side
 65 |    * Broadcast the message to a set of servers
 66 |    */
 67 |   virtual ConnStatus broadcast_to(const std::set<int>& nodes, const char* msg, int len) {
 68 |     prepare_pending();
 69 |     for (auto it = nodes.begin(); it != nodes.end(); ++it) {
 70 |       send_pending(*it, msg, len);
 71 |     }
 72 |     flush_pending();
 73 |     return SUCC;  // TODO
 74 |   }
 75 | 
 76 |   virtual ConnStatus broadcast_to(int* nodes, int num, const char* msg, int len) {
 77 |     prepare_pending();
 78 |     for (int i = 0; i < num; ++i) {
 79 |       send_pending(nodes[i], msg, len);
 80 |     }
 81 |     flush_pending();
 82 |     return SUCC;  // TODO
 83 |   }
 84 | 
 85 |   /**
 86 |    * The receive function
 87 |    */
 88 |   virtual void poll_comps() = 0;
 89 | 
 90 |   /**
 91 |    * The size of meta value used by the MsgAdapter for each message
 92 |    */
 93 |   virtual int msg_meta_len() {
 94 |     return 0;
 95 |   }
 96 | 
 97 |  protected:
 98 |   msg_callback_t_ callback_;
 99 | };
100 | 
101 | };  // namespace rdmaio
102 | 


--------------------------------------------------------------------------------
/thirdparty/rlib/pre_connector.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <arpa/inet.h>
  4 | #include <errno.h>
  5 | #include <fcntl.h>
  6 | #include <netdb.h>  //hostent
  7 | #include <string.h>
  8 | #include <sys/time.h>
  9 | #include <unistd.h>
 10 | 
 11 | #include <map>
 12 | 
 13 | #include "logging.hpp"
 14 | 
 15 | 
 16 | namespace rdmaio {
 17 | 
 18 | constexpr struct timeval default_timeout = {0, 8000};
 19 | constexpr struct timeval no_timeout = {0, 0};  // it means forever
 20 | 
 21 | inline __attribute__((always_inline))  // inline to avoid multiple-definiations
 22 | int64_t
 23 | diff_time(const struct timeval& end, const struct timeval& start) {
 24 |   int64_t diff = (end.tv_sec > start.tv_sec) ? (end.tv_sec - start.tv_sec) * 1000 : 0;
 25 |   if (end.tv_usec > start.tv_usec) {
 26 |     diff += (end.tv_usec - start.tv_usec);
 27 |   } else {
 28 |     diff -= (start.tv_usec - end.tv_usec);
 29 |   }
 30 |   return diff;
 31 | }
 32 | 
 33 | // Helper class used to exchange QP information over TCP/IP.
 34 | // Every member is static; the class keeps no per-instance state.
 35 | class PreConnector {
 36 |  public:
 37 |   // Create a TCP socket bound to INADDR_ANY:`port` with SO_REUSEADDR requested.
 38 |   // `addr` is not used and listen() is not called here. Asserts if socket() or bind() fails.
 39 |   static int get_listen_socket(const std::string& addr, int port) {
 40 |     auto sockfd = socket(AF_INET, SOCK_STREAM, 0);
 41 |     RDMA_ASSERT(sockfd >= 0) << "ERROR opening listen socket: " << strerror(errno);
 42 | 
 43 |     // Zero the structure first so the sin_zero padding is not handed to bind() uninitialized.
 44 |     struct sockaddr_in serv_addr;
 45 |     memset(&serv_addr, 0, sizeof(serv_addr));
 46 |     serv_addr.sin_family = AF_INET;
 47 |     serv_addr.sin_addr.s_addr = INADDR_ANY;
 48 |     serv_addr.sin_port = htons(port);
 49 | 
 50 |     int on = 1;
 51 |     setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 52 |     RDMA_ASSERT(bind(sockfd, (struct sockaddr*) &serv_addr,
 53 |                      sizeof(serv_addr)) == 0)
 54 |       << "ERROR on binding: " << strerror(errno);
 55 |     return sockfd;
 56 |   }
 57 | 
 58 |   // Open a non-blocking TCP socket and start connecting it to `addr`:`port`.
 59 |   // Returns the descriptor, or -1 when the host cannot be resolved or the connect attempt fails.
 60 |   // The descriptor is closed before -1 is returned.
 61 |   static int get_send_socket(const std::string& addr, int port, struct timeval timeout = default_timeout) {
 62 |     int sockfd;
 63 |     RDMA_ASSERT((sockfd = socket(AF_INET, SOCK_STREAM, 0)) >= 0) << "Error open socket for send!";
 64 |     fcntl(sockfd, F_SETFL, O_NONBLOCK);
 65 | 
 66 |     auto ip = host_to_ip(addr);
 67 |     if (ip == "") {
 68 |       close(sockfd);
 69 |       return -1;
 70 |     }
 71 | 
 72 |     struct sockaddr_in serv_addr;
 73 |     memset(&serv_addr, 0, sizeof(serv_addr));
 74 |     serv_addr.sin_family = AF_INET;
 75 |     serv_addr.sin_port = htons(port);
 76 |     serv_addr.sin_addr.s_addr = inet_addr(ip.c_str());
 77 | 
 78 |     // A non-blocking connect normally reports EINPROGRESS; any other error ends this attempt.
 79 |     if (connect(sockfd, (struct sockaddr*) &serv_addr, sizeof(serv_addr)) == -1 && errno != EINPROGRESS) {
 80 |       close(sockfd);
 81 |       return -1;
 82 |     }
 83 | 
 84 |     // Wait (up to `timeout`) for the socket to become writable, then read the connect result.
 85 |     fd_set fdset;
 86 |     FD_ZERO(&fdset);
 87 |     FD_SET(sockfd, &fdset);
 88 | 
 89 |     if (select(sockfd + 1, NULL, &fdset, NULL, &timeout) == 1) {
 90 |       int so_error = 0;  // initialised so a failed getsockopt() is not read as garbage
 91 |       socklen_t len = sizeof so_error;
 92 |       getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &so_error, &len);
 93 | 
 94 |       if (so_error != 0) {
 95 |         close(sockfd);
 96 |         return -1;
 97 |       }
 98 |     }
 99 | 
100 |     // NOTE(review): if select() timed out, the descriptor is still returned although the connection may not be established.
101 |     return sockfd;
102 |   }
103 | 
104 |   // Block until `socket` is readable, then return true.
105 |   // `timeout` (microseconds) is only the length of one select() poll; the loop retries until data arrives.
106 |   static bool wait_recv(int socket, uint32_t timeout = 2000) {
107 |     while (true) {
108 |       fd_set rfds;
109 |       FD_ZERO(&rfds);
110 |       FD_SET(socket, &rfds);
111 | 
112 |       struct timeval s_timeout = {0, timeout};
113 |       int ready = select(socket + 1, &rfds, NULL, NULL, &s_timeout);
114 |       RDMA_ASSERT(ready != -1) << "select error " << strerror(errno);
115 | 
116 |       if (ready > 0 && FD_ISSET(socket, &rfds)) {
117 |         break;  // ready
118 |       }
119 |     }
120 |     return true;
121 |   }
122 | 
123 |   // Shut down the write side, do one recv() bounded by a one-second receive timeout, then close.
124 |   static void wait_close(int socket) {
125 |     shutdown(socket, SHUT_WR);
126 |     char buf[2];
127 | 
128 |     struct timeval timeout = {1, 0};
129 |     auto ret = setsockopt(socket, SOL_SOCKET, SO_RCVTIMEO, (const char*) &timeout, sizeof(timeout));
130 |     RDMA_ASSERT(ret == 0);
131 | 
132 |     recv(socket, buf, 2, 0);
133 |     close(socket);
134 |   }
135 | 
136 |   // Write all `n` bytes of `usrbuf` to `fd`, retrying after EINTR. Returns n, or -1 on error.
137 |   static int send_to(int fd, char* usrbuf, size_t n) {
138 |     size_t nleft = n;
139 |     ssize_t nwritten;
140 |     char* bufp = usrbuf;
141 | 
142 |     while (nleft > 0) {
143 |       if ((nwritten = write(fd, bufp, nleft)) <= 0) {
144 |         if (errno == EINTR) /* Interrupted by sig handler return */
145 |           nwritten = 0;     /* and call write() again */
146 |         else
147 |           return -1; /* errno set by write() */
148 |       }
149 |       nleft -= nwritten;
150 |       bufp += nwritten;
151 |     }
152 |     return n;
153 |   }
154 | 
155 |   typedef std::map<std::string, std::string> ipmap_t;
156 |   // Per-thread host -> IP string cache.
157 |   static ipmap_t& local_ip_cache() {
158 |     static __thread ipmap_t cache;
159 |     return cache;
160 |   }
161 | 
162 |   // Resolve `host` to a numeric IPv4 string, or "" when resolution fails.
163 |   // Successful lookups are remembered in the calling thread's cache.
164 |   static std::string host_to_ip(const std::string& host) {
165 |     // Bind by reference: a copy would make the insert below invisible to later calls.
166 |     ipmap_t& cache = local_ip_cache();
167 |     ipmap_t::iterator cached = cache.find(host);
168 |     if (cached != cache.end())
169 |       return cached->second;
170 | 
171 |     struct addrinfo hints, * infoptr = NULL;
172 |     memset(&hints, 0, sizeof hints);
173 |     hints.ai_family = AF_INET;  // AF_INET means IPv4 only addresses
174 | 
175 |     int result = getaddrinfo(host.c_str(), NULL, &hints, &infoptr);
176 |     if (result) {
177 |       fprintf(stderr, "getaddrinfo: %s at %s\n", gai_strerror(result), host.c_str());
178 |       return "";
179 |     }
180 |     char ip[64];
181 |     memset(ip, 0, sizeof(ip));
182 | 
183 |     // Each entry overwrites `ip`, so the last address in the list is the one returned.
184 |     for (struct addrinfo* p = infoptr; p != NULL; p = p->ai_next) {
185 |       getnameinfo(p->ai_addr, p->ai_addrlen, ip, sizeof(ip), NULL, 0, NI_NUMERICHOST);
186 |     }
187 |     freeaddrinfo(infoptr);  // release the list allocated by getaddrinfo()
188 | 
189 |     std::string res(ip);
190 |     if (res != "")
191 |       cache.insert(std::make_pair(host, res));
192 |     return res;
193 |   }
194 | };
195 | 
196 | };  // namespace rdmaio
197 | 


--------------------------------------------------------------------------------
/thirdparty/rlib/rdma_ctrl.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <functional>  // add this to pass compile
  4 | #include <memory>
  5 | 
  6 | #include "qp.hpp"
  7 | 
  8 | 
  9 | namespace rdmaio {
 10 | 
 11 | const int MAX_SERVER_SUPPORTED = 16;  // second template argument of the UDQP alias below
 12 | typedef RUDQP<default_ud_config, MAX_SERVER_SUPPORTED> UDQP;
 13 | typedef RRCQP<default_rc_config> RCQP;
 14 | 
 15 | typedef std::function<void(const QPConnArg&)> connection_callback_t;  // callback type taken by RdmaCtrl
 16 | 
 17 | class RdmaCtrl {  // public interface; all state is held behind impl_ (RdmaCtrlImpl)
 18 |  public:
 19 |   typedef struct {
 20 |     int dev_id;
 21 |     int port_id;
 22 |   } DevIdx;
 23 | 
 24 |   RdmaCtrl(
 25 |     int node_id, int tcp_base_port,
 26 |     connection_callback_t callback = [](const QPConnArg&) {
 27 |       // the default callback does nothing
 28 |     },
 29 |     std::string ip = "localhost");
 30 | 
 31 |   ~RdmaCtrl();
 32 | 
 33 |   int current_node_id();
 34 |   int listening_port();
 35 | 
 36 |   /**
 37 |      * Query device info on this machine.
 38 |      * If there was a previous call, the previous results are returned unless clear_dev_info has been called.
 39 |      */
 40 |   std::vector<RNicInfo> query_devs();
 41 | 
 42 |   static std::vector<RNicInfo> query_devs_helper();
 43 | 
 44 |   // clear the device info cached by RdmaCtrl
 45 |   void clear_dev_info();
 46 | 
 47 |   /**
 48 |      * Open device handlers.
 49 |      * RdmaCtrl opens a device for each thread.
 50 |      * get_device returns the device previously opened by this thread, if one is already open.
 51 |      */
 52 |   RNicHandler* open_thread_local_device(DevIdx idx);
 53 | 
 54 |   RNicHandler* open_device(DevIdx idx);
 55 | 
 56 |   RNicHandler* get_device();
 57 | 
 58 |   /**
 59 |      * The *callback* is called once a QP connection request is sent to this server
 60 |      */
 61 |   void register_qp_callback(connection_callback_t callback);
 62 | 
 63 |   void close_device();
 64 | 
 65 |   void close_device(RNicHandler*);
 66 | 
 67 |   /**
 68 |      * Each RDMA NIC has multiple ports, so a two-dimensional index (DevIdx) is used to locate the target port.
 69 |      * convert_port_idx translates a one-dimensional index into the two-dimensional one.
 70 |      */
 71 |   DevIdx convert_port_idx(int idx);
 72 | 
 73 |   /**
 74 |      * Register memory to a specific RNIC handler
 75 |      */
 76 |   bool register_memory(int id, const char* buf, uint64_t size, RNicHandler* rnic,
 77 |                        int flag = Memory::DEFAULT_PROTECTION_FLAG);
 78 | 
 79 |   /**
 80 |      * Get the local registered memory
 81 |      * undefined if mr_id has not been registered (NOTE(review): the original text read "has been registered"; presumably a typo - confirm)
 82 |      */
 83 |   MemoryAttr get_local_mr(int mr_id);
 84 | 
 85 |   /**
 86 |      * Return an arbitrary registered MR
 87 |      * return -1 if no MR is registered to RdmaCtrl
 88 |      * return the first local_mr index, if found one
 89 |      */
 90 |   int get_default_mr(MemoryAttr& attr);
 91 | 
 92 |   /**
 93 |      * Create and query QPs
 94 |      * For create, an optional local_attr can be provided to bind to this QP
 95 |      * A local MR is passed as the default local_mr for this QP.
 96 |      * If local_attr = nullptr, then this QP is not bound to any MR.
 97 |      */
 98 |   RCQP* create_rc_qp(QPIdx idx, RNicHandler* dev, MemoryAttr* local_attr = NULL);
 99 |   UDQP* create_ud_qp(QPIdx idx, RNicHandler* dev, MemoryAttr* local_attr = NULL);
100 | 
101 |   void destroy_rc_qp();
102 | 
103 |   RCQP* get_rc_qp(QPIdx idx);
104 |   UDQP* get_ud_qp(QPIdx idx);
105 | 
106 |   /**
107 |      * Some helper functions (example usage of RdmaCtrl)
108 |      * Fully link the QP in a symmetric way, for this thread.
109 |      * For example, node 0 can connect to node 1, while node 1 connect to node 0.
110 |      */
111 |   bool link_symmetric_rcqps(const std::vector<std::string>& cluster,
112 |                             int l_mrid, int mr_id, int wid, int idx = 0);
113 | 
114 |  private:
115 |   class RdmaCtrlImpl;
116 | 
117 |   std::unique_ptr<RdmaCtrlImpl> impl_;
118 | };
119 | 
120 | using RdmaCtrlPtr = std::shared_ptr<RdmaCtrl>;
121 | 
122 | }  // namespace rdmaio
123 | 
124 | #include "rdma_ctrl_impl.hpp"  // real implementation here
125 | 


--------------------------------------------------------------------------------
/thirdparty/rlib/rnic.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <infiniband/verbs.h>
  4 | 
  5 | #include <vector>
  6 | 
  7 | #include "logging.hpp"
  8 | 
  9 | 
 10 | namespace rdmaio {
 11 | 
 12 | // The name of the particular port on the RNIC.
 13 | typedef struct {
 14 |   uint64_t subnet_prefix;  // RNicHandler::query_addr copies gid.global.subnet_prefix here
 15 |   uint64_t interface_id;   // RNicHandler::query_addr copies gid.global.interface_id here
 16 |   uint32_t local_id;       // RNicHandler::query_addr stores the queried GID index here
 17 | } address_t;
 18 | 
 19 | // Description of one RDMA NIC: its active ports and the populated GID table indexes.
 20 | struct RNicInfo {
 21 |   typedef struct {
 22 |     uint port_id;
 23 |     std::string link_layer;
 24 |   } PortInfo;
 25 | 
 26 |   RNicInfo(const char* name, int id, ibv_context* ctx) : dev_id(id),
 27 |                                                          dev_name(name) {
 28 |     query_port_infos(ctx);
 29 |     query_active_gids(ctx);  // relies on active_ports filled by the call above
 30 |   }
 31 | 
 32 |   bool query_dev_attribute(ibv_context* ctx, ibv_device_attr& attr) {  // false (plus a log line) on failure
 33 |     int rc = ibv_query_device(ctx, &attr);
 34 |     if (rc != 0) {
 35 |       RDMA_LOG(ERROR) << "query device attribute error: " << strerror(errno);
 36 |       return false;
 37 |     }
 38 |     return true;
 39 |   }
 40 | 
 41 |   // fill in the active_ports
 42 |   void query_port_infos(ibv_context* ctx) {
 43 |     ibv_device_attr attr;
 44 |     if (!query_dev_attribute(ctx, attr))
 45 |       return;
 46 | 
 47 |     // query port info (the loop starts at port 1)
 48 |     for (uint port_id = 1; port_id <= attr.phys_port_cnt; ++port_id) {
 49 |       struct ibv_port_attr port_attr;
 50 |       int rc = ibv_query_port(ctx, port_id, &port_attr);
 51 |       if (rc != 0) {
 52 |         RDMA_LOG(ERROR) << "query port_id " << port_id << " on device " << dev_id << " error.";
 53 |         continue;
 54 |       }
 55 | 
 56 |       // check port status. NOTE(review): phys_state is compared with ibv_port_state constants - confirm `state` was not intended
 57 |       if (port_attr.phys_state != IBV_PORT_ACTIVE && port_attr.phys_state != IBV_PORT_ACTIVE_DEFER) {
 58 |         RDMA_LOG(WARNING) << "query port_id " << port_id << " on device " << dev_id << " not active.";
 59 |         continue;
 60 |       }
 61 | 
 62 |       std::string link_layer;
 63 |       switch (port_attr.link_layer) {
 64 |         case IBV_LINK_LAYER_ETHERNET:
 65 |           link_layer = "RoCE";
 66 |           break;
 67 |         case IBV_LINK_LAYER_INFINIBAND:
 68 |           link_layer = "Infiniband";
 69 |           break;
 70 |         default:
 71 |           RDMA_LOG(WARNING) << "unknown link layer at this port: " << port_attr.link_layer;
 72 |           link_layer = "Unknown";
 73 |       }
 74 |       active_ports.push_back({port_id, link_layer});
 75 |     }
 76 |   }
 77 | 
 78 |   /**
 79 |    * Collect the GID table indexes of the first active port whose interface_id is non-zero.
 80 |    * I assume that the active gid is the same in the RNIC
 81 |    */
 82 |   void query_active_gids(ibv_context* ctx) {
 83 |     if (active_ports.empty())
 84 |       return;
 85 | 
 86 |     int port_id = active_ports[0].port_id;
 87 |     struct ibv_port_attr port_attr;
 88 |     int rc = ibv_query_port(ctx, port_id, &port_attr);
 89 |     if (rc != 0) {
 90 |       RDMA_LOG(WARNING) << "query port attribute at dev " << dev_name << ",port " << port_id
 91 |                         << "; w error: " << strerror(errno);
 92 |       return;
 93 |     }
 94 | 
 95 |     for (uint i = 0; i < port_attr.gid_tbl_len; ++i) {
 96 |       ibv_gid gid = {};
 97 |       // Skip entries that cannot be read instead of inspecting an unfilled gid.
 98 |       if (ibv_query_gid(ctx, port_id, i, &gid) != 0)
 99 |         continue;
100 |       if (gid.global.interface_id) {
101 |         active_gids.push_back(i);
102 |       }
103 |     }
104 |   }
105 | 
106 |   void print() const { RDMA_LOG(3) << to_string(); }
107 | 
108 |   // One-line summary of the device, its active ports and its active GID indexes.
109 |   std::string to_string() const {
110 |     std::ostringstream oss;
111 |     oss << "device " << dev_name << " has " << active_ports.size() << " active ports.";
112 |     for (const auto& port : active_ports) {  // by reference: PortInfo holds a std::string
113 |       oss << "port " << port.port_id << " w link layer " << port.link_layer << ".";
114 |     }
115 |     for (uint i = 0; i < active_gids.size(); ++i) {
116 |       oss << "active gid: " << active_gids[i] << ".";
117 |     }
118 |     return oss.str();
119 |   }
120 | 
121 |   // members
122 |   int dev_id;
123 |   std::string dev_name;
124 |   std::vector<PortInfo> active_ports;
125 |   std::vector<int> active_gids;
126 | };
123 | 
124 | class RdmaCtrl;
125 | 
126 | // Handle to one opened device port; the destructor releases the `pd` and `ctx` it was given.
127 | struct RNicHandler {
128 |   RNicHandler(int dev_id, int port_id, ibv_context* ctx, ibv_pd* pd, int lid, int gid = 0)
129 |       : dev_id(dev_id), port_id(port_id), ctx(ctx), pd(pd), lid(lid), gid(gid) {
130 |   }
131 | 
132 |   // Address of this port using the GID index stored at construction.
133 |   address_t query_addr() {
134 |     return query_addr(gid);
135 |   }
136 | 
137 |   // Address of this port for an explicit GID index.
138 |   address_t query_addr(uint8_t gid_index) {
139 |     ibv_gid gid = {};  // zero-initialised so a failed query does not expose indeterminate bytes
140 |     ibv_query_gid(ctx, port_id, gid_index, &gid);
141 | 
142 |     address_t addr;
143 |     addr.subnet_prefix = gid.global.subnet_prefix;
144 |     addr.interface_id = gid.global.interface_id;
145 |     addr.local_id = gid_index;
146 |     return addr;
147 |   }
148 | 
149 |   friend class RdmaCtrl;
150 | 
151 |   ~RNicHandler() {
152 |     // Release the protection domain first: it was allocated from `ctx`, and
153 |     // ibv_close_device() does not release resources still attached to the context.
154 |     RDMA_VERIFY(INFO, ibv_dealloc_pd(pd) == 0) << "failed to dealloc pd at device " << dev_id
155 |                                                << "; w error " << strerror(errno);
156 |     RDMA_VERIFY(INFO, ibv_close_device(ctx) == 0) << "failed to close device " << dev_id;
157 |   }
158 | 
159 |  public:
160 |   uint16_t dev_id;   // which RNIC
161 |   uint16_t port_id;  // which port
162 | 
163 |   struct ibv_context* ctx;
164 |   struct ibv_pd* pd;
165 |   uint16_t lid;
166 |   uint16_t gid;
167 | };
168 | 
169 | }  // namespace rdmaio
170 | 


--------------------------------------------------------------------------------
/workload/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author: Ming Zhang
 2 | # Copyright (c) 2022
 3 | 
 4 | # Register every benchmark workload directory with the build, in the original order.
 5 | foreach(workload_dir tpcc tatp smallbank micro)
 6 |   add_subdirectory(${workload_dir})
 7 | endforeach()


--------------------------------------------------------------------------------
/workload/config/table_type.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | // Global table identifier in a single machine (type: table_id_t). NOTE(review): every workload uses 0; presumably only one workload is loaded per run - confirm.
 7 | #define TABLE_TATP 0
 8 | #define TABLE_TPCC 0
 9 | #define TABLE_SMALLBANK 0
10 | #define TABLE_MICRO 0


--------------------------------------------------------------------------------
/workload/micro/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author: Ming Zhang
 2 | # Copyright (c) 2022
 3 | 
 4 | # Static library built from the MICRO table loading code.
 5 | add_library(micro_db STATIC micro_db.cc)
 6 | 
 7 | # Static library built from micro_txn.cc.
 8 | add_library(micro_txn STATIC micro_txn.cc)
 9 | 
10 | set_target_properties(micro_db micro_txn PROPERTIES LINKER_LANGUAGE CXX)
11 | 
12 | # Keyword-less form kept so it cannot clash with other plain-signature calls on this target.
13 | target_link_libraries(micro_txn ford)


--------------------------------------------------------------------------------
/workload/micro/micro_db.cc:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #include "micro/micro_db.h"
 5 | #include "unistd.h"
 6 | #include "util/json_config.h"
 7 | 
 8 | /* Called by main. Only initialize here. The worker threads will populate. */
 9 | void MICRO::LoadTable(node_id_t node_id,
10 |                       node_id_t num_server,
11 |                       MemStoreAllocParam* mem_store_alloc_param,
12 |                       MemStoreReserveParam* mem_store_reserve_param) {
13 |   // Node index that holds the primary copy of the MICRO table.
14 |   const auto table_home = (node_id_t)MicroTableType::kMicroTable % num_server;
15 | 
16 |   // Reads bkt_num from micro.json, allocates the hash store and fills it with records.
17 |   auto build_micro_table = [&]() {
18 |     std::string config_filepath = "../../../workload/micro/micro_tables/micro.json";
19 |     auto json_config = JsonConfig::load_file(config_filepath);
20 |     auto table_config = json_config.get("table");
21 |     micro_table = new HashStore((table_id_t)MicroTableType::kMicroTable,
22 |                                 table_config.get("bkt_num").get_uint64(),
23 |                                 mem_store_alloc_param);
24 |     PopulateMicroTable(mem_store_reserve_param);
25 |   };
26 | 
27 |   // Initiate + Populate table for primary role
28 |   if (table_home == node_id) {
29 |     printf("Primary: Initializing MICRO table\n");
30 |     build_micro_table();
31 |     primary_table_ptrs.push_back(micro_table);
32 |   }
33 | 
34 |   // Initiate + Populate table for backup role; skipped unless BACKUP_DEGREE < num_server
35 |   if (!(BACKUP_DEGREE < num_server)) return;
36 |   for (node_id_t i = 1; i <= BACKUP_DEGREE; i++) {
37 |     if (table_home != (node_id - i + num_server) % num_server) continue;
38 |     printf("Backup: Initializing MICRO table\n");
39 |     build_micro_table();
40 |     backup_table_ptrs.push_back(micro_table);
41 |   }
42 | }
43 | 
44 | void MICRO::PopulateMicroTable(MemStoreReserveParam* mem_store_reserve_param) {
45 |   /* Every caller must insert the same records in the same order */
46 |   RDMA_LOG(DBG) << "NUM KEYS TOTAL: " << num_keys_global;
47 |   /* One record per key id in [0, num_keys_global); value slot k holds micro_magic + k */
48 |   uint64_t id = 0;
49 |   while (id < num_keys_global) {
50 |     micro_key_t key;
51 |     key.micro_id = id;
52 | 
53 |     micro_val_t val;
54 |     for (int k = 0; k != 5; ++k) {
55 |       val.magic[k] = micro_magic + k;
56 |     }
57 | 
58 |     LoadRecord(micro_table, key.item_key, (void*)&val, sizeof(micro_val_t),
59 |                (table_id_t)MicroTableType::kMicroTable, mem_store_reserve_param);
60 |     ++id;
61 |   }
62 | }
63 | 
64 | int MICRO::LoadRecord(HashStore* table,
65 |                       itemkey_t item_key,
66 |                       void* val_ptr,
67 |                       size_t val_size,
68 |                       table_id_t table_id,
69 |                       MemStoreReserveParam* mem_store_reserve_param) {
70 |   assert(val_size <= MAX_ITEM_SIZE);
71 |   // Stage the record locally, insert it, then stamp the stored copy with its remote offset.
72 |   DataItem staged(table_id, val_size, item_key, (uint8_t*)val_ptr);
73 |   DataItem* stored = table->LocalInsert(item_key, staged, mem_store_reserve_param);
74 |   stored->remote_offset = table->GetItemRemoteOffset(stored);
75 |   return 1;  // this function has no other return value
76 | }
77 | 


--------------------------------------------------------------------------------
/workload/micro/micro_db.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #pragma once
  5 | 
  6 | #include <cassert>
  7 | #include <cstdint>
  8 | #include <vector>
  9 | 
 10 | #include "config/table_type.h"
 11 | #include "memstore/hash_store.h"
 12 | #include "util/fast_random.h"
 13 | #include "util/json_config.h"
 14 | 
 15 | union micro_key_t {  // 8-byte key: micro_id and item_key share the same storage
 16 |   uint64_t micro_id;
 17 |   uint64_t item_key;
 18 | 
 19 |   micro_key_t() {  // a default-constructed key is 0
 20 |     item_key = 0;
 21 |   }
 22 | };
 23 | 
 24 | static_assert(sizeof(micro_key_t) == sizeof(uint64_t), "");
 25 | 
 26 | struct micro_val_t {
 27 |   // 40 bytes, consistent with FaSST
 28 |   uint64_t magic[5];  // MICRO::PopulateMicroTable writes micro_magic + i into slot i
 29 | };
 30 | static_assert(sizeof(micro_val_t) == 40, "");
 31 | 
 32 | // Magic numbers for debugging. These are unused in the spec.
 33 | #define Micro_MAGIC 97 /* Some magic number <= 255 */
 34 | #define micro_magic (Micro_MAGIC)  // base value stored in micro_val_t::magic by PopulateMicroTable
 35 | 
 36 | // Transaction types used when generating the micro workload
 37 | enum class MicroTxType : int {
 38 |   kLockContention,
 39 | };
 40 | 
 41 | // Table id; kMicroTable takes its value from TABLE_MICRO (config/table_type.h)
 42 | enum class MicroTableType : uint64_t {
 43 |   kMicroTable = TABLE_MICRO,
 44 | };
 45 | 
 46 | // Round v up to the next power of two; a v that is already a power of two is returned unchanged.
 47 | // v == 0 wraps in the decrement and yields 0.
 48 | static ALWAYS_INLINE
 49 | uint64_t align_pow2(uint64_t v) {
 50 |   v -= 1;
 51 |   // Smear the highest set bit into every lower bit position (shifts 1, 2, 4, ..., 32).
 52 |   for (unsigned shift = 1; shift < 64; shift <<= 1) {
 53 |     v |= v >> shift;
 54 |   }
 55 |   return v + 1;
 56 | }
 57 | 
 58 | // MICRO benchmark database: the configured key count plus the hash stores built by LoadTable.
 59 | class MICRO {
 60 |  public:
 61 |   std::string bench_name;
 62 | 
 63 |   uint64_t num_keys_global;
 64 | 
 65 |   /* Tables */
 66 |   HashStore* micro_table;
 67 | 
 68 |   std::vector<HashStore*> primary_table_ptrs;
 69 | 
 70 |   std::vector<HashStore*> backup_table_ptrs;
 71 | 
 72 |   // For server usage: Provide interfaces to servers for loading tables
 73 |   // Also for client usage: Provide interfaces to clients for generating ids during tests
 74 |   MICRO() : bench_name("MICRO"), micro_table(nullptr) {
 75 |     // num_keys comes from micro_config.json and is rounded up to a power of two.
 76 |     std::string config_filepath = "../../../config/micro_config.json";
 77 |     auto json_config = JsonConfig::load_file(config_filepath);
 78 |     auto micro_conf = json_config.get("micro");
 79 |     auto configured_keys = micro_conf.get("num_keys").get_int64();
 80 |     num_keys_global = align_pow2(configured_keys);
 81 |   }
 82 | 
 83 |   ~MICRO() {
 84 |     delete micro_table;  // deleting a null pointer is a no-op
 85 |   }
 86 | 
 87 |   void LoadTable(node_id_t node_id,
 88 |                  node_id_t num_server,
 89 |                  MemStoreAllocParam* mem_store_alloc_param,
 90 |                  MemStoreReserveParam* mem_store_reserve_param);
 91 | 
 92 |   void PopulateMicroTable(MemStoreReserveParam* mem_store_reserve_param);
 93 | 
 94 |   int LoadRecord(HashStore* table,
 95 |                  itemkey_t item_key,
 96 |                  void* val_ptr,
 97 |                  size_t val_size,
 98 |                  table_id_t table_id,
 99 |                  MemStoreReserveParam* mem_store_reserve_param);
100 | 
101 |   ALWAYS_INLINE
102 |   std::vector<HashStore*> GetPrimaryHashStore() {
103 |     return primary_table_ptrs;  // returned by value (a copy of the pointer list)
104 |   }
105 | 
106 |   ALWAYS_INLINE
107 |   std::vector<HashStore*> GetBackupHashStore() {
108 |     return backup_table_ptrs;  // returned by value (a copy of the pointer list)
109 |   }
110 | };
111 | 


--------------------------------------------------------------------------------
/workload/micro/micro_tables/micro.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "MICRO",
4 |     "bkt_num": 200000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/micro/micro_txn.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <memory>
 7 | 
 8 | #include "dtx/dtx.h"
 9 | #include "micro/micro_db.h"
10 | #include "util/zipf.h"
11 | 
12 | /******************** The business logic (Transaction) start ********************/
13 | 
14 | struct DataItemDuplicate {  // pairs a data item pointer with a duplicate flag
15 |   DataItemPtr data_item_ptr;
16 |   bool is_dup;  // presumably true when the same key was already picked in this transaction - TODO confirm
17 | };
18 | // Micro-benchmark transactions: all six share one signature and return bool. NOTE(review): presumably true on commit - confirm in micro_txn.cc.
19 | bool TxTestCachedAddr(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio);
20 | bool TxLockContention(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio);
21 | bool TxReadBackup(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio);
22 | bool TxReadOnly(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio);
23 | bool TxRFlush1(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio);
24 | bool TxRFlush2(ZipfGen* zipf_gen, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx, bool is_skewed, uint64_t data_set_size, uint64_t num_keys_global, uint64_t write_ratio);
25 | /******************** The business logic (Transaction) end ********************/


--------------------------------------------------------------------------------
/workload/smallbank/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author: Ming Zhang
 2 | # Copyright (c) 2022
 3 | 
 4 | # Static library built from the SmallBank table loading code.
 5 | add_library(smallbank_db STATIC smallbank_db.cc)
 6 | 
 7 | # Static library built from smallbank_txn.cc.
 8 | add_library(smallbank_txn STATIC smallbank_txn.cc)
 9 | 
10 | set_target_properties(smallbank_db smallbank_txn PROPERTIES LINKER_LANGUAGE CXX)
11 | 
12 | # Keyword-less form kept so it cannot clash with other plain-signature calls on this target.
13 | target_link_libraries(smallbank_txn ford)


--------------------------------------------------------------------------------
/workload/smallbank/smallbank_db.cc:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | 
  4 | #include "smallbank_db.h"
  5 | 
  6 | #include "unistd.h"
  7 | #include "util/json_config.h"
  8 | 
  9 | /* Called by main. Only initialize here. The worker threads will populate. */
 10 | void SmallBank::LoadTable(node_id_t node_id,
 11 |                           node_id_t num_server,
 12 |                           MemStoreAllocParam* mem_store_alloc_param,
 13 |                           MemStoreReserveParam* mem_store_reserve_param) {
 14 |   const char* const savings_config = "../../../workload/smallbank/smallbank_tables/savings.json";
 15 |   const char* const checking_config = "../../../workload/smallbank/smallbank_tables/checking.json";
 16 | 
 17 |   // Node index that holds the primary copy of each table.
 18 |   const auto savings_home = (node_id_t)SmallBankTableType::kSavingsTable % num_server;
 19 |   const auto checking_home = (node_id_t)SmallBankTableType::kCheckingTable % num_server;
 20 | 
 21 |   // Allocates an empty hash store for `table_id`; the bucket count is the
 22 |   // "bkt_num" entry under "table" in the JSON file at `path`.
 23 |   auto new_store = [&](const char* path, table_id_t table_id) -> HashStore* {
 24 |     std::string config_filepath = path;
 25 |     auto json_config = JsonConfig::load_file(config_filepath);
 26 |     auto table_config = json_config.get("table");
 27 |     return new HashStore(table_id,
 28 |                          table_config.get("bkt_num").get_uint64(),
 29 |                          mem_store_alloc_param);
 30 |   };
 31 | 
 32 |   // Initiate + Populate table for primary role
 33 |   if (savings_home == node_id) {
 34 |     printf("Primary: Initializing SAVINGS table\n");
 35 |     savings_table = new_store(savings_config, (table_id_t)SmallBankTableType::kSavingsTable);
 36 |     PopulateSavingsTable(mem_store_reserve_param);
 37 |     primary_table_ptrs.push_back(savings_table);
 38 |   }
 39 |   if (checking_home == node_id) {
 40 |     printf("Primary: Initializing CHECKING table\n");
 41 |     checking_table = new_store(checking_config, (table_id_t)SmallBankTableType::kCheckingTable);
 42 |     PopulateCheckingTable(mem_store_reserve_param);
 43 |     primary_table_ptrs.push_back(checking_table);
 44 |   }
 45 | 
 46 |   // Initiate + Populate table for backup role; skipped unless
 47 |   // BACKUP_DEGREE < num_server.
 48 |   if (!(BACKUP_DEGREE < num_server)) return;
 49 |   for (node_id_t i = 1; i <= BACKUP_DEGREE; i++) {
 50 |     // Node whose primary tables this node backs up at distance i.
 51 |     const auto backed_up_node = (node_id - i + num_server) % num_server;
 52 |     if (savings_home == backed_up_node) {
 53 |       printf("Backup: Initializing SAVINGS table\n");
 54 |       savings_table = new_store(savings_config, (table_id_t)SmallBankTableType::kSavingsTable);
 55 |       PopulateSavingsTable(mem_store_reserve_param);
 56 |       backup_table_ptrs.push_back(savings_table);
 57 |     }
 58 |     if (checking_home == backed_up_node) {
 59 |       printf("Backup: Initializing CHECKING table\n");
 60 |       checking_table = new_store(checking_config, (table_id_t)SmallBankTableType::kCheckingTable);
 61 |       PopulateCheckingTable(mem_store_reserve_param);
 62 |       backup_table_ptrs.push_back(checking_table);
 63 |     }
 64 |   }
 65 | }
 66 | 
 67 | int SmallBank::LoadRecord(HashStore* table,
 68 |                           itemkey_t item_key,
 69 |                           void* val_ptr,
 70 |                           size_t val_size,
 71 |                           table_id_t table_id,
 72 |                           MemStoreReserveParam* mem_store_reserve_param) {
 73 |   assert(val_size <= MAX_ITEM_SIZE);
 74 |   // Stage the record locally, insert it, then stamp the stored copy with its remote offset.
 75 |   DataItem staged(table_id, val_size, item_key, (uint8_t*)val_ptr);
 76 |   DataItem* stored = table->LocalInsert(item_key, staged, mem_store_reserve_param);
 77 |   stored->remote_offset = table->GetItemRemoteOffset(stored);
 78 |   return 1;  // this function has no other return value
 79 | }
 80 | 
 81 | void SmallBank::PopulateSavingsTable(MemStoreReserveParam* mem_store_reserve_param) {
 82 |   /* Every caller must insert the same records in the same order */
 83 |   /* One SAVINGS row per account id in [0, num_accounts_global) */
 84 |   uint32_t acct_id = 0;
 85 |   while (acct_id < num_accounts_global) {
 86 |     smallbank_savings_key_t key;
 87 |     key.acct_id = (uint64_t)acct_id;
 88 | 
 89 |     smallbank_savings_val_t val;
 90 |     val.magic = smallbank_savings_magic;
 91 |     val.bal = 1000000000ull;
 92 | 
 93 |     LoadRecord(savings_table, key.item_key, (void*)&val,
 94 |                sizeof(smallbank_savings_val_t),
 95 |                (table_id_t)SmallBankTableType::kSavingsTable,
 96 |                mem_store_reserve_param);
 97 |     acct_id++;
 98 |   }
 99 | }
100 | 
101 | void SmallBank::PopulateCheckingTable(MemStoreReserveParam* mem_store_reserve_param) {
102 |   /* The loop below must run deterministically on every thread */
103 |   const auto checking_table_id = (table_id_t)SmallBankTableType::kCheckingTable;
104 | 
105 |   /* Insert one CHECKING row per account id in [0, num_accounts_global) */
106 |   uint32_t next_acct = 0;
107 |   while (next_acct < num_accounts_global) {
108 |     smallbank_checking_key_t key;
109 |     key.acct_id = (uint64_t)next_acct;
110 | 
111 |     // Every account is given the same starting balance
112 |     smallbank_checking_val_t val;
113 |     val.magic = smallbank_checking_magic;
114 |     val.bal = 1000000000ull;
115 | 
116 |     LoadRecord(checking_table, key.item_key, (void*)&val, sizeof(val), checking_table_id, mem_store_reserve_param);
117 |     next_acct++;
118 |   }
119 | }


--------------------------------------------------------------------------------
/workload/smallbank/smallbank_db.h:
--------------------------------------------------------------------------------
  1 | // Author: Ming Zhang
  2 | // Copyright (c) 2022
  3 | #pragma once
  4 | 
  5 | #include <cassert>
  6 | #include <cstdint>
  7 | #include <vector>
  8 | 
  9 | #include "config/table_type.h"
 10 | #include "memstore/hash_store.h"
 11 | #include "util/fast_random.h"
 12 | #include "util/json_config.h"
 13 | 
 14 | /* STORED PROCEDURE EXECUTION FREQUENCIES (0-100) */
 15 | #define FREQUENCY_AMALGAMATE 15
 16 | #define FREQUENCY_BALANCE 15
 17 | #define FREQUENCY_DEPOSIT_CHECKING 15
 18 | #define FREQUENCY_SEND_PAYMENT 25
 19 | #define FREQUENCY_TRANSACT_SAVINGS 15
 20 | #define FREQUENCY_WRITE_CHECK 15
 21 | 
 22 | #define TX_HOT 90 /* Percentage of txns that use accounts from hotspot */
 23 | 
 24 | // Smallbank table keys and values
 25 | // All keys have been sized to 8 bytes
 26 | // All values have been sized to the next multiple of 8 bytes
 27 | 
 28 | /*
 29 |  * SAVINGS table.
 30 |  */
 31 | union smallbank_savings_key_t {
 32 |   // Two names for the same 8 bytes: a SAVINGS row is keyed directly by its account id.
 33 |   uint64_t acct_id;
 34 |   uint64_t item_key;
 35 | 
 36 |   // A default-constructed key is zero.
 37 |   smallbank_savings_key_t() : item_key(0) {}
 38 | };
 39 | 
 40 | static_assert(sizeof(smallbank_savings_key_t) == sizeof(uint64_t), "");
 41 | 
 42 | struct smallbank_savings_val_t {
 43 |   uint32_t magic;  // Debugging tag; the table loader stores smallbank_savings_magic here
 44 |   float bal;       // Balance; the table loader initializes it to 1000000000
 45 | };
 46 | static_assert(sizeof(smallbank_savings_val_t) == sizeof(uint64_t), "");
 47 | 
 48 | /*
 49 |  * CHECKING table
 50 |  */
 51 | union smallbank_checking_key_t {
 52 |   // Two names for the same 8 bytes: a CHECKING row is keyed directly by its account id.
 53 |   uint64_t acct_id;
 54 |   uint64_t item_key;
 55 | 
 56 |   // A default-constructed key is zero.
 57 |   smallbank_checking_key_t() : item_key(0) {}
 58 | };
 59 | 
 60 | static_assert(sizeof(smallbank_checking_key_t) == sizeof(uint64_t), "");
 61 | 
 62 | struct smallbank_checking_val_t {
 63 |   uint32_t magic;  // Debugging tag; the table loader stores smallbank_checking_magic here
 64 |   float bal;       // Balance; the table loader initializes it to 1000000000
 65 | };
 66 | static_assert(sizeof(smallbank_checking_val_t) == sizeof(uint64_t), "");
 67 | 
 68 | // Magic numbers for debugging. These are unused in the spec.
 69 | #define SmallBank_MAGIC 97 /* Some magic number <= 255 */
 70 | #define smallbank_savings_magic (SmallBank_MAGIC)
 71 | #define smallbank_checking_magic (SmallBank_MAGIC + 1)
 72 | 
 73 | // Helpers for generating workload
 74 | #define SmallBank_TX_TYPES 6
 75 | enum class SmallBankTxType : int {  // Enumerators are listed in the same order as SmallBank_TX_NAME
 76 |   kAmalgamate,       // Gets FREQUENCY_AMALGAMATE slots in the workgen array
 77 |   kBalance,          // Gets FREQUENCY_BALANCE slots
 78 |   kDepositChecking,  // Gets FREQUENCY_DEPOSIT_CHECKING slots
 79 |   kSendPayment,      // Gets FREQUENCY_SEND_PAYMENT slots
 80 |   kTransactSaving,   // Gets FREQUENCY_TRANSACT_SAVINGS slots
 81 |   kWriteCheck,       // Gets FREQUENCY_WRITE_CHECK slots
 82 | };
 83 | 
 84 | 
 85 | const std::string SmallBank_TX_NAME[SmallBank_TX_TYPES] = {
 86 |     "Amalgamate", "Balance", "DepositChecking", "SendPayment", "TransactSaving", "WriteCheck"};
 87 | 
 88 | // Table id
 89 | enum class SmallBankTableType : uint64_t {
 90 |   kSavingsTable = TABLE_SMALLBANK,  // Numbering starts at TABLE_SMALLBANK
 91 |   kCheckingTable,                   // Implicitly TABLE_SMALLBANK + 1
 92 | };
 93 | 
 94 | class SmallBank {
 95 |  public:
 96 |   std::string bench_name;  // Set to "SmallBank" by the constructor
 97 | 
 98 |   uint32_t total_thread_num = 0;  // Never assigned inside this class; starts at zero
 99 | 
100 |   uint32_t num_accounts_global, num_hot_global;  // Read from smallbank_config.json
101 | 
102 |   /* Tables (deleted by the destructor) */
103 |   HashStore* savings_table;
104 |   HashStore* checking_table;
105 | 
106 |   std::vector<HashStore*> primary_table_ptrs;
107 |   std::vector<HashStore*> backup_table_ptrs;
108 | 
109 |   // For server usage: Provide interfaces to servers for loading tables
110 |   // Also for client usage: Provide interfaces to clients for generating ids during tests
111 |   SmallBank() {
112 |     bench_name = "SmallBank";
113 |     // The account counts drive both table population and account-id generation
114 |     std::string config_filepath = "../../../config/smallbank_config.json";
115 |     auto json_config = JsonConfig::load_file(config_filepath);
116 |     auto conf = json_config.get("smallbank");
117 |     // Validate the 64-bit values before narrowing, so an oversized count cannot wrap and slip through
118 |     const uint64_t num_accounts = conf.get("num_accounts").get_uint64();
119 |     const uint64_t num_hot_accounts = conf.get("num_hot_accounts").get_uint64();
120 |     /* Up to 2 billion accounts; hot ids are drawn from a prefix of the account range */
121 |     assert(num_accounts <= 2ull * 1024 * 1024 * 1024);
122 |     assert(num_hot_accounts <= num_accounts);
123 |     num_accounts_global = static_cast<uint32_t>(num_accounts);
124 |     num_hot_global = static_cast<uint32_t>(num_hot_accounts);
125 |     savings_table = nullptr;
126 |     checking_table = nullptr;
127 |   }
128 | 
129 |   ~SmallBank() {
130 |     // Deleting a null pointer is a no-op, so no guard is needed
131 |     delete savings_table;
132 |     delete checking_table;
133 |   }
134 | 
135 |   // Returns a heap array of 100 transaction types in which each type occupies as many
136 |   // slots as its FREQUENCY_* percentage. Allocated with new[]; nothing here frees it.
137 |   SmallBankTxType* CreateWorkgenArray() {
138 |     SmallBankTxType* workgen_arr = new SmallBankTxType[100];
139 |     int i = 0, j = 0;  // [i, j) is the slice being filled for the current type
140 |     j += FREQUENCY_AMALGAMATE;
141 |     for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kAmalgamate;
142 |     j += FREQUENCY_BALANCE;
143 |     for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kBalance;
144 |     j += FREQUENCY_DEPOSIT_CHECKING;
145 |     for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kDepositChecking;
146 |     j += FREQUENCY_SEND_PAYMENT;
147 |     for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kSendPayment;
148 |     j += FREQUENCY_TRANSACT_SAVINGS;
149 |     for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kTransactSaving;
150 |     j += FREQUENCY_WRITE_CHECK;
151 |     for (; i < j; i++) workgen_arr[i] = SmallBankTxType::kWriteCheck;
152 |     assert(i == 100 && j == 100);  // The frequencies must sum to exactly 100
153 |     return workgen_arr;
154 |   }
155 | 
156 |   /*
157 |    * Generators for new account IDs. Called once per transaction because
158 |    * we need to decide hot-or-not per transaction, not per account.
159 |    */
160 |   inline void get_account(uint64_t* seed, uint64_t* acct_id) const {
161 |     if (FastRand(seed) % 100 < TX_HOT) {
162 |       assert(num_hot_global > 0);  // Guards the modulo below
163 |       *acct_id = FastRand(seed) % num_hot_global;
164 |     } else {
165 |       assert(num_accounts_global > 0);  // Guards the modulo below
166 |       *acct_id = FastRand(seed) % num_accounts_global;
167 |     }
168 |   }
169 | 
170 |   // Picks two distinct ids, both from the hot range or both from the full range.
171 |   inline void get_two_accounts(uint64_t* seed, uint64_t* acct_id_0, uint64_t* acct_id_1) const {
172 |     if (FastRand(seed) % 100 < TX_HOT) {
173 |       assert(num_hot_global >= 2);  // With fewer than two ids the retry loop could never end
174 |       *acct_id_0 = FastRand(seed) % num_hot_global;
175 |       *acct_id_1 = FastRand(seed) % num_hot_global;
176 |       while (*acct_id_1 == *acct_id_0) {
177 |         *acct_id_1 = FastRand(seed) % num_hot_global;
178 |       }
179 |     } else {
180 |       assert(num_accounts_global >= 2);  // With fewer than two ids the retry loop could never end
181 |       *acct_id_0 = FastRand(seed) % num_accounts_global;
182 |       *acct_id_1 = FastRand(seed) % num_accounts_global;
183 |       while (*acct_id_1 == *acct_id_0) {
184 |         *acct_id_1 = FastRand(seed) % num_accounts_global;
185 |       }
186 |     }
187 |   }
188 | 
189 |   void LoadTable(node_id_t node_id,
190 |                  node_id_t num_server,
191 |                  MemStoreAllocParam* mem_store_alloc_param,
192 |                  MemStoreReserveParam* mem_store_reserve_param);
193 | 
194 |   void PopulateSavingsTable(MemStoreReserveParam* mem_store_reserve_param);
195 | 
196 |   void PopulateCheckingTable(MemStoreReserveParam* mem_store_reserve_param);
197 | 
198 |   int LoadRecord(HashStore* table,
199 |                  itemkey_t item_key,
200 |                  void* val_ptr,
201 |                  size_t val_size,
202 |                  table_id_t table_id,
203 |                  MemStoreReserveParam* mem_store_reserve_param);
204 | 
205 |   ALWAYS_INLINE
206 |   std::vector<HashStore*> GetPrimaryHashStore() {
207 |     return primary_table_ptrs;
208 |   }
209 | 
210 |   ALWAYS_INLINE
211 |   std::vector<HashStore*> GetBackupHashStore() {
212 |     return backup_table_ptrs;
213 |   }
214 | };
215 | 


--------------------------------------------------------------------------------
/workload/smallbank/smallbank_tables/checking.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "CHECKING",
4 |     "bkt_num": 200000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/smallbank/smallbank_tables/savings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "SAVINGS",
4 |     "bkt_num": 200000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/smallbank/smallbank_txn.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <memory>
 7 | 
 8 | #include "dtx/dtx.h"
 9 | #include "smallbank/smallbank_db.h"
10 | 
11 | /******************** The business logic (Transaction) start ********************/
12 | 
13 | bool TxAmalgamate(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
14 | /* Calculate the sum of the savings and checking balances */
15 | bool TxBalance(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
16 | /* Add $1.3 to acct_id's checking account */
17 | bool TxDepositChecking(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
18 | /* Send $5 from acct_id_0's checking account to acct_id_1's checking account */
19 | bool TxSendPayment(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
20 | /* Add $20 to acct_id's savings account */
21 | bool TxTransactSaving(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
22 | /* Read the savings and checking balances, then update the checking balance unconditionally */
23 | bool TxWriteCheck(SmallBank* smallbank_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
24 | /******************** The business logic (Transaction) end ********************/


--------------------------------------------------------------------------------
/workload/tatp/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author: Ming Zhang
 2 | # Copyright (c) 2022
 3 | 
 4 | # Database-side library, built from tatp_db.cc.
 5 | add_library(tatp_db STATIC tatp_db.cc)
 6 | set_target_properties(tatp_db PROPERTIES LINKER_LANGUAGE CXX)
 7 | 
 8 | # Transaction-side library, built from tatp_txn.cc.
 9 | add_library(tatp_txn STATIC tatp_txn.cc)
10 | set_target_properties(tatp_txn PROPERTIES LINKER_LANGUAGE CXX)
11 | 
12 | # PUBLIC keeps the transitive behaviour of the previous keyword-less call.
13 | target_link_libraries(tatp_txn PUBLIC ford)


--------------------------------------------------------------------------------
/workload/tatp/tatp_tables/README.md:
--------------------------------------------------------------------------------
 1 | # Table bucket capacities
 2 | 
 3 | * On average, every worker adds `SUBSCRIBERS_PER_MACHINE` (1 M) subscribers to its `SUBSCRIBER` table partition.
 4 | 
 5 | * There are 2.5 `ACCESS_INFO` records per subscriber, so 2.5 M `ACCESS_INFO`
 6 |   records in total. Similarly, there are 2.5 M `SPECIAL_FACILITY` records.
 7 | 
 8 | * There are 1.25 `CALL_FORWARDING` records per `SPECIAL_FACILITY` record, so around 3.2 M `CALL_FORWARDING` records in
 9 |   total.
10 | 
11 | ## Index sizing
12 | 
13 | We allocate 25% extra space in the index for all records:
14 | 
15 | * `SUBSCRIBER`: 1.25 M
16 | * Secondary `SUBSCRIBER` table: 1.25 M
17 | * `ACCESS_INFO`: 3.2 M -> Does not work so make it 4 M
18 | * `SPECIAL_FACILITY`: 3.2 M -> Does not work so make it 4 M
19 | * `CALL_FORWARDING`: 4 M -> Does not work so make it 5 M
20 | 
21 | # Additional info
22 | 
23 | ## Table key-value sizes
24 | 
25 | * All key sizes are fixed at 8 bytes. Value sizes are padded to the next multiple of 8 bytes.
26 | * `SUBSCRIBER`: 40 bytes
27 | * Secondary `SUBSCRIBER` table: 8 bytes
28 | * `ACCESS_INFO`: 16 bytes
29 | * `SPECIAL_FACILITY`: 8 bytes
30 | * `CALL_FORWARDING`: 24 bytes
31 | 
32 | ## Pool sizing (irrelevant for FixedTable)
33 | 
34 | On top of an (assumed) 32-byte per-entry pool overhead, allocate 0% extra pool space. Also add the 8-byte HoTS object
35 | header to make the total header size = 40 bytes.
36 | 
37 | * `SUBSCRIBER`: `(40 + 40) * .125 = 10 MB`
38 | * Secondary `SUBSCRIBER` table: `(40 + 8) * .125 = 6 MB`
39 | * `ACCESS_INFO`: `(40 + 16) * .32 = 18 MB`
40 | * `SPECIAL_FACILITY`: `(40 + 8) * .32 = 16 MB`
41 | * `CALL_FORWARDING`: `(40 + 24) * .4 = 26 MB`
42 | 


--------------------------------------------------------------------------------
/workload/tatp/tatp_tables/access_info.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "ACCESS INFO",
4 |     "bkt_num": 40000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tatp/tatp_tables/call_forwarding.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "CALL FORWARDING",
4 |     "bkt_num": 50000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tatp/tatp_tables/sec_subscriber.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "SECONDARY SUBSCRIBER",
4 |     "bkt_num": 12500
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tatp/tatp_tables/special_facility.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "SPECIAL FACILITY",
4 |     "bkt_num": 40000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tatp/tatp_tables/subscriber.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "SUBSCRIBER",
4 |     "bkt_num": 12500
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tatp/tatp_txn.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <memory>
 7 | 
 8 | #include "dtx/dtx.h"
 9 | #include "tatp/tatp_db.h"
10 | 
11 | /******************** The business logic (Transaction) start ********************/
12 | 
13 | // Read 1 SUBSCRIBER row
14 | bool TxGetSubsciberData(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
15 | 
16 | // 1. Read 1 SPECIAL_FACILITY row
17 | // 2. Read up to 3 CALL_FORWARDING rows
18 | // 3. Validate up to 4 rows
19 | bool TxGetNewDestination(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
20 | 
21 | // Read 1 ACCESS_INFO row
22 | bool TxGetAccessData(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
23 | 
24 | // Update 1 SUBSCRIBER row and 1 SPECIAL_FACILITY row
25 | bool TxUpdateSubscriberData(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
26 | 
27 | // 1. Read a SECONDARY_SUBSCRIBER row
28 | // 2. Update a SUBSCRIBER row
29 | bool TxUpdateLocation(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
30 | 
31 | // 1. Read a SECONDARY_SUBSCRIBER row
32 | // 2. Read a SPECIAL_FACILITY row
33 | // 3. Insert a CALL_FORWARDING row
34 | bool TxInsertCallForwarding(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
35 | 
36 | // 1. Read a SECONDARY_SUBSCRIBER row
37 | // 2. Delete a CALL_FORWARDING row
38 | bool TxDeleteCallForwarding(TATP* tatp_client, uint64_t* seed, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
39 | 
40 | /******************** The business logic (Transaction) end ********************/


--------------------------------------------------------------------------------
/workload/tpcc/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author: Ming Zhang
 2 | # Copyright (c) 2022
 3 | 
 4 | # Database-side library, built from tpcc_db.cc.
 5 | add_library(tpcc_db STATIC tpcc_db.cc)
 6 | set_target_properties(tpcc_db PROPERTIES LINKER_LANGUAGE CXX)
 7 | 
 8 | # Transaction-side library, built from tpcc_txn.cc.
 9 | add_library(tpcc_txn STATIC tpcc_txn.cc)
10 | set_target_properties(tpcc_txn PROPERTIES LINKER_LANGUAGE CXX)
11 | 
12 | # PUBLIC keeps the transitive behaviour of the previous keyword-less call.
13 | target_link_libraries(tpcc_txn PUBLIC ford)


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables/customer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "CUSTOMER",
4 |     "bkt_num": 300
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables/district.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "DISTRICT",
4 |     "bkt_num": 10
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables/item.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "ITEM",
4 |     "bkt_num": 10000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables/stock.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "STOCK",
4 |     "bkt_num": 10000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables/warehouse.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "WAREHOUSE",
4 |     "bkt_num": 8
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_1G/customer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "CUSTOMER",
4 |     "bkt_num": 30
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_1G/district.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "DISTRICT",
4 |     "bkt_num": 10
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_1G/item.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "ITEM",
4 |     "bkt_num": 1000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_1G/stock.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "STOCK",
4 |     "bkt_num": 1000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_1G/warehouse.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "WAREHOUSE",
4 |     "bkt_num": 50
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_8G/customer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "CUSTOMER",
4 |     "bkt_num": 300
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_8G/district.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "DISTRICT",
4 |     "bkt_num": 10
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_8G/item.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "ITEM",
4 |     "bkt_num": 10000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_8G/stock.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "STOCK",
4 |     "bkt_num": 10000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_8G/warehouse.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "WAREHOUSE",
4 |     "bkt_num": 8
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_normal/customer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "CUSTOMER",
4 |     "bkt_num": 300
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_normal/district.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "DISTRICT",
4 |     "bkt_num": 10
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_normal/item.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "ITEM",
4 |     "bkt_num": 10000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_normal/stock.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "STOCK",
4 |     "bkt_num": 10000
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_tables_normal/warehouse.json:
--------------------------------------------------------------------------------
1 | {
2 |   "table": {
3 |     "name": "WAREHOUSE",
4 |     "bkt_num": 30
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/workload/tpcc/tpcc_txn.h:
--------------------------------------------------------------------------------
 1 | // Author: Ming Zhang
 2 | // Copyright (c) 2022
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <memory>
 7 | 
 8 | #include "dtx/dtx.h"
 9 | #include "tpcc/tpcc_db.h"
10 | 
11 | /******************** The business logic (Transaction) start ********************/
12 | 
13 | // The following transaction business logics are referred to the standard TPCC specification.
14 | 
15 | /* TPC BENCHMARK™ C
16 | ** Standard Specification
17 | ** Revision 5.11
18 | ** February 2010
19 | ** url: http://tpc.org/tpc_documents_current_versions/pdf/tpc-c_v5.11.0.pdf
20 | */
21 | 
22 | // Note: The number of remote hash slots limits how many rows can be inserted. For a 20-slot bucket, the upper bound is 44744 new orders.
23 | bool TxNewOrder(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
24 | bool TxPayment(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
25 | bool TxDelivery(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
26 | bool TxOrderStatus(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
27 | bool TxStockLevel(TPCC* tpcc_client, FastRandom* random_generator, coro_yield_t& yield, tx_id_t tx_id, DTX* dtx);
28 | /******************** The business logic (Transaction) end ********************/


--------------------------------------------------------------------------------