├── .clang-format ├── .gitignore ├── CMakeLists.txt ├── README.md ├── doc ├── event_loop.drawio └── remote_direct_io.drawio ├── fs └── rchfs.cc ├── include ├── options.hpp ├── rcmp.hpp └── status.hpp ├── script ├── create_cxl_mem.sh ├── gen-perf-svg.sh ├── rchfs_fio.sh ├── run_client_shell.sh ├── run_cluster.sh ├── run_dht.sh └── scp-src.sh ├── src ├── CMakeLists.txt ├── allocator.cc ├── cxl.cc ├── daemon.cc ├── fiber_poll.cc ├── include │ ├── allocator.hpp │ ├── cmdline.h │ ├── common.hpp │ ├── concurrent_hashmap.hpp │ ├── concurrent_queue.hpp │ ├── config.hpp │ ├── cxl.hpp │ ├── fiber_pool.hpp │ ├── impl.hpp │ ├── lock.hpp │ ├── log.hpp │ ├── msg_queue.hpp │ ├── page_table.hpp │ ├── promise.hpp │ ├── proto │ │ ├── rpc_adaptor.hpp │ │ ├── rpc_caller.hpp │ │ ├── rpc_client.hpp │ │ ├── rpc_daemon.hpp │ │ ├── rpc_master.hpp │ │ └── rpc_register.hpp │ ├── rdma_rc.hpp │ ├── robin_hood.h │ ├── stats.hpp │ ├── udp_client.hpp │ ├── udp_server.hpp │ └── utils.hpp ├── master.cc ├── msg_queue.cc ├── page_table.cc ├── proto │ ├── rpc_client.cc │ ├── rpc_daemon.cc │ └── rpc_master.cc ├── rcmp.cc ├── rdma_rc.cc ├── stats.cc ├── test │ ├── CMakeLists.txt │ ├── conqueue_test.cc │ ├── erpc │ │ ├── client_sta_test.cc │ │ ├── common_sta.h │ │ └── server_sta_test.cc │ ├── hello_world_test.cc │ ├── ring_allocator_test.cc │ └── simple_adaptor_test.cc └── utils.cc ├── test ├── CMakeLists.txt ├── client_shell.cc ├── dht.cc ├── dht.hpp ├── microbench_core.hpp ├── mmap_shell.cc └── rw.cc └── third_party └── eRPC ├── erpc.h └── liberpc.a /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -1 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveMacros: false 7 | AlignConsecutiveAssignments: false 8 | AlignConsecutiveDeclarations: false 9 | AlignEscapedNewlines: Left 10 | AlignOperands: true 11 | AlignTrailingComments: true 12 | AllowAllArgumentsOnNextLine: true 13 | AllowAllConstructorInitializersOnNextLine: true 14 | AllowAllParametersOfDeclarationOnNextLine: true 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortFunctionsOnASingleLine: All 18 | AllowShortLambdasOnASingleLine: All 19 | AllowShortIfStatementsOnASingleLine: WithoutElse 20 | AllowShortLoopsOnASingleLine: true 21 | AlwaysBreakAfterDefinitionReturnType: None 22 | AlwaysBreakAfterReturnType: None 23 | AlwaysBreakBeforeMultilineStrings: true 24 | AlwaysBreakTemplateDeclarations: Yes 25 | BinPackArguments: true 26 | BinPackParameters: true 27 | BraceWrapping: 28 | AfterCaseLabel: false 29 | AfterClass: false 30 | AfterControlStatement: false 31 | AfterEnum: false 32 | AfterFunction: false 33 | AfterNamespace: false 34 | AfterObjCDeclaration: false 35 | AfterStruct: false 36 | AfterUnion: false 37 | AfterExternBlock: false 38 | BeforeCatch: false 39 | BeforeElse: false 40 | IndentBraces: false 41 | SplitEmptyFunction: true 42 | SplitEmptyRecord: true 43 | SplitEmptyNamespace: true 44 | BreakBeforeBinaryOperators: None 45 | BreakBeforeBraces: Attach 46 | BreakBeforeInheritanceComma: false 47 | BreakInheritanceList: BeforeColon 48 | BreakBeforeTernaryOperators: true 49 | BreakConstructorInitializersBeforeComma: false 50 | BreakConstructorInitializers: BeforeColon 51 | BreakAfterJavaFieldAnnotations: false 52 | BreakStringLiterals: true 53 | ColumnLimit: 100 54 | CommentPragmas: '^ IWYU pragma:' 55 | CompactNamespaces: false 56 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 57 | 
ConstructorInitializerIndentWidth: 4 58 | ContinuationIndentWidth: 4 59 | Cpp11BracedListStyle: true 60 | DeriveLineEnding: true 61 | DerivePointerAlignment: true 62 | DisableFormat: false 63 | ExperimentalAutoDetectBinPacking: false 64 | FixNamespaceComments: true 65 | ForEachMacros: 66 | - foreach 67 | - Q_FOREACH 68 | - BOOST_FOREACH 69 | IncludeBlocks: Regroup 70 | IncludeCategories: 71 | - Regex: '^' 72 | Priority: 2 73 | SortPriority: 0 74 | - Regex: '^<.*\.h>' 75 | Priority: 1 76 | SortPriority: 0 77 | - Regex: '^<.*' 78 | Priority: 2 79 | SortPriority: 0 80 | - Regex: '.*' 81 | Priority: 3 82 | SortPriority: 0 83 | IncludeIsMainRegex: '([-_](test|unittest))?$' 84 | IncludeIsMainSourceRegex: '' 85 | IndentCaseLabels: true 86 | IndentGotoLabels: true 87 | IndentPPDirectives: None 88 | IndentWidth: 4 89 | IndentWrappedFunctionNames: false 90 | JavaScriptQuotes: Leave 91 | JavaScriptWrapImports: true 92 | KeepEmptyLinesAtTheStartOfBlocks: false 93 | MacroBlockBegin: '' 94 | MacroBlockEnd: '' 95 | MaxEmptyLinesToKeep: 1 96 | NamespaceIndentation: None 97 | ObjCBinPackProtocolList: Never 98 | ObjCBlockIndentWidth: 2 99 | ObjCSpaceAfterProperty: false 100 | ObjCSpaceBeforeProtocolList: true 101 | PenaltyBreakAssignment: 2 102 | PenaltyBreakBeforeFirstCallParameter: 1 103 | PenaltyBreakComment: 300 104 | PenaltyBreakFirstLessLess: 120 105 | PenaltyBreakString: 1000 106 | PenaltyBreakTemplateDeclaration: 10 107 | PenaltyExcessCharacter: 1000000 108 | PenaltyReturnTypeOnItsOwnLine: 200 109 | PointerAlignment: Left 110 | RawStringFormats: 111 | - Language: Cpp 112 | Delimiters: 113 | - cc 114 | - CC 115 | - cpp 116 | - Cpp 117 | - CPP 118 | - 'c++' 119 | - 'C++' 120 | CanonicalDelimiter: '' 121 | BasedOnStyle: google 122 | - Language: TextProto 123 | Delimiters: 124 | - pb 125 | - PB 126 | - proto 127 | - PROTO 128 | EnclosingFunctions: 129 | - EqualsProto 130 | - EquivToProto 131 | - PARSE_PARTIAL_TEXT_PROTO 132 | - PARSE_TEST_PROTO 133 | - PARSE_TEXT_PROTO 134 | - ParseTextOrDie 135 | - ParseTextProtoOrDie 136 | CanonicalDelimiter: '' 137 | BasedOnStyle: google 138 | ReflowComments: true 139 | SortIncludes: true 140 | SortUsingDeclarations: true 141 | SpaceAfterCStyleCast: false 142 | SpaceAfterLogicalNot: false 143 | SpaceAfterTemplateKeyword: true 144 | SpaceBeforeAssignmentOperators: true 145 | SpaceBeforeCpp11BracedList: false 146 | SpaceBeforeCtorInitializerColon: true 147 | SpaceBeforeInheritanceColon: true 148 | SpaceBeforeParens: ControlStatements 149 | SpaceBeforeRangeBasedForLoopColon: true 150 | SpaceInEmptyBlock: false 151 | SpaceInEmptyParentheses: false 152 | SpacesBeforeTrailingComments: 2 153 | SpacesInAngles: false 154 | SpacesInConditionalStatement: false 155 | SpacesInContainerLiterals: true 156 | SpacesInCStyleCastParentheses: false 157 | SpacesInParentheses: false 158 | SpacesInSquareBrackets: false 159 | SpaceBeforeSquareBrackets: false 160 | Standard: Auto 161 | StatementMacros: 162 | - Q_UNUSED 163 | - QT_REQUIRE_VERSION 164 | TabWidth: 8 165 | UseCRLF: false 166 | UseTab: Never 167 | ... 
168 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode 2 | /build 3 | /.cache -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(rcmp) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | if( CMAKE_BUILD_TYPE STREQUAL "Release" ) 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") 8 | message(STATUS "Release Mode") 9 | else() 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") 11 | message(STATUS "Debug Mode") 12 | endif() 13 | 14 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 15 | set(CMAKE_POSITION_INDEPENDENT_CODE ON) 16 | 17 | find_package(Boost REQUIRED COMPONENTS context fiber) 18 | 19 | if(NOT Boost_FOUND) 20 | message(FATAL_ERROR "Boost fiber not found") 21 | endif() 22 | 23 | link_directories("third_party/eRPC/") 24 | include_directories("include" "src/include" "third_party") 25 | add_subdirectory(src) 26 | 27 | # ################################################## 28 | # Library 29 | # ################################################## 30 | 31 | add_library(rcmp SHARED 32 | src/rcmp.cc 33 | ) 34 | target_link_libraries( 35 | rcmp 36 | base 37 | ) 38 | 39 | # ################################################## 40 | # Executables 41 | # ################################################## 42 | 43 | add_executable( 44 | rcmp_master 45 | src/master.cc 46 | ) 47 | target_link_libraries( 48 | rcmp_master 49 | base 50 | ) 51 | 52 | add_executable( 53 | rcmp_daemon 54 | src/daemon.cc 55 | ) 56 | target_link_libraries( 57 | rcmp_daemon 58 | base 59 | ) 60 | 61 | add_executable( 62 | rchfs 63 | fs/rchfs.cc 64 | ) 65 | target_link_libraries( 66 | rchfs 67 | rcmp 68 | fuse3 69 | ) 70 | 71 | add_subdirectory(test) 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rcmp: A hybrid memory pooling system based on RDMA and CXL 2 | 3 | Rcmp is a user-level library for a distributed memory pooling system that mixes CXL and RDMA. Rcmp deploys large memory pools in separate racks, using CXL for coherent memory access within a rack and RDMA for remote one-sided access across racks. The CXL memory devices used within a rack have sub-microsecond latency, which greatly accelerates remote memory access, while RDMA scales the capacity of the memory pool well. However, since RDMA's raw verbs API cannot provide coherent memory access, Rcmp combines RDMA with Remote Direct IO and Remote Page Swap policies to achieve coherent access across racks. 4 | 5 | Rcmp currently supports the following features: 6 | 7 | * **Memory Allocation and Release**: Clients allocate and release page-sized memory space via the AllocPage and FreePage APIs. 8 | 9 | * **Consistent Memory Read/Write**: Users access memory data at a global address (GAddr) through the Read/Write/CAS APIs. Depending on access hotness, an access is served either by a CXL load/store or by an RDMA one-sided verb operation. 10 | 11 | # How to use 12 | 13 | * Using the Rcmp dynamic library 14 | 15 | The interfaces are defined in `include/rcmp.hpp`, and their use can be found in `test/client_shell.cc`. This test launches a memory pool operations program and uses Rcmp's API to perform various operations on the memory pool. A minimal example is sketched below.
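A minimal usage sketch (not a file in this repository; the option values are placeholders copied from the cluster commands further down, and error handling is reduced to asserts):

```cpp
#include <cassert>
#include <cstring>

#include "rcmp.hpp"

int main() {
    rcmp::ClientOptions opts;
    opts.client_ip = "192.168.200.51";          // placeholder client address
    opts.client_port = 14800;
    opts.rack_id = 0;                           // rack of the local daemon
    opts.cxl_devdax_path = "/dev/shm/cxlsim0";  // simulated CXL device
    opts.cxl_memory_size = 2357198848;

    rcmp::PoolContext *pool = rcmp::Open(opts);
    assert(pool != nullptr);

    // Allocate one page, write a string into it, and read it back.
    rcmp::GAddr gaddr = pool->AllocPage(1);
    assert(gaddr != rcmp::GNullPtr);

    char wbuf[] = "hello rcmp";
    char rbuf[sizeof(wbuf)] = {};
    assert(pool->Write(gaddr, sizeof(wbuf), wbuf) == rcmp::OK);
    assert(pool->Read(gaddr, sizeof(rbuf), rbuf) == rcmp::OK);
    assert(std::strcmp(wbuf, rbuf) == 0);

    pool->FreePage(gaddr, 1);
    rcmp::Close(pool);
    return 0;
}
```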
16 | 17 | 1. Dependencies 18 | 19 | * gcc(>=4.6) 20 | 21 | * numactl 22 | 23 | * boost-fiber-dev 24 | 25 | * boost-coroutine-dev 26 | 27 | * boost-context-dev 28 | 29 | * asio 30 | 31 | * redis-plus-plus 32 | 33 | * fuse3 34 | 35 | 2. Compile 36 | 37 | ```shell 38 | mkdir -p build 39 | cd build 40 | cmake .. -DCMAKE_BUILD_TYPE=Release 41 | make 42 | ``` 43 | 44 | 3. Run Cluster 45 | 46 | * Start Master (MN) 47 | 48 | The MN process starts `eRPC`; please **reserve huge pages at 2GB granularity** in advance. 49 | 50 | ```shell 51 | sudo /home/user/Rcmp/build/rcmp_master --master_ip=192.168.200.51 --master_port=31850 52 | ``` 53 | 54 | * Start Rack Daemon (DN) 55 | 56 | The DN process starts `eRPC`; please **reserve huge pages at 2GB granularity** in advance. 57 | 58 | This project currently uses shared memory across NUMA nodes to simulate CXL access. Run `script/create_cxl_mem.sh` to create the shared memory on NUMA 1. 59 | 60 | All other processes run on NUMA 0. 61 | 62 | ```shell 63 | # Add rack 0 on 192.168.200.51 with CXL size 2.19GB 64 | sudo numactl -N 0 /home/user/Rcmp/build/rcmp_daemon --master_ip=192.168.200.51 --master_port=31850 --daemon_ip=192.168.200.51 --daemon_port=31851 --rack_id=0 --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=2357198848 --hot_decay=0.04 --hot_swap_watermark=3 65 | ``` 66 | 67 | ```shell 68 | # Add rack 1 on 192.168.201.89 with CXL size 18GB 69 | sudo numactl -N 0 /home/user/Rcmp/build/rcmp_daemon --master_ip=192.168.200.51 --master_port=31850 --daemon_ip=192.168.201.89 --daemon_port=31852 --rack_id=1 --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=19327352832 --hot_decay=0.04 --hot_swap_watermark=3 70 | ``` 71 | 72 | * Launch the client test program (CN) 73 | 74 | To simulate CXL, launch the CN in the same rack, on the same server as its DN. 75 | 76 | `test/rw.cc` is a micro-benchmark. It uses Redis for cross-rack test synchronisation. 77 | 78 | ```shell 79 | sudo numactl -N 0 /home/user/Rcmp/build/test/rw --client_ip=192.168.200.51 --client_port=14800 --rack_id=0 --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=2357198848 --iteration=10000000 --payload_size=64 --addr_range=17179869184 --thread=32 --thread_all=1 --no_node=1 --node_id=0 --redis_server_ip=192.168.201.52:6379 80 | ``` 81 | 82 | # Application 83 | 84 | After starting the necessary memory pool cluster environment (MN plus DNs), use the Rcmp dynamic library to build memory pool applications. The following applications are already implemented in the project. 85 | 86 | * Distributed hash table 87 | 88 | The distributed hash table uses the Rcmp interface to implement a linearly probed two-tier hash table. For simplicity, the hash table is fixed-size; dynamic scaling (similar to CCEH) will be added later. A minimal insert sketch is given at the end of this README. 89 | 90 | Location: `test/dht.hpp`. 91 | 92 | * rchfs 93 | 94 | rchfs uses the FUSE API to implement a simple high-capacity in-memory file system. File metadata is stored on the client, file data blocks are allocated with Rcmp's AllocPage, and write/read system calls are redirected to Rcmp's Write/Read API. File metadata sharing in the memory pool will be added later. 95 | 96 | Location: `fs/rchfs.cc`. 97 | 98 | # Paper 99 | 100 | Zhonghua Wang, Yixing Guo, Kai Lu*, Jiguang Wan, Daohui Wang, Ting Yao, Huatao Wu. Rcmp: Reconstructing RDMA-based Memory Disaggregation via CXL. ACM Transactions on Architecture and Code Optimization (TACO), 2023. (Just Accepted)
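As referenced in the Application section above, the following sketch shows how a linearly probed insert can be built from the public `CAS` API. It is illustrative only and is not the implementation in `test/dht.hpp`: the single-level table of 8-byte slots, the hash function, and the assumption that a failed `CAS` reports the observed value through `expected` are all hypothetical.

```cpp
#include <cstdint>
#include <functional>

#include "rcmp.hpp"

constexpr size_t kSlots = 1024;  // fixed table size: kSlots * 8 bytes, obtained via AllocPage
constexpr uint64_t kEmpty = 0;   // sentinel for an empty slot

// Insert a non-zero `key` into the first free slot of its probe chain.
bool DhtInsert(rcmp::PoolContext *pool, rcmp::GAddr table, uint64_t key) {
    size_t h = std::hash<uint64_t>()(key) % kSlots;
    for (size_t i = 0; i < kSlots; ++i) {
        rcmp::GAddr slot = table + ((h + i) % kSlots) * sizeof(uint64_t);
        uint64_t expected = kEmpty;
        bool ok = false;
        // CAS claims the slot atomically, even against clients in other racks.
        if (pool->CAS(slot, expected, key, ok) != rcmp::OK) return false;
        if (ok) return true;
        if (expected == key) return true;  // assumed: CAS writes back the observed value
    }
    return false;  // probe chain exhausted: table full
}
```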
-------------------------------------------------------------------------------- /include/options.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cstddef> 4 | #include <cstdint> 5 | #include <string> 6 | 7 | // enable stat 8 | #define RCMP_PERF_ON 1 9 | 10 | namespace rcmp { 11 | 12 | class ClientOptions { 13 | public: 14 | std::string client_ip; 15 | uint16_t client_port; 16 | 17 | uint32_t rack_id; 18 | 19 | // Whether to register as a CXL client (currently only `true` is supported) 20 | bool with_cxl = true; 21 | std::string cxl_devdax_path; 22 | size_t cxl_memory_size; 23 | int prealloc_fiber_num = 2; // Number of pre-allocated boost coroutines 24 | }; 25 | 26 | class DaemonOptions { 27 | public: 28 | std::string master_ip; 29 | uint16_t master_port = 31850; 30 | 31 | std::string daemon_ip; 32 | uint16_t daemon_port; 33 | 34 | uint32_t rack_id; 35 | 36 | // Whether to register as a CXL daemon (currently only `true` is supported) 37 | bool with_cxl = true; 38 | std::string cxl_devdax_path; 39 | size_t cxl_memory_size; 40 | 41 | // Maximum number of clients (limited by the msgq communication area in shared memory) 42 | size_t max_client_limit = 32; 43 | size_t swap_zone_size = 64ul << 20; 44 | 45 | int prealloc_fiber_num = 32; // Number of pre-allocated boost coroutines 46 | float heat_half_life_us = 1000; // Page heat decay coefficient (half-life, us) 47 | float hot_swap_watermark = 3; // Page swap heat threshold 48 | 49 | int cm_qp_num = 2; // Number of QPs connected to other daemons 50 | }; 51 | 52 | class MasterOptions { 53 | public: 54 | std::string master_ip; 55 | uint16_t master_port = 31850; 56 | 57 | size_t max_cluster_mac_num = 1000; // Maximum number of connected nodes in the cluster 58 | int prealloc_fiber_num = 16; // Number of pre-allocated boost coroutines 59 | }; 60 | 61 | } // namespace rcmp -------------------------------------------------------------------------------- /include/rcmp.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cstddef> 4 | #include <cstdint> 5 | 6 | #include "options.hpp" 7 | #include "status.hpp" 8 | 9 | namespace rcmp { 10 | 11 | using GAddr = uintptr_t; 12 | constexpr static GAddr GNullPtr = 0; 13 | 14 | /** 15 | * @brief Memory Pool Client Context 16 | */ 17 | class PoolContext; 18 | 19 | /** 20 | * @brief 21 | * Opens the memory pool. Returns a pointer to the memory pool context on success, otherwise returns 22 | * `nullptr`. The returned object is allocated with `new` and should be closed and released via 23 | * `Close()`. 24 | * 25 | * @param options Memory Pool Initialisation Options 26 | * @return PoolContext* 27 | */ 28 | PoolContext *Open(ClientOptions options); 29 | 30 | /** 31 | * @brief Closes the memory pool context. 32 | * 33 | * @param pool_ctx 34 | */ 35 | void Close(PoolContext *pool_ctx); 36 | 37 | class PoolContext { 38 | private: 39 | /** 40 | * @brief `PoolContext` internal implementation 41 | */ 42 | class PoolContextImpl; 43 | 44 | public: 45 | PoolContext(ClientOptions options); 46 | ~PoolContext(); 47 | 48 | /** 49 | * @brief Allocates memory. The allocation policy prefers memory in the rack where the client 50 | * is located. A failed request returns `GNullPtr`. 51 | * 52 | * @param size 53 | * @return GAddr 54 | */ 55 | GAddr Alloc(size_t size); 56 | /** 57 | * @brief Reads `size` bytes at address `gaddr` into `buf`.
58 | * 59 | * @param gaddr 60 | * @param size 61 | * @param buf 62 | * @return Status 63 | */ 64 | Status Read(GAddr gaddr, size_t size, void *buf); 65 | /** 66 | * @brief Writes `size` bytes from `buf` to address `gaddr`. 67 | * 68 | * @param gaddr 69 | * @param size 70 | * @param buf 71 | * @return Status 72 | */ 73 | Status Write(GAddr gaddr, size_t size, const void *buf); 74 | /** 75 | * @brief Frees memory. 76 | * 77 | * @param gaddr 78 | * @param size 79 | * @return Status 80 | */ 81 | Status Free(GAddr gaddr, size_t size); 82 | 83 | /** 84 | * @brief Allocates `count` contiguous memory pages. The allocation policy prefers memory in 85 | * the rack where the client is located. A failed request returns 86 | * `GNullPtr`. 87 | * 88 | * @param count 89 | * @return GAddr 90 | */ 91 | GAddr AllocPage(size_t count); 92 | 93 | /** 94 | * @brief Frees consecutive memory pages. 95 | * 96 | * @param gaddr 97 | * @param count 98 | * @return Status 99 | */ 100 | Status FreePage(GAddr gaddr, size_t count); 101 | 102 | /** 103 | * @brief Compare-and-swap on an 8-byte-aligned address. 104 | * 105 | * @param gaddr 106 | * @param expected 107 | * @param desired 108 | * @param ret 109 | * @return Status 110 | */ 111 | Status CAS(GAddr gaddr, uint64_t &expected, uint64_t desired, bool &ret); 112 | 113 | // /** 114 | // * @brief Write data from `buf` to `gaddr` address, size `size`. 115 | // * 116 | // * @param gaddr 117 | // * @param size 118 | // * @param buf 119 | // * @return Status 120 | // */ 121 | // Status WriteBatch(GAddr gaddr, size_t size, void *buf); 122 | 123 | const ClientOptions &GetOptions() const; 124 | 125 | /*********************** for test ***********************/ 126 | 127 | void __DumpStats(); 128 | 129 | void __ClearStats(); 130 | 131 | Status __TestDataSend1(int *array, size_t size); 132 | 133 | Status __TestDataSend2(int *array, size_t size); 134 | 135 | Status __NotifyPerf(); 136 | 137 | Status __StopPerf(); 138 | 139 | private: 140 | PoolContextImpl *m_impl; 141 | }; 142 | 143 | } // namespace rcmp -------------------------------------------------------------------------------- /include/status.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <string> 4 | 5 | namespace rcmp { 6 | 7 | enum Status { 8 | ERROR = 0, 9 | OK = 1, 10 | }; 11 | 12 | inline static std::string GetStatusString(Status s) { 13 | switch (s) { 14 | case ERROR: 15 | return "ERROR"; 16 | case OK: 17 | return "OK"; 18 | default: 19 | return "Unknown Status"; 20 | } 21 | } 22 | 23 | } // namespace rcmp -------------------------------------------------------------------------------- /script/create_cxl_mem.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SIZE=$1 # size in MB (dd uses bs=1M) 4 | 5 | numactl --membind=1 dd if=/dev/zero of=/dev/shm/cxlsim0 bs=1M count=$SIZE -------------------------------------------------------------------------------- /script/gen-perf-svg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | t=$(date "+%Y-%m-%d_%H-%M-%S") 4 | sudo perf script | stackcollapse-perf.pl | flamegraph.pl > perf_$t.svg -------------------------------------------------------------------------------- /script/rchfs_fio.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=`whoami` 4 | 5 | fio -filename=/home/$user/tmp_rchfs_fs/fio_test -direct=1 -iodepth=1 -thread -rw=randwrite -ioengine=psync -bs=16k -size=2G -numjobs=10
-runtime=60 -group_reporting -name=mytest -------------------------------------------------------------------------------- /script/run_client_shell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=`whoami` 4 | port=$((14800+$1)) 5 | 6 | sudo /home/$user/Rcmp/build/test/client_shell --client_ip=192.168.1.51 --client_port=$port --rack_id=$1 --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=4294967296 -------------------------------------------------------------------------------- /script/run_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=$1 4 | passwd=$2 5 | CMD_DIR="/home/$user/Rcmp/build" 6 | SUDO="echo $passwd | sudo -S" 7 | 8 | IP_MN="192.168.200.51" 9 | PORT_MN=31850 10 | # IP_DNs=("192.168.200.51" "192.168.201.52" "192.168.201.33" "192.168.201.89") 11 | # IP_CNs=(${IP_DNs[0]} ${IP_DNs[1]} ${IP_DNs[2]} ${IP_DNs[3]}) 12 | IP_DNs=("192.168.200.51") 13 | IP_CNs=(${IP_DNs[0]}) 14 | 15 | kill_all() { 16 | echo "kill all" 17 | 18 | for ((i=0; i<${#IP_CNs[@]}; i++)) 19 | do 20 | sshpass -p $passwd ssh $user@${IP_CNs[i]} "echo $passwd | sudo -S killall rw" & 21 | done 22 | 23 | sleep 2 24 | 25 | for ((i=0; i<${#IP_DNs[@]}; i++)) 26 | do 27 | sshpass -p $passwd ssh $user@${IP_DNs[i]} "echo $passwd | sudo -S killall rcmp_daemon" & 28 | sleep 2 29 | done 30 | 31 | sshpass -p $passwd ssh $user@$IP_MN "echo $passwd | sudo -S killall rcmp_master" & 32 | 33 | sleep 2 34 | } 35 | 36 | test_run() { 37 | # The ssh lines below already prepend "echo $passwd | sudo -S"; the command variables must not repeat it. 38 | MN_CMD="$CMD_DIR/rcmp_master --master_ip=$IP_MN --master_port=$PORT_MN" 39 | 40 | echo "[exec] $MN_CMD" 41 | sshpass -p $passwd ssh $user@$IP_MN "echo $passwd | sudo -S $MN_CMD" & 42 | 43 | sleep 5 44 | 45 | for ((i=0; i<${#IP_DNs[@]}; i++)) 46 | do 47 | port=$(($PORT_MN+1+$i)) 48 | 49 | DN_CMD="numactl -N 0 $CMD_DIR/rcmp_daemon --master_ip=$IP_MN --master_port=$PORT_MN --daemon_ip=${IP_DNs[i]} --daemon_port=$port --rack_id=$i --cxl_devdax_path=/dev/shm/cxlsim$i --cxl_memory_size=$CXL_MEM_SZ --hot_decay=$HOT_DECAY --hot_swap_watermark=$WATERMARK" 50 | 51 | echo "[exec] $DN_CMD" 52 | sshpass -p $passwd ssh $user@${IP_DNs[i]} "echo $passwd | sudo -S $DN_CMD" & 53 | 54 | sleep 5 55 | done 56 | 57 | PIDS=() 58 | 59 | for ((i=0; i<${#IP_CNs[@]}; i++)) 60 | do 61 | port=$((14800+$i)) 62 | 63 | NODES=${#IP_CNs[@]} 64 | NID=$i 65 | 66 | CN_CMD="numactl -N 0 $CMD_DIR/test/rw --client_ip=${IP_CNs[i]} --client_port=$port --rack_id=$i --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=$CXL_MEM_SZ --iteration=$IT --payload_size=$payload --addr_range=$ADDR_RANGE --thread=$THREAD --thread_all=1 --no_node=$NODES --node_id=$NID --redis_server_ip=192.168.201.52:6379" 67 | 68 | echo "[exec] $CN_CMD" 69 | sshpass -p $passwd ssh $user@${IP_CNs[i]} "echo $passwd | sudo -S $CN_CMD" & 70 | PIDS+=($!) 71 | 72 | sleep 4 73 | done 74 | 75 | wait ${PIDS[@]} 76 | 77 | kill_all 78 | } 79 | 80 | kill_all 81 | 82 | $CMD_DIR/../script/scp-src.sh $user $passwd 83 | 84 | echo "Start ..."
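# Global benchmark parameters consumed by test_run; the loop at the bottom sweeps payload sizes.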
84 | 85 | port=$((14800+0)) 86 | 87 | # reserve 2GB, data 8GB(include swap 100MB) 88 | CXL_MEM_SZ=$((10*1024*1024*1024)) 89 | ADDR_RANGE=$(((8*1024-100)*1024*1024)) 90 | HOT_DECAY=0.04 91 | WATERMARK=3 92 | THREAD=8 93 | IT=1000000 94 | SA=$((2*1024*1024)) 95 | 96 | for payload in 64 97 | do 98 | test_run 99 | done 100 | -------------------------------------------------------------------------------- /script/run_dht.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=`whoami` 4 | port=$((14800+$1)) 5 | 6 | sudo numactl -N 0 /home/$user/Rcmp/build/test/dht --client_ip=192.168.1.51 --client_port=$port --rack_id=$1 --cxl_devdax_path=/dev/shm/cxlsim$1 --cxl_memory_size=4294967296 --iteration=1000000 --read_ratio=100 --initor=$2 -------------------------------------------------------------------------------- /script/scp-src.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=$1 4 | passwd=$2 5 | 6 | CMD_DIR="/home/$user/Rcmp/build" 7 | 8 | for ip in 192.168.1.52 # 192.168.1.33 192.168.1.89 9 | do 10 | sshpass -p $passwd scp $CMD_DIR/test/rw $user@$ip:$CMD_DIR/test 11 | sshpass -p $passwd scp $CMD_DIR/librcmp.so $CMD_DIR/rcmp_daemon $user@$ip:$CMD_DIR/ 12 | done -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE LIB_SRCS *.cc) 2 | 3 | list(FILTER LIB_SRCS EXCLUDE REGEX "/rcmp.cc$") 4 | list(FILTER LIB_SRCS EXCLUDE REGEX "/daemon.cc$") 5 | list(FILTER LIB_SRCS EXCLUDE REGEX "/master.cc$") 6 | list(FILTER LIB_SRCS EXCLUDE REGEX "/test/") 7 | 8 | add_library(base STATIC ${LIB_SRCS}) 9 | target_link_libraries(base pthread erpc ibverbs numa rdmacm boost_coroutine Boost::fiber Boost::context) 10 | 11 | add_subdirectory(test) -------------------------------------------------------------------------------- /src/allocator.cc: -------------------------------------------------------------------------------- 1 | #include "allocator.hpp" 2 | 3 | #include "log.hpp" 4 | #include "utils.hpp" 5 | 6 | IDGenerator::id_t IDGenerator::Gen() { 7 | if (UNLIKELY(m_size + 1 > m_bset.size())) { 8 | return -1; 9 | } 10 | 11 | std::lock_guard guard(m_lck); 12 | 13 | size_t cur_tmp = m_gen_cur; 14 | do { 15 | size_t cur = m_gen_cur; 16 | m_gen_cur = (m_gen_cur + 1) % m_bset.size(); 17 | auto ref = m_bset[cur]; 18 | if (ref == false) { 19 | ref.flip(); 20 | m_size += 1; 21 | return cur; 22 | } 23 | } while (cur_tmp != m_gen_cur); 24 | 25 | return -1; 26 | } 27 | 28 | IDGenerator::id_t IDGenerator::MultiGen(size_t count) { 29 | if (count == 1) { 30 | return Gen(); 31 | } 32 | 33 | if (UNLIKELY(m_size + count > m_bset.size())) { 34 | return -1; 35 | } 36 | 37 | std::lock_guard guard(m_lck); 38 | 39 | id_t start = -1; 40 | size_t c = 0; 41 | size_t cur_tmp = m_gen_cur; 42 | 43 | do { 44 | size_t cur = m_gen_cur; 45 | m_gen_cur = (m_gen_cur + 1) % m_bset.size(); 46 | if (m_bset[cur] == false) { 47 | if (c == 0) { 48 | start = cur; 49 | } 50 | ++c; 51 | if (c == count) { 52 | for (size_t k = start; k < start + count; ++k) { 53 | m_bset[k].flip(); 54 | } 55 | m_size += count; 56 | return start; 57 | } 58 | } else { 59 | c = 0; 60 | } 61 | 62 | // Preventing Loopback 63 | if (m_gen_cur == 0) { 64 | c = 0; 65 | } 66 | 67 | } while (cur_tmp != m_gen_cur); 68 | 69 | return -1; 70 | } 71 | 72 | void IDGenerator::Recycle(IDGenerator::id_t id) { 73 | 
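// Take the lock before checking the bitset so the double-recycle assertion is race-free.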
std::lock_guard guard(m_lck); 74 | DLOG_ASSERT(m_bset[id] == true, "IDGenerator double recycle"); 75 | 76 | m_bset[id].flip(); 77 | m_size -= 1; 78 | } 79 | 80 | void IDGenerator::MultiRecycle(id_t id, size_t count) { 81 | if (count == 1) { 82 | Recycle(id); 83 | return; 84 | } 85 | 86 | std::lock_guard guard(m_lck); 87 | 88 | // Iterate without consuming `count`, so the size bookkeeping below stays correct. 89 | for (size_t i = 0; i < count; ++i) { 90 | DLOG_ASSERT(m_bset[id] == true, "IDGenerator double recycle"); 91 | m_bset[id].flip(); 92 | id++; 93 | } 94 | m_size -= count; 95 | } 96 | 97 | void IDGenerator::Expand(size_t n) { 98 | std::lock_guard guard(m_lck); 99 | m_bset.insert(m_bset.end(), n, false); 100 | } 101 | -------------------------------------------------------------------------------- /src/cxl.cc: -------------------------------------------------------------------------------- 1 | #include "cxl.hpp" 2 | 3 | #include <fcntl.h> 4 | #include <sys/mman.h> 5 | #include <unistd.h> 6 | 7 | #include <cstdint> 8 | #include <cstdlib> 9 | #include <string> 10 | 11 | #include "config.hpp" 12 | #include "log.hpp" 13 | #include "utils.hpp" 14 | 15 | void *cxl_open_simulate(std::string file, size_t size, int *fd) { 16 | *fd = open(file.c_str(), O_RDWR | O_CREAT, 0666); 17 | DLOG_ASSERT(*fd != -1, "Failed to open cxl dev: %s", file.c_str()); 18 | 19 | // Reserve a 2GB-aligned virtual address range, then remap the CXL file onto it with MAP_FIXED. 20 | void *addr = aligned_alloc(mem_region_aligned_size, size); 21 | free(addr); 22 | addr = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_LOCKED, *fd, 0); 23 | DLOG_ASSERT(addr != MAP_FAILED, "Failed to mmap cxl dev: %s", file.c_str()); 24 | return addr; 25 | } 26 | 27 | void cxl_close_simulate(int fd, CXLMemFormat &format) { 28 | munmap(const_cast<void *>(format.start_addr), format.super_block->total_size); 29 | close(fd); 30 | } 31 | 32 | void cxl_memory_init(CXLMemFormat &format, void *cxl_memory_addr, size_t size, 33 | size_t msgq_zone_size) { 34 | DLOG_ASSERT(size > mem_region_aligned_size, "The size of cxl memory needs to be larger than 2GB"); 35 | 36 | CXLSuperBlock *super_block = reinterpret_cast<CXLSuperBlock *>(cxl_memory_addr); 37 | super_block->total_size = size; 38 | super_block->msgq_zone_size = msgq_zone_size; 39 | super_block->reserve_heap_size = 40 | align_ceil(cxl_super_block_size + msgq_zone_size, mem_region_aligned_size) - 41 | (cxl_super_block_size + msgq_zone_size); 42 | super_block->page_data_zone_size = align_floor( 43 | size - cxl_super_block_size - msgq_zone_size - super_block->reserve_heap_size, page_size); 44 | 45 | cxl_memory_open(format, cxl_memory_addr); 46 | } 47 | 48 | void cxl_memory_open(CXLMemFormat &format, void *cxl_memory_addr) { 49 | format.start_addr = cxl_memory_addr; 50 | format.super_block = reinterpret_cast<CXLSuperBlock *>(cxl_memory_addr); 51 | format.msgq_zone_start_addr = reinterpret_cast<void *>( 52 | (reinterpret_cast<uintptr_t>(cxl_memory_addr) + cxl_super_block_size)); 53 | format.reserve_zone_addr = 54 | reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(format.msgq_zone_start_addr) + 55 | format.super_block->msgq_zone_size)); 56 | format.page_data_start_addr = 57 | reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(format.reserve_zone_addr) + 58 | format.super_block->reserve_heap_size)); 59 | format.end_addr = 60 | reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(format.page_data_start_addr) + 61 | format.super_block->page_data_zone_size)); 62 | 63 | DLOG("super_block: %p", format.super_block); 64 | DLOG("msgq_zone_start_addr: %p", format.msgq_zone_start_addr); 65 | DLOG("reserve_zone_addr: %p", format.reserve_zone_addr); 66 | DLOG("page_data_start_addr: %p", format.page_data_start_addr); 67 | DLOG("end_addr: %p", format.end_addr); 68 | } -------------------------------------------------------------------------------- /src/fiber_poll.cc:
-------------------------------------------------------------------------------- 1 | // Copyright Nat Goodspeed 2014. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | #include "fiber_pool.hpp" 7 | 8 | //[priority_props 9 | priority_props::priority_props(boost::fibers::context* ctx) 10 | : fiber_properties(ctx), /*< Your subclass constructor must accept a 11 | [^[class_link context]*] and pass it to 12 | the `fiber_properties` constructor. >*/ 13 | priority_(1) {} 14 | 15 | int priority_props::get_priority() const { 16 | return priority_; /*< Provide read access methods at your own discretion. >*/ 17 | } 18 | 19 | // Call this method to alter priority, because we must notify 20 | // priority_scheduler of any change. 21 | void priority_props::set_priority(int p) { /*< 22 | It's important to call `notify()` on any 23 | change in a property that can affect the 24 | scheduler's behavior. Therefore, such 25 | modifications should only be performed 26 | through an access method. >*/ 27 | // Of course, it's only worth reshuffling the queue and all if we're 28 | // actually changing the priority. 29 | if (p != priority_) { 30 | priority_ = p; 31 | notify(); 32 | } 33 | } 34 | 35 | void priority_props::set_low_priority() { set_priority(1); } 36 | void priority_props::set_high_priority() { set_priority(100); } 37 | //] 38 | 39 | //[priority_scheduler 40 | 41 | priority_scheduler::priority_scheduler() : rqueue_high_(), rqueue_low_() {} 42 | 43 | // For a subclass of algorithm_with_properties<>, it's important to 44 | // override the correct awakened() overload. 45 | /*<< You must override the [member_link algorithm_with_properties..awakened] 46 | method. This is how your scheduler receives notification of a 47 | fiber that has become ready to run. >>*/ 48 | void priority_scheduler::awakened(boost::fibers::context* ctx, priority_props& props) noexcept { 49 | int ctx_priority = props.get_priority(); /*< `props` is the instance of 50 | priority_props associated 51 | with the passed fiber `ctx`. >*/ 52 | // With this scheduler, fibers with higher priority values are 53 | // preferred over fibers with lower priority values. But fibers with 54 | // equal priority values are processed in round-robin fashion. So when 55 | // we're handed a new context*, put it at the end of the fibers 56 | // with that same priority. In other words: search for the first fiber 57 | // in the queue with LOWER priority, and insert before that one. 58 | if (ctx_priority == 1) { 59 | rqueue_low_.push_back(*ctx); 60 | } else { 61 | rqueue_high_.push_back(*ctx); 62 | } 63 | } 64 | 65 | /*<< You must override the [member_link algorithm_with_properties..pick_next] 66 | method. This is how your scheduler actually advises the fiber manager 67 | of the next fiber to run. >>*/ 68 | boost::fibers::context* priority_scheduler::pick_next() noexcept { 69 | boost::fibers::context* ctx; 70 | if (!rqueue_high_.empty()) { 71 | ctx = &rqueue_high_.front(); 72 | rqueue_high_.pop_front(); 73 | } else if (!rqueue_low_.empty()) { 74 | ctx = &rqueue_low_.front(); 75 | rqueue_low_.pop_front(); 76 | } else { 77 | ctx = nullptr; 78 | } 79 | return ctx; 80 | } 81 | 82 | /*<< You must override [member_link algorithm_with_properties..has_ready_fibers] 83 | to inform the fiber manager of the state of your ready queue. 
>>*/ 84 | bool priority_scheduler::has_ready_fibers() const noexcept { 85 | return !rqueue_high_.empty() || !rqueue_low_.empty(); 86 | } 87 | 88 | /*<< Overriding [member_link algorithm_with_properties..property_change] 89 | is optional. This override handles the case in which the running 90 | fiber changes the priority of another ready fiber: a fiber already in 91 | our queue. In that case, move the updated fiber within the queue. >>*/ 92 | void priority_scheduler::property_change(boost::fibers::context* ctx, 93 | priority_props& props) noexcept { 94 | // Although our priority_props class defines multiple properties, only 95 | // one of them (priority) actually calls notify() when changed. The 96 | // point of a property_change() override is to reshuffle the ready 97 | // queue according to the updated priority value. 98 | 99 | // 'ctx' might not be in our queue at all, if caller is changing the 100 | // priority of (say) the running fiber. If it's not there, no need to 101 | // move it: we'll handle it next time it hits awakened(). 102 | if (!ctx->ready_is_linked()) { /*< 103 | Your `property_change()` override must be able to 104 | handle the case in which the passed `ctx` is not in 105 | your ready queue. It might be running, or it might be 106 | blocked. >*/ 107 | //<- 108 | // hopefully user will distinguish this case by noticing that 109 | // the fiber with which we were called does not appear in the 110 | // ready queue at all 111 | //-> 112 | return; 113 | } 114 | 115 | // Found ctx: unlink it 116 | ctx->ready_unlink(); 117 | 118 | // Here we know that ctx was in our ready queue, but we've unlinked 119 | // it. We happen to have a method that will (re-)add a context* to the 120 | // right place in the ready queue. 121 | awakened(ctx, props); 122 | } 123 | 124 | void priority_scheduler::suspend_until( 125 | std::chrono::steady_clock::time_point const& time_point) noexcept { 126 | if ((std::chrono::steady_clock::time_point::max)() == time_point) { 127 | std::unique_lock lk(mtx_); 128 | cnd_.wait(lk, [this]() { return flag_; }); 129 | flag_ = false; 130 | } else { 131 | std::unique_lock lk(mtx_); 132 | cnd_.wait_until(lk, time_point, [this]() { return flag_; }); 133 | flag_ = false; 134 | } 135 | } 136 | 137 | void priority_scheduler::notify() noexcept { 138 | { 139 | std::unique_lock lk(mtx_); 140 | flag_ = true; 141 | } 142 | cnd_.notify_all(); 143 | } 144 | 145 | FiberPool::~FiberPool() { EraseAll(); } 146 | 147 | size_t FiberPool::FiberSize() const { return fibers_.size(); } 148 | 149 | void FiberPool::AddFiber(size_t n) { AddFiber(fr_queue_, n); } 150 | 151 | void FiberPool::AddFiber(WorkerFiberTaskQueue& my_queue, size_t n) { 152 | std::unique_lock lock(my_queue.fiber_mutex_); 153 | for (std::size_t i = 0; i < n; ++i) { 154 | auto fiber = boost::fibers::fiber([this, &my_queue] { 155 | while (!fiber_stop_) { 156 | std::function task; 157 | { 158 | std::unique_lock lock(my_queue.fiber_mutex_); 159 | my_queue.fiber_cond_.wait(lock, [this, &my_queue] { 160 | return !my_queue.fiber_tasks_.empty() || fiber_stop_; 161 | }); 162 | if (fiber_stop_) return; 163 | if (my_queue.fiber_tasks_.empty()) continue; 164 | task = std::move(my_queue.fiber_tasks_.front()); 165 | my_queue.fiber_tasks_.pop(); 166 | } 167 | task(); 168 | } 169 | }); 170 | fiber.properties().set_low_priority(); 171 | fibers_.emplace_back(std::move(fiber)); 172 | } 173 | } 174 | 175 | void FiberPool::EraseAll() { 176 | { 177 | std::unique_lock lock(fr_queue_.fiber_mutex_); 178 | fiber_stop_ = true; 179 | } 180 | 
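// Wake every blocked worker fiber so it can observe fiber_stop_ and exit its loop.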
fr_queue_.fiber_cond_.notify_all(); 181 | 182 | for (auto& fiber : fibers_) { 183 | fiber.join(); 184 | } 185 | fibers_.clear(); 186 | } 187 | -------------------------------------------------------------------------------- /src/include/allocator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "lock.hpp" 4 | #include "log.hpp" 5 | #include "utils.hpp" 6 | 7 | template 8 | class ObjectPoolAllocator { 9 | public: 10 | using value_type = T; 11 | 12 | ObjectPoolAllocator() = default; 13 | 14 | template 15 | ObjectPoolAllocator(const ObjectPoolAllocator&) {} 16 | 17 | T* allocate(size_t n) { 18 | DLOG_ASSERT(n == 1, "Must allocate 1 element"); 19 | 20 | if (pool.empty()) { 21 | return static_cast(::operator new(sizeof(T))); 22 | } else { 23 | T* obj = pool.back(); 24 | pool.pop_back(); 25 | return obj; 26 | } 27 | } 28 | 29 | void deallocate(T* p, size_t n) { pool.push_back(p); } 30 | 31 | private: 32 | class raw_ptr_vector : public std::vector { 33 | public: 34 | ~raw_ptr_vector() { 35 | for (auto* p : *this) { 36 | ::operator delete(p); 37 | } 38 | } 39 | }; 40 | 41 | static thread_local raw_ptr_vector pool; 42 | }; 43 | 44 | template 45 | thread_local typename ObjectPoolAllocator::raw_ptr_vector ObjectPoolAllocator::pool; 46 | 47 | class IDGenerator { 48 | public: 49 | using id_t = uint64_t; 50 | 51 | IDGenerator() : m_gen_cur(0), m_size(0) {} 52 | 53 | bool empty() const { return size() == 0; } 54 | 55 | bool full() const { return size() == capacity(); } 56 | 57 | size_t size() const { return m_size; } 58 | 59 | size_t capacity() const { return m_bset.size(); } 60 | 61 | id_t Gen(); 62 | 63 | id_t MultiGen(size_t count); 64 | 65 | void Recycle(id_t id); 66 | 67 | void MultiRecycle(id_t id, size_t count); 68 | 69 | void Expand(size_t n); 70 | 71 | private: 72 | size_t m_size; 73 | size_t m_gen_cur; 74 | std::vector m_bset; 75 | Mutex m_lck; 76 | }; 77 | 78 | template 79 | class SingleAllocator : private IDGenerator { 80 | public: 81 | SingleAllocator(size_t total_size) { Expand(total_size / UNIT_SZ); } 82 | 83 | uintptr_t allocate(size_t n) { 84 | DLOG_ASSERT(n == 1, "Must allocate 1 element"); 85 | IDGenerator::id_t id = Gen(); 86 | if (UNLIKELY(id == -1)) { 87 | return -1; 88 | } 89 | return id * UNIT_SZ; 90 | } 91 | 92 | void deallocate(uintptr_t ptr, size_t n) { Recycle(ptr / UNIT_SZ); } 93 | }; 94 | 95 | template 96 | class RingArena { 97 | public: 98 | RingArena() { 99 | for (size_t i = 0; i < BucketNum; ++i) { 100 | m_bs[i].pv.raw = 0; 101 | } 102 | } 103 | 104 | const void* base() const { return m_bs; } 105 | 106 | void* allocate(size_t s) { 107 | DLOG_ASSERT(s <= block_size, "Can't allocate large than block size: %lu, %lu", s, 108 | block_size); 109 | 110 | thread_local uint8_t b_cur = (reinterpret_cast(&b_cur) >> 5) % BucketNum; 111 | b_cur = (b_cur + 1) % BucketNum; 112 | uint8_t bc = b_cur; 113 | do { 114 | Block& b = m_bs[bc]; 115 | atomic_po_val_t opv = b.pv.load(std::memory_order_acquire), npv; 116 | 117 | while (1) { 118 | npv = opv; 119 | npv.pos += s; 120 | if (npv.pos < block_size) { 121 | ++npv.cnt; 122 | 123 | if (b.pv.compare_exchange_weak(opv, npv, std::memory_order_acquire, 124 | std::memory_order_acquire)) { 125 | return b.b + opv.pos; 126 | } 127 | 128 | } else { 129 | bc = (bc + 1) % BucketNum; 130 | break; 131 | } 132 | } 133 | 134 | } while (bc != b_cur); 135 | 136 | return nullptr; 137 | } 138 | 139 | void deallocate(void* p, size_t n) { 140 | uint8_t bc = div_floor(reinterpret_cast(p) - 
reinterpret_cast(m_bs), 141 | sizeof(Block)); 142 | DLOG_ASSERT(bc < BucketNum, "Out Of Memory"); 143 | 144 | Block& b = m_bs[bc]; 145 | atomic_po_val_t opv = b.pv.load(std::memory_order_acquire), npv; 146 | do { 147 | DLOG_ASSERT(opv.cnt != 0); 148 | npv = opv; 149 | if ((--npv.cnt) == 0) { 150 | npv.pos = 0; 151 | } 152 | } while (!b.pv.compare_exchange_weak(opv, npv, std::memory_order_release, 153 | std::memory_order_acquire)); 154 | } 155 | 156 | private: 157 | static constexpr size_t block_size = SZ / BucketNum; 158 | 159 | struct Block { 160 | atomic_po_val_t pv; 161 | uint8_t b[block_size]; 162 | }; 163 | 164 | Block m_bs[BucketNum]; 165 | }; 166 | 167 | template 168 | class ObjectPool { 169 | public: 170 | T* pop() { 171 | if (pool.empty()) { 172 | return new T(); 173 | } else { 174 | T* obj = pool.back(); 175 | pool.pop_back(); 176 | obj->clear(); 177 | return obj; 178 | } 179 | } 180 | 181 | void put(T* p) { pool.push_back(p); } 182 | 183 | private: 184 | class raw_ptr_vector : public std::vector { 185 | public: 186 | ~raw_ptr_vector() { 187 | for (auto* p : *this) { 188 | delete (p); 189 | } 190 | } 191 | }; 192 | 193 | static thread_local raw_ptr_vector pool; 194 | }; 195 | 196 | template 197 | thread_local typename ObjectPool::raw_ptr_vector ObjectPool::pool; -------------------------------------------------------------------------------- /src/include/common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "config.hpp" 6 | #include "rcmp.hpp" 7 | 8 | using page_id_t = uint64_t; 9 | using offset_t = uint64_t; 10 | using mac_id_t = uint32_t; 11 | using rack_id_t = uint32_t; 12 | 13 | enum SystemRole : uint8_t { 14 | MN = 1, 15 | CN = 2, 16 | CXL_CN = 3, 17 | DAEMON = 4, 18 | CXL_DAEMON = 5, 19 | }; 20 | 21 | constexpr static mac_id_t master_id = 0; 22 | constexpr static page_id_t invalid_page_id = -1; 23 | 24 | union GAddrCombineUnion { 25 | struct { 26 | offset_t off : offset_bits; 27 | page_id_t p : page_id_bits; 28 | }; 29 | rcmp::GAddr gaddr; 30 | }; 31 | 32 | inline static page_id_t GetPageID(rcmp::GAddr gaddr) { 33 | GAddrCombineUnion u; 34 | u.gaddr = gaddr; 35 | return u.p; 36 | } 37 | inline static offset_t GetPageOffset(rcmp::GAddr gaddr) { 38 | GAddrCombineUnion u; 39 | u.gaddr = gaddr; 40 | return u.off; 41 | } 42 | inline static rcmp::GAddr GetGAddr(page_id_t page_id, offset_t offset) { 43 | GAddrCombineUnion u; 44 | u.p = page_id; 45 | u.off = offset; 46 | return u.gaddr; 47 | } 48 | 49 | struct RDMARCConnectParam { 50 | SystemRole role; 51 | mac_id_t mac_id; 52 | }; -------------------------------------------------------------------------------- /src/include/concurrent_hashmap.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "config.hpp" 7 | #include "lock.hpp" 8 | #include "robin_hood.h" 9 | #include "utils.hpp" 10 | 11 | template > 13 | class ConcurrentHashMap { 14 | constexpr static const size_t BucketNum = BUCKET_NUM; 15 | 16 | struct SliceHash { 17 | size_t operator()(K key) const { return _Hash()(key) / BUCKET_NUM; } 18 | }; 19 | 20 | using HashTable = std::unordered_map; 21 | 22 | public: 23 | /** 24 | * @brief 25 | * @warning Rehashing causes the iterator to fail, but the change cannot be sensed. The iterator 26 | * should be updated in a timely manner during a large number of `insert()`, or `at()` should be 27 | * used. 
28 | */ 29 | class iterator { 30 | public: 31 | std::pair* operator->() { return it.operator->(); } 32 | bool operator==(const iterator& other) { return hidx == other.hidx && it == other.it; } 33 | bool operator!=(const iterator& other) { return hidx != other.hidx || it != other.it; } 34 | 35 | private: 36 | friend class ConcurrentHashMap; 37 | 38 | iterator(int hidx, typename HashTable::iterator it) : hidx(hidx), it(it) {} 39 | 40 | int hidx; 41 | typename HashTable::iterator it; 42 | }; 43 | 44 | const iterator end() { return {0, m_shards[0].m_map.end()}; } 45 | 46 | bool empty() const { 47 | for (size_t i = 0; i < BucketNum; ++i) { 48 | if (!m_shards[i].m_map.empty()) { 49 | return false; 50 | } 51 | } 52 | return true; 53 | } 54 | 55 | size_t size() const { 56 | size_t count = 0; 57 | for (size_t i = 0; i < BucketNum; ++i) { 58 | count += m_shards[i].m_map.size(); 59 | } 60 | return count; 61 | } 62 | 63 | std::pair insert(K key, V val) { 64 | int index = hash(key); 65 | auto& shard = m_shards[index]; 66 | auto& map = shard.m_map; 67 | 68 | std::unique_lock<__SharedMutex> guard(shard.m_lock); 69 | auto p = map.emplace(key, val); 70 | return {{index, p.first}, p.second}; 71 | } 72 | 73 | iterator find(K key) { 74 | int index = hash(key); 75 | auto& shard = m_shards[index]; 76 | auto& map = shard.m_map; 77 | 78 | std::shared_lock<__SharedMutex> guard(shard.m_lock); 79 | auto it = map.find(key); 80 | if (it != map.end()) { 81 | return {index, it}; 82 | } 83 | return end(); 84 | } 85 | 86 | V& at(K key) { 87 | int index = hash(key); 88 | auto& shard = m_shards[index]; 89 | auto& map = shard.m_map; 90 | 91 | std::shared_lock<__SharedMutex> guard(shard.m_lock); 92 | return map.at(key); 93 | } 94 | 95 | V& operator[](K key) { return at(key); } 96 | 97 | /** 98 | * @brief Finds an element. If it does not exist, call `cotr_fn()` to insert a new element 99 | * 100 | * @tparam ConFn 101 | * @param key 102 | * @param cotr_fn 103 | * @return std::pair 104 | */ 105 | template 106 | std::pair find_or_emplace(K key, ConFn&& ctor_fn) { 107 | auto iter = find(key); 108 | if (iter != end()) { 109 | return {iter, false}; 110 | } 111 | 112 | int index = hash(key); 113 | auto& shard = m_shards[index]; 114 | auto& map = shard.m_map; 115 | 116 | std::unique_lock<__SharedMutex> guard(shard.m_lock); 117 | auto it = map.find(key); 118 | if (it != map.end()) { 119 | return {{index, it}, false}; 120 | } 121 | 122 | auto p = map.emplace(key, std::move(ctor_fn())); 123 | return {{index, p.first}, p.second}; 124 | } 125 | 126 | void erase(K key) { 127 | auto it = find(key); 128 | erase(it); 129 | } 130 | 131 | void erase(iterator it) { 132 | if (it == end()) return; 133 | 134 | auto& shard = m_shards[it.hidx]; 135 | auto& map = shard.m_map; 136 | 137 | std::unique_lock<__SharedMutex> guard(shard.m_lock); 138 | map.erase(it.it); 139 | } 140 | 141 | /** 142 | * @tparam F 143 | * @param f bool(std::pair &),Returning false means the traversal is terminated. 
144 | */ 145 | template 146 | void foreach_all(F&& f) { 147 | for (size_t i = 0; i < BucketNum; ++i) { 148 | auto& shard = m_shards[i]; 149 | auto& map = shard.m_map; 150 | 151 | std::shared_lock<__SharedMutex> guard(shard.m_lock); 152 | for (auto& p : map) { 153 | if (!f(p)) { 154 | return; 155 | } 156 | } 157 | } 158 | } 159 | 160 | private: 161 | struct CACHE_ALIGN Shard { 162 | __SharedMutex m_lock; 163 | HashTable m_map; 164 | }; 165 | 166 | Shard m_shards[BucketNum]; 167 | 168 | static size_t hash(K key) { return std::hash()(key) % BucketNum; } 169 | }; 170 | 171 | template > 173 | class RandomAccessMap { 174 | public: 175 | class iterator { 176 | public: 177 | std::pair* operator->() { 178 | if (m->size() < index || m->values_[index].first != key) { 179 | update(); 180 | } 181 | return &m->values_[index]; 182 | } 183 | std::pair& operator*() { 184 | if (m->size() < index || m->values_[index].first != key) { 185 | update(); 186 | } 187 | return m->values_[index]; 188 | } 189 | bool operator==(const iterator& other) { return m == other.m && key == other.key; } 190 | bool operator!=(const iterator& other) { return m != other.m || key != other.key; } 191 | 192 | private: 193 | friend class RandomAccessMap; 194 | 195 | iterator(int index, K key, RandomAccessMap* m) : index(index), key(key), m(m) {} 196 | 197 | void update() { 198 | auto new_it = m->find(key); 199 | index = new_it.index; 200 | key = new_it.key; 201 | m = new_it.m; 202 | } 203 | 204 | int index; 205 | K key; 206 | RandomAccessMap* m; 207 | }; 208 | 209 | bool empty() const { return values_.empty(); } 210 | size_t size() const { return values_.size(); } 211 | 212 | const iterator end() { return iterator(-1, K(), this); } 213 | 214 | std::pair emplace(const K& key, const V& value) { 215 | std::unique_lock<__SharedMutex> guard(lock_); 216 | auto it = key_to_index_.find(key); 217 | if (it != key_to_index_.end()) { 218 | return {iterator(it->second, key, this), false}; 219 | } 220 | 221 | int index = key_to_index_[key] = values_.size() - 1; 222 | values_.push_back(value); 223 | return {iterator(index, key, this), true}; 224 | } 225 | 226 | iterator find(const K& key) { 227 | std::shared_lock<__SharedMutex> guard(lock_); 228 | auto it = key_to_index_.find(key); 229 | if (it == key_to_index_.end()) { 230 | return end(); 231 | } 232 | return iterator(it->second, key, this); 233 | } 234 | 235 | template 236 | std::pair find_or_emplace(const K& key, ConFn&& ctor_fn) { 237 | auto iter = find(key); 238 | if (iter != end()) { 239 | return {iter, false}; 240 | } 241 | 242 | std::unique_lock<__SharedMutex> guard(lock_); 243 | auto it = key_to_index_.find(key); 244 | if (it != key_to_index_.end()) { 245 | return {iterator(it->second, key, this), false}; 246 | } 247 | 248 | int index = values_.size(); 249 | key_to_index_.emplace(key, index); 250 | 251 | values_.push_back({key, std::move(ctor_fn())}); 252 | return {iterator(index, key, this), true}; 253 | } 254 | 255 | V& at(const K& key) { 256 | std::shared_lock<__SharedMutex> guard(lock_); 257 | return key_to_index_.find(key)->second; 258 | } 259 | 260 | void erase(iterator it) { 261 | if (it == end()) return; 262 | 263 | std::unique_lock<__SharedMutex> guard(lock_); 264 | 265 | auto kit = key_to_index_.find(it.key); 266 | int index = kit->second; 267 | key_to_index_.erase(kit); 268 | values_[index] = std::move(values_.back()); 269 | values_.pop_back(); 270 | if (index < values_.size()) { 271 | key_to_index_[values_[index].first] = index; 272 | } 273 | } 274 | 275 | template 276 | 
std::vector> getRandomN(Genrator g, size_t n, F &&filter_fn) { 277 | DLOG_ASSERT(n <= values_.size()); 278 | 279 | std::shared_lock<__SharedMutex> guard(lock_); 280 | 281 | std::vector> result; 282 | std::vector indices(values_.size(), false); 283 | std::uniform_int_distribution<> dis(0, values_.size() - 1); 284 | while (result.size() < n) { 285 | int index = dis(g); 286 | if (!indices[index]) { 287 | indices[index] = true; 288 | if (filter_fn(values_[index])) { 289 | result.push_back(values_[index]); 290 | } 291 | } 292 | } 293 | return result; 294 | } 295 | 296 | private: 297 | __SharedMutex lock_; 298 | std::vector> values_; 299 | std::unordered_map key_to_index_; 300 | }; -------------------------------------------------------------------------------- /src/include/concurrent_queue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "utils.hpp" 8 | 9 | enum ConcurrentQueueProducerMode { SP, MP }; 10 | enum ConcurrentQueueConsumerMode { SC, MC }; 11 | 12 | template 14 | class ConcurrentQueue; 15 | 16 | template 17 | class ConcurrentQueue { 18 | public: 19 | ConcurrentQueue() : m_head(0), m_tail(0) {} 20 | ~ConcurrentQueue() = default; 21 | 22 | size_t capacity() const { return SZ; } 23 | 24 | bool TryEnqueue(T n) { 25 | uint32_t tail = m_tail.load(std::memory_order_relaxed); 26 | uint32_t next_tail = (tail + 1) % SZ; 27 | if (UNLIKELY(next_tail == m_head.load(std::memory_order_acquire))) { 28 | return false; // full 29 | } 30 | m_data[tail] = std::move(n); 31 | m_tail.store(next_tail, std::memory_order_release); 32 | return true; 33 | } 34 | 35 | bool TryDequeue(T *n) { 36 | uint32_t head = m_head.load(std::memory_order_relaxed); 37 | if (UNLIKELY(head == m_tail.load(std::memory_order_acquire))) { 38 | return false; // empty 39 | } 40 | *n = std::move(m_data[head]); 41 | m_head.store((head + 1) % SZ, std::memory_order_release); 42 | return true; 43 | } 44 | 45 | private: 46 | std::atomic m_head; 47 | T m_data[SZ]; 48 | std::atomic m_tail; 49 | }; 50 | 51 | template 52 | class ConcurrentQueue { 53 | public: 54 | ConcurrentQueue() { 55 | m_prod_head.raw = 0; 56 | m_prod_tail.raw = 0; 57 | m_cons_tail = 0; 58 | } 59 | 60 | size_t capacity() const { return SZ; } 61 | 62 | void ForceEnqueue(T n) { 63 | atomic_po_val_t h, oh, nh; 64 | 65 | oh = m_prod_head.fetch_add_both(1, 1, std::memory_order_acquire); 66 | while (UNLIKELY(oh.pos - m_cons_tail.load(std::memory_order_relaxed) >= SZ)) { 67 | h = m_prod_tail.load(std::memory_order_acquire); 68 | while (h.cnt == oh.cnt && 69 | !m_prod_tail.compare_exchange_weak(h, oh, std::memory_order_release, 70 | std::memory_order_acquire)) { 71 | } 72 | } 73 | 74 | m_data[oh.pos % SZ] = std::move(n); 75 | 76 | oh = m_prod_tail.load(std::memory_order_acquire); 77 | do { 78 | h = m_prod_head.load(std::memory_order_relaxed); 79 | nh = oh; 80 | if ((++nh.cnt) == h.cnt) nh.pos = h.pos; 81 | } while (!m_prod_tail.compare_exchange_weak(oh, nh, std::memory_order_release, 82 | std::memory_order_acquire)); 83 | } 84 | 85 | bool TryEnqueue(T n) { 86 | atomic_po_val_t h, oh, nh; 87 | 88 | oh = m_prod_head.load(std::memory_order_acquire); 89 | do { 90 | if (UNLIKELY(oh.pos - m_cons_tail.load(std::memory_order_relaxed) >= SZ)) { 91 | return false; 92 | } 93 | nh.pos = oh.pos + 1; 94 | nh.cnt = oh.cnt + 1; 95 | } while (!m_prod_head.compare_exchange_weak(oh, nh, std::memory_order_acquire, 96 | std::memory_order_acquire)); 97 | 98 | m_data[oh.pos % SZ] = 
std::move(n); 99 | 100 | oh = m_prod_tail.load(std::memory_order_acquire); 101 | do { 102 | h = m_prod_head.load(std::memory_order_relaxed); 103 | nh = oh; 104 | if ((++nh.cnt) == h.cnt) nh.pos = h.pos; 105 | } while (!m_prod_tail.compare_exchange_weak(oh, nh, std::memory_order_release, 106 | std::memory_order_acquire)); 107 | 108 | return true; 109 | } 110 | 111 | bool TryDequeue(T *n) { return TryDequeue(n, n + 1) == 1; } 112 | 113 | template 114 | uint32_t TryDequeue(Iter first, Iter last) { 115 | uint32_t l = 0; 116 | uint32_t count = std::distance(first, last); 117 | uint32_t ot = m_cons_tail.load(std::memory_order_relaxed); 118 | l = std::min(count, m_prod_tail.load(std::memory_order_relaxed).pos - ot); 119 | if (l == 0) { 120 | return 0; 121 | } 122 | 123 | for (uint32_t i = 0; i < l; ++i) { 124 | *(first++) = std::move(m_data[(ot + i) % SZ]); 125 | } 126 | 127 | m_cons_tail.store(ot + l, std::memory_order_release); 128 | return l; 129 | } 130 | 131 | private: 132 | atomic_po_val_t m_prod_head; 133 | atomic_po_val_t m_prod_tail; 134 | 135 | T m_data[SZ]; 136 | 137 | std::atomic m_cons_tail; 138 | }; 139 | 140 | template 141 | class ConcurrentQueue { 142 | public: 143 | ConcurrentQueue() { 144 | m_prod_head.raw = 0; 145 | m_prod_tail.raw = 0; 146 | m_cons_head.raw = 0; 147 | m_cons_tail.raw = 0; 148 | } 149 | ~ConcurrentQueue() = default; 150 | 151 | size_t capacity() const { return SZ; } 152 | 153 | void ForceEnqueue(T n) { 154 | atomic_po_val_t h, oh, nh; 155 | 156 | oh = m_prod_head.fetch_add_both(1, 1, std::memory_order_acquire); 157 | while (UNLIKELY(oh.pos >= m_cons_tail.load(std::memory_order_relaxed).pos + SZ)) { 158 | h = m_prod_tail.load(std::memory_order_acquire); 159 | while (h.cnt == oh.cnt && 160 | !m_prod_tail.compare_exchange_weak(h, oh, std::memory_order_release, 161 | std::memory_order_acquire)) { 162 | } 163 | } 164 | 165 | m_data[oh.pos % SZ] = std::move(n); 166 | 167 | oh = m_prod_tail.load(std::memory_order_acquire); 168 | do { 169 | h = m_prod_head.load(std::memory_order_relaxed); 170 | nh = oh; 171 | if ((++nh.cnt) == h.cnt) nh.pos = h.pos; 172 | } while (!m_prod_tail.compare_exchange_weak(oh, nh, std::memory_order_release, 173 | std::memory_order_acquire)); 174 | } 175 | 176 | bool TryEnqueue(T n) { 177 | atomic_po_val_t h, oh, nh; 178 | 179 | oh = m_prod_head.load(std::memory_order_acquire); 180 | do { 181 | if (UNLIKELY(oh.pos - m_cons_tail.load(std::memory_order_relaxed).pos >= SZ)) { 182 | return false; 183 | } 184 | nh.pos = oh.pos + 1; 185 | nh.cnt = oh.cnt + 1; 186 | } while (!m_prod_head.compare_exchange_weak(oh, nh, std::memory_order_acquire, 187 | std::memory_order_acquire)); 188 | 189 | m_data[oh.pos % SZ] = std::move(n); 190 | 191 | oh = m_prod_tail.load(std::memory_order_acquire); 192 | do { 193 | h = m_prod_head.load(std::memory_order_relaxed); 194 | nh = oh; 195 | if ((++nh.cnt) == h.cnt) nh.pos = h.pos; 196 | } while (!m_prod_tail.compare_exchange_weak(oh, nh, std::memory_order_release, 197 | std::memory_order_acquire)); 198 | 199 | return true; 200 | } 201 | 202 | bool TryDequeue(T *n) { return TryDequeue(n, n + 1) == 1; } 203 | 204 | template 205 | uint32_t TryDequeue(Iter first, Iter last) { 206 | atomic_po_val_t t, ot, nt; 207 | uint32_t l = 0; 208 | uint32_t count = std::distance(first, last); 209 | 210 | ot = m_cons_head.load(std::memory_order_acquire); 211 | do { 212 | l = std::min(count, m_prod_tail.load(std::memory_order_relaxed).pos - ot.pos); 213 | if (l == 0) { 214 | return 0; 215 | } 216 | nt.pos = ot.pos + l; 217 | nt.cnt = 
ot.cnt + 1; 218 | } while (!m_cons_head.compare_exchange_weak(ot, nt, std::memory_order_acquire, 219 | std::memory_order_acquire)); 220 | 221 | for (uint32_t i = 0; i < l; ++i) { 222 | *(first++) = std::move(m_data[(ot.pos + i) % SZ]); 223 | } 224 | 225 | ot = m_cons_tail.load(std::memory_order_acquire); 226 | do { 227 | t = m_cons_head.load(std::memory_order_relaxed); 228 | nt = ot; 229 | if ((++nt.cnt) == t.cnt) nt.pos = t.pos; 230 | } while (!m_cons_tail.compare_exchange_weak(ot, nt, std::memory_order_release, 231 | std::memory_order_acquire)); 232 | 233 | return l; 234 | } 235 | 236 | private: 237 | atomic_po_val_t m_prod_head; 238 | atomic_po_val_t m_prod_tail; 239 | 240 | T m_data[SZ]; 241 | 242 | atomic_po_val_t m_cons_head; 243 | atomic_po_val_t m_cons_tail; 244 | }; 245 | -------------------------------------------------------------------------------- /src/include/config.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cstdint> 4 | 5 | #include "rcmp.hpp" 6 | 7 | #define MSGQ_SINGLE_FIFO_ON 0 8 | 9 | constexpr static size_t page_size = 4ul << 10; 10 | constexpr static size_t cache_line_size = 64; 11 | constexpr static size_t min_slab_size = 64; 12 | constexpr static size_t mem_region_aligned_size = 2ul << 30; 13 | 14 | constexpr static size_t offset_bits = __builtin_ffsl(page_size) - 1; 15 | constexpr static size_t page_id_bits = sizeof(rcmp::GAddr) * 8 - offset_bits; 16 | 17 | constexpr static size_t msgq_ring_buf_len = 16ul << 20; 18 | constexpr static size_t msgq_ring_depth = 256; 19 | constexpr static size_t write_batch_buffer_size = 64ul << 20; 20 | constexpr static size_t write_batch_buffer_overflow_size = 2ul << 20; 21 | 22 | constexpr static size_t get_page_cxl_ref_or_proxy_write_raw_max_size = UINT64_MAX; 23 | 24 | /** 25 | * @brief Intervals before and after heat statistics 26 | */ 27 | constexpr static size_t hot_stat_freq_timeout_interval = 100; -------------------------------------------------------------------------------- /src/include/cxl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <string> 4 | 5 | constexpr static size_t cxl_super_block_size = 4096; 6 | 7 | struct CXLSuperBlock { 8 | size_t total_size; 9 | size_t msgq_zone_size; 10 | size_t reserve_heap_size; 11 | size_t page_data_zone_size; 12 | }; 13 | 14 | /** 15 | * @brief cxl memory block format 16 | * 17 | * 2GB align 2GB align 18 | * 19 | * 0 4096 4096+msgq align(4096+msgq,2GB) align(psize) total 20 | * 21 | * [sp blk][ msgq ][ reserve ][ page data ][ unused ] 22 | */ 23 | 24 | struct CXLMemFormat { 25 | const void *start_addr; 26 | CXLSuperBlock *super_block; 27 | void *msgq_zone_start_addr; 28 | void *reserve_zone_addr; 29 | void *page_data_start_addr; 30 | const void *end_addr; 31 | }; 32 | 33 | void *cxl_open_simulate(std::string file, size_t size, int *fd); 34 | void cxl_close_simulate(int fd, CXLMemFormat &format); 35 | void cxl_memory_init(CXLMemFormat &format, void *cxl_memory_addr, size_t size, size_t msgq_zone_size); 36 | void cxl_memory_open(CXLMemFormat &format, void *cxl_memory_addr); 37 | -------------------------------------------------------------------------------- /src/include/fiber_pool.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Nat Goodspeed 2014. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | #pragma once 7 | 8 | #include // std::find_if() 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "log.hpp" 22 | 23 | //[priority_props 24 | class priority_props : public boost::fibers::fiber_properties { 25 | public: 26 | priority_props(boost::fibers::context* ctx); 27 | 28 | int get_priority() const; 29 | 30 | // Call this method to alter priority, because we must notify 31 | // priority_scheduler of any change. 32 | void set_priority(int p); 33 | 34 | void set_low_priority(); 35 | void set_high_priority(); 36 | 37 | // The fiber name of course is solely for purposes of this example 38 | // program; it has nothing to do with implementing scheduler priority. 39 | // This is a public data member -- not requiring set/get access methods -- 40 | // because we need not inform the scheduler of any change. 41 | std::string name; /*< A property that does not affect the scheduler does 42 | not need access methods. >*/ 43 | private: 44 | int priority_; 45 | }; 46 | //] 47 | 48 | //[priority_scheduler 49 | class priority_scheduler : public boost::fibers::algo::algorithm_with_properties { 50 | private: 51 | typedef boost::fibers::scheduler::ready_queue_type /*< See [link ready_queue_t]. >*/ rqueue_t; 52 | 53 | rqueue_t rqueue_high_; 54 | rqueue_t rqueue_low_; 55 | std::mutex mtx_{}; 56 | std::condition_variable cnd_{}; 57 | bool flag_{false}; 58 | 59 | public: 60 | priority_scheduler(); 61 | 62 | // For a subclass of algorithm_with_properties<>, it's important to 63 | // override the correct awakened() overload. 64 | /*<< You must override the [member_link algorithm_with_properties..awakened] 65 | method. This is how your scheduler receives notification of a 66 | fiber that has become ready to run. >>*/ 67 | virtual void awakened(boost::fibers::context* ctx, priority_props& props) noexcept; 68 | 69 | /*<< You must override the [member_link algorithm_with_properties..pick_next] 70 | method. This is how your scheduler actually advises the fiber manager 71 | of the next fiber to run. >>*/ 72 | virtual boost::fibers::context* pick_next() noexcept; 73 | 74 | /*<< You must override [member_link algorithm_with_properties..has_ready_fibers] 75 | to inform the fiber manager of the state of your ready queue. >>*/ 76 | virtual bool has_ready_fibers() const noexcept; 77 | 78 | /*<< Overriding [member_link algorithm_with_properties..property_change] 79 | is optional. This override handles the case in which the running 80 | fiber changes the priority of another ready fiber: a fiber already in 81 | our queue. In that case, move the updated fiber within the queue. 
>>*/ 82 | virtual void property_change(boost::fibers::context* ctx, priority_props& props) noexcept; 83 | 84 | void suspend_until(std::chrono::steady_clock::time_point const& time_point) noexcept; 85 | 86 | void notify() noexcept; 87 | }; 88 | 89 | class FiberPool { 90 | private: 91 | struct WorkerFiberTaskQueue { 92 | std::queue> fiber_tasks_; 93 | boost::fibers::mutex fiber_mutex_; 94 | boost::fibers::condition_variable fiber_cond_; 95 | }; 96 | 97 | public: 98 | ~FiberPool(); 99 | 100 | size_t FiberSize() const; 101 | 102 | void AddFiber(size_t n); 103 | 104 | void AddFiber(WorkerFiberTaskQueue& my_queue, size_t n); 105 | 106 | void EraseAll(); 107 | 108 | template 109 | void EnqueueTask(F&& f) { 110 | { 111 | std::unique_lock lock(fr_queue_.fiber_mutex_); 112 | fr_queue_.fiber_tasks_.emplace(std::forward(f)); 113 | } 114 | fr_queue_.fiber_cond_.notify_one(); 115 | } 116 | 117 | private: 118 | std::vector fibers_; 119 | volatile bool fiber_stop_ = false; 120 | volatile bool stop_ = false; 121 | WorkerFiberTaskQueue fr_queue_; 122 | }; 123 | -------------------------------------------------------------------------------- /src/include/lock.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using Mutex = std::mutex; 11 | using SharedMutex = std::shared_mutex; 12 | 13 | class SpinMutex { 14 | public: 15 | SpinMutex() { pthread_spin_init(&m_spinlock, 0); } 16 | ~SpinMutex() { pthread_spin_destroy(&m_spinlock); } 17 | 18 | void lock() { pthread_spin_lock(&m_spinlock); } 19 | bool try_lock() { return pthread_spin_trylock(&m_spinlock) == 0; } 20 | void unlock() { pthread_spin_unlock(&m_spinlock); } 21 | 22 | private: 23 | pthread_spinlock_t m_spinlock; 24 | }; 25 | 26 | class Barrier { 27 | public: 28 | Barrier(uint32_t n) { pthread_barrier_init(&m_b, nullptr, n); } 29 | ~Barrier() { pthread_barrier_destroy(&m_b); } 30 | 31 | void wait() { pthread_barrier_wait(&m_b); } 32 | 33 | private: 34 | pthread_barrier_t m_b; 35 | }; 36 | 37 | using CortMutex = boost::fibers::mutex; 38 | using CortConditionalVariable = boost::fibers::condition_variable; 39 | 40 | class CortSharedMutex { 41 | public: 42 | CortSharedMutex() : state(0) {} 43 | 44 | void lock() { 45 | std::unique_lock lk(mtx); 46 | g1.wait(lk, [=] { return !write_entered(); }); 47 | state |= _S_write_entered; 48 | g2.wait(lk, [=] { return readers() == 0; }); 49 | } 50 | 51 | bool try_lock() { 52 | std::unique_lock lk(mtx, std::try_to_lock); 53 | if (lk.owns_lock() && state == 0) { 54 | state = _S_write_entered; 55 | return true; 56 | } 57 | return false; 58 | } 59 | 60 | void unlock() { 61 | std::lock_guard lk(mtx); 62 | state = 0; 63 | g1.notify_all(); 64 | } 65 | 66 | void lock_shared() { 67 | std::unique_lock lk(mtx); 68 | g1.wait(lk, [=] { return state < _S_max_readers; }); 69 | ++state; 70 | } 71 | 72 | bool try_lock_shared() { 73 | std::unique_lock lk(mtx, std::try_to_lock); 74 | if (!lk.owns_lock()) { 75 | return false; 76 | } 77 | if (state < _S_max_readers) { 78 | ++state; 79 | return true; 80 | } 81 | return false; 82 | } 83 | 84 | void unlock_shared() { 85 | std::lock_guard lk(mtx); 86 | auto prev = state--; 87 | if (write_entered()) { 88 | if (readers() == 0) { 89 | g2.notify_one(); 90 | } 91 | } else { 92 | if (prev == _S_max_readers) { 93 | g1.notify_one(); 94 | } 95 | } 96 | } 97 | 98 | private: 99 | boost::fibers::mutex mtx; 100 | boost::fibers::condition_variable g1, g2; 101 | unsigned state; 
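// Layout note: `state` packs the whole latch into a single word, in the
// style of libstdc++'s condition-variable-based std::shared_mutex: the top
// bit (_S_write_entered, defined below) marks a writer, and the remaining
// bits count active readers. Minimal usage sketch (illustrative; it assumes
// only the public lock()/unlock()/lock_shared()/unlock_shared() members
// defined above):
//
//   CortSharedMutex mu;
//   {
//       std::shared_lock<CortSharedMutex> rd(mu);  // several fibers may read
//   }
//   {
//       std::unique_lock<CortSharedMutex> wr(mu);  // exclusive; blocks on g2
//   }                                              // until readers() == 0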
102 | 103 | static constexpr unsigned _S_write_entered = 1U << (sizeof(unsigned) * __CHAR_BIT__ - 1); 104 | static constexpr unsigned _S_max_readers = ~_S_write_entered; 105 | 106 | bool write_entered() const { return state & _S_write_entered; } 107 | unsigned readers() const { return state & _S_max_readers; } 108 | }; -------------------------------------------------------------------------------- /src/include/log.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /** 4 | * @file dlog.h 5 | 6 | * @brief terminal log output macro 7 | 8 | * @version 0.1 9 | * @date 2022-05-27 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #define DLOG_STREAM(stream, format, ...) \ 24 | do { \ 25 | struct timeval tv; \ 26 | struct tm tm; \ 27 | char tbuf[28] = {0}; \ 28 | gettimeofday(&tv, NULL); \ 29 | localtime_r(&tv.tv_sec, &tm); \ 30 | strftime(tbuf, sizeof(tbuf), "%Y-%m-%d %H:%M:%S", &tm); \ 31 | fprintf(stream, "[%s.%06d] [%d %#lx] %s:%d: " format "\n", tbuf, (int)tv.tv_usec, \ 32 | getpid(), pthread_self(), __FILE__, __LINE__, ##__VA_ARGS__); \ 33 | } while (0) 34 | 35 | #define DLOG_INFO(format, ...) DLOG_STREAM(stderr, "[INFO] " format, ##__VA_ARGS__) 36 | #define DLOG_ERROR(format, ...) \ 37 | DLOG_STREAM(stderr, "[ERROR] " format ": %s", ##__VA_ARGS__, strerror(errno)) 38 | #define DLOG_WARNING(format, ...) DLOG_STREAM(stderr, "[WARNING] " format, ##__VA_ARGS__) 39 | #define DLOG_FATAL(format, ...) \ 40 | do { \ 41 | DLOG_STREAM(stderr, "[FATAL] " format ": %s", ##__VA_ARGS__, strerror(errno)); \ 42 | fflush(stdout); \ 43 | abort(); \ 44 | } while (0) 45 | 46 | #define DLOG(format, ...) DLOG_INFO(format, ##__VA_ARGS__) 47 | 48 | #define DLOG_FILE(file, format, ...) \ 49 | do { \ 50 | FILE *fp = fopen(file, "w+"); \ 51 | assert(fp != NULL); \ 52 | DLOG_STREAM(fp, format, ##__VA_ARGS__); \ 53 | fclose(fp); \ 54 | } while (0) 55 | 56 | #define DLOG_IF(expr, format, ...) 
\ 57 | do { \ 58 | if (expr) DLOG(format, ##__VA_ARGS__); \ 59 | } while (0) 60 | 61 | #ifndef NDEBUG 62 | 63 | namespace type_fmt_str_detail { 64 | template <typename T> 65 | struct helper; 66 | template <> 67 | struct helper<int> { 68 | constexpr static const char *type_str = "%d"; 69 | }; 70 | template <> 71 | struct helper<unsigned int> { 72 | constexpr static const char *type_str = "%u"; 73 | }; 74 | template <> 75 | struct helper<char> { 76 | constexpr static const char *type_str = "%c"; 77 | }; 78 | template <> 79 | struct helper<unsigned char> { 80 | constexpr static const char *type_str = "%hhu"; 81 | }; 82 | template <> 83 | struct helper<short> { 84 | constexpr static const char *type_str = "%hd"; 85 | }; 86 | template <> 87 | struct helper<unsigned short> { 88 | constexpr static const char *type_str = "%hu"; 89 | }; 90 | template <> 91 | struct helper<long> { 92 | constexpr static const char *type_str = "%ld"; 93 | }; 94 | template <> 95 | struct helper<unsigned long> { 96 | constexpr static const char *type_str = "%lu"; 97 | }; 98 | template <> 99 | struct helper<long long> { 100 | constexpr static const char *type_str = "%lld"; 101 | }; 102 | template <> 103 | struct helper<unsigned long long> { 104 | constexpr static const char *type_str = "%llu"; 105 | }; 106 | template <> 107 | struct helper<float> { 108 | constexpr static const char *type_str = "%f"; 109 | }; 110 | template <> 111 | struct helper<double> { 112 | constexpr static const char *type_str = "%lf"; 113 | }; 114 | template <> 115 | struct helper<long double> { 116 | constexpr static const char *type_str = "%Lf"; 117 | }; 118 | template <typename T> 119 | struct helper<T *> { 120 | constexpr static const char *type_str = "%p"; 121 | }; 122 | template <> 123 | struct helper<char *> { 124 | constexpr static const char *type_str = "%s"; 125 | }; 126 | template <> 127 | struct helper<const char *> { 128 | constexpr static const char *type_str = "%s"; 129 | }; 130 | } // namespace type_fmt_str_detail 131 | 132 | /** 133 | * Assert the judgment between two values. 134 | * @example DLOG_EXPR(malloc(1), !=, nullptr) 135 | * 136 | * @warning In C++11, `NULL` will throw warning: passing NULL to non-pointer 137 | * argument... You should use `nullptr` instead of `NULL`. 138 | */ 139 | #define DLOG_EXPR(val_a, op, val_b) \ 140 | do { \ 141 | decltype(val_a) a = val_a; \ 142 | decltype(val_b) b = val_b; \ 143 | if (__glibc_unlikely(!(a op b))) { \ 144 | char fmt[] = "Because " #val_a " = %???, " #val_b " = %???"; \ 145 | char tmp[sizeof(fmt) + 42]; \ 146 | snprintf(fmt, sizeof(fmt), "Because " #val_a " = %s, " #val_b " = %s", \ 147 | type_fmt_str_detail::helper<std::remove_cv< \ 148 | std::decay<decltype(a)>::type>::type>::type_str, \ 149 | type_fmt_str_detail::helper<std::remove_cv< \ 150 | std::decay<decltype(b)>::type>::type>::type_str); \ 151 | snprintf(tmp, sizeof(tmp), fmt, a, b); \ 152 | DLOG_FATAL("Assertion `" #val_a " " #op " " #val_b "` failed. %s", tmp); \ 153 | } \ 154 | } while (0) 155 | 156 | #define DLOG_ASSERT(expr, format...) \ 157 | do { \ 158 | if (__glibc_unlikely(!(expr))) { \ 159 | DLOG_FATAL("Assertion `" #expr "` failed. " format); \ 160 | } \ 161 | } while (0) 162 | 163 | #else 164 | 165 | #define DLOG_EXPR(val_a, op, val_b) \ 166 | do { \ 167 | } while (0) 168 | #define DLOG_ASSERT(expr, format...) 
\ 169 | do { \ 170 | } while (0) 171 | 172 | #endif -------------------------------------------------------------------------------- /src/include/msg_queue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "allocator.hpp" 8 | #include "common.hpp" 9 | #include "concurrent_queue.hpp" 10 | #include "config.hpp" 11 | 12 | /** 13 | * @brief 14 | * 15 | * msgq_s 16 | * 17 | * [ public msgq ][ private cn msgq ] 18 | */ 19 | 20 | namespace msgq { 21 | 22 | struct MsgBuffer; 23 | struct MsgQueue; 24 | 25 | struct MsgUDPConnPacket { 26 | uintptr_t recv_q_off; 27 | }; 28 | 29 | using msgq_handler_t = void (*)(MsgBuffer &req, void *ctx); 30 | using msgq_callback_t = void (*)(MsgBuffer &resp, void *arg); 31 | 32 | #if MSGQ_SINGLE_FIFO_ON == 1 33 | 34 | struct MsgHeader final { 35 | bool invalid_flag : 1; 36 | enum : uint8_t { REQ, RESP } msg_type : 1; 37 | uint8_t rpc_type; 38 | size_t size : 32; // Actual data size 39 | msgq_callback_t cb; 40 | void *arg; 41 | 42 | uint8_t data[0]; 43 | }; 44 | 45 | struct MsgBuffer { 46 | size_t size() const; 47 | void *get_buf() const; 48 | 49 | MsgQueue *m_q; 50 | MsgHeader *m_msg; // Address pointing to the MsgHeader 51 | size_t m_size; // Actual data size 52 | }; 53 | 54 | struct MsgQueue final { 55 | MsgQueue(); 56 | ~MsgQueue() = default; 57 | 58 | MsgHeader *alloc_msg_buffer(size_t size); 59 | void enqueue_msg(); 60 | void dequeue_msg(std::vector &hv); 61 | void free_msg_buffer(); 62 | 63 | atomic_po_val_t m_prod_head; 64 | atomic_po_val_t m_prod_tail; 65 | atomic_po_val_t m_cons_head; 66 | atomic_po_val_t m_cons_tail; 67 | 68 | constexpr static size_t SZ = msgq_ring_buf_len; 69 | 70 | uint8_t m_ring[msgq_ring_buf_len]; 71 | 72 | MsgHeader *at(size_t i); 73 | static void update_ht(atomic_po_val_t *ht, atomic_po_val_t *ht_); 74 | }; 75 | 76 | #else 77 | struct MsgHeader final { 78 | enum : uint8_t { REQ, RESP } msg_type : 1; 79 | uint8_t rpc_type; 80 | size_t size : 32; // Actual data size 81 | offset_t buf_offset; // Based on the address of MsgQueue::m_ring 82 | msgq_callback_t cb; 83 | uint64_t send_ts; 84 | void *arg; 85 | 86 | // static_assert(msgq_ring_buf_len < (1ul << 16), ""); 87 | }; 88 | 89 | struct MsgBuffer { 90 | size_t size() const; 91 | void *get_buf() const; 92 | 93 | MsgQueue *m_q; 94 | MsgHeader m_msg; 95 | }; 96 | 97 | struct MsgQueue final { 98 | MsgQueue() = default; 99 | ~MsgQueue() = default; 100 | 101 | offset_t alloc_msg_buffer(size_t size); 102 | void enqueue_msg(MsgBuffer &msg_buf); 103 | uint32_t dequeue_msg(MsgHeader *hv, size_t max_deq); 104 | void free_msg_buffer(MsgBuffer &msg_buf); 105 | 106 | ConcurrentQueue 108 | msgq_q; 109 | RingArena m_ra; 110 | }; 111 | 112 | #endif // MSGQ_SINGLE_FIFO_ON 113 | 114 | struct MsgQueueSatistics { 115 | uint64_t send_io = 0; 116 | uint64_t send_bytes = 0; 117 | uint64_t send_time = 0; 118 | uint64_t recv_io = 0; 119 | uint64_t recv_bytes = 0; 120 | uint64_t recv_time = 0; 121 | 122 | void start_sample(uint64_t &timer) { 123 | #if (RCMP_PERF_ON != 0) 124 | timer = getNsTimestamp(); 125 | #endif // RCMP_PERF_ON 126 | } 127 | 128 | void send_sample(size_t bytes, uint64_t &timer) { 129 | #if (RCMP_PERF_ON != 0) 130 | uint64_t tmp = getNsTimestamp(); 131 | send_io++; 132 | send_bytes += bytes; 133 | send_time += tmp - timer; 134 | timer = tmp; 135 | #endif // RCMP_PERF_ON 136 | } 137 | 138 | void recv_sample(size_t bytes, uint64_t &timer) { 139 | #if (RCMP_PERF_ON != 0) 140 | uint64_t 
tmp = getNsTimestamp(); 141 | recv_io++; 142 | recv_bytes += bytes; 143 | recv_time += tmp - timer; 144 | timer = tmp; 145 | #endif // RCMP_PERF_ON 146 | } 147 | }; 148 | 149 | struct MsgQueueNexus { 150 | constexpr static size_t max_msgq_handler = (1 << (sizeof(uint8_t) * 8)); 151 | 152 | MsgQueueNexus(void *msgq_zone_start_addr); 153 | 154 | MsgQueue *GetPublicMsgQ() const { return m_public_msgq; } 155 | 156 | void *GetMsgQZoneStartAddr() const { return m_msgq_zone_start_addr; } 157 | 158 | void register_req_func(uint8_t rpc_type, msgq_handler_t handler); 159 | 160 | static msgq_handler_t __handlers[max_msgq_handler]; 161 | 162 | void *m_msgq_zone_start_addr; 163 | MsgQueue *m_public_msgq; 164 | MsgQueueSatistics m_stats; 165 | }; 166 | 167 | struct MsgQueueRPC { 168 | MsgQueueRPC(MsgQueueNexus *nexus, MsgQueue *send_queue, MsgQueue *recv_queue, void *ctx); 169 | 170 | /** 171 | * @brief Allocate msg buffer 172 | * 173 | * @warning The operation is a blocking call 174 | * 175 | * @param size 176 | * @return MsgBuffer 177 | */ 178 | MsgBuffer alloc_msg_buffer(size_t size); 179 | 180 | /** 181 | * @brief Enqueue a request message 182 | * 183 | * @param rpc_type 184 | * @param msg_buf 185 | * @param cb 186 | * @param arg 187 | */ 188 | void enqueue_request(uint8_t rpc_type, MsgBuffer &msg_buf, msgq_callback_t cb, void *arg); 189 | 190 | /** 191 | * @brief Enqueue a response message 192 | * 193 | * @param req_buf 194 | * @param resp_buf 195 | */ 196 | void enqueue_response(MsgBuffer &req_buf, MsgBuffer &resp_buf); 197 | 198 | /** 199 | * @brief rpc queue polling once 200 | * 201 | */ 202 | void run_event_loop_once(); 203 | 204 | /** 205 | * @brief free msg buffer 206 | * 207 | * @param msg_buf 208 | */ 209 | void free_msg_buffer(MsgBuffer &msg_buf); 210 | 211 | MsgQueueNexus *m_nexus; 212 | MsgQueue *m_send_queue; 213 | MsgQueue *m_recv_queue; 214 | void *m_ctx; 215 | }; 216 | 217 | } // namespace msgq 218 | 219 | struct MsgQueueManager { 220 | const static size_t RING_ELEM_SIZE = sizeof(msgq::MsgQueue); 221 | 222 | void *start_addr; 223 | uint32_t ring_cnt; 224 | std::unique_ptr> msgq_allocator; 225 | std::unique_ptr nexus; 226 | std::unique_ptr rpc; 227 | 228 | msgq::MsgQueue *allocQueue(); 229 | void freeQueue(msgq::MsgQueue *msgq); 230 | }; -------------------------------------------------------------------------------- /src/include/page_table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "allocator.hpp" 8 | #include "common.hpp" 9 | #include "concurrent_hashmap.hpp" 10 | #include "lock.hpp" 11 | #include "robin_hood.h" 12 | #include "stats.hpp" 13 | 14 | struct MasterToDaemonConnection; 15 | struct MasterToClientConnection; 16 | 17 | struct PageRackMetadata { 18 | uint32_t rack_id; 19 | mac_id_t daemon_id; 20 | CortSharedMutex latch; 21 | }; 22 | 23 | struct RackMacTable { 24 | size_t GetCurrentAllocatedPageNum() const { return current_allocated_page_num; } 25 | size_t GetMaxFreePageNum() const { return max_free_page_num; } 26 | 27 | bool with_cxl; 28 | MasterToDaemonConnection *daemon_connect; 29 | size_t max_free_page_num; 30 | size_t current_allocated_page_num; 31 | std::vector client_connect_table; 32 | }; 33 | 34 | struct PageDirectory { 35 | PageRackMetadata *FindPage(page_id_t page_id); 36 | PageRackMetadata *AddPage(RackMacTable *rack_table, page_id_t page_id); 37 | void RemovePage(RackMacTable *rack_table, page_id_t page_id); 38 | 39 | ConcurrentHashMap table; 40 
| std::unique_ptr page_id_allocator; 41 | }; 42 | 43 | struct DaemonToClientConnection; 44 | struct DaemonToDaemonConnection; 45 | 46 | struct PageVMMapMetadata { 47 | offset_t cxl_memory_offset; // Relative to `format.page_data_start_addr` 48 | std::set<DaemonToClientConnection *> ref_client; 49 | std::set<DaemonToDaemonConnection *> ref_daemon; 50 | }; 51 | 52 | struct RemotePageRefMeta { 53 | volatile int version; 54 | volatile bool swapping = false; 55 | FreqStats stats; 56 | uintptr_t remote_page_addr; 57 | uint32_t remote_page_rkey; 58 | DaemonToDaemonConnection *remote_page_daemon_conn; 59 | 60 | RemotePageRefMeta(uint64_t half_life_us) : version(rand()) {} 61 | 62 | FreqStats::Heatness WriteHeat() { return stats.m_wr_heat.heat(rdtsc() / 1e3); } 63 | FreqStats::Heatness ReadHeat() { return stats.m_rd_heat.heat(rdtsc() / 1e3); } 64 | FreqStats::Heatness UpdateWriteHeat() { return stats.add_wr(rdtsc() / 1e3); } 65 | FreqStats::Heatness UpdateReadHeat() { return stats.add_rd(rdtsc() / 1e3); } 66 | void ClearHeat() { stats.clear(); } 67 | }; 68 | 69 | struct PageMetadata { 70 | uint32_t version; 71 | CortSharedMutex page_ref_lock; 72 | CortMutex remote_ref_lock; 73 | PageVMMapMetadata *vm_meta = nullptr; 74 | RemotePageRefMeta *remote_ref_meta = nullptr; 75 | }; 76 | 77 | struct PageTableManager { 78 | template <typename F, typename... Args> 79 | PageMetadata *FindOrCreatePageMeta(page_id_t page_id, F &&fn, Args &&...args) { 80 | auto p = table.find_or_emplace(page_id, [&]() { 81 | PageMetadata *page_meta = new PageMetadata(); 82 | fn(page_meta, std::move(args)...); 83 | return page_meta; 84 | }); 85 | return p.first->second; 86 | } 87 | 88 | PageMetadata *FindOrCreatePageMeta(page_id_t page_id) { 89 | auto p = table.find_or_emplace(page_id, [&]() { return new PageMetadata(); }); 90 | return p.first->second; 91 | } 92 | 93 | template <typename F, typename... Args> 94 | RemotePageRefMeta *FindOrCreateRemotePageRefMeta(PageMetadata *page_meta, F &&fn, 95 | Args &&...args) { 96 | if (page_meta->remote_ref_meta == nullptr) { 97 | std::unique_lock<CortMutex> page_remote_ref_lock(page_meta->remote_ref_lock); 98 | if (page_meta->remote_ref_meta == nullptr) { 99 | RemotePageRefMeta *remote_ref_meta = new RemotePageRefMeta(heat_half_life_us); 100 | fn(remote_ref_meta, std::move(args)...); 101 | page_meta->remote_ref_meta = remote_ref_meta; 102 | } 103 | } 104 | return page_meta->remote_ref_meta; 105 | } 106 | 107 | RemotePageRefMeta *FindOrCreateRemotePageRefMeta(PageMetadata *page_meta) { 108 | if (page_meta->remote_ref_meta == nullptr) { 109 | std::unique_lock<CortMutex> page_remote_ref_lock(page_meta->remote_ref_lock); 110 | if (page_meta->remote_ref_meta == nullptr) { 111 | page_meta->remote_ref_meta = new RemotePageRefMeta(heat_half_life_us); 112 | } 113 | } 114 | return page_meta->remote_ref_meta; 115 | } 116 | 117 | void EraseRemotePageRefMeta(PageMetadata *page_meta); 118 | PageVMMapMetadata *AllocPageMemory(); 119 | void FreePageMemory(PageVMMapMetadata *page_vm_meta); 120 | void ApplyPageMemory(PageMetadata *page_meta, PageVMMapMetadata *page_vm_meta); 121 | void CancelPageMemory(PageMetadata *page_meta); 122 | bool PickUnvisitPage(page_id_t &page_id, PageMetadata *&page_meta); 123 | std::vector<std::pair<page_id_t, PageMetadata *>> RandomPickVMPage(size_t n); 124 | 125 | // TODO: release page meta resource when vm_meta and remote_ref_meta are nullptr 126 | 127 | bool NearlyFull() const { return current_used_page_num == max_data_page_num; } 128 | 129 | bool TestAllocPageMemory(size_t count = 1) const { 130 | return current_used_page_num + count <= total_page_num; 131 | } 132 | 133 | size_t GetCurrentUsedPageNum() const { return current_used_page_num; } 134 | 
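// FindOrCreateRemotePageRefMeta() above relies on double-checked locking: the
// unlocked nullptr test keeps the hot path off the CortMutex, and the second
// test under the lock stops two racing fibers from both allocating the meta.
// Sketch of the same pattern (illustrative only; mirrors the code above):
//
//   RemotePageRefMeta *ref = page_meta->remote_ref_meta;          // fast path
//   if (ref == nullptr) {
//       std::unique_lock<CortMutex> lk(page_meta->remote_ref_lock);
//       if (page_meta->remote_ref_meta == nullptr)                // re-check
//           page_meta->remote_ref_meta = new RemotePageRefMeta(heat_half_life_us);
//       ref = page_meta->remote_ref_meta;
//   }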
135 | uint64_t heat_half_life_us; 136 | 137 | size_t total_page_num; // Number of all pages 138 | size_t max_swap_page_num; // Number of pages in swap area 139 | size_t max_data_page_num; // Number of all available data pages 140 | 141 | std::atomic current_used_page_num; // Number of data pages currently in use 142 | 143 | RandomAccessMap table; 144 | std::queue> unvisited_pages; // lock unsafe 145 | std::unique_ptr> page_allocator; 146 | }; 147 | 148 | struct LocalPageCache { 149 | FreqStats::Heatness UpdateHeat() { return stats.add_wr(rdtsc() / 1000); } 150 | FreqStats::Heatness Heat() { return stats.m_wr_heat.heat(rdtsc() / 1000); } 151 | 152 | FreqStats stats; 153 | offset_t offset; 154 | }; 155 | 156 | struct RemotePageHint { 157 | uint32_t version = 0; 158 | uint64_t hint = 0; 159 | }; 160 | 161 | struct PageCacheMeta { 162 | Mutex ref_lock; 163 | LocalPageCache *cache = nullptr; 164 | RemotePageHint hint; 165 | }; 166 | 167 | struct PageCacheTable { 168 | ~PageCacheTable(); 169 | 170 | PageCacheMeta *FindOrCreateCacheMeta(page_id_t page_id); 171 | LocalPageCache *FindCache(page_id_t page_id); 172 | LocalPageCache *FindCache(PageCacheMeta *cache_meta) const; 173 | LocalPageCache *AddCache(PageCacheMeta *cache_meta, offset_t offset); 174 | void RemoveCache(PageCacheMeta *cache_meta); 175 | 176 | SharedMutex table_lock; 177 | 178 | robin_hood::unordered_flat_map> table; 179 | }; 180 | 181 | struct PageThreadLocalCache; 182 | 183 | struct PageThreadCacheManager { 184 | void insert(PageThreadLocalCache *tcache); 185 | void erase(PageThreadLocalCache *tcache); 186 | 187 | template 188 | void foreach_all(F &&fn, Args &&...args) { 189 | std::shared_lock lck(mutex_); 190 | for (auto &tcache : tcache_list_) { 191 | fn(*tcache, std::move(args)...); 192 | } 193 | } 194 | 195 | std::shared_mutex mutex_; 196 | std::list tcache_list_; 197 | }; 198 | 199 | class PageThreadLocalCache { 200 | public: 201 | static PageThreadLocalCache &getInstance(PageThreadCacheManager &mgr) { 202 | static thread_local PageThreadLocalCache instance(mgr); 203 | return instance; 204 | } 205 | 206 | PageCacheTable page_cache_table; 207 | 208 | private: 209 | PageThreadCacheManager &mgr; 210 | PageThreadLocalCache(PageThreadCacheManager &mgr) : mgr(mgr) { mgr.insert(this); } 211 | ~PageThreadLocalCache() { mgr.erase(this); } 212 | }; -------------------------------------------------------------------------------- /src/include/promise.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | template 11 | class SpinFuture; 12 | 13 | template 14 | class SpinPromise { 15 | public: 16 | SpinPromise() : ready_(false) {} 17 | ~SpinPromise() {} 18 | 19 | SpinFuture get_future() { return SpinFuture(this); } 20 | 21 | void set_value(const T &value) { 22 | value_ = value; 23 | ready_.store(true, std::memory_order_release); 24 | } 25 | 26 | private: 27 | friend class SpinFuture; 28 | 29 | T value_; 30 | std::atomic_bool ready_; 31 | }; 32 | 33 | template 34 | class SpinFuture { 35 | public: 36 | SpinFuture(SpinPromise *promise) : promise_(promise) {} 37 | ~SpinFuture() {} 38 | 39 | const T &get() const { 40 | while (!promise_->ready_.load(std::memory_order_acquire)) { 41 | // spin 42 | } 43 | return promise_->value_; 44 | } 45 | 46 | template 47 | std::future_status wait_for(const std::chrono::duration<_Rep, _Period> &__rel) const { 48 | if (promise_->ready_.load(std::memory_order_acquire)) return 
std::future_status::ready; 49 | if (__rel > __rel.zero()) { 50 | std::this_thread::sleep_for(__rel); 51 | if (promise_->ready_.load(std::memory_order_acquire)) return std::future_status::ready; 52 | } 53 | return std::future_status::timeout; 54 | } 55 | 56 | private: 57 | SpinPromise *promise_; 58 | }; 59 | 60 | template <> 61 | class SpinPromise; 62 | template <> 63 | class SpinFuture; 64 | 65 | template <> 66 | class SpinPromise { 67 | public: 68 | SpinPromise() : ready_(false) {} 69 | ~SpinPromise() {} 70 | 71 | SpinFuture get_future(); 72 | 73 | void set_value() { ready_.store(true, std::memory_order_release); } 74 | 75 | private: 76 | friend class SpinFuture; 77 | 78 | std::atomic_bool ready_; 79 | }; 80 | 81 | template <> 82 | class SpinFuture { 83 | public: 84 | SpinFuture(SpinPromise *promise) : promise_(promise) {} 85 | ~SpinFuture() {} 86 | 87 | void get() const { 88 | while (!promise_->ready_.load(std::memory_order_acquire)) { 89 | // spin 90 | } 91 | } 92 | 93 | void wait() { throw std::runtime_error("wait dead spinning"); } 94 | 95 | template 96 | std::future_status wait_for(const std::chrono::duration<_Rep, _Period> &__rel) const { 97 | if (promise_->ready_.load(std::memory_order_acquire)) return std::future_status::ready; 98 | if (__rel > __rel.zero()) { 99 | std::this_thread::sleep_for(__rel); 100 | if (promise_->ready_.load(std::memory_order_acquire)) return std::future_status::ready; 101 | } 102 | return std::future_status::timeout; 103 | } 104 | 105 | private: 106 | SpinPromise *promise_; 107 | }; 108 | 109 | inline SpinFuture SpinPromise::get_future() { return SpinFuture(this); } 110 | 111 | template 112 | using CortPromise = boost::fibers::promise; 113 | template 114 | using CortFuture = boost::fibers::future; 115 | 116 | struct FutureControlBlock { 117 | bool ready = false; 118 | boost::fibers::mutex mtx; 119 | boost::fibers::condition_variable cv; 120 | 121 | void clear() { ready = false; } 122 | 123 | void get() { 124 | std::unique_lock lck(mtx); 125 | cv.wait(lck, [&]() { return ready; }); 126 | } 127 | void set_value() { 128 | { 129 | std::unique_lock lck(mtx); 130 | ready = true; 131 | } 132 | cv.notify_all(); 133 | } 134 | }; -------------------------------------------------------------------------------- /src/include/proto/rpc_caller.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "utils.hpp" 4 | 5 | namespace detail { 6 | 7 | template 8 | struct RpcCallerWrapperHelper { 9 | using FT = function_traits; 10 | using SelfContext = typename std::remove_reference>::type; 11 | using PeerContext = typename std::remove_reference>::type; 12 | using RequestType = typename std::remove_reference>::type; 13 | using ResponseHandleType = 14 | typename std::remove_reference>::type; 15 | 16 | using ResponseType = typename container_traits::type; 17 | }; 18 | 19 | template 20 | struct RpcCallerWrapper; 21 | 22 | template 23 | struct ErpcFuncWrapper : public RpcCallerWrapperHelper { 24 | static RpcFunc func; 25 | static bool registed; 26 | }; 27 | 28 | template 29 | bool ErpcFuncWrapper::registed = false; 30 | template 31 | RpcFunc ErpcFuncWrapper::func; 32 | 33 | template 34 | struct MsgqRpcFuncWrapper : public RpcCallerWrapperHelper { 35 | static RpcFunc func; 36 | static bool registed; 37 | }; 38 | 39 | template 40 | bool MsgqRpcFuncWrapper::registed = false; 41 | template 42 | RpcFunc MsgqRpcFuncWrapper::func; 43 | 44 | /** 45 | * @brief Binding RPCs with the BIND_RPC_TYPE_STRUCT() macro 46 | * 47 | * 
@warning The call must be on a different line in the same file. 48 | */ 49 | #define BIND_RPC_TYPE_STRUCT(rpc_func) \ 50 | template <> \ 51 | struct detail::RpcCallerWrapper \ 52 | : public detail::RpcCallerWrapperHelper { \ 53 | constexpr static uint8_t rpc_type = __LINE__; \ 54 | static_assert(rpc_type != 0, "overflow"); \ 55 | }; 56 | 57 | /** 58 | * @brief Get the structure to which the rpc is bound. 59 | */ 60 | #define RPC_TYPE_STRUCT(rpc_func) ::detail::RpcCallerWrapper 61 | 62 | } // namespace detail 63 | -------------------------------------------------------------------------------- /src/include/proto/rpc_client.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.hpp" 4 | #include "impl.hpp" 5 | #include "proto/rpc_adaptor.hpp" 6 | 7 | namespace rpc_client { 8 | 9 | struct RemovePageCacheRequest { 10 | mac_id_t mac_id; 11 | page_id_t page_id; 12 | }; 13 | struct RemovePageCacheReply { 14 | bool ret; 15 | }; 16 | void removePageCache(ClientContext& client_context, ClientToDaemonConnection& daemon_connection, 17 | RemovePageCacheRequest& req, 18 | ResponseHandle& resp_handle); 19 | 20 | struct GetCurrentWriteDataRequest { 21 | mac_id_t mac_id; 22 | const void* dio_write_buf; 23 | size_t dio_write_size; 24 | }; 25 | struct GetCurrentWriteDataReply { 26 | uint8_t data[0]; 27 | }; 28 | void getCurrentWriteData(ClientContext& client_context, ClientToDaemonConnection& daemon_connection, 29 | GetCurrentWriteDataRequest& req, 30 | ResponseHandle& resp_handle); 31 | 32 | struct GetPagePastAccessFreqRequest { 33 | mac_id_t mac_id; 34 | int num_detect_pages; 35 | page_id_t pages[128]; 36 | }; 37 | struct GetPagePastAccessFreqReply { 38 | float avg_heat; 39 | page_id_t coldest_page_id; 40 | float coldest_page_heat; 41 | float coldest_page_rd_heat; 42 | }; 43 | void getPagePastAccessFreq(ClientContext& client_context, 44 | ClientToDaemonConnection& daemon_connection, 45 | GetPagePastAccessFreqRequest& req, 46 | ResponseHandle& resp_handle); 47 | 48 | } // namespace rpc_client -------------------------------------------------------------------------------- /src/include/proto/rpc_daemon.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "common.hpp" 6 | #include "impl.hpp" 7 | #include "log.hpp" 8 | #include "proto/rpc_adaptor.hpp" 9 | #include "rcmp.hpp" 10 | #include "utils.hpp" 11 | 12 | namespace rpc_daemon { 13 | 14 | struct JoinRackRequest { 15 | mac_id_t mac_id; // unused 16 | IPv4String client_ipv4; 17 | uint16_t client_port; 18 | rack_id_t rack_id; 19 | }; 20 | struct JoinRackReply { 21 | mac_id_t client_mac_id; 22 | mac_id_t daemon_mac_id; 23 | float half_life_us; 24 | }; 25 | /** 26 | * @brief Adds client to the rack. Called when the connection is established. 27 | * 28 | * @param daemon_context 29 | * @param client_connection It needs to be dereferenced to the object requested from the heap, after 30 | * which its lifecycle will be maintained by the MasterContext. 
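 *
 * Illustrative request construction (a sketch: the field values are invented
 * for the example, the string-constructed IPv4String is an assumption, and
 * the actual transport call lives in the rpc adaptor, not shown here):
 *
 * @code
 *   JoinRackRequest req;
 *   req.client_ipv4 = IPv4String("10.0.0.2");  // hypothetical client address
 *   req.client_port = 31850;                   // hypothetical client port
 *   req.rack_id = 0;                           // rack this client plugs into
 *   // the JoinRackReply then fixes client_mac_id / daemon_mac_id for both ends
 * @endcode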
31 | * @param req 32 | * @param resp_handle 33 | */ 34 | void joinRack(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 35 | JoinRackRequest& req, ResponseHandle& resp_handle); 36 | 37 | struct CrossRackConnectRequest { 38 | mac_id_t mac_id; 39 | IPv4String ip; 40 | uint16_t port; 41 | rack_id_t rack_id; 42 | mac_id_t conn_mac_id; 43 | }; 44 | struct CrossRackConnectReply { 45 | mac_id_t daemon_mac_id; 46 | uint16_t rdma_port; 47 | }; 48 | void crossRackConnect(DaemonContext& daemon_context, DaemonToDaemonConnection& daemon_connection, 49 | CrossRackConnectRequest& req, 50 | ResponseHandle& resp_handle); 51 | 52 | struct GetPageCXLRefOrProxyRequest { 53 | mac_id_t mac_id; 54 | enum { 55 | READ, 56 | WRITE, 57 | WRITE_RAW, 58 | CAS, 59 | } type; 60 | rcmp::GAddr gaddr; 61 | uint32_t hint_version; 62 | uint64_t hint; 63 | union { 64 | struct { // type == WRITE 65 | size_t cn_write_size; 66 | const void* cn_write_buf; 67 | } write; 68 | struct { // type == READ 69 | size_t cn_read_size; 70 | } read; 71 | struct { // type == WRITE_RAW 72 | size_t cn_write_raw_size; 73 | uint8_t cn_write_raw_buf[0]; 74 | } write_raw; 75 | struct { // type == CAS 76 | size_t expected; 77 | size_t desired; 78 | } cas; 79 | } u; 80 | }; 81 | struct GetPageCXLRefOrProxyReply { 82 | bool refs; 83 | uint32_t hint_version; 84 | uint64_t hint; 85 | union { 86 | struct { // refs == true 87 | offset_t offset; 88 | }; 89 | struct { // refs == false 90 | struct { // cas 91 | uint64_t old_val; 92 | }; 93 | struct { // read 94 | uint8_t read_data[0]; 95 | }; 96 | }; 97 | }; 98 | }; 99 | /** 100 | * @brief Get a reference to the page. If the local Page Table does not have that page id, a remote 101 | * io is triggered. 102 | * 103 | * @param daemon_context 104 | * @param client_connection 105 | * @param req 106 | * @param resp_handle 107 | */ 108 | void getPageCXLRefOrProxy(DaemonContext& daemon_context, 109 | DaemonToClientConnection& client_connection, 110 | GetPageCXLRefOrProxyRequest& req, 111 | ResponseHandle& resp_handle); 112 | 113 | struct AllocPageMemoryRequest { 114 | mac_id_t mac_id; 115 | page_id_t start_page_id; 116 | size_t count; 117 | }; 118 | struct AllocPageMemoryReply { 119 | bool ret; 120 | }; 121 | /** 122 | * @brief Allocate a page physical address space 123 | * 124 | * @param daemon_context 125 | * @param master_connection 126 | * @param req 127 | * @param resp_handle 128 | */ 129 | void allocPageMemory(DaemonContext& daemon_context, DaemonToMasterConnection& master_connection, 130 | AllocPageMemoryRequest& req, 131 | ResponseHandle& resp_handle); 132 | 133 | struct AllocRequest { 134 | mac_id_t mac_id; 135 | size_t size; 136 | }; 137 | struct AllocReply { 138 | rcmp::GAddr gaddr; 139 | }; 140 | void alloc(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 141 | AllocRequest& req, ResponseHandle& resp_handle); 142 | struct AllocPageRequest { 143 | mac_id_t mac_id; 144 | size_t count; 145 | }; 146 | struct AllocPageReply { 147 | page_id_t start_page_id; // Allocated start page id 148 | size_t start_count; // Number actually allocated in the requesting rack 149 | }; 150 | /** 151 | * @brief Allocate for a page 152 | * 153 | * @param daemon_context 154 | * @param client_connection 155 | * @param req 156 | * @param resp_handle 157 | */ 158 | void allocPage(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 159 | AllocPageRequest& req, ResponseHandle& resp_handle); 160 | 161 | struct FreePageRequest { 162 | mac_id_t mac_id; 163 | 
page_id_t start_page_id; 164 | size_t count; 165 | }; 166 | struct FreePageReply { 167 | bool ret; 168 | }; 169 | /** 170 | * @brief Free a page 171 | * 172 | * @param master_context 173 | * @param daemon_connection 174 | * @param req 175 | * @param resp_handle 176 | */ 177 | void freePage(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 178 | FreePageRequest& req, ResponseHandle& resp_handle); 179 | 180 | struct FreeRequest { 181 | mac_id_t mac_id; 182 | rcmp::GAddr gaddr; 183 | size_t n; 184 | }; 185 | struct FreeReply { 186 | bool ret; 187 | }; 188 | void free(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 189 | FreeRequest& req, ResponseHandle& resp_handle); 190 | 191 | struct GetPageRDMARefRequest { 192 | mac_id_t mac_id; 193 | page_id_t page_id; 194 | }; 195 | struct GetPageRDMARefReply { 196 | uintptr_t addr; 197 | uint32_t rkey; 198 | }; 199 | /** 200 | * @brief Get a reference to the page. If the local Page Table does not have that page id, a remote 201 | * io is triggered. 202 | * 203 | * @param daemon_context 204 | * @param daemon_connection 205 | * @param req 206 | * @param resp_handle 207 | */ 208 | void getPageRDMARef(DaemonContext& daemon_context, DaemonToDaemonConnection& daemon_connection, 209 | GetPageRDMARefRequest& req, ResponseHandle& resp_handle); 210 | 211 | struct DelPageRDMARefRequest { 212 | mac_id_t mac_id; 213 | page_id_t page_id; // Preparing to delete the page id of the ref 214 | }; 215 | struct DelPageRDMARefReply { 216 | bool ret; 217 | }; 218 | /** 219 | * @brief Removes a reference to a page. 220 | * 221 | * @param daemon_context 222 | * @param daemon_connection 223 | * @param req 224 | * @param resp_handle 225 | */ 226 | void delPageRDMARef(DaemonContext& daemon_context, DaemonToDaemonConnection& daemon_connection, 227 | DelPageRDMARefRequest& req, ResponseHandle& resp_handle); 228 | 229 | struct MigratePageRequest { 230 | mac_id_t mac_id; 231 | page_id_t page_id; 232 | page_id_t swap_page_id; 233 | uintptr_t swapout_page_addr; // When `swapout_page_addr == 0` and `swapout_page_rkey == 0`, it 234 | // means no swapout. 
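// Illustrative: a pure swap-in, where the requester has no victim page to
// push back, zeroes the whole swap-out descriptor (a sketch mirroring the
// convention documented above):
//   MigratePageRequest req;
//   req.swapout_page_addr = 0;  // no swapout ...
//   req.swapout_page_rkey = 0;  // ... signalled only when both fields are zero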
235 | uintptr_t swapin_page_addr; 236 | uint32_t swapout_page_rkey; 237 | uint32_t swapin_page_rkey; 238 | }; 239 | struct MigratePageReply { 240 | bool swapped; 241 | }; 242 | /** 243 | * @brief 244 | * 245 | * @param daemon_context 246 | * @param daemon_connection 247 | * @param req 248 | * @param resp_handle 249 | */ 250 | void migratePage(DaemonContext& daemon_context, DaemonToDaemonConnection& daemon_connection, 251 | MigratePageRequest& req, ResponseHandle& resp_handle); 252 | 253 | struct TryDelPageRequest { 254 | mac_id_t mac_id; 255 | page_id_t page_id; 256 | }; 257 | struct TryDelPageReply { 258 | bool ret; 259 | }; 260 | void tryDelPage(DaemonContext& daemon_context, DaemonToMasterConnection& master_connection, 261 | TryDelPageRequest& req, ResponseHandle& resp_handle); 262 | 263 | /************************* for test ***************************/ 264 | 265 | struct __TestDataSend1Request { 266 | mac_id_t mac_id; 267 | size_t size; 268 | int data[64]; 269 | }; 270 | struct __TestDataSend1Reply { 271 | size_t size; 272 | int data[64]; 273 | }; 274 | 275 | struct __TestDataSend2Request { 276 | mac_id_t mac_id; 277 | size_t size; 278 | int data[72]; 279 | }; 280 | struct __TestDataSend2Reply { 281 | size_t size; 282 | int data[72]; 283 | }; 284 | 285 | void __testdataSend1(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 286 | __TestDataSend1Request& req, 287 | ResponseHandle<__TestDataSend1Reply>& resp_handle); 288 | 289 | void __testdataSend2(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 290 | __TestDataSend2Request& req, 291 | ResponseHandle<__TestDataSend2Reply>& resp_handle); 292 | 293 | struct __notifyPerfRequest { 294 | mac_id_t mac_id; 295 | }; 296 | struct __notifyPerfReply {}; 297 | void __notifyPerf(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 298 | __notifyPerfRequest& req, ResponseHandle<__notifyPerfReply>& resp_handle); 299 | 300 | struct __stopPerfRequest { 301 | mac_id_t mac_id; 302 | }; 303 | struct __stopPerfReply {}; 304 | void __stopPerf(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 305 | __stopPerfRequest& req, ResponseHandle<__stopPerfReply>& resp_handle); 306 | 307 | } // namespace rpc_daemon 308 | -------------------------------------------------------------------------------- /src/include/proto/rpc_master.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.hpp" 4 | #include "impl.hpp" 5 | #include "proto/rpc_adaptor.hpp" 6 | #include "utils.hpp" 7 | 8 | namespace rpc_master { 9 | 10 | struct JoinDaemonRequest { 11 | mac_id_t mac_id; // unused 12 | IPv4String ip; 13 | uint16_t port; 14 | rack_id_t rack_id; 15 | bool with_cxl; 16 | size_t free_page_num; 17 | }; 18 | struct JoinDaemonReply { 19 | mac_id_t daemon_mac_id; 20 | mac_id_t master_mac_id; 21 | uint16_t rdma_port; 22 | 23 | struct RackInfo { 24 | rack_id_t rack_id; 25 | mac_id_t daemon_id; 26 | IPv4String daemon_ipv4; 27 | uint16_t daemon_erpc_port; 28 | uint16_t daemon_rdma_port; 29 | }; 30 | 31 | size_t other_rack_count; 32 | RackInfo other_rack_infos[0]; 33 | }; 34 | /** 35 | * @brief Adds the daemon to the cluster. Called when a connection is established. 36 | * 37 | * @param master_context 38 | * @param daemon_connection It needs to be dereferenced to the object requested from the heap, after 39 | * which its lifecycle will be maintained by the MasterContext. 
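 *
 * On success the reply also carries every rack already in the cluster:
 * other_rack_infos is a flexible array member, so the reply buffer is
 * presumably sized as sizeof(JoinDaemonReply) + other_rack_count *
 * sizeof(RackInfo), and the joining daemon can walk those entries to set up
 * its cross-rack connections (see rpc_daemon::crossRackConnect).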
40 | * @param req 41 | * @param resp_handle 42 | */ 43 | void joinDaemon(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 44 | JoinDaemonRequest& req, ResponseHandle& resp_handle); 45 | 46 | struct JoinClientRequest { 47 | mac_id_t mac_id; // unused 48 | rack_id_t rack_id; 49 | }; 50 | struct JoinClientReply { 51 | mac_id_t mac_id; 52 | }; 53 | /** 54 | * @brief Adds the client to the cluster. Called when a connection is established. 55 | * 56 | * @param master_context 57 | * @param client_connection It needs to be dereferenced to the object requested from the heap, after 58 | * which its lifecycle will be maintained by the MasterContext. 59 | * @param req 60 | * @param resp_handle 61 | */ 62 | void joinClient(MasterContext& master_context, MasterToClientConnection& client_connection, 63 | JoinClientRequest& req, ResponseHandle& resp_handle); 64 | 65 | struct AllocPageRequest { 66 | mac_id_t mac_id; 67 | size_t count; 68 | }; 69 | struct AllocPageReply { 70 | page_id_t current_start_page_id; // Allocated start page id 71 | size_t current_page_count; // Number actually allocated in the requesting rack 72 | page_id_t other_start_page_id; 73 | size_t other_page_count; 74 | }; 75 | /** 76 | * @brief 77 | * Allocate a page. this operation will expect a call to `allocPageMemory()` on the daemon side to 78 | * allocate the CXL physical address. If the daemon is full, this operation will randomly send this 79 | * function to other daemons for allocation. 80 | * 81 | * @param master_context 82 | * @param daemon_connection 83 | * @param req 84 | * @param resp_handle 85 | */ 86 | void allocPage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 87 | AllocPageRequest& req, ResponseHandle& resp_handle); 88 | 89 | struct FreePageRequest { 90 | mac_id_t mac_id; 91 | page_id_t start_page_id; 92 | size_t count; 93 | }; 94 | struct FreePageReply { 95 | bool ret; 96 | }; 97 | /** 98 | * @brief Free a page 99 | * 100 | * @param master_context 101 | * @param daemon_connection 102 | * @param req 103 | * @param resp_handle 104 | */ 105 | void freePage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 106 | FreePageRequest& req, ResponseHandle& resp_handle); 107 | 108 | struct GetRackDaemonByPageIDRequest { 109 | page_id_t page_id; 110 | }; 111 | struct GetRackDaemonByPageIDReply { 112 | IPv4String dest_daemon_ipv4; 113 | uint16_t dest_daemon_port; 114 | rack_id_t rack_id; 115 | }; 116 | /** 117 | * @brief Get the IPv4 address of the daemon corresponding to rack based on the page id. This call 118 | * should be used in the remote direct io case of the daemon. 119 | * 120 | * @param master_context 121 | * @param client_connection 122 | * @param req 123 | * @param resp_handle 124 | */ 125 | void getRackDaemonByPageID(MasterContext& master_context, 126 | MasterToDaemonConnection& daemon_connection, 127 | GetRackDaemonByPageIDRequest& req, 128 | ResponseHandle& resp_handle); 129 | 130 | struct LatchRemotePageRequest { 131 | mac_id_t mac_id; 132 | bool exclusive; 133 | page_id_t page_id; 134 | }; 135 | struct LatchRemotePageReply { 136 | rack_id_t dest_rack_id; 137 | mac_id_t dest_daemon_id; 138 | }; 139 | /** 140 | * @brief Get and latch the remote page from being swapped. 
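 *
 * Pairs with unLatchRemotePage(): while latched, the page's placement is
 * pinned so a concurrent migration cannot move it mid-access. Illustrative
 * sequence (a sketch; variable names are invented for the example):
 *
 * @code
 *   LatchRemotePageRequest req;
 *   req.mac_id = my_mac_id;   // hypothetical caller id
 *   req.exclusive = false;    // a shared latch is enough for a read
 *   req.page_id = pid;
 *   // the reply names the rack/daemon currently holding the page; once the
 *   // access finishes, send UnLatchRemotePageRequest for the same page_id.
 * @endcode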
141 | * 142 | * @param master_context 143 | * @param daemon_connection 144 | * @param req 145 | * @param resp_handle 146 | */ 147 | void latchRemotePage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 148 | LatchRemotePageRequest& req, 149 | ResponseHandle& resp_handle); 150 | 151 | struct UnLatchRemotePageRequest { 152 | mac_id_t mac_id; 153 | bool exclusive; 154 | page_id_t page_id; 155 | }; 156 | struct UnLatchRemotePageReply { 157 | bool ret; 158 | }; 159 | /** 160 | * @brief Unlatch remote page 161 | * 162 | * @param master_context 163 | * @param daemon_connection 164 | * @param req 165 | * @param resp_handle 166 | */ 167 | void unLatchRemotePage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 168 | UnLatchRemotePageRequest& req, 169 | ResponseHandle& resp_handle); 170 | 171 | struct tryMigratePageRequest { 172 | mac_id_t mac_id; 173 | bool exclusive; 174 | page_id_t page_id; 175 | float page_heat; 176 | page_id_t page_id_swap; 177 | }; 178 | struct tryMigratePageReply { 179 | bool ret; 180 | }; 181 | /** 182 | * @brief Try migrate page, if success, all pages will locked. You need call `MigratePageDone` when 183 | * migrating is done. 184 | * 185 | * @param master_context 186 | * @param daemon_connection 187 | * @param req 188 | * @param resp_handle 189 | */ 190 | void tryMigratePage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 191 | tryMigratePageRequest& req, ResponseHandle& resp_handle); 192 | 193 | struct MigratePageDoneRequest { 194 | mac_id_t mac_id; 195 | page_id_t page_id; // Swapin page (originally at the far end) 196 | mac_id_t new_daemon_id; // Your own daemon id 197 | rack_id_t new_rack_id; // Your own rack id 198 | page_id_t page_id_swap; // Swapped out page (originally local), if invalid, no swap 199 | mac_id_t new_daemon_id_swap; // The peer's daemon id 200 | rack_id_t new_rack_id_swap; // The peer's rack id 201 | }; 202 | struct MigratePageDoneReply { 203 | bool ret; 204 | }; 205 | /** 206 | * @brief Unlatch the remote page and transfer the page to this daemon 207 | * 208 | * @param master_context 209 | * @param daemon_connection 210 | * @param req 211 | * @param resp_handle 212 | */ 213 | void MigratePageDone(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 214 | MigratePageDoneRequest& req, 215 | ResponseHandle& resp_handle); 216 | 217 | } // namespace rpc_master -------------------------------------------------------------------------------- /src/include/proto/rpc_register.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "rpc_caller.hpp" 4 | #include "rpc_client.hpp" 5 | #include "rpc_daemon.hpp" 6 | #include "rpc_master.hpp" 7 | 8 | /******************* Binding RPC Functions **********************/ 9 | 10 | BIND_RPC_TYPE_STRUCT(rpc_master::joinDaemon); 11 | BIND_RPC_TYPE_STRUCT(rpc_master::joinClient); 12 | BIND_RPC_TYPE_STRUCT(rpc_master::allocPage); 13 | BIND_RPC_TYPE_STRUCT(rpc_master::freePage); 14 | BIND_RPC_TYPE_STRUCT(rpc_master::latchRemotePage); 15 | BIND_RPC_TYPE_STRUCT(rpc_master::unLatchRemotePage); 16 | BIND_RPC_TYPE_STRUCT(rpc_master::tryMigratePage); 17 | BIND_RPC_TYPE_STRUCT(rpc_master::MigratePageDone); 18 | 19 | BIND_RPC_TYPE_STRUCT(rpc_daemon::joinRack); 20 | BIND_RPC_TYPE_STRUCT(rpc_daemon::crossRackConnect); 21 | BIND_RPC_TYPE_STRUCT(rpc_daemon::getPageCXLRefOrProxy); 22 | BIND_RPC_TYPE_STRUCT(rpc_daemon::allocPage); 23 | 
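// Note: BIND_RPC_TYPE_STRUCT() derives each handler's wire id from __LINE__
// (see rpc_caller.hpp), which is why every binding in this file sits on its
// own source line: two bindings on one line would collide on the same
// rpc_type. Illustrative: RPC_TYPE_STRUCT(rpc_daemon::freePage)::rpc_type
// evaluates to the line number of the corresponding binding below.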
BIND_RPC_TYPE_STRUCT(rpc_daemon::freePage); 24 | BIND_RPC_TYPE_STRUCT(rpc_daemon::allocPageMemory); 25 | BIND_RPC_TYPE_STRUCT(rpc_daemon::alloc); 26 | BIND_RPC_TYPE_STRUCT(rpc_daemon::free); 27 | BIND_RPC_TYPE_STRUCT(rpc_daemon::getPageRDMARef); 28 | BIND_RPC_TYPE_STRUCT(rpc_daemon::delPageRDMARef); 29 | BIND_RPC_TYPE_STRUCT(rpc_daemon::tryDelPage); 30 | BIND_RPC_TYPE_STRUCT(rpc_daemon::migratePage); 31 | BIND_RPC_TYPE_STRUCT(rpc_daemon::__testdataSend1); 32 | BIND_RPC_TYPE_STRUCT(rpc_daemon::__testdataSend2); 33 | BIND_RPC_TYPE_STRUCT(rpc_daemon::__notifyPerf); 34 | BIND_RPC_TYPE_STRUCT(rpc_daemon::__stopPerf); 35 | 36 | BIND_RPC_TYPE_STRUCT(rpc_client::removePageCache); 37 | BIND_RPC_TYPE_STRUCT(rpc_client::getCurrentWriteData); 38 | BIND_RPC_TYPE_STRUCT(rpc_client::getPagePastAccessFreq); -------------------------------------------------------------------------------- /src/include/rdma_rc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "allocator.hpp" 8 | #include "fiber_pool.hpp" 9 | #include "promise.hpp" 10 | 11 | namespace rdma_rc { 12 | 13 | class RDMAEnv { 14 | public: 15 | RDMAEnv(const RDMAEnv &) = delete; 16 | RDMAEnv(RDMAEnv &&) = delete; 17 | RDMAEnv &operator=(const RDMAEnv &) = delete; 18 | RDMAEnv &operator=(RDMAEnv &&) = delete; 19 | 20 | static int init(); 21 | static RDMAEnv &get_instance() { 22 | static RDMAEnv env; 23 | return env; 24 | } 25 | 26 | bool m_active_; 27 | rdma_event_channel *m_cm_client_channel_; 28 | rdma_event_channel *m_cm_server_channel_; 29 | ibv_context **m_ibv_ctxs_; 30 | int m_nr_dev_; 31 | 32 | std::map m_pd_map_; 33 | std::map m_comp_chan_map_; 34 | std::map m_cq_map_; 35 | 36 | private: 37 | RDMAEnv() : m_active_(false) {} 38 | ~RDMAEnv(); 39 | int __init__(); 40 | }; 41 | 42 | struct SgeWr { 43 | ibv_sge sge; 44 | ibv_send_wr wr; 45 | }; 46 | 47 | struct RDMAConnection; 48 | 49 | struct SyncData { 50 | uint32_t inflight; 51 | uint32_t now_ms; 52 | RDMAConnection *conn; 53 | volatile bool wc_finish; 54 | bool timeout; 55 | uint8_t props_size; 56 | std::array props; 57 | FutureControlBlock *cbk; 58 | 59 | SyncData() : cbk(ObjectPool().pop()) {} 60 | ~SyncData() { ObjectPool().put(cbk); } 61 | 62 | void *operator new(std::size_t size) { return ObjectPoolAllocator().allocate(1); } 63 | 64 | void operator delete(void *ptr) { 65 | ObjectPoolAllocator().deallocate(static_cast(ptr), 1); 66 | } 67 | }; 68 | 69 | struct RDMAFuture { 70 | int get(); 71 | /** 72 | * @return 73 | * * 0 - ok 74 | * * 1 - pending 75 | * * -1 - error 76 | */ 77 | int try_get(); 78 | 79 | std::unique_ptr m_sd_ = {nullptr}; 80 | }; 81 | 82 | struct RDMAConnection { 83 | // Global Options 84 | static int MAX_SEND_WR; 85 | static int MAX_SEND_SGE; 86 | static int CQE_NUM; 87 | static int RESOLVE_TIMEOUT_MS; 88 | static uint8_t RETRY_COUNT; 89 | static int RNR_RETRY_COUNT; 90 | static uint8_t INITIATOR_DEPTH; 91 | static int RESPONDER_RESOURCES; 92 | static int POLL_ENTRY_COUNT; 93 | static bool RDMA_TIMEOUT_ENABLE; 94 | static uint32_t RDMA_TIMEOUT_MS; 95 | 96 | RDMAConnection(); 97 | ~RDMAConnection(); 98 | 99 | /** 100 | * @brief Listening on RDMA card IP and port 0 101 | * 102 | * @param ip 103 | * @return int 104 | */ 105 | int listen(const std::string &ip); 106 | /** 107 | * @brief Connecting the RDMA card IP and the other port 108 | * 109 | * @param ip 110 | * @param port 111 | * @param param 112 | * @param param_size 113 | * @return int 114 | */ 115 | int 
connect(const std::string &ip, uint16_t port, const void *param, uint8_t param_size); 116 | 117 | std::pair get_local_addr(); 118 | std::pair get_peer_addr(); 119 | 120 | ibv_mr *register_memory(void *ptr, size_t size); 121 | ibv_mr *register_memory(size_t size); 122 | void deregister_memory(ibv_mr *mr, bool freed = true); 123 | 124 | // prep operations are thread-unsafety for the same `sge_vec`. 125 | 126 | int prep_write(std::vector &sge_vec, uint64_t local_addr, uint32_t lkey, uint32_t length, 127 | uint64_t remote_addr, uint32_t rkey, bool inline_data); 128 | int prep_read(std::vector &sge_vec, uint64_t local_addr, uint32_t lkey, uint32_t length, 129 | uint64_t remote_addr, uint32_t rkey, bool inline_data); 130 | int prep_fetch_add(std::vector &sge_vec, uint64_t local_addr, uint32_t lkey, 131 | uint64_t remote_addr, uint32_t rkey, uint64_t n); 132 | int prep_cas(std::vector &sge_vec, uint64_t local_addr, uint32_t lkey, 133 | uint64_t remote_addr, uint32_t rkey, uint64_t expected, uint64_t desired); 134 | 135 | int prep_write(SgeWr *sge_wr, uint64_t local_addr, uint32_t lkey, uint32_t length, 136 | uint64_t remote_addr, uint32_t rkey, bool inline_data); 137 | int prep_read(SgeWr *sge_wr, uint64_t local_addr, uint32_t lkey, uint32_t length, 138 | uint64_t remote_addr, uint32_t rkey, bool inline_data); 139 | int prep_fetch_add(SgeWr *sge_wr, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, 140 | uint32_t rkey, uint64_t n); 141 | int prep_cas(SgeWr *sge_wr, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, 142 | uint32_t rkey, uint64_t expected, uint64_t desired); 143 | 144 | /** 145 | * @brief submit prep sge_vec 146 | */ 147 | RDMAFuture submit(std::vector &sge_vec); 148 | 149 | /** 150 | * @brief submit prep sgewr 151 | * 152 | * @warning The sge wr array must be reserved before future get 153 | * 154 | * @param begin 155 | * @param n 156 | * @return RDMAFuture 157 | */ 158 | RDMAFuture submit(SgeWr *begin, size_t n); 159 | 160 | static std::function m_hook_connect_; 161 | static std::function m_hook_disconnect_; 162 | static void register_connect_hook( 163 | std::function &&hook_connect); 164 | static void register_disconnect_hook(std::function &&hook_disconnect); 165 | 166 | enum conn_type_t { 167 | INVALID, 168 | SENDER, 169 | LISTENER, 170 | }; 171 | conn_type_t m_conn_type_; 172 | volatile bool m_stop_ : 1; 173 | bool m_atomic_support_ : 1; 174 | bool m_inline_support_ : 1; 175 | std::atomic m_inflight_count_; 176 | ibv_comp_channel *m_comp_chan_; 177 | ibv_pd *m_pd_; 178 | ibv_cq *m_cq_; 179 | std::deque m_cm_ids_; 180 | 181 | std::thread *m_conn_handler_; 182 | 183 | Mutex m_mu_; 184 | // std::unique_ptr m_current_sd_ = {nullptr}; 185 | SgeWr *m_sw_head_ = nullptr; 186 | SgeWr *m_sw_tail_ = nullptr; 187 | 188 | bool m_rdma_conn_param_valid_(); 189 | int m_init_last_ibv_subconnection_(); 190 | void m_handle_connection_(); 191 | int m_poll_conn_sd_wr_(); 192 | static void m_init_last_subconnection_(RDMAConnection *init_conn); 193 | static int m_acknowledge_sd_cqe_(int rc, ibv_wc wcs[]); 194 | RDMAFuture m_submit_impl(SgeWr *sge_wrs, size_t n); 195 | }; 196 | 197 | } // namespace rdma_rc -------------------------------------------------------------------------------- /src/include/stats.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "lock.hpp" 11 | 12 | class Histogram { 13 | public: 14 | Histogram(int numBuckets, 
--------------------------------------------------------------------------------
/src/include/stats.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <algorithm>
4 | #include <cmath>
5 | #include <cstdint>
6 | #include <limits>
7 | #include <random>
8 | #include <vector>
9 | 
10 | #include "lock.hpp"
11 | 
12 | class Histogram {
13 |    public:
14 |     Histogram(int numBuckets, double minValue, double maxValue);
15 |     ~Histogram() = default;
16 | 
17 |     void addValue(double value);
18 |     void clear();
19 |     int getBucketCount() const;
20 |     double getBucketValue(int bucket) const;
21 |     int getBucketCount(int bucket) const;
22 |     int getTotalCount() const;
23 |     int getPercentileBucket(double percentile) const;
24 |     double getPercentile(double percentile) const;
25 |     double getAverage() const;
26 | 
27 |     Histogram merge(Histogram &other);
28 | 
29 |    private:
30 |     int getBucket(double value) const;
31 | 
32 |     const int m_numBuckets;
33 |     const double m_minValue;
34 |     const double m_maxValue;
35 |     const double m_bucketWidth;
36 |     std::vector<int> m_buckets;
37 | };
38 | 
39 | class FreqStats {
40 |    public:
41 |     struct Heatness {
42 |         uint64_t last_time;
43 |         float last_heat;
44 | 
45 |         Heatness() : last_time(0), last_heat(0) {}
46 | 
47 |         static Heatness one(uint64_t t);
48 |         Heatness heat(uint64_t t) const;
49 |         void clear();
50 | 
51 |         Heatness operator+(const Heatness &b) const;
52 |     };
53 | 
54 |     Heatness add_wr(uint64_t t);
55 |     Heatness add_rd(uint64_t t);
56 |     void clear();
57 | 
58 |     Heatness m_wr_heat;
59 |     Heatness m_rd_heat;
60 | 
61 |     static void init_exp_decays(float half_life_us);
62 | 
63 |    private:
64 |     static Mutex m_exp_decays_lck;
65 |     static std::vector<float> m_exp_decays;
66 | };
67 | 
68 | /**
69 |  * @brief Generates random numbers following a Zipfian distribution,
70 |  * defined as P(X=k) = C / k^theta, 1 <= k <= n.
71 |  */
72 | template <typename IntType>
73 | class zipf_distribution {
74 |    public:
75 |     typedef IntType result_type;
76 | 
77 |     zipf_distribution(IntType max, double theta) : max_(max), theta_(theta), dist_(0.0, 1.0) {
78 |         c_ = std::pow(max_, -theta_) / zeta(theta_, max_);
79 |         q_ = std::pow(2.0, -theta_);
80 |         h_ = harmonic(max_);
81 |         v_ = dist_(gen_);
82 |     }
83 | 
84 |     /**
85 |      * @brief Returns a Zipf-distributed random number in [0, max)
86 |      *
87 |      * @tparam Generator
88 |      * @param g
89 |      * @return IntType
90 |      */
91 |     template <typename Generator>
92 |     IntType operator()(Generator &g) {
93 |         while (true) {
94 |             double u = dist_(g) - 0.5;
95 |             double y = std::floor(std::pow(max_ + 0.5, v_ - u) - 0.5);
96 |             if (y < 1 || y > max_) continue;
97 |             double k = std::floor(y);
98 |             v_ = dist_(g);
99 |             if (v_ >= q_ * std::pow(k + 1, theta_) / (h_ + k)) continue;
100 |             return static_cast<IntType>(k) - 1;
101 |         }
102 |     }
103 | 
104 |    private:
105 |     IntType max_;
106 |     double theta_;
107 |     double c_;
108 |     double q_;
109 |     double h_;
110 |     double v_;
111 |     std::mt19937 gen_;
112 |     std::uniform_real_distribution<double> dist_;
113 | 
114 |     static double zeta(double theta, IntType n) {
115 |         double sum = 0.0;
116 |         for (IntType i = 1; i <= n; ++i) sum += std::pow(i, -theta);
117 |         return sum;
118 |     }
119 | 
120 |     double harmonic(IntType n) const { return c_ * zeta(theta_, n); }
121 | };
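A hedged sketch of how `zipf_distribution` and `Histogram` compose. The bucket layout, seed, skew, and the percentile scale (assumed here to be a fraction in [0, 1]) are illustrative guesses, not part of these headers:

```cpp
#include <cstdint>
#include <random>

#include "stats.hpp"

// Hypothetical sketch: draw one million Zipf-distributed keys
// (theta = 0.99, a common YCSB-style skew) and histogram them.
void demo_zipf_histogram() {
    std::mt19937_64 gen(42);
    zipf_distribution<uint64_t> zipf(/*max=*/1'000'000, /*theta=*/0.99);

    Histogram hist(/*numBuckets=*/1000, /*minValue=*/0.0, /*maxValue=*/1'000'000.0);
    for (int i = 0; i < 1'000'000; ++i) {
        hist.addValue(static_cast<double>(zipf(gen)));  // values fall in [0, max)
    }

    // Under heavy skew the median sits far below the tail.
    double p50 = hist.getPercentile(0.50);  // percentile scale assumed to be [0, 1]
    double p99 = hist.getPercentile(0.99);
    (void)p50;
    (void)p99;
}
```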
--------------------------------------------------------------------------------
/src/include/udp_client.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "log.hpp"
4 | #define ASIO_STANDALONE
5 | #include <asio.hpp>
6 | #include <memory>
7 | #include <vector>
8 | 
9 | /// Basic UDP client class that supports sending messages and caches remote
10 | /// addrinfo mappings
11 | template <class T>
12 | class UDPClient {
13 |    public:
14 |     UDPClient()
15 |         : resolver_(new asio::ip::udp::resolver(io_context_)),
16 |           socket_(new asio::ip::udp::socket(io_context_)) {
17 |         socket_->open(asio::ip::udp::v4());
18 |     }
19 | 
20 |     UDPClient(const UDPClient &) = delete;
21 | 
22 |     ~UDPClient() {}
23 | 
24 |     /**
25 |      * @brief Send a UDP message to a remote host
26 |      *
27 |      * @param rem_hostname DNS-resolvable name of the remote host
28 |      * @param rem_port Destination UDP port to send the message to
29 |      * @param msg Contents of the message
30 |      *
31 |      * @return Number of bytes sent on success, SIZE_MAX on failure
32 |      */
33 |     size_t send(const std::string rem_hostname, uint16_t rem_port, const T &msg) {
34 |         asio::error_code error;
35 |         asio::ip::udp::resolver::results_type results =
36 |             resolver_->resolve(rem_hostname, std::to_string(rem_port), error);
37 | 
38 |         if (results.size() == 0) {
39 |             DLOG_ERROR("eRPC: Failed to resolve %s, asio error = %s.\n", rem_hostname.c_str(),
40 |                        error.message().c_str());
41 |             return SIZE_MAX;
42 |         }
43 | 
44 |         // Pick an IPv4 endpoint
45 |         for (const auto &endpoint_iter : results) {
46 |             if (!endpoint_iter.endpoint().address().is_v4()) continue;
47 | 
48 |             try {
49 |                 const size_t ret = socket_->send_to(asio::buffer(&msg, sizeof(T)), endpoint_iter);
50 |                 if (enable_recording_flag_) sent_vec_.push_back(msg);
51 |                 return ret;
52 |             } catch (const asio::system_error &e) {
53 |                 DLOG_ERROR("eRPC: asio send_to() failed to %s, error: %s\n", rem_hostname.c_str(),
54 |                            e.what());
55 |                 return SIZE_MAX;
56 |             }
57 |         }
58 | 
59 |         // We failed to find an IPv4 endpoint
60 |         DLOG_ERROR(
61 |             "eRPC: Failed to find an IPv4 endpoint to %s. Found %zu non-IPv4 "
62 |             "endpoints to %s though.\n",
63 |             rem_hostname.c_str(), results.size(), rem_hostname.c_str());
64 |         return SIZE_MAX;
65 |     }
66 | 
67 |     /// Maintain a record of all packets sent by this client
68 |     void enable_recording() { enable_recording_flag_ = true; }
69 | 
70 |    private:
71 |     asio::io_context io_context_;
72 |     std::unique_ptr<asio::ip::udp::resolver> resolver_;
73 |     std::unique_ptr<asio::ip::udp::socket> socket_;
74 | 
75 |     /// The list of all packets sent, maintained if recording is enabled
76 |     std::vector<T> sent_vec_;
77 |     bool enable_recording_flag_ = false;  /// Flag to enable recording for testing
78 | };
79 | 
--------------------------------------------------------------------------------
/src/include/udp_server.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #define ASIO_STANDALONE
4 | #include <asio.hpp>
5 | #include <memory>
6 | 
7 | /// Basic UDP server class that supports receiving messages
8 | template <class T>
9 | class UDPServer {
10 |    public:
11 |     UDPServer(uint16_t port, size_t timeout_ms)
12 |         : timeout_ms_(timeout_ms),
13 |           socket_(new asio::ip::udp::socket(io_context_,
14 |                                             asio::ip::udp::endpoint(asio::ip::udp::v4(), port))) {}
15 | 
16 |     UDPServer() {}
17 |     UDPServer(const UDPServer &) = delete;
18 | 
19 |     ~UDPServer() {}
20 | 
21 |     size_t recv_blocking(T &msg) {
22 |         size_t ret = socket_->receive(asio::buffer(reinterpret_cast<char *>(&msg), sizeof(T)));
23 |         return ret;
24 |     }
25 | 
26 |    private:
27 |     size_t timeout_ms_;
28 |     asio::io_context io_context_;
29 |     std::unique_ptr<asio::ip::udp::socket> socket_;
30 | };
31 | 
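A brief sketch showing the two helpers side by side. The message struct, port, and hostname are invented for illustration; note that `send()` signals failure by returning `SIZE_MAX`:

```cpp
#include <cstdio>

#include "udp_client.hpp"
#include "udp_server.hpp"

struct HelloMsg {
    char text[32];
};

// Hypothetical sketch: one-shot datagram from UDPClient to UDPServer.
int main() {
    UDPServer<HelloMsg> server(/*port=*/31850, /*timeout_ms=*/0);  // binds on construction
    UDPClient<HelloMsg> client;

    HelloMsg out{};
    std::snprintf(out.text, sizeof(out.text), "hello");
    if (client.send("localhost", 31850, out) == SIZE_MAX) {
        return 1;  // resolution or send failed; send() already logged the reason
    }

    HelloMsg in{};
    server.recv_blocking(in);  // blocks until the datagram arrives
    return 0;
}
```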
--------------------------------------------------------------------------------
/src/include/utils.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <stdint.h>
4 | 
5 | #include <cstddef>
6 | #include <functional>
7 | #include <queue>
8 | #include <string>
9 | #include <tuple>
10 | #include <type_traits>
11 | #include <vector>
12 | 
13 | #define CACHE_ALIGN __attribute__((aligned(cache_line_size)))
14 | 
15 | #define LIKELY __glibc_likely
16 | #define UNLIKELY __glibc_unlikely
17 | 
18 | #ifdef NDEBUG
19 | #define DEBUGY(cond) if (false)
20 | #else
21 | #define DEBUGY(cond) if (UNLIKELY(cond))
22 | #endif  // NDEBUG
23 | 
24 | template <typename T>
25 | using MaxHeap = std::priority_queue<T, std::vector<T>, std::less<T>>;
26 | 
27 | template <typename T>
28 | using MinHeap = std::priority_queue<T, std::vector<T>, std::greater<T>>;
29 | 
30 | template <typename D>
31 | constexpr D div_ceil(D x, uint64_t div) {
32 |     return (x + div - 1) / div;
33 | }
34 | 
35 | template <typename D>
36 | D div_floor(D x, uint64_t div) {
37 |     return x / div;
38 | }
39 | 
40 | template <typename D>
41 | D align_ceil(D x, uint64_t aligned) {
42 |     return div_ceil(x, aligned) * aligned;
43 | }
44 | 
45 | template <typename D>
46 | D align_floor(D x, uint64_t aligned) {
47 |     return div_floor(x, aligned) * aligned;
48 | }
49 | 
50 | inline uint64_t rdtsc() { return __builtin_ia32_rdtsc(); }
51 | 
52 | void threadBindCore(int core_id);
53 | uint64_t getMsTimestamp();
54 | uint64_t getUsTimestamp();
55 | uint64_t getNsTimestamp();
56 | 
57 | class IPv4String {
58 |    public:
59 |     IPv4String() = default;
60 |     IPv4String(const std::string &ip);
61 |     IPv4String(const IPv4String &ip) = default;
62 |     IPv4String(IPv4String &&ip) = default;
63 |     IPv4String &operator=(const std::string &ip);
64 |     IPv4String &operator=(const IPv4String &ip) = default;
65 |     IPv4String &operator=(IPv4String &&ip) = default;
66 | 
67 |     std::string get_string() const { return std::string(raw.ipstr); }
68 | 
69 |    private:
70 |     struct {
71 |         char ipstr[16];
72 |     } raw;
73 | };
74 | 
75 | struct NOCOPYABLE {
76 |     NOCOPYABLE() = default;
77 |     ~NOCOPYABLE() = default;
78 |     NOCOPYABLE(const NOCOPYABLE &) = delete;
79 |     NOCOPYABLE(NOCOPYABLE &&) = delete;
80 |     NOCOPYABLE &operator=(const NOCOPYABLE &) = delete;
81 |     NOCOPYABLE &operator=(NOCOPYABLE &&) = delete;
82 | };
83 | 
84 | template <typename R, typename... Args>
85 | struct function_traits_helper {
86 |     static constexpr std::size_t count = sizeof...(Args);
87 |     using result_type = R;
88 |     using args_tuple_type = std::tuple<Args...>;
89 |     template <std::size_t N>
90 |     using args_type = typename std::tuple_element<N, std::tuple<Args...>>::type;
91 | };
92 | 
93 | template <typename T>
94 | struct function_traits;
95 | template <typename R, typename... Args>
96 | struct function_traits<R(Args...)> : public function_traits_helper<R, Args...> {};
97 | template <typename R, typename... Args>
98 | struct function_traits<R (*)(Args...)> : public function_traits_helper<R, Args...> {};
99 | template <typename R, typename... Args>
100 | struct function_traits<std::function<R(Args...)>> : public function_traits_helper<R, Args...> {};
101 | 
102 | template <typename Container>
103 | struct container_traits;
104 | 
105 | template