├── .clang-format ├── .gitignore ├── CMakeLists.txt ├── README.md ├── doc ├── event_loop.drawio └── remote_direct_io.drawio ├── fs └── rchfs.cc ├── include ├── options.hpp ├── rcmp.hpp └── status.hpp ├── script ├── create_cxl_mem.sh ├── gen-perf-svg.sh ├── rchfs_fio.sh ├── run_client_shell.sh ├── run_cluster.sh ├── run_dht.sh └── scp-src.sh ├── src ├── CMakeLists.txt ├── allocator.cc ├── cxl.cc ├── daemon.cc ├── fiber_poll.cc ├── include │ ├── allocator.hpp │ ├── cmdline.h │ ├── common.hpp │ ├── concurrent_hashmap.hpp │ ├── concurrent_queue.hpp │ ├── config.hpp │ ├── cxl.hpp │ ├── fiber_pool.hpp │ ├── impl.hpp │ ├── lock.hpp │ ├── log.hpp │ ├── msg_queue.hpp │ ├── page_table.hpp │ ├── promise.hpp │ ├── proto │ │ ├── rpc_adaptor.hpp │ │ ├── rpc_caller.hpp │ │ ├── rpc_client.hpp │ │ ├── rpc_daemon.hpp │ │ ├── rpc_master.hpp │ │ └── rpc_register.hpp │ ├── rdma_rc.hpp │ ├── robin_hood.h │ ├── stats.hpp │ ├── udp_client.hpp │ ├── udp_server.hpp │ └── utils.hpp ├── master.cc ├── msg_queue.cc ├── page_table.cc ├── proto │ ├── rpc_client.cc │ ├── rpc_daemon.cc │ └── rpc_master.cc ├── rcmp.cc ├── rdma_rc.cc ├── stats.cc ├── test │ ├── CMakeLists.txt │ ├── conqueue_test.cc │ ├── erpc │ │ ├── client_sta_test.cc │ │ ├── common_sta.h │ │ └── server_sta_test.cc │ ├── hello_world_test.cc │ ├── ring_allocator_test.cc │ └── simple_adaptor_test.cc └── utils.cc ├── test ├── CMakeLists.txt ├── client_shell.cc ├── dht.cc ├── dht.hpp ├── microbench_core.hpp ├── mmap_shell.cc └── rw.cc └── third_party └── eRPC ├── erpc.h └── liberpc.a /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -1 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveMacros: false 7 | AlignConsecutiveAssignments: false 8 | AlignConsecutiveDeclarations: false 9 | AlignEscapedNewlines: Left 10 | AlignOperands: true 11 | AlignTrailingComments: true 12 | AllowAllArgumentsOnNextLine: true 13 | AllowAllConstructorInitializersOnNextLine: true 14 | AllowAllParametersOfDeclarationOnNextLine: true 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortFunctionsOnASingleLine: All 18 | AllowShortLambdasOnASingleLine: All 19 | AllowShortIfStatementsOnASingleLine: WithoutElse 20 | AllowShortLoopsOnASingleLine: true 21 | AlwaysBreakAfterDefinitionReturnType: None 22 | AlwaysBreakAfterReturnType: None 23 | AlwaysBreakBeforeMultilineStrings: true 24 | AlwaysBreakTemplateDeclarations: Yes 25 | BinPackArguments: true 26 | BinPackParameters: true 27 | BraceWrapping: 28 | AfterCaseLabel: false 29 | AfterClass: false 30 | AfterControlStatement: false 31 | AfterEnum: false 32 | AfterFunction: false 33 | AfterNamespace: false 34 | AfterObjCDeclaration: false 35 | AfterStruct: false 36 | AfterUnion: false 37 | AfterExternBlock: false 38 | BeforeCatch: false 39 | BeforeElse: false 40 | IndentBraces: false 41 | SplitEmptyFunction: true 42 | SplitEmptyRecord: true 43 | SplitEmptyNamespace: true 44 | BreakBeforeBinaryOperators: None 45 | BreakBeforeBraces: Attach 46 | BreakBeforeInheritanceComma: false 47 | BreakInheritanceList: BeforeColon 48 | BreakBeforeTernaryOperators: true 49 | BreakConstructorInitializersBeforeComma: false 50 | BreakConstructorInitializers: BeforeColon 51 | BreakAfterJavaFieldAnnotations: false 52 | BreakStringLiterals: true 53 | ColumnLimit: 100 54 | CommentPragmas: '^ IWYU pragma:' 55 | CompactNamespaces: false 56 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 57 | 
ConstructorInitializerIndentWidth: 4 58 | ContinuationIndentWidth: 4 59 | Cpp11BracedListStyle: true 60 | DeriveLineEnding: true 61 | DerivePointerAlignment: true 62 | DisableFormat: false 63 | ExperimentalAutoDetectBinPacking: false 64 | FixNamespaceComments: true 65 | ForEachMacros: 66 | - foreach 67 | - Q_FOREACH 68 | - BOOST_FOREACH 69 | IncludeBlocks: Regroup 70 | IncludeCategories: 71 | - Regex: '^' 72 | Priority: 2 73 | SortPriority: 0 74 | - Regex: '^<.*\.h>' 75 | Priority: 1 76 | SortPriority: 0 77 | - Regex: '^<.*' 78 | Priority: 2 79 | SortPriority: 0 80 | - Regex: '.*' 81 | Priority: 3 82 | SortPriority: 0 83 | IncludeIsMainRegex: '([-_](test|unittest))?$' 84 | IncludeIsMainSourceRegex: '' 85 | IndentCaseLabels: true 86 | IndentGotoLabels: true 87 | IndentPPDirectives: None 88 | IndentWidth: 4 89 | IndentWrappedFunctionNames: false 90 | JavaScriptQuotes: Leave 91 | JavaScriptWrapImports: true 92 | KeepEmptyLinesAtTheStartOfBlocks: false 93 | MacroBlockBegin: '' 94 | MacroBlockEnd: '' 95 | MaxEmptyLinesToKeep: 1 96 | NamespaceIndentation: None 97 | ObjCBinPackProtocolList: Never 98 | ObjCBlockIndentWidth: 2 99 | ObjCSpaceAfterProperty: false 100 | ObjCSpaceBeforeProtocolList: true 101 | PenaltyBreakAssignment: 2 102 | PenaltyBreakBeforeFirstCallParameter: 1 103 | PenaltyBreakComment: 300 104 | PenaltyBreakFirstLessLess: 120 105 | PenaltyBreakString: 1000 106 | PenaltyBreakTemplateDeclaration: 10 107 | PenaltyExcessCharacter: 1000000 108 | PenaltyReturnTypeOnItsOwnLine: 200 109 | PointerAlignment: Left 110 | RawStringFormats: 111 | - Language: Cpp 112 | Delimiters: 113 | - cc 114 | - CC 115 | - cpp 116 | - Cpp 117 | - CPP 118 | - 'c++' 119 | - 'C++' 120 | CanonicalDelimiter: '' 121 | BasedOnStyle: google 122 | - Language: TextProto 123 | Delimiters: 124 | - pb 125 | - PB 126 | - proto 127 | - PROTO 128 | EnclosingFunctions: 129 | - EqualsProto 130 | - EquivToProto 131 | - PARSE_PARTIAL_TEXT_PROTO 132 | - PARSE_TEST_PROTO 133 | - PARSE_TEXT_PROTO 134 | - ParseTextOrDie 135 | - ParseTextProtoOrDie 136 | CanonicalDelimiter: '' 137 | BasedOnStyle: google 138 | ReflowComments: true 139 | SortIncludes: true 140 | SortUsingDeclarations: true 141 | SpaceAfterCStyleCast: false 142 | SpaceAfterLogicalNot: false 143 | SpaceAfterTemplateKeyword: true 144 | SpaceBeforeAssignmentOperators: true 145 | SpaceBeforeCpp11BracedList: false 146 | SpaceBeforeCtorInitializerColon: true 147 | SpaceBeforeInheritanceColon: true 148 | SpaceBeforeParens: ControlStatements 149 | SpaceBeforeRangeBasedForLoopColon: true 150 | SpaceInEmptyBlock: false 151 | SpaceInEmptyParentheses: false 152 | SpacesBeforeTrailingComments: 2 153 | SpacesInAngles: false 154 | SpacesInConditionalStatement: false 155 | SpacesInContainerLiterals: true 156 | SpacesInCStyleCastParentheses: false 157 | SpacesInParentheses: false 158 | SpacesInSquareBrackets: false 159 | SpaceBeforeSquareBrackets: false 160 | Standard: Auto 161 | StatementMacros: 162 | - Q_UNUSED 163 | - QT_REQUIRE_VERSION 164 | TabWidth: 8 165 | UseCRLF: false 166 | UseTab: Never 167 | ... 
168 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode 2 | /build 3 | /.cache -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(rcmp) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | if( CMAKE_BUILD_TYPE STREQUAL "Release" ) 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") 8 | message(STATUS "Release Mode") 9 | else() 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") 11 | message(STATUS "Debug Mode") 12 | endif() 13 | 14 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 15 | set(CMAKE_POSITION_INDEPENDENT_CODE ON) 16 | 17 | find_package(Boost REQUIRED COMPONENTS context fiber) 18 | 19 | if(NOT Boost_FOUND) 20 | message(FATAL_ERROR "Boost fiber not found") 21 | endif() 22 | 23 | link_directories("third_party/eRPC/") 24 | include_directories("include" "src/include" "third_party") 25 | add_subdirectory(src) 26 | 27 | # ################################################## 28 | # Library 29 | # ################################################## 30 | 31 | add_library(rcmp SHARED 32 | src/rcmp.cc 33 | ) 34 | target_link_libraries( 35 | rcmp 36 | base 37 | ) 38 | 39 | # ################################################## 40 | # Executables 41 | # ################################################## 42 | 43 | add_executable( 44 | rcmp_master 45 | src/master.cc 46 | ) 47 | target_link_libraries( 48 | rcmp_master 49 | base 50 | ) 51 | 52 | add_executable( 53 | rcmp_daemon 54 | src/daemon.cc 55 | ) 56 | target_link_libraries( 57 | rcmp_daemon 58 | base 59 | ) 60 | 61 | add_executable( 62 | rchfs 63 | fs/rchfs.cc 64 | ) 65 | target_link_libraries( 66 | rchfs 67 | rcmp 68 | fuse3 69 | ) 70 | 71 | add_subdirectory(test) 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rcmp: A hybrid memory pooling system based on RDMA and CXL 2 | 3 | Rcmp is a user-level library for a distributed memory pooling system that mixes CXL and RDMA. Rcmp deploys large memory pools in separate racks, using CXL for coherent memory access within a rack and RDMA for remote one-sided access across racks. The CXL memory devices used within a rack have sub-microsecond latency, which greatly accelerates remote memory access, while RDMA scales the capacity of the memory pool well. However, since RDMA's raw verbs API cannot provide coherent memory access, Rcmp combines RDMA with Remote Direct IO and Remote Page Swap policies to achieve coherent access across racks. 4 | 5 | Rcmp currently supports the following features: 6 | 7 | * **Memory Allocation and Release**: Clients allocate and release page-sized memory space via the AllocPage and FreePage APIs. 8 | 9 | * **Consistent Memory Read/Write**: Users access memory data at a global address (GAddr) through the Read/Write/CAS APIs. Depending on access hotness, an access is served either by a CXL load/store or by an RDMA one-sided verb operation. 10 | 11 | # How to use 12 | 13 | * Using the Rcmp dynamic library 14 | 15 | The interfaces are defined in `include/rcmp.hpp`, and their use can be found in `test/client_shell.cc`. This test launches a memory pool operations program and uses Rcmp's API to perform various operations on the memory pool. A minimal example is sketched below.
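A minimal usage sketch (not a file in this repository; the option values are placeholders copied from the cluster commands further down, and error handling is reduced to asserts):

```cpp
#include <cassert>
#include <cstring>

#include "rcmp.hpp"

int main() {
    rcmp::ClientOptions opts;
    opts.client_ip = "192.168.200.51";          // placeholder client address
    opts.client_port = 14800;
    opts.rack_id = 0;                           // rack of the local daemon
    opts.cxl_devdax_path = "/dev/shm/cxlsim0";  // simulated CXL device
    opts.cxl_memory_size = 2357198848;

    rcmp::PoolContext *pool = rcmp::Open(opts);
    assert(pool != nullptr);

    // Allocate one page, write a string into it, and read it back.
    rcmp::GAddr gaddr = pool->AllocPage(1);
    assert(gaddr != rcmp::GNullPtr);

    char wbuf[] = "hello rcmp";
    char rbuf[sizeof(wbuf)] = {};
    assert(pool->Write(gaddr, sizeof(wbuf), wbuf) == rcmp::OK);
    assert(pool->Read(gaddr, sizeof(rbuf), rbuf) == rcmp::OK);
    assert(std::strcmp(wbuf, rbuf) == 0);

    pool->FreePage(gaddr, 1);
    rcmp::Close(pool);
    return 0;
}
```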
16 | 17 | 1. Dependencies 18 | 19 | * gcc(>=4.6) 20 | 21 | * numactl 22 | 23 | * boost-fiber-dev 24 | 25 | * boost-coroutine-dev 26 | 27 | * boost-context-dev 28 | 29 | * asio 30 | 31 | * redis-plus-plus 32 | 33 | * fuse3 34 | 35 | 2. Compile 36 | 37 | ```shell 38 | mkdir -p build 39 | cd build 40 | cmake .. -DCMAKE_BUILD_TYPE=Release 41 | make 42 | ``` 43 | 44 | 3. Run Cluster 45 | 46 | * Start Master (MN) 47 | 48 | The MN process starts `eRPC`; please **reserve huge pages at 2GB granularity** in advance. 49 | 50 | ```shell 51 | sudo /home/user/Rcmp/build/rcmp_master --master_ip=192.168.200.51 --master_port=31850 52 | ``` 53 | 54 | * Start Rack Daemon (DN) 55 | 56 | The DN process starts `eRPC`; please **reserve huge pages at 2GB granularity** in advance. 57 | 58 | This project currently uses shared memory across NUMA nodes to simulate CXL access. Run `script/create_cxl_mem.sh` to create the shared memory on NUMA 1. 59 | 60 | All other processes run on NUMA 0. 61 | 62 | ```shell 63 | # Add rack 0 on 192.168.200.51 with CXL size 2.19GB 64 | sudo numactl -N 0 /home/user/Rcmp/build/rcmp_daemon --master_ip=192.168.200.51 --master_port=31850 --daemon_ip=192.168.200.51 --daemon_port=31851 --rack_id=0 --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=2357198848 --hot_decay=0.04 --hot_swap_watermark=3 65 | ``` 66 | 67 | ```shell 68 | # Add rack 1 on 192.168.201.89 with CXL size 18GB 69 | sudo numactl -N 0 /home/user/Rcmp/build/rcmp_daemon --master_ip=192.168.200.51 --master_port=31850 --daemon_ip=192.168.201.89 --daemon_port=31852 --rack_id=1 --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=19327352832 --hot_decay=0.04 --hot_swap_watermark=3 70 | ``` 71 | 72 | * Launch the client test program (CN) 73 | 74 | To simulate CXL, launch the CN in the same rack, on the same server as its DN. 75 | 76 | `test/rw.cc` is a micro-benchmark. It uses Redis for cross-rack test synchronisation. 77 | 78 | ```shell 79 | sudo numactl -N 0 /home/user/Rcmp/build/test/rw --client_ip=192.168.200.51 --client_port=14800 --rack_id=0 --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=2357198848 --iteration=10000000 --payload_size=64 --addr_range=17179869184 --thread=32 --thread_all=1 --no_node=1 --node_id=0 --redis_server_ip=192.168.201.52:6379 80 | ``` 81 | 82 | # Application 83 | 84 | After starting the necessary memory pool cluster environment (MN plus DNs), use the Rcmp dynamic library to build memory pool applications. The following applications are already implemented in the project. 85 | 86 | * Distributed hash table 87 | 88 | The distributed hash table uses the Rcmp interface to implement a linearly probed two-tier hash table. For simplicity, the hash table is fixed-size; dynamic scaling (similar to CCEH) will be added later. A minimal insert sketch is given at the end of this README. 89 | 90 | Location: `test/dht.hpp`. 91 | 92 | * rchfs 93 | 94 | rchfs uses the FUSE API to implement a simple high-capacity in-memory file system. File metadata is stored on the client, file data blocks are allocated with Rcmp's AllocPage, and write/read system calls are redirected to Rcmp's Write/Read API. File metadata sharing in the memory pool will be added later. 95 | 96 | Location: `fs/rchfs.cc`. 97 | 98 | # Paper 99 | 100 | Zhonghua Wang, Yixing Guo, Kai Lu*, Jiguang Wan, Daohui Wang, Ting Yao, Huatao Wu. Rcmp: Reconstructing RDMA-based Memory Disaggregation via CXL. ACM Transactions on Architecture and Code Optimization (TACO), 2023. (Just Accepted)
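As referenced in the Application section above, the following sketch shows how a linearly probed insert can be built from the public `CAS` API. It is illustrative only and is not the implementation in `test/dht.hpp`: the single-level table of 8-byte slots, the hash function, and the assumption that a failed `CAS` reports the observed value through `expected` are all hypothetical.

```cpp
#include <cstdint>
#include <functional>

#include "rcmp.hpp"

constexpr size_t kSlots = 1024;  // fixed table size: kSlots * 8 bytes, obtained via AllocPage
constexpr uint64_t kEmpty = 0;   // sentinel for an empty slot

// Insert a non-zero `key` into the first free slot of its probe chain.
bool DhtInsert(rcmp::PoolContext *pool, rcmp::GAddr table, uint64_t key) {
    size_t h = std::hash<uint64_t>()(key) % kSlots;
    for (size_t i = 0; i < kSlots; ++i) {
        rcmp::GAddr slot = table + ((h + i) % kSlots) * sizeof(uint64_t);
        uint64_t expected = kEmpty;
        bool ok = false;
        // CAS claims the slot atomically, even against clients in other racks.
        if (pool->CAS(slot, expected, key, ok) != rcmp::OK) return false;
        if (ok) return true;
        if (expected == key) return true;  // assumed: CAS writes back the observed value
    }
    return false;  // probe chain exhausted: table full
}
```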
-------------------------------------------------------------------------------- /include/options.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cstddef> 4 | #include <cstdint> 5 | #include <string> 6 | 7 | // enable stat 8 | #define RCMP_PERF_ON 1 9 | 10 | namespace rcmp { 11 | 12 | class ClientOptions { 13 | public: 14 | std::string client_ip; 15 | uint16_t client_port; 16 | 17 | uint32_t rack_id; 18 | 19 | // Whether to register as a CXL client (currently only `true` is supported) 20 | bool with_cxl = true; 21 | std::string cxl_devdax_path; 22 | size_t cxl_memory_size; 23 | int prealloc_fiber_num = 2; // Number of pre-allocated boost coroutines 24 | }; 25 | 26 | class DaemonOptions { 27 | public: 28 | std::string master_ip; 29 | uint16_t master_port = 31850; 30 | 31 | std::string daemon_ip; 32 | uint16_t daemon_port; 33 | 34 | uint32_t rack_id; 35 | 36 | // Whether to register as a CXL daemon (currently only `true` is supported) 37 | bool with_cxl = true; 38 | std::string cxl_devdax_path; 39 | size_t cxl_memory_size; 40 | 41 | // Maximum number of clients (limited by the msgq communication area in shared memory) 42 | size_t max_client_limit = 32; 43 | size_t swap_zone_size = 64ul << 20; 44 | 45 | int prealloc_fiber_num = 32; // Number of pre-allocated boost coroutines 46 | float heat_half_life_us = 1000; // Page heat decay coefficient (half-life, us) 47 | float hot_swap_watermark = 3; // Page swap heat threshold 48 | 49 | int cm_qp_num = 2; // Number of QPs connected to other daemons 50 | }; 51 | 52 | class MasterOptions { 53 | public: 54 | std::string master_ip; 55 | uint16_t master_port = 31850; 56 | 57 | size_t max_cluster_mac_num = 1000; // Maximum number of connected nodes in the cluster 58 | int prealloc_fiber_num = 16; // Number of pre-allocated boost coroutines 59 | }; 60 | 61 | } // namespace rcmp -------------------------------------------------------------------------------- /include/rcmp.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cstddef> 4 | #include <cstdint> 5 | 6 | #include "options.hpp" 7 | #include "status.hpp" 8 | 9 | namespace rcmp { 10 | 11 | using GAddr = uintptr_t; 12 | constexpr static GAddr GNullPtr = 0; 13 | 14 | /** 15 | * @brief Memory Pool Client Context 16 | */ 17 | class PoolContext; 18 | 19 | /** 20 | * @brief 21 | * Opens the memory pool. Returns a pointer to the memory pool context on success, otherwise returns 22 | * `nullptr`. The returned object is allocated with `new` and should be closed and released via 23 | * `Close()`. 24 | * 25 | * @param options Memory Pool Initialisation Options 26 | * @return PoolContext* 27 | */ 28 | PoolContext *Open(ClientOptions options); 29 | 30 | /** 31 | * @brief Closes the memory pool context. 32 | * 33 | * @param pool_ctx 34 | */ 35 | void Close(PoolContext *pool_ctx); 36 | 37 | class PoolContext { 38 | private: 39 | /** 40 | * @brief `PoolContext` internal implementation 41 | */ 42 | class PoolContextImpl; 43 | 44 | public: 45 | PoolContext(ClientOptions options); 46 | ~PoolContext(); 47 | 48 | /** 49 | * @brief Allocates memory. The allocation policy prefers memory in the rack where the client 50 | * is located. A failed request returns `GNullPtr`. 51 | * 52 | * @param size 53 | * @return GAddr 54 | */ 55 | GAddr Alloc(size_t size); 56 | /** 57 | * @brief Reads `size` bytes at address `gaddr` into `buf`.
58 | * 59 | * @param gaddr 60 | * @param size 61 | * @param buf 62 | * @return Status 63 | */ 64 | Status Read(GAddr gaddr, size_t size, void *buf); 65 | /** 66 | * @brief Writes `size` bytes from `buf` to address `gaddr`. 67 | * 68 | * @param gaddr 69 | * @param size 70 | * @param buf 71 | * @return Status 72 | */ 73 | Status Write(GAddr gaddr, size_t size, const void *buf); 74 | /** 75 | * @brief Frees memory. 76 | * 77 | * @param gaddr 78 | * @param size 79 | * @return Status 80 | */ 81 | Status Free(GAddr gaddr, size_t size); 82 | 83 | /** 84 | * @brief Allocates `count` contiguous memory pages. The allocation policy prefers memory in 85 | * the rack where the client is located. A failed request returns 86 | * `GNullPtr`. 87 | * 88 | * @param count 89 | * @return GAddr 90 | */ 91 | GAddr AllocPage(size_t count); 92 | 93 | /** 94 | * @brief Frees consecutive memory pages. 95 | * 96 | * @param gaddr 97 | * @param count 98 | * @return Status 99 | */ 100 | Status FreePage(GAddr gaddr, size_t count); 101 | 102 | /** 103 | * @brief Compare-and-swap on an 8-byte-aligned address. 104 | * 105 | * @param gaddr 106 | * @param expected 107 | * @param desired 108 | * @param ret 109 | * @return Status 110 | */ 111 | Status CAS(GAddr gaddr, uint64_t &expected, uint64_t desired, bool &ret); 112 | 113 | // /** 114 | // * @brief Write data from `buf` to `gaddr` address, size `size`. 115 | // * 116 | // * @param gaddr 117 | // * @param size 118 | // * @param buf 119 | // * @return Status 120 | // */ 121 | // Status WriteBatch(GAddr gaddr, size_t size, void *buf); 122 | 123 | const ClientOptions &GetOptions() const; 124 | 125 | /*********************** for test ***********************/ 126 | 127 | void __DumpStats(); 128 | 129 | void __ClearStats(); 130 | 131 | Status __TestDataSend1(int *array, size_t size); 132 | 133 | Status __TestDataSend2(int *array, size_t size); 134 | 135 | Status __NotifyPerf(); 136 | 137 | Status __StopPerf(); 138 | 139 | private: 140 | PoolContextImpl *m_impl; 141 | }; 142 | 143 | } // namespace rcmp -------------------------------------------------------------------------------- /include/status.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <string> 4 | 5 | namespace rcmp { 6 | 7 | enum Status { 8 | ERROR = 0, 9 | OK = 1, 10 | }; 11 | 12 | inline static std::string GetStatusString(Status s) { 13 | switch (s) { 14 | case ERROR: 15 | return "ERROR"; 16 | case OK: 17 | return "OK"; 18 | default: 19 | return "Unknown Status"; 20 | } 21 | } 22 | 23 | } // namespace rcmp -------------------------------------------------------------------------------- /script/create_cxl_mem.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SIZE=$1 # size in MB (dd uses bs=1M) 4 | 5 | numactl --membind=1 dd if=/dev/zero of=/dev/shm/cxlsim0 bs=1M count=$SIZE -------------------------------------------------------------------------------- /script/gen-perf-svg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | t=$(date "+%Y-%m-%d_%H-%M-%S") 4 | sudo perf script | stackcollapse-perf.pl | flamegraph.pl > perf_$t.svg -------------------------------------------------------------------------------- /script/rchfs_fio.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=`whoami` 4 | 5 | fio -filename=/home/$user/tmp_rchfs_fs/fio_test -direct=1 -iodepth=1 -thread -rw=randwrite -ioengine=psync -bs=16k -size=2G -numjobs=10
-runtime=60 -group_reporting -name=mytest -------------------------------------------------------------------------------- /script/run_client_shell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=`whoami` 4 | port=$((14800+$1)) 5 | 6 | sudo /home/$user/Rcmp/build/test/client_shell --client_ip=192.168.1.51 --client_port=$port --rack_id=$1 --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=4294967296 -------------------------------------------------------------------------------- /script/run_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=$1 4 | passwd=$2 5 | CMD_DIR="/home/$user/Rcmp/build" 6 | SUDO="echo $passwd | sudo -S" 7 | 8 | IP_MN="192.168.200.51" 9 | PORT_MN=31850 10 | # IP_DNs=("192.168.200.51" "192.168.201.52" "192.168.201.33" "192.168.201.89") 11 | # IP_CNs=(${IP_DNs[0]} ${IP_DNs[1]} ${IP_DNs[2]} ${IP_DNs[3]}) 12 | IP_DNs=("192.168.200.51") 13 | IP_CNs=(${IP_DNs[0]}) 14 | 15 | kill_all() { 16 | echo "kill all" 17 | 18 | for ((i=0; i<${#IP_CNs[@]}; i++)) 19 | do 20 | sshpass -p $passwd ssh $user@${IP_CNs[i]} "echo $passwd | sudo -S killall rw" & 21 | done 22 | 23 | sleep 2 24 | 25 | for ((i=0; i<${#IP_DNs[@]}; i++)) 26 | do 27 | sshpass -p $passwd ssh $user@${IP_DNs[i]} "echo $passwd | sudo -S killall rcmp_daemon" & 28 | sleep 2 29 | done 30 | 31 | sshpass -p $passwd ssh $user@$IP_MN "echo $passwd | sudo -S killall rcmp_master" & 32 | 33 | sleep 2 34 | } 35 | 36 | test_run() { 37 | # The ssh lines below already prepend "echo $passwd | sudo -S"; the command variables must not repeat it. 38 | MN_CMD="$CMD_DIR/rcmp_master --master_ip=$IP_MN --master_port=$PORT_MN" 39 | 40 | echo "[exec] $MN_CMD" 41 | sshpass -p $passwd ssh $user@$IP_MN "echo $passwd | sudo -S $MN_CMD" & 42 | 43 | sleep 5 44 | 45 | for ((i=0; i<${#IP_DNs[@]}; i++)) 46 | do 47 | port=$(($PORT_MN+1+$i)) 48 | 49 | DN_CMD="numactl -N 0 $CMD_DIR/rcmp_daemon --master_ip=$IP_MN --master_port=$PORT_MN --daemon_ip=${IP_DNs[i]} --daemon_port=$port --rack_id=$i --cxl_devdax_path=/dev/shm/cxlsim$i --cxl_memory_size=$CXL_MEM_SZ --hot_decay=$HOT_DECAY --hot_swap_watermark=$WATERMARK" 50 | 51 | echo "[exec] $DN_CMD" 52 | sshpass -p $passwd ssh $user@${IP_DNs[i]} "echo $passwd | sudo -S $DN_CMD" & 53 | 54 | sleep 5 55 | done 56 | 57 | PIDS=() 58 | 59 | for ((i=0; i<${#IP_CNs[@]}; i++)) 60 | do 61 | port=$((14800+$i)) 62 | 63 | NODES=${#IP_CNs[@]} 64 | NID=$i 65 | 66 | CN_CMD="numactl -N 0 $CMD_DIR/test/rw --client_ip=${IP_CNs[i]} --client_port=$port --rack_id=$i --cxl_devdax_path=/dev/shm/cxlsim0 --cxl_memory_size=$CXL_MEM_SZ --iteration=$IT --payload_size=$payload --addr_range=$ADDR_RANGE --thread=$THREAD --thread_all=1 --no_node=$NODES --node_id=$NID --redis_server_ip=192.168.201.52:6379" 67 | 68 | echo "[exec] $CN_CMD" 69 | sshpass -p $passwd ssh $user@${IP_CNs[i]} "echo $passwd | sudo -S $CN_CMD" & 70 | PIDS+=($!) 71 | 72 | sleep 4 73 | done 74 | 75 | wait ${PIDS[@]} 76 | 77 | kill_all 78 | } 79 | 80 | kill_all 81 | 82 | $CMD_DIR/../script/scp-src.sh $user $passwd 83 | 84 | echo "Start ..."
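# Global benchmark parameters consumed by test_run; the loop at the bottom sweeps payload sizes.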
84 | 85 | port=$((14800+0)) 86 | 87 | # reserve 2GB, data 8GB(include swap 100MB) 88 | CXL_MEM_SZ=$((10*1024*1024*1024)) 89 | ADDR_RANGE=$(((8*1024-100)*1024*1024)) 90 | HOT_DECAY=0.04 91 | WATERMARK=3 92 | THREAD=8 93 | IT=1000000 94 | SA=$((2*1024*1024)) 95 | 96 | for payload in 64 97 | do 98 | test_run 99 | done 100 | -------------------------------------------------------------------------------- /script/run_dht.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=`whoami` 4 | port=$((14800+$1)) 5 | 6 | sudo numactl -N 0 /home/$user/Rcmp/build/test/dht --client_ip=192.168.1.51 --client_port=$port --rack_id=$1 --cxl_devdax_path=/dev/shm/cxlsim$1 --cxl_memory_size=4294967296 --iteration=1000000 --read_ratio=100 --initor=$2 -------------------------------------------------------------------------------- /script/scp-src.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | user=$1 4 | passwd=$2 5 | 6 | CMD_DIR="/home/$user/Rcmp/build" 7 | 8 | for ip in 192.168.1.52 # 192.168.1.33 192.168.1.89 9 | do 10 | sshpass -p $passwd scp $CMD_DIR/test/rw $user@$ip:$CMD_DIR/test 11 | sshpass -p $passwd scp $CMD_DIR/librcmp.so $CMD_DIR/rcmp_daemon $user@$ip:$CMD_DIR/ 12 | done -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE LIB_SRCS *.cc) 2 | 3 | list(FILTER LIB_SRCS EXCLUDE REGEX "/rcmp.cc$") 4 | list(FILTER LIB_SRCS EXCLUDE REGEX "/daemon.cc$") 5 | list(FILTER LIB_SRCS EXCLUDE REGEX "/master.cc$") 6 | list(FILTER LIB_SRCS EXCLUDE REGEX "/test/") 7 | 8 | add_library(base STATIC ${LIB_SRCS}) 9 | target_link_libraries(base pthread erpc ibverbs numa rdmacm boost_coroutine Boost::fiber Boost::context) 10 | 11 | add_subdirectory(test) -------------------------------------------------------------------------------- /src/allocator.cc: -------------------------------------------------------------------------------- 1 | #include "allocator.hpp" 2 | 3 | #include "log.hpp" 4 | #include "utils.hpp" 5 | 6 | IDGenerator::id_t IDGenerator::Gen() { 7 | if (UNLIKELY(m_size + 1 > m_bset.size())) { 8 | return -1; 9 | } 10 | 11 | std::lock_guard guard(m_lck); 12 | 13 | size_t cur_tmp = m_gen_cur; 14 | do { 15 | size_t cur = m_gen_cur; 16 | m_gen_cur = (m_gen_cur + 1) % m_bset.size(); 17 | auto ref = m_bset[cur]; 18 | if (ref == false) { 19 | ref.flip(); 20 | m_size += 1; 21 | return cur; 22 | } 23 | } while (cur_tmp != m_gen_cur); 24 | 25 | return -1; 26 | } 27 | 28 | IDGenerator::id_t IDGenerator::MultiGen(size_t count) { 29 | if (count == 1) { 30 | return Gen(); 31 | } 32 | 33 | if (UNLIKELY(m_size + count > m_bset.size())) { 34 | return -1; 35 | } 36 | 37 | std::lock_guard guard(m_lck); 38 | 39 | id_t start = -1; 40 | size_t c = 0; 41 | size_t cur_tmp = m_gen_cur; 42 | 43 | do { 44 | size_t cur = m_gen_cur; 45 | m_gen_cur = (m_gen_cur + 1) % m_bset.size(); 46 | if (m_bset[cur] == false) { 47 | if (c == 0) { 48 | start = cur; 49 | } 50 | ++c; 51 | if (c == count) { 52 | for (size_t k = start; k < start + count; ++k) { 53 | m_bset[k].flip(); 54 | } 55 | m_size += count; 56 | return start; 57 | } 58 | } else { 59 | c = 0; 60 | } 61 | 62 | // Preventing Loopback 63 | if (m_gen_cur == 0) { 64 | c = 0; 65 | } 66 | 67 | } while (cur_tmp != m_gen_cur); 68 | 69 | return -1; 70 | } 71 | 72 | void IDGenerator::Recycle(IDGenerator::id_t id) { 73 | 
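// Take the lock before checking the bitset so the double-recycle assertion is race-free.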
std::lock_guard guard(m_lck); 74 | DLOG_ASSERT(m_bset[id] == true, "IDGenerator double recycle"); 75 | 76 | m_bset[id].flip(); 77 | m_size -= 1; 78 | } 79 | 80 | void IDGenerator::MultiRecycle(id_t id, size_t count) { 81 | if (count == 1) { 82 | Recycle(id); 83 | return; 84 | } 85 | 86 | std::lock_guard guard(m_lck); 87 | 88 | // Iterate without consuming `count`, so the size bookkeeping below stays correct. 89 | for (size_t i = 0; i < count; ++i) { 90 | DLOG_ASSERT(m_bset[id] == true, "IDGenerator double recycle"); 91 | m_bset[id].flip(); 92 | id++; 93 | } 94 | m_size -= count; 95 | } 96 | 97 | void IDGenerator::Expand(size_t n) { 98 | std::lock_guard guard(m_lck); 99 | m_bset.insert(m_bset.end(), n, false); 100 | } 101 | -------------------------------------------------------------------------------- /src/cxl.cc: -------------------------------------------------------------------------------- 1 | #include "cxl.hpp" 2 | 3 | #include <fcntl.h> 4 | #include <sys/mman.h> 5 | #include <unistd.h> 6 | 7 | #include <cstdint> 8 | #include <cstdlib> 9 | #include <string> 10 | 11 | #include "config.hpp" 12 | #include "log.hpp" 13 | #include "utils.hpp" 14 | 15 | void *cxl_open_simulate(std::string file, size_t size, int *fd) { 16 | *fd = open(file.c_str(), O_RDWR | O_CREAT, 0666); 17 | DLOG_ASSERT(*fd != -1, "Failed to open cxl dev: %s", file.c_str()); 18 | 19 | // Reserve a 2GB-aligned virtual address range, then remap the CXL file onto it with MAP_FIXED. 20 | void *addr = aligned_alloc(mem_region_aligned_size, size); 21 | free(addr); 22 | addr = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_LOCKED, *fd, 0); 23 | DLOG_ASSERT(addr != MAP_FAILED, "Failed to mmap cxl dev: %s", file.c_str()); 24 | return addr; 25 | } 26 | 27 | void cxl_close_simulate(int fd, CXLMemFormat &format) { 28 | munmap(const_cast<void *>(format.start_addr), format.super_block->total_size); 29 | close(fd); 30 | } 31 | 32 | void cxl_memory_init(CXLMemFormat &format, void *cxl_memory_addr, size_t size, 33 | size_t msgq_zone_size) { 34 | DLOG_ASSERT(size > mem_region_aligned_size, "The size of cxl memory needs to be larger than 2GB"); 35 | 36 | CXLSuperBlock *super_block = reinterpret_cast<CXLSuperBlock *>(cxl_memory_addr); 37 | super_block->total_size = size; 38 | super_block->msgq_zone_size = msgq_zone_size; 39 | super_block->reserve_heap_size = 40 | align_ceil(cxl_super_block_size + msgq_zone_size, mem_region_aligned_size) - 41 | (cxl_super_block_size + msgq_zone_size); 42 | super_block->page_data_zone_size = align_floor( 43 | size - cxl_super_block_size - msgq_zone_size - super_block->reserve_heap_size, page_size); 44 | 45 | cxl_memory_open(format, cxl_memory_addr); 46 | } 47 | 48 | void cxl_memory_open(CXLMemFormat &format, void *cxl_memory_addr) { 49 | format.start_addr = cxl_memory_addr; 50 | format.super_block = reinterpret_cast<CXLSuperBlock *>(cxl_memory_addr); 51 | format.msgq_zone_start_addr = reinterpret_cast<void *>( 52 | (reinterpret_cast<uintptr_t>(cxl_memory_addr) + cxl_super_block_size)); 53 | format.reserve_zone_addr = 54 | reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(format.msgq_zone_start_addr) + 55 | format.super_block->msgq_zone_size)); 56 | format.page_data_start_addr = 57 | reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(format.reserve_zone_addr) + 58 | format.super_block->reserve_heap_size)); 59 | format.end_addr = 60 | reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(format.page_data_start_addr) + 61 | format.super_block->page_data_zone_size)); 62 | 63 | DLOG("super_block: %p", format.super_block); 64 | DLOG("msgq_zone_start_addr: %p", format.msgq_zone_start_addr); 65 | DLOG("reserve_zone_addr: %p", format.reserve_zone_addr); 66 | DLOG("page_data_start_addr: %p", format.page_data_start_addr); 67 | DLOG("end_addr: %p", format.end_addr); 68 | } -------------------------------------------------------------------------------- /src/fiber_poll.cc:
-------------------------------------------------------------------------------- 1 | // Copyright Nat Goodspeed 2014. 2 | // Distributed under the Boost Software License, Version 1.0. 3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | #include "fiber_pool.hpp" 7 | 8 | //[priority_props 9 | priority_props::priority_props(boost::fibers::context* ctx) 10 | : fiber_properties(ctx), /*< Your subclass constructor must accept a 11 | [^[class_link context]*] and pass it to 12 | the `fiber_properties` constructor. >*/ 13 | priority_(1) {} 14 | 15 | int priority_props::get_priority() const { 16 | return priority_; /*< Provide read access methods at your own discretion. >*/ 17 | } 18 | 19 | // Call this method to alter priority, because we must notify 20 | // priority_scheduler of any change. 21 | void priority_props::set_priority(int p) { /*< 22 | It's important to call `notify()` on any 23 | change in a property that can affect the 24 | scheduler's behavior. Therefore, such 25 | modifications should only be performed 26 | through an access method. >*/ 27 | // Of course, it's only worth reshuffling the queue and all if we're 28 | // actually changing the priority. 29 | if (p != priority_) { 30 | priority_ = p; 31 | notify(); 32 | } 33 | } 34 | 35 | void priority_props::set_low_priority() { set_priority(1); } 36 | void priority_props::set_high_priority() { set_priority(100); } 37 | //] 38 | 39 | //[priority_scheduler 40 | 41 | priority_scheduler::priority_scheduler() : rqueue_high_(), rqueue_low_() {} 42 | 43 | // For a subclass of algorithm_with_properties<>, it's important to 44 | // override the correct awakened() overload. 45 | /*<< You must override the [member_link algorithm_with_properties..awakened] 46 | method. This is how your scheduler receives notification of a 47 | fiber that has become ready to run. >>*/ 48 | void priority_scheduler::awakened(boost::fibers::context* ctx, priority_props& props) noexcept { 49 | int ctx_priority = props.get_priority(); /*< `props` is the instance of 50 | priority_props associated 51 | with the passed fiber `ctx`. >*/ 52 | // With this scheduler, fibers with higher priority values are 53 | // preferred over fibers with lower priority values. But fibers with 54 | // equal priority values are processed in round-robin fashion. So when 55 | // we're handed a new context*, put it at the end of the fibers 56 | // with that same priority. In other words: search for the first fiber 57 | // in the queue with LOWER priority, and insert before that one. 58 | if (ctx_priority == 1) { 59 | rqueue_low_.push_back(*ctx); 60 | } else { 61 | rqueue_high_.push_back(*ctx); 62 | } 63 | } 64 | 65 | /*<< You must override the [member_link algorithm_with_properties..pick_next] 66 | method. This is how your scheduler actually advises the fiber manager 67 | of the next fiber to run. >>*/ 68 | boost::fibers::context* priority_scheduler::pick_next() noexcept { 69 | boost::fibers::context* ctx; 70 | if (!rqueue_high_.empty()) { 71 | ctx = &rqueue_high_.front(); 72 | rqueue_high_.pop_front(); 73 | } else if (!rqueue_low_.empty()) { 74 | ctx = &rqueue_low_.front(); 75 | rqueue_low_.pop_front(); 76 | } else { 77 | ctx = nullptr; 78 | } 79 | return ctx; 80 | } 81 | 82 | /*<< You must override [member_link algorithm_with_properties..has_ready_fibers] 83 | to inform the fiber manager of the state of your ready queue. 
>>*/ 84 | bool priority_scheduler::has_ready_fibers() const noexcept { 85 | return !rqueue_high_.empty() || !rqueue_low_.empty(); 86 | } 87 | 88 | /*<< Overriding [member_link algorithm_with_properties..property_change] 89 | is optional. This override handles the case in which the running 90 | fiber changes the priority of another ready fiber: a fiber already in 91 | our queue. In that case, move the updated fiber within the queue. >>*/ 92 | void priority_scheduler::property_change(boost::fibers::context* ctx, 93 | priority_props& props) noexcept { 94 | // Although our priority_props class defines multiple properties, only 95 | // one of them (priority) actually calls notify() when changed. The 96 | // point of a property_change() override is to reshuffle the ready 97 | // queue according to the updated priority value. 98 | 99 | // 'ctx' might not be in our queue at all, if caller is changing the 100 | // priority of (say) the running fiber. If it's not there, no need to 101 | // move it: we'll handle it next time it hits awakened(). 102 | if (!ctx->ready_is_linked()) { /*< 103 | Your `property_change()` override must be able to 104 | handle the case in which the passed `ctx` is not in 105 | your ready queue. It might be running, or it might be 106 | blocked. >*/ 107 | //<- 108 | // hopefully user will distinguish this case by noticing that 109 | // the fiber with which we were called does not appear in the 110 | // ready queue at all 111 | //-> 112 | return; 113 | } 114 | 115 | // Found ctx: unlink it 116 | ctx->ready_unlink(); 117 | 118 | // Here we know that ctx was in our ready queue, but we've unlinked 119 | // it. We happen to have a method that will (re-)add a context* to the 120 | // right place in the ready queue. 121 | awakened(ctx, props); 122 | } 123 | 124 | void priority_scheduler::suspend_until( 125 | std::chrono::steady_clock::time_point const& time_point) noexcept { 126 | if ((std::chrono::steady_clock::time_point::max)() == time_point) { 127 | std::unique_lock lk(mtx_); 128 | cnd_.wait(lk, [this]() { return flag_; }); 129 | flag_ = false; 130 | } else { 131 | std::unique_lock lk(mtx_); 132 | cnd_.wait_until(lk, time_point, [this]() { return flag_; }); 133 | flag_ = false; 134 | } 135 | } 136 | 137 | void priority_scheduler::notify() noexcept { 138 | { 139 | std::unique_lock lk(mtx_); 140 | flag_ = true; 141 | } 142 | cnd_.notify_all(); 143 | } 144 | 145 | FiberPool::~FiberPool() { EraseAll(); } 146 | 147 | size_t FiberPool::FiberSize() const { return fibers_.size(); } 148 | 149 | void FiberPool::AddFiber(size_t n) { AddFiber(fr_queue_, n); } 150 | 151 | void FiberPool::AddFiber(WorkerFiberTaskQueue& my_queue, size_t n) { 152 | std::unique_lock lock(my_queue.fiber_mutex_); 153 | for (std::size_t i = 0; i < n; ++i) { 154 | auto fiber = boost::fibers::fiber([this, &my_queue] { 155 | while (!fiber_stop_) { 156 | std::function task; 157 | { 158 | std::unique_lock lock(my_queue.fiber_mutex_); 159 | my_queue.fiber_cond_.wait(lock, [this, &my_queue] { 160 | return !my_queue.fiber_tasks_.empty() || fiber_stop_; 161 | }); 162 | if (fiber_stop_) return; 163 | if (my_queue.fiber_tasks_.empty()) continue; 164 | task = std::move(my_queue.fiber_tasks_.front()); 165 | my_queue.fiber_tasks_.pop(); 166 | } 167 | task(); 168 | } 169 | }); 170 | fiber.properties().set_low_priority(); 171 | fibers_.emplace_back(std::move(fiber)); 172 | } 173 | } 174 | 175 | void FiberPool::EraseAll() { 176 | { 177 | std::unique_lock lock(fr_queue_.fiber_mutex_); 178 | fiber_stop_ = true; 179 | } 180 | 
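// Wake every blocked worker fiber so it can observe fiber_stop_ and exit its loop.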
fr_queue_.fiber_cond_.notify_all(); 181 | 182 | for (auto& fiber : fibers_) { 183 | fiber.join(); 184 | } 185 | fibers_.clear(); 186 | } 187 | -------------------------------------------------------------------------------- /src/include/allocator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "lock.hpp" 4 | #include "log.hpp" 5 | #include "utils.hpp" 6 | 7 | template 8 | class ObjectPoolAllocator { 9 | public: 10 | using value_type = T; 11 | 12 | ObjectPoolAllocator() = default; 13 | 14 | template 15 | ObjectPoolAllocator(const ObjectPoolAllocator&) {} 16 | 17 | T* allocate(size_t n) { 18 | DLOG_ASSERT(n == 1, "Must allocate 1 element"); 19 | 20 | if (pool.empty()) { 21 | return static_cast(::operator new(sizeof(T))); 22 | } else { 23 | T* obj = pool.back(); 24 | pool.pop_back(); 25 | return obj; 26 | } 27 | } 28 | 29 | void deallocate(T* p, size_t n) { pool.push_back(p); } 30 | 31 | private: 32 | class raw_ptr_vector : public std::vector { 33 | public: 34 | ~raw_ptr_vector() { 35 | for (auto* p : *this) { 36 | ::operator delete(p); 37 | } 38 | } 39 | }; 40 | 41 | static thread_local raw_ptr_vector pool; 42 | }; 43 | 44 | template 45 | thread_local typename ObjectPoolAllocator::raw_ptr_vector ObjectPoolAllocator::pool; 46 | 47 | class IDGenerator { 48 | public: 49 | using id_t = uint64_t; 50 | 51 | IDGenerator() : m_gen_cur(0), m_size(0) {} 52 | 53 | bool empty() const { return size() == 0; } 54 | 55 | bool full() const { return size() == capacity(); } 56 | 57 | size_t size() const { return m_size; } 58 | 59 | size_t capacity() const { return m_bset.size(); } 60 | 61 | id_t Gen(); 62 | 63 | id_t MultiGen(size_t count); 64 | 65 | void Recycle(id_t id); 66 | 67 | void MultiRecycle(id_t id, size_t count); 68 | 69 | void Expand(size_t n); 70 | 71 | private: 72 | size_t m_size; 73 | size_t m_gen_cur; 74 | std::vector m_bset; 75 | Mutex m_lck; 76 | }; 77 | 78 | template 79 | class SingleAllocator : private IDGenerator { 80 | public: 81 | SingleAllocator(size_t total_size) { Expand(total_size / UNIT_SZ); } 82 | 83 | uintptr_t allocate(size_t n) { 84 | DLOG_ASSERT(n == 1, "Must allocate 1 element"); 85 | IDGenerator::id_t id = Gen(); 86 | if (UNLIKELY(id == -1)) { 87 | return -1; 88 | } 89 | return id * UNIT_SZ; 90 | } 91 | 92 | void deallocate(uintptr_t ptr, size_t n) { Recycle(ptr / UNIT_SZ); } 93 | }; 94 | 95 | template 96 | class RingArena { 97 | public: 98 | RingArena() { 99 | for (size_t i = 0; i < BucketNum; ++i) { 100 | m_bs[i].pv.raw = 0; 101 | } 102 | } 103 | 104 | const void* base() const { return m_bs; } 105 | 106 | void* allocate(size_t s) { 107 | DLOG_ASSERT(s <= block_size, "Can't allocate large than block size: %lu, %lu", s, 108 | block_size); 109 | 110 | thread_local uint8_t b_cur = (reinterpret_cast(&b_cur) >> 5) % BucketNum; 111 | b_cur = (b_cur + 1) % BucketNum; 112 | uint8_t bc = b_cur; 113 | do { 114 | Block& b = m_bs[bc]; 115 | atomic_po_val_t opv = b.pv.load(std::memory_order_acquire), npv; 116 | 117 | while (1) { 118 | npv = opv; 119 | npv.pos += s; 120 | if (npv.pos < block_size) { 121 | ++npv.cnt; 122 | 123 | if (b.pv.compare_exchange_weak(opv, npv, std::memory_order_acquire, 124 | std::memory_order_acquire)) { 125 | return b.b + opv.pos; 126 | } 127 | 128 | } else { 129 | bc = (bc + 1) % BucketNum; 130 | break; 131 | } 132 | } 133 | 134 | } while (bc != b_cur); 135 | 136 | return nullptr; 137 | } 138 | 139 | void deallocate(void* p, size_t n) { 140 | uint8_t bc = div_floor(reinterpret_cast(p) - 
reinterpret_cast(m_bs), 141 | sizeof(Block)); 142 | DLOG_ASSERT(bc < BucketNum, "Out Of Memory"); 143 | 144 | Block& b = m_bs[bc]; 145 | atomic_po_val_t opv = b.pv.load(std::memory_order_acquire), npv; 146 | do { 147 | DLOG_ASSERT(opv.cnt != 0); 148 | npv = opv; 149 | if ((--npv.cnt) == 0) { 150 | npv.pos = 0; 151 | } 152 | } while (!b.pv.compare_exchange_weak(opv, npv, std::memory_order_release, 153 | std::memory_order_acquire)); 154 | } 155 | 156 | private: 157 | static constexpr size_t block_size = SZ / BucketNum; 158 | 159 | struct Block { 160 | atomic_po_val_t pv; 161 | uint8_t b[block_size]; 162 | }; 163 | 164 | Block m_bs[BucketNum]; 165 | }; 166 | 167 | template 168 | class ObjectPool { 169 | public: 170 | T* pop() { 171 | if (pool.empty()) { 172 | return new T(); 173 | } else { 174 | T* obj = pool.back(); 175 | pool.pop_back(); 176 | obj->clear(); 177 | return obj; 178 | } 179 | } 180 | 181 | void put(T* p) { pool.push_back(p); } 182 | 183 | private: 184 | class raw_ptr_vector : public std::vector { 185 | public: 186 | ~raw_ptr_vector() { 187 | for (auto* p : *this) { 188 | delete (p); 189 | } 190 | } 191 | }; 192 | 193 | static thread_local raw_ptr_vector pool; 194 | }; 195 | 196 | template 197 | thread_local typename ObjectPool::raw_ptr_vector ObjectPool::pool; -------------------------------------------------------------------------------- /src/include/common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "config.hpp" 6 | #include "rcmp.hpp" 7 | 8 | using page_id_t = uint64_t; 9 | using offset_t = uint64_t; 10 | using mac_id_t = uint32_t; 11 | using rack_id_t = uint32_t; 12 | 13 | enum SystemRole : uint8_t { 14 | MN = 1, 15 | CN = 2, 16 | CXL_CN = 3, 17 | DAEMON = 4, 18 | CXL_DAEMON = 5, 19 | }; 20 | 21 | constexpr static mac_id_t master_id = 0; 22 | constexpr static page_id_t invalid_page_id = -1; 23 | 24 | union GAddrCombineUnion { 25 | struct { 26 | offset_t off : offset_bits; 27 | page_id_t p : page_id_bits; 28 | }; 29 | rcmp::GAddr gaddr; 30 | }; 31 | 32 | inline static page_id_t GetPageID(rcmp::GAddr gaddr) { 33 | GAddrCombineUnion u; 34 | u.gaddr = gaddr; 35 | return u.p; 36 | } 37 | inline static offset_t GetPageOffset(rcmp::GAddr gaddr) { 38 | GAddrCombineUnion u; 39 | u.gaddr = gaddr; 40 | return u.off; 41 | } 42 | inline static rcmp::GAddr GetGAddr(page_id_t page_id, offset_t offset) { 43 | GAddrCombineUnion u; 44 | u.p = page_id; 45 | u.off = offset; 46 | return u.gaddr; 47 | } 48 | 49 | struct RDMARCConnectParam { 50 | SystemRole role; 51 | mac_id_t mac_id; 52 | }; -------------------------------------------------------------------------------- /src/include/concurrent_hashmap.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "config.hpp" 7 | #include "lock.hpp" 8 | #include "robin_hood.h" 9 | #include "utils.hpp" 10 | 11 | template > 13 | class ConcurrentHashMap { 14 | constexpr static const size_t BucketNum = BUCKET_NUM; 15 | 16 | struct SliceHash { 17 | size_t operator()(K key) const { return _Hash()(key) / BUCKET_NUM; } 18 | }; 19 | 20 | using HashTable = std::unordered_map; 21 | 22 | public: 23 | /** 24 | * @brief 25 | * @warning Rehashing causes the iterator to fail, but the change cannot be sensed. The iterator 26 | * should be updated in a timely manner during a large number of `insert()`, or `at()` should be 27 | * used. 
28 | */ 29 | class iterator { 30 | public: 31 | std::pair* operator->() { return it.operator->(); } 32 | bool operator==(const iterator& other) { return hidx == other.hidx && it == other.it; } 33 | bool operator!=(const iterator& other) { return hidx != other.hidx || it != other.it; } 34 | 35 | private: 36 | friend class ConcurrentHashMap; 37 | 38 | iterator(int hidx, typename HashTable::iterator it) : hidx(hidx), it(it) {} 39 | 40 | int hidx; 41 | typename HashTable::iterator it; 42 | }; 43 | 44 | const iterator end() { return {0, m_shards[0].m_map.end()}; } 45 | 46 | bool empty() const { 47 | for (size_t i = 0; i < BucketNum; ++i) { 48 | if (!m_shards[i].m_map.empty()) { 49 | return false; 50 | } 51 | } 52 | return true; 53 | } 54 | 55 | size_t size() const { 56 | size_t count = 0; 57 | for (size_t i = 0; i < BucketNum; ++i) { 58 | count += m_shards[i].m_map.size(); 59 | } 60 | return count; 61 | } 62 | 63 | std::pair insert(K key, V val) { 64 | int index = hash(key); 65 | auto& shard = m_shards[index]; 66 | auto& map = shard.m_map; 67 | 68 | std::unique_lock<__SharedMutex> guard(shard.m_lock); 69 | auto p = map.emplace(key, val); 70 | return {{index, p.first}, p.second}; 71 | } 72 | 73 | iterator find(K key) { 74 | int index = hash(key); 75 | auto& shard = m_shards[index]; 76 | auto& map = shard.m_map; 77 | 78 | std::shared_lock<__SharedMutex> guard(shard.m_lock); 79 | auto it = map.find(key); 80 | if (it != map.end()) { 81 | return {index, it}; 82 | } 83 | return end(); 84 | } 85 | 86 | V& at(K key) { 87 | int index = hash(key); 88 | auto& shard = m_shards[index]; 89 | auto& map = shard.m_map; 90 | 91 | std::shared_lock<__SharedMutex> guard(shard.m_lock); 92 | return map.at(key); 93 | } 94 | 95 | V& operator[](K key) { return at(key); } 96 | 97 | /** 98 | * @brief Finds an element. If it does not exist, call `cotr_fn()` to insert a new element 99 | * 100 | * @tparam ConFn 101 | * @param key 102 | * @param cotr_fn 103 | * @return std::pair 104 | */ 105 | template 106 | std::pair find_or_emplace(K key, ConFn&& ctor_fn) { 107 | auto iter = find(key); 108 | if (iter != end()) { 109 | return {iter, false}; 110 | } 111 | 112 | int index = hash(key); 113 | auto& shard = m_shards[index]; 114 | auto& map = shard.m_map; 115 | 116 | std::unique_lock<__SharedMutex> guard(shard.m_lock); 117 | auto it = map.find(key); 118 | if (it != map.end()) { 119 | return {{index, it}, false}; 120 | } 121 | 122 | auto p = map.emplace(key, std::move(ctor_fn())); 123 | return {{index, p.first}, p.second}; 124 | } 125 | 126 | void erase(K key) { 127 | auto it = find(key); 128 | erase(it); 129 | } 130 | 131 | void erase(iterator it) { 132 | if (it == end()) return; 133 | 134 | auto& shard = m_shards[it.hidx]; 135 | auto& map = shard.m_map; 136 | 137 | std::unique_lock<__SharedMutex> guard(shard.m_lock); 138 | map.erase(it.it); 139 | } 140 | 141 | /** 142 | * @tparam F 143 | * @param f bool(std::pair &),Returning false means the traversal is terminated. 
144 | */ 145 | template 146 | void foreach_all(F&& f) { 147 | for (size_t i = 0; i < BucketNum; ++i) { 148 | auto& shard = m_shards[i]; 149 | auto& map = shard.m_map; 150 | 151 | std::shared_lock<__SharedMutex> guard(shard.m_lock); 152 | for (auto& p : map) { 153 | if (!f(p)) { 154 | return; 155 | } 156 | } 157 | } 158 | } 159 | 160 | private: 161 | struct CACHE_ALIGN Shard { 162 | __SharedMutex m_lock; 163 | HashTable m_map; 164 | }; 165 | 166 | Shard m_shards[BucketNum]; 167 | 168 | static size_t hash(K key) { return std::hash()(key) % BucketNum; } 169 | }; 170 | 171 | template > 173 | class RandomAccessMap { 174 | public: 175 | class iterator { 176 | public: 177 | std::pair* operator->() { 178 | if (m->size() < index || m->values_[index].first != key) { 179 | update(); 180 | } 181 | return &m->values_[index]; 182 | } 183 | std::pair& operator*() { 184 | if (m->size() < index || m->values_[index].first != key) { 185 | update(); 186 | } 187 | return m->values_[index]; 188 | } 189 | bool operator==(const iterator& other) { return m == other.m && key == other.key; } 190 | bool operator!=(const iterator& other) { return m != other.m || key != other.key; } 191 | 192 | private: 193 | friend class RandomAccessMap; 194 | 195 | iterator(int index, K key, RandomAccessMap* m) : index(index), key(key), m(m) {} 196 | 197 | void update() { 198 | auto new_it = m->find(key); 199 | index = new_it.index; 200 | key = new_it.key; 201 | m = new_it.m; 202 | } 203 | 204 | int index; 205 | K key; 206 | RandomAccessMap* m; 207 | }; 208 | 209 | bool empty() const { return values_.empty(); } 210 | size_t size() const { return values_.size(); } 211 | 212 | const iterator end() { return iterator(-1, K(), this); } 213 | 214 | std::pair emplace(const K& key, const V& value) { 215 | std::unique_lock<__SharedMutex> guard(lock_); 216 | auto it = key_to_index_.find(key); 217 | if (it != key_to_index_.end()) { 218 | return {iterator(it->second, key, this), false}; 219 | } 220 | 221 | int index = key_to_index_[key] = values_.size() - 1; 222 | values_.push_back(value); 223 | return {iterator(index, key, this), true}; 224 | } 225 | 226 | iterator find(const K& key) { 227 | std::shared_lock<__SharedMutex> guard(lock_); 228 | auto it = key_to_index_.find(key); 229 | if (it == key_to_index_.end()) { 230 | return end(); 231 | } 232 | return iterator(it->second, key, this); 233 | } 234 | 235 | template 236 | std::pair find_or_emplace(const K& key, ConFn&& ctor_fn) { 237 | auto iter = find(key); 238 | if (iter != end()) { 239 | return {iter, false}; 240 | } 241 | 242 | std::unique_lock<__SharedMutex> guard(lock_); 243 | auto it = key_to_index_.find(key); 244 | if (it != key_to_index_.end()) { 245 | return {iterator(it->second, key, this), false}; 246 | } 247 | 248 | int index = values_.size(); 249 | key_to_index_.emplace(key, index); 250 | 251 | values_.push_back({key, std::move(ctor_fn())}); 252 | return {iterator(index, key, this), true}; 253 | } 254 | 255 | V& at(const K& key) { 256 | std::shared_lock<__SharedMutex> guard(lock_); 257 | return key_to_index_.find(key)->second; 258 | } 259 | 260 | void erase(iterator it) { 261 | if (it == end()) return; 262 | 263 | std::unique_lock<__SharedMutex> guard(lock_); 264 | 265 | auto kit = key_to_index_.find(it.key); 266 | int index = kit->second; 267 | key_to_index_.erase(kit); 268 | values_[index] = std::move(values_.back()); 269 | values_.pop_back(); 270 | if (index < values_.size()) { 271 | key_to_index_[values_[index].first] = index; 272 | } 273 | } 274 | 275 | template 276 | 
std::vector> getRandomN(Genrator g, size_t n, F &&filter_fn) { 277 | DLOG_ASSERT(n <= values_.size()); 278 | 279 | std::shared_lock<__SharedMutex> guard(lock_); 280 | 281 | std::vector> result; 282 | std::vector indices(values_.size(), false); 283 | std::uniform_int_distribution<> dis(0, values_.size() - 1); 284 | while (result.size() < n) { 285 | int index = dis(g); 286 | if (!indices[index]) { 287 | indices[index] = true; 288 | if (filter_fn(values_[index])) { 289 | result.push_back(values_[index]); 290 | } 291 | } 292 | } 293 | return result; 294 | } 295 | 296 | private: 297 | __SharedMutex lock_; 298 | std::vector> values_; 299 | std::unordered_map key_to_index_; 300 | }; -------------------------------------------------------------------------------- /src/include/concurrent_queue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "utils.hpp" 8 | 9 | enum ConcurrentQueueProducerMode { SP, MP }; 10 | enum ConcurrentQueueConsumerMode { SC, MC }; 11 | 12 | template 14 | class ConcurrentQueue; 15 | 16 | template 17 | class ConcurrentQueue { 18 | public: 19 | ConcurrentQueue() : m_head(0), m_tail(0) {} 20 | ~ConcurrentQueue() = default; 21 | 22 | size_t capacity() const { return SZ; } 23 | 24 | bool TryEnqueue(T n) { 25 | uint32_t tail = m_tail.load(std::memory_order_relaxed); 26 | uint32_t next_tail = (tail + 1) % SZ; 27 | if (UNLIKELY(next_tail == m_head.load(std::memory_order_acquire))) { 28 | return false; // full 29 | } 30 | m_data[tail] = std::move(n); 31 | m_tail.store(next_tail, std::memory_order_release); 32 | return true; 33 | } 34 | 35 | bool TryDequeue(T *n) { 36 | uint32_t head = m_head.load(std::memory_order_relaxed); 37 | if (UNLIKELY(head == m_tail.load(std::memory_order_acquire))) { 38 | return false; // empty 39 | } 40 | *n = std::move(m_data[head]); 41 | m_head.store((head + 1) % SZ, std::memory_order_release); 42 | return true; 43 | } 44 | 45 | private: 46 | std::atomic m_head; 47 | T m_data[SZ]; 48 | std::atomic m_tail; 49 | }; 50 | 51 | template 52 | class ConcurrentQueue { 53 | public: 54 | ConcurrentQueue() { 55 | m_prod_head.raw = 0; 56 | m_prod_tail.raw = 0; 57 | m_cons_tail = 0; 58 | } 59 | 60 | size_t capacity() const { return SZ; } 61 | 62 | void ForceEnqueue(T n) { 63 | atomic_po_val_t h, oh, nh; 64 | 65 | oh = m_prod_head.fetch_add_both(1, 1, std::memory_order_acquire); 66 | while (UNLIKELY(oh.pos - m_cons_tail.load(std::memory_order_relaxed) >= SZ)) { 67 | h = m_prod_tail.load(std::memory_order_acquire); 68 | while (h.cnt == oh.cnt && 69 | !m_prod_tail.compare_exchange_weak(h, oh, std::memory_order_release, 70 | std::memory_order_acquire)) { 71 | } 72 | } 73 | 74 | m_data[oh.pos % SZ] = std::move(n); 75 | 76 | oh = m_prod_tail.load(std::memory_order_acquire); 77 | do { 78 | h = m_prod_head.load(std::memory_order_relaxed); 79 | nh = oh; 80 | if ((++nh.cnt) == h.cnt) nh.pos = h.pos; 81 | } while (!m_prod_tail.compare_exchange_weak(oh, nh, std::memory_order_release, 82 | std::memory_order_acquire)); 83 | } 84 | 85 | bool TryEnqueue(T n) { 86 | atomic_po_val_t h, oh, nh; 87 | 88 | oh = m_prod_head.load(std::memory_order_acquire); 89 | do { 90 | if (UNLIKELY(oh.pos - m_cons_tail.load(std::memory_order_relaxed) >= SZ)) { 91 | return false; 92 | } 93 | nh.pos = oh.pos + 1; 94 | nh.cnt = oh.cnt + 1; 95 | } while (!m_prod_head.compare_exchange_weak(oh, nh, std::memory_order_acquire, 96 | std::memory_order_acquire)); 97 | 98 | m_data[oh.pos % SZ] = 
std::move(n); 99 | 100 | oh = m_prod_tail.load(std::memory_order_acquire); 101 | do { 102 | h = m_prod_head.load(std::memory_order_relaxed); 103 | nh = oh; 104 | if ((++nh.cnt) == h.cnt) nh.pos = h.pos; 105 | } while (!m_prod_tail.compare_exchange_weak(oh, nh, std::memory_order_release, 106 | std::memory_order_acquire)); 107 | 108 | return true; 109 | } 110 | 111 | bool TryDequeue(T *n) { return TryDequeue(n, n + 1) == 1; } 112 | 113 | template 114 | uint32_t TryDequeue(Iter first, Iter last) { 115 | uint32_t l = 0; 116 | uint32_t count = std::distance(first, last); 117 | uint32_t ot = m_cons_tail.load(std::memory_order_relaxed); 118 | l = std::min(count, m_prod_tail.load(std::memory_order_relaxed).pos - ot); 119 | if (l == 0) { 120 | return 0; 121 | } 122 | 123 | for (uint32_t i = 0; i < l; ++i) { 124 | *(first++) = std::move(m_data[(ot + i) % SZ]); 125 | } 126 | 127 | m_cons_tail.store(ot + l, std::memory_order_release); 128 | return l; 129 | } 130 | 131 | private: 132 | atomic_po_val_t m_prod_head; 133 | atomic_po_val_t m_prod_tail; 134 | 135 | T m_data[SZ]; 136 | 137 | std::atomic m_cons_tail; 138 | }; 139 | 140 | template 141 | class ConcurrentQueue { 142 | public: 143 | ConcurrentQueue() { 144 | m_prod_head.raw = 0; 145 | m_prod_tail.raw = 0; 146 | m_cons_head.raw = 0; 147 | m_cons_tail.raw = 0; 148 | } 149 | ~ConcurrentQueue() = default; 150 | 151 | size_t capacity() const { return SZ; } 152 | 153 | void ForceEnqueue(T n) { 154 | atomic_po_val_t h, oh, nh; 155 | 156 | oh = m_prod_head.fetch_add_both(1, 1, std::memory_order_acquire); 157 | while (UNLIKELY(oh.pos >= m_cons_tail.load(std::memory_order_relaxed).pos + SZ)) { 158 | h = m_prod_tail.load(std::memory_order_acquire); 159 | while (h.cnt == oh.cnt && 160 | !m_prod_tail.compare_exchange_weak(h, oh, std::memory_order_release, 161 | std::memory_order_acquire)) { 162 | } 163 | } 164 | 165 | m_data[oh.pos % SZ] = std::move(n); 166 | 167 | oh = m_prod_tail.load(std::memory_order_acquire); 168 | do { 169 | h = m_prod_head.load(std::memory_order_relaxed); 170 | nh = oh; 171 | if ((++nh.cnt) == h.cnt) nh.pos = h.pos; 172 | } while (!m_prod_tail.compare_exchange_weak(oh, nh, std::memory_order_release, 173 | std::memory_order_acquire)); 174 | } 175 | 176 | bool TryEnqueue(T n) { 177 | atomic_po_val_t h, oh, nh; 178 | 179 | oh = m_prod_head.load(std::memory_order_acquire); 180 | do { 181 | if (UNLIKELY(oh.pos - m_cons_tail.load(std::memory_order_relaxed).pos >= SZ)) { 182 | return false; 183 | } 184 | nh.pos = oh.pos + 1; 185 | nh.cnt = oh.cnt + 1; 186 | } while (!m_prod_head.compare_exchange_weak(oh, nh, std::memory_order_acquire, 187 | std::memory_order_acquire)); 188 | 189 | m_data[oh.pos % SZ] = std::move(n); 190 | 191 | oh = m_prod_tail.load(std::memory_order_acquire); 192 | do { 193 | h = m_prod_head.load(std::memory_order_relaxed); 194 | nh = oh; 195 | if ((++nh.cnt) == h.cnt) nh.pos = h.pos; 196 | } while (!m_prod_tail.compare_exchange_weak(oh, nh, std::memory_order_release, 197 | std::memory_order_acquire)); 198 | 199 | return true; 200 | } 201 | 202 | bool TryDequeue(T *n) { return TryDequeue(n, n + 1) == 1; } 203 | 204 | template 205 | uint32_t TryDequeue(Iter first, Iter last) { 206 | atomic_po_val_t t, ot, nt; 207 | uint32_t l = 0; 208 | uint32_t count = std::distance(first, last); 209 | 210 | ot = m_cons_head.load(std::memory_order_acquire); 211 | do { 212 | l = std::min(count, m_prod_tail.load(std::memory_order_relaxed).pos - ot.pos); 213 | if (l == 0) { 214 | return 0; 215 | } 216 | nt.pos = ot.pos + l; 217 | nt.cnt = 
ot.cnt + 1; 218 | } while (!m_cons_head.compare_exchange_weak(ot, nt, std::memory_order_acquire, 219 | std::memory_order_acquire)); 220 | 221 | for (uint32_t i = 0; i < l; ++i) { 222 | *(first++) = std::move(m_data[(ot.pos + i) % SZ]); 223 | } 224 | 225 | ot = m_cons_tail.load(std::memory_order_acquire); 226 | do { 227 | t = m_cons_head.load(std::memory_order_relaxed); 228 | nt = ot; 229 | if ((++nt.cnt) == t.cnt) nt.pos = t.pos; 230 | } while (!m_cons_tail.compare_exchange_weak(ot, nt, std::memory_order_release, 231 | std::memory_order_acquire)); 232 | 233 | return l; 234 | } 235 | 236 | private: 237 | atomic_po_val_t m_prod_head; 238 | atomic_po_val_t m_prod_tail; 239 | 240 | T m_data[SZ]; 241 | 242 | atomic_po_val_t m_cons_head; 243 | atomic_po_val_t m_cons_tail; 244 | }; 245 | -------------------------------------------------------------------------------- /src/include/config.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cstdint> 4 | 5 | #include "rcmp.hpp" 6 | 7 | #define MSGQ_SINGLE_FIFO_ON 0 8 | 9 | constexpr static size_t page_size = 4ul << 10; 10 | constexpr static size_t cache_line_size = 64; 11 | constexpr static size_t min_slab_size = 64; 12 | constexpr static size_t mem_region_aligned_size = 2ul << 30; 13 | 14 | constexpr static size_t offset_bits = __builtin_ffsl(page_size) - 1; 15 | constexpr static size_t page_id_bits = sizeof(rcmp::GAddr) * 8 - offset_bits; 16 | 17 | constexpr static size_t msgq_ring_buf_len = 16ul << 20; 18 | constexpr static size_t msgq_ring_depth = 256; 19 | constexpr static size_t write_batch_buffer_size = 64ul << 20; 20 | constexpr static size_t write_batch_buffer_overflow_size = 2ul << 20; 21 | 22 | constexpr static size_t get_page_cxl_ref_or_proxy_write_raw_max_size = UINT64_MAX; 23 | 24 | /** 25 | * @brief Intervals before and after heat statistics 26 | */ 27 | constexpr static size_t hot_stat_freq_timeout_interval = 100; -------------------------------------------------------------------------------- /src/include/cxl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <string> 4 | 5 | constexpr static size_t cxl_super_block_size = 4096; 6 | 7 | struct CXLSuperBlock { 8 | size_t total_size; 9 | size_t msgq_zone_size; 10 | size_t reserve_heap_size; 11 | size_t page_data_zone_size; 12 | }; 13 | 14 | /** 15 | * @brief cxl memory block format 16 | * 17 | * 2GB align 2GB align 18 | * 19 | * 0 4096 4096+msgq align(4096+msgq,2GB) align(psize) total 20 | * 21 | * [sp blk][ msgq ][ reserve ][ page data ][ unused ] 22 | */ 23 | 24 | struct CXLMemFormat { 25 | const void *start_addr; 26 | CXLSuperBlock *super_block; 27 | void *msgq_zone_start_addr; 28 | void *reserve_zone_addr; 29 | void *page_data_start_addr; 30 | const void *end_addr; 31 | }; 32 | 33 | void *cxl_open_simulate(std::string file, size_t size, int *fd); 34 | void cxl_close_simulate(int fd, CXLMemFormat &format); 35 | void cxl_memory_init(CXLMemFormat &format, void *cxl_memory_addr, size_t size, size_t msgq_zone_size); 36 | void cxl_memory_open(CXLMemFormat &format, void *cxl_memory_addr); 37 | -------------------------------------------------------------------------------- /src/include/fiber_pool.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Nat Goodspeed 2014. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See accompanying file LICENSE_1_0.txt or copy at 4 | // http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | #pragma once 7 | 8 | #include // std::find_if() 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "log.hpp" 22 | 23 | //[priority_props 24 | class priority_props : public boost::fibers::fiber_properties { 25 | public: 26 | priority_props(boost::fibers::context* ctx); 27 | 28 | int get_priority() const; 29 | 30 | // Call this method to alter priority, because we must notify 31 | // priority_scheduler of any change. 32 | void set_priority(int p); 33 | 34 | void set_low_priority(); 35 | void set_high_priority(); 36 | 37 | // The fiber name of course is solely for purposes of this example 38 | // program; it has nothing to do with implementing scheduler priority. 39 | // This is a public data member -- not requiring set/get access methods -- 40 | // because we need not inform the scheduler of any change. 41 | std::string name; /*< A property that does not affect the scheduler does 42 | not need access methods. >*/ 43 | private: 44 | int priority_; 45 | }; 46 | //] 47 | 48 | //[priority_scheduler 49 | class priority_scheduler : public boost::fibers::algo::algorithm_with_properties { 50 | private: 51 | typedef boost::fibers::scheduler::ready_queue_type /*< See [link ready_queue_t]. >*/ rqueue_t; 52 | 53 | rqueue_t rqueue_high_; 54 | rqueue_t rqueue_low_; 55 | std::mutex mtx_{}; 56 | std::condition_variable cnd_{}; 57 | bool flag_{false}; 58 | 59 | public: 60 | priority_scheduler(); 61 | 62 | // For a subclass of algorithm_with_properties<>, it's important to 63 | // override the correct awakened() overload. 64 | /*<< You must override the [member_link algorithm_with_properties..awakened] 65 | method. This is how your scheduler receives notification of a 66 | fiber that has become ready to run. >>*/ 67 | virtual void awakened(boost::fibers::context* ctx, priority_props& props) noexcept; 68 | 69 | /*<< You must override the [member_link algorithm_with_properties..pick_next] 70 | method. This is how your scheduler actually advises the fiber manager 71 | of the next fiber to run. >>*/ 72 | virtual boost::fibers::context* pick_next() noexcept; 73 | 74 | /*<< You must override [member_link algorithm_with_properties..has_ready_fibers] 75 | to inform the fiber manager of the state of your ready queue. >>*/ 76 | virtual bool has_ready_fibers() const noexcept; 77 | 78 | /*<< Overriding [member_link algorithm_with_properties..property_change] 79 | is optional. This override handles the case in which the running 80 | fiber changes the priority of another ready fiber: a fiber already in 81 | our queue. In that case, move the updated fiber within the queue. 
>>*/ 82 | virtual void property_change(boost::fibers::context* ctx, priority_props& props) noexcept; 83 | 84 | void suspend_until(std::chrono::steady_clock::time_point const& time_point) noexcept; 85 | 86 | void notify() noexcept; 87 | }; 88 | 89 | class FiberPool { 90 | private: 91 | struct WorkerFiberTaskQueue { 92 | std::queue> fiber_tasks_; 93 | boost::fibers::mutex fiber_mutex_; 94 | boost::fibers::condition_variable fiber_cond_; 95 | }; 96 | 97 | public: 98 | ~FiberPool(); 99 | 100 | size_t FiberSize() const; 101 | 102 | void AddFiber(size_t n); 103 | 104 | void AddFiber(WorkerFiberTaskQueue& my_queue, size_t n); 105 | 106 | void EraseAll(); 107 | 108 | template 109 | void EnqueueTask(F&& f) { 110 | { 111 | std::unique_lock lock(fr_queue_.fiber_mutex_); 112 | fr_queue_.fiber_tasks_.emplace(std::forward(f)); 113 | } 114 | fr_queue_.fiber_cond_.notify_one(); 115 | } 116 | 117 | private: 118 | std::vector fibers_; 119 | volatile bool fiber_stop_ = false; 120 | volatile bool stop_ = false; 121 | WorkerFiberTaskQueue fr_queue_; 122 | }; 123 | -------------------------------------------------------------------------------- /src/include/lock.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using Mutex = std::mutex; 11 | using SharedMutex = std::shared_mutex; 12 | 13 | class SpinMutex { 14 | public: 15 | SpinMutex() { pthread_spin_init(&m_spinlock, 0); } 16 | ~SpinMutex() { pthread_spin_destroy(&m_spinlock); } 17 | 18 | void lock() { pthread_spin_lock(&m_spinlock); } 19 | bool try_lock() { return pthread_spin_trylock(&m_spinlock) == 0; } 20 | void unlock() { pthread_spin_unlock(&m_spinlock); } 21 | 22 | private: 23 | pthread_spinlock_t m_spinlock; 24 | }; 25 | 26 | class Barrier { 27 | public: 28 | Barrier(uint32_t n) { pthread_barrier_init(&m_b, nullptr, n); } 29 | ~Barrier() { pthread_barrier_destroy(&m_b); } 30 | 31 | void wait() { pthread_barrier_wait(&m_b); } 32 | 33 | private: 34 | pthread_barrier_t m_b; 35 | }; 36 | 37 | using CortMutex = boost::fibers::mutex; 38 | using CortConditionalVariable = boost::fibers::condition_variable; 39 | 40 | class CortSharedMutex { 41 | public: 42 | CortSharedMutex() : state(0) {} 43 | 44 | void lock() { 45 | std::unique_lock lk(mtx); 46 | g1.wait(lk, [=] { return !write_entered(); }); 47 | state |= _S_write_entered; 48 | g2.wait(lk, [=] { return readers() == 0; }); 49 | } 50 | 51 | bool try_lock() { 52 | std::unique_lock lk(mtx, std::try_to_lock); 53 | if (lk.owns_lock() && state == 0) { 54 | state = _S_write_entered; 55 | return true; 56 | } 57 | return false; 58 | } 59 | 60 | void unlock() { 61 | std::lock_guard lk(mtx); 62 | state = 0; 63 | g1.notify_all(); 64 | } 65 | 66 | void lock_shared() { 67 | std::unique_lock lk(mtx); 68 | g1.wait(lk, [=] { return state < _S_max_readers; }); 69 | ++state; 70 | } 71 | 72 | bool try_lock_shared() { 73 | std::unique_lock lk(mtx, std::try_to_lock); 74 | if (!lk.owns_lock()) { 75 | return false; 76 | } 77 | if (state < _S_max_readers) { 78 | ++state; 79 | return true; 80 | } 81 | return false; 82 | } 83 | 84 | void unlock_shared() { 85 | std::lock_guard lk(mtx); 86 | auto prev = state--; 87 | if (write_entered()) { 88 | if (readers() == 0) { 89 | g2.notify_one(); 90 | } 91 | } else { 92 | if (prev == _S_max_readers) { 93 | g1.notify_one(); 94 | } 95 | } 96 | } 97 | 98 | private: 99 | boost::fibers::mutex mtx; 100 | boost::fibers::condition_variable g1, g2; 101 | unsigned state; 
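// Layout note: `state` packs the whole latch into a single word, in the
// style of libstdc++'s condition-variable-based std::shared_mutex: the top
// bit (_S_write_entered, defined below) marks a writer, and the remaining
// bits count active readers. Minimal usage sketch (illustrative; it assumes
// only the public lock()/unlock()/lock_shared()/unlock_shared() members
// defined above):
//
//   CortSharedMutex mu;
//   {
//       std::shared_lock<CortSharedMutex> rd(mu);  // several fibers may read
//   }
//   {
//       std::unique_lock<CortSharedMutex> wr(mu);  // exclusive; blocks on g2
//   }                                              // until readers() == 0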
102 | 103 | static constexpr unsigned _S_write_entered = 1U << (sizeof(unsigned) * __CHAR_BIT__ - 1); 104 | static constexpr unsigned _S_max_readers = ~_S_write_entered; 105 | 106 | bool write_entered() const { return state & _S_write_entered; } 107 | unsigned readers() const { return state & _S_max_readers; } 108 | }; -------------------------------------------------------------------------------- /src/include/log.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /** 4 | * @file dlog.h 5 | 6 | * @brief terminal log output macro 7 | 8 | * @version 0.1 9 | * @date 2022-05-27 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #define DLOG_STREAM(stream, format, ...) \ 24 | do { \ 25 | struct timeval tv; \ 26 | struct tm tm; \ 27 | char tbuf[28] = {0}; \ 28 | gettimeofday(&tv, NULL); \ 29 | localtime_r(&tv.tv_sec, &tm); \ 30 | strftime(tbuf, sizeof(tbuf), "%Y-%m-%d %H:%M:%S", &tm); \ 31 | fprintf(stream, "[%s.%06d] [%d %#lx] %s:%d: " format "\n", tbuf, (int)tv.tv_usec, \ 32 | getpid(), pthread_self(), __FILE__, __LINE__, ##__VA_ARGS__); \ 33 | } while (0) 34 | 35 | #define DLOG_INFO(format, ...) DLOG_STREAM(stderr, "[INFO] " format, ##__VA_ARGS__) 36 | #define DLOG_ERROR(format, ...) \ 37 | DLOG_STREAM(stderr, "[ERROR] " format ": %s", ##__VA_ARGS__, strerror(errno)) 38 | #define DLOG_WARNING(format, ...) DLOG_STREAM(stderr, "[WARNING] " format, ##__VA_ARGS__) 39 | #define DLOG_FATAL(format, ...) \ 40 | do { \ 41 | DLOG_STREAM(stderr, "[FATAL] " format ": %s", ##__VA_ARGS__, strerror(errno)); \ 42 | fflush(stdout); \ 43 | abort(); \ 44 | } while (0) 45 | 46 | #define DLOG(format, ...) DLOG_INFO(format, ##__VA_ARGS__) 47 | 48 | #define DLOG_FILE(file, format, ...) \ 49 | do { \ 50 | FILE *fp = fopen(file, "w+"); \ 51 | assert(fp != NULL); \ 52 | DLOG_STREAM(fp, format, ##__VA_ARGS__); \ 53 | fclose(fp); \ 54 | } while (0) 55 | 56 | #define DLOG_IF(expr, format, ...) 
\ 57 | do { \ 58 | if (expr) DLOG(format, ##__VA_ARGS__); \ 59 | } while (0) 60 | 61 | #ifndef NDEBUG 62 | 63 | namespace type_fmt_str_detail { 64 | template <typename T> 65 | struct helper; 66 | template <> 67 | struct helper<int> { 68 | constexpr static const char *type_str = "%d"; 69 | }; 70 | template <> 71 | struct helper<unsigned int> { 72 | constexpr static const char *type_str = "%u"; 73 | }; 74 | template <> 75 | struct helper<char> { 76 | constexpr static const char *type_str = "%c"; 77 | }; 78 | template <> 79 | struct helper<unsigned char> { 80 | constexpr static const char *type_str = "%hhu"; 81 | }; 82 | template <> 83 | struct helper<short> { 84 | constexpr static const char *type_str = "%hd"; 85 | }; 86 | template <> 87 | struct helper<unsigned short> { 88 | constexpr static const char *type_str = "%hu"; 89 | }; 90 | template <> 91 | struct helper<long> { 92 | constexpr static const char *type_str = "%ld"; 93 | }; 94 | template <> 95 | struct helper<unsigned long> { 96 | constexpr static const char *type_str = "%lu"; 97 | }; 98 | template <> 99 | struct helper<long long> { 100 | constexpr static const char *type_str = "%lld"; 101 | }; 102 | template <> 103 | struct helper<unsigned long long> { 104 | constexpr static const char *type_str = "%llu"; 105 | }; 106 | template <> 107 | struct helper<float> { 108 | constexpr static const char *type_str = "%f"; 109 | }; 110 | template <> 111 | struct helper<double> { 112 | constexpr static const char *type_str = "%lf"; 113 | }; 114 | template <> 115 | struct helper<long double> { 116 | constexpr static const char *type_str = "%Lf"; 117 | }; 118 | template <typename T> 119 | struct helper<T *> { 120 | constexpr static const char *type_str = "%p"; 121 | }; 122 | template <> 123 | struct helper<char *> { 124 | constexpr static const char *type_str = "%s"; 125 | }; 126 | template <> 127 | struct helper<const char *> { 128 | constexpr static const char *type_str = "%s"; 129 | }; 130 | } // namespace type_fmt_str_detail 131 | 132 | /** 133 | * Assert the judgment between two values. 134 | * @example DLOG_EXPR(malloc(1), !=, nullptr) 135 | * 136 | * @warning In C++11, `NULL` will throw warning: passing NULL to non-pointer 137 | * argument... You should use `nullptr` instead of `NULL`. 138 | */ 139 | #define DLOG_EXPR(val_a, op, val_b) \ 140 | do { \ 141 | decltype(val_a) a = val_a; \ 142 | decltype(val_b) b = val_b; \ 143 | if (__glibc_unlikely(!(a op b))) { \ 144 | char fmt[] = "Because " #val_a " = %???, " #val_b " = %???"; \ 145 | char tmp[sizeof(fmt) + 42]; \ 146 | snprintf(fmt, sizeof(fmt), "Because " #val_a " = %s, " #val_b " = %s", \ 147 | type_fmt_str_detail::helper<std::remove_cv< \ 148 | std::decay<decltype(a)>::type>::type>::type_str, \ 149 | type_fmt_str_detail::helper<std::remove_cv< \ 150 | std::decay<decltype(b)>::type>::type>::type_str); \ 151 | snprintf(tmp, sizeof(tmp), fmt, a, b); \ 152 | DLOG_FATAL("Assertion `" #val_a " " #op " " #val_b "` failed. %s", tmp); \ 153 | } \ 154 | } while (0) 155 | 156 | #define DLOG_ASSERT(expr, format...) \ 157 | do { \ 158 | if (__glibc_unlikely(!(expr))) { \ 159 | DLOG_FATAL("Assertion `" #expr "` failed. " format); \ 160 | } \ 161 | } while (0) 162 | 163 | #else 164 | 165 | #define DLOG_EXPR(val_a, op, val_b) \ 166 | do { \ 167 | } while (0) 168 | #define DLOG_ASSERT(expr, format...) 
\ 169 | do { \ 170 | } while (0) 171 | 172 | #endif -------------------------------------------------------------------------------- /src/include/msg_queue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "allocator.hpp" 8 | #include "common.hpp" 9 | #include "concurrent_queue.hpp" 10 | #include "config.hpp" 11 | 12 | /** 13 | * @brief 14 | * 15 | * msgq_s 16 | * 17 | * [ public msgq ][ private cn msgq ] 18 | */ 19 | 20 | namespace msgq { 21 | 22 | struct MsgBuffer; 23 | struct MsgQueue; 24 | 25 | struct MsgUDPConnPacket { 26 | uintptr_t recv_q_off; 27 | }; 28 | 29 | using msgq_handler_t = void (*)(MsgBuffer &req, void *ctx); 30 | using msgq_callback_t = void (*)(MsgBuffer &resp, void *arg); 31 | 32 | #if MSGQ_SINGLE_FIFO_ON == 1 33 | 34 | struct MsgHeader final { 35 | bool invalid_flag : 1; 36 | enum : uint8_t { REQ, RESP } msg_type : 1; 37 | uint8_t rpc_type; 38 | size_t size : 32; // Actual data size 39 | msgq_callback_t cb; 40 | void *arg; 41 | 42 | uint8_t data[0]; 43 | }; 44 | 45 | struct MsgBuffer { 46 | size_t size() const; 47 | void *get_buf() const; 48 | 49 | MsgQueue *m_q; 50 | MsgHeader *m_msg; // Address pointing to the MsgHeader 51 | size_t m_size; // Actual data size 52 | }; 53 | 54 | struct MsgQueue final { 55 | MsgQueue(); 56 | ~MsgQueue() = default; 57 | 58 | MsgHeader *alloc_msg_buffer(size_t size); 59 | void enqueue_msg(); 60 | void dequeue_msg(std::vector &hv); 61 | void free_msg_buffer(); 62 | 63 | atomic_po_val_t m_prod_head; 64 | atomic_po_val_t m_prod_tail; 65 | atomic_po_val_t m_cons_head; 66 | atomic_po_val_t m_cons_tail; 67 | 68 | constexpr static size_t SZ = msgq_ring_buf_len; 69 | 70 | uint8_t m_ring[msgq_ring_buf_len]; 71 | 72 | MsgHeader *at(size_t i); 73 | static void update_ht(atomic_po_val_t *ht, atomic_po_val_t *ht_); 74 | }; 75 | 76 | #else 77 | struct MsgHeader final { 78 | enum : uint8_t { REQ, RESP } msg_type : 1; 79 | uint8_t rpc_type; 80 | size_t size : 32; // Actual data size 81 | offset_t buf_offset; // Based on the address of MsgQueue::m_ring 82 | msgq_callback_t cb; 83 | uint64_t send_ts; 84 | void *arg; 85 | 86 | // static_assert(msgq_ring_buf_len < (1ul << 16), ""); 87 | }; 88 | 89 | struct MsgBuffer { 90 | size_t size() const; 91 | void *get_buf() const; 92 | 93 | MsgQueue *m_q; 94 | MsgHeader m_msg; 95 | }; 96 | 97 | struct MsgQueue final { 98 | MsgQueue() = default; 99 | ~MsgQueue() = default; 100 | 101 | offset_t alloc_msg_buffer(size_t size); 102 | void enqueue_msg(MsgBuffer &msg_buf); 103 | uint32_t dequeue_msg(MsgHeader *hv, size_t max_deq); 104 | void free_msg_buffer(MsgBuffer &msg_buf); 105 | 106 | ConcurrentQueue 108 | msgq_q; 109 | RingArena m_ra; 110 | }; 111 | 112 | #endif // MSGQ_SINGLE_FIFO_ON 113 | 114 | struct MsgQueueSatistics { 115 | uint64_t send_io = 0; 116 | uint64_t send_bytes = 0; 117 | uint64_t send_time = 0; 118 | uint64_t recv_io = 0; 119 | uint64_t recv_bytes = 0; 120 | uint64_t recv_time = 0; 121 | 122 | void start_sample(uint64_t &timer) { 123 | #if (RCMP_PERF_ON != 0) 124 | timer = getNsTimestamp(); 125 | #endif // RCMP_PERF_ON 126 | } 127 | 128 | void send_sample(size_t bytes, uint64_t &timer) { 129 | #if (RCMP_PERF_ON != 0) 130 | uint64_t tmp = getNsTimestamp(); 131 | send_io++; 132 | send_bytes += bytes; 133 | send_time += tmp - timer; 134 | timer = tmp; 135 | #endif // RCMP_PERF_ON 136 | } 137 | 138 | void recv_sample(size_t bytes, uint64_t &timer) { 139 | #if (RCMP_PERF_ON != 0) 140 | uint64_t 
tmp = getNsTimestamp(); 141 | recv_io++; 142 | recv_bytes += bytes; 143 | recv_time += tmp - timer; 144 | timer = tmp; 145 | #endif // RCMP_PERF_ON 146 | } 147 | }; 148 | 149 | struct MsgQueueNexus { 150 | constexpr static size_t max_msgq_handler = (1 << (sizeof(uint8_t) * 8)); 151 | 152 | MsgQueueNexus(void *msgq_zone_start_addr); 153 | 154 | MsgQueue *GetPublicMsgQ() const { return m_public_msgq; } 155 | 156 | void *GetMsgQZoneStartAddr() const { return m_msgq_zone_start_addr; } 157 | 158 | void register_req_func(uint8_t rpc_type, msgq_handler_t handler); 159 | 160 | static msgq_handler_t __handlers[max_msgq_handler]; 161 | 162 | void *m_msgq_zone_start_addr; 163 | MsgQueue *m_public_msgq; 164 | MsgQueueSatistics m_stats; 165 | }; 166 | 167 | struct MsgQueueRPC { 168 | MsgQueueRPC(MsgQueueNexus *nexus, MsgQueue *send_queue, MsgQueue *recv_queue, void *ctx); 169 | 170 | /** 171 | * @brief Allocate msg buffer 172 | * 173 | * @warning The operation is a blocking call 174 | * 175 | * @param size 176 | * @return MsgBuffer 177 | */ 178 | MsgBuffer alloc_msg_buffer(size_t size); 179 | 180 | /** 181 | * @brief Enqueue a request message 182 | * 183 | * @param rpc_type 184 | * @param msg_buf 185 | * @param cb 186 | * @param arg 187 | */ 188 | void enqueue_request(uint8_t rpc_type, MsgBuffer &msg_buf, msgq_callback_t cb, void *arg); 189 | 190 | /** 191 | * @brief Enqueue a response message 192 | * 193 | * @param req_buf 194 | * @param resp_buf 195 | */ 196 | void enqueue_response(MsgBuffer &req_buf, MsgBuffer &resp_buf); 197 | 198 | /** 199 | * @brief rpc queue polling once 200 | * 201 | */ 202 | void run_event_loop_once(); 203 | 204 | /** 205 | * @brief free msg buffer 206 | * 207 | * @param msg_buf 208 | */ 209 | void free_msg_buffer(MsgBuffer &msg_buf); 210 | 211 | MsgQueueNexus *m_nexus; 212 | MsgQueue *m_send_queue; 213 | MsgQueue *m_recv_queue; 214 | void *m_ctx; 215 | }; 216 | 217 | } // namespace msgq 218 | 219 | struct MsgQueueManager { 220 | const static size_t RING_ELEM_SIZE = sizeof(msgq::MsgQueue); 221 | 222 | void *start_addr; 223 | uint32_t ring_cnt; 224 | std::unique_ptr> msgq_allocator; 225 | std::unique_ptr nexus; 226 | std::unique_ptr rpc; 227 | 228 | msgq::MsgQueue *allocQueue(); 229 | void freeQueue(msgq::MsgQueue *msgq); 230 | }; -------------------------------------------------------------------------------- /src/include/page_table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "allocator.hpp" 8 | #include "common.hpp" 9 | #include "concurrent_hashmap.hpp" 10 | #include "lock.hpp" 11 | #include "robin_hood.h" 12 | #include "stats.hpp" 13 | 14 | struct MasterToDaemonConnection; 15 | struct MasterToClientConnection; 16 | 17 | struct PageRackMetadata { 18 | uint32_t rack_id; 19 | mac_id_t daemon_id; 20 | CortSharedMutex latch; 21 | }; 22 | 23 | struct RackMacTable { 24 | size_t GetCurrentAllocatedPageNum() const { return current_allocated_page_num; } 25 | size_t GetMaxFreePageNum() const { return max_free_page_num; } 26 | 27 | bool with_cxl; 28 | MasterToDaemonConnection *daemon_connect; 29 | size_t max_free_page_num; 30 | size_t current_allocated_page_num; 31 | std::vector client_connect_table; 32 | }; 33 | 34 | struct PageDirectory { 35 | PageRackMetadata *FindPage(page_id_t page_id); 36 | PageRackMetadata *AddPage(RackMacTable *rack_table, page_id_t page_id); 37 | void RemovePage(RackMacTable *rack_table, page_id_t page_id); 38 | 39 | ConcurrentHashMap table; 40 
| std::unique_ptr page_id_allocator; 41 | }; 42 | 43 | struct DaemonToClientConnection; 44 | struct DaemonToDaemonConnection; 45 | 46 | struct PageVMMapMetadata { 47 | offset_t cxl_memory_offset; // Relative to `format.page_data_start_addr` 48 | std::set<DaemonToClientConnection *> ref_client; 49 | std::set<DaemonToDaemonConnection *> ref_daemon; 50 | }; 51 | 52 | struct RemotePageRefMeta { 53 | volatile int version; 54 | volatile bool swapping = false; 55 | FreqStats stats; 56 | uintptr_t remote_page_addr; 57 | uint32_t remote_page_rkey; 58 | DaemonToDaemonConnection *remote_page_daemon_conn; 59 | 60 | RemotePageRefMeta(uint64_t half_life_us) : version(rand()) {} 61 | 62 | FreqStats::Heatness WriteHeat() { return stats.m_wr_heat.heat(rdtsc() / 1e3); } 63 | FreqStats::Heatness ReadHeat() { return stats.m_rd_heat.heat(rdtsc() / 1e3); } 64 | FreqStats::Heatness UpdateWriteHeat() { return stats.add_wr(rdtsc() / 1e3); } 65 | FreqStats::Heatness UpdateReadHeat() { return stats.add_rd(rdtsc() / 1e3); } 66 | void ClearHeat() { stats.clear(); } 67 | }; 68 | 69 | struct PageMetadata { 70 | uint32_t version; 71 | CortSharedMutex page_ref_lock; 72 | CortMutex remote_ref_lock; 73 | PageVMMapMetadata *vm_meta = nullptr; 74 | RemotePageRefMeta *remote_ref_meta = nullptr; 75 | }; 76 | 77 | struct PageTableManager { 78 | template <typename F, typename... Args> 79 | PageMetadata *FindOrCreatePageMeta(page_id_t page_id, F &&fn, Args &&...args) { 80 | auto p = table.find_or_emplace(page_id, [&]() { 81 | PageMetadata *page_meta = new PageMetadata(); 82 | fn(page_meta, std::move(args)...); 83 | return page_meta; 84 | }); 85 | return p.first->second; 86 | } 87 | 88 | PageMetadata *FindOrCreatePageMeta(page_id_t page_id) { 89 | auto p = table.find_or_emplace(page_id, [&]() { return new PageMetadata(); }); 90 | return p.first->second; 91 | } 92 | 93 | template <typename F, typename... Args> 94 | RemotePageRefMeta *FindOrCreateRemotePageRefMeta(PageMetadata *page_meta, F &&fn, 95 | Args &&...args) { 96 | if (page_meta->remote_ref_meta == nullptr) { 97 | std::unique_lock<CortMutex> page_remote_ref_lock(page_meta->remote_ref_lock); 98 | if (page_meta->remote_ref_meta == nullptr) { 99 | RemotePageRefMeta *remote_ref_meta = new RemotePageRefMeta(heat_half_life_us); 100 | fn(remote_ref_meta, std::move(args)...); 101 | page_meta->remote_ref_meta = remote_ref_meta; 102 | } 103 | } 104 | return page_meta->remote_ref_meta; 105 | } 106 | 107 | RemotePageRefMeta *FindOrCreateRemotePageRefMeta(PageMetadata *page_meta) { 108 | if (page_meta->remote_ref_meta == nullptr) { 109 | std::unique_lock<CortMutex> page_remote_ref_lock(page_meta->remote_ref_lock); 110 | if (page_meta->remote_ref_meta == nullptr) { 111 | page_meta->remote_ref_meta = new RemotePageRefMeta(heat_half_life_us); 112 | } 113 | } 114 | return page_meta->remote_ref_meta; 115 | } 116 | 117 | void EraseRemotePageRefMeta(PageMetadata *page_meta); 118 | PageVMMapMetadata *AllocPageMemory(); 119 | void FreePageMemory(PageVMMapMetadata *page_vm_meta); 120 | void ApplyPageMemory(PageMetadata *page_meta, PageVMMapMetadata *page_vm_meta); 121 | void CancelPageMemory(PageMetadata *page_meta); 122 | bool PickUnvisitPage(page_id_t &page_id, PageMetadata *&page_meta); 123 | std::vector<std::pair<page_id_t, PageMetadata *>> RandomPickVMPage(size_t n); 124 | 125 | // TODO: release page meta resource when vm_meta and remote_ref_meta are nullptr 126 | 127 | bool NearlyFull() const { return current_used_page_num == max_data_page_num; } 128 | 129 | bool TestAllocPageMemory(size_t count = 1) const { 130 | return current_used_page_num + count <= total_page_num; 131 | } 132 | 133 | size_t GetCurrentUsedPageNum() const { return current_used_page_num; } 134 | 
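// FindOrCreateRemotePageRefMeta() above relies on double-checked locking: the
// unlocked nullptr test keeps the hot path off the CortMutex, and the second
// test under the lock stops two racing fibers from both allocating the meta.
// Sketch of the same pattern (illustrative only; mirrors the code above):
//
//   RemotePageRefMeta *ref = page_meta->remote_ref_meta;          // fast path
//   if (ref == nullptr) {
//       std::unique_lock<CortMutex> lk(page_meta->remote_ref_lock);
//       if (page_meta->remote_ref_meta == nullptr)                // re-check
//           page_meta->remote_ref_meta = new RemotePageRefMeta(heat_half_life_us);
//       ref = page_meta->remote_ref_meta;
//   }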
135 | uint64_t heat_half_life_us; 136 | 137 | size_t total_page_num; // Number of all pages 138 | size_t max_swap_page_num; // Number of pages in swap area 139 | size_t max_data_page_num; // Number of all available data pages 140 | 141 | std::atomic current_used_page_num; // Number of data pages currently in use 142 | 143 | RandomAccessMap table; 144 | std::queue> unvisited_pages; // lock unsafe 145 | std::unique_ptr> page_allocator; 146 | }; 147 | 148 | struct LocalPageCache { 149 | FreqStats::Heatness UpdateHeat() { return stats.add_wr(rdtsc() / 1000); } 150 | FreqStats::Heatness Heat() { return stats.m_wr_heat.heat(rdtsc() / 1000); } 151 | 152 | FreqStats stats; 153 | offset_t offset; 154 | }; 155 | 156 | struct RemotePageHint { 157 | uint32_t version = 0; 158 | uint64_t hint = 0; 159 | }; 160 | 161 | struct PageCacheMeta { 162 | Mutex ref_lock; 163 | LocalPageCache *cache = nullptr; 164 | RemotePageHint hint; 165 | }; 166 | 167 | struct PageCacheTable { 168 | ~PageCacheTable(); 169 | 170 | PageCacheMeta *FindOrCreateCacheMeta(page_id_t page_id); 171 | LocalPageCache *FindCache(page_id_t page_id); 172 | LocalPageCache *FindCache(PageCacheMeta *cache_meta) const; 173 | LocalPageCache *AddCache(PageCacheMeta *cache_meta, offset_t offset); 174 | void RemoveCache(PageCacheMeta *cache_meta); 175 | 176 | SharedMutex table_lock; 177 | 178 | robin_hood::unordered_flat_map> table; 179 | }; 180 | 181 | struct PageThreadLocalCache; 182 | 183 | struct PageThreadCacheManager { 184 | void insert(PageThreadLocalCache *tcache); 185 | void erase(PageThreadLocalCache *tcache); 186 | 187 | template 188 | void foreach_all(F &&fn, Args &&...args) { 189 | std::shared_lock lck(mutex_); 190 | for (auto &tcache : tcache_list_) { 191 | fn(*tcache, std::move(args)...); 192 | } 193 | } 194 | 195 | std::shared_mutex mutex_; 196 | std::list tcache_list_; 197 | }; 198 | 199 | class PageThreadLocalCache { 200 | public: 201 | static PageThreadLocalCache &getInstance(PageThreadCacheManager &mgr) { 202 | static thread_local PageThreadLocalCache instance(mgr); 203 | return instance; 204 | } 205 | 206 | PageCacheTable page_cache_table; 207 | 208 | private: 209 | PageThreadCacheManager &mgr; 210 | PageThreadLocalCache(PageThreadCacheManager &mgr) : mgr(mgr) { mgr.insert(this); } 211 | ~PageThreadLocalCache() { mgr.erase(this); } 212 | }; -------------------------------------------------------------------------------- /src/include/promise.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | template 11 | class SpinFuture; 12 | 13 | template 14 | class SpinPromise { 15 | public: 16 | SpinPromise() : ready_(false) {} 17 | ~SpinPromise() {} 18 | 19 | SpinFuture get_future() { return SpinFuture(this); } 20 | 21 | void set_value(const T &value) { 22 | value_ = value; 23 | ready_.store(true, std::memory_order_release); 24 | } 25 | 26 | private: 27 | friend class SpinFuture; 28 | 29 | T value_; 30 | std::atomic_bool ready_; 31 | }; 32 | 33 | template 34 | class SpinFuture { 35 | public: 36 | SpinFuture(SpinPromise *promise) : promise_(promise) {} 37 | ~SpinFuture() {} 38 | 39 | const T &get() const { 40 | while (!promise_->ready_.load(std::memory_order_acquire)) { 41 | // spin 42 | } 43 | return promise_->value_; 44 | } 45 | 46 | template 47 | std::future_status wait_for(const std::chrono::duration<_Rep, _Period> &__rel) const { 48 | if (promise_->ready_.load(std::memory_order_acquire)) return 
std::future_status::ready; 49 | if (__rel > __rel.zero()) { 50 | std::this_thread::sleep_for(__rel); 51 | if (promise_->ready_.load(std::memory_order_acquire)) return std::future_status::ready; 52 | } 53 | return std::future_status::timeout; 54 | } 55 | 56 | private: 57 | SpinPromise *promise_; 58 | }; 59 | 60 | template <> 61 | class SpinPromise; 62 | template <> 63 | class SpinFuture; 64 | 65 | template <> 66 | class SpinPromise { 67 | public: 68 | SpinPromise() : ready_(false) {} 69 | ~SpinPromise() {} 70 | 71 | SpinFuture get_future(); 72 | 73 | void set_value() { ready_.store(true, std::memory_order_release); } 74 | 75 | private: 76 | friend class SpinFuture; 77 | 78 | std::atomic_bool ready_; 79 | }; 80 | 81 | template <> 82 | class SpinFuture { 83 | public: 84 | SpinFuture(SpinPromise *promise) : promise_(promise) {} 85 | ~SpinFuture() {} 86 | 87 | void get() const { 88 | while (!promise_->ready_.load(std::memory_order_acquire)) { 89 | // spin 90 | } 91 | } 92 | 93 | void wait() { throw std::runtime_error("wait dead spinning"); } 94 | 95 | template 96 | std::future_status wait_for(const std::chrono::duration<_Rep, _Period> &__rel) const { 97 | if (promise_->ready_.load(std::memory_order_acquire)) return std::future_status::ready; 98 | if (__rel > __rel.zero()) { 99 | std::this_thread::sleep_for(__rel); 100 | if (promise_->ready_.load(std::memory_order_acquire)) return std::future_status::ready; 101 | } 102 | return std::future_status::timeout; 103 | } 104 | 105 | private: 106 | SpinPromise *promise_; 107 | }; 108 | 109 | inline SpinFuture SpinPromise::get_future() { return SpinFuture(this); } 110 | 111 | template 112 | using CortPromise = boost::fibers::promise; 113 | template 114 | using CortFuture = boost::fibers::future; 115 | 116 | struct FutureControlBlock { 117 | bool ready = false; 118 | boost::fibers::mutex mtx; 119 | boost::fibers::condition_variable cv; 120 | 121 | void clear() { ready = false; } 122 | 123 | void get() { 124 | std::unique_lock lck(mtx); 125 | cv.wait(lck, [&]() { return ready; }); 126 | } 127 | void set_value() { 128 | { 129 | std::unique_lock lck(mtx); 130 | ready = true; 131 | } 132 | cv.notify_all(); 133 | } 134 | }; -------------------------------------------------------------------------------- /src/include/proto/rpc_caller.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "utils.hpp" 4 | 5 | namespace detail { 6 | 7 | template 8 | struct RpcCallerWrapperHelper { 9 | using FT = function_traits; 10 | using SelfContext = typename std::remove_reference>::type; 11 | using PeerContext = typename std::remove_reference>::type; 12 | using RequestType = typename std::remove_reference>::type; 13 | using ResponseHandleType = 14 | typename std::remove_reference>::type; 15 | 16 | using ResponseType = typename container_traits::type; 17 | }; 18 | 19 | template 20 | struct RpcCallerWrapper; 21 | 22 | template 23 | struct ErpcFuncWrapper : public RpcCallerWrapperHelper { 24 | static RpcFunc func; 25 | static bool registed; 26 | }; 27 | 28 | template 29 | bool ErpcFuncWrapper::registed = false; 30 | template 31 | RpcFunc ErpcFuncWrapper::func; 32 | 33 | template 34 | struct MsgqRpcFuncWrapper : public RpcCallerWrapperHelper { 35 | static RpcFunc func; 36 | static bool registed; 37 | }; 38 | 39 | template 40 | bool MsgqRpcFuncWrapper::registed = false; 41 | template 42 | RpcFunc MsgqRpcFuncWrapper::func; 43 | 44 | /** 45 | * @brief Binding RPCs with the BIND_RPC_TYPE_STRUCT() macro 46 | * 47 | * 
@warning The call must be on a different line in the same file. 48 | */ 49 | #define BIND_RPC_TYPE_STRUCT(rpc_func) \ 50 | template <> \ 51 | struct detail::RpcCallerWrapper \ 52 | : public detail::RpcCallerWrapperHelper { \ 53 | constexpr static uint8_t rpc_type = __LINE__; \ 54 | static_assert(rpc_type != 0, "overflow"); \ 55 | }; 56 | 57 | /** 58 | * @brief Get the structure to which the rpc is bound. 59 | */ 60 | #define RPC_TYPE_STRUCT(rpc_func) ::detail::RpcCallerWrapper 61 | 62 | } // namespace detail 63 | -------------------------------------------------------------------------------- /src/include/proto/rpc_client.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.hpp" 4 | #include "impl.hpp" 5 | #include "proto/rpc_adaptor.hpp" 6 | 7 | namespace rpc_client { 8 | 9 | struct RemovePageCacheRequest { 10 | mac_id_t mac_id; 11 | page_id_t page_id; 12 | }; 13 | struct RemovePageCacheReply { 14 | bool ret; 15 | }; 16 | void removePageCache(ClientContext& client_context, ClientToDaemonConnection& daemon_connection, 17 | RemovePageCacheRequest& req, 18 | ResponseHandle& resp_handle); 19 | 20 | struct GetCurrentWriteDataRequest { 21 | mac_id_t mac_id; 22 | const void* dio_write_buf; 23 | size_t dio_write_size; 24 | }; 25 | struct GetCurrentWriteDataReply { 26 | uint8_t data[0]; 27 | }; 28 | void getCurrentWriteData(ClientContext& client_context, ClientToDaemonConnection& daemon_connection, 29 | GetCurrentWriteDataRequest& req, 30 | ResponseHandle& resp_handle); 31 | 32 | struct GetPagePastAccessFreqRequest { 33 | mac_id_t mac_id; 34 | int num_detect_pages; 35 | page_id_t pages[128]; 36 | }; 37 | struct GetPagePastAccessFreqReply { 38 | float avg_heat; 39 | page_id_t coldest_page_id; 40 | float coldest_page_heat; 41 | float coldest_page_rd_heat; 42 | }; 43 | void getPagePastAccessFreq(ClientContext& client_context, 44 | ClientToDaemonConnection& daemon_connection, 45 | GetPagePastAccessFreqRequest& req, 46 | ResponseHandle& resp_handle); 47 | 48 | } // namespace rpc_client -------------------------------------------------------------------------------- /src/include/proto/rpc_daemon.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "common.hpp" 6 | #include "impl.hpp" 7 | #include "log.hpp" 8 | #include "proto/rpc_adaptor.hpp" 9 | #include "rcmp.hpp" 10 | #include "utils.hpp" 11 | 12 | namespace rpc_daemon { 13 | 14 | struct JoinRackRequest { 15 | mac_id_t mac_id; // unused 16 | IPv4String client_ipv4; 17 | uint16_t client_port; 18 | rack_id_t rack_id; 19 | }; 20 | struct JoinRackReply { 21 | mac_id_t client_mac_id; 22 | mac_id_t daemon_mac_id; 23 | float half_life_us; 24 | }; 25 | /** 26 | * @brief Adds client to the rack. Called when the connection is established. 27 | * 28 | * @param daemon_context 29 | * @param client_connection It needs to be dereferenced to the object requested from the heap, after 30 | * which its lifecycle will be maintained by the MasterContext. 
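 *
 * Illustrative request construction (a sketch: the field values are invented
 * for the example, the string-constructed IPv4String is an assumption, and
 * the actual transport call lives in the rpc adaptor, not shown here):
 *
 * @code
 *   JoinRackRequest req;
 *   req.client_ipv4 = IPv4String("10.0.0.2");  // hypothetical client address
 *   req.client_port = 31850;                   // hypothetical client port
 *   req.rack_id = 0;                           // rack this client plugs into
 *   // the JoinRackReply then fixes client_mac_id / daemon_mac_id for both ends
 * @endcode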
31 | * @param req 32 | * @param resp_handle 33 | */ 34 | void joinRack(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 35 | JoinRackRequest& req, ResponseHandle& resp_handle); 36 | 37 | struct CrossRackConnectRequest { 38 | mac_id_t mac_id; 39 | IPv4String ip; 40 | uint16_t port; 41 | rack_id_t rack_id; 42 | mac_id_t conn_mac_id; 43 | }; 44 | struct CrossRackConnectReply { 45 | mac_id_t daemon_mac_id; 46 | uint16_t rdma_port; 47 | }; 48 | void crossRackConnect(DaemonContext& daemon_context, DaemonToDaemonConnection& daemon_connection, 49 | CrossRackConnectRequest& req, 50 | ResponseHandle& resp_handle); 51 | 52 | struct GetPageCXLRefOrProxyRequest { 53 | mac_id_t mac_id; 54 | enum { 55 | READ, 56 | WRITE, 57 | WRITE_RAW, 58 | CAS, 59 | } type; 60 | rcmp::GAddr gaddr; 61 | uint32_t hint_version; 62 | uint64_t hint; 63 | union { 64 | struct { // type == WRITE 65 | size_t cn_write_size; 66 | const void* cn_write_buf; 67 | } write; 68 | struct { // type == READ 69 | size_t cn_read_size; 70 | } read; 71 | struct { // type == WRITE_RAW 72 | size_t cn_write_raw_size; 73 | uint8_t cn_write_raw_buf[0]; 74 | } write_raw; 75 | struct { // type == CAS 76 | size_t expected; 77 | size_t desired; 78 | } cas; 79 | } u; 80 | }; 81 | struct GetPageCXLRefOrProxyReply { 82 | bool refs; 83 | uint32_t hint_version; 84 | uint64_t hint; 85 | union { 86 | struct { // refs == true 87 | offset_t offset; 88 | }; 89 | struct { // refs == false 90 | struct { // cas 91 | uint64_t old_val; 92 | }; 93 | struct { // read 94 | uint8_t read_data[0]; 95 | }; 96 | }; 97 | }; 98 | }; 99 | /** 100 | * @brief Get a reference to the page. If the local Page Table does not have that page id, a remote 101 | * io is triggered. 102 | * 103 | * @param daemon_context 104 | * @param client_connection 105 | * @param req 106 | * @param resp_handle 107 | */ 108 | void getPageCXLRefOrProxy(DaemonContext& daemon_context, 109 | DaemonToClientConnection& client_connection, 110 | GetPageCXLRefOrProxyRequest& req, 111 | ResponseHandle& resp_handle); 112 | 113 | struct AllocPageMemoryRequest { 114 | mac_id_t mac_id; 115 | page_id_t start_page_id; 116 | size_t count; 117 | }; 118 | struct AllocPageMemoryReply { 119 | bool ret; 120 | }; 121 | /** 122 | * @brief Allocate a page physical address space 123 | * 124 | * @param daemon_context 125 | * @param master_connection 126 | * @param req 127 | * @param resp_handle 128 | */ 129 | void allocPageMemory(DaemonContext& daemon_context, DaemonToMasterConnection& master_connection, 130 | AllocPageMemoryRequest& req, 131 | ResponseHandle& resp_handle); 132 | 133 | struct AllocRequest { 134 | mac_id_t mac_id; 135 | size_t size; 136 | }; 137 | struct AllocReply { 138 | rcmp::GAddr gaddr; 139 | }; 140 | void alloc(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 141 | AllocRequest& req, ResponseHandle& resp_handle); 142 | struct AllocPageRequest { 143 | mac_id_t mac_id; 144 | size_t count; 145 | }; 146 | struct AllocPageReply { 147 | page_id_t start_page_id; // Allocated start page id 148 | size_t start_count; // Number actually allocated in the requesting rack 149 | }; 150 | /** 151 | * @brief Allocate for a page 152 | * 153 | * @param daemon_context 154 | * @param client_connection 155 | * @param req 156 | * @param resp_handle 157 | */ 158 | void allocPage(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 159 | AllocPageRequest& req, ResponseHandle& resp_handle); 160 | 161 | struct FreePageRequest { 162 | mac_id_t mac_id; 163 | 
page_id_t start_page_id; 164 | size_t count; 165 | }; 166 | struct FreePageReply { 167 | bool ret; 168 | }; 169 | /** 170 | * @brief Free a page 171 | * 172 | * @param master_context 173 | * @param daemon_connection 174 | * @param req 175 | * @param resp_handle 176 | */ 177 | void freePage(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 178 | FreePageRequest& req, ResponseHandle& resp_handle); 179 | 180 | struct FreeRequest { 181 | mac_id_t mac_id; 182 | rcmp::GAddr gaddr; 183 | size_t n; 184 | }; 185 | struct FreeReply { 186 | bool ret; 187 | }; 188 | void free(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 189 | FreeRequest& req, ResponseHandle& resp_handle); 190 | 191 | struct GetPageRDMARefRequest { 192 | mac_id_t mac_id; 193 | page_id_t page_id; 194 | }; 195 | struct GetPageRDMARefReply { 196 | uintptr_t addr; 197 | uint32_t rkey; 198 | }; 199 | /** 200 | * @brief Get a reference to the page. If the local Page Table does not have that page id, a remote 201 | * io is triggered. 202 | * 203 | * @param daemon_context 204 | * @param daemon_connection 205 | * @param req 206 | * @param resp_handle 207 | */ 208 | void getPageRDMARef(DaemonContext& daemon_context, DaemonToDaemonConnection& daemon_connection, 209 | GetPageRDMARefRequest& req, ResponseHandle& resp_handle); 210 | 211 | struct DelPageRDMARefRequest { 212 | mac_id_t mac_id; 213 | page_id_t page_id; // Preparing to delete the page id of the ref 214 | }; 215 | struct DelPageRDMARefReply { 216 | bool ret; 217 | }; 218 | /** 219 | * @brief Removes a reference to a page. 220 | * 221 | * @param daemon_context 222 | * @param daemon_connection 223 | * @param req 224 | * @param resp_handle 225 | */ 226 | void delPageRDMARef(DaemonContext& daemon_context, DaemonToDaemonConnection& daemon_connection, 227 | DelPageRDMARefRequest& req, ResponseHandle& resp_handle); 228 | 229 | struct MigratePageRequest { 230 | mac_id_t mac_id; 231 | page_id_t page_id; 232 | page_id_t swap_page_id; 233 | uintptr_t swapout_page_addr; // When `swapout_page_addr == 0` and `swapout_page_rkey == 0`, it 234 | // means no swapout. 
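// Illustrative: a pure swap-in, where the requester has no victim page to
// push back, zeroes the whole swap-out descriptor (a sketch mirroring the
// convention documented above):
//   MigratePageRequest req;
//   req.swapout_page_addr = 0;  // no swapout ...
//   req.swapout_page_rkey = 0;  // ... signalled only when both fields are zero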
235 | uintptr_t swapin_page_addr; 236 | uint32_t swapout_page_rkey; 237 | uint32_t swapin_page_rkey; 238 | }; 239 | struct MigratePageReply { 240 | bool swapped; 241 | }; 242 | /** 243 | * @brief 244 | * 245 | * @param daemon_context 246 | * @param daemon_connection 247 | * @param req 248 | * @param resp_handle 249 | */ 250 | void migratePage(DaemonContext& daemon_context, DaemonToDaemonConnection& daemon_connection, 251 | MigratePageRequest& req, ResponseHandle& resp_handle); 252 | 253 | struct TryDelPageRequest { 254 | mac_id_t mac_id; 255 | page_id_t page_id; 256 | }; 257 | struct TryDelPageReply { 258 | bool ret; 259 | }; 260 | void tryDelPage(DaemonContext& daemon_context, DaemonToMasterConnection& master_connection, 261 | TryDelPageRequest& req, ResponseHandle& resp_handle); 262 | 263 | /************************* for test ***************************/ 264 | 265 | struct __TestDataSend1Request { 266 | mac_id_t mac_id; 267 | size_t size; 268 | int data[64]; 269 | }; 270 | struct __TestDataSend1Reply { 271 | size_t size; 272 | int data[64]; 273 | }; 274 | 275 | struct __TestDataSend2Request { 276 | mac_id_t mac_id; 277 | size_t size; 278 | int data[72]; 279 | }; 280 | struct __TestDataSend2Reply { 281 | size_t size; 282 | int data[72]; 283 | }; 284 | 285 | void __testdataSend1(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 286 | __TestDataSend1Request& req, 287 | ResponseHandle<__TestDataSend1Reply>& resp_handle); 288 | 289 | void __testdataSend2(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 290 | __TestDataSend2Request& req, 291 | ResponseHandle<__TestDataSend2Reply>& resp_handle); 292 | 293 | struct __notifyPerfRequest { 294 | mac_id_t mac_id; 295 | }; 296 | struct __notifyPerfReply {}; 297 | void __notifyPerf(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 298 | __notifyPerfRequest& req, ResponseHandle<__notifyPerfReply>& resp_handle); 299 | 300 | struct __stopPerfRequest { 301 | mac_id_t mac_id; 302 | }; 303 | struct __stopPerfReply {}; 304 | void __stopPerf(DaemonContext& daemon_context, DaemonToClientConnection& client_connection, 305 | __stopPerfRequest& req, ResponseHandle<__stopPerfReply>& resp_handle); 306 | 307 | } // namespace rpc_daemon 308 | -------------------------------------------------------------------------------- /src/include/proto/rpc_master.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.hpp" 4 | #include "impl.hpp" 5 | #include "proto/rpc_adaptor.hpp" 6 | #include "utils.hpp" 7 | 8 | namespace rpc_master { 9 | 10 | struct JoinDaemonRequest { 11 | mac_id_t mac_id; // unused 12 | IPv4String ip; 13 | uint16_t port; 14 | rack_id_t rack_id; 15 | bool with_cxl; 16 | size_t free_page_num; 17 | }; 18 | struct JoinDaemonReply { 19 | mac_id_t daemon_mac_id; 20 | mac_id_t master_mac_id; 21 | uint16_t rdma_port; 22 | 23 | struct RackInfo { 24 | rack_id_t rack_id; 25 | mac_id_t daemon_id; 26 | IPv4String daemon_ipv4; 27 | uint16_t daemon_erpc_port; 28 | uint16_t daemon_rdma_port; 29 | }; 30 | 31 | size_t other_rack_count; 32 | RackInfo other_rack_infos[0]; 33 | }; 34 | /** 35 | * @brief Adds the daemon to the cluster. Called when a connection is established. 36 | * 37 | * @param master_context 38 | * @param daemon_connection It needs to be dereferenced to the object requested from the heap, after 39 | * which its lifecycle will be maintained by the MasterContext. 
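 *
 * On success the reply also carries every rack already in the cluster:
 * other_rack_infos is a flexible array member, so the reply buffer is
 * presumably sized as sizeof(JoinDaemonReply) + other_rack_count *
 * sizeof(RackInfo), and the joining daemon can walk those entries to set up
 * its cross-rack connections (see rpc_daemon::crossRackConnect).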
40 | * @param req 41 | * @param resp_handle 42 | */ 43 | void joinDaemon(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 44 | JoinDaemonRequest& req, ResponseHandle& resp_handle); 45 | 46 | struct JoinClientRequest { 47 | mac_id_t mac_id; // unused 48 | rack_id_t rack_id; 49 | }; 50 | struct JoinClientReply { 51 | mac_id_t mac_id; 52 | }; 53 | /** 54 | * @brief Adds the client to the cluster. Called when a connection is established. 55 | * 56 | * @param master_context 57 | * @param client_connection It needs to be dereferenced to the object requested from the heap, after 58 | * which its lifecycle will be maintained by the MasterContext. 59 | * @param req 60 | * @param resp_handle 61 | */ 62 | void joinClient(MasterContext& master_context, MasterToClientConnection& client_connection, 63 | JoinClientRequest& req, ResponseHandle& resp_handle); 64 | 65 | struct AllocPageRequest { 66 | mac_id_t mac_id; 67 | size_t count; 68 | }; 69 | struct AllocPageReply { 70 | page_id_t current_start_page_id; // Allocated start page id 71 | size_t current_page_count; // Number actually allocated in the requesting rack 72 | page_id_t other_start_page_id; 73 | size_t other_page_count; 74 | }; 75 | /** 76 | * @brief 77 | * Allocate a page. this operation will expect a call to `allocPageMemory()` on the daemon side to 78 | * allocate the CXL physical address. If the daemon is full, this operation will randomly send this 79 | * function to other daemons for allocation. 80 | * 81 | * @param master_context 82 | * @param daemon_connection 83 | * @param req 84 | * @param resp_handle 85 | */ 86 | void allocPage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 87 | AllocPageRequest& req, ResponseHandle& resp_handle); 88 | 89 | struct FreePageRequest { 90 | mac_id_t mac_id; 91 | page_id_t start_page_id; 92 | size_t count; 93 | }; 94 | struct FreePageReply { 95 | bool ret; 96 | }; 97 | /** 98 | * @brief Free a page 99 | * 100 | * @param master_context 101 | * @param daemon_connection 102 | * @param req 103 | * @param resp_handle 104 | */ 105 | void freePage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 106 | FreePageRequest& req, ResponseHandle& resp_handle); 107 | 108 | struct GetRackDaemonByPageIDRequest { 109 | page_id_t page_id; 110 | }; 111 | struct GetRackDaemonByPageIDReply { 112 | IPv4String dest_daemon_ipv4; 113 | uint16_t dest_daemon_port; 114 | rack_id_t rack_id; 115 | }; 116 | /** 117 | * @brief Get the IPv4 address of the daemon corresponding to rack based on the page id. This call 118 | * should be used in the remote direct io case of the daemon. 119 | * 120 | * @param master_context 121 | * @param client_connection 122 | * @param req 123 | * @param resp_handle 124 | */ 125 | void getRackDaemonByPageID(MasterContext& master_context, 126 | MasterToDaemonConnection& daemon_connection, 127 | GetRackDaemonByPageIDRequest& req, 128 | ResponseHandle& resp_handle); 129 | 130 | struct LatchRemotePageRequest { 131 | mac_id_t mac_id; 132 | bool exclusive; 133 | page_id_t page_id; 134 | }; 135 | struct LatchRemotePageReply { 136 | rack_id_t dest_rack_id; 137 | mac_id_t dest_daemon_id; 138 | }; 139 | /** 140 | * @brief Get and latch the remote page from being swapped. 
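 *
 * Pairs with unLatchRemotePage(): while latched, the page's placement is
 * pinned so a concurrent migration cannot move it mid-access. Illustrative
 * sequence (a sketch; variable names are invented for the example):
 *
 * @code
 *   LatchRemotePageRequest req;
 *   req.mac_id = my_mac_id;   // hypothetical caller id
 *   req.exclusive = false;    // a shared latch is enough for a read
 *   req.page_id = pid;
 *   // the reply names the rack/daemon currently holding the page; once the
 *   // access finishes, send UnLatchRemotePageRequest for the same page_id.
 * @endcode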
141 | * 142 | * @param master_context 143 | * @param daemon_connection 144 | * @param req 145 | * @param resp_handle 146 | */ 147 | void latchRemotePage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 148 | LatchRemotePageRequest& req, 149 | ResponseHandle& resp_handle); 150 | 151 | struct UnLatchRemotePageRequest { 152 | mac_id_t mac_id; 153 | bool exclusive; 154 | page_id_t page_id; 155 | }; 156 | struct UnLatchRemotePageReply { 157 | bool ret; 158 | }; 159 | /** 160 | * @brief Unlatch remote page 161 | * 162 | * @param master_context 163 | * @param daemon_connection 164 | * @param req 165 | * @param resp_handle 166 | */ 167 | void unLatchRemotePage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 168 | UnLatchRemotePageRequest& req, 169 | ResponseHandle& resp_handle); 170 | 171 | struct tryMigratePageRequest { 172 | mac_id_t mac_id; 173 | bool exclusive; 174 | page_id_t page_id; 175 | float page_heat; 176 | page_id_t page_id_swap; 177 | }; 178 | struct tryMigratePageReply { 179 | bool ret; 180 | }; 181 | /** 182 | * @brief Try migrate page, if success, all pages will locked. You need call `MigratePageDone` when 183 | * migrating is done. 184 | * 185 | * @param master_context 186 | * @param daemon_connection 187 | * @param req 188 | * @param resp_handle 189 | */ 190 | void tryMigratePage(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 191 | tryMigratePageRequest& req, ResponseHandle& resp_handle); 192 | 193 | struct MigratePageDoneRequest { 194 | mac_id_t mac_id; 195 | page_id_t page_id; // Swapin page (originally at the far end) 196 | mac_id_t new_daemon_id; // Your own daemon id 197 | rack_id_t new_rack_id; // Your own rack id 198 | page_id_t page_id_swap; // Swapped out page (originally local), if invalid, no swap 199 | mac_id_t new_daemon_id_swap; // The peer's daemon id 200 | rack_id_t new_rack_id_swap; // The peer's rack id 201 | }; 202 | struct MigratePageDoneReply { 203 | bool ret; 204 | }; 205 | /** 206 | * @brief Unlatch the remote page and transfer the page to this daemon 207 | * 208 | * @param master_context 209 | * @param daemon_connection 210 | * @param req 211 | * @param resp_handle 212 | */ 213 | void MigratePageDone(MasterContext& master_context, MasterToDaemonConnection& daemon_connection, 214 | MigratePageDoneRequest& req, 215 | ResponseHandle& resp_handle); 216 | 217 | } // namespace rpc_master -------------------------------------------------------------------------------- /src/include/proto/rpc_register.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "rpc_caller.hpp" 4 | #include "rpc_client.hpp" 5 | #include "rpc_daemon.hpp" 6 | #include "rpc_master.hpp" 7 | 8 | /******************* Binding RPC Functions **********************/ 9 | 10 | BIND_RPC_TYPE_STRUCT(rpc_master::joinDaemon); 11 | BIND_RPC_TYPE_STRUCT(rpc_master::joinClient); 12 | BIND_RPC_TYPE_STRUCT(rpc_master::allocPage); 13 | BIND_RPC_TYPE_STRUCT(rpc_master::freePage); 14 | BIND_RPC_TYPE_STRUCT(rpc_master::latchRemotePage); 15 | BIND_RPC_TYPE_STRUCT(rpc_master::unLatchRemotePage); 16 | BIND_RPC_TYPE_STRUCT(rpc_master::tryMigratePage); 17 | BIND_RPC_TYPE_STRUCT(rpc_master::MigratePageDone); 18 | 19 | BIND_RPC_TYPE_STRUCT(rpc_daemon::joinRack); 20 | BIND_RPC_TYPE_STRUCT(rpc_daemon::crossRackConnect); 21 | BIND_RPC_TYPE_STRUCT(rpc_daemon::getPageCXLRefOrProxy); 22 | BIND_RPC_TYPE_STRUCT(rpc_daemon::allocPage); 23 | 
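// Note: BIND_RPC_TYPE_STRUCT() derives each handler's wire id from __LINE__
// (see rpc_caller.hpp), which is why every binding in this file sits on its
// own source line: two bindings on one line would collide on the same
// rpc_type. Illustrative: RPC_TYPE_STRUCT(rpc_daemon::freePage)::rpc_type
// evaluates to the line number of the corresponding binding below.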
BIND_RPC_TYPE_STRUCT(rpc_daemon::freePage); 24 | BIND_RPC_TYPE_STRUCT(rpc_daemon::allocPageMemory); 25 | BIND_RPC_TYPE_STRUCT(rpc_daemon::alloc); 26 | BIND_RPC_TYPE_STRUCT(rpc_daemon::free); 27 | BIND_RPC_TYPE_STRUCT(rpc_daemon::getPageRDMARef); 28 | BIND_RPC_TYPE_STRUCT(rpc_daemon::delPageRDMARef); 29 | BIND_RPC_TYPE_STRUCT(rpc_daemon::tryDelPage); 30 | BIND_RPC_TYPE_STRUCT(rpc_daemon::migratePage); 31 | BIND_RPC_TYPE_STRUCT(rpc_daemon::__testdataSend1); 32 | BIND_RPC_TYPE_STRUCT(rpc_daemon::__testdataSend2); 33 | BIND_RPC_TYPE_STRUCT(rpc_daemon::__notifyPerf); 34 | BIND_RPC_TYPE_STRUCT(rpc_daemon::__stopPerf); 35 | 36 | BIND_RPC_TYPE_STRUCT(rpc_client::removePageCache); 37 | BIND_RPC_TYPE_STRUCT(rpc_client::getCurrentWriteData); 38 | BIND_RPC_TYPE_STRUCT(rpc_client::getPagePastAccessFreq); -------------------------------------------------------------------------------- /src/include/rdma_rc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "allocator.hpp" 8 | #include "fiber_pool.hpp" 9 | #include "promise.hpp" 10 | 11 | namespace rdma_rc { 12 | 13 | class RDMAEnv { 14 | public: 15 | RDMAEnv(const RDMAEnv &) = delete; 16 | RDMAEnv(RDMAEnv &&) = delete; 17 | RDMAEnv &operator=(const RDMAEnv &) = delete; 18 | RDMAEnv &operator=(RDMAEnv &&) = delete; 19 | 20 | static int init(); 21 | static RDMAEnv &get_instance() { 22 | static RDMAEnv env; 23 | return env; 24 | } 25 | 26 | bool m_active_; 27 | rdma_event_channel *m_cm_client_channel_; 28 | rdma_event_channel *m_cm_server_channel_; 29 | ibv_context **m_ibv_ctxs_; 30 | int m_nr_dev_; 31 | 32 | std::map m_pd_map_; 33 | std::map m_comp_chan_map_; 34 | std::map m_cq_map_; 35 | 36 | private: 37 | RDMAEnv() : m_active_(false) {} 38 | ~RDMAEnv(); 39 | int __init__(); 40 | }; 41 | 42 | struct SgeWr { 43 | ibv_sge sge; 44 | ibv_send_wr wr; 45 | }; 46 | 47 | struct RDMAConnection; 48 | 49 | struct SyncData { 50 | uint32_t inflight; 51 | uint32_t now_ms; 52 | RDMAConnection *conn; 53 | volatile bool wc_finish; 54 | bool timeout; 55 | uint8_t props_size; 56 | std::array props; 57 | FutureControlBlock *cbk; 58 | 59 | SyncData() : cbk(ObjectPool().pop()) {} 60 | ~SyncData() { ObjectPool().put(cbk); } 61 | 62 | void *operator new(std::size_t size) { return ObjectPoolAllocator().allocate(1); } 63 | 64 | void operator delete(void *ptr) { 65 | ObjectPoolAllocator().deallocate(static_cast(ptr), 1); 66 | } 67 | }; 68 | 69 | struct RDMAFuture { 70 | int get(); 71 | /** 72 | * @return 73 | * * 0 - ok 74 | * * 1 - pending 75 | * * -1 - error 76 | */ 77 | int try_get(); 78 | 79 | std::unique_ptr m_sd_ = {nullptr}; 80 | }; 81 | 82 | struct RDMAConnection { 83 | // Global Options 84 | static int MAX_SEND_WR; 85 | static int MAX_SEND_SGE; 86 | static int CQE_NUM; 87 | static int RESOLVE_TIMEOUT_MS; 88 | static uint8_t RETRY_COUNT; 89 | static int RNR_RETRY_COUNT; 90 | static uint8_t INITIATOR_DEPTH; 91 | static int RESPONDER_RESOURCES; 92 | static int POLL_ENTRY_COUNT; 93 | static bool RDMA_TIMEOUT_ENABLE; 94 | static uint32_t RDMA_TIMEOUT_MS; 95 | 96 | RDMAConnection(); 97 | ~RDMAConnection(); 98 | 99 | /** 100 | * @brief Listening on RDMA card IP and port 0 101 | * 102 | * @param ip 103 | * @return int 104 | */ 105 | int listen(const std::string &ip); 106 | /** 107 | * @brief Connecting the RDMA card IP and the other port 108 | * 109 | * @param ip 110 | * @param port 111 | * @param param 112 | * @param param_size 113 | * @return int 114 | */ 115 | int 
connect(const std::string &ip, uint16_t port, const void *param, uint8_t param_size); 116 | 117 | std::pair get_local_addr(); 118 | std::pair get_peer_addr(); 119 | 120 | ibv_mr *register_memory(void *ptr, size_t size); 121 | ibv_mr *register_memory(size_t size); 122 | void deregister_memory(ibv_mr *mr, bool freed = true); 123 | 124 | // prep operations are thread-unsafety for the same `sge_vec`. 125 | 126 | int prep_write(std::vector &sge_vec, uint64_t local_addr, uint32_t lkey, uint32_t length, 127 | uint64_t remote_addr, uint32_t rkey, bool inline_data); 128 | int prep_read(std::vector &sge_vec, uint64_t local_addr, uint32_t lkey, uint32_t length, 129 | uint64_t remote_addr, uint32_t rkey, bool inline_data); 130 | int prep_fetch_add(std::vector &sge_vec, uint64_t local_addr, uint32_t lkey, 131 | uint64_t remote_addr, uint32_t rkey, uint64_t n); 132 | int prep_cas(std::vector &sge_vec, uint64_t local_addr, uint32_t lkey, 133 | uint64_t remote_addr, uint32_t rkey, uint64_t expected, uint64_t desired); 134 | 135 | int prep_write(SgeWr *sge_wr, uint64_t local_addr, uint32_t lkey, uint32_t length, 136 | uint64_t remote_addr, uint32_t rkey, bool inline_data); 137 | int prep_read(SgeWr *sge_wr, uint64_t local_addr, uint32_t lkey, uint32_t length, 138 | uint64_t remote_addr, uint32_t rkey, bool inline_data); 139 | int prep_fetch_add(SgeWr *sge_wr, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, 140 | uint32_t rkey, uint64_t n); 141 | int prep_cas(SgeWr *sge_wr, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, 142 | uint32_t rkey, uint64_t expected, uint64_t desired); 143 | 144 | /** 145 | * @brief submit prep sge_vec 146 | */ 147 | RDMAFuture submit(std::vector &sge_vec); 148 | 149 | /** 150 | * @brief submit prep sgewr 151 | * 152 | * @warning The sge wr array must be reserved before future get 153 | * 154 | * @param begin 155 | * @param n 156 | * @return RDMAFuture 157 | */ 158 | RDMAFuture submit(SgeWr *begin, size_t n); 159 | 160 | static std::function m_hook_connect_; 161 | static std::function m_hook_disconnect_; 162 | static void register_connect_hook( 163 | std::function &&hook_connect); 164 | static void register_disconnect_hook(std::function &&hook_disconnect); 165 | 166 | enum conn_type_t { 167 | INVALID, 168 | SENDER, 169 | LISTENER, 170 | }; 171 | conn_type_t m_conn_type_; 172 | volatile bool m_stop_ : 1; 173 | bool m_atomic_support_ : 1; 174 | bool m_inline_support_ : 1; 175 | std::atomic m_inflight_count_; 176 | ibv_comp_channel *m_comp_chan_; 177 | ibv_pd *m_pd_; 178 | ibv_cq *m_cq_; 179 | std::deque m_cm_ids_; 180 | 181 | std::thread *m_conn_handler_; 182 | 183 | Mutex m_mu_; 184 | // std::unique_ptr m_current_sd_ = {nullptr}; 185 | SgeWr *m_sw_head_ = nullptr; 186 | SgeWr *m_sw_tail_ = nullptr; 187 | 188 | bool m_rdma_conn_param_valid_(); 189 | int m_init_last_ibv_subconnection_(); 190 | void m_handle_connection_(); 191 | int m_poll_conn_sd_wr_(); 192 | static void m_init_last_subconnection_(RDMAConnection *init_conn); 193 | static int m_acknowledge_sd_cqe_(int rc, ibv_wc wcs[]); 194 | RDMAFuture m_submit_impl(SgeWr *sge_wrs, size_t n); 195 | }; 196 | 197 | } // namespace rdma_rc -------------------------------------------------------------------------------- /src/include/stats.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "lock.hpp" 11 | 12 | class Histogram { 13 | public: 14 | Histogram(int numBuckets, 
--------------------------------------------------------------------------------
/src/include/stats.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <algorithm>
4 | #include <cmath>
5 | #include <cstdint>
6 | #include <limits>
7 | #include <random>
8 | #include <vector>
9 | 
10 | #include "lock.hpp"
11 | 
12 | class Histogram {
13 |    public:
14 |     Histogram(int numBuckets, double minValue, double maxValue);
15 |     ~Histogram() = default;
16 | 
17 |     void addValue(double value);
18 |     void clear();
19 |     int getBucketCount() const;
20 |     double getBucketValue(int bucket) const;
21 |     int getBucketCount(int bucket) const;
22 |     int getTotalCount() const;
23 |     int getPercentileBucket(double percentile) const;
24 |     double getPercentile(double percentile) const;
25 |     double getAverage() const;
26 | 
27 |     Histogram merge(Histogram &other);
28 | 
29 |    private:
30 |     int getBucket(double value) const;
31 | 
32 |     const int m_numBuckets;
33 |     const double m_minValue;
34 |     const double m_maxValue;
35 |     const double m_bucketWidth;
36 |     std::vector<int> m_buckets;
37 | };
38 | 
39 | class FreqStats {
40 |    public:
41 |     struct Heatness {
42 |         uint64_t last_time;
43 |         float last_heat;
44 | 
45 |         Heatness() : last_time(0), last_heat(0) {}
46 | 
47 |         static Heatness one(uint64_t t);
48 |         Heatness heat(uint64_t t) const;
49 |         void clear();
50 | 
51 |         Heatness operator+(const Heatness &b) const;
52 |     };
53 | 
54 |     Heatness add_wr(uint64_t t);
55 |     Heatness add_rd(uint64_t t);
56 |     void clear();
57 | 
58 |     Heatness m_wr_heat;
59 |     Heatness m_rd_heat;
60 | 
61 |     static void init_exp_decays(float half_life_us);
62 | 
63 |    private:
64 |     static Mutex m_exp_decays_lck;
65 |     static std::vector<float> m_exp_decays;
66 | };
67 | 
68 | /**
69 |  * @brief Generates random numbers following a Zipfian distribution,
70 |  * defined as P(X=k) = C / k^theta, 1 <= k <= n.
71 |  */
72 | template <typename IntType>
73 | class zipf_distribution {
74 |    public:
75 |     typedef IntType result_type;
76 | 
77 |     zipf_distribution(IntType max, double theta) : max_(max), theta_(theta), dist_(0.0, 1.0) {
78 |         c_ = std::pow(max_, -theta_) / zeta(theta_, max_);
79 |         q_ = std::pow(2.0, -theta_);
80 |         h_ = harmonic(max_);
81 |         v_ = dist_(gen_);
82 |     }
83 | 
84 |     /**
85 |      * @brief Returns a Zipf-distributed random number in [0, max)
86 |      *
87 |      * @tparam Generator
88 |      * @param g
89 |      * @return IntType
90 |      */
91 |     template <typename Generator>
92 |     IntType operator()(Generator &g) {
93 |         while (true) {
94 |             double u = dist_(g) - 0.5;
95 |             double y = std::floor(std::pow(max_ + 0.5, v_ - u) - 0.5);
96 |             if (y < 1 || y > max_) continue;
97 |             double k = std::floor(y);
98 |             v_ = dist_(g);
99 |             if (v_ >= q_ * std::pow(k + 1, theta_) / (h_ + k)) continue;
100 |             return static_cast<IntType>(k) - 1;
101 |         }
102 |     }
103 | 
104 |    private:
105 |     IntType max_;
106 |     double theta_;
107 |     double c_;
108 |     double q_;
109 |     double h_;
110 |     double v_;
111 |     std::mt19937 gen_;
112 |     std::uniform_real_distribution<double> dist_;
113 | 
114 |     static double zeta(double theta, IntType n) {
115 |         double sum = 0.0;
116 |         for (IntType i = 1; i <= n; ++i) sum += std::pow(i, -theta);
117 |         return sum;
118 |     }
119 | 
120 |     double harmonic(IntType n) const { return c_ * zeta(theta_, n); }
121 | };
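A hedged sketch of how `zipf_distribution` and `Histogram` compose. The bucket layout, seed, skew, and the percentile scale (assumed here to be a fraction in [0, 1]) are illustrative guesses, not part of these headers:

```cpp
#include <cstdint>
#include <random>

#include "stats.hpp"

// Hypothetical sketch: draw one million Zipf-distributed keys
// (theta = 0.99, a common YCSB-style skew) and histogram them.
void demo_zipf_histogram() {
    std::mt19937_64 gen(42);
    zipf_distribution<uint64_t> zipf(/*max=*/1'000'000, /*theta=*/0.99);

    Histogram hist(/*numBuckets=*/1000, /*minValue=*/0.0, /*maxValue=*/1'000'000.0);
    for (int i = 0; i < 1'000'000; ++i) {
        hist.addValue(static_cast<double>(zipf(gen)));  // values fall in [0, max)
    }

    // Under heavy skew the median sits far below the tail.
    double p50 = hist.getPercentile(0.50);  // percentile scale assumed to be [0, 1]
    double p99 = hist.getPercentile(0.99);
    (void)p50;
    (void)p99;
}
```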
--------------------------------------------------------------------------------
/src/include/udp_client.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "log.hpp"
4 | #define ASIO_STANDALONE
5 | #include <asio.hpp>
6 | #include <memory>
7 | #include <vector>
8 | 
9 | /// Basic UDP client class that supports sending messages and caches remote
10 | /// addrinfo mappings
11 | template <class T>
12 | class UDPClient {
13 |    public:
14 |     UDPClient()
15 |         : resolver_(new asio::ip::udp::resolver(io_context_)),
16 |           socket_(new asio::ip::udp::socket(io_context_)) {
17 |         socket_->open(asio::ip::udp::v4());
18 |     }
19 | 
20 |     UDPClient(const UDPClient &) = delete;
21 | 
22 |     ~UDPClient() {}
23 | 
24 |     /**
25 |      * @brief Send a UDP message to a remote host
26 |      *
27 |      * @param rem_hostname DNS-resolvable name of the remote host
28 |      * @param rem_port Destination UDP port to send the message to
29 |      * @param msg Contents of the message
30 |      *
31 |      * @return Number of bytes sent on success, SIZE_MAX on failure
32 |      */
33 |     size_t send(const std::string rem_hostname, uint16_t rem_port, const T &msg) {
34 |         asio::error_code error;
35 |         asio::ip::udp::resolver::results_type results =
36 |             resolver_->resolve(rem_hostname, std::to_string(rem_port), error);
37 | 
38 |         if (results.size() == 0) {
39 |             DLOG_ERROR("eRPC: Failed to resolve %s, asio error = %s.\n", rem_hostname.c_str(),
40 |                        error.message().c_str());
41 |             return SIZE_MAX;
42 |         }
43 | 
44 |         // Pick an IPv4 endpoint
45 |         for (const auto &endpoint_iter : results) {
46 |             if (!endpoint_iter.endpoint().address().is_v4()) continue;
47 | 
48 |             try {
49 |                 const size_t ret = socket_->send_to(asio::buffer(&msg, sizeof(T)), endpoint_iter);
50 |                 if (enable_recording_flag_) sent_vec_.push_back(msg);
51 |                 return ret;
52 |             } catch (const asio::system_error &e) {
53 |                 DLOG_ERROR("eRPC: asio send_to() failed to %s, error: %s\n", rem_hostname.c_str(),
54 |                            e.what());
55 |                 return SIZE_MAX;
56 |             }
57 |         }
58 | 
59 |         // We failed to find an IPv4 endpoint
60 |         DLOG_ERROR(
61 |             "eRPC: Failed to find an IPv4 endpoint to %s. Found %zu non-IPv4 "
62 |             "endpoints to %s though.\n",
63 |             rem_hostname.c_str(), results.size(), rem_hostname.c_str());
64 |         return SIZE_MAX;
65 |     }
66 | 
67 |     /// Maintain a record of all packets sent by this client
68 |     void enable_recording() { enable_recording_flag_ = true; }
69 | 
70 |    private:
71 |     asio::io_context io_context_;
72 |     std::unique_ptr<asio::ip::udp::resolver> resolver_;
73 |     std::unique_ptr<asio::ip::udp::socket> socket_;
74 | 
75 |     /// The list of all packets sent, maintained if recording is enabled
76 |     std::vector<T> sent_vec_;
77 |     bool enable_recording_flag_ = false;  /// Flag to enable recording for testing
78 | };
79 | 
--------------------------------------------------------------------------------
/src/include/udp_server.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #define ASIO_STANDALONE
4 | #include <asio.hpp>
5 | #include <memory>
6 | 
7 | /// Basic UDP server class that supports receiving messages
8 | template <class T>
9 | class UDPServer {
10 |    public:
11 |     UDPServer(uint16_t port, size_t timeout_ms)
12 |         : timeout_ms_(timeout_ms),
13 |           socket_(new asio::ip::udp::socket(io_context_,
14 |                                             asio::ip::udp::endpoint(asio::ip::udp::v4(), port))) {}
15 | 
16 |     UDPServer() {}
17 |     UDPServer(const UDPServer &) = delete;
18 | 
19 |     ~UDPServer() {}
20 | 
21 |     size_t recv_blocking(T &msg) {
22 |         size_t ret = socket_->receive(asio::buffer(reinterpret_cast<char *>(&msg), sizeof(T)));
23 |         return ret;
24 |     }
25 | 
26 |    private:
27 |     size_t timeout_ms_;
28 |     asio::io_context io_context_;
29 |     std::unique_ptr<asio::ip::udp::socket> socket_;
30 | };
31 | 
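A brief sketch showing the two helpers side by side. The message struct, port, and hostname are invented for illustration; note that `send()` signals failure by returning `SIZE_MAX`:

```cpp
#include <cstdio>

#include "udp_client.hpp"
#include "udp_server.hpp"

struct HelloMsg {
    char text[32];
};

// Hypothetical sketch: one-shot datagram from UDPClient to UDPServer.
int main() {
    UDPServer<HelloMsg> server(/*port=*/31850, /*timeout_ms=*/0);  // binds on construction
    UDPClient<HelloMsg> client;

    HelloMsg out{};
    std::snprintf(out.text, sizeof(out.text), "hello");
    if (client.send("localhost", 31850, out) == SIZE_MAX) {
        return 1;  // resolution or send failed; send() already logged the reason
    }

    HelloMsg in{};
    server.recv_blocking(in);  // blocks until the datagram arrives
    return 0;
}
```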
--------------------------------------------------------------------------------
/src/include/utils.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <stdint.h>
4 | 
5 | #include <cstddef>
6 | #include <functional>
7 | #include <queue>
8 | #include <string>
9 | #include <tuple>
10 | #include <type_traits>
11 | #include <vector>
12 | 
13 | #define CACHE_ALIGN __attribute__((aligned(cache_line_size)))
14 | 
15 | #define LIKELY __glibc_likely
16 | #define UNLIKELY __glibc_unlikely
17 | 
18 | #ifdef NDEBUG
19 | #define DEBUGY(cond) if (false)
20 | #else
21 | #define DEBUGY(cond) if (UNLIKELY(cond))
22 | #endif  // NDEBUG
23 | 
24 | template <typename T>
25 | using MaxHeap = std::priority_queue<T, std::vector<T>, std::less<T>>;
26 | 
27 | template <typename T>
28 | using MinHeap = std::priority_queue<T, std::vector<T>, std::greater<T>>;
29 | 
30 | template <typename D>
31 | constexpr D div_ceil(D x, uint64_t div) {
32 |     return (x + div - 1) / div;
33 | }
34 | 
35 | template <typename D>
36 | D div_floor(D x, uint64_t div) {
37 |     return x / div;
38 | }
39 | 
40 | template <typename D>
41 | D align_ceil(D x, uint64_t aligned) {
42 |     return div_ceil(x, aligned) * aligned;
43 | }
44 | 
45 | template <typename D>
46 | D align_floor(D x, uint64_t aligned) {
47 |     return div_floor(x, aligned) * aligned;
48 | }
49 | 
50 | inline uint64_t rdtsc() { return __builtin_ia32_rdtsc(); }
51 | 
52 | void threadBindCore(int core_id);
53 | uint64_t getMsTimestamp();
54 | uint64_t getUsTimestamp();
55 | uint64_t getNsTimestamp();
56 | 
57 | class IPv4String {
58 |    public:
59 |     IPv4String() = default;
60 |     IPv4String(const std::string &ip);
61 |     IPv4String(const IPv4String &ip) = default;
62 |     IPv4String(IPv4String &&ip) = default;
63 |     IPv4String &operator=(const std::string &ip);
64 |     IPv4String &operator=(const IPv4String &ip) = default;
65 |     IPv4String &operator=(IPv4String &&ip) = default;
66 | 
67 |     std::string get_string() const { return std::string(raw.ipstr); }
68 | 
69 |    private:
70 |     struct {
71 |         char ipstr[16];
72 |     } raw;
73 | };
74 | 
75 | struct NOCOPYABLE {
76 |     NOCOPYABLE() = default;
77 |     ~NOCOPYABLE() = default;
78 |     NOCOPYABLE(const NOCOPYABLE &) = delete;
79 |     NOCOPYABLE(NOCOPYABLE &&) = delete;
80 |     NOCOPYABLE &operator=(const NOCOPYABLE &) = delete;
81 |     NOCOPYABLE &operator=(NOCOPYABLE &&) = delete;
82 | };
83 | 
84 | template <typename R, typename... Args>
85 | struct function_traits_helper {
86 |     static constexpr std::size_t count = sizeof...(Args);
87 |     using result_type = R;
88 |     using args_tuple_type = std::tuple<Args...>;
89 |     template <std::size_t N>
90 |     using args_type = typename std::tuple_element<N, std::tuple<Args...>>::type;
91 | };
92 | 
93 | template <typename T>
94 | struct function_traits;
95 | template <typename R, typename... Args>
96 | struct function_traits<R(Args...)> : public function_traits_helper<R, Args...> {};
97 | template <typename R, typename... Args>
98 | struct function_traits<R (*)(Args...)> : public function_traits_helper<R, Args...> {};
99 | template <typename R, typename... Args>
100 | struct function_traits<std::function<R(Args...)>> : public function_traits_helper<R, Args...> {};
101 | 
102 | template <typename Container>
103 | struct container_traits;
104 | 
105 | template