├── .gitignore ├── CMakeLists.txt ├── app └── HashTable.h ├── include └── libibverbscpp │ ├── .clang-format │ ├── CMakeLists.txt │ ├── README.md │ ├── libibverbscpp.cpp │ └── libibverbscpp.h ├── main.cpp ├── rdma ├── CompletionQueuePair.cpp ├── CompletionQueuePair.hpp ├── MemoryRegion.h ├── Network.cpp ├── Network.hpp ├── NetworkException.cpp ├── NetworkException.h ├── QueuePair.cpp ├── QueuePair.hpp ├── RcQueuePair.cpp ├── RcQueuePair.h ├── UcQueuePair.cpp ├── UcQueuePair.h ├── UdQueuePair.cpp └── UdQueuePair.h ├── src ├── Cache.cpp ├── Cache.h ├── Connection.cpp ├── Connection.h ├── Node.cpp ├── Node.h ├── Receive.cpp ├── Send.cpp └── api.cpp └── util ├── GlobalAddressHash.h ├── NonCopyable.h ├── RDMANetworking.cpp ├── RDMANetworking.h ├── defs.cpp ├── defs.h └── socket ├── Socket.cpp ├── Socket.h ├── domain.cpp ├── domain.h ├── tcp.cpp └── tcp.h /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .DS_Store 3 | cmake-build-debug/ 4 | cmake-build-debug-remote-host/ 5 | build/ -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(medmm) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(WARNINGS "-Wall -Wextra -Wnon-virtual-dtor -Wduplicated-cond -Wduplicated-branches -Wlogical-op -Wrestrict") 6 | set(WARNINGS "${WARNINGS} -Wnull-dereference -Wold-style-cast -Wuseless-cast -Wdouble-promotion") 7 | set(WARNINGS "${WARNINGS} -Wformat=2 -Wshadow=local -Wodr") 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic ${WARNINGS}") 9 | 10 | file(GLOB COMMON_SOURCES 11 | app/*.cpp 12 | rdma/*.cpp 13 | src/*.cpp 14 | util/*.cpp 15 | util/socket/*.cpp 16 | ) 17 | file(GLOB COMMON_HEADERS 18 | app/*.h 19 | rdma/*.h 20 | rdma/*.hpp 21 | src/*.h 22 | util/*.h 23 | util/socket/*.h 24 | ) 25 | 26 | SET(LINK_LIBRARIES 27 | ibverbs 28 | 29 | ) 30 | 31 | 
include_directories(include/libibverbscpp) 32 | 33 | add_library(medmm STATIC ${COMMON_SOURCES} ${COMMON_HEADERS}) 34 | 35 | add_executable(main main.cpp) 36 | target_link_libraries(main medmm) 37 | target_link_libraries(medmm stdc++fs) 38 | 39 | target_link_libraries(medmm ${LINK_LIBRARIES}) 40 | -------------------------------------------------------------------------------- /app/HashTable.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-08-01. 3 | // 4 | 5 | #ifndef MEDMM_HASHTABLE_H 6 | #define MEDMM_HASHTABLE_H 7 | 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "../src/Node.h" 14 | #include 15 | #include 16 | 17 | 18 | const uint32_t amountNodes = 3; 19 | const uint16_t ns[] = {2000, 3000, 4000}; 20 | 21 | template 22 | class HashTable { 23 | private: 24 | Node *node; 25 | 26 | static constexpr uint32_t hash(uint32_t key) { 27 | key = (key ^ 61) ^ (key >> 16); 28 | key = key + (key << 3); 29 | key = key ^ (key >> 4); 30 | key = key * 0x27d4eb2d; 31 | key = key ^ (key >> 15); 32 | return key; 33 | } 34 | 35 | struct Elem { 36 | uint32_t key; 37 | defs::GlobalAddress gaddr; 38 | Elem *next; 39 | }; 40 | 41 | 42 | std::size_t amountElements = 0; 43 | 44 | 45 | std::vector storage; 46 | 47 | uint32_t hashBucket(uint32_t key) const { return hash(key) % storage.size(); } 48 | 49 | public: 50 | /** 51 | * Constructor 52 | */ 53 | 54 | explicit HashTable(Node *n) { 55 | node = n; 56 | storage.resize(64); 57 | // buckets = new std::array; 58 | // uint32_t lb = 0; 59 | // uint32_t ub = 0; 60 | // for (int i = 0; i < amountNodes; ++i) { 61 | // if(i == amountNodes-1){ 62 | // ub = std::numeric_limits::max(); 63 | // } else { 64 | // ub = lb + std::numeric_limits::max()/ amountNodes; 65 | // } 66 | // buckets[i] = HashBucket(ns[i], lb, ub); 67 | // lb = ub; 68 | // std::cout << buckets[i].getId() << ", " << buckets[i].getLb() << ", " << buckets[i].getUb() << 
std::endl; 69 | // } 70 | } 71 | 72 | 73 | /** 74 | * Destructor 75 | */ 76 | ~HashTable() { clear(); } 77 | 78 | /** 79 | * insert value to HT if not exists 80 | * 81 | * used for non-overwriting inserts 82 | * 83 | * @param key 84 | * @param value 85 | */ 86 | void insert(uint32_t key, V value) { 87 | uint32_t b = hashBucket(key); 88 | auto gadd = node->Malloc(sizeof(V)); 89 | storage[b] = new Elem({key, gadd, storage[b]}); 90 | ++amountElements; 91 | auto data = new defs::Data(sizeof(V), static_cast(value), gadd); 92 | node->write(data); 93 | } 94 | 95 | 96 | /** 97 | * remove element from HT 98 | * 99 | * used for remove 100 | * 101 | * @param key 102 | */ 103 | 104 | void erase(uint32_t key) { 105 | uint32_t b = hashBucket(key); 106 | Elem *bucket = storage[b]; 107 | Elem *oldBucket = nullptr; 108 | while (bucket != nullptr) { 109 | if (bucket->key == key) { 110 | if (oldBucket) { 111 | oldBucket->next = bucket->next; 112 | } else { 113 | storage[b] = bucket->next; 114 | } 115 | node->Free(bucket->gaddr); 116 | delete (bucket); 117 | --amountElements; 118 | return; 119 | } 120 | oldBucket = bucket; 121 | bucket = bucket->next; 122 | } 123 | } 124 | 125 | /** 126 | * get element from HT wrapped in optional 127 | * 128 | * used for const lookup 129 | * 130 | * @param key 131 | * @return optional containing the element or nothing if not exists 132 | */ 133 | std::optional get(uint32_t key) { 134 | uint32_t b = hashBucket(key); 135 | Elem *bucket = storage[b]; 136 | while (bucket != nullptr) { 137 | if (bucket->key == key) { 138 | auto value = node->read(bucket->gaddr); 139 | auto result = reinterpret_cast(&value); 140 | return *result; 141 | } 142 | bucket = bucket->next; 143 | } 144 | return std::nullopt; 145 | } 146 | 147 | 148 | /** 149 | * get reference to existing HT element or insert new at key 150 | * 151 | * used for lookups, inserts, and editing the HT elements 152 | * 153 | * @param key 154 | * @return reference to HT element 155 | */ 156 | V 
&operator[](uint32_t key) { 157 | uint32_t b = hashBucket(key); 158 | Elem *bucket = storage[b]; 159 | while (bucket != nullptr) { 160 | if (bucket->key == key) { 161 | auto addr = bucket->gaddr; 162 | uint64_t value = node->read(addr); 163 | auto result = reinterpret_cast(&value); 164 | return *result; /* NOTE(review): `value` is a local, so the returned reference dangles after return (same on the insert path below) */ 165 | } 166 | bucket = bucket->next; 167 | } 168 | auto gadd = node->Malloc(sizeof(V)); 169 | storage[b] = new Elem({key, gadd, storage[b]}); 170 | ++amountElements; 171 | uint64_t value = node->read(gadd); 172 | auto result = reinterpret_cast(&value); 173 | return *result; 174 | 175 | } 176 | 177 | 178 | /** 179 | * get count of contained elements 180 | * 181 | * used for containment check 182 | * 183 | * @param key 184 | * @return 0 if not contained, 1 if contained 185 | */ 186 | uint64_t count(uint32_t key) { 187 | return get(key).has_value(); 188 | } 189 | 190 | /** 191 | * get number of stored element 192 | * 193 | * @return HT size 194 | */ 195 | std::size_t size() const { 196 | return amountElements; 197 | } 198 | 199 | /** 200 | * is HT empty 201 | * 202 | * @return true if HT empty 203 | */ 204 | bool empty() const { 205 | 206 | return !size(); 207 | } 208 | 209 | /** 210 | * empty the HT: for every element, free its remote allocation and delete its local list node 211 | */ 212 | void clear() { 213 | 214 | for (auto &b: storage) { 215 | while (b != nullptr) { 216 | Elem *oldBucket = b; 217 | b = oldBucket->next; 218 | node->Free(oldBucket->gaddr); 219 | delete oldBucket; /* release the local list node too, as erase() does; previously leaked */ 220 | } 221 | } 222 | amountElements = 0; 223 | } 224 | 225 | }; 226 | 227 | 228 | #endif //MEDMM_HASHTABLE_H 229 | -------------------------------------------------------------------------------- /include/libibverbscpp/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | AccessModifierOffset: 0 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveAssignments: false 6 | AlignConsecutiveDeclarations: false 7 | AlignEscapedNewlines: Left 8 | AlignOperands: false 9 | AlignTrailingComments: false 10 | 
AllowAllParametersOfDeclarationOnNextLine: true 11 | AllowShortBlocksOnASingleLine: true 12 | AllowShortCaseLabelsOnASingleLine: true 13 | AllowShortFunctionsOnASingleLine: All 14 | AllowShortIfStatementsOnASingleLine: true 15 | AllowShortLoopsOnASingleLine: true 16 | AlwaysBreakAfterDefinitionReturnType: None 17 | AlwaysBreakAfterReturnType: None 18 | AlwaysBreakBeforeMultilineStrings: false 19 | AlwaysBreakTemplateDeclarations: MultiLine 20 | BinPackArguments: true 21 | BinPackParameters: true 22 | BraceWrapping: 23 | AfterClass: false 24 | AfterControlStatement: false 25 | AfterEnum: false 26 | AfterFunction: false 27 | AfterNamespace: false 28 | AfterObjCDeclaration: false 29 | AfterStruct: false 30 | AfterUnion: false 31 | AfterExternBlock: false 32 | BeforeCatch: false 33 | BeforeElse: false 34 | IndentBraces: false 35 | SplitEmptyFunction: true 36 | SplitEmptyRecord: true 37 | SplitEmptyNamespace: true 38 | BreakBeforeBinaryOperators: None 39 | BreakBeforeBraces: Attach 40 | BreakBeforeInheritanceComma: false 41 | BreakInheritanceList: BeforeColon 42 | BreakBeforeTernaryOperators: false 43 | BreakConstructorInitializersBeforeComma: false 44 | BreakConstructorInitializers: BeforeColon 45 | BreakAfterJavaFieldAnnotations: false 46 | BreakStringLiterals: true 47 | ColumnLimit: 0 48 | CommentPragmas: '(LCOV|unreachable)' 49 | CompactNamespaces: true 50 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 51 | ConstructorInitializerIndentWidth: 4 52 | ContinuationIndentWidth: 4 53 | Cpp11BracedListStyle: true 54 | DerivePointerAlignment: false 55 | DisableFormat: false 56 | ExperimentalAutoDetectBinPacking: true 57 | IncludeBlocks: Preserve 58 | IndentCaseLabels: true 59 | IndentPPDirectives: None 60 | IndentWidth: 4 61 | IndentWrappedFunctionNames: false 62 | KeepEmptyLinesAtTheStartOfBlocks: false 63 | MacroBlockBegin: PROXY_BEGIN 64 | MacroBlockEnd: PROXY_END 65 | MaxEmptyLinesToKeep: 1 66 | NamespaceIndentation: None 67 | PenaltyBreakAssignment: 2 68 | 
PenaltyBreakBeforeFirstCallParameter: 50 69 | PenaltyBreakComment: 50 70 | PenaltyBreakFirstLessLess: 50 71 | PenaltyBreakString: 50 72 | PenaltyBreakTemplateDeclaration: 10 73 | PenaltyExcessCharacter: 1000000 74 | PenaltyReturnTypeOnItsOwnLine: 50 75 | PointerAlignment: Right 76 | ReflowComments: false 77 | SortIncludes: true 78 | SortUsingDeclarations: true 79 | Standard: Cpp11 80 | UseTab: Never 81 | ... 82 | 83 | -------------------------------------------------------------------------------- /include/libibverbscpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | project(libibverbscpp) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | #set(CMAKE_CXX_COMPILER clang++) 6 | #set(WARNINGS "-Weverything -Wno-c++98-compat -Wno-documentation-unknown-command") 7 | set(WARNINGS "-Wall -Wextra -Wnon-virtual-dtor -Wduplicated-cond -Wduplicated-branches -Wlogical-op -Wrestrict") 8 | set(WARNINGS "${WARNINGS} -Wnull-dereference -Wold-style-cast -Wuseless-cast -Wdouble-promotion") 9 | set(WARNINGS "${WARNINGS} -Wformat=2 -Wshadow=local -Wodr") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic ${WARNINGS}") 11 | 12 | add_library(libibverbscpp libibverbscpp.cpp libibverbscpp.h) -------------------------------------------------------------------------------- /include/libibverbscpp/README.md: -------------------------------------------------------------------------------- 1 | # libibverbscpp - Modern C++ bindings for libibverbs 2 | 3 | > `libibverbs` is a library that allows userspace processes to use InfiniBand/RDMA "verbs" directly. 4 | 5 | However, libibverbs only has C bindings, with little type-safety and a bug-prone manual deallocation mechanism. 6 | 7 | As `libibverbs` already uses some object-oriented approaches, the C++ wrapper can provide stronger types and RAII mechanisms 8 | for resource management. 
9 | 10 | ### Building 11 | You'll probably need a reasonably modern compiler for this with basic C++14 support. 12 | 13 | libibverbscpp is currently header-only. Adding it to the include path and linking libibverbs should be sufficient. 14 | 15 | ```cmake 16 | project(foo) 17 | include_directories(libibverbscpp) 18 | target_link_libraries(foo libibverbs) 19 | ``` 20 | 21 | ### Examples 22 | ```C++ 23 | std::byte msg[8]; 24 | auto list = ibv::device::DeviceList(); 25 | auto ctx = list[0]->open(); 26 | auto pd = ctx->allocProtectionDomain(); 27 | auto mr = pd->registerMemoryRegion(&msg, 8, {/* no remote access */}); 28 | auto qpAttr = ibv::queuepair::InitAttributes(); 29 | // TODO: properly set up and connect QueuePair 30 | auto qp = pd->createQueuePair(qpAttr); 31 | auto wr = ibv::workrequest::Simple(); 32 | // TODO: properly set up remote address 33 | wr.setRemoteAddress(ibv::memoryregion::RemoteAddress()); 34 | wr.setLocalAddress(mr->getSlice()); 35 | ibv::workrequest::SendWr *bad; 36 | qp->postSend(wr, bad); 37 | // no explicit teardown needed 38 | ``` 39 | 40 | ### Resource management 41 | All allocations return a `std::unique_ptr`, which automatically handles exception-safe teardown. In error cases, an 42 | exception is thrown, similarly to how `operator new()` handles failing cases. 43 | Since libibverbs deallocation can potentially fail (e.g. wrong deallocation order), this is treated as fatal and the 44 | error is printed to `stderr`. However, future releases might call `std::terminate` right away. 45 | 46 | ### License 47 | This project is licensed under the same terms as [libibverbs](https://github.com/linux-rdma/rdma-core), i.e. 
dually 48 | licensed under BSD/MIT and GPLv2 49 | -------------------------------------------------------------------------------- /include/libibverbscpp/libibverbscpp.cpp: -------------------------------------------------------------------------------- 1 | #include "libibverbscpp.h" 2 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "rdma/Network.hpp" 3 | #include "src/Node.h" 4 | #include "app/HashTable.h" 5 | 6 | 7 | int main() { 8 | auto node = Node(); 9 | 10 | 11 | std::cout << "Server or Client? (0 = server, 1 = client): "; 12 | uint16_t servOcli; // 0 = server, 1 = client 13 | std::cin >> servOcli; 14 | 15 | if (servOcli == 0) { 16 | node.setID(3000); 17 | while(true) { 18 | node.connectAndReceive(node.getID()); 19 | } 20 | } else if (servOcli == 1) { 21 | node.setID(2000); 22 | HashTable h = HashTable(&node); 23 | 24 | h.insert(4,false); 25 | std::cout << "bool should be 0: " << h[4] << std::endl; 26 | 27 | std::cout << "bool should be 0: " << h.get(4).value_or("nope") << std::endl; 28 | std::cout << "size should be 1: " << h.size() << std::endl; 29 | h.insert(5,true); 30 | h.insert(6,false); 31 | std::cout << "size should be 3: " << h.size() << std::endl; 32 | 33 | std::cout << "bool should be 1: " << h[5] << std::endl; 34 | std::cout << "bool should be 0: " << h[6] << std::endl; 35 | h.erase(5); 36 | std::cout << "size should be 2: " << h.size() << std::endl; 37 | 38 | std::cout << "count should be 0: " << h.count(5) << std::endl; 39 | std::cout << "bool should be not existent: " << h.get(5).has_value() << std::endl; 40 | 41 | 42 | 43 | auto conn = node.connectClientSocket(3000); 44 | uint64_t d = reinterpret_cast("Servus"); // need to cast data to uint64_t 45 | size_t size = sizeof(d); 46 | std::cout << "Trying to Malloc" << std::endl; 47 | auto firstgaddr = defs::GlobalAddress(size, nullptr ,0); 48 | 
auto recv = node.sendAddress(firstgaddr.sendable(node.getID()), defs::IMMDATA::MALLOC, conn); 49 | node.closeClientSocket(conn); 50 | auto test = reinterpret_cast(recv); 51 | std::cout << "Got GAddr: " << test->id << ", " << test->size <<", " << test->ptr << std::endl; 52 | auto data = defs::Data(sizeof(uint64_t), d, *test); 53 | std::cout << "Trying to Write, data: " << d << std::endl; 54 | node.write(&data); 55 | std::cout << "Done. Trying to Read Written Data" << std::endl; 56 | auto result = node.read(*test); 57 | std::cout << "Done. Result: "; 58 | std::cout << reinterpret_cast(result) << ", and now reading from cache"<(result1) << ", and now changing to 1337"< 3 | #include 4 | #include "NetworkException.h" 5 | 6 | using namespace std; 7 | namespace rdma { 8 | CompletionQueuePair::CompletionQueuePair(ibv::context::Context &ctx) : 9 | channel(ctx.createCompletionEventChannel()), // Create event channel 10 | // Create completion queues 11 | sendQueue(ctx.createCompletionQueue(CQ_SIZE, contextPtr, *channel, completionVector)), 12 | receiveQueue(ctx.createCompletionQueue(CQ_SIZE, contextPtr, *channel, completionVector)) { 13 | 14 | // Request notifications 15 | sendQueue->requestNotify(false); 16 | receiveQueue->requestNotify(false); 17 | } 18 | 19 | CompletionQueuePair::~CompletionQueuePair() { 20 | for (auto event : eventsToAck) { 21 | event->ackEvents(1); 22 | } 23 | } 24 | 25 | /// Poll a completion queue 26 | uint64_t CompletionQueuePair::pollCompletionQueue(ibv::completions::CompletionQueue &completionQueue, 27 | ibv::workcompletion::Opcode type) { 28 | // Poll for a work completion 29 | ibv::workcompletion::WorkCompletion completion; 30 | if (completionQueue.poll(1, &completion) == 0) { 31 | return numeric_limits::max(); 32 | } 33 | 34 | // Check status and opcode 35 | if (not completion) { 36 | throw NetworkException("unexpected completion status: " + to_string(completion.getStatus())); 37 | } 38 | if (completion.getOpcode() != type) { 39 | throw 
NetworkException("unexpected completion opcode: " + to_string(completion.getOpcode())); 40 | } 41 | return completion.getId(); 42 | } 43 | 44 | /// Poll the send completion queue 45 | uint64_t CompletionQueuePair::pollSendCompletionQueue() { 46 | // Poll for a work completion 47 | ibv::workcompletion::WorkCompletion completion; 48 | if (sendQueue->poll(1, &completion) == 0) { 49 | return numeric_limits::max(); 50 | } 51 | 52 | // Check status and opcode 53 | if (not completion) { 54 | throw NetworkException("unexpected completion status: " + to_string(completion.getStatus())); 55 | } 56 | return completion.getId(); 57 | } 58 | 59 | uint64_t CompletionQueuePair::pollSendCompletionQueue(ibv::workcompletion::Opcode type) { 60 | return pollCompletionQueue(*sendQueue, type); 61 | } 62 | 63 | /// Poll the receive completion queue 64 | uint64_t CompletionQueuePair::pollRecvCompletionQueue() { 65 | return pollCompletionQueue(*receiveQueue, ibv::workcompletion::Opcode::RECV); 66 | } 67 | 68 | /// Poll a completion queue blocking 69 | uint64_t 70 | CompletionQueuePair::pollCompletionQueueBlocking(ibv::completions::CompletionQueue &completionQueue, 71 | ibv::workcompletion::Opcode type) { 72 | // Poll for a work completion 73 | ibv::workcompletion::WorkCompletion completion; 74 | while (completionQueue.poll(1, &completion) == 0); // busy poll 75 | 76 | // Check status and opcode 77 | if (not completion) { 78 | throw NetworkException("unexpected completion status: " + to_string(completion.getStatus())); 79 | } 80 | if (completion.getOpcode() != type) { 81 | throw NetworkException("unexpected completion opcode: " + to_string(completion.getOpcode())); 82 | } 83 | return completion.getId(); 84 | } 85 | 86 | /// Poll the send completion queue blocking 87 | uint64_t CompletionQueuePair::pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode opcode) { 88 | return pollCompletionQueueBlocking(*sendQueue, opcode); 89 | } 90 | 91 | /// Poll the receive completion queue blocking 92 
| uint64_t CompletionQueuePair::pollRecvCompletionQueueBlocking(ibv::workcompletion::Opcode opcode) { 93 | return pollCompletionQueueBlocking(*receiveQueue, opcode); 94 | } 95 | 96 | /// Wait for a work completion 97 | void CompletionQueuePair::waitForCompletion() { 98 | // Wait for completion queue event 99 | auto[event, ctx] = channel->getEvent(); 100 | std::ignore = ctx; 101 | 102 | eventsToAck.push_back(event); 103 | 104 | // Request a completion queue event 105 | event->requestNotify(false); 106 | 107 | // Poll all work completions 108 | ibv::workcompletion::WorkCompletion completion; 109 | for (;;) { 110 | auto numPolled = event->poll(1, &completion); 111 | 112 | if (numPolled == 0) { 113 | break; 114 | } 115 | if (not completion.isSuccessful()) { 116 | throw NetworkException("unexpected completion status: " + to_string(completion.getStatus())); 117 | } 118 | }; 119 | } 120 | 121 | ibv::completions::CompletionQueue &CompletionQueuePair::getSendQueue() { 122 | return *sendQueue; 123 | } 124 | 125 | ibv::completions::CompletionQueue &CompletionQueuePair::getReceiveQueue() { 126 | return *receiveQueue; 127 | } 128 | 129 | static ibv::workcompletion::WorkCompletion pollQueueBlocking(ibv::completions::CompletionQueue &queue) { 130 | ibv::workcompletion::WorkCompletion completion; 131 | while (queue.poll(1, &completion) == 0); // busy poll 132 | if (not completion) { 133 | throw NetworkException("unexpected completion status: " + to_string(completion.getStatus())); 134 | } 135 | return completion; 136 | } 137 | 138 | ibv::workcompletion::WorkCompletion CompletionQueuePair::pollSendWorkCompletionBlocking() { 139 | return pollQueueBlocking(*sendQueue); 140 | } 141 | 142 | ibv::workcompletion::WorkCompletion CompletionQueuePair::pollRecvWorkCompletionBlocking() { 143 | return pollQueueBlocking(*receiveQueue); 144 | } 145 | } // End of namespace rdma 146 | -------------------------------------------------------------------------------- /rdma/CompletionQueuePair.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace rdma { 9 | class CompletionQueuePair { 10 | static constexpr void *contextPtr = nullptr; 11 | static constexpr int completionVector = 0; 12 | /// The minimal number of entries for the completion queue 13 | static constexpr int CQ_SIZE = 100; 14 | 15 | /// The completion channel 16 | std::unique_ptr channel; 17 | /// The send completion queue 18 | std::unique_ptr sendQueue; 19 | /// The receive completion queue 20 | std::unique_ptr receiveQueue; 21 | 22 | /// The cached work completions 23 | std::vector> cachedCompletions; 24 | /// Protect wait for events method from concurrent access 25 | std::mutex guard; 26 | 27 | uint64_t 28 | pollCompletionQueue(ibv::completions::CompletionQueue &completionQueue, ibv::workcompletion::Opcode type); 29 | 30 | std::vector eventsToAck; 31 | 32 | public: 33 | explicit CompletionQueuePair(ibv::context::Context &ctx); 34 | 35 | ~CompletionQueuePair(); 36 | 37 | ibv::completions::CompletionQueue &getSendQueue(); 38 | 39 | ibv::completions::CompletionQueue &getReceiveQueue(); 40 | 41 | /// Poll the send completion queue 42 | uint64_t pollSendCompletionQueue(); 43 | 44 | /// Poll the send completion queue with a user defined type 45 | uint64_t pollSendCompletionQueue(ibv::workcompletion::Opcode type); 46 | 47 | /// Poll the receive completion queue 48 | uint64_t pollRecvCompletionQueue(); 49 | 50 | // Poll a completion queue blocking 51 | uint64_t pollCompletionQueueBlocking(ibv::completions::CompletionQueue &completionQueue, 52 | ibv::workcompletion::Opcode type); 53 | 54 | /// Poll the send completion queue blocking 55 | uint64_t 56 | pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode opcode = ibv::workcompletion::Opcode::RDMA_READ); 57 | 58 | /// Poll the receive completion queue blocking 59 | uint64_t 
pollRecvCompletionQueueBlocking(ibv::workcompletion::Opcode opcode = ibv::workcompletion::Opcode::RECV); 60 | 61 | ibv::workcompletion::WorkCompletion pollSendWorkCompletionBlocking(); 62 | 63 | ibv::workcompletion::WorkCompletion pollRecvWorkCompletionBlocking(); 64 | 65 | /// Wait for a work request completion 66 | void waitForCompletion(); 67 | }; 68 | } // End of namespace rdma 69 | -------------------------------------------------------------------------------- /rdma/MemoryRegion.h: -------------------------------------------------------------------------------- 1 | #ifndef L5RDMA_MEMORYREGION_H 2 | #define L5RDMA_MEMORYREGION_H 3 | 4 | #include 5 | #include 6 | 7 | namespace rdma { 8 | template 9 | struct RegisteredMemoryRegion { 10 | std::vector underlying; 11 | std::unique_ptr mr; 12 | 13 | RegisteredMemoryRegion(size_t size, rdma::Network &net, std::initializer_list flags) : 14 | underlying(size), 15 | mr(net.registerMr(underlying.data(), underlying.size() * sizeof(T), flags)) {} 16 | 17 | std::vector &get() { 18 | return underlying; 19 | } 20 | 21 | T *data() { 22 | return underlying.data(); 23 | } 24 | 25 | typename std::vector::iterator begin() { 26 | return std::begin(underlying); 27 | } 28 | 29 | typename std::vector::iterator end() { 30 | return std::end(underlying); 31 | } 32 | 33 | ibv::memoryregion::MemoryRegion &rdmaMr() { 34 | return *mr; 35 | } 36 | 37 | ibv::memoryregion::RemoteAddress getAddr() { 38 | return mr->getRemoteAddress(); 39 | } 40 | 41 | ibv::memoryregion::Slice getSlice() { 42 | return mr->getSlice(); 43 | } 44 | 45 | ibv::memoryregion::Slice getSlice(uint32_t offset, uint32_t sliceLength) { 46 | return mr->getSlice(offset, sliceLength); 47 | } 48 | 49 | RegisteredMemoryRegion(const RegisteredMemoryRegion &) = delete; 50 | 51 | RegisteredMemoryRegion &operator=(const RegisteredMemoryRegion &) = delete; 52 | 53 | RegisteredMemoryRegion(RegisteredMemoryRegion &&) = delete; 54 | 55 | RegisteredMemoryRegion 
&operator=(RegisteredMemoryRegion &&) = delete; 56 | 57 | ~RegisteredMemoryRegion() = default; 58 | }; 59 | } 60 | 61 | #endif //L5RDMA_MEMORYREGION_H 62 | -------------------------------------------------------------------------------- /rdma/Network.cpp: -------------------------------------------------------------------------------- 1 | #include "Network.hpp" 2 | #include "CompletionQueuePair.hpp" 3 | #include 4 | #include 5 | #include "NetworkException.h" 6 | 7 | using namespace std; 8 | 9 | static std::unique_ptr openUnambigousDevice(ibv::device::DeviceList &devices) { 10 | if (devices.size() == 0) { 11 | throw rdma::NetworkException("no Infiniband devices available"); 12 | } else if (devices.size() > 1) { 13 | throw rdma::NetworkException("more than 1 Infiniband devices available .. not handled right now"); 14 | } 15 | return devices[0]->open(); 16 | } 17 | 18 | namespace rdma { 19 | ostream &operator<<(ostream &os, const ibv::memoryregion::RemoteAddress &remoteMemoryRegion) { 20 | return os << "address=" << reinterpret_cast(remoteMemoryRegion.address) << " key=" 21 | << remoteMemoryRegion.rkey; 22 | } 23 | 24 | ostream &operator<<(ostream &os, const Address &address) { 25 | return os << "lid=" << address.lid << ", qpn=" << address.qpn; 26 | } 27 | 28 | Network::Network() : devices(), context(openUnambigousDevice(devices)), sharedCompletionQueuePair(*context) { 29 | // Create the protection domain 30 | protectionDomain = context->allocProtectionDomain(); 31 | 32 | // Create receive queue 33 | ibv::srq::InitAttributes initAttributes(ibv::srq::Attributes(maxWr, maxSge)); 34 | sharedReceiveQueue = protectionDomain->createSrq(initAttributes); 35 | } 36 | 37 | /// Get the LID 38 | uint16_t Network::getLID() { 39 | return context->queryPort(ibport).getLid(); 40 | } 41 | 42 | /// Get the GID 43 | ibv::Gid Network::getGID() { 44 | return context->queryGid(ibport, 0); 45 | } 46 | 47 | /// Print the capabilities of the RDMA host channel adapter 48 | void 
Network::printCapabilities() { 49 | using Cap = ibv::device::CapabilityFlag; 50 | // Get a list of all devices 51 | for (auto device : devices) { 52 | // Open the device 53 | auto context = device->open(); 54 | 55 | // Query device attributes 56 | const auto device_attr = context->queryAttributes(); 57 | 58 | // Print attributes 59 | cout << "[Device Information]" << '\n'; 60 | cout << left << setw(44) << " Device Name: " << context->getDevice()->getName() << '\n'; 61 | cout << left << setw(44) << " GUID: " << context->getDevice()->getGUID() << '\n'; 62 | cout << left << setw(44) << " Vendor ID: " << device_attr.getVendorId() << '\n'; 63 | cout << left << setw(44) << " Vendor Part ID: " << device_attr.getVendorPartId() << '\n'; 64 | cout << left << setw(44) << " Hardware Version: " << device_attr.getHwVer() << '\n'; 65 | cout << left << setw(44) << " Firmware Version: " << device_attr.getFwVer() << '\n'; 66 | cout << left << setw(44) << " Physical Ports: " << device_attr.getPhysPortCnt() << '\n'; 67 | cout << left << setw(44) << " CA ACK Delay: " << device_attr.getLocalCaAckDelay() << '\n'; 68 | 69 | cout << "[Memory]" << '\n'; 70 | cout << left << setw(44) << " Max MR size: " << device_attr.getMaxMrSize() << '\n'; 71 | cout << left << setw(44) << " Max page size: " << device_attr.getPageSizeCap() << '\n'; 72 | 73 | cout << "[Capabilities]" << '\n'; 74 | if (device_attr.hasCapability(Cap::RESIZE_MAX_WR)) { 75 | cout << " The device supports modifying the maximum number of outstanding Work Requests of a QP" 76 | << '\n'; 77 | } 78 | if (device_attr.hasCapability(Cap::BAD_PKEY_CNTR)) { 79 | cout << " The device supports bad P_Key counting for each port" << '\n'; 80 | } 81 | if (device_attr.hasCapability(Cap::BAD_QKEY_CNTR)) { 82 | cout << " The device supports Q_Key violations counting for each port" << '\n'; /* was "P_Key", copied from the BAD_PKEY_CNTR branch above */ 83 | } 84 | if (device_attr.hasCapability(Cap::RAW_MULTI)) { 85 | cout << " The device supports raw packet multicast" << '\n'; 86 | } 87 | if 
(device_attr.hasCapability(Cap::AUTO_PATH_MIG)) { 88 | cout << " The device supports automatic path migration" << '\n'; 89 | } 90 | if (device_attr.hasCapability(Cap::CHANGE_PHY_PORT)) { 91 | cout << " The device supports changing the primary port number of a QP when transitioning from SQD to " 92 | "SQD state" << '\n'; 93 | } 94 | if (device_attr.hasCapability(Cap::UD_AV_PORT_ENFORCE)) { 95 | cout << " The device supports AH port number enforcement" << '\n'; 96 | } 97 | if (device_attr.hasCapability(Cap::CURR_QP_STATE_MOD)) { 98 | cout << " The device supports the Current QP state modifier when calling ibv_modify_qp()" << '\n'; 99 | } 100 | if (device_attr.hasCapability(Cap::SHUTDOWN_PORT)) { 101 | cout << " The device supports shutdown port" << '\n'; 102 | } 103 | if (device_attr.hasCapability(Cap::INIT_TYPE)) { 104 | cout << " The device supports setting InitType and InitTypeReply" << '\n'; 105 | } 106 | if (device_attr.hasCapability(Cap::PORT_ACTIVE_EVENT)) { 107 | cout << " The device supports the IBV_EVENT_PORT_ACTIVE event generation" << '\n'; 108 | } 109 | if (device_attr.hasCapability(Cap::SYS_IMAGE_GUID)) { 110 | cout << " The device supports System Image GUID" << '\n'; 111 | } 112 | if (device_attr.hasCapability(Cap::RC_RNR_NAK_GEN)) { 113 | cout << " The device supports RNR-NAK generation for RC QPs" << '\n'; 114 | } 115 | if (device_attr.hasCapability(Cap::SRQ_RESIZE)) { 116 | cout << " The device supports modifying the maximum number of outstanding Work Requests in an SRQ" 117 | << '\n'; 118 | } 119 | if (device_attr.hasCapability(Cap::N_NOTIFY_CQ)) { 120 | cout << " The device supports Requesting Completion notification when N completions were added (and " 121 | "not only one) to a CQ" << '\n'; 122 | } 123 | 124 | cout << "[Resources]" << '\n'; 125 | cout << setw(44) << " Max number of QPs: " << device_attr.getMaxQp() << '\n'; 126 | cout << setw(44) << " Max number of WRs per Queue: " << device_attr.getMaxQpWr() << '\n'; 127 | cout << setw(44) << " 
Max number of SGE per WR: " << device_attr.getMaxSge() << '\n'; 128 | cout << setw(44) << " Max number of CQs: " << device_attr.getMaxCq() << '\n'; 129 | cout << setw(44) << " Max number of CQEs per CQ: " << device_attr.getMaxCqe() << '\n'; 130 | cout << setw(44) << " Max number of PDs: " << device_attr.getMaxPd() << '\n'; 131 | cout << setw(44) << " Max number of MRs: " << device_attr.getMaxMr() << '\n'; 132 | cout << setw(44) << " Max number of AHs: " << device_attr.getMaxAh() << '\n'; 133 | cout << setw(44) << " Max number of partitions: " << device_attr.getMaxPkeys() << '\n'; 134 | 135 | cout << "[Multicast]" << '\n'; 136 | cout << setw(44) << " Max multicast groups: " << device_attr.getMaxMcastGrp() << '\n'; 137 | cout << setw(44) << " Max QPs per multicast group: " << device_attr.getMaxMcastQpAttach() << '\n'; 138 | cout << setw(44) << " Max total multicast QPs: " << device_attr.getMaxTotalMcastQpAttach() << '\n'; 139 | 140 | cout << "[Atomics]" << '\n'; 141 | switch (device_attr.getAtomicCap()) { 142 | case (ibv::device::AtomicCapabilities::NONE): 143 | cout << " Atomic operations aren’t supported at all" << '\n'; 144 | break; 145 | case (ibv::device::AtomicCapabilities::HCA): 146 | cout << " Atomicity is guaranteed between QPs on this device only" << '\n'; 147 | break; 148 | case (ibv::device::AtomicCapabilities::GLOB): 149 | cout << " Atomicity is guaranteed between this device and any other component, such as CPUs and " 150 | "other devices" << '\n'; 151 | break; 152 | } 153 | cout << setw(44) << " Max outstanding reads/atomics per QP: " << device_attr.getMaxQpRdAtom() << '\n'; 154 | cout << setw(44) << " Resources for reads/atomics: " << device_attr.getMaxResRdAtom() << '\n'; 155 | cout << setw(44) << " Max depth per QP read/atomic initiation: " << device_attr.getMaxQpInitRdAtom() 156 | << '\n'; 157 | 158 | cout << "[Reliable Datagram]" << '\n'; 159 | cout << setw(44) << " Max number of SGEs per QP: " << device_attr.getMaxSgeRd() << '\n'; 160 | cout << 
setw(44) << " Max number of EECs: " << device_attr.getMaxEe() << '\n'; 161 | cout << setw(44) << " Max number of RDDs: " << device_attr.getMaxRdd() << '\n'; 162 | cout << setw(44) << " Max outstanding reads/atomics per EEC: " << device_attr.getMaxEeRdAtom() << '\n'; 163 | cout << setw(44) << " Max depth per EEC read/atomic initiation: " 164 | << device_attr.getMaxEeInitRdAtom() << '\n'; 165 | 166 | cout << "[Memory Windows]" << '\n'; 167 | cout << setw(44) << " Max number of MWs: " << device_attr.getMaxMw() << '\n'; 168 | 169 | cout << "[Fast Memory Registration]" << '\n'; 170 | cout << setw(44) << " Max number of FMRs: " << device_attr.getMaxFmr() << '\n'; 171 | cout << setw(44) << " Max number of maps per FMR: " << device_attr.getMaxMapPerFmr() << '\n'; 172 | 173 | cout << "[Shared Receive Queues]" << '\n'; 174 | cout << setw(44) << " Max number of SRQs: " << device_attr.getMaxSrq() << '\n'; 175 | cout << setw(44) << " Max number of WR per SRQ: " << device_attr.getMaxSrqWr() << '\n'; 176 | cout << setw(44) << " Max number of SGEs per WR: " << device_attr.getMaxSrqSge() << '\n'; 177 | 178 | cout << "[Raw]" << '\n'; 179 | cout << setw(44) << " Max number of IPv6 QPs: " << device_attr.getMaxRawIpv6Qp() << '\n'; 180 | cout << setw(44) << " Max number of Ethertype QPs: " << device_attr.getMaxRawEthyQp() << endl; 181 | } 182 | } 183 | 184 | unique_ptr 185 | Network::registerMr(void *addr, size_t length, initializer_list flags) { 186 | return protectionDomain->registerMemoryRegion(addr, length, flags); 187 | } 188 | 189 | CompletionQueuePair Network::newCompletionQueuePair() { 190 | return CompletionQueuePair(*context); 191 | } 192 | 193 | ibv::protectiondomain::ProtectionDomain &Network::getProtectionDomain() { 194 | return *protectionDomain; 195 | } 196 | 197 | CompletionQueuePair &Network::getSharedCompletionQueue() { 198 | return sharedCompletionQueuePair; 199 | } 200 | } 201 | -------------------------------------------------------------------------------- 
/rdma/Network.hpp:
--------------------------------------------------------------------------------
#pragma once

#include // NOTE(review): the header name is missing here (apparently lost in extraction) - restore it
#include "CompletionQueuePair.hpp"

namespace rdma {
    // NOTE(review): the template argument of std::unique_ptr is missing here and on the members below
    // (apparently lost in extraction) - restore them from the original header.
    /// Owning handle to a registered memory region
    using MemoryRegion = std::unique_ptr;

    /// Stream output for a remote memory address
    std::ostream &operator<<(std::ostream &os, const ibv::memoryregion::RemoteAddress &remoteMemoryRegion);

    /// The LID and QPN uniquely address a queue pair
    struct Address {
        /// Global identifier of the remote port
        ibv::Gid gid;
        /// Queue pair number
        uint32_t qpn;
        /// Local identifier of the remote port
        uint16_t lid;
    };

    /// Stream output for an Address
    std::ostream &operator<<(std::ostream &os, const Address &address);

    /// Abstracts a global rdma context
    class Network {
        // QueuePair reads ibport, protectionDomain and the shared queues directly
        friend class QueuePair;

        static constexpr uint32_t maxWr = 16351;
        static constexpr uint32_t maxSge = 1;

        /// The port of the Infiniband device
        static constexpr uint8_t ibport = 1;

        /// The Infiniband devices
        ibv::device::DeviceList devices;
        /// The verbs context
        std::unique_ptr context;
        /// The global protection domain
        std::unique_ptr protectionDomain;

        /// Shared Queues
        CompletionQueuePair sharedCompletionQueuePair;

        /// Shared receive queue, used by QueuePairs that are not given their own
        std::unique_ptr sharedReceiveQueue;

    public:
        Network();

        /// Get the LID
        uint16_t getLID();

        /// Get the GID
        ibv::Gid getGID();

        /// Print the capabilities of the RDMA host channel adapter
        void printCapabilities();

        /// Create a new CompletionQueuePair on this network's verbs context
        CompletionQueuePair newCompletionQueuePair();

        /// Access the completion queue pair shared by default-constructed QueuePairs
        CompletionQueuePair &getSharedCompletionQueue();

        /// Register a new MemoryRegion
        /// Forwards addr, length and flags to the global protection domain.
        std::unique_ptr
        registerMr(void *addr, size_t length, std::initializer_list flags);

        /// Access the global protection domain
        ibv::protectiondomain::ProtectionDomain& getProtectionDomain();
    };
}

--------------------------------------------------------------------------------
/rdma/NetworkException.cpp:
--------------------------------------------------------------------------------
#include "NetworkException.h"

3 | rdma::NetworkException::~NetworkException() = default; 4 | -------------------------------------------------------------------------------- /rdma/NetworkException.h: -------------------------------------------------------------------------------- 1 | #ifndef L5RDMA_NETWORKEXCEPTION_H 2 | #define L5RDMA_NETWORKEXCEPTION_H 3 | 4 | #include 5 | 6 | namespace rdma { 7 | /// A network exception 8 | struct NetworkException : public std::runtime_error { 9 | using std::runtime_error::runtime_error; 10 | 11 | ~NetworkException() override; 12 | }; 13 | } 14 | 15 | #endif //L5RDMA_NETWORKEXCEPTION_H 16 | -------------------------------------------------------------------------------- /rdma/QueuePair.cpp: -------------------------------------------------------------------------------- 1 | #include "QueuePair.hpp" 2 | #include "Network.hpp" 3 | #include "CompletionQueuePair.hpp" 4 | #include 5 | 6 | using namespace std; 7 | namespace rdma { 8 | QueuePair::QueuePair(Network &network, ibv::queuepair::Type type) 9 | : QueuePair(network, type, network.sharedCompletionQueuePair, *network.sharedReceiveQueue) {} 10 | 11 | QueuePair::QueuePair(Network &network, ibv::queuepair::Type type, ibv::srq::SharedReceiveQueue &receiveQueue) 12 | : QueuePair(network, type, network.sharedCompletionQueuePair, receiveQueue) {} 13 | 14 | QueuePair::QueuePair(Network &network, ibv::queuepair::Type type, CompletionQueuePair &completionQueuePair) 15 | : QueuePair(network, type, completionQueuePair, *network.sharedReceiveQueue) {} 16 | 17 | QueuePair::QueuePair(Network &network, ibv::queuepair::Type type, CompletionQueuePair &completionQueuePair, 18 | ibv::srq::SharedReceiveQueue &receiveQueue) 19 | : defaultPort(network.ibport), receiveQueue(receiveQueue) { 20 | ibv::queuepair::InitAttributes queuePairAttributes{}; 21 | queuePairAttributes.setContext(context); 22 | // CQ to be associated with the Send Queue (SQ) 23 | queuePairAttributes.setSendCompletionQueue(completionQueuePair.getSendQueue()); 24 
| // CQ to be associated with the Receive Queue (RQ) 25 | queuePairAttributes.setRecvCompletionQueue(completionQueuePair.getReceiveQueue()); 26 | // SRQ handle if QP is to be associated with an SRQ, otherwise NULL 27 | queuePairAttributes.setSharedReceiveQueue(receiveQueue); 28 | ibv::queuepair::Capabilities capabilities{}; 29 | capabilities.setMaxSendWr(maxOutstandingSendWrs); 30 | capabilities.setMaxRecvWr(maxOutstandingRecvWrs); 31 | capabilities.setMaxSendSge(maxSlicesPerSendWr); 32 | capabilities.setMaxRecvSge(maxSlicesPerRecvWr); 33 | capabilities.setMaxInlineData(maxInlineSize); 34 | queuePairAttributes.setCapabilities(capabilities); 35 | queuePairAttributes.setType(type); 36 | queuePairAttributes.setSignalAll(signalAll); 37 | 38 | // Create queue pair 39 | qp = network.protectionDomain->createQueuePair(queuePairAttributes); 40 | } 41 | 42 | uint32_t QueuePair::getQPN() { 43 | return qp->getNum(); 44 | } 45 | 46 | void QueuePair::postWorkRequest(ibv::workrequest::SendWr &workRequest) { 47 | ibv::workrequest::SendWr *badWorkRequest = nullptr; 48 | qp->postSend(workRequest, badWorkRequest); 49 | } 50 | 51 | void QueuePair::postRecvRequest(ibv::workrequest::Recv &recvRequest) { 52 | ibv::workrequest::Recv *badWorkRequest = nullptr; 53 | receiveQueue.postRecv(recvRequest, badWorkRequest); 54 | } 55 | 56 | namespace { // Anonymous helper namespace 57 | string queuePairAccessFlagsToString(ibv::queuepair::Attributes attr) { 58 | string result; 59 | if (attr.hasQpAccessFlags(ibv::AccessFlag::REMOTE_WRITE)) 60 | result += "IBV_ACCESS_REMOTE_WRITE, "; 61 | if (attr.hasQpAccessFlags(ibv::AccessFlag::REMOTE_READ)) 62 | result += "IBV_ACCESS_REMOTE_READ, "; 63 | if (attr.hasQpAccessFlags(ibv::AccessFlag::REMOTE_ATOMIC)) 64 | result += "IBV_ACCESS_REMOTE_ATOMIC, "; 65 | return result; 66 | } 67 | } // end of anonymous helper namespace 68 | 69 | void QueuePair::printQueuePairDetails() const { 70 | using Mask = ibv::queuepair::AttrMask; 71 | 72 | auto attr = 
qp->query({Mask::STATE, Mask::CUR_STATE, Mask::EN_SQD_ASYNC_NOTIFY, Mask::ACCESS_FLAGS, 73 | Mask::PKEY_INDEX, Mask::PORT, Mask::QKEY, Mask::AV, Mask::PATH_MTU, Mask::TIMEOUT, 74 | Mask::RETRY_CNT, Mask::RNR_RETRY, Mask::RQ_PSN, Mask::MAX_QP_RD_ATOMIC, Mask::ALT_PATH, 75 | Mask::MIN_RNR_TIMER, Mask::SQ_PSN, Mask::MAX_DEST_RD_ATOMIC, Mask::PATH_MIG_STATE, 76 | Mask::CAP, Mask::DEST_QPN}); 77 | 78 | cout << "[State of QP " << qp.get() << "]" << endl; 79 | cout << endl; 80 | cout << left << setw(44) << "qp_state:" << to_string(attr.getQpState()) << endl; 81 | cout << left << setw(44) << "cur_qp_state:" << to_string(attr.getQpState()) << endl; 82 | cout << left << setw(44) << "path_mtu:" << to_string(attr.getPathMtu()) << endl; 83 | cout << left << setw(44) << "path_mig_state:" << to_string(attr.getPathMigState()) << endl; 84 | cout << left << setw(44) << "qkey:" << attr.getQkey() << endl; 85 | cout << left << setw(44) << "rq_psn:" << attr.getRqPsn() << endl; 86 | cout << left << setw(44) << "sq_psn:" << attr.getSqPsn() << endl; 87 | cout << left << setw(44) << "dest_qp_num:" << attr.getDestQpNum() << endl; 88 | cout << left << setw(44) << "qp_access_flags:" << queuePairAccessFlagsToString(attr) << endl; 89 | cout << left << setw(44) << "cap:" << "" << endl; 90 | cout << left << setw(44) << "ah_attr:" << "" << endl; 91 | cout << left << setw(44) << "alt_ah_attr:" << "" << endl; 92 | cout << left << setw(44) << "pkey_index:" << attr.getPkeyIndex() << endl; 93 | cout << left << setw(44) << "alt_pkey_index:" << attr.getAltPkeyIndex() << endl; 94 | cout << left << setw(44) << "en_sqd_async_notify:" << static_cast(attr.getEnSqdAsyncNotify()) << endl; 95 | cout << left << setw(44) << "sq_draining:" << static_cast(attr.getSqDraining()) << endl; 96 | cout << left << setw(44) << "max_rd_atomic:" << static_cast(attr.getMaxRdAtomic()) << endl; 97 | cout << left << setw(44) << "max_dest_rd_atomic:" << static_cast(attr.getMaxDestRdAtomic()) << endl; 98 | cout << left << setw(44) << 
"min_rnr_timer:" << static_cast(attr.getMinRnrTimer()) << endl; 99 | cout << left << setw(44) << "port_num:" << static_cast(attr.getPortNum()) << endl; 100 | cout << left << setw(44) << "timeout:" << static_cast(attr.getTimeout()) << endl; 101 | cout << left << setw(44) << "retry_cnt:" << static_cast(attr.getRetryCnt()) << endl; 102 | cout << left << setw(44) << "rnr_retry:" << static_cast(attr.getRnrRetry()) << endl; 103 | cout << left << setw(44) << "alt_port_num:" << static_cast(attr.getAltPortNum()) << endl; 104 | cout << left << setw(44) << "alt_timeout:" << static_cast(attr.getAltTimeout()) << endl; 105 | } 106 | 107 | uint32_t QueuePair::getMaxInlineSize() const { 108 | return maxInlineSize; 109 | } 110 | 111 | QueuePair::~QueuePair() = default; 112 | } // End of namespace rdma 113 | -------------------------------------------------------------------------------- /rdma/QueuePair.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "Network.hpp" 5 | 6 | namespace rdma { 7 | struct Address; 8 | 9 | class Network; 10 | 11 | class CompletionQueuePair; 12 | 13 | class QueuePair { 14 | protected: 15 | static constexpr void *context = nullptr; // Associated context of the QP (returned in completion events) 16 | static constexpr uint32_t maxOutstandingSendWrs = 16351; // max number of outstanding WRs in the SQ 17 | static constexpr uint32_t maxOutstandingRecvWrs = 16351; // max number of outstanding WRs in the RQ 18 | static constexpr uint32_t maxSlicesPerSendWr = 1; // max number of scatter/gather elements in a WR in the SQ 19 | static constexpr uint32_t maxSlicesPerRecvWr = 1; // max number of scatter/gather elements in a WR in the RQ 20 | static constexpr uint32_t maxInlineSize = 512; // max number of bytes that can be posted inline to the SQ 21 | static constexpr auto signalAll = false; // If each Work Request (WR) submitted to the SQ generates a completion entry 22 | 23 | const 
uint8_t defaultPort; 24 | 25 | std::unique_ptr qp; 26 | 27 | ibv::srq::SharedReceiveQueue &receiveQueue; 28 | 29 | // Uses shared completion and receive Queue 30 | QueuePair(Network &network, ibv::queuepair::Type type); 31 | 32 | // Uses shared completion Queue 33 | QueuePair(Network &network, ibv::queuepair::Type type, ibv::srq::SharedReceiveQueue &receiveQueue); 34 | 35 | // Uses shared receive Queue 36 | QueuePair(Network &network, ibv::queuepair::Type type, CompletionQueuePair &completionQueuePair); 37 | 38 | QueuePair(Network &network, ibv::queuepair::Type type, CompletionQueuePair &completionQueuePair, 39 | ibv::srq::SharedReceiveQueue &receiveQueue); 40 | 41 | public: 42 | virtual ~QueuePair(); 43 | 44 | QueuePair(QueuePair&&) = default; 45 | 46 | uint32_t getQPN(); 47 | 48 | virtual void connect(const Address &address) = 0; 49 | 50 | void postWorkRequest(ibv::workrequest::SendWr &workRequest); 51 | 52 | void postRecvRequest(ibv::workrequest::Recv &recvRequest); 53 | 54 | uint32_t getMaxInlineSize() const; 55 | 56 | /// Print detailed information about this queue pair 57 | void printQueuePairDetails() const; 58 | }; 59 | } // End of namespace rdma 60 | -------------------------------------------------------------------------------- /rdma/RcQueuePair.cpp: -------------------------------------------------------------------------------- 1 | #include "RcQueuePair.h" 2 | 3 | void rdma::RcQueuePair::connect(const Address &address) { 4 | connect(address, defaultPort); 5 | } 6 | 7 | void rdma::RcQueuePair::connect(const Address &address, uint8_t port, uint8_t retryCount) { 8 | using Access = ibv::AccessFlag; 9 | using Mod = ibv::queuepair::AttrMask; 10 | 11 | { // First initialize the the QP 12 | ibv::queuepair::Attributes attributes{}; 13 | attributes.setQpState(ibv::queuepair::State::INIT); 14 | attributes.setPkeyIndex(0); // Partition the queue pair belongs to 15 | attributes.setPortNum(port); // The local physical port 16 | // Allowed access flags of the remote 
operations for incoming packets (i.e., none, RDMA read, RDMA write, or atomics) 17 | attributes.setQpAccessFlags({Access::REMOTE_WRITE, Access::REMOTE_READ, Access::REMOTE_ATOMIC}); 18 | 19 | qp->modify(attributes, {Mod::STATE, Mod::PKEY_INDEX, Mod::PORT, Mod::ACCESS_FLAGS}); 20 | } 21 | 22 | { // RTR (ready to receive) 23 | ibv::queuepair::Attributes attributes{}; 24 | attributes.setQpState(ibv::queuepair::State::RTR); 25 | attributes.setPathMtu(ibv::Mtu::_4096); // Maximum payload size 26 | attributes.setDestQpNum(address.qpn); // The remote QP number 27 | attributes.setRqPsn(0); // The packet sequence number of received packets 28 | attributes.setMaxDestRdAtomic(16); // The number of outstanding RDMA reads & atomic operations (destination) 29 | attributes.setMinRnrTimer(12); // The time before a RNR NACK is sent 30 | ibv::ah::Attributes ahAttributes{}; 31 | ahAttributes.setIsGlobal(false); // Whether there is a global routing header 32 | ahAttributes.setDlid(address.lid); // The LID of the remote host 33 | ahAttributes.setSl(0); // The service level (which determines the virtual lane) 34 | ahAttributes.setSrcPathBits(0); // Use the port base LID 35 | ahAttributes.setPortNum(port); // The local physical port 36 | // see rc_pingpong.c::pp_connect_ctx 37 | if (address.gid.getInterfaceId()) { 38 | ahAttributes.setIsGlobal(true); 39 | ibv::GlobalRoute globalRoute{}; 40 | globalRoute.setHopLimit(1); 41 | globalRoute.setDgid(address.gid); 42 | ahAttributes.setGrh(globalRoute); 43 | } 44 | attributes.setAhAttr(ahAttributes); 45 | 46 | qp->modify(attributes, {Mod::STATE, Mod::AV, Mod::PATH_MTU, Mod::DEST_QPN, Mod::RQ_PSN, 47 | Mod::MAX_DEST_RD_ATOMIC, Mod::MIN_RNR_TIMER}); 48 | } 49 | 50 | { // RTS (ready to send) 51 | ibv::queuepair::Attributes attributes{}; 52 | attributes.setQpState(ibv::queuepair::State::RTS); 53 | attributes.setSqPsn(0); // The packet sequence number of sent packets 54 | attributes.setTimeout(0); // The minimum timeout before retransmitting the 
packet (0 = infinite) 55 | attributes.setRetryCnt(retryCount); // How often to retry sending (7 = infinite) 56 | attributes.setRnrRetry(retryCount); // How often to retry sending when RNR NACK was received (7 = infinite) 57 | attributes.setMaxRdAtomic(128); // The number of outstanding RDMA reads & atomic operations (initiator) 58 | qp->modify(attributes, {Mod::STATE, Mod::TIMEOUT, Mod::RETRY_CNT, Mod::RNR_RETRY, Mod::SQ_PSN, 59 | Mod::MAX_QP_RD_ATOMIC}); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /rdma/RcQueuePair.h: -------------------------------------------------------------------------------- 1 | #ifndef L5RDMA_RCQUEUEPAIR_H 2 | #define L5RDMA_RCQUEUEPAIR_H 3 | 4 | #include "QueuePair.hpp" 5 | 6 | namespace rdma { 7 | class RcQueuePair : public QueuePair { 8 | public: 9 | explicit RcQueuePair(Network &network) : QueuePair(network, ibv::queuepair::Type::RC) {} 10 | 11 | RcQueuePair(Network &network, CompletionQueuePair &completionQueuePair) : 12 | QueuePair(network, ibv::queuepair::Type::RC, completionQueuePair) {} 13 | 14 | void connect(const Address &address) override; 15 | 16 | void connect(const Address &address, uint8_t port, uint8_t retryCount = 0); 17 | 18 | inline void setToResetState(){ 19 | using Mod = ibv::queuepair::AttrMask; 20 | ibv::queuepair::Attributes attributes{}; 21 | attributes.setQpState(ibv::queuepair::State::RESET); 22 | qp->modify(attributes, {Mod::STATE});}; 23 | }; 24 | } 25 | 26 | #endif //L5RDMA_RCQUEUEPAIR_H 27 | -------------------------------------------------------------------------------- /rdma/UcQueuePair.cpp: -------------------------------------------------------------------------------- 1 | #include "UcQueuePair.h" 2 | 3 | void rdma::UcQueuePair::connect(const Address &address) { 4 | connect(address, defaultPort); 5 | } 6 | 7 | void rdma::UcQueuePair::connect(const Address &address, uint8_t port) { 8 | using Access = ibv::AccessFlag; 9 | using Mod = 
ibv::queuepair::AttrMask; 10 | 11 | { // INIT 12 | ibv::queuepair::Attributes attributes{}; 13 | attributes.setQpState(ibv::queuepair::State::INIT); 14 | attributes.setPkeyIndex(0); 15 | attributes.setPortNum(port); 16 | attributes.setQpAccessFlags({Access::REMOTE_WRITE}); 17 | 18 | qp->modify(attributes, {Mod::STATE, Mod::PKEY_INDEX, Mod::PORT, Mod::ACCESS_FLAGS}); 19 | } 20 | 21 | { // Ready to receive 22 | ibv::queuepair::Attributes attributes{}; 23 | attributes.setQpState(ibv::queuepair::State::RTR); 24 | attributes.setPathMtu(ibv::Mtu::_4096); 25 | attributes.setDestQpNum(address.qpn); 26 | attributes.setRqPsn(0); 27 | ibv::ah::Attributes ahAttributes{}; 28 | ahAttributes.setIsGlobal(false); 29 | ahAttributes.setDlid(address.lid); 30 | ahAttributes.setSl(0); 31 | ahAttributes.setSrcPathBits(0); 32 | ahAttributes.setPortNum(port); 33 | attributes.setAhAttr(ahAttributes); 34 | 35 | qp->modify(attributes, {Mod::STATE, Mod::AV, Mod::PATH_MTU, Mod::DEST_QPN, Mod::RQ_PSN}); 36 | } 37 | 38 | { // Ready to send 39 | ibv::queuepair::Attributes attributes{}; 40 | attributes.setQpState(ibv::queuepair::State::RTS); 41 | attributes.setSqPsn(0); 42 | 43 | qp->modify(attributes, {Mod::STATE, Mod::SQ_PSN}); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /rdma/UcQueuePair.h: -------------------------------------------------------------------------------- 1 | #ifndef L5RDMA_UCQUEUEPAIR_H 2 | #define L5RDMA_UCQUEUEPAIR_H 3 | 4 | #include "QueuePair.hpp" 5 | 6 | namespace rdma { 7 | class UcQueuePair : public QueuePair { 8 | public: 9 | explicit UcQueuePair(Network& network) : QueuePair(network, ibv::queuepair::Type::UC) {} 10 | 11 | UcQueuePair(Network &network, CompletionQueuePair &completionQueuePair) : 12 | QueuePair(network, ibv::queuepair::Type::UC, completionQueuePair) {} 13 | 14 | void connect(const Address & address) override; 15 | 16 | /// Similar to RcQueuePair::connect(), just without retry and atomic settings 17 | 
void connect(const Address &address, uint8_t port); 18 | }; 19 | } 20 | 21 | #endif //L5RDMA_UCQUEUEPAIR_H 22 | -------------------------------------------------------------------------------- /rdma/UdQueuePair.cpp: -------------------------------------------------------------------------------- 1 | #include "UdQueuePair.h" 2 | 3 | void rdma::UdQueuePair::connect(const rdma::Address &) { 4 | connect(defaultPort); 5 | } 6 | 7 | void rdma::UdQueuePair::connect(uint8_t port, uint32_t packetSequenceNumber) { 8 | using Mod = ibv::queuepair::AttrMask; 9 | 10 | { 11 | ibv::queuepair::Attributes attr{}; 12 | attr.setQpState(ibv::queuepair::State::INIT); 13 | attr.setPkeyIndex(0); 14 | attr.setPortNum(port); 15 | attr.setQkey(0x22222222); // todo: bad magic constant 16 | 17 | qp->modify(attr, {Mod::STATE, Mod::PKEY_INDEX, Mod::PORT, Mod::QKEY}); 18 | } 19 | 20 | { // RTR 21 | ibv::queuepair::Attributes attr{}; 22 | attr.setQpState(ibv::queuepair::State::RTR); 23 | 24 | qp->modify(attr, {Mod::STATE}); 25 | } 26 | 27 | { // RTS 28 | ibv::queuepair::Attributes attr{}; 29 | attr.setQpState(ibv::queuepair::State::RTS); 30 | attr.setSqPsn(packetSequenceNumber); 31 | 32 | qp->modify(attr, {Mod::STATE, Mod::SQ_PSN}); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /rdma/UdQueuePair.h: -------------------------------------------------------------------------------- 1 | #ifndef L5RDMA_UDQUEUEPAIR_H 2 | #define L5RDMA_UDQUEUEPAIR_H 3 | 4 | #include "QueuePair.hpp" 5 | 6 | namespace rdma { 7 | class UdQueuePair : public QueuePair { 8 | public: 9 | explicit UdQueuePair(Network &network) : QueuePair(network, ibv::queuepair::Type::UD) {} 10 | 11 | void connect(const Address & address) override; 12 | 13 | void connect(uint8_t port, uint32_t packetSequenceNumber = 0); 14 | }; 15 | } 16 | 17 | #endif //L5RDMA_UDQUEUEPAIR_H 18 | -------------------------------------------------------------------------------- /src/Cache.cpp: 
-------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-06-24. 3 | // 4 | 5 | #include "Cache.h" 6 | #include "../util/GlobalAddressHash.h" 7 | #include 8 | 9 | 10 | Cache::Cache() : maxsize(), availablesize(), items(), state() { 11 | maxsize = 512; 12 | availablesize = 512; 13 | state = defs::CACHE_DIRECTORY_STATE::UNSHARED; 14 | } 15 | 16 | void Cache::addCacheItem(defs::GlobalAddress gaddr, CacheItem cacheItem) { 17 | if (gaddr.size <= maxsize) { 18 | if (availablesize >= gaddr.size) { 19 | items.insert(std::pair( 20 | GlobalAddressHash()(gaddr.sendable(0)), cacheItem)); 21 | availablesize = availablesize - gaddr.size; 22 | } else { 23 | while (availablesize < gaddr.size) { 24 | removeOldestItem(); 25 | } 26 | } 27 | } else { 28 | throw; 29 | } 30 | } 31 | 32 | uint64_t Cache::removeCacheItem(defs::GlobalAddress ga) { 33 | auto iterator = GlobalAddressHash()(ga.sendable(0)); 34 | auto res = items.erase(iterator); 35 | return res; 36 | } 37 | 38 | void Cache::removeOldestItem() { 39 | std::pair latest; 40 | 41 | for (auto &it: items) { 42 | if (latest.second.lastused.time_since_epoch() < it.second.lastused.time_since_epoch()) { 43 | latest = it; 44 | } 45 | std::cout << std::chrono::system_clock::to_time_t(latest.second.lastused) << std::endl; 46 | } 47 | availablesize = availablesize + latest.second.globalAddress.size; 48 | items.erase(latest.first); 49 | } 50 | 51 | CacheItem *Cache::getCacheItem(defs::GlobalAddress ga) { 52 | auto cacheItem = items.find(GlobalAddressHash()(ga.sendable(0))); 53 | if (cacheItem != items.end()) { 54 | cacheItem->second.lastused = std::chrono::system_clock::now(); 55 | return &cacheItem->second; 56 | } else { 57 | return nullptr; 58 | } 59 | } 60 | 61 | void Cache::alterCacheItem(CacheItem ci, defs::GlobalAddress ga){ 62 | auto cacheItem = items.find(GlobalAddressHash()(ga.sendable(0))); 63 | if(cacheItem != items.end()){ 64 | cacheItem->second = ci; 65 | 
} 66 | else{ 67 | addCacheItem(ga, ci); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/Cache.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-06-24. 3 | // 4 | 5 | #ifndef MEDMM_CACHE_H 6 | #define MEDMM_CACHE_H 7 | 8 | 9 | #include 10 | #include 11 | #include 12 | #include "../util/defs.h" 13 | #include 14 | 15 | 16 | 17 | struct CacheItem { 18 | defs::GlobalAddress globalAddress; 19 | uint64_t data; 20 | std::chrono::time_point created; 21 | std::chrono::time_point lastused; 22 | }; 23 | 24 | class Cache { 25 | private: 26 | size_t maxsize; //is set 512 in constructor 27 | size_t availablesize; 28 | std::map items; 29 | 30 | void removeOldestItem(); 31 | public: 32 | explicit Cache(); 33 | 34 | void addCacheItem(defs::GlobalAddress gaddr, CacheItem cacheItem); 35 | 36 | uint64_t removeCacheItem(defs::GlobalAddress ga); 37 | 38 | CacheItem *getCacheItem(defs::GlobalAddress ga); 39 | 40 | void alterCacheItem(CacheItem cacheItem, defs::GlobalAddress ga); 41 | 42 | defs::CACHE_DIRECTORY_STATE state; 43 | }; 44 | 45 | 46 | 47 | 48 | #endif //MEDMM_CACHE_H 49 | -------------------------------------------------------------------------------- /src/Connection.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-07-25. 
3 | // 4 | 5 | #include "Connection.h" 6 | 7 | Connection::Connection(Connection &&c) noexcept : rcqp(), socket(){ 8 | rcqp = std::move(c.rcqp); 9 | socket = std::move(c.socket); 10 | } 11 | 12 | Connection::Connection(std::unique_ptr uniquePtr, l5::util::Socket s) { 13 | rcqp = std::move(uniquePtr); 14 | socket = std::move(s); 15 | } 16 | 17 | Connection &Connection::operator=(Connection &&other) noexcept { 18 | rcqp = std::move(other.rcqp); 19 | socket = std::move(other.socket); 20 | return *this; 21 | } -------------------------------------------------------------------------------- /src/Connection.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-07-25. 3 | // 4 | 5 | #ifndef MEDMM_CONNECTION_H 6 | #define MEDMM_CONNECTION_H 7 | 8 | 9 | #include 10 | #include "../rdma/RcQueuePair.h" 11 | #include "../util/socket/Socket.h" 12 | 13 | class Connection { 14 | public: 15 | Connection(Connection &&c) noexcept; 16 | 17 | Connection(std::unique_ptr uniquePtr, l5::util::Socket s); 18 | 19 | Connection &operator=(Connection &&other) noexcept; 20 | 21 | std::unique_ptr rcqp; 22 | l5::util::Socket socket; 23 | 24 | 25 | }; 26 | 27 | 28 | #endif //MEDMM_CONNECTION_H 29 | -------------------------------------------------------------------------------- /src/Node.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-04-11. 3 | // 4 | 5 | #include "Node.h" 6 | 7 | 8 | Node::Node() : network(), id(), locks(), cache() { 9 | id = 0; 10 | } 11 | 12 | bool Node::isLocal(defs::GlobalAddress gaddr) { 13 | return gaddr.getNodeId() == id; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/Node.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-04-11. 
//

#ifndef MEDMM_NODE_H
#define MEDMM_NODE_H


#include "../util/RDMANetworking.h"
#include "../rdma/CompletionQueuePair.hpp"
#include "../util/defs.h"
#include "Cache.h"
#include "Connection.h"
#include // NOTE(review): header name missing (apparently lost in extraction) - restore it
#include // NOTE(review): header name missing (apparently lost in extraction) - restore it

/// One participant of the distributed memory: owns the RDMA network context, a lock table and a
/// cache, and both serves requests of other nodes (receive side) and issues its own (public API).
class Node {
private:
    rdma::Network network;
    /// Id of this node; 0 after construction, changed through setID()
    uint16_t id;
    // NOTE(review): the template arguments of this map (and of the std::vector parameters below)
    // are missing, apparently lost in extraction - restore them from the original header.
    std::unordered_map locks;
    Cache cache;


    // Request handlers, dispatched by receive() on the immediate value of the incoming request.
    // recvbuf holds the request payload; remoteAddr is the requester's buffer that the answer is
    // written to with an RDMA write-with-immediate on c, after which cq is polled for completion.

    /// Applies the lock request in recvbuf via setLock() and sends the boolean result back
    void handleLocks(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr,
                     rdma::CompletionQueuePair &cq, Connection &c);

    /// Allocates the requested size via Malloc() and sends the resulting address back
    void handleAllocation(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr,
                          rdma::CompletionQueuePair &cq, Connection &c);

    /// Frees the address in recvbuf via Free() and sends the result back
    void handleFree(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr,
                    rdma::CompletionQueuePair &cq, Connection &c);

    /// Reads the address in recvbuf via performRead(), records the requester as sharer and
    /// sends the stored value back
    void handleRead(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr,
                    rdma::CompletionQueuePair &cq, Connection &c);

    /// Handles a write request; the result is returned by receive() as its "keep going" flag
    bool handleWrite(void *recvbuf, ibv::memoryregion::RemoteAddress
    remoteAddr, rdma::CompletionQueuePair &cq, Connection &c);

    /// Handles an invalidation request (implementation is not part of this header)
    void handleInvalidation(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr,
                            rdma::CompletionQueuePair &cq, Connection &c);

    /// Handles a reset request; receive() reports false afterwards, which ends the receive loop
    void handleReset(ibv::memoryregion::RemoteAddress remoteAddr, rdma::CompletionQueuePair &cq,
                     Connection &c);

    bool setLock(uint16_t lockId, defs::LOCK_STATES state);

    bool sendLock(defs::Lock lock, defs::IMMDATA immData, Connection &c);

    defs::GlobalAddress performWrite(defs::Data *data, uint16_t srcID);

    /// NOTE(review): may return nullptr - handleWrite() checks for it, handleRead() does not.
    defs::SaveData *performRead(defs::GlobalAddress gaddr, uint16_t srcID);

    void prepareForInvalidate(rdma::CompletionQueuePair &cq, Connection &c);

    void startInvalidations(defs::Data data, ibv::memoryregion::RemoteAddress remoteAddr,
                            rdma::CompletionQueuePair &cq, std::vector nodes,
                            uint16_t srcID, Connection &c);


    void broadcastInvalidations(std::vector nodes, defs::GlobalAddress gaddr);


public:

    explicit Node();

    Connection connectClientSocket(uint16_t port);

    void closeClientSocket(Connection &c);

    void *sendAddress(defs::SendGlobalAddr data, defs::IMMDATA immData, Connection &c);

    defs::GlobalAddress sendData(defs::SendingData data, defs::IMMDATA immData, Connection &c);


    /// Listens on the TCP port, accepts one peer, connects a queue pair to it and then serves
    /// its requests until receive() reports false
    void connectAndReceive(uint16_t port);

    /// Waits for one request on c and dispatches it to the matching handler.
    /// @return false after a reset request, the handler's result for a write request, true otherwise
    bool receive(Connection &c);

    defs::GlobalAddress Malloc(size_t size);

    defs::GlobalAddress Free(defs::GlobalAddress gaddr);

    defs::GlobalAddress write(defs::Data *data);

    uint64_t read(defs::GlobalAddress gaddr);

    /// True if the node id stored in gaddr equals the id of this node
    bool isLocal(defs::GlobalAddress gaddr);

    inline uint16_t getID() { return id; }

    inline void setID(uint16_t newID) { id = newID; }

};


#endif //MEDMM_NODE_H

--------------------------------------------------------------------------------
/src/Receive.cpp:
--------------------------------------------------------------------------------
//
// Created by Magdalena Pröbstl on 2019-06-15.
3 | // 4 | 5 | 6 | #include "Node.h" 7 | #include "../util/socket/tcp.h" 8 | 9 | 10 | bool Node::receive(Connection &c) { 11 | auto &cq = network.getSharedCompletionQueue(); 12 | auto recvbuf = malloc(defs::BIGBADBUFFER_SIZE * 2); 13 | auto recvmr = network.registerMr(recvbuf, defs::BIGBADBUFFER_SIZE * 2, 14 | {ibv::AccessFlag::LOCAL_WRITE, ibv::AccessFlag::REMOTE_WRITE}); 15 | auto remoteMr = ibv::memoryregion::RemoteAddress{reinterpret_cast(recvbuf), 16 | recvmr->getRkey()}; 17 | l5::util::tcp::write(c.socket, &remoteMr, sizeof(remoteMr)); 18 | l5::util::tcp::read(c.socket, &remoteMr, sizeof(remoteMr)); 19 | auto recv = ibv::workrequest::Recv{}; 20 | recv.setSge(nullptr, 0); 21 | c.rcqp->postRecvRequest(recv); 22 | auto wc = cq.pollRecvWorkCompletionBlocking(); 23 | auto immData = wc.getImmData(); 24 | //std::cout << "got this immdata: " << immData << std::endl; 25 | switch (immData) { 26 | case defs::IMMDATA::MALLOC: //immdata = 1, if it comes from another server 27 | { 28 | handleAllocation(recvbuf, remoteMr, cq, c); 29 | return true; 30 | } 31 | case defs::IMMDATA::READ: //immdata = 2, if it is a read 32 | { 33 | handleRead(recvbuf, remoteMr, cq, c); 34 | return true; 35 | } 36 | case defs::IMMDATA::FREE://immdata = 3, it should be freed 37 | { 38 | handleFree(recvbuf, remoteMr, cq, c); 39 | return true; 40 | } 41 | case defs::IMMDATA::WRITE: //immdata = 4, write 42 | { 43 | 44 | auto res = handleWrite(recvbuf, remoteMr, cq, c); 45 | std::cout << "result of write: " << res << std::endl; 46 | return res; 47 | } 48 | case defs::IMMDATA::LOCKS: //immdata = 5, save lock 49 | { 50 | handleLocks(recvbuf, remoteMr, cq, c); 51 | return true; 52 | } 53 | case defs::IMMDATA::RESET: //immdata = 6, reset state 54 | { 55 | handleReset(remoteMr, cq, c); 56 | return false; 57 | } 58 | case defs::IMMDATA::INVALIDATE: { 59 | handleInvalidation(recvbuf, remoteMr, cq, c); 60 | return true; 61 | } 62 | default: { 63 | return true; 64 | } 65 | } 66 | } 67 | 68 | void 
Node::connectAndReceive(uint16_t port) { 69 | auto soc = l5::util::Socket::create(); 70 | auto qp = std::make_unique( 71 | rdma::RcQueuePair(network, network.getSharedCompletionQueue())); 72 | auto c = Connection{std::move(qp), 73 | l5::util::Socket::create()}; 74 | l5::util::tcp::bind(soc, port); 75 | auto remoteAddr = rdma::Address{network.getGID(), c.rcqp->getQPN(), network.getLID()}; 76 | l5::util::tcp::listen(soc); 77 | std::cout << "now listening... "; 78 | c.socket = l5::util::tcp::accept(soc); 79 | soc.close(); 80 | std::cout << "and accepted" << std::endl; 81 | l5::util::tcp::write(c.socket, &remoteAddr, sizeof(remoteAddr)); 82 | l5::util::tcp::read(c.socket, &remoteAddr, sizeof(remoteAddr)); 83 | c.rcqp->connect(remoteAddr); 84 | bool connected = true; 85 | while (connected) { 86 | connected = receive(c); 87 | } 88 | } 89 | 90 | void Node::handleAllocation(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr, 91 | rdma::CompletionQueuePair &cq, Connection &c) { 92 | auto sga = reinterpret_cast(recvbuf); 93 | auto gaddr = defs::GlobalAddress(*sga); 94 | auto newgaddr = Malloc(gaddr.size).sendable(sga->srcID); 95 | auto sendmr = network.registerMr(&newgaddr, sizeof(defs::GlobalAddress), {}); 96 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, defs::IMMDATA::DEFAULT); 97 | c.rcqp->postWorkRequest(write); 98 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 99 | } 100 | 101 | void Node::handleFree(void *recvbuf, ibv::memoryregion::RemoteAddress 102 | remoteAddr, rdma::CompletionQueuePair &cq, Connection &c) { 103 | auto sga = reinterpret_cast(recvbuf); 104 | auto gaddr = defs::GlobalAddress(*sga); 105 | auto res = Free(gaddr).sendable(sga->srcID); 106 | auto sendmr = network.registerMr(&res, sizeof(defs::SendGlobalAddr), {}); 107 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, defs::IMMDATA::DEFAULT); 108 | c.rcqp->postWorkRequest(write); 109 | 
cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 110 | } 111 | 112 | void Node::handleLocks(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr, 113 | rdma::CompletionQueuePair &cq, Connection &c) { 114 | auto l = reinterpret_cast(recvbuf); 115 | auto lock = setLock(l->id, l->state); 116 | auto sendmr = network.registerMr(&lock, sizeof(bool), {}); 117 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, defs::IMMDATA::DEFAULT); 118 | c.rcqp->postWorkRequest(write); 119 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 120 | } 121 | 122 | void Node::handleRead(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr, 123 | rdma::CompletionQueuePair &cq, Connection &c) { 124 | auto sga = reinterpret_cast(recvbuf); 125 | auto gaddr = defs::GlobalAddress(*sga); 126 | auto data = performRead(gaddr, sga->srcID); 127 | data->iscached = defs::CACHE_DIRECTORY_STATE::SHARED; 128 | data->sharerNodes.push_back(sga->srcID); 129 | // std::cout << "datasize: " << sizeof(data->data) << ", data: " << data << std::endl; 130 | auto sendmr = network.registerMr(&data->data, sizeof(uint64_t), {}); 131 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, defs::IMMDATA::DEFAULT); 132 | c.rcqp->postWorkRequest(write); 133 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 134 | 135 | } 136 | 137 | bool Node::handleWrite(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr, 138 | rdma::CompletionQueuePair &cq, Connection &c) { 139 | auto senddata = reinterpret_cast(recvbuf); 140 | /* std::cout << "Write, SendData: data: " << senddata->data << ", ga-ID: " << senddata->sga.id 141 | << ", ga-size:" << senddata->sga.size << ", ptr: " << senddata->sga.ptr << ", size: " 142 | << senddata->size << std::endl;*/ 143 | auto data = defs::Data(*senddata); 144 | auto olddata = performRead(data.ga, senddata->sga.srcID); 145 | if (olddata != nullptr) { 146 | /*std::cout 
<< "olddata: " << olddata->data << ", is cached: " << olddata->iscached 147 | << ", first sharernode: " << olddata->sharerNodes[0] << std::endl;*/ 148 | if ((olddata->iscached > defs::CACHE_DIRECTORY_STATE::UNSHARED) && 149 | (!olddata->sharerNodes.empty()) && olddata->iscached < 3) { 150 | startInvalidations(data, remoteAddr, cq, olddata->sharerNodes, senddata->sga.srcID, c); 151 | return false; 152 | } else { 153 | auto result = performWrite(&data, senddata->sga.srcID).sendable(senddata->sga.srcID); 154 | auto sendmr = network.registerMr(&result, sizeof(defs::SendGlobalAddr), {}); 155 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, 156 | defs::IMMDATA::DEFAULT); 157 | c.rcqp->postWorkRequest(write); 158 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 159 | } 160 | } else { 161 | std::cout << "helloq3" <sga.srcID).sendable(senddata->sga.srcID); 163 | auto sendmr = network.registerMr(&result, sizeof(defs::SendGlobalAddr), {}); 164 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, 165 | defs::IMMDATA::DEFAULT); 166 | c.rcqp->postWorkRequest(write); 167 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 168 | } 169 | return true; 170 | } 171 | 172 | void Node::handleInvalidation(void *recvbuf, ibv::memoryregion::RemoteAddress remoteAddr, 173 | rdma::CompletionQueuePair &cq, Connection &c) { 174 | auto sga = reinterpret_cast(recvbuf); 175 | auto res = cache.removeCacheItem(defs::GlobalAddress(*sga)); 176 | std::cout << "removed cacheitem" << std::endl; 177 | auto sendmr = network.registerMr(&res, sizeof(uint64_t), {}); 178 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, defs::IMMDATA::DEFAULT); 179 | c.rcqp->postWorkRequest(write); 180 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 181 | } 182 | 183 | void Node::handleReset(ibv::memoryregion::RemoteAddress 184 | remoteAddr, rdma::CompletionQueuePair &cq, 
Connection &c) { 185 | bool result = false; 186 | auto sendmr = network.registerMr(&result, sizeof(bool), {}); 187 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, 188 | defs::IMMDATA::RESET); 189 | c.rcqp->postWorkRequest(write); 190 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 191 | c.rcqp->setToResetState(); 192 | c.socket.close(); 193 | } 194 | 195 | 196 | void Node::startInvalidations(defs::Data data, ibv::memoryregion::RemoteAddress remoteAddr, 197 | rdma::CompletionQueuePair &cq, 198 | std::vector nodes, uint16_t srcID, Connection &c) { 199 | std::cout << "going to prepareForInvalidate" << std::endl; 200 | auto invalidation = data.ga.sendable(srcID); 201 | auto sendmr1 = network.registerMr(&invalidation, sizeof(defs::SendGlobalAddr), {}); 202 | auto write1 = defs::createWriteWithImm(sendmr1->getSlice(), remoteAddr, 203 | defs::IMMDATA::INVALIDATE); 204 | c.rcqp->postWorkRequest(write1); 205 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 206 | auto result = performWrite(&data, srcID).sendable(srcID); 207 | auto sendmr = network.registerMr(&result, sizeof(defs::SendGlobalAddr), {}); 208 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteAddr, 209 | defs::IMMDATA::DEFAULT); 210 | c.rcqp->postWorkRequest(write); 211 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 212 | c.rcqp->setToResetState(); 213 | c.socket.close(); 214 | broadcastInvalidations(nodes, data.ga); 215 | 216 | } -------------------------------------------------------------------------------- /src/Send.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-06-15. 
3 | // 4 | 5 | #include 6 | #include 7 | #include "Node.h" 8 | #include "../util/defs.h" 9 | #include "../util/socket/tcp.h" 10 | 11 | 12 | Connection Node::connectClientSocket(uint16_t port) { 13 | auto qp = std::make_unique(rdma::RcQueuePair(network, network.getSharedCompletionQueue())); 14 | auto c = Connection{std::move(qp), l5::util::Socket::create()}; 15 | auto remoteAddr = rdma::Address{network.getGID(), c.rcqp->getQPN(), network.getLID()}; 16 | for (int i = 0;; ++i) { 17 | try { 18 | l5::util::tcp::connect(c.socket, defs::ip, port); 19 | break; 20 | } catch (...) { 21 | std::this_thread::sleep_for(std::chrono_literals::operator ""ms(20)); 22 | if (i > 10) throw; 23 | } 24 | } 25 | l5::util::tcp::write(c.socket, &remoteAddr, sizeof(remoteAddr)); 26 | l5::util::tcp::read(c.socket, &remoteAddr, sizeof(remoteAddr)); 27 | std::cout << "connecting queuepairs" << std::endl; 28 | c.rcqp->connect(remoteAddr); 29 | return std::move(c); 30 | } 31 | 32 | void Node::closeClientSocket(Connection &c) { 33 | std::cout << "closing socket" << std::endl; 34 | auto fakeLock = defs::Lock{id, defs::LOCK_STATES::UNLOCKED}; 35 | sendLock(fakeLock, defs::RESET, c); 36 | c.rcqp->setToResetState(); 37 | c.socket.close(); 38 | } 39 | 40 | 41 | void *Node::sendAddress(defs::SendGlobalAddr data, defs::IMMDATA immData, Connection &c) { 42 | auto &cq = network.getSharedCompletionQueue(); 43 | auto size = sizeof(defs::SendGlobalAddr) + data.size; 44 | auto sendmr = network.registerMr(&data, sizeof(data) + data.size, {}); 45 | auto recvbuf = malloc(size); 46 | auto recvmr = network.registerMr(recvbuf, size, 47 | {ibv::AccessFlag::LOCAL_WRITE, ibv::AccessFlag::REMOTE_WRITE}); 48 | auto remoteMr = ibv::memoryregion::RemoteAddress{reinterpret_cast(recvbuf), 49 | recvmr->getRkey()}; 50 | l5::util::tcp::write(c.socket, &remoteMr, sizeof(remoteMr)); 51 | l5::util::tcp::read(c.socket, &remoteMr, sizeof(remoteMr)); 52 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteMr, immData); 
53 | c.rcqp->postWorkRequest(write); 54 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 55 | auto recv = ibv::workrequest::Recv{}; 56 | recv.setSge(nullptr, 0); 57 | c.rcqp->postRecvRequest(recv); 58 | cq.pollRecvWorkCompletionBlocking(); 59 | return recvbuf; 60 | 61 | } 62 | 63 | 64 | defs::GlobalAddress Node::sendData(defs::SendingData data, defs::IMMDATA immData, Connection &c) { 65 | std::cout << "sendable: " << data.data << std::endl; 66 | auto &cq = network.getSharedCompletionQueue(); 67 | auto sendmr = network.registerMr(&data, sizeof(defs::SendingData), {}); 68 | auto recvbuf = malloc(sizeof(defs::SendGlobalAddr)); 69 | auto recvmr = network.registerMr(recvbuf, sizeof(defs::SendGlobalAddr), 70 | {ibv::AccessFlag::LOCAL_WRITE, ibv::AccessFlag::REMOTE_WRITE}); 71 | auto remoteMr = ibv::memoryregion::RemoteAddress{reinterpret_cast(recvbuf), 72 | recvmr->getRkey()}; 73 | l5::util::tcp::write(c.socket, &remoteMr, sizeof(remoteMr)); 74 | l5::util::tcp::read(c.socket, &remoteMr, sizeof(remoteMr)); 75 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteMr, immData); 76 | c.rcqp->postWorkRequest(write); 77 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 78 | auto recv = ibv::workrequest::Recv{}; 79 | recv.setSge(nullptr, 0); 80 | c.rcqp->postRecvRequest(recv); 81 | auto wc = cq.pollRecvWorkCompletionBlocking(); 82 | auto newImmData = wc.getImmData(); 83 | if (newImmData == defs::IMMDATA::INVALIDATE) { 84 | std::cout << "before invalidation: " << c.socket.get() << ", " << c.rcqp->getQPN() 85 | << std::endl; 86 | prepareForInvalidate(cq, c); 87 | std::cout << "should be changed: " << c.socket.get() << ", " << c.rcqp->getQPN() << &c 88 | << std::endl; 89 | 90 | } 91 | auto sga = reinterpret_cast(recvbuf); 92 | auto gaddr = defs::GlobalAddress(*sga); 93 | return gaddr; 94 | } 95 | 96 | bool Node::sendLock(defs::Lock lock, defs::IMMDATA immData, Connection &c) { 97 | auto &cq = 
network.getSharedCompletionQueue(); 98 | auto sendmr = network.registerMr(&lock, sizeof(defs::Lock), {}); 99 | auto recvbuf = malloc(sizeof(defs::Lock)); 100 | auto recvmr = network.registerMr(recvbuf, sizeof(defs::Lock), 101 | {ibv::AccessFlag::LOCAL_WRITE, ibv::AccessFlag::REMOTE_WRITE}); 102 | auto remoteMr = ibv::memoryregion::RemoteAddress{reinterpret_cast(recvbuf), 103 | recvmr->getRkey()}; 104 | l5::util::tcp::write(c.socket, &remoteMr, sizeof(remoteMr)); 105 | l5::util::tcp::read(c.socket, &remoteMr, sizeof(remoteMr)); 106 | auto write = defs::createWriteWithImm(sendmr->getSlice(), remoteMr, immData); 107 | c.rcqp->postWorkRequest(write); 108 | cq.pollSendCompletionQueueBlocking(ibv::workcompletion::Opcode::RDMA_WRITE); 109 | auto recv = ibv::workrequest::Recv{}; 110 | recv.setSge(nullptr, 0); 111 | c.rcqp->postRecvRequest(recv); 112 | cq.pollRecvWorkCompletionBlocking(); 113 | auto result = reinterpret_cast(recvbuf); 114 | return *result; 115 | 116 | } 117 | 118 | 119 | void Node::prepareForInvalidate(rdma::CompletionQueuePair &cq, Connection &c) { 120 | std::cout << "invalidating cache" << std::endl; 121 | auto recv = ibv::workrequest::Recv{}; 122 | recv.setSge(nullptr, 0); 123 | c.rcqp->postRecvRequest(recv); 124 | cq.pollRecvWorkCompletionBlocking(); 125 | c.rcqp->setToResetState(); 126 | c.socket.close(); 127 | connectAndReceive(id); 128 | auto newcon = connectClientSocket(3000); 129 | std::cout << c.socket.get() << ", " << c.rcqp->getQPN() << std::endl; 130 | std::cout << c.socket.get() << ", " << c.rcqp->getQPN() << &c << std::endl; 131 | c = std::move(newcon); 132 | } 133 | 134 | void Node::broadcastInvalidations(std::vector nodes, 135 | defs::GlobalAddress gaddr) { 136 | for (const auto &n: nodes) { 137 | std::cout << "invalidation of node " << n << std::endl; 138 | auto connection = connectClientSocket(n); 139 | sendAddress(gaddr.sendable(id), defs::IMMDATA::INVALIDATE, connection); 140 | closeClientSocket(connection); 141 | 142 | } 143 | 
std::cout << "done invaldating" << std::endl; 144 | } -------------------------------------------------------------------------------- /src/api.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-04-30. 3 | // 4 | 5 | #include 6 | #include "Node.h" 7 | #include 8 | #include 9 | #include "../util/defs.h" 10 | 11 | 12 | defs::GlobalAddress Node::Malloc(size_t size) { 13 | auto msize = size+ sizeof(defs::SaveData); 14 | std::cout <<"malloc this size: "<< msize << std::endl; 15 | auto buffer = malloc(msize); 16 | if (buffer) { 17 | std::cout <<"ptr: "<< buffer << std::endl; 18 | auto gaddr = defs::GlobalAddress{size, buffer, id}; 19 | return gaddr; 20 | } else { 21 | auto port = defs::port; 22 | auto c = connectClientSocket(port); 23 | 24 | auto res = sendAddress(defs::GlobalAddress{size, nullptr, 0}.sendable(id), defs::IMMDATA::MALLOC, 25 | c); 26 | auto sga = reinterpret_cast(res); 27 | 28 | c.socket.close(); 29 | c.rcqp->setToResetState(); 30 | return defs::GlobalAddress(*sga); 31 | } 32 | } 33 | 34 | 35 | defs::GlobalAddress Node::Free(defs::GlobalAddress gaddr) { 36 | if (isLocal(gaddr)) { 37 | auto d = reinterpret_cast(gaddr.ptr); 38 | 39 | if (d->iscached >= 0 && !d->sharerNodes.empty()) { 40 | broadcastInvalidations(d->sharerNodes,gaddr); 41 | } 42 | 43 | free(gaddr.ptr); 44 | gaddr.ptr = nullptr; 45 | gaddr.size = 0; 46 | return gaddr; 47 | } else { 48 | auto port = defs::port; 49 | auto c = connectClientSocket(port); 50 | 51 | auto res = sendAddress(gaddr.sendable(id), defs::IMMDATA::FREE, c); 52 | 53 | auto sga = reinterpret_cast(res); 54 | auto add = defs::GlobalAddress(*sga); 55 | 56 | closeClientSocket(c); 57 | return add; 58 | } 59 | } 60 | 61 | uint64_t Node::read(defs::GlobalAddress gaddr) { 62 | if (setLock(gaddr.id, defs::LOCK_STATES::SHAREDLOCK)) { 63 | std::cout << "reading..." 
<< std::endl; 64 | auto result = performRead(gaddr, id); 65 | setLock(gaddr.id, defs::LOCK_STATES::UNLOCKED); 66 | return result->data; 67 | } else { 68 | return 0; 69 | } 70 | } 71 | 72 | defs::SaveData *Node::performRead(defs::GlobalAddress gaddr, uint16_t srcID) { 73 | if (isLocal(gaddr)) { 74 | auto data = reinterpret_cast(gaddr.ptr); 75 | 76 | if (data->iscached < 0 || data->iscached > 2) { 77 | std::cout << data->iscached << std::endl; 78 | return nullptr; 79 | } 80 | return data; 81 | } else { 82 | std::cout<< "not local" << std::endl; 83 | auto cacheItem = cache.getCacheItem(gaddr); 84 | if (cacheItem == nullptr) { 85 | std::cout<< "not cached" << std::endl; 86 | 87 | auto port = defs::port; 88 | auto c = connectClientSocket(port); 89 | 90 | auto data = sendAddress(gaddr.sendable(srcID), defs::IMMDATA::READ, c); 91 | 92 | auto result = reinterpret_cast(data); 93 | auto newCacheItem = CacheItem{gaddr, result->data, std::chrono::system_clock::now(), 94 | std::chrono::system_clock::now()}; 95 | cache.addCacheItem(gaddr, newCacheItem); 96 | 97 | closeClientSocket(c); 98 | 99 | return result; 100 | } else { 101 | std::cout<< "cached" << std::endl; 102 | std::cout << cacheItem->data << ", " << cacheItem->globalAddress.ptr << std::endl; 103 | return new defs::SaveData{cacheItem->data, defs::CACHE_DIRECTORY_STATE::SHARED, 0, 104 | std::vector()}; 105 | } 106 | } 107 | } 108 | 109 | 110 | bool Node::setLock(uint16_t lockId, defs::LOCK_STATES state) { 111 | if (id == defs::locknode) { 112 | auto existing = locks.find(lockId); 113 | 114 | if (existing != locks.end()) { 115 | 116 | if (state == defs::LOCK_STATES::UNLOCKED) { 117 | existing->second = state; 118 | return true; 119 | } else if (state == defs::LOCK_STATES::EXCLUSIVE && 120 | existing->second == defs::LOCK_STATES::UNLOCKED) { 121 | existing->second = state; 122 | return true; 123 | } else if (state == defs::LOCK_STATES::SHAREDLOCK && 124 | existing->second != defs::LOCK_STATES::EXCLUSIVE) { 125 | 
existing->second = state; 126 | return true; 127 | } else { 128 | return false; 129 | } 130 | } else { 131 | locks.insert(std::pair(lockId, state)); 132 | return true; 133 | } 134 | } else { 135 | auto c = connectClientSocket(defs::locknode); 136 | auto res = sendLock(defs::Lock{lockId, state}, defs::IMMDATA::LOCKS, c); 137 | closeClientSocket(c); 138 | return res; 139 | } 140 | } 141 | 142 | defs::GlobalAddress Node::write(defs::Data *data) { 143 | auto result = defs::GlobalAddress{}; 144 | std::cout << data->data << ", " << data->size << std::endl; 145 | 146 | if (setLock(data->ga.id, defs::LOCK_STATES::EXCLUSIVE)) { 147 | 148 | if (isLocal(data->ga)) { 149 | auto d = reinterpret_cast(data->ga.ptr); 150 | 151 | if (d->iscached >= 0 && !d->sharerNodes.empty() && d->iscached < 3) { 152 | broadcastInvalidations(d->sharerNodes, data->ga); 153 | } 154 | 155 | auto writtenData = new defs::SaveData{data->data, defs::CACHE_DIRECTORY_STATE::UNSHARED, id, 156 | {}}; 157 | std::memcpy(data->ga.ptr, writtenData, sizeof(defs::SaveData)); 158 | result = data->ga; 159 | 160 | } else { 161 | result = performWrite(data, id); 162 | } 163 | 164 | setLock(data->ga.id, defs::LOCK_STATES::UNLOCKED); 165 | } 166 | return result; 167 | 168 | } 169 | 170 | defs::GlobalAddress Node::performWrite(defs::Data *data, uint16_t srcID) { 171 | if (isLocal(data->ga)) { 172 | auto writtenData = defs::SaveData{data->data, defs::CACHE_DIRECTORY_STATE::EXCLUS, srcID, {}}; 173 | std::memcpy(data->ga.ptr, &writtenData, sizeof(writtenData)); 174 | return data->ga; 175 | } else { 176 | auto port = defs::port; 177 | auto c = connectClientSocket(port); 178 | auto ci = CacheItem{data->ga, data->data, std::chrono::system_clock::now(), 179 | std::chrono::system_clock::now()}; 180 | cache.alterCacheItem(ci,data->ga); 181 | std::cout << ci.data << ", " << ci.globalAddress.ptr << std::endl; 182 | 183 | auto result = sendData(data->sendable(srcID), defs::IMMDATA::WRITE, c); 184 | 185 | std::cout << c.socket.get() 
<< ", " << c.rcqp->getQPN() << std::endl; 186 | 187 | closeClientSocket(c); 188 | return result; 189 | } 190 | } 191 | 192 | -------------------------------------------------------------------------------- /util/GlobalAddressHash.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-07-02. 3 | // 4 | 5 | #ifndef MEDMM_GLOBALADDRESSHASH_H 6 | #define MEDMM_GLOBALADDRESSHASH_H 7 | 8 | 9 | #include 10 | #include "defs.h" 11 | 12 | template class GlobalAddressHash; 13 | 14 | template<> 15 | class GlobalAddressHash { 16 | public: 17 | uint64_t operator()(const defs::SendGlobalAddr &ga) const 18 | { 19 | uint64_t h1 = std::hash()(ga.size); 20 | uint64_t h2 = std::hash()(ga.ptr); 21 | uint64_t h3 = std::hash()(ga.id); 22 | return (h1 ^ (h2 ^(h3<<1))); 23 | } 24 | }; 25 | 26 | 27 | #endif //MEDMM_GLOBALADDRESSHASH_H 28 | -------------------------------------------------------------------------------- /util/NonCopyable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | namespace l5 { 3 | namespace util { 4 | struct NonCopyable { 5 | NonCopyable() = default; 6 | 7 | NonCopyable(const NonCopyable &) = delete; 8 | 9 | NonCopyable &operator=(const NonCopyable &) = delete; 10 | 11 | NonCopyable(NonCopyable &&) = default; 12 | 13 | NonCopyable &operator=(NonCopyable &&) = default; 14 | }; 15 | } // namespace util 16 | } // namespace l5 17 | -------------------------------------------------------------------------------- /util/RDMANetworking.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "RDMANetworking.h" 3 | #include "socket/tcp.h" 4 | 5 | namespace l5 { 6 | namespace util { 7 | static void exchangeQPNAndConnect(const Socket &sock, rdma::Network &network, rdma::QueuePair &queuePair) { 8 | rdma::Address addr{}; 9 | addr.gid = network.getGID(); 10 | addr.lid = network.getLID(); 11 | 
addr.qpn = queuePair.getQPN(); 12 | tcp::write(sock, &addr, sizeof(addr)); // Send own qpn to server 13 | tcp::read(sock, &addr, sizeof(addr)); // receive qpn 14 | queuePair.connect(addr); 15 | } 16 | 17 | RDMANetworking::RDMANetworking(const Socket &sock) : 18 | completionQueue(network.newCompletionQueuePair()), 19 | queuePair(network, completionQueue) { 20 | tcp::setBlocking(sock); // just set the socket to block for our setup. 21 | exchangeQPNAndConnect(sock, network, queuePair); 22 | } 23 | 24 | void receiveAndSetupRmr(const Socket &sock, ibv::memoryregion::RemoteAddress &buffer, 25 | ibv::memoryregion::RemoteAddress &readPos) { 26 | RmrInfo rmrInfo{}; 27 | tcp::read(sock, &rmrInfo, sizeof(rmrInfo)); 28 | buffer.rkey = rmrInfo.bufferKey; 29 | buffer.address = rmrInfo.bufferAddress; 30 | readPos.rkey = rmrInfo.readPosKey; 31 | readPos.address = rmrInfo.readPosAddress; 32 | } 33 | 34 | void sendRmrInfo(const Socket &sock, const ibv::memoryregion::MemoryRegion &buffer, 35 | const ibv::memoryregion::MemoryRegion &readPos) { 36 | RmrInfo rmrInfo{}; 37 | rmrInfo.bufferKey = buffer.getRkey(); 38 | rmrInfo.bufferAddress = reinterpret_cast(buffer.getAddr()); 39 | rmrInfo.readPosKey = readPos.getRkey(); 40 | rmrInfo.readPosAddress = reinterpret_cast(readPos.getAddr()); 41 | tcp::write(sock, &rmrInfo, sizeof(rmrInfo)); 42 | } 43 | } // namespace util 44 | } // namespace l5 45 | -------------------------------------------------------------------------------- /util/RDMANetworking.h: -------------------------------------------------------------------------------- 1 | #ifndef L5RDMA_RDMANETWORKING_H 2 | #define L5RDMA_RDMANETWORKING_H 3 | 4 | #include "../rdma/RcQueuePair.h" 5 | #include "../rdma/Network.hpp" 6 | #include "../rdma/CompletionQueuePair.hpp" 7 | #include "socket/Socket.h" 8 | 9 | namespace l5 { 10 | namespace util { 11 | struct RDMANetworking { 12 | rdma::Network network; 13 | rdma::CompletionQueuePair completionQueue; 14 | rdma::RcQueuePair queuePair; 15 | 16 | 
/// Exchange the basic RDMA connection info for the network and queues 17 | explicit RDMANetworking(const Socket &sock); 18 | }; 19 | 20 | struct RmrInfo { 21 | uint32_t bufferKey; 22 | uint32_t readPosKey; 23 | uintptr_t bufferAddress; 24 | uintptr_t readPosAddress; 25 | }; 26 | 27 | void receiveAndSetupRmr(const Socket &sock, ibv::memoryregion::RemoteAddress &buffer, 28 | ibv::memoryregion::RemoteAddress &readPos); 29 | 30 | void sendRmrInfo(const Socket &sock, const ibv::memoryregion::MemoryRegion &buffer, 31 | const ibv::memoryregion::MemoryRegion &readPos); 32 | } // namespace util 33 | } // namespace l5 34 | #endif //L5RDMA_RDMANETWORKING_H 35 | -------------------------------------------------------------------------------- /util/defs.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-06-15. 3 | // 4 | 5 | #include 6 | #include "defs.h" 7 | 8 | ibv::workrequest::Simple 9 | defs::createWriteWithImm(ibv::memoryregion::Slice slice, 10 | ibv::memoryregion::RemoteAddress remoteMr, IMMDATA immData) { 11 | auto write = ibv::workrequest::Simple{}; 12 | write.setLocalAddress(slice); 13 | write.setInline(); 14 | write.setSignaled(); 15 | write.setRemoteAddress(remoteMr); 16 | write.setImmData(immData); 17 | return write; 18 | } -------------------------------------------------------------------------------- /util/defs.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Magdalena Pröbstl on 2019-05-07. 
3 | // 4 | #include 5 | #include "../include/libibverbscpp/libibverbscpp.h" 6 | #include 7 | 8 | #ifndef MEDMM_DEFS_H 9 | #define MEDMM_DEFS_H 10 | 11 | namespace defs { 12 | 13 | 14 | const char ip[] = "127.0.0.1"; 15 | const uint16_t port = 3000; 16 | 17 | const uint16_t locknode = 2000; 18 | 19 | constexpr size_t BIGBADBUFFER_SIZE = 1024 * 1024 * 8; // 8MB 20 | 21 | enum CACHE_DIRECTORY_STATE { 22 | UNSHARED = 0, 23 | SHARED = 1, 24 | EXCLUS = 2 25 | }; 26 | 27 | 28 | struct __attribute__ ((packed)) SendGlobalAddr { 29 | size_t size; 30 | uint64_t ptr; 31 | uint16_t id; 32 | uint16_t srcID; 33 | }; 34 | 35 | struct __attribute__ ((packed)) GlobalAddress { 36 | size_t size; 37 | void *ptr; 38 | uint16_t id; 39 | 40 | SendGlobalAddr sendable(uint16_t srcID) { 41 | SendGlobalAddr sga{}; 42 | sga.size = size; 43 | sga.ptr = reinterpret_cast(ptr); 44 | sga.id = id; 45 | sga.srcID = srcID; 46 | return sga; 47 | }; 48 | 49 | GlobalAddress() = default; 50 | 51 | GlobalAddress(size_t s, void *p, uint16_t i) { 52 | size = s; 53 | ptr = p; 54 | id = i; 55 | }; 56 | 57 | explicit GlobalAddress(SendGlobalAddr sga) { 58 | size = sga.size; 59 | ptr = reinterpret_cast(sga.ptr); 60 | id = sga.id; 61 | }; 62 | 63 | uint16_t getNodeId() { 64 | return id; 65 | }; 66 | }; 67 | 68 | 69 | struct __attribute__ ((packed)) SendingData { 70 | size_t size; 71 | uint64_t data; 72 | SendGlobalAddr sga; 73 | 74 | }; 75 | 76 | 77 | struct __attribute__ ((packed)) Data { 78 | size_t size; 79 | uint64_t data; 80 | GlobalAddress ga; 81 | 82 | SendingData sendable(uint16_t id) { 83 | SendingData sd{}; 84 | sd.sga = ga.sendable(id); 85 | sd.size = size; 86 | sd.data = data; 87 | return sd; 88 | } 89 | 90 | Data() = default; 91 | 92 | Data(size_t s, uint64_t d, GlobalAddress g) { 93 | size = s; 94 | data = d; 95 | ga = g; 96 | }; 97 | 98 | explicit Data(SendingData sd) { 99 | size = sd.size; 100 | data = sd.data; 101 | ga = GlobalAddress(sd.sga); 102 | } 103 | }; 104 | 105 | struct SaveData { 106 | 
uint64_t data; 107 | CACHE_DIRECTORY_STATE iscached; 108 | uint16_t ownerNode; 109 | std::vector sharerNodes; 110 | }; 111 | 112 | enum LOCK_STATES { 113 | UNLOCKED = 0, 114 | SHAREDLOCK = 1, 115 | EXCLUSIVE = 2 116 | }; 117 | 118 | enum IMMDATA { 119 | DEFAULT = 0, 120 | MALLOC = 1, 121 | READ = 2, 122 | FREE = 3, 123 | WRITE = 4, 124 | LOCKS = 5, 125 | RESET = 6, 126 | INVALIDATE = 7 127 | }; 128 | 129 | 130 | struct __attribute__ ((packed)) Lock { 131 | uint16_t id; 132 | LOCK_STATES state; 133 | }; 134 | 135 | 136 | ibv::workrequest::Simple 137 | createWriteWithImm(ibv::memoryregion::Slice slice, 138 | ibv::memoryregion::RemoteAddress remoteMr, IMMDATA immData); 139 | } 140 | #endif //MEDMM_DEFS_H 141 | -------------------------------------------------------------------------------- /util/socket/Socket.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Socket.h" 4 | 5 | using namespace std::string_literals; 6 | 7 | l5::util::Socket::~Socket() { 8 | if (s < 0) { 9 | return; 10 | } 11 | ::close(s); 12 | } 13 | 14 | l5::util::Socket::Socket(l5::util::Socket &&other) noexcept { 15 | std::swap(s, other.s); 16 | } 17 | 18 | l5::util::Socket &l5::util::Socket::operator=(l5::util::Socket &&other) noexcept { 19 | std::swap(s, other.s); 20 | return *this; 21 | } 22 | 23 | int l5::util::Socket::get() const noexcept { 24 | return s; 25 | } 26 | 27 | l5::util::Socket l5::util::Socket::fromRaw(int socket) noexcept { 28 | return Socket(socket); 29 | } 30 | 31 | l5::util::Socket l5::util::Socket::create(int domain, int type, int protocol) { 32 | auto raw = ::socket(domain, type, protocol); 33 | if (raw < 0) { 34 | throw std::runtime_error("Could not open socket: "s + strerror(errno)); 35 | } 36 | const int enable = 1; 37 | if (::setsockopt(raw, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)) < 0) { 38 | throw std::runtime_error{"Could not set SO_REUSEADDR: "s + strerror(errno)}; 39 | } 40 | return 
Socket(raw); 41 | } 42 | 43 | l5::util::Socket::Socket(int raw) : s(raw) { 44 | } 45 | 46 | void l5::util::Socket::close() noexcept { 47 | if (s < 0) { 48 | return; 49 | } 50 | ::close(s); 51 | s = -1; 52 | } 53 | -------------------------------------------------------------------------------- /util/socket/Socket.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "../NonCopyable.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace l5 { 13 | namespace util { 14 | class Socket : util::NonCopyable { 15 | 16 | int s = -1; 17 | 18 | explicit Socket(int raw); 19 | 20 | public: 21 | Socket() = default; 22 | 23 | static Socket create(int domain = AF_INET, int type = SOCK_STREAM, int protocol = 0); 24 | 25 | static Socket fromRaw(int socket) noexcept; 26 | 27 | ~Socket(); 28 | 29 | Socket(Socket &&other) noexcept; 30 | 31 | Socket &operator=(Socket &&other) noexcept; 32 | 33 | int get() const noexcept; 34 | 35 | template 36 | static SocketIterator poll_first(SocketIterator begin, SocketIterator end); 37 | 38 | void close() noexcept; 39 | }; 40 | 41 | template 42 | SocketIterator Socket::poll_first(SocketIterator begin, SocketIterator end) { 43 | using namespace std::string_literals; 44 | assert(std::distance(begin, end) > 0); 45 | const auto size = static_cast(std::distance(begin, end)); 46 | std::vector pollFds; 47 | 48 | std::transform(begin, end, std::back_inserter(pollFds), [](const auto &sock) { 49 | pollfd res{}; 50 | res.fd = sock.get(); 51 | res.events = POLLIN; 52 | return res; 53 | }); 54 | 55 | const auto ret = ::poll(pollFds.data(), size, 5 * 1000); // 5 seconds timeout 56 | if (ret < 0) { 57 | throw std::runtime_error("Could not poll sockets: "s + ::strerror(errno)); 58 | } 59 | const auto &readable = std::find_if(pollFds.begin(), pollFds.end(), [](const pollfd &pollFd) { 60 | return (pollFd.revents & POLLIN) != 0; 61 | }); 62 | 63 | 
return begin + std::distance(pollFds.begin(), readable); 64 | } 65 | } // namespace util 66 | } // namespace l5 67 | -------------------------------------------------------------------------------- /util/socket/domain.cpp: -------------------------------------------------------------------------------- 1 | #include "domain.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace l5 { 9 | namespace util { 10 | namespace domain { 11 | 12 | using namespace std::string_literals; 13 | 14 | Socket socket() { 15 | return Socket::create(AF_UNIX, SOCK_STREAM, 0); 16 | } 17 | 18 | void listen(const Socket &sock) { 19 | if (::listen(sock.get(), SOMAXCONN) < 0) { 20 | throw std::runtime_error{"error close'ing"s + strerror(errno)}; 21 | } 22 | } 23 | 24 | void connect(const Socket &sock, const std::string &pathToFile) { 25 | ::sockaddr_un local{}; 26 | local.sun_family = AF_UNIX; 27 | strncpy(local.sun_path, pathToFile.data(), pathToFile.size()); 28 | local.sun_path[pathToFile.size()] = '\0'; 29 | if (::connect(sock.get(), reinterpret_cast(&local), sizeof local) < 0) { 30 | throw std::runtime_error{"error connect'ing"s + strerror(errno)}; 31 | } 32 | } 33 | 34 | void write(const Socket &sock, const void* buffer, std::size_t size) { 35 | for (size_t current = 0; size > 0;) { 36 | auto res = ::send(sock.get(), reinterpret_cast(buffer) + current, size, 0); 37 | if (res < 0) { 38 | throw std::runtime_error("Couldn't write to socket: "s + strerror(errno)); 39 | } 40 | current += res; 41 | size -= res; 42 | } 43 | } 44 | 45 | void read(const Socket &sock, void* buffer, std::size_t size) { 46 | for (size_t current = 0; size > 0;) { 47 | auto res = ::recv(sock.get(), reinterpret_cast(buffer) + current, size, 0); 48 | if (res < 0) { 49 | throw std::runtime_error("Couldn't read from socket: "s + strerror(errno)); 50 | } 51 | if (static_cast(res) == size) { 52 | return; 53 | } 54 | current += res; 55 | size -= res; 56 | } 57 | } 58 | 59 | void bind(const 
Socket &sock, const std::string &pathToFile) { 60 | // c.f. http://beej.us/guide/bgipc/output/html/multipage/unixsock.html 61 | ::sockaddr_un local{}; 62 | local.sun_family = AF_UNIX; 63 | strncpy(local.sun_path, pathToFile.data(), pathToFile.size()); 64 | local.sun_path[pathToFile.size()] = '\0'; 65 | auto len = strlen(local.sun_path) + sizeof(local.sun_family); 66 | if (::bind(sock.get(), reinterpret_cast(&local), len) < 0) { 67 | throw std::runtime_error{"error bind'ing"s + strerror(errno)}; 68 | } 69 | } 70 | 71 | void unlink(const std::string &pathToFile) { 72 | if (::unlink(std::string(pathToFile.begin(), pathToFile.end()).c_str()) < 0) { 73 | throw std::runtime_error{"error unlink'ing"s + strerror(errno)}; 74 | } 75 | } 76 | 77 | Socket accept(const Socket &sock, ::sockaddr_un &inAddr) { 78 | socklen_t unAddrLen = sizeof(inAddr); 79 | auto acced = ::accept(sock.get(), reinterpret_cast(&inAddr), &unAddrLen); 80 | if (acced < 0) { 81 | throw std::runtime_error{"error accept'ing"s + strerror(errno)}; 82 | } 83 | return Socket::fromRaw(acced); 84 | } 85 | 86 | Socket accept(const Socket &sock) { 87 | auto acced = ::accept(sock.get(), nullptr, nullptr); 88 | if (acced < 0) { 89 | throw std::runtime_error{"error accept'ing"s + strerror(errno)}; 90 | } 91 | return Socket::fromRaw(acced); 92 | } 93 | 94 | void send_fd(const Socket &sock, int fd) { 95 | auto data = std::array(); 96 | auto iov = iovec(); 97 | iov.iov_base = data.data(); 98 | iov.iov_len = data.size(); 99 | 100 | auto ctrl_buf = std::array(); 101 | auto msg = msghdr(); 102 | msg.msg_name = nullptr; 103 | msg.msg_namelen = 0; 104 | msg.msg_iov = &iov; 105 | msg.msg_iovlen = 1; 106 | msg.msg_control = ctrl_buf.data(); 107 | msg.msg_controllen = ctrl_buf.size(); 108 | 109 | auto cmsg = CMSG_FIRSTHDR(&msg); 110 | cmsg->cmsg_level = SOL_SOCKET; 111 | cmsg->cmsg_type = SCM_RIGHTS; 112 | cmsg->cmsg_len = CMSG_LEN(sizeof(int)); 113 | 114 | *reinterpret_cast(CMSG_DATA(cmsg)) = fd; 115 | 116 | if 
(::sendmsg(sock.get(), &msg, 0) < 0) { 117 | throw std::runtime_error("send_fd: sendmsg failed"s + strerror(errno)); 118 | } 119 | } 120 | 121 | int receive_fd(const Socket &sock) { 122 | auto data = std::array(); 123 | auto iov = iovec(); 124 | iov.iov_base = data.data(); 125 | iov.iov_len = data.size(); 126 | 127 | auto ctrl_buf = std::array(); 128 | auto msg = msghdr(); 129 | msg.msg_name = nullptr; 130 | msg.msg_namelen = 0; 131 | msg.msg_iov = &iov; 132 | msg.msg_iovlen = 1; 133 | msg.msg_control = ctrl_buf.data(); 134 | msg.msg_controllen = ctrl_buf.size(); 135 | 136 | const auto n = ::recvmsg(sock.get(), &msg, 0); 137 | if (n < 0) { 138 | throw std::runtime_error("receive_fd: recvmsg failed"s + strerror(errno)); 139 | } 140 | if (n == 0) { 141 | throw std::runtime_error("receive_fd: invalid FD received"); 142 | } 143 | 144 | const auto cmsg = CMSG_FIRSTHDR(&msg); 145 | if (cmsg == nullptr) { 146 | throw std::runtime_error("receive_fd: invalid FD received"); 147 | } 148 | int fd = *reinterpret_cast(CMSG_DATA(cmsg)); 149 | return fd; 150 | } 151 | } // namespace domain 152 | } // namespace util 153 | } // namespace l5 154 | -------------------------------------------------------------------------------- /util/socket/domain.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include "Socket.h" 7 | 8 | namespace l5 { 9 | namespace util { 10 | namespace domain { 11 | struct sockaddr_un; 12 | 13 | Socket socket(); 14 | 15 | void listen(const Socket &sock); 16 | 17 | void connect(const Socket &sock, const std::string &pathToFile); 18 | 19 | void write(const Socket &sock, const void* buffer, std::size_t size); 20 | 21 | template 22 | void write(const Socket &sock, const T &object) { 23 | static_assert(std::is_trivially_copyable::value, ""); 24 | write(sock, reinterpret_cast(&object), sizeof(object)); 25 | } 26 | 27 | void read(const Socket &sock, void* buffer, 
std::size_t size); 28 | 29 | template 30 | void read(const Socket &sock, T &object) { 31 | static_assert(std::is_trivially_copyable::value, ""); 32 | read(sock, &object, sizeof(object)); 33 | } 34 | 35 | template 36 | T read(const Socket &sock) { 37 | static_assert(std::is_trivially_constructible::value, ""); 38 | T res; 39 | read(sock, res); 40 | return res; 41 | } 42 | 43 | void bind(const Socket &sock, const std::string &pathToFile); 44 | 45 | Socket accept(const Socket &sock, ::sockaddr_un &inAddr); 46 | 47 | Socket accept(const Socket &sock); 48 | 49 | void setBlocking(const Socket &sock); 50 | 51 | void unlink(const std::string &pathToFile); 52 | 53 | /// Send a file descriptor over a domain socket 54 | void send_fd(const Socket &sock, int fd); 55 | 56 | /// Receive a file descriptor over a domain socket 57 | int receive_fd(const Socket &sock); 58 | 59 | } // namespace domain 60 | } // namespace util 61 | } // namespace l5 62 | -------------------------------------------------------------------------------- /util/socket/tcp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tcp.h" 5 | 6 | using namespace std::string_literals; 7 | 8 | void l5::util::tcp::connect(const l5::util::Socket &sock, const sockaddr_in &dest) { 9 | auto addr = reinterpret_cast(&dest); 10 | if (::connect(sock.get(), addr, sizeof(dest)) < 0) { 11 | throw std::runtime_error("Couldn't connect: "s + strerror(errno)); 12 | } 13 | } 14 | 15 | void l5::util::tcp::connect(const l5::util::Socket &sock, const std::string &ip, uint16_t port) { 16 | sockaddr_in addr = {}; 17 | addr.sin_family = AF_INET; 18 | addr.sin_port = htons(port); 19 | inet_pton(AF_INET, ip.c_str(), &addr.sin_addr); 20 | 21 | connect(sock, addr); 22 | } 23 | 24 | void l5::util::tcp::write(const l5::util::Socket &sock, const void* buffer, std::size_t size) { 25 | for (size_t current = 0; size > 0;) { 26 | auto res = ::send(sock.get(), 
reinterpret_cast(buffer) + current, size, 0); 27 | if (res < 0) { 28 | throw std::runtime_error("Couldn't write to socket: "s + strerror(errno)); 29 | } 30 | current += res; 31 | size -= res; 32 | } 33 | } 34 | 35 | void l5::util::tcp::read(const l5::util::Socket &sock, void* buffer, std::size_t size) { 36 | for (size_t current = 0; size > 0;) { 37 | auto res = ::recv(sock.get(), reinterpret_cast(buffer) + current, size, 0); 38 | if (res < 0) { 39 | throw std::runtime_error("Couldn't read from socket: "s + strerror(errno)); 40 | } 41 | if (static_cast(res) == size) { 42 | return; 43 | } 44 | current += res; 45 | size -= res; 46 | } 47 | } 48 | 49 | void l5::util::tcp::bind(const l5::util::Socket &sock, const sockaddr_in &addr) { 50 | auto what = reinterpret_cast(&addr); 51 | if (::bind(sock.get(), what, sizeof(addr)) < 0) { 52 | throw std::runtime_error("Couldn't bind socket: "s + strerror(errno)); 53 | } 54 | } 55 | 56 | void l5::util::tcp::bind(const l5::util::Socket &sock, uint16_t port) { 57 | sockaddr_in addr{}; 58 | addr.sin_family = AF_INET; 59 | addr.sin_port = htons(port); 60 | addr.sin_addr.s_addr = INADDR_ANY; 61 | 62 | bind(sock, addr); 63 | } 64 | 65 | void l5::util::tcp::listen(const l5::util::Socket &sock) { 66 | if (::listen(sock.get(), SOMAXCONN) < 0) { 67 | throw std::runtime_error("Couldn't listen on socket: "s + strerror(errno)); 68 | } 69 | } 70 | 71 | l5::util::Socket l5::util::tcp::accept(const l5::util::Socket &sock, sockaddr_in &inAddr) { 72 | socklen_t inAddrLen = sizeof(inAddr); 73 | auto saddr = reinterpret_cast(&inAddr); 74 | auto acced = ::accept(sock.get(), saddr, &inAddrLen); 75 | if (acced < 0) { 76 | throw std::runtime_error("Couldn't accept from socket: "s + strerror(errno)); 77 | } 78 | return Socket::fromRaw(acced); 79 | } 80 | 81 | l5::util::Socket l5::util::tcp::accept(const l5::util::Socket &sock) { 82 | auto acced = ::accept(sock.get(), nullptr, nullptr); 83 | if (acced < 0) { 84 | throw std::runtime_error("Couldn't accept 
from socket: "s + strerror(errno)); 85 | } 86 | return Socket::fromRaw(acced); 87 | } 88 | 89 | void l5::util::tcp::setBlocking(const l5::util::Socket &sock) { 90 | auto opts = fcntl(sock.get(), F_GETFL); 91 | opts &= ~O_NONBLOCK; 92 | fcntl(sock.get(), F_SETFL, opts); 93 | } 94 | -------------------------------------------------------------------------------- /util/socket/tcp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include "Socket.h" 6 | 7 | namespace l5 { 8 | namespace util { 9 | namespace tcp { 10 | void connect(const Socket &sock, const sockaddr_in &dest); 11 | 12 | void connect(const Socket &sock, const std::string &ip, uint16_t port); 13 | 14 | void write(const Socket &sock, const void *buffer, std::size_t size); 15 | 16 | template 17 | void write(const Socket &sock, const T &object) { 18 | static_assert(std::is_trivially_copyable::value, ""); 19 | write(sock, reinterpret_cast(&object), sizeof(object)); 20 | } 21 | 22 | void read(const Socket &sock, void *buffer, std::size_t size); 23 | 24 | template 25 | void read(const Socket &sock, T &object) { 26 | static_assert(std::is_trivially_copyable::value, ""); 27 | return read(sock, &object, sizeof(object)); 28 | } 29 | 30 | template 31 | T read(const Socket &sock) { 32 | static_assert(std::is_trivially_constructible::value, ""); 33 | T res; 34 | read(sock, res); 35 | return res; 36 | } 37 | 38 | void bind(const Socket &sock, const sockaddr_in &addr); 39 | 40 | void bind(const Socket &sock, uint16_t port); 41 | 42 | void listen(const Socket &sock); 43 | 44 | Socket accept(const Socket &sock, sockaddr_in &inAddr); 45 | 46 | Socket accept(const Socket &sock); 47 | 48 | void setBlocking(const Socket &sock); 49 | } // namespace tcp 50 | } // namespace util 51 | } // namespace l5 52 | --------------------------------------------------------------------------------