├── AUTHORS ├── LICENSE ├── Makefile ├── README.md ├── alloc ├── alloc.hpp ├── alloc_adapter.hpp ├── block.hpp ├── block_alloc.hpp ├── local_block.hpp ├── memfd.h ├── size_table.hpp ├── superblock.hpp └── thread_alloc.hpp ├── common └── common.hpp ├── compact.cpp ├── compaction_latency.cpp ├── core.sh ├── latency.cpp ├── load.cpp ├── local_read_benchmark.cpp ├── main.cpp ├── paper └── corm.pdf ├── rdma ├── connectRDMA.hpp ├── rdma_helpers.hpp ├── rdma_memory_manager.hpp └── verbsEP.hpp ├── remote_read_benchmark.cpp ├── run_compaction.sh ├── run_latency.sh ├── run_read_throughput.sh ├── run_throughput.sh ├── run_throughput_compaction.sh ├── thread ├── messenger.hpp └── thread.hpp ├── unload.cpp ├── utilities ├── block_home_table.h ├── cxxopts.hpp ├── debug.h ├── rcu.h ├── timer.h ├── ycsb.hpp └── zipf.hpp ├── worker ├── ReaderWriter.hpp ├── client_api.hpp ├── communication.hpp ├── generic_worker.hpp └── worker.hpp └── workload_readwrite.cpp /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the official list of CoRM authors (individuals or organizations) for 2 | # copyright purposes. 3 | 4 | ETH Zurich 5 | Konstantin Taranov 6 | Salvatore Di Girolamo 7 | Torsten Hoefler 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020-2021, ETH Zurich, and all contributors listed in AUTHORS 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: remote_read_benchmark workload_readwrite latency server load unload compaction compact local_read_benchmark 2 | 3 | CFLAGS += -Wall -std=c++14 -O2 -I./ -libverbs -lpthread -lrdmacm -lev #-DDEBUG #-g -D_GNU_SOURCE 4 | CPP = #./alloc/alloc_adapter.cpp 5 | 6 | 7 | remote_read_benchmark: 8 | rm -f remote_read_benchmark 9 | g++ remote_read_benchmark.cpp $(CFLAGS) -o remote_read_benchmark 10 | 11 | latency: 12 | rm -f latency 13 | g++ latency.cpp $(CFLAGS) -o latency 14 | 15 | server: 16 | rm -f server 17 | g++ main.cpp $(CFLAGS) $(CPP) -o server 18 | 19 | load: 20 | rm -f load 21 | g++ load.cpp $(CFLAGS) $(CPP) -o load 22 | 23 | unload: 24 | rm -f unload 25 | g++ unload.cpp $(CFLAGS) $(CPP) -o unload 26 | 27 | compaction: 28 | rm -f compaction 29 | g++ compaction_latency.cpp $(CFLAGS) $(CPP) -o compaction 30 | 31 | compact: 32 | rm -f compact 33 | g++ compact.cpp $(CFLAGS) $(CPP) -o compact 34 | 35 | workload_readwrite: 36 | rm -f workload_readwrite 37 | g++ workload_readwrite.cpp $(CFLAGS) $(CPP) -o workload_readwrite 38 | 39 | local_read_benchmark: 40 | rm -f local_read_benchmark 41 | g++ local_read_benchmark.cpp $(CFLAGS) $(CPP) -o local_read_benchmark 42 | 43 | clean: 44 | rm -f remote_read_benchmark workload_readwrite latency server load unload compaction compact local_read_benchmark 45 | .DELETE_ON_ERROR: 46 | .PHONY: all clean 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CoRM: Compactable Remote Memory over RDMA 2 | A remote memory system that supports compaction over RDMA networks. 3 | This is the source code for our [SIGMOD 2021 paper](paper/corm.pdf). 4 | 5 | ## Requirements 6 | * GCC >= 4.9 with C++14 features 7 | * rdma-core library, or an equivalent RDMA verbs library 8 | * RDMA-capable network devices must have assigned IP addresses 9 | * Boost lockfree queue 10 | * libev-dev library 11 | 12 | ## Usage 13 | 14 | To compile the code, simply run `make`. 15 | We provide a series of bash scripts to launch CoRM. For that, modify the IP addresses of your servers accordingly. 16 | 17 | 18 | ## Basic usage 19 | ``` 20 | make 21 | ./server -a 192.168.1.10 --threads=1 % start CoRM with 1 thread. CoRM will print size-class info and then periodically report stats of the worker thread. 22 | ./latency -a 192.168.1.10 % start a basic latency test 23 | ``` 24 | 25 | Note that CoRM prints only RPC stats, as it is unaware of completed one-sided RDMA reads. 26 | 27 | 28 | ## Debugging 29 | For debugging, include the `-DDEBUG` flag in `CFLAGS` of the Makefile. It enables printing of debug messages. 30 | 31 | ## Implementation details 32 | For research purposes, CoRM has the following implementation artifacts: 33 | 34 | #### Connection establishment model 35 | Each new client is directly connected to a remote thread worker. 36 | The thread worker is assigned in round-robin order, which helps to manage and debug the thread that is responsible for a client. 37 | To have direct connections to all threads, a client can open multiple connections to CoRM. 38 | 39 | #### Key/Addr sizes 40 | All sizes of blocks, superblocks, and ids are hard-coded. Refer to `common/common.hpp` to tune the parameters. 41 | 42 | #### Compaction 43 | Compaction is triggered manually; for that, you can use the `./compact` binary.
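For example, a typical invocation (flag names as defined in `compact.cpp`; run `./compact --help` for the authoritative list): ``` ./compact --server=192.168.1.10 --size=24 % trigger compaction for the size class that fits 24-byte objects ```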
44 | 45 | #### Loading/Unloading 46 | For loading and unloading data, please use the `./load` and `./unload` binaries. `load` needs to know the number of remote threads to evenly load each remote worker with objects. This is a side effect of the "Connection establishment model". 47 | 48 | 49 | ## Implementation notice 50 | I am not a professional software developer, and that is why the code is not of production quality. 51 | Notably, I did not invest time in splitting the code into `*.cpp` and `*.hpp` files to improve the compilation process. 52 | Also, the settings related to block, ID, and key sizes are hard-coded and can be managed in `common/common.hpp`. 53 | 54 | 55 | 56 | ## Citing this work 57 | 58 | If you use our code, please consider citing our [SIGMOD 2021 paper](paper/corm.pdf): 59 | 60 | ``` 61 | @inproceedings{taranov-corm, 62 | author = {Taranov, Konstantin and Di Girolamo, Salvatore and Hoefler, Torsten}, 63 | title = {Co{RM}: {C}ompactable {R}emote {M}emory over {RDMA}}, 64 | year = {2021}, 65 | isbn = {9781450383431}, 66 | publisher = {Association for Computing Machinery}, 67 | url = {https://doi.org/10.1145/3448016.3452817}, 68 | doi = {10.1145/3448016.3452817}, 69 | booktitle = {Proceedings of the 2021 ACM SIGMOD International Conference on Management of Data}, 70 | location = {Virtual Event, China}, 71 | numpages = {14}, 72 | series = {SIGMOD'21} 73 | } 74 | ``` 75 | 76 | ## Contact 77 | If you have questions, please contact: 78 | 79 | Konstantin Taranov (konstantin.taranov "at" inf.ethz.ch) 80 | -------------------------------------------------------------------------------- /alloc/alloc.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * Interfaces and callbacks for allocators 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "block.hpp" 15 | #include "../common/common.hpp" 16 | #include 17 | #include 18 | 19 | static const int SUCCESS = 0; 20 | static const int BLOCK_IS_NOT_FOUND = -1; 21 | static const int OBJECT_DOES_NOT_EXIST = -2; 22 | 23 | 24 | // Thread allocator callbacks. 25 | typedef void (*thread_alloc_cb)( client_addr_t ret_addr, void *owner); 26 | typedef void (*thread_free_cb)( int status, addr_t newaddr, void *owner); 27 | typedef void (*thread_find_cb)( addr_t newaddr, uint16_t slot_size, void *owner); 28 | typedef void (*fixpointer_cb)( int ret, client_addr_t ret_addr, void *owner); 29 | typedef void (*helper_cb)( void *owner); 30 | 31 | 32 | // A per-thread allocator object. 33 | // This class is responsible for assigning object ids. 34 | class ThreadAlloc { 35 | public: 36 | 37 | struct CompactionCtx{ 38 | boost::lockfree::queue q; 39 | std::atomic counter; 40 | const uint8_t type; 41 | const uint8_t initiator; 42 | ThreadAlloc* master; 43 | const bool with_compaction; 44 | std::chrono::time_point t1; 45 | 46 | CompactionCtx(uint8_t num_threads, uint8_t type, uint8_t initiator, ThreadAlloc* master, bool with_compaction): 47 | q(num_threads), counter(num_threads-1), type(type), initiator(initiator), master(master), with_compaction(with_compaction) 48 | { 49 | t1 = std::chrono::high_resolution_clock::now(); 50 | } 51 | }; 52 | 53 | 54 | virtual ~ThreadAlloc() = default; 55 | 56 | // Allocate a slot.
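// The call completes through the callback: `cb` receives the client_addr_t of the newly reserved slot, and `owner` is passed back to the callback unchanged.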
57 | virtual void Alloc(uint32_t size, thread_alloc_cb cb, void *owner) = 0; 58 | 59 | 60 | // Allocate a slot via adapter. 61 | virtual void AllocAtHome(ThreadAlloc* alloc, uint32_t size, thread_alloc_cb cb, void *owner) = 0; 62 | 63 | // Free a previously allocated block on this server. 64 | virtual void Free(client_addr_t client_addr, thread_free_cb cb, void *owner) = 0; 65 | 66 | // shortcut to find address of objects 67 | virtual void FindObjectAddr(client_addr_t client_addr, thread_find_cb cb, void *owner) = 0; 68 | 69 | // Get the id of the home thread. 70 | virtual int GetHomeThreadMpIdx() const = 0; 71 | 72 | virtual void FixClientAddr(client_addr_t client_addr, fixpointer_cb cb, void *owner ) = 0; 73 | 74 | virtual void Compaction(uint8_t type) = 0; 75 | 76 | virtual void print_stats() = 0; 77 | 78 | virtual void SendBlocksTo(CompactionCtx *ctx, helper_cb cb, void* owner) = 0; 79 | }; 80 | 81 | 82 | #include 83 | typedef void (*block_alloc_cb)(Block *b, addr_t addr, void *owner); 84 | typedef void (*block_free_cb)(bool success, void *owner); 85 | typedef void (*install_blocks_cb)( void *owner); 86 | 87 | class BlockAlloc { 88 | public: 89 | virtual ~BlockAlloc() = default; 90 | 91 | virtual void AllocBlock(ThreadAlloc *alloc, uint8_t type, block_alloc_cb cb, void *owner) = 0; 92 | 93 | virtual void RemoveVirtAddr(addr_t addr, helper_cb cb, void *owner) = 0; 94 | 95 | virtual bool FreePhysBlock(_block_phys_addr_t addr, uint8_t type) = 0; 96 | 97 | virtual uint32_t GetBlockSize() const = 0; 98 | // get the thread that owns this address 99 | virtual ThreadAlloc *GetHomeAlloc(addr_t addr) = 0 ; 100 | 101 | virtual int GetHomeThreadMpIdx() const = 0 ; 102 | 103 | virtual void print_stats() = 0; 104 | // move blocks from one thread to another 105 | virtual void UpdateOwnership( std::forward_list *addresses, ThreadAlloc *newalloc, install_blocks_cb cb, void *owner) = 0; 106 | }; 107 | 108 | -------------------------------------------------------------------------------- /alloc/alloc_adapter.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * Allocation adapter that helps redirect allocation requests to threads depending on size-class 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #pragma once 12 | 13 | #include "size_table.hpp" 14 | #include "alloc.hpp" 15 | #include 16 | #include 17 | #include "../thread/messenger.hpp" 18 | 19 | /// alloc adapter is also responsible for triggering compaction 20 | class AllocAdapter{ 21 | private: 22 | 23 | AllocAdapter(uint32_t threshold_popular_class, uint32_t threshold_size_class); 24 | 25 | static AllocAdapter& getInstanceImpl(uint32_t threshold_popular_class = 0, uint32_t threshold_size_class = 0) 26 | { 27 | static AllocAdapter instance{ threshold_popular_class, threshold_size_class}; 28 | return instance; 29 | } 30 | 31 | public: 32 | static AllocAdapter& getInstance() 33 | { 34 | return getInstanceImpl(); 35 | } 36 | 37 | static void init(uint32_t threshold_popular_class, uint32_t threshold_size_class) 38 | { 39 | getInstanceImpl(threshold_popular_class,threshold_size_class ); 40 | } 41 | 42 | AllocAdapter(AllocAdapter const&) = delete; 43 | void operator=(AllocAdapter const&) = delete; 44 | 45 | 46 | public: 47 | void RegThread(ThreadAlloc* t, uint32_t id); 48 | std::atomic* GetBstats( ); 49 | 50 | 51 | void Alloc(uint32_t home_thread_id, uint32_t user_size, void *owner); 52 | void Free(uint32_t home_thread_id, client_addr_t client_addr, void *owner); 53 | 54 | 55 | void processAllocReply(client_addr_t ret_addr); 56 | void processFreeReply(int status, uint8_t type ); 57 | 58 | static void AllocReplyCb( client_addr_t ret_addr, void *owner); 59 | static void FreeReplyCb(int status, addr_t newaddr, void *owner); 60 | 61 | static void CompactionTmsg(mts::thread_msg_t *tmsg); 62 | static void CompactionMsgCb(void *owner); 63 | 64 | static void CompactionMsgRetCb(mts::thread_msg_t *msg); 65 | 66 | void print_stats(){ 67 | info(log_fp, "[AllocAdapter] Stats; Compaction times in us:\n"); 68 | 69 | for(uint32_t i = 0; i alloc_stats[SizeTable::ClassCount]; 84 | std::atomic bstat[SizeTable::ClassCount]; 85 | std::atomic pending[SizeTable::ClassCount]; 86 | std::vector all_allocs; 87 | 88 | std::vector> compaction_statistics; 89 | 90 | const uint32_t num_threads; 91 | const uint32_t threshold_popular_class; 92 | const uint32_t threshold_size_class; 93 | 94 | ThreadAlloc* getAlloc(uint32_t id); 95 | uint32_t get_thread_id(uint32_t size , uint32_t home_thread_id); 96 | uint8_t get_compaction_master(uint8_t type); 97 | void trigger_type_collection(uint8_t type, bool with_compaction = false); 98 | int get_best_compaction_candidate(); 99 | void finishCompaction(ThreadAlloc::CompactionCtx* ctx); 100 | }; 101 | 102 | 103 | /***************************************************************************** 104 | 105 | Implementation of alloc adapter 106 | 107 | ******************************************************************************/ 108 | 109 | 110 | 111 | AllocAdapter::AllocAdapter(uint32_t threshold_popular_class, uint32_t threshold_size_class): 112 | num_threads(mts::num_threads), threshold_popular_class(threshold_popular_class),threshold_size_class(threshold_size_class) 113 | { 114 | all_allocs.resize(num_threads); 115 | for(uint32_t i = 0; i* AllocAdapter::GetBstats(){ 129 | return this->bstat; 130 | } 131 | 132 | 133 | uint32_t AllocAdapter::get_thread_id(uint32_t user_size, uint32_t home_thread_id){ 134 | //assert(0 && "Not implemented"); 135 | uint8_t type = SizeTable::getInstance().GetClassFromUserSize(user_size); 136 | 137 | if(type < threshold_size_class || alloc_stats[type].load(std::memory_order_relaxed) > threshold_popular_class ){ 138 | return home_thread_id; 139 | 
} 140 | 141 | text(log_fp, "Redirect alloc to another thread! \n"); 142 | return type % num_threads; 143 | } 144 | 145 | 146 | void AllocAdapter::Alloc(uint32_t home_thread_id, uint32_t user_size, void *owner){ 147 | uint32_t thread_id = get_thread_id(user_size, home_thread_id); 148 | all_allocs[home_thread_id]->AllocAtHome(all_allocs[thread_id], user_size, AllocReplyCb, owner); 149 | } 150 | 151 | 152 | 153 | void AllocAdapter::processAllocReply( client_addr_t ret_addr ) { 154 | // for debugging. To activate one compaction after 5 allocs 155 | /* if( alloc_stats[ret_addr.comp.type] > 5 && !( pending[ret_addr.comp.type].load(std::memory_order_relaxed) ) && num_threads > 1 ){ 156 | 157 | text(log_fp, "(%d) Trigger collection! \n", mts::thread_id); 158 | trigger_type_collection(ret_addr.comp.type, true); 159 | } */ 160 | 161 | if( ret_addr.comp.addr == (0ULL) ){ 162 | int ret = get_best_compaction_candidate(); 163 | if(ret < 0){ 164 | //we run out of memory 165 | if (num_threads > 1 ){ 166 | // we will enforce one thread to have the class and allocate from that thread 167 | // This compaction will collect all not full blocks at one thread and ask to allocate the object 168 | trigger_type_collection(ret_addr.comp.type, false); 169 | } 170 | } else { 171 | // trigger compaction to find room for allocation 172 | uint8_t type = (uint8_t)ret; 173 | if( !pending[type].load(std::memory_order_relaxed) ){ 174 | 175 | if(num_threads > 1){ 176 | trigger_type_collection(ret_addr.comp.type, true); 177 | }else{ 178 | all_allocs[0]->Compaction(ret_addr.comp.type); 179 | } 180 | } 181 | } 182 | } else { 183 | alloc_stats[ret_addr.comp.type]++; 184 | } 185 | } 186 | 187 | // This compaction will collect all not full blocks at one thread and ask to allocate the object 188 | void AllocAdapter::trigger_type_collection(uint8_t type, bool with_compaction ){ 189 | bool expected = false; 190 | bool exchanged = pending[type].compare_exchange_strong(expected, true); 191 | 192 | if(!exchanged){ 193 | // only one collection at a time 194 | return; 195 | } 196 | 197 | text(log_fp, "(%d) Prepare compaction ctx! \n", mts::thread_id); 198 | 199 | 200 | uint8_t compaction_master = get_compaction_master(type); // the thread which will gather the blocks 201 | text(log_fp, "(%d) compaction_master for type %u is %u! \n", mts::thread_id,type, compaction_master); 202 | 203 | ThreadAlloc::CompactionCtx* ctx = new ThreadAlloc::CompactionCtx(num_threads, type, mts::thread_id, all_allocs[compaction_master], with_compaction); 204 | 205 | mts::thread_msg_t *tmsg = new mts::thread_msg_t(); 206 | tmsg->cb = &CompactionTmsg; 207 | tmsg->payload[0] = ctx; 208 | 209 | for(uint32_t thread_id = 0; thread_id < num_threads; thread_id++){ 210 | if(thread_id!=compaction_master && thread_id!=mts::thread_id){ 211 | mts::send_msg_to_thread_and_notify(thread_id, tmsg); 212 | text(log_fp, "Send ctx to thread ! %d \n", thread_id); 213 | } 214 | } 215 | 216 | if( mts::thread_id != compaction_master){ 217 | text(log_fp, "SendBlocksTo locally ctx to thread ! 
%d \n", mts::thread_id); 218 | all_allocs[mts::thread_id]->SendBlocksTo(ctx, CompactionMsgCb, tmsg); // there is a chance that this thread will be the last 219 | } 220 | } 221 | 222 | uint8_t AllocAdapter::get_compaction_master(uint8_t type){ 223 | return type % num_threads; 224 | } 225 | 226 | ThreadAlloc* AllocAdapter::getAlloc(uint32_t id) { 227 | return all_allocs[id]; 228 | } 229 | 230 | void AllocAdapter::CompactionTmsg(mts::thread_msg_t *tmsg){ 231 | ThreadAlloc* alloc = AllocAdapter::getInstance().getAlloc(mts::thread_id); 232 | ThreadAlloc::CompactionCtx* ctx = (ThreadAlloc::CompactionCtx*)(tmsg->payload[0]); 233 | alloc->SendBlocksTo(ctx, CompactionMsgCb, tmsg); 234 | } 235 | 236 | 237 | 238 | void AllocAdapter::CompactionMsgCb(void *owner){ 239 | mts::thread_msg_t *msg = (mts::thread_msg_t *)owner; 240 | ThreadAlloc::CompactionCtx* ctx = (ThreadAlloc::CompactionCtx*)msg->payload[0]; 241 | 242 | if(ctx->initiator != mts::thread_id){ 243 | msg->cb = &AllocAdapter::CompactionMsgRetCb; 244 | mts::send_msg_to_thread_and_notify(ctx->initiator, msg); 245 | return; 246 | } 247 | 248 | CompactionMsgRetCb(msg); 249 | } 250 | 251 | void AllocAdapter::CompactionMsgRetCb(mts::thread_msg_t *msg){ 252 | 253 | ThreadAlloc::CompactionCtx* ctx = (ThreadAlloc::CompactionCtx*)msg->payload[0]; 254 | text(log_fp,"Collection is completed for class %u %s \n", ctx->type, ctx->with_compaction ? "with compaction": ""); 255 | 256 | AllocAdapter::getInstance().finishCompaction(ctx); 257 | 258 | delete ctx; 259 | delete msg; 260 | } 261 | 262 | void AllocAdapter::finishCompaction( ThreadAlloc::CompactionCtx* ctx){ 263 | // measure latency 264 | auto t2 = std::chrono::high_resolution_clock::now(); 265 | uint64_t nanosec = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - ctx->t1 ).count(); 266 | compaction_statistics[ctx->type].push_back(nanosec); 267 | // measure latency 268 | 269 | pending[ctx->type].store( false ); 270 | } 271 | 272 | 273 | 274 | int AllocAdapter::get_best_compaction_candidate(){ 275 | std::vector res; 276 | res.resize(SizeTable::ClassCount); 277 | 278 | int type = -1; 279 | float best_score = 0; 280 | 281 | for( uint32_t i=0; i < SizeTable::ClassCount; i++){ 282 | uint64_t blocks_allocated = bstat[i].load(std::memory_order_relaxed); 283 | 284 | if( blocks_allocated && !( pending[i].load(std::memory_order_relaxed) ) ){ 285 | float score = 1.0 - 286 | (alloc_stats[i].load(std::memory_order_relaxed) + 0.0) / 287 | (blocks_allocated * SizeTable::getInstance().objects_per_class[i] ) ; 288 | if(score >best_score ){ 289 | best_score = score; 290 | type = i; 291 | } 292 | } 293 | } 294 | 295 | if(best_score > 0.3){ // some threshold 296 | return type; 297 | } 298 | 299 | return -1; 300 | } 301 | 302 | void AllocAdapter::Free(uint32_t home_thread_id, client_addr_t client_addr, void *owner){ 303 | all_allocs[home_thread_id]->Free(client_addr, FreeReplyCb, owner); 304 | } 305 | 306 | 307 | 308 | void AllocAdapter::processFreeReply(int status, uint8_t type){ 309 | if(status==SUCCESS){ 310 | alloc_stats[type]--; 311 | } 312 | } 313 | -------------------------------------------------------------------------------- /alloc/block.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * Block is a wraper over memfd descriptor that helps to create virtual addresses 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #pragma once 12 | #include 13 | #include "../common/common.hpp" 14 | 15 | 16 | class Block{ 17 | const int fd; 18 | const uint32_t offset_in_blocks; 19 | public: 20 | 21 | Block(int fd, uint32_t offset_in_blocks): fd(fd), offset_in_blocks(offset_in_blocks){ 22 | // nothing 23 | } 24 | 25 | uint32_t GetSize() const{ 26 | return BLOCK_SIZE; 27 | } 28 | 29 | _block_phys_addr_t GetPhysAddr() const { 30 | return _block_phys_addr_t({fd,offset_in_blocks}); 31 | } 32 | 33 | uint64_t CreateNewAddr() const{ 34 | // Make address alligned to the size of the block 35 | if(BLOCK_SIZE > 4096 ){ 36 | addr_t futurebuf = (addr_t)(char*)mmap(NULL, 2*BLOCK_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED| MAP_ANONYMOUS, -1, 0); 37 | addr_t alligned_addr = GetVirtBaseAddr(futurebuf); 38 | if(futurebuf - alligned_addr > 0){ 39 | alligned_addr+=BLOCK_SIZE; 40 | // printf(" %" PRIx64 " %" PRIx64 " " , futurebuf, ( alligned_addr - futurebuf ) ); 41 | munmap((void*)futurebuf, ( alligned_addr - futurebuf ) ) ; 42 | } 43 | uint64_t extra = futurebuf + 2*BLOCK_SIZE - alligned_addr - BLOCK_SIZE; 44 | if(extra > 0){ 45 | // printf(" %" PRIx64 " %" PRIx64 " " , alligned_addr + BLOCK_SIZE, extra ); 46 | munmap((void*)(alligned_addr + BLOCK_SIZE), extra ) ; 47 | } 48 | 49 | char* res = (char*)mmap((void*)alligned_addr, BLOCK_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset_in_blocks*BLOCK_SIZE); 50 | if (res == MAP_FAILED ){ 51 | perror("mmap failed with NULL"); 52 | exit(1); 53 | } 54 | return (uint64_t)alligned_addr; 55 | } else { 56 | char* alligned_addr = (char*)mmap(NULL, BLOCK_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset_in_blocks*BLOCK_SIZE); 57 | if (alligned_addr == MAP_FAILED ){ 58 | perror("mmap failed with NULL"); 59 | exit(1); 60 | } 61 | return (uint64_t)alligned_addr; 62 | } 63 | } 64 | 65 | void RemapVirtAddrToMe(addr_t virt_addr) const { 66 | char* ret = (char*)mmap(ADDR_T_TO_PTR(virt_addr), BLOCK_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED , fd, offset_in_blocks*BLOCK_SIZE); 67 | if (ret == MAP_FAILED){ 68 | perror("mmap when is mapped to file with MAP_FIXED"); 69 | exit(1); 70 | } 71 | } 72 | 73 | ~Block(){ 74 | /* nothing */ 75 | } 76 | 77 | }; 78 | 79 | -------------------------------------------------------------------------------- /alloc/block_alloc.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * Implmenetation of a block allocator. It also manages ownerships of each block. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "alloc.hpp" 15 | 16 | 17 | #include "../utilities/block_home_table.h" 18 | 19 | #include 20 | #include 21 | 22 | #include "superblock.hpp" 23 | 24 | 25 | 26 | class BlockAllocImpl: public BlockAlloc { 27 | 28 | 29 | public: 30 | 31 | BlockAllocImpl(uint32_t thread_id, uint32_t prealloc_superblock_num, std::atomic *bstat = NULL): 32 | home_thread_id(thread_id) ,_superblock_counter (0), bstat(bstat) 33 | { 34 | preallocate_superblocks(prealloc_superblock_num); 35 | 36 | } 37 | 38 | virtual void AllocBlock(ThreadAlloc *alloc, uint8_t type, block_alloc_cb cb, void *owner) override; 39 | 40 | virtual void RemoveVirtAddr(addr_t addr, helper_cb cb, void *owner) override; 41 | 42 | virtual bool FreePhysBlock(_block_phys_addr_t addr, uint8_t type) override; 43 | 44 | virtual uint32_t GetBlockSize() const override; 45 | 46 | virtual ThreadAlloc *GetHomeAlloc(addr_t addr) override; 47 | 48 | virtual int GetHomeThreadMpIdx() const override{ 49 | return home_thread_id; 50 | } 51 | 52 | virtual void print_stats() override{ 53 | 54 | info(log_fp, "[BlockAllocImpl(%d)] Stats; \n", this->home_thread_id ); 55 | // todo. print stats. 56 | } 57 | 58 | 59 | 60 | virtual void UpdateOwnership( std::forward_list *addresses, ThreadAlloc *newalloc, install_blocks_cb cb, void *owner) override{ 61 | assert(this->home_thread_id == mts::thread_id && "Only home thread can modify"); 62 | while (!addresses->empty()){ 63 | addr_t addr = addresses->front(); 64 | addresses->pop_front(); 65 | 66 | text(log_fp, "[UpdateOwnership] Find %" PRIx64 " \n",addr ); 67 | ThreadAlloc * alloc = home_table.Lookup(addr); 68 | assert(alloc!=nullptr && "no addr in table"); 69 | 70 | bool changed = home_table.Update(addr, newalloc); 71 | assert(changed && "Failed to update owner of block addr"); 72 | } 73 | 74 | if(cb!=NULL){ 75 | cb(owner); 76 | } 77 | } 78 | 79 | // for debugging 80 | Block* AllocBlock(); 81 | 82 | 83 | ~BlockAllocImpl(){ 84 | 85 | for (auto const& item : all_superblocks) 86 | { 87 | delete item.second; 88 | } 89 | } 90 | 91 | 92 | private: 93 | const int home_thread_id; 94 | 95 | uint32_t _superblock_counter; 96 | 97 | BlockHomeTable home_table; 98 | std::atomic *const bstat; 99 | std::unordered_map all_superblocks; 100 | std::list free_superblocks; 101 | void preallocate_superblocks(uint32_t prealloc_superblock_num); 102 | 103 | 104 | 105 | }; 106 | 107 | 108 | 109 | ThreadAlloc* BlockAllocImpl::GetHomeAlloc(addr_t addr){ 110 | 111 | ThreadAlloc * alloc = home_table.Lookup(addr); 112 | 113 | return alloc; 114 | } 115 | 116 | 117 | uint32_t BlockAllocImpl::GetBlockSize() const{ 118 | return BLOCK_SIZE; 119 | } 120 | 121 | 122 | void BlockAllocImpl::RemoveVirtAddr(addr_t addr, helper_cb cb, void *owner){ 123 | home_table.Remove(addr); 124 | text(log_fp, " RemoveVirtAddr %" PRIx64 " \n",addr); 125 | int ret = munmap((void*)addr, BLOCK_SIZE); 126 | assert(ret==0 && "munmap failed"); 127 | if(cb!=NULL){ 128 | cb(owner); 129 | } 130 | } 131 | 132 | Block* BlockAllocImpl::AllocBlock(){ 133 | SuperBlock *sb; 134 | if(free_superblocks.empty()){ 135 | sb = new SuperBlock(_superblock_counter++); 136 | all_superblocks.insert({sb->getFD(), sb}); 137 | free_superblocks.push_front(sb); 138 | } 139 | sb = free_superblocks.front(); 140 | 141 | 142 | Block* b = sb->allocateBlock(); 143 | if(sb->isFull()){ 144 | free_superblocks.pop_front(); 145 | } 146 | 147 | return b; 148 | } 149 | 150 | void BlockAllocImpl::AllocBlock(ThreadAlloc *alloc, 
uint8_t type, block_alloc_cb cb, void *owner){ 151 | 152 | Block *b = AllocBlock(); 153 | text(log_fp, "[BlockAllocImpl] insert %p \n",b); 154 | addr_t addr = b->CreateNewAddr(); 155 | text(log_fp, "[BlockAllocImpl] insert %" PRIx64 " \n",addr); 156 | home_table.Insert(home_thread_id, addr, alloc); 157 | text(log_fp, "[BlockAllocImpl] insert %" PRIx64 " \n",addr); 158 | assert(alloc==home_table.Lookup(addr) && "home_table does not work correctly"); 159 | assert(alloc!=nullptr && "home_table does not work correctly"); 160 | 161 | if(bstat!=NULL){ 162 | bstat[type]++; 163 | } 164 | 165 | cb(b, addr, owner); 166 | } 167 | 168 | 169 | bool BlockAllocImpl::FreePhysBlock(_block_phys_addr_t phys, uint8_t type){ 170 | if(bstat!=NULL){ 171 | bstat[type]--; 172 | } 173 | auto it = all_superblocks.find(phys.fd); 174 | assert(it != all_superblocks.end()); 175 | SuperBlock* sb = it->second; 176 | 177 | bool wasFull = sb->isFull(); 178 | 179 | sb->freeBlock(phys); 180 | 181 | if(wasFull){ 182 | free_superblocks.push_front(sb); 183 | } 184 | 185 | return true; 186 | }; 187 | 188 | 189 | 190 | void BlockAllocImpl::preallocate_superblocks(uint32_t prealloc_superblock_num){ 191 | text(log_fp, "\t\t\t[BlockAllocImpl] preallocate_blocks %" PRIu32 " blocks \n",prealloc_superblock_num ); 192 | for(uint32_t i=0; i< prealloc_superblock_num; i++){ 193 | SuperBlock* sb = new SuperBlock(_superblock_counter++); 194 | free_superblocks.push_front(sb); 195 | all_superblocks.insert({sb->getFD(), sb}); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /alloc/local_block.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * Implmenetation of a thread-local block. It helps to allocate addresses and manage metadata. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #pragma once 12 | 13 | #include "block.hpp" 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | 22 | struct RandomGenerator{ 23 | 24 | std::default_random_engine generator; 25 | std::uniform_int_distribution dis; // not that I hard-coded only 2^16 ids. Change to have more 26 | 27 | RandomGenerator(uint32_t seed):generator(std::default_random_engine(seed)), dis(0,0xFFFF){ // not that I hard-coded only 2^16 ids. Change to have more 28 | 29 | } 30 | uint16_t GetNewRandomNumber(){ // not that I hard-coded only 2^16 ids. 
Change to have more 31 | return dis(generator); 32 | } 33 | }; 34 | 35 | 36 | 37 | struct LocalBlock 38 | { 39 | 40 | LocalBlock(RandomGenerator &gen, Block* b, uint8_t type, uint16_t slot_size): 41 | _gen(gen), _b(b), _size(_b->GetSize()), _type(type), _slot_size(slot_size), _slots( (BLOCK_USEFUL_SIZE) / slot_size) 42 | { 43 | _freeslots = _slots; 44 | _allocatedslots = 0; 45 | _obj_ids.clear(); 46 | 47 | 48 | std::vector temp_vector_for_shuffle; 49 | temp_vector_for_shuffle.reserve(_slots); 50 | for(uint32_t slot_id= _slots-1; slot_id > 0; slot_id--){ 51 | 52 | temp_vector_for_shuffle.push_back(slot_id*slot_size); 53 | } 54 | temp_vector_for_shuffle.push_back(0); 55 | 56 | std::shuffle ( temp_vector_for_shuffle.begin(), temp_vector_for_shuffle.end(), _gen.generator); 57 | 58 | _free_list = std::forward_list(temp_vector_for_shuffle.begin(), temp_vector_for_shuffle.end()); 59 | 60 | 61 | } 62 | 63 | std::list< struct ibv_mr* > all_virt_addr; 64 | 65 | RandomGenerator &_gen; 66 | Block * const _b; 67 | const uint16_t _size; 68 | const uint8_t _type; 69 | const uint16_t _slot_size; 70 | const uint16_t _slots; 71 | 72 | uint16_t _allocatedslots; 73 | uint16_t _freeslots; 74 | 75 | std::forward_list _free_list; 76 | // it stores offsets 77 | std::map _obj_ids; // obj_id to offset 78 | 79 | uint32_t hasObjects( ) const { 80 | return _obj_ids.size(); 81 | } 82 | 83 | void RemapVirtAddrToMe(addr_t addr) const { 84 | _b->RemapVirtAddrToMe(addr); 85 | } 86 | 87 | void AddNewVirtAddr(struct ibv_mr* mr){ 88 | if(all_virt_addr.empty()){ 89 | mr->lkey = 0; // I reuse lkey as allocated counted 90 | } 91 | all_virt_addr.push_back(mr); 92 | } 93 | 94 | void GetAllAddrs(std::forward_list *list){ 95 | for(auto &mr: all_virt_addr){ 96 | list->push_front((addr_t)mr->addr); 97 | } 98 | } 99 | 100 | struct ibv_mr* RemoveVirtAddr(addr_t addr){ 101 | auto it = std::find_if(all_virt_addr.begin(), all_virt_addr.end(), [&addr] (struct ibv_mr* mr) { return (uint64_t)mr->addr == addr; }); 102 | struct ibv_mr* mr = *it; 103 | all_virt_addr.erase(it); 104 | return mr; 105 | } 106 | 107 | struct ibv_mr* PopVirtAddr(){ 108 | if(all_virt_addr.empty()){ 109 | return NULL; 110 | } 111 | 112 | struct ibv_mr* mr = all_virt_addr.front(); 113 | all_virt_addr.pop_front(); 114 | return mr; 115 | } 116 | 117 | addr_t GetBaseAddr() const{ 118 | assert(!all_virt_addr.empty()); 119 | return (addr_t)(all_virt_addr.front()->addr); 120 | } 121 | 122 | uint32_t GetRKey() const{ 123 | assert(!all_virt_addr.empty()); 124 | return (all_virt_addr.front()->rkey); 125 | } 126 | 127 | _block_phys_addr_t GetPhysAddr() const{ 128 | return _b->GetPhysAddr(); 129 | } 130 | 131 | uint8_t GetType() const{ 132 | return _type; 133 | } 134 | 135 | uint16_t GetSlotSize() const{ 136 | return _slot_size; 137 | } 138 | 139 | offset_t AllocSlot( ){ 140 | offset_t offset = _free_list.front(); 141 | _free_list.pop_front(); 142 | _freeslots--; 143 | _allocatedslots++; 144 | 145 | all_virt_addr.front()->lkey++; // use to count objects in this virtaddr 146 | 147 | return offset; 148 | } 149 | 150 | 151 | offset_t AllocObject(uint16_t *obj_id){ 152 | text(log_fp, "\t\t\t[LocalBlock] AllocObject \n"); 153 | 154 | offset_t offset = AllocSlot(); 155 | 156 | // find free id 157 | uint16_t number = (_gen.GetNewRandomNumber() & mask_of_bits(ID_SIZE_BITS)); // bits ID_SIZE_BITS 158 | auto it = _obj_ids.find (number); 159 | 160 | while ( it != _obj_ids.end() ){ 161 | number = _gen.GetNewRandomNumber() & mask_of_bits(ID_SIZE_BITS); 162 | it = _obj_ids.find (number); 163 | } 
164 | 165 | text(log_fp, "\t\t\t[LocalBlock] Assigned obj_id = %" PRIu16 " \n", number); 166 | 167 | _obj_ids.insert({number, offset}); 168 | *obj_id = number; 169 | 170 | return offset; 171 | } 172 | 173 | offset_t FindObject( uint16_t obj_id){ 174 | text(log_fp, "\t\t\t[LocalBlock] Find obj_id = %" PRIu16 " \n", obj_id); 175 | 176 | auto it = _obj_ids.find (obj_id); 177 | assert(it != _obj_ids.end()); 178 | 179 | return it->second; 180 | } 181 | 182 | 183 | offset_t RemoveObject(uint16_t obj_id){ 184 | 185 | text(log_fp, "\t\t\t[LocalBlock] Remove obj_id = %" PRIu16 " \n", obj_id); 186 | 187 | auto it = _obj_ids.find(obj_id); 188 | 189 | if(it == _obj_ids.end()){ 190 | info(log_fp, "\t\t\t[LocalBlock] obj_id = %" PRIu16 " does not exist \n", obj_id); 191 | return std::numeric_limits::max(); 192 | } 193 | 194 | offset_t offset = it->second; 195 | 196 | _obj_ids.erase(it); 197 | _free_list.push_front(offset); 198 | 199 | _freeslots++; 200 | _allocatedslots--; 201 | return offset; 202 | } 203 | 204 | bool is_full() const{ 205 | return _freeslots == 0; 206 | } 207 | 208 | bool RemoveOneAddr(addr_t old_addr){ 209 | auto it = std::find_if(all_virt_addr.begin(), all_virt_addr.end(), [&old_addr] (struct ibv_mr* mr) { return (uint64_t)mr->addr == old_addr; }); 210 | struct ibv_mr* mr = *it; 211 | mr->lkey--; 212 | bool can_be_unmapped = false; 213 | 214 | // if mr is not the main one and we deallocated all objects 215 | if(all_virt_addr.front() != mr && mr->lkey==0 ){ 216 | can_be_unmapped = true; 217 | } 218 | return can_be_unmapped; 219 | } 220 | 221 | bool Compactible(LocalBlock* from){ 222 | 223 | if( from->_type != this->_type || this->_freeslots < from->_allocatedslots){ 224 | return false; 225 | } 226 | 227 | // it assumes that both maps are sorted. 
Use merge sort to find intersections 228 | auto A_it = this->_obj_ids.begin(); 229 | auto B_it = from->_obj_ids.begin(); 230 | for (; A_it != this->_obj_ids.end() && B_it != from->_obj_ids.end(); ) 231 | { 232 | if(A_it->first == B_it->first){ 233 | return false; 234 | } 235 | 236 | if(A_it->first < B_it->first) 237 | { 238 | A_it++; 239 | }else{ 240 | B_it++; 241 | } 242 | } 243 | 244 | return true; 245 | } 246 | 247 | 248 | 249 | void AddEntriesFrom(LocalBlock* from) 250 | { 251 | text(log_fp, "\t\t\t[LocalBlock] Moving data from one block to another \n"); 252 | 253 | addr_t from_base = from->GetBaseAddr(); 254 | addr_t to_base = this->GetBaseAddr(); 255 | 256 | std::set set_free_list (_free_list.cbegin(),_free_list.cend() ); 257 | 258 | 259 | // here we try to place objects at the same offsets 260 | for (auto it = from->_obj_ids.cbegin(); it!=from->_obj_ids.cend(); /* no increment */) 261 | { 262 | 263 | auto sit = set_free_list.find(it->second); 264 | if ( sit != set_free_list.end() ) 265 | { 266 | 267 | offset_t offset = it->second; 268 | this->_obj_ids.insert({it->first, offset}); 269 | text(log_fp, "\t\t\t[LocalBlock] Moved object to the same offset %" PRIx32 " \n", offset); 270 | 271 | while(!ReaderWriter::try_lock_slot_for_compaction(from_base+offset)) { /* empty */}; 272 | memcpy((void*)(to_base+offset), (void*)(from_base+offset), _slot_size ); 273 | while(!ReaderWriter::unlock_slot_from_compaction(to_base+offset)) {/* empty */ }; 274 | 275 | this->_freeslots--; 276 | this->_allocatedslots++; 277 | from->_obj_ids.erase(it++); 278 | set_free_list.erase(sit); 279 | } 280 | else 281 | { 282 | ++it; 283 | } 284 | } 285 | 286 | _free_list.clear(); 287 | 288 | if(!set_free_list.empty()){ 289 | std::vector temp_vector_for_shuffle(set_free_list.begin(), set_free_list.end()); 290 | std::shuffle ( temp_vector_for_shuffle.begin(), temp_vector_for_shuffle.end(), _gen.generator); 291 | _free_list = std::forward_list(temp_vector_for_shuffle.begin(), temp_vector_for_shuffle.end()); 292 | } 293 | 294 | // here we try to place blocks at other offsets 295 | for(auto it = from->_obj_ids.begin(); it!=from->_obj_ids.end(); it++ ){ 296 | 297 | addr_t offset = AllocSlot(); 298 | text(log_fp, "\t\t\t[LocalBlock] Moved object to a new offset %" PRIx64 " \n", offset); 299 | this->_obj_ids.insert({it->first, offset}); 300 | 301 | while(!ReaderWriter::try_lock_slot_for_compaction(from_base+it->second)) { /* empty */}; 302 | memcpy((void*)(to_base+offset), (void*)(from_base+it->second), _slot_size ); 303 | while(!ReaderWriter::unlock_slot_from_compaction(to_base+offset)) {/* empty */ }; 304 | } 305 | } 306 | 307 | 308 | }; 309 | // helps to sort classes in c++ 310 | struct LocalBlockComp 311 | { 312 | using is_transparent = void; 313 | bool operator()(const LocalBlock* lhs, const LocalBlock* rhs) const { 314 | if(lhs->_freeslots == rhs->_freeslots){ 315 | return lhs < rhs; 316 | } else { 317 | return lhs->_freeslots < rhs->_freeslots; 318 | } 319 | } 320 | 321 | 322 | bool operator() (const LocalBlock* lhs, const uint32_t val) const 323 | { 324 | return lhs->_freeslots < val; 325 | } 326 | }; 327 | -------------------------------------------------------------------------------- /alloc/memfd.h: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * It is a helper class to use memfd in old linux kernels. I do not use it anymore. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #ifndef _MEMFD_H 13 | #define _MEMFD_H 14 | 15 | /* 16 | * No glibc wrappers exist for memfd_create(2), so provide our own. 17 | * 18 | * Also define memfd fcntl sealing macros. While they are already 19 | * defined in the kernel header file , that file as 20 | * a whole conflicts with the original glibc header . 21 | */ 22 | 23 | static inline int memfd_create(const char *name, unsigned int flags) { 24 | return syscall(__NR_memfd_create, name, flags); 25 | } 26 | 27 | #ifndef F_LINUX_SPECIFIC_BASE 28 | #define F_LINUX_SPECIFIC_BASE 1024 29 | #endif 30 | 31 | #ifndef F_ADD_SEALS 32 | #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) 33 | #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) 34 | 35 | #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ 36 | #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ 37 | #define F_SEAL_GROW 0x0004 /* prevent file from growing */ 38 | #define F_SEAL_WRITE 0x0008 /* prevent writes */ 39 | #endif 40 | 41 | #endif /* _MEMFD_H */ 42 | -------------------------------------------------------------------------------- /alloc/size_table.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * It is a helper class to map user sizes to slot sizes. Note that I need to add cache verions and a header to each user's object. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | 15 | #include "../utilities/debug.h" 16 | #include "../common/common.hpp" 17 | 18 | 19 | class SizeTable{ 20 | public: 21 | // We want 256 classes so they can be indexed with uint8_t. 22 | static const uint32_t ClassCount = 64; 23 | // Align to word boundary. 24 | public: 25 | static SizeTable& getInstance() 26 | { 27 | static SizeTable instance; // Guaranteed to be destroyed. 28 | // Instantiated on first use. 29 | return instance; 30 | } 31 | 32 | SizeTable(SizeTable const&) = delete; 33 | void operator=(SizeTable const&) = delete; 34 | 35 | uint32_t objects_per_class[ClassCount]; // for compaction 36 | 37 | private: 38 | static const uint32_t ClassAlignmentLog = 3; 39 | static const uint32_t ClassAlignment = (1 << ClassAlignmentLog); 40 | 41 | // The fist class is 64 bytes as we are unlikely to need more. 42 | static const uint32_t FirstClassSize = 8; 43 | 44 | static const uint32_t MaxAllowedFragmentation = 128; 45 | 46 | // Size of the lookup table. 
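// GetClass() below resolves sizes up to MaxSizeInLut bytes with a single lut[] lookup; larger sizes fall back to binary search via SearchClass().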
47 | static const uint32_t LutBytes = 256; 48 | 49 | static const uint32_t MaxSizeInLut = LutBytes << ClassAlignmentLog; 50 | 51 | 52 | uint32_t size_map[ClassCount]; // real size 53 | uint8_t lut[LutBytes]; 54 | uint8_t last_lut_class; 55 | 56 | 57 | // private 58 | SizeTable(){ 59 | 60 | uint32_t current_size = FirstClassSize; 61 | const uint32_t last_class_size = BLOCK_USEFUL_SIZE; 62 | 63 | // populate the array used for lookups 64 | for(uint32_t i = 0; i < ClassCount; i++) { 65 | // set the size 66 | uint32_t num_obj = last_class_size / current_size; 67 | 68 | while((current_size <=last_class_size) && (last_class_size % current_size > MaxAllowedFragmentation || num_obj == last_class_size / current_size)){ 69 | current_size+=8; 70 | } 71 | 72 | num_obj = last_class_size / current_size; 73 | while((current_size <=last_class_size) && num_obj == last_class_size / current_size){ 74 | current_size+=8; 75 | } 76 | current_size-=8; 77 | 78 | size_map[i] = current_size; 79 | 80 | objects_per_class[i] = (BLOCK_USEFUL_SIZE)/ current_size; 81 | } 82 | 83 | // initialize the lut 84 | for(uint32_t i = 0; i < LutBytes;i++) { 85 | uint32_t size = i << ClassAlignmentLog; 86 | lut[i] = (uint8_t)SearchClass(size, 0); 87 | } 88 | 89 | last_lut_class = lut[LutBytes - 1]; 90 | 91 | }; 92 | 93 | 94 | public: 95 | 96 | void PrintTable(){ 97 | for(uint32_t i=0; i < ClassCount; i++ ){ 98 | info(log_fp,"Class[%u] -> real:%u user:%u;\n", i ,GetRealSize(i), GetUserSize(i)); 99 | } 100 | } 101 | 102 | uint32_t SearchClass(uint32_t size, uint32_t start_idx) const { 103 | // Binary search classes with the caveat that we are not 104 | // looking for the exact match, but for the next larger size. 105 | uint32_t end_idx = ClassCount; 106 | 107 | while(start_idx < end_idx) { 108 | uint32_t mid_idx = (start_idx + end_idx) / 2; 109 | 110 | if(size_map[mid_idx] == size) { 111 | start_idx = end_idx = mid_idx; 112 | } else if(size_map[mid_idx] < size) { 113 | start_idx = mid_idx + 1; 114 | } else { 115 | end_idx = mid_idx; 116 | } 117 | } 118 | 119 | return end_idx; 120 | } 121 | 122 | 123 | uint8_t GetClassFromUserSize(uint32_t user_size) const{ 124 | uint32_t real_size = user_size + sizeof(slot_header_t) + (user_size+sizeof(slot_header_t)-1)/CACHELINE ; 125 | uint8_t class1 = GetClass(real_size); 126 | return GetUserSize(class1) >= user_size ? class1 : class1 + 1; 127 | }; 128 | 129 | 130 | uint8_t GetClass(uint32_t real_size) const{ 131 | if(real_size <= MaxSizeInLut) { 132 | return lut[(real_size + ClassAlignment - 1) >> ClassAlignmentLog]; 133 | } 134 | 135 | return SearchClass(real_size, last_lut_class); 136 | }; 137 | 138 | uint32_t GetRealSize(uint8_t size_class) const{ 139 | return size_map[size_class]; 140 | }; 141 | 142 | uint32_t GetUserSize(uint8_t size_class) const{ 143 | uint32_t real_size = size_map[size_class]; 144 | 145 | if(real_size <= CACHELINE && CACHELINE % real_size == 0){ 146 | return real_size - sizeof(slot_header_t); 147 | } else { 148 | return real_size - sizeof(slot_header_t) - (real_size+CACHELINE-1)/CACHELINE; 149 | } 150 | 151 | 152 | }; 153 | 154 | 155 | }; 156 | -------------------------------------------------------------------------------- /alloc/superblock.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * It is a super-block allocator. IT exists to use less fds to address physical memeory. 5 | * So I can use one physical region for multiple blocks. 
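* (Each SuperBlock is one memfd file of BLOCKS_IN_SUPERBLOCK * BLOCK_SIZE bytes, with a bitset tracking which blocks are still free.)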
6 | * 7 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 8 | * 9 | * Author(s): Konstantin Taranov 10 | * 11 | */ 12 | 13 | #pragma once 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | #ifndef MFD_HUGE_SHIFT // for old linux kernels 23 | #include "memfd.h" 24 | #endif 25 | #include "block.hpp" 26 | #include 27 | 28 | 29 | class SuperBlock{ 30 | const uint32_t _id; 31 | int _fd; 32 | std::bitset _blocks; // 1 - is free, 0 - is allocated 33 | 34 | public: 35 | 36 | SuperBlock(uint32_t id): _id(id) { 37 | text(log_fp,"Create Block name: %s\n",std::to_string(_id).c_str() ); 38 | 39 | _fd = memfd_create( std::to_string(_id).c_str() , MFD_CLOEXEC ); 40 | if (_fd == -1){ 41 | exit(1); 42 | } 43 | 44 | int ret = ftruncate(_fd, BLOCKS_IN_SUPERBLOCK * BLOCK_SIZE); 45 | if (ret == -1){ 46 | exit(1); 47 | } 48 | // set all bits to true; 49 | _blocks.set(); 50 | text(log_fp,"Success for superblock %s with file descriptor %d \n",std::to_string(_id).c_str(), _fd); 51 | } 52 | 53 | 54 | uint32_t getID() const{ 55 | return _id; 56 | } 57 | 58 | int getFD() const{ 59 | return _fd; 60 | } 61 | 62 | size_t getSize() const{ 63 | return BLOCKS_IN_SUPERBLOCK * BLOCK_SIZE; 64 | } 65 | 66 | bool isFree() const{ 67 | return _blocks.all(); 68 | } 69 | 70 | bool isFull() const{ 71 | return _blocks.none(); 72 | } 73 | 74 | bool hasFreeBlocks() const{ 75 | return _blocks.any(); 76 | } 77 | 78 | Block* allocateBlock(){ 79 | assert(hasFreeBlocks() && "SuperBlock has no free blocks"); 80 | 81 | uint32_t offset_in_blocks = 0; 82 | for (uint32_t i = 0; i < _blocks.size(); ++i) { 83 | 84 | if(_blocks[i]){ 85 | _blocks.reset(i); // set bit to 0 86 | offset_in_blocks = i; 87 | break; 88 | } 89 | } 90 | return new Block(_fd, offset_in_blocks);; 91 | } 92 | 93 | bool freeBlock(_block_phys_addr_t phys){ 94 | assert(phys.fd == _fd && "attempt to deallocate to foreign block"); 95 | _blocks.set(phys.offset_in_blocks); // set bit to 1 96 | return true; 97 | } 98 | 99 | ~SuperBlock(){ 100 | text(log_fp,"\n========== DESTROY SUPERBLOCK[%u] ============ \n", _id); 101 | if(_fd!=-1){ 102 | close(_fd); 103 | } 104 | } 105 | 106 | }; 107 | 108 | -------------------------------------------------------------------------------- /common/common.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * It is a file responsible for corm's settings. It helps to choose size of pointers, addr_t, block headers. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | 13 | #pragma once 14 | 15 | #include "../utilities/debug.h" 16 | #include 17 | #include 18 | #include 19 | 20 | typedef unsigned __int128 uint128_t; 21 | 22 | 23 | static const uint64_t CACHELINE = 64ULL; 24 | static const uint64_t CACHELINE_MASK = ~((uint64_t)0x3F); 25 | 26 | static constexpr uint64_t mask_of_bits(uint16_t bits){ 27 | return ((uint64_t)1 << bits) - 1; 28 | } 29 | 30 | static_assert ( mask_of_bits(16) == 0xFFFF , "mask_of_bits does not work correctly") ; 31 | static_assert ( mask_of_bits(2) == 0b11 , "mask_of_bits does not work correctly") ; 32 | static_assert ( mask_of_bits(12) == 0xFFF , "mask_of_bits does not work correctly") ; 33 | 34 | // Note that in my experiments I manually changed the sizes and recompiled the code. 
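// For example, BLOCK_BIT_SIZE = 12 below yields 4 KiB blocks with 16 blocks per superblock, while the commented-out variant (BLOCK_BIT_SIZE = 30) yields 1 GiB blocks with a single block per superblock.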
35 | 36 | // 4KB blocks 37 | //static const size_t PAGE_SIZE = (4096); 38 | 39 | static const uint32_t BLOCK_BIT_SIZE = (12) ; 40 | static const uint32_t BLOCK_SIZE = (uint32_t)(1 << BLOCK_BIT_SIZE); 41 | 42 | #define BLOCKS_IN_SUPERBLOCK (16) 43 | 44 | /* 45 | static const uint32_t BLOCK_BIT_SIZE = (30) ; 46 | static const uint32_t BLOCK_SIZE = (uint32_t)(1 << BLOCK_BIT_SIZE); 47 | 48 | #define BLOCKS_IN_SUPERBLOCK (1) 49 | */ 50 | 51 | // client pointer: | object addr | object id | rkey | type | padding | 52 | // 128bits: | 48 bits | 16 bits | 32 bits | 8 bits | 24 | 53 | 54 | #define VIRTUAL_ADDRESS_SIZE (48) // on linux x86-64 it is guaranteed 55 | #define ID_SIZE_BITS (16) // 2^16 different ids 56 | #define TYPE_SIZE (8) 57 | #define BASE_ADDR_BITS (VIRTUAL_ADDRESS_SIZE - BLOCK_BIT_SIZE) // 36 58 | 59 | 60 | typedef union { 61 | uint128_t whole; 62 | struct { 63 | // uint64_t padding: 128 - TYPE_SIZE - 32 - ID_SIZE_BITS - VIRTUAL_ADDRESS_SIZE - 8; 64 | uint64_t version: 8; 65 | uint64_t type : TYPE_SIZE; 66 | uint64_t obj_id : ID_SIZE_BITS; 67 | uint64_t rkey : 32; 68 | uint64_t addr ;// : VIRTUAL_ADDRESS_SIZE; 69 | } comp; 70 | uint64_t parts[2]; 71 | } client_addr_t; 72 | 73 | typedef uint64_t addr_t; // actual memory address 74 | 75 | typedef uint32_t offset_t; // offset from base 76 | 77 | 78 | #define ADDR_T_TO_PTR(x) ((void*)x) 79 | 80 | 81 | typedef struct { 82 | int fd; 83 | uint32_t offset_in_blocks; 84 | } _block_phys_addr_t; 85 | 86 | 87 | typedef union { 88 | uint128_t whole; 89 | struct { 90 | uint64_t padding : 32 - TYPE_SIZE; 91 | uint64_t type : TYPE_SIZE; 92 | uint64_t rkey : 32; 93 | uint64_t base : 64; //BASE_ADDR_BITS 94 | } comp; 95 | uint64_t parts[2]; 96 | } block_header_t; 97 | 98 | static const uint32_t BLOCK_USEFUL_SIZE = (BLOCK_SIZE - sizeof(block_header_t)); 99 | 100 | // Slot header: | V obj | Lock | compaction | old addr | id | 101 | // |8 bits | 1 bit | 1 bits | BASE_ADDR_BITS bits | ID_SIZE_BITS bits | = 16 bytes per object 102 | 103 | struct slot_header_t{ 104 | uint64_t version : 8; 105 | uint64_t lock : 1; 106 | uint64_t allocated : 1; 107 | uint64_t compaction : 1; // the idea is that if slot is compacted it means the object most likely has been moved 108 | uint64_t padding: (53 - ID_SIZE_BITS - BASE_ADDR_BITS); 109 | uint64_t obj_id : ID_SIZE_BITS; 110 | uint64_t oldbase : BASE_ADDR_BITS; // is used to distinguish between native objects in block and one which came from another block 111 | }; 112 | 113 | 114 | 115 | static_assert(std::is_pod::value, "slot_header_t is not POD"); 116 | static_assert(std::is_pod::value, "client_addr_t is not POD"); 117 | static_assert(std::is_pod::value, "block_header_t is not POD"); 118 | static_assert(sizeof(client_addr_t) == sizeof(uint128_t), "client_addr_t is not 128 bits?"); 119 | static_assert(sizeof(slot_header_t) == sizeof(uint64_t), "slot_header_t is not 64 bits?"); 120 | static_assert(sizeof(uint64_t) == sizeof(unsigned long), " Type check "); 121 | static_assert(sizeof(uint64_t) == sizeof(unsigned long long), " Type check "); 122 | 123 | 124 | inline addr_t GetVirtBaseAddr(addr_t addr){ 125 | return (addr >> BLOCK_BIT_SIZE) << BLOCK_BIT_SIZE ; 126 | } 127 | 128 | inline addr_t GetAddressOffset(addr_t addr){ 129 | return addr & mask_of_bits(BLOCK_BIT_SIZE); 130 | } 131 | 132 | inline uint8_t GetSlotType(addr_t addr){ 133 | addr_t baseaddr = GetVirtBaseAddr(addr); 134 | return ((block_header_t*)(baseaddr+BLOCK_USEFUL_SIZE))->comp.type; 135 | } 136 | 137 | inline addr_t GetSlotNewestBaseAddr(addr_t addr){ 138 
| addr_t baseaddr = GetVirtBaseAddr(addr); 139 | return (addr_t)(((block_header_t*)(baseaddr+BLOCK_USEFUL_SIZE))->comp.base); 140 | //return (addr_t)(((addr_t)((block_header_t*)(baseaddr+BLOCK_USEFUL_SIZE))->comp.base) << BLOCK_BIT_SIZE); 141 | } 142 | 143 | inline uint32_t GetSlotNewestRkey(addr_t addr){ 144 | addr_t baseaddr = GetVirtBaseAddr(addr); 145 | return (uint32_t)(((block_header_t*)(baseaddr+BLOCK_USEFUL_SIZE))->comp.rkey); 146 | } 147 | 148 | inline uint8_t GetSlotVersion(addr_t addr){ 149 | return (uint8_t)(((slot_header_t*)(char*)addr)->version); 150 | } 151 | 152 | inline uint16_t GetSlotObjId(addr_t addr){ 153 | return (uint16_t)(((slot_header_t*)(char*)addr)->obj_id); 154 | } 155 | 156 | inline client_addr_t CreateClientAddr(addr_t addr, uint32_t rkey, uint16_t obj_id, uint8_t version, uint8_t type){ 157 | client_addr_t client_addr; 158 | client_addr.comp.version = version; 159 | client_addr.comp.type = type; 160 | client_addr.comp.rkey = rkey; 161 | client_addr.comp.obj_id = obj_id; 162 | client_addr.comp.addr = addr; 163 | return client_addr; 164 | } 165 | 166 | 167 | inline addr_t GetObjectAddr(client_addr_t addr){ 168 | return (addr_t)(addr.comp.addr); 169 | } 170 | 171 | inline uint16_t GetObjId(client_addr_t client_addr){ 172 | return (uint16_t)client_addr.comp.obj_id; 173 | } 174 | 175 | 176 | -------------------------------------------------------------------------------- /compact.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple code to trigger compaction. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #include // std::cout 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | FILE *log_fp; 26 | 27 | 28 | #include "worker/client_api.hpp" 29 | #include "rdma/connectRDMA.hpp" 30 | 31 | #include "utilities/cxxopts.hpp" 32 | 33 | cxxopts::ParseResult 34 | parse(int argc, char* argv[]) 35 | { 36 | cxxopts::Options options(argv[0], "Trigger compaction for a given size"); 37 | options 38 | .positional_help("[optional args]") 39 | .show_positional_help(); 40 | 41 | try 42 | { 43 | 44 | options.add_options() 45 | ("server", "Another address", cxxopts::value(), "IP") 46 | ("size", "objects size", cxxopts::value()->default_value("24"), "N") 47 | ("help", "Print help") 48 | ; 49 | 50 | auto result = options.parse(argc, argv); 51 | 52 | if (result.count("help")) 53 | { 54 | std::cout << options.help({""}) << std::endl; 55 | exit(0); 56 | } 57 | 58 | if (!result.count("server")) 59 | { 60 | throw cxxopts::OptionException("input must be specified"); 61 | } 62 | 63 | 64 | 65 | return result; 66 | 67 | } catch (const cxxopts::OptionException& e) 68 | { 69 | std::cout << "error parsing options: " << e.what() << std::endl; 70 | std::cout << options.help({""}) << std::endl; 71 | exit(1); 72 | } 73 | } 74 | 75 | 76 | int main(int argc, char* argv[]){ 77 | 78 | 79 | auto allparams = parse(argc,argv); 80 | 81 | log_fp=stdout; 82 | 83 | std::string server = allparams["server"].as(); 84 | uint32_t size = allparams["size"].as(); 85 | 86 | ClientRDMA rdma((char*)server.c_str(),9999); 87 | struct rdma_cm_id * id = rdma.sendConnectRequest(); 88 | 89 | struct ibv_pd * pd = ClientRDMA::create_pd(id); 90 | 91 | struct ibv_qp_init_attr attr; 92 | struct rdma_conn_param conn_param; 
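// Set up a reliable-connection (RC) queue pair with small send/receive queues and dedicated completion queues before connecting to the CoRM server.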
93 | memset(&attr, 0, sizeof(attr)); 94 | attr.cap.max_send_wr = 32; 95 | attr.cap.max_recv_wr = 32; 96 | attr.cap.max_send_sge = 1; 97 | attr.cap.max_recv_sge = 1; 98 | attr.cap.max_inline_data = 0; 99 | attr.qp_type = IBV_QPT_RC; 100 | 101 | attr.send_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0); 102 | attr.recv_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0); 103 | 104 | memset(&conn_param, 0 , sizeof(conn_param)); 105 | conn_param.responder_resources = 0; 106 | conn_param.initiator_depth = 5; 107 | conn_param.retry_count = 3; 108 | conn_param.rnr_retry_count = 3; 109 | 110 | VerbsEP* ep = ClientRDMA::connectEP(id, &attr, &conn_param, pd); 111 | 112 | printf("Connected\n"); 113 | sleep(1); 114 | 115 | RemoteMemoryClient* RMAPI = new RemoteMemoryClient(0,ep); 116 | 117 | 118 | uint8_t slot_type = SizeTable::getInstance().GetClassFromUserSize(size); 119 | printf("Trigger compaction for size-class %u \n",slot_type); 120 | 121 | RMAPI->TriggerCompaction(slot_type, true, true); 122 | 123 | sleep(0.5); 124 | printf("done one\n"); 125 | 126 | return 0; 127 | } 128 | -------------------------------------------------------------------------------- /compaction_latency.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple code to measure compaction latency. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #include // std::cout 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | FILE *log_fp; 26 | 27 | 28 | #include "worker/client_api.hpp" 29 | #include "rdma/connectRDMA.hpp" 30 | 31 | #include "utilities/cxxopts.hpp" 32 | 33 | 34 | 35 | uint64_t num; 36 | 37 | cxxopts::ParseResult 38 | parse(int argc, char* argv[]) 39 | { 40 | cxxopts::Options options(argv[0], "Measure compaction latency"); 41 | options 42 | .positional_help("[optional args]") 43 | .show_positional_help(); 44 | 45 | try 46 | { 47 | 48 | options.add_options() 49 | ("server", "Another address", cxxopts::value(), "IP") 50 | ("t,threads", "the number of remote threads", cxxopts::value()->default_value(std::to_string(1)), "N") 51 | ("n,num", "Number of requests to run", cxxopts::value()->default_value("123"), "N") 52 | ("size", "objects size", cxxopts::value()->default_value("24"), "N") 53 | ("compaction", "enable compact objects") 54 | ("collection", "enable collection objects") 55 | ("help", "Print help") 56 | ; 57 | 58 | auto result = options.parse(argc, argv); 59 | 60 | if (result.count("help")) 61 | { 62 | std::cout << options.help({""}) << std::endl; 63 | exit(0); 64 | } 65 | 66 | if (!result.count("server")) 67 | { 68 | throw cxxopts::OptionException("input must be specified"); 69 | } 70 | 71 | 72 | 73 | return result; 74 | 75 | } catch (const cxxopts::OptionException& e) 76 | { 77 | std::cout << "error parsing options: " << e.what() << std::endl; 78 | std::cout << options.help({""}) << std::endl; 79 | exit(1); 80 | } 81 | } 82 | 83 | 84 | int main(int argc, char* argv[]){ 85 | 86 | 87 | auto allparams = parse(argc,argv); 88 | 89 | log_fp=stdout; 90 | 91 | std::string server = allparams["server"].as(); 92 | uint32_t threads = allparams["threads"].as(); 93 | uint32_t size = allparams["size"].as(); 94 | num = allparams["num"].as(); 95 | 96 | bool with_compaction = allparams.count("compaction"); 97 | bool 
with_collection = allparams.count("collection"); 98 | 99 | ClientRDMA rdma((char*)server.c_str(),9999); 100 | struct rdma_cm_id * id = rdma.sendConnectRequest(); 101 | 102 | struct ibv_pd * pd = ClientRDMA::create_pd(id); 103 | 104 | struct ibv_qp_init_attr attr; 105 | struct rdma_conn_param conn_param; 106 | memset(&attr, 0, sizeof(attr)); 107 | attr.cap.max_send_wr = 32; 108 | attr.cap.max_recv_wr = 32; 109 | attr.cap.max_send_sge = 1; 110 | attr.cap.max_recv_sge = 1; 111 | attr.cap.max_inline_data = 0; 112 | attr.qp_type = IBV_QPT_RC; 113 | 114 | attr.send_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0); 115 | attr.recv_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0); 116 | 117 | memset(&conn_param, 0 , sizeof(conn_param)); 118 | conn_param.responder_resources = 0; 119 | conn_param.initiator_depth = 5; 120 | conn_param.retry_count = 3; 121 | conn_param.rnr_retry_count = 3; 122 | 123 | std::vector conns; 124 | 125 | conns.push_back(ClientRDMA::connectEP(id, &attr, &conn_param, pd)); 126 | 127 | for(uint32_t i = 1 ; i < threads; i++){ 128 | struct rdma_cm_id * tid = rdma.sendConnectRequest(); 129 | attr.send_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0); 130 | attr.recv_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0); 131 | conns.push_back(ClientRDMA::connectEP(tid, &attr, &conn_param, pd)); 132 | } 133 | 134 | if(threads>1){ 135 | assert(conns[0]->qp->send_cq != conns[1]->qp->send_cq && "Different connections must use Different CQ") ; 136 | } 137 | 138 | printf("Connected\n"); 139 | sleep(1); 140 | 141 | std::vector RMAPI; 142 | for( auto ep:conns ){ 143 | RMAPI.push_back(new RemoteMemoryClient(0,ep)); 144 | } 145 | 146 | 147 | printf("Start compaction test \n"); 148 | for(uint32_t i = 0; i< num; i++){ 149 | std::vector objects; 150 | 151 | for( auto x : RMAPI ){ 152 | LocalObjectHandler* obj1 = x->Alloc(size); 153 | objects.push_back(obj1); 154 | } 155 | uint8_t slot_type = objects[0]->addr.comp.type; 156 | RMAPI[0]->TriggerCompaction(slot_type,with_collection, with_compaction); 157 | sleep(1.2); 158 | for(uint32_t i = 0; i < threads; i++){ 159 | RMAPI[i]->Free(objects[i]); 160 | } 161 | sleep(0.5); 162 | printf("done one\n"); 163 | } 164 | 165 | printf("Done compaction test\n"); 166 | 167 | return 0; 168 | } 169 | -------------------------------------------------------------------------------- /core.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # CoRM: Compactable Remote Memory over RDMA 5 | # 6 | # Help functions to deploy CoRM 7 | # 8 | # Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
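#
# Usage sketch (illustrative only; the IP address and sizes are placeholders,
# not values taken from this repository):
#
#   source core.sh
#   startCorm "10.0.0.1" "--threads=8"    # start the CoRM server over ssh
#   loadCorm 128 100000                   # load 100000 objects of 128 bytes
#   runLatency 128 1000 latency.txt       # run the latency client locally
#   killCorm                              # kill the remote server process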
9 | # 10 | # Author(s): Konstantin Taranov 11 | # 12 | 13 | 14 | 15 | define(){ IFS='\n' read -r -d '' ${1} || true; } 16 | redirection=( "> out" "2> err" "< /dev/null" ) 17 | 18 | declare -A processPids 19 | 20 | __count_process=0 21 | __corm="" 22 | 23 | 24 | WORKDIR="$PWD" 25 | LOCALWORKDIR="$PWD" 26 | __VERBOSE=1 27 | 28 | function log () { 29 | if [[ $__VERBOSE -ge 1 ]]; then 30 | echo -e "$@" 31 | fi 32 | } 33 | 34 | function debug () { 35 | if [[ $__VERBOSE -ge 2 ]]; then 36 | echo -e "$@" 37 | fi 38 | } 39 | 40 | scpFileTo(){ 41 | local server="$1" 42 | local filename="$2" 43 | local cmd=( "scp" "$2" "$USER@$server:${WORKDIR}/" ) 44 | debug "\t\tExecuting: ${cmd[@]}" 45 | $("${cmd[@]}") 46 | } 47 | 48 | scpFileFrom(){ 49 | local server="$1" 50 | local filename="$2" 51 | local cmd=("scp" "$USER@$server:${WORKDIR}/$2" ./) 52 | debug "\t\tExecuting: ${cmd[@]}" 53 | $("${cmd[@]}") 54 | } 55 | 56 | sshCommandAsync() { 57 | local server=$1 58 | local command=$2 59 | local valredirect="${redirection[@]}" 60 | if ! [[ -z $3 ]] 61 | then 62 | valredirect="> "$3" 2>/dev/null" 63 | fi 64 | local cmd=( "ssh" "-oStrictHostKeyChecking=no" "$USER@$server" "nohup" "$command" "$valredirect" "&" "echo \$!" ) 65 | local pid=$("${cmd[@]}") 66 | echo "$pid" 67 | } 68 | 69 | sshCommandSync() { 70 | local server="$1" 71 | local command="$2" 72 | local valredirect="${redirection[@]}" 73 | if ! [[ -z $3 ]] 74 | then 75 | valredirect="> "$3" 2>/dev/null" 76 | fi 77 | local cmd=( "ssh" "-oStrictHostKeyChecking=no" "$USER@$server" "$command" "$valredirect" ) 78 | debug "\t\tExecuting: ${cmd[@]}" 79 | $("${cmd[@]}") 80 | } 81 | 82 | sshKillCommand() { 83 | local server=$1 84 | local pid=$2 85 | cmd=( "ssh" "$USER@$server" "kill -9" "${pid}" ) 86 | debug "\t\tExecuting: ${cmd[@]}" 87 | $("${cmd[@]}") 88 | } 89 | 90 | sshStopCommand() { 91 | local server=$1 92 | local pid=$2 93 | cmd=( "ssh" "$USER@$server" "kill -2" "${pid}" ) 94 | debug "\t\tExecuting: ${cmd[@]}" 95 | $("${cmd[@]}") 96 | } 97 | 98 | startCorm(){ 99 | local server=$1 100 | local params=$2 101 | local pid=$(sshCommandAsync $server "${WORKDIR}/server --server=$server ${params}") 102 | log "\tCorm is started at ${server} with PID$pid and params ${params}" 103 | 104 | __corm="$server,$pid" 105 | __corm_server="$server" 106 | } 107 | 108 | loadCorm(){ 109 | local size=$1 110 | local num=$2 111 | local comm="$WORKDIR/load --server=${__corm_server} --num=$num --size=$size --threads=8 --randomize" 112 | log "\tStart loading server with $num elements with user size: $size" 113 | ${comm} 114 | log "\t Loading is done" 115 | } 116 | 117 | unloadCorm(){ 118 | local num=$1 119 | local comm="$WORKDIR/unload --server=${__corm_server} --num=$num " 120 | log "\tStart unloading server with $num elements" 121 | ${comm} 122 | log "\t unLoading is done" 123 | } 124 | 125 | killCorm(){ 126 | local servername=$( echo $__corm | cut -d, -f1) 127 | local pid=$( echo $__corm | cut -d, -f2) 128 | sshKillCommand $servername $pid 129 | log "\tCorm is killed at $servername" 130 | } 131 | 132 | stopCorm(){ 133 | local servername=$( echo $__corm | cut -d, -f1) 134 | local pid=$( echo $__corm | cut -d, -f2) 135 | sshStopCommand $servername $pid 136 | log "\tCorm is stoppped at $servername" 137 | } 138 | 139 | killAllProcesses(){ 140 | echo "try to kill ${__count_process} processes" 141 | echo "the dict has ${!processPids[@]} entries" 142 | for id in "${!processPids[@]}" 143 | do 144 | local temp=${processPids[$id]} 145 | local servername=$( echo $temp | cut -d, -f1) 146 | local 
pid=$( echo $temp | cut -d, -f2) 147 | sshKillCommand $servername $pid 148 | log "\tClient is killed at $servername" 149 | done 150 | processPids=() 151 | __count_process=0 152 | } 153 | 154 | runLatency(){ 155 | local size=$1 156 | local num=$2 157 | local filename=$3 158 | 159 | debug "runLatency " 160 | local comm="$WORKDIR/latency --server=${__corm_server} --num=$num --size=$size" 161 | log "\tStart runLatency with size=$size" 162 | ${comm} > $filename 163 | log "\t runLatency is done" 164 | } 165 | 166 | runWorkloadRemoteAsync(){ 167 | local server=$1 168 | local prob=$2 169 | local num=$3 170 | local threads=$4 171 | local seed=$5 172 | local flags=$6 173 | local filename=$7 174 | local comm="$WORKDIR/workload_readwrite --server=${__corm_server} --target=2000000 \ 175 | --prob=${prob} --num=${num} --threads=${threads} --seed=${seed} --input=${WORKDIR}/test.bin ${flags}" 176 | echo "$comm" 177 | local pid=$(sshCommandAsync $server "$comm" $filename) 178 | log "\tWorkload is started at ${server} with PID$pid" 179 | processPids[${__count_process}]="$server,$pid" 180 | __count_process=$((${__count_process}+1)) 181 | } 182 | -------------------------------------------------------------------------------- /latency.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple code to measuring latency of various requests. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #include // std::cout 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | 23 | #include "thread/thread.hpp" 24 | #include "utilities/timer.h" 25 | 26 | FILE *log_fp; 27 | 28 | #include "worker/client_api.hpp" 29 | #include "rdma/connectRDMA.hpp" 30 | 31 | 32 | #include "utilities/cxxopts.hpp" 33 | 34 | 35 | cxxopts::ParseResult 36 | parse(int argc, char* argv[]) 37 | { 38 | cxxopts::Options options(argv[0], "Various latency test for CoRM"); 39 | options 40 | .positional_help("[optional args]") 41 | .show_positional_help(); 42 | 43 | try 44 | { 45 | 46 | options.add_options() 47 | ("a,server", "Another address", cxxopts::value(), "IP") 48 | ("s,size", "Object size", cxxopts::value()->default_value("8"), "N") 49 | ("n,num", "Number of tests", cxxopts::value()->default_value("1024"), "N") 50 | ("help", "Print help") 51 | ; 52 | 53 | auto result = options.parse(argc, argv); 54 | 55 | if (result.count("help")) 56 | { 57 | std::cout << options.help({""}) << std::endl; 58 | exit(0); 59 | } 60 | 61 | if (!result.count("server")) 62 | { 63 | throw cxxopts::OptionException("input must be specified"); 64 | } 65 | 66 | 67 | 68 | return result; 69 | 70 | } catch (const cxxopts::OptionException& e) 71 | { 72 | std::cout << "error parsing options: " << e.what() << std::endl; 73 | std::cout << options.help({""}) << std::endl; 74 | exit(1); 75 | } 76 | } 77 | 78 | 79 | 80 | int main(int argc, char* argv[]){ 81 | auto allparams = parse(argc,argv); 82 | 83 | log_fp=stdout; 84 | 85 | std::string server = allparams["server"].as(); 86 | uint32_t size = allparams["size"].as(); 87 | uint32_t N = allparams["num"].as(); 88 | 89 | 90 | ClientRDMA rdma((char*)server.c_str(),9999); 91 | struct rdma_cm_id * id = rdma.sendConnectRequest(); 92 | 93 | struct ibv_pd * pd = ClientRDMA::create_pd(id); 94 | 95 | struct ibv_qp_init_attr attr; 96 | struct rdma_conn_param conn_param; 97 | memset(&attr, 
0, sizeof(attr)); 98 | attr.cap.max_send_wr = 32; 99 | attr.cap.max_recv_wr = 32; 100 | attr.cap.max_send_sge = 1; 101 | attr.cap.max_recv_sge = 1; 102 | attr.cap.max_inline_data = 0; 103 | attr.qp_type = IBV_QPT_RC; 104 | 105 | memset(&conn_param, 0 , sizeof(conn_param)); 106 | conn_param.responder_resources = 0; 107 | conn_param.initiator_depth = 5; 108 | conn_param.retry_count = 3; 109 | conn_param.rnr_retry_count = 3; 110 | 111 | 112 | VerbsEP* ep = ClientRDMA::connectEP(id, &attr, &conn_param, pd); 113 | 114 | printf("Connected\n"); 115 | sleep(1); 116 | 117 | RemoteMemoryClient* RMAPI = new RemoteMemoryClient(0,ep); 118 | 119 | 120 | std::vector time_alloc; 121 | std::vector time_free; 122 | time_alloc.reserve(N); 123 | time_free.reserve(N); 124 | 125 | printf("Start latency test for allocation/deallocation\n"); 126 | for(uint32_t i = 0; i< N; i++){ 127 | 128 | auto t1 = std::chrono::high_resolution_clock::now(); 129 | LocalObjectHandler* obj1 = RMAPI->Alloc(size); 130 | auto t2 = std::chrono::high_resolution_clock::now(); 131 | RMAPI->Free(obj1); 132 | auto t3 = std::chrono::high_resolution_clock::now(); 133 | 134 | auto alloc_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 135 | auto free_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t3 - t2 ).count(); 136 | time_alloc.push_back(alloc_nano / (float)1000.0 ); 137 | time_free.push_back(free_nano / (float)1000.0 ); 138 | } 139 | 140 | printf("Start latency test for read/Write\n"); 141 | std::vector time_read; 142 | std::vector time_read_rdma; 143 | std::vector time_write; 144 | time_read.reserve(N); 145 | time_read_rdma.reserve(N); 146 | time_write.reserve(N); 147 | 148 | char* buffer = (char*)malloc(size); 149 | LocalObjectHandler* obj1 = RMAPI->Alloc(size); 150 | for(uint32_t i = 0; i< N; i++){ 151 | auto t1 = std::chrono::high_resolution_clock::now(); 152 | RMAPI->Read(obj1, buffer, size); 153 | auto t2 = std::chrono::high_resolution_clock::now(); 154 | auto read_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 155 | time_read.push_back(read_nano / (float)1000.0 ); 156 | } 157 | 158 | for(uint32_t i = 0; i< N; i++){ 159 | auto t1 = std::chrono::high_resolution_clock::now(); 160 | RMAPI->ReadOneSided(obj1, buffer, size); 161 | auto t2 = std::chrono::high_resolution_clock::now(); 162 | auto read_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 163 | time_read_rdma.push_back(read_nano / (float)1000.0 ); 164 | } 165 | 166 | for(uint32_t i = 0; i< N; i++){ 167 | auto t1 = std::chrono::high_resolution_clock::now(); 168 | RMAPI->Write(obj1, buffer, size, false); 169 | auto t2 = std::chrono::high_resolution_clock::now(); 170 | auto write_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 171 | time_write.push_back(write_nano / (float)1000.0 ); 172 | } 173 | 174 | 175 | printf("Start latency test for fixpointer\n"); 176 | std::vector time_fix; 177 | std::vector time_fix_read; 178 | std::vector time_fix_read_rdma; 179 | std::vector time_fix_write; 180 | time_fix_write.reserve(N); 181 | time_fix_read.reserve(N); 182 | time_fix_read_rdma.reserve(N); 183 | time_fix.reserve(N); 184 | 185 | LocalObjectHandler* obj2 = RMAPI->Alloc(size); 186 | uint64_t direct_addr = obj2->addr.comp.addr; 187 | uint64_t base_addr = GetVirtBaseAddr(obj2->addr.comp.addr); 188 | if(base_addr == direct_addr){ 189 | printf("Warning! Object was block aligned! 
the test is not valid for it!\n"); 190 | } 191 | if(obj2->addr.comp.rkey==0) printf("zero rkey;"); 192 | printf("Start latency test for fix read\n"); 193 | for(uint32_t i = 0; i< N; i++){ 194 | obj2->addr.comp.addr = base_addr; // set wrong offset. so we will need to find the object by ID 195 | auto t1 = std::chrono::high_resolution_clock::now(); 196 | RMAPI->Read(obj2, buffer, size); 197 | auto t2 = std::chrono::high_resolution_clock::now(); 198 | auto read_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 199 | time_fix_read.push_back(read_nano / (float)1000.0 ); 200 | } 201 | if(obj2->addr.comp.rkey==0) printf("zero rkey;"); 202 | 203 | printf("Start latency test for fix read onesided\n"); 204 | for(uint32_t i = 0; i< N; i++){ 205 | obj2->addr.comp.addr = base_addr; 206 | auto t1 = std::chrono::high_resolution_clock::now(); 207 | int ret = RMAPI->ReadOneSided(obj2, buffer, size); 208 | if(ret==NOT_FOUND){ 209 | RMAPI->ReadOneSidedFix(obj2, buffer, size); 210 | } 211 | auto t2 = std::chrono::high_resolution_clock::now(); 212 | auto read_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 213 | time_fix_read_rdma.push_back(read_nano / (float)1000.0 ); 214 | } 215 | printf("Start latency test for fix write\n"); 216 | for(uint32_t i = 0; i< N; i++){ 217 | obj2->addr.comp.addr = base_addr; 218 | auto t1 = std::chrono::high_resolution_clock::now(); 219 | RMAPI->Write(obj2, buffer, size, false); 220 | auto t2 = std::chrono::high_resolution_clock::now(); 221 | auto write_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 222 | time_fix_write.push_back(write_nano / (float)1000.0 ); 223 | } 224 | 225 | printf("Start latency test for fix fix pointer\n"); 226 | for(uint32_t i = 0; i< N; i++){ 227 | obj2->addr.comp.addr = base_addr; 228 | auto t1 = std::chrono::high_resolution_clock::now(); 229 | RMAPI->FixPointer(obj2); 230 | auto t2 = std::chrono::high_resolution_clock::now(); 231 | auto fix_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 232 | time_fix.push_back(fix_nano / (float)1000.0 ); 233 | } 234 | 235 | 236 | 237 | std::vector time_rpc; 238 | std::vector time_rdma; 239 | time_rpc.reserve(N); 240 | time_rdma.reserve(N); 241 | 242 | printf("Start latency test for RDMA rpc and onesided\n"); 243 | for(uint32_t i = 0; i< N; i++){ 244 | 245 | auto t1 = std::chrono::high_resolution_clock::now(); 246 | RMAPI->RpcFake(buffer,size); 247 | auto t2 = std::chrono::high_resolution_clock::now(); 248 | RMAPI->ReadOneSidedFake(obj1,buffer,size); 249 | auto t3 = std::chrono::high_resolution_clock::now(); 250 | 251 | auto rpc_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count(); 252 | auto rdma_nano = std::chrono::duration_cast< std::chrono::nanoseconds >( t3 - t2 ).count(); 253 | time_rpc.push_back(rpc_nano / (float)1000.0 ); 254 | time_rdma.push_back(rdma_nano / (float)1000.0 ); 255 | } 256 | 257 | 258 | 259 | printf("Done\n"); 260 | RMAPI->Free(obj1); 261 | RMAPI->Free(obj2); 262 | 263 | printf("alloc: "); 264 | for(auto &x : time_alloc){ 265 | printf("%.2f ",x); 266 | } 267 | printf("\nfree: "); 268 | for(auto &x : time_free){ 269 | printf("%.2f ",x); 270 | } 271 | printf("\nread: "); 272 | for(auto &x : time_read){ 273 | printf("%.2f ",x); 274 | } 275 | printf("\nreadrdma: "); 276 | for(auto &x : time_read_rdma){ 277 | printf("%.2f ",x); 278 | } 279 | printf("\nwrite: "); 280 | for(auto &x : time_write){ 281 | printf("%.2f ",x); 282 | } 283 | 
printf("\nfixread: "); 284 | for(auto &x : time_fix_read){ 285 | printf("%.2f ",x); 286 | } 287 | printf("\nfixreadrdma: "); 288 | for(auto &x : time_fix_read_rdma){ 289 | printf("%.2f ",x); 290 | } 291 | printf("\nfixwrite: "); 292 | for(auto &x : time_fix_write){ 293 | printf("%.2f ",x); 294 | } 295 | printf("\nfixfix: "); 296 | for(auto &x : time_fix){ 297 | printf("%.2f ",x); 298 | } 299 | printf("\nrpc: "); 300 | for(auto &x : time_rpc){ 301 | printf("%.2f ",x); 302 | } 303 | printf("\nrdma: "); 304 | for(auto &x : time_rdma){ 305 | printf("%.2f ",x); 306 | } 307 | printf("\n"); 308 | free(buffer); 309 | return 0; 310 | } 311 | -------------------------------------------------------------------------------- /load.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple code for loading CoRM 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #include // std::cout 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "thread/thread.hpp" 24 | FILE *log_fp; 25 | 26 | #include "worker/client_api.hpp" 27 | #include "rdma/connectRDMA.hpp" 28 | 29 | #include "utilities/cxxopts.hpp" 30 | 31 | 32 | cxxopts::ParseResult 33 | parse(int argc, char* argv[]) 34 | { 35 | cxxopts::Options options(argv[0], "load data to CoRM"); 36 | options 37 | .positional_help("[optional args]") 38 | .show_positional_help(); 39 | 40 | try 41 | { 42 | 43 | options.add_options() 44 | ("a,server", "Another address", cxxopts::value(), "IP") 45 | ("threads", "Total threads CoRM has", cxxopts::value()->default_value(std::to_string(1)), "N") 46 | ("o,output", "Output file", cxxopts::value()->default_value("test.bin"), "FILE") 47 | ("size", "Object size in bytes", cxxopts::value()->default_value("123"), "N") 48 | ("n,num", "Number of objects to allocate", cxxopts::value()->default_value("123"), "N") 49 | ("unload", "Number of objects to deallocate", cxxopts::value()->default_value("0"), "N") 50 | ("randomize","randomize objects") 51 | ("help", "Print help") 52 | ; 53 | 54 | auto result = options.parse(argc, argv); 55 | 56 | if (result.count("help")) 57 | { 58 | std::cout << options.help({""}) << std::endl; 59 | exit(0); 60 | } 61 | 62 | if (!result.count("server")) 63 | { 64 | throw cxxopts::OptionException("input must be specified"); 65 | } 66 | 67 | return result; 68 | 69 | } catch (const cxxopts::OptionException& e) 70 | { 71 | std::cout << "error parsing options: " << e.what() << std::endl; 72 | std::cout << options.help({""}) << std::endl; 73 | exit(1); 74 | } 75 | } 76 | 77 | int main(int argc, char* argv[]){ 78 | 79 | 80 | auto allparams = parse(argc,argv); 81 | 82 | log_fp=stdout; 83 | 84 | std::string server = allparams["server"].as(); 85 | 86 | uint32_t threads = allparams["threads"].as(); 87 | uint32_t N = allparams["num"].as(); 88 | uint32_t size = allparams["size"].as(); 89 | std::string output = allparams["output"].as(); 90 | 91 | 92 | ClientRDMA rdma((char*)server.c_str(),9999); 93 | struct rdma_cm_id * id = rdma.sendConnectRequest(); 94 | 95 | struct ibv_pd * pd = ClientRDMA::create_pd(id); 96 | 97 | struct ibv_qp_init_attr attr; 98 | struct rdma_conn_param conn_param; 99 | memset(&attr, 0, sizeof(attr)); 100 | attr.cap.max_send_wr = 32; 101 | attr.cap.max_recv_wr = 32; 102 | attr.cap.max_send_sge = 1; 103 | 
attr.cap.max_recv_sge = 1; 104 | attr.cap.max_inline_data = 0; 105 | attr.qp_type = IBV_QPT_RC; 106 | 107 | memset(&conn_param, 0 , sizeof(conn_param)); 108 | conn_param.responder_resources = 0; 109 | conn_param.initiator_depth = 5; 110 | conn_param.retry_count = 3; 111 | conn_param.rnr_retry_count = 3; 112 | 113 | 114 | std::vector connections(threads,NULL); 115 | 116 | connections[0] = ClientRDMA::connectEP(id, &attr, &conn_param, pd); 117 | 118 | for(uint32_t i =1; i apis(threads); 127 | for(uint32_t i = 0; i objects; 133 | 134 | for(uint32_t i = 0; i < N; i++){ 135 | objects.push_back(apis[i%threads]->Alloc(size)); 136 | } 137 | 138 | 139 | 140 | if(allparams.count("randomize")){ 141 | std::random_shuffle(objects.begin(), objects.end()); 142 | } 143 | 144 | uint32_t unload = allparams["unload"].as(); 145 | if(unload > 0){ 146 | std::random_shuffle(objects.begin(), objects.end()); 147 | 148 | for(uint32_t i = 0; i < unload; i++){ 149 | apis[0]->Free(objects[i]); 150 | free(objects[i]); 151 | } 152 | } 153 | 154 | std::fstream fout; 155 | fout.open(output.c_str(), std::ios::out|std::ios::trunc|std::ios::binary); 156 | 157 | 158 | uint32_t rest = N - unload; 159 | fout.write((char*)&rest,sizeof(rest)); 160 | for(uint32_t i = 0; i < rest; i++){ 161 | fout.write((char*)objects[unload+i],sizeof(LocalObjectHandler)); 162 | } 163 | 164 | fout.close(); 165 | 166 | printf("Object keys are written to file %s\n", output.c_str()); 167 | 168 | return 0; 169 | } 170 | -------------------------------------------------------------------------------- /local_read_benchmark.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple code for measuring read local bandwidth 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
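 *
 * What the code below does: it fills roughly 1 GiB of superblocks with objects
 * of the requested size class, generates a uniform access trace over all
 * slots, and then times three local read paths back to back: the CoRM read
 * (client_read_object_to_buffer_lim), the FaRM-style read
 * (client_read_object_farm), and the Mesh-style read (client_read_fast).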
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #include // std::cout 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | FILE *log_fp; 24 | 25 | #include "alloc/block_alloc.hpp" 26 | #include 27 | 28 | #include 29 | #include "alloc/thread_alloc.hpp" 30 | #include "worker/worker.hpp" 31 | #include "rdma/rdma_memory_manager.hpp" 32 | 33 | #include "utilities/zipf.hpp" 34 | #include "utilities/ycsb.hpp" 35 | 36 | #include "utilities/cxxopts.hpp" 37 | 38 | 39 | 40 | cxxopts::ParseResult 41 | parse(int argc, char* argv[]) 42 | { 43 | cxxopts::Options options(argv[0], "simple read local bandwidth benchmark"); 44 | options 45 | .positional_help("[optional args]") 46 | .show_positional_help(); 47 | 48 | try 49 | { 50 | 51 | options.add_options() 52 | ("size", "user entry size", cxxopts::value()->default_value(std::to_string(8)), "N") 53 | ("help", "Print help") 54 | ; 55 | 56 | auto result = options.parse(argc, argv); 57 | 58 | if (result.count("help")) 59 | { 60 | std::cout << options.help({""}) << std::endl; 61 | exit(0); 62 | } 63 | 64 | return result; 65 | 66 | } catch (const cxxopts::OptionException& e) 67 | { 68 | std::cout << "error parsing options: " << e.what() << std::endl; 69 | std::cout << options.help({""}) << std::endl; 70 | exit(1); 71 | } 72 | } 73 | 74 | 75 | int main(int argc, char* argv[]) 76 | { 77 | 78 | auto allparams = parse(argc,argv); 79 | 80 | log_fp = stdout; 81 | uint32_t user_size = allparams["size"].as(); 82 | 83 | size_t totest = 1024*1024*1024*1ULL; // 1 GiB 84 | size_t sbsize = BLOCKS_IN_SUPERBLOCK * BLOCK_SIZE; 85 | 86 | uint8_t type = SizeTable::getInstance().GetClassFromUserSize(user_size); 87 | uint16_t slot_size = SizeTable::getInstance().GetRealSize(type); 88 | 89 | uint32_t slotsperblock = BLOCK_SIZE / slot_size; 90 | 91 | printf("entry size %u\n",type); 92 | 93 | std::vector superblocks; 94 | 95 | for(size_t i=1; i <= totest/sbsize; i++){ 96 | superblocks.push_back(new SuperBlock(i)); 97 | } 98 | 99 | printf("total superblocks %lu\n",superblocks.size()); 100 | 101 | std::vector blocks; 102 | std::vector addresses; 103 | 104 | uint8_t version = 0; 105 | uint16_t object_id = 1; 106 | 107 | for( auto & sb : superblocks){ 108 | char* src = (char*)malloc(user_size); 109 | while(sb->hasFreeBlocks()){ 110 | Block *b = sb->allocateBlock(); 111 | uint64_t alligned_addr = b->CreateNewAddr(); 112 | blocks.push_back(b); 113 | addresses.push_back(alligned_addr); 114 | 115 | ReaderWriter::WriteBlockHeader(alligned_addr, type , 0); 116 | for(uint32_t i =0; i< slotsperblock; i++){ 117 | 118 | ReaderWriter::SetNewObject(alligned_addr + i*slot_size, object_id, &version ); 119 | 120 | ReaderWriter::WriteBufToObject(alligned_addr + i*slot_size, object_id, slot_size, 121 | src, user_size, NULL, NULL); 122 | } 123 | } 124 | free(src); 125 | } 126 | printf("total blocks %lu\n",blocks.size()); 127 | uint64_t totalobj = blocks.size()*slotsperblock; 128 | printf("total objects %lu\n",totalobj); 129 | 130 | Trace *trace = new Uniform(10,0.0,(uint32_t)totalobj); 131 | 132 | char* dest = (char*)malloc(user_size); 133 | 134 | std::vector accesses; 135 | 136 | uint32_t maxi = totest*4/slot_size; 137 | 138 | for(uint32_t i = 0; i< maxi; i++){ 139 | uint32_t id = trace->get_next().first; 140 | uint64_t alligned_addr = addresses[id/slotsperblock] + (id%slotsperblock)*slot_size; 141 | accesses.push_back(alligned_addr); 142 | } 143 | 144 | 145 | 146 | { 147 | auto t1 = 
std::chrono::high_resolution_clock::now(); 148 | for(uint32_t i = 0; i< maxi; i++){ 149 | uint64_t alligned_addr = accesses[i]; 150 | uint32_t lim_size = user_size; 151 | ReaderWriter::client_read_object_to_buffer_lim((uint64_t)dest, (uint64_t)alligned_addr, 152 | (uint64_t)alligned_addr, slot_size,object_id, &lim_size); 153 | } 154 | auto t2 = std::chrono::high_resolution_clock::now(); 155 | printf("CoRM Time: %lu\n", std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count() ); 156 | } 157 | 158 | { 159 | auto t1 = std::chrono::high_resolution_clock::now(); 160 | for(uint32_t i = 0; i< maxi; i++){ 161 | uint64_t alligned_addr = accesses[i]; 162 | uint32_t lim_size = user_size; 163 | ReaderWriter::client_read_object_farm((uint64_t)dest, (uint64_t)alligned_addr, 164 | (uint64_t)alligned_addr, slot_size, &lim_size); 165 | } 166 | auto t2 = std::chrono::high_resolution_clock::now(); 167 | printf("Farm Time: %lu\n", std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count() ); 168 | } 169 | 170 | { 171 | auto t1 = std::chrono::high_resolution_clock::now(); 172 | for(uint32_t i = 0; i< maxi; i++){ 173 | uint64_t alligned_addr = accesses[i]; 174 | uint32_t lim_size = user_size; 175 | ReaderWriter::client_read_fast((uint64_t)dest, (uint64_t)alligned_addr, slot_size, &lim_size); 176 | } 177 | auto t2 = std::chrono::high_resolution_clock::now(); 178 | printf("Mesh Time: %lu\n", std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count() ); 179 | } 180 | 181 | return 0; 182 | } 183 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple code for launching CoRM 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #include // std::cout 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | FILE *log_fp; 24 | 25 | #include "alloc/block_alloc.hpp" 26 | #include 27 | 28 | #include 29 | #include "alloc/thread_alloc.hpp" 30 | #include "worker/worker.hpp" 31 | #include "rdma/rdma_memory_manager.hpp" 32 | 33 | 34 | 35 | #include "utilities/cxxopts.hpp" 36 | 37 | 38 | 39 | cxxopts::ParseResult 40 | parse(int argc, char* argv[]) 41 | { 42 | cxxopts::Options options(argv[0], "Launch CoRM server"); 43 | options 44 | .positional_help("[optional args]") 45 | .show_positional_help(); 46 | 47 | try 48 | { 49 | 50 | options.add_options() 51 | ("a,server", "server address. 
I use port 9999", cxxopts::value(), "IP") 52 | ("threads", "Total threads CoRM has", cxxopts::value()->default_value(std::to_string(1)), "N") 53 | ("thclass", "threshold class", cxxopts::value()->default_value(std::to_string(50)), "N") 54 | ("thpopularity", "threshold popularity", cxxopts::value()->default_value(std::to_string(100)), "N") 55 | ("preallocate", "preallocate superblocks", cxxopts::value()->default_value(std::to_string(1)), "N") 56 | ("num_recv_buf", "Number of receive buffers per thread", cxxopts::value()->default_value(std::to_string(256)), "N") 57 | ("recv_buf_size", "The size of each receive buffer", cxxopts::value()->default_value(std::to_string(2048)), "N") 58 | ("send_buf_size", "The total size of send buffer per thread", cxxopts::value()->default_value(std::to_string(1024*16)), "N") 59 | ("log_file", "output file", cxxopts::value(), "file") 60 | ("odp", "enable ODP with prefetch if supported") 61 | ("help", "Print help") 62 | ; 63 | 64 | auto result = options.parse(argc, argv); 65 | 66 | if (result.count("help")) 67 | { 68 | std::cout << options.help({""}) << std::endl; 69 | exit(0); 70 | } 71 | 72 | if (!result.count("server")) 73 | { 74 | throw cxxopts::OptionException("input must be specified"); 75 | } 76 | 77 | return result; 78 | 79 | } catch (const cxxopts::OptionException& e) 80 | { 81 | std::cout << "error parsing options: " << e.what() << std::endl; 82 | std::cout << options.help({""}) << std::endl; 83 | exit(1); 84 | } 85 | } 86 | 87 | 88 | int main(int argc, char* argv[]) 89 | { 90 | 91 | auto allparams = parse(argc,argv); 92 | if(allparams.count("log_file")){ 93 | std::string name = allparams["log_file"].as(); 94 | log_fp = fopen(name.c_str(), "w+"); 95 | if (log_fp==NULL) { 96 | printf("Cannot open log file\n"); 97 | exit(1); 98 | } 99 | } else { 100 | log_fp = stdout; 101 | } 102 | 103 | std::string ip = allparams["server"].as(); 104 | uint32_t threads = allparams["threads"].as(); 105 | 106 | uint32_t total_thread_num = threads; 107 | LauncherMaster *m = new LauncherMaster(total_thread_num); // 10 threads 108 | 109 | ServerRDMA *server = new ServerRDMA((char *)ip.c_str(), 9999); 110 | struct ibv_pd *pd = server->create_pd(); 111 | 112 | SizeTable::getInstance().PrintTable(); 113 | 114 | uint32_t threshold_popular_class = allparams["thpopularity"].as(); // popular classes are allocated by local thread. The class is popular once it has 100+ requests. 
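    // The two thresholds below steer allocation: smaller size classes (those
    // below the thclass threshold) and popular classes (those that reach the
    // thpopularity request count) are allocated by the local worker thread.
    // Each worker created in the loop further down registers its own
    // ThreadAlloc with the AllocAdapter.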
115 | uint32_t threshold_size_class = allparams["thclass"].as(); // smaller size classes are allocated by local thread 116 | 117 | AllocAdapter::init(threshold_popular_class, threshold_size_class); 118 | 119 | uint32_t preallocate = allparams["preallocate"].as(); // how many super blocks preallocate 120 | 121 | BlockAllocImpl *balloc = new BlockAllocImpl(0, preallocate); 122 | ibv_memory_manager *ibv = new ibv_memory_manager(pd, allparams.count("odp") ); 123 | 124 | uint32_t recv_buffers_num = allparams["num_recv_buf"].as(); 125 | uint32_t recv_buffer_size = allparams["recv_buf_size"].as(); 126 | uint32_t send_buffer_size = allparams["send_buf_size"].as(); 127 | 128 | for(uint32_t i = 0; i < total_thread_num; i++) 129 | { 130 | ThreadAlloc *alloc = new ThreadAllocImpl(i, balloc, ibv); 131 | AllocAdapter::getInstance().RegThread(alloc, i); 132 | Worker *w = new Worker(i, alloc, pd, recv_buffers_num, recv_buffer_size, send_buffer_size ); 133 | if(i == 0) 134 | { 135 | w->set_rdma_server(server); 136 | } 137 | m->add_worker(w); 138 | } 139 | 140 | m->launch(); 141 | 142 | delete m; 143 | 144 | if(allparams.count("log_file")){ 145 | fclose(log_fp); 146 | } 147 | 148 | return 0; 149 | } 150 | -------------------------------------------------------------------------------- /paper/corm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spcl/CoRM/2bcae859eafad28ba51a92ec73e57239febef147/paper/corm.pdf -------------------------------------------------------------------------------- /rdma/connectRDMA.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple/naive code to connect 2 endpoints with rdma using RDMA-CM. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
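 *
 * Usage sketch (illustrative; attr and conn_param are assumed to be filled in
 * by the caller, as the client programs in this repository do):
 *
 *   // server side
 *   ServerRDMA server(ip, 9999);
 *   struct ibv_pd *pd = server.create_pd();
 *   struct rdma_cm_id *id = server.getConnectRequest();
 *   VerbsEP *ep = ServerRDMA::acceptEP(id, &attr, &conn_param, pd);
 *
 *   // client side
 *   ClientRDMA rdma(ip, 9999);
 *   struct rdma_cm_id *cid = rdma.sendConnectRequest();
 *   VerbsEP *cep = ClientRDMA::connectEP(cid, &attr, &conn_param, ClientRDMA::create_pd(cid));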
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | 13 | #pragma once 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "verbsEP.hpp" 24 | 25 | 26 | 27 | struct ibv_device *ctx_find_dev(const char *ib_devname) { 28 | int num_of_device; 29 | struct ibv_device **dev_list; 30 | struct ibv_device *ib_dev = NULL; 31 | 32 | dev_list = ibv_get_device_list(&num_of_device); 33 | 34 | if (num_of_device <= 0) { 35 | fprintf(stderr, " Did not detect devices \n"); 36 | fprintf(stderr, " If device exists, check if driver is up\n"); 37 | return NULL; 38 | } 39 | 40 | if (!ib_devname) { 41 | ib_dev = dev_list[0]; 42 | if (!ib_dev) { 43 | fprintf(stderr, "No IB devices found\n"); 44 | exit(1); 45 | } 46 | } else { 47 | for (; (ib_dev = *dev_list); ++dev_list) 48 | if (!strcmp(ibv_get_device_name(ib_dev), ib_devname)) break; 49 | if (!ib_dev) fprintf(stderr, "IB device %s not found\n", ib_devname); 50 | } 51 | return ib_dev; 52 | } 53 | 54 | 55 | class ServerRDMA{ 56 | 57 | struct rdma_event_channel *cm_channel; 58 | struct rdma_cm_id *listen_id = NULL; 59 | //struct ibv_context *ctx; 60 | 61 | public: 62 | ServerRDMA(char* ip, int port){ 63 | int ret; 64 | struct rdma_addrinfo hints; 65 | struct rdma_addrinfo *addrinfo; 66 | 67 | /* struct ibv_device *ib_dev = NULL; 68 | ib_dev = ctx_find_dev(devname); 69 | ctx = ibv_open_device(ib_dev);*/ 70 | 71 | memset(&hints, 0, sizeof hints); 72 | hints.ai_flags = RAI_PASSIVE; 73 | hints.ai_port_space = RDMA_PS_TCP; 74 | 75 | char strport[80]; 76 | sprintf(strport, "%d", port); 77 | 78 | ret = rdma_getaddrinfo(ip, strport, &hints, &addrinfo); 79 | if (ret) { 80 | perror("rdma_getaddrinfo\n"); 81 | exit(1); 82 | } 83 | /* 84 | this->cm_channel = rdma_create_event_channel(); 85 | 86 | if (this->cm_channel == NULL) { 87 | perror(" rdma_create_event_channel failed\n"); 88 | exit(1); 89 | } 90 | ret = rdma_create_id(this->cm_channel, &listen_id, NULL, RDMA_PS_TCP); 91 | if (ret) { 92 | perror("Failed to create RDMA CM server control ID."); 93 | exit(1); 94 | } 95 | 96 | ret = rdma_bind_addr(listen_id, addrinfo->ai_src_addr); 97 | if (ret) { 98 | perror("Failed to bind RDMA CM address on the server."); 99 | exit(1); 100 | } 101 | */ 102 | 103 | ret = rdma_create_ep(&listen_id, addrinfo, NULL, NULL); 104 | if (ret) { 105 | perror("rdma_create_ep\n"); 106 | exit(1); 107 | } 108 | 109 | rdma_freeaddrinfo(addrinfo); 110 | 111 | ret = rdma_listen(listen_id, 2); 112 | if (ret) { 113 | perror("rdma_listen"); 114 | exit(1); 115 | } 116 | 117 | } 118 | 119 | int get_listen_fd() 120 | { 121 | 122 | assert(this->listen_id->channel!=NULL); 123 | int options = fcntl(this->listen_id->channel->fd, F_GETFL, 0); 124 | 125 | if (fcntl(this->listen_id->channel->fd, F_SETFL, options | O_NONBLOCK)) { 126 | perror("[RDMA_COM] cannot set server_client to non-blocking mode"); 127 | exit(1); 128 | return 0; 129 | } 130 | 131 | return this->listen_id->channel->fd; 132 | } 133 | 134 | struct ibv_pd * create_pd(){ 135 | return ibv_alloc_pd(listen_id->verbs); 136 | } 137 | 138 | 139 | static struct ibv_srq* create_srq(struct ibv_pd * pd, uint32_t max_wr, uint32_t max_sge=1){ 140 | 141 | struct ibv_srq_init_attr attr; 142 | memset(&attr, 0, sizeof attr); 143 | attr.attr.max_wr = max_wr; 144 | attr.attr.max_sge = max_sge; 145 | return ibv_create_srq(pd, &attr); 146 | } 147 | 148 | struct ibv_cq *create_cq(uint32_t max_wr, struct ibv_comp_channel *channel = NULL){ 149 | return 
ibv_create_cq(listen_id->verbs, max_wr, NULL,channel, 0); 150 | } 151 | 152 | 153 | struct rdma_cm_id * getConnectRequest(){ 154 | int ret; 155 | struct rdma_cm_id *id; 156 | 157 | ret = rdma_get_request(this->listen_id, &id); 158 | if (ret) { 159 | perror("rdma_get_request"); 160 | exit(1); 161 | } 162 | return id; 163 | } 164 | 165 | 166 | static VerbsEP* acceptEP(struct rdma_cm_id *id, struct ibv_qp_init_attr *attr, struct rdma_conn_param *conn_param, struct ibv_pd* pd = NULL){ 167 | int ret; 168 | attr->qp_type = IBV_QPT_RC; 169 | 170 | ret = rdma_create_qp(id, pd, attr); 171 | if (ret) { 172 | perror("rdma_create_qp"); 173 | exit(1); 174 | } 175 | 176 | ret = rdma_accept(id, conn_param); 177 | if (ret) { 178 | perror("rdma_accept"); 179 | exit(1); 180 | } 181 | 182 | return new VerbsEP(id, attr->cap.max_inline_data, attr->cap.max_send_wr, attr->cap.max_recv_wr ); 183 | } 184 | 185 | }; 186 | 187 | 188 | 189 | class ClientRDMA{ 190 | 191 | struct rdma_addrinfo *addrinfo; 192 | 193 | public: 194 | ClientRDMA(char* ip, int port){ 195 | int ret; 196 | struct rdma_addrinfo hints; 197 | 198 | memset(&hints, 0, sizeof hints); 199 | hints.ai_port_space = RDMA_PS_TCP; 200 | 201 | char strport[80]; 202 | sprintf(strport, "%d", port); 203 | 204 | ret = rdma_getaddrinfo(ip, strport, &hints, &addrinfo); 205 | if (ret) { 206 | perror("rdma_getaddrinfo\n"); 207 | exit(1); 208 | } 209 | 210 | } 211 | 212 | ~ClientRDMA(){ 213 | rdma_freeaddrinfo(addrinfo); 214 | } 215 | 216 | struct rdma_cm_id * sendConnectRequest(){ 217 | int ret; 218 | struct rdma_cm_id *id; 219 | 220 | ret = rdma_create_ep(&id, this->addrinfo, NULL, NULL); 221 | if (ret) { 222 | perror("rdma_create_ep"); 223 | exit(1); 224 | } 225 | return id; 226 | } 227 | 228 | static struct ibv_pd * create_pd(struct rdma_cm_id *id){ 229 | return ibv_alloc_pd(id->verbs); 230 | } 231 | 232 | 233 | static struct ibv_srq* create_srq(struct ibv_pd * pd, uint32_t max_wr, uint32_t max_sge=1){ 234 | struct ibv_srq_init_attr attr; 235 | memset(&attr, 0, sizeof attr); 236 | attr.attr.max_wr = max_wr; 237 | attr.attr.max_sge = max_sge; 238 | return ibv_create_srq(pd, &attr); 239 | } 240 | 241 | static struct ibv_cq *create_cq(struct rdma_cm_id * id, uint32_t max_wr, struct ibv_comp_channel *channel = NULL){ 242 | return ibv_create_cq(id->verbs, max_wr, NULL, channel, 0); 243 | } 244 | 245 | static VerbsEP* connectEP(struct rdma_cm_id * id, struct ibv_qp_init_attr *attr, struct rdma_conn_param *conn_param, struct ibv_pd* pd = NULL){ 246 | int ret; 247 | attr->qp_type = IBV_QPT_RC; 248 | ret = rdma_create_qp(id, pd, attr); 249 | if (ret) { 250 | perror("rdma_create_qp"); 251 | exit(1); 252 | } 253 | 254 | ret = rdma_connect(id, conn_param); 255 | if (ret) { 256 | perror("rdma_accept"); 257 | exit(1); 258 | } 259 | 260 | return new VerbsEP(id, attr->cap.max_inline_data, attr->cap.max_send_wr, attr->cap.max_recv_wr ); 261 | } 262 | 263 | }; -------------------------------------------------------------------------------- /rdma/rdma_helpers.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple manager of fixed-size regions used for send-receive communication. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
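 *
 * Usage sketch (illustrative; pd is an ibv_pd created elsewhere, and the
 * buffer counts/sizes mirror the defaults in main.cpp):
 *
 *   ReceiveBuffers recv_bufs(256, 2048, pd);   // 256 fixed-size receive slots
 *   char *rbuf = recv_bufs.get_buffer(0);      // address of the i-th slot
 *
 *   SendBuffers send_bufs(pd, 16*1024);        // ring of registered send space
 *   char *msg = send_bufs.Alloc(256);          // may return NULL when the ring is full
 *   // ... post a send referencing msg and send_bufs.get_lkey() ...
 *   send_bufs.Free(msg);                       // allocations are expected to be short-lived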
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | 17 | // it allocates buffers of fixed size 18 | struct ReceiveBuffers{ 19 | 20 | uint32_t const num_of_buffers; 21 | uint64_t const buffer_size; 22 | 23 | char *buffer; 24 | size_t const total_size; 25 | uint32_t lkey; 26 | struct ibv_mr * mr; 27 | 28 | ReceiveBuffers(uint32_t num_of_buffers, uint32_t buffer_size, struct ibv_pd *pd): 29 | num_of_buffers(num_of_buffers), buffer_size(buffer_size), total_size(num_of_buffers*buffer_size) 30 | { 31 | this->buffer = (char*)aligned_alloc(64, total_size); 32 | assert(buffer != NULL && "error memory allocation"); 33 | 34 | this->mr = ibv_reg_mr(pd, buffer, total_size, IBV_ACCESS_LOCAL_WRITE); 35 | this->lkey = mr->lkey; 36 | } 37 | 38 | char* get_buffer(uint32_t i) const { 39 | return buffer + i*buffer_size; 40 | } 41 | 42 | uint32_t get_lkey() const { 43 | return lkey; 44 | } 45 | 46 | uint32_t get_buffer_length() const { 47 | return buffer_size; 48 | } 49 | 50 | ~ReceiveBuffers(){ 51 | if(mr) 52 | ibv_dereg_mr(mr); 53 | if(buffer) 54 | free(buffer); 55 | } 56 | 57 | }; 58 | 59 | // it allocates buffers of any size. 60 | // but it assumes short lifetime of objects. 61 | struct SendBuffers{ 62 | char *buffer; 63 | size_t const total_size; 64 | uint32_t lkey; 65 | struct ibv_mr * mr; 66 | 67 | uint32_t current_offset; 68 | uint32_t first_notfree; 69 | 70 | std::set allocated_offsets; 71 | 72 | const uint32_t Allignment_bits = 6; 73 | const uint32_t Allignment = 1<buffer = (char*)aligned_alloc(Allignment, total_size); 80 | assert(buffer != NULL && "error memory allocation"); 81 | 82 | this->mr = ibv_reg_mr(pd, buffer, total_size, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 83 | if(mr==NULL){ 84 | printf("Error reg mr\n"); 85 | } 86 | this->lkey = mr->lkey; 87 | } 88 | 89 | uint32_t get_lkey() const { 90 | return lkey; 91 | } 92 | 93 | char* Alloc(uint32_t size) { 94 | 95 | uint32_t to_allocate = (size + Allignment - 1) & ~Allignment_mask; 96 | 97 | if(current_offset+to_allocate >= total_size){ 98 | // wrap around 99 | current_offset = 0; 100 | if(allocated_offsets.empty()){ 101 | first_notfree = total_size; 102 | }else{ 103 | first_notfree = *(allocated_offsets.begin()); 104 | } 105 | } 106 | 107 | if(first_notfree-current_offset < to_allocate){ 108 | // don't have memory 109 | printf("return don't have memory %u, total mem%lu\n",size, total_size); 110 | return NULL; 111 | } 112 | 113 | size_t return_offset = current_offset; 114 | current_offset+=to_allocate; 115 | allocated_offsets.insert(return_offset); 116 | return buffer + return_offset; 117 | } 118 | 119 | void Free(char* buf) { 120 | uint32_t offset = (uint32_t)(buf - buffer); 121 | auto it = allocated_offsets.find(offset); 122 | assert(it != allocated_offsets.end() && "address does not exist"); 123 | if(first_notfree == offset){ 124 | auto next = std::next(it); 125 | if(next == allocated_offsets.end()){ 126 | first_notfree = total_size; 127 | }else{ 128 | first_notfree = *next; 129 | } 130 | } 131 | allocated_offsets.erase(it); 132 | } 133 | 134 | ~SendBuffers(){ 135 | if(mr) 136 | ibv_dereg_mr(mr); 137 | if(buffer) 138 | free(buffer); 139 | } 140 | }; 141 | -------------------------------------------------------------------------------- /rdma/rdma_memory_manager.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple manager of block registration. 
It also used to support the experimental API, but I removed it as it is deprecated by MOFED. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | 16 | /* 17 | #ifdef HAVE_ODP_MR_PREFETCH 18 | #warning "Prefetch is loaded" 19 | #include 20 | #else 21 | #warning "Prefetch is not supported" 22 | #endif 23 | */ 24 | 25 | struct ibv_memory_manager{ 26 | 27 | struct ibv_pd * const pd; 28 | const bool _withODP; 29 | 30 | ibv_memory_manager(struct ibv_pd *pd, bool with_odp): pd(pd), _withODP(with_odp) { 31 | // empty 32 | } 33 | 34 | struct ibv_mr * mem_reg_odp(void *addr,uint32_t size){ 35 | return ibv_reg_mr(pd,addr,size,IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE \ 36 | | IBV_ACCESS_REMOTE_READ | (IBV_ACCESS_ON_DEMAND * (int)this->_withODP )); 37 | } 38 | 39 | void mem_rereg(struct ibv_mr * mr){ 40 | if( !_withODP ){ 41 | int ret = ibv_rereg_mr(mr,IBV_REREG_MR_CHANGE_TRANSLATION, pd, mr->addr,mr->length,0); 42 | assert(ret==0 && "ibv_rereg_mr failed"); 43 | return; 44 | } 45 | 46 | struct ibv_sge sge; 47 | sge.addr = (uint64_t)mr->addr; 48 | sge.length = mr->length; 49 | sge.lkey = mr->lkey; 50 | 51 | int ret = ibv_advise_mr(pd, IBV_ADVISE_MR_ADVICE_PREFETCH, 52 | IBV_ADVISE_MR_FLAG_FLUSH, 53 | &sge, 1); 54 | 55 | assert(ret==0 && "ibv_advise_mr failed"); 56 | 57 | #if 0 58 | struct ibv_exp_prefetch_attr prefetch_attr; 59 | prefetch_attr.flags = IBV_EXP_PREFETCH_WRITE_ACCESS; 60 | prefetch_attr.addr = (uint64_t)mr->addr; 61 | prefetch_attr.length = mr->length; 62 | prefetch_attr.comp_mask = 0; 63 | int ret =ibv_exp_prefetch_mr(mr, &prefetch_attr); 64 | #endif 65 | 66 | 67 | } 68 | 69 | 70 | void mem_dereg(struct ibv_mr * mr){ 71 | int ret = ibv_dereg_mr(mr); 72 | assert(ret==0 && "ibv_dereg_mr failed"); 73 | return; 74 | } 75 | }; -------------------------------------------------------------------------------- /rdma/verbsEP.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple class for managing an endpoint. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
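 *
 * Usage sketch (illustrative; ep comes from ClientRDMA::connectEP or
 * ServerRDMA::acceptEP, and buf/size are a caller-owned buffer):
 *
 *   struct ibv_mr *mr = ep->reg_mem(buf, size);            // register the buffer
 *   ep->send_signaled(1, (uint64_t)buf, mr->lkey, size);   // two-sided send
 *   struct ibv_wc wc;
 *   while (ep->poll_send_completion(&wc) == 0) { }         // spin on the send CQ
 *   ep->dereg_mem(mr);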
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #pragma once 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | 22 | class VerbsEP{ 23 | struct rdma_cm_id * const id; 24 | public: 25 | struct ibv_qp * const qp; 26 | struct ibv_pd * const pd; 27 | const uint32_t max_inline_data; 28 | const uint32_t max_send_size; 29 | const uint32_t max_recv_size; 30 | 31 | VerbsEP(struct rdma_cm_id *id, uint32_t max_inline_data, uint32_t max_send_size, uint32_t max_recv_size): 32 | id(id), qp(id->qp), pd(qp->pd), max_inline_data(0), max_send_size(max_send_size), max_recv_size(max_recv_size) 33 | { 34 | // empty 35 | } 36 | 37 | ~VerbsEP(){ 38 | // empty 39 | } 40 | 41 | int get_event_fd() 42 | { 43 | assert(this->id->channel!=NULL); 44 | int options = fcntl(this->id->channel->fd, F_GETFL, 0); 45 | 46 | if (fcntl(this->id->channel->fd, F_SETFL, options | O_NONBLOCK)) { 47 | perror("[RDMA_COM] cannot set server_client to non-blocking mode"); 48 | exit(1); 49 | return 0; 50 | } 51 | 52 | return this->id->channel->fd; 53 | } 54 | 55 | enum rdma_cm_event_type get_event(){ 56 | int ret; 57 | struct rdma_cm_event *event; 58 | 59 | ret = rdma_get_cm_event(id->channel, &event); 60 | if (ret) { 61 | perror("rdma_get_cm_event"); 62 | exit(ret); 63 | } 64 | enum rdma_cm_event_type out = event->event; 65 | /* switch (event->event){ 66 | case RDMA_CM_EVENT_ADDR_ERROR: 67 | case RDMA_CM_EVENT_ROUTE_ERROR: 68 | case RDMA_CM_EVENT_CONNECT_ERROR: 69 | case RDMA_CM_EVENT_UNREACHABLE: 70 | case RDMA_CM_EVENT_REJECTED: 71 | 72 | text(log_fp,"[rdma_get_cm_event] Error %u \n",event->event); 73 | break; 74 | 75 | case RDMA_CM_EVENT_DISCONNECTED: 76 | text(log_fp,"[rdma_get_cm_event] Disconnect %u \n",event->event); 77 | break; 78 | 79 | case RDMA_CM_EVENT_DEVICE_REMOVAL: 80 | text(log_fp,"[rdma_get_cm_event] Removal %u \n",event->event); 81 | break; 82 | default: 83 | text(log_fp,"[rdma_get_cm_event] %u \n",event->event); 84 | 85 | }*/ 86 | rdma_ack_cm_event(event); 87 | return out; 88 | } 89 | 90 | uint32_t get_qp_num() const{ 91 | return qp->qp_num; 92 | } 93 | 94 | struct ibv_mr * reg_mem(void *buf, uint32_t size){ 95 | return ibv_reg_mr(this->pd, buf, size, IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); 96 | } 97 | 98 | struct ibv_mr * reg_mem_with_atomic(void *buf, uint32_t size){ 99 | return ibv_reg_mr(this->pd, buf, size, IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_ATOMIC); 100 | } 101 | 102 | void dereg_mem(struct ibv_mr * mr){ 103 | ibv_dereg_mr(mr); 104 | } 105 | 106 | inline int poll_send_completion(struct ibv_wc* wc, int num = 1){ 107 | return ibv_poll_cq(this->qp->send_cq, num, wc); 108 | } 109 | 110 | inline int poll_recv_completion(struct ibv_wc* wc, int num = 1){ 111 | return ibv_poll_cq(this->qp->recv_cq, num, wc); 112 | } 113 | 114 | static inline int post_srq_recv(struct ibv_srq *srq, uint64_t wr_id, uint64_t local_addr=0ULL, uint32_t lkey=0, uint32_t length=0){ 115 | struct ibv_sge sge; 116 | 117 | sge.addr = local_addr; 118 | sge.length = length; 119 | sge.lkey = lkey; 120 | 121 | struct ibv_recv_wr wr, *bad; 122 | 123 | wr.wr_id = wr_id; 124 | wr.next = NULL; 125 | wr.sg_list = &sge; 126 | wr.num_sge = 1; 127 | 128 | return ibv_post_srq_recv(srq,&wr, &bad); 129 | } 130 | 131 | inline int post_recv(uint64_t wr_id, struct ibv_mr * mr){ 132 | return post_recv(wr_id, (uint64_t)mr->addr, mr->lkey, mr->length); 133 | } 134 | 135 | inline int post_recv(uint64_t 
wr_id, uint64_t local_addr=0ULL, uint32_t lkey=0, uint32_t length=0){ 136 | struct ibv_sge sge; 137 | 138 | sge.addr = local_addr; 139 | sge.length = length; 140 | sge.lkey = lkey; 141 | 142 | struct ibv_recv_wr wr, *bad; 143 | 144 | wr.wr_id = wr_id; 145 | wr.next = NULL; 146 | wr.sg_list = &sge; 147 | wr.num_sge = 1; 148 | 149 | return ibv_post_recv(qp, &wr, &bad); 150 | } 151 | 152 | inline int post_recv(struct ibv_recv_wr * wr){ 153 | struct ibv_recv_wr *bad; 154 | return ibv_post_recv(qp, wr, &bad); 155 | } 156 | 157 | inline int post_shared_recv(uint64_t wr_id, struct ibv_mr * mr){ 158 | return post_shared_recv(wr_id, (uint64_t)mr->addr, mr->lkey, mr->length); 159 | } 160 | 161 | inline int post_shared_recv(uint64_t wr_id, uint64_t local_addr=0ULL, uint32_t lkey=0, uint32_t length=0){ 162 | struct ibv_sge sge; 163 | 164 | sge.addr = local_addr; 165 | sge.length = length; 166 | sge.lkey = lkey; 167 | 168 | struct ibv_recv_wr wr, *bad; 169 | 170 | wr.wr_id = wr_id; 171 | wr.next = NULL; 172 | wr.sg_list = &sge; 173 | wr.num_sge = 1; 174 | 175 | return ibv_post_srq_recv(qp->srq, &wr, &bad); 176 | } 177 | 178 | inline int send_signaled(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint32_t length){ 179 | unsigned int send_flags = IBV_SEND_SIGNALED; 180 | 181 | if(length!=0 && length<=max_inline_data){ 182 | send_flags |= IBV_SEND_INLINE; 183 | } 184 | 185 | return two_sided( IBV_WR_SEND, send_flags, wr_id, 0,local_addr, lkey, length); 186 | } 187 | 188 | inline int send(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint32_t length){ 189 | unsigned int send_flags = 0; 190 | 191 | if(length!=0 && length<=max_inline_data){ 192 | send_flags |= IBV_SEND_INLINE; 193 | } 194 | return two_sided( IBV_WR_SEND, send_flags, wr_id, 0,local_addr, lkey, length); 195 | } 196 | 197 | 198 | inline int send_with_imm_signaled(uint64_t wr_id, uint32_t imm_data, uint64_t local_addr, uint32_t lkey, uint32_t length){ 199 | unsigned int send_flags = IBV_SEND_SIGNALED; 200 | 201 | if(length!=0 && length<=max_inline_data){ 202 | send_flags |= IBV_SEND_INLINE; 203 | } 204 | 205 | return two_sided( IBV_WR_SEND_WITH_IMM, send_flags, wr_id, imm_data,local_addr, lkey, length); 206 | } 207 | 208 | inline int send_with_imm(uint64_t wr_id, uint32_t imm_data, uint64_t local_addr, uint32_t lkey, uint32_t length){ 209 | unsigned int send_flags = 0; 210 | 211 | if(length!=0 && length<=max_inline_data){ 212 | send_flags |= IBV_SEND_INLINE; 213 | } 214 | return two_sided( IBV_WR_SEND_WITH_IMM, send_flags, wr_id, imm_data,local_addr, lkey, length); 215 | } 216 | 217 | inline int write_signaled(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint32_t length){ 218 | 219 | unsigned int send_flags = IBV_SEND_SIGNALED; 220 | 221 | if(length!=0 && length<=max_inline_data){ 222 | send_flags |= IBV_SEND_INLINE; 223 | } 224 | return one_sided(IBV_WR_RDMA_WRITE,send_flags,wr_id,0,local_addr,lkey,remote_addr,rkey,length); 225 | } 226 | 227 | 228 | inline int write(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint32_t length){ 229 | 230 | unsigned int send_flags = 0; 231 | 232 | if(length!=0 && length<=max_inline_data){ 233 | send_flags |= IBV_SEND_INLINE; 234 | } 235 | return one_sided(IBV_WR_RDMA_WRITE,send_flags,wr_id,0,local_addr,lkey,remote_addr,rkey,length); 236 | } 237 | 238 | inline int write_send_signaled(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint32_t length, uint32_t payload){ 239 | 
struct ibv_sge sge[2]; 240 | 241 | 242 | sge[0].addr = local_addr; 243 | sge[0].length = length; 244 | sge[0].lkey = lkey; 245 | struct ibv_send_wr wr[2], *bad; 246 | 247 | wr[0].wr_id = wr_id; 248 | wr[0].next = &wr[1]; 249 | wr[0].sg_list = &sge[0]; 250 | wr[0].num_sge = 1; 251 | wr[0].opcode = IBV_WR_RDMA_WRITE; 252 | 253 | wr[0].send_flags = (length<=max_inline_data ? IBV_SEND_INLINE : 0); 254 | 255 | wr[0].wr.rdma.remote_addr = remote_addr; 256 | wr[0].wr.rdma.rkey = rkey; 257 | 258 | sge[1].addr = local_addr; 259 | sge[1].length = payload; 260 | sge[1].lkey = lkey; 261 | 262 | wr[1].wr_id = wr_id; 263 | wr[1].next = NULL; 264 | wr[1].sg_list = &sge[1]; 265 | wr[1].num_sge = 1; 266 | wr[1].opcode = IBV_WR_SEND; 267 | wr[1].send_flags = IBV_SEND_SIGNALED | (payload<=max_inline_data ? IBV_SEND_INLINE : 0); 268 | 269 | 270 | return ibv_post_send(this->qp, wr, &bad); 271 | 272 | } 273 | 274 | 275 | inline int write_write_signaled(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint32_t length, uint32_t payload){ 276 | struct ibv_sge sge[2]; 277 | 278 | sge[0].addr = local_addr; 279 | sge[0].length = length; 280 | sge[0].lkey = lkey; 281 | struct ibv_send_wr wr[2], *bad; 282 | 283 | wr[0].wr_id = wr_id; 284 | wr[0].next = &wr[1]; 285 | wr[0].sg_list = &sge[0]; 286 | wr[0].num_sge = 1; 287 | wr[0].opcode = IBV_WR_RDMA_WRITE; 288 | 289 | wr[0].send_flags = (length<=max_inline_data ? IBV_SEND_INLINE : 0); ; 290 | 291 | wr[0].wr.rdma.remote_addr = remote_addr; 292 | wr[0].wr.rdma.rkey = rkey; 293 | 294 | sge[1].addr = local_addr; 295 | sge[1].length = payload; 296 | sge[1].lkey = lkey; 297 | 298 | wr[1].wr_id = wr_id; 299 | wr[1].next = NULL; 300 | wr[1].sg_list = &sge[1]; 301 | wr[1].num_sge = 1; 302 | wr[1].opcode = IBV_WR_RDMA_WRITE_WITH_IMM; 303 | wr[1].send_flags = IBV_SEND_SIGNALED | (payload<=max_inline_data ? 
IBV_SEND_INLINE : 0); 304 | 305 | wr[1].wr.rdma.remote_addr = remote_addr; 306 | wr[1].wr.rdma.rkey = rkey; 307 | return ibv_post_send(this->qp, wr, &bad); 308 | } 309 | 310 | inline int send_cas_signaled(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint64_t expected, uint64_t swap ){ 311 | 312 | struct ibv_sge sge; 313 | 314 | sge.addr = local_addr; 315 | sge.length = 8; 316 | sge.lkey = lkey; 317 | struct ibv_send_wr wr, *bad; 318 | 319 | wr.wr_id = wr_id; 320 | wr.next = NULL; 321 | wr.sg_list = &sge; 322 | wr.num_sge = 1; 323 | wr.opcode = IBV_WR_ATOMIC_CMP_AND_SWP; 324 | 325 | wr.send_flags = IBV_SEND_SIGNALED ; //| IBV_SEND_INLINE 326 | 327 | wr.wr.atomic.remote_addr = remote_addr; 328 | wr.wr.atomic.rkey = rkey; 329 | wr.wr.atomic.compare_add = expected; /* expected value in remote address */ 330 | wr.wr.atomic.swap = swap; /* the value that remote address will be assigned to */ 331 | 332 | return ibv_post_send(this->qp, &wr, &bad); 333 | 334 | } 335 | 336 | inline int write_with_imm_signaled(uint64_t wr_id, uint32_t imm_data, 337 | uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint32_t length){ 338 | 339 | unsigned int send_flags = IBV_SEND_SIGNALED; 340 | 341 | if(length!=0 && length<=max_inline_data){ 342 | send_flags |= IBV_SEND_INLINE; 343 | } 344 | return one_sided(IBV_WR_RDMA_WRITE_WITH_IMM,send_flags,wr_id,imm_data,local_addr,lkey,remote_addr,rkey,length); 345 | } 346 | 347 | 348 | inline int write_with_imm(uint64_t wr_id, uint32_t imm_data, 349 | uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint32_t length){ 350 | 351 | unsigned int send_flags = 0; 352 | 353 | if(length!=0 && length<=max_inline_data){ 354 | send_flags |= IBV_SEND_INLINE; 355 | } 356 | return one_sided(IBV_WR_RDMA_WRITE_WITH_IMM,send_flags,wr_id,imm_data,local_addr,lkey,remote_addr,rkey,length); 357 | } 358 | 359 | 360 | inline int read_signaled(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, 361 | uint32_t rkey, uint32_t length) 362 | { 363 | unsigned int send_flags = IBV_SEND_SIGNALED; 364 | 365 | return one_sided(IBV_WR_RDMA_READ,send_flags,wr_id,0,local_addr,lkey,remote_addr,rkey,length); 366 | } 367 | 368 | inline int read(uint64_t wr_id, uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint32_t length) 369 | { 370 | unsigned int send_flags = 0; 371 | 372 | return one_sided(IBV_WR_RDMA_READ,send_flags,wr_id,0,local_addr,lkey,remote_addr,rkey,length); 373 | } 374 | 375 | 376 | inline int one_sided(enum ibv_wr_opcode opcode, unsigned int send_flags, uint64_t wr_id, uint32_t imm_data, 377 | uint64_t local_addr, uint32_t lkey, uint64_t remote_addr, uint32_t rkey, uint32_t length) 378 | { 379 | struct ibv_sge sge; 380 | 381 | sge.addr = local_addr; 382 | sge.length = length; 383 | sge.lkey = lkey; 384 | struct ibv_send_wr wr, *bad; 385 | 386 | wr.wr_id = wr_id; 387 | wr.next = NULL; 388 | wr.sg_list = &sge; 389 | wr.num_sge = 1; 390 | wr.opcode = opcode; 391 | 392 | wr.send_flags = send_flags; 393 | wr.imm_data = imm_data; 394 | 395 | 396 | wr.wr.rdma.remote_addr = remote_addr; 397 | wr.wr.rdma.rkey = rkey; 398 | 399 | return ibv_post_send(this->qp, &wr, &bad); 400 | } 401 | 402 | 403 | inline int two_sided(enum ibv_wr_opcode opcode, unsigned int send_flags, uint64_t wr_id, uint32_t imm_data, 404 | uint64_t local_addr, uint32_t lkey, uint32_t length) 405 | { 406 | struct ibv_sge sge; 407 | 408 | sge.addr = local_addr; 409 | sge.length = length; 410 | sge.lkey = 
lkey ; 411 | struct ibv_send_wr wr, *bad; 412 | 413 | wr.wr_id = wr_id; 414 | wr.next = NULL; 415 | wr.sg_list = &sge; 416 | wr.num_sge = 1; 417 | wr.opcode = opcode; 418 | 419 | wr.send_flags = send_flags; 420 | wr.imm_data = imm_data; 421 | 422 | return ibv_post_send(this->qp, &wr, &bad); 423 | } 424 | 425 | inline int post_send(struct ibv_send_wr *wr) 426 | { 427 | struct ibv_send_wr *bad; 428 | return ibv_post_send(this->qp, wr, &bad); 429 | } 430 | 431 | 432 | }; 433 | -------------------------------------------------------------------------------- /remote_read_benchmark.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple code to measure Remote Read throughput for Farm/Mesh/Corm 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #include // std::cout 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | 26 | #include "thread/thread.hpp" 27 | FILE *log_fp; 28 | 29 | 30 | 31 | #include "worker/client_api.hpp" 32 | #include "rdma/connectRDMA.hpp" 33 | #include "utilities/zipf.hpp" 34 | #include "utilities/ycsb.hpp" 35 | 36 | #include "utilities/cxxopts.hpp" 37 | 38 | 39 | cxxopts::ParseResult 40 | parse(int argc, char* argv[]) 41 | { 42 | cxxopts::Options options(argv[0], "Remote Read benchmark for Farm/Mesh/Corm"); 43 | options 44 | .positional_help("[optional args]") 45 | .show_positional_help(); 46 | 47 | try 48 | { 49 | 50 | options.add_options() 51 | ("server", "Another address", cxxopts::value(), "IP") 52 | ("i,input", "input file", cxxopts::value()->default_value("test.bin"), "FILE") 53 | ("target", "expected rate ops/sec", cxxopts::value()->default_value(std::to_string(1000)), "N") 54 | ("rpc", "Use rpc reads") 55 | ("rdmaread", "Use one-sided reads") 56 | ("mesh", "Use mesh reads") 57 | ("farm", "Use farm reads") 58 | ("n,num", "Number of requests to run", cxxopts::value()->default_value("123"), "N") 59 | ("help", "Print help") 60 | ; 61 | 62 | auto result = options.parse(argc, argv); 63 | 64 | if (result.count("help")) 65 | { 66 | std::cout << options.help({""}) << std::endl; 67 | exit(0); 68 | } 69 | 70 | if (!result.count("server")) 71 | { 72 | throw cxxopts::OptionException("input must be specified"); 73 | } 74 | 75 | 76 | 77 | return result; 78 | 79 | } catch (const cxxopts::OptionException& e) 80 | { 81 | std::cout << "error parsing options: " << e.what() << std::endl; 82 | std::cout << options.help({""}) << std::endl; 83 | exit(1); 84 | } 85 | } 86 | 87 | 88 | 89 | 90 | 91 | 92 | int main(int argc, char* argv[]){ 93 | 94 | int seed = 3; 95 | 96 | auto allparams = parse(argc,argv); 97 | 98 | log_fp=stdout; 99 | 100 | std::string server = allparams["server"].as(); 101 | std::string input = allparams["input"].as(); 102 | uint64_t target = allparams["target"].as(); 103 | uint64_t num = allparams["num"].as(); 104 | 105 | ClientRDMA rdma((char*)server.c_str(),9999); 106 | struct rdma_cm_id * id = rdma.sendConnectRequest(); 107 | 108 | struct ibv_pd * pd = ClientRDMA::create_pd(id); 109 | 110 | struct ibv_qp_init_attr attr; 111 | struct rdma_conn_param conn_param; 112 | memset(&attr, 0, sizeof(attr)); 113 | attr.cap.max_send_wr = 32; 114 | attr.cap.max_recv_wr = 32; 115 | attr.cap.max_send_sge = 1; 116 | attr.cap.max_recv_sge = 1; 117 | attr.cap.max_inline_data = 0; 118 | 
attr.qp_type = IBV_QPT_RC; 119 | 120 | memset(&conn_param, 0 , sizeof(conn_param)); 121 | conn_param.responder_resources = 0; 122 | conn_param.initiator_depth = 5; 123 | conn_param.retry_count = 3; 124 | conn_param.rnr_retry_count = 3; 125 | 126 | VerbsEP* ep = ClientRDMA::connectEP(id, &attr, &conn_param, pd); 127 | 128 | printf("Connected\n"); 129 | sleep(1); 130 | 131 | RemoteMemoryClient* api = new RemoteMemoryClient(0,ep); 132 | 133 | std::fstream fout; 134 | fout.open(input.c_str(), std::ios::in|std::ios::binary); 135 | uint32_t NN = 0; 136 | 137 | fout.read((char*)&NN,sizeof(NN)); 138 | 139 | std::vector objects(NN); 140 | 141 | for(uint32_t i = 0; i < NN; i++){ 142 | LocalObjectHandler* obj = (LocalObjectHandler*)malloc(sizeof(LocalObjectHandler)); 143 | fout.read((char*)obj,sizeof(LocalObjectHandler)); 144 | // obj->print(); 145 | objects[i] = obj; 146 | } 147 | fout.close(); 148 | printf("Finished reading %u objects from file\n", NN); 149 | uint32_t size = objects[0]->requested_size; 150 | char* buffer = (char*)malloc(size); 151 | 152 | Trace *trace = new Uniform(seed,1.0,NN); 153 | 154 | using ReadFuncPtr = int (RemoteMemoryClient::*)( LocalObjectHandler* obj, char* buffer, uint32_t length ); 155 | 156 | ReadFuncPtr readfunc = nullptr; 157 | 158 | if(allparams.count("rdmaread")){ 159 | if(allparams.count("mesh")){ 160 | readfunc = &RemoteMemoryClient::ReadOneSidedFake; 161 | }else if(allparams.count("farm")){ 162 | readfunc = &RemoteMemoryClient::ReadOneSidedFarm; 163 | } 164 | else{ 165 | readfunc = &RemoteMemoryClient::ReadOneSided; 166 | } 167 | } else { 168 | if(allparams.count("mesh")){ 169 | readfunc = &RemoteMemoryClient::RpcFakeMesh; 170 | }else if(allparams.count("farm")){ 171 | readfunc = &RemoteMemoryClient::Read; 172 | } 173 | else{ 174 | readfunc = &RemoteMemoryClient::Read; 175 | } 176 | } 177 | 178 | std::chrono::seconds sec(1); 179 | 180 | uint64_t nanodelay = std::chrono::nanoseconds(sec).count() / target ; // per request 181 | auto starttime = std::chrono::high_resolution_clock::now(); 182 | 183 | uint32_t interval = 256; 184 | 185 | std::vector request_bw; 186 | request_bw.reserve(1024); 187 | 188 | auto bwt1 = std::chrono::high_resolution_clock::now(); 189 | uint32_t count = 0; 190 | auto req = trace->get_next(); 191 | for(uint64_t i=0; i*readfunc)(obj, buffer, size); 199 | assert(ret==0 && "one sided read failed"); 200 | 201 | count++; 202 | if(count > interval){ 203 | auto bwt2 = std::chrono::high_resolution_clock::now(); 204 | request_bw.push_back(std::chrono::duration_cast(bwt2 - bwt1).count()); 205 | bwt1 = bwt2; 206 | count=0; 207 | } 208 | 209 | auto const sleep_end_time = starttime + std::chrono::nanoseconds(nanodelay*i); 210 | while (std::chrono::high_resolution_clock::now() < sleep_end_time){ 211 | // nothing 212 | } 213 | } 214 | auto endtime = std::chrono::high_resolution_clock::now(); 215 | 216 | printf("throughput(Kreq/sec): "); 217 | for(auto &x : request_bw){ 218 | printf("%.2f ",(interval*1000.0)/x); 219 | } 220 | 221 | printf("\nFinished workload in %lu ms\n", std::chrono::duration_cast< std::chrono::milliseconds >( endtime - starttime ).count() ); 222 | 223 | return 0; 224 | } 225 | -------------------------------------------------------------------------------- /run_compaction.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # 5 | # CoRM: Compactable Remote Memory over RDMA 6 | # 7 | # Help functions to deploy CoRM 8 | # 9 | # Copyright (c) 2020-2021 ETH-Zurich. 
All rights reserved. 10 | # 11 | # Author(s): Konstantin Taranov 12 | # 13 | 14 | source core.sh 15 | trap 'echo -ne "Stop all servers..." && killAllProcesses && killCorm && echo "done" && exit 1' INT 16 | 17 | define HELP <<'EOF' 18 | 19 | Script for starting a compaction experiment. 20 | usage : $0 [options] 21 | options: --server=IP # file containing IP addressof the CoRM server 22 | --num=INT # the number of repetitions 23 | --dir=PATH #absolute path to CoRM 24 | EOF 25 | 26 | usage () { 27 | echo -e "$HELP" 28 | } 29 | 30 | bits=20 # block size in bits. it is hard-coded in CoRM's code 31 | num=10000 32 | server="" 33 | 34 | for arg in "$@" 35 | do 36 | case ${arg} in 37 | --help|-help|-h) 38 | usage 39 | exit 1 40 | ;; 41 | --server=*) 42 | server=`echo $arg | sed -e 's/--server=//'` 43 | server=`eval echo ${server}` # tilde and variable expansion 44 | ;; 45 | --num=*) 46 | num=`echo $arg | sed -e 's/--num=//'` 47 | num=`eval echo ${num}` # tilde and variable expansion 48 | ;; 49 | --dir=*) 50 | WORKDIR=`echo $arg | sed -e 's/--dir=//'` 51 | WORKDIR=`eval echo ${WORKDIR}` # tilde and variable expansion 52 | ;; 53 | esac 54 | done 55 | 56 | 57 | for ((thread=2;thread<=16;thread=thread*2)); do 58 | name="collection_${bits}_${thread}_24.txt" 59 | startCorm $server "--send_buf_size=65536 --threads=${thread} --recv_buf_size=4096 --num_recv_buf=256 --log_file=${WORKDIR}/$name" 60 | sleep 0.5 61 | ./compaction --server=$server --threads=${thread} --num=50 --size=24 --collection 62 | 63 | sleep 0.5 64 | stopCorm 65 | 66 | sleep 1.5 67 | killCorm 68 | done 69 | 70 | echo "----------Done--------------" 71 | 72 | 73 | for ((thread=2;thread<=16;thread=thread*2)); do 74 | name="compaction_${bits}_${thread}_24.txt" 75 | startCorm $server "--send_buf_size=65536 --threads=${thread} --recv_buf_size=4096 --num_recv_buf=256 --log_file=${WORKDIR}/$name" 76 | sleep 0.5 77 | ./compaction --server=$server --threads=${thread} --num=50 --size=24 --collection --compaction 78 | 79 | sleep 0.5 80 | stopCorm 81 | 82 | sleep 1.5 83 | killCorm 84 | done 85 | 86 | echo "----------Done--------------" 87 | -------------------------------------------------------------------------------- /run_latency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # 5 | # CoRM: Compactable Remote Memory over RDMA 6 | # 7 | # Help functions to measure latency of CoRM 8 | # 9 | # Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 10 | # 11 | # Author(s): Konstantin Taranov 12 | # 13 | 14 | source core.sh 15 | trap 'echo -ne "Stop all servers..." 
&& killAllProcesses && killCorm && echo "done" && exit 1' INT 16 | 17 | define HELP <<'EOF' 18 | 19 | Script for measuring latency 20 | usage : $0 [options] 21 | options: --server=IP # file containing IP addressof the CoRM server 22 | --num=INT # the number of records to consume 23 | --dir=PATH #absolute path to corm 24 | EOF 25 | 26 | usage () { 27 | echo -e "$HELP" 28 | } 29 | 30 | 31 | num=10000 32 | server="" 33 | 34 | for arg in "$@" 35 | do 36 | case ${arg} in 37 | --help|-help|-h) 38 | usage 39 | exit 1 40 | ;; 41 | --server=*) 42 | server=`echo $arg | sed -e 's/--server=//'` 43 | server=`eval echo ${server}` # tilde and variable expansion 44 | ;; 45 | --num=*) 46 | num=`echo $arg | sed -e 's/--num=//'` 47 | num=`eval echo ${num}` # tilde and variable expansion 48 | ;; 49 | --dir=*) 50 | WORKDIR=`echo $arg | sed -e 's/--dir=//'` 51 | WORKDIR=`eval echo ${WORKDIR}` # tilde and variable expansion 52 | ;; 53 | esac 54 | done 55 | 56 | 57 | startCorm $server "--send_buf_size=65536 --threads=1 --recv_buf_size=4096 --num_recv_buf=256" 58 | 59 | sleep 0.5 60 | 61 | #real:16 user:8; 62 | #real:24 user:15; 63 | #real:32 user:24; 64 | #real:64 user:56; 65 | #real:128 user:118; 66 | #real:248 user:236; 67 | #real:504 user:488; 68 | #real:1016 user:992; 69 | #real:2040 user:2000; 70 | allSizes=(8 15 24 56 118 236 488 992 2000) 71 | 72 | echo "Starting latency test" 73 | for size in ${allSizes[@]}; do 74 | outputfilename=latency_${size}.txt 75 | runLatency $size $num $outputfilename 76 | done 77 | 78 | 79 | killCorm 80 | echo "----------Done--------------" 81 | -------------------------------------------------------------------------------- /run_read_throughput.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # CoRM: Compactable Remote Memory over RDMA 5 | # 6 | # Help functions to deploy CoRM and read thoughput tests 7 | # 8 | # Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 9 | # 10 | # Author(s): Konstantin Taranov 11 | # 12 | 13 | source core.sh 14 | trap 'echo -ne "Stop all servers..." && killAllProcesses && killCorm && echo "done" && exit 1' INT 15 | 16 | define HELP <<'EOF' 17 | 18 | Script for starting a single consumer latency benchmark. 19 | The consumer is always launched locally. 
20 | usage : $0 [options] 21 | options: --server=IP # file containing IP addressof the CoRM server 22 | --num=INT # the number of records to consume 23 | --dir=PATH #absolute path to corm 24 | EOF 25 | 26 | usage () { 27 | echo -e "$HELP" 28 | } 29 | 30 | 31 | num=8000000 32 | size=24 33 | server="" 34 | 35 | for arg in "$@" 36 | do 37 | case ${arg} in 38 | --help|-help|-h) 39 | usage 40 | exit 1 41 | ;; 42 | --server=*) 43 | server=`echo $arg | sed -e 's/--server=//'` 44 | server=`eval echo ${server}` # tilde and variable expansion 45 | ;; 46 | --size=*) 47 | size=`echo $arg | sed -e 's/--size=//'` 48 | size=`eval echo ${size}` # tilde and variable expansion 49 | ;; 50 | --num=*) 51 | num=`echo $arg | sed -e 's/--num=//'` 52 | num=`eval echo ${num}` # tilde and variable expansion 53 | ;; 54 | --dir=*) 55 | WORKDIR=`echo $arg | sed -e 's/--dir=//'` 56 | WORKDIR=`eval echo ${WORKDIR}` # tilde and variable expansion 57 | ;; 58 | esac 59 | done 60 | 61 | 62 | startCorm $server "--send_buf_size=116384 --threads=1 --recv_buf_size=4096 --num_recv_buf=256" 63 | 64 | sleep 0.5 65 | 66 | loadCorm $size $num 67 | 68 | allFlags=("--rpc" "--rpc --farm" "--rpc --mesh" "--rdmaread" "--rdmaread --farm" "--rdmaread --mesh") 69 | 70 | 71 | echo "Starting throughput test" 72 | 73 | for flag in "${allFlags[@]}"; do 74 | suffix=${flag//[[:blank:]]/} 75 | outputfilename=meshworkload_${size}_${suffix}.txt 76 | echo ${outputfilename} 77 | ./remote_read_benchmark --server=${__corm_server} --target=2000000 --num=1000000 --seed=10 ${flag}> $outputfilename 78 | echo "--------------done $outputfilename" 79 | done 80 | 81 | 82 | killCorm 83 | echo "----------Done--------------" 84 | -------------------------------------------------------------------------------- /run_throughput.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # 5 | # CoRM: Compactable Remote Memory over RDMA 6 | # 7 | # Help functions to deploy CoRM and run read/write benchmarks 8 | # 9 | # Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 10 | # 11 | # Author(s): Konstantin Taranov 12 | # 13 | 14 | source core.sh 15 | trap 'echo -ne "Stop all servers..." && killAllProcesses && killCorm && echo "done" && exit 1' INT 16 | 17 | define HELP <<'EOF' 18 | 19 | Script for running read/write benchmarks 20 | The client is always launched locally. 
21 | usage : $0 [options] 22 | options: --server=IP # file containing IP addressof the CoRM server 23 | --num=INT # the number of records to consume 24 | --dir=PATH #absolute path to corm 25 | EOF 26 | 27 | usage () { 28 | echo -e "$HELP" 29 | } 30 | 31 | 32 | num=8000000 33 | size=24 34 | server="" 35 | 36 | for arg in "$@" 37 | do 38 | case ${arg} in 39 | --help|-help|-h) 40 | usage 41 | exit 1 42 | ;; 43 | --server=*) 44 | server=`echo $arg | sed -e 's/--server=//'` 45 | server=`eval echo ${server}` # tilde and variable expansion 46 | ;; 47 | --size=*) 48 | size=`echo $arg | sed -e 's/--size=//'` 49 | size=`eval echo ${size}` # tilde and variable expansion 50 | ;; 51 | --num=*) 52 | num=`echo $arg | sed -e 's/--num=//'` 53 | num=`eval echo ${num}` # tilde and variable expansion 54 | ;; 55 | --dir=*) 56 | WORKDIR=`echo $arg | sed -e 's/--dir=//'` 57 | WORKDIR=`eval echo ${WORKDIR}` # tilde and variable expansion 58 | ;; 59 | esac 60 | done 61 | 62 | 63 | startCorm $server "--send_buf_size=116384 --threads=8 --recv_buf_size=4096 --num_recv_buf=256" 64 | 65 | sleep 0.5 66 | 67 | loadCorm $size $num 68 | 69 | 70 | allReadProb=(0.5) #1.0 71 | allFlags=("" "--zipf" "--rdmaread" "--zipf --rdmaread") 72 | allThreads=() #1 2 4 73 | 74 | #allReadProb=(1.0) 75 | #declare -a allFlags=("" "--zipf") 76 | #allThreads=(1) 77 | 78 | echo "Starting throughput test" 79 | for th in ${allThreads[@]}; do 80 | continue 81 | for rp in ${allReadProb[@]}; do 82 | for flag in "${allFlags[@]}"; do 83 | suffix=${flag//[[:blank:]]/} 84 | outputfilename=workload_1_1_${th}_${rp}_${suffix}.txt 85 | ./workload_readwrite --server=${__corm_server} --target=2000000 --prob=${rp} --num=1000000 --threads=$th --seed=10 ${flag}> $outputfilename 86 | echo "--------------done $outputfilename" 87 | done 88 | done 89 | done 90 | 91 | nodes=("192.168.1.72" "192.168.1.73" "192.168.1.74" "192.168.1.75" "192.168.1.76") 92 | echo "Starting throughput test woth multiple nodes" 93 | echo "The clients will be deployed at ${nodes[@]}" 94 | 95 | for rp in ${allReadProb[@]}; do 96 | for flag in "${allFlags[@]}"; do 97 | for ((total=2;total<=4;++total)); do 98 | seed=111 99 | for ((client=1;client $outputfilename 109 | sleep 4 110 | echo "------------------done $outputfilename" 111 | killAllProcesses # for debugging 112 | # break #for debugging 113 | done 114 | # break 115 | done 116 | # break 117 | done 118 | 119 | 120 | 121 | 122 | 123 | killCorm 124 | echo "----------Done--------------" 125 | -------------------------------------------------------------------------------- /run_throughput_compaction.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # 5 | # CoRM: Compactable Remote Memory over RDMA 6 | # 7 | # Help functions to deploy CoRM and do a compaction experiments 8 | # 9 | # Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 10 | # 11 | # Author(s): Konstantin Taranov 12 | # 13 | 14 | source core.sh 15 | trap 'echo -ne "Stop all servers..." 
&& killAllProcesses && killCorm && echo "done" && exit 1' INT 16 | 17 | define HELP <<'EOF' 18 | 19 | Script for measuring compaction latency 20 | usage : $0 [options] 21 | options: --server=IP # file containing IP addressof the CoRM server 22 | --num=INT # the number of records to consume 23 | --dir=PATH #absolute path to corm 24 | EOF 25 | 26 | usage () { 27 | echo -e "$HELP" 28 | } 29 | 30 | 31 | num=8000000 32 | size=24 33 | server="" 34 | 35 | for arg in "$@" 36 | do 37 | case ${arg} in 38 | --help|-help|-h) 39 | usage 40 | exit 1 41 | ;; 42 | --server=*) 43 | server=`echo $arg | sed -e 's/--server=//'` 44 | server=`eval echo ${server}` # tilde and variable expansion 45 | ;; 46 | --size=*) 47 | size=`echo $arg | sed -e 's/--size=//'` 48 | size=`eval echo ${size}` # tilde and variable expansion 49 | ;; 50 | --num=*) 51 | num=`echo $arg | sed -e 's/--num=//'` 52 | num=`eval echo ${num}` # tilde and variable expansion 53 | ;; 54 | --dir=*) 55 | WORKDIR=`echo $arg | sed -e 's/--dir=//'` 56 | WORKDIR=`eval echo ${WORKDIR}` # tilde and variable expansion 57 | ;; 58 | esac 59 | done 60 | 61 | 62 | 63 | allReadProb=(0.5 1.0 0.95) #1.0 64 | allFlags=("" "--zipf" "--rdmaread" "--zipf --rdmaread") 65 | 66 | 67 | nodes=("192.168.1.72" "192.168.1.73" "192.168.1.74" "192.168.1.75" "192.168.1.76") 68 | echo "Starting throughput test woth multiple nodes" 69 | echo "The clients will be deployed at ${nodes[@]}" 70 | 71 | for rp in ${allReadProb[@]}; do 72 | for flag in "${allFlags[@]}"; do 73 | for ((total=1;total<=4;++total)); do 74 | seed=111 75 | startCorm $server "--send_buf_size=116384 --threads=8 --recv_buf_size=4096 --num_recv_buf=256" 76 | sleep 0.5 77 | loadCorm $size $num 78 | echo "loading is done" 79 | sleep 0.5 80 | unloadCorm "$(echo $num*0.8/1 | bc)" 81 | echo "unloading is done" 82 | sleep 0.5 83 | for ((client=0;client 9 | * 10 | */ 11 | #pragma once 12 | 13 | #define PAYLOAD_SIZE 7 // no more than 7 pointers as a payload. 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | 25 | namespace mts 26 | { 27 | 28 | typedef void (*thread_delay_cb)( int revents, void *arg); 29 | 30 | struct thread_msg_t{ 31 | typedef void (*thread_msg_cb)(thread_msg_t *msg); 32 | thread_msg_cb cb; 33 | void* payload[PAYLOAD_SIZE]; // PAYLOAD_SIZE Can be avoided by using circular memory pool for objects. 
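// Note (added remark): cb (one function pointer) plus the 7 pointer-sized payload slots make the message exactly 64 bytes on a 64-bit build, i.e. one cache line, as checked by the static_assert below.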
34 | thread_msg_t() {} 35 | }; 36 | static_assert(sizeof(thread_msg_t) == 64, "size of thread_msg_t is incorrect"); 37 | 38 | thread_local uint8_t thread_id; 39 | 40 | uint32_t num_threads; 41 | std::mutex reg_mutex; 42 | uint32_t reged_threads; 43 | 44 | std::vector*> msg_queues; 45 | std::vector notifies; 46 | std::vector loops; 47 | std::vector workers; 48 | 49 | 50 | void* GetWorker(uint32_t id){ 51 | assert(reged_threads==num_threads); 52 | return workers[id]; 53 | } 54 | 55 | void SetWorker(uint32_t id, void* worker){ 56 | workers[id] = worker; 57 | } 58 | 59 | 60 | void send_msg_to_thread(uint32_t dst, thread_msg_t* msg){ 61 | assert(reged_threads==num_threads); 62 | msg_queues[dst]->push(msg); 63 | } 64 | 65 | 66 | 67 | void send_msg_to_thread_and_notify(uint32_t dst, thread_msg_t* msg){ 68 | assert(reged_threads==num_threads); 69 | msg_queues[dst]->push(msg); 70 | notifies[dst]->send(); 71 | } 72 | 73 | 74 | bool poll_receive(uint32_t thread_id, thread_msg_t** msg){ 75 | return msg_queues[thread_id]->pop(*msg); 76 | } 77 | 78 | 79 | void setup_threads(uint32_t _num_threads){ 80 | num_threads = _num_threads; 81 | reged_threads = 0; 82 | msg_queues.resize(num_threads); 83 | notifies.resize(num_threads); 84 | loops.resize(num_threads); 85 | workers.resize(num_threads); 86 | }; 87 | 88 | } -------------------------------------------------------------------------------- /thread/thread.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A modifier of a thread to support io events 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #pragma once 12 | 13 | #include "../worker/generic_worker.hpp" 14 | #include "messenger.hpp" 15 | #include 16 | #include 17 | #include "../utilities/debug.h" 18 | 19 | #define NOW 0.000000001 20 | 21 | 22 | class Thread: public IOWatcher{ 23 | struct my_io: public ev::io{ 24 | my_io(uint32_t id, io_cb cb, void *ctx): id(id), cb(cb), ctx(ctx){ 25 | //empty 26 | } 27 | uint32_t id; 28 | io_cb cb; 29 | void *ctx; 30 | }; 31 | // events 32 | ev::async stopper; // for termination 33 | ev::idle main_event; // main event which is called when is not busy 34 | ev::timer timer_event; // call something time to time 35 | ev::dynamic_loop loop; // loop of the thread 36 | 37 | 38 | std::map io_events; // io events 39 | uint32_t current_io_id; 40 | 41 | ev::async notify; // for notifying about incoming messeges 42 | 43 | 44 | 45 | const int queue_size = 100; 46 | const uint32_t _thread_id; 47 | 48 | boost::lockfree::queue *_msg_queue; 49 | GenericWorker* const _worker; 50 | 51 | std::thread the_thread; 52 | 53 | 54 | 55 | public: 56 | Thread(uint32_t id, GenericWorker *w); 57 | 58 | 59 | ~Thread(){ 60 | text(log_fp,"\t[Thread] Try to destroy worker(%u)\n",_thread_id); 61 | delete _msg_queue; 62 | delete _worker; 63 | 64 | text(log_fp,"\t\t[Thread] Worker is destroyed\n"); 65 | } 66 | 67 | void Start() { 68 | this->the_thread = std::thread(&Thread::main_method,this); 69 | } 70 | 71 | uint32_t GetId() const { 72 | return _thread_id; 73 | }; 74 | 75 | 76 | boost::lockfree::queue* GetMsgQueue() const{ 77 | return _msg_queue; 78 | }; 79 | 80 | ev::async* GetNotify(){ 81 | return &(this->notify); 82 | }; 83 | 84 | ev::dynamic_loop* GetLoop(){ 85 | return &(this->loop); 86 | }; 87 | 88 | 89 | void main_cb (ev::idle &w, int revents){ 90 | 91 | _worker->main_cb(); 92 | //this->poll_message_cb(); 93 | } 94 | 
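// main_method below is the body of the spawned thread: it registers the per-thread libev watchers (an async "stopper" for termination, an async "notify" for incoming cross-thread messages, a coarse periodic timer driving sometimes_cb, and an idle watcher that keeps invoking the worker's main_cb), records the thread id in thread-local storage, and then runs the event loop until Stop() breaks it.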
95 | 96 | void main_method(){ 97 | // create async stopper for terminating the tread 98 | 99 | 100 | this->stopper.set(this->loop); 101 | this->stopper.set(this); 102 | this->stopper.priority = EV_MAXPRI-1; 103 | this->stopper.start(); 104 | 105 | 106 | this->notify.set(this->loop); 107 | this->notify.set(this); 108 | this->notify.priority = EV_MAXPRI-1; 109 | this->notify.start(); 110 | 111 | 112 | 113 | this->timer_event.set(this->loop); 114 | this->timer_event.set(this); 115 | this->timer_event.set(10,50); // after 10 repeat 50 116 | this->timer_event.priority = EV_MAXPRI-1; 117 | this->timer_event.start(10,50); // after 10 repeat 50 118 | 119 | 120 | 121 | this->main_event.set(this->loop); 122 | this->main_event.set(this); 123 | this->main_event.priority = EV_MAXPRI; 124 | this->main_event.start(); 125 | 126 | mts::thread_id = _thread_id; 127 | 128 | this->loop.run(0); 129 | 130 | } 131 | 132 | 133 | void timer_cb (ev::timer &w, int revents){ 134 | _worker->sometimes_cb(); 135 | 136 | w.repeat = 5; // repeat after 5 137 | w.again(); 138 | } 139 | 140 | void Stop(){ 141 | text(log_fp,"[Thread] Try stopping %d \n", this->GetId() ); 142 | this->stopper.send(); 143 | if(this->the_thread.joinable()){ 144 | this->the_thread.join(); 145 | } 146 | 147 | } 148 | 149 | 150 | void terminate_cb() { 151 | 152 | 153 | for (auto &pair: io_events) 154 | { 155 | pair.second->stop(); 156 | delete pair.second; 157 | } 158 | 159 | 160 | this->stopper.stop(); 161 | this->timer_event.stop(); 162 | this->main_event.stop(); 163 | this->notify.stop(); 164 | 165 | this->loop.break_loop(ev::ALL); 166 | text(log_fp,"[Thread] Thread(%d) is terminated\n", this->GetId() ); 167 | 168 | // print stats 169 | _worker->print_stats(); 170 | 171 | } 172 | 173 | 174 | void poll_message_cb() { 175 | // text(log_fp, "Poll Message %d \n", this->GetId() ); 176 | bool found = true; 177 | while(found){ 178 | 179 | mts::thread_msg_t* message = nullptr; 180 | this->_msg_queue->pop(message); 181 | found = (message!=nullptr); 182 | if(found){ 183 | //mts::thread_msg_cb cb = (mts::thread_msg_cb)message->cb; 184 | message->cb(message); 185 | } else { 186 | // text(log_fp,"No message %d \n", this->GetId() ); 187 | } 188 | } 189 | 190 | } 191 | 192 | void io_process (ev::io &w, int revents){ 193 | my_io& new_d = static_cast(w); 194 | text(log_fp,"filed = %d\n", new_d.fd); 195 | new_d.cb(new_d.id, new_d.ctx); 196 | } 197 | 198 | void install_io(int fd, io_cb cb, void* ctx ) override{ 199 | my_io *io = new my_io(current_io_id, cb,ctx); 200 | io_events[current_io_id] = io; 201 | current_io_id++; 202 | io->set(this->loop); 203 | io->set(this); 204 | io->start(fd, ev::READ); 205 | } 206 | 207 | void stop_io(uint32_t io_id) override{ 208 | auto it = io_events.find(io_id); 209 | assert(it!=io_events.end()); 210 | delete it->second; 211 | io_events.erase(it); 212 | } 213 | 214 | private: 215 | Thread(const Thread &) = delete; 216 | void operator=(const Thread &) = delete; 217 | 218 | }; 219 | 220 | 221 | 222 | namespace mts{ 223 | void RegisterThread(Thread *t){ 224 | reg_mutex.lock(); 225 | 226 | reged_threads++; 227 | msg_queues[t->GetId()] = t->GetMsgQueue(); 228 | notifies[t->GetId()] = t->GetNotify(); 229 | loops[t->GetId()] = t->GetLoop(); 230 | 231 | reg_mutex.unlock(); 232 | } 233 | 234 | void SCHEDULE_CALLBACK(int thread_id, ev_tstamp time, thread_delay_cb cb, void *arg) { 235 | ev_once (*loops[thread_id], 0, 0, time, cb, arg); 236 | } 237 | 238 | } 239 | 240 | 241 | Thread::Thread(uint32_t id, GenericWorker *w): loop(ev::AUTO), 
_thread_id(id), _worker(w) 242 | { 243 | //empty 244 | this->_msg_queue = new boost::lockfree::queue(queue_size); 245 | mts::RegisterThread(this); 246 | 247 | } 248 | 249 | 250 | 251 | 252 | // it helps to launch all threads. 253 | class LauncherMaster{ 254 | 255 | 256 | public: 257 | 258 | LauncherMaster (uint32_t tot_threads): current_thread_id(0), num_threads(tot_threads) 259 | { 260 | 261 | instance = this; 262 | std::signal(SIGINT, LauncherMaster::signal_handler); 263 | 264 | mts::setup_threads(tot_threads); 265 | } 266 | 267 | ~LauncherMaster(){ 268 | 269 | text(log_fp,"[LauncherMaster] Try to destroy all threads\n"); 270 | 271 | for (auto &iter: threads) 272 | { 273 | delete iter; 274 | } 275 | 276 | 277 | text(log_fp,"\t[LauncherMaster] All threads are destroyed\n"); 278 | 279 | } 280 | 281 | 282 | uint32_t add_worker(GenericWorker *worker){ 283 | assert(current_thread_idset_thread_id(current_thread_id); 287 | worker->set_io_watcher(t); 288 | 289 | return (current_thread_id++); 290 | } 291 | 292 | void launch(){ 293 | 294 | for (uint32_t i=1; i < current_thread_id; ++i) { 295 | text(log_fp,"\t[LauncherMaster] start thread %u\n",i); 296 | threads[i]->Start(); 297 | } 298 | text(log_fp,"\t[LauncherMaster] start thread 0\n"); 299 | 300 | threads[0]->main_method(); 301 | } 302 | 303 | 304 | private: 305 | 306 | void handler_wraper (int signum) 307 | { 308 | 309 | text(log_fp," Signal %d detected \n",signum); 310 | // handling code 311 | 312 | for (uint32_t i=0; i Stop(); 314 | } 315 | text(log_fp," All threads are stopped\n"); 316 | 317 | } 318 | 319 | 320 | std::vector threads; 321 | uint32_t current_thread_id; 322 | const uint32_t num_threads; 323 | 324 | 325 | 326 | static LauncherMaster* instance; 327 | 328 | static void signal_handler(int signum) 329 | { 330 | instance->handler_wraper(signum); 331 | } 332 | }; 333 | 334 | LauncherMaster* LauncherMaster::instance = nullptr; 335 | -------------------------------------------------------------------------------- /unload.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A simple code to partially unload data from CoRM to have fragmentation. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #include // std::cout 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "thread/thread.hpp" 25 | FILE *log_fp; 26 | 27 | #include "worker/client_api.hpp" 28 | #include "rdma/connectRDMA.hpp" 29 | 30 | #include "utilities/cxxopts.hpp" 31 | 32 | 33 | cxxopts::ParseResult 34 | parse(int argc, char* argv[]) 35 | { 36 | cxxopts::Options options(argv[0], "unload random objects from CoRM"); 37 | options 38 | .positional_help("[optional args]") 39 | .show_positional_help(); 40 | 41 | try 42 | { 43 | 44 | options.add_options() 45 | ("server", "Another address", cxxopts::value(), "IP") 46 | ("i,input", "input file", cxxopts::value()->default_value("test.bin"), "FILE") 47 | ("n,num", "Number of objects to deallocate", cxxopts::value()->default_value("123"), "N") 48 | ("help", "Print help") 49 | ; 50 | 51 | auto result = options.parse(argc, argv); 52 | 53 | if (result.count("help")) 54 | { 55 | std::cout << options.help({""}) << std::endl; 56 | exit(0); 57 | } 58 | 59 | if (!result.count("server")) 60 | { 61 | throw cxxopts::OptionException("input must be specified"); 62 | } 63 | 64 | 65 | 66 | return result; 67 | 68 | } catch (const cxxopts::OptionException& e) 69 | { 70 | std::cout << "error parsing options: " << e.what() << std::endl; 71 | std::cout << options.help({""}) << std::endl; 72 | exit(1); 73 | } 74 | } 75 | 76 | int main(int argc, char* argv[]){ 77 | 78 | 79 | auto allparams = parse(argc,argv); 80 | 81 | log_fp=stdout; 82 | 83 | std::string server = allparams["server"].as(); 84 | 85 | uint32_t todelete = allparams["num"].as(); 86 | std::string input = allparams["input"].as(); 87 | 88 | ClientRDMA rdma((char*)server.c_str(),9999); 89 | struct rdma_cm_id * id = rdma.sendConnectRequest(); 90 | 91 | struct ibv_pd * pd = ClientRDMA::create_pd(id); 92 | 93 | struct ibv_qp_init_attr attr; 94 | struct rdma_conn_param conn_param; 95 | memset(&attr, 0, sizeof(attr)); 96 | attr.cap.max_send_wr = 32; 97 | attr.cap.max_recv_wr = 32; 98 | attr.cap.max_send_sge = 1; 99 | attr.cap.max_recv_sge = 1; 100 | attr.cap.max_inline_data = 0; 101 | attr.qp_type = IBV_QPT_RC; 102 | 103 | memset(&conn_param, 0 , sizeof(conn_param)); 104 | conn_param.responder_resources = 0; 105 | conn_param.initiator_depth = 5; 106 | conn_param.retry_count = 3; 107 | conn_param.rnr_retry_count = 3; 108 | 109 | 110 | VerbsEP* ep = ClientRDMA::connectEP(id, &attr, &conn_param, pd); 111 | 112 | printf("Connected\n"); 113 | sleep(1); 114 | 115 | RemoteMemoryClient* api = new RemoteMemoryClient(0,ep); 116 | 117 | 118 | 119 | std::fstream fout; 120 | fout.open(input.c_str(), std::ios::in|std::ios::binary); 121 | uint32_t NN = 0; 122 | 123 | fout.read((char*)&NN,sizeof(NN)); 124 | 125 | std::vector objects(NN); 126 | 127 | for(uint32_t i = 0; i < NN; i++){ 128 | LocalObjectHandler* obj = (LocalObjectHandler*)malloc(sizeof(LocalObjectHandler)); 129 | fout.read((char*)obj,sizeof(LocalObjectHandler)); 130 | // obj->print(); 131 | objects[i] = obj; 132 | } 133 | fout.close(); 134 | 135 | 136 | if(NN != todelete){ 137 | std::random_shuffle(objects.begin(), objects.end()); 138 | } 139 | 140 | for(uint32_t i = 0; i < todelete; i++){ 141 | api->Free(objects[i]); 142 | free(objects[i]); 143 | } 144 | 145 | 146 | if(NN != todelete){ 147 | uint32_t rest = NN-todelete; 148 | std::fstream fout; 149 | fout.open(input.c_str(), 
std::ios::trunc|std::ios::out|std::ios::binary); 150 | 151 | fout.write((char*)&rest,sizeof(rest)); 152 | 153 | for(uint32_t i = 0; i < rest; i++){ 154 | fout.write((char*)objects[todelete+i],sizeof(LocalObjectHandler)); 155 | } 156 | 157 | fout.close(); 158 | 159 | printf("Objects' keys are written to file %s\n", input.c_str()); 160 | } else { 161 | std::remove(input.c_str()); 162 | } 163 | 164 | 165 | return 0; 166 | } 167 | -------------------------------------------------------------------------------- /utilities/block_home_table.h: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A class for mapping a block to a thread allocator that owns that block. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "../alloc/alloc.hpp" 15 | #include 16 | #include 17 | #include "rcu.h" 18 | 19 | class BlockHomeTable { 20 | public: 21 | BlockHomeTable(); 22 | ~BlockHomeTable(); 23 | 24 | ThreadAlloc *Lookup(addr_t addr); 25 | void Insert(int thread_id, addr_t addr, ThreadAlloc *alloc); 26 | bool Update(addr_t addr, ThreadAlloc *alloc); 27 | void Remove(addr_t addr); 28 | 29 | protected: 30 | struct KeyValuePair { 31 | addr_t addr; 32 | ThreadAlloc *alloc; 33 | }; 34 | 35 | static const size_t BucketBytes = CACHELINE; 36 | static const size_t ItemsInBucket = BucketBytes / sizeof(KeyValuePair); 37 | 38 | static const size_t InitialBucketLog = 15; 39 | static const size_t InitialBucketCount = 1 << InitialBucketLog; 40 | 41 | struct Bucket { 42 | KeyValuePair items[ItemsInBucket]; 43 | }; 44 | 45 | static_assert(sizeof(Bucket) == BucketBytes, "Bucket of unexpected size"); 46 | 47 | struct Header { 48 | size_t buckets; 49 | uint8_t pad[BucketBytes - 50 | sizeof(size_t) ]; 51 | }; 52 | 53 | static_assert(sizeof(Header) == BucketBytes, "Header of unexpected size"); 54 | 55 | ThreadAlloc *LookupIn(Bucket *bucket_array, size_t bucket_count, 56 | uint64_t (*hash_fun)(addr_t addr), addr_t addr); 57 | ThreadAlloc *LookupIn(Bucket *bucket, addr_t addr); 58 | 59 | bool UpdateIn(ThreadAlloc *newalloc, Bucket *bucket, addr_t addr); 60 | 61 | 62 | bool InsertInto(Header *hdr, addr_t addr, ThreadAlloc *alloc); 63 | bool InsertInto(Bucket *bucket_array, size_t bucket_count, 64 | uint64_t (*hash_fun)(addr_t addr), addr_t addr, ThreadAlloc *alloc); 65 | bool InsertInto(Bucket *bucket, addr_t addr, ThreadAlloc *alloc); 66 | 67 | bool RemoveFrom(Bucket *array, addr_t addr); 68 | 69 | void Grow(int thread_id); 70 | bool CopyItems(Header *new_hdr, Header *old_hdr); 71 | bool CopyBucket(Header *new_hdr, Bucket *bucket); 72 | 73 | static void AllFreeCb(void *owner); 74 | 75 | static void *AllocMemory(size_t size); 76 | static void FreeMemory(void *ptr); 77 | 78 | protected: 79 | 80 | // a pointer to the table 81 | std::atomic
table; 82 | }; 83 | 84 | // we use three hash functions for geting raget bucket in the hash table. 85 | inline uint64_t hash_1(addr_t addr) { 86 | return addr >> BLOCK_BIT_SIZE; 87 | } 88 | 89 | inline uint64_t hash_2(addr_t addr) { 90 | uint64_t key = addr >> BLOCK_BIT_SIZE; 91 | key ^= key >> 33; 92 | key *= 0xff51afd7ed558ccdull; 93 | key ^= key >> 33; 94 | key *= 0xc4ceb9fe1a85ec53ull; 95 | key ^= key >> 33; 96 | return key; 97 | } 98 | 99 | inline uint64_t hash_3(addr_t addr) { 100 | uint64_t key = addr >> BLOCK_BIT_SIZE; 101 | key *= 0xc6a4a7935bd1e995ULL; 102 | key ^= key >> 47; 103 | key *= 0xc6a4a7935bd1e995ULL; 104 | return key; 105 | } 106 | 107 | inline uint64_t index(uint64_t hash, uint64_t buckets) { 108 | return hash & (buckets - 1); 109 | } 110 | 111 | BlockHomeTable::BlockHomeTable() { 112 | size_t size = InitialBucketCount * sizeof(Bucket) + sizeof(Header); 113 | Header *hdr = (Header *)AllocMemory(size); 114 | hdr->buckets = InitialBucketCount; 115 | table.store(hdr); 116 | } 117 | 118 | BlockHomeTable::~BlockHomeTable() { 119 | FreeMemory(table.load()); 120 | } 121 | 122 | ThreadAlloc *BlockHomeTable::Lookup(addr_t addr) { 123 | ThreadAlloc *ret = nullptr; 124 | Header *hdr = table.load(std::memory_order_acquire); 125 | Bucket *bucket_array = (Bucket *)(hdr + 1); 126 | Bucket *bucket = nullptr; 127 | 128 | bucket = bucket_array + index(hash_1(addr), hdr->buckets); 129 | if((ret = LookupIn(bucket, addr)) != nullptr) { 130 | return ret; 131 | } 132 | bucket = bucket_array + index(hash_2(addr), hdr->buckets); 133 | if((ret = LookupIn(bucket, addr)) != nullptr) { 134 | return ret; 135 | } 136 | bucket = bucket_array + index(hash_3(addr), hdr->buckets); 137 | if((ret = LookupIn(bucket, addr)) != nullptr) { 138 | return ret; 139 | } 140 | 141 | // may return nullptr during compaction 142 | return nullptr; 143 | } 144 | 145 | ThreadAlloc *BlockHomeTable::LookupIn(Bucket *bucket, addr_t addr) { 146 | 147 | for(unsigned i = 0;i < ItemsInBucket;i++) { 148 | if(bucket->items[i].addr == addr) { 149 | std::atomic *addr_ptr = (std::atomic *)&bucket->items[i].addr; 150 | if(addr_ptr->load(std::memory_order_acquire) == addr) { 151 | return bucket->items[i].alloc; 152 | } 153 | } 154 | } 155 | 156 | return nullptr; 157 | } 158 | 159 | 160 | bool BlockHomeTable::Update(addr_t addr, ThreadAlloc *newalloc ) { 161 | Header *hdr = table.load(std::memory_order_acquire); 162 | Bucket *bucket_array = (Bucket *)(hdr + 1); 163 | Bucket *bucket = nullptr; 164 | 165 | bucket = bucket_array + index(hash_1(addr), hdr->buckets); 166 | if(UpdateIn(newalloc, bucket, addr)) { 167 | return true; 168 | } 169 | bucket = bucket_array + index(hash_2(addr), hdr->buckets); 170 | if(UpdateIn(newalloc, bucket, addr)) { 171 | return true; 172 | } 173 | bucket = bucket_array + index(hash_3(addr), hdr->buckets); 174 | if(UpdateIn(newalloc, bucket, addr)) { 175 | return true; 176 | } 177 | 178 | // may return nullptr during compaction 179 | return false; 180 | } 181 | 182 | bool BlockHomeTable::UpdateIn(ThreadAlloc *newalloc, Bucket *bucket, addr_t addr) { 183 | 184 | for(unsigned i = 0;i < ItemsInBucket;i++) { 185 | if(bucket->items[i].addr == addr) { 186 | std::atomic *addr_ptr = (std::atomic *)&bucket->items[i].addr; 187 | 188 | if(addr_ptr->load(std::memory_order_acquire) == addr) { 189 | bucket->items[i].alloc = newalloc; 190 | return true; 191 | } 192 | } 193 | } 194 | 195 | return false; 196 | } 197 | 198 | 199 | 200 | void BlockHomeTable::Insert(int thread_id, addr_t addr, ThreadAlloc *alloc) { 201 | 
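// Try to place the block address into one of the three candidate buckets chosen by hash_1/hash_2/hash_3; if all three are full, grow (double) the table and retry. The old table is reclaimed only after every thread has drained via BroadcastDrain (see Grow()).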
while(!InsertInto(table.load(std::memory_order_relaxed), addr, alloc)) { 202 | printf("[BlockHomeTable] grow block table event\n"); 203 | Grow(thread_id); 204 | } 205 | } 206 | 207 | bool BlockHomeTable::InsertInto(Header *hdr, addr_t addr, ThreadAlloc *alloc) { 208 | Bucket *bucket_array = (Bucket *)(hdr + 1); 209 | Bucket *bucket = nullptr; 210 | 211 | bucket = bucket_array + index(hash_1(addr), hdr->buckets); 212 | if(InsertInto(bucket, addr, alloc)) { 213 | return true; 214 | } 215 | bucket = bucket_array + index(hash_2(addr), hdr->buckets); 216 | if(InsertInto(bucket, addr, alloc)) { 217 | return true; 218 | } 219 | bucket = bucket_array + index(hash_3(addr), hdr->buckets); 220 | if(InsertInto(bucket, addr, alloc)) { 221 | return true; 222 | } 223 | 224 | return false; 225 | } 226 | 227 | bool BlockHomeTable::InsertInto(Bucket *bucket, addr_t addr, ThreadAlloc *alloc) { 228 | for(unsigned i = 0;i < ItemsInBucket;i++) { 229 | if(bucket->items[i].addr == 0) { 230 | bucket->items[i].alloc = alloc; 231 | std::atomic *addr_ptr = (std::atomic *)&bucket->items[i].addr; 232 | addr_ptr->store(addr, std::memory_order_release); 233 | return true; 234 | } 235 | 236 | assert(bucket->items[i].addr != addr); 237 | } 238 | 239 | return false; 240 | } 241 | 242 | void BlockHomeTable::Remove(addr_t addr) { 243 | Header *hdr = table.load(std::memory_order_relaxed); 244 | Bucket *bucket_array = (Bucket *)(hdr + 1); 245 | Bucket *bucket = nullptr; 246 | 247 | bucket = bucket_array + index(hash_1(addr), hdr->buckets); 248 | if(RemoveFrom(bucket, addr)) { 249 | return; 250 | } 251 | bucket = bucket_array + index(hash_2(addr), hdr->buckets); 252 | if(RemoveFrom(bucket, addr)) { 253 | return; 254 | } 255 | bucket = bucket_array + index(hash_3(addr), hdr->buckets); 256 | if(RemoveFrom(bucket, addr)) { 257 | return; 258 | } 259 | 260 | assert(0); 261 | } 262 | 263 | bool BlockHomeTable::RemoveFrom(Bucket *bucket, addr_t addr) { 264 | for(unsigned i = 0;i < ItemsInBucket;i++) { 265 | if(bucket->items[i].addr == addr) { 266 | std::atomic *addr_ptr = (std::atomic *)&bucket->items[i].addr; 267 | addr_ptr->store(0, std::memory_order_release); 268 | return true; 269 | } 270 | } 271 | 272 | return false; 273 | } 274 | 275 | void BlockHomeTable::Grow(int thread_id) { 276 | Header *hdr = table.load(std::memory_order_relaxed); 277 | size_t new_buckets = hdr->buckets; 278 | Header *new_hdr; 279 | 280 | while(true) { 281 | new_buckets <<= 1; 282 | size_t new_size = new_buckets * sizeof(Bucket) + sizeof(Header); 283 | 284 | new_hdr = (Header *)AllocMemory(new_size); 285 | new_hdr->buckets = new_buckets; 286 | 287 | if(CopyItems(new_hdr, hdr)) { 288 | break; 289 | } 290 | 291 | FreeMemory(new_hdr); 292 | } 293 | 294 | table.store(new_hdr, std::memory_order_release); 295 | 296 | BroadcastDrain::Drain(thread_id, AllFreeCb, hdr); 297 | } 298 | 299 | bool BlockHomeTable::CopyItems(Header *new_hdr, Header *old_hdr) { 300 | size_t old_bucket_count = old_hdr->buckets; 301 | Bucket *old_buckets = (Bucket *)(old_hdr + 1); 302 | 303 | for(unsigned i = 0;i < old_bucket_count;i++) { 304 | if(!CopyBucket(new_hdr, old_buckets + i)) { 305 | return false; 306 | } 307 | } 308 | 309 | return true; 310 | } 311 | 312 | bool BlockHomeTable::CopyBucket(Header *new_hdr, Bucket *bucket) { 313 | for(unsigned i = 0;i < ItemsInBucket;i++) { 314 | ThreadAlloc *alloc = bucket->items[i].alloc; 315 | 316 | if(alloc != nullptr) { 317 | addr_t addr = bucket->items[i].addr; 318 | 319 | if(!InsertInto(new_hdr, addr, alloc)) { 320 | return false; 321 | } 322 | 
} 323 | } 324 | 325 | return true; 326 | } 327 | 328 | void BlockHomeTable::AllFreeCb(void *owner) { 329 | FreeMemory(owner); 330 | } 331 | 332 | void *BlockHomeTable::AllocMemory(size_t size) { 333 | void *ret = aligned_alloc(CACHELINE, size); 334 | memset(ret, 0, size); 335 | return ret; 336 | } 337 | 338 | void BlockHomeTable::FreeMemory(void *ptr) { 339 | free(ptr); 340 | } 341 | 342 | -------------------------------------------------------------------------------- /utilities/debug.h: -------------------------------------------------------------------------------- 1 | /** 2 | * ring (Direct Access REplication) 3 | * 4 | * Debugging and logging utilities 5 | * 6 | * Copyright (c) 2014-2015 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Marius Poke 9 | * 10 | */ 11 | 12 | #ifndef DEBUG_H_ 13 | #define DEBUG_H_ 14 | #include 15 | #include 16 | #include 17 | #define __STDC_FORMAT_MACROS 18 | #include 19 | #include 20 | 21 | //#define DEBUG 22 | //extern struct timeval prev_tv; 23 | //extern uint64_t jump_cnt; 24 | 25 | #define info(stream, fmt, ...) do {\ 26 | fprintf(stream, fmt, ##__VA_ARGS__); \ 27 | fflush(stream); \ 28 | } while(0) 29 | #define info_wtime(stream, fmt, ...) do {\ 30 | struct timeval _debug_tv;\ 31 | gettimeofday(&_debug_tv,NULL);\ 32 | fprintf(stream, "[%lu:%06lu] " fmt, _debug_tv.tv_sec, _debug_tv.tv_usec, ##__VA_ARGS__); \ 33 | fflush(stream); \ 34 | } while(0) 35 | 36 | #ifdef DEBUG 37 | #define debug(stream, fmt, ...) do {\ 38 | struct timeval _debug_tv;\ 39 | gettimeofday(&_debug_tv,NULL);\ 40 | fprintf(stream, "[DEBUG %lu:%lu] %s/%d/%s() " fmt, _debug_tv.tv_sec, _debug_tv.tv_usec, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ 41 | fflush(stream); \ 42 | } while(0) 43 | #define text(stream, fmt, ...) do {\ 44 | fprintf(stream, fmt, ##__VA_ARGS__); \ 45 | fflush(stream); \ 46 | } while(0) 47 | #define text_wtime(stream, fmt, ...) do {\ 48 | struct timeval _debug_tv;\ 49 | gettimeofday(&_debug_tv,NULL);\ 50 | fprintf(stream, "[%lu:%lu] " fmt, _debug_tv.tv_sec, _debug_tv.tv_usec, ##__VA_ARGS__); \ 51 | fflush(stream); \ 52 | } while(0) 53 | #else 54 | #define debug(stream, fmt, ...) 55 | #define text(stream, fmt, ...) 56 | #define text_wtime(stream, fmt, ...) 57 | #endif 58 | 59 | //#ifdef DEBUG 60 | 61 | //#else 62 | //#define error(stream, fmt, ...) 63 | //#endif 64 | 65 | //#ifdef DEBUG 66 | #define error_return(rc, stream, fmt, ...) do { \ 67 | fprintf(stream, "[ERROR] %s/%d/%s() " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ 68 | fflush(stream); \ 69 | return (rc); \ 70 | } while(0) 71 | //#else 72 | //#define error_return(rc, stream, fmt, ...) return (rc) 73 | //#endif 74 | 75 | //#ifdef DEBUG 76 | #define error_exit(rc, stream, fmt, ...) do { \ 77 | fprintf(stream, "[ERROR] %s/%d/%s() " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ 78 | fflush(stream); \ 79 | exit(rc); \ 80 | } while(0) 81 | //#else 82 | //#define error_exit(rc, stream, fmt, ...) exit(rc) 83 | //#endif 84 | 85 | extern FILE *log_fp; 86 | 87 | #endif /* DEBUG_H_ */ 88 | 89 | -------------------------------------------------------------------------------- /utilities/rcu.h: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A class for ensuring that each thread does not access the hometable during a grow. 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "../thread/thread.hpp" 15 | 16 | // This is a one-shot object. 17 | class BroadcastDrain { 18 | public: 19 | typedef void (*timer_rcu_cb)(void *owner); 20 | 21 | public: 22 | static void Drain(int home_thread_id, timer_rcu_cb cb, void *owner); 23 | 24 | protected: 25 | BroadcastDrain(int home_thread_id, timer_rcu_cb cb, void *owner); 26 | ~BroadcastDrain(); 27 | 28 | void Drain(); 29 | 30 | static void DrainingDoneOne(BroadcastDrain *_this); 31 | 32 | void SendTmsg(int thread_id); 33 | bool DrainingDoneOne(); 34 | 35 | static void RequestTmsg(mts::thread_msg_t *tmsg); 36 | static void ResponseTmsg(mts::thread_msg_t *tmsg); 37 | 38 | protected: 39 | const int host_thread_id; 40 | 41 | timer_rcu_cb cb; 42 | void *owner; 43 | 44 | // threads remaining to drain 45 | int remaining; 46 | }; 47 | 48 | 49 | 50 | void BroadcastDrain::Drain(int home_thread_id, timer_rcu_cb cb, void *owner) { 51 | BroadcastDrain *rcu = new BroadcastDrain(home_thread_id, cb, owner); 52 | rcu->Drain(); 53 | } 54 | 55 | BroadcastDrain::BroadcastDrain(int thread_id, timer_rcu_cb cb, void *owner) 56 | : host_thread_id(thread_id), 57 | cb(cb), 58 | owner(owner), 59 | remaining(0) { 60 | // empty 61 | } 62 | 63 | BroadcastDrain::~BroadcastDrain() { 64 | // empty 65 | } 66 | 67 | void BroadcastDrain::Drain() { 68 | int thread_count = mts::num_threads; 69 | remaining = 1; 70 | 71 | for(int i = 0;i < thread_count;i++) { 72 | if(i != host_thread_id) { 73 | SendTmsg(i); 74 | remaining++; 75 | } 76 | } 77 | 78 | DrainingDoneOne(this); 79 | } 80 | 81 | void BroadcastDrain::SendTmsg(int thread_id) { 82 | mts::thread_msg_t *tmsg = new mts::thread_msg_t(); 83 | //Tmsg *ctx = new (tmsg->payload) Tmsg(this, host_thread_id); 84 | tmsg->cb = &BroadcastDrain::RequestTmsg; 85 | tmsg->payload[0] = this; 86 | tmsg->payload[1] = (void*)(uint64_t)host_thread_id; 87 | mts::send_msg_to_thread_and_notify(thread_id, tmsg); 88 | } 89 | 90 | void BroadcastDrain::DrainingDoneOne(BroadcastDrain *_this) { 91 | if(_this->DrainingDoneOne()) { 92 | delete _this; 93 | } 94 | } 95 | 96 | bool BroadcastDrain::DrainingDoneOne() { 97 | bool ret; 98 | 99 | if(--remaining == 0) { 100 | timer_rcu_cb cb = this->cb; 101 | void *owner = this->owner; 102 | 103 | cb(owner); 104 | 105 | ret = true; 106 | } else { 107 | ret = false; 108 | } 109 | 110 | return ret; 111 | } 112 | 113 | void BroadcastDrain::RequestTmsg(mts::thread_msg_t *tmsg) { 114 | tmsg->cb = &BroadcastDrain::ResponseTmsg; 115 | int _resp_thread_id = (int)(uint64_t)tmsg->payload[1]; 116 | mts::send_msg_to_thread_and_notify(_resp_thread_id, tmsg); 117 | } 118 | 119 | void BroadcastDrain::ResponseTmsg(mts::thread_msg_t *tmsg) { 120 | 121 | BroadcastDrain *_this = (BroadcastDrain*)tmsg->payload[0]; 122 | free(tmsg); 123 | 124 | DrainingDoneOne(_this); 125 | } 126 | -------------------------------------------------------------------------------- /utilities/timer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * ring (Direct Access REplication) 3 | * 4 | * Timer implementation 5 | * 6 | * Copyright (c) 2014-2015 ETH-Zurich. All rights reserved. 7 | * 8 | * Copyright (c) 2009 The Trustees of Indiana University and Indiana 9 | * University Research and Technology 10 | * Corporation. All rights reserved. 
11 | * 12 | * Author(s): Torsten Hoefler 13 | */ 14 | 15 | #ifndef TIMER_H_ 16 | #define TIMER_H_ 17 | 18 | #include 19 | 20 | #define UINT32_T uint32_t 21 | #define UINT64_T uint64_t 22 | 23 | #define HRT_CALIBRATE(freq) do { \ 24 | static volatile HRT_TIMESTAMP_T t1, t2; \ 25 | static volatile UINT64_T elapsed_ticks, min = (UINT64_T)(~0x1); \ 26 | int notsmaller=0; \ 27 | while(notsmaller<3) { \ 28 | HRT_GET_TIMESTAMP(t1); \ 29 | sleep(1); \ 30 | /* nanosleep((struct timespec[]){{0, 10000000}}, NULL); */ \ 31 | HRT_GET_TIMESTAMP(t2); \ 32 | HRT_GET_ELAPSED_TICKS(t1, t2, &elapsed_ticks); \ 33 | notsmaller++; \ 34 | if(elapsed_ticks 10 | * 11 | */ 12 | #pragma once 13 | #include // std::cout 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | 26 | #include 27 | #include "zipf.hpp" 28 | 29 | class Trace{ 30 | public: 31 | virtual ~Trace() = default; 32 | virtual std::pair get_next() = 0; 33 | }; 34 | 35 | class YCSB: public Trace 36 | { 37 | 38 | const uint32_t max_value_uni = 0xFFFFFFFF; 39 | std::mt19937 generator; 40 | zipf_distribution<> zipf; 41 | std::uniform_int_distribution dis; 42 | const uint32_t read_threshold; 43 | 44 | public: 45 | 46 | YCSB(unsigned long seed, double read_prob, uint32_t N, double theta): 47 | generator(seed), zipf{N,theta}, dis(0,max_value_uni-1), read_threshold((uint32_t)(read_prob*max_value_uni)) 48 | { 49 | //empty 50 | } 51 | 52 | 53 | virtual std::pair get_next() override 54 | { 55 | uint32_t rank = zipf(generator)-1; 56 | uint32_t val = dis(generator); 57 | char type = (val < read_threshold) ? 'r' : 'w'; 58 | return std::make_pair(rank, type); 59 | } 60 | 61 | }; 62 | 63 | 64 | class Uniform: public Trace 65 | { 66 | const uint32_t max_value_uni = 0xFFFFFFFF; 67 | std::mt19937 generator; 68 | std::uniform_int_distribution uni; 69 | std::uniform_int_distribution dis; 70 | const uint32_t read_threshold; 71 | 72 | public: 73 | 74 | Uniform(unsigned long seed, double read_prob, uint32_t N): 75 | generator(seed), uni{0,N-1}, dis(0,max_value_uni-1), read_threshold((uint32_t)(read_prob*max_value_uni)) 76 | { 77 | //empty 78 | } 79 | 80 | virtual std::pair get_next() override 81 | { 82 | uint32_t rank = uni(generator); 83 | uint32_t val = dis(generator); 84 | char type = (val < read_threshold) ? 'r' : 'w'; 85 | return std::make_pair(rank, type); 86 | } 87 | 88 | }; -------------------------------------------------------------------------------- /utilities/zipf.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * A zipf implementation from a repo on github. 5 | * 6 | * 7 | */ 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | /** Zipf-like random distribution. 
15 | * 16 | * "Rejection-inversion to generate variates from monotone discrete 17 | * distributions", Wolfgang Hörmann and Gerhard Derflinger 18 | * ACM TOMACS 6.3 (1996): 169-184 19 | */ 20 | template 21 | class zipf_distribution 22 | { 23 | public: 24 | typedef RealType input_type; 25 | typedef IntType result_type; 26 | 27 | static_assert(std::numeric_limits::is_integer, ""); 28 | static_assert(!std::numeric_limits::is_integer, ""); 29 | 30 | zipf_distribution(const IntType n=std::numeric_limits::max(), 31 | const RealType q=1.0) 32 | : n(n) 33 | , q(q) 34 | , H_x1(H(1.5) - 1.0) 35 | , H_n(H(n + 0.5)) 36 | , dist(H_x1, H_n) 37 | {} 38 | 39 | IntType operator()(std::mt19937& rng) 40 | { 41 | while (true) { 42 | const RealType u = dist(rng); 43 | const RealType x = H_inv(u); 44 | const IntType k = clamp(std::round(x), 1, n); 45 | if (u >= H(k + 0.5) - h(k)) { 46 | return k; 47 | } 48 | } 49 | } 50 | 51 | private: 52 | /** Clamp x to [min, max]. */ 53 | template 54 | static constexpr T clamp(const T x, const T min, const T max) 55 | { 56 | return std::max(min, std::min(max, x)); 57 | } 58 | 59 | /** exp(x) - 1 / x */ 60 | static double 61 | expxm1bx(const double x) 62 | { 63 | return (std::abs(x) > epsilon) 64 | ? std::expm1(x) / x 65 | : (1.0 + x/2.0 * (1.0 + x/3.0 * (1.0 + x/4.0))); 66 | } 67 | 68 | /** H(x) = log(x) if q == 1, (x^(1-q) - 1)/(1 - q) otherwise. 69 | * H(x) is an integral of h(x). 70 | * 71 | * Note the numerator is one less than in the paper order to work with all 72 | * positive q. 73 | */ 74 | const RealType H(const RealType x) 75 | { 76 | const RealType log_x = std::log(x); 77 | return expxm1bx((1.0 - q) * log_x) * log_x; 78 | } 79 | 80 | /** log(1 + x) / x */ 81 | static RealType 82 | log1pxbx(const RealType x) 83 | { 84 | return (std::abs(x) > epsilon) 85 | ? std::log1p(x) / x 86 | : 1.0 - x * ((1/2.0) - x * ((1/3.0) - x * (1/4.0))); 87 | } 88 | 89 | /** The inverse function of H(x) */ 90 | const RealType H_inv(const RealType x) 91 | { 92 | const RealType t = std::max(-1.0, x * (1.0 - q)); 93 | return std::exp(log1pxbx(t) * x); 94 | } 95 | 96 | /** That hat function h(x) = 1 / (x ^ q) */ 97 | const RealType h(const RealType x) 98 | { 99 | return std::exp(-q * std::log(x)); 100 | } 101 | 102 | static constexpr RealType epsilon = 1e-8; 103 | 104 | IntType n; ///< Number of elements 105 | RealType q; ///< Exponent 106 | RealType H_x1; ///< H(x_1) 107 | RealType H_n; ///< H(n) 108 | std::uniform_real_distribution dist; ///< [H(x_1), H(n)] 109 | }; -------------------------------------------------------------------------------- /worker/communication.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * Types for server-client communication 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | 12 | 13 | #pragma once 14 | 15 | 16 | #include "../common/common.hpp" 17 | 18 | struct request_t{ 19 | uint8_t type; 20 | uint8_t version; 21 | uint32_t size; 22 | uint32_t req_id; 23 | client_addr_t addr; 24 | }; 25 | 26 | enum RequestType: uint8_t 27 | { 28 | READ = 1, 29 | WRITE, 30 | WRITEATOMIC, 31 | ALLOC, 32 | FREE, 33 | FIXPOINTER, 34 | COMPACT, // for debugging and benchmarking 35 | DISCONNECT 36 | }; 37 | 38 | struct message_header_t{ 39 | uint8_t thread_id; // destination/source lid 40 | uint8_t type; // type of the message 41 | }; 42 | 43 | struct reply_t{ 44 | uint8_t version; 45 | uint8_t status; 46 | client_addr_t ret_addr; 47 | uint32_t data_size; 48 | uint32_t id; 49 | }; 50 | 51 | -------------------------------------------------------------------------------- /worker/generic_worker.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * Interfaces for an IO-enabled thread 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 7 | * 8 | * Author(s): Konstantin Taranov 9 | * 10 | */ 11 | #pragma once 12 | 13 | 14 | typedef void (*io_cb)(uint32_t id, void* ctx); 15 | class IOWatcher{ 16 | public: 17 | virtual void install_io(int fd, io_cb cb, void* ctx ) = 0; 18 | virtual void stop_io(uint32_t io_id) = 0; 19 | virtual ~IOWatcher() = default; 20 | }; 21 | 22 | 23 | class GenericWorker { 24 | 25 | 26 | protected: 27 | 28 | uint8_t local_thread_id; 29 | IOWatcher* local_io_watcher; 30 | 31 | public: 32 | virtual ~GenericWorker() = default; 33 | 34 | // Allocate a block. 35 | virtual void main_cb() = 0; 36 | 37 | virtual void sometimes_cb() = 0; 38 | 39 | virtual void set_io_watcher(IOWatcher *w){ 40 | this->local_io_watcher = w; 41 | } 42 | 43 | virtual void set_thread_id(uint8_t id){ 44 | this->local_thread_id = id; 45 | } 46 | 47 | virtual void print_stats() = 0; 48 | 49 | }; 50 | 51 | 52 | -------------------------------------------------------------------------------- /workload_readwrite.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * CoRM: Compactable Remote Memory over RDMA 3 | * 4 | * Various read/write workload for CoRM 5 | * 6 | * Copyright (c) 2020-2021 ETH-Zurich. All rights reserved. 
 *
 * Author(s): Konstantin Taranov
 *
 */
#include <iostream>   // std::cout
#include <fstream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <string>
#include <utility>
#include <vector>
#include <atomic>
#include <thread>
#include <chrono>
#include <unistd.h>

FILE *log_fp;


#include "worker/client_api.hpp"
#include "rdma/connectRDMA.hpp"
#include "utilities/zipf.hpp"
#include "utilities/ycsb.hpp"

#include "utilities/cxxopts.hpp"


using ReadFuncPtr = int (RemoteMemoryClient::*)( LocalObjectHandler* obj, char* buffer, uint32_t length );

std::atomic<int> order(0);

uint64_t num;

cxxopts::ParseResult
parse(int argc, char* argv[])
{
    cxxopts::Options options(argv[0], "Read/write workload for CoRM");
    options
        .positional_help("[optional args]")
        .show_positional_help();

    try
    {
        options.add_options()
            ("server", "Server address", cxxopts::value<std::string>(), "IP")
            ("i,input", "input file", cxxopts::value<std::string>()->default_value("test.bin"), "FILE")
            ("t,threads", "the number of threads", cxxopts::value<uint32_t>()->default_value(std::to_string(1)), "N")
            ("target", "expected rate ops/sec", cxxopts::value<uint64_t>()->default_value(std::to_string(1000)), "N")
            ("p,prob", "Probability of read", cxxopts::value<float>()->default_value(std::to_string(0.5f)), "N")
            ("seed", "seed", cxxopts::value<int>()->default_value(std::to_string(3)), "N")
            ("zipf", "use zipf distribution as in YCSB")
            ("rdmaread", "Use one-sided reads")
            ("n,num", "Number of requests to run", cxxopts::value<uint64_t>()->default_value("123"), "N")
            ("help", "Print help")
        ;

        auto result = options.parse(argc, argv);

        if (result.count("help"))
        {
            std::cout << options.help({""}) << std::endl;
            exit(0);
        }

        if (!result.count("server"))
        {
            throw cxxopts::OptionException("server must be specified");
        }

        return result;

    } catch (const cxxopts::OptionException& e)
    {
        std::cout << "error parsing options: " << e.what() << std::endl;
        std::cout << options.help({""}) << std::endl;
        exit(1);
    }
}



void workload_worker(int threadid, VerbsEP *ep, ReadFuncPtr readfunc, LocalObjectHandler *objects_orig, uint32_t NN, bool is_zipf, int seed, float read_prob, uint64_t target){
    RemoteMemoryClient* api = new RemoteMemoryClient(0,ep);

    Trace *trace = nullptr;
    if (is_zipf)
    {
        trace = new YCSB(seed,read_prob,NN,0.99);
    }
    else
    {
        trace = new Uniform(seed,read_prob,NN);
    }

    LocalObjectHandler *objects = (LocalObjectHandler*)malloc(NN*sizeof(LocalObjectHandler));
    memcpy((char*)objects,(char*)objects_orig,NN*sizeof(LocalObjectHandler));

    uint32_t size = objects[0].requested_size;
    char* buffer = (char*)malloc(size);


    std::chrono::seconds sec(1);
    uint64_t nanodelay = std::chrono::nanoseconds(sec).count() / target; // per request
    auto starttime = std::chrono::high_resolution_clock::now();

    uint32_t interval = 2560;

    std::vector<uint64_t> request_bw;
    request_bw.reserve(1024);
#ifdef LATENCY
    std::vector<uint64_t> request_latency;
    request_latency.reserve(num);
#endif
    uint32_t conflicts = 0;
    auto bwt1 = std::chrono::high_resolution_clock::now();
    uint32_t count = 0;
    for(uint64_t i=0; i<num; i++){
        auto req = trace->get_next();
        LocalObjectHandler* obj = &objects[req.first];
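        // Each iteration takes the next (object index, operation) pair from the
        // trace and issues it against CoRM. Reads go through `readfunc` (the RPC
        // Read path, or ReadOneSided when --rdmaread is set); a negative return
        // value is counted as a conflict. Writes always use the RPC Write path.
        // After every `interval` completed requests the elapsed time is recorded
        // for the throughput report, and the loop then spins until the open-loop
        // slot of request i (starttime + i * nanodelay), which paces the thread
        // to roughly `target` operations per second.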
        assert(obj!=nullptr && "object cannot be null");


#ifdef LATENCY
        auto t1 = std::chrono::high_resolution_clock::now();
#endif

        // the following piece of code was used to force indirect pointers
        // uint64_t direct_addr = obj->addr.comp.addr;
        // uint64_t base_addr = GetVirtBaseAddr(obj->addr.comp.addr);
        // if(direct_addr == base_addr){
        //     base_addr += 32;
        // }
        // obj->addr.comp.addr = base_addr;
        if(req.second == 'r'){
            int ret = (api->*readfunc)(obj, buffer, size);
            if(ret<0){
                conflicts++;
            }
            // api->Read(obj, buffer, size);
        } else {
            api->Write(obj, buffer, size, false);
        }
#ifdef LATENCY
        auto t2 = std::chrono::high_resolution_clock::now();
        request_latency.push_back( std::chrono::duration_cast< std::chrono::nanoseconds >( t2 - t1 ).count() );
#endif
        count++;
        if(count > interval){
            auto bwt2 = std::chrono::high_resolution_clock::now();
            request_bw.push_back(std::chrono::duration_cast<std::chrono::microseconds>(bwt2 - bwt1).count());
            bwt1 = bwt2;
            count=0;
        }


        auto const sleep_end_time = starttime + std::chrono::nanoseconds(nanodelay*i);
        while (std::chrono::high_resolution_clock::now() < sleep_end_time){
            // nothing
        }
    }
    auto endtime = std::chrono::high_resolution_clock::now();


    while(order.load() != threadid ){
        // wait for our turn to print
    }

    printf("Data thread #%d: \n",threadid);
    printf("throughput(Kreq/sec): ");
    for(auto &x : request_bw){
        printf("%.2f ",(interval*1000.0)/x);
    }
#ifdef LATENCY
    printf("latency(us): ");
    for(auto &x : request_latency){
        printf("%.2f ",x/1000.0);
    }
#endif
    printf("\nFinished workload in %lu ms with %u conflicts\n", std::chrono::duration_cast< std::chrono::milliseconds >( endtime - starttime ).count(), conflicts );

    order++;

    return;
}


int main(int argc, char* argv[]){


    auto allparams = parse(argc,argv);

    log_fp=stdout;

    std::string server = allparams["server"].as<std::string>();
    std::string input = allparams["input"].as<std::string>();
    uint64_t target = allparams["target"].as<uint64_t>();
    uint32_t threads = allparams["threads"].as<uint32_t>();
    num = allparams["num"].as<uint64_t>();
    float read_prob = allparams["prob"].as<float>();
    int seed = allparams["seed"].as<int>();

    ClientRDMA rdma((char*)server.c_str(),9999);
    struct rdma_cm_id * id = rdma.sendConnectRequest();

    struct ibv_pd * pd = ClientRDMA::create_pd(id);

    struct ibv_qp_init_attr attr;
    struct rdma_conn_param conn_param;
    memset(&attr, 0, sizeof(attr));
    attr.cap.max_send_wr = 32;
    attr.cap.max_recv_wr = 32;
    attr.cap.max_send_sge = 1;
    attr.cap.max_recv_sge = 1;
    attr.cap.max_inline_data = 0;
    attr.qp_type = IBV_QPT_RC;

    attr.send_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0);
    attr.recv_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0);

    memset(&conn_param, 0 , sizeof(conn_param));
    conn_param.responder_resources = 0;
    conn_param.initiator_depth = 5;
    conn_param.retry_count = 3;
    conn_param.rnr_retry_count = 3;

    std::vector<VerbsEP*> conns;

    conns.push_back(ClientRDMA::connectEP(id, &attr, &conn_param, pd));

    for(uint32_t i = 1 ; i < threads; i++){
        struct rdma_cm_id * tid = rdma.sendConnectRequest();
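        // Each additional worker thread gets its own RDMA connection with freshly
        // created send and recv completion queues, so threads never share a CQ
        // (the assert below checks this).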
        attr.send_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0);
        attr.recv_cq = ibv_create_cq(pd->context, 32, NULL, NULL, 0);
        conns.push_back(ClientRDMA::connectEP(tid, &attr, &conn_param, pd));
    }

    if(threads>1){
        assert(conns[0]->qp->send_cq != conns[1]->qp->send_cq && "Different connections must use different CQs");
    }

    printf("Connected\n");
    sleep(1);

    std::fstream fout;
    // printf("File name %s \n",input.c_str());
    fout.open(input.c_str(), std::ios::in|std::ios::binary);
    uint32_t NN = 0;

    fout.read((char*)&NN,sizeof(NN));

    LocalObjectHandler *objects;
    objects = (LocalObjectHandler*)malloc(NN*sizeof(LocalObjectHandler));

    for(uint32_t i = 0; i < NN; i++){
        LocalObjectHandler* obj = &objects[i];
        fout.read((char*)obj,sizeof(LocalObjectHandler));
        // obj->print();
    }
    fout.close();
    printf("Finished reading %u objects from file\n", NN);


    ReadFuncPtr readfunc = nullptr;
    if(allparams.count("rdmaread")){
        readfunc = &RemoteMemoryClient::ReadOneSided;
    }else {
        readfunc = &RemoteMemoryClient::Read;
    }

    std::vector<std::thread> workers;

    for(int i = 0; i < (int)threads; i++){
        workers.push_back(std::thread(workload_worker,i,conns[i],readfunc, objects, NN, allparams.count("zipf"), seed + i, read_prob,target));
    }

    for (auto& th : workers) th.join();

    return 0;
}
--------------------------------------------------------------------------------
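For reference, a plausible invocation of this benchmark (assuming a CoRM server is already listening on port 9999 at `$SERVER_IP`, and that the input file contains the object handles written by a previous loading run) might look like:
```
./workload_readwrite --server=$SERVER_IP --threads=2 --input=test.bin \
                     --num=100000 --target=50000 --prob=0.95 --zipf --rdmaread
```
With `--rdmaread`, reads are issued as one-sided RDMA reads via `ReadOneSided`; otherwise they use the RPC `Read` path. Writes always go through `Write`.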