├── .clang-format ├── .gitignore ├── CMakeLists.txt ├── README.org ├── backend ├── .#CMakeLists.txt ├── CMakeLists.txt └── nam │ ├── Compute.cpp │ ├── Compute.hpp │ ├── Config.cpp │ ├── Config.hpp │ ├── NAM.cpp │ ├── NAM.hpp │ ├── Storage.cpp │ ├── Storage.hpp │ ├── profiling │ ├── CounterRegistry.hpp │ ├── ProfilingThread.hpp │ └── counters │ │ ├── CPUCounters.cpp │ │ ├── CPUCounters.hpp │ │ ├── RDMACounters.cpp │ │ ├── RDMACounters.hpp │ │ ├── WorkerCounters.cpp │ │ └── WorkerCounters.hpp │ ├── rdma │ └── CommunicationManager.hpp │ ├── syncprimitives │ ├── HybridLatch.hpp │ └── SpinLock.hpp │ ├── threads │ ├── Concurrency.hpp │ ├── CoreManager.hpp │ ├── ThreadContext.hpp │ ├── Worker.cpp │ ├── Worker.hpp │ ├── WorkerPool.cpp │ └── WorkerPool.hpp │ └── utils │ ├── BatchQueue.hpp │ ├── FNVHash.cpp │ ├── FNVHash.hpp │ ├── MPMCQueue.hpp │ ├── MemoryManagement.hpp │ ├── Parallelize.cpp │ ├── Parallelize.hpp │ ├── PerfEvent.hpp │ ├── RandomGenerator.cpp │ ├── RandomGenerator.hpp │ ├── ScrambledZipfGenerator.cpp │ ├── ScrambledZipfGenerator.hpp │ ├── Time.cpp │ ├── Time.hpp │ ├── ZipfGenerator.cpp │ ├── ZipfGenerator.hpp │ ├── ZipfRejectionInversion.hpp │ ├── crc64.cpp │ ├── crc64.hpp │ ├── crcspeed.cpp │ └── crcspeed.hpp ├── distexperiments ├── distexprunner │ ├── .gitignore │ ├── .vscode │ │ └── settings.json │ ├── README.md │ ├── client.py │ ├── client_demo.gif │ ├── distexprunner │ │ ├── __init__.py │ │ ├── _client_impl.py │ │ ├── _client_interface.py │ │ ├── _exceptions.py │ │ ├── _progressbar.py │ │ ├── _resume_manager.py │ │ ├── _rpc.py │ │ ├── _server_impl.py │ │ ├── _server_interface.py │ │ ├── enums.py │ │ ├── experiment_client.py │ │ ├── experiment_server.py │ │ ├── notification.py │ │ ├── outputs.py │ │ ├── parameter_grid.py │ │ ├── registry.py │ │ ├── server.py │ │ ├── server_list.py │ │ ├── stdin_controller.py │ │ └── utils.py │ ├── examples │ │ ├── basic.py │ │ ├── buffered_stdout.py │ │ ├── compile.py │ │ ├── config.py │ │ ├── cpu_load.py │ │ ├── csv_parser.py │ │ ├── cwd.py │ │ ├── dpkg_query.py │ │ ├── environment_variables.py │ │ ├── gdb.py │ │ ├── restart.py │ │ ├── serverlist_ops.py │ │ ├── simple_grid.py │ │ └── timeout.py │ ├── server.py │ ├── tests │ │ └── progressbar_test.py │ └── v1_compatibility │ │ ├── config.py │ │ ├── experiment.py │ │ └── old_example.py ├── examples │ ├── basic.py │ ├── buffered_stdout.py │ ├── compile.py │ ├── config.py │ ├── cpu_load.py │ ├── csv_parser.py │ ├── cwd.py │ ├── dpkg_query.py │ ├── environment_variables.py │ ├── gdb.py │ ├── restart.py │ ├── serverlist_ops.py │ ├── simple_grid.py │ └── timeout.py └── experiments │ ├── .#read_benchmark.py │ ├── alignment.py │ ├── alignment_hypo.py │ ├── atomic_cas.py │ ├── atomic_fa.py │ ├── atomic_nocontention.py │ ├── batched_atomics.py │ ├── batched_reads.py │ ├── broken_reads.py │ ├── btree.py │ ├── config.py │ ├── contention_reads_atomics.py │ ├── locking_ablation.py │ ├── locking_benchmark.py │ ├── locking_benchmark_sleep.py │ ├── nam_experiment.py │ ├── no_locking_benchmark.py │ ├── opt_btree.py │ ├── optdb_debug.py │ ├── optdb_experiment.py │ ├── optdb_nambaseline.py │ ├── optimistic_scaleout.py │ ├── optimistic_single_threaded_new.py │ ├── optimistic_zipf.py │ ├── optmistic_st.py │ ├── read_benchmark.py │ ├── sleep_effect_compute.py │ ├── sleep_effect_storage.py │ └── sleep_effect_two_cnics.py ├── frontend ├── BenchmarkHelper.hpp ├── CMakeLists.txt ├── OptimisticLocks.hpp ├── atomic_alignment.cpp ├── atomic_alignment_ws.cpp ├── atomic_benchmark.cpp ├── atomic_visibility.cpp ├── 
batched_atomics.cpp ├── batched_reads.cpp ├── broken_remote_write.cpp ├── btree.cpp ├── cas_benchmark.cpp ├── contention_reads_atomics.cpp ├── exception_hack.hpp ├── fa_benchmark.cpp ├── frontend.cpp ├── lock.cpp ├── locking_benchmark.cpp ├── locking_benchmark_tail.cpp ├── nam_experiment.cpp ├── no_locking_benchmark.cpp ├── opt_btree.cpp ├── optdb_experiment.cpp ├── optimistic_st.cpp ├── optmistic_benchmark.cpp ├── ordering_fixed.cpp ├── pause_effect.cpp ├── read_benchmark.cpp └── torn_writes.cpp ├── libs ├── FindNuma.cmake ├── gflags.cmake ├── ibverbs.cmake └── rdmacm.cmake ├── shared-headers ├── Defs.hpp ├── PerfEvent.hpp ├── Tabulate.hpp └── local.cmake └── vendor └── gflags ├── src └── gflags_src-stamp │ └── gflags_src-gitinfo.txt └── tmp ├── gflags_src-cfgcmd.txt ├── gflags_src-cfgcmd.txt.in └── gflags_src-gitclone.cmake /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Chromium 3 | BreakBeforeBraces: Linux 4 | SpaceInEmptyParentheses: 'false' 5 | ColumnLimit: 140 6 | IndentWidth: 3 7 | AllowShortBlocksOnASingleLine: true 8 | AllowShortIfStatementsOnASingleLine: true 9 | BreakBeforeBraces: Custom 10 | ... 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | bin/ 3 | build*/ 4 | cmake-*/ 5 | .DS_Store 6 | a.out 7 | data/ 8 | log/ 9 | adhoc/ 10 | #release/ 11 | #debug/ 12 | log.txt 13 | git-ignore 14 | *# 15 | debug* 16 | release* 17 | clang* 18 | paper* 19 | .dir-locals.el 20 | compile_commands.json 21 | CMakeFiles/ 22 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------- 2 | # NAM DB 3 | # --------------------------------------------------------------------------- 4 | 5 | project(namdb) 6 | cmake_minimum_required(VERSION 3.7) 7 | 8 | # --------------------------------------------------------------------------- 9 | # Environment 10 | # --------------------------------------------------------------------------- 11 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") 12 | set(CMAKE_CXX_STANDARD 17) 13 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -g") 15 | 16 | if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") 17 | add_compile_options(-Wall -Wextra -Werror -fnon-call-exceptions -fasynchronous-unwind-tables -mavx2 -mcx16 -m64) # -fno-elide-constructors no need for now 18 | else() 19 | add_compile_options(-Wall -Wextra -march=native) 20 | endif() 21 | 22 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") 23 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") 24 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 25 | 26 | if (CMAKE_BUILD_TYPE STREQUAL "Debug") 27 | message("${Red}!ATTENTION: debug mode enabled! 
${ColourReset}") 28 | else() 29 | message("${BoldGreen}Release mode: ${ColourReset} " ${CMAKE_BUILD_TYPE}) 30 | endif (CMAKE_BUILD_TYPE STREQUAL "Debug") 31 | 32 | 33 | find_package(Threads REQUIRED) 34 | set(THREADS_PREFER_PTHREAD_FLAG ON) 35 | 36 | # --------------------------------------------------------------------------- 37 | # Includes 38 | # --------------------------------------------------------------------------- 39 | include("${CMAKE_SOURCE_DIR}/shared-headers/local.cmake") 40 | include("${CMAKE_SOURCE_DIR}/libs/gflags.cmake") 41 | # --------------------------------------------------------------------------- 42 | # Sources 43 | # --------------------------------------------------------------------------- 44 | 45 | add_subdirectory("backend") 46 | 47 | # --------------------------------------------------------------------------- 48 | # Executable 49 | # --------------------------------------------------------------------------- 50 | 51 | add_subdirectory("frontend") 52 | -------------------------------------------------------------------------------- /backend/.#CMakeLists.txt: -------------------------------------------------------------------------------- 1 | tobias@tobias-MS-7B00.3680:1663153370 -------------------------------------------------------------------------------- /backend/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------- 2 | # NAM-DB 3 | # --------------------------------------------------------------------------- 4 | project(nam) 5 | 6 | # --------------------------------------------------------------------------- 7 | # Files 8 | # --------------------------------------------------------------------------- 9 | file(GLOB_RECURSE NAM_CC **.cpp **/**.cpp **.hpp **/**.hpp) 10 | if (NOT UNIX) 11 | message(SEND_ERROR "unsupported platform") 12 | endif () 13 | 14 | # --------------------------------------------------------------------------- 15 | # Library 16 | # --------------------------------------------------------------------------- 17 | add_library(nam STATIC ${NAM_CC}) 18 | 19 | OPTION(SANI "Compile nam with sanitizers" OFF) 20 | IF(SANI) 21 | if (CMAKE_BUILD_TYPE MATCHES Debug) 22 | message("Compiling with Sanitizers") 23 | target_compile_options(nam PUBLIC -fsanitize=address) 24 | target_link_libraries(nam asan) 25 | endif () 26 | ENDIF(SANI) 27 | 28 | target_link_libraries(nam gflags Threads::Threads atomic numa rdmacm ibverbs aio) 29 | target_include_directories(nam PUBLIC ${SHARED_INCLUDE_DIRECTORY}) 30 | target_include_directories(nam PRIVATE ${CMAKE_CURRENT_LIST_DIR}) 31 | # --------------------------------------------------------------------------- 32 | set(NAM_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}) 33 | set_property(TARGET nam APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${NAM_INCLUDE_DIR}) 34 | -------------------------------------------------------------------------------- /backend/nam/Compute.cpp: -------------------------------------------------------------------------------- 1 | #include "Compute.hpp" 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace nam { 11 | Compute::Compute() { 12 | cm = std::make_unique>(); 13 | rdmaCounters = std::make_unique(); 14 | workerPool = std::make_unique(*cm, 0); 15 | } 16 | 17 | Compute::~Compute() { 18 | workerPool.reset(); 19 | } 20 | } // namespace 
nam 21 | -------------------------------------------------------------------------------- /backend/nam/Compute.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "profiling/ProfilingThread.hpp" 4 | #include "profiling/counters/RDMACounters.hpp" 5 | #include "rdma/CommunicationManager.hpp" 6 | #include "threads/CoreManager.hpp" 7 | #include "threads/WorkerPool.hpp" 8 | #include "nam/utils/RandomGenerator.hpp" 9 | // ------------------------------------------------------------------------------------- 10 | #include 11 | 12 | namespace nam 13 | { 14 | // ------------------------------------------------------------------------------------- 15 | class Compute 16 | { 17 | 18 | public: 19 | //! Default constructor 20 | Compute(); 21 | //! Destructor 22 | ~Compute(); 23 | // ------------------------------------------------------------------------------------- 24 | // Deleted constructors 25 | //! Copy constructor 26 | Compute(const Compute& other) = delete; 27 | //! Move constructor 28 | Compute(Compute&& other) noexcept = delete; 29 | //! Copy assignment operator 30 | Compute& operator=(const Compute& other) = delete; 31 | //! Move assignment operator 32 | Compute& operator=(Compute&& other) noexcept = delete; 33 | // ------------------------------------------------------------------------------------- 34 | rdma::CM& getCM() { return *cm; } 35 | // ------------------------------------------------------------------------------------- 36 | threads::WorkerPool& getWorkerPool(){ 37 | return *workerPool; 38 | } 39 | // ------------------------------------------------------------------------------------- 40 | void startProfiler(profiling::WorkloadInfo& wlInfo) { 41 | pt.running = true; 42 | profilingThread.emplace_back(&profiling::ProfilingThread::profile, &pt, 0, std::ref(wlInfo)); 43 | } 44 | // ------------------------------------------------------------------------------------- 45 | void stopProfiler() 46 | { 47 | if (pt.running == true) { 48 | pt.running = false; 49 | for (auto& p : profilingThread) 50 | p.join(); 51 | profilingThread.clear(); 52 | } 53 | std::locale::global(std::locale("C")); // hack to restore locale which is messed up in tabulate package 54 | }; 55 | 56 | private: 57 | std::unique_ptr> cm; 58 | std::unique_ptr rdmaCounters; 59 | profiling::ProfilingThread pt; 60 | std::vector profilingThread; 61 | std::unique_ptr workerPool; 62 | }; 63 | // ------------------------------------------------------------------------------------- 64 | } // namespace scalestore 65 | -------------------------------------------------------------------------------- /backend/nam/Config.cpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------------------------- 2 | #include "gflags/gflags.h" 3 | // ------------------------------------------------------------------------------------- 4 | DEFINE_double(dramGB, 1, "DRAM buffer pool size"); 5 | DEFINE_uint64(worker,1, "Number worker threads"); 6 | DEFINE_uint64(all_worker,1, "number of all worker threads in the cluster for barrier"); 7 | DEFINE_uint64(batchSize, 100, "batch size in free lists"); 8 | DEFINE_uint64(pageProviderThreads, 2, " Page Provider threads must be power two"); 9 | DEFINE_double(freePercentage, 1, "Percentage free for PP"); 10 | DEFINE_uint64(coolingPercentage, 10 , "Percentage 
cooling for PP"); 11 | DEFINE_double(evictCoolestEpochs, 0.1, "Percentage of coolest epchos choosen for eviction"); 12 | DEFINE_bool(csv, true , "If written to csv file or not"); 13 | DEFINE_string(csvFile, "stats.csv" , "filename for profiling output"); 14 | DEFINE_string(tag,"","descirption of experiment"); 15 | DEFINE_uint32(partitionBits, 6, "bits per partition"); 16 | DEFINE_uint32(page_pool_partitions, 8, "page pool partitions each is shifted by 512 byte to increase cache associativity"); 17 | // ------------------------------------------------------------------------------------- 18 | DEFINE_bool(backoff, true, "backoff enabled"); 19 | // ------------------------------------------------------------------------------------- 20 | DEFINE_bool(storage_node, false, "storage node"); 21 | DEFINE_uint64(storage_nodes, 1,"Number nodes participating"); 22 | DEFINE_double(rdmaMemoryFactor, 1.1, "Factor to be multiplied by dramGB"); // factor to be multiplied by dramGB 23 | DEFINE_uint32(port, 7174, "port"); 24 | DEFINE_string(ownIp, "172.18.94.80", "own IP server"); 25 | // ------------------------------------------------------------------------------------- 26 | DEFINE_uint64(pollingInterval, 16, " Number of unsignaled messages before a signaled (power of 2)"); 27 | DEFINE_bool(read, true, "read protocol"); 28 | DEFINE_bool(random, false, "use random pages"); 29 | DEFINE_uint64(messageHandlerThreads, 4, " number message handler "); 30 | DEFINE_uint64(messageHandlerMaxRetries, 10, "Number retries before message gets restarted at client"); // prevents deadlocks but also mitigates early aborts 31 | // ------------------------------------------------------------------------------------- 32 | DEFINE_uint32(sockets, 2 , "Number Sockets"); 33 | DEFINE_uint32(socket, 0, " Socket we are running on"); 34 | DEFINE_bool(pinThreads, true, " Pin threads"); 35 | DEFINE_bool(cpuCounters,true, " CPU counters profiling "); 36 | -------------------------------------------------------------------------------- /backend/nam/Config.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "gflags/gflags.h" 4 | // ------------------------------------------------------------------------------------- 5 | // Buffermanager Config 6 | // ------------------------------------------------------------------------------------- 7 | DECLARE_double(dramGB); 8 | DECLARE_uint64(worker); 9 | DECLARE_uint64(all_worker); 10 | DECLARE_uint64(batchSize); 11 | DECLARE_uint64(pageProviderThreads); 12 | DECLARE_double(freePercentage); 13 | DECLARE_uint64(coolingPercentage); 14 | DECLARE_double(evictCoolestEpochs); 15 | DECLARE_bool(csv); 16 | DECLARE_string(csvFile); 17 | DECLARE_string(tag); 18 | DECLARE_uint32(partitionBits); 19 | DECLARE_uint32(page_pool_partitions); 20 | /// ------------------------------------------------------------------------------------- 21 | // CONTENTION 22 | // ------------------------------------------------------------------------------------- 23 | DECLARE_bool(backoff); 24 | // ------------------------------------------------------------------------------------- 25 | // RDMA Config 26 | // ------------------------------------------------------------------------------------- 27 | DECLARE_bool(storage_node); 28 | DECLARE_uint64(storage_nodes); 29 | DECLARE_string(ownIp); 30 | DECLARE_double(rdmaMemoryFactor); // factor to be multiplied by dramGB 31 | 
DECLARE_uint32(port); 32 | DECLARE_uint64(pollingInterval); 33 | DECLARE_bool(read); 34 | DECLARE_bool(random); 35 | DECLARE_uint64(messageHandlerThreads); 36 | DECLARE_uint64(messageHandlerMaxRetries); 37 | 38 | // ------------------------------------------------------------------------------------- 39 | // Server Specific Part 40 | // ------------------------------------------------------------------------------------- 41 | DECLARE_uint32(sockets); 42 | DECLARE_uint32(socket); 43 | DECLARE_bool(pinThreads); 44 | DECLARE_bool(cpuCounters); 45 | -------------------------------------------------------------------------------- /backend/nam/NAM.cpp: -------------------------------------------------------------------------------- 1 | #include "NAM.hpp" 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | // ------------------------------------------------------------------------------------- 10 | namespace nam { 11 | NAM::NAM() { 12 | // ------------------------------------------------------------------------------------- 13 | // find node id 14 | if (FLAGS_storage_nodes != 1) { 15 | for (; nodeId < FLAGS_storage_nodes; nodeId++) { 16 | if (FLAGS_ownIp == STORAGE_NODES[FLAGS_storage_nodes][nodeId]) break; 17 | } 18 | } else { 19 | nodeId = 0; // fix to allow single node use on all nodes 20 | } 21 | ensure(nodeId < FLAGS_storage_nodes); 22 | // ------------------------------------------------------------------------------------- 23 | // order of construction is important 24 | cm = std::make_unique>(); 25 | rdmaCounters = std::make_unique(); 26 | } 27 | 28 | NAM::~NAM() { 29 | stopProfiler(); 30 | } 31 | } // namespace nam 32 | -------------------------------------------------------------------------------- /backend/nam/Storage.cpp: -------------------------------------------------------------------------------- 1 | #include "Storage.hpp" 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | // ------------------------------------------------------------------------------------- 10 | namespace nam { 11 | Storage::Storage() { 12 | // ------------------------------------------------------------------------------------- 13 | // find node id 14 | if (FLAGS_storage_nodes != 1) { 15 | for (; nodeId < FLAGS_storage_nodes; nodeId++) { 16 | if (FLAGS_ownIp == STORAGE_NODES[FLAGS_storage_nodes][nodeId]) break; 17 | } 18 | } else { 19 | nodeId = 0; // fix to allow single node use on all nodes 20 | } 21 | ensure(nodeId < FLAGS_storage_nodes); 22 | // ------------------------------------------------------------------------------------- 23 | // order of construction is important 24 | cm = std::make_unique>(); 25 | rdmaCounters = std::make_unique(); 26 | } 27 | 28 | Storage::~Storage() { 29 | stopProfiler(); 30 | } 31 | } // namespace nam 32 | -------------------------------------------------------------------------------- /backend/nam/profiling/CounterRegistry.hpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------------------------- 2 | #include "counters/WorkerCounters.hpp" 3 | #include "counters/CPUCounters.hpp" 4 | #include "counters/RDMACounters.hpp" 5 | // ------------------------------------------------------------------------------------- 6 | #include 7 | 
#include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | // ------------------------------------------------------------------------------------- 15 | namespace nam { 16 | namespace profiling { 17 | 18 | struct CounterRegistry{ 19 | 20 | // ------------------------------------------------------------------------------------- 21 | static CounterRegistry& getInstance(){ 22 | static CounterRegistry instance; 23 | return instance; 24 | } 25 | // ------------------------------------------------------------------------------------- 26 | void aggregateWorkerCounters(std::vector& counters){ 27 | std::unique_lock guard(workerMutex); 28 | for (auto* c_ptr : workerCounters) { 29 | for (uint64_t c_i = 0; c_i < WorkerCounters::COUNT; c_i++) 30 | counters[c_i] += c_ptr->counters[c_i].exchange(0); 31 | } 32 | }; 33 | 34 | void registerWorkerCounter(WorkerCounters* counter){ 35 | std::unique_lock guard(workerMutex); 36 | workerCounters.push_back(counter); 37 | } 38 | 39 | 40 | void deregisterWorkerCounter(WorkerCounters* counter){ 41 | std::unique_lock guard(workerMutex); 42 | workerCounters.erase(std::remove_if(begin(workerCounters), end(workerCounters), [&](WorkerCounters* c) { return (c == counter); }), 43 | end(workerCounters)); 44 | } 45 | // ------------------------------------------------------------------------------------- 46 | 47 | void aggregateCPUCounter(std::unordered_map& counters) 48 | { 49 | std::unique_lock guard(cpuMutex); 50 | for (auto* c_ptr : cpuCounters) { 51 | if (c_ptr->threadName.rfind("worker", 0) == 0) 52 | { 53 | c_ptr->e->stopCounters(); 54 | auto eMap = c_ptr->e->getCountersMap(); 55 | for(auto& [eName,eValue] : eMap){ 56 | if(std::isnan(eValue)){ 57 | counters[eName] += 0; 58 | continue; 59 | } 60 | counters[eName] += eValue; 61 | } 62 | c_ptr->e->startCounters(); 63 | } 64 | } 65 | } 66 | 67 | 68 | void registerCPUCounter(CPUCounters* counter){ 69 | std::unique_lock guard(cpuMutex); 70 | cpuCounters.push_back(counter); 71 | } 72 | 73 | void deregisterCPUCounter(CPUCounters* counter){ 74 | std::unique_lock guard(cpuMutex); 75 | cpuCounters.erase(std::remove_if(begin(cpuCounters), end(cpuCounters), [&](CPUCounters* c) { return (c == counter); }), 76 | end(cpuCounters)); 77 | } 78 | 79 | // ------------------------------------------------------------------------------------- 80 | void aggregateRDMACounters(std::vector& counters){ 81 | std::unique_lock guard(rdmaMutex); 82 | for (auto* c_ptr : rdmaCounters) { 83 | counters[RDMACounters::sentGB] = c_ptr->getSentGB(); 84 | counters[RDMACounters::recvGB] = c_ptr->getRecvGB(); 85 | } 86 | }; 87 | 88 | void registerRDMACounter(RDMACounters* counter){ 89 | std::unique_lock guard(rdmaMutex); 90 | rdmaCounters.push_back(counter); 91 | } 92 | 93 | void deregisterRDMACounter(RDMACounters* counter){ 94 | std::unique_lock guard(rdmaMutex); 95 | rdmaCounters.erase(std::remove_if(begin(rdmaCounters), end(rdmaCounters), [&](RDMACounters* c) { return (c == counter); }), 96 | end(rdmaCounters)); 97 | } 98 | 99 | 100 | // ------------------------------------------------------------------------------------- 101 | std::mutex workerMutex; 102 | std::vector workerCounters; 103 | // ------------------------------------------------------------------------------------- 104 | std::mutex cpuMutex; 105 | uint64_t cpuCounterId; 106 | std::vector cpuCounters; 107 | // ------------------------------------------------------------------------------------- 108 | std::mutex rdmaMutex; 109 | std::vector rdmaCounters; 110 | 
}; 111 | 112 | 113 | } // profiling 114 | } // nam 115 | -------------------------------------------------------------------------------- /backend/nam/profiling/counters/CPUCounters.cpp: -------------------------------------------------------------------------------- 1 | #include "CPUCounters.hpp" 2 | #include "../CounterRegistry.hpp" 3 | #include "nam/Config.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | namespace nam { 6 | namespace profiling { 7 | // ------------------------------------------------------------------------------------- 8 | CPUCounters::CPUCounters(std::string name): threadName(name){ 9 | if(FLAGS_cpuCounters){ 10 | e = std::make_unique(false); 11 | CounterRegistry::getInstance().registerCPUCounter(this); 12 | } 13 | } 14 | // ------------------------------------------------------------------------------------- 15 | CPUCounters::~CPUCounters(){ 16 | if(FLAGS_cpuCounters){ 17 | CounterRegistry::getInstance().deregisterCPUCounter(this); 18 | } 19 | } 20 | // ------------------------------------------------------------------------------------- 21 | } // profiling 22 | } // nam 23 | -------------------------------------------------------------------------------- /backend/nam/profiling/counters/CPUCounters.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "PerfEvent.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | #include 6 | // ------------------------------------------------------------------------------------- 7 | 8 | namespace nam { 9 | namespace profiling { 10 | // ------------------------------------------------------------------------------------- 11 | // wrapper to perfevent for nameing convention 12 | struct CPUCounters { 13 | std::string threadName; 14 | std::unique_ptr e; 15 | 16 | CPUCounters(std::string name); 17 | ~CPUCounters(); 18 | }; 19 | // ------------------------------------------------------------------------------------- 20 | } // profiling 21 | } // nam 22 | -------------------------------------------------------------------------------- /backend/nam/profiling/counters/RDMACounters.cpp: -------------------------------------------------------------------------------- 1 | #include "RDMACounters.hpp" 2 | #include "../CounterRegistry.hpp" 3 | // ------------------------------------------------------------------------------------- 4 | 5 | namespace nam { 6 | namespace profiling { 7 | 8 | RDMACounters::RDMACounters(): rdmaRecv(rdmaPathRecv), rdmaSent(rdmaPathXmit){ 9 | CounterRegistry::getInstance().registerRDMACounter(this); 10 | }; 11 | // ------------------------------------------------------------------------------------- 12 | RDMACounters::~RDMACounters(){ 13 | CounterRegistry::getInstance().deregisterRDMACounter(this); 14 | } 15 | // ------------------------------------------------------------------------------------- 16 | double RDMACounters::getSentGB(){ 17 | return (rdmaSent() / (double)(1024*1024*1024)); 18 | } 19 | // ------------------------------------------------------------------------------------- 20 | double RDMACounters::getRecvGB(){ 21 | return (rdmaRecv() / (double)(1024*1024*1024)); 22 | } 23 | } // profiling 24 | } // nam 25 | -------------------------------------------------------------------------------- /backend/nam/profiling/counters/RDMACounters.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Defs.hpp" 3 | // ------------------------------------------------------------------------------------- 4 | #include 5 | #include 6 | #include 7 | #include // std::ifstream 8 | // ------------------------------------------------------------------------------------- 9 | 10 | // ------------------------------------------------------------------------------------- 11 | namespace nam 12 | { 13 | namespace profiling 14 | { 15 | // ------------------------------------------------------------------------------------- 16 | struct RDMACounters { 17 | // ------------------------------------------------------------------------------------- 18 | 19 | struct RDMAEventFunctor { 20 | std::ifstream counterFile; 21 | std::string path; 22 | size_t value = 0; 23 | size_t prevValue = 0; 24 | RDMAEventFunctor(RDMAEventFunctor&& ref) = default; 25 | RDMAEventFunctor(std::string counterFile) : path(counterFile){}; 26 | ~RDMAEventFunctor(){}; 27 | 28 | // calculates difference between calls and returns in bytes 29 | std::size_t operator()() 30 | { 31 | 32 | counterFile.open(path); 33 | counterFile >> value; 34 | counterFile.close(); 35 | 36 | size_t bytes = value * 4; // 4 lanes 37 | double diff = (bytes -prevValue); 38 | prevValue = bytes; 39 | return diff; 40 | } 41 | }; 42 | // ------------------------------------------------------------------------------------- 43 | enum { 44 | sentGB, 45 | recvGB, 46 | COUNT, 47 | }; 48 | // ------------------------------------------------------------------------------------- 49 | static const constexpr inline std::array translation{{"sent (GB)", "received (GB)"}}; 50 | static_assert(translation.size() == COUNT); 51 | // ------------------------------------------------------------------------------------- 52 | 53 | double getSentGB(); 54 | double getRecvGB(); 55 | 56 | RDMACounters(); 57 | ~RDMACounters(); 58 | 59 | 60 | double prevSent = 0; 61 | double prevRecv = 0; 62 | 63 | RDMAEventFunctor rdmaRecv; 64 | RDMAEventFunctor rdmaSent; 65 | }; 66 | // ------------------------------------------------------------------------------------- 67 | } // namespace profiling 68 | } // namespace nam 69 | -------------------------------------------------------------------------------- /backend/nam/profiling/counters/WorkerCounters.cpp: -------------------------------------------------------------------------------- 1 | #include "WorkerCounters.hpp" 2 | #include "../CounterRegistry.hpp" 3 | // ------------------------------------------------------------------------------------- 4 | namespace nam { 5 | namespace profiling { 6 | // ------------------------------------------------------------------------------------- 7 | WorkerCounters::WorkerCounters(){ 8 | CounterRegistry::getInstance().registerWorkerCounter(this); 9 | } 10 | // ------------------------------------------------------------------------------------- 11 | WorkerCounters::~WorkerCounters(){ 12 | CounterRegistry::getInstance().deregisterWorkerCounter(this); 13 | } 14 | // ------------------------------------------------------------------------------------- 15 | } // profiling 16 | } // nam 17 | -------------------------------------------------------------------------------- /backend/nam/syncprimitives/SpinLock.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace nam { 6 | namespace sync { 7 | 8 | } // sync 9 | } // nam 10 | 
-------------------------------------------------------------------------------- /backend/nam/threads/Concurrency.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Defs.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | #include 6 | #include 7 | #include 8 | // ------------------------------------------------------------------------------------- 9 | namespace nam { 10 | namespace concurrency { 11 | 12 | class Barrier { 13 | private: 14 | const std::size_t threadCount; 15 | alignas(64) std::atomic cntr; 16 | alignas(64) std::atomic round; 17 | 18 | public: 19 | explicit Barrier(std::size_t threadCount) 20 | : threadCount(threadCount), cntr(threadCount), round(0) {} 21 | 22 | template 23 | bool wait(F finalizer) { 24 | auto prevRound = round.load(); // Must happen before fetch_sub 25 | auto prev = cntr.fetch_sub(1); 26 | if (prev == 1) { 27 | // last thread arrived 28 | cntr = threadCount; 29 | auto r = finalizer(); 30 | round++; 31 | return r; 32 | } else { 33 | while (round == prevRound) { 34 | // wait until barrier is ready for re-use 35 | asm("pause"); 36 | asm("pause"); 37 | asm("pause"); 38 | } 39 | return false; 40 | } 41 | } 42 | inline bool wait() { 43 | return wait([]() { return true; }); 44 | } 45 | }; 46 | 47 | class Worker { 48 | protected: 49 | std::thread* t_ = nullptr; 50 | bool isJoined = false; 51 | int workerId = 0; 52 | 53 | public: 54 | template 55 | void start(Args&&... args) { 56 | t_ = new std::thread(args..., (workerId)); 57 | // pthread_t handle = t_->native_handle(); 58 | } 59 | 60 | Worker(int id) : workerId(id) {} 61 | 62 | Worker(){}; 63 | 64 | ~Worker() { delete t_; }; 65 | 66 | void join() { 67 | if (t_ && t_->joinable()) { 68 | t_->join(); 69 | } 70 | isJoined = true; 71 | } 72 | }; 73 | 74 | class WorkerGroup { 75 | private: 76 | std::size_t size_ = std::thread::hardware_concurrency(); 77 | std::vector workers; 78 | std::atomic stopped = false; 79 | public: 80 | WorkerGroup(){}; 81 | WorkerGroup(size_t numberThreads) : size_(numberThreads){}; 82 | 83 | template 84 | inline void run(Args&&... args) { 85 | workers.resize(size_); 86 | for (size_t i = 0; i < size_; ++i) { 87 | workers[i] = new Worker(i); 88 | workers[i]->start(std::forward(args)...); 89 | } 90 | } 91 | 92 | template 93 | inline void runForTime(Args&&... 
args) { 94 | workers.resize(size_); 95 | for (size_t i = 0; i < size_; ++i) { 96 | workers[i] = new Worker(i); 97 | workers[i]->start(std::forward(args)..., std::ref(stopped)); 98 | } 99 | } 100 | 101 | inline void waitForTime(uint64_t seconds){ 102 | std::this_thread::sleep_for(std::chrono::seconds(seconds)); 103 | stopped = true; 104 | wait(); 105 | stopped = false; // reset 106 | } 107 | 108 | inline void wait() { 109 | for (size_t i = 0; i < size_; ++i) { 110 | workers[i]->join(); 111 | } 112 | } 113 | 114 | size_t size() { return size_; }; 115 | virtual ~WorkerGroup() { 116 | for (auto* w : workers) delete w; 117 | } 118 | }; 119 | } // namespace concurrency 120 | } // nam 121 | -------------------------------------------------------------------------------- /backend/nam/threads/CoreManager.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | // ------------------------------------------------------------------------------------- 4 | namespace nam { 5 | namespace threads { 6 | // ------------------------------------------------------------------------------------- 7 | // Very Simplistic Core Manager to pin threads on OUR Severs 8 | // with that kind of cpu mappings 9 | // node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 28 29 30 31 32 33 34 35 36 37 38 39 40 41 10 | // ------------------------------------------------------------------------------------- 11 | struct CoreManager{ 12 | // vector of cpuids 13 | 14 | 15 | std::vector cores; 16 | std::vector hts; 17 | 18 | static CoreManager& getInstance(){ 19 | static CoreManager coremanager; 20 | return coremanager; 21 | } 22 | 23 | bool pinThreadToHT(pthread_t thread){ 24 | ensure(hts.size() > 0); 25 | uint64_t id = *hts.begin(); 26 | hts.erase(hts.begin()); 27 | schedAffinity(id, thread); 28 | return true; 29 | } 30 | bool pinThreadToCore(pthread_t thread){ 31 | ensure(cores.size() > 0); 32 | uint64_t id = *cores.begin(); 33 | cores.erase(cores.begin()); 34 | schedAffinity(id, thread); 35 | return true; 36 | } 37 | 38 | bool pinThread(pthread_t thread){ 39 | if(cores.size() != 0) 40 | return pinThreadToCore(thread); 41 | else if (hts.size() != 0) 42 | return pinThreadToHT(thread); 43 | else 44 | throw std::runtime_error(" Cannot pin thread"); 45 | } 46 | 47 | bool pinThreadRoundRobin(pthread_t thread){ 48 | 49 | if((cores.size() == 0) && (hts.size() == 0)){ 50 | throw std::runtime_error(" Cannot pin thread"); 51 | } 52 | if(cores.size() >= hts.size()) 53 | return pinThreadToCore(thread); 54 | else 55 | return pinThreadToHT(thread); 56 | } 57 | 58 | 59 | private: 60 | bool schedAffinity(uint64_t id, pthread_t thread) { 61 | cpu_set_t cpuset; 62 | CPU_ZERO(&cpuset); 63 | CPU_SET(id, &cpuset); 64 | if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset) != 0) { 65 | throw std::runtime_error("Could not pin thread " + std::to_string(id) + " to thread " + 66 | std::to_string(id)); 67 | } 68 | return true; 69 | } 70 | 71 | CoreManager(){ 72 | uint64_t count = std::thread::hardware_concurrency(); 73 | uint64_t socketCount = count / FLAGS_sockets; 74 | uint64_t pCores = socketCount / 2; 75 | uint64_t firstCPUId = pCores * FLAGS_socket; 76 | uint64_t firstHTId = pCores * (FLAGS_sockets + FLAGS_socket); 77 | cores.resize(pCores); 78 | hts.resize(pCores); 79 | std::iota(std::begin(cores), std::end(cores), firstCPUId); 80 | std::iota(std::begin(hts), std::end(hts), firstHTId); 81 | } 82 | 83 | }; 84 | 85 | } // threads 86 | } // nam 87 | 
-------------------------------------------------------------------------------- /backend/nam/threads/ThreadContext.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | namespace nam { 4 | namespace threads { 5 | // ensure that every thread in nam gets this thread context initialized 6 | struct ThreadContext{ 7 | // ------------------------------------------------------------------------------------- 8 | static inline thread_local ThreadContext* tlsPtr = nullptr; 9 | static inline ThreadContext& my() { 10 | return *ThreadContext::tlsPtr; 11 | } 12 | // ------------------------------------------------------------------------------------- 13 | 14 | /* 15 | struct DebugInfo{ 16 | std::string_view msg; 17 | PID pid; 18 | uint64_t version; 19 | uint64_t g_epoch; 20 | uintptr_t bf_ptr; 21 | }; 22 | 23 | 24 | template 25 | struct DebugStack { 26 | void push(const T& e) 27 | { 28 | assert(size <= N); 29 | if (full()) { 30 | size = 0; // overwrite 31 | } 32 | buffer[size++] = e; 33 | } 34 | 35 | [[nodiscard]] bool try_pop(T& e) 36 | { 37 | if (empty()) { 38 | return false; 39 | } 40 | e = buffer[--size]; 41 | return true; 42 | } 43 | bool empty() { return size == 0; } 44 | bool full() { return size == (N); } 45 | uint64_t get_size() { return size; } 46 | void reset() { size = 0; } 47 | 48 | private: 49 | uint64_t size = 0; 50 | std::array buffer{}; 51 | }; 52 | DebugStack debug_stack; 53 | */ 54 | }; 55 | 56 | } // threads 57 | } // nam 58 | -------------------------------------------------------------------------------- /backend/nam/threads/Worker.cpp: -------------------------------------------------------------------------------- 1 | #include "Worker.hpp" 2 | // ------------------------------------------------------------------------------------- 3 | namespace nam { 4 | namespace threads { 5 | // ------------------------------------------------------------------------------------- 6 | thread_local Worker* Worker::tlsPtr = nullptr; 7 | // ------------------------------------------------------------------------------------- 8 | Worker::Worker(uint64_t workerId, std::string name, rdma::CM& cm, NodeID nodeId) 9 | : workerId(workerId), 10 | name(name), 11 | cpuCounters(name), 12 | cm(cm), 13 | nodeId_(nodeId), 14 | cctxs(FLAGS_storage_nodes), 15 | threadContext(std::make_unique()) { 16 | ThreadContext::tlsPtr = threadContext.get(); 17 | // ------------------------------------------------------------------------------------- 18 | // Connection to MessageHandler 19 | // ------------------------------------------------------------------------------------- 20 | // First initiate connection 21 | for (uint64_t n_i = 0; n_i < FLAGS_storage_nodes; n_i++) { 22 | // ------------------------------------------------------------------------------------- 23 | auto& ip = STORAGE_NODES[FLAGS_storage_nodes][n_i]; 24 | cctxs[n_i].rctx = &(cm.initiateConnection(ip, rdma::Type::WORKER, workerId, nodeId)); 25 | cctxs[n_i].wqe = 0; 26 | // ------------------------------------------------------------------------------------- 27 | } 28 | 29 | // ------------------------------------------------------------------------------------- 30 | // Second finish connection 31 | rdma::InitMessage* init = (rdma::InitMessage*)cm.getGlobalBuffer().allocate(sizeof(rdma::InitMessage)); 32 | for (uint64_t n_i = 0; n_i < FLAGS_storage_nodes; n_i++) { 33 | init->nodeId = nodeId; 34 | init->threadId 
= workerId + (nodeId*FLAGS_worker); 35 | // ------------------------------------------------------------------------------------- 36 | cm.exchangeInitialMesssage(*(cctxs[n_i].rctx), init); 37 | // ------------------------------------------------------------------------------------- 38 | 39 | auto& msg = *reinterpret_cast((cctxs[n_i].rctx->applicationData)); 40 | auto num_regions = msg.num_regions; 41 | std::cout << "num regions " << num_regions << "\n"; 42 | // for(uint64_t t_i = 0; t_i < num_regions; t_i++){ 43 | // hack only supports one region at the moment 44 | catalog.insert({n_i,{.start = msg.mem_regions[0].offset, .size_bytes = msg.mem_regions[0].size_bytes, .region_id = (int)0}}); 45 | // } 46 | } 47 | 48 | std::cout << "Connected" << std::endl; 49 | } 50 | 51 | // ------------------------------------------------------------------------------------- 52 | Worker::~Worker() {} 53 | // ------------------------------------------------------------------------------------- 54 | } // namespace threads 55 | } // namespace nam 56 | -------------------------------------------------------------------------------- /backend/nam/threads/Worker.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Defs.hpp" 3 | #include "ThreadContext.hpp" 4 | #include "nam/profiling/counters/CPUCounters.hpp" 5 | #include "nam/profiling/counters/WorkerCounters.hpp" 6 | #include "nam/rdma/CommunicationManager.hpp" 7 | // ------------------------------------------------------------------------------------- 8 | namespace nam { 9 | namespace threads { 10 | using namespace rdma; 11 | constexpr static bool OPTIMIZED_COMPLETION = true; 12 | // ------------------------------------------------------------------------------------- 13 | struct Worker{ 14 | // ------------------------------------------------------------------------------------- 15 | static thread_local Worker* tlsPtr; 16 | static inline Worker& my() { return *Worker::tlsPtr; } 17 | // ------------------------------------------------------------------------------------- 18 | uint64_t workerId; 19 | std::string name; 20 | // ------------------------------------------------------------------------------------- 21 | profiling::CPUCounters cpuCounters; 22 | // ------------------------------------------------------------------------------------- 23 | profiling::WorkerCounters counters; 24 | // ------------------------------------------------------------------------------------- 25 | // RDMA 26 | // ------------------------------------------------------------------------------------- 27 | // context for every connection 28 | struct ConnectionContext { 29 | rdma::RdmaContext* rctx; 30 | uint64_t wqe; // wqe currently outstanding 31 | }; 32 | // ------------------------------------------------------------------------------------- 33 | struct PartitionInfo { 34 | uintptr_t offset; 35 | uint64_t begin; 36 | uint64_t end; 37 | NodeID nodeId; 38 | }; 39 | 40 | // ------------------------------------------------------------------------------------- 41 | rdma::CM& cm; 42 | NodeID nodeId_; 43 | std::vector cctxs; 44 | std::unique_ptr threadContext; 45 | std::unordered_map catalog; // ptr, size of region 46 | Worker(uint64_t workerId, std::string name, rdma::CM& cm, NodeID nodeId); 47 | ~Worker(); 48 | }; 49 | // ------------------------------------------------------------------------------------- 50 | } // namespace threads 51 | } // namespace nam 52 | 
-------------------------------------------------------------------------------- /backend/nam/threads/WorkerPool.cpp: -------------------------------------------------------------------------------- 1 | #include "../Config.hpp" 2 | #include "WorkerPool.hpp" 3 | #include "CoreManager.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | #include 6 | // ------------------------------------------------------------------------------------- 7 | namespace nam 8 | { 9 | namespace threads 10 | { 11 | // ------------------------------------------------------------------------------------- 12 | 13 | WorkerPool::WorkerPool(rdma::CM& cm, NodeID nodeId): workers(MAX_WORKER_THREADS,nullptr) 14 | { 15 | workersCount = FLAGS_worker; 16 | ensure(workersCount < MAX_WORKER_THREADS); 17 | workerThreads.reserve(workersCount); 18 | for (uint64_t t_i = 0; t_i < workersCount; t_i++) { 19 | workerThreads.emplace_back([&, t_i]() { 20 | std::string threadName("worker_" + std::to_string(t_i)); 21 | pthread_setname_np(pthread_self(), threadName.c_str()); 22 | // ------------------------------------------------------------------------------------- 23 | workers[t_i] = new Worker(t_i, threadName, cm, nodeId); 24 | Worker::tlsPtr = workers[t_i]; 25 | // ------------------------------------------------------------------------------------- 26 | runningThreads++; 27 | auto& meta = workerThreadsMeta[t_i]; 28 | while (keepRunning) { 29 | std::unique_lock guard(meta.mutex); 30 | meta.cv.wait(guard, [&]() { return keepRunning == false || meta.jobSet; }); 31 | if (!keepRunning) { 32 | break; 33 | } 34 | meta.wtReady = false; 35 | meta.job(); 36 | meta.wtReady = true; 37 | meta.jobDone = true; 38 | meta.jobSet = false; 39 | meta.cv.notify_one(); 40 | } 41 | runningThreads--; 42 | }); 43 | } 44 | if(FLAGS_pinThreads){ 45 | for (auto& t : workerThreads) { 46 | threads::CoreManager::getInstance().pinThreadRoundRobin(t.native_handle()); 47 | // threads::CoreManager::getInstance().pinThreadToCore(t.native_handle()); 48 | } 49 | } 50 | 51 | 52 | for (auto& t : workerThreads) { 53 | t.detach(); 54 | } 55 | // ------------------------------------------------------------------------------------- 56 | // Wait until all worker threads are initialized 57 | while (runningThreads < workersCount) { 58 | } 59 | } 60 | // ------------------------------------------------------------------------------------- 61 | WorkerPool::~WorkerPool(){ 62 | keepRunning = false; 63 | 64 | for (uint64_t t_i = 0; t_i < workersCount; t_i++) { 65 | workerThreadsMeta[t_i].cv.notify_one(); 66 | } 67 | while (runningThreads) { 68 | } 69 | 70 | for(auto& w : workers) 71 | if(w) delete w; 72 | 73 | } 74 | // ------------------------------------------------------------------------------------- 75 | 76 | // ------------------------------------------------------------------------------------- 77 | void WorkerPool::scheduleJobSync(uint64_t t_i, std::function job) 78 | { 79 | ensure(t_i < workersCount); 80 | auto& meta = workerThreadsMeta[t_i]; 81 | std::unique_lock guard(meta.mutex); 82 | meta.cv.wait(guard, [&]() { return !meta.jobSet && meta.wtReady; }); 83 | meta.jobSet = true; 84 | meta.jobDone = false; 85 | meta.job = job; 86 | guard.unlock(); 87 | meta.cv.notify_one(); 88 | guard.lock(); 89 | meta.cv.wait(guard, [&]() { return meta.jobDone; }); 90 | } 91 | // ------------------------------------------------------------------------------------- 92 | void WorkerPool::scheduleJobAsync(uint64_t t_i, std::function job) 93 | { 
94 | ensure(t_i < workersCount); 95 | auto& meta = workerThreadsMeta[t_i]; 96 | std::unique_lock guard(meta.mutex); 97 | meta.cv.wait(guard, [&]() { return !meta.jobSet && meta.wtReady; }); 98 | meta.jobSet = true; 99 | meta.jobDone = false; 100 | meta.job = job; 101 | guard.unlock(); 102 | meta.cv.notify_one(); 103 | } 104 | // ------------------------------------------------------------------------------------- 105 | void WorkerPool::joinAll() 106 | { 107 | for (uint64_t t_i = 0; t_i < workersCount; t_i++) { 108 | auto& meta = workerThreadsMeta[t_i]; 109 | std::unique_lock guard(meta.mutex); 110 | meta.cv.wait(guard, [&]() { return meta.wtReady && !meta.jobSet; }); 111 | } 112 | } 113 | 114 | } // namespace threads 115 | } // namespace nam 116 | -------------------------------------------------------------------------------- /backend/nam/threads/WorkerPool.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Worker.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | // ------------------------------------------------------------------------------------- 12 | namespace nam 13 | { 14 | namespace threads 15 | { 16 | // ------------------------------------------------------------------------------------- 17 | class WorkerPool 18 | { 19 | static constexpr uint64_t MAX_WORKER_THREADS = 1030; 20 | 21 | std::atomic runningThreads = 0; 22 | std::atomic keepRunning = true; 23 | // ------------------------------------------------------------------------------------- 24 | struct WorkerThread { 25 | std::mutex mutex; 26 | std::condition_variable cv; 27 | std::function job; 28 | bool wtReady = true; 29 | bool jobSet = false; 30 | bool jobDone = false; 31 | }; 32 | // ------------------------------------------------------------------------------------- 33 | std::vector workerThreads; 34 | std::vector workers; 35 | WorkerThread workerThreadsMeta [MAX_WORKER_THREADS]; 36 | uint32_t workersCount; 37 | public: 38 | 39 | // ------------------------------------------------------------------------------------- 40 | WorkerPool(rdma::CM& cm, NodeID nodeId); 41 | ~WorkerPool(); 42 | // ------------------------------------------------------------------------------------- 43 | void scheduleJobAsync(uint64_t t_i, std::function job); 44 | void scheduleJobSync(uint64_t t_i, std::function job); 45 | void joinAll(); 46 | }; 47 | // ------------------------------------------------------------------------------------- 48 | } // namespace threads 49 | } // namespace nam 50 | -------------------------------------------------------------------------------- /backend/nam/utils/FNVHash.cpp: -------------------------------------------------------------------------------- 1 | #include "FNVHash.hpp" 2 | // ------------------------------------------------------------------------------------- 3 | // ------------------------------------------------------------------------------------- 4 | // ------------------------------------------------------------------------------------- 5 | namespace nam 6 | { 7 | namespace utils 8 | // ------------------------------------------------------------------------------------- 9 | { 10 | u64 FNV::hash(u64 val) 11 | { 12 | // from http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash 13 | u64 hash_val = FNV_OFFSET_BASIS_64; 
14 | for (int i = 0; i < 8; i++) { 15 | u64 octet = val & 0x00ff; 16 | val = val >> 8; 17 | 18 | hash_val = hash_val ^ octet; 19 | hash_val = hash_val * FNV_PRIME_64; 20 | } 21 | return hash_val; 22 | } 23 | // ------------------------------------------------------------------------------------- 24 | } // namespace utils 25 | } // namespace nam 26 | -------------------------------------------------------------------------------- /backend/nam/utils/FNVHash.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Defs.hpp" 3 | // ------------------------------------------------------------------------------------- 4 | // ------------------------------------------------------------------------------------- 5 | // ------------------------------------------------------------------------------------- 6 | namespace nam 7 | { 8 | namespace utils 9 | { 10 | // ------------------------------------------------------------------------------------- 11 | class FNV 12 | { 13 | private: 14 | static constexpr u64 FNV_OFFSET_BASIS_64 = 0xCBF29CE484222325L; 15 | static constexpr u64 FNV_PRIME_64 = 1099511628211L; 16 | 17 | public: 18 | static u64 hash(u64 val); 19 | }; 20 | // ------------------------------------------------------------------------------------- 21 | } // namespace utils 22 | } // namespace leanstore 23 | -------------------------------------------------------------------------------- /backend/nam/utils/Parallelize.cpp: -------------------------------------------------------------------------------- 1 | #include "Parallelize.hpp" 2 | 3 | 4 | -------------------------------------------------------------------------------- /backend/nam/utils/Parallelize.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Defs.hpp" 4 | #include "../threads/Concurrency.hpp" 5 | // ------------------------------------------------------------------------------------- 6 | namespace nam { 7 | namespace utils { 8 | 9 | class Parallelize{ 10 | 11 | public: 12 | template 13 | static void parallelRange(uint64_t n, F function){ 14 | parallelRange(std::thread::hardware_concurrency(), n,function); 15 | } 16 | 17 | template 18 | static void parallelRange(int threads, uint64_t n, F function){ 19 | ensure(threads > 0); 20 | concurrency::WorkerGroup g(threads); 21 | const uint64_t blockSize = n / threads; 22 | ensure(blockSize > 0); 23 | g.run([&](int workerId){ 24 | auto begin = workerId * blockSize; 25 | auto end = begin + blockSize; 26 | if(workerId == threads - 1) 27 | end = n; 28 | function(begin,end); 29 | }); 30 | g.wait(); 31 | } 32 | }; 33 | 34 | } // utils 35 | } // nam 36 | -------------------------------------------------------------------------------- /backend/nam/utils/RandomGenerator.cpp: -------------------------------------------------------------------------------- 1 | #include "RandomGenerator.hpp" 2 | #include 3 | // ------------------------------------------------------------------------------------- 4 | // ------------------------------------------------------------------------------------- 5 | // ------------------------------------------------------------------------------------- 6 | namespace nam 7 | { 8 | namespace utils 9 | { 10 | static std::atomic mt_counter = 0; 11 | // ------------------------------------------------------------------------------------- 12 | 
MersenneTwister::MersenneTwister(uint64_t seed) : mti(NN + 1) 13 | { 14 | std::random_device rd("/dev/urandom"); 15 | init((seed ^ (mt_counter++)) ^ rd()); 16 | } 17 | // ------------------------------------------------------------------------------------- 18 | void MersenneTwister::init(uint64_t seed) 19 | { 20 | mt[0] = seed; 21 | for (mti = 1; mti < NN; mti++) 22 | mt[mti] = (6364136223846793005ULL * (mt[mti - 1] ^ (mt[mti - 1] >> 62)) + mti); 23 | } 24 | // ------------------------------------------------------------------------------------- 25 | uint64_t MersenneTwister::rnd() 26 | { 27 | int i; 28 | uint64_t x; 29 | static uint64_t mag01[2] = {0ULL, MATRIX_A}; 30 | 31 | if (mti >= NN) { /* generate NN words at one time */ 32 | 33 | for (i = 0; i < NN - MM; i++) { 34 | x = (mt[i] & UM) | (mt[i + 1] & LM); 35 | mt[i] = mt[i + MM] ^ (x >> 1) ^ mag01[(int)(x & 1ULL)]; 36 | } 37 | for (; i < NN - 1; i++) { 38 | x = (mt[i] & UM) | (mt[i + 1] & LM); 39 | mt[i] = mt[i + (MM - NN)] ^ (x >> 1) ^ mag01[(int)(x & 1ULL)]; 40 | } 41 | x = (mt[NN - 1] & UM) | (mt[0] & LM); 42 | mt[NN - 1] = mt[MM - 1] ^ (x >> 1) ^ mag01[(int)(x & 1ULL)]; 43 | 44 | mti = 0; 45 | } 46 | 47 | x = mt[mti++]; 48 | 49 | x ^= (x >> 29) & 0x5555555555555555ULL; 50 | x ^= (x << 17) & 0x71D67FFFEDA60000ULL; 51 | x ^= (x << 37) & 0xFFF7EEE000000000ULL; 52 | x ^= (x >> 43); 53 | 54 | return x; 55 | } 56 | // ------------------------------------------------------------------------------------- 57 | Xorshift64star::Xorshift64star(uint64_t seed_){ 58 | std::random_device rd; 59 | seed = (seed_ ^ (mt_counter++)) ^ rd(); 60 | } 61 | // ------------------------------------------------------------------------------------- 62 | uint64_t Xorshift64star::rnd(){ 63 | uint64_t x = seed; /* state nicht mit 0 initialisieren */ 64 | x ^= x >> 12; // a 65 | x ^= x << 25; // b 66 | x ^= x >> 27; // c 67 | seed = x; 68 | return x * 0x2545F4914F6CDD1D; 69 | } 70 | 71 | // ------------------------------------------------------------------------------------- 72 | } // namespace utils 73 | } // namespace nam 74 | -------------------------------------------------------------------------------- /backend/nam/utils/RandomGenerator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Defs.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | #include 6 | #include 7 | // ------------------------------------------------------------------------------------- 8 | // ------------------------------------------------------------------------------------- 9 | // ------------------------------------------------------------------------------------- 10 | namespace nam 11 | { 12 | namespace utils 13 | { 14 | class MersenneTwister 15 | { 16 | private: 17 | static const int NN = 312; 18 | static const int MM = 156; 19 | static const uint64_t MATRIX_A = 0xB5026F5AA96619E9ULL; 20 | static const uint64_t UM = 0xFFFFFFFF80000000ULL; 21 | static const uint64_t LM = 0x7FFFFFFFULL; 22 | uint64_t mt[NN]; 23 | int mti; 24 | void init(uint64_t seed); 25 | 26 | public: 27 | MersenneTwister(uint64_t seed = 19650218ULL); 28 | uint64_t rnd(); 29 | }; 30 | 31 | class Xorshift64star 32 | { 33 | private: 34 | uint64_t seed; 35 | public: 36 | Xorshift64star(uint64_t seed = 19650218ULL); 37 | uint64_t rnd(); 38 | }; 39 | } // namespace utils 40 | } // namespace nam 41 | // 
------------------------------------------------------------------------------------- 42 | static thread_local nam::utils::MersenneTwister mt_generator; 43 | static thread_local std::mt19937 random_generator; 44 | static thread_local nam::utils::Xorshift64star fast_generator; 45 | // ------------------------------------------------------------------------------------- 46 | namespace nam 47 | { 48 | namespace utils 49 | { 50 | // ------------------------------------------------------------------------------------- 51 | class RandomGenerator 52 | { 53 | public: 54 | // ATTENTION: open interval [min, max) 55 | static uint64_t getRandU64(uint64_t min, uint64_t max) 56 | { 57 | uint64_t rand = min + (mt_generator.rnd() % (max - min)); 58 | ensure(rand < max); 59 | ensure(rand >= min); 60 | return rand; 61 | } 62 | 63 | static uint64_t getRandU64Fast(){ 64 | return fast_generator.rnd(); 65 | } 66 | 67 | // ATTENTION: open interval [min, max) 68 | // ATTENTION: power two 69 | static uint64_t getRandU64PowerTwo(uint64_t maxPowerTwo) 70 | { 71 | uint64_t rand = (mt_generator.rnd() & (maxPowerTwo-1)); 72 | return rand; 73 | } 74 | static uint64_t getRandU64() { return mt_generator.rnd(); } 75 | static uint64_t getRandU64STD(uint64_t min, uint64_t max) 76 | { 77 | std::uniform_int_distribution distribution(min, max - 1); 78 | return distribution(random_generator); 79 | } 80 | 81 | template 82 | static inline T getRand(T min, T max) 83 | { 84 | uint64_t rand = getRandU64(min, max); 85 | return static_cast(rand); 86 | } 87 | 88 | 89 | static void getRandString(uint8_t* dst, u64 size) 90 | { 91 | for (u64 t_i = 0; t_i < size; t_i++) { 92 | dst[t_i] = getRand(48, 123); 93 | } 94 | 95 | } // namespace utils 96 | }; 97 | // ------------------------------------------------------------------------------------- 98 | } // namespace utils 99 | } // namespace nam 100 | // ------------------------------------------------------------------------------------- 101 | -------------------------------------------------------------------------------- /backend/nam/utils/ScrambledZipfGenerator.cpp: -------------------------------------------------------------------------------- 1 | #include "ScrambledZipfGenerator.hpp" 2 | // ------------------------------------------------------------------------------------- 3 | // ------------------------------------------------------------------------------------- 4 | // ------------------------------------------------------------------------------------- 5 | namespace nam 6 | { 7 | namespace utils 8 | { 9 | 10 | // ------------------------------------------------------------------------------------- 11 | u64 ScrambledZipfGenerator::rand() 12 | { 13 | u64 zipf_value = zipf_generator(gen); 14 | return min + (nam::utils::FNV::hash(zipf_value) % n); 15 | } 16 | // ------------------------------------------------------------------------------------- 17 | u64 ScrambledZipfGenerator::rand(u64 offset) 18 | { 19 | u64 zipf_value = zipf_generator(gen); 20 | return (min + ((nam::utils::FNV::hash(zipf_value + offset)) % n)); 21 | } 22 | 23 | } // namespace utils 24 | } // namespace nam 25 | -------------------------------------------------------------------------------- /backend/nam/utils/ScrambledZipfGenerator.hpp: -------------------------------------------------------------------------------- 1 | #include "FNVHash.hpp" 2 | #include "Defs.hpp" 3 | // #include "ZipfGenerator.hpp" 4 | #include "ZipfRejectionInversion.hpp" 5 | // 
------------------------------------------------------------------------------------- 6 | // ------------------------------------------------------------------------------------- 7 | // ------------------------------------------------------------------------------------- 8 | namespace nam 9 | { 10 | namespace utils 11 | { 12 | 13 | class ScrambledZipfGenerator 14 | { 15 | public: 16 | u64 min, max, n; 17 | double theta; 18 | std::random_device rd; 19 | std::mt19937 gen; 20 | zipf_distribution<> zipf_generator; 21 | // 10000000000ul 22 | // [min, max) 23 | ScrambledZipfGenerator(u64 min, u64 max, double theta) : min(min), max(max), n(max - min), gen(rd()), zipf_generator((max - min) * 2, theta) { 24 | } 25 | u64 rand(); 26 | u64 rand(u64 offset); 27 | }; 28 | 29 | // class ScrambledZipfGenerator 30 | // { 31 | // public: 32 | // u64 min, max, n; 33 | // double theta; 34 | // ZipfGenerator zipf_generator; 35 | // // 10000000000ul 36 | // // [min, max) 37 | // ScrambledZipfGenerator(u64 min, u64 max, double theta) : min(min), max(max), n(max - min), zipf_generator((max - min) * 2, theta) { 38 | // } 39 | // u64 rand(); 40 | // u64 rand(u64 offset); 41 | // }; 42 | // ------------------------------------------------------------------------------------- 43 | } // namespace utils 44 | } // namespace nam 45 | -------------------------------------------------------------------------------- /backend/nam/utils/Time.cpp: -------------------------------------------------------------------------------- 1 | #include "Time.hpp" 2 | // ------------------------------------------------------------------------------------- 3 | 4 | namespace nam 5 | { 6 | namespace utils 7 | { 8 | uint64_t getTimePoint() 9 | { 10 | using namespace std::chrono; 11 | auto now = system_clock::now(); 12 | auto now_micros = time_point_cast(now); 13 | auto value = now_micros.time_since_epoch(); 14 | return value.count(); 15 | } 16 | 17 | uint64_t getTimePointNanoseconds() 18 | { 19 | using namespace std::chrono; 20 | auto now = system_clock::now(); 21 | auto now_nanos = time_point_cast(now); 22 | auto value = now_nanos.time_since_epoch(); 23 | return value.count(); 24 | } 25 | } // utils 26 | } // namespace utils 27 | -------------------------------------------------------------------------------- /backend/nam/utils/Time.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace nam { 6 | namespace utils { 7 | uint64_t getTimePoint(); 8 | uint64_t getTimePointNanoseconds(); 9 | } // utils 10 | } // nam 11 | -------------------------------------------------------------------------------- /backend/nam/utils/ZipfGenerator.cpp: -------------------------------------------------------------------------------- 1 | #include "ZipfGenerator.hpp" 2 | 3 | #include "RandomGenerator.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | // ------------------------------------------------------------------------------------- 6 | #include 7 | // ------------------------------------------------------------------------------------- 8 | using namespace std; 9 | // ------------------------------------------------------------------------------------- 10 | namespace nam 11 | { 12 | namespace utils 13 | { 14 | // ------------------------------------------------------------------------------------- 15 | ZipfGenerator::ZipfGenerator(u64 ex_n, double theta) : n(ex_n - 1), theta(theta) 16 | { 17 | alpha = 1.0 / (1.0 - theta); 
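// The next two statements precompute the constants of Gray et al.'s method:
// zetan is the generalized harmonic number H(n, theta) = sum_{i=1..n} 1/i^theta,
// and eta = (1 - (2/n)^(1-theta)) / (1 - zeta(2, theta)/zetan) rescales a uniform
// draw so that rand() maps [0, 1) onto Zipf-distributed ranks.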
18 | zetan = zeta(n, theta); 19 | eta = (1.0 - std::pow(2.0 / n, 1.0 - theta)) / (1.0 - zeta(2, theta) / zetan); 20 | } 21 | // ------------------------------------------------------------------------------------- 22 | double ZipfGenerator::zeta(u64 n, double theta) 23 | { 24 | double ans = 0; 25 | for (u64 i = 1; i <= n; i++) 26 | ans += std::pow(1.0 / i, theta); 27 | return ans; 28 | } 29 | // ------------------------------------------------------------------------------------- 30 | uint64_t ZipfGenerator::rand() 31 | { 32 | double constant = 1000000000000000000.0; 33 | u64 i = RandomGenerator::getRandU64(0, 1000000000000000001); 34 | double u = static_cast(i) / constant; 35 | // return (u64)u; 36 | double uz = u * zetan; 37 | if (uz < 1) { 38 | return 1; 39 | } 40 | if (uz < (1 + std::pow(0.5, theta))) 41 | return 2; 42 | u64 ret = 1 + (long)(n * pow(eta * u - eta + 1, alpha)); 43 | return ret; 44 | } 45 | // ------------------------------------------------------------------------------------- 46 | } // namespace utils 47 | } // namespace nam 48 | // ------------------------------------------------------------------------------------- 49 | -------------------------------------------------------------------------------- /backend/nam/utils/ZipfGenerator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Defs.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | #include 6 | #include 7 | // ------------------------------------------------------------------------------------- 8 | namespace nam 9 | { 10 | namespace utils 11 | { 12 | // ------------------------------------------------------------------------------------- 13 | // A Zipf distributed random number generator 14 | // Based on Jim Gray Algorithm as described in "Quickly Generating Billion-Record..."
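// Illustrative usage (parameters are examples only, not taken from the experiments):
//   ZipfGenerator zipf(1000000, 0.99);   // theta close to 1 gives a strongly skewed distribution
//   uint64_t rank = zipf.rand();         // small ranks are drawn most frequently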
15 | // ------------------------------------------------------------------------------------- 16 | class ZipfGenerator 17 | { 18 | // ------------------------------------------------------------------------------------- 19 | private: 20 | u64 n; 21 | double theta; 22 | // ------------------------------------------------------------------------------------- 23 | double alpha, zetan, eta; 24 | // ------------------------------------------------------------------------------------- 25 | double zeta(u64 n, double theta); 26 | 27 | public: 28 | // [0, n) 29 | ZipfGenerator(uint64_t ex_n, double theta); 30 | // uint64_t rand(u64 new_n); 31 | uint64_t rand(); 32 | }; 33 | // ------------------------------------------------------------------------------------- 34 | } // namespace utils 35 | } // namespace nam 36 | // ------------------------------------------------------------------------------------- 37 | -------------------------------------------------------------------------------- /backend/nam/utils/crc64.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CRC64_HPP 2 | #define CRC64_HPP 3 | 4 | #include 5 | 6 | void crc64_init(void); 7 | uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l); 8 | 9 | #ifdef REDIS_TEST 10 | int crc64Test(int argc, char *argv[], int flags); 11 | #endif 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /backend/nam/utils/crcspeed.hpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2014, Matt Stancliff 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * * Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * * Neither the name of Redis nor the names of its contributors may be used 13 | * to endorse or promote products derived from this software without 14 | * specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | * POSSIBILITY OF SUCH DAMAGE. 
*/ 27 | 28 | #ifndef CRCSPEED_HPP 29 | #define CRCSPEED_HPP 30 | 31 | #include 32 | #include 33 | 34 | typedef uint64_t (*crcfn64)(uint64_t, const void *, const uint64_t); 35 | typedef uint16_t (*crcfn16)(uint16_t, const void *, const uint64_t); 36 | 37 | /* CRC-64 */ 38 | void crcspeed64little_init(crcfn64 fn, uint64_t table[8][256]); 39 | void crcspeed64big_init(crcfn64 fn, uint64_t table[8][256]); 40 | void crcspeed64native_init(crcfn64 fn, uint64_t table[8][256]); 41 | 42 | uint64_t crcspeed64little(uint64_t table[8][256], uint64_t crc, void *buf, 43 | size_t len); 44 | uint64_t crcspeed64big(uint64_t table[8][256], uint64_t crc, void *buf, 45 | size_t len); 46 | uint64_t crcspeed64native(uint64_t table[8][256], uint64_t crc, void *buf, 47 | size_t len); 48 | 49 | /* CRC-16 */ 50 | void crcspeed16little_init(crcfn16 fn, uint16_t table[8][256]); 51 | void crcspeed16big_init(crcfn16 fn, uint16_t table[8][256]); 52 | void crcspeed16native_init(crcfn16 fn, uint16_t table[8][256]); 53 | 54 | uint16_t crcspeed16little(uint16_t table[8][256], uint16_t crc, void *buf, 55 | size_t len); 56 | uint16_t crcspeed16big(uint16_t table[8][256], uint16_t crc, void *buf, 57 | size_t len); 58 | uint16_t crcspeed16native(uint16_t table[8][256], uint16_t crc, void *buf, 59 | size_t len); 60 | #endif 61 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | *.log 133 | .distexprunner 134 | lsyncd.conf 135 | *.csv -------------------------------------------------------------------------------- /distexperiments/distexprunner/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.envFile": "${workspaceFolder}/.env" 3 | } -------------------------------------------------------------------------------- /distexperiments/distexprunner/client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import logging 5 | import sys 6 | 7 | from distexprunner.experiment_client import ExperimentClient 8 | from distexprunner.notification import Notifier, Slack 9 | from distexprunner.outputs import LOG_LEVEL_CMD 10 | 11 | 12 | __author__ = 'mjasny' 13 | 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser(description='Distributed Experiment Runner Client Instance') 17 | parser.add_argument('-v', '--verbose', action="count", default=0, help='-v WARN -vv INFO -vvv DEBUG') 18 | parser.add_argument('--resume', action='store_true', help='Resume execution of experiments from last run') 19 | parser.add_argument('--compatibility-mode', action='store_true', default=False, help='Activate compatibiliy mode for class x(experiment.Base)') 20 | parser.add_argument('--slack-webhook', type=str, help='Notify to slack when execution finishes') 21 | parser.add_argument('--progress', action='store_true', default=False, help='Display progressbar, but disables logging') 22 | parser.add_argument('--log', type=str, help='Log into file') 23 | parser.add_argument('experiment', nargs='+', type=str, help='path to experiments, folders are searched recursively, order is important') 24 | args = parser.parse_args() 25 | 26 | 27 | logging_handlers = [] 28 | 29 | if not args.progress: 30 | logging_handlers.append(logging.StreamHandler()) 31 | 32 | 33 | if args.log: 34 | logging_handlers.append(logging.FileHandler(filename=args.log)) 35 | 36 | if logging_handlers: 37 | logging.basicConfig( 38 | format='%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s\r', # [%(filename)s:%(lineno)d]: 39 | datefmt='%Y-%m-%d %H:%M:%S', 40 | level=max(4 - args.verbose, 0) * 10, 41 | handlers=logging_handlers 42 | ) 43 | else: 44 | logging.disable(LOG_LEVEL_CMD) 45 | 46 | 47 | if args.slack_webhook: 48 | notifier = Slack(args.slack_webhook) 49 | else: 50 | notifier = Notifier() 51 | 52 | 53 | 54 | client = ExperimentClient( 55 | experiments=args.experiment, 56 | compatibility_mode=args.compatibility_mode, 57 | resume=args.resume, 58 | notifier=notifier, 59 | progress=args.progress 60 | ) 61 | client.start() 62 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/client_demo.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataManagementLab/RDMA_synchronization/f658c2e7cdf77af2d9776cb01e4b1e3604a21df8/distexperiments/distexprunner/client_demo.gif -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/__init__.py: -------------------------------------------------------------------------------- 1 | from .server_list import ServerList 2 | from .server import Server 3 | from .utils import * 4 | from .registry import reg_exp 5 | from .parameter_grid import ParameterGrid, ComputedParam 6 | from .outputs import Console, File, SubstrMatcher, EnvParser, CSVGenerator 7 | from .enums import * 8 | from . import notification 9 | from .stdin_controller import StdinController 10 | 11 | 12 | __author__ = 'mjasny' 13 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/_client_impl.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import sys 4 | 5 | from ._exceptions import BadReturnCode 6 | from ._server_interface import ServerInterface 7 | from ._client_interface import ClientInterface 8 | from ._rpc import RPCReader, RPCWriter 9 | 10 | 11 | 12 | class ClientImpl(ClientInterface): 13 | def __init__(self, reader, writer): 14 | self.__rpc_reader = RPCReader(reader, writer, self) 15 | self.rpc = RPCWriter(ServerInterface)(writer) 16 | 17 | self.pings = 0 18 | self.rc_futures = {} 19 | self.stdout_handler = {} 20 | self.stderr_handler = {} 21 | 22 | async def _on_disconnect(self): 23 | pass 24 | # logging.info(f'pings={self.pings}') 25 | 26 | 27 | # TODO refactor out 28 | async def _run_cmd(self, uuid, cmd, env): 29 | loop = asyncio.get_running_loop() 30 | rc_future = loop.create_future() 31 | 32 | self.rc_futures[uuid] = rc_future 33 | await self.rpc.run_cmd(uuid, cmd, env=env) 34 | 35 | return rc_future 36 | 37 | def _set_stdout(self, uuid, handler): 38 | self.stdout_handler[uuid] = handler 39 | 40 | def _set_stderr(self, uuid, handler): 41 | self.stderr_handler[uuid] = handler 42 | 43 | 44 | async def pong(self, *args, **kwargs): 45 | # await asyncio.sleep(0.1) 46 | self.pings += 1 47 | await self.rpc.ping(*args, **kwargs) 48 | 49 | 50 | async def stdout(self, uuid, line): 51 | for handler in self.stdout_handler.get(uuid, []): 52 | handler(line) 53 | 54 | async def stderr(self, uuid, line): 55 | for handler in self.stderr_handler.get(uuid, []): 56 | handler(line) 57 | 58 | async def rc(self, uuid, rc): 59 | if uuid in self.rc_futures: 60 | if not self.rc_futures[uuid].done(): 61 | self.rc_futures[uuid].set_result(rc) 62 | logging.info(f'uuid={uuid} finished with exit code: {rc}') 63 | if rc != 0: 64 | raise BadReturnCode(rc) -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/_client_interface.py: -------------------------------------------------------------------------------- 1 | 2 | class ClientInterface: 3 | async def pong(self, x): 4 | raise NotImplementedError() 5 | 6 | async def stdout(self, uuid, line): 7 | raise NotImplementedError() 8 | 9 | async def stderr(self, uuid, line): 10 | raise NotImplementedError() 11 | 12 | async def rc(self, uuid, rc): 13 | raise NotImplementedError() -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/_exceptions.py: 
-------------------------------------------------------------------------------- 1 | class BadReturnCode(Exception): 2 | def __init__(self, rc): 3 | super().__init__(f'BadReturnCode: {rc}') -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/_progressbar.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import shutil 3 | import math 4 | 5 | 6 | 7 | class Progress: 8 | def __init__(self, max_steps, output=sys.stdout, disable_stdout=True): 9 | self.steps = 0 10 | self.max_steps = max_steps 11 | self.current_step = None 12 | self.output = sys.stdout 13 | self.line_width = 0 14 | if disable_stdout: 15 | sys.stdout = open('/dev/null', 'w') 16 | sys.stderr = open('/dev/null', 'w') 17 | 18 | 19 | def __write(self, s): 20 | self.output.write(s) 21 | self.line_width = len(s) 22 | 23 | 24 | def step_start(self, name): 25 | self.__write(f'{name} ...\033[K\n') 26 | self.current_step = name 27 | 28 | self.render_bar() 29 | 30 | 31 | def step_finish(self): 32 | self.steps += 1 33 | self.render_bar() 34 | 35 | 36 | def step_status(self, error=False, status=None): 37 | CHECK_MARK='\033[0;32m\u2714\033[0m' 38 | RED_CROSS='\033[0;31m\u2718\033[0m' 39 | INFO = '\033[1;33m=>\033[0m' 40 | 41 | width, _ = shutil.get_terminal_size((80, 20)) 42 | for _ in range(math.ceil(self.line_width / width)): 43 | self.output.write('\033[1A\033[K') # 1 up, clear line 44 | 45 | if status: 46 | self.__write(f'{self.current_step} {INFO} {status}\033[K\n') 47 | self.output.write('\033[K') 48 | self.line_width = 0 49 | self.render_bar() 50 | elif not error: 51 | self.__write(f'{self.current_step} {CHECK_MARK}\033[K\n') 52 | else: 53 | self.__write(f'{self.current_step} {RED_CROSS} => {error}\033[K\n') 54 | 55 | 56 | def render_bar(self): 57 | width, _ = shutil.get_terminal_size((80, 20)) 58 | 59 | percent = self.steps/self.max_steps 60 | steps_width = len(str(self.max_steps)) 61 | prefix = f'Progress: [{self.steps:{steps_width}d}/{self.max_steps} {percent:4.0%}]' 62 | 63 | if len(prefix)+8 > width: 64 | self.output.write('\033[0;31mWidth too small!\033[0m\n') 65 | return 66 | 67 | width_left = width - len(prefix) - 3 68 | hashes = math.floor(width_left*percent) 69 | dots = (width_left-hashes) 70 | suffix = f'[{"#"*hashes}{"."*dots}]' 71 | 72 | progress = f'\033[0;42;30m{prefix}\033[0m {suffix}' 73 | self.output.write('\033[K\n') 74 | self.output.write(progress) 75 | if self.steps < self.max_steps: 76 | self.output.write('\033[1A\r') #1 up, start 77 | self.output.flush() 78 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/_resume_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pathlib 3 | import json 4 | 5 | 6 | class ResumeManager: 7 | def __init__(self): 8 | self.path = pathlib.Path('.distexprunner') 9 | self.already_run = set() 10 | if self.path.exists(): 11 | with self.path.open('r') as f: 12 | self.already_run = set(l for l in f.read().splitlines() if len(l) > 0) 13 | 14 | 15 | 16 | 17 | def was_run(self, exp, params): 18 | s = json.dumps({ 19 | 'name': exp, 20 | 'params': params 21 | }, sort_keys=True) 22 | return s in self.already_run 23 | 24 | 25 | def add_run(self, exp, params): 26 | s = json.dumps({ 27 | 'name': exp, 28 | 'params': params 29 | }, sort_keys=True) 30 | with self.path.open('a+') as f: 31 | f.write(f'{s}\n') 32 | 33 | 34 | def 
reset(self): 35 | self.already_run = set() 36 | if self.path.exists(): 37 | self.path.unlink() 38 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/_rpc.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import json 4 | 5 | try: 6 | from asyncio.exceptions import IncompleteReadError 7 | except ModuleNotFoundError: # for python3.7 8 | from asyncio.streams import IncompleteReadError 9 | 10 | 11 | def RPCWriter(RPCInterface): 12 | class Writer(RPCInterface): 13 | def __init__(self, writer): 14 | self._writer = writer 15 | 16 | def __getattribute__(self, attr): 17 | writer = super().__getattribute__('_writer') 18 | 19 | async def func(*args, **kwargs): 20 | data = {'method': attr, 'args': args, 'kwargs': kwargs} 21 | logging.debug(f'call: {data}') 22 | writer.write(f'{json.dumps(data)}\n'.encode()) 23 | try: 24 | await writer.drain() 25 | except ConnectionResetError: 26 | return False 27 | return True 28 | 29 | return func 30 | 31 | return Writer 32 | 33 | 34 | class RPCReader: 35 | def __init__(self, reader, writer, impl): 36 | self.__reader = reader 37 | self.__writer = writer 38 | self.__impl = impl 39 | 40 | addr = writer.get_extra_info('peername') 41 | logging.info(f'Initiated connection with {addr[0]}:{addr[1]}') 42 | 43 | loop = asyncio.get_running_loop() 44 | loop.create_task(self._read_loop()) 45 | 46 | 47 | async def _read_loop(self): 48 | while True: 49 | if self.__writer.is_closing(): 50 | break 51 | 52 | try: 53 | data = await self.__reader.readuntil(separator=b'\n') 54 | except (IncompleteReadError, ConnectionResetError): 55 | break 56 | 57 | json_data = json.loads(data[:-1]) 58 | logging.debug(f'json: {json_data}') 59 | 60 | func = getattr(self.__impl, json_data['method']) 61 | # await func(*json_data['args'], **json_data['kwargs']) 62 | asyncio.create_task(func(*json_data['args'], **json_data['kwargs']) ) 63 | 64 | self.__writer.close() 65 | try: 66 | await self.__writer.wait_closed() 67 | except ConnectionResetError: 68 | pass 69 | 70 | addr = self.__writer.get_extra_info('peername') 71 | logging.info(f'Lost connection with {addr[0]}:{addr[1]}') 72 | await self.__impl._on_disconnect() 73 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/_server_interface.py: -------------------------------------------------------------------------------- 1 | 2 | class ServerInterface: 3 | async def ping(self, x): 4 | raise NotImplementedError() 5 | 6 | async def run_cmd(self, uuid, cmd, env={}): 7 | raise NotImplementedError() 8 | 9 | async def kill_cmd(self, uuid): 10 | raise NotImplementedError() 11 | 12 | async def stdin_cmd(self, uuid, line, close=False): 13 | raise NotImplementedError() 14 | 15 | async def cd(self, directory): 16 | raise NotImplementedError -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/enums.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class Action(Enum): 5 | RESTART = 1 6 | 7 | class ReturnCode(Enum): 8 | TIMEOUT = 1 -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/experiment_server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from 
contextlib import suppress 4 | 5 | from ._server_impl import ServerImpl 6 | 7 | 8 | class ExperimentServer: 9 | def __init__(self, ip, port, max_idle): 10 | self.ip = ip 11 | self.port = port 12 | self.loop = asyncio.get_event_loop() 13 | 14 | if max_idle > 0: 15 | self.start_terminator(max_idle) 16 | 17 | 18 | def start(self): 19 | self.stop_future = self.loop.create_future() 20 | self.loop.create_task(self.listen()) 21 | 22 | try: 23 | self.loop.run_until_complete(self.stop_future) 24 | except KeyboardInterrupt: 25 | pass 26 | 27 | logging.info('Closing server') 28 | tasks = asyncio.all_tasks(loop=self.loop) 29 | for task in tasks: 30 | task.cancel() 31 | with suppress(asyncio.CancelledError): 32 | self.loop.run_until_complete(task) 33 | logging.info(f'Cancelled {len(tasks)} running tasks.') 34 | self.loop.close() 35 | 36 | 37 | def start_terminator(self, max_idle): 38 | async def checker(): 39 | idle_time_left = max_idle 40 | while idle_time_left > 0: 41 | await asyncio.sleep(1) 42 | if len(asyncio.all_tasks(loop=self.loop)) > 2: # listen() and checker() 43 | idle_time_left = max_idle 44 | else: 45 | idle_time_left -= 1 46 | logging.info(f'Auto termination after being {max_idle} seconds idle.') 47 | self.stop_future.set_result(None) 48 | 49 | self.loop.create_task(checker()) 50 | 51 | 52 | async def listen(self): 53 | server = await asyncio.start_server(ServerImpl, self.ip, self.port) 54 | addr = server.sockets[0].getsockname() 55 | logging.info(f'Serving on {addr[0]}:{addr[1]}') 56 | 57 | async with server: 58 | await server.serve_forever() 59 | 60 | 61 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/notification.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import json 4 | import urllib.request 5 | import logging 6 | 7 | 8 | def get_user(): 9 | if 'SUDO_USER' in os.environ: 10 | return os.environ['SUDO_USER'] 11 | else: 12 | return os.environ['USER'] 13 | 14 | 15 | class Notifier: 16 | def send(self, message): 17 | pass 18 | 19 | def on_finish(self, num_exps): 20 | pass 21 | 22 | 23 | class Slack(Notifier): 24 | def __init__(self, webhook_url): 25 | self.webhook_url = webhook_url 26 | 27 | 28 | def send(self, message): 29 | # curl -X POST -H 'Content-type: application/json' --data '{"text":"Hello, World!"}' 30 | data = { 31 | 'text': message 32 | } 33 | 34 | req = urllib.request.Request( 35 | self.webhook_url, 36 | data=json.dumps(data).encode('utf8'), 37 | headers={ 38 | 'content-type': 'application/json' 39 | } 40 | ) 41 | response = urllib.request.urlopen(req) 42 | logging.info(f'Slack API: {response.read().decode("utf8")}') 43 | 44 | 45 | def on_finish(self, num_exps): 46 | self.send(f'*_Status report_*\n\n*{num_exps}* experiments from *{get_user()}* finished.') -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/parameter_grid.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import inspect 3 | from collections.abc import Iterable 4 | 5 | 6 | def product(params): 7 | # filter out empty params 8 | params = {k: v for k, v in params.items() if len(v) != 0} 9 | keys = list(params.keys()) 10 | values = params.values() 11 | 12 | def fn(x, pool): 13 | if isinstance(pool, ComputedParam): 14 | pool = pool.get(keys[:len(x)], x) 15 | 16 | if len(pool) == 0: 17 | yield x 18 | 19 | for y in pool: 20 | yield x + [y] 21 | 22 
| 23 | result = [[]] 24 | for pool in values: 25 | result = [y for x in result for y in fn(x, pool)] 26 | 27 | for prod in result: 28 | yield {key: prod[i] for i, key in enumerate(keys)} 29 | 30 | 31 | 32 | class ParameterGrid: 33 | def __init__(self, **kwargs): 34 | self.__params = kwargs 35 | 36 | def get(self): 37 | for params in product(self.__params): 38 | yield params, '_'.join(f'{k}={v}' for k, v in params.items()) 39 | 40 | 41 | 42 | class ComputedParam: 43 | def __init__(self, fn): 44 | self.fn = fn 45 | self.fn_args = inspect.getfullargspec(fn).args 46 | 47 | 48 | def get(self, keys, values): 49 | args = {key: values[i] for i, key in enumerate(keys) if key in self.fn_args} 50 | ret = self.fn(**args) 51 | if not isinstance(ret, Iterable): 52 | return (ret, ) 53 | return ret 54 | 55 | def __len__(self): 56 | return 1 57 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/registry.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import collections 3 | 4 | from .server_list import ServerList 5 | from .parameter_grid import ParameterGrid 6 | 7 | 8 | class ExperimentStore: 9 | __experiments = [] 10 | 11 | @staticmethod 12 | def get(): 13 | return ExperimentStore.__experiments 14 | 15 | @staticmethod 16 | def add(*, name, servers, func, params, max_restarts, raise_on_rc): 17 | ExperimentStore.__experiments.append( 18 | (name, servers, func, params, max_restarts, raise_on_rc) 19 | ) 20 | 21 | 22 | 23 | def reg_exp(servers=None, params=None, max_restarts=0, raise_on_rc=True): 24 | if not isinstance(servers, ServerList): 25 | raise Exception('Servers needs to be a ServerList') 26 | 27 | if params: 28 | if not isinstance(params, ParameterGrid): 29 | raise Exception('params needs to be a ParameterGrid') 30 | 31 | def decorator_grid(func): 32 | for p, name in params.get(): 33 | name = func.__name__+'__'+name 34 | 35 | ExperimentStore.add( 36 | name=name, 37 | servers=servers, 38 | func=func, 39 | params=p, 40 | max_restarts=max_restarts, 41 | raise_on_rc=raise_on_rc 42 | ) 43 | return decorator_grid 44 | 45 | 46 | def decorator(func): 47 | ExperimentStore.add( 48 | name=func.__name__, 49 | servers=servers, 50 | func=func, 51 | params={}, 52 | max_restarts=max_restarts, 53 | raise_on_rc=raise_on_rc 54 | ) 55 | return decorator 56 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import collections 4 | import uuid 5 | import socket 6 | 7 | from .enums import ReturnCode 8 | from ._client_impl import ClientImpl 9 | from .stdin_controller import StdinController 10 | 11 | 12 | class Server: 13 | def __init__(self, id: str, ip: str, port: int=20000, **kwargs): 14 | self.id = id # TODO assert types 15 | self.ip = ip 16 | self.port = port 17 | 18 | for k, v in kwargs.items(): 19 | if k.startswith('_'): 20 | raise Exception(f'private/protected attributes not allowed, "{k}" starts with _ or __.') 21 | if not hasattr(self, k): 22 | setattr(self, k, v) 23 | else: 24 | raise Exception(f'Attribute {k} already exists.') 25 | 26 | 27 | async def _connect(self): 28 | self.__reader, self.__writer = await asyncio.open_connection(self.ip, self.port) 29 | self.__client = ClientImpl(self.__reader, self.__writer) 30 | 31 | 32 | async def _disconnect(self): 33 | 
self.__writer.close() 34 | await self.__writer.wait_closed() 35 | 36 | 37 | def cd(self, directory): 38 | loop = asyncio.get_event_loop() 39 | task = self.__client.rpc.cd(directory) 40 | loop.run_until_complete(task) 41 | 42 | 43 | 44 | def run_cmd(self, cmd, stdout=None, stderr=None, stdin=None, env={}, timeout=None): 45 | loop = asyncio.get_event_loop() 46 | _uuid = str(uuid.uuid4()) 47 | 48 | if stdout is not None: 49 | stdout = stdout if isinstance(stdout, collections.Iterable) else [stdout] 50 | self.__client._set_stdout(_uuid, stdout) 51 | 52 | if stderr is not None: 53 | stderr = stderr if isinstance(stderr, collections.Iterable) else [stderr] 54 | self.__client._set_stderr(_uuid, stderr) 55 | 56 | 57 | task = self.__client._run_cmd(_uuid, cmd, env) 58 | rc_future = loop.run_until_complete(task) 59 | logging.info(f'{self.id}: {repr(cmd)} uuid={_uuid}') 60 | rpc = self.__client.rpc 61 | 62 | 63 | if stdin is not None: 64 | if isinstance(stdin, StdinController): # TODO maybe accept file 65 | stdin.add(self.id, _uuid, cmd, rpc) 66 | else: 67 | logging.error(f'Stdin argument of unsupported type! {stdin}') 68 | 69 | 70 | async def kill_task(): 71 | await rpc.kill_cmd(_uuid) 72 | return await rc_future 73 | 74 | async def stdin_task(line, close): 75 | await rpc.stdin_cmd(_uuid, line, close) 76 | 77 | 78 | async def timeout_task(): 79 | await asyncio.sleep(timeout) 80 | # if not rc: 81 | # await kill_task() 82 | # except: #asyncio.TimeoutError 83 | if not rc_future.done(): 84 | rc_future.set_result(ReturnCode.TIMEOUT) 85 | logging.info(f'{self.id}: TIMEOUT {repr(cmd)} uuid={_uuid}') 86 | await kill_task() 87 | 88 | 89 | if timeout: 90 | loop.create_task(timeout_task()) 91 | 92 | 93 | 94 | class Actions: 95 | def wait(self, block=True): 96 | if block: 97 | return loop.run_until_complete(rc_future) 98 | 99 | if not rc_future.done(): 100 | return None 101 | return rc_future.result() 102 | 103 | 104 | def kill(self): 105 | return loop.run_until_complete(kill_task()) 106 | 107 | def stdin(self, line, close=False): 108 | loop.run_until_complete(stdin_task(line, close)) 109 | 110 | def async_stdin(self, line, close=False): 111 | loop.create_task(rpc.stdin_cmd(_uuid, line, close=close)) 112 | 113 | 114 | return Actions() -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/server_list.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import types 4 | from collections.abc import Iterable 5 | 6 | from .server import Server 7 | 8 | 9 | class ServerList: 10 | def __init__(self, *args, working_directory=None): 11 | servers = [] 12 | args = list(args) 13 | while args: 14 | arg = args.pop(0) 15 | if isinstance(arg, Server): 16 | servers.append(arg) 17 | elif isinstance(arg, Iterable): 18 | args[0:0] = arg # insert at front to preserve order 19 | else: 20 | raise Exception(f'Unsupported Argument type: {type(arg)}') 21 | 22 | if len(set(s.id for s in servers)) != len(servers): 23 | raise Exception('Server IDs must be unique') 24 | 25 | self.__servers = servers 26 | self.__id_to_server = {s.id: s for s in servers} 27 | self.__loop = asyncio.get_event_loop() 28 | self.__working_directory = working_directory 29 | 30 | 31 | def cd(self, directory): 32 | for s in self.__servers: 33 | s.cd(directory) 34 | 35 | 36 | def _connect_to_all(self): 37 | if not self.__servers: 38 | return 39 | task = asyncio.wait([s._connect() for s in self.__servers]) 40 | 
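# Sketch of what happens here: all Server._connect() coroutines are awaited together,
# so the TCP connections to the experiment servers are opened concurrently; once they
# are up, the list-wide working_directory is propagated to every server via cd().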
self.__loop.run_until_complete(task) 41 | self.cd(self.__working_directory) 42 | 43 | 44 | def _disconnect_from_all(self): 45 | if not self.__servers: 46 | return 47 | task = asyncio.wait([s._disconnect() for s in self.__servers]) 48 | self.__loop.run_until_complete(task) 49 | 50 | 51 | def wait_cmds_finish(self): 52 | raise NotImplementedError() 53 | 54 | 55 | def __getitem__(self, key): 56 | if isinstance(key, int): 57 | try: 58 | return self.__servers[key] 59 | except IndexError: 60 | raise Exception(f'IndexError for: {key}') 61 | elif isinstance(key, str): 62 | try: 63 | return self.__id_to_server[key] 64 | except KeyError: 65 | raise Exception(f'KeyError for: {key}') 66 | elif isinstance(key, slice): 67 | return ServerList(self.__servers[key]) 68 | elif isinstance(key, types.FunctionType): 69 | return ServerList(filter(key, self.__servers)) 70 | elif isinstance(key, tuple): 71 | return ServerList(self.__getitem__(k) for k in key) 72 | else: 73 | raise Exception(f'Lookup type: {type(key)} not supported') 74 | 75 | def __iter__(self): 76 | return iter(self.__servers) 77 | 78 | def __len__(self): 79 | return len(self.__servers) -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/stdin_controller.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tty 3 | import termios 4 | import logging 5 | import functools 6 | import asyncio 7 | 8 | 9 | 10 | class StdinController: 11 | def __init__(self): 12 | self.cmds = {} 13 | 14 | LOG_LEVEL_CMD = 98 15 | logging.addLevelName(LOG_LEVEL_CMD, 'CONTROL') 16 | self.__log = functools.partial(logging.log, LOG_LEVEL_CMD) 17 | self.__loop = asyncio.get_event_loop() 18 | self.__stop_future = self.__loop.create_future() 19 | self.__input = '' 20 | 21 | 22 | def add(self, server_id, _uuid, cmd, rpc): 23 | self.__log(f'Added cmd={cmd} uuid={_uuid} to controller') 24 | self.cmds[_uuid] = (server_id, cmd, rpc) 25 | 26 | 27 | def __menu(self, select=None): 28 | self.__cmd = None 29 | if select is None: 30 | sys.stdout.write('\n\r') 31 | self.__log(f'Interfacing with commands in progress...') 32 | self.__log('') 33 | self.__log(f'Press Ctrl-C to quit this controller.') 34 | self.__log(f'Press Ctrl-D to close stdin of active command.') 35 | self.__log(f'Press Ctrl-H to print this selection menu.') 36 | self.__log('') 37 | self.__log(f'Please select command from the list.') 38 | for i, (uuid, (server_id, cmd, _)) in enumerate(self.cmds.items()): 39 | self.__log(f'\t[{i}] {server_id}: {repr(cmd)}') 40 | self.__log(f'\t{" "*(len(server_id)+len(str(i))+5)}uuid={uuid}') 41 | 42 | sys.stdout.write('Enter selection: ') 43 | sys.stdout.flush() 44 | return 45 | 46 | try: 47 | index = int(select) 48 | except ValueError: 49 | sys.stdout.write('\n\r') 50 | self.__log(f'Number could not be converted to int: {select}') 51 | sys.stdout.write('Enter selection: ') 52 | sys.stdout.flush() 53 | return 54 | 55 | if index < 0 or index >= len(self.cmds): 56 | sys.stdout.write('\n\r') 57 | self.__log(f'Number out of range: {index}') 58 | sys.stdout.write('Enter selection: ') 59 | sys.stdout.flush() 60 | return 61 | 62 | self.__cmd = list(self.cmds)[index] 63 | server_id, cmd, _ = self.cmds[self.__cmd] 64 | sys.stdout.write('\n\r') 65 | self.__log(f'Opening to stdin of cmd={repr(cmd)} on {server_id}') 66 | 67 | 68 | 69 | def __on_stdin(self): 70 | c = sys.stdin.read(1) 71 | 72 | if c == '\x03': # Ctrl-C 73 | self.__stop_future.set_result(None) 74 | elif c == 
'\x08': # Ctrl-H: 75 | self.__menu() 76 | elif c == '\x04': # Ctrl-D 77 | if self.__cmd is not None: 78 | _, _, rpc = self.cmds[self.__cmd] 79 | self.__loop.create_task(rpc.stdin_cmd(self.__cmd, '', close=True)) 80 | self.__stop_future.set_result(None) 81 | self.__input = '' 82 | elif c == '\x7f': # Backspace 83 | self.__input = self.__input[:-1] 84 | sys.stdout.write(f'\b\033[K') 85 | elif c == '\r': # Enter 86 | if self.__cmd is not None: 87 | _, _, rpc = self.cmds[self.__cmd] 88 | self.__loop.create_task(rpc.stdin_cmd(self.__cmd, f'{self.__input}\n', close=False)) 89 | self.__input = '' 90 | sys.stdout.write('\r\n') 91 | else: 92 | self.__menu(select=self.__input) 93 | self.__input = '' 94 | else: 95 | self.__input += c 96 | # print(repr(c)) 97 | sys.stdout.write(c) 98 | 99 | sys.stdout.flush() 100 | 101 | 102 | def wait(self): 103 | self.__menu() 104 | 105 | fd = sys.stdin.fileno() 106 | old_settings = termios.tcgetattr(fd) 107 | tty.setraw(fd) 108 | 109 | self.__loop.add_reader(fd, self.__on_stdin) 110 | self.__loop.run_until_complete(self.__stop_future) 111 | self.__loop.remove_reader(fd) 112 | 113 | termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) 114 | sys.stdout.write('\r\n') 115 | self.__log(f'Controller exited. Continuing execution...') -------------------------------------------------------------------------------- /distexperiments/distexprunner/distexprunner/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import sys, tty, termios 3 | import logging 4 | import itertools 5 | import functools 6 | 7 | 8 | __all__ = ['GDB', 'SOCKET_BIND', 'sleep', 'forward_stdin_to', 'counter', 'log', 'IterClassGen', 'any_failed'] 9 | 10 | 11 | GDB = f'gdb -quiet --ex run --args' 12 | SOCKET_BIND = lambda nodes: f'numactl --cpunodebind={nodes} --membind={nodes}' 13 | 14 | 15 | def sleep(delay): 16 | """sequential sleep without blocking event loop processing""" 17 | 18 | loop = asyncio.get_event_loop() 19 | async def task(): 20 | await asyncio.sleep(delay) 21 | loop.run_until_complete(task()) 22 | 23 | 24 | def forward_stdin_to(cmd, esc='\x1b'): # \x02 ESC \x03 Ctrl-C 25 | """forward console stdin to running command (A BIT BUGGY)""" 26 | 27 | logging.info(f'Interfacing with command in progress... 
Press ESC to quit.') 28 | 29 | loop = asyncio.get_event_loop() 30 | async def task(): 31 | future = loop.create_future() 32 | 33 | fd = sys.stdin.fileno() 34 | old_settings = termios.tcgetattr(fd) 35 | tty.setraw(fd) 36 | 37 | def on_stdin(): 38 | c = sys.stdin.read(1) 39 | # print(repr(c)) 40 | 41 | if c == '\r': 42 | c = '\n' 43 | 44 | cmd.async_stdin(c) 45 | 46 | if c == esc: 47 | future.set_result(None) 48 | 49 | loop.add_reader(fd, on_stdin) 50 | await future 51 | loop.remove_reader(fd) 52 | 53 | termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) 54 | loop.run_until_complete(task()) 55 | 56 | 57 | def counter(start=0, step=1): 58 | """Helper to count, c = counter(0); next(c)==0 next(c)==1""" 59 | return itertools.count(start=start, step=step) 60 | 61 | 62 | LOG_LEVEL_CMD = 99 63 | logging.addLevelName(LOG_LEVEL_CMD, 'LOG') 64 | 65 | def log(message): 66 | """Log message using logging system with tag LOG""" 67 | logging.log(LOG_LEVEL_CMD, f'{message}') 68 | 69 | 70 | class IterClassGen: 71 | """Generates and stores conveniently as many instances of classes as needed""" 72 | def __init__(self, cls, *args, **kwargs): 73 | self.__factory = functools.partial(cls, *args, **kwargs) 74 | self.__instances = [] 75 | 76 | def __next__(self): 77 | instance = self.__factory() 78 | self.__instances.append(instance) 79 | return instance 80 | 81 | 82 | def __getitem__(self, key): 83 | return self.__instances[key] 84 | 85 | def __iter__(self): 86 | return iter(self.__instances) 87 | 88 | def __len__(self): 89 | return len(self.__instances) 90 | 91 | 92 | 93 | def any_failed(cmds, poll_interval=1): 94 | """Checks periodically return-codes of commands, returns first rc found to be != 0, else False""" 95 | while True: 96 | rcs = [cmd.wait(block=False) for cmd in cmds] 97 | for rc in rcs: 98 | if rc is not None and rc != 0: 99 | return rc 100 | if all(rc == 0 for rc in rcs): 101 | return False 102 | sleep(poll_interval) -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/basic.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | @reg_exp(servers=server_list) 8 | def bash_for(servers): 9 | for s in servers[:1]: 10 | cmd = s.run_cmd('for i in {1..10}; do echo $i; sleep 0.1; done') 11 | # time.sleep(3) would block event loop processing 12 | sleep(3) 13 | cmd.wait() 14 | 15 | 16 | 17 | @reg_exp(servers=server_list, raise_on_rc=False) 18 | def kill_yes(servers): 19 | for s in servers[:1]: 20 | yes_cmd = s.run_cmd('yes > /dev/null') 21 | sleep(3) 22 | yes_cmd.kill() 23 | 24 | 25 | @reg_exp(servers=server_list) 26 | def read_stdin(servers): 27 | cmd = servers['node01'].run_cmd('read p && echo $p', stdout=Console(fmt='node01: %s')) 28 | cmd.stdin('hello\n') 29 | cmd.wait() 30 | 31 | 32 | @reg_exp(servers=server_list) 33 | def many_trees(servers): 34 | cmds = [servers[0].run_cmd('tree || ls') for _ in range(20)] 35 | assert(all(cmd.wait() == 0 for cmd in cmds)) 36 | 37 | 38 | @reg_exp(servers=server_list, raise_on_rc=False) 39 | def exit_code(servers): 40 | cmds = [servers[0].run_cmd(f'exit {i}') for i in range(5)] 41 | assert(not all(cmd.wait() == 0 for cmd in cmds)) -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/buffered_stdout.py: -------------------------------------------------------------------------------- 1 | import config 2 
| from distexprunner import * 3 | 4 | 5 | 6 | @reg_exp(servers=config.server_list, raise_on_rc=True) 7 | def buffered_stdout(servers): 8 | s = servers[0] 9 | 10 | code = r""" 11 | #include 12 | #include 13 | 14 | int main(void) { 15 | for (int i = 0; i < 10; i++) { 16 | printf("%d\n", i); 17 | usleep(1000000); 18 | } 19 | return 0; 20 | } 21 | """ 22 | exe = 'unbuffered' 23 | 24 | cmd = s.run_cmd(f'gcc -xc - -o {exe}') 25 | cmd.stdin(code, close=True) 26 | cmd.wait() 27 | 28 | s.run_cmd(f'./{exe}', stdout=Console()).wait() 29 | s.run_cmd(f'rm -f {exe}').wait() -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/compile.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = ServerList() 6 | 7 | @reg_exp(servers=server_list) 8 | def compile(servers): 9 | cmake_cmd = f'mkdir -p build && cd build && cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..' 10 | procs = [s.run_cmd(cmake_cmd) for s in servers] 11 | assert(all(p.wait() == 0 for p in procs)) 12 | 13 | 14 | make_cmd = f'cd build && make -j' 15 | procs = [s.run_cmd(make_cmd) for s in servers] 16 | assert(all(p.wait() == 0 for p in procs)) 17 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/config.py: -------------------------------------------------------------------------------- 1 | from distexprunner import ServerList, Server 2 | 3 | 4 | SERVER_PORT = 20000 5 | 6 | 7 | server_list = ServerList( 8 | Server('node01', '127.0.0.1', SERVER_PORT), 9 | Server('node02', '127.0.0.1', SERVER_PORT), 10 | Server('node03', '127.0.0.1', SERVER_PORT), 11 | Server('node04', '127.0.0.1', SERVER_PORT), 12 | Server('node05', '127.0.0.1', SERVER_PORT), 13 | #Server('node0x', '192.168.94.2x', SERVER_PORT), 14 | ) 15 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/cpu_load.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | 8 | import json 9 | class String: 10 | def __init__(self): 11 | self.__s = [] 12 | 13 | def __call__(self, line): 14 | self.__s.append(line) 15 | 16 | def __str__(self): 17 | return ''.join(self.__s) 18 | 19 | def json(self): 20 | return json.loads(str(self)) 21 | 22 | 23 | class HighLoad(Exception): 24 | pass 25 | 26 | 27 | @reg_exp(servers=server_list) 28 | def cpu_load(servers, cpu_limit=None, node_limit=None): 29 | 30 | output = String() 31 | rc = servers[0].run_cmd(f'mpstat -P ALL -N ALL -o JSON 1 1', stdout=output).wait() 32 | assert(rc == 0) 33 | 34 | stat = output.json()['sysstat']['hosts'][0]['statistics'][0] 35 | load = { 36 | 'cpu': dict(map(lambda x: (x['cpu'], 100-x['idle']), stat['cpu-load'])), 37 | 'node': dict(map(lambda x: (x['node'], 100-x['idle']), stat['node-load'])) 38 | } 39 | for name, vals in load.items(): 40 | log(f'{name}:') 41 | for idx, util in vals.items(): 42 | log(f'\t{idx}: {util:0.2f}') 43 | 44 | 45 | if isinstance(cpu_limit, (int, float)): 46 | for cpu, util in load['cpu'].items(): 47 | if util > cpu_limit: 48 | raise HighLoad(f'too high load for cpu {repr(cpu)}: {util:0.2f}% > {cpu_limit}') 49 | elif callable(cpu_limit): 50 | for cpu, util in load['cpu'].items(): 51 | if not cpu_limit(cpu, util): 52 | raise HighLoad(f'too high load for cpu {repr(cpu)}: 
{util:0.2f}% cpu_limit()=False') 53 | 54 | if isinstance(node_limit, (int, float)): 55 | for node, util in load['node'].items(): 56 | if util > node_limit: 57 | raise HighLoad(f'too high load for node {repr(node)}: {util:0.2f}% > {node_limit}') 58 | elif callable(node_limit): 59 | for node, util in load['node'].items(): 60 | if not node_limit(node, util): 61 | raise HighLoad(f'too high load for node {repr(node)}: {util:0.2f}% node_limit()=False') 62 | 63 | # node_limit=10 64 | # cpu_limit=lambda c, u: u < 10 -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/csv_parser.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | 8 | @reg_exp(servers=server_list) 9 | def csv_parser(servers): 10 | s = servers[0] 11 | 12 | csvs = IterClassGen(CSVGenerator, 13 | r'value=(?P\d+)', # catches only latest printed value 14 | CSVGenerator.Array(r'other=(?P\d+)'), # collects all in a '|' separated array 15 | CSVGenerator.Sum(r'other=(?P\d+)'), 16 | CSVGenerator.SortedArray(r'value=(?P\d+),other=(?P\d+)'), 17 | foobar=1234, 18 | ) 19 | s.run_cmd('for i in {10..1}; do echo "value=$i,other=$((i*2))"; done', stdout=next(csvs)).wait() 20 | 21 | for csv in csvs: 22 | # writes header once and appends rows 23 | # csv has properties .header and .row 24 | csv.write('file.csv') 25 | 26 | # file.csv: 27 | # value,other,other_sum,sorted_other 28 | # 1,20|18|16|14|12|10|8|6|4|2,110,2|4|6|8|10|12|14|16|18|20 29 | 30 | 31 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/cwd.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | server_list = ServerList( 7 | config.server_list[0], 8 | working_directory='/tmp' 9 | ) 10 | 11 | 12 | @reg_exp(servers=server_list) 13 | def cwd(servers): 14 | for s in servers: 15 | s.run_cmd('ls && pwd', stdout=Console(fmt=f'{s.id}: %s')).wait() 16 | 17 | servers.cd('/') 18 | for s in servers: 19 | s.run_cmd('ls && pwd', stdout=Console(fmt=f'{s.id}: %s')).wait() 20 | 21 | for s in servers: 22 | s.cd('/home') 23 | s.run_cmd('ls && pwd', stdout=Console(fmt=f'{s.id}: %s')).wait() -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/dpkg_query.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | class PackageList: 7 | def __init__(self): 8 | self.packages = {} 9 | 10 | def __call__(self, line): 11 | name, version = line.strip().split('|', 1) 12 | self.packages[name] = version 13 | 14 | def __repr__(self): 15 | items = ', '.join(f'{name}={version}' for name, version in self.packages.items()) 16 | return f'<{self.__class__.__name__} [{items}]>' 17 | 18 | 19 | 20 | @reg_exp(servers=ServerList()) 21 | def check_packages(servers): 22 | package_lists = IterClassGen(PackageList) 23 | apps = ['gcc', 'cmake', 'python3.7'] 24 | cmd = f"dpkg-query -W -f='${{Package}}|${{Version}}\n' {' '.join(apps)}" 25 | 26 | procs = [s.run_cmd(cmd, stdout=next(package_lists)) for s in servers] 27 | [p.wait() for p in procs] 28 | 29 | for package_list in package_lists: 30 | assert(len(package_list.packages) == len(apps)) 31 | 32 | 33 | 
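# Example of the line format PackageList consumes, as produced by the dpkg-query
# format string above (package name and version separated by '|'; the version
# strings below are illustrative only):
#   gcc|4:9.3.0-1ubuntu2
#   cmake|3.16.3-1ubuntu1
#   python3.7|3.7.5-2~19.10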
-------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/environment_variables.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | server_list = config.server_list[0, ] 7 | 8 | 9 | @reg_exp(servers=server_list) 10 | def environment_variables(servers): 11 | for s in servers: 12 | s.run_cmd('env', env={'OMP_NUM_THREADS': 8}).wait() 13 | 14 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/gdb.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | ENABLED = False 6 | 7 | 8 | @reg_exp(servers=config.server_list[0, ]) 9 | def gdb(servers): 10 | if not ENABLED: 11 | log('Skipping, not enabled') 12 | return 13 | s = servers[0] 14 | 15 | code = r""" 16 | #include 17 | #include 18 | 19 | int main(void) { 20 | int i = 42; 21 | int *p = NULL; 22 | *p = i; 23 | return 0; 24 | } 25 | """ 26 | exe = 'unbuffered' 27 | 28 | cmd = s.run_cmd(f'gcc -g -xc - -o {exe}') 29 | cmd.stdin(code, close=True) 30 | cmd.wait() 31 | 32 | controller = StdinController() 33 | 34 | output = File('gdb.log', flush=True) 35 | cmd = s.run_cmd(f'{GDB} ./{exe}', stdout=output, stderr=output, stdin=controller) 36 | 37 | controller.wait() 38 | 39 | cmd.wait() 40 | 41 | s.run_cmd(f'rm -f {exe}').wait() -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/restart.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | 8 | @reg_exp(servers=server_list, max_restarts=3, raise_on_rc=False) 9 | def restart(servers): 10 | for s in servers: 11 | cmd = s.run_cmd(f'date && sleep 0.1 && exit 1', stdout=Console(fmt=f'{s.id}: %s')) 12 | if cmd.wait() != 0: 13 | return Action.RESTART -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/serverlist_ops.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | # pre-select by lambda function 7 | @reg_exp(servers=config.server_list[lambda s: hasattr(s, 'id')]) 8 | def serverlist_ops(servers): 9 | # select by index 10 | s = servers[0] 11 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 12 | 13 | # select by id 14 | s = servers['node02'] 15 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 16 | 17 | # select whole list 18 | for s in servers: 19 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 20 | 21 | # select by lambda 22 | for s in servers[lambda s: s.id >= 'node03']: 23 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 24 | 25 | # select by slice 26 | for s in servers[1:3]: 27 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 28 | 29 | # select reverse order 30 | for s in servers[::-1]: 31 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 32 | 33 | # select even entries 34 | for s in servers[::2]: 35 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 36 | 37 | # select by tuple-list, a combination of all 38 | for s in servers['node04', 2, lambda s: s.id=='node01']: 39 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 40 | 
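# Every lookup above returns either a single Server (int index or id string) or a new
# ServerList (slice, lambda, tuple), so selections can also be chained (illustrative):
#   for s in servers[1:4][lambda s: s.id != 'node03']:
#       s.run_cmd('hostname', stdout=Console()).wait()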
-------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/simple_grid.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | server_list = config.server_list[0, ] 7 | parameter_grid = ParameterGrid( 8 | a=range(1, 5), 9 | b=[2, 4], 10 | to_file=[True, False], 11 | computed=ComputedParam(lambda to_file: [1] if to_file else 2) 12 | ) 13 | 14 | 15 | @reg_exp(servers=server_list, params=parameter_grid) 16 | def simple_grid(servers, a, b, to_file, computed): 17 | for s in servers: 18 | stdout = File('simple_grid.log', append=True) 19 | if not to_file: 20 | stdout = [stdout, Console(fmt=f'{s.id}: %s')] 21 | 22 | s.run_cmd(f'echo {a} {b} {computed}', stdout=stdout).wait() 23 | 24 | 25 | @reg_exp(servers=ServerList()) 26 | def only_local(servers): 27 | File('simple_grid.log', append=False)('empty\n') 28 | -------------------------------------------------------------------------------- /distexperiments/distexprunner/examples/timeout.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | 8 | @reg_exp(servers=server_list, raise_on_rc=False, max_restarts=3) 9 | def timeout(servers): 10 | rc = servers[0].run_cmd('sleep infinity', timeout=1).wait() 11 | if rc == ReturnCode.TIMEOUT: 12 | return Action.RESTART -------------------------------------------------------------------------------- /distexperiments/distexprunner/server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import logging 5 | import sys 6 | 7 | from distexprunner.experiment_server import ExperimentServer 8 | 9 | 10 | __author__ = 'mjasny' 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser(description='Distributed Experiment Runner Server') 15 | parser.add_argument('-v', '--verbose', action="count", default=0, help='-v WARN -vv INFO -vvv DEBUG') 16 | parser.add_argument('-ip', '--ip', default='0.0.0.0', help='Listening ip') 17 | parser.add_argument('-p', '--port', default=20000, help='Listening port') 18 | parser.add_argument('-rf', '--run-forever', default=False, action='store_true', help='Disable auto termination of server') 19 | parser.add_argument('-mi', '--max-idle', default=3600, type=int, help='Maximum idle time before auto termination (in seconds). 
Default 1 hour.') 20 | parser.add_argument('-o', '--log', type=str, help='Log into file') 21 | args = parser.parse_args() 22 | 23 | logging_handlers = [logging.StreamHandler()] 24 | if args.log: 25 | logging_handlers.append(logging.FileHandler(filename=args.log, mode='w')) 26 | 27 | 28 | logging.basicConfig( 29 | format='%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s', # [%(filename)s:%(lineno)d]: 30 | datefmt='%Y-%m-%d %H:%M:%S', 31 | level=max(4 - args.verbose, 0) * 10, 32 | handlers=logging_handlers 33 | ) 34 | 35 | 36 | server = ExperimentServer( 37 | ip=args.ip, 38 | port=args.port, 39 | max_idle=0 if args.run_forever else args.max_idle 40 | ) 41 | server.start() -------------------------------------------------------------------------------- /distexperiments/distexprunner/tests/progressbar_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import shutil 3 | import math 4 | 5 | 6 | class Progress: 7 | def __init__(self, max_steps, output=sys.stdout, disable_stdout=True): 8 | self.steps = 0 9 | self.max_steps = max_steps 10 | self.current_step = None 11 | self.output = sys.stdout 12 | self.line_width = 0 13 | if disable_stdout: 14 | sys.stdout = open('/dev/null', 'w') 15 | sys.stderr = open('/dev/null', 'w') 16 | 17 | 18 | def __write(self, s): 19 | self.output.write(s) 20 | self.line_width = len(s) 21 | 22 | 23 | def step_start(self, name): 24 | self.__write(f'{name} ...\033[K\n') 25 | self.current_step = name 26 | 27 | self.render_bar() 28 | 29 | 30 | def step_finish(self): 31 | self.steps += 1 32 | self.render_bar() 33 | 34 | 35 | def step_status(self, error=False, status=None): 36 | CHECK_MARK='\033[0;32m\u2714\033[0m' 37 | RED_CROSS='\033[0;31m\u2718\033[0m' 38 | INFO = '\033[1;33m=>\033[0m' 39 | 40 | width, _ = shutil.get_terminal_size((80, 20)) 41 | for _ in range(math.ceil(self.line_width / width)): 42 | self.output.write('\033[1A\033[K') # 1 up, clear line 43 | 44 | if status: 45 | self.__write(f'{self.current_step} {INFO} {status}\033[K\n') 46 | self.output.write('\033[K') 47 | self.line_width = 0 48 | self.render_bar() 49 | elif not error: 50 | self.__write(f'{self.current_step} {CHECK_MARK}\033[K\n') 51 | else: 52 | self.__write(f'{self.current_step} {RED_CROSS} => {error}\033[K\n') 53 | 54 | 55 | def render_bar(self): 56 | width, _ = shutil.get_terminal_size((80, 20)) 57 | 58 | percent = self.steps/self.max_steps 59 | steps_width = len(str(self.max_steps)) 60 | prefix = f'Progress: [{self.steps:{steps_width}d}/{self.max_steps} {percent:4.0%}]' 61 | 62 | if len(prefix)+8 > width: 63 | self.output.write('\033[0;31mWidth too small!\033[0m\n') 64 | return 65 | 66 | width_left = width - len(prefix) - 3 67 | hashes = math.floor(width_left*percent) 68 | dots = (width_left-hashes) 69 | suffix = f'[{"#"*hashes}{"."*dots}]' 70 | 71 | progress = f'\033[0;42;30m{prefix}\033[0m {suffix}' 72 | self.output.write('\033[K\n') 73 | self.output.write(progress) 74 | if self.steps < self.max_steps: 75 | self.output.write('\033[1A\r') #1 up, start 76 | self.output.flush() 77 | 78 | 79 | 80 | 81 | import time 82 | 83 | 84 | max_steps = 5 85 | p = Progress(max_steps) 86 | 87 | for i in range(max_steps): 88 | pad = '~'*i*100 89 | p.step_start(f'test_{i}_{pad}',) 90 | 91 | #if i % 2 == 1: 92 | # if i == max_steps - 1: 93 | # p.step(error="ASDF") 94 | 95 | time.sleep(1.0) 96 | p.step_status(error="foobar") 97 | time.sleep(0.5) 98 | if i % 2 == 1: 99 | p.step_finish() 100 | continue 101 | for _ in range(3): 102 | 
p.step_status(status=f"Retry {_}") 103 | time.sleep(0.5) 104 | p.step_status(error=False) 105 | p.step_finish() 106 | time.sleep(1.5) 107 | 108 | 109 | # p.finish() -------------------------------------------------------------------------------- /distexperiments/distexprunner/v1_compatibility/config.py: -------------------------------------------------------------------------------- 1 | CLIENT_IP = '127.0.0.1' 2 | CLIENT_PORT = 20000 3 | CLIENT_EXPERIMENT_FOLDER = 'experiments/' 4 | CLIENT_RESUME_FILE = '.exps_progress' 5 | 6 | SERVER_PORT = 20001 -------------------------------------------------------------------------------- /distexperiments/distexprunner/v1_compatibility/experiment.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import logging 3 | 4 | 5 | import distexprunner 6 | 7 | 8 | class Server(distexprunner.Server): 9 | @property 10 | def data(self): 11 | return self 12 | 13 | 14 | Logfile = distexprunner.File 15 | 16 | def Printer(**kwargs): 17 | if 'fmt' in kwargs: 18 | kwargs['fmt'] = kwargs['fmt'].replace('{line}', '%s') 19 | for x in ['rstrip', 'end']: 20 | if x in kwargs: 21 | del kwargs[x] 22 | return distexprunner.Console(**kwargs) 23 | 24 | 25 | class Base: 26 | pass 27 | 28 | 29 | 30 | class errors: 31 | class NoConnectionError(Exception): 32 | pass 33 | 34 | 35 | class actions: 36 | class Restart(Exception): 37 | pass 38 | 39 | 40 | class time: 41 | sleep = distexprunner.sleep 42 | 43 | 44 | class Proxy: 45 | def __init__(self, cls): 46 | self.server_list = distexprunner.ServerList(*cls.SERVERS) 47 | self.cls = cls 48 | self.__name__ = cls.__name__ 49 | 50 | def __call__(self, servers): 51 | def target(server): 52 | if not isinstance(server, Server): 53 | server = servers[server] 54 | if not hasattr(server, '_Server__client'): #connection has failed 55 | raise errors.NoConnectionError 56 | return server 57 | 58 | try: 59 | self.cls().experiment(target) 60 | except actions.Restart: 61 | return distexprunner.Action.RESTART 62 | 63 | 64 | class factory: 65 | class Grid: 66 | def __init__(self, factory_fn, *args): 67 | for params in itertools.product(*args): 68 | cls = factory_fn(*params) 69 | 70 | if not issubclass(cls, Base): 71 | raise Exception('Factory needs to return a child of experiment.Base') 72 | 73 | suffix = '_'.join(map(str, params)) 74 | cls.__name__ += f'_{suffix}' 75 | logging.info(f'Generated experiment: {cls.__name__}') 76 | 77 | proxy = Proxy(cls) 78 | distexprunner.reg_exp(proxy.server_list)(proxy) 79 | 80 | class Generator: 81 | def __init__(self, factory_fn, generator): 82 | for params in generator: 83 | cls = factory_fn(*params) 84 | 85 | if not issubclass(cls, Base): 86 | raise Exception('Factory needs to return a child of experiment.Base') 87 | 88 | suffix = '_'.join(map(str, params)) 89 | cls.__name__ += f'_{suffix}' 90 | logging.info(f'Generated experiment: {cls.__name__}') 91 | 92 | proxy = Proxy(cls) 93 | distexprunner.reg_exp(proxy.server_list)(proxy) -------------------------------------------------------------------------------- /distexperiments/distexprunner/v1_compatibility/old_example.py: -------------------------------------------------------------------------------- 1 | import experiment 2 | import time 3 | import config 4 | 5 | 6 | class exp1(experiment.Base): 7 | SERVERS = [ 8 | experiment.Server('node1', '127.0.0.1', custom_field=42) 9 | ] 10 | def experiment(self, target): 11 | long_cmd = target('node1').run_cmd('sleep 4', stdout=experiment.Printer(), 
stderr=experiment.Printer()) 12 | 13 | target('node1').run_cmd('ls', stdout=[experiment.Printer(), experiment.Logfile('ls.log', append=True)]) 14 | # self.SERVERS[0].data.custom_field = 69 15 | print(target('node1').data.custom_field) 16 | 17 | printer = experiment.Printer(fmt='stdin="{line}"\n', rstrip=True) 18 | cmd = target('node1').run_cmd('bash -c \'read p && echo $p\'', stdout=printer) 19 | # cmd.kill() 20 | cmd.stdin('foobar\n') 21 | 22 | long_cmd.wait() # We need to wait, else all running commands are killed 23 | 24 | 25 | class exp2(experiment.Base): 26 | SERVERS = exp1.SERVERS + [ 27 | experiment.Server('node2', '127.0.0.1', config.SERVER_PORT+1) 28 | ] 29 | def experiment(self, target): 30 | procs = [] 31 | for i, s in enumerate(self.SERVERS): 32 | try: 33 | p = target(s).run_cmd(f'sleep {5*(i+1)}', stdout=experiment.Printer(), stderr=experiment.Printer()) 34 | except experiment.errors.NoConnectionError: 35 | continue 36 | procs.append(p) 37 | 38 | rcs = [proc.wait() for proc in procs] 39 | assert(all(rc == 0 for rc in rcs)) 40 | 41 | 42 | 43 | def exp3_factory(a, b): 44 | class exp3(experiment.Base): 45 | SERVERS = [ 46 | experiment.Server('node', '127.0.0.1') 47 | ] 48 | def experiment(self, target): 49 | cmd = f'./foobar -a {a} -b {b}' 50 | print(cmd) 51 | return exp3 52 | 53 | a = ['x', 'y'] 54 | b = range(5, 10) 55 | experiment.factory.Grid(exp3_factory, a, b) 56 | 57 | 58 | class exp4(experiment.Base): 59 | SERVERS = [ 60 | experiment.Server('node1', '127.0.0.1') 61 | ] 62 | def experiment(self, target): 63 | env = { 64 | 'OMP_NUM_THREADS': 8 65 | } 66 | target('node1').run_cmd('env', stdout=experiment.Printer(), stderr=experiment.Printer(), env=env).wait() 67 | 68 | 69 | 70 | # class AA_CompileJob(experiment.Base): 71 | # SERVERS = exp1.SERVERS 72 | # RUN_ALWAYS = True # immune to filters 73 | # def experiment(self, target): 74 | # cmake_cmd = 'cmake -B../build -S../' 75 | # procs = [] 76 | # for s in self.SERVERS: 77 | # printer = experiment.Printer(fmt=f'{s.id}: '+'{line}') 78 | # try: 79 | # p = target(s).run_cmd(cmake_cmd, stdout=printer, stderr=printer) 80 | # except experiment.errors.NoConnectionError: 81 | # continue 82 | # procs.append(p) 83 | # [proc.wait() for proc in procs] 84 | 85 | # make_cmd = 'make -j -C ../build' 86 | # procs = [] 87 | # for s in self.SERVERS: 88 | # printer = experiment.Printer(fmt=f'{s.id}: '+'{line}') 89 | # try: 90 | # p = target(s).run_cmd(make_cmd, stdout=printer, stderr=printer) 91 | # except experiment.errors.NoConnectionError: 92 | # continue 93 | # procs.append(p) 94 | # rcs = [proc.wait() for proc in procs] 95 | # assert(all(rc == 0 for rc in rcs)) 96 | 97 | 98 | MAX_RESTARTS = 3 99 | class restart(experiment.Base): 100 | SERVERS = [ 101 | experiment.Server('node1', '127.0.0.1', custom_field=42) 102 | ] 103 | def experiment(self, target): 104 | cmd = target('node1').run_cmd('bash -c "sleep 1 && exit -11"', stdout=experiment.Printer()) 105 | rcs = [cmd.wait()] 106 | 107 | global MAX_RESTARTS 108 | MAX_RESTARTS -= 1 109 | if MAX_RESTARTS == 0: 110 | return 111 | 112 | if not all(rc == 0 for rc in rcs): 113 | raise experiment.actions.Restart 114 | 115 | 116 | class async_restart(experiment.Base): 117 | SERVERS = [ 118 | experiment.Server('node1', '127.0.0.1', custom_field=42) 119 | ] 120 | def experiment(self, target): 121 | procs = [] 122 | 123 | proc = target('node1').run_cmd('bash -c "sleep 1 && exit 0"', stdout=experiment.Printer()) 124 | procs.append(proc) 125 | 126 | while True: 127 | rcs = [proc.wait(block=False) for proc in 
procs] 128 | if any(rc is not None and rc != 0 for rc in rcs): 129 | raise experiment.actions.Restart 130 | 131 | if all(rc == 0 for rc in rcs): 132 | break 133 | 134 | time.sleep(0.1) -------------------------------------------------------------------------------- /distexperiments/examples/basic.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | @reg_exp(servers=server_list) 8 | def bash_for(servers): 9 | for s in servers[:1]: 10 | cmd = s.run_cmd('for i in {1..10}; do echo $i; sleep 0.1; done') 11 | # time.sleep(3) would block event loop processing 12 | sleep(3) 13 | cmd.wait() 14 | 15 | 16 | 17 | @reg_exp(servers=server_list, raise_on_rc=False) 18 | def kill_yes(servers): 19 | for s in servers[:1]: 20 | yes_cmd = s.run_cmd('yes > /dev/null') 21 | sleep(3) 22 | yes_cmd.kill() 23 | 24 | 25 | @reg_exp(servers=server_list) 26 | def read_stdin(servers): 27 | cmd = servers['node01'].run_cmd('read p && echo $p', stdout=Console(fmt='node01: %s')) 28 | cmd.stdin('hello\n') 29 | cmd.wait() 30 | 31 | 32 | @reg_exp(servers=server_list) 33 | def many_trees(servers): 34 | cmds = [servers[0].run_cmd('tree || ls') for _ in range(20)] 35 | assert(all(cmd.wait() == 0 for cmd in cmds)) 36 | 37 | 38 | @reg_exp(servers=server_list, raise_on_rc=False) 39 | def exit_code(servers): 40 | cmds = [servers[0].run_cmd(f'exit {i}') for i in range(5)] 41 | assert(not all(cmd.wait() == 0 for cmd in cmds)) -------------------------------------------------------------------------------- /distexperiments/examples/buffered_stdout.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | @reg_exp(servers=config.server_list, raise_on_rc=True) 7 | def buffered_stdout(servers): 8 | s = servers[0] 9 | 10 | code = r""" 11 | #include 12 | #include 13 | 14 | int main(void) { 15 | for (int i = 0; i < 10; i++) { 16 | printf("%d\n", i); 17 | usleep(1000000); 18 | } 19 | return 0; 20 | } 21 | """ 22 | exe = 'unbuffered' 23 | 24 | cmd = s.run_cmd(f'gcc -xc - -o {exe}') 25 | cmd.stdin(code, close=True) 26 | cmd.wait() 27 | 28 | s.run_cmd(f'./{exe}', stdout=Console()).wait() 29 | s.run_cmd(f'rm -f {exe}').wait() -------------------------------------------------------------------------------- /distexperiments/examples/compile.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = ServerList() 6 | 7 | @reg_exp(servers=server_list) 8 | def compile(servers): 9 | cmake_cmd = f'mkdir -p build && cd build && cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..' 
10 | procs = [s.run_cmd(cmake_cmd) for s in servers] 11 | assert(all(p.wait() == 0 for p in procs)) 12 | 13 | 14 | make_cmd = f'cd build && make -j' 15 | procs = [s.run_cmd(make_cmd) for s in servers] 16 | assert(all(p.wait() == 0 for p in procs)) 17 | -------------------------------------------------------------------------------- /distexperiments/examples/config.py: -------------------------------------------------------------------------------- 1 | from distexprunner import ServerList, Server 2 | 3 | 4 | SERVER_PORT = 20000 5 | 6 | 7 | server_list = ServerList( 8 | Server('node01', '127.0.0.1', SERVER_PORT), 9 | Server('node02', '127.0.0.1', SERVER_PORT), 10 | Server('node03', '127.0.0.1', SERVER_PORT), 11 | Server('node04', '127.0.0.1', SERVER_PORT), 12 | Server('node05', '127.0.0.1', SERVER_PORT), 13 | #Server('node0x', '192.168.94.2x', SERVER_PORT), 14 | ) 15 | -------------------------------------------------------------------------------- /distexperiments/examples/cpu_load.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | 8 | import json 9 | class String: 10 | def __init__(self): 11 | self.__s = [] 12 | 13 | def __call__(self, line): 14 | self.__s.append(line) 15 | 16 | def __str__(self): 17 | return ''.join(self.__s) 18 | 19 | def json(self): 20 | return json.loads(str(self)) 21 | 22 | 23 | class HighLoad(Exception): 24 | pass 25 | 26 | 27 | @reg_exp(servers=server_list) 28 | def cpu_load(servers, cpu_limit=None, node_limit=None): 29 | 30 | output = String() 31 | rc = servers[0].run_cmd(f'mpstat -P ALL -N ALL -o JSON 1 1', stdout=output).wait() 32 | assert(rc == 0) 33 | 34 | stat = output.json()['sysstat']['hosts'][0]['statistics'][0] 35 | load = { 36 | 'cpu': dict(map(lambda x: (x['cpu'], 100-x['idle']), stat['cpu-load'])), 37 | 'node': dict(map(lambda x: (x['node'], 100-x['idle']), stat['node-load'])) 38 | } 39 | for name, vals in load.items(): 40 | log(f'{name}:') 41 | for idx, util in vals.items(): 42 | log(f'\t{idx}: {util:0.2f}') 43 | 44 | 45 | if isinstance(cpu_limit, (int, float)): 46 | for cpu, util in load['cpu'].items(): 47 | if util > cpu_limit: 48 | raise HighLoad(f'too high load for cpu {repr(cpu)}: {util:0.2f}% > {cpu_limit}') 49 | elif callable(cpu_limit): 50 | for cpu, util in load['cpu'].items(): 51 | if not cpu_limit(cpu, util): 52 | raise HighLoad(f'too high load for cpu {repr(cpu)}: {util:0.2f}% cpu_limit()=False') 53 | 54 | if isinstance(node_limit, (int, float)): 55 | for node, util in load['node'].items(): 56 | if util > node_limit: 57 | raise HighLoad(f'too high load for node {repr(node)}: {util:0.2f}% > {node_limit}') 58 | elif callable(node_limit): 59 | for node, util in load['node'].items(): 60 | if not node_limit(node, util): 61 | raise HighLoad(f'too high load for node {repr(node)}: {util:0.2f}% node_limit()=False') 62 | 63 | # node_limit=10 64 | # cpu_limit=lambda c, u: u < 10 -------------------------------------------------------------------------------- /distexperiments/examples/csv_parser.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | 8 | @reg_exp(servers=server_list) 9 | def csv_parser(servers): 10 | s = servers[0] 11 | 12 | csvs = IterClassGen(CSVGenerator, 13 | r'value=(?P\d+)', # catches only latest printed value 14 | CSVGenerator.Array(r'other=(?P\d+)'), # 
collects all in a '|' separated array 15 | CSVGenerator.Sum(r'other=(?P\d+)'), 16 | CSVGenerator.SortedArray(r'value=(?P\d+),other=(?P\d+)'), 17 | foobar=1234, 18 | ) 19 | s.run_cmd('for i in {10..1}; do echo "value=$i,other=$((i*2))"; done', stdout=next(csvs)).wait() 20 | 21 | for csv in csvs: 22 | # writes header once and appends rows 23 | # csv has properties .header and .row 24 | csv.write('file.csv') 25 | 26 | # file.csv: 27 | # value,other,other_sum,sorted_other 28 | # 1,20|18|16|14|12|10|8|6|4|2,110,2|4|6|8|10|12|14|16|18|20 29 | 30 | 31 | -------------------------------------------------------------------------------- /distexperiments/examples/cwd.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | server_list = ServerList( 7 | config.server_list[0], 8 | working_directory='/tmp' 9 | ) 10 | 11 | 12 | @reg_exp(servers=server_list) 13 | def cwd(servers): 14 | for s in servers: 15 | s.run_cmd('ls && pwd', stdout=Console(fmt=f'{s.id}: %s')).wait() 16 | 17 | servers.cd('/') 18 | for s in servers: 19 | s.run_cmd('ls && pwd', stdout=Console(fmt=f'{s.id}: %s')).wait() 20 | 21 | for s in servers: 22 | s.cd('/home') 23 | s.run_cmd('ls && pwd', stdout=Console(fmt=f'{s.id}: %s')).wait() -------------------------------------------------------------------------------- /distexperiments/examples/dpkg_query.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | class PackageList: 7 | def __init__(self): 8 | self.packages = {} 9 | 10 | def __call__(self, line): 11 | name, version = line.strip().split('|', 1) 12 | self.packages[name] = version 13 | 14 | def __repr__(self): 15 | items = ', '.join(f'{name}={version}' for name, version in self.packages.items()) 16 | return f'<{self.__class__.__name__} [{items}]>' 17 | 18 | 19 | 20 | @reg_exp(servers=ServerList()) 21 | def check_packages(servers): 22 | package_lists = IterClassGen(PackageList) 23 | apps = ['gcc', 'cmake', 'python3.7'] 24 | cmd = f"dpkg-query -W -f='${{Package}}|${{Version}}\n' {' '.join(apps)}" 25 | 26 | procs = [s.run_cmd(cmd, stdout=next(package_lists)) for s in servers] 27 | [p.wait() for p in procs] 28 | 29 | for package_list in package_lists: 30 | assert(len(package_list.packages) == len(apps)) 31 | 32 | 33 | -------------------------------------------------------------------------------- /distexperiments/examples/environment_variables.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | server_list = config.server_list[0, ] 7 | 8 | 9 | @reg_exp(servers=server_list) 10 | def environment_variables(servers): 11 | for s in servers: 12 | s.run_cmd('env', env={'OMP_NUM_THREADS': 8}).wait() 13 | 14 | -------------------------------------------------------------------------------- /distexperiments/examples/gdb.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | ENABLED = False 6 | 7 | 8 | @reg_exp(servers=config.server_list[0, ]) 9 | def gdb(servers): 10 | if not ENABLED: 11 | log('Skipping, not enabled') 12 | return 13 | s = servers[0] 14 | 15 | code = r""" 16 | #include 17 | #include 18 | 19 | int main(void) { 20 | int i = 42; 21 | int *p = NULL; 22 | *p = i; 23 | return 0; 24 | } 25 | """ 26 | exe = 'unbuffered' 27 | 28 | cmd = s.run_cmd(f'gcc -g -xc - -o 
{exe}') 29 | cmd.stdin(code, close=True) 30 | cmd.wait() 31 | 32 | controller = StdinController() 33 | 34 | output = File('gdb.log', flush=True) 35 | cmd = s.run_cmd(f'{GDB} ./{exe}', stdout=output, stderr=output, stdin=controller) 36 | 37 | controller.wait() 38 | 39 | cmd.wait() 40 | 41 | s.run_cmd(f'rm -f {exe}').wait() -------------------------------------------------------------------------------- /distexperiments/examples/restart.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | 8 | @reg_exp(servers=server_list, max_restarts=3, raise_on_rc=False) 9 | def restart(servers): 10 | for s in servers: 11 | cmd = s.run_cmd(f'date && sleep 0.1 && exit 1', stdout=Console(fmt=f'{s.id}: %s')) 12 | if cmd.wait() != 0: 13 | return Action.RESTART -------------------------------------------------------------------------------- /distexperiments/examples/serverlist_ops.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | # pre-select by lambda function 7 | @reg_exp(servers=config.server_list[lambda s: hasattr(s, 'id')]) 8 | def serverlist_ops(servers): 9 | # select by index 10 | s = servers[0] 11 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 12 | 13 | # select by id 14 | s = servers['node02'] 15 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 16 | 17 | # select whole list 18 | for s in servers: 19 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 20 | 21 | # select by lambda 22 | for s in servers[lambda s: s.id >= 'node03']: 23 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 24 | 25 | # select by slice 26 | for s in servers[1:3]: 27 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 28 | 29 | # select reverse order 30 | for s in servers[::-1]: 31 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 32 | 33 | # select even entries 34 | for s in servers[::2]: 35 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 36 | 37 | # select by tuple-list, a combination of all 38 | for s in servers['node04', 2, lambda s: s.id=='node01']: 39 | s.run_cmd(f'echo "{s.id}"', stdout=Console()).wait() 40 | -------------------------------------------------------------------------------- /distexperiments/examples/simple_grid.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | 6 | server_list = config.server_list[0, ] 7 | parameter_grid = ParameterGrid( 8 | a=range(1, 5), 9 | b=[2, 4], 10 | to_file=[True, False], 11 | computed=ComputedParam(lambda to_file: [1] if to_file else 2) 12 | ) 13 | 14 | 15 | @reg_exp(servers=server_list, params=parameter_grid) 16 | def simple_grid(servers, a, b, to_file, computed): 17 | for s in servers: 18 | stdout = File('simple_grid.log', append=True) 19 | if not to_file: 20 | stdout = [stdout, Console(fmt=f'{s.id}: %s')] 21 | 22 | s.run_cmd(f'echo {a} {b} {computed}', stdout=stdout).wait() 23 | 24 | 25 | @reg_exp(servers=ServerList()) 26 | def only_local(servers): 27 | File('simple_grid.log', append=False)('empty\n') 28 | -------------------------------------------------------------------------------- /distexperiments/examples/timeout.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | 5 | server_list = config.server_list[0, ] 6 | 7 | 8 | 
@reg_exp(servers=server_list, raise_on_rc=False, max_restarts=3) 9 | def timeout(servers): 10 | rc = servers[0].run_cmd('sleep infinity', timeout=1).wait() 11 | if rc == ReturnCode.TIMEOUT: 12 | return Action.RESTART -------------------------------------------------------------------------------- /distexperiments/experiments/.#read_benchmark.py: -------------------------------------------------------------------------------- 1 | tobias@tobias-MS-7B00.3680:1663153370 -------------------------------------------------------------------------------- /distexperiments/experiments/alignment.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 2 5 | 6 | parameter_grid = ParameterGrid( 7 | numberNodes=[2], 8 | alignment = [8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536], 9 | padding = [0,8], 10 | ) 11 | 12 | 13 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 14 | def compile(servers): 15 | servers.cd("/home/tziegler/rdma_synchronization/build/") 16 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 17 | procs = [s.run_cmd(cmake_cmd) for s in servers] 18 | assert(all(p.wait() == 0 for p in procs)) 19 | 20 | make_cmd = f'sudo make -j' 21 | procs = [s.run_cmd(make_cmd) for s in servers] 22 | assert(all(p.wait() == 0 for p in procs)) 23 | 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def alignment(servers, numberNodes, alignment, padding): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | 29 | cmds = [] 30 | 31 | cmd = f'numactl --membind=0 --cpunodebind=0 ./atomic_alignment -ownIp={servers[0].ibIp} -storage_node -worker=36' 32 | cmds += [servers[0].run_cmd(cmd)] 33 | 34 | cmd = f'numactl --membind=0 --cpunodebind=0 ./atomic_alignment -ownIp={servers[1].ibIp} -worker=36 -csvFile="alignment.csv" -run_for_seconds=10 -alignment={alignment} -padding={padding} -record_latency' 35 | cmds += [servers[1].run_cmd(cmd)] 36 | 37 | if not all(cmd.wait() == 0 for cmd in cmds): 38 | return Action.RESTART 39 | 40 | -------------------------------------------------------------------------------- /distexperiments/experiments/alignment_hypo.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | alignment=[8], 8 | padding = [4096], 9 | worker=[128], 10 | ) 11 | 12 | 13 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 14 | def compile(servers): 15 | servers.cd("/home/tziegler/rdma_synchronization/build/") 16 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
17 | procs = [s.run_cmd(cmake_cmd) for s in servers] 18 | assert(all(p.wait() == 0 for p in procs)) 19 | 20 | make_cmd = f'sudo make -j' 21 | procs = [s.run_cmd(make_cmd) for s in servers] 22 | assert(all(p.wait() == 0 for p in procs)) 23 | 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def alignment(servers, alignment, padding, worker): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | cmds = [] 29 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./atomic_alignment -ownIp={servers[0].ibIp} -storage_node -worker={worker}' 30 | cmds += [servers[0].run_cmd(cmd)] 31 | 32 | work = worker 33 | numberNodes=1 34 | if worker >=4: 35 | work = int(worker/4) 36 | numberNodes=4 37 | 38 | for i in range(1, numberNodes+1): 39 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./atomic_alignment -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="atomic_nocontention_hypo.csv" -run_for_seconds=30 -alignment={alignment} -padding={padding} -tag={worker} -nopinThreads -compute_id={i-1}' 40 | cmds += [servers[i].run_cmd(cmd)] 41 | 42 | if not all(cmd.wait() == 0 for cmd in cmds): 43 | return Action.RESTART 44 | 45 | -------------------------------------------------------------------------------- /distexperiments/experiments/atomic_cas.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 8 | options=["-success","-nosuccess"], 9 | ) 10 | 11 | 12 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 13 | def compile(servers): 14 | servers.cd("/home/tziegler/rdma_synchronization/build/") 15 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
16 | procs = [s.run_cmd(cmake_cmd) for s in servers] 17 | assert(all(p.wait() == 0 for p in procs)) 18 | 19 | make_cmd = f'sudo make -j' 20 | procs = [s.run_cmd(make_cmd) for s in servers] 21 | assert(all(p.wait() == 0 for p in procs)) 22 | 23 | 24 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 25 | def fa_benchmark(servers, worker, options): 26 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 27 | cmds = [] 28 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./cas_benchmark -ownIp={servers[0].ibIp} -storage_node -worker={worker}' 29 | cmds += [servers[0].run_cmd(cmd)] 30 | 31 | work = worker 32 | numberNodes=1 33 | if worker >=4: 34 | work = int(worker/4) 35 | numberNodes=4 36 | 37 | for i in range(1, numberNodes+1): 38 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./cas_benchmark -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="atomic_contention_benchmark.csv" -run_for_seconds=30 -tag={worker} -nopinThreads {options}' 39 | cmds += [servers[i].run_cmd(cmd)] 40 | 41 | if not all(cmd.wait() == 0 for cmd in cmds): 42 | return Action.RESTART 43 | -------------------------------------------------------------------------------- /distexperiments/experiments/atomic_fa.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 8 | ) 9 | 10 | 11 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 12 | def compile(servers): 13 | servers.cd("/home/tziegler/rdma_synchronization/build/") 14 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
15 | procs = [s.run_cmd(cmake_cmd) for s in servers] 16 | assert(all(p.wait() == 0 for p in procs)) 17 | 18 | make_cmd = f'sudo make -j' 19 | procs = [s.run_cmd(make_cmd) for s in servers] 20 | assert(all(p.wait() == 0 for p in procs)) 21 | 22 | 23 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 24 | def fa_benchmark(servers, worker): 25 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 26 | cmds = [] 27 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./fa_benchmark -ownIp={servers[0].ibIp} -storage_node -worker={worker}' 28 | cmds += [servers[0].run_cmd(cmd)] 29 | 30 | work = worker 31 | numberNodes=1 32 | if worker >=4: 33 | work = int(worker/4) 34 | numberNodes=4 35 | 36 | for i in range(1, numberNodes+1): 37 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./fa_benchmark -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="atomic_contention_benchmark.csv" -run_for_seconds=30 -tag={worker} -nopinThreads' 38 | cmds += [servers[i].run_cmd(cmd)] 39 | 40 | if not all(cmd.wait() == 0 for cmd in cmds): 41 | return Action.RESTART 42 | -------------------------------------------------------------------------------- /distexperiments/experiments/atomic_nocontention.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | alignment=[8,16,32,64,128,256,512,1024,2048,4096,8192], 8 | padding = [0,8], 9 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 10 | ) 11 | 12 | 13 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 14 | def compile(servers): 15 | servers.cd("/home/tziegler/rdma_synchronization/build/") 16 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
17 | procs = [s.run_cmd(cmake_cmd) for s in servers] 18 | assert(all(p.wait() == 0 for p in procs)) 19 | 20 | make_cmd = f'sudo make -j' 21 | procs = [s.run_cmd(make_cmd) for s in servers] 22 | assert(all(p.wait() == 0 for p in procs)) 23 | 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def alignment(servers, alignment, padding, worker): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | cmds = [] 29 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./atomic_alignment -ownIp={servers[0].ibIp} -storage_node -worker={worker}' 30 | cmds += [servers[0].run_cmd(cmd)] 31 | 32 | work = worker 33 | numberNodes=1 34 | if worker >=4: 35 | work = int(worker/4) 36 | numberNodes=4 37 | 38 | for i in range(1, numberNodes+1): 39 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./atomic_alignment -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="atomic_nocontention.csv" -run_for_seconds=30 -alignment={alignment} -padding={padding} -tag={worker} -nopinThreads -compute_id={i-1}' 40 | cmds += [servers[i].run_cmd(cmd)] 41 | 42 | if not all(cmd.wait() == 0 for cmd in cmds): 43 | return Action.RESTART 44 | -------------------------------------------------------------------------------- /distexperiments/experiments/batched_atomics.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [0], 8 | storageNodes = [1], 9 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 10 | batch = [1,4,16,64], 11 | locks=[2000000], 12 | ) 13 | 14 | 15 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 16 | def compile(servers): 17 | servers.cd("/home/tziegler/rdma_synchronization/build/") 18 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
19 | procs = [s.run_cmd(cmake_cmd) for s in servers] 20 | assert(all(p.wait() == 0 for p in procs)) 21 | 22 | make_cmd = f'sudo make -j' 23 | procs = [s.run_cmd(make_cmd) for s in servers] 24 | assert(all(p.wait() == 0 for p in procs)) 25 | 26 | 27 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 28 | def batch_benchmark(servers, padding,storageNodes, worker, batch,locks): 29 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 30 | cmds = [] 31 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./batched_atomics -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10 -storage_nodes={storageNodes}' 32 | cmds += [servers[0].run_cmd(cmd)] 33 | 34 | work = worker 35 | numberNodes=1 36 | if worker >=4: 37 | work = int(worker/4) 38 | numberNodes=4 39 | 40 | for i in range(1, numberNodes+1): 41 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./batched_atomics -ownIp={servers[i].ibIp} -worker={work} -all_worker={worker} -csvFile="batch_benchmark.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} -storage_nodes={storageNodes} -batch={batch}' 42 | cmds += [servers[i].run_cmd(cmd)] 43 | 44 | if not all(cmd.wait() == 0 for cmd in cmds): 45 | return Action.RESTART 46 | -------------------------------------------------------------------------------- /distexperiments/experiments/batched_reads.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [0], 8 | storageNodes = [1], 9 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 10 | batch = [1,4,16,64], 11 | locks=[2000000], 12 | ) 13 | 14 | 15 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 16 | def compile(servers): 17 | servers.cd("/home/tziegler/rdma_synchronization/build/") 18 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
19 | procs = [s.run_cmd(cmake_cmd) for s in servers] 20 | assert(all(p.wait() == 0 for p in procs)) 21 | 22 | make_cmd = f'sudo make -j' 23 | procs = [s.run_cmd(make_cmd) for s in servers] 24 | assert(all(p.wait() == 0 for p in procs)) 25 | 26 | 27 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 28 | def batch_benchmark(servers, padding,storageNodes, worker, batch,locks): 29 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 30 | cmds = [] 31 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./batched_reads -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10 -storage_nodes={storageNodes}' 32 | cmds += [servers[0].run_cmd(cmd)] 33 | 34 | work = worker 35 | numberNodes=1 36 | if worker >=4: 37 | work = int(worker/4) 38 | numberNodes=4 39 | 40 | for i in range(1, numberNodes+1): 41 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./batched_reads -ownIp={servers[i].ibIp} -worker={work} -all_worker={worker} -csvFile="batch_benchmark.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} -storage_nodes={storageNodes} -batch={batch}' 42 | cmds += [servers[i].run_cmd(cmd)] 43 | 44 | if not all(cmd.wait() == 0 for cmd in cmds): 45 | return Action.RESTART 46 | -------------------------------------------------------------------------------- /distexperiments/experiments/broken_reads.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 2 5 | 6 | parameter_grid = ParameterGrid( 7 | numberNodes=[2], 8 | write_speed = [1,4,16,32], 9 | read_size = [128,512,2048,8192,32768], 10 | ) 11 | 12 | 13 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 14 | def compile(servers): 15 | servers.cd("/home/tziegler/rdma_synchronization/build/") 16 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
17 | procs = [s.run_cmd(cmake_cmd) for s in servers] 18 | assert(all(p.wait() == 0 for p in procs)) 19 | 20 | make_cmd = f'sudo make -j' 21 | procs = [s.run_cmd(make_cmd) for s in servers] 22 | assert(all(p.wait() == 0 for p in procs)) 23 | 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def broken_reads(servers, numberNodes, write_speed, read_size ): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | 29 | cmds = [] 30 | 31 | cmd = f'numactl --membind=0 --cpunodebind=0 ./broken_remote_write -ownIp={servers[0].ibIp} -storage_node -worker=2' 32 | cmds += [servers[0].run_cmd(cmd)] 33 | 34 | cmd = f'numactl --membind=0 --cpunodebind=0 ./broken_remote_write -ownIp={servers[1].ibIp} -worker=2 -csvFile="broken_ordering_rwrite.csv" -run_for_seconds=300 -block_size={read_size} -write_speed={write_speed}' 35 | cmds += [servers[1].run_cmd(cmd)] 36 | 37 | if not all(cmd.wait() == 0 for cmd in cmds): 38 | return Action.RESTART 39 | 40 | -------------------------------------------------------------------------------- /distexperiments/experiments/btree.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [8], 8 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 9 | options=["","-unsynchronized"], 10 | ) 11 | 12 | 13 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 14 | def compile(servers): 15 | servers.cd("/home/tziegler/rdma_synchronization/build/") 16 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 17 | procs = [s.run_cmd(cmake_cmd) for s in servers] 18 | assert(all(p.wait() == 0 for p in procs)) 19 | 20 | make_cmd = f'sudo make -j' 21 | procs = [s.run_cmd(make_cmd) for s in servers] 22 | assert(all(p.wait() == 0 for p in procs)) 23 | 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def locking_benchmark(servers, padding, worker,options): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | cmds = [] 29 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./btree -ownIp={servers[0].ibIp} -storage_node -worker={worker} -padding={padding} -dramGB=10' 30 | cmds += [servers[0].run_cmd(cmd)] 31 | 32 | work = worker 33 | numberNodes=1 34 | if worker >=4: 35 | work = int(worker/4) 36 | numberNodes=4 37 | 38 | for i in range(1, numberNodes+1): 39 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./btree -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="btree_benchmark.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads {options}' 40 | cmds += [servers[i].run_cmd(cmd)] 41 | 42 | if not all(cmd.wait() == 0 for cmd in cmds): 43 | return Action.RESTART 44 | -------------------------------------------------------------------------------- /distexperiments/experiments/config.py: -------------------------------------------------------------------------------- 1 | from distexprunner import ServerList, Server 2 | 3 | 4 | SERVER_PORT = 20005 5 | 6 | 7 | server_list = ServerList( 8 | # fill in 9 | Server('node08', 'c08.lab.dm.informatik.tu-darmstadt.de', SERVER_PORT, ibIp='172.18.94.80', sibIP='172.18.94.81', ssdPath="/dev/md0"), 10 | Server('node07', 'c07.lab.dm.informatik.tu-darmstadt.de', SERVER_PORT, ibIp='172.18.94.70', sibIP='172.18.94.71', 
ssdPath="/dev/md0 "), 11 | Server('node06', 'c06.lab.dm.informatik.tu-darmstadt.de', SERVER_PORT, ibIp='172.18.94.60', sibIP='172.18.94.61', ssdPath="/dev/md0"), 12 | 13 | Server('node04', 'c04.lab.dm.informatik.tu-darmstadt.de', SERVER_PORT, ibIp='172.18.94.40', sibIP='172.18.94.41', ssdPath="/dev/md127"), 14 | Server('node05', 'c05.lab.dm.informatik.tu-darmstadt.de', SERVER_PORT, ibIp='172.18.94.50', sibIP='172.18.94.51', ssdPath="/dev/md0"), 15 | Server('node02', 'c02.lab.dm.informatik.tu-darmstadt.de', SERVER_PORT, ibIp='172.18.94.20', sibIP='172.18.94.21', ssdPath="/dev/md0"), 16 | Server('node01', 'c01.lab.dm.informatik.tu-darmstadt.de', SERVER_PORT, ibIp='172.18.94.10', sibIP='172.18.94.11', ssdPath="/dev/md0"), 17 | ) 18 | -------------------------------------------------------------------------------- /distexperiments/experiments/contention_reads_atomics.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [0], 8 | storageNodes = [1], 9 | reader=[16], 10 | writer=[0,4,8,16,32], 11 | options=["-atomics","-noatomics"], 12 | locks=[2000000], 13 | ) 14 | 15 | 16 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 17 | def compile(servers): 18 | servers.cd("/home/tziegler/rdma_synchronization/build/") 19 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 20 | procs = [s.run_cmd(cmake_cmd) for s in servers] 21 | assert(all(p.wait() == 0 for p in procs)) 22 | 23 | make_cmd = f'sudo make -j' 24 | procs = [s.run_cmd(make_cmd) for s in servers] 25 | assert(all(p.wait() == 0 for p in procs)) 26 | 27 | 28 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 29 | def contention_benchmark(servers, padding,storageNodes, reader, writer, options,locks): 30 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 31 | cmds = [] 32 | worker = writer + reader 33 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./contention_reads_atomics -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10 -storage_nodes={storageNodes}' 34 | cmds += [servers[0].run_cmd(cmd)] 35 | 36 | work = worker 37 | read = reader 38 | numberNodes=1 39 | if worker >=4: 40 | work = int(worker/4) 41 | numberNodes=4 42 | read = int(reader/4) 43 | 44 | for i in range(1, numberNodes+1): 45 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./contention_reads_atomics -ownIp={servers[i].ibIp} -worker={work} -all_worker={worker} -csvFile="contention_reads_atomics_benchmark.csv" -run_for_seconds=30 -padding={padding} -tag={writer} -nopinThreads -lock_count={locks} -storage_nodes={storageNodes} -reader={read} {options}' 46 | cmds += [servers[i].run_cmd(cmd)] 47 | 48 | if not all(cmd.wait() == 0 for cmd in cmds): 49 | return Action.RESTART 50 | -------------------------------------------------------------------------------- /distexperiments/experiments/locking_ablation.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 2 5 | 6 | parameter_grid = ParameterGrid( 7 | numberNodes=[2], 8 | options=["","-unlock_write", "-speculative_read","-speculative_read -write_combining", "-speculative_read -write_combining -order_release", "-nowrite", "-nowrite -speculative_read", "-nowrite 
-speculative_read -order_release"], 9 | ) 10 | 11 | 12 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 13 | def compile(servers): 14 | servers.cd("/home/tziegler/rdma_synchronization/build/") 15 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 16 | procs = [s.run_cmd(cmake_cmd) for s in servers] 17 | assert(all(p.wait() == 0 for p in procs)) 18 | 19 | make_cmd = f'sudo make -j' 20 | procs = [s.run_cmd(make_cmd) for s in servers] 21 | assert(all(p.wait() == 0 for p in procs)) 22 | 23 | 24 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 25 | def locking_ablation(servers, numberNodes, options): 26 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 27 | 28 | cmds = [] 29 | 30 | cmd = f'numactl --membind=0 --cpunodebind=0 ./lock -ownIp={servers[0].ibIp} -storage_node ' 31 | cmds += [servers[0].run_cmd(cmd)] 32 | 33 | cmd = f'numactl --membind=0 --cpunodebind=0 ./lock -ownIp={servers[1].ibIp} -csvFile="locking_ablation.csv" -run_for_seconds=10 {options}' 34 | cmds += [servers[1].run_cmd(cmd)] 35 | 36 | if not all(cmd.wait() == 0 for cmd in cmds): 37 | return Action.RESTART 38 | 39 | -------------------------------------------------------------------------------- /distexperiments/experiments/locking_benchmark.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [0,8], 8 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 9 | locks=[200000], 10 | # options=["","-speculative_read","-speculative_read -write_combining", "-speculative_read -write_combining -order_release"], 11 | options=["-speculative_read -write_combining -order_release"], 12 | ) 13 | 14 | 15 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 16 | def compile(servers): 17 | servers.cd("/home/tziegler/rdma_synchronization/build/") 18 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
19 | procs = [s.run_cmd(cmake_cmd) for s in servers] 20 | assert(all(p.wait() == 0 for p in procs)) 21 | 22 | make_cmd = f'sudo make -j' 23 | procs = [s.run_cmd(make_cmd) for s in servers] 24 | assert(all(p.wait() == 0 for p in procs)) 25 | 26 | 27 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 28 | def locking_benchmark(servers, padding, worker,locks, options): 29 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 30 | cmds = [] 31 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./locking_benchmark -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10' 32 | cmds += [servers[0].run_cmd(cmd)] 33 | 34 | work = worker 35 | numberNodes=1 36 | if worker >=4: 37 | work = int(worker/4) 38 | numberNodes=4 39 | 40 | for i in range(1, numberNodes+1): 41 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./locking_benchmark -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="locking_benchmark.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} {options}' 42 | cmds += [servers[i].run_cmd(cmd)] 43 | 44 | if not all(cmd.wait() == 0 for cmd in cmds): 45 | return Action.RESTART 46 | -------------------------------------------------------------------------------- /distexperiments/experiments/locking_benchmark_sleep.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [8], 8 | worker=[1,2,4,8,16,32,64,128,256], 9 | sleep=[0,8,16,32,64], 10 | locks=[200000], 11 | options=["","-speculative_read"], 12 | ) 13 | 14 | 15 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 16 | def compile(servers): 17 | servers.cd("/home/tziegler/rdma_synchronization/build/") 18 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
19 | procs = [s.run_cmd(cmake_cmd) for s in servers] 20 | assert(all(p.wait() == 0 for p in procs)) 21 | 22 | make_cmd = f'sudo make -j' 23 | procs = [s.run_cmd(make_cmd) for s in servers] 24 | assert(all(p.wait() == 0 for p in procs)) 25 | 26 | 27 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 28 | def locking_benchmark(servers, padding, worker,sleep,locks, options): 29 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 30 | if (options == "") and (sleep > 0): 31 | return 32 | 33 | cmds = [] 34 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./locking_benchmark -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10' 35 | cmds += [servers[0].run_cmd(cmd)] 36 | 37 | 38 | 39 | work = worker 40 | numberNodes=1 41 | if worker >=4: 42 | work = int(worker/4) 43 | numberNodes=4 44 | 45 | for i in range(1, numberNodes+1): 46 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./locking_benchmark -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="sleep_locking_benchmark.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} {options} -sleep={sleep}' 47 | cmds += [servers[i].run_cmd(cmd)] 48 | 49 | if not all(cmd.wait() == 0 for cmd in cmds): 50 | return Action.RESTART 51 | -------------------------------------------------------------------------------- /distexperiments/experiments/nam_experiment.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 4 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [0,8], 8 | worker=[26], 9 | locks=[2000000], 10 | options=["","-speculative_read","-speculative_read -write_combining", "-speculative_read -write_combining -order_release"], 11 | ) 12 | 13 | 14 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 15 | def compile(servers): 16 | servers.cd("/home/tziegler/rdma_synchronization/build/") 17 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
18 | procs = [s.run_cmd(cmake_cmd) for s in servers] 19 | assert(all(p.wait() == 0 for p in procs)) 20 | 21 | make_cmd = f'sudo make -j' 22 | procs = [s.run_cmd(make_cmd) for s in servers] 23 | assert(all(p.wait() == 0 for p in procs)) 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def nam_benchmark(servers, padding, worker,locks, options): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | cmds = [] 29 | # if (options != "") and (padding == 0): 30 | # return 31 | tag = "NAMDB++" 32 | if padding == 0: 33 | tag = "NAMDB" 34 | 35 | for i in range(0, NUMBER_NODES): 36 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./nam_experiment -ownIp={servers[i].ibIp} -worker={worker} -csvFile="nam_db_benchmark_restart.csv" -run_for_seconds=30 -padding={padding} -tag={tag} -lock_count={locks} -storage_nodes={NUMBER_NODES} -storage_node {options} -dramGB=10' 37 | cmds += [servers[i].run_cmd(cmd)] 38 | sleep(1) 39 | if not all(cmd.wait() == 0 for cmd in cmds): 40 | return Action.RESTART 41 | -------------------------------------------------------------------------------- /distexperiments/experiments/no_locking_benchmark.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [0,8], 8 | storageNodes = [1], 9 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 10 | locks=[20000000,200000,], 11 | ) 12 | 13 | 14 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 15 | def compile(servers): 16 | servers.cd("/home/tziegler/rdma_synchronization/build/") 17 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
18 | procs = [s.run_cmd(cmake_cmd) for s in servers] 19 | assert(all(p.wait() == 0 for p in procs)) 20 | 21 | make_cmd = f'sudo make -j' 22 | procs = [s.run_cmd(make_cmd) for s in servers] 23 | assert(all(p.wait() == 0 for p in procs)) 24 | 25 | 26 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 27 | def locking_benchmark(servers, padding,storageNodes, worker,locks): 28 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 29 | cmds = [] 30 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./no_locking_benchmark -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10 -storage_nodes={storageNodes}' 31 | cmds += [servers[0].run_cmd(cmd)] 32 | if storageNodes > 1: 33 | cmd = f'numactl --membind=1 sudo ip netns exec ib0 ./no_locking_benchmark -ownIp=172.18.94.81 -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10 -storage_n>odes={storageNodes}' 34 | cmds += [servers[0].run_cmd(cmd)] 35 | 36 | work = worker 37 | numberNodes=1 38 | if worker >=4: 39 | work = int(worker/4) 40 | numberNodes=4 41 | 42 | for i in range(1, numberNodes+1): 43 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./no_locking_benchmark -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="locking_benchmark.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} -storage_nodes={storageNodes}' 44 | cmds += [servers[i].run_cmd(cmd)] 45 | 46 | if not all(cmd.wait() == 0 for cmd in cmds): 47 | return Action.RESTART 48 | -------------------------------------------------------------------------------- /distexperiments/experiments/opt_btree.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [8], 8 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 9 | options=[""], 10 | ) 11 | 12 | 13 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 14 | def compile(servers): 15 | servers.cd("/home/tziegler/rdma_synchronization/build/") 16 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
17 | procs = [s.run_cmd(cmake_cmd) for s in servers] 18 | assert(all(p.wait() == 0 for p in procs)) 19 | 20 | make_cmd = f'sudo make -j' 21 | procs = [s.run_cmd(make_cmd) for s in servers] 22 | assert(all(p.wait() == 0 for p in procs)) 23 | 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def locking_benchmark(servers, padding, worker,options): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | cmds = [] 29 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./opt_btree -ownIp={servers[0].ibIp} -storage_node -worker={worker} -padding={padding} -dramGB=10' 30 | cmds += [servers[0].run_cmd(cmd)] 31 | 32 | work = worker 33 | numberNodes=1 34 | if worker >=4: 35 | work = int(worker/4) 36 | numberNodes=4 37 | 38 | for i in range(1, numberNodes+1): 39 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./opt_btree -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="btree_benchmark.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads {options}' 40 | cmds += [servers[i].run_cmd(cmd)] 41 | 42 | if not all(cmd.wait() == 0 for cmd in cmds): 43 | return Action.RESTART 44 | -------------------------------------------------------------------------------- /distexperiments/experiments/optdb_debug.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 4 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [0], 8 | worker=["104 -nopinThreads"], 9 | locks=[200], 10 | options=["-farm"], 11 | # options=["-versioning" , "-CRC", "farm"], 12 | footer=["-footer", "-nofooter"], 13 | ) 14 | 15 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 16 | def compile(servers): 17 | servers.cd("/home/tziegler/rdma_synchronization/build/") 18 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
19 | procs = [s.run_cmd(cmake_cmd) for s in servers] 20 | assert(all(p.wait() == 0 for p in procs)) 21 | 22 | make_cmd = f'sudo make -j' 23 | procs = [s.run_cmd(make_cmd) for s in servers] 24 | assert(all(p.wait() == 0 for p in procs)) 25 | 26 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 27 | def nam_benchmark(servers, padding, worker,locks, options, footer): 28 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 29 | cmds = [] 30 | # if (options != "") and (padding == 0): 31 | # return 32 | tag = "OPTDB" 33 | 34 | for i in range(0, NUMBER_NODES): 35 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./optdb_experiment -ownIp={servers[i].ibIp} -worker={worker} -csvFile="optdb_debug.csv" -run_for_seconds=300 -padding={padding} -tag={tag} -lock_count={locks} -storage_nodes={NUMBER_NODES} -storage_node {options} {footer} -dramGB=10' 36 | cmds += [servers[i].run_cmd(cmd)] 37 | sleep(1) 38 | if not all(cmd.wait() == 0 for cmd in cmds): 39 | return Action.RESTART 40 | -------------------------------------------------------------------------------- /distexperiments/experiments/optdb_experiment.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 4 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [8], 8 | worker=["26","52 -nopinThreads", "104 -nopinThreads"], 9 | locks=[2000000], 10 | options=["-CRC", "-farm" , "-versioning"], 11 | footer=["-nofooter","-footer"], 12 | ) 13 | 14 | 15 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 16 | def compile(servers): 17 | servers.cd("/home/tziegler/rdma_synchronization/build/") 18 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
19 | procs = [s.run_cmd(cmake_cmd) for s in servers] 20 | assert(all(p.wait() == 0 for p in procs)) 21 | 22 | make_cmd = f'sudo make -j' 23 | procs = [s.run_cmd(make_cmd) for s in servers] 24 | assert(all(p.wait() == 0 for p in procs)) 25 | 26 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 27 | def nam_benchmark(servers, padding, worker,locks, options, footer): 28 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 29 | cmds = [] 30 | # if (options != "") and (padding == 0): 31 | # return 32 | tag = "OPTDB" 33 | 34 | for i in range(0, NUMBER_NODES): 35 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./optdb_experiment -ownIp={servers[i].ibIp} -worker={worker} -csvFile="optdb_benchmark_full_new.csv" -run_for_seconds=30 -padding={padding} -tag={tag} -lock_count={locks} -storage_nodes={NUMBER_NODES} -storage_node {options} {footer} -dramGB=10' 36 | cmds += [servers[i].run_cmd(cmd)] 37 | sleep(1) 38 | if not all(cmd.wait() == 0 for cmd in cmds): 39 | return Action.RESTART 40 | -------------------------------------------------------------------------------- /distexperiments/experiments/optdb_nambaseline.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 4 5 | 6 | parameter_grid = ParameterGrid( 7 | padding = [8], 8 | worker=["26","52 -nopinThreads", "104 -nopinThreads"], 9 | locks=[2000000], 10 | options=["-speculative_read -write_combining -order_release"], 11 | ) 12 | 13 | 14 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 15 | def compile(servers): 16 | servers.cd("/home/tziegler/rdma_synchronization/build/") 17 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
18 | procs = [s.run_cmd(cmake_cmd) for s in servers] 19 | assert(all(p.wait() == 0 for p in procs)) 20 | 21 | make_cmd = f'sudo make -j' 22 | procs = [s.run_cmd(make_cmd) for s in servers] 23 | assert(all(p.wait() == 0 for p in procs)) 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def nam_benchmark(servers, padding, worker,locks, options): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | cmds = [] 29 | tag = "NAMDBrestart" 30 | 31 | for i in range(0, NUMBER_NODES): 32 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./nam_experiment -ownIp={servers[i].ibIp} -worker={worker} -csvFile="optdb_benchmark_full_new.csv" -run_for_seconds=30 -padding={padding} -tag={tag} -lock_count={locks} -storage_nodes={NUMBER_NODES} -storage_node {options} -dramGB=10' 33 | cmds += [servers[i].run_cmd(cmd)] 34 | sleep(1) 35 | if not all(cmd.wait() == 0 for cmd in cmds): 36 | return Action.RESTART 37 | -------------------------------------------------------------------------------- /distexperiments/experiments/optimistic_scaleout.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 8 | locks=[200000], 9 | options=["-CRC", "-farm" , "-versioning", "-broken", "-pessimistic"], 10 | footer=["-nofooter","-footer"], 11 | blocks=[256, 16384], 12 | 13 | ) 14 | 15 | 16 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 17 | def compile(servers): 18 | servers.cd("/home/tziegler/rdma_synchronization/build/") 19 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
20 | procs = [s.run_cmd(cmake_cmd) for s in servers] 21 | assert(all(p.wait() == 0 for p in procs)) 22 | 23 | make_cmd = f'sudo make -j' 24 | procs = [s.run_cmd(make_cmd) for s in servers] 25 | assert(all(p.wait() == 0 for p in procs)) 26 | 27 | 28 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 29 | def locking_benchmark(servers, worker,locks, options, footer, blocks): 30 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 31 | cmds = [] 32 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./optmistic_benchmark -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -dramGB=10' 33 | cmds += [servers[0].run_cmd(cmd)] 34 | 35 | work = worker 36 | numberNodes=1 37 | if worker >=4: 38 | work = int(worker/4) 39 | numberNodes=4 40 | 41 | for i in range(1, numberNodes+1): 42 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./optmistic_benchmark -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="optimistic_benchmark_scaleout_new.csv" -run_for_seconds=30 -tag={worker} -nopinThreads -lock_count={locks} {options} {footer} -block_size={blocks}' 43 | cmds += [servers[i].run_cmd(cmd)] 44 | 45 | if not all(cmd.wait() == 0 for cmd in cmds): 46 | return Action.RESTART 47 | -------------------------------------------------------------------------------- /distexperiments/experiments/optimistic_single_threaded_new.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 2 5 | 6 | parameter_grid = ParameterGrid( 7 | numberNodes=[2], 8 | options=["-CRC", "-farm" , "-versioning", "-broken", "-pessimistic"], 9 | footer=["-nofooter","-footer"], 10 | blocks=[64, 256, 1024, 4096, 16384, 65536], 11 | ) 12 | 13 | 14 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 15 | def compile(servers): 16 | servers.cd("/home/tziegler/rdma_synchronization/build/") 17 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
18 | procs = [s.run_cmd(cmake_cmd) for s in servers] 19 | assert(all(p.wait() == 0 for p in procs)) 20 | 21 | make_cmd = f'sudo make -j' 22 | procs = [s.run_cmd(make_cmd) for s in servers] 23 | assert(all(p.wait() == 0 for p in procs)) 24 | 25 | 26 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 27 | def locking_ablation(servers, numberNodes, options, footer, blocks): 28 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 29 | if (options == "-pessimistic") and (footer == "-footer"):  # skip the pessimistic + footer combination 30 | return 31 | 32 | cmds = [] 33 | 34 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./optmistic_benchmark -ownIp={servers[0].ibIp} -storage_node -block_size={blocks}' 35 | cmds += [servers[0].run_cmd(cmd)] 36 | 37 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./optmistic_benchmark -ownIp={servers[1].ibIp} -csvFile="optimistic_runner_st_new.csv" -run_for_seconds=30 {options} -block_size={blocks} {footer} -readratios=100' 38 | cmds += [servers[1].run_cmd(cmd)] 39 | 40 | if not all(cmd.wait() == 0 for cmd in cmds): 41 | return Action.RESTART 42 | -------------------------------------------------------------------------------- /distexperiments/experiments/optmistic_st.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 2 5 | 6 | parameter_grid = ParameterGrid( 7 | numberNodes=[2], 8 | options=["-CRC", "-farm" , "-versioning", "-pessimistic"], 9 | blocks=[64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768], 10 | ) 11 | 12 | 13 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 14 | def compile(servers): 15 | servers.cd("/home/tziegler/rdma_synchronization/build/") 16 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
22 | procs = [s.run_cmd(cmake_cmd) for s in servers] 23 | assert(all(p.wait() == 0 for p in procs)) 24 | 25 | make_cmd = f'sudo make -j' 26 | procs = [s.run_cmd(make_cmd) for s in servers] 27 | assert(all(p.wait() == 0 for p in procs)) 28 | 29 | 30 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 31 | def locking_benchmark(servers, worker, zipf ,locks, options, footer, readratios,blocks): 32 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 33 | cmds = [] 34 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./optmistic_benchmark -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -dramGB=10' 35 | cmds += [servers[0].run_cmd(cmd)] 36 | 37 | work = worker 38 | numberNodes=1 39 | if worker >=4: 40 | work = int(worker/4) 41 | numberNodes=4 42 | 43 | for i in range(1, numberNodes+1): 44 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./optmistic_benchmark -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="optimistic_benchmark_contention_read_new.csv" -run_for_seconds=30 -tag={worker} -nopinThreads -lock_count={locks} {options} {footer} -block_size={blocks} -zipfs={zipf} -readratios={readratios}' 45 | cmds += [servers[i].run_cmd(cmd)] 46 | 47 | if not all(cmd.wait() == 0 for cmd in cmds): 48 | return Action.RESTART 49 | -------------------------------------------------------------------------------- /distexperiments/experiments/optmistic_st.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 2 5 | 6 | parameter_grid = ParameterGrid( 7 | numberNodes=[2], 8 | options=["-CRC", "-farm" , "-versioning", "-pessimistic"], 9 | blocks=[64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768], 10 | ) 11 | 12 | 13 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 14 | def compile(servers): 15 | servers.cd("/home/tziegler/rdma_synchronization/build/") 16 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
17 | procs = [s.run_cmd(cmake_cmd) for s in servers] 18 | assert(all(p.wait() == 0 for p in procs)) 19 | 20 | make_cmd = f'sudo make -j' 21 | procs = [s.run_cmd(make_cmd) for s in servers] 22 | assert(all(p.wait() == 0 for p in procs)) 23 | 24 | 25 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 26 | def locking_ablation(servers, numberNodes, options,blocks): 27 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 28 | 29 | cmds = [] 30 | 31 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./optimistic_st -ownIp={servers[0].ibIp} -storage_node -block_size={blocks}' 32 | cmds += [servers[0].run_cmd(cmd)] 33 | 34 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./optimistic_st -ownIp={servers[1].ibIp} -csvFile="optimistic_bytes.csv" -run_for_seconds=30 {options} -block_size={blocks}' 35 | cmds += [servers[1].run_cmd(cmd)] 36 | 37 | if not all(cmd.wait() == 0 for cmd in cmds): 38 | return Action.RESTART 39 | -------------------------------------------------------------------------------- /distexperiments/experiments/read_benchmark.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | worker=[1,2,4,8,16,32,64,128,256,512,1024,2048], 8 | ) 9 | 10 | 11 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 12 | def compile(servers): 13 | servers.cd("/home/tziegler/rdma_synchronization/build/") 14 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 15 | procs = [s.run_cmd(cmake_cmd) for s in servers] 16 | assert(all(p.wait() == 0 for p in procs)) 17 | 18 | make_cmd = f'sudo make -j' 19 | procs = [s.run_cmd(make_cmd) for s in servers] 20 | assert(all(p.wait() == 0 for p in procs)) 21 | 22 | 23 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 24 | def fa_benchmark(servers, worker): 25 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 26 | cmds = [] 27 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./read_benchmark -ownIp={servers[0].ibIp} -storage_node -worker={worker}' 28 | cmds += [servers[0].run_cmd(cmd)] 29 | 30 | work = worker 31 | numberNodes=1 32 | if worker >=4: 33 | work = int(worker/4) 34 | numberNodes=4 35 | 36 | for i in range(1, numberNodes+1): 37 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./read_benchmark -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="read_baseline_benchmark.csv" -run_for_seconds=30 -tag={worker} -nopinThreads -compute_id={i}' 38 | cmds += [servers[i].run_cmd(cmd)] 39 | 40 | if not all(cmd.wait() == 0 for cmd in cmds): 41 | return Action.RESTART 42 | -------------------------------------------------------------------------------- /distexperiments/experiments/sleep_effect_compute.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 3 5 | 6 | parameter_grid = ParameterGrid( 7 | storageNodes = [1], 8 | padding = [8], 9 | worker=[1,2,4,8,16,32,64,128,256,512,1024], 10 | sleep=[0,8,16,32,64], 11 | locks=[200000], 12 | options=["","-speculative_read"], 13 | ) 14 | 15 | 16 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 17 | def compile(servers): 18 | servers.cd("/home/tziegler/rdma_synchronization/build/") 19 | cmake_cmd = f'cmake -D 
CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 20 | procs = [s.run_cmd(cmake_cmd) for s in servers] 21 | assert(all(p.wait() == 0 for p in procs)) 22 | 23 | make_cmd = f'sudo make -j' 24 | procs = [s.run_cmd(make_cmd) for s in servers] 25 | assert(all(p.wait() == 0 for p in procs)) 26 | 27 | 28 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 29 | def locking_benchmark(servers, storageNodes, padding, worker,sleep,locks, options): 30 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 31 | if (options == "") and (sleep > 0): 32 | return 33 | 34 | cmds = [] 35 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./pause_effect -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10' 36 | cmds += [servers[0].run_cmd(cmd)] 37 | if storageNodes > 1: 38 | cmd = f'numactl --membind=1 sudo ip netns exec ib1 ./no_locking_benchmark -ownIp=172.18.94.81 -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10 -storage_nodes={storageNodes}' 39 | cmds += [servers[0].run_cmd(cmd)] 40 | 41 | 42 | 43 | work = worker 44 | numberNodes=1 45 | if worker >=2: 46 | work = int(worker/2) 47 | numberNodes=2 48 | 49 | for i in range(1, numberNodes+1): 50 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./pause_effect -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="effect_compute_two.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} {options} -sleep={sleep} -storage_nodes={storageNodes}' 51 | cmds += [servers[i].run_cmd(cmd)] 52 | 53 | if not all(cmd.wait() == 0 for cmd in cmds): 54 | return Action.RESTART 55 | -------------------------------------------------------------------------------- /distexperiments/experiments/sleep_effect_storage.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | storageNodes = [1,2], 8 | padding = [8], 9 | worker=[1,2,4,8,16,32,64,128,256,512,1024], 10 | sleep=[0,8,16,32,64], 11 | locks=[200000], 12 | options=["","-speculative_read"], 13 | ) 14 | 15 | 16 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 17 | def compile(servers): 18 | servers.cd("/home/tziegler/rdma_synchronization/build/") 19 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
20 | procs = [s.run_cmd(cmake_cmd) for s in servers] 21 | assert(all(p.wait() == 0 for p in procs)) 22 | 23 | make_cmd = f'sudo make -j' 24 | procs = [s.run_cmd(make_cmd) for s in servers] 25 | assert(all(p.wait() == 0 for p in procs)) 26 | 27 | 28 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 29 | def locking_benchmark(servers, storageNodes, padding, worker,sleep,locks, options): 30 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 31 | if (options == "") and (sleep > 0): 32 | return 33 | 34 | cmds = [] 35 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./pause_effect -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10' 36 | cmds += [servers[0].run_cmd(cmd)] 37 | if storageNodes > 1: 38 | cmd = f'numactl --membind=1 sudo ip netns exec ib1 ./no_locking_benchmark -ownIp=172.18.94.81 -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10 -storage_nodes={storageNodes}' 39 | cmds += [servers[0].run_cmd(cmd)] 40 | 41 | 42 | 43 | work = worker 44 | numberNodes=1 45 | if worker >=4: 46 | work = int(worker/4) 47 | numberNodes=4 48 | 49 | for i in range(1, numberNodes+1): 50 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./pause_effect -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="effect_storage.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} {options} -sleep={sleep} -storage_nodes={storageNodes}' 51 | cmds += [servers[i].run_cmd(cmd)] 52 | 53 | if not all(cmd.wait() == 0 for cmd in cmds): 54 | return Action.RESTART 55 | -------------------------------------------------------------------------------- /distexperiments/experiments/sleep_effect_two_cnics.py: -------------------------------------------------------------------------------- 1 | import config 2 | from distexprunner import * 3 | 4 | NUMBER_NODES = 5 5 | 6 | parameter_grid = ParameterGrid( 7 | storageNodes = [1,2], 8 | computeNodes =[8], 9 | padding = [8], 10 | worker=[8,16,32,64,128,256,512,1024], 11 | sleep=[0,8,16,32,64], 12 | locks=[200000], 13 | options=["","-speculative_read"], 14 | ) 15 | 16 | 17 | @reg_exp(servers=config.server_list[:NUMBER_NODES]) 18 | def compile(servers): 19 | servers.cd("/home/tziegler/rdma_synchronization/build/") 20 | cmake_cmd = f'cmake -D CMAKE_C_COMPILER=gcc-10 -D CMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release ..' 
21 | procs = [s.run_cmd(cmake_cmd) for s in servers] 22 | assert(all(p.wait() == 0 for p in procs)) 23 | 24 | make_cmd = f'sudo make -j' 25 | procs = [s.run_cmd(make_cmd) for s in servers] 26 | assert(all(p.wait() == 0 for p in procs)) 27 | 28 | 29 | @reg_exp(servers=config.server_list[:NUMBER_NODES], params=parameter_grid, raise_on_rc=True, max_restarts=1) 30 | def locking_benchmark(servers, storageNodes, computeNodes, padding, worker,sleep,locks, options): 31 | servers.cd("/home/tziegler/rdma_synchronization/build/frontend") 32 | if (options == "") and (sleep > 0): 33 | return 34 | 35 | cmds = [] 36 | cmd = f'numactl --membind=0 --cpunodebind=0 sudo ip netns exec ib0 ./pause_effect -ownIp={servers[0].ibIp} -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10' 37 | cmds += [servers[0].run_cmd(cmd)] 38 | if storageNodes > 1: 39 | cmd = f'numactl --membind=1 sudo ip netns exec ib1 ./no_locking_benchmark -ownIp=172.18.94.81 -storage_node -worker={worker} -lock_count={locks} -padding={padding} -dramGB=10 -storage_nodes={storageNodes}' 40 | cmds += [servers[0].run_cmd(cmd)] 41 | 42 | 43 | 44 | work = worker 45 | numberNodes=1 46 | if worker >=8: 47 | work = int(worker/8) 48 | numberNodes=4 49 | 50 | for i in range(1, numberNodes+1): 51 | cmd = f'numactl --membind=0 sudo ip netns exec ib0 ./pause_effect -ownIp={servers[i].ibIp} -all_worker={worker} -worker={work} -csvFile="compute_nics_1_mem.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} {options} -sleep={sleep} -storage_nodes={storageNodes}' 52 | cmds += [servers[i].run_cmd(cmd)] 53 | cmd = f'numactl --membind=1 sudo ip netns exec ib1 ./pause_effect -ownIp={servers[i].sibIP} -all_worker={worker} -worker={work} -csvFile="compute_nics_2_mem.csv" -run_for_seconds=30 -padding={padding} -tag={worker} -nopinThreads -lock_count={locks} {options} -sleep={sleep} -storage_nodes={storageNodes}' 54 | cmds += [servers[i].run_cmd(cmd)] 55 | 56 | if not all(cmd.wait() == 0 for cmd in cmds): 57 | return Action.RESTART 58 | -------------------------------------------------------------------------------- /frontend/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(frontend frontend.cpp) 2 | add_dependencies(frontend nam) 3 | target_link_libraries(frontend nam numa) 4 | 5 | 6 | add_executable(ordering_fixed ordering_fixed.cpp) 7 | add_dependencies(ordering_fixed nam) 8 | target_link_libraries(ordering_fixed nam numa) 9 | 10 | 11 | 12 | add_executable(broken_remote_write broken_remote_write.cpp) 13 | add_dependencies(broken_remote_write nam) 14 | target_link_libraries(broken_remote_write nam numa) 15 | 16 | 17 | add_executable(atomic_benchmark atomic_benchmark.cpp) 18 | add_dependencies(atomic_benchmark nam) 19 | target_link_libraries(atomic_benchmark nam numa) 20 | 21 | add_executable(cas_benchmark cas_benchmark.cpp) 22 | add_dependencies(cas_benchmark nam) 23 | target_link_libraries(cas_benchmark nam numa) 24 | 25 | add_executable(fa_benchmark fa_benchmark.cpp) 26 | add_dependencies(fa_benchmark nam) 27 | target_link_libraries(fa_benchmark nam numa) 28 | 29 | add_executable(read_benchmark read_benchmark.cpp) 30 | add_dependencies(read_benchmark nam) 31 | target_link_libraries(read_benchmark nam numa) 32 | 33 | 34 | 35 | add_executable(atomic_alignment atomic_alignment.cpp) 36 | add_dependencies(atomic_alignment nam) 37 | target_link_libraries(atomic_alignment nam numa) 38 | 39 | add_executable(atomic_alignment_ws 
atomic_alignment_ws.cpp) 40 | add_dependencies(atomic_alignment_ws nam) 41 | target_link_libraries(atomic_alignment_ws nam numa) 42 | 43 | 44 | add_executable(torn_writes torn_writes.cpp) 45 | add_dependencies(torn_writes nam) 46 | target_link_libraries(torn_writes nam numa) 47 | 48 | 49 | add_executable(lock lock.cpp) 50 | add_dependencies(lock nam) 51 | target_link_libraries(lock nam numa) 52 | 53 | add_executable(locking_benchmark locking_benchmark.cpp) 54 | add_dependencies(locking_benchmark nam) 55 | target_link_libraries(locking_benchmark nam numa) 56 | 57 | add_executable(no_locking_benchmark no_locking_benchmark.cpp) 58 | add_dependencies(no_locking_benchmark nam) 59 | target_link_libraries(no_locking_benchmark nam numa) 60 | 61 | 62 | add_executable(locking_benchmark_tail locking_benchmark_tail.cpp) 63 | add_dependencies(locking_benchmark_tail nam) 64 | target_link_libraries(locking_benchmark_tail nam numa) 65 | 66 | add_executable(atomic_visibility atomic_visibility.cpp) 67 | add_dependencies(atomic_visibility nam) 68 | target_link_libraries(atomic_visibility nam numa) 69 | 70 | add_executable(batched_reads batched_reads.cpp) 71 | add_dependencies(batched_reads nam) 72 | target_link_libraries(batched_reads nam numa) 73 | 74 | add_executable(nam_experiment nam_experiment.cpp) 75 | add_dependencies(nam_experiment nam) 76 | target_link_libraries(nam_experiment nam numa) 77 | 78 | 79 | add_executable(batched_atomics batched_atomics.cpp) 80 | add_dependencies(batched_atomics nam) 81 | target_link_libraries(batched_atomics nam numa) 82 | 83 | add_executable(contention_reads_atomics contention_reads_atomics.cpp) 84 | add_dependencies(contention_reads_atomics nam) 85 | target_link_libraries(contention_reads_atomics nam numa) 86 | 87 | add_executable(pause_effect pause_effect.cpp) 88 | add_dependencies(pause_effect nam) 89 | target_link_libraries(pause_effect nam numa) 90 | 91 | add_executable(btree btree.cpp) 92 | add_dependencies(btree nam) 93 | target_link_libraries(btree nam numa) 94 | 95 | 96 | add_executable(optimistic_st optimistic_st.cpp) 97 | add_dependencies(optimistic_st nam) 98 | target_link_libraries(optimistic_st nam numa) 99 | 100 | 101 | add_executable(optmistic_benchmark optmistic_benchmark.cpp) 102 | add_dependencies(optmistic_benchmark nam) 103 | target_link_libraries(optmistic_benchmark nam numa) 104 | target_link_libraries(optmistic_benchmark ${CMAKE_DL_LIBS}) 105 | 106 | 107 | add_executable(optdb_experiment optdb_experiment.cpp) 108 | add_dependencies(optdb_experiment nam) 109 | target_link_libraries(optdb_experiment nam numa) 110 | target_link_libraries(optdb_experiment ${CMAKE_DL_LIBS}) 111 | 112 | 113 | add_executable(opt_btree opt_btree.cpp) 114 | add_dependencies(opt_btree nam) 115 | target_link_libraries(opt_btree nam numa) 116 | target_link_libraries(opt_btree ${CMAKE_DL_LIBS}) 117 | -------------------------------------------------------------------------------- /frontend/frontend.cpp: -------------------------------------------------------------------------------- 1 | #include "Defs.hpp" 2 | #include "PerfEvent.hpp" 3 | #include "nam/Config.hpp" 4 | #include "nam/Storage.hpp" 5 | #include "nam/Compute.hpp" 6 | #include "nam/profiling/ProfilingThread.hpp" 7 | #include "nam/profiling/counters/WorkerCounters.hpp" 8 | #include "nam/threads/Concurrency.hpp" 9 | #include "nam/utils/RandomGenerator.hpp" 10 | #include "nam/utils/Time.hpp" 11 | #include "BenchmarkHelper.hpp" 12 | // 
------------------------------------------------------------------------------------- 13 | #include 14 | // ------------------------------------------------------------------------------------- 15 | #include 16 | #include 17 | #include 18 | #include 19 | // ------------------------------------------------------------------------------------- 20 | 21 | constexpr size_t block_size =512; 22 | 23 | DEFINE_double(run_for_seconds, 10.0, ""); 24 | 25 | int main(int argc, char* argv[]) { 26 | gflags::SetUsageMessage("Storage-DB Frontend"); 27 | gflags::ParseCommandLineFlags(&argc, &argv, true); 28 | // ------------------------------------------------------------------------------------- 29 | using namespace nam; 30 | 31 | if (FLAGS_storage_node) { 32 | std::cout << "Storage Node" << std::endl; 33 | nam::Storage db; 34 | db.registerMemoryRegion("block", 1024 * 1024 * 1024); 35 | db.startAndConnect(); 36 | // ------------------------------------------------------------------------------------- 37 | // local writer; 38 | auto desc = db.getMemoryRegion("block"); 39 | auto number_entries = desc.size_bytes / sizeof(uint64_t); 40 | uint64_t version =1; 41 | uint64_t* buffer = (uint64_t*)desc.start; 42 | while(db.getCM().getNumberIncomingConnections()){ 43 | version++; 44 | std::cout << "run " << version << "\n"; 45 | for (size_t e_i = 0; e_i < number_entries; ++e_i) 46 | { 47 | buffer[e_i] = version; 48 | } 49 | } 50 | 51 | } else { 52 | std::cout << "Compute Node" << std::endl; 53 | nam::Compute compute; 54 | uint64_t inconsistencies = 0; 55 | uint64_t blocks_read = 0; 56 | benchmark::SYNC_workloadInfo experimentInfo{"broken_read_ordering", inconsistencies, blocks_read, block_size, 0}; 57 | compute.startProfiler(experimentInfo); 58 | // ------------------------------------------------------------------------------------- 59 | std::atomic<bool> keep_running = true; 60 | std::atomic<uint64_t> running_threads_counter = 0; 61 | // ------------------------------------------------------------------------------------- 62 | // Reader 63 | compute.getWorkerPool().scheduleJobAsync(0, [&]() { 64 | auto& cm = compute.getCM(); 65 | auto* rctx = threads::Worker::my().cctxs[0].rctx; 66 | auto desc = threads::Worker::my().catalog[0]; 67 | auto number_blocks = desc.size_bytes / block_size; 68 | std::cout << "Number of blocks " << number_blocks << "\n"; 69 | auto* buffer = static_cast<uint64_t*>(cm.getGlobalBuffer().allocate(block_size, block_size)); 70 | running_threads_counter++; 71 | while (keep_running) { 72 | benchmark::readBlocks(desc.start, buffer, block_size, number_blocks, blocks_read, inconsistencies, *rctx, keep_running, 73 | [&]( [[maybe_unused]] uint64_t number_entries, uint64_t remote_addr, uint64_t*& buffer, nam::rdma::RdmaContext& rctx) { 74 | rdma::postRead(buffer, rctx, rdma::completion::signaled, remote_addr, block_size, 0); 75 | // ------------------------------------------------------------------------------------- 76 | int comp{0}; 77 | ibv_wc wcReturn; 78 | while (comp == 0) { 79 | comp = rdma::pollCompletion(rctx.id->qp->send_cq, 1, &wcReturn); 80 | } 81 | }); 82 | } 83 | running_threads_counter--; 84 | }); 85 | // ------------------------------------------------------------------------------------- 86 | sleep(FLAGS_run_for_seconds); 87 | keep_running = false; 88 | while (running_threads_counter) { 89 | _mm_pause(); 90 | } 91 | // ------------------------------------------------------------------------------------- 92 | compute.getWorkerPool().joinAll(); 93 | compute.stopProfiler(); 94 | } 95 | return 0; 96 | } 97 | 
-------------------------------------------------------------------------------- /frontend/ordering_fixed.cpp: -------------------------------------------------------------------------------- 1 | #include "Defs.hpp" 2 | #include "PerfEvent.hpp" 3 | #include "nam/Config.hpp" 4 | #include "nam/Storage.hpp" 5 | #include "nam/Compute.hpp" 6 | #include "nam/profiling/ProfilingThread.hpp" 7 | #include "nam/profiling/counters/WorkerCounters.hpp" 8 | #include "nam/threads/Concurrency.hpp" 9 | #include "nam/utils/RandomGenerator.hpp" 10 | #include "nam/utils/Time.hpp" 11 | #include "BenchmarkHelper.hpp" 12 | // ------------------------------------------------------------------------------------- 13 | #include 14 | // ------------------------------------------------------------------------------------- 15 | #include 16 | #include 17 | #include 18 | #include 19 | // ------------------------------------------------------------------------------------- 20 | 21 | constexpr size_t block_size =512; 22 | 23 | int main(int argc, char* argv[]) { 24 | gflags::SetUsageMessage("Storage-DB Frontend"); 25 | gflags::ParseCommandLineFlags(&argc, &argv, true); 26 | // ------------------------------------------------------------------------------------- 27 | using namespace nam; 28 | 29 | if (FLAGS_storage_node) { 30 | std::cout << "Storage Node" << std::endl; 31 | nam::Storage db; 32 | db.registerMemoryRegion("block", 1024 * 1024 * 1024); 33 | db.startAndConnect(); 34 | // ------------------------------------------------------------------------------------- 35 | // local writer; 36 | auto desc = db.getMemoryRegion("block"); 37 | auto number_entries = desc.size_bytes / sizeof(uint64_t); 38 | uint64_t version = 1; 39 | uint64_t* buffer = (uint64_t*)desc.start; 40 | while (true) { 41 | version++; 42 | for (size_t e_i = 0; e_i < number_entries; ++e_i) { 43 | buffer[e_i] = version; 44 | } 45 | } 46 | 47 | } else { 48 | std::cout << "Compute Node" << std::endl; 49 | nam::Compute compute; 50 | uint64_t inconsistencies = 0; 51 | uint64_t blocks_read = 0; 52 | std::atomic<bool> keep_running = true; 53 | benchmark::SYNC_workloadInfo experimentInfo{"fixed_ordering_64b", inconsistencies, blocks_read, block_size,0}; 54 | compute.startProfiler(experimentInfo); 55 | // ------------------------------------------------------------------------------------- 56 | // Reader 57 | compute.getWorkerPool().scheduleJobAsync(0, [&]() { 58 | auto& cm = compute.getCM(); 59 | auto* rctx = threads::Worker::my().cctxs[0].rctx; 60 | auto desc = threads::Worker::my().catalog[0]; 61 | auto number_blocks = desc.size_bytes / block_size; 62 | std::cout << "Number of blocks " << number_blocks << "\n"; 63 | auto* buffer = static_cast<uint64_t*>(cm.getGlobalBuffer().allocate(block_size, block_size)); 64 | // auto number_entries = block_size / sizeof(uint64_t); 65 | while (true) { 66 | benchmark::readBlocks(desc.start, buffer, block_size, number_blocks, blocks_read, inconsistencies, *rctx, keep_running, 67 | [&](uint64_t number_entries, uint64_t remote_addr, uint64_t*& buffer, nam::rdma::RdmaContext& rctx) { 68 | for (size_t e_i = 0; e_i < number_entries; ++e_i) { 69 | auto comp = (e_i == number_entries - 1) ? 
rdma::completion::signaled : rdma::completion::unsignaled; 70 | rdma::postRead(buffer, rctx, comp, remote_addr + (e_i * 64), 64, 0); 71 | } 72 | // ------------------------------------------------------------------------------------- 73 | int comp{0}; 74 | ibv_wc wcReturn; 75 | while (comp == 0) { 76 | comp = rdma::pollCompletion(rctx.id->qp->send_cq, 1, &wcReturn); 77 | } 78 | }); 79 | } 80 | }); 81 | compute.getWorkerPool().joinAll(); 82 | compute.stopProfiler(); 83 | } 84 | return 0; 85 | } 86 | -------------------------------------------------------------------------------- /libs/FindNuma.cmake: -------------------------------------------------------------------------------- 1 | # SOURCE https://github.com/videolan/x265/blob/master/source/cmake/FindNuma.cmake 2 | # Module for locating libnuma 3 | # 4 | # Read-only variables: 5 | # NUMA_FOUND 6 | # Indicates that the library has been found. 7 | # 8 | # NUMA_INCLUDE_DIR 9 | # Points to the libnuma include directory. 10 | # 11 | # NUMA_LIBRARY_DIR 12 | # Points to the directory that contains the libraries. 13 | # The content of this variable can be passed to link_directories. 14 | # 15 | # NUMA_LIBRARY 16 | # Points to the libnuma that can be passed to target_link_libararies. 17 | # 18 | # Copyright (c) 2015 Steve Borho 19 | 20 | include(FindPackageHandleStandardArgs) 21 | 22 | find_path(NUMA_ROOT_DIR 23 | NAMES include/numa.h 24 | PATHS ENV NUMA_ROOT 25 | DOC "NUMA root directory") 26 | 27 | find_path(NUMA_INCLUDE_DIR 28 | NAMES numa.h 29 | HINTS ${NUMA_ROOT_DIR} 30 | PATH_SUFFIXES include 31 | DOC "NUMA include directory") 32 | 33 | find_library(NUMA_LIBRARY 34 | NAMES numa 35 | HINTS ${NUMA_ROOT_DIR} 36 | DOC "NUMA library") 37 | 38 | if (NUMA_LIBRARY) 39 | get_filename_component(NUMA_LIBRARY_DIR ${NUMA_LIBRARY} PATH) 40 | endif() 41 | 42 | mark_as_advanced(NUMA_INCLUDE_DIR NUMA_LIBRARY_DIR NUMA_LIBRARY) 43 | 44 | find_package_handle_standard_args(NUMA REQUIRED_VARS NUMA_ROOT_DIR NUMA_INCLUDE_DIR NUMA_LIBRARY) 45 | -------------------------------------------------------------------------------- /libs/gflags.cmake: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------- 2 | # cengine 3 | # --------------------------------------------------------------------------- 4 | 5 | include(ExternalProject) 6 | find_package(Git REQUIRED) 7 | 8 | # Get gflags 9 | ExternalProject_Add( 10 | gflags_src 11 | PREFIX "vendor/gflags" 12 | GIT_REPOSITORY "https://github.com/gflags/gflags.git" 13 | GIT_TAG f8a0efe03aa69b3336d8e228b37d4ccb17324b88 14 | TIMEOUT 10 15 | CMAKE_ARGS 16 | -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/vendor/gflags 17 | -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} 18 | -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} 19 | -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} 20 | UPDATE_COMMAND "" 21 | ) 22 | 23 | # Prepare gflags 24 | ExternalProject_Get_Property(gflags_src install_dir) 25 | set(GFLAGS_INCLUDE_DIR ${install_dir}/include) 26 | set(GFLAGS_LIBRARY_PATH ${install_dir}/lib/libgflags.a) 27 | file(MAKE_DIRECTORY ${GFLAGS_INCLUDE_DIR}) 28 | add_library(gflags STATIC IMPORTED) 29 | set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY_PATH}) 30 | set_property(TARGET gflags APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${GFLAGS_INCLUDE_DIR}) 31 | 32 | # Dependencies 33 | add_dependencies(gflags gflags_src) 34 | -------------------------------------------------------------------------------- /libs/ibverbs.cmake: 
-------------------------------------------------------------------------------- 1 | FIND_PATH(IBVERBS_INCLUDE_DIR infiniband/verbs.h 2 | PATHS 3 | $ENV{IBVERBS_HOME} 4 | NO_DEFAULT_PATH 5 | PATH_SUFFIXES include 6 | ) 7 | 8 | FIND_PATH(IBVERBS_INCLUDE_DIR infiniband/verbs.h 9 | PATHS 10 | /usr/local/include 11 | /usr/include 12 | /sw/include # Fink 13 | /opt/local/include # DarwinPorts 14 | /opt/csw/include # Blastwave 15 | /opt/include 16 | ) 17 | 18 | FIND_LIBRARY(IBVERBS_LIBRARY 19 | NAMES ibverbs 20 | PATHS $ENV{IBVERBS_HOME} 21 | NO_DEFAULT_PATH 22 | PATH_SUFFIXES lib64 lib 23 | ) 24 | 25 | FIND_LIBRARY(IBVERBS_LIBRARY 26 | NAMES ibverbs 27 | PATHS 28 | /usr/local 29 | /usr 30 | /sw 31 | /opt/local 32 | /opt/csw 33 | /opt 34 | /usr/freeware 35 | PATH_SUFFIXES lib64 lib 36 | ) 37 | SET(IBVERBS_FOUND FALSE) 38 | IF(IBVERBS_LIBRARY AND IBVERBS_INCLUDE_DIR) 39 | SET(IBVERBS_FOUND TRUE) 40 | ENDIF(IBVERBS_LIBRARY AND IBVERBS_INCLUDE_DIR) 41 | 42 | include(FindPackageHandleStandardArgs) 43 | find_package_handle_standard_args(IBVerbs DEFAULT_MSG IBVERBS_LIBRARY IBVERBS_INCLUDE_DIR) 44 | 45 | mark_as_advanced(IBVERBS_INCLUDE_DIR IBVERBS_LIBRARIES) 46 | -------------------------------------------------------------------------------- /libs/rdmacm.cmake: -------------------------------------------------------------------------------- 1 | FIND_PATH(RDMACM_INCLUDE_DIR rdma/rdma_verbs.h 2 | PATHS 3 | $ENV{RDMACM_HOME} 4 | NO_DEFAULT_PATH 5 | PATH_SUFFIXES include 6 | ) 7 | 8 | FIND_PATH(RDMACM_INCLUDE_DIR rdma/rdma_verbs.h 9 | PATHS 10 | /usr/local/include 11 | /usr/include 12 | /sw/include # Fink 13 | /opt/local/include # DarwinPorts 14 | /opt/csw/include # Blastwave 15 | /opt/include 16 | ) 17 | 18 | FIND_LIBRARY(RDMACM_LIBRARY 19 | NAMES rdmacm 20 | PATHS $ENV{RDMACM_HOME} 21 | NO_DEFAULT_PATH 22 | PATH_SUFFIXES lib64 lib 23 | ) 24 | 25 | FIND_LIBRARY(RDMACM_LIBRARY 26 | NAMES rdmacm 27 | PATHS 28 | /usr/local 29 | /usr 30 | /sw 31 | /opt/local 32 | /opt/csw 33 | /opt 34 | /usr/freeware 35 | PATH_SUFFIXES lib64 lib 36 | ) 37 | SET(RDMACM_FOUND FALSE) 38 | IF(RDMACM_LIBRARY AND RDMACM_INCLUDE_DIR) 39 | SET(RDMACM_FOUND TRUE) 40 | ENDIF(RDMACM_LIBRARY AND RDMACM_INCLUDE_DIR) 41 | 42 | include(FindPackageHandleStandardArgs) 43 | find_package_handle_standard_args(RdmaCm DEFAULT_MSG RDMACM_LIBRARY RDMACM_INCLUDE_DIR) 44 | 45 | mark_as_advanced(RDMACM_INCLUDE_DIR RDMACM_LIBRARIES) 46 | -------------------------------------------------------------------------------- /shared-headers/local.cmake: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------- 2 | # cengine 3 | # --------------------------------------------------------------------------- 4 | 5 | # --------------------------------------------------------------------------- 6 | # Files 7 | # --------------------------------------------------------------------------- 8 | 9 | set(SHARED_INCLUDE_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) 10 | -------------------------------------------------------------------------------- /vendor/gflags/src/gflags_src-stamp/gflags_src-gitinfo.txt: -------------------------------------------------------------------------------- 1 | repository='https://github.com/gflags/gflags.git' 2 | module='' 3 | tag='origin' 4 | -------------------------------------------------------------------------------- /vendor/gflags/tmp/gflags_src-cfgcmd.txt: -------------------------------------------------------------------------------- 1 | 
cmd='/usr/bin/cmake;-DCMAKE_INSTALL_PREFIX=/home/tobias/git/nam/vendor/gflags;-DCMAKE_C_COMPILER=/usr/bin/cc;-DCMAKE_CXX_COMPILER=/usr/bin/c++;-DCMAKE_CXX_FLAGS= -pthread -g;-GUnix Makefiles;' 2 | -------------------------------------------------------------------------------- /vendor/gflags/tmp/gflags_src-cfgcmd.txt.in: -------------------------------------------------------------------------------- 1 | cmd='@cmd@' 2 | -------------------------------------------------------------------------------- /vendor/gflags/tmp/gflags_src-gitclone.cmake: -------------------------------------------------------------------------------- 1 | 2 | if(NOT "/home/tobias/git/nam/vendor/gflags/src/gflags_src-stamp/gflags_src-gitinfo.txt" IS_NEWER_THAN "/home/tobias/git/nam/vendor/gflags/src/gflags_src-stamp/gflags_src-gitclone-lastrun.txt") 3 | message(STATUS "Avoiding repeated git clone, stamp file is up to date: '/home/tobias/git/nam/vendor/gflags/src/gflags_src-stamp/gflags_src-gitclone-lastrun.txt'") 4 | return() 5 | endif() 6 | 7 | execute_process( 8 | COMMAND ${CMAKE_COMMAND} -E remove_directory "/home/tobias/git/nam/vendor/gflags/src/gflags_src" 9 | RESULT_VARIABLE error_code 10 | ) 11 | if(error_code) 12 | message(FATAL_ERROR "Failed to remove directory: '/home/tobias/git/nam/vendor/gflags/src/gflags_src'") 13 | endif() 14 | 15 | # try the clone 3 times in case there is an odd git clone issue 16 | set(error_code 1) 17 | set(number_of_tries 0) 18 | while(error_code AND number_of_tries LESS 3) 19 | execute_process( 20 | COMMAND "/usr/bin/git" clone --no-checkout "https://github.com/gflags/gflags.git" "gflags_src" 21 | WORKING_DIRECTORY "/home/tobias/git/nam/vendor/gflags/src" 22 | RESULT_VARIABLE error_code 23 | ) 24 | math(EXPR number_of_tries "${number_of_tries} + 1") 25 | endwhile() 26 | if(number_of_tries GREATER 1) 27 | message(STATUS "Had to git clone more than once: 28 | ${number_of_tries} times.") 29 | endif() 30 | if(error_code) 31 | message(FATAL_ERROR "Failed to clone repository: 'https://github.com/gflags/gflags.git'") 32 | endif() 33 | 34 | execute_process( 35 | COMMAND "/usr/bin/git" checkout f8a0efe03aa69b3336d8e228b37d4ccb17324b88 -- 36 | WORKING_DIRECTORY "/home/tobias/git/nam/vendor/gflags/src/gflags_src" 37 | RESULT_VARIABLE error_code 38 | ) 39 | if(error_code) 40 | message(FATAL_ERROR "Failed to checkout tag: 'f8a0efe03aa69b3336d8e228b37d4ccb17324b88'") 41 | endif() 42 | 43 | set(init_submodules TRUE) 44 | if(init_submodules) 45 | execute_process( 46 | COMMAND "/usr/bin/git" submodule update --recursive --init 47 | WORKING_DIRECTORY "/home/tobias/git/nam/vendor/gflags/src/gflags_src" 48 | RESULT_VARIABLE error_code 49 | ) 50 | endif() 51 | if(error_code) 52 | message(FATAL_ERROR "Failed to update submodules in: '/home/tobias/git/nam/vendor/gflags/src/gflags_src'") 53 | endif() 54 | 55 | # Complete success, update the script-last-run stamp file: 56 | # 57 | execute_process( 58 | COMMAND ${CMAKE_COMMAND} -E copy 59 | "/home/tobias/git/nam/vendor/gflags/src/gflags_src-stamp/gflags_src-gitinfo.txt" 60 | "/home/tobias/git/nam/vendor/gflags/src/gflags_src-stamp/gflags_src-gitclone-lastrun.txt" 61 | RESULT_VARIABLE error_code 62 | ) 63 | if(error_code) 64 | message(FATAL_ERROR "Failed to copy script-last-run stamp file: '/home/tobias/git/nam/vendor/gflags/src/gflags_src-stamp/gflags_src-gitclone-lastrun.txt'") 65 | endif() 66 | 67 | --------------------------------------------------------------------------------