├── .gitignore ├── CMakeLists.txt ├── README.md ├── bandwidth └── bw.cpp ├── coroutine ├── OverridingList.hpp ├── coro_insert.cpp ├── coro_lookup.cpp └── reuse.cpp ├── inplace ├── Common.hpp ├── CowBased.hpp ├── InPlace-highBit.hpp ├── InPlace-sliding.hpp ├── LogBased.hpp ├── NonVolatileMemory.hpp ├── ValidationBased.hpp ├── bench.cpp └── tester.cpp ├── interference ├── Common.hpp ├── LogWriter.hpp ├── NonVolatileMemory.hpp ├── PageFlusher.hpp ├── Pages.hpp ├── RandomReader.hpp ├── SequentialReader.hpp └── interference.cpp ├── latency ├── read_latency.cpp ├── read_latency_alex.cpp └── write_latency.cpp ├── logging ├── Common.hpp ├── LW_Classic.hpp ├── LW_ClassicAligned.hpp ├── LW_ClassicCached.hpp ├── LW_Header.hpp ├── LW_HeaderAligned.hpp ├── LW_HeaderAlignedDancing.hpp ├── LW_HeaderDancing.hpp ├── LW_Mnemosyne.hpp ├── LW_MnemosyneAligned.hpp ├── LW_PMemLib.hpp ├── LW_Zero.hpp ├── LW_ZeroAligned.hpp ├── LW_ZeroBlocked.hpp ├── LW_ZeroCached.hpp ├── LW_ZeroSimd.hpp ├── NonVolatileMemory.hpp ├── Random.hpp └── logging.cpp ├── page_flush ├── Common.hpp ├── FullBufferFrame.hpp ├── NonVolatileMemory.hpp ├── Pages.hpp ├── Random.hpp ├── VolatileMemory.hpp └── page_flush.cpp ├── reproduce ├── all.sh ├── bw_cache_lines.sh ├── bw_threads.sh ├── coroutines.sh ├── inplace.sh ├── interference.sh ├── latency_read.sh ├── latency_write.sh ├── logging.sh ├── page_flush.sh └── validate.sh └── results └── .keep /.gitignore: -------------------------------------------------------------------------------- 1 | results 2 | a.out 3 | .DS_Store 4 | .idea 5 | cmake-build-debug/ 6 | intel.sh 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(damoncode) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | file(GLOB_RECURSE SOURCE_FILES ./*.cpp ./*.hpp) 7 | 8 | add_executable(damoncode ${SOURCE_FILES}) 9 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 3 | [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](http://unlicense.org/) 4 | 5 | pmem-bench 6 | ========== 7 | 8 | A set of algorithms and benchmarks for persistent memory (PMem). 9 | The algorithms are described in two research papers: 10 | - There is a [short version](https://db.in.tum.de/people/sites/vanrenen/papers/nvm_stats.pdf) published at [DaMoN](https://sites.google.com/view/damon2019/home-damon-2019). 11 | - And an [extended version](https://link.springer.com/content/pdf/10.1007/s00778-020-00622-9.pdf) that appeared in the [VLDBJ 2020](https://link.springer.com/journal/778/volumes-and-issues/29-6). 12 | 13 | Structure 14 | --------- 15 | Each algorithm and experiment has a benchmark script to compile and run it in the [reproduce/](reproduce/) folder. 16 | If, for example, you are interested in measuring the read latency of your PMem device, have a look at [reproduce/latency_read.sh](reproduce/latency_read.sh). 17 | These file also contain instructions (already in executable bash syntax) on how to compile and use the code. 18 | 19 | The source code of the algorithms and experiments are contained in the respective their folders on the root level of the project. 20 | For example, the read latency experiment can be found in [latency](latency/). 21 | 22 | The benchmark scripts (in [reproduce/](reproduce/)) print their results to `stdout` and also create a log file in [results/](results/) using `tee`. 23 | The output is in an easy to parse format and can be used for creating plots (not included in the repository as the ones in the paper are pgfplots). 
24 | 25 | Issues & Contributions 26 | ---------------------- 27 | Note that the source code is a prototype implementation for a research paper. 28 | There might be bugs and other limitations. 29 | If you find an issue or run into troubles feel free to contact me via an issue in this repository. 30 | 31 | Licence 32 | ------- 33 | You are free to choose any of the above licences when using the source code. 34 | However, I encourage you in a non binding way to follow the [blessing from the SQLite folks](https://github.com/sqlite/sqlite/blob/master/LICENSE.md): 35 | 36 | ``` 37 | May you do good and not evil. 38 | May you find forgiveness for yourself and forgive others. 39 | May you share freely, never taking more than you give. 40 | ``` 41 | 42 | Authors 43 | ------- 44 | Alexander van Renen 45 | -------------------------------------------------------------------------------- /bandwidth/bw.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | #define _mm_clflush(addr)\ 28 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 29 | #define _mm_clflushopt(addr)\ 30 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 31 | #define _mm_clwb(addr)\ 32 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 33 | #define _mm_pcommit()\ 34 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 35 | 36 | #ifndef BLOCK_SIZE 37 | #define BLOCK_SIZE 64 38 | #endif 39 | 40 | // clang++ -g0 -O3 -march=native -std=c++14 bw.cpp -pthread && ./a.out 1e9 1 ram /mnt/pmem0/renen 41 | int main(int argc, char **argv) 42 | { 43 | const uint32_t READ_COUNT = 10; 44 | const 
uint32_t block_size = BLOCK_SIZE; 45 | 46 | if (argc != 5) { 47 | cout << "usage: " << argv[0] << " datasize thread_count (nvm|ram) path" << endl; 48 | throw; 49 | } 50 | bool use_clwb = false; 51 | #ifdef USE_CLWB 52 | use_clwb=true; 53 | #endif 54 | bool use_streaming = false; 55 | #ifdef STREAMING 56 | use_streaming =true; 57 | #endif 58 | bool use_write = false; 59 | #ifdef WRITE 60 | use_write =true; 61 | #endif 62 | 63 | const uint64_t total_size = atof(argv[1]); 64 | const uint64_t thread_count = atof(argv[2]); 65 | bool use_ram = argv[3][0] == 'r'; 66 | const string PATH = argv[4]; 67 | 68 | if (thread_count == 0) { 69 | cout << "invalid thread count" << endl; 70 | return 0; 71 | } 72 | 73 | const uint64_t chunk_size = total_size / thread_count; 74 | vector> workers(thread_count); 75 | 76 | atomic start_barrier(0); 77 | atomic global_iterations(0); 78 | atomic global_counter(0); 79 | atomic running_flag(true); 80 | 81 | for (int t = 0; t([&, t]() { 83 | const uint64_t iteration_count = chunk_size / block_size; 84 | #ifdef STREAMING 85 | uint8_t write_data[64] = {0xaa}; 86 | __m512i write_data_vec = _mm512_stream_load_si512(write_data); 87 | #endif 88 | 89 | // Init data ---------------------------------------------- 90 | uint8_t *keys; 91 | if (use_ram) { 92 | keys = new uint8_t[chunk_size + block_size]; 93 | } else { 94 | int fd = open((PATH + "/file_" + to_string(t)).c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 95 | int td = ftruncate(fd, chunk_size + block_size); 96 | if (fd<0 || td<0) { 97 | cout << "unable to create file" << endl; 98 | exit(-1); 99 | } 100 | keys = (uint8_t *) mmap(nullptr, chunk_size + block_size, PROT_WRITE, MAP_SHARED, fd, 0); 101 | } 102 | // Align to 'block_size' byte 103 | while (((uint64_t) keys) % block_size != 0) { 104 | keys++; 105 | } 106 | 107 | assert(((uint64_t) keys) % 64 == 0); 108 | memset(keys, 'a', chunk_size); 109 | uint64_t *random_offsets = new uint64_t[iteration_count]; 110 | for (uint64_t i = 0; ijoin(); 162 | 
} 163 | 164 | double required_time = chrono::duration_cast(end - start).count(); 165 | double gbs = (global_iterations * block_size) / required_time; 166 | //@formatter:off 167 | std::cout << "res" 168 | << " use_clwb: " << use_clwb 169 | << " use_ram: " << use_ram 170 | << " use_streaming: " << use_streaming 171 | << " use_write: " << use_write 172 | << " thread_count: " << thread_count 173 | << " total_size: " << total_size 174 | << " block_size: " << BLOCK_SIZE 175 | << " global_counter: " << global_counter 176 | << " sum(GB/s): " << gbs << std::endl; 177 | //@formatter:on 178 | 179 | return 0; 180 | } 181 | -------------------------------------------------------------------------------- /coroutine/OverridingList.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | // ------------------------------------------------------------------------------------- 6 | template 7 | class OverridingList { 8 | public: 9 | OverridingList() 10 | : head(nullptr) {} 11 | 12 | void Push(Value ptr) 13 | { 14 | Entry *entry = reinterpret_cast(ptr); 15 | entry->next = head; 16 | head = ptr; 17 | } 18 | 19 | Value Pop() 20 | { 21 | assert(!Empty()); 22 | Value result = head; 23 | head = reinterpret_cast(head)->next; 24 | return result; 25 | } 26 | 27 | Value Top() 28 | { 29 | assert(!Empty()); 30 | return head; 31 | } 32 | 33 | bool Empty() const 34 | { 35 | return head == nullptr; 36 | } 37 | 38 | private: 39 | static_assert(std::is_pointer::value, "InPlaceList can not work on values."); 40 | 41 | struct Entry { 42 | Value next; 43 | }; 44 | 45 | Value head; 46 | }; 47 | // ------------------------------------------------------------------------------------- 48 | -------------------------------------------------------------------------------- /inplace/Common.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "libpmem.h" 11 | // ------------------------------------------------------------------------------------- 12 | #define a_mm_clflush(addr)\ 13 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 14 | #define a_mm_clflushopt(addr)\ 15 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 16 | #define a_mm_clwb(addr)\ 17 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 18 | #define a_mm_pcommit()\ 19 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 20 | // ------------------------------------------------------------------------------------- 21 | using ub1 = uint8_t; 22 | using ub2 = uint16_t; 23 | using ub4 = uint32_t; 24 | using ub8 = uint64_t; 25 | // ------------------------------------------------------------------------------------- 26 | using sb1 = int8_t; 27 | using sb2 = int16_t; 28 | using sb4 = int32_t; 29 | using sb8 = int64_t; 30 | // ------------------------------------------------------------------------------------- 31 | namespace constants { 32 | const static ub4 kCacheLineByteCount = 64; // 64 Byte cache lines 33 | } 34 | // ------------------------------------------------------------------------------------- 35 | inline void alex_WriteBack(void *addr, ub4 len) 36 | { 37 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 38 | a_mm_clwb((char *) uptr); 39 | } 40 | } 41 | // ------------------------------------------------------------------------------------- 42 | inline void alex_WriteBack(void *addr) 43 | { 44 | addr = (ub1 *) ((uintptr_t) addr & ~(64 - 1)); 45 | a_mm_clwb((char *) addr); 46 | } 47 | // 
------------------------------------------------------------------------------------- 48 | inline void alex_FlushOpt(void *addr, ub4 len) 49 | { 50 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 51 | a_mm_clflushopt((char *) uptr); 52 | } 53 | } 54 | // ------------------------------------------------------------------------------------- 55 | inline void alex_FlushOpt(void *addr) 56 | { 57 | a_mm_clflushopt((char *) addr); 58 | } 59 | // ------------------------------------------------------------------------------------- 60 | inline void alex_SFence() 61 | { 62 | _mm_sfence(); 63 | } 64 | // ------------------------------------------------------------------------------------- 65 | inline void alex_MFence() 66 | { 67 | _mm_mfence(); 68 | } 69 | // ------------------------------------------------------------------------------------- 70 | inline ub8 alex_PopCount(ub8 value) 71 | { 72 | return _mm_popcnt_u64(value); 73 | } 74 | // ------------------------------------------------------------------------------------- 75 | inline void alex_StreamClToNvm(void *dest, const void *src) 76 | { 77 | assert(((ub8) dest) % 64 == 0); 78 | assert(((ub8) src) % 64 == 0); 79 | __m512i reg = _mm512_load_si512(src); 80 | _mm512_stream_si512((__m512i *) dest, reg); 81 | } 82 | // ------------------------------------------------------------------------------------- 83 | void FastCopy512(ub1 *dest, const ub1 *src) 84 | { 85 | assert(((ub8) dest) % 64 == 0); 86 | memcpy(dest, src, 64); 87 | } 88 | // ------------------------------------------------------------------------------------- 89 | ub4 FastPopCount512(const ub1 *ptr) 90 | { 91 | ub4 res = 0; 92 | for (ub4 i = 0; i<64; i += 8) { 93 | res += alex_PopCount(*(ub8 *) (&ptr[i])); 94 | } 95 | return res; 96 | } 97 | // ------------------------------------------------------------------------------------- 98 | void FastCopy512Simd(ub1 *dest, const ub1 *src) 99 | { 100 | assert(((ub8) dest) 
% 64 == 0); 101 | __m512i reg = _mm512_loadu_si512(src); 102 | _mm512_store_si512((__m512i *) dest, reg); 103 | } 104 | // ------------------------------------------------------------------------------------- 105 | void alex_FastCopyAndWriteBack(ub1 *nvm_begin, const ub1 *ram_begin, ub4 size) 106 | { 107 | assert(size>0); 108 | 109 | // Copy head bytes 110 | ub4 pos = 0; 111 | ub8 off = (ub8) nvm_begin % 64; 112 | if (off != 0) { 113 | ub8 byte_count = (64 - off)>size ? size : (64 - off); 114 | memcpy(nvm_begin, ram_begin, byte_count); 115 | alex_WriteBack(nvm_begin); 116 | pos = byte_count; 117 | } 118 | 119 | // Copy full cache lines (and flush) 120 | for (; pos + 63(result); 145 | } 146 | // ------------------------------------------------------------------------------------- 147 | // Based on: https://en.wikipedia.org/wiki/Xorshift 148 | class Random { 149 | public: 150 | explicit Random(uint64_t seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 151 | : seed(seed) 152 | { 153 | } 154 | 155 | uint64_t Rand() 156 | { 157 | seed ^= (seed << 13); 158 | seed ^= (seed >> 7); 159 | return (seed ^= (seed << 17)); 160 | } 161 | 162 | uint64_t seed; 163 | }; 164 | // ------------------------------------------------------------------------------------- 165 | void DumpHex(const void *data_in, uint32_t size, std::ostream &os) 166 | { 167 | char buffer[16]; 168 | 169 | const char *data = reinterpret_cast(data_in); 170 | for (uint32_t i = 0; i(data_in); 181 | for (int32_t i = size - 1; i>=0; i--) { 182 | sprintf(buffer, "%02hhx", data[i]); 183 | os << buffer[0] << buffer[1] << " "; 184 | } 185 | } 186 | // ------------------------------------------------------------------------------------- 187 | template 188 | inline TARGET Cast(SOURCE *ptr) { return reinterpret_cast(ptr); } 189 | // ------------------------------------------------------------------------------------- 190 | char *CreateAlignedString(Random &ranny, uint32_t len) 191 | { 192 
| char *data = (char *) malloc(len + 1); 193 | assert((uint64_t) data % 4 == 0); 194 | 195 | for (uint32_t i = 0; i 204 | struct Operation { 205 | alignas(64) uint64_t entry_id; 206 | std::array data; 207 | }; 208 | template 209 | inline bool operator==(const Operation &lhs, const Operation &rhs) 210 | { 211 | return lhs.entry_id == rhs.entry_id && memcmp(lhs.data.data(), rhs.data.data(), complete_size - 8) == 0; 212 | } 213 | template 214 | inline bool operator!=(const Operation &lhs, const Operation &rhs) 215 | { 216 | return lhs.entry_id != rhs.entry_id || memcmp(lhs.data.data(), rhs.data.data(), complete_size - 8) != 0; 217 | } 218 | // ------------------------------------------------------------------------------------- 219 | -------------------------------------------------------------------------------- /inplace/CowBased.hpp: -------------------------------------------------------------------------------- 1 | #include "Common.hpp" 2 | #include "NonVolatileMemory.hpp" 3 | #include 4 | // ------------------------------------------------------------------------------------- 5 | namespace cow { 6 | // ------------------------------------------------------------------------------------- 7 | template 8 | struct InplaceCow { 9 | alignas(64) bool is_a_active; // Start at a cl to allow the use of streaming ops 10 | alignas(64) std::array a; 11 | alignas(64) std::array b; // Start at a cl to allow the use of streaming ops 12 | 13 | void Write(const char *input) 14 | { 15 | bool is_a_active_cache = is_a_active; 16 | if (is_a_active_cache) { 17 | alex_FastCopyAndWriteBack((ub1 *) b.data(), (const ub1 *) input, BYTE_COUNT); 18 | alex_SFence(); 19 | } else { 20 | alex_FastCopyAndWriteBack((ub1 *) a.data(), (const ub1 *) input, BYTE_COUNT); 21 | alex_SFence(); 22 | } 23 | 24 | is_a_active = !is_a_active_cache; 25 | alex_WriteBack(&is_a_active); 26 | alex_SFence(); 27 | } 28 | 29 | void Read(char *output) 30 | { 31 | if (is_a_active) { 32 | memcpy(output, a.data(), 
BYTE_COUNT); 33 | } else { 34 | memcpy(output, b.data(), BYTE_COUNT); 35 | } 36 | } 37 | 38 | void Init(Random &ranny) 39 | { 40 | is_a_active = true; // ranny.Rand() % 2; // Make it so that the branch is ~50% 41 | } 42 | }; 43 | // ------------------------------------------------------------------------------------- 44 | template<> 45 | struct InplaceCow<16> { 46 | alignas(64) uint8_t active_version_id; // Start at a cl to allow the use of streaming ops 47 | std::array, 2> versions; 48 | 49 | void Write(const char *input) 50 | { 51 | assert((uint64_t) this % 64 == 0); 52 | assert((uint64_t) input % 64 == 0); 53 | assert((void *) &active_version_id == (void *) this); 54 | assert(active_version_id == 0 || active_version_id == 1); 55 | 56 | // Load cl and update 57 | if (active_version_id == 0) { 58 | memcpy(versions[1].data(), input, 16); 59 | } else { 60 | memcpy(versions[0].data(), input, 16); 61 | } 62 | 63 | // Write new data 64 | __m512i reg = _mm512_loadu_si512(this); 65 | _mm512_stream_si512((__m512i *) this, reg); 66 | alex_SFence(); 67 | 68 | // Update version id 69 | __m512i mask = _mm512_castsi128_si512(_mm_cvtsi32_si128(0x01)); 70 | reg = _mm512_xor_si512(reg, mask); 71 | 72 | // Write new version id 73 | _mm512_stream_si512((__m512i *) this, reg); 74 | alex_SFence(); 75 | } 76 | 77 | void Read(char *output) 78 | { 79 | memcpy(output, versions[active_version_id].data(), 16); 80 | } 81 | 82 | void Init(Random &ranny) 83 | { 84 | active_version_id = true; // ranny.Rand() % 2; // Make it so that the branch is ~50% 85 | } 86 | }; 87 | // ------------------------------------------------------------------------------------- 88 | template<> 89 | struct InplaceCow<32> { 90 | alignas(64) uint8_t active_version_id; // Start at a cl to allow the use of streaming ops 91 | std::array a; 92 | alignas(64) std::array b; 93 | 94 | void Write(const char *input) 95 | { 96 | assert((uint64_t) this % 64 == 0); 97 | assert((uint64_t) input % 64 == 0); 98 | assert((void *) 
&active_version_id == (void *) this); 99 | assert(active_version_id == 0 || active_version_id == 1); 100 | 101 | // Load cl and update 102 | if (active_version_id == 0) { 103 | // Version and data on different cls -> use clwb, cause no need for streaming 104 | alex_FastCopyAndWriteBack((ub1 *) b.data(), (const ub1 *) input, 32); 105 | alex_SFence(); 106 | 107 | active_version_id = active_version_id ^ 0x1; 108 | alex_WriteBack(&active_version_id); 109 | alex_SFence(); 110 | } else { 111 | // Version and data both on first cl -> use streaming, because this cache line is re-written 112 | memcpy(a.data(), input, 32); 113 | 114 | // Write new data 115 | __m512i reg = _mm512_loadu_si512(this); 116 | _mm512_stream_si512((__m512i *) this, reg); 117 | alex_SFence(); 118 | 119 | // Update version id 120 | __m512i mask = _mm512_castsi128_si512(_mm_cvtsi32_si128(0x01)); 121 | reg = _mm512_xor_si512(reg, mask); 122 | 123 | // Write new version id 124 | _mm512_stream_si512((__m512i *) this, reg); 125 | alex_SFence(); 126 | } 127 | } 128 | 129 | void Read(char *output) 130 | { 131 | if (active_version_id == 0) { 132 | memcpy(output, a.data(), 32); 133 | } else { 134 | memcpy(output, b.data(), 32); 135 | } 136 | } 137 | 138 | void Init(Random &ranny) 139 | { 140 | active_version_id = true; // ranny.Rand() % 2; // Make it so that the branch is ~50% 141 | } 142 | }; 143 | // ------------------------------------------------------------------------------------- 144 | template<> 145 | struct InplaceCow<48> { 146 | alignas(64) uint8_t active_version_id; // Start at a cl to allow the use of streaming ops 147 | std::array a; 148 | alignas(64) std::array b; 149 | 150 | void Write(const char *input) 151 | { 152 | assert((uint64_t) this % 64 == 0); 153 | assert((uint64_t) input % 64 == 0); 154 | assert((void *) &active_version_id == (void *) this); 155 | assert(active_version_id == 0 || active_version_id == 1); 156 | 157 | // Load cl and update 158 | if (active_version_id == 0) { 159 | // 
Version and data on different cls -> use clwb, cause no need for streaming 160 | alex_FastCopyAndWriteBack((ub1 *) b.data(), (const ub1 *) input, 48); 161 | alex_SFence(); 162 | 163 | active_version_id = active_version_id ^ 0x1; 164 | alex_WriteBack(&active_version_id); 165 | alex_SFence(); 166 | } else { 167 | // Version and data both on first cl -> use streaming, because this cache line is re-written 168 | memcpy(a.data(), input, 48); 169 | 170 | // Write new data 171 | __m512i reg = _mm512_loadu_si512(this); 172 | _mm512_stream_si512((__m512i *) this, reg); 173 | alex_SFence(); 174 | 175 | // Update version id 176 | __m512i mask = _mm512_castsi128_si512(_mm_cvtsi32_si128(0x01)); 177 | reg = _mm512_xor_si512(reg, mask); 178 | 179 | // Write new version id 180 | _mm512_stream_si512((__m512i *) this, reg); 181 | alex_SFence(); 182 | } 183 | } 184 | 185 | void Read(char *output) 186 | { 187 | if (active_version_id == 0) { 188 | memcpy(output, a.data(), 48); 189 | } else { 190 | memcpy(output, b.data(), 48); 191 | } 192 | } 193 | 194 | void Init(Random &ranny) 195 | { 196 | active_version_id = true; // ranny.Rand() % 2; // Make it so that the branch is ~50% 197 | } 198 | }; 199 | // ------------------------------------------------------------------------------------- 200 | // For larger sizes it matters less and less .. 
201 | // ------------------------------------------------------------------------------------- 202 | template 203 | struct CowBasedUpdates { 204 | NonVolatileMemory nvm_data; 205 | uint64_t entry_count; 206 | InplaceCow *entries; 207 | 208 | CowBasedUpdates(const std::string &path, uint64_t entry_count) 209 | : nvm_data(path + "/cowbased_data_file", entry_count * sizeof(InplaceCow)) 210 | , entry_count(entry_count) 211 | { 212 | assert(nvm_data.GetByteCount()>=entry_size * entry_count); 213 | 214 | memset(nvm_data.Data(), 'a', nvm_data.GetByteCount()); 215 | entries = (InplaceCow *) nvm_data.Data(); 216 | Random ranny; 217 | for (uint32_t i = 0; i &op, uint32_t id) 224 | { 225 | assert(id &result, uint32_t id) 230 | { 231 | entries[id].Read((char *) &result); 232 | return result.entry_id; 233 | } 234 | }; 235 | // ------------------------------------------------------------------------------------- 236 | } 237 | // ------------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /inplace/InPlace-highBit.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include "NonVolatileMemory.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | // ------------------------------------------------------------------------------------- 10 | namespace high { 11 | // ------------------------------------------------------------------------------------- 12 | __m256i constAdd = _mm256_set1_epi64x(0x4000000000000000L); 13 | __m256i constAnd1 = _mm256_set1_epi64x(0xC000000000000000L); 14 | __m256i constAnd2 = _mm256_set1_epi64x(0x3FFFFFFFFFFFFFFFL); 15 | __m128i constAnd3 = _mm_set1_epi32(0x7FFFFFFF); 16 | __m128i constGatherIndex = _mm_set_epi32(6, 4, 2, 0); 17 | __m128i constAnd = 
_mm_set1_epi32(0x7FFFFFFF); 18 | // ------------------------------------------------------------------------------------- 19 | template 20 | struct InplaceField { 21 | static constexpr uint32_t BIT_COUNT = BYTE_COUNT * 8; 22 | static constexpr uint32_t BLOCK_COUNT = (BIT_COUNT + 30) / 31; 23 | static constexpr uint32_t META_BLOCK_COUNT = (BYTE_COUNT + 111) / 112; // For every 112 input bytes we need one meta block (8byte) 24 | 25 | alignas(64) uint64_t _blocks[BLOCK_COUNT]; 26 | 27 | void Reset() 28 | { 29 | memset(_blocks, 0, sizeof(uint64_t) * BLOCK_COUNT); 30 | } 31 | 32 | template 33 | void WriteRec(const uint32_t *values, uint64_t *blocks) 34 | { 35 | uint32_t currentHighBits = 0; 36 | 37 | constexpr uint32_t REMAINING_BYTE = BYTE_COUNT - (META * 112); 38 | constexpr uint32_t ITERATION_COUNT = (META == META_BLOCK_COUNT - 1) ? REMAINING_BYTE / 16 : 7; 39 | 40 | for (uint32_t i = 0; i(values + 28, blocks + 29); 61 | } 62 | 63 | template<> 64 | void WriteRec(const uint32_t *, uint64_t *) {} 65 | 66 | void WriteNoCheck(const char *data) 67 | { 68 | const uint32_t *values = (const uint32_t *) data; 69 | WriteRec<0>(values, _blocks); 70 | } 71 | 72 | template 73 | void ReadRec(const char *result, uint64_t *blocks) 74 | { 75 | constexpr uint32_t REMAINING_BYTE = BYTE_COUNT - (META * 112); 76 | constexpr uint32_t ITERATION_COUNT = (META == META_BLOCK_COUNT - 1) ? 
REMAINING_BYTE / 16 : 7; 77 | 78 | uint32_t high_bits = blocks[0]; 79 | 80 | for (int32_t i = ITERATION_COUNT - 1; i>=0; i--) { 81 | // Gather 4 * 32-Bit Values out of 64-Bit Array (gather lower 32 bits of each 64 bit value) 82 | __m128i values = _mm_i32gather_epi32((const int *) &blocks[1 + i * 4], constGatherIndex, 4); 83 | 84 | // Remove highest bit, so we have 4 * 31 bi values 85 | values = _mm_and_si128(values, constAnd); 86 | 87 | // Store the four high bits (lowest 4 bits in block[0]) to bit position 0, 8, 16, 24 (== align to byt boundary) 88 | __m128i highBitValue = _mm_cvtsi64_si128(_pdep_u64(high_bits, 0x01010101)); 89 | high_bits = high_bits >> 4; 90 | 91 | // Convert from 8 bit values to 32 bit values and shift left to the highest position 92 | highBitValue = _mm_cvtepu8_epi32(highBitValue); 93 | highBitValue = _mm_slli_epi32(highBitValue, 31); 94 | 95 | // OR together 96 | values = _mm_or_si128(values, highBitValue); 97 | 98 | // Store result 99 | _mm_storeu_si128((__m128i *) (result + i * 16), values); 100 | } 101 | 102 | ReadRec(result + 112, blocks + 29); 103 | } 104 | 105 | template<> 106 | void ReadRec(const char *result, uint64_t *blocks) {} 107 | 108 | void ReadNoCheck(char *result) 109 | { 110 | ReadRec<0>(result, _blocks); 111 | } 112 | }; 113 | // ------------------------------------------------------------------------------------- 114 | template 115 | struct InPlaceLikeUpdates { 116 | 117 | static bool CanBeUsed(uint32_t entry_size_param) { return entry_size_param % 16 == 0 && entry_size_param<128; } 118 | 119 | NonVolatileMemory nvm_data; 120 | uint64_t entry_count; 121 | InplaceField *entries; 122 | 123 | InPlaceLikeUpdates(const std::string &path, uint64_t entry_count) 124 | : nvm_data(path + "/inplace_file", sizeof(InplaceField) * entry_count) 125 | , entry_count(entry_count) 126 | { 127 | std::vector data(entry_size, 'a'); 128 | entries = (InplaceField *) nvm_data.Data(); 129 | for (uint64_t i = 0; i &op, uint32_t id) 136 | { 137 | 
entries[id].WriteNoCheck((const char *) &op); 138 | for (uint32_t i = 0; i); i += 64) { 139 | char *addr = (char *) (entries + id) + i; 140 | assert((uint64_t) addr % 64 == 0); 141 | alex_WriteBack(addr); 142 | } 143 | alex_SFence(); 144 | } 145 | 146 | uint64_t ReadSingleResult(Operation &result, uint32_t id) 147 | { 148 | entries[id].ReadNoCheck((char *) &result); 149 | return result.entry_id; 150 | } 151 | }; 152 | // ------------------------------------------------------------------------------------- 153 | } 154 | // ------------------------------------------------------------------------------------- 155 | -------------------------------------------------------------------------------- /inplace/LogBased.hpp: -------------------------------------------------------------------------------- 1 | #include "Common.hpp" 2 | #include "NonVolatileMemory.hpp" 3 | #include 4 | // ------------------------------------------------------------------------------------- 5 | template 6 | struct LogWriterZeroCached { 7 | static_assert(entry_size % 8 == 0); 8 | 9 | struct File { 10 | // Header 11 | uint8_t padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | uint8_t data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | uint64_t next_free; 21 | uint64_t cl_pos; 22 | uint64_t active_cl_mem[16]; 23 | uint64_t *active_cl; 24 | uint64_t log_read_offset; 25 | 26 | LogWriterZeroCached(NonVolatileMemory &nvm) 27 | : nvm(nvm) 28 | , file(*reinterpret_cast(nvm.Data())) 29 | { 30 | next_free = 0; 31 | cl_pos = 0; 32 | log_read_offset = 0; 33 | 34 | active_cl = active_cl_mem; 35 | while ((uint64_t) active_cl % 64 != 0) { 36 | active_cl++; 37 | } 38 | assert((uint64_t) active_cl % 64 == 0); 39 | memset((uint8_t *) active_cl, 0, 64); 40 | } 41 | 42 | uint64_t AddLogEntry(const Operation &entry) 43 | { 44 | uint32_t blks = entry_size / 8; 45 | 46 | assert(next_free % 8 == 0); 47 | assert(next_free + 
entry_size(&entry); 51 | uint8_t *nvm_begin = reinterpret_cast(file.data + (next_free & ~63ull)); 52 | 53 | // Head 54 | uint32_t pos = 0; 55 | for (; pos 136 | struct LogBasedUpdates { 137 | const static uint64_t LOG_BUFFER_SIZE = 50e9; 138 | NonVolatileMemory nvm_log; 139 | NonVolatileMemory nvm_data; 140 | LogWriterZeroCached log_writer; 141 | uint64_t entry_count; 142 | Operation *data_on_nvm; 143 | 144 | LogBasedUpdates(const std::string &path, uint64_t entry_count) 145 | : nvm_log(path + "/logbased_log_file", LOG_BUFFER_SIZE) 146 | , nvm_data(path + "/logbased_data_file", entry_count * sizeof(Operation)) 147 | , log_writer(nvm_log) 148 | , entry_count(entry_count) 149 | { 150 | assert(nvm_log.GetByteCount()>=LOG_BUFFER_SIZE); 151 | assert(nvm_data.GetByteCount()>=entry_size * entry_count); 152 | 153 | memset(nvm_data.Data(), 'a', nvm_data.GetByteCount()); 154 | pmem_persist(nvm_data.Data(), nvm_data.GetByteCount()); 155 | 156 | data_on_nvm = (Operation *) nvm_data.Data(); 157 | } 158 | 159 | ~LogBasedUpdates() 160 | { 161 | if (log_writer.GetWrittenByteCount()>=nvm_log.GetByteCount()) { 162 | std::cout << "write more log than we had space.. 
not good" << std::endl; 163 | exit(-1); 164 | } 165 | } 166 | 167 | void DoUpdate(const Operation &op, uint32_t id) 168 | { 169 | assert(id)); 174 | alex_FastCopyAndWriteBack(entry_begin, (ub1 *) &op, entry_size); 175 | alex_SFence(); 176 | } 177 | 178 | uint64_t ReadSingleResult(Operation &result, uint32_t id) 179 | { 180 | result = data_on_nvm[id]; 181 | return result.entry_id; 182 | } 183 | }; 184 | // ------------------------------------------------------------------------------------- 185 | -------------------------------------------------------------------------------- /inplace/NonVolatileMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | // ------------------------------------------------------------------------------------- 13 | class NonVolatileMemory { 14 | public: 15 | NonVolatileMemory(ub8 byte_count); // Uses dram that is not file backed 16 | NonVolatileMemory(const std::string &file_name, ub8 byte_count); // Uses dram or nvm depending on the file path 17 | NonVolatileMemory(const NonVolatileMemory &) = delete; 18 | NonVolatileMemory &operator=(const NonVolatileMemory &) = delete; 19 | 20 | ~NonVolatileMemory(); 21 | 22 | ub1 *Data() { return data_ptr; } 23 | ub1 *End() { return data_ptr + byte_count; } 24 | ub8 GetByteCount() { return byte_count; } 25 | 26 | bool IsNvm() const { return is_nvm; } 27 | 28 | private: 29 | ub1 *data_ptr; 30 | std::string file_name; 31 | const ub8 byte_count; 32 | bool is_nvm; 33 | bool is_mapped_file; 34 | int file_fd; 35 | }; 36 | // ------------------------------------------------------------------------------------- 37 | NonVolatileMemory::NonVolatileMemory(ub8 byte_count) 38 | : byte_count(byte_count) 39 | , is_mapped_file(false) 40 | 
{ 41 | assert(((ub8) ((off_t) byte_count)) == byte_count); 42 | 43 | assert(byte_count>0); // XXX 44 | 45 | data_ptr = AlignedAlloc(512, byte_count); 46 | 47 | is_nvm = false; 48 | } 49 | // ------------------------------------------------------------------------------------- 50 | NonVolatileMemory::NonVolatileMemory(const std::string &file_name, ub8 byte_count) 51 | : file_name(file_name) 52 | , byte_count(byte_count) 53 | , is_nvm(true) 54 | , is_mapped_file(true) 55 | { 56 | assert(((ub8) ((off_t) byte_count)) == byte_count); 57 | 58 | file_fd = open(file_name.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 59 | int td = ftruncate(file_fd, byte_count); 60 | if (file_fd<0 || td<0) { 61 | std::cout << "unable to create file" << std::endl; 62 | exit(-1); 63 | } 64 | data_ptr = (ub1 *) mmap(nullptr, byte_count, PROT_WRITE, MAP_SHARED, file_fd, 0); 65 | } 66 | // ------------------------------------------------------------------------------------- 67 | NonVolatileMemory::~NonVolatileMemory() 68 | { 69 | // Benchmark code .. 
dont care ;p 70 | } 71 | // ------------------------------------------------------------------------------------- 72 | -------------------------------------------------------------------------------- /inplace/ValidationBased.hpp: -------------------------------------------------------------------------------- 1 | #include "Common.hpp" 2 | #include 3 | #include 4 | // ------------------------------------------------------------------------------------- 5 | template 6 | struct ValidationBased { 7 | uint64_t entry_count; 8 | std::vector > data; 9 | 10 | ValidationBased(const std::string &, uint64_t entry_count) 11 | : entry_count(entry_count) 12 | , data(entry_count) 13 | { 14 | memset(data.data(), 'a', sizeof(Operation) * entry_count); 15 | } 16 | 17 | void DoUpdate(const Operation &op, uint32_t id) 18 | { 19 | assert(id &result, uint32_t id) 24 | { 25 | result = data[id]; 26 | return result.entry_id; 27 | } 28 | }; 29 | // ------------------------------------------------------------------------------------- 30 | -------------------------------------------------------------------------------- /inplace/bench.cpp: -------------------------------------------------------------------------------- 1 | #include "InPlace-highBit.hpp" 2 | #include "InPlace-sliding.hpp" 3 | #include "LogBased.hpp" 4 | #include "CowBased.hpp" 5 | #include "ValidationBased.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | // ------------------------------------------------------------------------------------- 13 | using namespace std; 14 | // ------------------------------------------------------------------------------------- 15 | #ifndef ENTRY_SIZE 16 | #error Please define ENTRY_SIZE 17 | #define ENTRY_SIZE 16 18 | #endif 19 | // ------------------------------------------------------------------------------------- 20 | uint64_t ENTRY_COUNT; 21 | uint64_t DATA_SIZE; 22 | string NVM_PATH; 23 | vector> log_result; 24 | constexpr bool VALIDATE = false; 25 | 
bool SEQUENTIAL; 26 | const uint64_t INDIVIDUAL_OPERATION_COUNT = 10000; 27 | const uint64_t ITERATION_COUNT = 10; 28 | // ------------------------------------------------------------------------------------- 29 | vector> PrepareSequentialOperations() 30 | { 31 | std::vector> results; 32 | results.resize(ENTRY_COUNT); 33 | for (uint64_t i = 0; i> PrepareRandomOperations() 41 | { 42 | // Init 43 | uint64_t *helper = new uint64_t[ENTRY_COUNT]; 44 | for (uint64_t i = 0; i> results; 56 | results.resize(ENTRY_COUNT); 57 | for (uint64_t i = 0; i GetConfidenceIntervall(vector &individual_times, string hint) 67 | { 68 | sort(individual_times.begin(), individual_times.end()); 69 | 70 | double lower_time = individual_times[INDIVIDUAL_OPERATION_COUNT * 0.025]; 71 | double med_time = individual_times[INDIVIDUAL_OPERATION_COUNT * 0.50]; 72 | double upper_time = individual_times[INDIVIDUAL_OPERATION_COUNT * 0.975]; 73 | double lower_per_second = 1e9 / upper_time; // swapped!! 74 | double med_per_second = 1e9 / med_time; 75 | double upper_per_second = 1e9 / lower_time; 76 | 77 | return make_tuple(med_per_second, med_per_second - lower_per_second, upper_per_second - med_per_second); 78 | } 79 | // ------------------------------------------------------------------------------------- 80 | template 81 | void RunExperiment(const vector> &operations, const std::string &competitor_name) 82 | { 83 | Operation buffer = {}; 84 | vector ids_only; 85 | ids_only.reserve(operations.size()); 86 | for (auto &iter : operations) { 87 | ids_only.push_back(iter.entry_id); 88 | } 89 | 90 | COMPETITOR competitor(NVM_PATH, ENTRY_COUNT); 91 | 92 | uint64_t check_sum_to_prevent_optimizations = 0; 93 | 94 | // Updates throughput -> execute many updates and take the avg time 95 | vector updates_per_second; 96 | for (uint32_t iteration = 0; iteration(end_ts - begin_ts).count(); 103 | updates_per_second.push_back((operations.size() * 1e9) / ns); 104 | } 105 | 106 | // // Individual updates -> get the time 
for individual updates 107 | // double lower_updates_per_second_factor = 0; 108 | // double upper_updates_per_second_factor = 0; 109 | // double med_updates_per_second = 0; 110 | // { 111 | // vector times(INDIVIDUAL_OPERATION_COUNT); 112 | // for (uint64_t i = 0; i(end_ts - begin_ts).count(); 117 | // alex_MFence(); 118 | // } 119 | // tie(med_updates_per_second, lower_updates_per_second_factor, upper_updates_per_second_factor) = GetConfidenceIntervall(times, "update"); 120 | // } 121 | 122 | // Read throughput -> execute many reads and take the avg time 123 | vector reads_per_second; 124 | for (uint32_t iteration = 0; iteration(end_ts - begin_ts).count(); 131 | reads_per_second.push_back((operations.size() * 1e9) / ns); 132 | } 133 | 134 | // // Individual reads -> get the time for individual reads 135 | // double lower_reads_per_second_factor = 0; 136 | // double upper_reads_per_second_factor = 0; 137 | // double med_reads_per_second = 0; 138 | // { 139 | // vector times(INDIVIDUAL_OPERATION_COUNT); 140 | // for (uint64_t i = 0; i(end_ts - begin_ts).count(); 145 | // alex_MFence(); 146 | // } 147 | // tie(med_reads_per_second, lower_reads_per_second_factor, upper_reads_per_second_factor) = GetConfidenceIntervall(times, "reads"); 148 | // } 149 | 150 | // Dependent read throughput -> execute many dependent reads and take the avg time 151 | vector dep_reads_per_second; 152 | for (uint32_t iteration = 0; iteration(end_ts - begin_ts).count(); 165 | dep_reads_per_second.push_back((operations.size() * 1e9) / ns); 166 | } 167 | 168 | // // Individual dependent reads -> get the time for individual dependent reads 169 | // double lower_dep_reads_per_second_factor = 0; 170 | // double upper_dep_reads_per_second_factor = 0; 171 | // double med_dep_reads_per_second = 0; 172 | // { 173 | // for (uint64_t u = 0; u times(INDIVIDUAL_OPERATION_COUNT); 178 | // uint64_t next_id = 0; 179 | // for (uint64_t i = 0; i(end_ts - begin_ts).count(); 184 | // alex_MFence(); 185 | // } 186 
| // check_sum_to_prevent_optimizations += next_id; 187 | // tie(med_dep_reads_per_second, lower_dep_reads_per_second_factor, upper_dep_reads_per_second_factor) = GetConfidenceIntervall(times, "dep"); 188 | // } 189 | 190 | // //@formatter:off 191 | // cout << "res:" 192 | // << " technique: " << competitor_name 193 | // << " checksum: " << check_sum_to_prevent_optimizations 194 | // << " order: " << (SEQUENTIAL ? "seq" : "rand") 195 | // << " entry_size: " << ENTRY_SIZE 196 | // << " updates(M): " << med_updates_per_second / 1000 / 1000.0 197 | // << " " << (lower_updates_per_second_factor) / 1000 / 1000.0 198 | // << " " << (upper_updates_per_second_factor) / 1000 / 1000.0 199 | // << " reads(M): " << med_reads_per_second / 1000 / 1000.0 200 | // << " " << (lower_reads_per_second_factor) / 1000 / 1000.0 201 | // << " " << (upper_reads_per_second_factor) / 1000 / 1000.0 202 | // << " dep_reads(M): " << med_dep_reads_per_second / 1000 / 1000.0 203 | // << " " << (lower_dep_reads_per_second_factor) / 1000 / 1000.0 204 | // << " " << (upper_dep_reads_per_second_factor) / 1000 / 1000.0 205 | // << endl; 206 | // //@formatter:on 207 | 208 | // //@formatter:off 209 | // cout << "res:" 210 | // << " technique: " << competitor_name 211 | // << " checksum: " << check_sum_to_prevent_optimizations 212 | // << " order: " << (SEQUENTIAL ? 
"seq" : "rand") 213 | // << " entry_size: " << ENTRY_SIZE 214 | // << " updates(M): " << updates_per_second / 1000 / 1000.0 215 | // << " " << (updates_per_second - updates_per_second * lower_updates_per_second_factor) / 1000 / 1000.0 216 | // << " " << (updates_per_second * upper_updates_per_second_factor - updates_per_second) / 1000 / 1000.0 217 | // << " reads(M): " << reads_per_second / 1000 / 1000.0 218 | // << " " << (reads_per_second - reads_per_second * lower_reads_per_second_factor) / 1000 / 1000.0 219 | // << " " << (reads_per_second * upper_reads_per_second_factor - reads_per_second) / 1000 / 1000.0 220 | // << " dep_reads(M): " << dep_reads_per_second / 1000 / 1000.0 221 | // << " " << (dep_reads_per_second - dep_reads_per_second * lower_dep_reads_per_second_factor) / 1000 / 1000.0 222 | // << " " << (dep_reads_per_second * upper_dep_reads_per_second_factor - dep_reads_per_second) / 1000 / 1000.0 223 | // << endl; 224 | // //@formatter:on 225 | 226 | sort(updates_per_second.begin(), updates_per_second.end()); 227 | sort(reads_per_second.begin(), reads_per_second.end()); 228 | sort(dep_reads_per_second.begin(), dep_reads_per_second.end()); 229 | 230 | //@formatter:off 231 | cout << "res:" 232 | << " technique: " << competitor_name 233 | << " checksum: " << check_sum_to_prevent_optimizations 234 | << " order: " << (SEQUENTIAL ? 
"seq" : "rand") 235 | << " entry_size: " << ENTRY_SIZE 236 | << " updates(M): " << (updates_per_second[4] + updates_per_second[5]) / 2e6 237 | << " " << updates_per_second[0] / 1e6 238 | << " " << updates_per_second[9] / 1e6 239 | << " reads(M): " << (reads_per_second[4] + reads_per_second[5]) / 2e6 240 | << " " << reads_per_second[0] / 1e6 241 | << " " << reads_per_second[9] / 1e6 242 | << " dep_reads(M): " << (dep_reads_per_second[4] + dep_reads_per_second[5]) / 2e6 243 | << " " << dep_reads_per_second[0] / 1e6 244 | << " " << dep_reads_per_second[9] / 1e6 245 | << endl; 246 | //@formatter:on 247 | 248 | if constexpr (VALIDATE) { 249 | Operation validation_buffer; 250 | ValidationBased validation(NVM_PATH, ENTRY_COUNT); 251 | for (uint64_t u = 0; u) / 1000 / 1000 / 1000.0 << endl; 296 | cout << "order " << (SEQUENTIAL ? "sequential" : "random") << endl; 297 | cout << "nvm_path " << NVM_PATH << endl; 298 | cout << "------" << endl; 299 | 300 | cpu_set_t cpuset; 301 | CPU_ZERO(&cpuset); 302 | CPU_SET(0, &cpuset); 303 | pthread_t currentThread = pthread_self(); 304 | if (pthread_setaffinity_np(currentThread, sizeof(cpu_set_t), &cpuset) != 0) { 305 | throw; 306 | } 307 | 308 | if (ENTRY_COUNT == 0) { 309 | cout << "need at least one entry" << endl; 310 | throw; 311 | } 312 | 313 | // Sequential Experiments 314 | if (SEQUENTIAL) { 315 | vector> operations = PrepareSequentialOperations(); 316 | RunExperiment>(operations, "log"); 317 | RunExperiment>(operations, "cow"); 318 | // RunExperiment>(operations, "high-bit"); 319 | RunExperiment>(operations, "sliding-bit"); 320 | } 321 | 322 | // Random 323 | if (!SEQUENTIAL) { 324 | vector> operations = PrepareRandomOperations(); 325 | RunExperiment>(operations, "log"); 326 | RunExperiment>(operations, "cow"); 327 | // RunExperiment>(operations, "high-bit"); 328 | RunExperiment>(operations, "sliding-bit"); 329 | } 330 | 331 | cout << "done 3" << endl; 332 | return 0; 333 | } 334 | // 
------------------------------------------------------------------------------------- 335 | -------------------------------------------------------------------------------- /inplace/tester.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | // ------------------------------------------------------------------------------------- 6 | // Based on: https://en.wikipedia.org/wiki/Xorshift 7 | class Random { 8 | public: 9 | explicit Random(uint64_t seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 10 | : seed(seed) 11 | { 12 | } 13 | 14 | uint64_t Rand() 15 | { 16 | seed ^= (seed << 13); 17 | seed ^= (seed >> 7); 18 | return (seed ^= (seed << 17)); 19 | } 20 | 21 | uint64_t seed; 22 | }; 23 | // ------------------------------------------------------------------------------------- 24 | char *CreateAlignedString(Random &ranny, uint32_t len) 25 | { 26 | char *data = (char *) malloc(len + 1); 27 | assert((uint64_t) data % 4 == 0); 28 | 29 | for (uint32_t i = 0; i(data_in); 42 | for (uint32_t i = 0; i 49 | struct Block { 50 | static_assert(pos<=31); 51 | 52 | uint64_t data; 53 | 54 | Block() 55 | : data(0) {} 56 | 57 | uint32_t GetVersionNoCheck() const { return (((data & (1ull << (pos + 32))) << 1) | (data & (1ull << pos))) >> pos; } 58 | uint32_t GetOldStateNoCheck() const { return (data >> 32) & ~(1ull << pos); } 59 | uint32_t GetNewStateNoCheck() const { return (data & 0xffffffff) & ~(1ull << pos); } 60 | 61 | void WriteNoCheck(uint32_t new_state) 62 | { 63 | data = (data << 32) | new_state; 64 | } 65 | 66 | friend std::ostream &operator<<(std::ostream &os, const Block &b) 67 | { 68 | uint32_t version = b.GetVersionNoCheck(); 69 | uint32_t old_state = b.GetOldStateNoCheck(); 70 | uint32_t new_state = b.GetNewStateNoCheck(); 71 | os << "version: " << version << " old: "; 72 | DumpHex(&old_state, 4, os); 73 | os << " new: "; 74 | DumpHex(&new_state, 4, 
os); 75 | return os; 76 | } 77 | }; 78 | static_assert(sizeof(Block<0>) == 8); 79 | // ------------------------------------------------------------------------------------- 80 | struct InplaceField16 { 81 | 82 | alignas(64) 83 | Block<0> b0; 84 | Block<1> b1; 85 | Block<2> b2; 86 | Block<3> b3; 87 | Block<4> b4; 88 | 89 | void Print(std::ostream &out) 90 | { 91 | out << b0 << std::endl; 92 | out << b1 << std::endl; 93 | out << b2 << std::endl; 94 | out << b3 << std::endl; 95 | out << b4 << std::endl; 96 | } 97 | 98 | void Reset() 99 | { 100 | b0.data = 0; 101 | b1.data = 0; 102 | b2.data = 0; 103 | b3.data = 0; 104 | b4.data = 0; 105 | } 106 | 107 | void WriteNoCheck(const char *data) 108 | { 109 | static std::array VersionBit = {1, 1, 0, 0}; 110 | 111 | assert((uint64_t) data % 4 == 0); 112 | assert((uint64_t) &b0 % 64 == 0); 113 | 114 | uint32_t *input = (uint32_t *) data; 115 | 116 | uint32_t next_version_bit = VersionBit[b0.GetVersionNoCheck()]; 117 | if (next_version_bit) { 118 | //@formatter:off 119 | b1.WriteNoCheck( input[0] | 0x02); 120 | b2.WriteNoCheck( input[1] | 0x04); 121 | b3.WriteNoCheck( input[2] | 0x08); 122 | b4.WriteNoCheck( input[3] | 0x10); 123 | b0.WriteNoCheck( (input[0] & 0x02) 124 | | (input[1] & 0x04) 125 | | (input[2] & 0x08) 126 | | (input[3] & 0x10) 127 | | 0x1); 128 | //@formatter:on 129 | } else { 130 | //@formatter:off 131 | b1.WriteNoCheck( input[0] & ~0x02); 132 | b2.WriteNoCheck( input[1] & ~0x04); 133 | b3.WriteNoCheck( input[2] & ~0x08); 134 | b4.WriteNoCheck( input[3] & ~0x10); 135 | b0.WriteNoCheck( (input[0] & 0x02) 136 | | (input[1] & 0x04) 137 | | (input[2] & 0x08) 138 | | (input[3] & 0x10) 139 | | 0x0); 140 | //@formatter:on 141 | } 142 | } 143 | 144 | char *ReadNoCheck() 145 | { 146 | char *result = (char *) malloc(16); 147 | assert((uint64_t) result % 4 == 0); 148 | uint32_t *output = (uint32_t *) result; 149 | 150 | output[0] = (b1.GetNewStateNoCheck() & ~0x02) | (b0.GetNewStateNoCheck() & 0x02); 151 | output[1] = 
(b2.GetNewStateNoCheck() & ~0x04) | (b0.GetNewStateNoCheck() & 0x04); 152 | output[2] = (b3.GetNewStateNoCheck() & ~0x08) | (b0.GetNewStateNoCheck() & 0x08); 153 | output[3] = (b4.GetNewStateNoCheck() & ~0x10) | (b0.GetNewStateNoCheck() & 0x10); 154 | 155 | return result; 156 | } 157 | }; 158 | // ------------------------------------------------------------------------------------- 159 | template 160 | void TestInPlaceUpdates() 161 | { 162 | Random ranny; 163 | InplaceField16 field; 164 | 165 | for (uint32_t i = 0; i<10000; i++) { 166 | char *input = CreateAlignedString(ranny, BYTE_COUNT); 167 | field.WriteNoCheck(input); 168 | char *output = field.ReadNoCheck(); 169 | 170 | for (uint32_t i = 0; i(); 190 | // TestInPlaceUpdates<20>(); 191 | // TestInPlaceUpdates<64>(); 192 | // TestInPlaceUpdates<1000>(); 193 | // TestInPlaceUpdates<10000>(); 194 | } 195 | // ------------------------------------------------------------------------------------- 196 | int main() 197 | { 198 | TestInPlaceUpdates<16>(); 199 | return 0; 200 | } 201 | // ------------------------------------------------------------------------------------- 202 | -------------------------------------------------------------------------------- /interference/Common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "libpmem.h" 11 | // ------------------------------------------------------------------------------------- 12 | using ub1 = uint8_t; 13 | using ub2 = uint16_t; 14 | using ub4 = uint32_t; 15 | using ub8 = uint64_t; 16 | // ------------------------------------------------------------------------------------- 17 | using sb1 = int8_t; 18 | using sb2 = int16_t; 19 | using sb4 = int32_t; 20 | using sb8 = int64_t; 21 | // 
------------------------------------------------------------------------------------- 22 | namespace constants { 23 | const static ub4 kCacheLineByteCount = 64; // 64 Byte cache lines 24 | const static ub4 kPageByteCount = 1 << 14; // 16 KB 25 | const static ub4 kCacheLinesPerPage = kPageByteCount / kCacheLineByteCount; // 16KB/64Byte 26 | const static ub4 kPageAlignment = 512; // For O_Direct 27 | const static ub8 kInvalidPageId = ~0; 28 | } 29 | // ------------------------------------------------------------------------------------- 30 | static std::mutex global_io_mutex; 31 | // ------------------------------------------------------------------------------------- 32 | #define a_mm_clflush(addr)\ 33 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 34 | #define a_mm_clflushopt(addr)\ 35 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 36 | #define a_mm_clwb(addr)\ 37 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 38 | #define a_mm_pcommit()\ 39 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 40 | // ------------------------------------------------------------------------------------- 41 | // Based on: https://en.wikipedia.org/wiki/Xorshift 42 | class Random { 43 | public: 44 | explicit Random(ub8 seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 45 | : seed(seed) 46 | { 47 | } 48 | 49 | uint64_t Rand() 50 | { 51 | seed ^= (seed << 13); 52 | seed ^= (seed >> 7); 53 | return (seed ^= (seed << 17)); 54 | } 55 | 56 | ub8 seed; 57 | }; 58 | // ------------------------------------------------------------------------------------- 59 | template 60 | bool IsAlignedAt(const void *ptr) 61 | { 62 | return ((uint64_t) ptr) % byteCount == 0; 63 | } 64 | // ------------------------------------------------------------------------------------- 65 | uint8_t *AlignedAlloc(uint64_t alignment, uint64_t size) 66 | { 67 | void *result = nullptr; 68 | int error = posix_memalign(&result, 
alignment, size); 69 | if (error) { 70 | std::cout << "error while allocating" << std::endl; 71 | throw; 72 | } 73 | return reinterpret_cast(result); 74 | } 75 | // ------------------------------------------------------------------------------------- 76 | inline void alex_FlushOpt(void *addr) 77 | { 78 | a_mm_clflushopt((char *) addr); 79 | } 80 | // ------------------------------------------------------------------------------------- 81 | inline void alex_WriteBack(void *addr, ub4 len) 82 | { 83 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 84 | a_mm_clwb((char *) uptr); 85 | } 86 | } 87 | // ------------------------------------------------------------------------------------- 88 | inline void alex_WriteBack(void *addr) 89 | { 90 | addr = (ub1 *) ((uintptr_t) addr & ~(64 - 1)); 91 | a_mm_clwb((char *) addr); 92 | } 93 | // ------------------------------------------------------------------------------------- 94 | inline void alex_SFence() 95 | { 96 | _mm_sfence(); 97 | } 98 | // ------------------------------------------------------------------------------------- 99 | struct Worker { 100 | Worker(ub4 tid, std::string config) 101 | : tid(tid) 102 | , config(config) {} 103 | 104 | ub4 tid; 105 | std::string config; 106 | std::atomic run = false; 107 | std::atomic ready = false; 108 | std::atomic stop = false; 109 | std::atomic performed_iteration_count = 0; 110 | 111 | virtual void PrintResultOfLastIteration(ub4 iteration) = 0; 112 | }; 113 | // ------------------------------------------------------------------------------------- 114 | -------------------------------------------------------------------------------- /interference/LogWriter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include "NonVolatileMemory.hpp" 5 | #include 6 | 
#include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // ------------------------------------------------------------------------------------- 14 | namespace log_utils { 15 | // ------------------------------------------------------------------------------------- 16 | inline ub8 alex_PopCount(ub8 value) 17 | { 18 | return _mm_popcnt_u64(value); 19 | } 20 | // ------------------------------------------------------------------------------------- 21 | inline void alex_FlushClToNvm(void *dest, void *src) 22 | { 23 | assert(((ub8) dest) % 64 == 0); 24 | assert(((ub8) src) % 64 == 0); 25 | __m512i reg = _mm512_load_si512(src); 26 | _mm512_stream_si512((__m512i *) dest, reg); 27 | } 28 | // ------------------------------------------------------------------------------------- 29 | ub4 FastPopCount512(const ub1 *ptr) 30 | { 31 | ub4 res = 0; 32 | for (ub4 i = 0; i<64; i += 8) { 33 | res += alex_PopCount(*(ub8 *) (&ptr[i])); 34 | } 35 | return res; 36 | } 37 | // ------------------------------------------------------------------------------------- 38 | void FastCopy512(ub1 *dest, const ub1 *src) 39 | { 40 | assert(((ub8) dest) % 64 == 0); 41 | memcpy(dest, src, 64); 42 | } 43 | // ------------------------------------------------------------------------------------- 44 | } 45 | // ------------------------------------------------------------------------------------- 46 | struct LogWriterZeroBlocked { 47 | 48 | struct Entry { 49 | ub4 payload_size; // header 50 | ub4 bit_count; // header 51 | ub1 data[]; 52 | }; 53 | 54 | struct File { 55 | // Header 56 | ub1 padding[constants::kCacheLineByteCount]; 57 | 58 | // Log data 59 | ub1 data[]; 60 | }; 61 | static_assert(sizeof(File) == 64, ""); 62 | 63 | NonVolatileMemory &nvm; 64 | File &file; // == nvm 65 | ub8 next_free; 66 | 67 | LogWriterZeroBlocked(NonVolatileMemory &nvm) 68 | : nvm(nvm) 69 | , file(*reinterpret_cast(nvm.Data())) 70 | { 71 | Reset(); 72 | } 73 | 74 | void Reset() 75 | { 
76 | next_free = 0; 77 | } 78 | 79 | ub8 AddLogEntry(const Entry &entry) 80 | { 81 | ub4 size = entry.payload_size + 8; 82 | 83 | assert(next_free % 8 == 0); 84 | assert(entry.bit_count == 0); 85 | assert(entry.payload_size % 8 == 0); 86 | assert(next_free + size=64); 89 | 90 | const ub1 *ram_begin = reinterpret_cast(&entry); 91 | ub1 *nvm_begin = reinterpret_cast(nvm.Data() + next_free); 92 | 93 | // Copy first cache line (and do not flush) 94 | ub4 pop_cnt = 0; 95 | log_utils::FastCopy512(nvm_begin, ram_begin); 96 | pop_cnt += log_utils::FastPopCount512(ram_begin); 97 | 98 | // Copy remaining full cache lines (and flush) 99 | ub4 pos = 64; 100 | for (; pos + 63(nvm_begin)->bit_count = pop_cnt; 117 | alex_WriteBack(nvm_begin); 118 | alex_SFence(); 119 | 120 | // Advance and done 121 | next_free += entry.payload_size + 8; 122 | next_free = (next_free + 63) & ~63ull; 123 | assert(next_free % 64 == 0); 124 | return next_free; 125 | } 126 | 127 | ub8 GetWrittenByteCount() const 128 | { 129 | return next_free + sizeof(File); 130 | } 131 | 132 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 133 | { 134 | ub8 current = 0; 135 | ub8 used_size = 0; 136 | 137 | vectorentries; 138 | while (used_sizepayload_size = payload_size; 143 | entry->bit_count = 0; 144 | entries.push_back(entry); 145 | current += payload_size + 8; 146 | used_size += payload_size; 147 | } 148 | return entries; 149 | } 150 | }; 151 | // ------------------------------------------------------------------------------------- 152 | class LogWriter : public Worker { 153 | unique_ptr wal; 154 | unique_ptr nvm; 155 | vector memory; 156 | vector entries; 157 | 158 | ub8 byte_count; 159 | string nvm_file; 160 | 161 | const ub4 ENTRY_SIZE = 104; 162 | 163 | vector nano_seconds; 164 | 165 | public: 166 | LogWriter(const string &nvm_file, ub8 byte_count, ub4 tid, string config) 167 | : Worker(tid, config) 168 | , byte_count(byte_count) 169 | , 
nvm_file(nvm_file) 170 | { 171 | } 172 | 173 | void Run() 174 | { 175 | Setup(); 176 | ready = true; 177 | while (!run) { 178 | this_thread::yield(); 179 | } 180 | 181 | while (!stop) { 182 | auto begin = chrono::high_resolution_clock::now(); 183 | DoOneRun(); 184 | auto end = chrono::high_resolution_clock::now(); 185 | nano_seconds.push_back(chrono::duration_cast(end - begin).count()); 186 | performed_iteration_count++; 187 | } 188 | } 189 | 190 | void PrintResultOfLastIteration(ub4 iteration) 191 | { 192 | if (!stop || iteration>=performed_iteration_count) { 193 | throw; 194 | } 195 | double ns = nano_seconds[iteration]; 196 | //@formatter:off 197 | cout << "RES log_writer " << config 198 | << " tid: " << tid 199 | << " iterations: " << iteration << "/" << performed_iteration_count 200 | << " perf(logs/s): " << ub8(entries.size() / (ns / 1e9)) << endl; 201 | //@formatter:on 202 | } 203 | 204 | LogWriter(const LogWriter &) = delete; 205 | LogWriter &operator=(const LogWriter &) = delete; 206 | 207 | private: 208 | void Setup() 209 | { 210 | Random ranny; 211 | nvm = make_unique(nvm_file, byte_count * 2); // * 2 to not overflow because of header 212 | wal = make_unique(*nvm); 213 | 214 | memory = RandomizedMemory(byte_count * 2, ranny); 215 | entries = LogWriterZeroBlocked::CreateRandomEntries(memory, ENTRY_SIZE / 8, ENTRY_SIZE / 8, byte_count, ranny); 216 | } 217 | 218 | void DoOneRun() 219 | { 220 | wal->Reset(); 221 | for (LogWriterZeroBlocked::Entry *entry : entries) { 222 | wal->AddLogEntry(*entry); 223 | } 224 | } 225 | 226 | vector RandomizedMemory(ub8 size, Random &ranny) 227 | { 228 | vector memory(size); 229 | for (ub8 i = 0; i 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // ------------------------------------------------------------------------------------- 14 | class NonVolatileMemory { 15 | public: 16 | NonVolatileMemory(ub8 byte_count); // Uses dram that is not file backed 17 | 
NonVolatileMemory(const std::string &file_name, ub8 byte_count); // Uses dram or nvm depending on the file path 18 | NonVolatileMemory(const NonVolatileMemory &) = delete; 19 | NonVolatileMemory &operator=(const NonVolatileMemory &) = delete; 20 | 21 | ~NonVolatileMemory(); 22 | 23 | ub1 *Data() { return data_ptr; } 24 | ub1 *End() { return data_ptr + byte_count; } 25 | ub8 GetByteCount() { return byte_count; } 26 | 27 | void FlushAll(); 28 | void Flush(ub8 from, ub8 length); 29 | 30 | NvmBufferFrame &GetNvmBufferFrame(ub8 id) 31 | { 32 | assert(data_ptr + id * sizeof(NvmBufferFrame)<=End()); 33 | assert(reinterpret_cast(data_ptr)[id].GetPage().Ptr() != nullptr); 34 | return reinterpret_cast(data_ptr)[id]; 35 | } 36 | 37 | bool IsRealNvm() const { return is_real_nvm; } 38 | 39 | private: 40 | ub1 *data_ptr; 41 | std::string file_name; 42 | const ub8 byte_count; 43 | bool is_real_nvm; 44 | bool is_mapped_file; 45 | }; 46 | // ------------------------------------------------------------------------------------- 47 | NonVolatileMemory::NonVolatileMemory(ub8 byte_count) 48 | : byte_count(byte_count) 49 | , is_mapped_file(false) 50 | { 51 | assert(((ub8) ((off_t) byte_count)) == byte_count); 52 | assert(byte_count>0); 53 | 54 | data_ptr = AlignedAlloc(512, byte_count); 55 | 56 | is_real_nvm = false; 57 | } 58 | // ------------------------------------------------------------------------------------- 59 | NonVolatileMemory::NonVolatileMemory(const std::string &file_name, ub8 byte_count) 60 | : file_name(file_name) 61 | , byte_count(byte_count) 62 | , is_mapped_file(true) 63 | { 64 | assert(((ub8) ((off_t) byte_count)) == byte_count); 65 | 66 | // No need to do anything if zero byte are requested; Does this happen ?? 
67 | assert(byte_count>0); // XXX 68 | if (byte_count == 0) { 69 | data_ptr = nullptr; 70 | return; 71 | } 72 | 73 | // Map the file (our pmem wrapper works with normal memory by falling back to mmap) 74 | size_t acquired_byte_count; 75 | data_ptr = reinterpret_cast(pmem_map_file(file_name.c_str(), byte_count, PMEM_FILE_CREATE, 0666, &acquired_byte_count, nullptr)); 76 | if (data_ptr == nullptr) { 77 | std::cout << "Failed to create file: '" << file_name << "'." << std::endl; 78 | throw; 79 | } 80 | if (acquired_byte_count != byte_count) { 81 | std::cout << "Failed to allocate requested size for file: '" << file_name << "'. (Requested=" << byte_count << ", Aquired=" << acquired_byte_count << ")" << std::endl; 82 | throw; 83 | } 84 | 85 | // Do this only once, as it is expensive 86 | is_real_nvm = pmem_is_pmem(data_ptr, 1); 87 | } 88 | // ------------------------------------------------------------------------------------- 89 | NonVolatileMemory::~NonVolatileMemory() 90 | { 91 | if (is_mapped_file) { 92 | pmem_unmap(data_ptr, byte_count); 93 | } else { 94 | free(data_ptr); 95 | } 96 | } 97 | // ------------------------------------------------------------------------------------- 98 | void NonVolatileMemory::FlushAll() 99 | { 100 | if (is_mapped_file) { 101 | pmem_persist(data_ptr, byte_count); 102 | } 103 | } 104 | // ------------------------------------------------------------------------------------- 105 | void NonVolatileMemory::Flush(ub8 from, ub8 length) 106 | { 107 | if (is_mapped_file) { 108 | pmem_persist(data_ptr + from, length); 109 | } 110 | } 111 | // ------------------------------------------------------------------------------------- 112 | 113 | // ------------------------------------------------------------------------------------- 114 | class VolatileMemory { 115 | 116 | public: 117 | VolatileMemory(ub8 byte_count); 118 | ~VolatileMemory(); 119 | VolatileMemory(const VolatileMemory &) = delete; 120 | VolatileMemory &operator=(const VolatileMemory &) 
= delete; 121 | 122 | ub1 *Data() { return data_ptr; } 123 | ub1 *End() { return data_ptr + byte_count; } 124 | 125 | ub8 GetByteCount() const { return byte_count; } 126 | 127 | template 128 | T *GetPtr(ub8 offset = 0) { return reinterpret_cast(data_ptr) + offset; } 129 | ub1 *GetPtr(ub8 offset = 0) { return data_ptr + offset; } 130 | 131 | private: 132 | ub1 *const data_ptr; 133 | const ub8 byte_count; 134 | }; 135 | // ------------------------------------------------------------------------------------- 136 | VolatileMemory::VolatileMemory(ub8 byte_count) 137 | : data_ptr(new ub1[byte_count]) 138 | , byte_count(byte_count) 139 | { 140 | assert(byte_count != 0); 141 | } 142 | // ------------------------------------------------------------------------------------- 143 | VolatileMemory::~VolatileMemory() 144 | { 145 | delete[] data_ptr; 146 | } 147 | // ------------------------------------------------------------------------------------- 148 | -------------------------------------------------------------------------------- /interference/Pages.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include 5 | #include 6 | // ------------------------------------------------------------------------------------- 7 | class Page { 8 | public: 9 | template 10 | T *Ptr() { return reinterpret_cast( data.data()); } 11 | 12 | template 13 | const T *Ptr() const { return reinterpret_cast( data.data()); } 14 | 15 | private: 16 | std::array data; 17 | }; 18 | // ------------------------------------------------------------------------------------- 19 | template 20 | class AutoAlignedPage { 21 | public: 22 | Page &GetPage() 23 | { 24 | assert(padding.data()(res)); 27 | return *reinterpret_cast( res); 28 | } 29 | 30 | const Page &GetPage() const 31 | { 32 | assert(padding.data()(res)); 35 | return 
*reinterpret_cast( res); 36 | } 37 | 38 | private: 39 | std::array padding; 40 | std::array data; 41 | }; 42 | // ------------------------------------------------------------------------------------- 43 | static_assert(sizeof(Page) == constants::kPageByteCount, "Pages are always 16kb"); 44 | // ------------------------------------------------------------------------------------- 45 | class NvmBufferFrame { 46 | public: 47 | inline void init() 48 | { 49 | dirty = false; 50 | page_id = constants::kInvalidPageId; 51 | pvn = 0; 52 | } 53 | 54 | Page &GetPage() { return page.GetPage(); } 55 | const Page &GetPage() const { return page.GetPage(); } 56 | 57 | bool dirty; 58 | ub8 page_id; 59 | ub8 pvn; 60 | 61 | private: 62 | AutoAlignedPage page; 63 | }; 64 | // ------------------------------------------------------------------------------------- 65 | static_assert(sizeof(NvmBufferFrame) % 8 == 0, "NvmBufferFrame should be eight byte aligned."); 66 | // ------------------------------------------------------------------------------------- 67 | -------------------------------------------------------------------------------- /interference/RandomReader.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include "NonVolatileMemory.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // ------------------------------------------------------------------------------------- 14 | using namespace std; 15 | // ------------------------------------------------------------------------------------- 16 | class RandomReader : public Worker { 17 | string nvm_file; 18 | ub8 byte_count; 19 | bool is_ram; 20 | ub8 *data; 21 | unique_ptr nvm; 22 | vector nano_seconds; 23 | ub8 sum; 24 | 25 | public: 26 | RandomReader(const string &nvm_file, ub8 byte_count, 
bool is_ram, ub4 tid, string config) 27 | : Worker(tid, config) 28 | , byte_count(byte_count) 29 | , nvm_file(nvm_file) 30 | , is_ram(is_ram) 31 | { 32 | assert(byte_count % 8 == 0); 33 | if (byte_count % 8 != 0) { 34 | throw "byte_count % 8"; 35 | } 36 | } 37 | 38 | void Run() 39 | { 40 | Setup(); 41 | ready = true; 42 | while (!run) { 43 | this_thread::yield(); 44 | } 45 | 46 | while (!stop) { 47 | auto begin = chrono::high_resolution_clock::now(); 48 | sum += DoOneRun(); 49 | auto end = chrono::high_resolution_clock::now(); 50 | nano_seconds.push_back(chrono::duration_cast(end - begin).count()); 51 | performed_iteration_count++; 52 | } 53 | } 54 | 55 | void PrintResultOfLastIteration(ub4 iteration) 56 | { 57 | // if (!stop || iteration>=performed_iteration_count) { 58 | // throw; 59 | // } 60 | double ns = nano_seconds[iteration]; 61 | //@formatter:off 62 | cout << "RES " << (is_ram ? "rnd_ram_reader " : "rnd_nvm_reader ") << config 63 | << " tid: " << tid 64 | << " iterations: " << iteration << "/" << performed_iteration_count 65 | << " sum: " << sum 66 | << " perf(reads/s): " << ub8((byte_count / 8) / (ns / 1e9)) << endl; 67 | //@formatter:on 68 | } 69 | 70 | RandomReader(const RandomReader &) = delete; 71 | RandomReader &operator=(const RandomReader &) = delete; 72 | 73 | private: 74 | void Setup() 75 | { 76 | // Alloc 77 | if (is_ram) { 78 | data = (ub8 *) malloc(byte_count + 64); 79 | } else { 80 | nvm = make_unique(nvm_file, byte_count + 64); 81 | data = (ub8 *) nvm->Data(); 82 | } 83 | 84 | while ((ub8) data % 8 != 0) { 85 | data++; 86 | } 87 | assert((ub8) data % 8 == 0); 88 | 89 | Random ranny; 90 | for (uint64_t i = 0; i<(byte_count / 8); i++) { 91 | data[i] = ranny.Rand(); 92 | } 93 | } 94 | 95 | ub8 DoOneRun() 96 | { 97 | Random ranny; 98 | ub8 sum = 0; 99 | for (ub8 i = 0; i 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // 
------------------------------------------------------------------------------------- 14 | using namespace std; 15 | // ------------------------------------------------------------------------------------- 16 | class SequentialReader : public Worker { 17 | string nvm_file; 18 | ub8 byte_count; 19 | bool is_ram; 20 | ub8 *data; 21 | ub8 expected_sum = 0; 22 | unique_ptr nvm; 23 | vector nano_seconds; 24 | 25 | public: 26 | SequentialReader(const string &nvm_file, ub8 byte_count, bool is_ram, ub4 tid, string config) 27 | : Worker(tid, config) 28 | , byte_count(byte_count) 29 | , nvm_file(nvm_file) 30 | , is_ram(is_ram) 31 | { 32 | assert(byte_count % 8 == 0); /* fix: was `assert(byte_count % 8);`, i.e. inverted — it fired exactly when byte_count WAS 8-byte aligned in debug builds; now matches the runtime check below and RandomReader's assert */ 33 | if (byte_count % 8 != 0) { 34 | throw "byte_count % 8"; 35 | } 36 | } 37 | 38 | void Run() 39 | { 40 | Setup(); 41 | ready = true; 42 | while (!run) { 43 | this_thread::yield(); 44 | } 45 | 46 | while (!stop) { 47 | auto begin = chrono::high_resolution_clock::now(); 48 | DoOneRun(); 49 | auto end = chrono::high_resolution_clock::now(); 50 | nano_seconds.push_back(chrono::duration_cast(end - begin).count()); 51 | performed_iteration_count++; 52 | } 53 | } 54 | 55 | void PrintResultOfLastIteration(ub4 iteration) 56 | { 57 | if (!stop || iteration>=performed_iteration_count) { 58 | throw; 59 | } 60 | double ns = nano_seconds[iteration]; 61 | //@formatter:off 62 | cout << "RES " << (is_ram ?
"seq_ram_reader " : "seq_nvm_reader ") << config 63 | << " tid: " << tid 64 | << " iterations: " << iteration << "/" << performed_iteration_count 65 | << " perf(gb/s): " << (byte_count / ns) << endl; 66 | //@formatter:on 67 | } 68 | 69 | SequentialReader(const SequentialReader &) = delete; 70 | SequentialReader &operator=(const SequentialReader &) = delete; 71 | 72 | private: 73 | void Setup() 74 | { 75 | Random ranny; 76 | if (is_ram) { 77 | data = (ub8 *) malloc(byte_count + 64); 78 | } else { 79 | nvm = make_unique(nvm_file, byte_count + 64); 80 | data = (ub8 *) nvm->Data(); 81 | } 82 | 83 | while ((ub8) data % 8 != 0) { 84 | data++; 85 | } 86 | assert((ub8) data % 8 == 0); 87 | 88 | for (ub8 i = 0; i> seq_ram_readers; 21 | vector> seq_nvm_readers; 22 | vector> rnd_ram_readers; 23 | vector> rnd_nvm_readers; 24 | vector> log_writers; 25 | vector> page_flushers; 26 | vector all_workers; 27 | vector> all_threads; 28 | // ------------------------------------------------------------------------------------- 29 | // Common config 30 | string NVM_PATH = ""; 31 | string CONFIG_STRING = ""; 32 | // ------------------------------------------------------------------------------------- 33 | void CreateAllThreads() 34 | { 35 | // Sequential read ram 36 | for (ub4 tid = 0; tid(NVM_PATH + "/seq_ram_reader_" + to_string(tid), SEQ_READER_BYTE_COUNT, true, tid, CONFIG_STRING)); 38 | all_threads.emplace_back(make_unique([&, tid]() { 39 | seq_ram_readers[tid]->Run(); 40 | })); 41 | all_workers.push_back(seq_ram_readers[tid].get()); 42 | } 43 | 44 | // Sequential read nvm 45 | for (ub4 tid = 0; tid(NVM_PATH + "/seq_nvm_reader_" + to_string(tid), SEQ_READER_BYTE_COUNT, false, tid, CONFIG_STRING)); 47 | all_threads.emplace_back(make_unique([&, tid]() { 48 | seq_nvm_readers[tid]->Run(); 49 | })); 50 | all_workers.push_back(seq_nvm_readers[tid].get()); 51 | } 52 | 53 | // Random read ram 54 | for (ub4 tid = 0; tid(NVM_PATH + "/rnd_ram_reader_" + to_string(tid), RND_READER_BYTE_COUNT, 
true, tid, CONFIG_STRING)); 56 | all_threads.emplace_back(make_unique([&, tid]() { 57 | rnd_ram_readers[tid]->Run(); 58 | })); 59 | all_workers.push_back(rnd_ram_readers[tid].get()); 60 | } 61 | 62 | // Random read nvm 63 | for (ub4 tid = 0; tid(NVM_PATH + "/rnd_nvm_reader_" + to_string(tid), RND_READER_BYTE_COUNT, false, tid, CONFIG_STRING)); 65 | all_threads.emplace_back(make_unique([&, tid]() { 66 | rnd_nvm_readers[tid]->Run(); 67 | })); 68 | all_workers.push_back(rnd_nvm_readers[tid].get()); 69 | } 70 | 71 | // Log writer 72 | for (ub4 tid = 0; tid(NVM_PATH + "/log_writer_" + to_string(tid), LOG_BYTE_COUNT, tid, CONFIG_STRING)); 74 | all_threads.emplace_back(make_unique([&, tid]() { 75 | log_writers[tid]->Run(); 76 | })); 77 | all_workers.push_back(log_writers[tid].get()); 78 | } 79 | 80 | // Page flush 81 | for (ub4 tid = 0; tid(NVM_PATH + "/page_flush_" + to_string(tid), PAGE_FLUSH_PAGE_COUNT, tid, CONFIG_STRING)); 83 | all_threads.emplace_back(make_unique([&, tid]() { 84 | page_flushers[tid]->Run(); 85 | })); 86 | all_workers.push_back(page_flushers[tid].get()); 87 | } 88 | } 89 | // ------------------------------------------------------------------------------------- 90 | void WaitForAllToGetReady() 91 | { 92 | for (auto iter : all_workers) { 93 | while (!iter->ready) { 94 | usleep(1e6); // Check every second 95 | } 96 | } 97 | } 98 | // ------------------------------------------------------------------------------------- 99 | void StartAll() 100 | { 101 | for (auto iter : all_workers) { 102 | iter->run = true; 103 | } 104 | } 105 | // ------------------------------------------------------------------------------------- 106 | void WaitForAllToPerformAtLeastInterations(ub4 iteration_count) 107 | { 108 | for (auto iter : all_workers) { 109 | while (iter->performed_iteration_countstop = true; 119 | } 120 | } 121 | // ------------------------------------------------------------------------------------- 122 | void WaitForAllToDie() 123 | { 124 | for (auto &iter 
: all_threads) { 125 | iter->join(); 126 | } 127 | } 128 | // ------------------------------------------------------------------------------------- 129 | void PrintAll(ub4 iteration) 130 | { 131 | for (auto iter : all_workers) { 132 | iter->PrintResultOfLastIteration(iteration); 133 | } 134 | } 135 | // ------------------------------------------------------------------------------------- 136 | int main(int argc, char **argv) 137 | { 138 | if (argc != 8) { 139 | cout << "usage: " << argv[0] << " SEQ_RAM SEQ_NVM RND_RAM RND_NVM LOG_NVM PAGE_NVM path" << endl; 140 | throw; 141 | } 142 | 143 | SEQ_RAM_THREADS = stof(argv[1]); 144 | SEQ_NVM_THREADS = stof(argv[2]); 145 | RND_RAM_THREADS = stof(argv[3]); 146 | RND_NVM_THREADS = stof(argv[4]); 147 | LOG_NVM_THREADS = stof(argv[5]); 148 | PAGE_NVM_THREADS = stof(argv[6]); 149 | NVM_PATH = argv[7]; 150 | 151 | 152 | //@formatter:off 153 | CONFIG_STRING = to_string(SEQ_RAM_THREADS) + "_" 154 | + to_string(SEQ_NVM_THREADS) + "_" 155 | + to_string(RND_RAM_THREADS) + "_" 156 | + to_string(RND_NVM_THREADS) + "_" 157 | + to_string(LOG_NVM_THREADS) + "_" 158 | + to_string(PAGE_NVM_THREADS); 159 | //@formatter:on 160 | 161 | cout << "Config:" << endl; 162 | cout << "----------------------------" << endl; 163 | cout << "SEQ_READER_BYTE_COUNT: " << SEQ_READER_BYTE_COUNT << endl; 164 | cout << "RND_READER_BYTE_COUNT: " << RND_READER_BYTE_COUNT << endl; 165 | cout << "LOG_BYTE_COUNT: " << LOG_BYTE_COUNT << endl; 166 | cout << "PAGE_FLUSH_PAGE_COUNT: " << PAGE_FLUSH_PAGE_COUNT << endl; 167 | cout << "SEQ_RAM_THREADS: " << SEQ_RAM_THREADS << endl; 168 | cout << "SEQ_NVM_THREADS: " << SEQ_NVM_THREADS << endl; 169 | cout << "RND_RAM_THREADS: " << RND_RAM_THREADS << endl; 170 | cout << "RND_NVM_THREADS: " << RND_NVM_THREADS << endl; 171 | cout << "LOG_NVM_THREADS: " << LOG_NVM_THREADS << endl; 172 | cout << "PAGE_NVM_THREADS: " << PAGE_NVM_THREADS << endl; 173 | cout << "CONFIG_STRING: " << CONFIG_STRING << endl; 174 | 175 | cout << 
"NVM_PATH: " << NVM_PATH << endl; 176 | #ifdef STREAMING 177 | cout << "STREAMING: " << "yes" << endl; 178 | #else 179 | cout << "STREAMING: " << "no" << endl; 180 | #endif 181 | 182 | CreateAllThreads(); 183 | WaitForAllToGetReady(); 184 | StartAll(); 185 | WaitForAllToPerformAtLeastInterations(3); // First one might be bad if not every one has started and last on might be bad because some body might finish earlier 186 | StopAll(); 187 | WaitForAllToDie(); 188 | PrintAll(1); // run id, zero-based 189 | 190 | cout << "all good :)" << endl; 191 | return 0; 192 | } 193 | // ------------------------------------------------------------------------------------- 194 | -------------------------------------------------------------------------------- /latency/read_latency.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | 18 | uint64_t rdtsc() 19 | { 20 | uint32_t hi, lo; 21 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 22 | return static_cast(lo) | (static_cast(hi) << 32); 23 | } 24 | 25 | #include 26 | 27 | static inline double gettime(void) 28 | { 29 | struct timeval now_tv; 30 | gettimeofday(&now_tv, NULL); 31 | return ((double) now_tv.tv_sec) + ((double) now_tv.tv_usec) / 1000000.0; 32 | } 33 | 34 | uint64_t COUNT; // In number of uint64_t 35 | uint64_t SIZE; // In byte 36 | uint64_t *v; 37 | atomic go(0); 38 | uintptr_t REPETITIONS; // Number of lookups in total (we report performance every STEP times) 39 | const uint64_t STEP = 10e7; 40 | const char* PATH; 41 | bool USE_RAM; 42 | 43 | static void *readThread(void *arg) 44 | { 45 | while (!go); 46 | uintptr_t threadNum = reinterpret_cast(arg); 47 | 48 | cpu_set_t cpuset; 49 | CPU_ZERO(&cpuset); 50 | CPU_SET(threadNum, &cpuset); 51 | pthread_t currentThread 
= pthread_self(); 52 | if (pthread_setaffinity_np(currentThread, sizeof(cpu_set_t), &cpuset) != 0) 53 | throw; 54 | 55 | auto total_start = chrono::high_resolution_clock::now(); 56 | 57 | uint64_t x = 0; 58 | uint64_t run = 0; 59 | for (uint64_t i = 0; i> 15); 94 | return (seed ^= (seed << 5)); 95 | } 96 | 97 | uint64_t seed; 98 | }; 99 | 100 | uint64_t *CreateRandomCycle(bool ram) 101 | { 102 | uint64_t *helper; 103 | uint64_t *result; 104 | 105 | { 106 | auto start = chrono::high_resolution_clock::now(); 107 | cout << "init " << flush; 108 | helper = new uint64_t[COUNT]; 109 | for (uint64_t i = 0; i(end - start).count(); 118 | cout << "(" << s << ")" << endl; 119 | } 120 | 121 | { 122 | auto start = chrono::high_resolution_clock::now(); 123 | cout << "shuffle " << flush; 124 | FastRandom ranny; 125 | for (uint64_t i = 0; i(end - start).count(); 135 | cout << "(" << s << ")" << endl; 136 | } 137 | 138 | { 139 | auto start = chrono::high_resolution_clock::now(); 140 | cout << "cycle " << flush; 141 | if (ram) { 142 | result = new uint64_t[COUNT]; 143 | } else { 144 | int fd = open(PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 145 | int td = ftruncate(fd, COUNT * 8); 146 | if (fd<0 || td<0) { 147 | cout << "unable to create file" << endl; 148 | exit(-1); 149 | } 150 | result = (uint64_t *) mmap(nullptr, COUNT * 8, PROT_WRITE, MAP_SHARED, fd, 0); 151 | } 152 | for (uint64_t i = 0; i(end - start).count(); 161 | cout << "(" << s << ")" << endl; 162 | } 163 | 164 | return result; 165 | } 166 | 167 | // clang++ -g0 -O3 -march=native -std=c++14 read_latency.cpp -pthread && ./a.out 1 1e9 1e9 ram /mnt/pmem0/renen/file_0 168 | int main(int argc, char **argv) 169 | { 170 | if (argc != 6) { 171 | cout << "usage: " << argv[0] << " thread_count datasize(byte) repetitions (nvm|ram) path" << endl; 172 | throw; 173 | } 174 | 175 | unsigned threadCount = atoi(argv[1]); 176 | SIZE = atof(argv[2]); 177 | COUNT = SIZE / 8; 178 | REPETITIONS = atof(argv[3]); 179 | USE_RAM = argv[4][0] == 
'r'; 180 | PATH = argv[5]; 181 | 182 | cout << "Config: thread_count=" << threadCount << " repetitions=" << REPETITIONS << " use_ram=" << USE_RAM << " size=" << SIZE << " path=" << PATH << endl; 183 | 184 | v = CreateRandomCycle(USE_RAM); 185 | cout << "starting .." << endl; 186 | 187 | pthread_t threads[threadCount]; 188 | for (unsigned i = 0; i(i)); 190 | } 191 | 192 | uint64_t times[threadCount]; 193 | auto start = chrono::high_resolution_clock::now(); 194 | go = 1; 195 | for (unsigned i = 0; i(end - start).count(); 203 | double gb = ((threadCount * 64 * REPETITIONS) / (1000.0 * 1000.0 * 1000.0)); 204 | double gbs = gb / ns * 1e9; 205 | double latency = ns / REPETITIONS; 206 | 207 | // @formatter:off 208 | cout << "res:" 209 | << " thread_count=" << threadCount 210 | << " repetitions=" << REPETITIONS 211 | << " use_ram=" << USE_RAM 212 | << " size=" << SIZE 213 | << " throughput(GB/s) " << gbs 214 | << " latency(ns) " << latency 215 | << endl; 216 | // @formatter:on 217 | 218 | return 0; 219 | } 220 | -------------------------------------------------------------------------------- /latency/read_latency_alex.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | 18 | inline void Clwb(void *addr) 19 | { 20 | #ifdef NOCLWB 21 | (void) addr; 22 | #else 23 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *) addr)); 24 | #endif 25 | } 26 | 27 | uint64_t rdtsc() 28 | { 29 | uint32_t hi, lo; 30 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 31 | return static_cast(lo) | (static_cast(hi) << 32); 32 | } 33 | 34 | inline void SFence() 35 | { 36 | _mm_mfence(); 37 | } 38 | 39 | #include 40 | 41 | static inline double gettime(void) 42 | { 43 | struct timeval now_tv; 44 | gettimeofday(&now_tv, NULL); 45 
| return ((double) now_tv.tv_sec) + ((double) now_tv.tv_usec) / 1000000.0; 46 | } 47 | 48 | uint64_t COUNT; // In number of uint64_t 49 | uint64_t SIZE; // In byte 50 | uint64_t *v; 51 | atomic go(0); 52 | const char *PATH; 53 | bool USE_RAM; 54 | 55 | static void *readThread(void *arg) 56 | { 57 | while (!go); 58 | uintptr_t threadNum = reinterpret_cast(arg); 59 | 60 | uint64_t x = 0; 61 | auto start = chrono::high_resolution_clock::now(); 62 | for (uint64_t i = 0; i(end - start).count(); 71 | 72 | cout << threadNum << ": " << (ns / (double) COUNT) << " ns/rep (result=" << x << ")" << endl; 73 | return (void *) (ns / COUNT); 74 | } 75 | 76 | class FastRandom { 77 | public: 78 | explicit FastRandom(uint64_t seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 79 | : seed(seed) {} 80 | 81 | uint64_t Next() 82 | { 83 | seed ^= (seed << 13); 84 | seed ^= (seed >> 15); 85 | return (seed ^= (seed << 5)); 86 | } 87 | 88 | uint64_t seed; 89 | }; 90 | 91 | uint64_t *CreateRandomCycle(bool ram) 92 | { 93 | uint64_t *helper; 94 | uint64_t *result; 95 | 96 | { 97 | auto start = chrono::high_resolution_clock::now(); 98 | cout << "init " << flush; 99 | helper = new uint64_t[COUNT]; 100 | for (uint64_t i = 0; i(end - start).count(); 109 | cout << "(" << s << ")" << endl; 110 | } 111 | 112 | { 113 | auto start = chrono::high_resolution_clock::now(); 114 | cout << "shuffle " << flush; 115 | FastRandom ranny; 116 | for (uint64_t i = 0; i(end - start).count(); 126 | cout << "(" << s << ")" << endl; 127 | } 128 | 129 | { 130 | auto start = chrono::high_resolution_clock::now(); 131 | cout << "cycle " << flush; 132 | if (ram) { 133 | result = new uint64_t[COUNT]; 134 | } else { 135 | int fd = open(PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 136 | int td = ftruncate(fd, COUNT * 8); 137 | if (fd<0 || td<0) { 138 | cout << "unable to create file" << endl; 139 | exit(-1); 140 | } 141 | result = (uint64_t *) mmap(nullptr, COUNT * 8, PROT_WRITE, MAP_SHARED, 
fd, 0); 142 | } 143 | for (uint64_t i = 0; i(end - start).count(); 152 | cout << "(" << s << ")" << endl; 153 | } 154 | 155 | return result; 156 | } 157 | 158 | // clang++ -g0 -O3 -march=native -std=c++14 read_latency.cpp -pthread && ./a.out 1 1e9 1e9 ram /mnt/pmem0/renen/file_0 159 | int main(int argc, char **argv) 160 | { 161 | if (argc != 5) { 162 | cout << "usage: " << argv[0] << " thread_count datasize(byte) (nvm|ram) path" << endl; 163 | throw; 164 | } 165 | 166 | unsigned threadCount = atoi(argv[1]); 167 | SIZE = atof(argv[2]); 168 | COUNT = SIZE / 8; 169 | USE_RAM = argv[3][0] == 'r'; 170 | PATH = argv[4]; 171 | 172 | cout << "Config: thread_count=" << threadCount << " use_ram=" << USE_RAM << " size=" << SIZE << " path=" << PATH << endl; 173 | 174 | v = CreateRandomCycle(USE_RAM); 175 | cout << "starting .." << endl; 176 | 177 | pthread_t threads[threadCount]; 178 | for (unsigned i = 0; i(i)); 180 | } 181 | 182 | uint64_t times[threadCount]; 183 | auto start = chrono::high_resolution_clock::now(); 184 | go = 1; 185 | for (unsigned i = 0; i(end - start).count(); 200 | double gb = ((threadCount * 64 * COUNT) / (1000.0 * 1000.0 * 1000.0)); 201 | double gbs = gb / ns * 1e9; 202 | double latency = ns / COUNT; 203 | 204 | // @formatter:off 205 | cout << "res:" 206 | << " thread_count=" << threadCount 207 | << " use_ram=" << USE_RAM 208 | << " size=" << SIZE 209 | << " throughput(GB/s) " << gbs 210 | << " latency(ns) " << latency 211 | << endl; 212 | // @formatter:on 213 | 214 | return 0; 215 | } 216 | -------------------------------------------------------------------------------- /latency/write_latency.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 
#include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | #define _mm_clflush(addr)\ 28 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 29 | #define _mm_clflushopt(addr)\ 30 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 31 | #define _mm_clwb(addr)\ 32 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 33 | #define _mm_pcommit()\ 34 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 35 | 36 | enum struct Type { 37 | RANDOM_CL, 38 | SEQUENTIAL_CL, 39 | SAME_CL 40 | }; 41 | 42 | Type GetClWriteType(const string &str) 43 | { 44 | if (str == "single") 45 | return Type::SAME_CL; 46 | if (str == "sequential") 47 | return Type::SEQUENTIAL_CL; 48 | if (str == "random") { 49 | return Type::RANDOM_CL; 50 | } 51 | cout << "unknown benchmark type: '" << str << "'" << endl; 52 | throw; 53 | } 54 | 55 | std::string GetStringFromType(Type benchmark_type) 56 | { 57 | switch (benchmark_type) { 58 | case Type::RANDOM_CL: { 59 | return "RANDOM_CL"; 60 | } 61 | case Type::SEQUENTIAL_CL: { 62 | return "SEQUENTIAL_CL"; 63 | } 64 | case Type::SAME_CL: { 65 | return "SAME_CL"; 66 | } 67 | } 68 | cout << "unkown type" << endl; 69 | throw; 70 | } 71 | 72 | // clang++ -g0 -O3 -march=native -std=c++14 write_latency.cpp -pthread -DFLUSH=1 && ./a.out ram sequential 1e9 /mnt/pmem0/renen/file_0 73 | int main(int argc, char **argv) 74 | { 75 | if (argc != 5) { 76 | cout << "usage: " << argv[0] << " (nvm|ram) (single|sequential|random) size(byte) path" << endl; 77 | throw; 78 | } 79 | bool use_flush = false; 80 | bool use_flush_opt = false; 81 | bool use_clwb = false; 82 | bool use_streaming = false; 83 | int technique_count = 0; 84 | #ifdef FLUSH 85 | use_flush = true; 86 | technique_count++; 87 | #endif 88 | #ifdef FLUSH_OPT 89 | use_flush_opt = true; 90 | technique_count++; 91 | #endif 92 | #ifdef CLWB 93 | use_clwb= true; 94 | technique_count++; 95 | #endif 96 | #ifdef STREAMING 97 | use_streaming = true; 98 | 
technique_count++; 99 | #endif 100 | if (technique_count != 1) { 101 | cout << "Need to specify exactly one flush technique" << endl; 102 | throw; 103 | } 104 | const bool use_ram = argv[1][0] == 'r'; 105 | const Type benchmark_type = GetClWriteType(argv[2]); 106 | const uint64_t size = atof(argv[3]); 107 | const string path = argv[4]; 108 | 109 | cout << "use_flush: " << (use_flush ? "yes" : "no") << endl; 110 | cout << "use_flush_opt: " << (use_flush_opt ? "yes" : "no") << endl; 111 | cout << "use_clwb: " << (use_clwb ? "yes" : "no") << endl; 112 | cout << "use_streaming: " << (use_streaming ? "yes" : "no") << endl; 113 | cout << "use_ram: " << (use_ram ? "yes" : "no") << endl; 114 | cout << "benchmark_type: " << GetStringFromType(benchmark_type) << endl; 115 | cout << "size: " << size << endl; 116 | 117 | uint8_t *keys; 118 | if (use_ram) { 119 | keys = new uint8_t[size + 64]; 120 | while (((uint64_t) keys) % 64 != 0) // Align to 64 byte ;p 121 | keys++; 122 | } else { 123 | int fd = open(path.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 124 | int td = ftruncate(fd, size); 125 | if (fd<0 || td<0) { 126 | cout << "unable to create file" << endl; 127 | exit(-1); 128 | } 129 | keys = (uint8_t *) mmap(nullptr, size, PROT_WRITE, MAP_SHARED, fd, 0); 130 | } 131 | 132 | alignas(64) uint8_t write_data[64] = {0xaa}; /* fix: _mm512_stream_load_si512 (vmovntdqa) requires a 64-byte-aligned operand — an unaligned stack array faults; note aggregate init {0xaa} sets only byte 0, the remaining 63 bytes are zero */ 133 | __m512i write_data_vec = _mm512_stream_load_si512(write_data); 134 | 135 | vector write_offsets(size / 64); 136 | switch (benchmark_type) { 137 | case Type::RANDOM_CL: { 138 | for (uint64_t i = 0; i(end - begin).count(); 189 | uint64_t latency = nano_seconds / iteration_count; 190 | 191 | // @formatter:off 192 | std::cout << "res " 193 | << " use_ram: " << use_ram 194 | << " size: " << size 195 | << " benchmark_type: " << GetStringFromType(benchmark_type) 196 | << " use_flush: " << use_flush 197 | << " use_flush_opt: " << use_flush_opt 198 | << " use_clwb: " << use_clwb 199 | << " use_streaming: " << use_streaming 200 | << " latency(ns): " << latency <<
std::endl; 201 | // @formatter:on 202 | 203 | return 0; 204 | } 205 | 206 | -------------------------------------------------------------------------------- /logging/Common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "libpmem.h" 8 | // ------------------------------------------------------------------------------------- 9 | #define a_mm_clflush(addr)\ 10 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 11 | #define a_mm_clflushopt(addr)\ 12 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 13 | #define a_mm_clwb(addr)\ 14 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 15 | #define a_mm_pcommit()\ 16 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 17 | // ------------------------------------------------------------------------------------- 18 | using ub1 = uint8_t; 19 | using ub2 = uint16_t; 20 | using ub4 = uint32_t; 21 | using ub8 = uint64_t; 22 | // ------------------------------------------------------------------------------------- 23 | using sb1 = int8_t; 24 | using sb2 = int16_t; 25 | using sb4 = int32_t; 26 | using sb8 = int64_t; 27 | // ------------------------------------------------------------------------------------- 28 | namespace constants { 29 | const static ub4 kCacheLineByteCount = 64; // 64 Byte cache lines 30 | } 31 | // ------------------------------------------------------------------------------------- 32 | inline void alex_WriteBack(void *addr, ub4 len) 33 | { 34 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 35 | a_mm_clwb((char *) uptr); 36 | } 37 | } 38 | // ------------------------------------------------------------------------------------- 39 | inline void alex_WriteBack(void *addr) 40 | { 41 | addr = (ub1 *) 
((uintptr_t) addr & ~(64 - 1)); 42 | a_mm_clwb((char *) addr); 43 | } 44 | // ------------------------------------------------------------------------------------- 45 | inline void alex_FlushOpt(void *addr, ub4 len) 46 | { 47 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 48 | a_mm_clflushopt((char *) uptr); 49 | } 50 | } 51 | // ------------------------------------------------------------------------------------- 52 | inline void alex_FlushOpt(void *addr) 53 | { 54 | a_mm_clflushopt((char *) addr); 55 | } 56 | // ------------------------------------------------------------------------------------- 57 | inline void alex_SFence() 58 | { 59 | _mm_sfence(); 60 | } 61 | // ------------------------------------------------------------------------------------- 62 | inline ub8 alex_PopCount(ub8 value) 63 | { 64 | return _mm_popcnt_u64(value); 65 | } 66 | // ------------------------------------------------------------------------------------- 67 | inline void alex_FlushClToNvm(void *dest, void *src) 68 | { 69 | assert(((ub8) dest) % 64 == 0); 70 | assert(((ub8) src) % 64 == 0); 71 | __m512i reg = _mm512_load_si512(src); 72 | _mm512_stream_si512((__m512i *) dest, reg); 73 | } 74 | // ------------------------------------------------------------------------------------- 75 | void FastCopy512(ub1 *dest, const ub1 *src) 76 | { 77 | assert(((ub8) dest) % 64 == 0); 78 | memcpy(dest, src, 64); 79 | } 80 | // ------------------------------------------------------------------------------------- 81 | ub4 FastPopCount512(const ub1 *ptr) 82 | { 83 | ub4 res = 0; 84 | for (ub4 i = 0; i<64; i += 8) { 85 | res += alex_PopCount(*(ub8 *) (&ptr[i])); 86 | } 87 | return res; 88 | } 89 | // ------------------------------------------------------------------------------------- 90 | void FastCopy512Simd(ub1 *dest, const ub1 *src) 91 | { 92 | assert(((ub8) dest) % 64 == 0); 93 | __m512i reg = _mm512_loadu_si512(src); 94 | _mm512_store_si512((__m512i 
*) dest, reg); 95 | } 96 | // ------------------------------------------------------------------------------------- 97 | void alex_FastCopyAndWriteBack(ub1 *nvm_begin, const ub1 *ram_begin, ub4 size) 98 | { 99 | assert(((ub8) nvm_begin) % 64 == 0); // nvm needs to be aligned 100 | 101 | // Copy full cache lines (and flush) 102 | ub4 pos = 0; 103 | for (; pos + 63(result); 126 | } 127 | // ------------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /logging/LW_Classic.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterClassic { 2 | 3 | struct Entry { 4 | ub8 payload_size; // header 5 | ub1 data[]; 6 | // footer: ub8 start; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterClassic(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | assert(next_free % 8 == 0); 32 | assert(entry.payload_size % 8 == 0); 33 | assert(sizeof(File) + next_free + entry.payload_size + 16(footer_begin) = next_free; 42 | pmem_persist(footer_begin, 8); 43 | 44 | // Advance and done 45 | next_free += entry.payload_size + 16; 46 | assert(next_free % 8 == 0); 47 | return next_free; 48 | } 49 | 50 | ub8 GetWrittenByteCount() const 51 | { 52 | return next_free + sizeof(File); 53 | } 54 | 55 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 56 | { 57 | ub8 current = 0; 58 | ub8 used_size = 0; 59 | 60 | vectorentries; 61 | while (used_sizepayload_size = payload_size; 66 | entries.push_back(entry); 67 | current += payload_size 
+ 16; 68 | used_size += payload_size; 69 | } 70 | return entries; 71 | } 72 | }; -------------------------------------------------------------------------------- /logging/LW_ClassicAligned.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterClassicAligned { 2 | 3 | struct Entry { 4 | ub8 payload_size; // header 5 | ub1 data[]; 6 | // footer: ub8 start; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterClassicAligned(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | assert(next_free % 8 == 0); 32 | assert(entry.payload_size % 8 == 0); 33 | assert(sizeof(File) + next_free + entry.payload_size + 16(&entry); 38 | alex_FastCopyAndWriteBack(entry_begin, ram_bgein, entry.payload_size + 8); 39 | alex_SFence(); 40 | next_free = (next_free + entry.payload_size + 8 + 63) & ~63ull; 41 | 42 | // Copy the footer 43 | ub1 *footer_begin = file.data + next_free; 44 | *reinterpret_cast(footer_begin) = next_free; 45 | alex_WriteBack(footer_begin); 46 | alex_SFence(); 47 | 48 | // Advance and done 49 | next_free += 64; 50 | assert(next_free % 8 == 0); 51 | return next_free; 52 | } 53 | 54 | ub8 GetWrittenByteCount() const 55 | { 56 | return next_free + sizeof(File); 57 | } 58 | 59 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 60 | { 61 | ub8 current = 0; 62 | ub8 used_size = 0; 63 | 64 | vectorentries; 65 | while (used_sizepayload_size = payload_size; 70 | entries.push_back(entry); 71 | current += payload_size + 16; 72 | used_size += payload_size; 73 | } 74 | return entries; 75 | } 76 | }; 
-------------------------------------------------------------------------------- /logging/LW_ClassicCached.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | struct LogWriterClassicCached { 5 | 6 | struct Entry { 7 | ub8 payload_size; // header 8 | ub1 data[]; 9 | // footer: ub8 start; 10 | }; 11 | 12 | struct File { 13 | // Header 14 | ub1 padding[constants::kCacheLineByteCount]; 15 | 16 | // Log data 17 | ub1 data[]; 18 | }; 19 | static_assert(sizeof(File) == 64, ""); 20 | 21 | NonVolatileMemory &nvm; 22 | File &file; // == nvm 23 | ub8 next_free; 24 | ub8 *cl_buffer; 25 | ub4 cl_offset = 0; 26 | 27 | LogWriterClassicCached(NonVolatileMemory &nvm) 28 | : nvm(nvm) 29 | , file(*reinterpret_cast(nvm.Data())) 30 | { 31 | next_free = 0; 32 | cl_buffer = reinterpret_cast(operator new(128)); 33 | while (((ub8) cl_buffer) % 64 != 0) { 34 | cl_buffer++; 35 | } 36 | memset(cl_buffer, 0, 64); 37 | } 38 | 39 | ub8 AddLogEntry(const Entry &entry) 40 | { 41 | assert(next_free % 8 == 0); 42 | assert(entry.payload_size % 8 == 0); 43 | assert(sizeof(File) + next_free + entry.payload_size + 16(&entry); 50 | for (ub4 i = 0; i0) { 60 | alex_FlushClToNvm(nvm, cl_buffer); 61 | } 62 | alex_SFence(); 63 | 64 | // Copy the footer 65 | cl_buffer[cl_offset] = next_free; 66 | cl_offset++; 67 | alex_FlushClToNvm(nvm, cl_buffer); 68 | if (cl_offset == 8) { 69 | cl_offset = 0; 70 | nvm += 64; 71 | } 72 | alex_SFence(); 73 | 74 | // Advance and done 75 | next_free += entry.payload_size + 16; 76 | assert(next_free % 8 == 0); 77 | assert((ub8)nvm == (ub8(file.data + next_free) & ~ub8(63))); 78 | return next_free; 79 | } 80 | 81 | ub8 GetWrittenByteCount() const 82 | { 83 | return next_free + sizeof(File); 84 | } 85 | 86 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 87 | { 88 | ub8 current = 0; 89 | ub8 used_size = 0; 90 | 91 | vectorentries; 92 | while (used_sizepayload_size = 
payload_size; 97 | entries.push_back(entry); 98 | current += payload_size + 16; 99 | used_size += payload_size; 100 | } 101 | return entries; 102 | } 103 | }; -------------------------------------------------------------------------------- /logging/LW_Header.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterHeader { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub8 next_free; 11 | ub1 padding[constants::kCacheLineByteCount - sizeof(ub8)]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | 21 | LogWriterHeader(NonVolatileMemory &nvm) 22 | : nvm(nvm) 23 | , file(*reinterpret_cast(nvm.Data())) 24 | { 25 | file.next_free = 0; 26 | } 27 | 28 | ub8 AddLogEntry(const Entry &entry) 29 | { 30 | assert(file.next_free % 8 == 0); 31 | assert(entry.payload_size % 8 == 0); 32 | assert(sizeof(File) + file.next_free + entry.payload_size + 8 CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 52 | { 53 | ub8 current = 0; 54 | ub8 used_size = 0; 55 | 56 | vectorentries; 57 | while (used_sizepayload_size = payload_size; 62 | entries.push_back(entry); 63 | current += payload_size + 8; 64 | used_size += payload_size; 65 | } 66 | return entries; 67 | } 68 | }; -------------------------------------------------------------------------------- /logging/LW_HeaderAligned.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterHeaderAligned { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub8 next_free; 11 | ub1 padding[constants::kCacheLineByteCount - sizeof(ub8)]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | ub8 next_free; 18 | 19 | NonVolatileMemory &nvm; 20 | 
File &file; // == nvm 21 | 22 | LogWriterHeaderAligned(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | file.next_free = 0; 27 | next_free = 0; 28 | } 29 | 30 | ub8 AddLogEntry(const Entry &entry) 31 | { 32 | assert(entry.payload_size % 8 == 0); 33 | assert(next_free % 64 == 0); 34 | assert(sizeof(File) + next_free + entry.payload_size + 8(&entry); 40 | alex_FastCopyAndWriteBack(entry_begin, ram_begin, size); 41 | alex_SFence(); 42 | 43 | // Update the header 44 | next_free += size; 45 | next_free = (next_free + 63) & ~63ull; // 64 byte aligned 46 | file.next_free = next_free; 47 | alex_WriteBack((void *) &file); 48 | alex_SFence(); 49 | 50 | assert(next_free % 64 == 0); 51 | return next_free; 52 | } 53 | 54 | ub8 GetWrittenByteCount() const 55 | { 56 | return file.next_free + sizeof(File); 57 | } 58 | 59 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 60 | { 61 | ub8 current = 0; 62 | ub8 used_size = 0; 63 | 64 | vector entries; 65 | while (used_sizepayload_size = payload_size; 70 | entries.push_back(entry); 71 | current += payload_size + 8; 72 | used_size += payload_size; 73 | } 74 | return entries; 75 | } 76 | }; -------------------------------------------------------------------------------- /logging/LW_HeaderAlignedDancing.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterHeaderAlignedDancing { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct Header { 9 | ub8 next_free; 10 | ub1 padding[constants::kCacheLineByteCount - sizeof(ub8)]; 11 | }; 12 | 13 | static const ub4 HEADER_COUNT = 64; 14 | 15 | struct File { 16 | // Header 17 | Header headers[HEADER_COUNT]; 18 | 19 | // Log data 20 | ub1 data[]; 21 | }; 22 | static_assert(sizeof(File) == 64 * HEADER_COUNT, ""); 23 | ub8 next_free; 24 | ub8 next_header; 25 | 26 | NonVolatileMemory &nvm; 27 | File &file; // == 
nvm 28 | 29 | LogWriterHeaderAlignedDancing(NonVolatileMemory &nvm) 30 | : nvm(nvm) 31 | , file(*reinterpret_cast(nvm.Data())) 32 | { 33 | for (ub4 i = 0; i(&entry); 50 | alex_FastCopyAndWriteBack(entry_begin, ram_begin, size); 51 | alex_SFence(); 52 | 53 | // Update the header 54 | next_free += size; 55 | next_free = (next_free + 63) & ~63ull; // 64 byte aligned 56 | file.headers[next_header].next_free = next_free; 57 | alex_WriteBack((void *) &file.headers[next_header]); 58 | alex_SFence(); 59 | next_header = (next_header + 1) % HEADER_COUNT; 60 | 61 | assert(next_free % 64 == 0); 62 | return next_free; 63 | } 64 | 65 | ub8 GetWrittenByteCount() const 66 | { 67 | return next_free + sizeof(File); 68 | } 69 | 70 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 71 | { 72 | ub8 current = 0; 73 | ub8 used_size = 0; 74 | 75 | vectorentries; 76 | while (used_sizepayload_size = payload_size; 81 | entries.push_back(entry); 82 | current += payload_size + 8; 83 | used_size += payload_size; 84 | } 85 | return entries; 86 | } 87 | }; -------------------------------------------------------------------------------- /logging/LW_HeaderDancing.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterHeaderDancing { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct Header { 9 | ub8 next_free; 10 | ub1 padding[constants::kCacheLineByteCount - sizeof(ub8)]; 11 | }; 12 | 13 | static const ub4 HEADER_COUNT = 64; 14 | 15 | struct File { 16 | // Header 17 | Header headers[HEADER_COUNT]; 18 | 19 | // Log data 20 | ub1 data[]; 21 | }; 22 | static_assert(sizeof(File) == 64 * HEADER_COUNT, ""); 23 | ub8 next_free; 24 | ub8 next_header; 25 | 26 | NonVolatileMemory &nvm; 27 | File &file; // == nvm 28 | 29 | LogWriterHeaderDancing(NonVolatileMemory &nvm) 30 | : nvm(nvm) 31 | , file(*reinterpret_cast(nvm.Data())) 32 | { 33 | for (ub4 i = 0; i 
CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 68 | { 69 | ub8 current = 0; 70 | ub8 used_size = 0; 71 | 72 | vectorentries; 73 | while (used_sizepayload_size = payload_size; 78 | entries.push_back(entry); 79 | current += payload_size + 8; 80 | used_size += payload_size; 81 | } 82 | return entries; 83 | } 84 | }; -------------------------------------------------------------------------------- /logging/LW_Mnemosyne.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterMnemosyne { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub1 padding[constants::kCacheLineByteCount]; 11 | 12 | // Log data 13 | ub1 data[]; 14 | }; 15 | static_assert(sizeof(File) == 64, ""); 16 | 17 | NonVolatileMemory &nvm; 18 | File &file; // == nvm 19 | ub8 next_free; 20 | ub8 log_read_offset; 21 | 22 | LogWriterMnemosyne(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | log_read_offset = 0; 28 | } 29 | 30 | ub8 AddLogEntry(const Entry &entry) 31 | { 32 | assert(next_free + entry.payload_size + 80 && entry.payload_size % 8 == 0); 34 | 35 | // Write length of log entry 36 | (uint64_t &) file.data[next_free] = entry.payload_size | 0x1; 37 | next_free += 8; 38 | 39 | // Write payload of log entry 40 | uint64_t buffer = 0; 41 | uint64_t checker = 0; 42 | 43 | uint32_t i = 0; 44 | for (i = 0; i0 && (i + 8) % 504 == 0) { 61 | // cout << "flush checker" << endl; 62 | assert(checker == 63); 63 | buffer = (buffer << 1) | 0x1; 64 | *((uint64_t *) &file.data[next_free]) = buffer; 65 | next_free += 8; 66 | checker = 0; 67 | buffer = 0; 68 | 69 | // Flush cache line every cache line 70 | if (next_free % 64 == 0) { 71 | alex_WriteBack(file.data + next_free - 8); 72 | } 73 | } 74 | } 75 | 76 | if (i % 504 != 0) { 77 | assert(checker>0); 78 | // cout << "flush tail checker: " << 
buffer << " @" << next_free << endl; 79 | 80 | buffer = (buffer << 1) | 0x1; 81 | *((uint64_t *) &file.data[next_free]) = buffer; 82 | next_free += 8; 83 | } 84 | alex_WriteBack(file.data + next_free - 8); 85 | alex_SFence(); 86 | 87 | // Advance and done 88 | return next_free; 89 | } 90 | 91 | unique_ptr GetNextLogEntry() // Read code is only to verify correctnes 92 | { 93 | if (log_read_offset == next_free) { 94 | return nullptr; 95 | } 96 | 97 | // Read length 98 | uint64_t len = *((uint64_t *) &file.data[log_read_offset]) & ~(0x1ull); 99 | log_read_offset += 8; 100 | 101 | // cout << "got len: " << len << endl; 102 | 103 | uint64_t checker = 0; 104 | 105 | vector result(len); 106 | uint32_t i = 0; 107 | for (; i0 && (i + 8) % 504 == 0) { 114 | assert(checker == 63); 115 | checker = 0; 116 | // cout << "apply checker" << endl; 117 | 118 | uint64_t buffer = *((uint64_t *) &file.data[log_read_offset]) >> 1; 119 | log_read_offset += 8; 120 | for (uint32_t c = 0; c<63; c++) { 121 | *((uint64_t *) &result[i - c * 8]) |= buffer & 0x1; 122 | buffer = buffer >> 1; 123 | } 124 | } 125 | } 126 | 127 | if (i % 504 != 0) { 128 | i -= 8; 129 | assert(checker>0); 130 | 131 | uint64_t buffer = *((uint64_t *) &file.data[log_read_offset]) >> 1; 132 | // cout << "apply tail checker " << buffer << " @" << log_read_offset << endl; 133 | 134 | log_read_offset += 8; 135 | for (uint32_t c = 0; c> 1; 138 | } 139 | checker = 0; 140 | } 141 | 142 | Entry *entry = new(malloc(sizeof(Entry) + result.size())) Entry(); 143 | entry->payload_size = result.size(); 144 | memcpy(entry->data, result.data(), result.size()); 145 | return unique_ptr(entry); 146 | } 147 | 148 | ub8 GetWrittenByteCount() const 149 | { 150 | return next_free + sizeof(File); 151 | } 152 | 153 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 154 | { 155 | ub8 current = 0; 156 | ub8 used_size = 0; 157 | 158 | vector entries; 159 | while 
(used_sizepayload_size = payload_size; 164 | entries.push_back(entry); 165 | current += payload_size + 8; 166 | used_size += payload_size; 167 | } 168 | return entries; 169 | } 170 | }; -------------------------------------------------------------------------------- /logging/LW_MnemosyneAligned.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterMnemosyneAligned { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub1 padding[constants::kCacheLineByteCount]; 11 | 12 | // Log data 13 | ub1 data[]; 14 | }; 15 | static_assert(sizeof(File) == 64, ""); 16 | 17 | NonVolatileMemory &nvm; 18 | File &file; // == nvm 19 | ub8 next_free; 20 | ub8 log_read_offset; 21 | 22 | LogWriterMnemosyneAligned(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | log_read_offset = 0; 28 | } 29 | 30 | ub8 AddLogEntry(const Entry &entry) 31 | { 32 | assert(next_free + entry.payload_size + 80 && entry.payload_size % 8 == 0); 34 | assert((uint64_t) &file.data[next_free] % 64 == 0); 35 | assert(next_free % 64 == 0); 36 | 37 | // Write length of log entry 38 | (uint64_t &) file.data[next_free] = entry.payload_size | 0x1; 39 | next_free += 8; 40 | 41 | // Write payload of log entry 42 | uint64_t buffer = 0; 43 | uint64_t checker = 0; 44 | 45 | uint32_t i = 0; 46 | for (i = 0; i0 && (i + 8) % 504 == 0) { 63 | // cout << "flush checker" << endl; 64 | assert(checker == 63); 65 | buffer = (buffer << 1) | 0x1; 66 | *((uint64_t *) &file.data[next_free]) = buffer; 67 | next_free += 8; 68 | checker = 0; 69 | buffer = 0; 70 | 71 | // Flush cache line every cache line 72 | if (next_free % 64 == 0) { 73 | alex_WriteBack(file.data + next_free - 8); 74 | } 75 | } 76 | } 77 | 78 | if (i % 504 != 0) { 79 | assert(checker>0); 80 | // cout << "flush tail checker: " << buffer << " @" << next_free << endl; 81 | 82 | buffer = (buffer << 1) | 
0x1; 83 | *((uint64_t *) &file.data[next_free]) = buffer; 84 | next_free += 8; 85 | } 86 | alex_WriteBack(file.data + next_free - 8); 87 | alex_SFence(); 88 | 89 | // Advance and done 90 | next_free = (next_free + 63) & ~63ull; 91 | assert(next_free % 64 == 0); 92 | return next_free; 93 | } 94 | 95 | unique_ptr GetNextLogEntry() // Read code is only to verify correctnes 96 | { 97 | if (log_read_offset == next_free) { 98 | return nullptr; 99 | } 100 | 101 | // Read length 102 | uint64_t len = *((uint64_t *) &file.data[log_read_offset]) & ~(0x1ull); 103 | log_read_offset += 8; 104 | 105 | // cout << "got len: " << len << endl; 106 | 107 | uint64_t checker = 0; 108 | 109 | vector result(len); 110 | uint32_t i = 0; 111 | for (; i0 && (i + 8) % 504 == 0) { 118 | assert(checker == 63); 119 | checker = 0; 120 | // cout << "apply checker" << endl; 121 | 122 | uint64_t buffer = *((uint64_t *) &file.data[log_read_offset]) >> 1; 123 | log_read_offset += 8; 124 | for (uint32_t c = 0; c<63; c++) { 125 | *((uint64_t *) &result[i - c * 8]) |= buffer & 0x1; 126 | buffer = buffer >> 1; 127 | } 128 | } 129 | } 130 | 131 | if (i % 504 != 0) { 132 | i -= 8; 133 | assert(checker>0); 134 | 135 | uint64_t buffer = *((uint64_t *) &file.data[log_read_offset]) >> 1; 136 | // cout << "apply tail checker " << buffer << " @" << log_read_offset << endl; 137 | 138 | log_read_offset += 8; 139 | for (uint32_t c = 0; c> 1; 142 | } 143 | checker = 0; 144 | } 145 | 146 | // Advance 147 | log_read_offset = (log_read_offset + 63) & ~63ull; 148 | assert(log_read_offset % 64 == 0); 149 | 150 | Entry *entry = new(malloc(sizeof(Entry) + result.size())) Entry(); 151 | entry->payload_size = result.size(); 152 | memcpy(entry->data, result.data(), result.size()); 153 | return unique_ptr(entry); 154 | } 155 | 156 | ub8 GetWrittenByteCount() const 157 | { 158 | return next_free + sizeof(File); 159 | } 160 | 161 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, 
Random &ranny) 162 | { 163 | ub8 current = 0; 164 | ub8 used_size = 0; 165 | 166 | vector entries; 167 | while (used_sizepayload_size = payload_size; 172 | entries.push_back(entry); 173 | current += payload_size + 8; 174 | used_size += payload_size; 175 | } 176 | return entries; 177 | } 178 | }; -------------------------------------------------------------------------------- /logging/LW_PMemLib.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct LogWriterPMemLib { 4 | 5 | struct Entry { 6 | ub8 payload_size; // header 7 | ub1 data[]; 8 | }; 9 | 10 | ub8 next_free; 11 | PMEMlogpool *log; 12 | 13 | LogWriterPMemLib(const std::string &file_name, ub8 file_size) 14 | { 15 | system(("rm -f " + file_name + "_pmemlib").c_str()); 16 | log = pmemlog_create((file_name + "_pmemlib").c_str(), file_size, 0666); 17 | if (log == nullptr) { 18 | cout << "fail initializing pmemlog log" << endl; 19 | exit(-1); 20 | } 21 | 22 | next_free = 0; 23 | } 24 | ub8 AddLogEntry(const Entry &entry) 25 | { 26 | int res = pmemlog_append(log, &entry, entry.payload_size + 8); // We need to writte the size of the log entry, because they dont 27 | if (res<0) { 28 | cout << "error writing to the pmem log" << endl; 29 | assert(false); 30 | throw; 31 | } 32 | 33 | return 11111111; 34 | } 35 | 36 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 37 | { 38 | ub8 current = 0; 39 | ub8 used_size = 0; 40 | 41 | vectorentries; 42 | while (used_sizepayload_size = payload_size; 47 | entries.push_back(entry); 48 | current += payload_size + 16; 49 | used_size += payload_size; 50 | } 51 | return entries; 52 | } 53 | }; -------------------------------------------------------------------------------- /logging/LW_Zero.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZero { 2 | 3 | struct Entry { 4 | ub4 payload_size; // header 5 | 
ub4 bit_count; // header 6 | ub1 data[]; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterZero(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | assert(next_free % 8 == 0); 32 | assert(entry.bit_count == 0); 33 | assert(entry.payload_size % 8 == 0); 34 | assert(sizeof(File) + next_free + entry.payload_size + 8(&entry); 39 | ub8 *entry_nvm = reinterpret_cast(nvm.Data() + next_free); 40 | for (ub4 i = 0; i(entry_nvm)->bit_count = pop_cnt; 45 | alex_WriteBack(entry_nvm, entry.payload_size + 8); 46 | alex_SFence(); 47 | 48 | // Advance and done 49 | next_free += entry.payload_size + 8; 50 | assert(next_free % 8 == 0); 51 | return next_free; 52 | } 53 | 54 | ub8 GetWrittenByteCount() const 55 | { 56 | return next_free + sizeof(File); 57 | } 58 | 59 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 60 | { 61 | ub8 current = 0; 62 | ub8 used_size = 0; 63 | 64 | vector entries; 65 | while (used_sizepayload_size = payload_size; 70 | entry->bit_count = 0; 71 | entries.push_back(entry); 72 | current += payload_size + 8; 73 | used_size += payload_size; 74 | } 75 | return entries; 76 | } 77 | }; -------------------------------------------------------------------------------- /logging/LW_ZeroAligned.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZeroAligned { 2 | 3 | struct Entry { 4 | ub4 payload_size; // header 5 | ub4 bit_count; // header 6 | ub1 data[]; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | 
static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterZeroAligned(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | assert(next_free % 8 == 0); 32 | assert(entry.bit_count == 0); 33 | assert(entry.payload_size % 8 == 0); 34 | assert(sizeof(File) + next_free + entry.payload_size + 8(&entry); 39 | ub8 *entry_nvm = reinterpret_cast(nvm.Data() + next_free); 40 | for (ub4 i = 0; i(entry_nvm)->bit_count = pop_cnt; 45 | alex_WriteBack(entry_nvm, entry.payload_size + 8); 46 | alex_SFence(); 47 | 48 | // Advance and done 49 | next_free += entry.payload_size + 8; 50 | next_free = (next_free + 63) & ~63ull; 51 | assert(next_free % 64 == 0); 52 | return next_free; 53 | } 54 | 55 | ub8 GetWrittenByteCount() const 56 | { 57 | return next_free + sizeof(File); 58 | } 59 | 60 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 61 | { 62 | ub8 current = 0; 63 | ub8 used_size = 0; 64 | 65 | vector entries; 66 | while (used_sizepayload_size = payload_size; 71 | entry->bit_count = 0; 72 | entries.push_back(entry); 73 | current += payload_size + 8; 74 | used_size += payload_size; 75 | } 76 | return entries; 77 | } 78 | }; -------------------------------------------------------------------------------- /logging/LW_ZeroBlocked.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZeroBlocked { 2 | 3 | struct Entry { 4 | ub4 payload_size; // header 5 | ub4 bit_count; // header 6 | ub1 data[]; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | 
LogWriterZeroBlocked(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | ub4 size = entry.payload_size + 8; 32 | 33 | assert(next_free % 8 == 0); 34 | assert(entry.bit_count == 0); 35 | assert(entry.payload_size % 8 == 0); 36 | assert(next_free + size=64); 39 | 40 | const ub1 *ram_begin = reinterpret_cast(&entry); 41 | ub1 *nvm_begin = reinterpret_cast(nvm.Data() + next_free); 42 | 43 | // Copy first cache line (and do not flush) 44 | ub4 pop_cnt = 0; 45 | FastCopy512(nvm_begin, ram_begin); 46 | pop_cnt += FastPopCount512(ram_begin); 47 | 48 | // Copy remaining full cache lines (and flush) 49 | ub4 pos = 64; 50 | for (; pos + 63(nvm_begin)->bit_count = pop_cnt; 67 | alex_WriteBack(nvm_begin); 68 | alex_SFence(); 69 | 70 | // Advance and done 71 | next_free += entry.payload_size + 8; 72 | next_free = (next_free + 63) & ~63ull; 73 | assert(next_free % 64 == 0); 74 | return next_free; 75 | } 76 | 77 | ub8 GetWrittenByteCount() const 78 | { 79 | return next_free + sizeof(File); 80 | } 81 | 82 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 83 | { 84 | ub8 current = 0; 85 | ub8 used_size = 0; 86 | 87 | vector entries; 88 | while (used_sizepayload_size = payload_size; 93 | entry->bit_count = 0; 94 | entries.push_back(entry); 95 | current += payload_size + 8; 96 | used_size += payload_size; 97 | } 98 | return entries; 99 | } 100 | }; -------------------------------------------------------------------------------- /logging/LW_ZeroCached.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZeroCached { 2 | 3 | struct Entry { 4 | ub8 payload_size; // header 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub1 padding[constants::kCacheLineByteCount]; 11 | 12 | // Log data 13 | ub1 data[]; 14 | }; 15 | 
static_assert(sizeof(File) == 64, ""); 16 | 17 | NonVolatileMemory &nvm; 18 | File &file; // == nvm 19 | ub8 next_free; 20 | ub8 cl_pos; 21 | ub8 active_cl_mem[16]; 22 | ub8 *active_cl; 23 | ub8 log_read_offset; 24 | 25 | LogWriterZeroCached(NonVolatileMemory &nvm) 26 | : nvm(nvm) 27 | , file(*reinterpret_cast(nvm.Data())) 28 | { 29 | next_free = 0; 30 | cl_pos = 0; 31 | log_read_offset = 0; 32 | 33 | active_cl = active_cl_mem; 34 | while ((ub8) active_cl % 64 != 0) { 35 | active_cl++; 36 | } 37 | assert((ub8) active_cl % 64 == 0); 38 | memset((ub1 *) active_cl, 0, 64); 39 | } 40 | 41 | ub8 AddLogEntry(const Entry &entry) 42 | { 43 | ub4 size = entry.payload_size + 8; 44 | ub4 blks = size / 8; 45 | 46 | assert(next_free % 8 == 0); 47 | assert(entry.payload_size % 8 == 0); 48 | assert(next_free + size(&entry); 52 | ub1 *nvm_begin = reinterpret_cast(file.data + (next_free & ~63ull)); 53 | 54 | // Head 55 | ub4 pos = 0; 56 | for (; pos GetNextLogEntry() // Read code is only to verify correctnes 126 | { 127 | if (log_read_offset == next_free) { 128 | return nullptr; 129 | } 130 | 131 | // Read length 132 | ub8 len = *(ub8 *) &file.data[log_read_offset]; 133 | log_read_offset += 8; 134 | ub8 pop_cnt = 0; 135 | pop_cnt += alex_PopCount(len); 136 | 137 | // Read data 138 | vector result(len); 139 | for (ub4 pos = 0; pospayload_size = result.size(); 155 | memcpy(entry->data, (ub1 *) result.data(), result.size()); 156 | return unique_ptr(entry); 157 | } 158 | 159 | ub8 GetWrittenByteCount() const 160 | { 161 | return next_free + sizeof(File); 162 | } 163 | 164 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 165 | { 166 | ub8 current = 0; 167 | ub8 used_size = 0; 168 | 169 | vector entries; 170 | while (used_sizepayload_size = payload_size; 175 | entries.push_back(entry); 176 | current += payload_size + 8; 177 | used_size += payload_size; 178 | } 179 | return entries; 180 | } 181 | }; 
-------------------------------------------------------------------------------- /logging/LW_ZeroSimd.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZeroSimd { 2 | 3 | struct Entry { 4 | ub4 payload_size; // header 5 | ub4 bit_count; // header 6 | ub1 data[]; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterZeroSimd(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | ub4 size = entry.payload_size + 8; 32 | 33 | assert(next_free % 8 == 0); 34 | assert(entry.bit_count == 0); 35 | assert(entry.payload_size % 8 == 0); 36 | assert(next_free + size=64); 39 | 40 | const ub1 *ram_begin = reinterpret_cast(&entry); 41 | ub1 *nvm_begin = reinterpret_cast(nvm.Data() + next_free); 42 | 43 | // Copy first cache line (and do not flush) 44 | ub4 pop_cnt = 0; 45 | FastCopy512Simd(nvm_begin, ram_begin); 46 | pop_cnt += FastPopCount512(ram_begin); 47 | 48 | // Copy remaining full cache lines (and flush) 49 | ub4 pos = 64; 50 | for (; pos + 63(nvm_begin)->bit_count = pop_cnt; 67 | alex_WriteBack(nvm_begin); 68 | alex_SFence(); 69 | 70 | // Advance and done 71 | next_free += entry.payload_size + 8; 72 | next_free = (next_free + 63) & ~63ull; 73 | assert(next_free % 64 == 0); 74 | return next_free; 75 | } 76 | 77 | ub8 GetWrittenByteCount() const 78 | { 79 | return next_free + sizeof(File); 80 | } 81 | 82 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 83 | { 84 | ub8 current = 0; 85 | ub8 used_size = 0; 86 | 87 | vector entries; 88 | while (used_sizepayload_size = payload_size; 93 | entry->bit_count = 0; 94 
| entries.push_back(entry); 95 | current += payload_size + 8; 96 | used_size += payload_size; 97 | } 98 | return entries; 99 | } 100 | }; -------------------------------------------------------------------------------- /logging/NonVolatileMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | // ------------------------------------------------------------------------------------- 13 | class NonVolatileMemory { 14 | public: 15 | NonVolatileMemory(ub8 byte_count); // Uses dram that is not file backed 16 | NonVolatileMemory(const std::string &file_name, ub8 byte_count); // Uses dram or nvm depending on the file path 17 | NonVolatileMemory(const NonVolatileMemory &) = delete; 18 | NonVolatileMemory &operator=(const NonVolatileMemory &) = delete; 19 | 20 | ~NonVolatileMemory(); 21 | 22 | ub1 *Data() { return data_ptr; } 23 | ub1 *End() { return data_ptr + byte_count; } 24 | ub8 GetByteCount() { return byte_count; } 25 | 26 | bool IsNvm() const { return is_nvm; } 27 | 28 | private: 29 | ub1 *data_ptr; 30 | std::string file_name; 31 | const ub8 byte_count; 32 | bool is_nvm; 33 | bool is_mapped_file; 34 | int file_fd; 35 | }; 36 | // ------------------------------------------------------------------------------------- 37 | NonVolatileMemory::NonVolatileMemory(ub8 byte_count) 38 | : byte_count(byte_count) 39 | , is_mapped_file(false) 40 | { 41 | assert(((ub8) ((off_t) byte_count)) == byte_count); 42 | 43 | assert(byte_count>0); // XXX 44 | 45 | data_ptr = AlignedAlloc(512, byte_count); 46 | 47 | is_nvm = false; 48 | } 49 | // ------------------------------------------------------------------------------------- 50 | NonVolatileMemory::NonVolatileMemory(const std::string &file_name, ub8 
byte_count) 51 | : file_name(file_name) 52 | , byte_count(byte_count) 53 | , is_nvm(true) 54 | , is_mapped_file(true) 55 | { 56 | assert(((ub8) ((off_t) byte_count)) == byte_count); 57 | 58 | file_fd = open(file_name.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 59 | int td = ftruncate(file_fd, byte_count); 60 | if (file_fd<0 || td<0) { 61 | std::cout << "unable to create file" << std::endl; 62 | exit(-1); 63 | } 64 | data_ptr = (ub1 *) mmap(nullptr, byte_count, PROT_WRITE, MAP_SHARED, file_fd, 0); 65 | } 66 | // ------------------------------------------------------------------------------------- 67 | NonVolatileMemory::~NonVolatileMemory() 68 | { 69 | // Benchmark code .. dont care ;p 70 | } 71 | // ------------------------------------------------------------------------------------- 72 | -------------------------------------------------------------------------------- /logging/Random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | // Based on: https://en.wikipedia.org/wiki/Xorshift 6 | class Random { 7 | public: 8 | explicit Random(ub8 seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 9 | : seed(seed) 10 | { 11 | } 12 | 13 | uint64_t Rand() 14 | { 15 | seed ^= (seed << 13); 16 | seed ^= (seed >> 7); 17 | return (seed ^= (seed << 17)); 18 | } 19 | 20 | ub8 seed; 21 | }; 22 | // ------------------------------------------------------------------------------------- 23 | -------------------------------------------------------------------------------- /page_flush/Common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | 
#include 4 | #include 5 | #include 6 | #include 7 | #include "libpmem.h" 8 | // ------------------------------------------------------------------------------------- 9 | using ub1 = uint8_t; 10 | using ub2 = uint16_t; 11 | using ub4 = uint32_t; 12 | using ub8 = uint64_t; 13 | // ------------------------------------------------------------------------------------- 14 | using sb1 = int8_t; 15 | using sb2 = int16_t; 16 | using sb4 = int32_t; 17 | using sb8 = int64_t; 18 | // ------------------------------------------------------------------------------------- 19 | namespace constants { 20 | const static ub4 kCacheLineByteCount = 64; // 64 Byte cache lines 21 | const static ub4 kPageByteCount = 1 << 14; // 16 KB 22 | const static ub4 kCacheLinesPerPage = kPageByteCount / kCacheLineByteCount; // 16KB/64Byte 23 | const static ub4 kPageAlignment = 512; // For O_Direct 24 | const static ub8 kInvalidPageId = ~0; 25 | } 26 | // ------------------------------------------------------------------------------------- 27 | #define a_mm_clflush(addr)\ 28 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 29 | #define a_mm_clflushopt(addr)\ 30 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 31 | #define a_mm_clwb(addr)\ 32 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 33 | #define a_mm_pcommit()\ 34 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 35 | // ------------------------------------------------------------------------------------- 36 | inline void alex_WriteBack(void *addr, ub4 len) 37 | { 38 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) 39 | a_mm_clwb((char *) uptr); 40 | } 41 | // ------------------------------------------------------------------------------------- 42 | inline void alex_WriteBack(void *addr) 43 | { 44 | addr = (ub1 *) ((uintptr_t) addr & ~(64 - 1)); 45 | a_mm_clwb((char *) addr); 46 | } 47 | // 
// Portable population count: number of 1-bits in a 32-bit word.
// Uses Kernighan's trick (i &= i - 1 clears the lowest set bit), so the loop
// runs once per set bit; used as the fallback where _mm_popcnt_u64 is missing.
inline int numberOfSetBits(uint32_t i)
{
   int count = 0;
   while (i != 0) {
      i &= i - 1; // drop the lowest set bit
      ++count;
   }
   return count;
}
reinterpret_cast(result); 120 | } 121 | // ------------------------------------------------------------------------------------- 122 | -------------------------------------------------------------------------------- /page_flush/NonVolatileMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include "Pages.hpp" 5 | #include 6 | // ------------------------------------------------------------------------------------- 7 | class NonVolatileMemory { 8 | public: 9 | NonVolatileMemory(ub8 byte_count); // Uses dram that is not file backed 10 | NonVolatileMemory(const std::string &file_name, ub8 byte_count); // Uses dram or nvm depending on the file path 11 | NonVolatileMemory(const NonVolatileMemory &) = delete; 12 | NonVolatileMemory &operator=(const NonVolatileMemory &) = delete; 13 | 14 | ~NonVolatileMemory(); 15 | 16 | ub1 *Data() { return data_ptr; } 17 | ub1 *End() { return data_ptr + byte_count; } 18 | ub8 GetByteCount() { return byte_count; } 19 | 20 | void FlushAll(); 21 | void Flush(ub8 from, ub8 length); 22 | 23 | NvmBufferFrame &GetNvmBufferFrame(ub8 id) 24 | { 25 | assert(data_ptr + id * sizeof(NvmBufferFrame)<=End()); 26 | assert(reinterpret_cast(data_ptr)[id].GetPage().Ptr() != nullptr); 27 | return reinterpret_cast(data_ptr)[id]; 28 | } 29 | 30 | bool IsRealNvm() const { return is_real_nvm; } 31 | 32 | private: 33 | ub1 *data_ptr; 34 | std::string file_name; 35 | const ub8 byte_count; 36 | bool is_real_nvm; 37 | bool is_mapped_file; 38 | }; 39 | // ------------------------------------------------------------------------------------- 40 | NonVolatileMemory::NonVolatileMemory(ub8 byte_count) 41 | : byte_count(byte_count) 42 | , is_mapped_file(false) 43 | { 44 | assert(((ub8) ((off_t) byte_count)) == byte_count); 45 | assert(byte_count>0); 46 | 47 | data_ptr = AlignedAlloc(512, 
byte_count); 48 | 49 | is_real_nvm = false; 50 | } 51 | // ------------------------------------------------------------------------------------- 52 | NonVolatileMemory::NonVolatileMemory(const std::string &file_name, ub8 byte_count) 53 | : file_name(file_name) 54 | , byte_count(byte_count) 55 | , is_mapped_file(true) 56 | { 57 | assert(((ub8) ((off_t) byte_count)) == byte_count); 58 | 59 | // No need to do anything if zero byte are requested; Does this happen ?? 60 | assert(byte_count>0); // XXX 61 | if (byte_count == 0) { 62 | data_ptr = nullptr; 63 | return; 64 | } 65 | 66 | // Map the file (our pmem wrapper works with normal memory by falling back to mmap) 67 | size_t acquired_byte_count; 68 | data_ptr = reinterpret_cast(pmem_map_file(file_name.c_str(), byte_count, PMEM_FILE_CREATE, 0666, &acquired_byte_count, nullptr)); 69 | if (data_ptr == nullptr) { 70 | std::cout << "Failed to create file: '" << file_name << "'." << std::endl; 71 | throw; 72 | } 73 | if (acquired_byte_count != byte_count) { 74 | std::cout << "Failed to allocate requested size for file: '" << file_name << "'. 
(Requested=" << byte_count << ", Aquired=" << acquired_byte_count << ")" << std::endl; 75 | throw; 76 | } 77 | 78 | // Do this only once, as it is expensive 79 | is_real_nvm = pmem_is_pmem(data_ptr, 1); 80 | } 81 | // ------------------------------------------------------------------------------------- 82 | NonVolatileMemory::~NonVolatileMemory() 83 | { 84 | if (is_mapped_file) { 85 | pmem_unmap(data_ptr, byte_count); 86 | } else { 87 | free(data_ptr); 88 | } 89 | } 90 | // ------------------------------------------------------------------------------------- 91 | void NonVolatileMemory::FlushAll() 92 | { 93 | if (is_mapped_file) { 94 | pmem_persist(data_ptr, byte_count); 95 | } 96 | } 97 | // ------------------------------------------------------------------------------------- 98 | void NonVolatileMemory::Flush(ub8 from, ub8 length) 99 | { 100 | if (is_mapped_file) { 101 | pmem_persist(data_ptr + from, length); 102 | } 103 | } 104 | // ------------------------------------------------------------------------------------- 105 | -------------------------------------------------------------------------------- /page_flush/Pages.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include 5 | #include 6 | // ------------------------------------------------------------------------------------- 7 | class Page { 8 | public: 9 | template 10 | T *Ptr() { return reinterpret_cast( data.data()); } 11 | 12 | template 13 | const T *Ptr() const { return reinterpret_cast( data.data()); } 14 | 15 | private: 16 | std::array data; 17 | }; 18 | // ------------------------------------------------------------------------------------- 19 | template 20 | class AutoAlignedPage { 21 | public: 22 | Page &GetPage() 23 | { 24 | assert(padding.data()(res)); 27 | return *reinterpret_cast( res); 28 | } 29 | 30 | const Page 
&GetPage() const 31 | { 32 | assert(padding.data()(res)); 35 | return *reinterpret_cast( res); 36 | } 37 | 38 | private: 39 | std::array padding; 40 | std::array data; 41 | }; 42 | // ------------------------------------------------------------------------------------- 43 | static_assert(sizeof(Page) == constants::kPageByteCount, "Pages are always 16kb"); 44 | // ------------------------------------------------------------------------------------- 45 | class NvmBufferFrame { 46 | public: 47 | inline void init() 48 | { 49 | dirty = false; 50 | page_id = constants::kInvalidPageId; 51 | pvn = 0; 52 | } 53 | 54 | Page &GetPage() { return page.GetPage(); } 55 | const Page &GetPage() const { return page.GetPage(); } 56 | 57 | bool dirty; 58 | ub8 page_id; 59 | ub8 pvn; 60 | 61 | private: 62 | AutoAlignedPage page; 63 | }; 64 | // ------------------------------------------------------------------------------------- 65 | static_assert(sizeof(NvmBufferFrame) % 8 == 0, "NvmBufferFrame should be eight byte aligned."); 66 | // ------------------------------------------------------------------------------------- 67 | -------------------------------------------------------------------------------- /page_flush/Random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | // Based on: https://en.wikipedia.org/wiki/Xorshift 6 | class Random { 7 | public: 8 | explicit Random(ub8 seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 9 | : seed(seed) 10 | { 11 | } 12 | 13 | uint64_t Rand() 14 | { 15 | seed ^= (seed << 13); 16 | seed ^= (seed >> 7); 17 | return (seed ^= (seed << 17)); 18 | } 19 | 20 | ub8 seed; 21 | }; 22 | // 
------------------------------------------------------------------------------------- 23 | -------------------------------------------------------------------------------- /page_flush/VolatileMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | class VolatileMemory { 6 | 7 | public: 8 | VolatileMemory(ub8 byte_count); 9 | ~VolatileMemory(); 10 | VolatileMemory(const VolatileMemory &) = delete; 11 | VolatileMemory &operator=(const VolatileMemory &) = delete; 12 | 13 | ub1 *Data() { return data_ptr; } 14 | ub1 *End() { return data_ptr + byte_count; } 15 | 16 | ub8 GetByteCount() const { return byte_count; } 17 | 18 | template 19 | T *GetPtr(ub8 offset = 0) { return reinterpret_cast(data_ptr) + offset; } 20 | ub1 *GetPtr(ub8 offset = 0) { return data_ptr + offset; } 21 | 22 | private: 23 | ub1 *const data_ptr; 24 | const ub8 byte_count; 25 | }; 26 | // ------------------------------------------------------------------------------------- 27 | VolatileMemory::VolatileMemory(ub8 byte_count) 28 | : data_ptr(new ub1[byte_count]) 29 | , byte_count(byte_count) 30 | { 31 | assert(byte_count != 0); 32 | } 33 | // ------------------------------------------------------------------------------------- 34 | VolatileMemory::~VolatileMemory() 35 | { 36 | delete[] data_ptr; 37 | } 38 | // ------------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /page_flush/page_flush.cpp: -------------------------------------------------------------------------------- 1 | #include "FullBufferFrame.hpp" 2 | #include "Pages.hpp" 3 | #include "Random.hpp" 4 | #include "VolatileMemory.hpp" 5 | #include "NonVolatileMemory.hpp" 6 | 
// A bit too much, but ensures that it works..
58 | system(("rm -rf " + file).c_str()); 59 | pbp = pmemblk_create(file.c_str(), constants::kPageByteCount, pmemlib_pool_size, 0666); 60 | if (pbp == NULL) { 61 | cout << "failed creating pmdk pmem pool" << endl; 62 | assert(false); 63 | throw; 64 | } 65 | 66 | // Check that we got enough pages in the pool (should always work) 67 | ub4 nelements = pmemblk_nblock(pbp); 68 | if (nelementsinit(); 85 | nvm_bf->page_id = p; 86 | GetRamBf(p)->Init(); 87 | GetRamBf(p)->SwapIn(p, nvm_bf); 88 | nvm_mapping[p] = nvm_bf; 89 | } 90 | 91 | // All techniques (cow and micro log) need a small buffer, we put this after all regular pages 92 | // The buffers also overlap because only one of them is used in a given experiment 93 | // Kind of a hacky design, but good enough for some benchmark code 94 | free_nvm_bf = &nvm.GetNvmBufferFrame(page_count); 95 | 96 | // Logs need to be aligned nicely for SIMD cpy 97 | ub1 *nice_aligned_position = reinterpret_cast(&nvm.GetNvmBufferFrame(page_count)); 98 | while (ub8(nice_aligned_position) % 64 != 0) { 99 | nice_aligned_position++; 100 | } 101 | micro_log = reinterpret_cast(nice_aligned_position); 102 | micro_log_2 = reinterpret_cast(nice_aligned_position); 103 | assert(reinterpret_cast(micro_log) + sizeof(MicroLog)(micro_log_2) + sizeof(MicroLog2)Translate(); 112 | memset(data, 'a', constants::kPageByteCount); 113 | } 114 | } 115 | 116 | // Set cl_count cls to 'a' in DRAM and those which are still 'x' in dram to 'a' in NVM. 117 | // Hence, 'a' is the dirty data in DRAM and after a flush everything on NVM should be 'a'. 
118 | void MakeRandomCacheLinesDirty(ub4 cl_count, bool make_other_cls_resident) 119 | { 120 | set offsets; 121 | for (ub4 i = 0; i offsets_cpy = offsets; 127 | for (ub4 i = 0; iTranslate(cl * constants::kCacheLineByteCount, constants::kCacheLineByteCount); 133 | memset(data, 'a', constants::kCacheLineByteCount); 134 | offsets_cpy.erase(random_cl); 135 | } 136 | 137 | for (auto iter : offsets_cpy) { 138 | ub1 *data = GetMappedNvmBf(p)->GetPage().Ptr() + iter * constants::kCacheLineByteCount; 139 | memset(data, 'a', constants::kCacheLineByteCount); 140 | if (make_other_cls_resident) { 141 | ub1 *ram_data = GetRamBf(p)->Translate(iter * constants::kCacheLineByteCount, constants::kCacheLineByteCount); 142 | memset(ram_data, 'a', constants::kCacheLineByteCount); 143 | } 144 | } 145 | 146 | // Check 147 | if (make_other_cls_resident) { 148 | assert(GetRamBf(p)->GetResidentCacheLineCount() == constants::kCacheLinesPerPage); 149 | } else { 150 | assert(GetRamBf(p)->GetResidentCacheLineCount() == cl_count); 151 | } 152 | assert(GetRamBf(p)->GetDirtyCacheLineCount() == cl_count); 153 | } 154 | } 155 | 156 | void FlushAll_Strawman() 157 | { 158 | for (ub4 i = 0; iFlush(); 160 | } 161 | } 162 | 163 | void FlushAll_PMDK() 164 | { 165 | for (ub4 p = 0; pIsAnythingDirty()) { 167 | pmemblk_write(pbp, GetRamBf(p)->RamPtr(), p); 168 | GetRamBf(p)->MakeCleanAfterFlushOfPMDK(); 169 | } 170 | } 171 | } 172 | 173 | void FlushAll_Shadow() 174 | { 175 | ub8 lsn = 0; 176 | free_nvm_bf->init(); 177 | free_nvm_bf->page_id = constants::kInvalidPageId; 178 | 179 | for (ub4 p = 0; pIsAnythingDirty()) { 181 | NvmBufferFrame *new_free_one = GetRamBf(p)->FlushShadow(free_nvm_bf, lsn++); 182 | assert(GetRamBf(p)->GetNvmBufferFrame() == free_nvm_bf); 183 | free_nvm_bf = new_free_one; 184 | nvm_mapping[p] = GetRamBf(p)->GetNvmBufferFrame(); 185 | } 186 | } 187 | } 188 | 189 | void FlushAll_MicroLog() 190 | { 191 | micro_log->page_id = constants::kInvalidPageId; 192 | micro_log->count = 0; 193 | 194 
| for (ub4 p = 0; pIsAnythingDirty()) { 196 | GetRamBf(p)->FlushMicroLog(*micro_log); 197 | } 198 | } 199 | } 200 | 201 | void FlushAll_MicroLog2() 202 | { 203 | memset(micro_log_2, 0, sizeof(MicroLog2)); 204 | 205 | for (ub4 p = 0; pIsAnythingDirty()) { 207 | GetRamBf(p)->FlushMicroLog(*micro_log_2); 208 | } 209 | } 210 | } 211 | 212 | void PrintPages() 213 | { 214 | for (ub4 p = 0; pRamPtr()[i]; 224 | } 225 | cout << endl << p << " nvm: "; 226 | for (ub4 i = 0; iGetPage().Ptr()[i]; 228 | } 229 | cout << endl; 230 | } 231 | 232 | // Checks that each cl in NVM is c 233 | void CheckNvmContentEqualsTo(char c) 234 | { 235 | for (ub4 p = 0; ppage_id == p); 237 | assert(GetMappedNvmBf(p)->page_id == p); 238 | for (ub4 i = 0; iGetPage().Ptr()[i]); 240 | } 241 | } 242 | } 243 | }; 244 | 245 | void RunBenchmarkThreaded(string tech, bool all_resident, function callback) 246 | { 247 | atomic ready_count(0); 248 | atomic start_barrier(false); 249 | vector> threads; 250 | vector times(THREAD_COUNT, 0); 251 | for (ub4 tid = 0; tid([&, tid]() { 253 | FlushTest ft(NVM_FILE + string("_") + to_string(tid), PAGE_COUNT_PER_THREAD); 254 | ft.InitializePages(); 255 | ft.MakeRandomCacheLinesDirty(DIRTY_CL_COUNT, all_resident); 256 | ready_count++; 257 | while (!start_barrier); 258 | times[tid] += RunWithTiming([&]() { callback(ft); }); 259 | ft.CheckNvmContentEqualsTo('a'); 260 | })); 261 | } 262 | while (ready_count != THREAD_COUNT); 263 | start_barrier = true; 264 | for (ub4 tid = 0; tidjoin(); 266 | } 267 | 268 | ub8 time_sum_of_all_threads = 0; 269 | for (ub4 tid = 0; tidconstants::kCacheLinesPerPage) { 327 | cout << "invalid DIRTY_CL_COUNT " << DIRTY_CL_COUNT << endl; 328 | exit(-1); 329 | } 330 | 331 | cerr << "Config:" << endl; 332 | cerr << "----------------------------" << endl; 333 | cerr << "PAGE_COUNT_PER_THREAD: " << PAGE_COUNT_PER_THREAD << endl; 334 | cerr << "DIRTY_CL_COUNT: " << DIRTY_CL_COUNT << endl; 335 | cerr << "THREAD_COUNT: " << THREAD_COUNT << endl; 336 | cerr 
<< "NVM_FILE: " << NVM_FILE << endl; 337 | #ifdef STREAMING 338 | cerr << "STREAMING: " << "yes" << endl; 339 | #else 340 | cerr << "STREAMING: " << "no" << endl; 341 | #endif 342 | 343 | RunMultiThreaded(); 344 | 345 | return 0; 346 | }; 347 | -------------------------------------------------------------------------------- /reproduce/all.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for all figure 4 | ./reproduce/bw_cache_lines.sh 5 | ./reproduce/bw_threads.sh 6 | ./reproduce/coroutines.sh 7 | ./reproduce/inplace.sh 8 | ./reproduce/latency_read.sh 9 | ./reproduce/latency_write.sh 10 | ./reproduce/logging.sh 11 | ./reproduce/page_flush.sh 12 | -------------------------------------------------------------------------------- /reproduce/bw_cache_lines.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 1 4 | # PMem Bandwidth: Varying Access Granularity 5 | 6 | echo "" > results/bw_cache_lines.txt 7 | 8 | COMPILE="clang++ -g0 -O3 -march=native -std=c++14 -DNDEBUG=1 bandwidth/bw.cpp -pthread" 9 | 10 | BYTE_COUNT=10e9 11 | THREAD_COUNT=24 12 | for BLOCK_SIZE in `seq 64 64 768`; do 13 | 14 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -o a1.out || exit -1 15 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -o a2.out || exit -1 16 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -DUSE_CLWB=1 -o a3.out || exit -1 17 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -DSTREAMING=1 -o a4.out || exit -1 18 | 19 | # Read nvm 20 | ./a1.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 21 | 22 | # Read ram 23 | ./a1.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 24 | 25 | # Write nvm 26 | ./a2.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 27 | ./a3.out 
${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 28 | ./a4.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 29 | 30 | # Write ram 31 | ./a2.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 32 | ./a3.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 33 | ./a4.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 34 | done; 35 | -------------------------------------------------------------------------------- /reproduce/bw_threads.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 2 4 | # PMem Bandwidth: Varying Thread Count 5 | 6 | echo "" > results/bw_threads.txt 7 | 8 | COMPILE="clang++ -g0 -O3 -march=native -std=c++14 -DNDEBUG=1 bandwidth/bw.cpp -pthread" 9 | 10 | BYTE_COUNT=10e9 11 | 12 | for BLOCK_SIZE in 256 1048576; do 13 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -o a1.out || exit -1 14 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -o a2.out || exit -1 15 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -DUSE_CLWB=1 -o a3.out || exit -1 16 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -DSTREAMING=1 -o a4.out || exit -1 17 | 18 | for THREAD_COUNT in `seq 1 30`; do 19 | # Read nvm 20 | ./a1.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_threads.txt 21 | 22 | # Read ram 23 | ./a1.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_threads.txt 24 | 25 | # Write nvm 26 | ./a2.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_threads.txt 27 | ./a3.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_threads.txt 28 | ./a4.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_threads.txt 29 | 30 | # Write ram 31 | ./a2.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a 
results/bw_threads.txt 32 | ./a3.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_threads.txt 33 | ./a4.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_threads.txt 34 | done; 35 | done; -------------------------------------------------------------------------------- /reproduce/coroutines.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Building clang++ is required 4 | 5 | # cd build 6 | # cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi;clang" -DCMAKE_BUILD_TYPE=Release -G "Unix Makefiles" ../llvm 7 | # make -j48 8 | 9 | echo "" > results/coroutines.txt 10 | 11 | export CLANG_DIR=~/workspace/llvm-project/build 12 | 13 | ${CLANG_DIR}/bin/clang++ -fcoroutines-ts -g0 -O3 -march=native -std=c++2a -mllvm -inline-threshold=50000 coroutine/coro_insert.cpp -stdlib=libc++ -nostdinc++ -I${CLANG_DIR}/include/c++/v1 -L${CLANG_DIR}/lib -Wl,-rpath,${CLANG_DIR}/lib -DNDEBUG=1 || exit 14 | 15 | for GROUP_SIZE in 1 2 3 4 5 6 7 8 10 12 14 16 24 32 40 48 56 64; do 16 | ./a.out 1e7 1e7 ${GROUP_SIZE} nvm /mnt/pmem0/renen | tee -a results/coroutines.txt 17 | ./a.out 1e7 1e7 ${GROUP_SIZE} ram /mnt/pmem0/renen | tee -a results/coroutines.txt 18 | done 19 | 20 | ${CLANG_DIR}/bin/clang++ -fcoroutines-ts -g0 -O3 -march=native -std=c++2a -mllvm -inline-threshold=50000 coroutine/coro_lookup.cpp -stdlib=libc++ -nostdinc++ -I${CLANG_DIR}/include/c++/v1 -L${CLANG_DIR}/lib -Wl,-rpath,${CLANG_DIR}/lib -DNDEBUG=1 || exit 21 | 22 | for GROUP_SIZE in 1 2 3 4 5 6 7 8 10 12 14 16 24 32 40 48 56 64; do 23 | ./a.out 1e7 1e7 ${GROUP_SIZE} nvm /mnt/pmem0/renen | tee -a results/coroutines.txt 24 | ./a.out 1e7 1e7 ${GROUP_SIZE} ram /mnt/pmem0/renen | tee -a results/coroutines.txt 25 | done 26 | -------------------------------------------------------------------------------- /reproduce/inplace.sh: 
# Sequential ram
# Sequential nvm
results/interference_seq_nvm.txt 45 | 46 | # Random ram 47 | echo "" > results/interference_rnd_ram.txt 48 | ./a.out 0 0 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 49 | ./a.out 1 0 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 50 | ./a.out 5 0 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 51 | ./a.out 10 0 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 52 | ./a.out 0 1 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 53 | ./a.out 0 5 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 54 | ./a.out 0 10 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 55 | ./a.out 0 0 14 1 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 56 | ./a.out 0 0 14 5 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 57 | ./a.out 0 0 14 10 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 58 | ./a.out 0 0 14 0 1 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 59 | ./a.out 0 0 14 0 5 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 60 | ./a.out 0 0 14 0 10 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 61 | ./a.out 0 0 14 0 0 1 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 62 | ./a.out 0 0 14 0 0 5 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 63 | ./a.out 0 0 14 0 0 10 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 64 | 65 | # Random nvm 66 | echo "" > results/interference_rnd_nvm.txt 67 | ./a.out 0 0 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 68 | ./a.out 1 0 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 69 | ./a.out 5 0 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 70 | ./a.out 10 0 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 71 | ./a.out 0 1 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 72 | ./a.out 0 5 0 14 0 0 
/mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 73 | ./a.out 0 10 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 74 | ./a.out 0 0 1 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 75 | ./a.out 0 0 5 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 76 | ./a.out 0 0 10 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 77 | ./a.out 0 0 0 14 1 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 78 | ./a.out 0 0 0 14 5 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 79 | ./a.out 0 0 0 14 10 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 80 | ./a.out 0 0 0 14 0 1 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 81 | ./a.out 0 0 0 14 0 5 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 82 | ./a.out 0 0 0 14 0 10 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 83 | 84 | ## Log nvm 85 | #./a.out 0 0 0 0 10 0 /mnt/pmem0/renen 86 | #./a.out 1 0 0 0 10 0 /mnt/pmem0/renen 87 | #./a.out 5 0 0 0 10 0 /mnt/pmem0/renen 88 | #./a.out 10 0 0 0 10 0 /mnt/pmem0/renen 89 | #./a.out 0 1 0 0 10 0 /mnt/pmem0/renen 90 | #./a.out 0 5 0 0 10 0 /mnt/pmem0/renen 91 | #./a.out 0 10 0 0 10 0 /mnt/pmem0/renen 92 | #./a.out 0 0 1 0 10 0 /mnt/pmem0/renen 93 | #./a.out 0 0 5 0 10 0 /mnt/pmem0/renen 94 | #./a.out 0 0 10 0 10 0 /mnt/pmem0/renen 95 | #./a.out 0 0 0 1 10 0 /mnt/pmem0/renen 96 | #./a.out 0 0 0 5 10 0 /mnt/pmem0/renen 97 | #./a.out 0 0 0 10 10 0 /mnt/pmem0/renen 98 | #./a.out 0 0 0 0 10 1 /mnt/pmem0/renen 99 | #./a.out 0 0 0 0 10 5 /mnt/pmem0/renen 100 | #./a.out 0 0 0 0 10 10 /mnt/pmem0/renen 101 | # 102 | ## Page nvm 103 | #./a.out 0 0 0 0 0 10 /mnt/pmem0/renen 104 | #./a.out 1 0 0 0 0 10 /mnt/pmem0/renen 105 | #./a.out 5 0 0 0 0 10 /mnt/pmem0/renen 106 | #./a.out 10 0 0 0 0 10 /mnt/pmem0/renen 107 | #./a.out 0 1 0 0 0 10 /mnt/pmem0/renen 108 | #./a.out 0 5 0 0 0 10 /mnt/pmem0/renen 109 | #./a.out 0 10 0 0 0 10 /mnt/pmem0/renen 110 | 
#./a.out 0 0 1 0 0 10 /mnt/pmem0/renen 111 | #./a.out 0 0 5 0 0 10 /mnt/pmem0/renen 112 | #./a.out 0 0 10 0 0 10 /mnt/pmem0/renen 113 | #./a.out 0 0 0 1 0 10 /mnt/pmem0/renen 114 | #./a.out 0 0 0 5 0 10 /mnt/pmem0/renen 115 | #./a.out 0 0 0 10 0 10 /mnt/pmem0/renen 116 | #./a.out 0 0 0 0 1 10 /mnt/pmem0/renen 117 | #./a.out 0 0 0 0 5 10 /mnt/pmem0/renen 118 | #./a.out 0 0 0 0 10 10 /mnt/pmem0/renen 119 | -------------------------------------------------------------------------------- /reproduce/latency_read.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 3 4 | # Read latency 5 | 6 | echo "" > results/latency_read.txt 7 | 8 | # Read ram 9 | clang++ -g0 -O3 -march=native -DNDEBUG=1 -std=c++14 latency/read_latency.cpp -pthread && ./a.out 1 10e9 1e9 ram ${PMEM_PATH}/file_0 | tee -a results/latency_read.txt 10 | 11 | # Read nvm 12 | clang++ -g0 -O3 -march=native -DNDEBUG=1 -std=c++14 latency/read_latency.cpp -pthread && ./a.out 1 10e9 1e9 nvm ${PMEM_PATH}/file_0 | tee -a results/latency_read.txt 13 | -------------------------------------------------------------------------------- /reproduce/latency_write.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 4 4 | # Write latency 5 | 6 | echo "" > results/latency_write.txt 7 | 8 | for type in single sequential random; do 9 | # Write FLUSH 10 | clang++ -g0 -O3 -DNDEBUG=1 -march=native -std=c++14 latency/write_latency.cpp -pthread -DFLUSH=1 \ 11 | && ./a.out nvm $type 10e9 ${PMEM_PATH}/file_0 | tee -a results/latency_write.txt 12 | 13 | # Write FLUSH_OPT 14 | clang++ -g0 -O3 -DNDEBUG=1 -march=native -std=c++14 latency/write_latency.cpp -pthread -DFLUSH_OPT=1 \ 15 | && ./a.out nvm $type 10e9 ${PMEM_PATH}/file_0 | tee -a results/latency_write.txt 16 | 17 | # Write CLWB 18 | clang++ -g0 -O3 -DNDEBUG=1 
-march=native -std=c++14 latency/write_latency.cpp -pthread -DCLWB=1 \ 19 | && ./a.out nvm $type 10e9 ${PMEM_PATH}/file_0 | tee -a results/latency_write.txt 20 | 21 | # Write STREAMING 22 | clang++ -g0 -O3 -DNDEBUG=1 -march=native -std=c++14 latency/write_latency.cpp -pthread -DSTREAMING=1 \ 23 | && ./a.out nvm $type 10e9 ${PMEM_PATH}/file_0 | tee -a results/latency_write.txt 24 | 25 | done -------------------------------------------------------------------------------- /reproduce/logging.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 6 4 | # Logging 5 | 6 | echo "" > results/logging.txt 7 | 8 | clang++ -g0 -O3 -DNDEBUG=1 -march=native -std=c++17 logging/logging.cpp -Invml/src/include/ nvml/src/nondebug/libpmem.a nvml/src/nondebug/libpmemlog.a -lpthread -lndctl -ldaxctl \ 9 | && ./a.out 56 512 10e9 5 ${PMEM_PATH}/file 0 | tee -a results/logging.txt 10 | -------------------------------------------------------------------------------- /reproduce/page_flush.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 5 4 | # Page flush 5 | PAGE_COUNT=100000 # ~1.6GB per thread 6 | 7 | echo "" > results/page_flush_cls_1t.txt 8 | echo "" > results/page_flush_threads_16cls.txt 9 | echo "" > results/page_flush_cls_7t.txt 10 | 11 | clang++ page_flush/page_flush.cpp -std=c++17 -g0 -O3 -march=native -DNDEBUG=1 -DSTREAMING=1 -Invml/src/include/ nvml/src/nondebug/libpmem.a nvml/src/nondebug/libpmemblk.a -lpthread -lndctl -ldaxctl || exit 12 | 13 | # Experiment 1: 1 thread, _x_ dirty cls, streaming 14 | THREAD_COUNT=1 15 | for DIRTY_CL_COUNT in `seq 4 4 256`; do 16 | ./a.out ${PAGE_COUNT} ${DIRTY_CL_COUNT} ${THREAD_COUNT} ${PMEM_PATH}/file | tee -a results/page_flush_cls_1t.txt 17 | done 18 | 19 | # Experiment 2: _x_ thread, 16 dirty cls, streaming 20 | 
DIRTY_CL_COUNT=16 21 | for THREAD_COUNT in `seq 1 30`; do 22 | ./a.out ${PAGE_COUNT} ${DIRTY_CL_COUNT} ${THREAD_COUNT} ${PMEM_PATH}/file | tee -a results/page_flush_threads_16cls.txt 23 | done 24 | 25 | # Experiment 3: 7 thread, _x_ dirty cls, streaming 26 | THREAD_COUNT=7 27 | for DIRTY_CL_COUNT in `seq 4 4 256`; do 28 | ./a.out ${PAGE_COUNT} ${DIRTY_CL_COUNT} ${THREAD_COUNT} ${PMEM_PATH}/file | tee -a results/page_flush_cls_7t.txt 29 | done -------------------------------------------------------------------------------- /reproduce/validate.sh: -------------------------------------------------------------------------------- 1 | 2 | # Check that pmem path is set 3 | if [ -z "$PMEM_PATH" ]; 4 | then 5 | echo "Please set PMEM_PATH to point to a directory on PMem." 6 | echo "Example: export PMEM_PATH=/mnt/pmem0/renen" 7 | exit -1 8 | fi 9 | 10 | if [ ! -d ${PMEM_PATH} ]; 11 | then 12 | echo "The configured PMEM_PATH '${PMEM_PATH}' is not a directory." 13 | echo "Example: export PMEM_PATH=/mnt/pmem0/renen" 14 | exit -1 15 | fi 16 | -------------------------------------------------------------------------------- /results/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexandervanrenen/pmembench/7ee9286caad36efc996768fb5d2f5d3158c7dde8/results/.keep --------------------------------------------------------------------------------