├── .gitignore ├── CMakeLists.txt ├── README.md ├── bandwidth └── bw.cpp ├── coroutine ├── OverridingList.hpp ├── coro_insert.cpp ├── coro_lookup.cpp └── reuse.cpp ├── inplace ├── Common.hpp ├── CowBased.hpp ├── InPlace-highBit.hpp ├── InPlace-sliding.hpp ├── LogBased.hpp ├── NonVolatileMemory.hpp ├── ValidationBased.hpp ├── bench.cpp └── tester.cpp ├── interference ├── Common.hpp ├── LogWriter.hpp ├── NonVolatileMemory.hpp ├── PageFlusher.hpp ├── Pages.hpp ├── RandomReader.hpp ├── SequentialReader.hpp └── interference.cpp ├── latency ├── read_latency.cpp ├── read_latency_alex.cpp └── write_latency.cpp ├── logging ├── Common.hpp ├── LW_Classic.hpp ├── LW_ClassicAligned.hpp ├── LW_ClassicCached.hpp ├── LW_Header.hpp ├── LW_HeaderAligned.hpp ├── LW_HeaderAlignedDancing.hpp ├── LW_HeaderDancing.hpp ├── LW_Mnemosyne.hpp ├── LW_MnemosyneAligned.hpp ├── LW_PMemLib.hpp ├── LW_Zero.hpp ├── LW_ZeroAligned.hpp ├── LW_ZeroBlocked.hpp ├── LW_ZeroCached.hpp ├── LW_ZeroSimd.hpp ├── NonVolatileMemory.hpp ├── Random.hpp └── logging.cpp ├── page_flush ├── Common.hpp ├── FullBufferFrame.hpp ├── NonVolatileMemory.hpp ├── Pages.hpp ├── Random.hpp ├── VolatileMemory.hpp └── page_flush.cpp ├── reproduce ├── all.sh ├── bw_cache_lines.sh ├── bw_threads.sh ├── coroutines.sh ├── inplace.sh ├── interference.sh ├── latency_read.sh ├── latency_write.sh ├── logging.sh ├── page_flush.sh └── validate.sh └── results └── .keep /.gitignore: -------------------------------------------------------------------------------- 1 | results 2 | a.out 3 | .DS_Store 4 | .idea 5 | cmake-build-debug/ 6 | intel.sh 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(damoncode) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | file(GLOB_RECURSE SOURCE_FILES ./*.cpp ./*.hpp) 7 | 8 | add_executable(damoncode ${SOURCE_FILES}) 9 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 3 | [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](http://unlicense.org/) 4 | 5 | pmem-bench 6 | ========== 7 | 8 | A set of algorithms and benchmarks for persistent memory (PMem). 9 | The algorithms are described in two research papers: 10 | - There is a [short version](https://db.in.tum.de/people/sites/vanrenen/papers/nvm_stats.pdf) published at [DaMoN](https://sites.google.com/view/damon2019/home-damon-2019). 11 | - And an [extended version](https://link.springer.com/content/pdf/10.1007/s00778-020-00622-9.pdf) that appeared in the [VLDBJ 2020](https://link.springer.com/journal/778/volumes-and-issues/29-6). 12 | 13 | Structure 14 | --------- 15 | Each algorithm and experiment has a benchmark script to compile and run it in the [reproduce/](reproduce/) folder. 16 | If, for example, you are interested in measuring the read latency of your PMem device, have a look at [reproduce/latency_read.sh](reproduce/latency_read.sh). 17 | These file also contain instructions (already in executable bash syntax) on how to compile and use the code. 18 | 19 | The source code of the algorithms and experiments are contained in the respective their folders on the root level of the project. 20 | For example, the read latency experiment can be found in [latency](latency/). 21 | 22 | The benchmark scripts (in [reproduce/](reproduce/)) print their results to `stdout` and also create a log file in [results/](results/) using `tee`. 23 | The output is in an easy to parse format and can be used for creating plots (not included in the repository as the ones in the paper are pgfplots). 
24 | 25 | Issues & Contributions 26 | ---------------------- 27 | Note that the source code is a prototype implementation for a research paper. 28 | There might be bugs and other limitations. 29 | If you find an issue or run into troubles feel free to contact me via an issue in this repository. 30 | 31 | Licence 32 | ------- 33 | You are free to choose any of the above licences when using the source code. 34 | However, I encourage you in a non binding way to follow the [blessing from the SQLite folks](https://github.com/sqlite/sqlite/blob/master/LICENSE.md): 35 | 36 | ``` 37 | May you do good and not evil. 38 | May you find forgiveness for yourself and forgive others. 39 | May you share freely, never taking more than you give. 40 | ``` 41 | 42 | Authors 43 | ------- 44 | Alexander van Renen 45 | -------------------------------------------------------------------------------- /bandwidth/bw.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | #define _mm_clflush(addr)\ 28 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 29 | #define _mm_clflushopt(addr)\ 30 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 31 | #define _mm_clwb(addr)\ 32 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 33 | #define _mm_pcommit()\ 34 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 35 | 36 | #ifndef BLOCK_SIZE 37 | #define BLOCK_SIZE 64 38 | #endif 39 | 40 | // clang++ -g0 -O3 -march=native -std=c++14 bw.cpp -pthread && ./a.out 1e9 1 ram /mnt/pmem0/renen 41 | int main(int argc, char **argv) 42 | { 43 | const uint32_t READ_COUNT = 10; 44 | const 
uint32_t block_size = BLOCK_SIZE; 45 | 46 | if (argc != 5) { 47 | cout << "usage: " << argv[0] << " datasize thread_count (nvm|ram) path" << endl; 48 | throw; 49 | } 50 | bool use_clwb = false; 51 | #ifdef USE_CLWB 52 | use_clwb=true; 53 | #endif 54 | bool use_streaming = false; 55 | #ifdef STREAMING 56 | use_streaming =true; 57 | #endif 58 | bool use_write = false; 59 | #ifdef WRITE 60 | use_write =true; 61 | #endif 62 | 63 | const uint64_t total_size = atof(argv[1]); 64 | const uint64_t thread_count = atof(argv[2]); 65 | bool use_ram = argv[3][0] == 'r'; 66 | const string PATH = argv[4]; 67 | 68 | if (thread_count == 0) { 69 | cout << "invalid thread count" << endl; 70 | return 0; 71 | } 72 | 73 | const uint64_t chunk_size = total_size / thread_count; 74 | vector> workers(thread_count); 75 | 76 | atomic start_barrier(0); 77 | atomic global_iterations(0); 78 | atomic global_counter(0); 79 | atomic running_flag(true); 80 | 81 | for (int t = 0; t([&, t]() { 83 | const uint64_t iteration_count = chunk_size / block_size; 84 | #ifdef STREAMING 85 | uint8_t write_data[64] = {0xaa}; 86 | __m512i write_data_vec = _mm512_stream_load_si512(write_data); 87 | #endif 88 | 89 | // Init data ---------------------------------------------- 90 | uint8_t *keys; 91 | if (use_ram) { 92 | keys = new uint8_t[chunk_size + block_size]; 93 | } else { 94 | int fd = open((PATH + "/file_" + to_string(t)).c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 95 | int td = ftruncate(fd, chunk_size + block_size); 96 | if (fd<0 || td<0) { 97 | cout << "unable to create file" << endl; 98 | exit(-1); 99 | } 100 | keys = (uint8_t *) mmap(nullptr, chunk_size + block_size, PROT_WRITE, MAP_SHARED, fd, 0); 101 | } 102 | // Align to 'block_size' byte 103 | while (((uint64_t) keys) % block_size != 0) { 104 | keys++; 105 | } 106 | 107 | assert(((uint64_t) keys) % 64 == 0); 108 | memset(keys, 'a', chunk_size); 109 | uint64_t *random_offsets = new uint64_t[iteration_count]; 110 | for (uint64_t i = 0; ijoin(); 162 | 
} 163 | 164 | double required_time = chrono::duration_cast(end - start).count(); 165 | double gbs = (global_iterations * block_size) / required_time; 166 | //@formatter:off 167 | std::cout << "res" 168 | << " use_clwb: " << use_clwb 169 | << " use_ram: " << use_ram 170 | << " use_streaming: " << use_streaming 171 | << " use_write: " << use_write 172 | << " thread_count: " << thread_count 173 | << " total_size: " << total_size 174 | << " block_size: " << BLOCK_SIZE 175 | << " global_counter: " << global_counter 176 | << " sum(GB/s): " << gbs << std::endl; 177 | //@formatter:on 178 | 179 | return 0; 180 | } 181 | -------------------------------------------------------------------------------- /coroutine/OverridingList.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | // ------------------------------------------------------------------------------------- 6 | template 7 | class OverridingList { 8 | public: 9 | OverridingList() 10 | : head(nullptr) {} 11 | 12 | void Push(Value ptr) 13 | { 14 | Entry *entry = reinterpret_cast(ptr); 15 | entry->next = head; 16 | head = ptr; 17 | } 18 | 19 | Value Pop() 20 | { 21 | assert(!Empty()); 22 | Value result = head; 23 | head = reinterpret_cast(head)->next; 24 | return result; 25 | } 26 | 27 | Value Top() 28 | { 29 | assert(!Empty()); 30 | return head; 31 | } 32 | 33 | bool Empty() const 34 | { 35 | return head == nullptr; 36 | } 37 | 38 | private: 39 | static_assert(std::is_pointer::value, "InPlaceList can not work on values."); 40 | 41 | struct Entry { 42 | Value next; 43 | }; 44 | 45 | Value head; 46 | }; 47 | // ------------------------------------------------------------------------------------- 48 | -------------------------------------------------------------------------------- /inplace/Common.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "libpmem.h" 11 | // ------------------------------------------------------------------------------------- 12 | #define a_mm_clflush(addr)\ 13 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 14 | #define a_mm_clflushopt(addr)\ 15 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 16 | #define a_mm_clwb(addr)\ 17 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 18 | #define a_mm_pcommit()\ 19 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 20 | // ------------------------------------------------------------------------------------- 21 | using ub1 = uint8_t; 22 | using ub2 = uint16_t; 23 | using ub4 = uint32_t; 24 | using ub8 = uint64_t; 25 | // ------------------------------------------------------------------------------------- 26 | using sb1 = int8_t; 27 | using sb2 = int16_t; 28 | using sb4 = int32_t; 29 | using sb8 = int64_t; 30 | // ------------------------------------------------------------------------------------- 31 | namespace constants { 32 | const static ub4 kCacheLineByteCount = 64; // 64 Byte cache lines 33 | } 34 | // ------------------------------------------------------------------------------------- 35 | inline void alex_WriteBack(void *addr, ub4 len) 36 | { 37 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 38 | a_mm_clwb((char *) uptr); 39 | } 40 | } 41 | // ------------------------------------------------------------------------------------- 42 | inline void alex_WriteBack(void *addr) 43 | { 44 | addr = (ub1 *) ((uintptr_t) addr & ~(64 - 1)); 45 | a_mm_clwb((char *) addr); 46 | } 47 | // 
------------------------------------------------------------------------------------- 48 | inline void alex_FlushOpt(void *addr, ub4 len) 49 | { 50 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 51 | a_mm_clflushopt((char *) uptr); 52 | } 53 | } 54 | // ------------------------------------------------------------------------------------- 55 | inline void alex_FlushOpt(void *addr) 56 | { 57 | a_mm_clflushopt((char *) addr); 58 | } 59 | // ------------------------------------------------------------------------------------- 60 | inline void alex_SFence() 61 | { 62 | _mm_sfence(); 63 | } 64 | // ------------------------------------------------------------------------------------- 65 | inline void alex_MFence() 66 | { 67 | _mm_mfence(); 68 | } 69 | // ------------------------------------------------------------------------------------- 70 | inline ub8 alex_PopCount(ub8 value) 71 | { 72 | return _mm_popcnt_u64(value); 73 | } 74 | // ------------------------------------------------------------------------------------- 75 | inline void alex_StreamClToNvm(void *dest, const void *src) 76 | { 77 | assert(((ub8) dest) % 64 == 0); 78 | assert(((ub8) src) % 64 == 0); 79 | __m512i reg = _mm512_load_si512(src); 80 | _mm512_stream_si512((__m512i *) dest, reg); 81 | } 82 | // ------------------------------------------------------------------------------------- 83 | void FastCopy512(ub1 *dest, const ub1 *src) 84 | { 85 | assert(((ub8) dest) % 64 == 0); 86 | memcpy(dest, src, 64); 87 | } 88 | // ------------------------------------------------------------------------------------- 89 | ub4 FastPopCount512(const ub1 *ptr) 90 | { 91 | ub4 res = 0; 92 | for (ub4 i = 0; i<64; i += 8) { 93 | res += alex_PopCount(*(ub8 *) (&ptr[i])); 94 | } 95 | return res; 96 | } 97 | // ------------------------------------------------------------------------------------- 98 | void FastCopy512Simd(ub1 *dest, const ub1 *src) 99 | { 100 | assert(((ub8) dest) 
% 64 == 0); 101 | __m512i reg = _mm512_loadu_si512(src); 102 | _mm512_store_si512((__m512i *) dest, reg); 103 | } 104 | // ------------------------------------------------------------------------------------- 105 | void alex_FastCopyAndWriteBack(ub1 *nvm_begin, const ub1 *ram_begin, ub4 size) 106 | { 107 | assert(size>0); 108 | 109 | // Copy head bytes 110 | ub4 pos = 0; 111 | ub8 off = (ub8) nvm_begin % 64; 112 | if (off != 0) { 113 | ub8 byte_count = (64 - off)>size ? size : (64 - off); 114 | memcpy(nvm_begin, ram_begin, byte_count); 115 | alex_WriteBack(nvm_begin); 116 | pos = byte_count; 117 | } 118 | 119 | // Copy full cache lines (and flush) 120 | for (; pos + 63(result); 145 | } 146 | // ------------------------------------------------------------------------------------- 147 | // Based on: https://en.wikipedia.org/wiki/Xorshift 148 | class Random { 149 | public: 150 | explicit Random(uint64_t seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 151 | : seed(seed) 152 | { 153 | } 154 | 155 | uint64_t Rand() 156 | { 157 | seed ^= (seed << 13); 158 | seed ^= (seed >> 7); 159 | return (seed ^= (seed << 17)); 160 | } 161 | 162 | uint64_t seed; 163 | }; 164 | // ------------------------------------------------------------------------------------- 165 | void DumpHex(const void *data_in, uint32_t size, std::ostream &os) 166 | { 167 | char buffer[16]; 168 | 169 | const char *data = reinterpret_cast(data_in); 170 | for (uint32_t i = 0; i(data_in); 181 | for (int32_t i = size - 1; i>=0; i--) { 182 | sprintf(buffer, "%02hhx", data[i]); 183 | os << buffer[0] << buffer[1] << " "; 184 | } 185 | } 186 | // ------------------------------------------------------------------------------------- 187 | template 188 | inline TARGET Cast(SOURCE *ptr) { return reinterpret_cast(ptr); } 189 | // ------------------------------------------------------------------------------------- 190 | char *CreateAlignedString(Random &ranny, uint32_t len) 191 | { 192 
| char *data = (char *) malloc(len + 1); 193 | assert((uint64_t) data % 4 == 0); 194 | 195 | for (uint32_t i = 0; i 204 | struct Operation { 205 | alignas(64) uint64_t entry_id; 206 | std::array data; 207 | }; 208 | template 209 | inline bool operator==(const Operation &lhs, const Operation &rhs) 210 | { 211 | return lhs.entry_id == rhs.entry_id && memcmp(lhs.data.data(), rhs.data.data(), complete_size - 8) == 0; 212 | } 213 | template 214 | inline bool operator!=(const Operation &lhs, const Operation &rhs) 215 | { 216 | return lhs.entry_id != rhs.entry_id || memcmp(lhs.data.data(), rhs.data.data(), complete_size - 8) != 0; 217 | } 218 | // ------------------------------------------------------------------------------------- 219 | -------------------------------------------------------------------------------- /inplace/CowBased.hpp: -------------------------------------------------------------------------------- 1 | #include "Common.hpp" 2 | #include "NonVolatileMemory.hpp" 3 | #include 4 | // ------------------------------------------------------------------------------------- 5 | namespace cow { 6 | // ------------------------------------------------------------------------------------- 7 | template 8 | struct InplaceCow { 9 | alignas(64) bool is_a_active; // Start at a cl to allow the use of streaming ops 10 | alignas(64) std::array a; 11 | alignas(64) std::array b; // Start at a cl to allow the use of streaming ops 12 | 13 | void Write(const char *input) 14 | { 15 | bool is_a_active_cache = is_a_active; 16 | if (is_a_active_cache) { 17 | alex_FastCopyAndWriteBack((ub1 *) b.data(), (const ub1 *) input, BYTE_COUNT); 18 | alex_SFence(); 19 | } else { 20 | alex_FastCopyAndWriteBack((ub1 *) a.data(), (const ub1 *) input, BYTE_COUNT); 21 | alex_SFence(); 22 | } 23 | 24 | is_a_active = !is_a_active_cache; 25 | alex_WriteBack(&is_a_active); 26 | alex_SFence(); 27 | } 28 | 29 | void Read(char *output) 30 | { 31 | if (is_a_active) { 32 | memcpy(output, a.data(), 
BYTE_COUNT); 33 | } else { 34 | memcpy(output, b.data(), BYTE_COUNT); 35 | } 36 | } 37 | 38 | void Init(Random &ranny) 39 | { 40 | is_a_active = true; // ranny.Rand() % 2; // Make it so that the branch is ~50% 41 | } 42 | }; 43 | // ------------------------------------------------------------------------------------- 44 | template<> 45 | struct InplaceCow<16> { 46 | alignas(64) uint8_t active_version_id; // Start at a cl to allow the use of streaming ops 47 | std::array, 2> versions; 48 | 49 | void Write(const char *input) 50 | { 51 | assert((uint64_t) this % 64 == 0); 52 | assert((uint64_t) input % 64 == 0); 53 | assert((void *) &active_version_id == (void *) this); 54 | assert(active_version_id == 0 || active_version_id == 1); 55 | 56 | // Load cl and update 57 | if (active_version_id == 0) { 58 | memcpy(versions[1].data(), input, 16); 59 | } else { 60 | memcpy(versions[0].data(), input, 16); 61 | } 62 | 63 | // Write new data 64 | __m512i reg = _mm512_loadu_si512(this); 65 | _mm512_stream_si512((__m512i *) this, reg); 66 | alex_SFence(); 67 | 68 | // Update version id 69 | __m512i mask = _mm512_castsi128_si512(_mm_cvtsi32_si128(0x01)); 70 | reg = _mm512_xor_si512(reg, mask); 71 | 72 | // Write new version id 73 | _mm512_stream_si512((__m512i *) this, reg); 74 | alex_SFence(); 75 | } 76 | 77 | void Read(char *output) 78 | { 79 | memcpy(output, versions[active_version_id].data(), 16); 80 | } 81 | 82 | void Init(Random &ranny) 83 | { 84 | active_version_id = true; // ranny.Rand() % 2; // Make it so that the branch is ~50% 85 | } 86 | }; 87 | // ------------------------------------------------------------------------------------- 88 | template<> 89 | struct InplaceCow<32> { 90 | alignas(64) uint8_t active_version_id; // Start at a cl to allow the use of streaming ops 91 | std::array a; 92 | alignas(64) std::array b; 93 | 94 | void Write(const char *input) 95 | { 96 | assert((uint64_t) this % 64 == 0); 97 | assert((uint64_t) input % 64 == 0); 98 | assert((void *) 
&active_version_id == (void *) this); 99 | assert(active_version_id == 0 || active_version_id == 1); 100 | 101 | // Load cl and update 102 | if (active_version_id == 0) { 103 | // Version and data on different cls -> use clwb, cause no need for streaming 104 | alex_FastCopyAndWriteBack((ub1 *) b.data(), (const ub1 *) input, 32); 105 | alex_SFence(); 106 | 107 | active_version_id = active_version_id ^ 0x1; 108 | alex_WriteBack(&active_version_id); 109 | alex_SFence(); 110 | } else { 111 | // Version and data both on first cl -> use streaming, because this cache line is re-written 112 | memcpy(a.data(), input, 32); 113 | 114 | // Write new data 115 | __m512i reg = _mm512_loadu_si512(this); 116 | _mm512_stream_si512((__m512i *) this, reg); 117 | alex_SFence(); 118 | 119 | // Update version id 120 | __m512i mask = _mm512_castsi128_si512(_mm_cvtsi32_si128(0x01)); 121 | reg = _mm512_xor_si512(reg, mask); 122 | 123 | // Write new version id 124 | _mm512_stream_si512((__m512i *) this, reg); 125 | alex_SFence(); 126 | } 127 | } 128 | 129 | void Read(char *output) 130 | { 131 | if (active_version_id == 0) { 132 | memcpy(output, a.data(), 32); 133 | } else { 134 | memcpy(output, b.data(), 32); 135 | } 136 | } 137 | 138 | void Init(Random &ranny) 139 | { 140 | active_version_id = true; // ranny.Rand() % 2; // Make it so that the branch is ~50% 141 | } 142 | }; 143 | // ------------------------------------------------------------------------------------- 144 | template<> 145 | struct InplaceCow<48> { 146 | alignas(64) uint8_t active_version_id; // Start at a cl to allow the use of streaming ops 147 | std::array a; 148 | alignas(64) std::array b; 149 | 150 | void Write(const char *input) 151 | { 152 | assert((uint64_t) this % 64 == 0); 153 | assert((uint64_t) input % 64 == 0); 154 | assert((void *) &active_version_id == (void *) this); 155 | assert(active_version_id == 0 || active_version_id == 1); 156 | 157 | // Load cl and update 158 | if (active_version_id == 0) { 159 | // 
Version and data on different cls -> use clwb, cause no need for streaming 160 | alex_FastCopyAndWriteBack((ub1 *) b.data(), (const ub1 *) input, 48); 161 | alex_SFence(); 162 | 163 | active_version_id = active_version_id ^ 0x1; 164 | alex_WriteBack(&active_version_id); 165 | alex_SFence(); 166 | } else { 167 | // Version and data both on first cl -> use streaming, because this cache line is re-written 168 | memcpy(a.data(), input, 48); 169 | 170 | // Write new data 171 | __m512i reg = _mm512_loadu_si512(this); 172 | _mm512_stream_si512((__m512i *) this, reg); 173 | alex_SFence(); 174 | 175 | // Update version id 176 | __m512i mask = _mm512_castsi128_si512(_mm_cvtsi32_si128(0x01)); 177 | reg = _mm512_xor_si512(reg, mask); 178 | 179 | // Write new version id 180 | _mm512_stream_si512((__m512i *) this, reg); 181 | alex_SFence(); 182 | } 183 | } 184 | 185 | void Read(char *output) 186 | { 187 | if (active_version_id == 0) { 188 | memcpy(output, a.data(), 48); 189 | } else { 190 | memcpy(output, b.data(), 48); 191 | } 192 | } 193 | 194 | void Init(Random &ranny) 195 | { 196 | active_version_id = true; // ranny.Rand() % 2; // Make it so that the branch is ~50% 197 | } 198 | }; 199 | // ------------------------------------------------------------------------------------- 200 | // For larger sizes it matters less and less .. 
201 | // ------------------------------------------------------------------------------------- 202 | template 203 | struct CowBasedUpdates { 204 | NonVolatileMemory nvm_data; 205 | uint64_t entry_count; 206 | InplaceCow *entries; 207 | 208 | CowBasedUpdates(const std::string &path, uint64_t entry_count) 209 | : nvm_data(path + "/cowbased_data_file", entry_count * sizeof(InplaceCow)) 210 | , entry_count(entry_count) 211 | { 212 | assert(nvm_data.GetByteCount()>=entry_size * entry_count); 213 | 214 | memset(nvm_data.Data(), 'a', nvm_data.GetByteCount()); 215 | entries = (InplaceCow *) nvm_data.Data(); 216 | Random ranny; 217 | for (uint32_t i = 0; i &op, uint32_t id) 224 | { 225 | assert(id &result, uint32_t id) 230 | { 231 | entries[id].Read((char *) &result); 232 | return result.entry_id; 233 | } 234 | }; 235 | // ------------------------------------------------------------------------------------- 236 | } 237 | // ------------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /inplace/InPlace-highBit.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include "NonVolatileMemory.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | // ------------------------------------------------------------------------------------- 10 | namespace high { 11 | // ------------------------------------------------------------------------------------- 12 | __m256i constAdd = _mm256_set1_epi64x(0x4000000000000000L); 13 | __m256i constAnd1 = _mm256_set1_epi64x(0xC000000000000000L); 14 | __m256i constAnd2 = _mm256_set1_epi64x(0x3FFFFFFFFFFFFFFFL); 15 | __m128i constAnd3 = _mm_set1_epi32(0x7FFFFFFF); 16 | __m128i constGatherIndex = _mm_set_epi32(6, 4, 2, 0); 17 | __m128i constAnd = 
_mm_set1_epi32(0x7FFFFFFF); 18 | // ------------------------------------------------------------------------------------- 19 | template 20 | struct InplaceField { 21 | static constexpr uint32_t BIT_COUNT = BYTE_COUNT * 8; 22 | static constexpr uint32_t BLOCK_COUNT = (BIT_COUNT + 30) / 31; 23 | static constexpr uint32_t META_BLOCK_COUNT = (BYTE_COUNT + 111) / 112; // For every 112 input bytes we need one meta block (8byte) 24 | 25 | alignas(64) uint64_t _blocks[BLOCK_COUNT]; 26 | 27 | void Reset() 28 | { 29 | memset(_blocks, 0, sizeof(uint64_t) * BLOCK_COUNT); 30 | } 31 | 32 | template 33 | void WriteRec(const uint32_t *values, uint64_t *blocks) 34 | { 35 | uint32_t currentHighBits = 0; 36 | 37 | constexpr uint32_t REMAINING_BYTE = BYTE_COUNT - (META * 112); 38 | constexpr uint32_t ITERATION_COUNT = (META == META_BLOCK_COUNT - 1) ? REMAINING_BYTE / 16 : 7; 39 | 40 | for (uint32_t i = 0; i(values + 28, blocks + 29); 61 | } 62 | 63 | template<> 64 | void WriteRec(const uint32_t *, uint64_t *) {} 65 | 66 | void WriteNoCheck(const char *data) 67 | { 68 | const uint32_t *values = (const uint32_t *) data; 69 | WriteRec<0>(values, _blocks); 70 | } 71 | 72 | template 73 | void ReadRec(const char *result, uint64_t *blocks) 74 | { 75 | constexpr uint32_t REMAINING_BYTE = BYTE_COUNT - (META * 112); 76 | constexpr uint32_t ITERATION_COUNT = (META == META_BLOCK_COUNT - 1) ? 
REMAINING_BYTE / 16 : 7; 77 | 78 | uint32_t high_bits = blocks[0]; 79 | 80 | for (int32_t i = ITERATION_COUNT - 1; i>=0; i--) { 81 | // Gather 4 * 32-Bit Values out of 64-Bit Array (gather lower 32 bits of each 64 bit value) 82 | __m128i values = _mm_i32gather_epi32((const int *) &blocks[1 + i * 4], constGatherIndex, 4); 83 | 84 | // Remove highest bit, so we have 4 * 31 bi values 85 | values = _mm_and_si128(values, constAnd); 86 | 87 | // Store the four high bits (lowest 4 bits in block[0]) to bit position 0, 8, 16, 24 (== align to byt boundary) 88 | __m128i highBitValue = _mm_cvtsi64_si128(_pdep_u64(high_bits, 0x01010101)); 89 | high_bits = high_bits >> 4; 90 | 91 | // Convert from 8 bit values to 32 bit values and shift left to the highest position 92 | highBitValue = _mm_cvtepu8_epi32(highBitValue); 93 | highBitValue = _mm_slli_epi32(highBitValue, 31); 94 | 95 | // OR together 96 | values = _mm_or_si128(values, highBitValue); 97 | 98 | // Store result 99 | _mm_storeu_si128((__m128i *) (result + i * 16), values); 100 | } 101 | 102 | ReadRec(result + 112, blocks + 29); 103 | } 104 | 105 | template<> 106 | void ReadRec(const char *result, uint64_t *blocks) {} 107 | 108 | void ReadNoCheck(char *result) 109 | { 110 | ReadRec<0>(result, _blocks); 111 | } 112 | }; 113 | // ------------------------------------------------------------------------------------- 114 | template 115 | struct InPlaceLikeUpdates { 116 | 117 | static bool CanBeUsed(uint32_t entry_size_param) { return entry_size_param % 16 == 0 && entry_size_param<128; } 118 | 119 | NonVolatileMemory nvm_data; 120 | uint64_t entry_count; 121 | InplaceField *entries; 122 | 123 | InPlaceLikeUpdates(const std::string &path, uint64_t entry_count) 124 | : nvm_data(path + "/inplace_file", sizeof(InplaceField) * entry_count) 125 | , entry_count(entry_count) 126 | { 127 | std::vector data(entry_size, 'a'); 128 | entries = (InplaceField *) nvm_data.Data(); 129 | for (uint64_t i = 0; i &op, uint32_t id) 136 | { 137 | 
entries[id].WriteNoCheck((const char *) &op); 138 | for (uint32_t i = 0; i); i += 64) { 139 | char *addr = (char *) (entries + id) + i; 140 | assert((uint64_t) addr % 64 == 0); 141 | alex_WriteBack(addr); 142 | } 143 | alex_SFence(); 144 | } 145 | 146 | uint64_t ReadSingleResult(Operation &result, uint32_t id) 147 | { 148 | entries[id].ReadNoCheck((char *) &result); 149 | return result.entry_id; 150 | } 151 | }; 152 | // ------------------------------------------------------------------------------------- 153 | } 154 | // ------------------------------------------------------------------------------------- 155 | -------------------------------------------------------------------------------- /inplace/LogBased.hpp: -------------------------------------------------------------------------------- 1 | #include "Common.hpp" 2 | #include "NonVolatileMemory.hpp" 3 | #include 4 | // ------------------------------------------------------------------------------------- 5 | template 6 | struct LogWriterZeroCached { 7 | static_assert(entry_size % 8 == 0); 8 | 9 | struct File { 10 | // Header 11 | uint8_t padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | uint8_t data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | uint64_t next_free; 21 | uint64_t cl_pos; 22 | uint64_t active_cl_mem[16]; 23 | uint64_t *active_cl; 24 | uint64_t log_read_offset; 25 | 26 | LogWriterZeroCached(NonVolatileMemory &nvm) 27 | : nvm(nvm) 28 | , file(*reinterpret_cast(nvm.Data())) 29 | { 30 | next_free = 0; 31 | cl_pos = 0; 32 | log_read_offset = 0; 33 | 34 | active_cl = active_cl_mem; 35 | while ((uint64_t) active_cl % 64 != 0) { 36 | active_cl++; 37 | } 38 | assert((uint64_t) active_cl % 64 == 0); 39 | memset((uint8_t *) active_cl, 0, 64); 40 | } 41 | 42 | uint64_t AddLogEntry(const Operation &entry) 43 | { 44 | uint32_t blks = entry_size / 8; 45 | 46 | assert(next_free % 8 == 0); 47 | assert(next_free + 
entry_size(&entry); 51 | uint8_t *nvm_begin = reinterpret_cast(file.data + (next_free & ~63ull)); 52 | 53 | // Head 54 | uint32_t pos = 0; 55 | for (; pos 136 | struct LogBasedUpdates { 137 | const static uint64_t LOG_BUFFER_SIZE = 50e9; 138 | NonVolatileMemory nvm_log; 139 | NonVolatileMemory nvm_data; 140 | LogWriterZeroCached log_writer; 141 | uint64_t entry_count; 142 | Operation *data_on_nvm; 143 | 144 | LogBasedUpdates(const std::string &path, uint64_t entry_count) 145 | : nvm_log(path + "/logbased_log_file", LOG_BUFFER_SIZE) 146 | , nvm_data(path + "/logbased_data_file", entry_count * sizeof(Operation)) 147 | , log_writer(nvm_log) 148 | , entry_count(entry_count) 149 | { 150 | assert(nvm_log.GetByteCount()>=LOG_BUFFER_SIZE); 151 | assert(nvm_data.GetByteCount()>=entry_size * entry_count); 152 | 153 | memset(nvm_data.Data(), 'a', nvm_data.GetByteCount()); 154 | pmem_persist(nvm_data.Data(), nvm_data.GetByteCount()); 155 | 156 | data_on_nvm = (Operation *) nvm_data.Data(); 157 | } 158 | 159 | ~LogBasedUpdates() 160 | { 161 | if (log_writer.GetWrittenByteCount()>=nvm_log.GetByteCount()) { 162 | std::cout << "write more log than we had space.. 
not good" << std::endl; 163 | exit(-1); 164 | } 165 | } 166 | 167 | void DoUpdate(const Operation &op, uint32_t id) 168 | { 169 | assert(id)); 174 | alex_FastCopyAndWriteBack(entry_begin, (ub1 *) &op, entry_size); 175 | alex_SFence(); 176 | } 177 | 178 | uint64_t ReadSingleResult(Operation &result, uint32_t id) 179 | { 180 | result = data_on_nvm[id]; 181 | return result.entry_id; 182 | } 183 | }; 184 | // ------------------------------------------------------------------------------------- 185 | -------------------------------------------------------------------------------- /inplace/NonVolatileMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | // ------------------------------------------------------------------------------------- 13 | class NonVolatileMemory { 14 | public: 15 | NonVolatileMemory(ub8 byte_count); // Uses dram that is not file backed 16 | NonVolatileMemory(const std::string &file_name, ub8 byte_count); // Uses dram or nvm depending on the file path 17 | NonVolatileMemory(const NonVolatileMemory &) = delete; 18 | NonVolatileMemory &operator=(const NonVolatileMemory &) = delete; 19 | 20 | ~NonVolatileMemory(); 21 | 22 | ub1 *Data() { return data_ptr; } 23 | ub1 *End() { return data_ptr + byte_count; } 24 | ub8 GetByteCount() { return byte_count; } 25 | 26 | bool IsNvm() const { return is_nvm; } 27 | 28 | private: 29 | ub1 *data_ptr; 30 | std::string file_name; 31 | const ub8 byte_count; 32 | bool is_nvm; 33 | bool is_mapped_file; 34 | int file_fd; 35 | }; 36 | // ------------------------------------------------------------------------------------- 37 | NonVolatileMemory::NonVolatileMemory(ub8 byte_count) 38 | : byte_count(byte_count) 39 | , is_mapped_file(false) 40 | 
{ 41 | assert(((ub8) ((off_t) byte_count)) == byte_count); 42 | 43 | assert(byte_count>0); // XXX 44 | 45 | data_ptr = AlignedAlloc(512, byte_count); 46 | 47 | is_nvm = false; 48 | } 49 | // ------------------------------------------------------------------------------------- 50 | NonVolatileMemory::NonVolatileMemory(const std::string &file_name, ub8 byte_count) 51 | : file_name(file_name) 52 | , byte_count(byte_count) 53 | , is_nvm(true) 54 | , is_mapped_file(true) 55 | { 56 | assert(((ub8) ((off_t) byte_count)) == byte_count); 57 | 58 | file_fd = open(file_name.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 59 | int td = ftruncate(file_fd, byte_count); 60 | if (file_fd<0 || td<0) { 61 | std::cout << "unable to create file" << std::endl; 62 | exit(-1); 63 | } 64 | data_ptr = (ub1 *) mmap(nullptr, byte_count, PROT_WRITE, MAP_SHARED, file_fd, 0); 65 | } 66 | // ------------------------------------------------------------------------------------- 67 | NonVolatileMemory::~NonVolatileMemory() 68 | { 69 | // Benchmark code .. 
dont care ;p 70 | } 71 | // ------------------------------------------------------------------------------------- 72 | -------------------------------------------------------------------------------- /inplace/ValidationBased.hpp: -------------------------------------------------------------------------------- 1 | #include "Common.hpp" 2 | #include 3 | #include 4 | // ------------------------------------------------------------------------------------- 5 | template 6 | struct ValidationBased { 7 | uint64_t entry_count; 8 | std::vector > data; 9 | 10 | ValidationBased(const std::string &, uint64_t entry_count) 11 | : entry_count(entry_count) 12 | , data(entry_count) 13 | { 14 | memset(data.data(), 'a', sizeof(Operation) * entry_count); 15 | } 16 | 17 | void DoUpdate(const Operation &op, uint32_t id) 18 | { 19 | assert(id &result, uint32_t id) 24 | { 25 | result = data[id]; 26 | return result.entry_id; 27 | } 28 | }; 29 | // ------------------------------------------------------------------------------------- 30 | -------------------------------------------------------------------------------- /inplace/bench.cpp: -------------------------------------------------------------------------------- 1 | #include "InPlace-highBit.hpp" 2 | #include "InPlace-sliding.hpp" 3 | #include "LogBased.hpp" 4 | #include "CowBased.hpp" 5 | #include "ValidationBased.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | // ------------------------------------------------------------------------------------- 13 | using namespace std; 14 | // ------------------------------------------------------------------------------------- 15 | #ifndef ENTRY_SIZE 16 | #error Please define ENTRY_SIZE 17 | #define ENTRY_SIZE 16 18 | #endif 19 | // ------------------------------------------------------------------------------------- 20 | uint64_t ENTRY_COUNT; 21 | uint64_t DATA_SIZE; 22 | string NVM_PATH; 23 | vector> log_result; 24 | constexpr bool VALIDATE = false; 25 | 
bool SEQUENTIAL; 26 | const uint64_t INDIVIDUAL_OPERATION_COUNT = 10000; 27 | const uint64_t ITERATION_COUNT = 10; 28 | // ------------------------------------------------------------------------------------- 29 | vector> PrepareSequentialOperations() 30 | { 31 | std::vector> results; 32 | results.resize(ENTRY_COUNT); 33 | for (uint64_t i = 0; i> PrepareRandomOperations() 41 | { 42 | // Init 43 | uint64_t *helper = new uint64_t[ENTRY_COUNT]; 44 | for (uint64_t i = 0; i> results; 56 | results.resize(ENTRY_COUNT); 57 | for (uint64_t i = 0; i GetConfidenceIntervall(vector &individual_times, string hint) 67 | { 68 | sort(individual_times.begin(), individual_times.end()); 69 | 70 | double lower_time = individual_times[INDIVIDUAL_OPERATION_COUNT * 0.025]; 71 | double med_time = individual_times[INDIVIDUAL_OPERATION_COUNT * 0.50]; 72 | double upper_time = individual_times[INDIVIDUAL_OPERATION_COUNT * 0.975]; 73 | double lower_per_second = 1e9 / upper_time; // swapped!! 74 | double med_per_second = 1e9 / med_time; 75 | double upper_per_second = 1e9 / lower_time; 76 | 77 | return make_tuple(med_per_second, med_per_second - lower_per_second, upper_per_second - med_per_second); 78 | } 79 | // ------------------------------------------------------------------------------------- 80 | template 81 | void RunExperiment(const vector> &operations, const std::string &competitor_name) 82 | { 83 | Operation buffer = {}; 84 | vector ids_only; 85 | ids_only.reserve(operations.size()); 86 | for (auto &iter : operations) { 87 | ids_only.push_back(iter.entry_id); 88 | } 89 | 90 | COMPETITOR competitor(NVM_PATH, ENTRY_COUNT); 91 | 92 | uint64_t check_sum_to_prevent_optimizations = 0; 93 | 94 | // Updates throughput -> execute many updates and take the avg time 95 | vector updates_per_second; 96 | for (uint32_t iteration = 0; iteration(end_ts - begin_ts).count(); 103 | updates_per_second.push_back((operations.size() * 1e9) / ns); 104 | } 105 | 106 | // // Individual updates -> get the time 
for individual updates 107 | // double lower_updates_per_second_factor = 0; 108 | // double upper_updates_per_second_factor = 0; 109 | // double med_updates_per_second = 0; 110 | // { 111 | // vector times(INDIVIDUAL_OPERATION_COUNT); 112 | // for (uint64_t i = 0; i(end_ts - begin_ts).count(); 117 | // alex_MFence(); 118 | // } 119 | // tie(med_updates_per_second, lower_updates_per_second_factor, upper_updates_per_second_factor) = GetConfidenceIntervall(times, "update"); 120 | // } 121 | 122 | // Read throughput -> execute many reads and take the avg time 123 | vector reads_per_second; 124 | for (uint32_t iteration = 0; iteration(end_ts - begin_ts).count(); 131 | reads_per_second.push_back((operations.size() * 1e9) / ns); 132 | } 133 | 134 | // // Individual reads -> get the time for individual reads 135 | // double lower_reads_per_second_factor = 0; 136 | // double upper_reads_per_second_factor = 0; 137 | // double med_reads_per_second = 0; 138 | // { 139 | // vector times(INDIVIDUAL_OPERATION_COUNT); 140 | // for (uint64_t i = 0; i(end_ts - begin_ts).count(); 145 | // alex_MFence(); 146 | // } 147 | // tie(med_reads_per_second, lower_reads_per_second_factor, upper_reads_per_second_factor) = GetConfidenceIntervall(times, "reads"); 148 | // } 149 | 150 | // Dependent read throughput -> execute many dependent reads and take the avg time 151 | vector dep_reads_per_second; 152 | for (uint32_t iteration = 0; iteration(end_ts - begin_ts).count(); 165 | dep_reads_per_second.push_back((operations.size() * 1e9) / ns); 166 | } 167 | 168 | // // Individual dependent reads -> get the time for individual dependent reads 169 | // double lower_dep_reads_per_second_factor = 0; 170 | // double upper_dep_reads_per_second_factor = 0; 171 | // double med_dep_reads_per_second = 0; 172 | // { 173 | // for (uint64_t u = 0; u times(INDIVIDUAL_OPERATION_COUNT); 178 | // uint64_t next_id = 0; 179 | // for (uint64_t i = 0; i(end_ts - begin_ts).count(); 184 | // alex_MFence(); 185 | // } 186 
| // check_sum_to_prevent_optimizations += next_id; 187 | // tie(med_dep_reads_per_second, lower_dep_reads_per_second_factor, upper_dep_reads_per_second_factor) = GetConfidenceIntervall(times, "dep"); 188 | // } 189 | 190 | // //@formatter:off 191 | // cout << "res:" 192 | // << " technique: " << competitor_name 193 | // << " checksum: " << check_sum_to_prevent_optimizations 194 | // << " order: " << (SEQUENTIAL ? "seq" : "rand") 195 | // << " entry_size: " << ENTRY_SIZE 196 | // << " updates(M): " << med_updates_per_second / 1000 / 1000.0 197 | // << " " << (lower_updates_per_second_factor) / 1000 / 1000.0 198 | // << " " << (upper_updates_per_second_factor) / 1000 / 1000.0 199 | // << " reads(M): " << med_reads_per_second / 1000 / 1000.0 200 | // << " " << (lower_reads_per_second_factor) / 1000 / 1000.0 201 | // << " " << (upper_reads_per_second_factor) / 1000 / 1000.0 202 | // << " dep_reads(M): " << med_dep_reads_per_second / 1000 / 1000.0 203 | // << " " << (lower_dep_reads_per_second_factor) / 1000 / 1000.0 204 | // << " " << (upper_dep_reads_per_second_factor) / 1000 / 1000.0 205 | // << endl; 206 | // //@formatter:on 207 | 208 | // //@formatter:off 209 | // cout << "res:" 210 | // << " technique: " << competitor_name 211 | // << " checksum: " << check_sum_to_prevent_optimizations 212 | // << " order: " << (SEQUENTIAL ? 
"seq" : "rand") 213 | // << " entry_size: " << ENTRY_SIZE 214 | // << " updates(M): " << updates_per_second / 1000 / 1000.0 215 | // << " " << (updates_per_second - updates_per_second * lower_updates_per_second_factor) / 1000 / 1000.0 216 | // << " " << (updates_per_second * upper_updates_per_second_factor - updates_per_second) / 1000 / 1000.0 217 | // << " reads(M): " << reads_per_second / 1000 / 1000.0 218 | // << " " << (reads_per_second - reads_per_second * lower_reads_per_second_factor) / 1000 / 1000.0 219 | // << " " << (reads_per_second * upper_reads_per_second_factor - reads_per_second) / 1000 / 1000.0 220 | // << " dep_reads(M): " << dep_reads_per_second / 1000 / 1000.0 221 | // << " " << (dep_reads_per_second - dep_reads_per_second * lower_dep_reads_per_second_factor) / 1000 / 1000.0 222 | // << " " << (dep_reads_per_second * upper_dep_reads_per_second_factor - dep_reads_per_second) / 1000 / 1000.0 223 | // << endl; 224 | // //@formatter:on 225 | 226 | sort(updates_per_second.begin(), updates_per_second.end()); 227 | sort(reads_per_second.begin(), reads_per_second.end()); 228 | sort(dep_reads_per_second.begin(), dep_reads_per_second.end()); 229 | 230 | //@formatter:off 231 | cout << "res:" 232 | << " technique: " << competitor_name 233 | << " checksum: " << check_sum_to_prevent_optimizations 234 | << " order: " << (SEQUENTIAL ? 
"seq" : "rand") 235 | << " entry_size: " << ENTRY_SIZE 236 | << " updates(M): " << (updates_per_second[4] + updates_per_second[5]) / 2e6 237 | << " " << updates_per_second[0] / 1e6 238 | << " " << updates_per_second[9] / 1e6 239 | << " reads(M): " << (reads_per_second[4] + reads_per_second[5]) / 2e6 240 | << " " << reads_per_second[0] / 1e6 241 | << " " << reads_per_second[9] / 1e6 242 | << " dep_reads(M): " << (dep_reads_per_second[4] + dep_reads_per_second[5]) / 2e6 243 | << " " << dep_reads_per_second[0] / 1e6 244 | << " " << dep_reads_per_second[9] / 1e6 245 | << endl; 246 | //@formatter:on 247 | 248 | if constexpr (VALIDATE) { 249 | Operation validation_buffer; 250 | ValidationBased validation(NVM_PATH, ENTRY_COUNT); 251 | for (uint64_t u = 0; u) / 1000 / 1000 / 1000.0 << endl; 296 | cout << "order " << (SEQUENTIAL ? "sequential" : "random") << endl; 297 | cout << "nvm_path " << NVM_PATH << endl; 298 | cout << "------" << endl; 299 | 300 | cpu_set_t cpuset; 301 | CPU_ZERO(&cpuset); 302 | CPU_SET(0, &cpuset); 303 | pthread_t currentThread = pthread_self(); 304 | if (pthread_setaffinity_np(currentThread, sizeof(cpu_set_t), &cpuset) != 0) { 305 | throw; 306 | } 307 | 308 | if (ENTRY_COUNT == 0) { 309 | cout << "need at least one entry" << endl; 310 | throw; 311 | } 312 | 313 | // Sequential Experiments 314 | if (SEQUENTIAL) { 315 | vector> operations = PrepareSequentialOperations(); 316 | RunExperiment>(operations, "log"); 317 | RunExperiment>(operations, "cow"); 318 | // RunExperiment>(operations, "high-bit"); 319 | RunExperiment>(operations, "sliding-bit"); 320 | } 321 | 322 | // Random 323 | if (!SEQUENTIAL) { 324 | vector> operations = PrepareRandomOperations(); 325 | RunExperiment>(operations, "log"); 326 | RunExperiment>(operations, "cow"); 327 | // RunExperiment>(operations, "high-bit"); 328 | RunExperiment>(operations, "sliding-bit"); 329 | } 330 | 331 | cout << "done 3" << endl; 332 | return 0; 333 | } 334 | // 
------------------------------------------------------------------------------------- 335 | -------------------------------------------------------------------------------- /inplace/tester.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | // ------------------------------------------------------------------------------------- 6 | // Based on: https://en.wikipedia.org/wiki/Xorshift 7 | class Random { 8 | public: 9 | explicit Random(uint64_t seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 10 | : seed(seed) 11 | { 12 | } 13 | 14 | uint64_t Rand() 15 | { 16 | seed ^= (seed << 13); 17 | seed ^= (seed >> 7); 18 | return (seed ^= (seed << 17)); 19 | } 20 | 21 | uint64_t seed; 22 | }; 23 | // ------------------------------------------------------------------------------------- 24 | char *CreateAlignedString(Random &ranny, uint32_t len) 25 | { 26 | char *data = (char *) malloc(len + 1); 27 | assert((uint64_t) data % 4 == 0); 28 | 29 | for (uint32_t i = 0; i(data_in); 42 | for (uint32_t i = 0; i 49 | struct Block { 50 | static_assert(pos<=31); 51 | 52 | uint64_t data; 53 | 54 | Block() 55 | : data(0) {} 56 | 57 | uint32_t GetVersionNoCheck() const { return (((data & (1ull << (pos + 32))) << 1) | (data & (1ull << pos))) >> pos; } 58 | uint32_t GetOldStateNoCheck() const { return (data >> 32) & ~(1ull << pos); } 59 | uint32_t GetNewStateNoCheck() const { return (data & 0xffffffff) & ~(1ull << pos); } 60 | 61 | void WriteNoCheck(uint32_t new_state) 62 | { 63 | data = (data << 32) | new_state; 64 | } 65 | 66 | friend std::ostream &operator<<(std::ostream &os, const Block &b) 67 | { 68 | uint32_t version = b.GetVersionNoCheck(); 69 | uint32_t old_state = b.GetOldStateNoCheck(); 70 | uint32_t new_state = b.GetNewStateNoCheck(); 71 | os << "version: " << version << " old: "; 72 | DumpHex(&old_state, 4, os); 73 | os << " new: "; 74 | DumpHex(&new_state, 4, 
os); 75 | return os; 76 | } 77 | }; 78 | static_assert(sizeof(Block<0>) == 8); 79 | // ------------------------------------------------------------------------------------- 80 | struct InplaceField16 { 81 | 82 | alignas(64) 83 | Block<0> b0; 84 | Block<1> b1; 85 | Block<2> b2; 86 | Block<3> b3; 87 | Block<4> b4; 88 | 89 | void Print(std::ostream &out) 90 | { 91 | out << b0 << std::endl; 92 | out << b1 << std::endl; 93 | out << b2 << std::endl; 94 | out << b3 << std::endl; 95 | out << b4 << std::endl; 96 | } 97 | 98 | void Reset() 99 | { 100 | b0.data = 0; 101 | b1.data = 0; 102 | b2.data = 0; 103 | b3.data = 0; 104 | b4.data = 0; 105 | } 106 | 107 | void WriteNoCheck(const char *data) 108 | { 109 | static std::array VersionBit = {1, 1, 0, 0}; 110 | 111 | assert((uint64_t) data % 4 == 0); 112 | assert((uint64_t) &b0 % 64 == 0); 113 | 114 | uint32_t *input = (uint32_t *) data; 115 | 116 | uint32_t next_version_bit = VersionBit[b0.GetVersionNoCheck()]; 117 | if (next_version_bit) { 118 | //@formatter:off 119 | b1.WriteNoCheck( input[0] | 0x02); 120 | b2.WriteNoCheck( input[1] | 0x04); 121 | b3.WriteNoCheck( input[2] | 0x08); 122 | b4.WriteNoCheck( input[3] | 0x10); 123 | b0.WriteNoCheck( (input[0] & 0x02) 124 | | (input[1] & 0x04) 125 | | (input[2] & 0x08) 126 | | (input[3] & 0x10) 127 | | 0x1); 128 | //@formatter:on 129 | } else { 130 | //@formatter:off 131 | b1.WriteNoCheck( input[0] & ~0x02); 132 | b2.WriteNoCheck( input[1] & ~0x04); 133 | b3.WriteNoCheck( input[2] & ~0x08); 134 | b4.WriteNoCheck( input[3] & ~0x10); 135 | b0.WriteNoCheck( (input[0] & 0x02) 136 | | (input[1] & 0x04) 137 | | (input[2] & 0x08) 138 | | (input[3] & 0x10) 139 | | 0x0); 140 | //@formatter:on 141 | } 142 | } 143 | 144 | char *ReadNoCheck() 145 | { 146 | char *result = (char *) malloc(16); 147 | assert((uint64_t) result % 4 == 0); 148 | uint32_t *output = (uint32_t *) result; 149 | 150 | output[0] = (b1.GetNewStateNoCheck() & ~0x02) | (b0.GetNewStateNoCheck() & 0x02); 151 | output[1] = 
(b2.GetNewStateNoCheck() & ~0x04) | (b0.GetNewStateNoCheck() & 0x04); 152 | output[2] = (b3.GetNewStateNoCheck() & ~0x08) | (b0.GetNewStateNoCheck() & 0x08); 153 | output[3] = (b4.GetNewStateNoCheck() & ~0x10) | (b0.GetNewStateNoCheck() & 0x10); 154 | 155 | return result; 156 | } 157 | }; 158 | // ------------------------------------------------------------------------------------- 159 | template 160 | void TestInPlaceUpdates() 161 | { 162 | Random ranny; 163 | InplaceField16 field; 164 | 165 | for (uint32_t i = 0; i<10000; i++) { 166 | char *input = CreateAlignedString(ranny, BYTE_COUNT); 167 | field.WriteNoCheck(input); 168 | char *output = field.ReadNoCheck(); 169 | 170 | for (uint32_t i = 0; i(); 190 | // TestInPlaceUpdates<20>(); 191 | // TestInPlaceUpdates<64>(); 192 | // TestInPlaceUpdates<1000>(); 193 | // TestInPlaceUpdates<10000>(); 194 | } 195 | // ------------------------------------------------------------------------------------- 196 | int main() 197 | { 198 | TestInPlaceUpdates<16>(); 199 | return 0; 200 | } 201 | // ------------------------------------------------------------------------------------- 202 | -------------------------------------------------------------------------------- /interference/Common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "libpmem.h" 11 | // ------------------------------------------------------------------------------------- 12 | using ub1 = uint8_t; 13 | using ub2 = uint16_t; 14 | using ub4 = uint32_t; 15 | using ub8 = uint64_t; 16 | // ------------------------------------------------------------------------------------- 17 | using sb1 = int8_t; 18 | using sb2 = int16_t; 19 | using sb4 = int32_t; 20 | using sb8 = int64_t; 21 | // 
------------------------------------------------------------------------------------- 22 | namespace constants { 23 | const static ub4 kCacheLineByteCount = 64; // 64 Byte cache lines 24 | const static ub4 kPageByteCount = 1 << 14; // 16 KB 25 | const static ub4 kCacheLinesPerPage = kPageByteCount / kCacheLineByteCount; // 16KB/64Byte 26 | const static ub4 kPageAlignment = 512; // For O_Direct 27 | const static ub8 kInvalidPageId = ~0; 28 | } 29 | // ------------------------------------------------------------------------------------- 30 | static std::mutex global_io_mutex; 31 | // ------------------------------------------------------------------------------------- 32 | #define a_mm_clflush(addr)\ 33 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 34 | #define a_mm_clflushopt(addr)\ 35 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 36 | #define a_mm_clwb(addr)\ 37 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 38 | #define a_mm_pcommit()\ 39 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 40 | // ------------------------------------------------------------------------------------- 41 | // Based on: https://en.wikipedia.org/wiki/Xorshift 42 | class Random { 43 | public: 44 | explicit Random(ub8 seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 45 | : seed(seed) 46 | { 47 | } 48 | 49 | uint64_t Rand() 50 | { 51 | seed ^= (seed << 13); 52 | seed ^= (seed >> 7); 53 | return (seed ^= (seed << 17)); 54 | } 55 | 56 | ub8 seed; 57 | }; 58 | // ------------------------------------------------------------------------------------- 59 | template 60 | bool IsAlignedAt(const void *ptr) 61 | { 62 | return ((uint64_t) ptr) % byteCount == 0; 63 | } 64 | // ------------------------------------------------------------------------------------- 65 | uint8_t *AlignedAlloc(uint64_t alignment, uint64_t size) 66 | { 67 | void *result = nullptr; 68 | int error = posix_memalign(&result, 
alignment, size); 69 | if (error) { 70 | std::cout << "error while allocating" << std::endl; 71 | throw; 72 | } 73 | return reinterpret_cast(result); 74 | } 75 | // ------------------------------------------------------------------------------------- 76 | inline void alex_FlushOpt(void *addr) 77 | { 78 | a_mm_clflushopt((char *) addr); 79 | } 80 | // ------------------------------------------------------------------------------------- 81 | inline void alex_WriteBack(void *addr, ub4 len) 82 | { 83 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 84 | a_mm_clwb((char *) uptr); 85 | } 86 | } 87 | // ------------------------------------------------------------------------------------- 88 | inline void alex_WriteBack(void *addr) 89 | { 90 | addr = (ub1 *) ((uintptr_t) addr & ~(64 - 1)); 91 | a_mm_clwb((char *) addr); 92 | } 93 | // ------------------------------------------------------------------------------------- 94 | inline void alex_SFence() 95 | { 96 | _mm_sfence(); 97 | } 98 | // ------------------------------------------------------------------------------------- 99 | struct Worker { 100 | Worker(ub4 tid, std::string config) 101 | : tid(tid) 102 | , config(config) {} 103 | 104 | ub4 tid; 105 | std::string config; 106 | std::atomic run = false; 107 | std::atomic ready = false; 108 | std::atomic stop = false; 109 | std::atomic performed_iteration_count = 0; 110 | 111 | virtual void PrintResultOfLastIteration(ub4 iteration) = 0; 112 | }; 113 | // ------------------------------------------------------------------------------------- 114 | -------------------------------------------------------------------------------- /interference/LogWriter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include "NonVolatileMemory.hpp" 5 | #include 6 | 
#include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // ------------------------------------------------------------------------------------- 14 | namespace log_utils { 15 | // ------------------------------------------------------------------------------------- 16 | inline ub8 alex_PopCount(ub8 value) 17 | { 18 | return _mm_popcnt_u64(value); 19 | } 20 | // ------------------------------------------------------------------------------------- 21 | inline void alex_FlushClToNvm(void *dest, void *src) 22 | { 23 | assert(((ub8) dest) % 64 == 0); 24 | assert(((ub8) src) % 64 == 0); 25 | __m512i reg = _mm512_load_si512(src); 26 | _mm512_stream_si512((__m512i *) dest, reg); 27 | } 28 | // ------------------------------------------------------------------------------------- 29 | ub4 FastPopCount512(const ub1 *ptr) 30 | { 31 | ub4 res = 0; 32 | for (ub4 i = 0; i<64; i += 8) { 33 | res += alex_PopCount(*(ub8 *) (&ptr[i])); 34 | } 35 | return res; 36 | } 37 | // ------------------------------------------------------------------------------------- 38 | void FastCopy512(ub1 *dest, const ub1 *src) 39 | { 40 | assert(((ub8) dest) % 64 == 0); 41 | memcpy(dest, src, 64); 42 | } 43 | // ------------------------------------------------------------------------------------- 44 | } 45 | // ------------------------------------------------------------------------------------- 46 | struct LogWriterZeroBlocked { 47 | 48 | struct Entry { 49 | ub4 payload_size; // header 50 | ub4 bit_count; // header 51 | ub1 data[]; 52 | }; 53 | 54 | struct File { 55 | // Header 56 | ub1 padding[constants::kCacheLineByteCount]; 57 | 58 | // Log data 59 | ub1 data[]; 60 | }; 61 | static_assert(sizeof(File) == 64, ""); 62 | 63 | NonVolatileMemory &nvm; 64 | File &file; // == nvm 65 | ub8 next_free; 66 | 67 | LogWriterZeroBlocked(NonVolatileMemory &nvm) 68 | : nvm(nvm) 69 | , file(*reinterpret_cast(nvm.Data())) 70 | { 71 | Reset(); 72 | } 73 | 74 | void Reset() 75 | { 
76 | next_free = 0; 77 | } 78 | 79 | ub8 AddLogEntry(const Entry &entry) 80 | { 81 | ub4 size = entry.payload_size + 8; 82 | 83 | assert(next_free % 8 == 0); 84 | assert(entry.bit_count == 0); 85 | assert(entry.payload_size % 8 == 0); 86 | assert(next_free + size=64); 89 | 90 | const ub1 *ram_begin = reinterpret_cast(&entry); 91 | ub1 *nvm_begin = reinterpret_cast(nvm.Data() + next_free); 92 | 93 | // Copy first cache line (and do not flush) 94 | ub4 pop_cnt = 0; 95 | log_utils::FastCopy512(nvm_begin, ram_begin); 96 | pop_cnt += log_utils::FastPopCount512(ram_begin); 97 | 98 | // Copy remaining full cache lines (and flush) 99 | ub4 pos = 64; 100 | for (; pos + 63(nvm_begin)->bit_count = pop_cnt; 117 | alex_WriteBack(nvm_begin); 118 | alex_SFence(); 119 | 120 | // Advance and done 121 | next_free += entry.payload_size + 8; 122 | next_free = (next_free + 63) & ~63ull; 123 | assert(next_free % 64 == 0); 124 | return next_free; 125 | } 126 | 127 | ub8 GetWrittenByteCount() const 128 | { 129 | return next_free + sizeof(File); 130 | } 131 | 132 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 133 | { 134 | ub8 current = 0; 135 | ub8 used_size = 0; 136 | 137 | vectorentries; 138 | while (used_sizepayload_size = payload_size; 143 | entry->bit_count = 0; 144 | entries.push_back(entry); 145 | current += payload_size + 8; 146 | used_size += payload_size; 147 | } 148 | return entries; 149 | } 150 | }; 151 | // ------------------------------------------------------------------------------------- 152 | class LogWriter : public Worker { 153 | unique_ptr wal; 154 | unique_ptr nvm; 155 | vector memory; 156 | vector entries; 157 | 158 | ub8 byte_count; 159 | string nvm_file; 160 | 161 | const ub4 ENTRY_SIZE = 104; 162 | 163 | vector nano_seconds; 164 | 165 | public: 166 | LogWriter(const string &nvm_file, ub8 byte_count, ub4 tid, string config) 167 | : Worker(tid, config) 168 | , byte_count(byte_count) 169 | , 
nvm_file(nvm_file) 170 | { 171 | } 172 | 173 | void Run() 174 | { 175 | Setup(); 176 | ready = true; 177 | while (!run) { 178 | this_thread::yield(); 179 | } 180 | 181 | while (!stop) { 182 | auto begin = chrono::high_resolution_clock::now(); 183 | DoOneRun(); 184 | auto end = chrono::high_resolution_clock::now(); 185 | nano_seconds.push_back(chrono::duration_cast(end - begin).count()); 186 | performed_iteration_count++; 187 | } 188 | } 189 | 190 | void PrintResultOfLastIteration(ub4 iteration) 191 | { 192 | if (!stop || iteration>=performed_iteration_count) { 193 | throw; 194 | } 195 | double ns = nano_seconds[iteration]; 196 | //@formatter:off 197 | cout << "RES log_writer " << config 198 | << " tid: " << tid 199 | << " iterations: " << iteration << "/" << performed_iteration_count 200 | << " perf(logs/s): " << ub8(entries.size() / (ns / 1e9)) << endl; 201 | //@formatter:on 202 | } 203 | 204 | LogWriter(const LogWriter &) = delete; 205 | LogWriter &operator=(const LogWriter &) = delete; 206 | 207 | private: 208 | void Setup() 209 | { 210 | Random ranny; 211 | nvm = make_unique(nvm_file, byte_count * 2); // * 2 to not overflow because of header 212 | wal = make_unique(*nvm); 213 | 214 | memory = RandomizedMemory(byte_count * 2, ranny); 215 | entries = LogWriterZeroBlocked::CreateRandomEntries(memory, ENTRY_SIZE / 8, ENTRY_SIZE / 8, byte_count, ranny); 216 | } 217 | 218 | void DoOneRun() 219 | { 220 | wal->Reset(); 221 | for (LogWriterZeroBlocked::Entry *entry : entries) { 222 | wal->AddLogEntry(*entry); 223 | } 224 | } 225 | 226 | vector RandomizedMemory(ub8 size, Random &ranny) 227 | { 228 | vector memory(size); 229 | for (ub8 i = 0; i 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // ------------------------------------------------------------------------------------- 14 | class NonVolatileMemory { 15 | public: 16 | NonVolatileMemory(ub8 byte_count); // Uses dram that is not file backed 17 | 
NonVolatileMemory(const std::string &file_name, ub8 byte_count); // Uses dram or nvm depending on the file path 18 | NonVolatileMemory(const NonVolatileMemory &) = delete; 19 | NonVolatileMemory &operator=(const NonVolatileMemory &) = delete; 20 | 21 | ~NonVolatileMemory(); 22 | 23 | ub1 *Data() { return data_ptr; } 24 | ub1 *End() { return data_ptr + byte_count; } 25 | ub8 GetByteCount() { return byte_count; } 26 | 27 | void FlushAll(); 28 | void Flush(ub8 from, ub8 length); 29 | 30 | NvmBufferFrame &GetNvmBufferFrame(ub8 id) 31 | { 32 | assert(data_ptr + id * sizeof(NvmBufferFrame)<=End()); 33 | assert(reinterpret_cast(data_ptr)[id].GetPage().Ptr() != nullptr); 34 | return reinterpret_cast(data_ptr)[id]; 35 | } 36 | 37 | bool IsRealNvm() const { return is_real_nvm; } 38 | 39 | private: 40 | ub1 *data_ptr; 41 | std::string file_name; 42 | const ub8 byte_count; 43 | bool is_real_nvm; 44 | bool is_mapped_file; 45 | }; 46 | // ------------------------------------------------------------------------------------- 47 | NonVolatileMemory::NonVolatileMemory(ub8 byte_count) 48 | : byte_count(byte_count) 49 | , is_mapped_file(false) 50 | { 51 | assert(((ub8) ((off_t) byte_count)) == byte_count); 52 | assert(byte_count>0); 53 | 54 | data_ptr = AlignedAlloc(512, byte_count); 55 | 56 | is_real_nvm = false; 57 | } 58 | // ------------------------------------------------------------------------------------- 59 | NonVolatileMemory::NonVolatileMemory(const std::string &file_name, ub8 byte_count) 60 | : file_name(file_name) 61 | , byte_count(byte_count) 62 | , is_mapped_file(true) 63 | { 64 | assert(((ub8) ((off_t) byte_count)) == byte_count); 65 | 66 | // No need to do anything if zero byte are requested; Does this happen ?? 
67 | assert(byte_count>0); // XXX 68 | if (byte_count == 0) { 69 | data_ptr = nullptr; 70 | return; 71 | } 72 | 73 | // Map the file (our pmem wrapper works with normal memory by falling back to mmap) 74 | size_t acquired_byte_count; 75 | data_ptr = reinterpret_cast(pmem_map_file(file_name.c_str(), byte_count, PMEM_FILE_CREATE, 0666, &acquired_byte_count, nullptr)); 76 | if (data_ptr == nullptr) { 77 | std::cout << "Failed to create file: '" << file_name << "'." << std::endl; 78 | throw; 79 | } 80 | if (acquired_byte_count != byte_count) { 81 | std::cout << "Failed to allocate requested size for file: '" << file_name << "'. (Requested=" << byte_count << ", Aquired=" << acquired_byte_count << ")" << std::endl; 82 | throw; 83 | } 84 | 85 | // Do this only once, as it is expensive 86 | is_real_nvm = pmem_is_pmem(data_ptr, 1); 87 | } 88 | // ------------------------------------------------------------------------------------- 89 | NonVolatileMemory::~NonVolatileMemory() 90 | { 91 | if (is_mapped_file) { 92 | pmem_unmap(data_ptr, byte_count); 93 | } else { 94 | free(data_ptr); 95 | } 96 | } 97 | // ------------------------------------------------------------------------------------- 98 | void NonVolatileMemory::FlushAll() 99 | { 100 | if (is_mapped_file) { 101 | pmem_persist(data_ptr, byte_count); 102 | } 103 | } 104 | // ------------------------------------------------------------------------------------- 105 | void NonVolatileMemory::Flush(ub8 from, ub8 length) 106 | { 107 | if (is_mapped_file) { 108 | pmem_persist(data_ptr + from, length); 109 | } 110 | } 111 | // ------------------------------------------------------------------------------------- 112 | 113 | // ------------------------------------------------------------------------------------- 114 | class VolatileMemory { 115 | 116 | public: 117 | VolatileMemory(ub8 byte_count); 118 | ~VolatileMemory(); 119 | VolatileMemory(const VolatileMemory &) = delete; 120 | VolatileMemory &operator=(const VolatileMemory &) 
= delete; 121 | 122 | ub1 *Data() { return data_ptr; } 123 | ub1 *End() { return data_ptr + byte_count; } 124 | 125 | ub8 GetByteCount() const { return byte_count; } 126 | 127 | template 128 | T *GetPtr(ub8 offset = 0) { return reinterpret_cast(data_ptr) + offset; } 129 | ub1 *GetPtr(ub8 offset = 0) { return data_ptr + offset; } 130 | 131 | private: 132 | ub1 *const data_ptr; 133 | const ub8 byte_count; 134 | }; 135 | // ------------------------------------------------------------------------------------- 136 | VolatileMemory::VolatileMemory(ub8 byte_count) 137 | : data_ptr(new ub1[byte_count]) 138 | , byte_count(byte_count) 139 | { 140 | assert(byte_count != 0); 141 | } 142 | // ------------------------------------------------------------------------------------- 143 | VolatileMemory::~VolatileMemory() 144 | { 145 | delete[] data_ptr; 146 | } 147 | // ------------------------------------------------------------------------------------- 148 | -------------------------------------------------------------------------------- /interference/Pages.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include 5 | #include 6 | // ------------------------------------------------------------------------------------- 7 | class Page { 8 | public: 9 | template 10 | T *Ptr() { return reinterpret_cast( data.data()); } 11 | 12 | template 13 | const T *Ptr() const { return reinterpret_cast( data.data()); } 14 | 15 | private: 16 | std::array data; 17 | }; 18 | // ------------------------------------------------------------------------------------- 19 | template 20 | class AutoAlignedPage { 21 | public: 22 | Page &GetPage() 23 | { 24 | assert(padding.data()(res)); 27 | return *reinterpret_cast( res); 28 | } 29 | 30 | const Page &GetPage() const 31 | { 32 | assert(padding.data()(res)); 35 | return 
*reinterpret_cast( res); 36 | } 37 | 38 | private: 39 | std::array padding; 40 | std::array data; 41 | }; 42 | // ------------------------------------------------------------------------------------- 43 | static_assert(sizeof(Page) == constants::kPageByteCount, "Pages are always 16kb"); 44 | // ------------------------------------------------------------------------------------- 45 | class NvmBufferFrame { 46 | public: 47 | inline void init() 48 | { 49 | dirty = false; 50 | page_id = constants::kInvalidPageId; 51 | pvn = 0; 52 | } 53 | 54 | Page &GetPage() { return page.GetPage(); } 55 | const Page &GetPage() const { return page.GetPage(); } 56 | 57 | bool dirty; 58 | ub8 page_id; 59 | ub8 pvn; 60 | 61 | private: 62 | AutoAlignedPage page; 63 | }; 64 | // ------------------------------------------------------------------------------------- 65 | static_assert(sizeof(NvmBufferFrame) % 8 == 0, "NvmBufferFrame should be eight byte aligned."); 66 | // ------------------------------------------------------------------------------------- 67 | -------------------------------------------------------------------------------- /interference/RandomReader.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include "NonVolatileMemory.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // ------------------------------------------------------------------------------------- 14 | using namespace std; 15 | // ------------------------------------------------------------------------------------- 16 | class RandomReader : public Worker { 17 | string nvm_file; 18 | ub8 byte_count; 19 | bool is_ram; 20 | ub8 *data; 21 | unique_ptr nvm; 22 | vector nano_seconds; 23 | ub8 sum; 24 | 25 | public: 26 | RandomReader(const string &nvm_file, ub8 byte_count, 
bool is_ram, ub4 tid, string config) 27 | : Worker(tid, config) 28 | , byte_count(byte_count) 29 | , nvm_file(nvm_file) 30 | , is_ram(is_ram) 31 | { 32 | assert(byte_count % 8 == 0); 33 | if (byte_count % 8 != 0) { 34 | throw "byte_count % 8"; 35 | } 36 | } 37 | 38 | void Run() 39 | { 40 | Setup(); 41 | ready = true; 42 | while (!run) { 43 | this_thread::yield(); 44 | } 45 | 46 | while (!stop) { 47 | auto begin = chrono::high_resolution_clock::now(); 48 | sum += DoOneRun(); 49 | auto end = chrono::high_resolution_clock::now(); 50 | nano_seconds.push_back(chrono::duration_cast(end - begin).count()); 51 | performed_iteration_count++; 52 | } 53 | } 54 | 55 | void PrintResultOfLastIteration(ub4 iteration) 56 | { 57 | // if (!stop || iteration>=performed_iteration_count) { 58 | // throw; 59 | // } 60 | double ns = nano_seconds[iteration]; 61 | //@formatter:off 62 | cout << "RES " << (is_ram ? "rnd_ram_reader " : "rnd_nvm_reader ") << config 63 | << " tid: " << tid 64 | << " iterations: " << iteration << "/" << performed_iteration_count 65 | << " sum: " << sum 66 | << " perf(reads/s): " << ub8((byte_count / 8) / (ns / 1e9)) << endl; 67 | //@formatter:on 68 | } 69 | 70 | RandomReader(const RandomReader &) = delete; 71 | RandomReader &operator=(const RandomReader &) = delete; 72 | 73 | private: 74 | void Setup() 75 | { 76 | // Alloc 77 | if (is_ram) { 78 | data = (ub8 *) malloc(byte_count + 64); 79 | } else { 80 | nvm = make_unique(nvm_file, byte_count + 64); 81 | data = (ub8 *) nvm->Data(); 82 | } 83 | 84 | while ((ub8) data % 8 != 0) { 85 | data++; 86 | } 87 | assert((ub8) data % 8 == 0); 88 | 89 | Random ranny; 90 | for (uint64_t i = 0; i<(byte_count / 8); i++) { 91 | data[i] = ranny.Rand(); 92 | } 93 | } 94 | 95 | ub8 DoOneRun() 96 | { 97 | Random ranny; 98 | ub8 sum = 0; 99 | for (ub8 i = 0; i 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // 
------------------------------------------------------------------------------------- 14 | using namespace std; 15 | // ------------------------------------------------------------------------------------- 16 | class SequentialReader : public Worker { 17 | string nvm_file; 18 | ub8 byte_count; 19 | bool is_ram; 20 | ub8 *data; 21 | ub8 expected_sum = 0; 22 | unique_ptr nvm; 23 | vector nano_seconds; 24 | 25 | public: 26 | SequentialReader(const string &nvm_file, ub8 byte_count, bool is_ram, ub4 tid, string config) 27 | : Worker(tid, config) 28 | , byte_count(byte_count) 29 | , nvm_file(nvm_file) 30 | , is_ram(is_ram) 31 | { 32 | assert(byte_count % 8 == 0); /* fix: was `assert(byte_count % 8);`, i.e. inverted — it fired exactly when byte_count WAS 8-byte aligned in debug builds; now matches the runtime check below and RandomReader's assert */ 33 | if (byte_count % 8 != 0) { 34 | throw "byte_count % 8"; 35 | } 36 | } 37 | 38 | void Run() 39 | { 40 | Setup(); 41 | ready = true; 42 | while (!run) { 43 | this_thread::yield(); 44 | } 45 | 46 | while (!stop) { 47 | auto begin = chrono::high_resolution_clock::now(); 48 | DoOneRun(); 49 | auto end = chrono::high_resolution_clock::now(); 50 | nano_seconds.push_back(chrono::duration_cast(end - begin).count()); 51 | performed_iteration_count++; 52 | } 53 | } 54 | 55 | void PrintResultOfLastIteration(ub4 iteration) 56 | { 57 | if (!stop || iteration>=performed_iteration_count) { 58 | throw; 59 | } 60 | double ns = nano_seconds[iteration]; 61 | //@formatter:off 62 | cout << "RES " << (is_ram ?
"seq_ram_reader " : "seq_nvm_reader ") << config 63 | << " tid: " << tid 64 | << " iterations: " << iteration << "/" << performed_iteration_count 65 | << " perf(gb/s): " << (byte_count / ns) << endl; 66 | //@formatter:on 67 | } 68 | 69 | SequentialReader(const SequentialReader &) = delete; 70 | SequentialReader &operator=(const SequentialReader &) = delete; 71 | 72 | private: 73 | void Setup() 74 | { 75 | Random ranny; 76 | if (is_ram) { 77 | data = (ub8 *) malloc(byte_count + 64); 78 | } else { 79 | nvm = make_unique(nvm_file, byte_count + 64); 80 | data = (ub8 *) nvm->Data(); 81 | } 82 | 83 | while ((ub8) data % 8 != 0) { 84 | data++; 85 | } 86 | assert((ub8) data % 8 == 0); 87 | 88 | for (ub8 i = 0; i> seq_ram_readers; 21 | vector> seq_nvm_readers; 22 | vector> rnd_ram_readers; 23 | vector> rnd_nvm_readers; 24 | vector> log_writers; 25 | vector> page_flushers; 26 | vector all_workers; 27 | vector> all_threads; 28 | // ------------------------------------------------------------------------------------- 29 | // Common config 30 | string NVM_PATH = ""; 31 | string CONFIG_STRING = ""; 32 | // ------------------------------------------------------------------------------------- 33 | void CreateAllThreads() 34 | { 35 | // Sequential read ram 36 | for (ub4 tid = 0; tid(NVM_PATH + "/seq_ram_reader_" + to_string(tid), SEQ_READER_BYTE_COUNT, true, tid, CONFIG_STRING)); 38 | all_threads.emplace_back(make_unique([&, tid]() { 39 | seq_ram_readers[tid]->Run(); 40 | })); 41 | all_workers.push_back(seq_ram_readers[tid].get()); 42 | } 43 | 44 | // Sequential read nvm 45 | for (ub4 tid = 0; tid(NVM_PATH + "/seq_nvm_reader_" + to_string(tid), SEQ_READER_BYTE_COUNT, false, tid, CONFIG_STRING)); 47 | all_threads.emplace_back(make_unique([&, tid]() { 48 | seq_nvm_readers[tid]->Run(); 49 | })); 50 | all_workers.push_back(seq_nvm_readers[tid].get()); 51 | } 52 | 53 | // Random read ram 54 | for (ub4 tid = 0; tid(NVM_PATH + "/rnd_ram_reader_" + to_string(tid), RND_READER_BYTE_COUNT, 
true, tid, CONFIG_STRING)); 56 | all_threads.emplace_back(make_unique([&, tid]() { 57 | rnd_ram_readers[tid]->Run(); 58 | })); 59 | all_workers.push_back(rnd_ram_readers[tid].get()); 60 | } 61 | 62 | // Random read nvm 63 | for (ub4 tid = 0; tid(NVM_PATH + "/rnd_nvm_reader_" + to_string(tid), RND_READER_BYTE_COUNT, false, tid, CONFIG_STRING)); 65 | all_threads.emplace_back(make_unique([&, tid]() { 66 | rnd_nvm_readers[tid]->Run(); 67 | })); 68 | all_workers.push_back(rnd_nvm_readers[tid].get()); 69 | } 70 | 71 | // Log writer 72 | for (ub4 tid = 0; tid(NVM_PATH + "/log_writer_" + to_string(tid), LOG_BYTE_COUNT, tid, CONFIG_STRING)); 74 | all_threads.emplace_back(make_unique([&, tid]() { 75 | log_writers[tid]->Run(); 76 | })); 77 | all_workers.push_back(log_writers[tid].get()); 78 | } 79 | 80 | // Page flush 81 | for (ub4 tid = 0; tid(NVM_PATH + "/page_flush_" + to_string(tid), PAGE_FLUSH_PAGE_COUNT, tid, CONFIG_STRING)); 83 | all_threads.emplace_back(make_unique([&, tid]() { 84 | page_flushers[tid]->Run(); 85 | })); 86 | all_workers.push_back(page_flushers[tid].get()); 87 | } 88 | } 89 | // ------------------------------------------------------------------------------------- 90 | void WaitForAllToGetReady() 91 | { 92 | for (auto iter : all_workers) { 93 | while (!iter->ready) { 94 | usleep(1e6); // Check every second 95 | } 96 | } 97 | } 98 | // ------------------------------------------------------------------------------------- 99 | void StartAll() 100 | { 101 | for (auto iter : all_workers) { 102 | iter->run = true; 103 | } 104 | } 105 | // ------------------------------------------------------------------------------------- 106 | void WaitForAllToPerformAtLeastInterations(ub4 iteration_count) 107 | { 108 | for (auto iter : all_workers) { 109 | while (iter->performed_iteration_countstop = true; 119 | } 120 | } 121 | // ------------------------------------------------------------------------------------- 122 | void WaitForAllToDie() 123 | { 124 | for (auto &iter 
: all_threads) { 125 | iter->join(); 126 | } 127 | } 128 | // ------------------------------------------------------------------------------------- 129 | void PrintAll(ub4 iteration) 130 | { 131 | for (auto iter : all_workers) { 132 | iter->PrintResultOfLastIteration(iteration); 133 | } 134 | } 135 | // ------------------------------------------------------------------------------------- 136 | int main(int argc, char **argv) 137 | { 138 | if (argc != 8) { 139 | cout << "usage: " << argv[0] << " SEQ_RAM SEQ_NVM RND_RAM RND_NVM LOG_NVM PAGE_NVM path" << endl; 140 | throw; 141 | } 142 | 143 | SEQ_RAM_THREADS = stof(argv[1]); 144 | SEQ_NVM_THREADS = stof(argv[2]); 145 | RND_RAM_THREADS = stof(argv[3]); 146 | RND_NVM_THREADS = stof(argv[4]); 147 | LOG_NVM_THREADS = stof(argv[5]); 148 | PAGE_NVM_THREADS = stof(argv[6]); 149 | NVM_PATH = argv[7]; 150 | 151 | 152 | //@formatter:off 153 | CONFIG_STRING = to_string(SEQ_RAM_THREADS) + "_" 154 | + to_string(SEQ_NVM_THREADS) + "_" 155 | + to_string(RND_RAM_THREADS) + "_" 156 | + to_string(RND_NVM_THREADS) + "_" 157 | + to_string(LOG_NVM_THREADS) + "_" 158 | + to_string(PAGE_NVM_THREADS); 159 | //@formatter:on 160 | 161 | cout << "Config:" << endl; 162 | cout << "----------------------------" << endl; 163 | cout << "SEQ_READER_BYTE_COUNT: " << SEQ_READER_BYTE_COUNT << endl; 164 | cout << "RND_READER_BYTE_COUNT: " << RND_READER_BYTE_COUNT << endl; 165 | cout << "LOG_BYTE_COUNT: " << LOG_BYTE_COUNT << endl; 166 | cout << "PAGE_FLUSH_PAGE_COUNT: " << PAGE_FLUSH_PAGE_COUNT << endl; 167 | cout << "SEQ_RAM_THREADS: " << SEQ_RAM_THREADS << endl; 168 | cout << "SEQ_NVM_THREADS: " << SEQ_NVM_THREADS << endl; 169 | cout << "RND_RAM_THREADS: " << RND_RAM_THREADS << endl; 170 | cout << "RND_NVM_THREADS: " << RND_NVM_THREADS << endl; 171 | cout << "LOG_NVM_THREADS: " << LOG_NVM_THREADS << endl; 172 | cout << "PAGE_NVM_THREADS: " << PAGE_NVM_THREADS << endl; 173 | cout << "CONFIG_STRING: " << CONFIG_STRING << endl; 174 | 175 | cout << 
"NVM_PATH: " << NVM_PATH << endl; 176 | #ifdef STREAMING 177 | cout << "STREAMING: " << "yes" << endl; 178 | #else 179 | cout << "STREAMING: " << "no" << endl; 180 | #endif 181 | 182 | CreateAllThreads(); 183 | WaitForAllToGetReady(); 184 | StartAll(); 185 | WaitForAllToPerformAtLeastInterations(3); // First one might be bad if not every one has started and last on might be bad because some body might finish earlier 186 | StopAll(); 187 | WaitForAllToDie(); 188 | PrintAll(1); // run id, zero-based 189 | 190 | cout << "all good :)" << endl; 191 | return 0; 192 | } 193 | // ------------------------------------------------------------------------------------- 194 | -------------------------------------------------------------------------------- /latency/read_latency.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | 18 | uint64_t rdtsc() 19 | { 20 | uint32_t hi, lo; 21 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 22 | return static_cast(lo) | (static_cast(hi) << 32); 23 | } 24 | 25 | #include 26 | 27 | static inline double gettime(void) 28 | { 29 | struct timeval now_tv; 30 | gettimeofday(&now_tv, NULL); 31 | return ((double) now_tv.tv_sec) + ((double) now_tv.tv_usec) / 1000000.0; 32 | } 33 | 34 | uint64_t COUNT; // In number of uint64_t 35 | uint64_t SIZE; // In byte 36 | uint64_t *v; 37 | atomic go(0); 38 | uintptr_t REPETITIONS; // Number of lookups in total (we report performance every STEP times) 39 | const uint64_t STEP = 10e7; 40 | const char* PATH; 41 | bool USE_RAM; 42 | 43 | static void *readThread(void *arg) 44 | { 45 | while (!go); 46 | uintptr_t threadNum = reinterpret_cast(arg); 47 | 48 | cpu_set_t cpuset; 49 | CPU_ZERO(&cpuset); 50 | CPU_SET(threadNum, &cpuset); 51 | pthread_t currentThread 
= pthread_self(); 52 | if (pthread_setaffinity_np(currentThread, sizeof(cpu_set_t), &cpuset) != 0) 53 | throw; 54 | 55 | auto total_start = chrono::high_resolution_clock::now(); 56 | 57 | uint64_t x = 0; 58 | uint64_t run = 0; 59 | for (uint64_t i = 0; i> 15); 94 | return (seed ^= (seed << 5)); 95 | } 96 | 97 | uint64_t seed; 98 | }; 99 | 100 | uint64_t *CreateRandomCycle(bool ram) 101 | { 102 | uint64_t *helper; 103 | uint64_t *result; 104 | 105 | { 106 | auto start = chrono::high_resolution_clock::now(); 107 | cout << "init " << flush; 108 | helper = new uint64_t[COUNT]; 109 | for (uint64_t i = 0; i(end - start).count(); 118 | cout << "(" << s << ")" << endl; 119 | } 120 | 121 | { 122 | auto start = chrono::high_resolution_clock::now(); 123 | cout << "shuffle " << flush; 124 | FastRandom ranny; 125 | for (uint64_t i = 0; i(end - start).count(); 135 | cout << "(" << s << ")" << endl; 136 | } 137 | 138 | { 139 | auto start = chrono::high_resolution_clock::now(); 140 | cout << "cycle " << flush; 141 | if (ram) { 142 | result = new uint64_t[COUNT]; 143 | } else { 144 | int fd = open(PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 145 | int td = ftruncate(fd, COUNT * 8); 146 | if (fd<0 || td<0) { 147 | cout << "unable to create file" << endl; 148 | exit(-1); 149 | } 150 | result = (uint64_t *) mmap(nullptr, COUNT * 8, PROT_WRITE, MAP_SHARED, fd, 0); 151 | } 152 | for (uint64_t i = 0; i(end - start).count(); 161 | cout << "(" << s << ")" << endl; 162 | } 163 | 164 | return result; 165 | } 166 | 167 | // clang++ -g0 -O3 -march=native -std=c++14 read_latency.cpp -pthread && ./a.out 1 1e9 1e9 ram /mnt/pmem0/renen/file_0 168 | int main(int argc, char **argv) 169 | { 170 | if (argc != 6) { 171 | cout << "usage: " << argv[0] << " thread_count datasize(byte) repetitions (nvm|ram) path" << endl; 172 | throw; 173 | } 174 | 175 | unsigned threadCount = atoi(argv[1]); 176 | SIZE = atof(argv[2]); 177 | COUNT = SIZE / 8; 178 | REPETITIONS = atof(argv[3]); 179 | USE_RAM = argv[4][0] == 
'r'; 180 | PATH = argv[5]; 181 | 182 | cout << "Config: thread_count=" << threadCount << " repetitions=" << REPETITIONS << " use_ram=" << USE_RAM << " size=" << SIZE << " path=" << PATH << endl; 183 | 184 | v = CreateRandomCycle(USE_RAM); 185 | cout << "starting .." << endl; 186 | 187 | pthread_t threads[threadCount]; 188 | for (unsigned i = 0; i(i)); 190 | } 191 | 192 | uint64_t times[threadCount]; 193 | auto start = chrono::high_resolution_clock::now(); 194 | go = 1; 195 | for (unsigned i = 0; i(end - start).count(); 203 | double gb = ((threadCount * 64 * REPETITIONS) / (1000.0 * 1000.0 * 1000.0)); 204 | double gbs = gb / ns * 1e9; 205 | double latency = ns / REPETITIONS; 206 | 207 | // @formatter:off 208 | cout << "res:" 209 | << " thread_count=" << threadCount 210 | << " repetitions=" << REPETITIONS 211 | << " use_ram=" << USE_RAM 212 | << " size=" << SIZE 213 | << " throughput(GB/s) " << gbs 214 | << " latency(ns) " << latency 215 | << endl; 216 | // @formatter:on 217 | 218 | return 0; 219 | } 220 | -------------------------------------------------------------------------------- /latency/read_latency_alex.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | 18 | inline void Clwb(void *addr) 19 | { 20 | #ifdef NOCLWB 21 | (void) addr; 22 | #else 23 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *) addr)); 24 | #endif 25 | } 26 | 27 | uint64_t rdtsc() 28 | { 29 | uint32_t hi, lo; 30 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 31 | return static_cast(lo) | (static_cast(hi) << 32); 32 | } 33 | 34 | inline void SFence() 35 | { 36 | _mm_mfence(); 37 | } 38 | 39 | #include 40 | 41 | static inline double gettime(void) 42 | { 43 | struct timeval now_tv; 44 | gettimeofday(&now_tv, NULL); 45 
| return ((double) now_tv.tv_sec) + ((double) now_tv.tv_usec) / 1000000.0; 46 | } 47 | 48 | uint64_t COUNT; // In number of uint64_t 49 | uint64_t SIZE; // In byte 50 | uint64_t *v; 51 | atomic go(0); 52 | const char *PATH; 53 | bool USE_RAM; 54 | 55 | static void *readThread(void *arg) 56 | { 57 | while (!go); 58 | uintptr_t threadNum = reinterpret_cast(arg); 59 | 60 | uint64_t x = 0; 61 | auto start = chrono::high_resolution_clock::now(); 62 | for (uint64_t i = 0; i(end - start).count(); 71 | 72 | cout << threadNum << ": " << (ns / (double) COUNT) << " ns/rep (result=" << x << ")" << endl; 73 | return (void *) (ns / COUNT); 74 | } 75 | 76 | class FastRandom { 77 | public: 78 | explicit FastRandom(uint64_t seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 79 | : seed(seed) {} 80 | 81 | uint64_t Next() 82 | { 83 | seed ^= (seed << 13); 84 | seed ^= (seed >> 15); 85 | return (seed ^= (seed << 5)); 86 | } 87 | 88 | uint64_t seed; 89 | }; 90 | 91 | uint64_t *CreateRandomCycle(bool ram) 92 | { 93 | uint64_t *helper; 94 | uint64_t *result; 95 | 96 | { 97 | auto start = chrono::high_resolution_clock::now(); 98 | cout << "init " << flush; 99 | helper = new uint64_t[COUNT]; 100 | for (uint64_t i = 0; i(end - start).count(); 109 | cout << "(" << s << ")" << endl; 110 | } 111 | 112 | { 113 | auto start = chrono::high_resolution_clock::now(); 114 | cout << "shuffle " << flush; 115 | FastRandom ranny; 116 | for (uint64_t i = 0; i(end - start).count(); 126 | cout << "(" << s << ")" << endl; 127 | } 128 | 129 | { 130 | auto start = chrono::high_resolution_clock::now(); 131 | cout << "cycle " << flush; 132 | if (ram) { 133 | result = new uint64_t[COUNT]; 134 | } else { 135 | int fd = open(PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 136 | int td = ftruncate(fd, COUNT * 8); 137 | if (fd<0 || td<0) { 138 | cout << "unable to create file" << endl; 139 | exit(-1); 140 | } 141 | result = (uint64_t *) mmap(nullptr, COUNT * 8, PROT_WRITE, MAP_SHARED, 
fd, 0); 142 | } 143 | for (uint64_t i = 0; i(end - start).count(); 152 | cout << "(" << s << ")" << endl; 153 | } 154 | 155 | return result; 156 | } 157 | 158 | // clang++ -g0 -O3 -march=native -std=c++14 read_latency.cpp -pthread && ./a.out 1 1e9 1e9 ram /mnt/pmem0/renen/file_0 159 | int main(int argc, char **argv) 160 | { 161 | if (argc != 5) { 162 | cout << "usage: " << argv[0] << " thread_count datasize(byte) (nvm|ram) path" << endl; 163 | throw; 164 | } 165 | 166 | unsigned threadCount = atoi(argv[1]); 167 | SIZE = atof(argv[2]); 168 | COUNT = SIZE / 8; 169 | USE_RAM = argv[3][0] == 'r'; 170 | PATH = argv[4]; 171 | 172 | cout << "Config: thread_count=" << threadCount << " use_ram=" << USE_RAM << " size=" << SIZE << " path=" << PATH << endl; 173 | 174 | v = CreateRandomCycle(USE_RAM); 175 | cout << "starting .." << endl; 176 | 177 | pthread_t threads[threadCount]; 178 | for (unsigned i = 0; i(i)); 180 | } 181 | 182 | uint64_t times[threadCount]; 183 | auto start = chrono::high_resolution_clock::now(); 184 | go = 1; 185 | for (unsigned i = 0; i(end - start).count(); 200 | double gb = ((threadCount * 64 * COUNT) / (1000.0 * 1000.0 * 1000.0)); 201 | double gbs = gb / ns * 1e9; 202 | double latency = ns / COUNT; 203 | 204 | // @formatter:off 205 | cout << "res:" 206 | << " thread_count=" << threadCount 207 | << " use_ram=" << USE_RAM 208 | << " size=" << SIZE 209 | << " throughput(GB/s) " << gbs 210 | << " latency(ns) " << latency 211 | << endl; 212 | // @formatter:on 213 | 214 | return 0; 215 | } 216 | -------------------------------------------------------------------------------- /latency/write_latency.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 
#include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | #define _mm_clflush(addr)\ 28 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 29 | #define _mm_clflushopt(addr)\ 30 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 31 | #define _mm_clwb(addr)\ 32 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 33 | #define _mm_pcommit()\ 34 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 35 | 36 | enum struct Type { 37 | RANDOM_CL, 38 | SEQUENTIAL_CL, 39 | SAME_CL 40 | }; 41 | 42 | Type GetClWriteType(const string &str) 43 | { 44 | if (str == "single") 45 | return Type::SAME_CL; 46 | if (str == "sequential") 47 | return Type::SEQUENTIAL_CL; 48 | if (str == "random") { 49 | return Type::RANDOM_CL; 50 | } 51 | cout << "unknown benchmark type: '" << str << "'" << endl; 52 | throw; 53 | } 54 | 55 | std::string GetStringFromType(Type benchmark_type) 56 | { 57 | switch (benchmark_type) { 58 | case Type::RANDOM_CL: { 59 | return "RANDOM_CL"; 60 | } 61 | case Type::SEQUENTIAL_CL: { 62 | return "SEQUENTIAL_CL"; 63 | } 64 | case Type::SAME_CL: { 65 | return "SAME_CL"; 66 | } 67 | } 68 | cout << "unkown type" << endl; 69 | throw; 70 | } 71 | 72 | // clang++ -g0 -O3 -march=native -std=c++14 write_latency.cpp -pthread -DFLUSH=1 && ./a.out ram sequential 1e9 /mnt/pmem0/renen/file_0 73 | int main(int argc, char **argv) 74 | { 75 | if (argc != 5) { 76 | cout << "usage: " << argv[0] << " (nvm|ram) (single|sequential|random) size(byte) path" << endl; 77 | throw; 78 | } 79 | bool use_flush = false; 80 | bool use_flush_opt = false; 81 | bool use_clwb = false; 82 | bool use_streaming = false; 83 | int technique_count = 0; 84 | #ifdef FLUSH 85 | use_flush = true; 86 | technique_count++; 87 | #endif 88 | #ifdef FLUSH_OPT 89 | use_flush_opt = true; 90 | technique_count++; 91 | #endif 92 | #ifdef CLWB 93 | use_clwb= true; 94 | technique_count++; 95 | #endif 96 | #ifdef STREAMING 97 | use_streaming = true; 98 | 
technique_count++; 99 | #endif 100 | if (technique_count != 1) { 101 | cout << "Need to specify exactly one flush technique" << endl; 102 | throw; 103 | } 104 | const bool use_ram = argv[1][0] == 'r'; 105 | const Type benchmark_type = GetClWriteType(argv[2]); 106 | const uint64_t size = atof(argv[3]); 107 | const string path = argv[4]; 108 | 109 | cout << "use_flush: " << (use_flush ? "yes" : "no") << endl; 110 | cout << "use_flush_opt: " << (use_flush_opt ? "yes" : "no") << endl; 111 | cout << "use_clwb: " << (use_clwb ? "yes" : "no") << endl; 112 | cout << "use_streaming: " << (use_streaming ? "yes" : "no") << endl; 113 | cout << "use_ram: " << (use_ram ? "yes" : "no") << endl; 114 | cout << "benchmark_type: " << GetStringFromType(benchmark_type) << endl; 115 | cout << "size: " << size << endl; 116 | 117 | uint8_t *keys; 118 | if (use_ram) { 119 | keys = new uint8_t[size + 64]; 120 | while (((uint64_t) keys) % 64 != 0) // Align to 64 byte ;p 121 | keys++; 122 | } else { 123 | int fd = open(path.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 124 | int td = ftruncate(fd, size); 125 | if (fd<0 || td<0) { 126 | cout << "unable to create file" << endl; 127 | exit(-1); 128 | } 129 | keys = (uint8_t *) mmap(nullptr, size, PROT_WRITE, MAP_SHARED, fd, 0); 130 | } 131 | 132 | alignas(64) uint8_t write_data[64] = {0xaa}; /* fix: _mm512_stream_load_si512 (vmovntdqa) requires a 64-byte-aligned operand — an unaligned stack array faults; note aggregate init {0xaa} sets only byte 0, the remaining 63 bytes are zero */ 133 | __m512i write_data_vec = _mm512_stream_load_si512(write_data); 134 | 135 | vector write_offsets(size / 64); 136 | switch (benchmark_type) { 137 | case Type::RANDOM_CL: { 138 | for (uint64_t i = 0; i(end - begin).count(); 189 | uint64_t latency = nano_seconds / iteration_count; 190 | 191 | // @formatter:off 192 | std::cout << "res " 193 | << " use_ram: " << use_ram 194 | << " size: " << size 195 | << " benchmark_type: " << GetStringFromType(benchmark_type) 196 | << " use_flush: " << use_flush 197 | << " use_flush_opt: " << use_flush_opt 198 | << " use_clwb: " << use_clwb 199 | << " use_streaming: " << use_streaming 200 | << " latency(ns): " << latency <<
std::endl; 201 | // @formatter:on 202 | 203 | return 0; 204 | } 205 | 206 | -------------------------------------------------------------------------------- /logging/Common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "libpmem.h" 8 | // ------------------------------------------------------------------------------------- 9 | #define a_mm_clflush(addr)\ 10 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 11 | #define a_mm_clflushopt(addr)\ 12 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 13 | #define a_mm_clwb(addr)\ 14 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 15 | #define a_mm_pcommit()\ 16 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 17 | // ------------------------------------------------------------------------------------- 18 | using ub1 = uint8_t; 19 | using ub2 = uint16_t; 20 | using ub4 = uint32_t; 21 | using ub8 = uint64_t; 22 | // ------------------------------------------------------------------------------------- 23 | using sb1 = int8_t; 24 | using sb2 = int16_t; 25 | using sb4 = int32_t; 26 | using sb8 = int64_t; 27 | // ------------------------------------------------------------------------------------- 28 | namespace constants { 29 | const static ub4 kCacheLineByteCount = 64; // 64 Byte cache lines 30 | } 31 | // ------------------------------------------------------------------------------------- 32 | inline void alex_WriteBack(void *addr, ub4 len) 33 | { 34 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 35 | a_mm_clwb((char *) uptr); 36 | } 37 | } 38 | // ------------------------------------------------------------------------------------- 39 | inline void alex_WriteBack(void *addr) 40 | { 41 | addr = (ub1 *) 
((uintptr_t) addr & ~(64 - 1)); 42 | a_mm_clwb((char *) addr); 43 | } 44 | // ------------------------------------------------------------------------------------- 45 | inline void alex_FlushOpt(void *addr, ub4 len) 46 | { 47 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) { 48 | a_mm_clflushopt((char *) uptr); 49 | } 50 | } 51 | // ------------------------------------------------------------------------------------- 52 | inline void alex_FlushOpt(void *addr) 53 | { 54 | a_mm_clflushopt((char *) addr); 55 | } 56 | // ------------------------------------------------------------------------------------- 57 | inline void alex_SFence() 58 | { 59 | _mm_sfence(); 60 | } 61 | // ------------------------------------------------------------------------------------- 62 | inline ub8 alex_PopCount(ub8 value) 63 | { 64 | return _mm_popcnt_u64(value); 65 | } 66 | // ------------------------------------------------------------------------------------- 67 | inline void alex_FlushClToNvm(void *dest, void *src) 68 | { 69 | assert(((ub8) dest) % 64 == 0); 70 | assert(((ub8) src) % 64 == 0); 71 | __m512i reg = _mm512_load_si512(src); 72 | _mm512_stream_si512((__m512i *) dest, reg); 73 | } 74 | // ------------------------------------------------------------------------------------- 75 | void FastCopy512(ub1 *dest, const ub1 *src) 76 | { 77 | assert(((ub8) dest) % 64 == 0); 78 | memcpy(dest, src, 64); 79 | } 80 | // ------------------------------------------------------------------------------------- 81 | ub4 FastPopCount512(const ub1 *ptr) 82 | { 83 | ub4 res = 0; 84 | for (ub4 i = 0; i<64; i += 8) { 85 | res += alex_PopCount(*(ub8 *) (&ptr[i])); 86 | } 87 | return res; 88 | } 89 | // ------------------------------------------------------------------------------------- 90 | void FastCopy512Simd(ub1 *dest, const ub1 *src) 91 | { 92 | assert(((ub8) dest) % 64 == 0); 93 | __m512i reg = _mm512_loadu_si512(src); 94 | _mm512_store_si512((__m512i 
*) dest, reg); 95 | } 96 | // ------------------------------------------------------------------------------------- 97 | void alex_FastCopyAndWriteBack(ub1 *nvm_begin, const ub1 *ram_begin, ub4 size) 98 | { 99 | assert(((ub8) nvm_begin) % 64 == 0); // nvm needs to be aligned 100 | 101 | // Copy full cache lines (and flush) 102 | ub4 pos = 0; 103 | for (; pos + 63(result); 126 | } 127 | // ------------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /logging/LW_Classic.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterClassic { 2 | 3 | struct Entry { 4 | ub8 payload_size; // header 5 | ub1 data[]; 6 | // footer: ub8 start; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterClassic(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | assert(next_free % 8 == 0); 32 | assert(entry.payload_size % 8 == 0); 33 | assert(sizeof(File) + next_free + entry.payload_size + 16(footer_begin) = next_free; 42 | pmem_persist(footer_begin, 8); 43 | 44 | // Advance and done 45 | next_free += entry.payload_size + 16; 46 | assert(next_free % 8 == 0); 47 | return next_free; 48 | } 49 | 50 | ub8 GetWrittenByteCount() const 51 | { 52 | return next_free + sizeof(File); 53 | } 54 | 55 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 56 | { 57 | ub8 current = 0; 58 | ub8 used_size = 0; 59 | 60 | vectorentries; 61 | while (used_sizepayload_size = payload_size; 66 | entries.push_back(entry); 67 | current += payload_size 
+ 16; 68 | used_size += payload_size; 69 | } 70 | return entries; 71 | } 72 | }; -------------------------------------------------------------------------------- /logging/LW_ClassicAligned.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterClassicAligned { 2 | 3 | struct Entry { 4 | ub8 payload_size; // header 5 | ub1 data[]; 6 | // footer: ub8 start; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterClassicAligned(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | assert(next_free % 8 == 0); 32 | assert(entry.payload_size % 8 == 0); 33 | assert(sizeof(File) + next_free + entry.payload_size + 16(&entry); 38 | alex_FastCopyAndWriteBack(entry_begin, ram_bgein, entry.payload_size + 8); 39 | alex_SFence(); 40 | next_free = (next_free + entry.payload_size + 8 + 63) & ~63ull; 41 | 42 | // Copy the footer 43 | ub1 *footer_begin = file.data + next_free; 44 | *reinterpret_cast(footer_begin) = next_free; 45 | alex_WriteBack(footer_begin); 46 | alex_SFence(); 47 | 48 | // Advance and done 49 | next_free += 64; 50 | assert(next_free % 8 == 0); 51 | return next_free; 52 | } 53 | 54 | ub8 GetWrittenByteCount() const 55 | { 56 | return next_free + sizeof(File); 57 | } 58 | 59 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 60 | { 61 | ub8 current = 0; 62 | ub8 used_size = 0; 63 | 64 | vectorentries; 65 | while (used_sizepayload_size = payload_size; 70 | entries.push_back(entry); 71 | current += payload_size + 16; 72 | used_size += payload_size; 73 | } 74 | return entries; 75 | } 76 | }; 
-------------------------------------------------------------------------------- /logging/LW_ClassicCached.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | struct LogWriterClassicCached { 5 | 6 | struct Entry { 7 | ub8 payload_size; // header 8 | ub1 data[]; 9 | // footer: ub8 start; 10 | }; 11 | 12 | struct File { 13 | // Header 14 | ub1 padding[constants::kCacheLineByteCount]; 15 | 16 | // Log data 17 | ub1 data[]; 18 | }; 19 | static_assert(sizeof(File) == 64, ""); 20 | 21 | NonVolatileMemory &nvm; 22 | File &file; // == nvm 23 | ub8 next_free; 24 | ub8 *cl_buffer; 25 | ub4 cl_offset = 0; 26 | 27 | LogWriterClassicCached(NonVolatileMemory &nvm) 28 | : nvm(nvm) 29 | , file(*reinterpret_cast(nvm.Data())) 30 | { 31 | next_free = 0; 32 | cl_buffer = reinterpret_cast(operator new(128)); 33 | while (((ub8) cl_buffer) % 64 != 0) { 34 | cl_buffer++; 35 | } 36 | memset(cl_buffer, 0, 64); 37 | } 38 | 39 | ub8 AddLogEntry(const Entry &entry) 40 | { 41 | assert(next_free % 8 == 0); 42 | assert(entry.payload_size % 8 == 0); 43 | assert(sizeof(File) + next_free + entry.payload_size + 16(&entry); 50 | for (ub4 i = 0; i0) { 60 | alex_FlushClToNvm(nvm, cl_buffer); 61 | } 62 | alex_SFence(); 63 | 64 | // Copy the footer 65 | cl_buffer[cl_offset] = next_free; 66 | cl_offset++; 67 | alex_FlushClToNvm(nvm, cl_buffer); 68 | if (cl_offset == 8) { 69 | cl_offset = 0; 70 | nvm += 64; 71 | } 72 | alex_SFence(); 73 | 74 | // Advance and done 75 | next_free += entry.payload_size + 16; 76 | assert(next_free % 8 == 0); 77 | assert((ub8)nvm == (ub8(file.data + next_free) & ~ub8(63))); 78 | return next_free; 79 | } 80 | 81 | ub8 GetWrittenByteCount() const 82 | { 83 | return next_free + sizeof(File); 84 | } 85 | 86 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 87 | { 88 | ub8 current = 0; 89 | ub8 used_size = 0; 90 | 91 | vectorentries; 92 | while (used_sizepayload_size = 
payload_size; 97 | entries.push_back(entry); 98 | current += payload_size + 16; 99 | used_size += payload_size; 100 | } 101 | return entries; 102 | } 103 | }; -------------------------------------------------------------------------------- /logging/LW_Header.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterHeader { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub8 next_free; 11 | ub1 padding[constants::kCacheLineByteCount - sizeof(ub8)]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | 21 | LogWriterHeader(NonVolatileMemory &nvm) 22 | : nvm(nvm) 23 | , file(*reinterpret_cast(nvm.Data())) 24 | { 25 | file.next_free = 0; 26 | } 27 | 28 | ub8 AddLogEntry(const Entry &entry) 29 | { 30 | assert(file.next_free % 8 == 0); 31 | assert(entry.payload_size % 8 == 0); 32 | assert(sizeof(File) + file.next_free + entry.payload_size + 8 CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 52 | { 53 | ub8 current = 0; 54 | ub8 used_size = 0; 55 | 56 | vectorentries; 57 | while (used_sizepayload_size = payload_size; 62 | entries.push_back(entry); 63 | current += payload_size + 8; 64 | used_size += payload_size; 65 | } 66 | return entries; 67 | } 68 | }; -------------------------------------------------------------------------------- /logging/LW_HeaderAligned.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterHeaderAligned { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub8 next_free; 11 | ub1 padding[constants::kCacheLineByteCount - sizeof(ub8)]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | ub8 next_free; 18 | 19 | NonVolatileMemory &nvm; 20 | 
File &file; // == nvm 21 | 22 | LogWriterHeaderAligned(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | file.next_free = 0; 27 | next_free = 0; 28 | } 29 | 30 | ub8 AddLogEntry(const Entry &entry) 31 | { 32 | assert(entry.payload_size % 8 == 0); 33 | assert(next_free % 64 == 0); 34 | assert(sizeof(File) + next_free + entry.payload_size + 8(&entry); 40 | alex_FastCopyAndWriteBack(entry_begin, ram_begin, size); 41 | alex_SFence(); 42 | 43 | // Update the header 44 | next_free += size; 45 | next_free = (next_free + 63) & ~63ull; // 64 byte aligned 46 | file.next_free = next_free; 47 | alex_WriteBack((void *) &file); 48 | alex_SFence(); 49 | 50 | assert(next_free % 64 == 0); 51 | return next_free; 52 | } 53 | 54 | ub8 GetWrittenByteCount() const 55 | { 56 | return file.next_free + sizeof(File); 57 | } 58 | 59 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 60 | { 61 | ub8 current = 0; 62 | ub8 used_size = 0; 63 | 64 | vector entries; 65 | while (used_sizepayload_size = payload_size; 70 | entries.push_back(entry); 71 | current += payload_size + 8; 72 | used_size += payload_size; 73 | } 74 | return entries; 75 | } 76 | }; -------------------------------------------------------------------------------- /logging/LW_HeaderAlignedDancing.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterHeaderAlignedDancing { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct Header { 9 | ub8 next_free; 10 | ub1 padding[constants::kCacheLineByteCount - sizeof(ub8)]; 11 | }; 12 | 13 | static const ub4 HEADER_COUNT = 64; 14 | 15 | struct File { 16 | // Header 17 | Header headers[HEADER_COUNT]; 18 | 19 | // Log data 20 | ub1 data[]; 21 | }; 22 | static_assert(sizeof(File) == 64 * HEADER_COUNT, ""); 23 | ub8 next_free; 24 | ub8 next_header; 25 | 26 | NonVolatileMemory &nvm; 27 | File &file; // == 
nvm 28 | 29 | LogWriterHeaderAlignedDancing(NonVolatileMemory &nvm) 30 | : nvm(nvm) 31 | , file(*reinterpret_cast(nvm.Data())) 32 | { 33 | for (ub4 i = 0; i(&entry); 50 | alex_FastCopyAndWriteBack(entry_begin, ram_begin, size); 51 | alex_SFence(); 52 | 53 | // Update the header 54 | next_free += size; 55 | next_free = (next_free + 63) & ~63ull; // 64 byte aligned 56 | file.headers[next_header].next_free = next_free; 57 | alex_WriteBack((void *) &file.headers[next_header]); 58 | alex_SFence(); 59 | next_header = (next_header + 1) % HEADER_COUNT; 60 | 61 | assert(next_free % 64 == 0); 62 | return next_free; 63 | } 64 | 65 | ub8 GetWrittenByteCount() const 66 | { 67 | return next_free + sizeof(File); 68 | } 69 | 70 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 71 | { 72 | ub8 current = 0; 73 | ub8 used_size = 0; 74 | 75 | vectorentries; 76 | while (used_sizepayload_size = payload_size; 81 | entries.push_back(entry); 82 | current += payload_size + 8; 83 | used_size += payload_size; 84 | } 85 | return entries; 86 | } 87 | }; -------------------------------------------------------------------------------- /logging/LW_HeaderDancing.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterHeaderDancing { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct Header { 9 | ub8 next_free; 10 | ub1 padding[constants::kCacheLineByteCount - sizeof(ub8)]; 11 | }; 12 | 13 | static const ub4 HEADER_COUNT = 64; 14 | 15 | struct File { 16 | // Header 17 | Header headers[HEADER_COUNT]; 18 | 19 | // Log data 20 | ub1 data[]; 21 | }; 22 | static_assert(sizeof(File) == 64 * HEADER_COUNT, ""); 23 | ub8 next_free; 24 | ub8 next_header; 25 | 26 | NonVolatileMemory &nvm; 27 | File &file; // == nvm 28 | 29 | LogWriterHeaderDancing(NonVolatileMemory &nvm) 30 | : nvm(nvm) 31 | , file(*reinterpret_cast(nvm.Data())) 32 | { 33 | for (ub4 i = 0; i 
CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 68 | { 69 | ub8 current = 0; 70 | ub8 used_size = 0; 71 | 72 | vectorentries; 73 | while (used_sizepayload_size = payload_size; 78 | entries.push_back(entry); 79 | current += payload_size + 8; 80 | used_size += payload_size; 81 | } 82 | return entries; 83 | } 84 | }; -------------------------------------------------------------------------------- /logging/LW_Mnemosyne.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterMnemosyne { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub1 padding[constants::kCacheLineByteCount]; 11 | 12 | // Log data 13 | ub1 data[]; 14 | }; 15 | static_assert(sizeof(File) == 64, ""); 16 | 17 | NonVolatileMemory &nvm; 18 | File &file; // == nvm 19 | ub8 next_free; 20 | ub8 log_read_offset; 21 | 22 | LogWriterMnemosyne(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | log_read_offset = 0; 28 | } 29 | 30 | ub8 AddLogEntry(const Entry &entry) 31 | { 32 | assert(next_free + entry.payload_size + 80 && entry.payload_size % 8 == 0); 34 | 35 | // Write length of log entry 36 | (uint64_t &) file.data[next_free] = entry.payload_size | 0x1; 37 | next_free += 8; 38 | 39 | // Write payload of log entry 40 | uint64_t buffer = 0; 41 | uint64_t checker = 0; 42 | 43 | uint32_t i = 0; 44 | for (i = 0; i0 && (i + 8) % 504 == 0) { 61 | // cout << "flush checker" << endl; 62 | assert(checker == 63); 63 | buffer = (buffer << 1) | 0x1; 64 | *((uint64_t *) &file.data[next_free]) = buffer; 65 | next_free += 8; 66 | checker = 0; 67 | buffer = 0; 68 | 69 | // Flush cache line every cache line 70 | if (next_free % 64 == 0) { 71 | alex_WriteBack(file.data + next_free - 8); 72 | } 73 | } 74 | } 75 | 76 | if (i % 504 != 0) { 77 | assert(checker>0); 78 | // cout << "flush tail checker: " << 
buffer << " @" << next_free << endl; 79 | 80 | buffer = (buffer << 1) | 0x1; 81 | *((uint64_t *) &file.data[next_free]) = buffer; 82 | next_free += 8; 83 | } 84 | alex_WriteBack(file.data + next_free - 8); 85 | alex_SFence(); 86 | 87 | // Advance and done 88 | return next_free; 89 | } 90 | 91 | unique_ptr GetNextLogEntry() // Read code is only to verify correctnes 92 | { 93 | if (log_read_offset == next_free) { 94 | return nullptr; 95 | } 96 | 97 | // Read length 98 | uint64_t len = *((uint64_t *) &file.data[log_read_offset]) & ~(0x1ull); 99 | log_read_offset += 8; 100 | 101 | // cout << "got len: " << len << endl; 102 | 103 | uint64_t checker = 0; 104 | 105 | vector result(len); 106 | uint32_t i = 0; 107 | for (; i0 && (i + 8) % 504 == 0) { 114 | assert(checker == 63); 115 | checker = 0; 116 | // cout << "apply checker" << endl; 117 | 118 | uint64_t buffer = *((uint64_t *) &file.data[log_read_offset]) >> 1; 119 | log_read_offset += 8; 120 | for (uint32_t c = 0; c<63; c++) { 121 | *((uint64_t *) &result[i - c * 8]) |= buffer & 0x1; 122 | buffer = buffer >> 1; 123 | } 124 | } 125 | } 126 | 127 | if (i % 504 != 0) { 128 | i -= 8; 129 | assert(checker>0); 130 | 131 | uint64_t buffer = *((uint64_t *) &file.data[log_read_offset]) >> 1; 132 | // cout << "apply tail checker " << buffer << " @" << log_read_offset << endl; 133 | 134 | log_read_offset += 8; 135 | for (uint32_t c = 0; c> 1; 138 | } 139 | checker = 0; 140 | } 141 | 142 | Entry *entry = new(malloc(sizeof(Entry) + result.size())) Entry(); 143 | entry->payload_size = result.size(); 144 | memcpy(entry->data, result.data(), result.size()); 145 | return unique_ptr(entry); 146 | } 147 | 148 | ub8 GetWrittenByteCount() const 149 | { 150 | return next_free + sizeof(File); 151 | } 152 | 153 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 154 | { 155 | ub8 current = 0; 156 | ub8 used_size = 0; 157 | 158 | vector entries; 159 | while 
(used_sizepayload_size = payload_size; 164 | entries.push_back(entry); 165 | current += payload_size + 8; 166 | used_size += payload_size; 167 | } 168 | return entries; 169 | } 170 | }; -------------------------------------------------------------------------------- /logging/LW_MnemosyneAligned.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterMnemosyneAligned { 2 | 3 | struct Entry { 4 | ub8 payload_size; 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub1 padding[constants::kCacheLineByteCount]; 11 | 12 | // Log data 13 | ub1 data[]; 14 | }; 15 | static_assert(sizeof(File) == 64, ""); 16 | 17 | NonVolatileMemory &nvm; 18 | File &file; // == nvm 19 | ub8 next_free; 20 | ub8 log_read_offset; 21 | 22 | LogWriterMnemosyneAligned(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | log_read_offset = 0; 28 | } 29 | 30 | ub8 AddLogEntry(const Entry &entry) 31 | { 32 | assert(next_free + entry.payload_size + 80 && entry.payload_size % 8 == 0); 34 | assert((uint64_t) &file.data[next_free] % 64 == 0); 35 | assert(next_free % 64 == 0); 36 | 37 | // Write length of log entry 38 | (uint64_t &) file.data[next_free] = entry.payload_size | 0x1; 39 | next_free += 8; 40 | 41 | // Write payload of log entry 42 | uint64_t buffer = 0; 43 | uint64_t checker = 0; 44 | 45 | uint32_t i = 0; 46 | for (i = 0; i0 && (i + 8) % 504 == 0) { 63 | // cout << "flush checker" << endl; 64 | assert(checker == 63); 65 | buffer = (buffer << 1) | 0x1; 66 | *((uint64_t *) &file.data[next_free]) = buffer; 67 | next_free += 8; 68 | checker = 0; 69 | buffer = 0; 70 | 71 | // Flush cache line every cache line 72 | if (next_free % 64 == 0) { 73 | alex_WriteBack(file.data + next_free - 8); 74 | } 75 | } 76 | } 77 | 78 | if (i % 504 != 0) { 79 | assert(checker>0); 80 | // cout << "flush tail checker: " << buffer << " @" << next_free << endl; 81 | 82 | buffer = (buffer << 1) | 
0x1; 83 | *((uint64_t *) &file.data[next_free]) = buffer; 84 | next_free += 8; 85 | } 86 | alex_WriteBack(file.data + next_free - 8); 87 | alex_SFence(); 88 | 89 | // Advance and done 90 | next_free = (next_free + 63) & ~63ull; 91 | assert(next_free % 64 == 0); 92 | return next_free; 93 | } 94 | 95 | unique_ptr GetNextLogEntry() // Read code is only to verify correctnes 96 | { 97 | if (log_read_offset == next_free) { 98 | return nullptr; 99 | } 100 | 101 | // Read length 102 | uint64_t len = *((uint64_t *) &file.data[log_read_offset]) & ~(0x1ull); 103 | log_read_offset += 8; 104 | 105 | // cout << "got len: " << len << endl; 106 | 107 | uint64_t checker = 0; 108 | 109 | vector result(len); 110 | uint32_t i = 0; 111 | for (; i0 && (i + 8) % 504 == 0) { 118 | assert(checker == 63); 119 | checker = 0; 120 | // cout << "apply checker" << endl; 121 | 122 | uint64_t buffer = *((uint64_t *) &file.data[log_read_offset]) >> 1; 123 | log_read_offset += 8; 124 | for (uint32_t c = 0; c<63; c++) { 125 | *((uint64_t *) &result[i - c * 8]) |= buffer & 0x1; 126 | buffer = buffer >> 1; 127 | } 128 | } 129 | } 130 | 131 | if (i % 504 != 0) { 132 | i -= 8; 133 | assert(checker>0); 134 | 135 | uint64_t buffer = *((uint64_t *) &file.data[log_read_offset]) >> 1; 136 | // cout << "apply tail checker " << buffer << " @" << log_read_offset << endl; 137 | 138 | log_read_offset += 8; 139 | for (uint32_t c = 0; c> 1; 142 | } 143 | checker = 0; 144 | } 145 | 146 | // Advance 147 | log_read_offset = (log_read_offset + 63) & ~63ull; 148 | assert(log_read_offset % 64 == 0); 149 | 150 | Entry *entry = new(malloc(sizeof(Entry) + result.size())) Entry(); 151 | entry->payload_size = result.size(); 152 | memcpy(entry->data, result.data(), result.size()); 153 | return unique_ptr(entry); 154 | } 155 | 156 | ub8 GetWrittenByteCount() const 157 | { 158 | return next_free + sizeof(File); 159 | } 160 | 161 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, 
Random &ranny) 162 | { 163 | ub8 current = 0; 164 | ub8 used_size = 0; 165 | 166 | vector entries; 167 | while (used_sizepayload_size = payload_size; 172 | entries.push_back(entry); 173 | current += payload_size + 8; 174 | used_size += payload_size; 175 | } 176 | return entries; 177 | } 178 | }; -------------------------------------------------------------------------------- /logging/LW_PMemLib.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct LogWriterPMemLib { 4 | 5 | struct Entry { 6 | ub8 payload_size; // header 7 | ub1 data[]; 8 | }; 9 | 10 | ub8 next_free; 11 | PMEMlogpool *log; 12 | 13 | LogWriterPMemLib(const std::string &file_name, ub8 file_size) 14 | { 15 | system(("rm -f " + file_name + "_pmemlib").c_str()); 16 | log = pmemlog_create((file_name + "_pmemlib").c_str(), file_size, 0666); 17 | if (log == nullptr) { 18 | cout << "fail initializing pmemlog log" << endl; 19 | exit(-1); 20 | } 21 | 22 | next_free = 0; 23 | } 24 | ub8 AddLogEntry(const Entry &entry) 25 | { 26 | int res = pmemlog_append(log, &entry, entry.payload_size + 8); // We need to writte the size of the log entry, because they dont 27 | if (res<0) { 28 | cout << "error writing to the pmem log" << endl; 29 | assert(false); 30 | throw; 31 | } 32 | 33 | return 11111111; 34 | } 35 | 36 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 37 | { 38 | ub8 current = 0; 39 | ub8 used_size = 0; 40 | 41 | vectorentries; 42 | while (used_sizepayload_size = payload_size; 47 | entries.push_back(entry); 48 | current += payload_size + 16; 49 | used_size += payload_size; 50 | } 51 | return entries; 52 | } 53 | }; -------------------------------------------------------------------------------- /logging/LW_Zero.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZero { 2 | 3 | struct Entry { 4 | ub4 payload_size; // header 5 | 
ub4 bit_count; // header 6 | ub1 data[]; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterZero(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | assert(next_free % 8 == 0); 32 | assert(entry.bit_count == 0); 33 | assert(entry.payload_size % 8 == 0); 34 | assert(sizeof(File) + next_free + entry.payload_size + 8(&entry); 39 | ub8 *entry_nvm = reinterpret_cast(nvm.Data() + next_free); 40 | for (ub4 i = 0; i(entry_nvm)->bit_count = pop_cnt; 45 | alex_WriteBack(entry_nvm, entry.payload_size + 8); 46 | alex_SFence(); 47 | 48 | // Advance and done 49 | next_free += entry.payload_size + 8; 50 | assert(next_free % 8 == 0); 51 | return next_free; 52 | } 53 | 54 | ub8 GetWrittenByteCount() const 55 | { 56 | return next_free + sizeof(File); 57 | } 58 | 59 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 60 | { 61 | ub8 current = 0; 62 | ub8 used_size = 0; 63 | 64 | vector entries; 65 | while (used_sizepayload_size = payload_size; 70 | entry->bit_count = 0; 71 | entries.push_back(entry); 72 | current += payload_size + 8; 73 | used_size += payload_size; 74 | } 75 | return entries; 76 | } 77 | }; -------------------------------------------------------------------------------- /logging/LW_ZeroAligned.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZeroAligned { 2 | 3 | struct Entry { 4 | ub4 payload_size; // header 5 | ub4 bit_count; // header 6 | ub1 data[]; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | 
static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterZeroAligned(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | assert(next_free % 8 == 0); 32 | assert(entry.bit_count == 0); 33 | assert(entry.payload_size % 8 == 0); 34 | assert(sizeof(File) + next_free + entry.payload_size + 8(&entry); 39 | ub8 *entry_nvm = reinterpret_cast(nvm.Data() + next_free); 40 | for (ub4 i = 0; i(entry_nvm)->bit_count = pop_cnt; 45 | alex_WriteBack(entry_nvm, entry.payload_size + 8); 46 | alex_SFence(); 47 | 48 | // Advance and done 49 | next_free += entry.payload_size + 8; 50 | next_free = (next_free + 63) & ~63ull; 51 | assert(next_free % 64 == 0); 52 | return next_free; 53 | } 54 | 55 | ub8 GetWrittenByteCount() const 56 | { 57 | return next_free + sizeof(File); 58 | } 59 | 60 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 61 | { 62 | ub8 current = 0; 63 | ub8 used_size = 0; 64 | 65 | vector entries; 66 | while (used_sizepayload_size = payload_size; 71 | entry->bit_count = 0; 72 | entries.push_back(entry); 73 | current += payload_size + 8; 74 | used_size += payload_size; 75 | } 76 | return entries; 77 | } 78 | }; -------------------------------------------------------------------------------- /logging/LW_ZeroBlocked.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZeroBlocked { 2 | 3 | struct Entry { 4 | ub4 payload_size; // header 5 | ub4 bit_count; // header 6 | ub1 data[]; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | 
LogWriterZeroBlocked(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | ub4 size = entry.payload_size + 8; 32 | 33 | assert(next_free % 8 == 0); 34 | assert(entry.bit_count == 0); 35 | assert(entry.payload_size % 8 == 0); 36 | assert(next_free + size=64); 39 | 40 | const ub1 *ram_begin = reinterpret_cast(&entry); 41 | ub1 *nvm_begin = reinterpret_cast(nvm.Data() + next_free); 42 | 43 | // Copy first cache line (and do not flush) 44 | ub4 pop_cnt = 0; 45 | FastCopy512(nvm_begin, ram_begin); 46 | pop_cnt += FastPopCount512(ram_begin); 47 | 48 | // Copy remaining full cache lines (and flush) 49 | ub4 pos = 64; 50 | for (; pos + 63(nvm_begin)->bit_count = pop_cnt; 67 | alex_WriteBack(nvm_begin); 68 | alex_SFence(); 69 | 70 | // Advance and done 71 | next_free += entry.payload_size + 8; 72 | next_free = (next_free + 63) & ~63ull; 73 | assert(next_free % 64 == 0); 74 | return next_free; 75 | } 76 | 77 | ub8 GetWrittenByteCount() const 78 | { 79 | return next_free + sizeof(File); 80 | } 81 | 82 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 83 | { 84 | ub8 current = 0; 85 | ub8 used_size = 0; 86 | 87 | vector entries; 88 | while (used_sizepayload_size = payload_size; 93 | entry->bit_count = 0; 94 | entries.push_back(entry); 95 | current += payload_size + 8; 96 | used_size += payload_size; 97 | } 98 | return entries; 99 | } 100 | }; -------------------------------------------------------------------------------- /logging/LW_ZeroCached.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZeroCached { 2 | 3 | struct Entry { 4 | ub8 payload_size; // header 5 | ub1 data[]; 6 | }; 7 | 8 | struct File { 9 | // Header 10 | ub1 padding[constants::kCacheLineByteCount]; 11 | 12 | // Log data 13 | ub1 data[]; 14 | }; 15 | 
static_assert(sizeof(File) == 64, ""); 16 | 17 | NonVolatileMemory &nvm; 18 | File &file; // == nvm 19 | ub8 next_free; 20 | ub8 cl_pos; 21 | ub8 active_cl_mem[16]; 22 | ub8 *active_cl; 23 | ub8 log_read_offset; 24 | 25 | LogWriterZeroCached(NonVolatileMemory &nvm) 26 | : nvm(nvm) 27 | , file(*reinterpret_cast(nvm.Data())) 28 | { 29 | next_free = 0; 30 | cl_pos = 0; 31 | log_read_offset = 0; 32 | 33 | active_cl = active_cl_mem; 34 | while ((ub8) active_cl % 64 != 0) { 35 | active_cl++; 36 | } 37 | assert((ub8) active_cl % 64 == 0); 38 | memset((ub1 *) active_cl, 0, 64); 39 | } 40 | 41 | ub8 AddLogEntry(const Entry &entry) 42 | { 43 | ub4 size = entry.payload_size + 8; 44 | ub4 blks = size / 8; 45 | 46 | assert(next_free % 8 == 0); 47 | assert(entry.payload_size % 8 == 0); 48 | assert(next_free + size(&entry); 52 | ub1 *nvm_begin = reinterpret_cast(file.data + (next_free & ~63ull)); 53 | 54 | // Head 55 | ub4 pos = 0; 56 | for (; pos GetNextLogEntry() // Read code is only to verify correctnes 126 | { 127 | if (log_read_offset == next_free) { 128 | return nullptr; 129 | } 130 | 131 | // Read length 132 | ub8 len = *(ub8 *) &file.data[log_read_offset]; 133 | log_read_offset += 8; 134 | ub8 pop_cnt = 0; 135 | pop_cnt += alex_PopCount(len); 136 | 137 | // Read data 138 | vector result(len); 139 | for (ub4 pos = 0; pospayload_size = result.size(); 155 | memcpy(entry->data, (ub1 *) result.data(), result.size()); 156 | return unique_ptr(entry); 157 | } 158 | 159 | ub8 GetWrittenByteCount() const 160 | { 161 | return next_free + sizeof(File); 162 | } 163 | 164 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 165 | { 166 | ub8 current = 0; 167 | ub8 used_size = 0; 168 | 169 | vector entries; 170 | while (used_sizepayload_size = payload_size; 175 | entries.push_back(entry); 176 | current += payload_size + 8; 177 | used_size += payload_size; 178 | } 179 | return entries; 180 | } 181 | }; 
-------------------------------------------------------------------------------- /logging/LW_ZeroSimd.hpp: -------------------------------------------------------------------------------- 1 | struct LogWriterZeroSimd { 2 | 3 | struct Entry { 4 | ub4 payload_size; // header 5 | ub4 bit_count; // header 6 | ub1 data[]; 7 | }; 8 | 9 | struct File { 10 | // Header 11 | ub1 padding[constants::kCacheLineByteCount]; 12 | 13 | // Log data 14 | ub1 data[]; 15 | }; 16 | static_assert(sizeof(File) == 64, ""); 17 | 18 | NonVolatileMemory &nvm; 19 | File &file; // == nvm 20 | ub8 next_free; 21 | 22 | LogWriterZeroSimd(NonVolatileMemory &nvm) 23 | : nvm(nvm) 24 | , file(*reinterpret_cast(nvm.Data())) 25 | { 26 | next_free = 0; 27 | } 28 | 29 | ub8 AddLogEntry(const Entry &entry) 30 | { 31 | ub4 size = entry.payload_size + 8; 32 | 33 | assert(next_free % 8 == 0); 34 | assert(entry.bit_count == 0); 35 | assert(entry.payload_size % 8 == 0); 36 | assert(next_free + size=64); 39 | 40 | const ub1 *ram_begin = reinterpret_cast(&entry); 41 | ub1 *nvm_begin = reinterpret_cast(nvm.Data() + next_free); 42 | 43 | // Copy first cache line (and do not flush) 44 | ub4 pop_cnt = 0; 45 | FastCopy512Simd(nvm_begin, ram_begin); 46 | pop_cnt += FastPopCount512(ram_begin); 47 | 48 | // Copy remaining full cache lines (and flush) 49 | ub4 pos = 64; 50 | for (; pos + 63(nvm_begin)->bit_count = pop_cnt; 67 | alex_WriteBack(nvm_begin); 68 | alex_SFence(); 69 | 70 | // Advance and done 71 | next_free += entry.payload_size + 8; 72 | next_free = (next_free + 63) & ~63ull; 73 | assert(next_free % 64 == 0); 74 | return next_free; 75 | } 76 | 77 | ub8 GetWrittenByteCount() const 78 | { 79 | return next_free + sizeof(File); 80 | } 81 | 82 | static vector CreateRandomEntries(vector &memory, ub4 min_size, ub4 max_size, ub8 log_payload_size, Random &ranny) 83 | { 84 | ub8 current = 0; 85 | ub8 used_size = 0; 86 | 87 | vector entries; 88 | while (used_sizepayload_size = payload_size; 93 | entry->bit_count = 0; 94 
| entries.push_back(entry); 95 | current += payload_size + 8; 96 | used_size += payload_size; 97 | } 98 | return entries; 99 | } 100 | }; -------------------------------------------------------------------------------- /logging/NonVolatileMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | // ------------------------------------------------------------------------------------- 13 | class NonVolatileMemory { 14 | public: 15 | NonVolatileMemory(ub8 byte_count); // Uses dram that is not file backed 16 | NonVolatileMemory(const std::string &file_name, ub8 byte_count); // Uses dram or nvm depending on the file path 17 | NonVolatileMemory(const NonVolatileMemory &) = delete; 18 | NonVolatileMemory &operator=(const NonVolatileMemory &) = delete; 19 | 20 | ~NonVolatileMemory(); 21 | 22 | ub1 *Data() { return data_ptr; } 23 | ub1 *End() { return data_ptr + byte_count; } 24 | ub8 GetByteCount() { return byte_count; } 25 | 26 | bool IsNvm() const { return is_nvm; } 27 | 28 | private: 29 | ub1 *data_ptr; 30 | std::string file_name; 31 | const ub8 byte_count; 32 | bool is_nvm; 33 | bool is_mapped_file; 34 | int file_fd; 35 | }; 36 | // ------------------------------------------------------------------------------------- 37 | NonVolatileMemory::NonVolatileMemory(ub8 byte_count) 38 | : byte_count(byte_count) 39 | , is_mapped_file(false) 40 | { 41 | assert(((ub8) ((off_t) byte_count)) == byte_count); 42 | 43 | assert(byte_count>0); // XXX 44 | 45 | data_ptr = AlignedAlloc(512, byte_count); 46 | 47 | is_nvm = false; 48 | } 49 | // ------------------------------------------------------------------------------------- 50 | NonVolatileMemory::NonVolatileMemory(const std::string &file_name, ub8 
byte_count) 51 | : file_name(file_name) 52 | , byte_count(byte_count) 53 | , is_nvm(true) 54 | , is_mapped_file(true) 55 | { 56 | assert(((ub8) ((off_t) byte_count)) == byte_count); 57 | 58 | file_fd = open(file_name.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 59 | int td = ftruncate(file_fd, byte_count); 60 | if (file_fd<0 || td<0) { 61 | std::cout << "unable to create file" << std::endl; 62 | exit(-1); 63 | } 64 | data_ptr = (ub1 *) mmap(nullptr, byte_count, PROT_WRITE, MAP_SHARED, file_fd, 0); 65 | } 66 | // ------------------------------------------------------------------------------------- 67 | NonVolatileMemory::~NonVolatileMemory() 68 | { 69 | // Benchmark code .. dont care ;p 70 | } 71 | // ------------------------------------------------------------------------------------- 72 | -------------------------------------------------------------------------------- /logging/Random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | // Based on: https://en.wikipedia.org/wiki/Xorshift 6 | class Random { 7 | public: 8 | explicit Random(ub8 seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 9 | : seed(seed) 10 | { 11 | } 12 | 13 | uint64_t Rand() 14 | { 15 | seed ^= (seed << 13); 16 | seed ^= (seed >> 7); 17 | return (seed ^= (seed << 17)); 18 | } 19 | 20 | ub8 seed; 21 | }; 22 | // ------------------------------------------------------------------------------------- 23 | -------------------------------------------------------------------------------- /page_flush/Common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | 
#include 4 | #include 5 | #include 6 | #include 7 | #include "libpmem.h" 8 | // ------------------------------------------------------------------------------------- 9 | using ub1 = uint8_t; 10 | using ub2 = uint16_t; 11 | using ub4 = uint32_t; 12 | using ub8 = uint64_t; 13 | // ------------------------------------------------------------------------------------- 14 | using sb1 = int8_t; 15 | using sb2 = int16_t; 16 | using sb4 = int32_t; 17 | using sb8 = int64_t; 18 | // ------------------------------------------------------------------------------------- 19 | namespace constants { 20 | const static ub4 kCacheLineByteCount = 64; // 64 Byte cache lines 21 | const static ub4 kPageByteCount = 1 << 14; // 16 KB 22 | const static ub4 kCacheLinesPerPage = kPageByteCount / kCacheLineByteCount; // 16KB/64Byte 23 | const static ub4 kPageAlignment = 512; // For O_Direct 24 | const static ub8 kInvalidPageId = ~0; 25 | } 26 | // ------------------------------------------------------------------------------------- 27 | #define a_mm_clflush(addr)\ 28 | asm volatile("clflush %0" : "+m" (*(volatile char *)addr)); 29 | #define a_mm_clflushopt(addr)\ 30 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)addr)); 31 | #define a_mm_clwb(addr)\ 32 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)addr)); 33 | #define a_mm_pcommit()\ 34 | asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); 35 | // ------------------------------------------------------------------------------------- 36 | inline void alex_WriteBack(void *addr, ub4 len) 37 | { 38 | for (uintptr_t uptr = (uintptr_t) addr & ~(64 - 1); uptr<(uintptr_t) addr + len; uptr += 64) 39 | a_mm_clwb((char *) uptr); 40 | } 41 | // ------------------------------------------------------------------------------------- 42 | inline void alex_WriteBack(void *addr) 43 | { 44 | addr = (ub1 *) ((uintptr_t) addr & ~(64 - 1)); 45 | a_mm_clwb((char *) addr); 46 | } 47 | // 
// Portable population count: number of 1-bits in a 32-bit word.
// Uses Kernighan's trick (i &= i - 1 clears the lowest set bit), so the loop
// runs once per set bit; used as the fallback where _mm_popcnt_u64 is missing.
inline int numberOfSetBits(uint32_t i)
{
   int count = 0;
   while (i != 0) {
      i &= i - 1; // drop the lowest set bit
      ++count;
   }
   return count;
}
reinterpret_cast(result); 120 | } 121 | // ------------------------------------------------------------------------------------- 122 | -------------------------------------------------------------------------------- /page_flush/NonVolatileMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include "Pages.hpp" 5 | #include 6 | // ------------------------------------------------------------------------------------- 7 | class NonVolatileMemory { 8 | public: 9 | NonVolatileMemory(ub8 byte_count); // Uses dram that is not file backed 10 | NonVolatileMemory(const std::string &file_name, ub8 byte_count); // Uses dram or nvm depending on the file path 11 | NonVolatileMemory(const NonVolatileMemory &) = delete; 12 | NonVolatileMemory &operator=(const NonVolatileMemory &) = delete; 13 | 14 | ~NonVolatileMemory(); 15 | 16 | ub1 *Data() { return data_ptr; } 17 | ub1 *End() { return data_ptr + byte_count; } 18 | ub8 GetByteCount() { return byte_count; } 19 | 20 | void FlushAll(); 21 | void Flush(ub8 from, ub8 length); 22 | 23 | NvmBufferFrame &GetNvmBufferFrame(ub8 id) 24 | { 25 | assert(data_ptr + id * sizeof(NvmBufferFrame)<=End()); 26 | assert(reinterpret_cast(data_ptr)[id].GetPage().Ptr() != nullptr); 27 | return reinterpret_cast(data_ptr)[id]; 28 | } 29 | 30 | bool IsRealNvm() const { return is_real_nvm; } 31 | 32 | private: 33 | ub1 *data_ptr; 34 | std::string file_name; 35 | const ub8 byte_count; 36 | bool is_real_nvm; 37 | bool is_mapped_file; 38 | }; 39 | // ------------------------------------------------------------------------------------- 40 | NonVolatileMemory::NonVolatileMemory(ub8 byte_count) 41 | : byte_count(byte_count) 42 | , is_mapped_file(false) 43 | { 44 | assert(((ub8) ((off_t) byte_count)) == byte_count); 45 | assert(byte_count>0); 46 | 47 | data_ptr = AlignedAlloc(512, 
byte_count); 48 | 49 | is_real_nvm = false; 50 | } 51 | // ------------------------------------------------------------------------------------- 52 | NonVolatileMemory::NonVolatileMemory(const std::string &file_name, ub8 byte_count) 53 | : file_name(file_name) 54 | , byte_count(byte_count) 55 | , is_mapped_file(true) 56 | { 57 | assert(((ub8) ((off_t) byte_count)) == byte_count); 58 | 59 | // No need to do anything if zero byte are requested; Does this happen ?? 60 | assert(byte_count>0); // XXX 61 | if (byte_count == 0) { 62 | data_ptr = nullptr; 63 | return; 64 | } 65 | 66 | // Map the file (our pmem wrapper works with normal memory by falling back to mmap) 67 | size_t acquired_byte_count; 68 | data_ptr = reinterpret_cast(pmem_map_file(file_name.c_str(), byte_count, PMEM_FILE_CREATE, 0666, &acquired_byte_count, nullptr)); 69 | if (data_ptr == nullptr) { 70 | std::cout << "Failed to create file: '" << file_name << "'." << std::endl; 71 | throw; 72 | } 73 | if (acquired_byte_count != byte_count) { 74 | std::cout << "Failed to allocate requested size for file: '" << file_name << "'. 
(Requested=" << byte_count << ", Aquired=" << acquired_byte_count << ")" << std::endl; 75 | throw; 76 | } 77 | 78 | // Do this only once, as it is expensive 79 | is_real_nvm = pmem_is_pmem(data_ptr, 1); 80 | } 81 | // ------------------------------------------------------------------------------------- 82 | NonVolatileMemory::~NonVolatileMemory() 83 | { 84 | if (is_mapped_file) { 85 | pmem_unmap(data_ptr, byte_count); 86 | } else { 87 | free(data_ptr); 88 | } 89 | } 90 | // ------------------------------------------------------------------------------------- 91 | void NonVolatileMemory::FlushAll() 92 | { 93 | if (is_mapped_file) { 94 | pmem_persist(data_ptr, byte_count); 95 | } 96 | } 97 | // ------------------------------------------------------------------------------------- 98 | void NonVolatileMemory::Flush(ub8 from, ub8 length) 99 | { 100 | if (is_mapped_file) { 101 | pmem_persist(data_ptr + from, length); 102 | } 103 | } 104 | // ------------------------------------------------------------------------------------- 105 | -------------------------------------------------------------------------------- /page_flush/Pages.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | #include 5 | #include 6 | // ------------------------------------------------------------------------------------- 7 | class Page { 8 | public: 9 | template 10 | T *Ptr() { return reinterpret_cast( data.data()); } 11 | 12 | template 13 | const T *Ptr() const { return reinterpret_cast( data.data()); } 14 | 15 | private: 16 | std::array data; 17 | }; 18 | // ------------------------------------------------------------------------------------- 19 | template 20 | class AutoAlignedPage { 21 | public: 22 | Page &GetPage() 23 | { 24 | assert(padding.data()(res)); 27 | return *reinterpret_cast( res); 28 | } 29 | 30 | const Page 
&GetPage() const 31 | { 32 | assert(padding.data()(res)); 35 | return *reinterpret_cast( res); 36 | } 37 | 38 | private: 39 | std::array padding; 40 | std::array data; 41 | }; 42 | // ------------------------------------------------------------------------------------- 43 | static_assert(sizeof(Page) == constants::kPageByteCount, "Pages are always 16kb"); 44 | // ------------------------------------------------------------------------------------- 45 | class NvmBufferFrame { 46 | public: 47 | inline void init() 48 | { 49 | dirty = false; 50 | page_id = constants::kInvalidPageId; 51 | pvn = 0; 52 | } 53 | 54 | Page &GetPage() { return page.GetPage(); } 55 | const Page &GetPage() const { return page.GetPage(); } 56 | 57 | bool dirty; 58 | ub8 page_id; 59 | ub8 pvn; 60 | 61 | private: 62 | AutoAlignedPage page; 63 | }; 64 | // ------------------------------------------------------------------------------------- 65 | static_assert(sizeof(NvmBufferFrame) % 8 == 0, "NvmBufferFrame should be eight byte aligned."); 66 | // ------------------------------------------------------------------------------------- 67 | -------------------------------------------------------------------------------- /page_flush/Random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | // Based on: https://en.wikipedia.org/wiki/Xorshift 6 | class Random { 7 | public: 8 | explicit Random(ub8 seed = 2305843008139952128ull) // The 8th perfect number found 1772 by Euler with <3 9 | : seed(seed) 10 | { 11 | } 12 | 13 | uint64_t Rand() 14 | { 15 | seed ^= (seed << 13); 16 | seed ^= (seed >> 7); 17 | return (seed ^= (seed << 17)); 18 | } 19 | 20 | ub8 seed; 21 | }; 22 | // 
------------------------------------------------------------------------------------- 23 | -------------------------------------------------------------------------------- /page_flush/VolatileMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // ------------------------------------------------------------------------------------- 3 | #include "Common.hpp" 4 | // ------------------------------------------------------------------------------------- 5 | class VolatileMemory { 6 | 7 | public: 8 | VolatileMemory(ub8 byte_count); 9 | ~VolatileMemory(); 10 | VolatileMemory(const VolatileMemory &) = delete; 11 | VolatileMemory &operator=(const VolatileMemory &) = delete; 12 | 13 | ub1 *Data() { return data_ptr; } 14 | ub1 *End() { return data_ptr + byte_count; } 15 | 16 | ub8 GetByteCount() const { return byte_count; } 17 | 18 | template 19 | T *GetPtr(ub8 offset = 0) { return reinterpret_cast(data_ptr) + offset; } 20 | ub1 *GetPtr(ub8 offset = 0) { return data_ptr + offset; } 21 | 22 | private: 23 | ub1 *const data_ptr; 24 | const ub8 byte_count; 25 | }; 26 | // ------------------------------------------------------------------------------------- 27 | VolatileMemory::VolatileMemory(ub8 byte_count) 28 | : data_ptr(new ub1[byte_count]) 29 | , byte_count(byte_count) 30 | { 31 | assert(byte_count != 0); 32 | } 33 | // ------------------------------------------------------------------------------------- 34 | VolatileMemory::~VolatileMemory() 35 | { 36 | delete[] data_ptr; 37 | } 38 | // ------------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /page_flush/page_flush.cpp: -------------------------------------------------------------------------------- 1 | #include "FullBufferFrame.hpp" 2 | #include "Pages.hpp" 3 | #include "Random.hpp" 4 | #include "VolatileMemory.hpp" 5 | #include "NonVolatileMemory.hpp" 6 | 
// A bit too much, but ensures that it works..
58 | system(("rm -rf " + file).c_str()); 59 | pbp = pmemblk_create(file.c_str(), constants::kPageByteCount, pmemlib_pool_size, 0666); 60 | if (pbp == NULL) { 61 | cout << "failed creating pmdk pmem pool" << endl; 62 | assert(false); 63 | throw; 64 | } 65 | 66 | // Check that we got enough pages in the pool (should always work) 67 | ub4 nelements = pmemblk_nblock(pbp); 68 | if (nelementsinit(); 85 | nvm_bf->page_id = p; 86 | GetRamBf(p)->Init(); 87 | GetRamBf(p)->SwapIn(p, nvm_bf); 88 | nvm_mapping[p] = nvm_bf; 89 | } 90 | 91 | // All techniques (cow and micro log) need a small buffer, we put this after all regular pages 92 | // The buffers also overlap because only one of them is used in a given experiment 93 | // Kind of a hacky design, but good enough for some benchmark code 94 | free_nvm_bf = &nvm.GetNvmBufferFrame(page_count); 95 | 96 | // Logs need to be aligned nicely for SIMD cpy 97 | ub1 *nice_aligned_position = reinterpret_cast(&nvm.GetNvmBufferFrame(page_count)); 98 | while (ub8(nice_aligned_position) % 64 != 0) { 99 | nice_aligned_position++; 100 | } 101 | micro_log = reinterpret_cast(nice_aligned_position); 102 | micro_log_2 = reinterpret_cast(nice_aligned_position); 103 | assert(reinterpret_cast(micro_log) + sizeof(MicroLog)(micro_log_2) + sizeof(MicroLog2)Translate(); 112 | memset(data, 'a', constants::kPageByteCount); 113 | } 114 | } 115 | 116 | // Set cl_count cls to 'a' in DRAM and those which are still 'x' in dram to 'a' in NVM. 117 | // Hence, 'a' is the dirty data in DRAM and after a flush everything on NVM should be 'a'. 
118 | void MakeRandomCacheLinesDirty(ub4 cl_count, bool make_other_cls_resident) 119 | { 120 | set offsets; 121 | for (ub4 i = 0; i offsets_cpy = offsets; 127 | for (ub4 i = 0; iTranslate(cl * constants::kCacheLineByteCount, constants::kCacheLineByteCount); 133 | memset(data, 'a', constants::kCacheLineByteCount); 134 | offsets_cpy.erase(random_cl); 135 | } 136 | 137 | for (auto iter : offsets_cpy) { 138 | ub1 *data = GetMappedNvmBf(p)->GetPage().Ptr() + iter * constants::kCacheLineByteCount; 139 | memset(data, 'a', constants::kCacheLineByteCount); 140 | if (make_other_cls_resident) { 141 | ub1 *ram_data = GetRamBf(p)->Translate(iter * constants::kCacheLineByteCount, constants::kCacheLineByteCount); 142 | memset(ram_data, 'a', constants::kCacheLineByteCount); 143 | } 144 | } 145 | 146 | // Check 147 | if (make_other_cls_resident) { 148 | assert(GetRamBf(p)->GetResidentCacheLineCount() == constants::kCacheLinesPerPage); 149 | } else { 150 | assert(GetRamBf(p)->GetResidentCacheLineCount() == cl_count); 151 | } 152 | assert(GetRamBf(p)->GetDirtyCacheLineCount() == cl_count); 153 | } 154 | } 155 | 156 | void FlushAll_Strawman() 157 | { 158 | for (ub4 i = 0; iFlush(); 160 | } 161 | } 162 | 163 | void FlushAll_PMDK() 164 | { 165 | for (ub4 p = 0; pIsAnythingDirty()) { 167 | pmemblk_write(pbp, GetRamBf(p)->RamPtr(), p); 168 | GetRamBf(p)->MakeCleanAfterFlushOfPMDK(); 169 | } 170 | } 171 | } 172 | 173 | void FlushAll_Shadow() 174 | { 175 | ub8 lsn = 0; 176 | free_nvm_bf->init(); 177 | free_nvm_bf->page_id = constants::kInvalidPageId; 178 | 179 | for (ub4 p = 0; pIsAnythingDirty()) { 181 | NvmBufferFrame *new_free_one = GetRamBf(p)->FlushShadow(free_nvm_bf, lsn++); 182 | assert(GetRamBf(p)->GetNvmBufferFrame() == free_nvm_bf); 183 | free_nvm_bf = new_free_one; 184 | nvm_mapping[p] = GetRamBf(p)->GetNvmBufferFrame(); 185 | } 186 | } 187 | } 188 | 189 | void FlushAll_MicroLog() 190 | { 191 | micro_log->page_id = constants::kInvalidPageId; 192 | micro_log->count = 0; 193 | 194 
| for (ub4 p = 0; pIsAnythingDirty()) { 196 | GetRamBf(p)->FlushMicroLog(*micro_log); 197 | } 198 | } 199 | } 200 | 201 | void FlushAll_MicroLog2() 202 | { 203 | memset(micro_log_2, 0, sizeof(MicroLog2)); 204 | 205 | for (ub4 p = 0; pIsAnythingDirty()) { 207 | GetRamBf(p)->FlushMicroLog(*micro_log_2); 208 | } 209 | } 210 | } 211 | 212 | void PrintPages() 213 | { 214 | for (ub4 p = 0; pRamPtr()[i]; 224 | } 225 | cout << endl << p << " nvm: "; 226 | for (ub4 i = 0; iGetPage().Ptr()[i]; 228 | } 229 | cout << endl; 230 | } 231 | 232 | // Checks that each cl in NVM is c 233 | void CheckNvmContentEqualsTo(char c) 234 | { 235 | for (ub4 p = 0; ppage_id == p); 237 | assert(GetMappedNvmBf(p)->page_id == p); 238 | for (ub4 i = 0; iGetPage().Ptr()[i]); 240 | } 241 | } 242 | } 243 | }; 244 | 245 | void RunBenchmarkThreaded(string tech, bool all_resident, function callback) 246 | { 247 | atomic ready_count(0); 248 | atomic start_barrier(false); 249 | vector> threads; 250 | vector times(THREAD_COUNT, 0); 251 | for (ub4 tid = 0; tid([&, tid]() { 253 | FlushTest ft(NVM_FILE + string("_") + to_string(tid), PAGE_COUNT_PER_THREAD); 254 | ft.InitializePages(); 255 | ft.MakeRandomCacheLinesDirty(DIRTY_CL_COUNT, all_resident); 256 | ready_count++; 257 | while (!start_barrier); 258 | times[tid] += RunWithTiming([&]() { callback(ft); }); 259 | ft.CheckNvmContentEqualsTo('a'); 260 | })); 261 | } 262 | while (ready_count != THREAD_COUNT); 263 | start_barrier = true; 264 | for (ub4 tid = 0; tidjoin(); 266 | } 267 | 268 | ub8 time_sum_of_all_threads = 0; 269 | for (ub4 tid = 0; tidconstants::kCacheLinesPerPage) { 327 | cout << "invalid DIRTY_CL_COUNT " << DIRTY_CL_COUNT << endl; 328 | exit(-1); 329 | } 330 | 331 | cerr << "Config:" << endl; 332 | cerr << "----------------------------" << endl; 333 | cerr << "PAGE_COUNT_PER_THREAD: " << PAGE_COUNT_PER_THREAD << endl; 334 | cerr << "DIRTY_CL_COUNT: " << DIRTY_CL_COUNT << endl; 335 | cerr << "THREAD_COUNT: " << THREAD_COUNT << endl; 336 | cerr 
<< "NVM_FILE: " << NVM_FILE << endl; 337 | #ifdef STREAMING 338 | cerr << "STREAMING: " << "yes" << endl; 339 | #else 340 | cerr << "STREAMING: " << "no" << endl; 341 | #endif 342 | 343 | RunMultiThreaded(); 344 | 345 | return 0; 346 | }; 347 | -------------------------------------------------------------------------------- /reproduce/all.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for all figure 4 | ./reproduce/bw_cache_lines.sh 5 | ./reproduce/bw_threads.sh 6 | ./reproduce/coroutines.sh 7 | ./reproduce/inplace.sh 8 | ./reproduce/latency_read.sh 9 | ./reproduce/latency_write.sh 10 | ./reproduce/logging.sh 11 | ./reproduce/page_flush.sh 12 | -------------------------------------------------------------------------------- /reproduce/bw_cache_lines.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 1 4 | # PMem Bandwidth: Varying Access Granularity 5 | 6 | echo "" > results/bw_cache_lines.txt 7 | 8 | COMPILE="clang++ -g0 -O3 -march=native -std=c++14 -DNDEBUG=1 bandwidth/bw.cpp -pthread" 9 | 10 | BYTE_COUNT=10e9 11 | THREAD_COUNT=24 12 | for BLOCK_SIZE in `seq 64 64 768`; do 13 | 14 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -o a1.out || exit -1 15 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -o a2.out || exit -1 16 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -DUSE_CLWB=1 -o a3.out || exit -1 17 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -DSTREAMING=1 -o a4.out || exit -1 18 | 19 | # Read nvm 20 | ./a1.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 21 | 22 | # Read ram 23 | ./a1.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 24 | 25 | # Write nvm 26 | ./a2.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 27 | ./a3.out 
${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 28 | ./a4.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 29 | 30 | # Write ram 31 | ./a2.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 32 | ./a3.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 33 | ./a4.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_cache_lines.txt 34 | done; 35 | -------------------------------------------------------------------------------- /reproduce/bw_threads.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 2 4 | # PMem Bandwidth: Varying Thread Count 5 | 6 | echo "" > results/bw_threads.txt 7 | 8 | COMPILE="clang++ -g0 -O3 -march=native -std=c++14 -DNDEBUG=1 bandwidth/bw.cpp -pthread" 9 | 10 | BYTE_COUNT=10e9 11 | 12 | for BLOCK_SIZE in 256 1048576; do 13 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -o a1.out || exit -1 14 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -o a2.out || exit -1 15 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -DUSE_CLWB=1 -o a3.out || exit -1 16 | ${COMPILE} -DBLOCK_SIZE=${BLOCK_SIZE} -DWRITE=1 -DSTREAMING=1 -o a4.out || exit -1 17 | 18 | for THREAD_COUNT in `seq 1 30`; do 19 | # Read nvm 20 | ./a1.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_threads.txt 21 | 22 | # Read ram 23 | ./a1.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_threads.txt 24 | 25 | # Write nvm 26 | ./a2.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_threads.txt 27 | ./a3.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_threads.txt 28 | ./a4.out ${BYTE_COUNT} ${THREAD_COUNT} nvm ${PMEM_PATH} | tee -a results/bw_threads.txt 29 | 30 | # Write ram 31 | ./a2.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a 
results/bw_threads.txt 32 | ./a3.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_threads.txt 33 | ./a4.out ${BYTE_COUNT} ${THREAD_COUNT} ram ${PMEM_PATH} | tee -a results/bw_threads.txt 34 | done; 35 | done; -------------------------------------------------------------------------------- /reproduce/coroutines.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Building clang++ is required 4 | 5 | # cd build 6 | # cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi;clang" -DCMAKE_BUILD_TYPE=Release -G "Unix Makefiles" ../llvm 7 | # make -j48 8 | 9 | echo "" > results/coroutines.txt 10 | 11 | export CLANG_DIR=~/workspace/llvm-project/build 12 | 13 | ${CLANG_DIR}/bin/clang++ -fcoroutines-ts -g0 -O3 -march=native -std=c++2a -mllvm -inline-threshold=50000 coroutine/coro_insert.cpp -stdlib=libc++ -nostdinc++ -I${CLANG_DIR}/include/c++/v1 -L${CLANG_DIR}/lib -Wl,-rpath,${CLANG_DIR}/lib -DNDEBUG=1 || exit 14 | 15 | for GROUP_SIZE in 1 2 3 4 5 6 7 8 10 12 14 16 24 32 40 48 56 64; do 16 | ./a.out 1e7 1e7 ${GROUP_SIZE} nvm /mnt/pmem0/renen | tee -a results/coroutines.txt 17 | ./a.out 1e7 1e7 ${GROUP_SIZE} ram /mnt/pmem0/renen | tee -a results/coroutines.txt 18 | done 19 | 20 | ${CLANG_DIR}/bin/clang++ -fcoroutines-ts -g0 -O3 -march=native -std=c++2a -mllvm -inline-threshold=50000 coroutine/coro_lookup.cpp -stdlib=libc++ -nostdinc++ -I${CLANG_DIR}/include/c++/v1 -L${CLANG_DIR}/lib -Wl,-rpath,${CLANG_DIR}/lib -DNDEBUG=1 || exit 21 | 22 | for GROUP_SIZE in 1 2 3 4 5 6 7 8 10 12 14 16 24 32 40 48 56 64; do 23 | ./a.out 1e7 1e7 ${GROUP_SIZE} nvm /mnt/pmem0/renen | tee -a results/coroutines.txt 24 | ./a.out 1e7 1e7 ${GROUP_SIZE} ram /mnt/pmem0/renen | tee -a results/coroutines.txt 25 | done 26 | -------------------------------------------------------------------------------- /reproduce/inplace.sh: 
# Sequential ram
# Sequential nvm
results/interference_seq_nvm.txt 45 | 46 | # Random ram 47 | echo "" > results/interference_rnd_ram.txt 48 | ./a.out 0 0 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 49 | ./a.out 1 0 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 50 | ./a.out 5 0 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 51 | ./a.out 10 0 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 52 | ./a.out 0 1 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 53 | ./a.out 0 5 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 54 | ./a.out 0 10 14 0 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 55 | ./a.out 0 0 14 1 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 56 | ./a.out 0 0 14 5 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 57 | ./a.out 0 0 14 10 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 58 | ./a.out 0 0 14 0 1 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 59 | ./a.out 0 0 14 0 5 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 60 | ./a.out 0 0 14 0 10 0 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 61 | ./a.out 0 0 14 0 0 1 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 62 | ./a.out 0 0 14 0 0 5 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 63 | ./a.out 0 0 14 0 0 10 /mnt/pmem0/renen | tee -a results/interference_rnd_ram.txt 64 | 65 | # Random nvm 66 | echo "" > results/interference_rnd_nvm.txt 67 | ./a.out 0 0 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 68 | ./a.out 1 0 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 69 | ./a.out 5 0 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 70 | ./a.out 10 0 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 71 | ./a.out 0 1 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 72 | ./a.out 0 5 0 14 0 0 
/mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 73 | ./a.out 0 10 0 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 74 | ./a.out 0 0 1 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 75 | ./a.out 0 0 5 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 76 | ./a.out 0 0 10 14 0 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 77 | ./a.out 0 0 0 14 1 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 78 | ./a.out 0 0 0 14 5 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 79 | ./a.out 0 0 0 14 10 0 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 80 | ./a.out 0 0 0 14 0 1 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 81 | ./a.out 0 0 0 14 0 5 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 82 | ./a.out 0 0 0 14 0 10 /mnt/pmem0/renen | tee -a results/interference_rnd_nvm.txt 83 | 84 | ## Log nvm 85 | #./a.out 0 0 0 0 10 0 /mnt/pmem0/renen 86 | #./a.out 1 0 0 0 10 0 /mnt/pmem0/renen 87 | #./a.out 5 0 0 0 10 0 /mnt/pmem0/renen 88 | #./a.out 10 0 0 0 10 0 /mnt/pmem0/renen 89 | #./a.out 0 1 0 0 10 0 /mnt/pmem0/renen 90 | #./a.out 0 5 0 0 10 0 /mnt/pmem0/renen 91 | #./a.out 0 10 0 0 10 0 /mnt/pmem0/renen 92 | #./a.out 0 0 1 0 10 0 /mnt/pmem0/renen 93 | #./a.out 0 0 5 0 10 0 /mnt/pmem0/renen 94 | #./a.out 0 0 10 0 10 0 /mnt/pmem0/renen 95 | #./a.out 0 0 0 1 10 0 /mnt/pmem0/renen 96 | #./a.out 0 0 0 5 10 0 /mnt/pmem0/renen 97 | #./a.out 0 0 0 10 10 0 /mnt/pmem0/renen 98 | #./a.out 0 0 0 0 10 1 /mnt/pmem0/renen 99 | #./a.out 0 0 0 0 10 5 /mnt/pmem0/renen 100 | #./a.out 0 0 0 0 10 10 /mnt/pmem0/renen 101 | # 102 | ## Page nvm 103 | #./a.out 0 0 0 0 0 10 /mnt/pmem0/renen 104 | #./a.out 1 0 0 0 0 10 /mnt/pmem0/renen 105 | #./a.out 5 0 0 0 0 10 /mnt/pmem0/renen 106 | #./a.out 10 0 0 0 0 10 /mnt/pmem0/renen 107 | #./a.out 0 1 0 0 0 10 /mnt/pmem0/renen 108 | #./a.out 0 5 0 0 0 10 /mnt/pmem0/renen 109 | #./a.out 0 10 0 0 0 10 /mnt/pmem0/renen 110 | 
#./a.out 0 0 1 0 0 10 /mnt/pmem0/renen 111 | #./a.out 0 0 5 0 0 10 /mnt/pmem0/renen 112 | #./a.out 0 0 10 0 0 10 /mnt/pmem0/renen 113 | #./a.out 0 0 0 1 0 10 /mnt/pmem0/renen 114 | #./a.out 0 0 0 5 0 10 /mnt/pmem0/renen 115 | #./a.out 0 0 0 10 0 10 /mnt/pmem0/renen 116 | #./a.out 0 0 0 0 1 10 /mnt/pmem0/renen 117 | #./a.out 0 0 0 0 5 10 /mnt/pmem0/renen 118 | #./a.out 0 0 0 0 10 10 /mnt/pmem0/renen 119 | -------------------------------------------------------------------------------- /reproduce/latency_read.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 3 4 | # Read latency 5 | 6 | echo "" > results/latency_read.txt 7 | 8 | # Read ram 9 | clang++ -g0 -O3 -march=native -DNDEBUG=1 -std=c++14 latency/read_latency.cpp -pthread && ./a.out 1 10e9 1e9 ram ${PMEM_PATH}/file_0 | tee -a results/latency_read.txt 10 | 11 | # Read nvm 12 | clang++ -g0 -O3 -march=native -DNDEBUG=1 -std=c++14 latency/read_latency.cpp -pthread && ./a.out 1 10e9 1e9 nvm ${PMEM_PATH}/file_0 | tee -a results/latency_read.txt 13 | -------------------------------------------------------------------------------- /reproduce/latency_write.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 4 4 | # Write latency 5 | 6 | echo "" > results/latency_write.txt 7 | 8 | for type in single sequential random; do 9 | # Write FLUSH 10 | clang++ -g0 -O3 -DNDEBUG=1 -march=native -std=c++14 latency/write_latency.cpp -pthread -DFLUSH=1 \ 11 | && ./a.out nvm $type 10e9 ${PMEM_PATH}/file_0 | tee -a results/latency_write.txt 12 | 13 | # Write FLUSH_OPT 14 | clang++ -g0 -O3 -DNDEBUG=1 -march=native -std=c++14 latency/write_latency.cpp -pthread -DFLUSH_OPT=1 \ 15 | && ./a.out nvm $type 10e9 ${PMEM_PATH}/file_0 | tee -a results/latency_write.txt 16 | 17 | # Write CLWB 18 | clang++ -g0 -O3 -DNDEBUG=1 
-march=native -std=c++14 latency/write_latency.cpp -pthread -DCLWB=1 \ 19 | && ./a.out nvm $type 10e9 ${PMEM_PATH}/file_0 | tee -a results/latency_write.txt 20 | 21 | # Write STREAMING 22 | clang++ -g0 -O3 -DNDEBUG=1 -march=native -std=c++14 latency/write_latency.cpp -pthread -DSTREAMING=1 \ 23 | && ./a.out nvm $type 10e9 ${PMEM_PATH}/file_0 | tee -a results/latency_write.txt 24 | 25 | done -------------------------------------------------------------------------------- /reproduce/logging.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 6 4 | # Logging 5 | 6 | echo "" > results/logging.txt 7 | 8 | clang++ -g0 -O3 -DNDEBUG=1 -march=native -std=c++17 logging/logging.cpp -Invml/src/include/ nvml/src/nondebug/libpmem.a nvml/src/nondebug/libpmemlog.a -lpthread -lndctl -ldaxctl \ 9 | && ./a.out 56 512 10e9 5 ${PMEM_PATH}/file 0 | tee -a results/logging.txt 10 | -------------------------------------------------------------------------------- /reproduce/page_flush.sh: -------------------------------------------------------------------------------- 1 | ./reproduce/validate.sh || exit -1 2 | 3 | # Reproduces data for figure 5 4 | # Page flush 5 | PAGE_COUNT=100000 # ~1.6GB per thread 6 | 7 | echo "" > results/page_flush_cls_1t.txt 8 | echo "" > results/page_flush_threads_16cls.txt 9 | echo "" > results/page_flush_cls_7t.txt 10 | 11 | clang++ page_flush/page_flush.cpp -std=c++17 -g0 -O3 -march=native -DNDEBUG=1 -DSTREAMING=1 -Invml/src/include/ nvml/src/nondebug/libpmem.a nvml/src/nondebug/libpmemblk.a -lpthread -lndctl -ldaxctl || exit 12 | 13 | # Experiment 1: 1 thread, _x_ dirty cls, streaming 14 | THREAD_COUNT=1 15 | for DIRTY_CL_COUNT in `seq 4 4 256`; do 16 | ./a.out ${PAGE_COUNT} ${DIRTY_CL_COUNT} ${THREAD_COUNT} ${PMEM_PATH}/file | tee -a results/page_flush_cls_1t.txt 17 | done 18 | 19 | # Experiment 2: _x_ thread, 16 dirty cls, streaming 20 | 
DIRTY_CL_COUNT=16 21 | for THREAD_COUNT in `seq 1 30`; do 22 | ./a.out ${PAGE_COUNT} ${DIRTY_CL_COUNT} ${THREAD_COUNT} ${PMEM_PATH}/file | tee -a results/page_flush_threads_16cls.txt 23 | done 24 | 25 | # Experiment 3: 7 thread, _x_ dirty cls, streaming 26 | THREAD_COUNT=7 27 | for DIRTY_CL_COUNT in `seq 4 4 256`; do 28 | ./a.out ${PAGE_COUNT} ${DIRTY_CL_COUNT} ${THREAD_COUNT} ${PMEM_PATH}/file | tee -a results/page_flush_cls_7t.txt 29 | done -------------------------------------------------------------------------------- /reproduce/validate.sh: -------------------------------------------------------------------------------- 1 | 2 | # Check that pmem path is set 3 | if [ -z "$PMEM_PATH" ]; 4 | then 5 | echo "Please set PMEM_PATH to point to a directory on PMem." 6 | echo "Example: export PMEM_PATH=/mnt/pmem0/renen" 7 | exit -1 8 | fi 9 | 10 | if [ ! -d ${PMEM_PATH} ]; 11 | then 12 | echo "The configured PMEM_PATH '${PMEM_PATH}' is not a directory." 13 | echo "Example: export PMEM_PATH=/mnt/pmem0/renen" 14 | exit -1 15 | fi 16 | -------------------------------------------------------------------------------- /results/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexandervanrenen/pmembench/7ee9286caad36efc996768fb5d2f5d3158c7dde8/results/.keep --------------------------------------------------------------------------------