├── run.sh
├── source
│   ├── util.h
│   ├── port_posix.h
│   ├── timer.h
│   ├── PHAST.h
│   └── PHAST.cc
├── README.md
└── test
    └── simple_test.cc
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | CINCLUDE="-I ./source/"
4 | CDEBUG="-O3"
5 | # CDEBUG="-O0 -g"
6 | 
7 | # CWARNING="-Wall" # enable warnings
8 | CWARNING="-w" # suppress warnings.
9 | 
10 | CFLAGS="-lpmemobj -lpthread -march=native"
11 | 
12 | g++ $CINCLUDE $CDEBUG $CWARNING -o simple_test test/simple_test.cc source/PHAST.cc $CFLAGS
13 | 
14 | 
--------------------------------------------------------------------------------
/source/util.h:
--------------------------------------------------------------------------------
1 | #include <cstdio>    // NOTE: the angle-bracket header names in this file were
2 | #include <cstdlib>   // stripped in this dump; they are restored here from the
3 | #include <cstring>   // identifiers actually used across the sources.
4 | #include <cstdint>
5 | #include <cassert>
6 | #include <cmath>
7 | #include <ctime>
8 | #include <string>
9 | #include <vector>
10 | #include <algorithm>
11 | #include <fstream>
12 | #include <future>
13 | #include <random>
14 | #include <unistd.h>
15 | #include <pthread.h>
16 | #include <sys/stat.h>
17 | #include <sys/time.h>
18 | 
19 | 
20 | #include "timer.h"
21 | #include "port_posix.h"
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PHAST: Hierarchical Concurrent Log-Free Skip List for Persistent Memory
2 | 
3 | ## Introduction
4 | 
5 | PHAST is a persistent skip list that leverages persistent memory to reduce memory overhead and boost indexing performance. PHAST proposes a relaxed rwlock-based concurrency control strategy to support write-lock-free concurrent inserts and lock-free concurrent searches.
6 | 
7 | Please read the following paper for more details:
8 | 
9 | [Zhenxin Li, Bing Jiao, Shuibing He, Weikuan Yu. PHAST: Hierarchical Concurrent Log-Free Skip List for Persistent Memory. TPDS 2022.](https://ieeexplore.ieee.org/abstract/document/9772399)
10 | 
11 | ## Directories
12 | 
13 | * source: source files for PHAST.
14 | * test: the test file.
15 | 
16 | ## Running
17 | 
18 | The code is designed for machines equipped with Intel Optane DCPMMs.
19 | 
20 | Please change the NVM file path `PMEM_PATH` in `source/PHAST.h` before running experiments.
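For reference, the relevant defines in `source/PHAST.h` are shown below; the pool file is created on the first run and reopened on later runs:

```
#define PMEM_PATH "/mnt/pmem/PHAST/mempool"
#define POOL_SIZE (10737418240ULL) // pool size : 10GB
```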
21 | 
22 | And then execute the script as follows:
23 | 
24 | ```
25 | sh run.sh
26 | ./simple_test [the number of threads]
27 | ```
--------------------------------------------------------------------------------
/source/port_posix.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <pthread.h> // (header name restored; it was stripped in this dump.)
3 | 
4 | class RWMutex {
5 |  public:
6 |   RWMutex() {
7 |     Init();
8 |   }
9 | 
10 |   // No copying allowed
11 |   RWMutex(const RWMutex&) = delete;
12 |   void operator=(const RWMutex&) = delete;
13 | 
14 |   ~RWMutex() { pthread_rwlock_destroy(&lock_); }
15 | 
16 |   void Init() {
17 |     pthread_rwlock_init(&lock_, NULL);
18 | #ifndef NDEBUG
19 |     locked_ = false;
20 | #endif
21 |   }
22 | 
23 |   void ReadLock() {
24 |     pthread_rwlock_rdlock(&lock_);
25 |   }
26 | 
27 |   void WriteLock() {
28 |     pthread_rwlock_wrlock(&lock_);
29 | #ifndef NDEBUG
30 |     locked_ = true;
31 | #endif
32 |   }
33 | 
34 |   void ReadUnlock() {
35 |     pthread_rwlock_unlock(&lock_);
36 |   }
37 | 
38 |   void WriteUnlock() {
39 |     pthread_rwlock_unlock(&lock_);
40 | #ifndef NDEBUG
41 |     locked_ = false;
42 | #endif
43 |   }
44 | 
45 |   bool AssertReadHeld() {
46 | #ifndef NDEBUG
47 |     return (locked_ == false); // locked_ only tracks writers: "read held" means no writer.
48 | #endif
49 |     return true;
50 |   }
51 | 
52 |   bool AssertWriteHeld() {
53 | #ifndef NDEBUG
54 |     return (locked_ == true);
55 | #endif
56 |     return true;
57 |   }
58 | 
59 |  private:
60 |   pthread_rwlock_t lock_;
61 | #ifndef NDEBUG
62 |   bool locked_;
63 | #endif
64 | };
65 | 
66 | // Exclusive Mutex
67 | class EXMutex {
68 |  public:
69 |   EXMutex() {
70 |     Init();
71 |   }
72 | 
73 |   // No copying allowed
74 |   EXMutex(const EXMutex&) = delete;
75 |   void operator=(const EXMutex&) = delete;
76 | 
77 |   ~EXMutex() { pthread_mutex_destroy(&lock_); }
78 | 
79 |   void Init() {
80 |     pthread_mutex_init(&lock_, NULL);
81 | #ifndef NDEBUG
82 |     locked_ = false;
83 | #endif
84 |   }
85 | 
86 |   void Lock() {
87 |     pthread_mutex_lock(&lock_);
88 | #ifndef NDEBUG
89 |     locked_ = true;
90 | #endif
91 |   }
92 | 
93 |   void Unlock() {
94 |     pthread_mutex_unlock(&lock_);
95 | #ifndef NDEBUG
96 |     locked_ = false;
97 | #endif
98 |   }
99 | 
100 |   bool AssertHeld() {
101 | #ifndef NDEBUG
102 |     return (locked_ == true);
103 | #endif
104 |     return true;
105 |   }
106 | 
107 |  private:
108 |   pthread_mutex_t lock_;
109 | #ifndef NDEBUG
110 |   bool locked_;
111 | #endif
112 | };
--------------------------------------------------------------------------------
/source/timer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <cstdio>     // NOTE: these eight header names are inferred from usage;
4 | #include <cstdint>    // the original angle-bracket includes were stripped in this dump.
5 | #include <string>
6 | #include <vector>
7 | #include <algorithm>
8 | #include <fstream>
9 | #include <time.h>
10 | #include <sys/time.h>
11 | 
12 | static inline uint64_t NowNanos() {
13 |   struct timespec ts;
14 |   clock_gettime(CLOCK_MONOTONIC, &ts);
15 |   return (uint64_t)(ts.tv_sec) * 1000000000 + ts.tv_nsec;
16 | }
17 | 
18 | static inline uint64_t NowMicros() {
19 |   struct timeval tv;
20 |   gettimeofday(&tv, nullptr);
21 |   return (uint64_t)(tv.tv_sec) * 1000000 + tv.tv_usec;
22 | }
23 | 
24 | static inline uint64_t ElapsedNanos(uint64_t start_time) {
25 |   uint64_t now = NowNanos();
26 |   return now - start_time;
27 | }
28 | 
29 | static inline uint64_t ElapsedMicros(uint64_t start_time) {
30 |   uint64_t now = NowMicros();
31 |   return now - start_time;
32 | }
33 | 
34 | class Counter {
35 |  public:
36 |   explicit Counter(std::string name) : num_(0), name_(name) {}
37 |   ~Counter() {}
38 | 
39 |   void Clear() { num_ = 0; }
40 | 
41 |   void Add(uint64_t t) {
42 | #ifndef THREAD_SAFE_TIMER
43 |     num_ += t;
44 | #else
45 |     __sync_add_and_fetch(&num_, t); // GCC atomic builtin: thread-safe add.
46 | #endif
47 |   }
48 | 
49 |   void PrintResult() {
50 |     fprintf(stderr, "%s: %lu\n", name_.c_str(), num_);
51 |   }
52 | 
53 |  private:
54 |   uint64_t num_;
55 |   std::string name_;
56 | };
57 | 
58 | class Histogram {
59 |  public:
60 |   explicit Histogram(std::string name) : name_(name) {
61 |     finallized_ = false;
62 | #ifdef THREAD_SAFE_TIMER
63 |     pthread_mutex_init(&mu_, NULL);
64 | #endif
65 |   }
66 |   ~Histogram() {
67 | #ifdef THREAD_SAFE_TIMER
68 |     pthread_mutex_destroy(&mu_);
69 | #endif
70 |   }
71 | 
72 |   void Clear() { values_.clear(); finallized_ = false; } // also reset the sorted flag.
73 | 
74 |   void Add(uint64_t t) {
75 | #ifndef THREAD_SAFE_TIMER
76 |     values_.push_back(t);
77 | #else
78 |     pthread_mutex_lock(&mu_);
79 |     values_.push_back(t);
80 |     pthread_mutex_unlock(&mu_);
81 | #endif
82 |   }
83 | 
84 |   void Finallize() {
85 |     if (!finallized_) {
86 |       std::sort(values_.begin(), values_.end());
87 |       finallized_ = true;
88 |     }
89 |   }
90 | 
91 |   // call after Finallize: clamp pos into the valid index range.
92 |   size_t ValizePos(size_t pos) {
93 |     return (pos >= values_.size()) ? values_.size() - 1 : pos;
94 |   }
95 | 
96 |   double Min() {
97 |     return (values_.empty()) ? 0 : (double)values_.front();
98 |   }
99 | 
100 |   double Max() {
101 |     return (values_.empty()) ? 0 : (double)values_.back();
102 |   }
103 | 
104 |   double Sum() {
105 |     double sum = 0;
106 |     for (auto x : values_) {
107 |       sum += x;
108 |     }
109 |     return sum;
110 |   }
111 | 
112 |   double Avg() {
113 |     if (values_.empty()) return 0;
114 |     double sum = 0;
115 |     for (auto x : values_) {
116 |       sum += x;
117 |     }
118 |     return sum / values_.size();
119 |   }
120 | 
121 |   double P50() {
122 |     if (values_.empty()) return 0;
123 |     size_t pos = values_.size() / 2;
124 |     return (double)values_[pos];
125 |   }
126 | 
127 |   double P99() {
128 |     if (values_.empty()) return 0;
129 |     size_t pos = values_.size() * 99 / 100;
130 |     return (double)values_[pos];
131 |   }
132 | 
133 |   double P995() {
134 |     if (values_.empty()) return 0;
135 |     size_t pos = values_.size() * 995 / 1000;
136 |     return (double)values_[pos];
137 |   }
138 | 
139 |   double P999() {
140 |     if (values_.empty()) return 0;
141 |     size_t pos = values_.size() * 999 / 1000;
142 |     return (double)values_[pos];
143 |   }
144 | 
145 |   double PXX(int x, int y) {
146 |     if (values_.empty()) return 0;
147 |     size_t pos = values_.size() * x / y;
148 |     return (double)values_[pos];
149 |   }
150 | 
151 |   void PrintResult() {
152 |     if (values_.size() == 0) {
153 |       fprintf(stderr, "%s: NO STAT\n", name_.c_str());
154 |       return;
155 |     }
156 |     Finallize();
157 |     fprintf(stderr, "%s:\n", name_.c_str());
158 |     fprintf(stderr, "\tCount: %zu\n", values_.size());
159 |     fprintf(stderr, "\tMin: %.6f\n", Min());
160 |     fprintf(stderr, "\tp50: %.6f\n", P50());
161 |     fprintf(stderr, "\tp99: %.6f\n", P99());
162 |     fprintf(stderr, "\tp995: %.6f\n", P995());
163 |     fprintf(stderr, "\tp999: %.6f\n", P999());
164 |     fprintf(stderr, "\tMax: %.6f\n", Max());
165 |     fprintf(stderr, "\tSum: %.6f\n", Sum());
166 |   }
167 | 
168 |   void dump_to_file(std::string fname, size_t dump_num) {
169 |     if (dump_num < 100) {
170 |       fprintf(stderr, "%s() dump_num %zu is too small\n", __FUNCTION__, dump_num);
171 |       return;
172 |     }
173 |     if (values_.empty()) return; // avoid a division by zero below.
174 |     if (!finallized_) {
175 |       Finallize();
176 |     }
177 | 
178 |     if (dump_num > values_.size()) {
179 |       dump_num = values_.size();
180 |     }
181 | 
182 |     int i = 0;
183 |     int step = values_.size() / dump_num;
184 |     std::string buf;
185 |     for (; i < (int)values_.size(); i += step) { // do not redeclare i here: shadowing it would make the tail check below always see i == 0.
186 |       buf.append(std::to_string(values_[i]));
187 |       buf.append("\n");
188 |     }
189 | 
190 |     // make sure the max value is included.
191 |     if ((size_t)(i - step) != values_.size() - 1) {
192 |       buf.append(std::to_string(values_[values_.size() - 1]));
193 |       buf.append("\n");
194 |     }
195 | 
196 |     // open file and write buf.
197 |     std::ofstream fout(fname);
198 | 
199 |     fout << buf;
200 |     fout.close();
201 |   }
202 | 
203 |   std::vector<uint64_t> values_;
204 |  private:
205 |   bool finallized_;
206 | 
207 |   std::string name_;
208 | #ifdef THREAD_SAFE_TIMER
209 |   pthread_mutex_t mu_;
210 | #endif
211 | };
212 | 
213 | enum STATISTICS {
214 |   DO_INSERT = 0,
215 |   DO_SEARCH,
216 |   DO_UPDATE,
217 |   DO_SCAN,
218 |   DO_DELETE,
219 |   DO_SEARCH_LIST_1,
220 |   DO_SEARCH_LIST_1_1,
221 |   DO_SEARCH_LIST_1_2,
222 |   DO_SEARCH_LIST_1_3,
223 |   DO_SEARCH_LIST_2,
224 |   DO_SEARCH_LIST_2_1,
225 |   DO_SEARCH_LIST_2_2,
226 |   DO_SEARCH_LIST_2_3,
227 |   DO_SEARCH_BLOCK,
228 |   DO_SEARCH_LEAF,
229 |   DO_SEARCH_LEAF_PROBE,
230 |   DO_INSERT_LEAF,
231 |   DO_INSERT_LEAF_1,
232 |   DO_INSERT_LEAF_2,
233 |   DO_INSERT_LEAF_3,
234 |   DO_INSERT_LEAF_4,
235 |   DO_SPLIT_LAEF,
236 |   DO_SPLIT_BLOCK,
237 |   DO_UPDATE_AGG_KEYS,
238 |   DO_SEARCH_AGG_KEYS,
239 |   ELE_IN_LB,
240 |   ELE_IN_LN,
241 |   FLUSH_TIME,
242 |   FFZ,
243 | 
244 |   MAX_NUM,
245 | };
246 | 
247 | static std::string STATISTICS_STRING[] = {
248 |   "DO_INSERT",
249 |   "DO_SEARCH",
250 |   "DO_UPDATE",
251 |   "DO_SCAN",
252 |   "DO_DELETE",
253 |   "DO_SEARCH_LIST_1",
254 |   "DO_SEARCH_LIST_1_1",
255 |   "DO_SEARCH_LIST_1_2",
256 |   "DO_SEARCH_LIST_1_3",
257 |   "DO_SEARCH_LIST_2",
258 |   "DO_SEARCH_LIST_2_1",
259 |   "DO_SEARCH_LIST_2_2",
260 |   "DO_SEARCH_LIST_2_3",
261 |   "DO_SEARCH_BLOCK",
262 |   "DO_SEARCH_LEAF",
263 |   "DO_SEARCH_LEAF_PROBE",
264 |   "DO_INSERT_LEAF",
265 |   "DO_INSERT_LEAF_1",
266 |   "DO_INSERT_LEAF_2",
267 |   "DO_INSERT_LEAF_3",
268 |   "DO_INSERT_LEAF_4",
269 |   "DO_SPLIT_LAEF",
270 |   "DO_SPLIT_BLOCK",
271 |   "DO_UPDATE_AGG_KEYS",
272 |   "DO_SEARCH_AGG_KEYS",
273 |   "ELE_IN_LB",
274 |   "ELE_IN_LN",
275 |   "FLUSH_TIME",
276 |   "FFZ",
277 | 
278 |   "MAX_NUM"
279 | };
280 | 
281 | class HistogramSet {
282 |  public:
283 |   HistogramSet() {
284 |     for (int i = 0; i < STATISTICS::MAX_NUM; ++i) {
285 |       hist_set_.emplace_back(new Histogram(STATISTICS_STRING[i]));
286 |     }
287 |   }
288 | 
289 |   ~HistogramSet() {
290 |     for (auto x : hist_set_) {
291 |       delete x;
292 |     }
293 |   }
294 | 
295 |   void Clear(size_t pos) {
296 |     hist_set_[(size_t)pos]->Clear();
297 |   }
298 | 
299 |   void Clear() {
300 |     for (auto x : hist_set_) {
301 |       x->Clear();
302 |     }
303 |   }
304 | 
305 |   void AddNewHist(std::string name) {
306 |     hist_set_.emplace_back(new Histogram(name));
307 |   }
308 | 
309 |   void Add(size_t pos, uint64_t t) {
310 |     hist_set_[(size_t)pos]->Add(t);
311 |   }
312 | 
313 |   void PrintResult(size_t pos) {
314 |     hist_set_[(size_t)pos]->PrintResult();
315 |   }
316 | 
317 |   void PrintResult() {
318 |     for (size_t i = 0; i < hist_set_.size(); ++i) {
319 |       // fprintf(stderr, "%s:\n", STATISTICS_STRING[i].c_str());
320 |       hist_set_[i]->PrintResult();
321 |       fprintf(stderr, "\n");
322 |     }
323 |   }
324 | 
325 | 
326 |  private:
327 |   std::vector<Histogram *> hist_set_;
328 | };
329 | 
330 | 
331 | enum COUNTER_STATS {
332 |   FIND_HATBLE_NUM = 0,
333 |   FIND_PLN_HTABLE_NUM,
334 |   FIND_ISN_FROM_CACHE,
335 |   FIND_SEQ_NUM,
336 |   GOT_NOT_FOUND,
337 |   GOT_WRONG_VALUE,
338 |   CFLUSH_NUM,
339 |   CFLUSH_SIZE,
340 |   MFENCE_NUM,
341 |   SCAN_ORDER_ERROR_NUM,
342 |   UPDATE_FAILED,
343 |   GOT_NOT_FOUND_AFTER_UPDATE,
344 |   GOT_WRONG_VALUE_AFTER_UPDATE,
345 |   DELETE_FAILED,
346 |   GOT_FOUND_AFTER_DETELE,
347 |   INSTALL_CMAP_RETRY,
348 |   SEARCH_LIST_2_X1,
349 |   SEARCH_LIST_2_X2,
350 | 
351 | 
352 |   MAX_COUNTER_NUM,
353 | };
354 | 
355 | static std::string COUNTER_STATS_STRING[] = {
356 |   "FIND_HATBLE_NUM",
357 |   "FIND_PLN_HTABLE_NUM",
358 |   "FIND_ISN_FROM_CACHE",
359 |   "FIND_SEQ_NUM",
360 |   "GOT_NOT_FOUND",
361 |   "GOT_WRONG_VALUE",
362 |   "CFLUSH_NUM",
363 |   "CFLUSH_SIZE",
364 |   "MFENCE_NUM",
365 |   "SCAN_ORDER_ERROR_NUM",
366 |   "UPDATE_FAILED",
367 |   "GOT_NOT_FOUND_AFTER_UPDATE",
368 |   "GOT_WRONG_VALUE_AFTER_UPDATE",
369 |   "DELETE_FAILED",
370 |   "GOT_FOUND_AFTER_DETELE",
371 |   "INSTALL_CMAP_RETRY",
372 |   "SEARCH_LIST_2_X1",
373 |   "SEARCH_LIST_2_X2",
374 | 
375 |   "MAX_COUNTER_NUM"
376 | };
377 | 
378 | class CounterSet {
379 |  public:
380 |   CounterSet() {
381 |     for (int i = 0; i < COUNTER_STATS::MAX_COUNTER_NUM; ++i) {
382 |       counter_set_.emplace_back(new Counter(COUNTER_STATS_STRING[i]));
383 |     }
384 |   }
385 |   ~CounterSet() {
386 |     for (auto x : counter_set_) {
387 |       delete x;
388 |     }
389 |   }
390 | 
391 |   void Clear() {
392 |     for (auto x : counter_set_) {
393 |       x->Clear();
394 |     }
395 |   }
396 | 
397 |   void AddNewCounter(std::string name) {
398 |     counter_set_.emplace_back(new Counter(name));
399 |   }
400 | 
401 |   void Add(size_t pos, uint64_t t = 1) {
402 |     counter_set_[(size_t)pos]->Add(t);
403 |   }
404 | 
405 |   void PrintResult() {
406 |     for (size_t i = 0; i < counter_set_.size(); ++i) {
407 |       counter_set_[i]->PrintResult();
408 |       fprintf(stderr, "\n");
409 |     }
410 |   }
411 | 
412 |  private:
413 |   std::vector<Counter *> counter_set_;
414 | };
415 | 
416 | 
--------------------------------------------------------------------------------
/source/PHAST.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "util.h"
3 | 
4 | #define USE_PMDK
5 | #ifdef USE_PMDK
6 | #include <libpmemobj.h> // PMDK libpmemobj (header name restored; it was stripped in this dump).
7 | #define PMEM_PATH "/mnt/pmem/PHAST/mempool"
8 | #define POOL_SIZE (10737418240ULL) // pool size : 10GB
9 | typedef struct SLOT_HEAD_ARRAY SHA;
10 | typedef struct LeafSkipGroup LSG;
11 | 
12 | POBJ_LAYOUT_BEGIN(PHAST);
13 | POBJ_LAYOUT_ROOT(PHAST, SHA);
14 | POBJ_LAYOUT_TOID(PHAST, LSG)
15 | POBJ_LAYOUT_END(PHAST);
16 | #endif
17 | 
18 | #define LIKELY(x) __builtin_expect((x), 1)
19 | #define UNLIKELY(x) __builtin_expect((x), 0)
20 | 
21 | #define CACHE_LINE_SIZE 64
22 | #define MAX_U64_KEY 0xffffffffffffffffULL // max key in uint64_t
23 | 
24 | #define HEAD_COUNT 128 // the number of partitions.
25 | #define HASH_KEY (MAX_U64_KEY / HEAD_COUNT)
26 | 
27 | #define USE_AGG_KEYS // index cache design.
28 | #ifdef USE_AGG_KEYS
29 | #define AGG_UPDATE_LEVEL 1 // if the height of an InnerSkipNode > AGG_UPDATE_LEVEL, put this node into the index cache.
30 | #define AGG_SLOT_INIT_NUM 8 // the initial number of slots in the index cache
31 | #define AGG_REDUNDANT_SPACE 4 // redundant space in case of overflow.
32 | class AGGIndex;
33 | #endif
34 | 
35 | #define SPAN_TH 1 // for the deterministic design of inner nodes
36 | 
37 | #define MAX_ENTRY_NUM 56 // 56*1 (fingerprints) + 8 (bitmap) = 64 (cache line size)
38 | #define GROUP_BITMAP_FULL 0x00ffffffffffffffULL // MAX_ENTRY_NUM capacity. 2^56-1
39 | #define MAX_L 32 // max level of InnerSkipNode
40 | #define MAX_LEAF_CAPACITY 128 // the max size of InnerSkipNode
41 | #define MIN_LEAF_CAPACITY (MAX_LEAF_CAPACITY / 2)
42 | 
43 | // for unsigned long long only.
44 | #define firstzero(x) __builtin_ffsll((~(x)))
45 | // for unsigned long long only.
46 | #define popcount1(x) __builtin_popcountll(x)
47 | 
48 | typedef struct alignas(16) Entry
49 | {
50 |     uint64_t key = 0;
51 |     uint64_t value = 0;
52 | } Entry;
53 | 
54 | typedef struct LeafSkipGroup
55 | {
56 |     alignas(64) uint64_t commit_bitmap = 0; // controls read access to the LN.
57 |     uint8_t fingerprints[MAX_ENTRY_NUM];
58 |     uint64_t max_key = 0;
59 |     LeafSkipGroup *next;
60 |     bool is_head;
61 |     uint8_t pad[47];
62 |     alignas(16) Entry entries[MAX_ENTRY_NUM];
63 | } LSG;
64 | 
65 | typedef struct InnerSkipNode
66 | {
67 |     uint64_t max_key;
68 |     RWMutex *locker;
69 | #ifdef USE_AGG_KEYS // only the head has an agg_index.
70 |     AGGIndex *agg_index;
71 | #endif
72 |     uint16_t nKeys;
73 |     bool is_head;
74 |     bool is_split; // indicates the LB/LN is being split.
75 |     uint8_t nLevel;
76 |     uint8_t pad[3];
77 |     struct InnerSkipNode *next[MAX_L];
78 |     uint64_t keys[MAX_LEAF_CAPACITY];
79 |     LSG *leaves[MAX_LEAF_CAPACITY];
80 |     uint64_t mem_bitmap[MAX_LEAF_CAPACITY];
81 | } ISN;
82 | 
83 | #define new_node(n) ((ISN *)malloc(sizeof(ISN))) // (the level argument is unused; nodes are fixed-size.)
84 | 
85 | typedef struct InnerSkipList
86 | {
87 |     uint8_t level[HEAD_COUNT];
88 |     ISN *head[HEAD_COUNT];
89 | } ISL;
90 | 
91 | typedef struct PHAST
92 | {
93 |     ISL *inner_list;
94 |     int size;
95 | } PHAST;
96 | 
97 | typedef struct SLOT_HEAD_ARRAY
98 | {
99 |     LSG *slot_head_array[HEAD_COUNT];
100 | } SHA;
101 | 
102 | ISN *create_inner_node(int level);
103 | 
104 | PHAST *init_list();
105 | 
106 | ////////////////////////////////////
107 | // main functions
108 | ////////////////////////////////////
109 | 
110 | // REQUIRES: key and value are not 0.
111 | // RETURN: true if succeeded, otherwise false.
112 | bool Insert(PHAST *list, uint64_t key, uint64_t value);
113 | 
114 | // RETURN: the value if succeeded, otherwise 0.
115 | uint64_t Search(PHAST *list, uint64_t key);
116 | 
117 | void dram_free(PHAST *list);
118 | 
119 | PHAST *recovery(int n_thread);
120 | 
121 | // RETURN: the old value if the key exists.
122 | uint64_t Update(PHAST *list, uint64_t key, uint64_t newValue);
123 | 
124 | // Delete does not physically remove the key from the inode;
125 | // it just resets the value to indicate the entry has been deleted,
126 | // using the max uint64_t value as the tombstone indicator.
127 | // THUS: deletion reuses the update path instead of a separate function.
128 | // RETURN: the old value if the key exists.
129 | uint64_t Delete(PHAST *list, uint64_t key);
130 | 
131 | // lock-free version.
132 | int Range_Search(PHAST *list, uint64_t start_key, int num, uint64_t *buf);
133 | 
134 | ////////////////////////////////////
135 | 
136 | // BRIEF: search for key in the skip list and return the target inner node; pre_nodes
137 | //        records, per level, the node preceding the returned node on the search path
138 | //        (the predecessors for a newly inserted node), and next_nodes the successors.
139 | // RETURN: the header node if inner_list is empty, otherwise the target node.
140 | ISN *SearchList(ISL *inner_list, uint64_t key, ISN *pre_nodes[], ISN *next_nodes[]);
141 | 
142 | // BRIEF: same as the previous one, but does not record the pre/next nodes.
143 | ISN *SearchList(ISL *inner_list, uint64_t key, uint64_t *target_maxkey, bool lock = false);
144 | 
145 | // BRIEF: the key belongs to a new inner node; this function finds the
146 | //        previous and next nodes for this key at the given level.
147 | void FindUpdateNodeForLevel(uint64_t key, int level,
148 |                             ISN **pre_node, ISN **next_node);
149 | 
150 | // REQUIRES: hold inode's read lock so that no split can happen during the access.
151 | // RETURN: 0 if succeeded; a positive value if the caller needs to get the target inode again and retry; -1 if failed.
152 | int InsertIntoINode(ISN *inode, uint64_t key, uint64_t value,
153 |                     ISN *pre_nodes[], ISN *next_nodes[]);
154 | 
155 | // BRIEF: used to install a new inner node / leaf block.
156 | void InstallNewInnerNode(ISN *old_in, ISN *new_in,
157 |                          ISN *pre_nodes[], ISN *next_nodes[]);
158 | 
159 | // REQUIRES: hold inode's read lock so that no split can happen during the access.
160 | // BRIEF: thread safe if the read lock is held.
161 | // RETURN: the target value; 0 if not found.
162 | uint64_t SearchINode(ISN *inode, uint64_t key);
163 | 
164 | bool TryToGetWriteLock(ISN *inode, const bool is_split);
165 | 
166 | int randomLevel();
167 | 
168 | void free_inner_list(PHAST *list);
169 | 
170 | void nv_free(PHAST *list);
171 | 
172 | int find_zero_bit(uint64_t x, uint16_t size);
173 | 
174 | void update_agg_keys(ISN *head, const int head_idx);
175 | 
176 | ISN *find_in_agg_keys(ISN *head, const uint64_t key, uint64_t *target_maxkey);
177 | 
178 | void print_list_all(PHAST *list, uint64_t key);
179 | void print_list_all(PHAST *list);
180 | void print_list_all(ISN *header);
181 | void print_inode_and_next(ISN *node);
182 | void print_lnode_all(LSG *node);
183 | void print_lnode_all(LSG *node, uint64_t maxkey, const uint64_t bitmap);
184 | void print_lnode_and_next(LSG *node);
185 | void print_list_skeleton(PHAST *list);
186 | void print_list_skeleton(ISN *header);
187 | void print_mem_nvm_comsumption(PHAST *list);
188 | 
189 | static void for_debug()
190 | {
191 |     sleep(1);
192 | }
193 | 
194 | #ifdef USE_AGG_KEYS
195 | // BRIEF: constructed once; cannot be modified afterwards.
196 | class AGGIndex
197 | {
198 | public:
199 |     // REQUIRES: num must be larger than the number of expected nodes in this head.
200 |     AGGIndex(ISN *head, const size_t num)
201 |         : agg_cap(num),
202 |           agg_num(0)
203 |     {
204 |         assert(head->is_head);
205 |         agg_keys.reserve(agg_cap);
206 |         agg_nodes.reserve(agg_cap);
207 |         ISN *cursor = head->next[AGG_UPDATE_LEVEL];
208 | 
209 |         while (cursor != NULL && !cursor->is_head)
210 |         {
211 |             agg_keys.push_back(cursor->max_key);
212 |             agg_nodes.push_back(cursor);
213 |             agg_num++;
214 |             cursor = cursor->next[AGG_UPDATE_LEVEL];
215 |         }
216 |     }
217 | 
218 |     ~AGGIndex()
219 |     {
220 |         agg_keys.clear();
221 |         agg_nodes.clear();
222 |     }
223 | 
224 |     inline size_t NewSize() const
225 |     {
226 |         return (agg_num + AGG_REDUNDANT_SPACE > agg_cap) ? (agg_cap * 2) : (agg_cap);
227 |     }
228 | 
229 |     inline size_t Cap() const { return agg_cap; }
230 | 
231 |     // BRIEF: thread safe.
232 |     // RETURN: an inner node located according to the given key;
233 |     //         NULL if failed.
234 |     ISN *Find(const uint64_t key, uint64_t *target_maxkey, bool debug_info = false) const
235 |     {
236 |         // binary search.
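        // (Lower-bound search: the loop below narrows [low, high) to the first
        //  aggregated key that is >= key, then steps back one slot (--mid) so the
        //  skip-list walk can resume from the predecessor of the covering node;
        //  if key precedes every aggregated key, there is no predecessor and
        //  Find returns NULL.)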
237 |         int low = 0, high = agg_num, mid = 0;
238 |         while (low < high)
239 |         {
240 |             if (key <= agg_keys[low])
241 |             {
242 |                 break;
243 |             }
244 |             mid = (low + high) / 2;
245 |             // if (debug_info) fprintf(stderr, "key: %lu; low: %d; mid: %d; hig: %d; %lu, %lu, %lu\n",
246 |             //         key, low, mid, high, agg_keys[low], agg_keys[mid], agg_keys[high]);
247 |             if (agg_keys[mid] > key)
248 |             {
249 |                 high = mid;
250 |             }
251 |             else if (agg_keys[mid] < key)
252 |             {
253 |                 low = mid + 1;
254 |             }
255 |             else
256 |             {
257 |                 break;
258 |             }
259 |         }
260 |         if (low > mid)
261 |         {
262 |             mid = low;
263 |         }
264 |         assert(mid <= agg_num);
265 |         --mid;
266 |         if (mid < 0)
267 |             return NULL;
268 |         *target_maxkey = agg_keys[mid];
269 |         // if (agg_nodes[mid] == NULL) {
270 |         //     for_debug();
271 |         //     if (debug_info) exit(1);
272 |         //     fprintf(stderr, "%lu; %lu; %lu; %lu; %lu; %lu\n", key, agg_num, mid, agg_keys[mid-1], agg_keys[mid], agg_keys[mid+1]);
273 |         //     fprintf(stderr, "%p; %p; %p\n", agg_nodes[mid-1], agg_nodes[mid], agg_nodes[mid+1]);
274 |         //     for (size_t x = 0; x < agg_keys.size(); ++x) {
275 |         //         fprintf(stderr, "%d: %lu\n", x, agg_keys[x]);
276 |         //     }
277 |         //     fprintf(stderr, "\n");
278 |         //     this->Find(key, target_maxkey, true);
279 |         //     exit(1);
280 |         // }
281 |         return agg_nodes[mid];
282 |     }
283 | 
284 |     // sequential-scan variant of Find().
285 |     ISN *SeqFind(const uint64_t key, uint64_t *target_maxkey, bool debug_info = false) const
286 |     {
287 |         const size_t max = agg_num;
288 |         size_t i = 0;
289 |         if (max == 0)
290 |             return NULL;
291 |         for (i = 0; i < max; ++i)
292 |         {
293 |             if (key <= agg_keys[i])
294 |             {
295 |                 break;
296 |             }
297 |         }
298 | 
299 |         assert(i <= max);
300 |         if (i == 0)
301 |         {
302 |             return NULL;
303 |         }
304 |         else
305 |         {
306 |             --i;
307 |         }
308 |         *target_maxkey = agg_keys[i];
309 |         return agg_nodes[i];
310 |     }
311 | 
312 | public:
313 |     const size_t agg_cap;           // capacity of this array.
314 |     size_t agg_num;                 // number of keys in the array.
315 |     std::vector<uint64_t> agg_keys; // aggregated max keys
316 |     std::vector<ISN *> agg_nodes;   // corresponding inner nodes
317 | };
318 | #endif
319 | 
--------------------------------------------------------------------------------
/test/simple_test.cc:
--------------------------------------------------------------------------------
1 | #include "PHAST.h"
2 | 
3 | #define INSERT_NUM (5000000)     // shared by threads.
4 | #define SEARCH_NUM (5000000)     // shared by threads.
5 | #define MIX_INSERT_NUM (5000000) // shared by threads.
6 | #define MIX_SEARCH_NUM (5000000) // shared by threads.
7 | 
8 | #define TEST_SCAN true      // whether to test scan, update and delete.
9 | #define TEST_RECOVERY false // whether to test recovery.
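// Compile-time switches used below:
//   TEST_SCAN       - also run the Range_Search / Update / Delete phases.
//   TEST_RECOVERY   - drop the DRAM index (dram_free) and rebuild it from NVM via recovery().
//   MIXED_WORKLOAD  - interleave inserts and searches instead of running separate phases.
//   CHECK_AFTER_OPS - verify every searched value and count mismatches.
//   SEQ_KEYS_ORDER  - keep the generated keys sorted instead of shuffling them.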
10 | // #define MIXED_WORKLOAD
11 | #define CHECK_AFTER_OPS
12 | 
13 | #define SEQ_KEYS_ORDER false
14 | 
15 | void clear_cache()
16 | {
17 |     // Evict the CPU caches by streaming over a large buffer.
18 |     int size = 256 * 1024 * 1024;
19 |     char *garbage = new char[size];
20 |     for (int i = 0; i < size; ++i)
21 |         garbage[i] = i;
22 |     for (int i = 100; i < size; ++i)
23 |         garbage[i] += garbage[i - 100];
24 |     delete[] garbage;
25 | }
26 | 
27 | void performance_test(int n_threads, int num = 0)
28 | {
29 |     fprintf(stderr, "/////////////////////////////////////////\n");
30 |     // fprintf(stderr, "the number of Agglevel is %d\n", AGG_UPDATE_LEVEL);
31 |     fprintf(stderr, "PHAST(partition) : n_thread is: %d\n", n_threads);
32 |     PHAST *list = init_list();
33 |     assert(list != NULL);
34 | 
35 |     if (num == 0)
36 |     {
37 |         num = (INSERT_NUM + SEARCH_NUM);
38 |     }
39 |     uint64_t t1;
40 | 
41 | #ifdef CHECK_AFTER_OPS
42 |     uint64_t chk_num = 0;
43 |     uint64_t chk_num_not_find = 0;
44 | #endif
45 | 
46 |     //////////////////////////
47 |     // generate keys.
48 |     /////////////////////////
49 |     uint64_t *keys = (uint64_t *)malloc(num * sizeof(uint64_t));
50 |     std::random_device rd;
51 |     std::mt19937_64 eng(rd());
52 | #if 0
53 |     std::uniform_int_distribution<uint64_t> uniform_dist;
54 |     for (uint64_t i = 0; i < num;)
55 |     {
56 |         uint64_t x = uniform_dist(eng);
57 |         if (x > 0 && x < MAX_U64_KEY)
58 |         {
59 |             keys[i++] = x;
60 |             // keys[i++] = i;
61 |         }
62 |         else
63 |         {
64 |             continue;
65 |         }
66 |     }
67 | #else
68 |     uint64_t step = MAX_U64_KEY / num;
69 |     for (uint64_t i = 0; i < num; i++)
70 |         keys[i] = i * step + 1;
71 |     if (!SEQ_KEYS_ORDER)
72 |         std::shuffle(keys, keys + num, eng);
73 | #endif
74 | 
75 |     ///////////////////////////
76 |     //-----Warm up-----
77 |     ///////////////////////////
78 |     t1 = NowNanos();
79 |     fprintf(stderr, "single thread start warm up!\n");
80 |     for (uint64_t i = 0; i < num / 2; ++i)
81 |     {
82 |         Insert(list, keys[i], keys[i]);
83 |     }
84 | 
85 |     // fprintf(stderr, "single thread finish warm up! %llu ns.\n", ElapsedNanos(t1));
86 | 
87 |     ///////////////////////////
88 |     // Multithreading
89 |     //////////////////////////
90 |     std::vector<std::future<void>> futures(n_threads); // (starts with n_threads empty futures; invalid ones are skipped below.)
91 |     uint64_t data_per_thread = (num / 2) / n_threads;
92 | 
93 | #ifndef MIXED_WORKLOAD
94 |     ///////////////////////////
95 |     //-----Test Insert-----
96 |     ///////////////////////////
97 |     fprintf(stderr, "/////////////////////////////////////////\n");
98 |     fprintf(stderr, "%d threads start insert\n", n_threads);
99 |     clear_cache();
100 |     size_t seq_cursor = (num / 2) - 1; // add_and_fetch is a little faster than fetch_and_add.
101 |     t1 = NowNanos();
102 | 
103 |     for (int tid = 0; tid < n_threads; tid++)
104 |     {
105 |         int from = data_per_thread * tid;
106 |         int to = (tid == n_threads - 1) ? num / 2 : from + data_per_thread;
107 | 
108 |         auto f = std::async(
109 |             std::launch::async,
110 |             [&list, &keys, &num, &seq_cursor](int from, int to, int tid)
111 |             {
112 |                 if (SEQ_KEYS_ORDER)
113 |                 {
114 |                     size_t i = __sync_add_and_fetch(&seq_cursor, 1);
115 |                     while (i < num)
116 |                     {
117 |                         Insert(list, keys[i], keys[i]);
118 |                         i = __sync_add_and_fetch(&seq_cursor, 1);
119 |                     }
120 |                 }
121 |                 else
122 |                 {
123 |                     for (int i = from + num / 2; i < to + num / 2; ++i)
124 |                         Insert(list, keys[i], keys[i]);
125 |                 }
126 |             },
127 |             from, to, tid);
128 |         futures.push_back(std::move(f));
129 |     }
130 |     for (auto &&f : futures)
131 |         if (f.valid())
132 |             f.get();
133 |     fprintf(stderr, "%d threads insert time cost is %llu ns.\n", n_threads, (unsigned long long)ElapsedNanos(t1));
134 | 
135 | 
136 |     /////////////////////////////
137 |     //-----Test Search-----
138 |     ////////////////////////////
139 |     fprintf(stderr, "/////////////////////////////////////////\n");
140 |     fprintf(stderr, "%d threads start search\n", n_threads);
141 |     clear_cache();
142 |     futures.clear();
143 |     t1 = NowNanos();
144 | #ifdef CHECK_AFTER_OPS
145 |     chk_num = 0;
146 | #endif
147 | 
148 |     for (int tid = 0; tid < n_threads; tid++)
149 |     {
150 |         int from = data_per_thread * tid;
151 |         int to = (tid == n_threads - 1) ? num / 2 : from + data_per_thread;
152 | 
153 | #ifdef CHECK_AFTER_OPS
154 |         auto f = std::async(
155 |             std::launch::async,
156 |             [&list, &keys, &num, &chk_num](int from, int to, int tid)
157 |             {
158 |                 for (int i = from + num / 2; i < to + num / 2; ++i)
159 |                 {
160 |                     if (Search(list, keys[i]) != keys[i])
161 |                     {
162 |                         __sync_fetch_and_add(&chk_num, 1);
163 |                     }
164 |                 }
165 |             },
166 |             from, to, tid);
167 | #else
168 |         auto f = std::async(
169 |             std::launch::async,
170 |             [&list, &keys, &num](int from, int to, int tid)
171 |             {
172 |                 for (int i = from + num / 2; i < to + num / 2; ++i)
173 |                     Search(list, keys[i]);
174 |             },
175 |             from, to, tid);
176 | #endif
177 |         futures.push_back(std::move(f));
178 |     }
179 |     for (auto &&f : futures)
180 |         if (f.valid())
181 |             f.get();
182 |     fprintf(stderr, "%d threads search time cost is %llu ns.\n", n_threads, (unsigned long long)ElapsedNanos(t1));
183 | #ifdef CHECK_AFTER_OPS
184 |     fprintf(stderr, "%llu/%llu (%.2f%%) wrong search results\n", (unsigned long long)chk_num, (unsigned long long)(num / 2),
185 |             100.0f * (float)chk_num / (float)(num / 2)); // scaled by 100 so %% prints an actual percentage.
186 | #endif
187 | 
188 | #else
189 |     /////////////////////////////
190 |     //-----Test Mixed-----
191 |     ////////////////////////////
192 |     fprintf(stderr, "/////////////////////////////////////////\n");
193 |     fprintf(stderr, "start mixed!\n");
194 |     clear_cache();
195 |     futures.clear();
196 |     uint64_t half_num_data = num / 2;
197 |     size_t seq_cursor = (num / 2) - 1; // add_and_fetch is a little faster than fetch_and_add.
198 |     t1 = NowNanos();
199 | #ifdef CHECK_AFTER_OPS
200 |     chk_num = 0;
201 | #endif
202 | 
203 |     for (int tid = 0; tid < n_threads; tid++)
204 |     {
205 |         int from = half_num_data + data_per_thread * tid;
206 |         int to = (tid == n_threads - 1) ?
num : from + data_per_thread; 207 | 208 | #ifdef CHECK_AFTER_OPS 209 | auto f = std::async( 210 | std::launch::async, 211 | [&list, &keys, &half_num_data, &chk_num, &chk_num_not_find](int from, int to, int tid) 212 | { 213 | for (int i = from; i < to; ++i) 214 | { 215 | int sidx = i - half_num_data; 216 | int jid = i % 4; 217 | uint64_t res = 0; 218 | switch (jid) 219 | { 220 | case 0: 221 | Insert(list, keys[i], keys[i]); 222 | for (int j = 0; j < 4; j++) 223 | { 224 | res = Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 225 | if (res != keys[(sidx + j + jid * 8) % half_num_data]) 226 | { 227 | __sync_fetch_and_add(&chk_num, 1); 228 | } 229 | if (res == 0) 230 | { 231 | __sync_fetch_and_add(&chk_num_not_find, 1); 232 | } 233 | } 234 | break; 235 | case 1: 236 | for (int j = 0; j < 3; j++) 237 | { 238 | res = Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 239 | if (res != keys[(sidx + j + jid * 8) % half_num_data]) 240 | { 241 | __sync_fetch_and_add(&chk_num, 1); 242 | } 243 | if (res == 0) 244 | { 245 | __sync_fetch_and_add(&chk_num_not_find, 1); 246 | } 247 | } 248 | Insert(list, keys[i], keys[i]); 249 | res = Search(list, keys[(sidx + 3 + jid * 8) % half_num_data]); 250 | if (res != keys[(sidx + 3 + jid * 8) % half_num_data]) 251 | { 252 | __sync_fetch_and_add(&chk_num, 1); 253 | } 254 | if (res == 0) 255 | { 256 | __sync_fetch_and_add(&chk_num_not_find, 1); 257 | } 258 | break; 259 | case 2: 260 | for (int j = 0; j < 2; j++) 261 | { 262 | res = Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 263 | if (res != keys[(sidx + j + jid * 8) % half_num_data]) 264 | { 265 | __sync_fetch_and_add(&chk_num, 1); 266 | } 267 | if (res == 0) 268 | { 269 | __sync_fetch_and_add(&chk_num_not_find, 1); 270 | } 271 | } 272 | Insert(list, keys[i], keys[i]); 273 | for (int j = 2; j < 4; j++) 274 | { 275 | res = Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 276 | if (res != 277 | keys[(sidx + j + jid * 8) % half_num_data]) 278 | { 279 | __sync_fetch_and_add(&chk_num, 1); 280 | } 281 | if (res == 0) 282 | { 283 | __sync_fetch_and_add(&chk_num_not_find, 1); 284 | } 285 | } 286 | break; 287 | case 3: 288 | for (int j = 0; j < 4; j++) 289 | { 290 | res = Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 291 | if (res != 292 | keys[(sidx + j + jid * 8) % half_num_data]) 293 | { 294 | __sync_fetch_and_add(&chk_num, 1); 295 | } 296 | if (res == 0) 297 | { 298 | __sync_fetch_and_add(&chk_num_not_find, 1); 299 | } 300 | } 301 | Insert(list, keys[i], keys[i]); 302 | break; 303 | default: 304 | break; 305 | } 306 | } 307 | }, 308 | from, to, tid); 309 | #else 310 | auto f = std::async( 311 | std::launch::async, 312 | [&list, &keys, &half_num_data, &num, &seq_cursor](int from, int to, int tid) 313 | { 314 | if (SEQ_KEYS_ORDER) 315 | { 316 | size_t i = __sync_add_and_fetch(&seq_cursor, 1); 317 | while (i < num) 318 | { 319 | int jid = i % 4; 320 | switch (jid) 321 | { 322 | case 0: 323 | Insert(list, keys[i], keys[i]); 324 | for (int j = 0; j < 4; j++) 325 | { 326 | Search(list, keys[(i - j - jid * 8)]); 327 | } 328 | break; 329 | case 1: 330 | for (int j = 0; j < 3; j++) 331 | { 332 | Search(list, keys[(i - j - jid * 8)]); 333 | } 334 | Insert(list, keys[i], keys[i]); 335 | Search(list, keys[(i - 3 - jid * 8)]); 336 | break; 337 | case 2: 338 | for (int j = 0; j < 2; j++) 339 | { 340 | Search(list, keys[(i - j - jid * 8)]); 341 | } 342 | Insert(list, keys[i], keys[i]); 343 | for (int j = 2; j < 4; j++) 344 | { 345 | Search(list, keys[(i - j - jid * 8)]); 346 | } 347 | 
break; 348 | case 3: 349 | for (int j = 0; j < 4; j++) 350 | { 351 | Search(list, keys[(i - j - jid * 8)]); 352 | } 353 | Insert(list, keys[i], keys[i]); 354 | break; 355 | default: 356 | break; 357 | } 358 | i = __sync_add_and_fetch(&seq_cursor, 1); 359 | } 360 | } 361 | else 362 | { 363 | for (int i = from; i < to; ++i) 364 | { 365 | int sidx = i - half_num_data; 366 | 367 | int jid = i % 4; 368 | switch (jid) 369 | { 370 | case 0: 371 | Insert(list, keys[i], keys[i]); 372 | for (int j = 0; j < 4; j++) 373 | { 374 | Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 375 | } 376 | break; 377 | case 1: 378 | for (int j = 0; j < 3; j++) 379 | { 380 | Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 381 | } 382 | Insert(list, keys[i], keys[i]); 383 | Search(list, keys[(sidx + 3 + jid * 8) % half_num_data]); 384 | break; 385 | case 2: 386 | for (int j = 0; j < 2; j++) 387 | { 388 | Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 389 | } 390 | Insert(list, keys[i], keys[i]); 391 | for (int j = 2; j < 4; j++) 392 | { 393 | Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 394 | } 395 | break; 396 | case 3: 397 | for (int j = 0; j < 4; j++) 398 | { 399 | Search(list, keys[(sidx + j + jid * 8) % half_num_data]); 400 | } 401 | Insert(list, keys[i], keys[i]); 402 | break; 403 | default: 404 | break; 405 | } 406 | } 407 | } 408 | }, 409 | from, to, tid); 410 | #endif 411 | futures.push_back(move(f)); 412 | } 413 | 414 | for (auto &&f : futures) 415 | if (f.valid()) 416 | f.get(); 417 | 418 | fprintf(stderr, "%d threads mixed time cost is %llu\n", n_threads, ElapsedNanos(t1)); 419 | #ifdef CHECK_AFTER_OPS 420 | fprintf(stderr, "%llu/%llu (%.2f%%) wrong search results\n", chk_num, num * 2, 421 | ((float)chk_num) / (float)(num * 2)); 422 | fprintf(stderr, "%llu/%llu (%.2f%%) search not find\n", chk_num_not_find, num * 2, 423 | ((float)chk_num_not_find) / (float)(num * 2)); 424 | #endif 425 | #endif 426 | 427 | #if TEST_SCAN 428 | /////////////////////////// 429 | //-----Test Scan----- 430 | /////////////////////////// 431 | fprintf(stderr, "/////////////////////////////////////////\n"); 432 | fprintf(stderr, "%d threads start scan\n", n_threads); 433 | clear_cache(); 434 | t1 = NowNanos(); 435 | 436 | for (int tid = 0; tid < n_threads; tid++) 437 | { 438 | uint64_t from = data_per_thread * tid; 439 | uint64_t to = (tid == n_threads - 1) ? num / 2 : from + data_per_thread; 440 | 441 | auto f = std::async( 442 | std::launch::async, 443 | [&list, &keys, &num](uint64_t from, uint64_t to, int tid) 444 | { 445 | uint64_t scan_buf[51]; 446 | for (uint64_t i = from + num / 2; i < to + num / 2; ++i) 447 | { 448 | Range_Search(list, keys[i], 50, scan_buf); 449 | } 450 | }, 451 | from, to, tid); 452 | futures.push_back(move(f)); 453 | } 454 | for (auto &&f : futures) 455 | if (f.valid()) 456 | f.get(); 457 | fprintf(stderr, "%d threads scan time cost is %llu ns.\n", n_threads, ElapsedNanos(t1)); 458 | 459 | /////////////////////////// 460 | //-----Test Update----- 461 | /////////////////////////// 462 | fprintf(stderr, "/////////////////////////////////////////\n"); 463 | fprintf(stderr, "%d threads start update\n", n_threads); 464 | clear_cache(); 465 | t1 = NowNanos(); 466 | 467 | for (int tid = 0; tid < n_threads; tid++) 468 | { 469 | uint64_t from = data_per_thread * tid; 470 | uint64_t to = (tid == n_threads - 1) ? 
num / 2 : from + data_per_thread; 471 | 472 | auto f = std::async( 473 | std::launch::async, 474 | [&list, &keys, &num](uint64_t from, uint64_t to, int tid) 475 | { 476 | for (uint64_t i = from + num / 2; i < to + num / 2; ++i) 477 | { 478 | Update(list, keys[i], keys[i] + 1); 479 | } 480 | }, 481 | from, to, tid); 482 | futures.push_back(move(f)); 483 | } 484 | for (auto &&f : futures) 485 | if (f.valid()) 486 | f.get(); 487 | fprintf(stderr, "%d threads update time cost is %llu ns.\n", n_threads, ElapsedNanos(t1)); 488 | 489 | /////////////////////////// 490 | //-----Test Delete----- 491 | /////////////////////////// 492 | fprintf(stderr, "/////////////////////////////////////////\n"); 493 | fprintf(stderr, "%d threads start delete\n", n_threads); 494 | clear_cache(); 495 | t1 = NowNanos(); 496 | 497 | for (int tid = 0; tid < n_threads; tid++) 498 | { 499 | uint64_t from = data_per_thread * tid; 500 | uint64_t to = (tid == n_threads - 1) ? num / 2 : from + data_per_thread; 501 | 502 | auto f = std::async( 503 | std::launch::async, 504 | [&list, &keys, &num](uint64_t from, uint64_t to, int tid) 505 | { 506 | for (uint64_t i = from + num / 2; i < to + num / 2; ++i) 507 | { 508 | Delete(list, keys[i]); 509 | } 510 | }, 511 | from, to, tid); 512 | futures.push_back(move(f)); 513 | } 514 | for (auto &&f : futures) 515 | if (f.valid()) 516 | f.get(); 517 | fprintf(stderr, "%d threads delete time cost is %llu ns.\n", n_threads, ElapsedNanos(t1)); 518 | 519 | #endif 520 | 521 | #if TEST_RECOVERY 522 | /////////////////////////////////////////// 523 | //-----Test recovery----- 524 | ////////////////////////////////////////// 525 | for (int i = 0; i < 6; i++) 526 | { 527 | int n_threads = std::pow(2, i); 528 | fprintf(stderr, "/////////////////////////////////////////\n"); 529 | fprintf(stderr, "%d threads start recovery\n", n_threads); 530 | dram_free(list); 531 | clear_cache(); 532 | t1 = NowNanos(); 533 | list = recovery(n_threads); 534 | fprintf(stderr, "%d threads recovery time cost is %llu ns.\n", n_threads, ElapsedNanos(t1)); 535 | } 536 | ///////////////////////////// 537 | //-----Test Search(after recovery)----- 538 | //////////////////////////// 539 | fprintf(stderr, "/////////////////////////////////////////\n"); 540 | fprintf(stderr, "%d threads start search\n", n_threads); 541 | clear_cache(); 542 | futures.clear(); 543 | t1 = NowNanos(); 544 | #ifdef CHECK_AFTER_OPS 545 | chk_num = 0; 546 | #endif 547 | 548 | for (int tid = 0; tid < n_threads; tid++) 549 | { 550 | int from = data_per_thread * tid; 551 | int to = (tid == n_threads - 1) ? 
num / 2 : from + data_per_thread;
552 | 
553 | #ifdef CHECK_AFTER_OPS
554 |         auto f = std::async(
555 |             std::launch::async,
556 |             [&list, &keys, &num, &chk_num](int from, int to, int tid)
557 |             {
558 |                 for (int i = from + num / 2; i < to + num / 2; ++i)
559 |                 {
560 |                     if (Search(list, keys[i]) != keys[i])
561 |                     {
562 |                         __sync_fetch_and_add(&chk_num, 1);
563 |                     }
564 |                 }
565 |             },
566 |             from, to, tid);
567 | #else
568 |         auto f = std::async(
569 |             std::launch::async,
570 |             [&list, &keys, &num](int from, int to, int tid)
571 |             {
572 |                 for (int i = from + num / 2; i < to + num / 2; ++i)
573 |                     Search(list, keys[i]);
574 |             },
575 |             from, to, tid);
576 | #endif
577 |         futures.push_back(std::move(f));
578 |     }
579 |     for (auto &&f : futures)
580 |         if (f.valid())
581 |             f.get();
582 |     fprintf(stderr, "%d threads search time cost is %llu ns.\n", n_threads, (unsigned long long)ElapsedNanos(t1));
583 | #ifdef CHECK_AFTER_OPS
584 |     fprintf(stderr, "%llu/%llu (%.2f%%) wrong search results\n", (unsigned long long)chk_num, (unsigned long long)(num / 2),
585 |             100.0f * (float)chk_num / (float)(num / 2)); // scaled by 100 so %% prints an actual percentage.
586 | #endif
587 | 
588 | #endif
589 |     free(list); // list was malloc'd in init_list(): free() it, do not delete it.
590 |     free(keys);
591 | }
592 | 
593 | int main(int argc, char **argv)
594 | {
595 |     if (argc != 2)
596 |     {
597 |         fprintf(stderr, "The numThread parameter is required!\n");
598 |         return 0;
599 |     }
600 | 
601 |     int num_thread = atoi(argv[1]);
602 |     performance_test(num_thread);
603 |     return 0;
604 | }
605 | 
--------------------------------------------------------------------------------
/source/PHAST.cc:
--------------------------------------------------------------------------------
1 | #include "PHAST.h"
2 | 
3 | #ifdef USE_PMDK
4 | PMEMobjpool *pop; // global pmemobj pool
5 | #endif
6 | 
7 | inline LSG *AllocNewLeafNode()
8 | {
9 |     TOID(LSG)
10 |     leaf = TOID_NULL(LSG);
11 |     POBJ_ZNEW(pop, &leaf, LSG);
12 |     if (TOID_IS_NULL(leaf))
13 |     {
14 |         fprintf(stderr, "failed to create a LSG in nvmm.\n");
15 |         exit(1);
16 |     }
17 | 
18 |     return D_RW(leaf);
19 | }
20 | 
21 | ISN *create_inner_node(int level)
22 | {
23 |     ISN *p = new_node(level);
24 |     if (!p)
25 |         return NULL;
26 |     p->locker = new RWMutex;
27 |     p->locker->Init();
28 |     p->max_key = 0;
29 |     p->is_head = false;
30 | #ifdef USE_AGG_KEYS
31 |     p->agg_index = NULL;
32 | #endif
33 |     p->is_split = false;
34 |     p->nLevel = level;
35 |     for (int i = 0; i <= level; i++)
36 |     {
37 |         p->next[i] = NULL;
38 |     }
39 |     p->nKeys = 0;
40 |     for (int i = 0; i < MAX_LEAF_CAPACITY; ++i)
41 |     {
42 |         p->mem_bitmap[i] = 0;
43 |     }
44 |     return p;
45 | }
46 | 
47 | static inline int file_exists(const char *filename)
48 | {
49 |     struct stat buffer;
50 |     return stat(filename, &buffer); // 0 if the file exists (stat convention).
51 | }
52 | 
53 | ISL *create_inner_list()
54 | {
55 |     ISL *list = (ISL *)malloc(sizeof(ISL));
56 |     if (list == NULL)
57 |         return NULL;
58 | 
59 |     /* force-disable SDS feature during pool creation */
60 |     int sds_write_value = 0;
61 |     pmemobj_ctl_set(NULL, "sds.at_create", &sds_write_value);
62 | 
63 |     // create the pmemobj pool and its root
64 | 
65 |     if (file_exists(PMEM_PATH) != 0)
66 |     {
67 |         printf("create new one.\n");
68 |         if ((pop = pmemobj_create(PMEM_PATH, "PHAST", POOL_SIZE, 0666)) == NULL)
69 |         {
70 |             perror("failed to create pool.\n");
71 |             return NULL;
72 |         }
73 |     }
74 |     else
75 |     {
76 |         printf("open existing one.\n");
77 |         if ((pop = pmemobj_open(PMEM_PATH, POBJ_LAYOUT_NAME(PHAST))) == NULL)
78 |         {
79 |             perror("failed to open pool.\n");
80 |             return NULL;
81 |         }
82 |     }
83 | 
84 |     TOID(SHA)
85 |     root = POBJ_ROOT(pop, SHA);
86 |     assert(!TOID_IS_NULL(root));
87 | 
88 |     // initialize one head (with its first inner node and leaf) per partition.
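    // Each of the HEAD_COUNT partitions covers a key range of width HASH_KEY
    // (= MAX_U64_KEY / HEAD_COUNT); lookups pick their partition with
    // head_idx = key / HASH_KEY (see SearchList), which keeps each per-head
    // skip list short and lets threads on different ranges proceed in parallel.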
89 |     ISN *head = NULL;
90 |     for (int i = 0; i < HEAD_COUNT; i++)
91 |     {
92 |         head = create_inner_node(0);
93 |         if (head == NULL)
94 |         {
95 |             fprintf(stderr, "Memory allocation failed for head!\n");
96 |             free(list);
97 |             return NULL;
98 |         }
99 |         head->is_head = true;
100 |         list->head[i] = head;
101 |         list->level[i] = 0;
102 | 
103 |         // create the first inner node for this head.
104 |         ISN *node = create_inner_node(0);
105 |         assert(node);
106 |         head->next[0] = node;
107 | 
108 |         // set the max key as the upper bound of this head.
109 |         if (UNLIKELY(i == HEAD_COUNT - 1))
110 |         {
111 |             node->max_key = MAX_U64_KEY;
112 |         }
113 |         else
114 |         {
115 |             node->max_key = (i + 1) * HASH_KEY;
116 |         }
117 | 
118 |         // create the first leaf node for this inner node.
119 |         LSG *slot = AllocNewLeafNode();
120 |         slot->is_head = true; // the first slot in this inner node.
121 |         slot->max_key = node->max_key;
122 |         node->nKeys = 1;
123 |         node->keys[0] = node->max_key;
124 |         node->leaves[0] = slot;
125 | 
126 |         // link the root and the first leaf node.
127 |         D_RW(root)->slot_head_array[i] = slot;
128 | 
129 |         // L1: head[2] -> head[3] -> ... -> head[X] -> NULL
130 |         // L0: head[2] -> IN -> head[3] -> ... -> head[X] -> IN -> NULL
131 |         // PM: leafnode[2] -> leafnode[3] -> leafnode[x] -> NULL
132 |         if (i > 0)
133 |         {
134 |             for (int j = 1; j < MAX_L; j++)
135 |             {
136 |                 list->head[i - 1]->next[j] = list->head[i];
137 |             }
138 |             list->head[i - 1]->next[0]->next[0] = head;
139 |             list->head[i - 1]->next[0]->leaves[0]->next = slot;
140 |         }
141 |         if (i == HEAD_COUNT - 1) // a separate "if", not "else if": the last head's upper-level links must be initialized too.
142 |         {
143 |             for (int j = 1; j < MAX_L; j++)
144 |             {
145 |                 list->head[i]->next[j] = NULL;
146 |             }
147 |             list->head[i]->next[0]->next[0] = NULL;
148 |             list->head[i]->next[0]->leaves[0]->next = NULL;
149 |         }
150 | 
151 | #ifdef USE_AGG_KEYS
152 |         head->agg_index = new AGGIndex(head, AGG_SLOT_INIT_NUM);
153 | #endif
154 |     }
155 |     // the last head's max key is +INF.
156 | 
157 |     return list;
158 | }
159 | 
160 | PHAST *init_list()
161 | {
162 |     PHAST *list = (PHAST *)malloc(sizeof(PHAST));
163 |     if (list == NULL)
164 |         return NULL;
165 |     list->size = 0;
166 |     list->inner_list = create_inner_list();
167 |     if (list->inner_list == NULL)
168 |         return NULL;
169 |     srand(time(0));
170 | 
171 |     return list;
172 | }
173 | 
174 | // RETURN: [0, size) if succeeded, size if failed.
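// (__builtin_ffsll(~x) yields the 1-based index of the least-significant zero
//  bit of x, or 0 when x has no zero bit; that is why ret == 0 is mapped to
//  the "full" return value `size` below.)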
175 | inline int find_zero_bit(uint64_t x, uint16_t size) 176 | { 177 | int ret = firstzero(x); 178 | if (ret == 0) 179 | return size; 180 | return (ret - 1); 181 | } 182 | 183 | uint64_t sl_hash(uint64_t key) 184 | { 185 | key = (~key) + (key << 21); 186 | key = key ^ (key >> 24); 187 | key = (key + (key << 3)) + (key << 8); 188 | key = key ^ (key >> 14); 189 | key = (key + (key << 2)) + (key << 4); 190 | key = key ^ (key >> 28); 191 | key = key + (key << 31); 192 | return key; 193 | } 194 | 195 | static inline uint8_t hashcode1B(uint64_t x) 196 | { 197 | x ^= x >> 32; 198 | x ^= x >> 16; 199 | x ^= x >> 8; 200 | return (uint8_t)(x & 0x0ffULL); 201 | } 202 | 203 | static inline uint8_t hashcode_v1(uint64_t x) 204 | { 205 | return (uint8_t)(x & 0x0ffULL); 206 | } 207 | 208 | uint8_t f_hash(uint64_t key) 209 | { 210 | uint8_t hash_key = sl_hash(key) % 256; 211 | return hash_key; 212 | } 213 | 214 | void insertion_sort_entry(Entry *base, int num) 215 | { 216 | int i, j; 217 | Entry temp; 218 | 219 | for (i = 1; i < num; i++) 220 | { 221 | for (j = i; j > 0; j--) 222 | { 223 | if (base[j - 1].key > base[j].key) 224 | { 225 | temp.key = base[j - 1].key; 226 | temp.value = base[j - 1].value; 227 | base[j - 1].key = base[j].key; 228 | base[j - 1].value = base[j].value; 229 | base[j].key = temp.key; 230 | base[j].value = temp.value; 231 | } 232 | else 233 | break; 234 | } 235 | } 236 | } 237 | 238 | // select [s, e] includes start and end elements. 239 | void quick_select(Entry *entries, int k, int s, int e) 240 | { 241 | if (s >= e) 242 | return; 243 | int i = s, j = e; 244 | Entry tmp = entries[s]; 245 | while (i != j) 246 | { 247 | while (i < j && entries[j].key >= tmp.key) 248 | j--; 249 | if (i < j) 250 | { 251 | entries[i].key = entries[j].key; 252 | entries[i].value = entries[j].value; 253 | } 254 | while (i < j && entries[i].key <= tmp.key) 255 | i++; 256 | if (i < j) 257 | { 258 | entries[j].key = entries[i].key; 259 | entries[j].value = entries[i].value; 260 | } 261 | } 262 | entries[i] = tmp; 263 | if (i == k - 1) 264 | return; 265 | else if (i > k - 1) 266 | quick_select(entries, k, s, i - 1); 267 | else 268 | quick_select(entries, k, i + 1, e); 269 | } 270 | 271 | void quick_select_index(const Entry *entries, int *index, int k, int s, int e) 272 | { 273 | if (s >= e) 274 | return; 275 | int i = s, j = e; 276 | int tmp = index[s]; 277 | while (i != j) 278 | { 279 | while (i < j && entries[index[j]].key >= entries[tmp].key) 280 | j--; 281 | if (i < j) 282 | { 283 | index[i] = index[j]; 284 | } 285 | while (i < j && entries[index[i]].key <= entries[tmp].key) 286 | i++; 287 | if (i < j) 288 | { 289 | index[j] = index[i]; 290 | } 291 | } 292 | index[i] = tmp; 293 | if (i == k - 1) 294 | return; 295 | else if (i > k - 1) 296 | quick_select_index(entries, index, k, s, i - 1); 297 | else 298 | quick_select_index(entries, index, k, i + 1, e); 299 | } 300 | 301 | static inline int binary_search(ISN *node, uint64_t key) 302 | { 303 | int low = 0, mid = 0; 304 | int high = node->nKeys; 305 | if (high == 0) 306 | return 0; 307 | while (low < high) 308 | { 309 | if (key <= node->keys[low]) 310 | return low; 311 | mid = (low + high) / 2; 312 | if (node->keys[mid] > key) 313 | { 314 | high = mid; 315 | } 316 | else if (node->keys[mid] < key) 317 | { 318 | low = mid + 1; 319 | } 320 | else 321 | { 322 | break; 323 | } 324 | } 325 | if (low > mid) 326 | mid = low; 327 | return mid; 328 | } 329 | 330 | static inline int seq_search(ISN *node, uint64_t key) 331 | { 332 | int pos = 0, high = node->nKeys; 
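    // (Sequential counterpart of binary_search above: return the first position
    //  whose key upper-bounds the lookup key, or the last position if every key
    //  is smaller.)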
333 |     for (pos = 0; pos < high; ++pos)
334 |     {
335 |         if (key <= node->keys[pos])
336 |         {
337 |             return pos;
338 |         }
339 |     }
340 |     return pos - 1;
341 | }
342 | 
343 | bool TryToGetWriteLock(ISN *inode, const bool is_split)
344 | {
345 |     if (__sync_bool_compare_and_swap(&(inode->is_split),
346 |                                      false, true))
347 |     {
348 |         inode->locker->ReadUnlock();
349 |         inode->locker->WriteLock();
350 |         return true;
351 |     }
352 |     inode->locker->ReadUnlock();
353 |     return false;
354 | }
355 | 
356 | ISN *SearchList(ISL *inner_list, uint64_t key,
357 |                 ISN *pre_nodes[], ISN *next_nodes[])
358 | {
359 | 
360 |     // pre->max_key < key <= next->max_key if it is not a head.
361 |     int head_idx = key / HASH_KEY;
362 |     ISN *pre = inner_list->head[head_idx], *next = NULL, *target = NULL;
363 |     assert(pre != NULL);
364 |     if (head_idx < HEAD_COUNT - 1)
365 |     {
366 |         next = inner_list->head[head_idx + 1];
367 |     }
368 | 
369 |     int height = pre->nLevel;
370 |     assert(height >= 0 && height < MAX_L);
371 |     uint64_t pre_maxkey, next_maxkey;
372 |     ISN *starter = NULL;
373 |     ISN *starter_next = NULL;
374 |     int span = 0;
375 | 
376 |     for (int i = 0; i < MAX_L + 1; ++i)
377 |     {
378 |         pre_nodes[i] = pre;   // this is the current head.
379 |         next_nodes[i] = next; // this is the next head.
380 |     }
381 | 
382 |     // search from the top level down to the bottom level.
383 |     for (int level = height; level >= 0; --level)
384 |     {
385 |         span = 0;
386 |         starter = pre;
387 |         next = pre->next[level];
388 |         pre_maxkey = pre->max_key;
389 |         next_maxkey = (next && !next->is_head) ? next->max_key : 0;
390 |         // next_maxkey == 0 means we reached the next head or the tail.
391 | 
392 |     level_retry:
393 |         while (next_maxkey && next_maxkey < key)
394 |         {
395 |             // pre->max_key < next->max_key < key <= ...
396 |             // move to the next node in the same level.
397 | #if 1
398 |             span++;
399 |             if (span > SPAN_TH && level == next->nLevel)
400 |             {
401 |                 // the span at this level is too long: deterministically promote next by one level.
402 |                 if (level < MAX_L - 1)
403 |                 { // failure case a: level >= MAX_L-1, skip the promotion.
404 |                     if (__sync_bool_compare_and_swap(&next->nLevel, level, level + 1))
405 |                     { // failure case b: another thread already raised the level, skip.
406 |                         // update the links;
407 |                         starter_next = starter->next[level + 1];
408 |                         next->next[level + 1] = starter_next;
409 |                         bool success_flag = false;
410 |                         while (starter_next == NULL || starter_next->is_head || starter_next->max_key > next_maxkey)
411 |                         { // failure case c: a node grew between starter and next, give up.
412 |                             if (__sync_bool_compare_and_swap(&starter->next[level + 1], starter_next, next))
413 |                             {
414 |                                 // raise the head's level; if the CAS fails, another thread already raised it.
415 |                                 if (level + 1 > height && level + 1 < MAX_L)
416 |                                     __sync_bool_compare_and_swap(&inner_list->head[head_idx]->nLevel, height, level + 1);
417 |                                 success_flag = true;
418 |                                 break; // success: the deterministic growth is finished!
419 |                             }
420 |                             starter_next = starter->next[level + 1];
421 |                             next->next[level + 1] = starter_next;
422 |                         }
423 |                         if (success_flag == false)
424 |                             __sync_bool_compare_and_swap(&next->nLevel, level + 1, level); // reset the level;
425 | #ifdef USE_AGG_KEYS
426 |                         if (next->nLevel == AGG_UPDATE_LEVEL)
427 |                         {
428 |                             update_agg_keys(inner_list->head[head_idx], head_idx);
429 |                         }
430 | #endif
431 |                     }
432 |                 }
433 |                 span = 0;
434 |                 starter = next;
435 |             }
436 | #endif
437 |             pre = next;
438 |             next = pre->next[level];
439 |             pre_maxkey = pre->max_key;
440 |             next_maxkey = (next && !next->is_head) ? next->max_key : 0;
441 |         }
442 | 
443 |         // pre->max_key < key <= next->max_key.
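        // Record the nodes flanking the search path at this level: when a split
        // later installs a new inner node, InstallNewInnerNode splices it in
        // between these recorded pre/next nodes instead of re-walking the list.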
444 | pre_nodes[level] = pre; 445 | next_nodes[level] = next; 446 | } 447 | 448 | if (next_maxkey == 0) 449 | { 450 | // means next == NULL or next is head which indicate pre's max_key is 451 | // changed due to split and the new node has not installed to the list 452 | // yet which causes pre's max_key != head's upper bound. 453 | // this issue will be processed in check again if statement. 454 | target = pre; 455 | pre_nodes[0] = pre; 456 | next_nodes[0] = next; 457 | } 458 | else 459 | { 460 | // reset pre_nodes and next_nodes in level 0. 461 | // target = pre_nodes[0] and target'max_key >= key. 462 | assert(next != NULL && !next->is_head); 463 | target = next; 464 | pre_nodes[0] = next; 465 | next_nodes[0] = next->next[0]; 466 | } 467 | 468 | // obtain the read lock of the target node. 469 | target->locker->ReadLock(); 470 | 471 | // check again in case target was splitting before get the read lock. 472 | if (next_maxkey != target->max_key || target->max_key < key) 473 | { 474 | while (target->max_key < key) 475 | { 476 | ISN *tmp = target; 477 | target = target->next[0]; 478 | 479 | assert(target && !target->is_head); 480 | 481 | target->locker->ReadLock(); 482 | tmp->locker->ReadUnlock(); 483 | } 484 | // got the right bottom level node. 485 | assert(target != NULL && !target->is_head); 486 | pre_nodes[0] = target; 487 | next_nodes[0] = target->next[0]; 488 | } 489 | 490 | return target; 491 | } 492 | 493 | ISN *SearchList(ISL *inner_list, uint64_t key, uint64_t *target_maxkey, bool lock) 494 | { 495 | 496 | // pre->max_key < key <= next->max_key if it is not head. 497 | int head_idx = key / HASH_KEY; 498 | ISN *pre = inner_list->head[head_idx], *next = NULL, *target = NULL; 499 | assert(pre != NULL); 500 | 501 | #ifdef USE_AGG_KEYS 502 | uint64_t next_maxkey; 503 | int height = pre->nLevel; 504 | target = find_in_agg_keys(pre, key, target_maxkey); 505 | if (target) 506 | { 507 | pre = target; 508 | height = AGG_UPDATE_LEVEL; 509 | } 510 | 511 | // search from agg level to bottom level. 512 | for (int level = height; level >= 0; --level) 513 | { 514 | next = pre->next[level]; 515 | next_maxkey = (next && !next->is_head) ? next->max_key : 0; 516 | // next_maxkey == 0 means we reached the next head or the tail. 517 | 518 | while (next_maxkey && next_maxkey < key) 519 | { 520 | // pre->max_key < next->max_key < key <= ... 521 | // move to the next node in the same level. 522 | pre = next; 523 | next = pre->next[level]; 524 | next_maxkey = (next && !next->is_head) ? next->max_key : 0; 525 | } 526 | // pre->max_key < key < next->max_key. 527 | } 528 | 529 | if (next_maxkey == 0) 530 | { 531 | // means next == NULL or next is head which indicate pre's max_key is 532 | // changed due to split and the new node has not installed to the list 533 | // yet which causes pre's max_key != head's upper bound. 534 | // this issue will be processed in check again if statement. 535 | target = pre; 536 | } 537 | else 538 | { 539 | // reset pre_nodes and next_nodes in level 0. 540 | // target = pre_nodes[0] and target'max_key >= key. 541 | assert(next != NULL && !next->is_head); 542 | target = next; 543 | } 544 | *target_maxkey = target->max_key; 545 | #else 546 | uint64_t next_maxkey; 547 | int height = pre->nLevel; 548 | assert(height >= 0 && height < MAX_L); 549 | 550 | // search from top to bottom level. 551 | for (int level = height; level >= 0; --level) 552 | { 553 | next = pre->next[level]; 554 | next_maxkey = (next && !next->is_head) ? 
next->max_key : 0; 555 | // next_maxkey == 0 means we reached the next head or the tail. 556 | 557 | while (next_maxkey && next_maxkey < key) 558 | { 559 | // pre->max_key < next->max_key < key <= ... 560 | // move to the next node in the same level. 561 | pre = next; 562 | next = pre->next[level]; 563 | next_maxkey = (next && !next->is_head) ? next->max_key : 0; 564 | } 565 | // pre->max_key < key < next->max_key. 566 | } 567 | 568 | if (next_maxkey == 0) 569 | { 570 | // means next == NULL or next is head which indicate pre's max_key is 571 | // changed due to split and the new node has not installed to the list 572 | // yet which causes pre's max_key != head's upper bound. 573 | // this issue will be processed in check again if statement. 574 | target = pre; 575 | } 576 | else 577 | { 578 | // reset pre_nodes and next_nodes in level 0. 579 | // target = pre_nodes[0] and target'max_key >= key. 580 | assert(next != NULL && !next->is_head); 581 | target = next; 582 | } 583 | *target_maxkey = target->max_key; 584 | #endif 585 | 586 | // obtain the read lock of the target node. 587 | if (lock) 588 | { 589 | target->locker->ReadLock(); 590 | } 591 | 592 | // check again in case target was splitting before get the read lock. 593 | if (target->max_key < key) 594 | { 595 | while (target->max_key < key) 596 | { 597 | ISN *tmp = target; 598 | target = target->next[0]; 599 | *target_maxkey = target->max_key; 600 | 601 | assert(target && !target->is_head); 602 | 603 | if (lock) 604 | { 605 | target->locker->ReadLock(); 606 | tmp->locker->ReadUnlock(); 607 | } 608 | } 609 | // got the right bottom level node. 610 | assert(target != NULL && !target->is_head); 611 | } 612 | 613 | return target; 614 | } 615 | 616 | int InsertIntoINode(ISN *inode, uint64_t key, uint64_t value, 617 | ISN *pre_nodes[], ISN *next_nodes[]) 618 | { 619 | // we have got the read lock which means thread safe to access inode's meta. 620 | inode->locker->AssertReadHeld(); 621 | 622 | // search the target leaf node. 623 | int loc = binary_search(inode, key); 624 | LSG *lfnode = inode->leaves[loc]; 625 | 626 | uint64_t wbitmap; 627 | // probe the empty slot of working bitmap. 628 | while ((wbitmap = 629 | __atomic_load_n(&(inode->mem_bitmap[loc]), __ATOMIC_CONSUME)) < GROUP_BITMAP_FULL) 630 | { 631 | assert(wbitmap < GROUP_BITMAP_FULL); 632 | 633 | // get empty working bitmap slot. 634 | 635 | int slot = find_zero_bit(wbitmap, MAX_ENTRY_NUM); 636 | 637 | uint64_t new_wbitmap = wbitmap | (1ULL << slot); 638 | 639 | // check whether this slot has been assigned to other thread. 640 | if (__sync_bool_compare_and_swap(&(inode->mem_bitmap[loc]), 641 | wbitmap, new_wbitmap)) 642 | { 643 | // this slot has been assigned to this thread. 644 | // install KV to this slot. 645 | uint8_t fp = f_hash(key); 646 | lfnode->entries[slot].key = key; 647 | lfnode->entries[slot].value = value; 648 | lfnode->fingerprints[slot] = fp; 649 | 650 | // flush the KVpairs. 651 | pmemobj_persist(pop, &lfnode->entries[slot], sizeof(Entry)); 652 | 653 | uint64_t cbitmap = __atomic_load_n(&(lfnode->commit_bitmap), 654 | __ATOMIC_CONSUME); 655 | 656 | while (true) 657 | { 658 | // install commit bitmap. 659 | assert(cbitmap < GROUP_BITMAP_FULL); 660 | 661 | uint64_t ret_cbitmap = 0; 662 | uint64_t new_cbitmap = cbitmap | (1ULL << slot); 663 | if ((ret_cbitmap = __sync_val_compare_and_swap(&(lfnode->commit_bitmap), 664 | cbitmap, new_cbitmap)) != cbitmap) 665 | { 666 | // install failed: a. cbitmap's other slot has been changed 667 | // by other thread; b. 
628 |     while ((wbitmap =
629 |                 __atomic_load_n(&(inode->mem_bitmap[loc]), __ATOMIC_CONSUME)) < GROUP_BITMAP_FULL)
630 |     {
631 |         assert(wbitmap < GROUP_BITMAP_FULL);
632 | 
633 |         // get an empty working-bitmap slot.
634 | 
635 |         int slot = find_zero_bit(wbitmap, MAX_ENTRY_NUM);
636 | 
637 |         uint64_t new_wbitmap = wbitmap | (1ULL << slot);
638 | 
639 |         // check whether this slot has been assigned to another thread.
640 |         if (__sync_bool_compare_and_swap(&(inode->mem_bitmap[loc]),
641 |                                          wbitmap, new_wbitmap))
642 |         {
643 |             // this slot has been assigned to this thread.
644 |             // install the KV pair into this slot.
645 |             uint8_t fp = f_hash(key);
646 |             lfnode->entries[slot].key = key;
647 |             lfnode->entries[slot].value = value;
648 |             lfnode->fingerprints[slot] = fp;
649 | 
650 |             // flush the KV pair.
651 |             pmemobj_persist(pop, &lfnode->entries[slot], sizeof(Entry));
652 | 
653 |             uint64_t cbitmap = __atomic_load_n(&(lfnode->commit_bitmap),
654 |                                                __ATOMIC_CONSUME);
655 | 
656 |             while (true)
657 |             {
658 |                 // install the commit bitmap.
659 |                 assert(cbitmap < GROUP_BITMAP_FULL);
660 | 
661 |                 uint64_t ret_cbitmap = 0;
662 |                 uint64_t new_cbitmap = cbitmap | (1ULL << slot);
663 |                 if ((ret_cbitmap = __sync_val_compare_and_swap(&(lfnode->commit_bitmap),
664 |                                                                cbitmap, new_cbitmap)) != cbitmap)
665 |                 {
666 |                     // install failed: a. another slot of cbitmap was changed
667 |                     // by another thread; b. this slot itself was changed.
668 |                     if (ret_cbitmap & (0x1ULL << slot))
669 |                     {
670 |                         inode->locker->ReadUnlock();
671 |                         return -1; // case b.
672 |                     }
673 |                     else
674 |                     {
675 | 
676 |                         cbitmap = ret_cbitmap;
677 |                         continue; // case a. try again.
678 |                     }
679 |                 }
680 | 
681 |                 // flush the commit bitmap and the fingerprints;
682 |                 pmemobj_persist(pop, &lfnode->commit_bitmap, 64);
683 | 
684 |                 // the insert is done.
685 |                 inode->locker->ReadUnlock();
686 | 
687 |                 return 0;
688 |             }
689 |         }
690 |     }
691 | 
692 |     assert(wbitmap == GROUP_BITMAP_FULL);
693 |     assert(inode->nKeys <= MAX_LEAF_CAPACITY);
694 | 
695 |     if (inode->nKeys == MAX_LEAF_CAPACITY)
696 |     {
697 |         // this leaf block is full.
698 |         if (!TryToGetWriteLock(inode, inode->is_split))
699 |         {
700 |             return +1; // another thread got the write lock.
701 |         }
702 |         inode->locker->AssertWriteHeld();
703 | 
704 |         // got the write lock, split this inner node.
705 |         {
706 |             // split the inner node.
707 |             // fprintf(stderr, "split inner node!\n");
708 | 
709 |             ////////////////////////////////////////////////////////////////////////////////////////////////
710 |             // step 1 : create a new inner node; set the next pointer, the max key and the slot pointers.
711 |             ////////////////////////////////////////////////////////////////////////////////////////////////
712 |             ISN *new_in = create_inner_node(0);
713 | 
714 |             new_in->max_key = inode->max_key;
715 |             new_in->next[0] = inode->next[0];
716 |             new_in->nKeys = MAX_LEAF_CAPACITY - MIN_LEAF_CAPACITY;
717 |             memcpy(&new_in->keys, &(inode->keys[MIN_LEAF_CAPACITY]),
718 |                    sizeof(uint64_t) * new_in->nKeys);
719 |             memcpy(&new_in->leaves, &(inode->leaves[MIN_LEAF_CAPACITY]),
720 |                    sizeof(LSG *) * new_in->nKeys);
721 |             memcpy(&new_in->mem_bitmap, &(inode->mem_bitmap[MIN_LEAF_CAPACITY]),
722 |                    sizeof(uint64_t) * new_in->nKeys);
723 |             // memset(&(inode->mem_bitmap[MIN_LEAF_CAPACITY]), 0, sizeof(uint64_t) * new_in->nKeys);
724 |             // set the boundary
725 |             new_in->leaves[0]->is_head = true;
726 |             pmemobj_persist(pop, &new_in->leaves[0]->is_head, sizeof(bool));
727 | 
728 |             ////////////////////////////////////////////////////////////////////////////////////////////////
729 |             // step 2 : reset the old inner node's nKeys, max_key and next[0].
730 |             ////////////////////////////////////////////////////////////////////////////////////////////////
731 |             inode->nKeys = MIN_LEAF_CAPACITY;
732 |             inode->max_key = inode->keys[inode->nKeys - 1];
733 |             inode->next[0] = new_in;
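            // note: apart from the persisted is_head flag on the boundary
            // leaf, this split only touches the inner node, which (as the
            // recovery() path below suggests) lives in DRAM and is rebuilt
            // from the persistent leaf list after a crash; that appears to
            // be why no further pmemobj_persist calls are needed here.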
734 |         }
735 |         // the leaf block split is done, release the write lock.
736 |         inode->is_split = false;
737 |         inode->locker->WriteUnlock();
738 |         // __atomic_store_n(&(inode->is_split), false, __ATOMIC_RELEASE);
739 |         // #ifdef USE_AGG_KEYS
740 |         //     update_agg_keys(pre_nodes[MAX_L]);
741 |         // #endif
742 | 
743 |         // insert again.
744 |         return +99;
745 |     }
746 |     else
747 |     {
748 |         // this leaf node is full.
749 |         if (!TryToGetWriteLock(inode, inode->is_split))
750 |         {
751 |             return +2; // another thread got the write lock.
752 |         }
753 |         inode->locker->AssertWriteHeld();
754 | 
755 |         // got the write lock, split this leaf node.
756 |         {
757 |             assert(lfnode->commit_bitmap == GROUP_BITMAP_FULL);
758 |             int group_idx[MAX_ENTRY_NUM], mid_idx = (MAX_ENTRY_NUM / 2);
759 |             for (int i = 0; i < MAX_ENTRY_NUM; ++i)
760 |             {
761 |                 group_idx[i] = i;
762 |             }
763 | 
764 |             // partition-sort the indexes of the entries.
765 |             quick_select_index(lfnode->entries, group_idx, mid_idx,
766 |                                0, MAX_ENTRY_NUM - 1);
767 | 
768 |             // find the largest key in the left part.
769 |             uint64_t left_largest = lfnode->entries[group_idx[0]].key;
770 |             for (int i = 1; i < mid_idx; ++i)
771 |             {
772 |                 if (lfnode->entries[group_idx[i]].key > left_largest)
773 |                 {
774 |                     left_largest = lfnode->entries[group_idx[i]].key;
775 |                 }
776 |             }
777 | 
778 |             ////////////////////////////////////////////////////////////////////////////////////////////////
779 |             // step 1 : create a new leaf node; set the flag, the next pointer, the bitmap and the fingerprints.
780 |             ////////////////////////////////////////////////////////////////////////////////////////////////
781 |             LSG *new_slot = AllocNewLeafNode();
782 |             new_slot->next = lfnode->next;
783 |             // insert the last half of the entries into the new leaf node.
784 |             int new_child_loc_slot = 0;
785 |             uint64_t new_slot_bitmap = 0;
786 |             for (int i = mid_idx; i < MAX_ENTRY_NUM; ++i, ++new_child_loc_slot)
787 |             {
788 |                 new_slot->entries[new_child_loc_slot].key =
789 |                     lfnode->entries[group_idx[i]].key;
790 |                 new_slot->entries[new_child_loc_slot].value =
791 |                     lfnode->entries[group_idx[i]].value;
792 |                 new_slot->fingerprints[new_child_loc_slot] =
793 |                     lfnode->fingerprints[group_idx[i]];
794 |                 new_slot_bitmap |= (1ULL << new_child_loc_slot);
795 |             }
796 |             // set the new leaf node's bitmap and max key.
797 |             new_slot->commit_bitmap = new_slot_bitmap;
798 |             // new_slot->working_bitmap = new_slot_bitmap;
799 |             new_slot->max_key = lfnode->max_key;
800 |             // flush the new leaf node.
801 |             pmemobj_persist(pop, new_slot, 128 + 16 * new_child_loc_slot); // two cache lines of header + the copied KV pairs.
802 | 
803 |             ////////////////////////////////////////////////////////////////////////////////////////////////
804 |             // step 2 : change the slot's next pointer to the new slot.
805 |             ////////////////////////////////////////////////////////////////////////////////////////////////
806 |             lfnode->next = new_slot;
807 |             pmemobj_persist(pop, &lfnode->next, sizeof(LSG *));
808 | 
809 |             ////////////////////////////////////////////////////////////////////////////////////////////////
810 |             // step 3 : reset the slot's bitmap.
811 |             ////////////////////////////////////////////////////////////////////////////////////////////////
812 |             new_slot_bitmap = 0;
813 |             for (int i = 0; i < mid_idx; ++i)
814 |             {
815 |                 new_slot_bitmap |= (1ULL << (group_idx[i]));
816 |             }
817 |             lfnode->commit_bitmap = new_slot_bitmap;
818 |             // lfnode->working_bitmap = new_slot_bitmap;
819 |             // flush the old slot's commit bitmap.
820 |             pmemobj_persist(pop, &lfnode->commit_bitmap, 8);
821 | 
822 |             ////////////////////////////////////////////////////////////////////////////////////////////////
823 |             // step 4 : change the old slot's max_key.
824 |             ////////////////////////////////////////////////////////////////////////////////////////////////
825 |             lfnode->max_key = left_largest;
826 |             pmemobj_persist(pop, &lfnode->max_key, 8);
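            // the persist order of steps 1-4 is what makes this split
            // log-free: if a crash hits after step 2 (new slot linked) but
            // before steps 3-4 are persisted, the old and new slots
            // temporarily share the same max_key; recovery() below detects
            // exactly this duplicate-max_key state and redoes the bitmap
            // reset and max_key update, so no undo/redo log is needed.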
827 | 
828 |             ////////////////////////////////////////////////////////////////////////////////////////////////
829 |             // step 5 : move the inner node's max keys and slot pointers to keep the order.
830 |             ////////////////////////////////////////////////////////////////////////////////////////////////
831 |             for (int i = inode->nKeys; i > loc + 1; --i)
832 |             {
833 |                 inode->leaves[i] = inode->leaves[i - 1];
834 |                 // inode->keys[i] = inode->keys[i - 1];
835 |                 inode->mem_bitmap[i] = inode->leaves[i - 1]->commit_bitmap;
836 |             }
837 |             _mm_sfence();
838 |             for (int i = inode->nKeys; i > loc + 1; --i)
839 |             {
840 |                 // inode->leaves[i] = inode->leaves[i - 1];
841 |                 inode->keys[i] = inode->keys[i - 1];
842 |                 // inode->mem_bitmap[i] = inode->leaves[i - 1]->commit_bitmap;
843 |             }
844 |             inode->keys[loc + 1] = inode->keys[loc];
845 |             inode->leaves[loc + 1] = new_slot;
846 |             inode->mem_bitmap[loc + 1] = new_slot->commit_bitmap;
847 |             inode->keys[loc] = left_largest;
848 |             inode->mem_bitmap[loc] = lfnode->commit_bitmap;
849 |             __atomic_add_fetch(&(inode->nKeys), 1, __ATOMIC_RELEASE);
850 |         }
851 |         // the leaf node split is done, release the write lock.
852 |         inode->is_split = false;
853 |         inode->locker->WriteUnlock();
854 |         // __atomic_store_n(&(inode->is_split), false, __ATOMIC_RELEASE);
855 | 
856 | #ifdef PERF_PROFILING_W
857 |         hist_set->Add(DO_SPLIT_LAEF, ElapsedNanos(t1));
858 | #endif
859 |         // insert again.
860 |         return +99;
861 |     }
862 | }
863 | 
864 | uint64_t SearchINode(ISN *inode, uint64_t key)
865 | {
866 | 
867 |     const uint8_t fp = f_hash(key);
868 | 
869 |     // the leaf node we get may not be the target leaf node, but the target leaf node must be behind it in the slot list.
870 |     int child_loc = seq_search(inode, key);
871 |     LSG *lfnode = inode->leaves[child_loc];
872 |     if (lfnode == NULL)
873 |     {
874 |         printf("something wrong 1!\n");
875 |         return 0;
876 |     }
877 |     uint64_t mLKey;
878 |     uint64_t result = 0;
879 |     while (true)
880 |     {
881 |         // mLKey = lfnode->max_key;
882 |         mLKey = __atomic_load_n(&lfnode->max_key, __ATOMIC_CONSUME);
883 |         while (mLKey < key)
884 |         {
885 |             // lfnode = lfnode->next;
886 |             lfnode = __atomic_load_n(&lfnode->next, __ATOMIC_CONSUME);
887 |             if (lfnode == NULL)
888 |             {
889 |                 printf("something wrong 2!\n");
890 |                 return 0;
891 |             }
892 |             // mLKey = lfnode->max_key;
893 |             mLKey = __atomic_load_n(&lfnode->max_key, __ATOMIC_CONSUME);
894 |         }
895 |         // at this moment, this max key and this lfnode are right.
896 | 
897 |         // probe the bitmap one slot at a time.
898 |         const uint64_t bitmap = lfnode->commit_bitmap;
899 |         for (int i = 0; i < MAX_ENTRY_NUM; ++i)
900 |         {
901 |             if ((bitmap & (0x1ULL << i)) && (lfnode->fingerprints[i] == fp) && (lfnode->entries[i].key == key))
902 |             {
903 |                 result = lfnode->entries[i].value;
904 |                 break;
905 |             }
906 |         }
907 | 
908 |         if (inode->is_split || mLKey != lfnode->max_key)
909 |         {
910 |             continue;
911 |         }
912 |         break;
913 |     }
914 |     return result;
915 | }
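// Illustrative note on the fingerprint probe above (the concrete values are
// made up): f_hash compresses a key into a one-byte fingerprint, so for
// key = 42 with fp = 0x9C, only slots whose fingerprint byte equals 0x9C are
// compared against the full 8-byte key. For a full group this replaces up to
// MAX_ENTRY_NUM full-key comparisons with cheap byte comparisons, plus on
// average about MAX_ENTRY_NUM/256 extra comparisons from fp collisions.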
916 | 
917 | bool Insert(PHAST *list, uint64_t key, uint64_t value)
918 | {
919 |     int ret = 0;
920 |     // index [MAX_L] is reserved for the head.
921 |     ISN *pre_nodes[MAX_L + 1], *next_nodes[MAX_L + 1], *target = NULL;
922 | 
923 | whole_retry:
924 |     // search for the target inner node first.
925 |     target = SearchList(list->inner_list, key, pre_nodes, next_nodes);
926 | 
927 |     // we have assigned an inner node to each head.
928 |     assert(target != NULL && !target->is_head && (target == pre_nodes[0]));
929 | 
930 |     target->locker->AssertReadHeld();
931 | 
932 |     ret = InsertIntoINode(target, key, value, pre_nodes, next_nodes);
933 |     if (ret == 0)
934 |     {
935 |         return true;
936 |     }
937 |     else if (ret < 0)
938 |     {
939 |         return false;
940 |     }
941 |     else
942 |     {
943 |         if (ret == 1)
944 |         {
945 |             usleep(5); // sleep 5us if another thread is splitting the leaf block.
946 |         }
947 |         else if (ret == 2)
948 |         {
949 |             usleep(1); // sleep 1us if another thread is splitting the leaf node.
950 |         }
951 |         goto whole_retry;
952 |     }
953 | }
954 | 
955 | uint64_t Search(PHAST *list, uint64_t key)
956 | {
957 |     ISN *target = NULL;
958 |     uint64_t ret = 0, target_maxkey;
959 | 
960 |     // search for the target inner node first.
961 |     target = SearchList(list->inner_list, key, &target_maxkey);
962 | 
963 |     // we have assigned an inner node to each head, so the target
964 |     // cannot be a head.
965 |     assert(target != NULL && !target->is_head);
966 |     // target->locker->AssertReadHeld();
967 | 
968 |     return SearchINode(target, key);
969 | }
970 | 
971 | int randomLevel()
972 | {
973 |     int level = 0;
974 |     float f = 0.5 * 0xFFFF;
975 |     while ((rand() & 0xFFFF) < f)
976 |         level++;
977 |     return (level < MAX_L) ? level : MAX_L - 1;
978 | }
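// Note (illustrative): the loop above yields a geometric height distribution
// with p = 0.5, i.e. P(level >= k) = 2^-k, capped at MAX_L - 1. The expected
// level is 1, and roughly one node in 2^k reaches level k.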
979 | 
980 | #ifdef USE_AGG_KEYS
981 | void update_agg_keys(ISN *head, const int head_idx)
982 | {
983 |     assert(head->is_head);
984 |     assert(head->agg_index != NULL);
985 | 
986 |     AGGIndex *new_idx = new AGGIndex(head, head->agg_index->NewSize());
987 |     // the old index is not freed here; concurrent readers may still be using it.
988 |     __atomic_store_n(&(head->agg_index), new_idx, __ATOMIC_RELEASE);
989 | }
990 | 
991 | ISN *find_in_agg_keys(ISN *head, const uint64_t key, uint64_t *target_maxkey)
992 | {
993 |     assert(head->is_head);
994 |     assert(head->agg_index != NULL);
995 | 
996 |     AGGIndex *old_idx = head->agg_index;
997 | 
998 |     return old_idx->Find(key, target_maxkey);
999 | }
1000 | #endif
1001 | 
1002 | void free_inner_list(PHAST *list)
1003 | {
1004 |     ISL *inner_list = list->inner_list;
1005 | 
1006 |     ISN *q = inner_list->head[0];
1007 |     ISN *next = NULL;
1008 |     while (q)
1009 |     {
1010 |         next = q->next[0];
1011 |         free(q);
1012 |         q = next;
1013 |     }
1014 |     free(inner_list);
1015 | }
1016 | 
1017 | void ISN_free(ISN *innernode)
1018 | {
1019 | #ifdef USE_AGG_KEYS
1020 |     delete innernode->agg_index; // allocated with new, so delete rather than free.
1021 | #endif
1022 |     free(innernode->locker);
1023 |     free(innernode);
1024 | }
1025 | 
1026 | void dram_free(PHAST *list)
1027 | {
1028 |     if (!list)
1029 |         return;
1030 |     ISL *inner_list = list->inner_list;
1031 |     ISN *q, *next;
1032 | 
1033 |     // free the inner nodes.
1034 |     q = inner_list->head[0];
1035 |     while (q)
1036 |     {
1037 |         next = q->next[0];
1038 |         ISN_free(q);
1039 |         q = next;
1040 |     }
1041 |     free(inner_list);
1042 |     free(list);
1043 | }
1044 | 
1045 | PHAST *recovery(int n_threads)
1046 | {
1047 |     ///////////////////////////
1048 |     // create a new PHAST.
1049 |     ///////////////////////////
1050 |     PHAST *phast = new PHAST;
1051 |     phast->inner_list = new InnerSkipList;
1052 |     InnerSkipList *list = phast->inner_list;
1053 | 
1054 |     ///////////////////////////
1055 |     // init the multiple headers.
1056 |     ///////////////////////////
1057 |     ISN *head = NULL;
1058 |     for (int i = 0; i < HEAD_COUNT; i++)
1059 |     {
1060 |         head = create_inner_node(0);
1061 |         if (head == NULL)
1062 |         {
1063 |             fprintf(stderr, "Memory allocation failed for head!\n");
1064 |             delete list; delete phast;
1065 |             return NULL;
1066 |         }
1067 |         head->is_head = true;
1068 |         list->head[i] = head;
1069 |         list->level[i] = 0;
1070 |         // chain the heads at every level: head1 -> head2 -> ... -> headx -> NULL.
1071 |         if (i > 0)
1072 |             for (int j = 0; j < MAX_L; j++)
1073 |                 list->head[i - 1]->next[j] = list->head[i];
1074 |     }
1075 | 
1076 |     TOID(SHA)
1077 |     root = POBJ_ROOT(pop, SHA);
1078 |     LSG **head_slot_array = D_RW(root)->slot_head_array;
1079 | 
1080 |     ///////////////////////////
1081 |     // Multithreading
1082 |     //////////////////////////
1083 |     std::vector<std::future<void>> futures(n_threads);
1084 |     uint64_t head_per_thread = HEAD_COUNT / n_threads;
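    // Illustrative partitioning (the values are made up): with HEAD_COUNT = 64
    // and n_threads = 4, head_per_thread = 16, so the threads rebuild heads
    // [0,16), [16,32), [32,48) and [48,64); the last thread also absorbs any
    // remainder when HEAD_COUNT is not divisible by n_threads.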
1085 | 
1086 |     for (int tid = 0; tid < n_threads; tid++)
1087 |     {
1088 |         int from = head_per_thread * tid;
1089 |         int to = (tid == n_threads - 1) ? HEAD_COUNT : from + head_per_thread;
1090 | 
1091 |         auto f = std::async(
1092 |             std::launch::async,
1093 |             [&list, &head_slot_array](int from, int to)
1094 |             {
1095 |                 for (int i = from; i < to; ++i)
1096 |                 { // loop:head
1097 |                     ISN *head = list->head[i];
1098 |                     ISN *cur_inode = head;
1099 | 
1100 |                     LSG *head_slot = head_slot_array[i];
1101 |                     LSG *pre_slot = NULL;
1102 |                     LSG *cur_slot = head_slot;
1103 | 
1104 |                     ISN *pre_inode[MAX_L];
1105 |                     for (int j = 0; j < MAX_L; j++)
1106 |                         pre_inode[j] = head;
1107 | 
1108 |                     uint64_t pre_maxkey = 0;
1109 |                     uint64_t count_pnode = 0;
1110 |                     uint64_t key_boundary = (i == HEAD_COUNT - 1) ? MAX_U64_KEY : (i + 1) * HASH_KEY;
1111 |                     while (cur_slot && cur_slot->max_key <= key_boundary)
1112 |                     { // loop:slot
1113 |                         ////////////////////////////////////////////////////////////////////////////
1114 |                         // step 1: recalculate the fingerprints;
1115 |                         ////////////////////////////////////////////////////////////////////////////
1116 |                         uint64_t bitmap = cur_slot->commit_bitmap;
1117 |                         // if (cur_slot->working_bitmap != bitmap)
1118 |                         //     cur_slot->working_bitmap = bitmap;
1119 |                         for (int j = 0; j < MAX_ENTRY_NUM; j++)
1120 |                             if ((bitmap & (0x1ULL << j)))
1121 |                             {
1122 |                                 uint8_t fp = f_hash(cur_slot->entries[j].key);
1123 |                                 if (cur_slot->fingerprints[j] != fp)
1124 |                                     cur_slot->fingerprints[j] = fp;
1125 |                             }
1126 | 
1127 |                         ////////////////////////////////////////////////////////////////////////////
1128 |                         // step 2: determine whether there are two identical max_keys
1129 |                         ////////////////////////////////////////////////////////////////////////////
1130 |                         if (cur_slot->max_key == pre_maxkey)
1131 |                         {
1132 |                             // redo the slot split: (1) reset the commit_bitmap, (2) update the max key, (3) update the inner node.
1133 |                             // assert(pre_slot->commit_bitmap == GROUP_BITMAP_FULL);
1134 |                             pre_slot->commit_bitmap = ~cur_slot->commit_bitmap;
1135 |                             // pre_slot->working_bitmap = pre_slot->commit_bitmap;
1136 |                             pmemobj_persist(pop, &pre_slot->commit_bitmap, 8);
1137 | 
1138 |                             uint64_t bitmap = pre_slot->commit_bitmap;
1139 |                             uint64_t maxkey = 0;
1140 |                             for (int j = 0; j < MAX_ENTRY_NUM; j++)
1141 |                                 if ((bitmap & (0x1ULL << j)) && pre_slot->entries[j].key > maxkey)
1142 |                                     maxkey = pre_slot->entries[j].key;
1143 | 
1144 |                             assert(maxkey != 0);
1145 |                             pre_slot->max_key = maxkey;
1146 |                             pmemobj_persist(pop, &pre_slot->max_key, 8);
1147 | 
1148 |                             cur_inode->keys[cur_inode->nKeys - 1] = maxkey;
1149 |                             cur_inode->mem_bitmap[cur_inode->nKeys - 1] = pre_slot->commit_bitmap;
1150 |                             cur_inode->max_key = maxkey;
1151 |                         }
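                        // Illustrative crash scenario for step 2 (the bit
                        // width is made up): a leaf split crashed after the
                        // new slot was linked but before the old slot's
                        // bitmap and max_key were persisted, so both slots
                        // report max_key = 60. With a 4-slot group where the
                        // new slot committed 0b0011, the old slot keeps the
                        // complement 0b1100 (it was full before the split),
                        // and its max_key is recomputed from the remaining
                        // live entries; that is the redo performed above.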
1152 | 
1153 |                         ////////////////////////////////////////////////////////////////////////////
1154 |                         // step 3: add this slot to cur_inode;
1155 |                         ////////////////////////////////////////////////////////////////////////////
1156 |                         if (cur_slot->is_head)
1157 |                         {
1158 |                             // create a new inner node and update the links.
1159 |                             int level = randomLevel();
1160 |                             if (level > list->level[i])
1161 |                                 list->level[i] = level;
1162 |                             ISN *innode = create_inner_node(level);
1163 |                             for (int j = 0; j <= level; j++)
1164 |                             {
1165 |                                 pre_inode[j]->next[j] = innode;
1166 |                                 pre_inode[j] = innode;
1167 |                             }
1168 |                             count_pnode++;
1169 |                             cur_inode = innode;
1170 |                         }
1171 | 
1172 |                         cur_inode->keys[cur_inode->nKeys] = cur_slot->max_key;
1173 |                         cur_inode->mem_bitmap[cur_inode->nKeys] = cur_slot->commit_bitmap;
1174 |                         cur_inode->leaves[cur_inode->nKeys] = cur_slot;
1175 |                         cur_inode->max_key = cur_slot->max_key;
1176 |                         cur_inode->nKeys++;
1177 | 
1178 |                         ////////////////////////////////////////////////////////////////////////////
1179 |                         // step 4: update the variables for the next loop;
1180 |                         ////////////////////////////////////////////////////////////////////////////
1181 |                         pre_maxkey = cur_slot->max_key;
1182 |                         pre_slot = cur_slot;
1183 |                         cur_slot = cur_slot->next;
1184 |                     }
1185 | 
1186 |                     // build the agg index;
1187 | #ifdef USE_AGG_KEYS
1188 |                     head->agg_index = new AGGIndex(head, count_pnode + AGG_REDUNDANT_SPACE);
1189 | #endif
1190 |                 }
1191 |             },
1192 |             from, to);
1193 |         futures[tid] = std::move(f);
1194 |     }
1195 | 
1196 |     for (auto &&f : futures)
1197 |         if (f.valid())
1198 |             f.get();
1199 | 
1200 |     return phast;
1201 | }
1202 | 
1203 | uint64_t UpdateINode(ISN *inode, uint64_t key, uint64_t new_value)
1204 | {
1205 |     assert(inode->locker->AssertReadHeld());
1206 | 
1207 |     uint64_t old_value = 0;
1208 |     int threshold = 0;
1209 | 
1210 |     const uint8_t fp = f_hash(key);
1211 |     // search for the target leaf node.
1212 |     const int child_loc = binary_search(inode, key);
1213 |     LSG *lfnode = inode->leaves[child_loc];
1214 |     if (UNLIKELY(lfnode == NULL))
1215 |     {
1216 |         return 0;
1217 |     }
1218 | 
1219 |     // probe the bitmap one slot at a time.
1220 |     const uint64_t bitmap = __atomic_load_n(&(lfnode->commit_bitmap), __ATOMIC_CONSUME);
1221 |     for (int i = 0; i < MAX_ENTRY_NUM; ++i)
1222 |     {
1223 |         if ((bitmap & (0x1ULL << i)) &&
1224 |             (lfnode->fingerprints[i] == fp) &&
1225 |             (lfnode->entries[i].key == key))
1226 |         {
1227 |             old_value = lfnode->entries[i].value;
1228 |             // update the old value in place.
1229 |             lfnode->entries[i].value = new_value;
1230 |             pmemobj_persist(pop, &(lfnode->entries[i].value), 8);
1231 |             return old_value;
1232 |         }
1233 |     }
1234 | 
1235 |     return old_value;
1236 | }
1237 | 
1238 | uint64_t Update(PHAST *list, uint64_t key, uint64_t newValue)
1239 | {
1240 |     ISN *target = NULL;
1241 |     uint64_t ret = 0, target_maxkey;
1242 | 
1243 |     // search for the target inner node first.
1244 |     target = SearchList(list->inner_list, key, &target_maxkey, true);
1245 | 
1246 |     // we have assigned an inner node to each head, so the target
1247 |     // cannot be a head.
1248 |     assert(target != NULL && !target->is_head);
1249 |     assert(target->locker->AssertReadHeld());
1250 | 
1251 |     ret = UpdateINode(target, key, newValue);
1252 |     target->locker->ReadUnlock();
1253 | 
1254 |     return ret;
1255 | }
1256 | 
1257 | int GetRangeFromSlot(LSG *slot, uint64_t start_key, Entry *candidate)
1258 | {
1259 |     // probe the bitmap one slot at a time.
1260 |     const uint64_t bitmap = __atomic_load_n(&(slot->commit_bitmap), __ATOMIC_CONSUME);
1261 |     int count = 0;
1262 |     if (start_key == 0)
1263 |     {
1264 |         for (int i = 0; i < MAX_ENTRY_NUM; ++i)
1265 |         {
1266 |             if (bitmap & (0x1ULL << i))
1267 |             {
1268 |                 candidate[count++] = slot->entries[i];
1269 |             }
1270 |         }
1271 |     }
1272 |     else
1273 |     {
1274 |         for (int i = 0; i < MAX_ENTRY_NUM; ++i)
1275 |         {
1276 |             if (bitmap & (0x1ULL << i) &&
1277 |                 slot->entries[i].key >= start_key)
1278 |             {
1279 |                 candidate[count++] = slot->entries[i];
1280 |             }
1281 |         }
1282 |     }
1283 |     return count;
1284 | }
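// note: GetRangeFromSlot copies the qualifying entries in slot order, not in
// key order; Range_Search below sorts each batch with insertion_sort_entry
// once the slot has been re-validated against a concurrent split.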
1285 | 
1286 | int Range_Search(PHAST *list, uint64_t key, int num, uint64_t *buf)
1287 | {
1288 |     ISN *target = NULL;
1289 |     uint64_t target_maxkey;
1290 | 
1291 |     // search for the target inner node first.
1292 |     target = SearchList(list->inner_list, key, &target_maxkey);
1293 | 
1294 |     // we have assigned an inner node to each head, so the target
1295 |     // cannot be a head.
1296 |     assert(target != NULL && !target->is_head);
1297 | 
1298 |     ////////////////////////////////////////
1299 |     // 1. get the right slot.
1300 |     // because slots are never freed, this slot stays valid even if it has been moved to another inner node.
1301 |     ////////////////////////////////////////
1302 |     int child_loc = binary_search(target, key);
1303 |     LSG *lfnode = target->leaves[child_loc];
1304 |     while (true)
1305 |     {
1306 |         if (UNLIKELY(__atomic_load_n(&(target->max_key), __ATOMIC_CONSUME) < key))
1307 |         {
1308 |             // target has split and its range has changed.
1309 |             target = __atomic_load_n(&(target->next[0]), __ATOMIC_CONSUME);
1310 |             while (target != NULL && target->is_head)
1311 |             {
1312 |                 // skip head nodes.
1313 |                 target = __atomic_load_n(&(target->next[0]), __ATOMIC_CONSUME);
1314 |             }
1315 |             if (UNLIKELY(target == NULL || target->is_head))
1316 |             {
1317 |                 // reached the tail of the skip list; abort.
1318 |                 return 0;
1319 |             }
1320 | 
1321 |             child_loc = binary_search(target, key);
1322 |             lfnode = target->leaves[child_loc];
1323 |             continue;
1324 |         }
1325 |         break;
1326 |     }
1327 | 
1328 |     if (lfnode == NULL)
1329 |     {
1330 |         // abort!
1331 |         for_debug();
1332 |         assert(false);
1333 |         return 0;
1334 |     }
1335 | 
1336 |     ////////////////////////////////////////
1337 |     // 2. get values from the slots.
1338 |     ////////////////////////////////////////
1339 |     Entry candidate[num + MAX_ENTRY_NUM]; // variable-length array (GCC/Clang extension).
1340 |     int got_count = 0; // no. of elements in candidate.
1341 |     int xnum = 0;      // no. of entries got from one slot.
1342 |     uint64_t low_key = key;
1343 |     while (lfnode != NULL && got_count < num)
1344 |     {
1345 |         LSG *lf_next = lfnode->next;
1346 |         xnum = GetRangeFromSlot(lfnode, low_key, &(candidate[got_count]));
1347 |         if (lf_next != __atomic_load_n(&(lfnode->next), __ATOMIC_CONSUME))
1348 |         {
1349 |             // this lfnode has been split; re-scan this slot to avoid duplicates.
1350 |             continue;
1351 |         }
1352 |         else
1353 |         {
1354 |             lfnode = lf_next;
1355 |         }
1356 | 
1357 |         // lfnode has been advanced to the next slot; if the slot we just scanned
1358 |         // is splitting now, it does not matter because we have already moved past it.
1359 |         got_count += xnum;
1360 |         if (got_count > num)
1361 |         {
1362 |             // we have got enough keys, just break out of the while loop.
1363 |             break;
1364 |         }
1365 | 
1366 |         // reset start_key so no comparison is needed when getting keys from the next slot.
1367 |         low_key = 0;
1368 |         // sort the keys we just got.
1369 |         insertion_sort_entry(&(candidate[got_count - xnum]), xnum);
1370 |     }
1371 | 
1372 |     ////////////////////////////////////////
1373 |     // 3. check the sort status and the keys got from the last scan.
1374 |     ////////////////////////////////////////
1375 |     int ret_count = (got_count > num) ? num : got_count;
1376 |     if (got_count > num)
1377 |     {
1378 |         // partition the keys got from the last scan so we avoid
1379 |         // sorting all of them.
1380 |         quick_select(candidate, num, got_count - xnum, got_count - 1);
1381 | 
1382 |         // sort the first part of the keys.
1383 |         insertion_sort_entry(&(candidate[got_count - xnum]), num - (got_count - xnum));
1384 |     }
1385 | 
1386 |     // copy the values out.
1387 |     for (int i = 0; i < ret_count; ++i)
1388 |     {
1389 |         buf[i] = candidate[i].value;
1390 |     }
1391 |     return ret_count;
1392 | }
1393 | 
1394 | uint64_t Delete(PHAST *list, uint64_t key)
1395 | {
1396 |     return Update(list, key, MAX_U64_KEY);
1397 | }
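// Illustrative driver for the public API above (a minimal sketch, not taken
// from test/simple_test.cc; pool setup and error handling are omitted):
//
//     PHAST *list = recovery(4);                // rebuild the DRAM index with 4 threads
//     Insert(list, 42, 4242);                   // insert key 42 -> value 4242
//     uint64_t v = Search(list, 42);            // v == 4242
//     Update(list, 42, 4343);                   // returns the old value, 4242
//     uint64_t vals[16];
//     int n = Range_Search(list, 40, 16, vals); // up to 16 values with key >= 40
//     Delete(list, 42);                         // writes the MAX_U64_KEY tombstone
//     dram_free(list);                          // release the DRAM structures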
1398 | 
1399 | void print_list_all(PHAST *list, uint64_t key)
1400 | {
1401 |     int head_idx = key / HASH_KEY;
1402 |     ISN *header = list->inner_list->head[head_idx];
1403 |     print_list_all(header);
1404 | }
1405 | 
1406 | void print_list_all(PHAST *list)
1407 | {
1408 |     for (int i = 0; i < HEAD_COUNT; ++i)
1409 |     {
1410 |         print_list_all(list->inner_list->head[i]);
1411 |     }
1412 | }
1413 | 
1414 | void print_list_all(ISN *header)
1415 | {
1416 |     assert(header->is_head);
1417 |     ISN *node = header->next[0];
1418 |     int pos = 0;
1419 |     while (node != NULL && !(node->is_head))
1420 |     {
1421 |         fprintf(stderr, "node[%d]: max key: %llu, level: %d, nKeys: %d\n",
1422 |                 pos++, node->max_key, node->nLevel, node->nKeys);
1423 |         for (int i = 0; i < node->nKeys; ++i)
1424 |         {
1425 |             fprintf(stderr, "%llu, ", node->keys[i]);
1426 |         }
1427 |         fprintf(stderr, "\n");
1428 |         node = node->next[0];
1429 |     }
1430 | }
1431 | 
1432 | void print_inode_and_next(ISN *node)
1433 | {
1434 |     int pos = 0;
1435 |     ISN *next = node->next[0];
1436 | 
1437 |     fprintf(stderr, "node[%d]: max key: %llu, level: %d, nKeys: %d\n",
1438 |             pos++, node->max_key, node->nLevel, node->nKeys);
1439 |     for (int i = 0; i < node->nKeys; ++i)
1440 |     {
1441 |         fprintf(stderr, "%llu, ", node->keys[i]);
1442 |     }
1443 |     fprintf(stderr, "\n");
1444 | 
1445 |     if (next != NULL && !(next->is_head))
1446 |     {
1447 |         fprintf(stderr, "node[%d]: max key: %llu, level: %d, nKeys: %d\n",
1448 |                 pos++, next->max_key, next->nLevel, next->nKeys);
1449 |         for (int i = 0; i < next->nKeys; ++i)
1450 |         {
1451 |             fprintf(stderr, "%llu, ", next->keys[i]);
1452 |         }
1453 |         fprintf(stderr, "\n");
1454 |     }
1455 | }
1456 | 
1457 | void print_lnode_all(LSG *node, uint64_t maxkey, const uint64_t bitmap)
1458 | {
1459 |     fprintf(stderr, "--> pre_maxkey: %lu, max key: %lu\n", maxkey, node->max_key);
1460 |     for (int i = 0; i < MAX_ENTRY_NUM; ++i)
1461 |     {
1462 |         if ((bitmap & (0x1ULL << i)))
1463 |         {
1464 |             fprintf(stderr, "\t#%d %lu %lu\n", i, node->entries[i].key, node->entries[i].value);
1465 |         }
1466 |     }
1467 | }
1468 | 
1469 | void print_lnode_all(LSG *node)
1470 | {
1471 |     fprintf(stderr, "--> max key: %lu\n", node->max_key);
1472 |     const uint64_t bitmap = __atomic_load_n(&(node->commit_bitmap), __ATOMIC_CONSUME);
1473 |     for (int i = 0; i < MAX_ENTRY_NUM; ++i)
1474 |     {
1475 |         if ((bitmap & (0x1ULL << i)))
1476 |         {
1477 |             fprintf(stderr, "\t#%d %lu %lu\n", i, node->entries[i].key, node->entries[i].value);
1478 |         }
1479 |     }
1480 | }
1481 | 
1482 | void print_lnode_and_next(LSG *node)
1483 | {
1484 |     print_lnode_all(node);
1485 |     LSG *next = node->next;
1486 |     if (next != NULL)
1487 |     {
1488 |         print_lnode_all(next);
1489 |     }
1490 | }
1491 | 
1492 | void print_list_skeleton(PHAST *list)
1493 | {
1494 |     // for (int i = 0; i < HEAD_COUNT; ++i) {
1495 |     print_list_skeleton(list->inner_list->head[0]);
1496 |     // }
1497 | }
1498 | 
1499 | void print_list_skeleton(ISN *header)
1500 | {
1501 |     size_t level_nodes[MAX_L];
1502 |     ISN *header_nodes[MAX_L];
1503 | 
1504 |     for (int level = 0; level < MAX_L; ++level)
1505 |     {
1506 |         level_nodes[level] = 0;
1507 |         header_nodes[level] = header->next[level];
1508 |     }
1509 | 
1510 |     for (int level = 0; level < MAX_L; ++level)
1511 |     {
1512 |         ISN *node = header_nodes[level];
1513 |         while (node != NULL)
1514 |         {
1515 |             ++level_nodes[level];
1516 |             node = node->next[level];
1517 |         }
1518 |     }
1519 | 
1520 |     for (int level = MAX_L - 1; level >= 0; --level)
1521 |     {
1522 |         printf("Level: %2d has %zu nodes\n", level + 1, level_nodes[level]);
1523 |     }
1524 | }
1525 | /*
1526 | void print_mem_nvm_comsumption(PHAST *list) {
1527 |     uint64_t in_num = 0, hd_num = 0, lb_num = 0;
1528 |     uint64_t agg_size = 0, mem_size = 0, nvmm_size = 0;
1529 | 
1530 | #ifdef USE_AGG_KEYS
1531 |     agg_size = sizeof(int) * HEAD_COUNT * 2;
1532 |     for (int i = 0; i < HEAD_COUNT; ++i) {
1533 |         agg_size += sizeof(uint64_t) * list->inner_list->head[i]->agg_index->Cap();
1534 |         agg_size += sizeof(ISN*) * list->inner_list->head[i]->agg_index->Cap();
1535 |     }
1536 | #endif
1537 | 
1538 | #ifdef PERF_PROFILING_M
1539 |     hist_set->Clear(ELE_IN_LB);
1540 |     hist_set->Clear(ELE_IN_LN);
1541 | #endif
1542 | 
1543 |     ISN *node = list->inner_list->head[0];
1544 |     while (node != NULL) {
1545 |         ISN *tmp = node;
1546 |         node = node->next[0];
1547 |         if (tmp->is_head) {
1548 |             ++hd_num;
1549 |             continue;
1550 |         }
1551 | 
1552 |         ++in_num;
1553 |         if (tmp->leaf_node != NULL) {
1554 |             ++lb_num;
1555 |         }
1556 | 
1557 | #ifdef PERF_PROFILING_M
1558 |         int lb_load = 0, ln_load = 0;
1559 |         for (int i = 0; i < MAX_LEAF_CAPACITY / BITMAP_SIZE; ++i) {
1560 |             lb_load += popcount1(tmp->leaf_node->bitmap_LN[i]);
1561 |         }
1562 |         for (int i = 0; i < MAX_LEAF_CAPACITY; ++i) {
1563 |             ln_load = popcount1(tmp->leaf_node->leaves[i].commit_bitmap);
1564 |             hist_set->Add(ELE_IN_LN, ln_load);
1565 |         }
1566 |         hist_set->Add(ELE_IN_LB, lb_load);
1567 | #endif
1568 |     }
1569 | 
1570 | #ifdef PERF_PROFILING_M
1571 |     hist_set->PrintResult(ELE_IN_LB);
1572 |     hist_set->PrintResult(ELE_IN_LN);
1573 |     hist_set->Clear(ELE_IN_LB);
1574 |     hist_set->Clear(ELE_IN_LN);
1575 | #endif
1576 | 
1577 |     mem_size += agg_size;
1578 |     mem_size += (hd_num + in_num) * sizeof(ISN);
1579 |     nvmm_size += lb_num * sizeof(LSN);
1580 | 
1581 |     fprintf(stderr, "Memory consumption: %lu bytes\n", mem_size);
1582 |     fprintf(stderr, "NVMM consumption: %lu bytes\n", nvmm_size);
1583 | }
1584 | */
--------------------------------------------------------------------------------