├── .gitignore ├── util ├── index_arena.cpp ├── debug_helper.h ├── timer.h ├── thread_status.h ├── util.h ├── persist.h ├── var_key.h └── lock.h ├── .gitmodules ├── example ├── CMakeLists.txt └── example.cpp ├── scripts ├── clean_pmem_dir.sh ├── show_result.sh ├── eval_recovery.sh ├── eval_utilization.sh ├── eval_threads.sh ├── eval_breakdown.sh ├── kick_the_tires.sh ├── eval_value_size.sh ├── eval_key_space.sh ├── eval_write_ratio.sh ├── eval_case.sh ├── eval_ycsb.sh └── eval_etc.sh ├── db ├── index │ ├── masstree │ │ ├── masstree_wrapper.cc │ │ ├── string_slice.cc │ │ ├── misc.cc │ │ ├── kvstats.hh │ │ ├── timestamp.hh │ │ ├── mtcounters.hh │ │ ├── compiler.cc │ │ ├── str.cc │ │ ├── memdebug.cc │ │ ├── misc.hh │ │ ├── masstree.hh │ │ ├── hashcode.hh │ │ ├── btree_leaflink.hh │ │ ├── circular_int.hh │ │ ├── memdebug.hh │ │ ├── str.hh │ │ ├── masstree_get.hh │ │ └── masstree_wrapper.h │ └── CCEH │ │ ├── pair.h │ │ └── CCEH.h ├── index_cceh.h ├── index_fastfair.h ├── hotkeyset.h ├── index_masstree.h ├── log_structured.h └── hotkeyset.cpp ├── benchmarks ├── bench_config.h.in ├── CMakeLists.txt ├── other │ ├── viper │ │ ├── hotkeyset.h │ │ ├── hash.hpp │ │ └── hotkeyset.cpp │ ├── ChameleonDB │ │ ├── hotkeyset.h │ │ ├── log.h │ │ ├── log_gc.h │ │ ├── log_gc.cpp │ │ ├── hotkeyset.cpp │ │ ├── log.cpp │ │ └── segment.h │ ├── pmemkv_bench.cpp │ ├── CMakeLists.txt │ ├── chameleondb_bench.cpp │ └── pmem_rocksdb_bench.cpp ├── murmur_hash2.h ├── histogram.h ├── trace.h ├── benchmark.cpp └── histogram.cpp ├── include ├── config.h.in ├── slice.h ├── db.h └── db_common.h ├── CMakeLists.txt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | build/ 3 | results/ 4 | *.json 5 | -------------------------------------------------------------------------------- /util/index_arena.cpp: -------------------------------------------------------------------------------- 1 | #include "index_arena.h" 2 | 3 | IndexAllocator *g_index_allocator = nullptr; 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "benchmarks/other/pmem-rocksdb"] 2 | path = benchmarks/other/pmem-rocksdb 3 | url = https://github.com/starkwj/pmem-rocksdb.git 4 | -------------------------------------------------------------------------------- /example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # example 2 | add_executable(example ${DB_FILES} example.cpp) 3 | target_link_libraries(example ${PMEM} ${PMEMOBJ}) 4 | if (INDEX_TYPE EQUAL 1) 5 | target_link_libraries(example cceh) 6 | elseif (INDEX_TYPE EQUAL 2) 7 | target_link_libraries(example fastfair) 8 | elseif (INDEX_TYPE EQUAL 3) 9 | target_link_libraries(example masstree) 10 | endif() 11 | -------------------------------------------------------------------------------- /scripts/clean_pmem_dir.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | PMEM_DIR="/mnt/pmem0" 4 | 5 | DB_PATH=( 6 | "${PMEM_DIR}"/log_kvs 7 | "${PMEM_DIR}"/viper 8 | "${PMEM_DIR}"/chameleondb 9 | "${PMEM_DIR}"/pmem_rocksdb 10 | "${PMEM_DIR}"/pmemkv_pool 11 | ) 12 | 13 | for path in "${DB_PATH[@]}"; do 14 | if [[ -e ${path} ]]; then 15 | echo "remove ${path}" 16 | rm -rf ${path} 17 | fi 18 | done 19 | -------------------------------------------------------------------------------- /db/index/masstree/masstree_wrapper.cc: -------------------------------------------------------------------------------- 1 | #include "masstree_wrapper.h" 2 | 3 | __thread typename MasstreeWrapper::table_params::threadinfo_type* 4 | MasstreeWrapper::ti = nullptr; 5 | bool MasstreeWrapper::stopping = false; 6 | uint32_t MasstreeWrapper::printing = 0; 7 | kvtimestamp_t initial_timestamp; 8 | 9 | volatile mrcu_epoch_type active_epoch = 1; 10 | volatile uint64_t globalepoch = 1; 11 | volatile bool recovering = false; 12 | -------------------------------------------------------------------------------- /db/index/CCEH/pair.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "db_common.h" 6 | 7 | typedef uint64_t Key_t; 8 | typedef uint64_t Value_t; 9 | 10 | const Key_t SENTINEL = -2; // 11111...110 11 | const Key_t INVALID = -1; // 11111...111 12 | 13 | const Value_t NONE = 0x0; 14 | 15 | struct Pair { 16 | Key_t key; 17 | Value_t value; 18 | 19 | Pair(void) : key{INVALID} {} 20 | 21 | Pair(Key_t _key, Value_t _value) : key{_key}, value{_value} {} 22 | 23 | Pair &operator=(const Pair &other) { 24 | key = other.key; 25 | value = other.value; 26 | return *this; 27 | } 28 | 29 | void *operator new(size_t size) = delete; 30 | 31 | void *operator new[](size_t size) = delete; 32 | }; 33 | -------------------------------------------------------------------------------- /example/example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "config.h" 6 | #include "db.h" 7 | 8 | 9 | int main() { 10 | size_t log_size = 1ul << 30; 11 | int num_workers = 1; 12 | int num_cleaners = 1; 13 | std::string db_path = std::string(PMEM_DIR) + "log_kvs"; 14 | DB *db = new DB(db_path, log_size, num_workers, num_cleaners); 15 | std::unique_ptr worker = db->GetWorker(); 16 | 17 | uint64_t key = 0x1234; 18 | std::string value = "hello world"; 19 | worker->Put(Slice((const char *)&key, sizeof(uint64_t)), Slice(value)); 20 | 21 | std::string val; 22 | worker->Get(Slice((const char *)&key, sizeof(uint64_t)), &val); 23 | std::cout << "value: " << val << std::endl; 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /scripts/show_result.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # extract data from benchmark_out 3 | 4 | if [ -n "$1" ]; then 5 | if ! 
test -e $1; then 6 | echo "file does not exist" 7 | exit 8 | fi 9 | else 10 | echo "missing argument" 11 | exit 12 | fi 13 | 14 | echo "Throughput:" 15 | grep "items_per" $1 | awk -F"[ ,]+" '{print $3}' 16 | 17 | echo "Get_P99:" 18 | grep "Get_P99" $1 | awk -F"[ ,]+" '{print $3}' 19 | 20 | echo "Put_P99:" 21 | grep "Put_P99" $1 | awk -F"[ ,]+" '{print $3}' 22 | 23 | echo "Get_P50:" 24 | grep "Get_P50" $1 | awk -F"[ ,]+" '{print $3}' 25 | 26 | echo "Put_P50:" 27 | grep "Put_P50" $1 | awk -F"[ ,]+" '{print $3}' 28 | 29 | echo "Compaction Bandwidth:" 30 | grep "CompactionThroughput" $1 | awk -F"[ ,]+" '{print $3}' 31 | 32 | echo "CPU Usage:" 33 | grep "CPUUsage" $1 | awk -F"[ ,]+" '{print $3}' 34 | -------------------------------------------------------------------------------- /util/debug_helper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | // #define LOGGING 7 | 8 | #ifdef LOGGING 9 | #define LOG(fmt, ...) \ 10 | fprintf(stderr, "\033[1;31mLOG(<%s>:%d %s): \033[0m" fmt "\n", __FILE__, \ 11 | __LINE__, __func__, ##__VA_ARGS__) 12 | #else 13 | #define LOG(fmt, ...) 14 | #endif 15 | 16 | #define ERROR_EXIT(fmt, ...) \ 17 | do { \ 18 | fprintf(stderr, "\033[1;31mError(<%s>:%d %s): \033[0m" fmt "\n", __FILE__, \ 19 | __LINE__, __func__, ##__VA_ARGS__); \ 20 | abort(); \ 21 | } while (0) 22 | -------------------------------------------------------------------------------- /db/index/masstree/string_slice.cc: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 
15 | */ 16 | #include "string_slice.hh" 17 | -------------------------------------------------------------------------------- /benchmarks/bench_config.h.in: -------------------------------------------------------------------------------- 1 | /* bench_config.h is generated from bench_config.h.in by CMake */ 2 | 3 | #pragma once 4 | 5 | #include "benchmarks/trace.h" 6 | #include 7 | 8 | static constexpr size_t NUM_KEYS = @NUM_KEYS@; 9 | static constexpr size_t NUM_OPS_PER_THREAD = @NUM_OPS_PER_THREAD@; 10 | static constexpr size_t NUM_WARMUP_OPS_PER_THREAD = @NUM_WARMUP_OPS_PER_THREAD@; 11 | static constexpr size_t MAX_SCAN_LENGTH = 100; 12 | static constexpr size_t VALUE_SIZE = @VALUE_SIZE@; 13 | static constexpr int NUM_GC_THREADS = @NUM_GC_THREADS@; 14 | enum BenchmarkWorkload { YCSB, ETC }; 15 | static constexpr BenchmarkWorkload benchmark_workload = @WORKLOAD_TYPE@; 16 | static constexpr YCSB_Type ycsb_type = @YCSB_TYPE@; 17 | static constexpr bool skew = @SKEW@; 18 | static constexpr double ZIPF_THETA = 0.99; 19 | 20 | 21 | #cmakedefine MEASURE_LATENCY 22 | #cmakedefine USE_ALL_CORES 23 | #cmakedefine TEST_LOAD 24 | 25 | static constexpr int NUM_ALL_CORES = 36; 26 | -------------------------------------------------------------------------------- /include/config.h.in: -------------------------------------------------------------------------------- 1 | /* config.h is generated from config.h.in by CMake */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | static constexpr char PMEM_DIR[] = "/mnt/pmem@USE_NUMA_NODE@/"; 8 | static constexpr size_t IDX_POOL_SIZE = 40ul << 30; 9 | 10 | static constexpr int USE_NUMA_NODE = @USE_NUMA_NODE@; 11 | // CPU cores information 12 | static constexpr int CORES_PER_SOCKET = 18; 13 | static constexpr int NUM_SOCKETS = 4; 14 | static constexpr int NUMA_SPAN = CORES_PER_SOCKET * NUM_SOCKETS; 15 | static constexpr int CPU_BIND_BEGIN_CORE = USE_NUMA_NODE * CORES_PER_SOCKET; 16 | 17 | #define CACHE_LINE_SIZE 64 18 | 19 | #cmakedefine IDX_PERSISTENT 20 | #cmakedefine LOG_PERSISTENT 21 | #cmakedefine USE_PMDK 22 | 23 | #define INDEX_TYPE @INDEX_TYPE@ 24 | 25 | #ifdef LOG_PERSISTENT 26 | #cmakedefine LOG_BATCHING // simulate FlatStore's batching 27 | static constexpr size_t LOG_BATCHING_SIZE = 512; 28 | #endif 29 | 30 | #cmakedefine GC_SHORTCUT 31 | #cmakedefine BATCH_COMPACTION 32 | #cmakedefine REDUCE_PM_ACCESS 33 | #cmakedefine HOT_COLD_SEPARATE 34 | 35 | #ifdef GC_SHORTCUT 36 | #define PREFETCH_ENTRY 37 | #endif 38 | 39 | #ifdef BATCH_COMPACTION 40 | #define BATCH_FLUSH_INDEX_ENTRY 41 | #endif 42 | 43 | #if defined(REDUCE_PM_ACCESS) || !defined(LOG_PERSISTENT) 44 | #define WRITE_TOMBSTONE 45 | #endif 46 | -------------------------------------------------------------------------------- /db/index_cceh.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db.h" 4 | #include "index/CCEH/CCEH.h" 5 | 6 | class CCEHIndex : public Index { 7 | public: 8 | CCEHIndex() { table_ = new CCEH_NAMESPACE::CCEH(128 * 1024); } 9 | 10 | virtual ~CCEHIndex() override { delete table_; } 11 | 12 | virtual ValueType Get(const Slice &key) override { 13 | return table_->Get(*(KeyType *)key.data()); 14 | } 15 | 16 | virtual void Put(const Slice &key, LogEntryHelper &le_helper) override { 17 | table_->Insert(*(KeyType *)key.data(), le_helper); 18 | } 19 | 20 | virtual void GCMove(const Slice &key, LogEntryHelper &le_helper) override { 21 | #ifdef GC_SHORTCUT 22 | if (le_helper.shortcut.None() || 23 | 
!table_->TryGCUpdate(*(KeyType *)key.data(), le_helper)) { 24 | table_->Insert(*(KeyType *)key.data(), le_helper); 25 | } 26 | #else 27 | table_->Insert(*(KeyType *)key.data(), le_helper); 28 | #endif 29 | } 30 | 31 | virtual void Delete(const Slice &key) override { 32 | // TODO 33 | } 34 | 35 | virtual void PrefetchEntry(const Shortcut &sc) override { 36 | CCEH_NAMESPACE::Segment *s = (CCEH_NAMESPACE::Segment *)sc.GetNodeAddr(); 37 | __builtin_prefetch(&s->sema); 38 | } 39 | 40 | private: 41 | CCEH_NAMESPACE::CCEH *table_; 42 | 43 | DISALLOW_COPY_AND_ASSIGN(CCEHIndex); 44 | }; 45 | -------------------------------------------------------------------------------- /util/timer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | static inline uint64_t NowMicros() { 7 | static constexpr uint64_t kUsecondsPerSecond = 1000000; 8 | struct timeval tv; 9 | gettimeofday(&tv, nullptr); 10 | return static_cast(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec; 11 | } 12 | 13 | #define TIMER_START(x) \ 14 | const auto timer_##x = std::chrono::steady_clock::now() 15 | 16 | #define TIMER_STOP(x) \ 17 | x += std::chrono::duration_cast( \ 18 | std::chrono::steady_clock::now() - timer_##x) \ 19 | .count() 20 | 21 | template 22 | struct Timer { 23 | Timer(T &res) : start_time_(std::chrono::steady_clock::now()), res_(res) {} 24 | 25 | ~Timer() { 26 | res_ += std::chrono::duration_cast( 27 | std::chrono::steady_clock::now() - start_time_) 28 | .count(); 29 | } 30 | 31 | std::chrono::steady_clock::time_point start_time_; 32 | T &res_; 33 | }; 34 | 35 | #ifdef LOGGING 36 | #define TIMER_START_LOGGING(x) TIMER_START(x) 37 | #define TIMER_STOP_LOGGING(x) TIMER_STOP(x) 38 | #define COUNTER_ADD_LOGGING(x, y) x += (y) 39 | #else 40 | #define TIMER_START_LOGGING(x) 41 | #define TIMER_STOP_LOGGING(x) 42 | #define COUNTER_ADD_LOGGING(x, y) 43 | #endif 44 | -------------------------------------------------------------------------------- /benchmarks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # pacman_bench 2 | find_package(benchmark) 3 | if (NOT benchmark_FOUND) 4 | include(FetchContent) 5 | FetchContent_Declare( 6 | benchmark 7 | GIT_REPOSITORY https://github.com/google/benchmark.git 8 | GIT_TAG f91b6b42b1b9854772a90ae9501464a161707d1e # v1.6.0 9 | ) 10 | set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Suppressing benchmark's tests" FORCE) 11 | FetchContent_MakeAvailable(benchmark) 12 | endif() 13 | 14 | set(BENCHMARK_FILES 15 | ${PROJECT_SOURCE_DIR}/benchmarks/benchmark.cpp 16 | ${PROJECT_SOURCE_DIR}/benchmarks/histogram.cpp 17 | ) 18 | 19 | add_executable(pacman_bench ${BENCHMARK_FILES} ${DB_FILES}) 20 | target_link_libraries(pacman_bench benchmark::benchmark ${PMEM} ${PMEMOBJ} jemalloc) 21 | if (INDEX_TYPE EQUAL 1) 22 | target_link_libraries(pacman_bench cceh) 23 | elseif (INDEX_TYPE EQUAL 2) 24 | target_link_libraries(pacman_bench fastfair) 25 | elseif (INDEX_TYPE EQUAL 3) 26 | target_link_libraries(pacman_bench masstree) 27 | endif() 28 | 29 | # recovery_test 30 | add_executable(recovery_test 31 | ${PROJECT_SOURCE_DIR}/benchmarks/recovery_test.cpp ${DB_FILES}) 32 | target_link_libraries(recovery_test ${PMEM} ${PMEMOBJ}) 33 | if (INDEX_TYPE EQUAL 1) 34 | target_link_libraries(recovery_test cceh) 35 | elseif (INDEX_TYPE EQUAL 2) 36 | target_link_libraries(recovery_test fastfair) 37 | elseif (INDEX_TYPE EQUAL 3) 38 | target_link_libraries(recovery_test masstree) 39 | endif() 40 | 41 | 
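# Typical configuration of these benchmark targets (a sketch assembled from the
# scripts/eval_*.sh invocations elsewhere in this repository; the exact option
# values vary per experiment and are not fixed by this file):
#   cmake -DCMAKE_BUILD_TYPE=Release -DINDEX_TYPE=1 -DIDX_PERSISTENT=OFF \
#         -DPACMAN=ON -DNUM_KEYS=200000000 -DNUM_GC_THREADS=4 ..
#   make pacman_bench -j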
if (EVAL_OTHER_SYSTEMS) 42 | add_subdirectory(other) 43 | endif() 44 | -------------------------------------------------------------------------------- /db/index_fastfair.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db.h" 4 | #include "index/FAST_FAIR/ff_btree.h" 5 | 6 | class FastFairIndex : public Index { 7 | public: 8 | FastFairIndex() { bt_ = new btree(); } 9 | 10 | virtual ~FastFairIndex() override { delete bt_; } 11 | 12 | virtual ValueType Get(const Slice &key) override { 13 | return (ValueType)bt_->btree_search(*(KeyType *)key.data()); 14 | } 15 | 16 | virtual void Put(const Slice &key, LogEntryHelper &le_helper) override { 17 | bt_->btree_insert(*(KeyType *)key.data(), le_helper); 18 | } 19 | 20 | virtual void GCMove(const Slice &key, LogEntryHelper &le_helper) override { 21 | #ifdef GC_SHORTCUT 22 | if (le_helper.shortcut.None() || 23 | !bt_->btree_try_update(*(KeyType *)key.data(), le_helper)) { 24 | bt_->btree_insert(*(KeyType *)key.data(), le_helper); 25 | } 26 | #else 27 | bt_->btree_insert(*(KeyType *)key.data(), le_helper); 28 | #endif 29 | } 30 | 31 | virtual void Delete(const Slice &key) override { 32 | // TODO 33 | } 34 | 35 | virtual void Scan(const Slice &key, int cnt, 36 | std::vector &vec) override { 37 | bt_->btree_search_range(*(KeyType *)key.data(), cnt, vec); 38 | } 39 | 40 | virtual void PrefetchEntry(const Shortcut &sc) override { 41 | page *p = (page *)sc.GetNodeAddr(); 42 | entry *entry_addr = &p->records[sc.GetPos()]; 43 | __builtin_prefetch(p); 44 | __builtin_prefetch(entry_addr); 45 | } 46 | 47 | private: 48 | btree *bt_; 49 | 50 | DISALLOW_COPY_AND_ASSIGN(FastFairIndex); 51 | }; 52 | -------------------------------------------------------------------------------- /db/hotkeyset.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "slice.h" 11 | #include "config.h" 12 | #include "util/lock.h" 13 | 14 | static constexpr size_t HOT_NUM = 128 * 1024; 15 | static constexpr int RECORD_BATCH_CNT = 4096; 16 | static constexpr size_t RECORD_BUFFER_SIZE = 16 * 1024; 17 | 18 | struct RecordEntry { 19 | uint64_t key; 20 | int64_t cnt; 21 | 22 | bool operator>(const RecordEntry &other) const { 23 | return cnt > other.cnt; 24 | } 25 | }; 26 | 27 | struct alignas(CACHE_LINE_SIZE) UpdateKeyRecord { 28 | int hit_cnt = 0; 29 | int total_cnt = 0; 30 | SpinLock lock; 31 | std::list > records_list; 32 | std::vector records; 33 | 34 | UpdateKeyRecord() : lock("") { 35 | records.reserve(RECORD_BUFFER_SIZE); 36 | } 37 | }; 38 | 39 | class DB; 40 | class HotKeySet { 41 | public: 42 | explicit HotKeySet(DB *db); 43 | ~HotKeySet(); 44 | 45 | void Record(const Slice &key, int worker_id, bool hit); 46 | void BeginUpdateHotKeySet(); 47 | bool Exist(const Slice &key); 48 | 49 | private: 50 | DB *db_; 51 | std::unordered_set *current_set_; 52 | std::unique_ptr update_record_; 53 | std::thread update_hot_set_thread_; 54 | std::atomic_flag update_schedule_flag_{ATOMIC_FLAG_INIT}; 55 | volatile bool need_record_ = false; 56 | volatile bool need_count_hit_ = true; 57 | std::atomic_bool stop_flag_{false}; 58 | 59 | void UpdateHotSet(); 60 | }; 61 | -------------------------------------------------------------------------------- /benchmarks/other/viper/hotkeyset.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 
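// Hot-key tracking used by the Viper baseline. This header closely mirrors
// db/hotkeyset.h (same HOT_NUM / RECORD_* constants and UpdateKeyRecord
// layout); the main difference is that this variant is constructed with the
// number of worker threads rather than a DB pointer.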
#include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "slice.h" 11 | #include "config.h" 12 | #include "util/lock.h" 13 | 14 | static constexpr size_t HOT_NUM = 128 * 1024; 15 | static constexpr int RECORD_BATCH_CNT = 4096; 16 | static constexpr size_t RECORD_BUFFER_SIZE = 16 * 1024; 17 | 18 | struct RecordEntry { 19 | uint64_t key; 20 | int64_t cnt; 21 | 22 | bool operator>(const RecordEntry &other) const { 23 | return cnt > other.cnt; 24 | } 25 | }; 26 | 27 | struct alignas(CACHE_LINE_SIZE) UpdateKeyRecord { 28 | int hit_cnt = 0; 29 | int total_cnt = 0; 30 | SpinLock lock; 31 | std::list > records_list; 32 | std::vector records; 33 | 34 | UpdateKeyRecord() : lock("") { 35 | records.reserve(RECORD_BUFFER_SIZE); 36 | } 37 | }; 38 | 39 | class HotKeySet { 40 | public: 41 | explicit HotKeySet(int num_workers); 42 | ~HotKeySet(); 43 | 44 | void Record(const Slice &key, int worker_id, bool hit); 45 | void BeginUpdateHotKeySet(); 46 | bool Exist(const Slice &key); 47 | 48 | private: 49 | int num_workers_; 50 | std::unordered_set *current_set_; 51 | std::unique_ptr update_record_; 52 | std::thread update_hot_set_thread_; 53 | std::atomic_flag update_schedule_flag_{ATOMIC_FLAG_INIT}; 54 | volatile bool need_record_ = false; 55 | volatile bool need_count_hit_ = true; 56 | std::atomic_bool stop_flag_{false}; 57 | 58 | void UpdateHotSet(); 59 | }; 60 | -------------------------------------------------------------------------------- /util/thread_status.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // epoch & rcu 6 | class ThreadStatus { 7 | public: 8 | struct alignas(64) Status { 9 | volatile uint64_t epoch; 10 | volatile bool running; 11 | volatile bool safe_waiting; 12 | }; 13 | 14 | ThreadStatus(int num_threads) : num_threads(num_threads) { 15 | status_set = std::make_unique(num_threads); 16 | for (int i = 0; i < num_threads; i++) { 17 | status_set[i].epoch = 0; 18 | status_set[i].running = false; 19 | status_set[i].safe_waiting = false; 20 | } 21 | } 22 | 23 | void rcu_progress(int worker_id) { 24 | status_set[worker_id].running = true; 25 | ++status_set[worker_id].epoch; 26 | // mfence(); 27 | } 28 | 29 | // void rcu_safe_wait(int worker_id) { 30 | // status_set[worker_id].safe_waiting = true; 31 | // } 32 | 33 | // void rcu_continue(int worker_id) { 34 | // status_set[worker_id].safe_waiting = false; 35 | // } 36 | 37 | void rcu_exit(int worker_id) { 38 | status_set[worker_id].running = false; 39 | } 40 | 41 | void rcu_barrier() { 42 | uint64_t prev_status[num_threads]; 43 | for (int i = 0; i < num_threads; i++) { 44 | prev_status[i] = status_set[i].epoch; 45 | } 46 | for (int i = 0; i < num_threads; i++) { 47 | while (status_set[i].running && prev_status[i] == status_set[i].epoch) 48 | ; 49 | } 50 | } 51 | 52 | uint64_t get_epoch(int worker_id) { 53 | return status_set[worker_id].epoch; 54 | } 55 | 56 | private: 57 | int num_threads; 58 | std::unique_ptr status_set; 59 | }; 60 | -------------------------------------------------------------------------------- /db/index_masstree.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db.h" 4 | #include "index/masstree/masstree_wrapper.h" 5 | 6 | class MasstreeIndex : public Index { 7 | public: 8 | MasstreeIndex() { mt_ = new MasstreeWrapper(); } 9 | 10 | virtual ~MasstreeIndex() override { delete mt_; } 11 | 12 | void MasstreeThreadInit(int thread_id) { 
mt_->thread_init(thread_id); } 13 | 14 | virtual ValueType Get(const Slice &key) override { 15 | ValueType val; 16 | bool found = mt_->search(*(KeyType *)key.data(), val); 17 | if (found) { 18 | return val; 19 | } else { 20 | return INVALID_VALUE; 21 | } 22 | } 23 | 24 | virtual void Put(const Slice &key, LogEntryHelper &le_helper) override { 25 | mt_->insert(*(KeyType *)key.data(), le_helper); 26 | } 27 | 28 | virtual void GCMove(const Slice &key, LogEntryHelper &le_helper) override { 29 | #ifdef GC_SHORTCUT 30 | if (le_helper.shortcut.None()) { 31 | mt_->gc_insert(*(KeyType *)key.data(), le_helper); 32 | } else { 33 | mt_->gc_insert_with_shortcut(*(KeyType *)key.data(), le_helper); 34 | } 35 | #else 36 | mt_->gc_insert(*(KeyType *)key.data(), le_helper); 37 | #endif 38 | } 39 | 40 | virtual void Delete(const Slice &key) override { 41 | // TODO 42 | } 43 | 44 | virtual void Scan(const Slice &key, int cnt, 45 | std::vector &vec) override { 46 | mt_->scan(*(KeyType *)key.data(), cnt, vec); 47 | } 48 | 49 | // virtual void PrefetchEntry(const Shortcut &sc) override {} 50 | 51 | private: 52 | MasstreeWrapper *mt_; 53 | 54 | DISALLOW_COPY_AND_ASSIGN(MasstreeIndex); 55 | }; 56 | -------------------------------------------------------------------------------- /util/util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "util/debug_helper.h" 4 | // #include "config.h" 5 | #include 6 | // #include 7 | // #include 8 | // #include 9 | 10 | static __attribute__((always_inline)) inline void compiler_barrier() { 11 | asm volatile("" ::: "memory"); 12 | } 13 | 14 | #define likely(x) __builtin_expect(!!(x), 1) 15 | #define unlikely(x) __builtin_expect(!!(x), 0) 16 | 17 | // A macro to disallow the copy constructor and operator= functions 18 | #ifndef DISALLOW_COPY_AND_ASSIGN 19 | #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ 20 | TypeName(const TypeName &) = delete; \ 21 | TypeName &operator=(const TypeName &) = delete; 22 | #endif 23 | 24 | 25 | // // bind core 26 | // static inline void bind_core(uint16_t core_id) { 27 | // cpu_set_t cpuset; 28 | // CPU_ZERO(&cpuset); 29 | // CPU_SET(core_id, &cpuset); 30 | // int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); 31 | // if (rc != 0) { 32 | // ERROR_EXIT("can't bind core %d!", core_id); 33 | // } 34 | // } 35 | 36 | // // bind core on the same numa 37 | // static inline void bind_core_on_numa(uint16_t core_seq) { 38 | // uint16_t core_id; 39 | // if (core_seq < CORES_PER_SOCKET) { 40 | // core_id = CPU_BIND_BEGIN_CORE + core_seq; 41 | // } else { 42 | // if (core_seq > CORES_PER_SOCKET * 2) { 43 | // ERROR_EXIT("core seq %d is out of range", core_seq); 44 | // } 45 | // core_id = CPU_BIND_BEGIN_CORE + NUMA_SPAN + core_seq - CORES_PER_SOCKET; 46 | // } 47 | // bind_core(core_id); 48 | // } 49 | -------------------------------------------------------------------------------- /benchmarks/murmur_hash2.h: -------------------------------------------------------------------------------- 1 | #if !defined(_MURMUR_HASH2_H_) 2 | #define _MURMUR_HASH2_H_ 3 | 4 | #include 5 | #include 6 | 7 | #define BIG_CONSTANT(x) (x##LLU) 8 | 9 | /* MURMUR_PLATFORM_H */ 10 | 11 | /*----------------------------------------------------------------------------- 12 | // MurmurHash2, 64-bit versions, by Austin Appleby 13 | // 14 | // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 15 | // and endian-ness issues if used across multiple platforms. 
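// Illustrative usage (an assumption added for documentation, not code taken
// from this repository's callers): hashing a fixed-width integer key,
//   uint64_t k = 42;
//   uint64_t h = MurmurHash64A(&k, sizeof(k));  // seed defaults to 931901
// identical keys always map to the same 64-bit value for a given seed.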
16 | // 17 | // 64-bit hash for 64-bit platforms 18 | */ 19 | 20 | inline uint64_t 21 | MurmurHash64A(const void *key, int len, uint64_t seed = 931901) 22 | { 23 | const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); 24 | const int r = 47; 25 | 26 | uint64_t h = seed ^ (len * m); 27 | 28 | const uint64_t *data = (const uint64_t *)key; 29 | const uint64_t *end = data + (len / 8); 30 | 31 | while (data != end) { 32 | uint64_t k = *data++; 33 | 34 | k *= m; 35 | k ^= k >> r; 36 | k *= m; 37 | 38 | h ^= k; 39 | h *= m; 40 | } 41 | 42 | const unsigned char *data2 = (const unsigned char *)data; 43 | 44 | switch (len & 7) { 45 | case 7: 46 | h ^= ((uint64_t)data2[6]) << 48; 47 | case 6: 48 | h ^= ((uint64_t)data2[5]) << 40; 49 | case 5: 50 | h ^= ((uint64_t)data2[4]) << 32; 51 | case 4: 52 | h ^= ((uint64_t)data2[3]) << 24; 53 | case 3: 54 | h ^= ((uint64_t)data2[2]) << 16; 55 | case 2: 56 | h ^= ((uint64_t)data2[1]) << 8; 57 | case 1: 58 | h ^= ((uint64_t)data2[0]); 59 | h *= m; 60 | }; 61 | 62 | h ^= h >> r; 63 | h *= m; 64 | h ^= h >> r; 65 | 66 | return h; 67 | } 68 | 69 | #endif // _MURMUR_HASH2_H_ 70 | -------------------------------------------------------------------------------- /benchmarks/other/ChameleonDB/hotkeyset.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "slice.h" 11 | #include "config.h" 12 | #include "util/lock.h" 13 | 14 | namespace CHAMELEONDB_NAMESPACE { 15 | 16 | static constexpr uint64_t HOT_NUM = 128 * 1024; 17 | static constexpr int RECORD_BATCH_CNT = 4096; 18 | static constexpr uint32_t RECORD_BUFFER_SIZE = 16 * 1024; 19 | 20 | struct RecordEntry { 21 | uint64_t key; 22 | int64_t cnt; 23 | 24 | bool operator>(const RecordEntry &other) const { 25 | return cnt > other.cnt; 26 | } 27 | }; 28 | 29 | struct alignas(CACHE_LINE_SIZE) UpdateKeyRecord { 30 | int hit_cnt = 0; 31 | int total_cnt = 0; 32 | SpinLock lock; 33 | std::list > records_list; 34 | std::vector records; 35 | 36 | UpdateKeyRecord() : lock("") { 37 | records.reserve(RECORD_BUFFER_SIZE); 38 | } 39 | }; 40 | 41 | class ChameleonDB; 42 | class HotKeySet { 43 | public: 44 | explicit HotKeySet(ChameleonDB *db); 45 | ~HotKeySet(); 46 | 47 | void Record(const Slice &key, int worker_id, bool hit); 48 | void BeginUpdateHotKeySet(); 49 | bool Exist(const Slice &key); 50 | 51 | private: 52 | ChameleonDB *db_; 53 | std::unordered_set *current_set_; 54 | std::unique_ptr update_record_; 55 | std::thread update_hot_set_thread_; 56 | std::atomic_flag update_schedule_flag_{ATOMIC_FLAG_INIT}; 57 | volatile bool need_record_ = false; 58 | volatile bool need_count_hit_ = true; 59 | std::atomic_bool stop_flag_{false}; 60 | 61 | void UpdateHotSet(); 62 | }; 63 | 64 | } // namespace CHAMELEONDB_NAMESPACE 65 | -------------------------------------------------------------------------------- /db/index/masstree/misc.cc: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. 
These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #include "misc.hh" 17 | #include 18 | #include "kvthread.hh" 19 | 20 | int clp_parse_suffixdouble(Clp_Parser *clp, const char *vstr, 21 | int complain, void *) 22 | { 23 | const char *post; 24 | if (*vstr == 0 || isspace((unsigned char) *vstr)) 25 | post = vstr; 26 | else 27 | clp->val.d = strtod(vstr, (char **) &post); 28 | if (vstr != post && (*post == 'K' || *post == 'k')) 29 | clp->val.d *= 1000, ++post; 30 | else if (vstr != post && (*post == 'M' || *post == 'm')) 31 | clp->val.d *= 1000000, ++post; 32 | else if (vstr != post && (*post == 'B' || *post == 'b' || *post == 'G' || *post == 'g')) 33 | clp->val.d *= 1000000000, ++post; 34 | if (*vstr != 0 && *post == 0) 35 | return 1; 36 | else if (complain) 37 | return Clp_OptionError(clp, "%<%O%> expects a real number, not %<%s%>", vstr); 38 | else 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /benchmarks/other/ChameleonDB/log.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "segment.h" 4 | #include "util/util.h" 5 | 6 | 7 | namespace CHAMELEONDB_NAMESPACE { 8 | 9 | class ChameleonDB; 10 | class LogGCer; 11 | 12 | enum FreeStatus { FS_Sufficient, FS_Trigger, FS_Insufficient }; 13 | 14 | class Log { 15 | public: 16 | explicit Log(std::string db_path, size_t log_size, ChameleonDB *db, 17 | int num_workers, int num_cleaners); 18 | ~Log(); 19 | 20 | LogSegment *NewSegment(bool hot); 21 | void FreezeSegment(LogSegment *old_segment); 22 | LogSegment *GetSegment(int segment_id); 23 | int GetSegmentID(const char *addr); 24 | int GetSegmentCleanerID(const char *addr); 25 | 26 | private: 27 | const int num_workers_; 28 | const int num_cleaners_; 29 | char *pool_start_; 30 | const size_t total_log_size_; 31 | const int num_segments_; 32 | std::atomic stop_flag_{false}; 33 | SpinLock free_list_lock_; 34 | 35 | std::vector all_segments_; 36 | std::queue free_segments_; 37 | std::vector log_cleaners_; 38 | 39 | std::atomic num_free_segments_{0}; 40 | std::atomic alloc_counter_{0}; 41 | const int num_limit_free_segments_; 42 | 43 | volatile FreeStatus free_status_ = FS_Sufficient; 44 | std::atomic_flag FS_flag_{ATOMIC_FLAG_INIT}; 45 | 46 | // statistics 47 | #ifdef LOGGING 48 | std::atomic num_new_segment_{0}; 49 | std::atomic num_new_hot_{0}; 50 | std::atomic num_new_cold_{0}; 51 | #endif 52 | 53 | void AddClosedSegment(LogSegment *segment); 54 | void LockFreeList() { free_list_lock_.lock(); } 55 | void UnlockFreeList() { free_list_lock_.unlock(); } 56 | 57 | friend class LogGCer; 58 | DISALLOW_COPY_AND_ASSIGN(Log); 59 | }; 60 | 61 | } // namespace CHAMELEONDB_NAMESPACE 62 | -------------------------------------------------------------------------------- /scripts/eval_recovery.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M" 11 | } 12 | 13 | if [[ $# != 1 || $1 < 1 || $1 > 4 ]]; then 14 | help 15 | exit 16 | fi 17 | 18 | # to avoid no available space 19 | ./clean_pmem_dir.sh 20 | 21 | if [[ $1 == 1 || $1 == 2 ]]; then 22 | INDEX_TYPE=1 23 | elif [[ $1 == 3 ]]; then 24 | INDEX_TYPE=2 25 | elif [[ $1 == 4 ]]; then 26 | INDEX_TYPE=3 27 | fi 28 | 29 | if [[ $1 == 1 || $1 == 4 ]]; then 30 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 31 | else 32 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 33 | fi 34 | 35 | NUMA_AFFINITY=0 36 | 37 | NUM=200000000 38 | NUM_OPS=100000000 39 | INIT_UTIL=80 40 | VALUE_SIZE=256 41 | 42 | SERVICE_THREADS=24 # number of workload threads 43 | GC_THREADS=8 # number of compaction threads and recovery threads 44 | 45 | mkdir -p ../results 46 | mkdir -p ../build 47 | cd ../build 48 | 49 | # it may take long to get third-party dependencies, so don't delete _deps 50 | ls | grep -v _deps | xargs rm -rf 51 | # build 52 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=$NUMA_AFFINITY \ 53 | -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} -DPACMAN=ON .. 54 | 55 | make recovery_test -j 56 | 57 | # disable cpu scaling 58 | sudo cpupower frequency-set --governor performance > /dev/null 59 | # clean cache 60 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 61 | 62 | numactl --membind=$NUMA_AFFINITY --cpunodebind=$NUMA_AFFINITY \ 63 | ./benchmarks/recovery_test --num=$NUM --num_ops=$NUM_OPS --threads=$SERVICE_THREADS --gc_threads=$GC_THREADS --value_size=$VALUE_SIZE --init_util=$INIT_UTIL 64 | 65 | sudo cpupower frequency-set --governor powersave > /dev/null 66 | -------------------------------------------------------------------------------- /db/index/masstree/kvstats.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #ifndef KVSTATS_HH 17 | #define KVSTATS_HH 1 18 | #include 19 | 20 | struct kvstats { 21 | double min, max, sum, sumsq; 22 | long count; 23 | kvstats() 24 | : min(-1), max(-1), sum(0), sumsq(0), count(0) { 25 | } 26 | void add(double x) { 27 | if (!count || x < min) 28 | min = x; 29 | if (max < x) 30 | max = x; 31 | sum += x; 32 | sumsq += x * x; 33 | count += 1; 34 | } 35 | typedef void (kvstats::*unspecified_bool_type)(double); 36 | operator unspecified_bool_type() const { 37 | return count ? 
&kvstats::add : 0; 38 | } 39 | void print_report(const char *name) const { 40 | if (count) 41 | printf("%s: n %ld, total %.0f, average %.0f, min %.0f, max %.0f, stddev %.0f\n", 42 | name, count, sum, sum / count, min, max, 43 | sqrt((sumsq - sum * sum / count) / (count - 1))); 44 | } 45 | double avg() { 46 | if (count) 47 | return sum / count; 48 | else 49 | return 0; 50 | } 51 | }; 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /db/index/masstree/timestamp.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #ifndef TIMESTAMP_HH 17 | #define TIMESTAMP_HH 18 | #include "compiler.hh" 19 | #include 20 | #include 21 | #include 22 | 23 | #if HAVE_INT64_T_IS_LONG_LONG 24 | #define PRIuKVTS "llu" 25 | #else 26 | #define PRIuKVTS "lu" 27 | #endif 28 | #define PRIKVTSPARTS "%lu.%06lu" 29 | 30 | #define KVTS_HIGHPART(t) ((unsigned long) ((t) >> 32)) 31 | #define KVTS_LOWPART(t) ((unsigned long) (uint32_t) (t)) 32 | 33 | typedef uint64_t kvtimestamp_t; 34 | 35 | inline kvtimestamp_t timestamp() { 36 | struct timeval tv; 37 | gettimeofday(&tv, 0); 38 | return ((kvtimestamp_t) tv.tv_sec << 32) | (unsigned int)tv.tv_usec; 39 | } 40 | 41 | inline kvtimestamp_t timestamp_sub(kvtimestamp_t a, kvtimestamp_t b) { 42 | a -= b; 43 | if (KVTS_LOWPART(a) > 999999) 44 | a -= ((kvtimestamp_t) 1 << 32) - 1000000; 45 | return a; 46 | } 47 | 48 | extern kvtimestamp_t initial_timestamp; 49 | 50 | inline double now() { 51 | struct timeval tv; 52 | gettimeofday(&tv, 0); 53 | return tv.tv_sec + tv.tv_usec / 1000000.0; 54 | } 55 | 56 | inline struct timespec &set_timespec(struct timespec &x, double y) { 57 | double ipart = floor(y); 58 | x.tv_sec = (long) ipart; 59 | x.tv_nsec = (long) ((y - ipart) * 1e9); 60 | return x; 61 | } 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /benchmarks/other/viper/hash.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * This code was taken and modified from https://github.com/DICL/CCEH, the original authors of CCEH. 3 | * 4 | * Orignial License: 5 | * Copyright (c) 2018, Sungkyunkwan University. All rights reserved. 6 | * The license is a free non-exclusive, non-transferable license to reproduce, 7 | * use, modify and display the source code version of the Software, with or 8 | * without modifications solely for non-commercial research, educational or 9 | * evaluation purposes. The license does not entitle Licensee to technical 10 | * support, telephone assistance, enhancements or updates to the Software. 
All 11 | * rights, title to and ownership interest in the Software, including all 12 | * intellectual property rights therein shall remain in Sungkyunkwan University. 13 | */ 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | 20 | namespace viper::cceh { 21 | 22 | inline size_t standard(const void* _ptr, size_t _len, 23 | size_t _seed = static_cast(0xc70f6907UL)) { 24 | return std::_Hash_bytes(_ptr, _len, _seed); 25 | } 26 | 27 | inline size_t murmur2(const void* key, size_t len, size_t seed = 0xc70f6907UL) { 28 | const unsigned int m = 0x5bd1e995; 29 | const int r = 24; 30 | unsigned int h = seed ^len; 31 | const unsigned char* data = (const unsigned char*) key; 32 | 33 | while (len >= 4) { 34 | unsigned int k = *(unsigned int*) data; 35 | k *= m; 36 | k ^= k >> r; 37 | k *= m; 38 | h *= m; 39 | h ^= k; 40 | data += 4; 41 | len -= 4; 42 | } 43 | 44 | switch (len) { 45 | case 3: h ^= data[2] << 16; 46 | case 2: h ^= data[1] << 8; 47 | case 1: h ^= data[0]; 48 | h *= m; 49 | }; 50 | 51 | h ^= h >> 13; 52 | h *= m; 53 | h ^= h >> 15; 54 | return h; 55 | } 56 | 57 | static size_t 58 | (* hash_funcs[2])(const void* key, size_t len, size_t seed) = { 59 | standard, 60 | murmur2 61 | }; 62 | 63 | inline size_t h(const void* key, size_t len, size_t seed = 0xc70697UL) { 64 | return hash_funcs[0](key, len, seed); 65 | } 66 | 67 | } // namespace viper::cceh 68 | -------------------------------------------------------------------------------- /db/index/masstree/mtcounters.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #ifndef MTCOUNTERS_HH 17 | #define MTCOUNTERS_HH 1 18 | 19 | enum memtag { 20 | // memtags are divided into a *type* and a *pool*. 21 | // The type is purely for debugging. The pool indicates the pool from 22 | // which an allocation was taken. 
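  // (Assumed reading of the encoding, inferred from memtag_pool_mask below:
  // the low byte of a tag selects the pool, while the higher bits only name
  // the allocation type for debugging.)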
23 | memtag_none = 0x000, 24 | memtag_value = 0x100, 25 | memtag_limbo = 0x500, 26 | memtag_masstree_leaf = 0x1000, 27 | memtag_masstree_internode = 0x1100, 28 | memtag_masstree_ksuffixes = 0x1200, 29 | memtag_masstree_gc = 0x1300, 30 | memtag_pool_mask = 0xFF 31 | }; 32 | 33 | enum threadcounter { 34 | // order is important among tc_alloc constants: 35 | tc_alloc, 36 | tc_alloc_value = tc_alloc, 37 | tc_alloc_other = tc_alloc + 1, 38 | // end tc_alloc constants 39 | tc_gc, 40 | tc_limbo_slots, 41 | tc_replay_create_delta, 42 | tc_replay_remove_delta, 43 | tc_root_retry, 44 | tc_internode_retry, 45 | tc_leaf_retry, 46 | tc_leaf_walk, 47 | // order is important among tc_stable constants: 48 | tc_stable, 49 | tc_stable_internode_insert = tc_stable + 0, 50 | tc_stable_internode_split = tc_stable + 1, 51 | tc_stable_leaf_insert = tc_stable + 2, 52 | tc_stable_leaf_split = tc_stable + 3, 53 | // end tc_stable constants 54 | tc_internode_lock, 55 | tc_leaf_lock, 56 | tc_max 57 | }; 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /benchmarks/other/ChameleonDB/log_gc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "chameleon_db.h" 8 | #include "log.h" 9 | #include "util/util.h" 10 | 11 | namespace CHAMELEONDB_NAMESPACE { 12 | 13 | class LogGCer { 14 | public: 15 | 16 | uint64_t clean_time_ns_ = 0; 17 | 18 | LogGCer(ChameleonDB *db, int gc_id, Log *log, LogSegment *reserved_segment) 19 | : db_(db), 20 | gc_id_(gc_id), 21 | log_(log), 22 | reserved_segment_(reserved_segment), 23 | backup_segment_(nullptr), 24 | list_lock_(std::string("gc_list_lock_") + std::to_string(gc_id)) { 25 | reserved_segment_->StartUsing(false); 26 | } 27 | 28 | ~LogGCer() { 29 | gc_thread_.join(); 30 | list_lock_.report(); 31 | } 32 | 33 | void StartGCThread() { gc_thread_ = std::thread(&LogGCer::GCEntry, this); } 34 | 35 | void AddClosedSegment(LogSegment *segment) { 36 | LockUsedList(); 37 | #ifdef HOT_COLD_SEPARATE 38 | if (segment->IsHot()) { 39 | closed_hot_segments_.push_back(segment); 40 | } else { 41 | closed_cold_segments_.push_back(segment); 42 | } 43 | #else 44 | closed_hot_segments_.push_back(segment); 45 | #endif 46 | UnlockUsedList(); 47 | } 48 | 49 | private: 50 | ChameleonDB *db_; 51 | int gc_id_; 52 | Log *log_; 53 | std::thread gc_thread_; 54 | LogSegment *reserved_segment_; 55 | LogSegment *backup_segment_; // prevent gc dead lock 56 | double last_update_time_ = 0; 57 | 58 | std::list closed_hot_segments_; 59 | std::list closed_cold_segments_; 60 | int clean_segment_cnt_ = 0; 61 | const int gc_hot_per_cold_ = 5; 62 | SpinLock list_lock_; 63 | 64 | bool IsGarbage(KVItem *kv) { 65 | ValueType val = db_->IndexGet(kv->GetKey()); 66 | return TaggedPointer(val).GetAddr() != reinterpret_cast(kv); 67 | return true; 68 | } 69 | 70 | void LockUsedList() { list_lock_.lock(); } 71 | void UnlockUsedList() { list_lock_.unlock(); } 72 | 73 | void GCEntry(); 74 | bool NeedGC(); 75 | void DoMemoryClean(); 76 | void CompactSegment(LogSegment *gc_segment); 77 | void FreezeReservedAndGetNew(); 78 | 79 | DISALLOW_COPY_AND_ASSIGN(LogGCer); 80 | }; 81 | 82 | } // namespace CHAMELEONDB_NAMESPACE 83 | 84 | -------------------------------------------------------------------------------- /db/index/masstree/compiler.cc: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * 
Copyright (c) 2012-2014 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2014 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #include "compiler.hh" 17 | #include 18 | #include 19 | 20 | void fail_always_assert(const char* file, int line, 21 | const char* assertion, const char* message) { 22 | if (message) 23 | fprintf(stderr, "assertion \"%s\" [%s] failed: file \"%s\", line %d\n", 24 | message, assertion, file, line); 25 | else 26 | fprintf(stderr, "assertion \"%s\" failed: file \"%s\", line %d\n", 27 | assertion, file, line); 28 | abort(); 29 | } 30 | 31 | void fail_masstree_invariant(const char* file, int line, 32 | const char* assertion, const char* message) { 33 | if (message) 34 | fprintf(stderr, "invariant \"%s\" [%s] failed: file \"%s\", line %d\n", 35 | message, assertion, file, line); 36 | else 37 | fprintf(stderr, "invariant \"%s\" failed: file \"%s\", line %d\n", 38 | assertion, file, line); 39 | abort(); 40 | } 41 | 42 | void fail_masstree_precondition(const char* file, int line, 43 | const char* assertion, const char* message) { 44 | if (message) 45 | fprintf(stderr, "precondition \"%s\" [%s] failed: file \"%s\", line %d\n", 46 | message, assertion, file, line); 47 | else 48 | fprintf(stderr, "precondition \"%s\" failed: file \"%s\", line %d\n", 49 | assertion, file, line); 50 | abort(); 51 | } 52 | -------------------------------------------------------------------------------- /scripts/eval_utilization.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M, 5: Viper" 11 | echo " : 0: false, 1: true" 12 | } 13 | 14 | if [[ $# != 2 || $1 < 1 || $1 > 5 || ($2 != 0 && $2 != 1) ]]; then 15 | help 16 | exit 17 | fi 18 | 19 | # to avoid no available space 20 | ./clean_pmem_dir.sh 21 | 22 | if [[ $1 == 1 || $1 == 2 || $1 == 5 ]]; then 23 | INDEX_TYPE=1 24 | elif [[ $1 == 3 ]]; then 25 | INDEX_TYPE=2 26 | elif [[ $1 == 4 ]]; then 27 | INDEX_TYPE=3 28 | fi 29 | 30 | if [[ $1 == 1 || $1 == 4 || $1 == 5 ]]; then 31 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 32 | else 33 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 34 | fi 35 | 36 | if [[ $1 != 5 ]]; then 37 | TARGET="pacman_bench" 38 | TARGET_CMD="./benchmarks/pacman_bench" 39 | else 40 | TARGET="viper_bench" 41 | TARGET_CMD="./benchmarks/other/viper_bench" 42 | WITH_OTHERS="-DEVAL_OTHER_SYSTEMS=ON" 43 | fi 44 | 45 | PACMAN_OPT="" 46 | if [[ $2 == 1 ]]; then 47 | PACMAN_OPT="-DPACMAN=ON" 48 | fi 49 | 50 | FILTER="--benchmark_filter=/(50|60|70|80|90)/.*/threads:(12)$" 51 | SKEW="true" # true (Zipfian), false (uniform) 52 | 53 | NUMA_AFFINITY=0 54 | 55 | mkdir -p ../results 56 | mkdir -p ../build 57 | cd ../build 58 | 59 | OUTPUT_FILE=../results/utilization_$1_$2 60 | 61 | # it may take long to get third-party dependencies, so don't delete _deps 62 | ls | grep -v _deps | xargs rm -rf 63 | 64 | # build 65 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 66 | ${WITH_OTHERS} -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} ${PACMAN_OPT} \ 67 | -DNUM_KEYS=200000000 -DNUM_OPS_PER_THREAD=25000000 \ 68 | -DNUM_WARMUP_OPS_PER_THREAD=25000000 -DNUM_GC_THREADS=4 -DSKEW=${SKEW} .. 69 | 70 | make ${TARGET} -j 71 | 72 | # disable cpu scaling 73 | sudo cpupower frequency-set --governor performance > /dev/null 74 | 75 | # clean cache 76 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 77 | 78 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 79 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} \ 80 | --benchmark_out=${OUTPUT_FILE} --benchmark_out_format=json 81 | 82 | sudo cpupower frequency-set --governor powersave > /dev/null 83 | -------------------------------------------------------------------------------- /scripts/eval_threads.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M, 5: Viper" 11 | echo " : 0: false, 1: true" 12 | } 13 | 14 | if [[ $# != 2 || $1 < 1 || $1 > 5 || ($2 != 0 && $2 != 1) ]]; then 15 | help 16 | exit 17 | fi 18 | 19 | # to avoid no available space 20 | ./clean_pmem_dir.sh 21 | 22 | if [[ $1 == 1 || $1 == 2 || $1 == 5 ]]; then 23 | INDEX_TYPE=1 24 | elif [[ $1 == 3 ]]; then 25 | INDEX_TYPE=2 26 | elif [[ $1 == 4 ]]; then 27 | INDEX_TYPE=3 28 | fi 29 | 30 | if [[ $1 == 1 || $1 == 4 || $1 == 5 ]]; then 31 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 32 | else 33 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 34 | fi 35 | 36 | if [[ $1 != 5 ]]; then 37 | TARGET="pacman_bench" 38 | TARGET_CMD="./benchmarks/pacman_bench" 39 | else 40 | TARGET="viper_bench" 41 | TARGET_CMD="./benchmarks/other/viper_bench" 42 | WITH_OTHERS="-DEVAL_OTHER_SYSTEMS=ON" 43 | fi 44 | 45 | PACMAN_OPT="" 46 | if [[ $2 == 1 ]]; then 47 | PACMAN_OPT="-DPACMAN=ON" 48 | fi 49 | 50 | THREADS="1|6|12|18|24|30" 51 | FILTER="--benchmark_filter=/(80)/.*/threads:(${THREADS})$" 52 | # skew (Zipfian) or uniform 53 | SKEW="true" # true (Zipfian), false (uniform) 54 | 55 | NUMA_AFFINITY=0 56 | 57 | mkdir -p ../results 58 | mkdir -p ../build 59 | cd ../build 60 | 61 | OUTPUT_FILE=../results/threads_$1_$2 62 | 63 | # it may take long to get third-party dependencies, so don't delete _deps 64 | ls | grep -v _deps | xargs rm -rf 65 | 66 | # build 67 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 68 | ${WITH_OTHERS} -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} ${PACMAN_OPT} \ 69 | -DNUM_KEYS=200000000 -DNUM_OPS_PER_THREAD=25000000 \ 70 | -DNUM_WARMUP_OPS_PER_THREAD=25000000 -DNUM_GC_THREADS=4 -DSKEW=${SKEW} .. 71 | 72 | make ${TARGET} -j 73 | 74 | # disable cpu scaling 75 | sudo cpupower frequency-set --governor performance > /dev/null 76 | 77 | # clean cache 78 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 79 | 80 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 81 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} \ 82 | --benchmark_out=${OUTPUT_FILE} --benchmark_out_format=json 83 | 84 | sudo cpupower frequency-set --governor powersave > /dev/null 85 | -------------------------------------------------------------------------------- /scripts/eval_breakdown.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M" 11 | } 12 | 13 | if [[ $# != 1 || $1 < 1 || $1 > 4 ]]; then 14 | help 15 | exit 16 | fi 17 | 18 | # to avoid no available space 19 | ./clean_pmem_dir.sh 20 | 21 | if [[ $1 == 1 || $1 == 2 ]]; then 22 | INDEX_TYPE=1 23 | elif [[ $1 == 3 ]]; then 24 | INDEX_TYPE=2 25 | elif [[ $1 == 4 ]]; then 26 | INDEX_TYPE=3 27 | fi 28 | 29 | if [[ $1 == 1 || $1 == 4 ]]; then 30 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 31 | else 32 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 33 | fi 34 | 35 | PACMAN_OPT="" 36 | OPT=( 37 | "" 38 | "-DREDUCE_PM_ACCESS=ON" 39 | "-DHOT_COLD_SEPARATE=ON" 40 | "-DGC_SHORTCUT=ON" 41 | "-DBATCH_COMPACTION=ON" 42 | ) 43 | 44 | FILTER="--benchmark_filter=/(80)/.*/threads:(12)$" 45 | SKEW="true" # true (Zipfian), false (uniform) 46 | 47 | NUMA_AFFINITY=0 48 | 49 | mkdir -p ../results 50 | mkdir -p ../build 51 | cd ../build 52 | 53 | OUTPUT_FILE=../results/breakdown_$1 54 | TMP_OUTPUT=../results/breakdown_$1_tmp 55 | # clean the result file 56 | cat /dev/null > ${OUTPUT_FILE} 57 | 58 | # disable cpu scaling 59 | sudo cpupower frequency-set --governor performance > /dev/null 60 | 61 | # it may take long to get third-party dependencies, so don't delete _deps 62 | ls | grep -v _deps | xargs rm -rf 63 | for opt in "${OPT[@]}"; do 64 | PACMAN_OPT="${PACMAN_OPT} ${opt}" 65 | echo | tee -a ${OUTPUT_FILE} 66 | echo ${PACMAN_OPT} | tee -a ${OUTPUT_FILE} 67 | # build 68 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 69 | -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} ${PACMAN_OPT} \ 70 | -DNUM_KEYS=200000000 -DNUM_OPS_PER_THREAD=20000000 \ 71 | -DNUM_WARMUP_OPS_PER_THREAD=20000000 -DSKEW=${SKEW} \ 72 | -DNUM_GC_THREADS=2 -DYCSB_TYPE=YCSB_W100 .. 73 | 74 | make pacman_bench -j 75 | # clean cache 76 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 77 | 78 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 79 | ./benchmarks/pacman_bench --benchmark_repetitions=1 ${FILTER} \ 80 | --benchmark_out=${TMP_OUTPUT} --benchmark_out_format=json 81 | cat ${TMP_OUTPUT} >> ${OUTPUT_FILE} 82 | 83 | sleep 5s 84 | done 85 | rm ${TMP_OUTPUT} 86 | 87 | sudo cpupower frequency-set --governor powersave > /dev/null 88 | -------------------------------------------------------------------------------- /scripts/kick_the_tires.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | # to avoid no available space 9 | ./clean_pmem_dir.sh 10 | 11 | mkdir -p ../results 12 | mkdir -p ../build 13 | cd ../build 14 | 15 | THREADS=4 16 | NUMA_AFFINITY=0 17 | FILTER="--benchmark_filter=/(80)/.*/threads:(${THREADS})$" 18 | 19 | # disable cpu scaling 20 | sudo cpupower frequency-set --governor performance > /dev/null 21 | 22 | # it may take long to get third-party dependencies, so don't delete _deps 23 | ls | grep -v _deps | xargs rm -rf 24 | 25 | for i in $(seq 1 3); do 26 | INDEX_TYPE=${i} 27 | TARGET="pacman_bench" 28 | TARGET_CMD="./benchmarks/pacman_bench" 29 | 30 | # build 31 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 32 | -DEVAL_OTHER_SYSTEMS=ON -DINDEX_TYPE=${INDEX_TYPE} \ 33 | -DIDX_PERSISTENT=OFF -DPACMAN=ON \ 34 | -DNUM_KEYS=10000 -DNUM_OPS_PER_THREAD=10000 \ 35 | -DNUM_GC_THREADS=2 -DWORKLOAD_TYPE=ETC -DMEASURE_LATENCY=ON .. 36 | make ${TARGET} -j 37 | 38 | 39 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 40 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} 41 | done 42 | 43 | # evaluate other systems 44 | # viper, ChameleonDB 45 | OTHER_SYSTEMS=( 46 | "viper_bench" 47 | "chameleondb_bench" 48 | ) 49 | 50 | FILTER="--benchmark_filter=/(80)/.*/threads:(${THREADS})$" 51 | 52 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 53 | -DEVAL_OTHER_SYSTEMS=ON -DINDEX_TYPE=1 -DIDX_PERSISTENT=OFF -DPACMAN=ON \ 54 | -DNUM_KEYS=10000 -DNUM_OPS_PER_THREAD=10000 \ 55 | -DNUM_GC_THREADS=2 -DWORKLOAD_TYPE=YCSB -DMEASURE_LATENCY=ON .. 56 | 57 | for sys in "${OTHER_SYSTEMS[@]}"; do 58 | make ${sys} -j 59 | TARGET_CMD="./benchmarks/other/${sys}" 60 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 61 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} 62 | done 63 | 64 | # pmem_rocksdb, pmemkv 65 | OTHER_SYSTEMS=( 66 | "pmem_rocksdb_bench" 67 | "pmemkv_bench" 68 | ) 69 | FILTER="--benchmark_filter=/.*/threads:(${THREADS})$" 70 | 71 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 72 | -DEVAL_OTHER_SYSTEMS=ON -DINDEX_TYPE=1 -DIDX_PERSISTENT=ON \ 73 | -DNUM_KEYS=10000 -DNUM_OPS_PER_THREAD=10000 \ 74 | -DNUM_GC_THREADS=2 -DWORKLOAD_TYPE=ETC -DMEASURE_LATENCY=ON .. 75 | 76 | for sys in "${OTHER_SYSTEMS[@]}"; do 77 | make ${sys} -j 78 | TARGET_CMD="./benchmarks/other/${sys}" 79 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 80 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} 81 | done 82 | 83 | 84 | sudo cpupower frequency-set --governor powersave > /dev/null 85 | 86 | echo 87 | echo "Kick-the-tires passed!" 88 | -------------------------------------------------------------------------------- /scripts/eval_value_size.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M, 5: Viper" 11 | echo " : 0: false, 1: true" 12 | } 13 | 14 | if [[ $# != 2 || $1 < 1 || $1 > 5 || ($2 != 0 && $2 != 1) ]]; then 15 | help 16 | exit 17 | fi 18 | 19 | # to avoid no available space 20 | ./clean_pmem_dir.sh 21 | 22 | if [[ $1 == 1 || $1 == 2 || $1 == 5 ]]; then 23 | INDEX_TYPE=1 24 | elif [[ $1 == 3 ]]; then 25 | INDEX_TYPE=2 26 | elif [[ $1 == 4 ]]; then 27 | INDEX_TYPE=3 28 | fi 29 | 30 | if [[ $1 == 1 || $1 == 4 || $1 == 5 ]]; then 31 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 32 | else 33 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 34 | fi 35 | 36 | if [[ $1 != 5 ]]; then 37 | TARGET="pacman_bench" 38 | TARGET_CMD="./benchmarks/pacman_bench" 39 | else 40 | TARGET="viper_bench" 41 | TARGET_CMD="./benchmarks/other/viper_bench" 42 | WITH_OTHERS="-DEVAL_OTHER_SYSTEMS=ON" 43 | fi 44 | 45 | PACMAN_OPT="" 46 | if [[ $2 == 1 ]]; then 47 | PACMAN_OPT="-DPACMAN=ON" 48 | fi 49 | 50 | FILTER="--benchmark_filter=/(80)/.*/threads:(12)$" 51 | SKEW="true" # true (Zipfian), false (uniform) 52 | 53 | NUMA_AFFINITY=0 54 | 55 | mkdir -p ../results 56 | mkdir -p ../build 57 | cd ../build 58 | 59 | OUTPUT_FILE=../results/value_size_$1_$2 60 | TMP_OUTPUT=../results/value_size_$1_$2_tmp 61 | # clean the result file 62 | cat /dev/null > ${OUTPUT_FILE} 63 | 64 | # disable cpu scaling 65 | sudo cpupower frequency-set --governor performance > /dev/null 66 | 67 | VALUE_SIZE=(32 64 128 256 512 1024) 68 | 69 | # it may take long to get third-party dependencies, so don't delete _deps 70 | ls | grep -v _deps | xargs rm -rf 71 | for size in "${VALUE_SIZE[@]}"; do 72 | echo | tee -a ${OUTPUT_FILE} 73 | echo ${size} | tee -a ${OUTPUT_FILE} 74 | # build 75 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 76 | ${WITH_OTHERS} -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} ${PACMAN_OPT} \ 77 | -DNUM_KEYS=200000000 -DNUM_OPS_PER_THREAD=25000000 \ 78 | -DNUM_WARMUP_OPS_PER_THREAD=25000000 -DVALUE_SIZE=${size} \ 79 | -DNUM_GC_THREADS=4 -DSKEW=${SKEW} .. 80 | 81 | make ${TARGET} -j 82 | # clean cache 83 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 84 | 85 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 86 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} \ 87 | --benchmark_out=${TMP_OUTPUT} --benchmark_out_format=json 88 | cat ${TMP_OUTPUT} >> ${OUTPUT_FILE} 89 | 90 | sleep 5s 91 | done 92 | rm ${TMP_OUTPUT} 93 | 94 | sudo cpupower frequency-set --governor powersave > /dev/null 95 | -------------------------------------------------------------------------------- /db/index/masstree/str.cc: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 
12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #include "str.hh" 17 | namespace lcdf { 18 | 19 | const Str Str::maxkey("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 20 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 21 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 22 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 23 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 24 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 25 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 26 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 27 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 28 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 29 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 30 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 31 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 32 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 33 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 34 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 35 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 36 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 37 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 38 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 39 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 40 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 41 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 42 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 43 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 44 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 45 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 46 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 47 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 48 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 49 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 50 | "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 51 | "\xFF", 257); 52 | 53 | } // namespace lcdf 54 | -------------------------------------------------------------------------------- /scripts/eval_key_space.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M, 5: Viper" 11 | echo " : 0: false, 1: true" 12 | } 13 | 14 | if [[ $# != 2 || $1 < 1 || $1 > 5 || ($2 != 0 && $2 != 1) ]]; then 15 | help 16 | exit 17 | fi 18 | 19 | # to avoid no available space 20 | ./clean_pmem_dir.sh 21 | 22 | if [[ $1 == 1 || $1 == 2 || $1 == 5 ]]; then 23 | INDEX_TYPE=1 24 | elif [[ $1 == 3 ]]; then 25 | INDEX_TYPE=2 26 | elif [[ $1 == 4 ]]; then 27 | INDEX_TYPE=3 28 | fi 29 | 30 | if [[ $1 == 1 || $1 == 4 || $1 == 5 ]]; then 31 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 32 | else 33 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 34 | fi 35 | 36 | if [[ $1 != 5 ]]; then 37 | TARGET="pacman_bench" 38 | TARGET_CMD="./benchmarks/pacman_bench" 39 | else 40 | TARGET="viper_bench" 41 | TARGET_CMD="./benchmarks/other/viper_bench" 42 | WITH_OTHERS="-DEVAL_OTHER_SYSTEMS=ON" 43 | fi 44 | 45 | PACMAN_OPT="" 46 | if [[ $2 == 1 ]]; then 47 | PACMAN_OPT="-DPACMAN=ON" 48 | fi 49 | 50 | FILTER="--benchmark_filter=/(80)/.*/threads:(12)$" 51 | SKEW="true" # true (Zipfian), false (uniform) 52 | 53 | NUMA_AFFINITY=0 54 | 55 | mkdir -p ../results 56 | mkdir -p ../build 57 | cd ../build 58 | 59 | OUTPUT_FILE=../results/key_space_$1_$2 60 | TMP_OUTPUT=../results/key_space_$1_$2_tmp 61 | # clean the result file 62 | cat /dev/null > ${OUTPUT_FILE} 63 | 64 | # disable cpu scaling 65 | sudo cpupower frequency-set --governor performance > /dev/null 66 | 67 | NUM_KEYS=( 68 | 50000000 69 | 100000000 70 | 200000000 71 | 400000000 72 | 800000000 73 | ) 74 | 75 | # it may take long to get third-party dependencies, so don't delete _deps 76 | ls | grep -v _deps | xargs rm -rf 77 | for num in "${NUM_KEYS[@]}"; do 78 | echo | tee -a ${OUTPUT_FILE} 79 | echo ${num} | tee -a ${OUTPUT_FILE} 80 | num_warmup=$((${num} / 8)) 81 | # build 82 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 83 | ${WITH_OTHERS} -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} ${PACMAN_OPT} \ 84 | -DNUM_KEYS=${num} -DNUM_OPS_PER_THREAD=40000000 \ 85 | -DNUM_WARMUP_OPS_PER_THREAD=${num_warmup} -DNUM_GC_THREADS=4 \ 86 | -DSKEW=${SKEW} .. 87 | 88 | make ${TARGET} -j 89 | # clean cache 90 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 91 | 92 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 93 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} \ 94 | --benchmark_out=${TMP_OUTPUT} --benchmark_out_format=json 95 | cat ${TMP_OUTPUT} >> ${OUTPUT_FILE} 96 | 97 | sleep 5s 98 | done 99 | rm ${TMP_OUTPUT} 100 | 101 | sudo cpupower frequency-set --governor powersave > /dev/null 102 | -------------------------------------------------------------------------------- /scripts/eval_write_ratio.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M, 5: Viper" 11 | echo " : 0: false, 1: true" 12 | } 13 | 14 | if [[ $# != 2 || $1 < 1 || $1 > 5 || ($2 != 0 && $2 != 1) ]]; then 15 | help 16 | exit 17 | fi 18 | 19 | # to avoid no available space 20 | ./clean_pmem_dir.sh 21 | 22 | if [[ $1 == 1 || $1 == 2 || $1 == 5 ]]; then 23 | INDEX_TYPE=1 24 | elif [[ $1 == 3 ]]; then 25 | INDEX_TYPE=2 26 | elif [[ $1 == 4 ]]; then 27 | INDEX_TYPE=3 28 | fi 29 | 30 | if [[ $1 == 1 || $1 == 4 || $1 == 5 ]]; then 31 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 32 | else 33 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 34 | fi 35 | 36 | if [[ $1 != 5 ]]; then 37 | TARGET="pacman_bench" 38 | TARGET_CMD="./benchmarks/pacman_bench" 39 | else 40 | TARGET="viper_bench" 41 | TARGET_CMD="./benchmarks/other/viper_bench" 42 | WITH_OTHERS="-DEVAL_OTHER_SYSTEMS=ON" 43 | fi 44 | 45 | PACMAN_OPT="" 46 | if [[ $2 == 1 ]]; then 47 | PACMAN_OPT="-DPACMAN=ON" 48 | fi 49 | 50 | FILTER="--benchmark_filter=/(80)/.*/threads:(12)$" 51 | SKEW="true" # true (Zipfian), false (uniform) 52 | 53 | NUMA_AFFINITY=0 54 | 55 | mkdir -p ../results 56 | mkdir -p ../build 57 | cd ../build 58 | 59 | OUTPUT_FILE=../results/write_ratio_$1_$2 60 | TMP_OUTPUT=../results/write_ratio_$1_$2_tmp 61 | # clean the result file 62 | cat /dev/null > ${OUTPUT_FILE} 63 | 64 | # disable cpu scaling 65 | sudo cpupower frequency-set --governor performance > /dev/null 66 | 67 | WORKLOAD_TYPE=( 68 | YCSB_W0 69 | YCSB_W20 70 | YCSB_W40 71 | YCSB_W60 72 | YCSB_W80 73 | YCSB_W100 74 | ) 75 | 76 | # it may take long to get third-party dependencies, so don't delete _deps 77 | ls | grep -v _deps | xargs rm -rf 78 | for workload in "${WORKLOAD_TYPE[@]}"; do 79 | echo | tee -a ${OUTPUT_FILE} 80 | echo ${workload} | tee -a ${OUTPUT_FILE} 81 | # build 82 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 83 | ${WITH_OTHERS} -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} ${PACMAN_OPT} \ 84 | -DNUM_KEYS=200000000 -DNUM_OPS_PER_THREAD=25000000 \ 85 | -DNUM_WARMUP_OPS_PER_THREAD=25000000 -DNUM_GC_THREADS=4 \ 86 | -DYCSB_TYPE=${workload} -DSKEW=${SKEW} .. 87 | 88 | make ${TARGET} -j 89 | # clean cache 90 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 91 | 92 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 93 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} \ 94 | --benchmark_out=${TMP_OUTPUT} --benchmark_out_format=json 95 | cat ${TMP_OUTPUT} >> ${OUTPUT_FILE} 96 | 97 | sleep 5s 98 | done 99 | rm ${TMP_OUTPUT} 100 | 101 | sudo cpupower frequency-set --governor powersave > /dev/null 102 | -------------------------------------------------------------------------------- /scripts/eval_case.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | # to avoid no available space 9 | ./clean_pmem_dir.sh 10 | 11 | ########## configurations ########## 12 | 13 | # basic 14 | INDEX_TYPE=1 # INDEX TYPE: 1 CCEH 2 FastFair 3 Masstree 15 | # index is on PM or not 16 | IDX_PERSISTENT="ON" # ON OFF 17 | # log is on PM or not 18 | LOG_PERSISTENT="ON" # ON OFF 19 | LOG_BATCHING="ON" # ON OFF. 
simulate FlatStore's batching 20 | 21 | # PACMAN option 22 | REDUCE_PM_ACCESS="ON" # ON OFF 23 | HOT_COLD_SEPARATE="ON" # ON OFF 24 | GC_SHORTCUT="ON" # ON OFF 25 | BATCH_COMPACTION="ON" # ON OFF 26 | 27 | # benchmark config 28 | INIT_UTIL="80" # 0 50 60 70 80 90. capacity utilization 29 | THREADS="12" # 1~32. threads for workloads 30 | NUM_KEYS="200000000" 31 | NUM_OPS_PER_THREAD="20000000" 32 | NUM_WARMUP_OPS_PER_THREAD="0" 33 | VALUE_SIZE="48" 34 | NUM_GC_THREADS="4" 35 | WORKLOAD_TYPE="YCSB" # YCSB ETC 36 | YCSB_TYPE="YCSB_A" # YCSB_A, YCSB_B, YCSB_C, YCSB_E, YCSB_W0, YCSB_W20, YCSB_W40, YCSB_W60, YCSB_W80, YCSB_W100 37 | SKEW="true" # true (Zipfian), false (uniform) 38 | 39 | MEASURE_LATENCY="OFF" # ON OFF 40 | USE_ALL_CORES="OFF" # ON OFF. If set, num_gc_threads will be (NUM_ALL_CORES - num_worker_threads) 41 | TEST_LOAD="OFF" # ON OFF. If set, only evaluate random loading phase 42 | 43 | #################################### 44 | 45 | NUMA_AFFINITY=0 # running on which NUMA node 46 | FILTER="--benchmark_filter=/($INIT_UTIL)/.*/threads:($THREADS)$" 47 | 48 | mkdir -p ../results 49 | mkdir -p ../build 50 | cd ../build 51 | 52 | OUTPUT_FILE=../results/case 53 | 54 | # it may take long to get third-party dependencies, so don't delete _deps 55 | ls | grep -v _deps | xargs rm -rf 56 | 57 | # build 58 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 59 | -DINDEX_TYPE=${INDEX_TYPE} -DIDX_PERSISTENT=${IDX_PERSISTENT} \ 60 | -DLOG_PERSISTENT=${LOG_PERSISTENT} -DLOG_BATCHING=${LOG_BATCHING} \ 61 | -DREDUCE_PM_ACCESS=${REDUCE_PM_ACCESS} \ 62 | -DHOT_COLD_SEPARATE=${HOT_COLD_SEPARATE} -DGC_SHORTCUT=${GC_SHORTCUT} \ 63 | -DBATCH_COMPACTION=${BATCH_COMPACTION} \ 64 | -DNUM_KEYS=${NUM_KEYS} -DNUM_OPS_PER_THREAD=${NUM_OPS_PER_THREAD} \ 65 | -DNUM_WARMUP_OPS_PER_THREAD=${NUM_WARMUP_OPS_PER_THREAD} \ 66 | -DVALUE_SIZE=${VALUE_SIZE} -DNUM_GC_THREADS=${NUM_GC_THREADS} \ 67 | -DWORKLOAD_TYPE=${WORKLOAD_TYPE} -DYCSB_TYPE=${YCSB_TYPE} -DSKEW=${SKEW} \ 68 | -DMEASURE_LATENCY=${MEASURE_LATENCY} -DUSE_ALL_CORES=${USE_ALL_CORES} \ 69 | -DTEST_LOAD=${TEST_LOAD} .. 
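# For reference: with the defaults above (INIT_UTIL=80, THREADS=12) the
# benchmark filter resolves to "--benchmark_filter=/(80)/.*/threads:(12)$",
# i.e. only the run registered at 80% capacity utilization with 12 worker
# threads is executed; this is the same fixed filter that the other
# eval_*.sh scripts hard-code.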
70 | 71 | make pacman_bench -j 72 | 73 | # disable cpu scaling 74 | sudo cpupower frequency-set --governor performance > /dev/null 75 | # clean cache 76 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 77 | 78 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 79 | ./benchmarks/pacman_bench --benchmark_repetitions=1 ${FILTER} \ 80 | --benchmark_out=${OUTPUT_FILE} --benchmark_out_format=json 81 | 82 | sudo cpupower frequency-set --governor powersave > /dev/null 83 | -------------------------------------------------------------------------------- /util/persist.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include "config.h" 6 | 7 | #define CACHE_LINE_SIZE 64 8 | #define FLUSH_ALIGN ((uintptr_t)CACHE_LINE_SIZE) 9 | #define force_inline __attribute__((always_inline)) inline 10 | 11 | static inline void mfence() { asm volatile("mfence" ::: "memory"); } 12 | 13 | static inline void sfence() { _mm_sfence(); } 14 | 15 | static inline void clflush(const void *data, int len) { 16 | volatile char *ptr = (char *)((unsigned long)data & ~(CACHE_LINE_SIZE - 1)); 17 | for (; ptr < (char *)data + len; ptr += CACHE_LINE_SIZE) { 18 | asm volatile("clflush %0" : "+m"(*(volatile char *)ptr)); 19 | } 20 | sfence(); 21 | } 22 | 23 | static inline void idx_clflush(const void *data, int len) { 24 | #ifdef IDX_PERSISTENT 25 | clflush(data, len); 26 | #endif 27 | } 28 | 29 | static force_inline void 30 | pmem_clflushopt(const void *addr) 31 | { 32 | asm volatile(".byte 0x66; clflush %0" : "+m" \ 33 | (*(volatile char *)(addr))); 34 | } 35 | static force_inline void 36 | pmem_clwb(const void *addr) 37 | { 38 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" \ 39 | (*(volatile char *)(addr))); 40 | } 41 | 42 | typedef void flush_fn(const void *, size_t); 43 | 44 | static force_inline void 45 | flush_clflush_nolog(const void *addr, size_t len) 46 | { 47 | uintptr_t uptr; 48 | 49 | /* 50 | * Loop through cache-line-size (typically 64B) aligned chunks 51 | * covering the given range. 
52 | */ 53 | for (uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); 54 | uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) 55 | _mm_clflush((char *)uptr); 56 | } 57 | 58 | static force_inline void 59 | clflush_fence(const void *addr, size_t len) 60 | { 61 | uintptr_t uptr; 62 | for (uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); 63 | uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) 64 | _mm_clflush((char *)uptr); 65 | _mm_sfence(); 66 | } 67 | 68 | static force_inline void 69 | clflushopt_fence(const void *addr, size_t len) 70 | { 71 | uintptr_t uptr; 72 | for (uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); 73 | uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) 74 | pmem_clflushopt((char *)uptr); 75 | _mm_sfence(); 76 | } 77 | 78 | static force_inline void 79 | clwb_fence(const void *addr, size_t len) 80 | { 81 | uintptr_t uptr; 82 | for (uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); 83 | uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) 84 | pmem_clwb((char *)uptr); 85 | _mm_sfence(); 86 | } 87 | 88 | static force_inline void 89 | idx_clflush_fence(const void *addr, size_t len) 90 | { 91 | #ifdef IDX_PERSISTENT 92 | clflush_fence(addr, len); 93 | #endif 94 | } 95 | 96 | static force_inline void 97 | idx_clflushopt_fence(const void *addr, size_t len) 98 | { 99 | #ifdef IDX_PERSISTENT 100 | clflushopt_fence(addr, len); 101 | #endif 102 | } 103 | 104 | static force_inline void 105 | idx_clwb_fence(const void *addr, size_t len) 106 | { 107 | #ifdef IDX_PERSISTENT 108 | clwb_fence(addr, len); 109 | #endif 110 | } 111 | -------------------------------------------------------------------------------- /scripts/eval_ycsb.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M, 5: Viper" 11 | echo " : 0: false, 1: true" 12 | } 13 | 14 | if [[ $# != 2 || $1 < 1 || $1 > 5 || ($2 != 0 && $2 != 1) ]]; then 15 | help 16 | exit 17 | fi 18 | 19 | # to avoid no available space 20 | ./clean_pmem_dir.sh 21 | 22 | if [[ $1 == 1 || $1 == 2 || $1 == 5 ]]; then 23 | INDEX_TYPE=1 24 | elif [[ $1 == 3 ]]; then 25 | INDEX_TYPE=2 26 | elif [[ $1 == 4 ]]; then 27 | INDEX_TYPE=3 28 | fi 29 | 30 | if [[ $1 == 1 || $1 == 4 || $1 == 5 ]]; then 31 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 32 | else 33 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 34 | fi 35 | 36 | if [[ $1 != 5 ]]; then 37 | TARGET="pacman_bench" 38 | TARGET_CMD="./benchmarks/pacman_bench" 39 | else 40 | TARGET="viper_bench" 41 | TARGET_CMD="./benchmarks/other/viper_bench" 42 | WITH_OTHERS="-DEVAL_OTHER_SYSTEMS=ON" 43 | fi 44 | 45 | PACMAN_OPT="" 46 | if [[ $2 == 1 ]]; then 47 | PACMAN_OPT="-DPACMAN=ON" 48 | fi 49 | 50 | FILTER="--benchmark_filter=/(80)/.*/threads:(24)$" 51 | SKEW="true" # true (Zipfian), false (uniform) 52 | 53 | NUMA_AFFINITY=0 54 | NUM_KEYS=200000000 55 | NUM_OPS_PER_THREAD=20000000 56 | 57 | mkdir -p ../results 58 | mkdir -p ../build 59 | cd ../build 60 | 61 | OUTPUT_FILE=../results/ycsb_$1_$2 62 | TMP_OUTPUT=../results/ycsb_$1_$2_tmp 63 | # clean the result file 64 | cat /dev/null > ${OUTPUT_FILE} 65 | 66 | if [[ $1 == 3 || $1 == 4 ]]; then 67 | WORKLOAD_TYPE=( 68 | "YCSB_A" 69 | "YCSB_B" 70 | "YCSB_C" 71 | "YCSB_E" 72 | ) 73 | else 74 | WORKLOAD_TYPE=( 75 | "YCSB_A" 76 | "YCSB_B" 77 | "YCSB_C" 78 | ) 79 | fi 80 | 81 | # it may take long to get third-party dependencies, so 
don't delete _deps 82 | ls | grep -v _deps | xargs rm -rf 83 | 84 | # disable cpu scaling 85 | sudo cpupower frequency-set --governor performance > /dev/null 86 | 87 | for workload in "${WORKLOAD_TYPE[@]}"; do 88 | echo | tee -a ${OUTPUT_FILE} 89 | echo ${workload} | tee -a ${OUTPUT_FILE} 90 | # build 91 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} \ 92 | ${WITH_OTHERS} -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} ${PACMAN_OPT} \ 93 | -DNUM_KEYS=${NUM_KEYS} -DNUM_OPS_PER_THREAD=${NUM_OPS_PER_THREAD} \ 94 | -DNUM_GC_THREADS=4 -DYCSB_TYPE=${workload} -DSKEW=${SKEW} .. 95 | make ${TARGET} -j 96 | 97 | # clean cache 98 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 99 | 100 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 101 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} \ 102 | --benchmark_out=${TMP_OUTPUT} --benchmark_out_format=json 103 | cat ${TMP_OUTPUT} >> ${OUTPUT_FILE} 104 | 105 | sleep 5s 106 | done 107 | rm ${TMP_OUTPUT} 108 | 109 | sudo cpupower frequency-set --governor powersave > /dev/null 110 | -------------------------------------------------------------------------------- /scripts/eval_etc.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | if [[ $(basename $PWD) != "scripts" ]]; then 4 | echo 'run this script in "scripts"' 5 | exit 6 | fi 7 | 8 | help() { 9 | echo "Usage: $0 " 10 | echo " : 1: FlatStore-H, 2: FlatStore-PH, 3: FlatStore-FF, 4: FlatStore-M, 5: Viper, 6: ChameleonDB 7: PMem-RocksDB 8: pmemkv" 11 | echo " : 0: false, 1: true (not affects PMem-RocksDB and pmemkv)" 12 | } 13 | 14 | if [[ $# == 1 ]]; then 15 | if [[ $1 > 0 && $1 < 6 ]]; then 16 | help 17 | exit 18 | fi 19 | elif [[ $# != 2 || $1 < 1 || $1 > 8 || ($2 != 0 && $2 != 1) ]]; then 20 | help 21 | exit 22 | fi 23 | 24 | # to avoid no available space 25 | ./clean_pmem_dir.sh 26 | 27 | INDEX_TYPE=1 28 | if [[ $1 == 1 || $1 == 2 || $1 == 5 ]]; then 29 | INDEX_TYPE=1 30 | elif [[ $1 == 3 ]]; then 31 | INDEX_TYPE=2 32 | elif [[ $1 == 4 ]]; then 33 | INDEX_TYPE=3 34 | fi 35 | 36 | if [[ $1 == 1 || $1 == 4 || $1 == 5 ]]; then 37 | IDX_PERSISTENT="-DIDX_PERSISTENT=OFF" 38 | else 39 | IDX_PERSISTENT="-DIDX_PERSISTENT=ON" 40 | fi 41 | 42 | if [[ $1 -le 4 ]]; then 43 | TARGET="pacman_bench" 44 | TARGET_CMD="./benchmarks/pacman_bench" 45 | else 46 | WITH_OTHERS="-DEVAL_OTHER_SYSTEMS=ON" 47 | if [[ $1 == 5 ]]; then 48 | TARGET="viper_bench" 49 | elif [[ $1 == 6 ]]; then 50 | TARGET="chameleondb_bench" 51 | elif [[ $1 == 7 ]]; then 52 | TARGET="pmem_rocksdb_bench" 53 | elif [[ $1 == 8 ]]; then 54 | TARGET="pmemkv_bench" 55 | fi 56 | TARGET_CMD="./benchmarks/other/${TARGET}" 57 | fi 58 | 59 | PACMAN_OPT="" 60 | if [[ $2 == 1 ]]; then 61 | PACMAN_OPT="-DPACMAN=ON" 62 | fi 63 | 64 | NUMA_AFFINITY=0 65 | LOG_BATCHING=OFF # simulate FlatStore's batching (if LOG_PERSISTENT), diabled for fair comparison 66 | NUM_KEYS=200000000 67 | NUM_OPS_PER_THREAD=20000000 68 | 69 | mkdir -p ../results 70 | mkdir -p ../build 71 | cd ../build 72 | 73 | THREADS=24 74 | if [[ $1 -le 6 ]]; then 75 | FILTER="--benchmark_filter=/(80)/.*/threads:(${THREADS})$" 76 | OUTPUT_FILE=../results/etc_$1_$2 77 | else 78 | FILTER="--benchmark_filter=/.*/threads:(${THREADS})$" 79 | OUTPUT_FILE=../results/etc_$1 80 | fi 81 | # clean the result file 82 | 83 | # it may take long to get third-party dependencies, so don't delete _deps 84 | ls | grep -v _deps | xargs rm -rf 85 | # build 86 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_NUMA_NODE=${NUMA_AFFINITY} 
\ 87 | ${WITH_OTHERS} -DINDEX_TYPE=${INDEX_TYPE} ${IDX_PERSISTENT} \ 88 | -DLOG_BATCHING=${LOG_BATCHING} ${PACMAN_OPT} \ 89 | -DNUM_KEYS=${NUM_KEYS} -DNUM_OPS_PER_THREAD=${NUM_OPS_PER_THREAD} \ 90 | -DNUM_GC_THREADS=4 -DWORKLOAD_TYPE=ETC -DMEASURE_LATENCY=ON .. 91 | make ${TARGET} -j 92 | 93 | # disable cpu scaling 94 | sudo cpupower frequency-set --governor performance > /dev/null 95 | # clean cache 96 | sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" 97 | 98 | numactl --membind=${NUMA_AFFINITY} --cpunodebind=${NUMA_AFFINITY} \ 99 | ${TARGET_CMD} --benchmark_repetitions=1 ${FILTER} \ 100 | --benchmark_out=${OUTPUT_FILE} --benchmark_out_format=json 101 | 102 | sudo cpupower frequency-set --governor powersave > /dev/null 103 | -------------------------------------------------------------------------------- /benchmarks/other/ChameleonDB/log_gc.cpp: -------------------------------------------------------------------------------- 1 | #include "log_gc.h" 2 | 3 | #include 4 | 5 | namespace CHAMELEONDB_NAMESPACE { 6 | 7 | void LogGCer::GCEntry() { 8 | // bind_core_on_numa(log_->num_workers_ + gc_id_); 9 | 10 | while (!log_->stop_flag_.load(std::memory_order_relaxed)) { 11 | if (NeedGC()) { 12 | Timer timer(clean_time_ns_); 13 | DoMemoryClean(); 14 | } else { 15 | usleep(1); 16 | } 17 | } 18 | } 19 | 20 | bool LogGCer::NeedGC() { 21 | int total_segments = log_->num_segments_ - log_->num_cleaners_; 22 | int free_segments = 23 | (uint64_t)(log_->num_free_segments_.load(std::memory_order_relaxed)); 24 | constexpr double threshold = 0.2; 25 | if (free_segments < total_segments * threshold) { 26 | return true; 27 | } else { 28 | return false; 29 | } 30 | } 31 | 32 | void LogGCer::DoMemoryClean() { 33 | LogSegment *gc_segment = nullptr; 34 | LockUsedList(); 35 | #ifdef HOT_COLD_SEPARATE 36 | if (clean_segment_cnt_ % gc_hot_per_cold_ != 0 && 37 | !closed_hot_segments_.empty()) { 38 | gc_segment = closed_hot_segments_.front(); 39 | closed_hot_segments_.pop_front(); 40 | } else if (!closed_cold_segments_.empty()) { 41 | gc_segment = closed_cold_segments_.front(); 42 | closed_cold_segments_.pop_front(); 43 | } 44 | #else 45 | if (!closed_hot_segments_.empty()) { 46 | gc_segment = closed_hot_segments_.front(); 47 | closed_hot_segments_.pop_front(); 48 | } 49 | #endif 50 | UnlockUsedList(); 51 | if (gc_segment == nullptr) { 52 | // no closed segment 53 | return; 54 | } 55 | 56 | CompactSegment(gc_segment); 57 | } 58 | 59 | void LogGCer::CompactSegment(LogSegment *gc_segment) { 60 | char *p = gc_segment->get_data_start(); 61 | char *tail = gc_segment->get_tail(); 62 | while (p < tail) { 63 | KVItem *kv = reinterpret_cast(p); 64 | uint32_t sz = sizeof(KVItem) + kv->key_size + kv->val_size; 65 | if (sz == sizeof(KVItem)) { 66 | break; 67 | } 68 | if (!reserved_segment_->HasSpaceFor(sz)) { 69 | FreezeReservedAndGetNew(); 70 | } 71 | Slice key_slice = kv->GetKey(); 72 | KeyType key = *(KeyType *)(key_slice.data()); 73 | ValueType old_val = TaggedPointer(p, sz); 74 | // if (db_->LockIfValid(key, old_val)) { 75 | // ValueType new_val = 76 | // reserved_segment_->Append(key_slice, kv->GetValue(), kv->epoch); 77 | // db_->GCMoveAndUnlock(key, new_val); 78 | // } 79 | if (db_->IfValid(key, old_val)) { 80 | ValueType new_val = 81 | reserved_segment_->Append(key_slice, kv->GetValue(), kv->epoch); 82 | db_->IndexPut(key_slice, new_val); 83 | } 84 | p += sz; 85 | } 86 | 87 | db_->thread_status_.rcu_barrier(); 88 | ++clean_segment_cnt_; 89 | gc_segment->Clear(); 90 | if (backup_segment_ == nullptr) { 91 | backup_segment_ = 
gc_segment; 92 | } else { 93 | std::lock_guard guard(log_->free_list_lock_); 94 | log_->free_segments_.push(gc_segment); 95 | ++log_->num_free_segments_; 96 | } 97 | } 98 | 99 | void LogGCer::FreezeReservedAndGetNew() { 100 | assert(backup_segment_); 101 | log_->FreezeSegment(reserved_segment_); 102 | reserved_segment_ = backup_segment_; 103 | reserved_segment_->StartUsing(false); 104 | backup_segment_ = nullptr; 105 | } 106 | 107 | } // namespace CHAMELEONDB_NAMESPACE 108 | -------------------------------------------------------------------------------- /util/var_key.h: -------------------------------------------------------------------------------- 1 | #if 0 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // variable-sized Key 9 | 10 | #if KEY_SIZE == 8 11 | using raw_key_t = uint64_t; 12 | using KeyType = uint64_t; 13 | #else 14 | using raw_key_t = VarKey; 15 | using KeyType = VarKeyPtr; 16 | #endif 17 | 18 | 19 | static inline KeyType to_Key_t(const Slice &key) { 20 | #if KEY_SIZE == 8 21 | return *(uint64_t *)key.data(); 22 | #else 23 | return KeyType((raw_key_t *)key.data()); 24 | #endif 25 | } 26 | 27 | static inline uint64_t get_u64_value(const KeyType &key) { 28 | #if KEY_SIZE == 8 29 | return key; 30 | #else 31 | return key.GetKey(); 32 | #endif 33 | } 34 | 35 | static inline const raw_key_t *get_raw_key_addr(const KeyType &key) { 36 | #if KEY_SIZE == 8 37 | return &key; 38 | #else 39 | return key.key_ptr; 40 | #endif 41 | } 42 | 43 | 44 | template 45 | struct VarKey { 46 | static_assert( 47 | N % sizeof(uint64_t) == 0, 48 | "The size should be multiple of sizeof(uint64_t)"); // for simplicity 49 | static constexpr size_t sz = N / sizeof(uint64_t); 50 | 51 | // uint64_t data[sz]; 52 | std::array data; 53 | 54 | VarKey(const uint64_t &k) { 55 | for (size_t i = 0; i < sz; i++) { 56 | data[i] = k; 57 | } 58 | } 59 | 60 | VarKey &operator=(const uint64_t &k) { 61 | for (size_t i = 0; i < sz; i++) { 62 | data[i] = k; 63 | } 64 | return *this; 65 | } 66 | 67 | uint64_t GetKey() const { 68 | return data[0]; 69 | } 70 | 71 | // operator uint64_t() const { 72 | // return GetKey(); 73 | // } 74 | 75 | bool operator==(const VarKey &other) const { 76 | return GetKey() == other.GetKey(); 77 | } 78 | 79 | bool operator!=(const VarKey &other) const { 80 | return GetKey() != other.GetKey(); 81 | } 82 | 83 | bool operator<(const VarKey &other) const { 84 | return GetKey() < other.GetKey(); 85 | } 86 | 87 | bool operator>(const VarKey &other) const { 88 | return GetKey() > other.GetKey(); 89 | } 90 | 91 | bool operator<=(const VarKey &other) const { 92 | return GetKey() <= other.GetKey(); 93 | } 94 | 95 | bool operator>=(const VarKey &other) const { 96 | return GetKey() >= other.GetKey(); 97 | } 98 | }; 99 | 100 | template 101 | struct VarKeyPtr { 102 | union { 103 | const struct VarKey *key_ptr; 104 | uint64_t data; 105 | }; 106 | 107 | VarKeyPtr() : key_ptr(nullptr) {} 108 | 109 | VarKeyPtr(const struct VarKey *p) : key_ptr(p) {} 110 | 111 | uint64_t GetKey() const { 112 | return key_ptr->GetKey(); 113 | } 114 | 115 | bool operator==(const VarKeyPtr &other) const { 116 | return *key_ptr == *(other.key_ptr); 117 | } 118 | 119 | bool operator!=(const VarKeyPtr &other) const { 120 | return *key_ptr != *(other.key_ptr); 121 | } 122 | 123 | bool operator<(const VarKeyPtr &other) const { 124 | return *key_ptr < *(other.key_ptr); 125 | } 126 | 127 | bool operator>(const VarKeyPtr &other) const { 128 | return *key_ptr > *(other.key_ptr); 129 | } 130 | 131 | bool operator<=(const 
VarKeyPtr &other) const { 132 | return *key_ptr <= *(other.key_ptr); 133 | } 134 | 135 | bool operator>=(const VarKeyPtr &other) const { 136 | return *key_ptr >= *(other.key_ptr); 137 | } 138 | }; 139 | 140 | #endif 141 | -------------------------------------------------------------------------------- /benchmarks/other/pmemkv_bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmarks/bench_base.h" 2 | #include "config.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | static constexpr uint64_t pmemkv_pool_size = 128ul << 30; 11 | 12 | enum FixtureArg { Arg_value_size }; 13 | 14 | class PMEMKVFixture : public BaseFixture { 15 | protected: 16 | virtual void OpenDB(benchmark::State &st) override { 17 | if (st.thread_index() == 0) { 18 | pmem::kv::config cfg; 19 | pmem::kv::status s = cfg.put_size(pmemkv_pool_size); 20 | assert(s == pmem::kv::status::OK); 21 | s = cfg.put_create_or_error_if_exists(true); 22 | assert(s == pmem::kv::status::OK); 23 | db = new pmem::kv::db(); 24 | #ifdef IDX_PERSISTENT 25 | std::string pool_path = std::string(PMEM_DIR) + "pmemkv_pool"; 26 | remove(pool_path.c_str()); 27 | s = cfg.put_path(pool_path); 28 | assert(s == pmem::kv::status::OK); 29 | s = db->open("cmap", std::move(cfg)); 30 | #else 31 | ERROR_EXIT("not supported"); 32 | #endif 33 | if (s != pmem::kv::status::OK) { 34 | ERROR_EXIT("pmemkv open failed"); 35 | } 36 | } 37 | } 38 | 39 | virtual void CloseDB(benchmark::State &st) override { 40 | if (st.thread_index() == 0) { 41 | db->close(); 42 | delete db; 43 | db = nullptr; 44 | } 45 | } 46 | 47 | virtual bool Get(const Slice &key, std::string *value) override { 48 | pmem::kv::status s = 49 | db->get(pmem::kv::string_view(key.data(), key.size()), value); 50 | return s == pmem::kv::status::OK; 51 | } 52 | 53 | virtual void Put(const Slice &key, const Slice &value) override { 54 | pmem::kv::status s = 55 | db->put(pmem::kv::string_view(key.data(), key.size()), 56 | pmem::kv::string_view(value.data(), value.size())); 57 | if (s != pmem::kv::status::OK) { 58 | ERROR_EXIT("put failed"); 59 | } 60 | } 61 | 62 | virtual void PreSetUp(benchmark::State &st) override { 63 | // bind_core_on_numa(st.thread_index()); 64 | } 65 | 66 | private: 67 | pmem::kv::db *db = nullptr; 68 | }; 69 | 70 | 71 | BENCHMARK_DEFINE_F(PMEMKVFixture, bench)(benchmark::State &st) { 72 | for (auto _ : st) { 73 | RunWorkload(st); 74 | } 75 | assert(st.iterations() == 1); 76 | st.SetItemsProcessed(st.iterations() * actual_num_ops_per_thread); 77 | #ifdef MEASURE_LATENCY 78 | if (st.thread_index() == 0) { 79 | for (int i = 0; i < TypeEnumMax; i++) { 80 | HistogramData hist_data; 81 | for (int j = 1; j < st.threads(); j++) { 82 | latency_statistics[0].histograms[i].Merge( 83 | latency_statistics[j].histograms[i]); 84 | } 85 | latency_statistics[0].histograms[i].Data(&hist_data); 86 | std::string name = std::string("Lat_") + TypeStrings[i] + "_"; 87 | st.counters[name + "Avg"] = hist_data.average; 88 | st.counters[name + "P50"] = hist_data.median; 89 | st.counters[name + "P95"] = hist_data.percentile95; 90 | st.counters[name + "P99"] = hist_data.percentile99; 91 | } 92 | latency_statistics.reset(); 93 | } 94 | #endif 95 | } 96 | 97 | BENCHMARK_REGISTER_F(PMEMKVFixture, bench) 98 | ->DenseThreadRange(1, 36, 1) 99 | ->Iterations(1) 100 | ->Unit(benchmark::kMicrosecond) 101 | ->UseRealTime(); 102 | 103 | BENCHMARK_MAIN(); 104 | -------------------------------------------------------------------------------- 
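// Note on the pmemkv fixture above: thread counts are registered densely from
// 1 to 36 (DenseThreadRange), and scripts/eval_etc.sh picks a single count at
// run time via --benchmark_filter=/.*/threads:(24)$. The "cmap" engine is only
// opened on a PMem pool path under IDX_PERSISTENT; the volatile-index build
// aborts with "not supported".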
/db/index/masstree/memdebug.cc: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2016 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2016 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #include "memdebug.hh" 17 | #include 18 | #include 19 | 20 | #if HAVE_MEMDEBUG 21 | void memdebug::landmark(char* buf, size_t bufsz) const { 22 | if (this->magic != magic_value && this->magic != magic_free_value) 23 | snprintf(buf, bufsz, "???"); 24 | else if (this->file) 25 | snprintf(buf, bufsz, "%s:%d", this->file, this->line); 26 | else if (this->line) 27 | snprintf(buf, bufsz, "%d", this->line); 28 | else 29 | snprintf(buf, bufsz, "0"); 30 | } 31 | 32 | void 33 | memdebug::hard_free_checks(const memdebug *m, size_t sz, memtag tag, 34 | int after_rcu, const char *op) { 35 | char buf[256]; 36 | m->landmark(buf, sizeof(buf)); 37 | if (m->magic == magic_free_value) 38 | fprintf(stderr, "%s(%p): double free, was @%s\n", 39 | op, m + 1, buf); 40 | else if (m->magic != magic_value) 41 | fprintf(stderr, "%s(%p): freeing unallocated pointer (%x)\n", 42 | op, m + 1, m->magic); 43 | assert(m->magic == magic_value); 44 | if (tag && m->tag != tag) 45 | fprintf(stderr, "%s(%p): expected type %x, saw %x, " 46 | "allocated %s\n", op, m + 1, tag, m->tag, buf); 47 | if (!after_rcu && m->size != sz) 48 | fprintf(stderr, "%s(%p): expected size %lu, saw %lu, " 49 | "allocated %s\n", op, m + 1, 50 | (unsigned long) sz, (unsigned long) m->size, buf); 51 | if (m->after_rcu != after_rcu) 52 | fprintf(stderr, "%s(%p): double free after rcu, allocated @%s\n", 53 | op, m + 1, buf); 54 | if (tag) 55 | assert(m->tag == tag); 56 | if (!after_rcu) 57 | assert(m->size == sz); 58 | assert(m->after_rcu == after_rcu); 59 | } 60 | 61 | void 62 | memdebug::hard_assert_use(const void* ptr, memtag allowed) { 63 | const memdebug* m = reinterpret_cast(ptr) - 1; 64 | char buf[256]; 65 | m->landmark(buf, sizeof(buf)); 66 | if (m->magic == magic_free_value) 67 | fprintf(stderr, "%p: use tag %x after free, allocated %s\n", 68 | m + 1, allowed, buf); 69 | else if (m->magic != magic_value) 70 | fprintf(stderr, "%p: pointer is unallocated, not tag %x\n", 71 | m + 1, allowed); 72 | assert(m->magic == magic_value); 73 | if (allowed != 0 && (m->tag ^ allowed) > memtag_pool_mask) 74 | fprintf(stderr, "%p: expected tag %x, got tag %x, allocated %s\n", 75 | m + 1, allowed, m->tag, buf); 76 | if (allowed != 0) 77 | assert((m->tag ^ allowed) <= memtag_pool_mask); 78 | } 79 | #endif 80 | -------------------------------------------------------------------------------- /benchmarks/histogram.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 
9 | 10 | namespace HistogramInternal { 11 | 12 | class HistogramBucketMapper { 13 | public: 14 | HistogramBucketMapper(); 15 | 16 | // converts a value to the bucket index. 17 | size_t IndexForValue(uint64_t value) const; 18 | 19 | // number of buckets required. 20 | size_t BucketCount() const { return bucketValues_.size(); } 21 | 22 | uint64_t LastValue() const { return maxBucketValue_; } 23 | 24 | uint64_t FirstValue() const { return minBucketValue_; } 25 | 26 | uint64_t BucketLimit(const size_t bucketNumber) const { 27 | assert(bucketNumber < BucketCount()); 28 | return bucketValues_[bucketNumber]; 29 | } 30 | 31 | private: 32 | std::vector bucketValues_; 33 | uint64_t maxBucketValue_; 34 | uint64_t minBucketValue_; 35 | std::map valueIndexMap_; 36 | }; 37 | 38 | } // namespace HistogramInternal 39 | 40 | struct HistogramData { 41 | double median; 42 | double percentile95; 43 | double percentile99; 44 | double average; 45 | double standard_deviation; 46 | // zero-initialize new members since old Statistics::histogramData() 47 | // implementations won't write them. 48 | double max = 0.0; 49 | uint64_t count = 0; 50 | uint64_t sum = 0; 51 | double min = 0.0; 52 | }; 53 | 54 | // not thread-safe 55 | struct Histogram { 56 | Histogram(); 57 | ~Histogram() {} 58 | 59 | Histogram(const Histogram&) = delete; 60 | Histogram& operator=(const Histogram&) = delete; 61 | 62 | void Clear(); 63 | bool Empty() const; 64 | void Add(uint64_t value); 65 | void Merge(const Histogram& other); 66 | 67 | inline uint64_t min() const { return min_; } 68 | inline uint64_t max() const { return max_; } 69 | inline uint64_t num() const { return num_; } 70 | inline uint64_t sum() const { return sum_; } 71 | inline uint64_t sum_squares() const { return sum_squares_; } 72 | inline uint64_t bucket_at(size_t b) const { return buckets_[b];} 73 | 74 | double Median() const; 75 | double Percentile(double p) const; 76 | double Average() const; 77 | double StandardDeviation() const; 78 | void Data(HistogramData *const data) const; 79 | 80 | // To be able to use Histogram as thread local variable, it 81 | // cannot have dynamic allocated member. That's why we're 82 | // using manually values from BucketMapper 83 | std::uint64_t min_; 84 | std::uint64_t max_; 85 | std::uint64_t num_; 86 | std::uint64_t sum_; 87 | std::uint64_t sum_squares_; 88 | std::uint64_t buckets_[109]; // 109==BucketMapper::BucketCount() 89 | const uint64_t num_buckets_; 90 | }; 91 | 92 | 93 | class StopWatch { 94 | public: 95 | StopWatch(Histogram *hist) : hist_(hist) { 96 | if (hist_) { 97 | start_time_ = std::chrono::steady_clock::now(); 98 | } 99 | } 100 | 101 | ~StopWatch() { 102 | if (hist_) { 103 | uint64_t duration = std::chrono::duration_cast( 104 | std::chrono::steady_clock::now() - start_time_) 105 | .count(); 106 | hist_->Add(duration); 107 | } 108 | } 109 | 110 | StopWatch(const StopWatch&) = delete; 111 | StopWatch& operator=(const StopWatch&) = delete; 112 | 113 | private: 114 | Histogram *hist_; 115 | std::chrono::steady_clock::time_point start_time_; 116 | }; 117 | -------------------------------------------------------------------------------- /include/slice.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | // 5 | // Slice is a simple structure containing a pointer into some external 6 | // storage and a size. The user of a Slice must ensure that the slice 7 | // is not used after the corresponding external storage has been 8 | // deallocated. 9 | // 10 | // Multiple threads can invoke const methods on a Slice without 11 | // external synchronization, but if any of the threads may call a 12 | // non-const method, all threads accessing the same Slice must use 13 | // external synchronization. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | class Slice { 23 | public: 24 | // Create an empty slice. 25 | Slice() : data_(""), size_(0) {} 26 | 27 | // Create a slice that refers to d[0,n-1]. 28 | Slice(const char* d, size_t n) : data_(d), size_(n) {} 29 | 30 | // Create a slice that refers to the contents of "s" 31 | Slice(const std::string& s) : data_(s.data()), size_(s.size()) {} 32 | 33 | // Create a slice that refers to s[0,strlen(s)-1] 34 | Slice(const char* s) : data_(s), size_(strlen(s)) {} 35 | 36 | // Intentionally copyable. 37 | Slice(const Slice&) = default; 38 | Slice& operator=(const Slice&) = default; 39 | 40 | // Return a pointer to the beginning of the referenced data 41 | const char* data() const { return data_; } 42 | 43 | // Return the length (in bytes) of the referenced data 44 | size_t size() const { return size_; } 45 | 46 | // Return true iff the length of the referenced data is zero 47 | bool empty() const { return size_ == 0; } 48 | 49 | // Return the ith byte in the referenced data. 50 | // REQUIRES: n < size() 51 | char operator[](size_t n) const { 52 | assert(n < size()); 53 | return data_[n]; 54 | } 55 | 56 | // Change this slice to refer to an empty array 57 | void clear() { 58 | data_ = ""; 59 | size_ = 0; 60 | } 61 | 62 | // Drop the first "n" bytes from this slice. 63 | void remove_prefix(size_t n) { 64 | assert(n <= size()); 65 | data_ += n; 66 | size_ -= n; 67 | } 68 | 69 | // Return a string that contains the copy of the referenced data. 70 | std::string ToString() const { return std::string(data_, size_); } 71 | 72 | // Three-way comparison. Returns value: 73 | // < 0 iff "*this" < "b", 74 | // == 0 iff "*this" == "b", 75 | // > 0 iff "*this" > "b" 76 | int compare(const Slice& b) const; 77 | 78 | // Return true iff "x" is a prefix of "*this" 79 | bool starts_with(const Slice& x) const { 80 | return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0)); 81 | } 82 | 83 | private: 84 | const char* data_; 85 | size_t size_; 86 | }; 87 | 88 | inline bool operator==(const Slice& x, const Slice& y) { 89 | return ((x.size() == y.size()) && 90 | (memcmp(x.data(), y.data(), x.size()) == 0)); 91 | } 92 | 93 | inline bool operator!=(const Slice& x, const Slice& y) { return !(x == y); } 94 | 95 | inline int Slice::compare(const Slice& b) const { 96 | const size_t min_len = (size_ < b.size_) ? 
size_ : b.size_; 97 | int r = memcmp(data_, b.data_, min_len); 98 | if (r == 0) { 99 | if (size_ < b.size_) 100 | r = -1; 101 | else if (size_ > b.size_) 102 | r = +1; 103 | } 104 | return r; 105 | } 106 | -------------------------------------------------------------------------------- /db/log_structured.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "segment.h" 14 | #include "util/lock.h" 15 | #include "util/util.h" 16 | 17 | 18 | class DB; 19 | class LogCleaner; 20 | 21 | enum FreeStatus { FS_Sufficient, FS_Trigger, FS_Insufficient }; 22 | 23 | class LogStructured { 24 | public: 25 | explicit LogStructured(std::string db_path, size_t log_size, DB *db, 26 | int num_workers, int num_cleaners); 27 | ~LogStructured(); 28 | 29 | LogSegment *NewSegment(bool hot); 30 | void FreezeSegment(LogSegment *old_segment); 31 | void SyncCleanerGarbageBytes(std::vector &tmp_garbage_bytes); 32 | LogSegment *GetSegment(int segment_id); 33 | int GetSegmentID(const char *addr); 34 | int GetSegmentCleanerID(const char *addr); 35 | 36 | void StartCleanStatistics(); 37 | double GetCompactionCPUUsage(); 38 | double GetCompactionThroughput(); 39 | void RecoverySegments(DB *db); 40 | void RecoveryInfo(DB *db); 41 | void RecoveryAll(DB *db); 42 | 43 | // char *get_pool_start() { return pool_start_; } 44 | 45 | private: 46 | const int num_workers_; 47 | const int num_cleaners_; 48 | char *pool_start_; 49 | const size_t total_log_size_; 50 | const int num_segments_; 51 | // SpinLock reserved_list_lock_; 52 | std::atomic stop_flag_{false}; 53 | // const int max_reserved_segments_; 54 | SpinLock free_list_lock_; 55 | 56 | std::vector all_segments_; 57 | std::vector log_cleaners_; 58 | std::queue free_segments_; 59 | // std::queue reserved_segments_; 60 | 61 | std::atomic num_free_segments_{0}; 62 | std::atomic alloc_counter_{0}; 63 | const int num_limit_free_segments_; 64 | volatile int clean_threshold_ = 10; 65 | 66 | volatile FreeStatus free_status_ = FS_Sufficient; 67 | std::atomic_flag FS_flag_{ATOMIC_FLAG_INIT}; 68 | 69 | std::atomic recovery_counter_{0}; 70 | std::mutex rec_mu_; 71 | std::condition_variable rec_cv_; 72 | 73 | // statistics 74 | #ifdef LOGGING 75 | std::atomic num_new_segment_{0}; 76 | std::atomic num_new_hot_{0}; 77 | std::atomic num_new_cold_{0}; 78 | #endif 79 | uint64_t start_clean_statistics_time_ = 0; 80 | 81 | void AddClosedSegment(LogSegment *segment); 82 | void LockFreeList() { free_list_lock_.lock(); } 83 | void UnlockFreeList() { free_list_lock_.unlock(); } 84 | void UpdateCleanThreshold(); 85 | 86 | // LogSegment *NewReservedSegment() { 87 | // std::lock_guard guard(reserved_list_lock_); 88 | // LogSegment *segment = nullptr; 89 | // if (unlikely(reserved_segments_.empty())) { 90 | // ERROR_EXIT("no reserved segment left"); 91 | // } else { 92 | // segment = reserved_segments_.front(); 93 | // reserved_segments_.pop(); 94 | // } 95 | // assert(segment); 96 | // return segment; 97 | // } 98 | 99 | // bool TryAddReservedSegment(LogSegment *segment) { 100 | // std::lock_guard guard(reserved_list_lock_); 101 | // bool ret = reserved_segments_.size() < max_reserved_segments_; 102 | // if (ret) { 103 | // reserved_segments_.push(segment); 104 | // } 105 | // return ret; 106 | // } 107 | 108 | friend class LogCleaner; 109 | 110 | DISALLOW_COPY_AND_ASSIGN(LogStructured); 111 | }; 112 | 
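// Illustrative usage sketch: a DB::Worker holding a log head refills it from
// this allocator roughly as follows. The segment-level calls (HasSpaceFor /
// Append) are assumed here from the analogous ChameleonDB segment code under
// benchmarks/other; only NewSegment() and FreezeSegment() are declared above.
//
//   if (log_head_ == nullptr || !log_head_->HasSpaceFor(sz)) {
//     log_->FreezeSegment(log_head_);     // hand the sealed segment to cleaners
//     log_head_ = log_->NewSegment(hot);  // hot/cold separation picks the segment class
//   }
//   ValueType val = log_head_->Append(key, value, epoch);  // tagged pointer into the log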
-------------------------------------------------------------------------------- /db/index/masstree/misc.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #ifndef MISC_HH 17 | #define MISC_HH 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "str.hh" 24 | #include "timestamp.hh" 25 | #include "clp.h" 26 | 27 | inline void xalarm(double d) { 28 | double ip, fp = modf(d, &ip); 29 | struct itimerval x; 30 | timerclear(&x.it_interval); 31 | x.it_value.tv_sec = (long) ip; 32 | x.it_value.tv_usec = (long) (fp * 1000000); 33 | setitimer(ITIMER_REAL, &x, 0); 34 | } 35 | 36 | inline void napms(int n) /* nap n milliseconds */ 37 | { 38 | int ret; 39 | struct timespec req, rem; 40 | 41 | req.tv_sec = n / 1000; 42 | req.tv_nsec = (n % 1000) * 1000000; 43 | ret = nanosleep(&req, &rem); 44 | if(ret == -1 && errno != EINTR){ 45 | perror("nanosleep"); 46 | exit(EXIT_FAILURE); 47 | } 48 | } 49 | 50 | struct quick_istr { 51 | char* bbuf_; 52 | char buf_[32]; 53 | quick_istr() { 54 | buf_[sizeof(buf_) - 1] = 0; 55 | set(0); 56 | } 57 | quick_istr(unsigned long x, int minlen = 0) { 58 | buf_[sizeof(buf_) - 1] = 0; 59 | set(x, minlen); 60 | } 61 | void set(unsigned long x, int minlen = 0) { 62 | bbuf_ = buf_ + sizeof(buf_) - 1; 63 | do { 64 | *--bbuf_ = (x % 10) + '0'; 65 | x /= 10; 66 | } while (--minlen > 0 || x != 0); 67 | } 68 | lcdf::Str string() const { 69 | return lcdf::Str(bbuf_, buf_ + sizeof(buf_) - 1); 70 | } 71 | const char* data() const { 72 | return bbuf_; 73 | } 74 | size_t length() const { 75 | return (buf_ + sizeof(buf_) - 1) - bbuf_; 76 | } 77 | const char* c_str() const { 78 | return bbuf_; 79 | } 80 | bool operator==(lcdf::Str s) const { 81 | return s.len == int(length()) && memcmp(s.s, data(), s.len) == 0; 82 | } 83 | bool operator!=(lcdf::Str s) const { 84 | return !(*this == s); 85 | } 86 | static void increment_from_end(char* ends) { 87 | while (true) { 88 | --ends; 89 | ++*ends; 90 | if (*ends <= '9') { 91 | return; 92 | } 93 | *ends = '0'; 94 | } 95 | } 96 | static void binary_increment_from_end(char* ends) { 97 | while (true) { 98 | --ends; 99 | *ends = (char) ((unsigned char) *ends + 1); 100 | if (*ends != 0) { 101 | return; 102 | } 103 | } 104 | } 105 | }; 106 | 107 | struct Clp_Parser; 108 | int clp_parse_suffixdouble(struct Clp_Parser *clp, const char *vstr, 109 | int complain, void *user_data); 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /db/index/masstree/masstree.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, 
Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2014 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2014 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #ifndef MASSTREE_HH 17 | #define MASSTREE_HH 18 | #include "compiler.hh" 19 | #include "str.hh" 20 | #include "ksearch.hh" 21 | 22 | namespace Masstree { 23 | using lcdf::Str; 24 | using lcdf::String; 25 | 26 | class key_unparse_printable_string; 27 | template class value_print; 28 | 29 | template struct nodeparams { 30 | static constexpr int leaf_width = LW; 31 | static constexpr int internode_width = IW; 32 | static constexpr bool concurrent = true; 33 | static constexpr bool prefetch = true; 34 | static constexpr int bound_method = bound_method_binary; 35 | static constexpr int debug_level = 0; 36 | typedef uint64_t ikey_type; 37 | typedef uint32_t nodeversion_value_type; 38 | static constexpr bool need_phantom_epoch = true; 39 | typedef uint64_t phantom_epoch_type; 40 | static constexpr ssize_t print_max_indent_depth = 12; 41 | typedef key_unparse_printable_string key_unparse_type; 42 | }; 43 | 44 | template constexpr int nodeparams::leaf_width; 45 | template constexpr int nodeparams::internode_width; 46 | template constexpr int nodeparams::debug_level; 47 | 48 | template class node_base; 49 | template class leaf; 50 | template class internode; 51 | template class leafvalue; 52 | template class key; 53 | template class basic_table; 54 | template class unlocked_tcursor; 55 | template class tcursor; 56 | 57 | template 58 | class basic_table { 59 | public: 60 | typedef P parameters_type; 61 | typedef node_base

<P> node_type; 62 | typedef leaf<P> leaf_type; 63 | typedef typename P::value_type value_type; 64 | typedef typename P::threadinfo_type threadinfo; 65 | typedef unlocked_tcursor<P> unlocked_cursor_type; 66 | typedef tcursor<P> cursor_type; 67 | 68 | inline basic_table(); 69 | 70 | void initialize(threadinfo& ti); 71 | void destroy(threadinfo& ti); 72 | 73 | inline node_type* root() const; 74 | inline node_type* fix_root(); 75 | 76 | bool get(Str key, value_type& value, threadinfo& ti) const; 77 | 78 | template <typename F> 79 | int scan(Str firstkey, bool matchfirst, F& scanner, threadinfo& ti) const; 80 | template <typename F> 81 | int rscan(Str firstkey, bool matchfirst, F& scanner, threadinfo& ti) const; 82 | 83 | inline void print(FILE* f = 0) const; 84 | 85 | private: 86 | node_type* root_; 87 | 88 | template <typename H, typename F> 89 | int scan(H helper, Str firstkey, bool matchfirst, 90 | F& scanner, threadinfo& ti) const; 91 | 92 | friend class unlocked_tcursor<P>; 93 | friend class tcursor<P>
; 94 | }; 95 | 96 | } // namespace Masstree 97 | #endif 98 | -------------------------------------------------------------------------------- /db/index/masstree/hashcode.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #ifndef CLICK_HASHCODE_HH 17 | #define CLICK_HASHCODE_HH 18 | #include 19 | #include 20 | #if HAVE_STD_HASH 21 | #include 22 | #endif 23 | 24 | // Notes about the hashcode template: On GCC 4.3.0, "template <>" is required 25 | // on the specializations or they aren't used. Just plain overloaded 26 | // functions aren't used. The specializations must be e.g. "const char &", 27 | // not "char", or GCC complains about a specialization not matching the 28 | // general template. The main template takes a const reference for two 29 | // reasons. First, providing both "hashcode_t hashcode(T)" and "hashcode_t 30 | // hashcode(const T&)" leads to ambiguity errors. Second, providing only 31 | // "hashcode_t hashcode(T)" is slower by looks like 8% when T is a String, 32 | // because of copy constructors; for types with more expensive non-default 33 | // copy constructors this would probably be worse. 34 | 35 | typedef size_t hashcode_t; ///< Typical type for a hashcode() value. 
36 | 37 | template 38 | inline hashcode_t hashcode(T const &x) { 39 | return x.hashcode(); 40 | } 41 | 42 | template <> 43 | inline hashcode_t hashcode(char const &x) { 44 | return x; 45 | } 46 | 47 | template <> 48 | inline hashcode_t hashcode(signed char const &x) { 49 | return x; 50 | } 51 | 52 | template <> 53 | inline hashcode_t hashcode(unsigned char const &x) { 54 | return x; 55 | } 56 | 57 | template <> 58 | inline hashcode_t hashcode(short const &x) { 59 | return x; 60 | } 61 | 62 | template <> 63 | inline hashcode_t hashcode(unsigned short const &x) { 64 | return x; 65 | } 66 | 67 | template <> 68 | inline hashcode_t hashcode(int const &x) { 69 | return x; 70 | } 71 | 72 | template <> 73 | inline hashcode_t hashcode(unsigned const &x) { 74 | return x; 75 | } 76 | 77 | template <> 78 | inline hashcode_t hashcode(long const &x) { 79 | return x; 80 | } 81 | 82 | template <> 83 | inline hashcode_t hashcode(unsigned long const &x) { 84 | return x; 85 | } 86 | 87 | template <> 88 | inline hashcode_t hashcode(long long const &x) { 89 | return (x >> 32) ^ x; 90 | } 91 | 92 | template <> 93 | inline hashcode_t hashcode(unsigned long long const &x) { 94 | return (x >> 32) ^ x; 95 | } 96 | 97 | #if HAVE_INT64_TYPES && !HAVE_INT64_IS_LONG && !HAVE_INT64_IS_LONG_LONG 98 | template <> 99 | inline hashcode_t hashcode(int64_t const &x) { 100 | return (x >> 32) ^ x; 101 | } 102 | 103 | template <> 104 | inline hashcode_t hashcode(uint64_t const &x) { 105 | return (x >> 32) ^ x; 106 | } 107 | #endif 108 | 109 | template 110 | inline hashcode_t hashcode(T * const &x) { 111 | return reinterpret_cast(x) >> 3; 112 | } 113 | 114 | template 115 | inline typename T::key_const_reference hashkey(const T &x) { 116 | return x.hashkey(); 117 | } 118 | 119 | #endif 120 | -------------------------------------------------------------------------------- /benchmarks/other/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(BENCHMARK_HISTOGRAM 3 | ${CMAKE_SOURCE_DIR}/benchmarks/histogram.cpp 4 | ) 5 | 6 | # pmemkv 7 | add_executable(pmemkv_bench 8 | ${CMAKE_CURRENT_LIST_DIR}/pmemkv_bench.cpp 9 | ${BENCHMARK_HISTOGRAM} 10 | ) 11 | find_library(PMEMKV pmemkv) 12 | if (NOT PMEMKV_FOUND) 13 | include(FetchContent) 14 | FetchContent_Declare( 15 | pmemkv 16 | GIT_REPOSITORY https://github.com/pmem/pmemkv.git 17 | GIT_TAG a92abed550ece9c5c70b6be17db8e9cb19e328e4 18 | ) 19 | set(BUILD_TESTS OFF CACHE BOOL "Suppressing pmemkv' tests" FORCE) 20 | set(BUILD_JSON_CONFIG OFF CACHE BOOL "Suppressing pmemkv' helper library" FORCE) 21 | FetchContent_MakeAvailable(pmemkv) 22 | target_include_directories(pmemkv_bench PRIVATE ${pmemkv_SOURCE_DIR}/src) 23 | endif() 24 | 25 | 26 | target_link_libraries(pmemkv_bench benchmark::benchmark pmemkv) 27 | 28 | 29 | # viper 30 | # CONCURRENTQUEUE 31 | add_library(viper INTERFACE) 32 | target_include_directories(viper INTERFACE ${CMAKE_CURRENT_LIST_DIR}/viper/) 33 | target_sources(viper INTERFACE ${CMAKE_CURRENT_LIST_DIR}/viper/hotkeyset.cpp) 34 | target_compile_options(viper INTERFACE -march=native) 35 | 36 | option(VIPER_CONCURRENT_QUEUE_PROVIDED "Set ON if the concurrentqueue dependency is provided and should not be downloaded by Viper." 
OFF) 37 | if (NOT ${VIPER_CONCURRENT_QUEUE_PROVIDED}) 38 | FetchContent_Declare( 39 | concurrentqueue 40 | GIT_REPOSITORY https://github.com/cameron314/concurrentqueue.git 41 | GIT_TAG v1.0.3 42 | ) 43 | FetchContent_MakeAvailable(concurrentqueue) 44 | target_link_libraries(viper INTERFACE concurrentqueue ${PMEM} ${PMEMOBJ}) 45 | endif() 46 | 47 | add_executable(viper_bench 48 | ${CMAKE_CURRENT_LIST_DIR}/viper_bench.cpp 49 | ${BENCHMARK_HISTOGRAM} 50 | ) 51 | target_link_libraries(viper_bench viper benchmark::benchmark ${PMEM} ${PMEMOBJ}) 52 | 53 | 54 | # pmem_rocksdb 55 | add_executable(pmem_rocksdb_bench 56 | ${CMAKE_CURRENT_LIST_DIR}/pmem_rocksdb_bench.cpp 57 | ${BENCHMARK_HISTOGRAM} 58 | ) 59 | find_package(Git QUIET) 60 | set(PMEM_ROCKSDB_SRC_DIR ${CMAKE_CURRENT_LIST_DIR}/pmem-rocksdb) 61 | if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git") 62 | # Update submodules as needed 63 | option(GIT_SUBMODULE "Check submodules during build" ON) 64 | if(GIT_SUBMODULE) 65 | message(STATUS "Submodule update") 66 | execute_process( 67 | COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive 68 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 69 | RESULT_VARIABLE PMEM_ROCKSDB_SUBMODULE_RESULT 70 | ) 71 | if(NOT PMEM_ROCKSDB_SUBMODULE_RESULT EQUAL "0") 72 | message(FATAL_ERROR "git submodule update --init --recursive failed with ${PMEM_ROCKSDB_SUBMODULE_RESULT}, please checkout submodules") 73 | endif() 74 | endif() 75 | endif() 76 | # build pmem-rocksdb::rocksdb 77 | add_custom_command(TARGET pmem_rocksdb_bench PRE_BUILD 78 | COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Release -DWITH_DCPMM=ON -DWITH_SNAPPY=ON -DFAIL_ON_WARNINGS=OFF -S . -B "build" 79 | COMMAND ${CMAKE_COMMAND} --build "build" --target rocksdb -j 80 | WORKING_DIRECTORY ${PMEM_ROCKSDB_SRC_DIR} 81 | ) 82 | 83 | target_compile_definitions(pmem_rocksdb_bench PRIVATE ON_DCPMM) 84 | target_include_directories(pmem_rocksdb_bench PRIVATE ${PMEM_ROCKSDB_SRC_DIR}/include) 85 | target_link_directories(pmem_rocksdb_bench PRIVATE ${PMEM_ROCKSDB_SRC_DIR}/build) 86 | target_link_libraries(pmem_rocksdb_bench benchmark::benchmark rocksdb ${PMEM} ${PMEMOBJ} snappy) 87 | 88 | 89 | # ChameleonDB 90 | aux_source_directory(${CMAKE_CURRENT_LIST_DIR}/ChameleonDB CHAMELEONDB_SOURCE_FILES) 91 | add_executable(chameleondb_bench 92 | ${CMAKE_CURRENT_LIST_DIR}/chameleondb_bench.cpp 93 | ${CHAMELEONDB_SOURCE_FILES} 94 | ${BENCHMARK_HISTOGRAM} 95 | ${UTIL_FILES} 96 | ) 97 | target_link_libraries(chameleondb_bench benchmark::benchmark ${PMEM} ${PMEMOBJ}) 98 | -------------------------------------------------------------------------------- /include/db.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "config.h" 10 | #include "slice.h" 11 | #include "db_common.h" 12 | #include "util/util.h" 13 | #include "util/thread_status.h" 14 | #include "util/index_arena.h" 15 | 16 | 17 | // index operations 18 | class Index { 19 | public: 20 | virtual ~Index(){}; 21 | virtual ValueType Get(const Slice &key) = 0; 22 | virtual void Put(const Slice &key, LogEntryHelper &le_helper) = 0; 23 | virtual void Delete(const Slice &key) = 0; 24 | virtual void Scan(const Slice &key, int cnt, std::vector &vec) { 25 | ERROR_EXIT("not supported in this class"); 26 | } 27 | virtual void GCMove(const Slice &key, LogEntryHelper &le_helper) = 0; 28 | virtual void PrefetchEntry(const Shortcut &sc) {} 29 | }; 30 | 31 | class LogSegment; 32 | class 
LogStructured; 33 | class HotKeySet; 34 | class DB { 35 | public: 36 | class Worker { 37 | public: 38 | explicit Worker(DB *db); 39 | ~Worker(); 40 | 41 | bool Get(const Slice &key, std::string *value); 42 | void Put(const Slice &key, const Slice &value); 43 | size_t Scan(const Slice &key, int cnt); 44 | bool Delete(const Slice &key); 45 | 46 | #ifdef LOG_BATCHING 47 | void FlushRemainAndUpdateIndex(); 48 | #endif 49 | 50 | private: 51 | int worker_id_; 52 | DB *db_; 53 | LogSegment *log_head_ = nullptr; 54 | #ifdef HOT_COLD_SEPARATE 55 | LogSegment *cold_log_head_ = nullptr; 56 | #endif 57 | 58 | // lazily update garbage bytes for cleaner, avoid too many FAAs 59 | std::vector tmp_cleaner_garbage_bytes_; 60 | 61 | ValueType MakeKVItem(const Slice &key, const Slice &value, bool hot); 62 | void UpdateIndex(const Slice &key, ValueType val, bool hot); 63 | void MarkGarbage(ValueType tagged_val); 64 | void FreezeSegment(LogSegment *segment); 65 | 66 | #ifdef LOG_BATCHING 67 | void BatchIndexInsert(int cnt, bool hot); 68 | 69 | std::queue> buffer_queue_; 70 | #ifdef HOT_COLD_SEPARATE 71 | std::queue> cold_buffer_queue_; 72 | #endif 73 | #endif 74 | 75 | DISALLOW_COPY_AND_ASSIGN(Worker); 76 | }; 77 | 78 | DB(std::string pool_path, size_t log_size, int num_workers, int num_cleaners); 79 | virtual ~DB(); 80 | 81 | std::unique_ptr GetWorker() { 82 | return std::make_unique(this); 83 | } 84 | 85 | // statistics 86 | void StartCleanStatistics(); 87 | double GetCompactionCPUUsage(); 88 | double GetCompactionThroughput(); 89 | 90 | // recovery 91 | void RecoverySegments(); 92 | void RecoveryInfo(); 93 | void RecoveryAll(); 94 | void NewIndexForRecoveryTest(); 95 | 96 | private: 97 | Index *index_; 98 | LogStructured *log_; 99 | const int num_workers_; 100 | const int num_cleaners_; 101 | std::atomic cur_num_workers_{0}; 102 | #ifdef HOT_COLD_SEPARATE 103 | HotKeySet *hot_key_set_ = nullptr; 104 | #endif 105 | ThreadStatus thread_status_; 106 | 107 | static constexpr int EPOCH_MAP_SIZE = 1024; 108 | std::array epoch_map_{}; 109 | 110 | // // index operations 111 | // virtual ValueType IndexGet(const Slice &key) = 0; 112 | // virtual void IndexPut(const Slice &key, LogEntryHelper &le_helper) = 0; 113 | // virtual void IndexDelete(const Slice &key) = 0; 114 | // virtual void IndexScan(const Slice &key, int cnt, 115 | // std::vector &vec) { 116 | // ERROR_EXIT("not supported"); 117 | // } 118 | // virtual void GCMove(const Slice &key, LogEntryHelper &le_helper) = 0; 119 | // virtual void PrefetchEntry(const Shortcut &sc) {} 120 | 121 | uint32_t GetKeyEpoch(uint64_t i_key) { 122 | size_t idx = i_key % EPOCH_MAP_SIZE; 123 | return epoch_map_[idx].fetch_add(1, std::memory_order_relaxed); 124 | } 125 | 126 | friend class LogCleaner; 127 | friend class HotKeySet; 128 | 129 | DISALLOW_COPY_AND_ASSIGN(DB); 130 | }; 131 | -------------------------------------------------------------------------------- /util/lock.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "util/debug_helper.h" 4 | #include "util/timer.h" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | class SpinLock { 11 | public: 12 | SpinLock() : mutex(false) {} 13 | SpinLock(std::string name) : mutex(false), name(name) {} 14 | 15 | bool try_lock() { 16 | bool expect = false; 17 | return mutex.compare_exchange_strong( 18 | expect, true, std::memory_order_release, std::memory_order_relaxed); 19 | } 20 | 21 | void lock() { 22 | uint64_t startOfContention = 0; 23 | bool expect = false; 24 
| while (!mutex.compare_exchange_weak(expect, true, std::memory_order_release, 25 | std::memory_order_relaxed)) { 26 | expect = false; 27 | debugLongWaitAndDeadlock(&startOfContention); 28 | } 29 | if (startOfContention != 0) { 30 | contendedTime += NowMicros() - startOfContention; 31 | ++contendedAcquisitions; 32 | } 33 | } 34 | 35 | void unlock() { mutex.store(0, std::memory_order_release); } 36 | 37 | void report() { 38 | LOG("spinlock %s: contendedAcquisitions %lu contendedTime %lu us", 39 | name.c_str(), contendedAcquisitions, contendedTime); 40 | } 41 | 42 | private: 43 | std::atomic_bool mutex; 44 | std::string name; 45 | uint64_t contendedAcquisitions = 0; 46 | uint64_t contendedTime = 0; 47 | 48 | void debugLongWaitAndDeadlock(uint64_t *startOfContention) { 49 | if (*startOfContention == 0) { 50 | *startOfContention = NowMicros(); 51 | } else { 52 | uint64_t now = NowMicros(); 53 | if (now >= *startOfContention + 1000000) { 54 | LOG("%s SpinLock locked for one second; deadlock?", name.c_str()); 55 | } 56 | } 57 | } 58 | }; 59 | 60 | // read write lock 61 | class ReadWriteLock { 62 | // the lowest bit is used for writer 63 | public: 64 | bool TryReadLock() { 65 | uint64_t old_val = lock_value.load(std::memory_order_acquire); 66 | while (true) { 67 | if (old_val & 1 || old_val > 1024) { 68 | break; 69 | } 70 | uint64_t new_val = old_val + 2; 71 | bool cas = lock_value.compare_exchange_weak(old_val, new_val, 72 | std::memory_order_acq_rel, 73 | std::memory_order_acquire); 74 | if (cas) { 75 | return true; 76 | } 77 | } 78 | return false; 79 | } 80 | 81 | void ReadLock() { 82 | while (!TryReadLock()) 83 | ; 84 | } 85 | 86 | void ReadUnlock() { 87 | uint64_t old_val = lock_value.load(std::memory_order_acquire); 88 | while (true) { 89 | if (old_val <= 1) { 90 | assert(old_val >= 2); 91 | return; 92 | } 93 | uint64_t new_val = old_val - 2; 94 | if (lock_value.compare_exchange_weak(old_val, new_val)) { 95 | break; 96 | } 97 | } 98 | } 99 | 100 | bool TryWriteLock() { 101 | uint64_t old_val = lock_value.load(std::memory_order_acquire); 102 | while (true) { 103 | if (old_val & 1) { 104 | return false; 105 | } 106 | uint64_t new_val = old_val | 1; 107 | bool cas = lock_value.compare_exchange_weak(old_val, new_val); 108 | if (cas) { 109 | break; 110 | } 111 | } 112 | // got write lock, waiting for readers 113 | while (lock_value.load(std::memory_order_acquire) != 1) { 114 | asm("nop"); 115 | } 116 | return true; 117 | } 118 | 119 | void WriteLock() { 120 | while (!TryWriteLock()) 121 | ; 122 | } 123 | 124 | void WriteUnlock() { 125 | assert(lock_value == 1); 126 | lock_value.store(0); 127 | } 128 | 129 | private: 130 | std::atomic_uint_fast64_t lock_value{0}; 131 | }; 132 | 133 | class ReadLockHelper { 134 | public: 135 | explicit ReadLockHelper(ReadWriteLock &rwlock) : rwlock_(rwlock) { 136 | rwlock_.ReadLock(); 137 | } 138 | 139 | ~ReadLockHelper() { rwlock_.ReadUnlock(); } 140 | 141 | private: 142 | ReadWriteLock &rwlock_; 143 | }; 144 | -------------------------------------------------------------------------------- /db/hotkeyset.cpp: -------------------------------------------------------------------------------- 1 | #include "hotkeyset.h" 2 | #include "db.h" 3 | 4 | #include 5 | #include 6 | 7 | HotKeySet::HotKeySet(DB *db) : db_(db) { 8 | current_set_ = nullptr; 9 | update_record_ = std::make_unique(db_->num_workers_); 10 | } 11 | 12 | HotKeySet::~HotKeySet() { 13 | need_record_ = false; 14 | stop_flag_.store(true); 15 | if (update_hot_set_thread_.joinable()) { 16 | 
update_hot_set_thread_.join(); 17 | } 18 | if (current_set_) { 19 | delete current_set_; 20 | } 21 | } 22 | 23 | void HotKeySet::Record(const Slice &key, int worker_id, bool hit) { 24 | UpdateKeyRecord &record = update_record_[worker_id]; 25 | if (need_record_) { 26 | uint64_t i_key = *(uint64_t *)key.data(); 27 | record.records.push_back(i_key); 28 | if (record.records.size() >= RECORD_BUFFER_SIZE) { 29 | std::lock_guard lock(record.lock); 30 | record.records_list.push_back(std::move(record.records)); 31 | record.records.reserve(RECORD_BUFFER_SIZE); 32 | } 33 | } else if (need_count_hit_) { 34 | record.hit_cnt += hit; 35 | ++record.total_cnt; 36 | if (record.total_cnt == RECORD_BATCH_CNT) { 37 | if (record.hit_cnt < RECORD_BATCH_CNT * 0.5) { 38 | // LOG("hit ratio = %.1lf%%", 100. * record.hit_cnt / record.total_cnt); 39 | if (!update_schedule_flag_.test_and_set()) { 40 | BeginUpdateHotKeySet(); 41 | } 42 | } 43 | record.hit_cnt = record.total_cnt = 0; 44 | } 45 | } 46 | } 47 | 48 | void HotKeySet::BeginUpdateHotKeySet() { 49 | need_record_ = true; 50 | need_count_hit_ = false; 51 | if (update_hot_set_thread_.joinable()) { 52 | update_hot_set_thread_.join(); 53 | } 54 | update_hot_set_thread_ = std::thread(&HotKeySet::UpdateHotSet, this); 55 | } 56 | 57 | bool HotKeySet::Exist(const Slice &key) { 58 | uint64_t i_key = *(uint64_t *)key.data(); 59 | return current_set_ && current_set_->find(i_key) != current_set_->end(); 60 | } 61 | 62 | void HotKeySet::UpdateHotSet() { 63 | // bind_core_on_numa(db_->num_workers_); 64 | 65 | std::unordered_map count; 66 | uint64_t update_cnt = 0; 67 | while (!stop_flag_.load(std::memory_order_relaxed)) { 68 | if (count.size() > HOT_NUM * 4 || update_cnt > HOT_NUM * 16) { 69 | break; 70 | } 71 | std::list> list; 72 | for (int i = 0; i < db_->num_workers_; i++) { 73 | std::lock_guard lock(update_record_[i].lock); 74 | list.splice(list.end(), update_record_[i].records_list); 75 | } 76 | for (auto it = list.begin(); it != list.end(); it++) { 77 | for (int i = 0; i < it->size(); i++) { 78 | ++count[it->at(i)]; 79 | } 80 | } 81 | update_cnt += list.size() * RECORD_BUFFER_SIZE; 82 | } 83 | 84 | need_record_ = false; 85 | 86 | std::priority_queue, 87 | std::greater> 88 | topK; 89 | int max_cnt = 0; 90 | for (auto it = count.begin(); it != count.end(); it++) { 91 | if (it->second > 1) { 92 | max_cnt = std::max(max_cnt, it->second); 93 | if (topK.size() < HOT_NUM) { 94 | topK.push({it->first, it->second}); 95 | } else if (it->second > topK.top().cnt) { 96 | topK.pop(); 97 | topK.push({it->first, it->second}); 98 | } 99 | } 100 | } 101 | 102 | std::unordered_set *old_set = current_set_; 103 | std::unordered_set *new_set = nullptr; 104 | if (!topK.empty()) { 105 | if (max_cnt > 3 * topK.top().cnt) { 106 | new_set = new std::unordered_set(); 107 | new_set->reserve(topK.size()); 108 | while (!topK.empty()) { 109 | new_set->insert(topK.top().key); 110 | topK.pop(); 111 | } 112 | LOG("new set size %lu", new_set->size()); 113 | } 114 | } 115 | 116 | current_set_ = new_set; 117 | db_->thread_status_.rcu_barrier(); 118 | for (int i = 0; i < db_->num_workers_; i++) { 119 | // need_record_ is false, other threads cannot operate on records 120 | update_record_[i].records.clear(); 121 | update_record_[i].records_list.clear(); 122 | update_record_[i].hit_cnt = update_record_[i].total_cnt = 0; 123 | } 124 | if (old_set) { 125 | delete old_set; 126 | } 127 | 128 | update_schedule_flag_.clear(std::memory_order_relaxed); 129 | need_count_hit_ = true; 130 | } 131 | 
-------------------------------------------------------------------------------- /benchmarks/trace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include "util/debug_helper.h" 6 | 7 | class Random { 8 | private: 9 | uint32_t seed_; 10 | 11 | public: 12 | explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { 13 | // Avoid bad seeds. 14 | if (seed_ == 0 || seed_ == 2147483647L) { 15 | seed_ = 1; 16 | } 17 | } 18 | uint32_t Next() { 19 | static const uint32_t M = 2147483647L; // 2^31-1 20 | static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 21 | // We are computing 22 | // seed_ = (seed_ * A) % M, where M = 2^31-1 23 | // 24 | // seed_ must not be zero or M, or else all subsequent computed values 25 | // will be zero or M respectively. For all other values, seed_ will end 26 | // up cycling through every number in [1,M-1] 27 | uint64_t product = seed_ * A; 28 | 29 | // Compute (product % M) using the fact that ((x << 31) % M) == x. 30 | seed_ = static_cast((product >> 31) + (product & M)); 31 | // The first reduction may overflow by 1 bit, so we may need to 32 | // repeat. mod == M is not possible; using > allows the faster 33 | // sign-bit-based test. 34 | if (seed_ > M) { 35 | seed_ -= M; 36 | } 37 | return seed_; 38 | } 39 | // Returns a uniformly distributed value in the range [0..n-1] 40 | // REQUIRES: n > 0 41 | uint32_t Uniform(int n) { return Next() % n; } 42 | 43 | // Randomly returns true ~"1/n" of the time, and false otherwise. 44 | // REQUIRES: n > 0 45 | bool OneIn(int n) { return (Next() % n) == 0; } 46 | 47 | // Skewed: pick "base" uniformly from range [0,max_log] and then 48 | // return "base" random bits. The effect is to pick a number in the 49 | // range [0,2^max_log-1] with exponential bias towards smaller numbers. 
50 | uint32_t Skewed(int max_log) { return Uniform(1 << Uniform(max_log + 1)); } 51 | }; 52 | 53 | enum OP_Type { 54 | OP_Read, OP_Update, OP_Insert, OP_Scan 55 | }; 56 | 57 | enum YCSB_Type { 58 | YCSB_A, 59 | YCSB_B, 60 | YCSB_C, 61 | YCSB_E, 62 | YCSB_W20, 63 | YCSB_W40, 64 | YCSB_W60, 65 | YCSB_W80, 66 | YCSB_W100, 67 | YCSB_Type_END, 68 | YCSB_W0 = YCSB_C 69 | }; 70 | 71 | static const char *ycsb_name[] = {"YCSB_A", "YCSB_B", "YCSB_C (YCSB_W0)", 72 | "YCSB_E", "YCSB_W20", "YCSB_W40", 73 | "YCSB_W60", "YCSB_W80", "YCSB_W100"}; 74 | 75 | static constexpr int YCSB_Put_Ratio[YCSB_Type_END] = {50, 5, 0, 5, 20, 76 | 40, 60, 80, 100}; 77 | 78 | inline OP_Type get_op_type(Random *rand, YCSB_Type type) { 79 | if (type == YCSB_W100) { 80 | return OP_Update; 81 | } else if (type == YCSB_C) { 82 | return OP_Read; 83 | } 84 | OP_Type t; 85 | uint32_t k = rand->Next() % 100; 86 | switch (type) { 87 | case YCSB_A: 88 | case YCSB_B: 89 | case YCSB_W20: 90 | case YCSB_W40: 91 | case YCSB_W60: 92 | case YCSB_W80: { 93 | if (k < YCSB_Put_Ratio[type]) { 94 | t = OP_Update; 95 | } else { 96 | t = OP_Read; 97 | } 98 | break; 99 | } 100 | case YCSB_E: { 101 | if (k < YCSB_Put_Ratio[type]) { 102 | t = OP_Update; 103 | } else { 104 | t = OP_Scan; 105 | } 106 | break; 107 | } 108 | default: 109 | ERROR_EXIT("not supported"); 110 | break; 111 | } 112 | 113 | return t; 114 | } 115 | 116 | /** 117 | * Facebook ETC 118 | * 40% 1~13 bytes 119 | * 55% 14~300 bytes 120 | * 5% > 300 bytes (up to 4000) 121 | * return kind 122 | */ 123 | enum class ETC_Kind { small, medium, large }; 124 | 125 | inline ETC_Kind ETC_get_kind(Random &rand) { 126 | int k = rand.Uniform(100); 127 | if (k < 40) { 128 | return ETC_Kind::small; 129 | } else if (k < 95) { 130 | return ETC_Kind::medium; 131 | } else { 132 | return ETC_Kind::large; 133 | } 134 | } 135 | 136 | size_t ETC_get_value_size(Random &rand, ETC_Kind kind) { 137 | if (kind == ETC_Kind::small) { 138 | return 8; 139 | } else if (kind == ETC_Kind::medium) { 140 | return 8 * (2 + rand.Uniform(36)); 141 | } else { 142 | return 8 * (38 + rand.Uniform(463)); 143 | } 144 | } 145 | 146 | static constexpr size_t ETC_LARGE_VALUE_BOUNDARY = 300; 147 | static constexpr double ETC_AVG_VALUE_SIZE = 148 | 0.4 * 8 + 0.55 * (16 + 296) / 2 + 0.05 * (304 + 4000) / 2; 149 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(PACMAN) 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | # configurations 6 | 7 | # config.h 8 | 9 | set(USE_NUMA_NODE 0 CACHE STRING "To use which numa node") 10 | option(IDX_PERSISTENT "IDX_PERSISTENT" ON) 11 | option(LOG_PERSISTENT "LOG_PERSISTENT" ON) 12 | set(INDEX_TYPE 1 CACHE STRING "Index type of DB") # 1 CCEH 2 FastFair 3 Masstree 13 | if (INDEX_TYPE EQUAL 3) # MASSTREE 14 | set(IDX_PERSISTENT OFF) 15 | endif() 16 | 17 | option(LOG_BATCHING "LOG_BATCHING to simulate FlatStore's batching (if LOG_PERSISTENT)" ON) 18 | 19 | option(REDUCE_PM_ACCESS "REDUCE_PM_ACCESS" OFF) 20 | option(HOT_COLD_SEPARATE "HOT_COLD_SEPARATE" OFF) 21 | option(GC_SHORTCUT "GC_SHORTCUT" OFF) 22 | option(BATCH_COMPACTION "BATCH_COMPACTION" OFF) 23 | 24 | option(PACMAN "PACMAN" OFF) 25 | if (PACMAN) 26 | set(REDUCE_PM_ACCESS ON) 27 | set(HOT_COLD_SEPARATE ON) 28 | set(BATCH_COMPACTION ON) 29 | if (INDEX_TYPE EQUAL 1 AND NOT IDX_PERSISTENT) 30 | set(GC_SHORTCUT OFF) 31 | else() 32 | set(GC_SHORTCUT ON) 33 | endif() 34 | endif() 
35 | 36 | message(STATUS "SET USE_NUMA_NODE ${USE_NUMA_NODE}") 37 | message(STATUS "SET INDEX_TYPE ${INDEX_TYPE}") 38 | message(STATUS "SET IDX_PERSISTENT ${IDX_PERSISTENT}") 39 | message(STATUS "SET LOG_PERSISTENT ${LOG_PERSISTENT}") 40 | message(STATUS "SET REDUCE_PM_ACCESS ${REDUCE_PM_ACCESS}") 41 | message(STATUS "SET HOT_COLD_SEPARATE ${HOT_COLD_SEPARATE}") 42 | message(STATUS "SET GC_SHORTCUT ${GC_SHORTCUT}") 43 | message(STATUS "SET BATCH_COMPACTION ${BATCH_COMPACTION}") 44 | 45 | configure_file( 46 | "${PROJECT_SOURCE_DIR}/include/config.h.in" 47 | "${PROJECT_BINARY_DIR}/config.h" 48 | ) 49 | 50 | # bench_config.h 51 | 52 | set(NUM_KEYS 200000000 CACHE STRING "number of keys") 53 | set(NUM_OPS_PER_THREAD 20000000 CACHE STRING "number of operations per thread") 54 | set(NUM_WARMUP_OPS_PER_THREAD 0 CACHE STRING "number of warmup operations per thread") 55 | set(VALUE_SIZE 48 CACHE STRING "value size") 56 | set(NUM_GC_THREADS 4 CACHE STRING "number of cleaner threads") 57 | set(WORKLOAD_TYPE YCSB CACHE STRING "workload type") # YCSB ETC 58 | set(YCSB_TYPE YCSB_A CACHE STRING "ycsb workload type if using YCSB") 59 | # YCSB_A, YCSB_B, YCSB_C, YCSB_E, YCSB_W0, YCSB_W20, YCSB_W40, YCSB_W60, YCSB_W80, YCSB_W100 60 | set(SKEW true CACHE STRING "Zipfian (skew) workload") # true false 61 | 62 | option(MEASURE_LATENCY "MEASURE_LATENCY" OFF) 63 | option(USE_ALL_CORES "use all cores in a numa node" OFF) 64 | option(TEST_LOAD "TEST_LOAD" OFF) 65 | 66 | configure_file( 67 | "${PROJECT_SOURCE_DIR}/benchmarks/bench_config.h.in" 68 | "${PROJECT_BINARY_DIR}/bench_config.h" 69 | ) 70 | 71 | ############ 72 | 73 | option(EVAL_OTHER_SYSTEMS "build and evaluate other systems" ON) 74 | if (EVAL_OTHER_SYSTEMS) 75 | message(STATUS "Will build and evaluate other systems") 76 | endif() 77 | 78 | option(LOGGING "LOGGING" OFF) 79 | 80 | if (CMAKE_BUILD_TYPE MATCHES Release) 81 | message(STATUS "Setting build type to 'Release'") 82 | else() 83 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb") 84 | set(LOGGING ON) 85 | endif() 86 | 87 | if (LOGGING) 88 | add_compile_definitions(LOGGING) 89 | message(STATUS "enable LOGGING") 90 | endif() 91 | 92 | ################################################################# 93 | 94 | find_library(PMEM pmem) 95 | find_library(PMEMOBJ pmemobj) 96 | 97 | include_directories( 98 | "${PROJECT_SOURCE_DIR}/include" 99 | "${PROJECT_SOURCE_DIR}" 100 | "${PROJECT_BINARY_DIR}" 101 | ) 102 | 103 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -march=native") 104 | #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread -g -ggdb -pthread -march=native") 105 | 106 | aux_source_directory(${PROJECT_SOURCE_DIR}/db DB_FILES) 107 | aux_source_directory(${PROJECT_SOURCE_DIR}/util UTIL_FILES) 108 | set(DB_FILES ${DB_FILES} ${UTIL_FILES}) 109 | 110 | # FastFair 111 | add_library(fastfair ${PROJECT_SOURCE_DIR}/db/index/FAST_FAIR/ff_btree.cpp) 112 | 113 | # CCEH 114 | add_library(cceh ${PROJECT_SOURCE_DIR}/db/index/CCEH/CCEH_MSB.cpp) 115 | 116 | # Masstree 117 | aux_source_directory(${PROJECT_SOURCE_DIR}/db/index/masstree MASSTREE_SOURCE_FILES) 118 | add_library(masstree ${MASSTREE_SOURCE_FILES}) 119 | # target_include_directories(masstree ${PROJECT_SOURCE_DIR}/db/index/masstree) 120 | target_compile_options(masstree PRIVATE 121 | $<$:-include ${PROJECT_SOURCE_DIR}/db/index/masstree/config.h>) 122 | 123 | add_subdirectory(benchmarks) 124 | add_subdirectory(example) 125 | -------------------------------------------------------------------------------- /benchmarks/other/viper/hotkeyset.cpp: 
-------------------------------------------------------------------------------- 1 | #include "hotkeyset.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | HotKeySet::HotKeySet(int num_workers) : num_workers_(num_workers) { 8 | current_set_ = nullptr; 9 | update_record_ = std::make_unique(num_workers_); 10 | } 11 | 12 | HotKeySet::~HotKeySet() { 13 | need_record_ = false; 14 | stop_flag_.store(true); 15 | if (update_hot_set_thread_.joinable()) { 16 | update_hot_set_thread_.join(); 17 | } 18 | if (current_set_) { 19 | delete current_set_; 20 | } 21 | } 22 | 23 | void HotKeySet::Record(const Slice &key, int worker_id, bool hit) { 24 | UpdateKeyRecord &record = update_record_[worker_id]; 25 | if (need_record_) { 26 | uint64_t i_key = *(uint64_t *)key.data(); 27 | record.records.push_back(i_key); 28 | if (record.records.size() >= RECORD_BUFFER_SIZE) { 29 | std::lock_guard lock(record.lock); 30 | record.records_list.push_back(std::move(record.records)); 31 | record.records.reserve(RECORD_BUFFER_SIZE); 32 | } 33 | } else if (need_count_hit_) { 34 | record.hit_cnt += hit; 35 | ++record.total_cnt; 36 | if (record.total_cnt == RECORD_BATCH_CNT) { 37 | if (record.hit_cnt < RECORD_BATCH_CNT * 0.5) { 38 | // LOG("hit ratio = %.1lf%%", 100. * record.hit_cnt / record.total_cnt); 39 | if (!update_schedule_flag_.test_and_set()) { 40 | BeginUpdateHotKeySet(); 41 | } 42 | } 43 | record.hit_cnt = record.total_cnt = 0; 44 | } 45 | } 46 | } 47 | 48 | void HotKeySet::BeginUpdateHotKeySet() { 49 | need_record_ = true; 50 | need_count_hit_ = false; 51 | if (update_hot_set_thread_.joinable()) { 52 | update_hot_set_thread_.join(); 53 | } 54 | update_hot_set_thread_ = std::thread(&HotKeySet::UpdateHotSet, this); 55 | } 56 | 57 | bool HotKeySet::Exist(const Slice &key) { 58 | uint64_t i_key = *(uint64_t *)key.data(); 59 | return current_set_ && current_set_->find(i_key) != current_set_->end(); 60 | } 61 | 62 | void HotKeySet::UpdateHotSet() { 63 | // bind_core_on_numa(num_workers_); 64 | LOG("begin update hot set"); 65 | 66 | std::unordered_map count; 67 | uint64_t update_cnt = 0; 68 | while (!stop_flag_.load(std::memory_order_relaxed)) { 69 | if (count.size() > HOT_NUM * 4 || update_cnt > HOT_NUM * 16) { 70 | break; 71 | } 72 | std::list> list; 73 | for (int i = 0; i < num_workers_; i++) { 74 | std::lock_guard lock(update_record_[i].lock); 75 | list.splice(list.end(), update_record_[i].records_list); 76 | } 77 | for (auto it = list.begin(); it != list.end(); it++) { 78 | for (int i = 0; i < it->size(); i++) { 79 | ++count[it->at(i)]; 80 | } 81 | } 82 | update_cnt += list.size() * RECORD_BUFFER_SIZE; 83 | } 84 | 85 | need_record_ = false; 86 | 87 | std::priority_queue, 88 | std::greater> 89 | topK; 90 | int max_cnt = 0; 91 | for (auto it = count.begin(); it != count.end(); it++) { 92 | if (it->second > 1) { 93 | max_cnt = std::max(max_cnt, it->second); 94 | if (topK.size() < HOT_NUM) { 95 | topK.push({it->first, it->second}); 96 | } else if (it->second > topK.top().cnt) { 97 | topK.pop(); 98 | topK.push({it->first, it->second}); 99 | } 100 | } 101 | } 102 | 103 | std::unordered_set *old_set = current_set_; 104 | std::unordered_set *new_set = nullptr; 105 | if (!topK.empty()) { 106 | if (max_cnt > 3 * topK.top().cnt) { 107 | new_set = new std::unordered_set(); 108 | new_set->reserve(topK.size()); 109 | while (!topK.empty()) { 110 | new_set->insert(topK.top().key); 111 | topK.pop(); 112 | } 113 | LOG("new set size %lu", new_set->size()); 114 | } 115 | } 116 | 117 | current_set_ = new_set; 118 | // no rcu here, 
simply sleep for a while 119 | usleep(10); 120 | for (int i = 0; i < num_workers_; i++) { 121 | // need_record_ is false, other threads cannot operate on records 122 | update_record_[i].records.clear(); 123 | update_record_[i].records_list.clear(); 124 | update_record_[i].hit_cnt = update_record_[i].total_cnt = 0; 125 | } 126 | if (old_set) { 127 | delete old_set; 128 | } 129 | 130 | update_schedule_flag_.clear(std::memory_order_relaxed); 131 | need_count_hit_ = true; 132 | } 133 | -------------------------------------------------------------------------------- /benchmarks/other/ChameleonDB/hotkeyset.cpp: -------------------------------------------------------------------------------- 1 | #include "hotkeyset.h" 2 | #include "chameleon_db.h" 3 | 4 | #include 5 | 6 | namespace CHAMELEONDB_NAMESPACE { 7 | 8 | HotKeySet::HotKeySet(ChameleonDB *db) : db_(db) { 9 | current_set_ = nullptr; 10 | update_record_ = std::make_unique(db_->num_workers_); 11 | } 12 | 13 | HotKeySet::~HotKeySet() { 14 | need_record_ = false; 15 | stop_flag_.store(true); 16 | if (update_hot_set_thread_.joinable()) { 17 | update_hot_set_thread_.join(); 18 | } 19 | if (current_set_) { 20 | delete current_set_; 21 | } 22 | } 23 | 24 | void HotKeySet::Record(const Slice &key, int worker_id, bool hit) { 25 | UpdateKeyRecord &record = update_record_[worker_id]; 26 | if (need_record_) { 27 | uint64_t i_key = *(uint64_t *)key.data(); 28 | record.records.push_back(i_key); 29 | if (record.records.size() >= RECORD_BUFFER_SIZE) { 30 | record.lock.lock(); 31 | record.records_list.push_back(std::move(record.records)); 32 | record.lock.unlock(); 33 | record.records.reserve(RECORD_BUFFER_SIZE); 34 | } 35 | } else if (need_count_hit_) { 36 | record.hit_cnt += hit; 37 | ++record.total_cnt; 38 | if (record.total_cnt == RECORD_BUFFER_SIZE) { 39 | if (record.hit_cnt < RECORD_BATCH_CNT * 0.5) { 40 | // LOG("hit ratio = %.1lf%%", 100. 
* record.hit_cnt / record.total_cnt); 41 | if (!update_schedule_flag_.test_and_set(std::memory_order_acq_rel)) { 42 | BeginUpdateHotKeySet(); 43 | } 44 | } 45 | record.hit_cnt = record.total_cnt = 0; 46 | } 47 | } 48 | } 49 | 50 | void HotKeySet::BeginUpdateHotKeySet() { 51 | need_record_ = true; 52 | need_count_hit_ = false; 53 | if (update_hot_set_thread_.joinable()) { 54 | update_hot_set_thread_.join(); 55 | } 56 | update_hot_set_thread_ = std::thread(&HotKeySet::UpdateHotSet, this); 57 | } 58 | 59 | bool HotKeySet::Exist(const Slice &key) { 60 | uint64_t i_key = *(uint64_t *)key.data(); 61 | return current_set_ && current_set_->find(i_key) != current_set_->end(); 62 | } 63 | 64 | void HotKeySet::UpdateHotSet() { 65 | // bind_core_on_numa(db_->num_workers_); 66 | 67 | std::unordered_map count; 68 | uint64_t update_cnt = 0; 69 | while (true) { 70 | if (count.size() > HOT_NUM * 4 || update_cnt > HOT_NUM * 16) { 71 | break; 72 | } 73 | std::list> list; 74 | for (int i = 0; i < db_->num_workers_; i++) { 75 | update_record_[i].lock.lock(); 76 | list.splice(list.end(), update_record_[i].records_list); 77 | update_record_[i].lock.unlock(); 78 | } 79 | for (auto it = list.begin(); it != list.end(); it++) { 80 | for (int i = 0; i < it->size(); i++) { 81 | ++count[it->at(i)]; 82 | } 83 | } 84 | update_cnt += list.size() * RECORD_BUFFER_SIZE; 85 | } 86 | 87 | need_record_ = false; 88 | 89 | std::priority_queue, 90 | std::greater> 91 | topK; 92 | int max_cnt = 0; 93 | for (auto it = count.begin(); it != count.end(); it++) { 94 | if (it->second > 1) { 95 | max_cnt = std::max(max_cnt, it->second); 96 | if (topK.size() < HOT_NUM) { 97 | topK.push({it->first, it->second}); 98 | } else if (it->second > topK.top().cnt) { 99 | topK.pop(); 100 | topK.push({it->first, it->second}); 101 | } 102 | } 103 | } 104 | 105 | std::unordered_set *old_set = current_set_; 106 | std::unordered_set *new_set = nullptr; 107 | if (!topK.empty()) { 108 | if (max_cnt > 3 * topK.top().cnt) { 109 | new_set = new std::unordered_set(); 110 | new_set->reserve(topK.size()); 111 | while (!topK.empty()) { 112 | new_set->insert(topK.top().key); 113 | topK.pop(); 114 | } 115 | LOG("new set size %lu", new_set->size()); 116 | } 117 | } 118 | 119 | current_set_ = new_set; 120 | db_->thread_status_.rcu_barrier(); 121 | for (int i = 0; i < db_->num_workers_; i++) { 122 | // need_record_ is false, other threads cannot operate on records 123 | update_record_[i].records.clear(); 124 | update_record_[i].records_list.clear(); 125 | update_record_[i].hit_cnt = update_record_[i].total_cnt = 0; 126 | } 127 | if (old_set) { 128 | delete old_set; 129 | } 130 | 131 | need_count_hit_ = true; 132 | update_schedule_flag_.clear(std::memory_order_relaxed); 133 | } 134 | 135 | } // namespace CHAMELEONDB_NAMESPACE 136 | -------------------------------------------------------------------------------- /db/index/masstree/btree_leaflink.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2014 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2014 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. 
These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #ifndef BTREE_LEAFLINK_HH 17 | #define BTREE_LEAFLINK_HH 1 18 | #include "compiler.hh" 19 | 20 | /** @brief Operations to manage linked lists of B+tree leaves. 21 | 22 | N is the type of nodes. CONCURRENT is true to make operations 23 | concurrency-safe (e.g. compare-and-swaps, fences), false to leave them 24 | unsafe (only OK on single threaded code, but faster). */ 25 | template struct btree_leaflink {}; 26 | 27 | 28 | // This is the normal version of btree_leaflink; it uses lock-free linked list 29 | // operations. 30 | template struct btree_leaflink { 31 | private: 32 | static inline N *mark(N *n) { 33 | return reinterpret_cast(reinterpret_cast(n) + 1); 34 | } 35 | static inline bool is_marked(N *n) { 36 | return reinterpret_cast(n) & 1; 37 | } 38 | template 39 | static inline N *lock_next(N *n, SF spin_function) { 40 | while (1) { 41 | N *next = n->next_.ptr; 42 | if (!next 43 | || (!is_marked(next) 44 | && bool_cmpxchg(&n->next_.ptr, next, mark(next)))) 45 | return next; 46 | spin_function(); 47 | } 48 | } 49 | 50 | public: 51 | /** @brief Insert a new node @a nr at the right of node @a n. 52 | @pre @a n is locked. 53 | 54 | Concurrency correctness: Ensures that all "next" pointers are always 55 | valid, even if @a n's successor is deleted concurrently. */ 56 | static void link_split(N *n, N *nr) { 57 | link_split(n, nr, relax_fence_function()); 58 | } 59 | /** @overload */ 60 | template 61 | static void link_split(N *n, N *nr, SF spin_function) { 62 | nr->prev_ = n; 63 | N *next = lock_next(n, spin_function); 64 | nr->next_.ptr = next; 65 | if (next) 66 | next->prev_ = nr; 67 | fence(); 68 | n->next_.ptr = nr; 69 | } 70 | 71 | /** @brief Unlink @a n from the list. 72 | @pre @a n is locked. 73 | 74 | Concurrency correctness: Works even in the presence of concurrent 75 | splits and deletes. */ 76 | static void unlink(N *n) { 77 | unlink(n, relax_fence_function()); 78 | } 79 | /** @overload */ 80 | template 81 | static void unlink(N *n, SF spin_function) { 82 | // Assume node order A <-> N <-> B. Since n is locked, n cannot split; 83 | // next node will always be B or one of its successors. 84 | N *next = lock_next(n, spin_function); 85 | N *prev; 86 | while (1) { 87 | prev = n->prev_; 88 | if (bool_cmpxchg(&prev->next_.ptr, n, mark(n))) 89 | break; 90 | spin_function(); 91 | } 92 | if (next) 93 | next->prev_ = prev; 94 | fence(); 95 | prev->next_.ptr = next; 96 | } 97 | }; 98 | 99 | 100 | // This is the single-threaded-only fast version of btree_leaflink. 
101 | template struct btree_leaflink { 102 | static void link_split(N *n, N *nr) { 103 | link_split(n, nr, do_nothing()); 104 | } 105 | template 106 | static void link_split(N *n, N *nr, SF) { 107 | nr->prev_ = n; 108 | nr->next_.ptr = n->next_.ptr; 109 | n->next_.ptr = nr; 110 | if (nr->next_.ptr) 111 | nr->next_.ptr->prev_ = nr; 112 | } 113 | static void unlink(N *n) { 114 | unlink(n, do_nothing()); 115 | } 116 | template 117 | static void unlink(N *n, SF) { 118 | if (n->next_.ptr) 119 | n->next_.ptr->prev_ = n->prev_; 120 | n->prev_->next_.ptr = n->next_.ptr; 121 | } 122 | }; 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /benchmarks/other/chameleondb_bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmarks/bench_base.h" 2 | #include "ChameleonDB/chameleon_db.h" 3 | #include "util/index_arena.h" 4 | 5 | #include 6 | 7 | using namespace CHAMELEONDB_NAMESPACE; 8 | 9 | thread_local std::unique_ptr worker; 10 | 11 | class ChameleonDBFixture : public BaseFixture { 12 | public: 13 | enum FixtureArg { Arg_init_util }; 14 | ChameleonDBFixture() {} 15 | 16 | protected: 17 | ChameleonDB *db_ = nullptr; 18 | 19 | virtual void OpenDB(benchmark::State &st) override { 20 | if (st.thread_index() == 0) { 21 | if (db_ != nullptr) { 22 | ERROR_EXIT("barrier error"); 23 | } 24 | const int num_threads = st.threads(); 25 | const int init_util = st.range(Arg_init_util); 26 | #ifdef USE_ALL_CORES 27 | int num_gc_threads = NUM_ALL_CORES - num_threads; 28 | #else 29 | int num_gc_threads = NUM_GC_THREADS; 30 | #endif 31 | printf("threads of service / gc : %d / %d\n", num_threads, 32 | num_gc_threads); 33 | 34 | double avg_val_size = VALUE_SIZE; 35 | if constexpr (benchmark_workload == ETC) { 36 | avg_val_size = ETC_AVG_VALUE_SIZE; 37 | } 38 | double object_size = sizeof(KVItem) + sizeof(KeyType) + avg_val_size; 39 | uint64_t total_size = 0; 40 | if (init_util > 0) { 41 | double init_size = object_size * NUM_KEYS * SEGMENT_SIZE / 42 | LogSegment::SEGMENT_DATA_SIZE; 43 | total_size = init_size * 100. 
/ init_util; 44 | total_size = 45 | (total_size + SEGMENT_SIZE - 1) / SEGMENT_SIZE * SEGMENT_SIZE; 46 | if (total_size < init_size + num_threads * 3 * SEGMENT_SIZE) { 47 | printf("Warning: not enough space for free segment per thread\n"); 48 | total_size = init_size + num_threads * 3 * SEGMENT_SIZE; 49 | } 50 | } else { 51 | // infinity log space <=> no gc 52 | YCSB_Type type = ycsb_type; 53 | if constexpr (benchmark_workload == ETC) { 54 | type = YCSB_A; 55 | } 56 | uint64_t total_put_ops = 57 | NUM_KEYS + (uint64_t)actual_num_ops_per_thread * 58 | (YCSB_Put_Ratio[type] + 10) / 100 * num_threads; 59 | total_size = 60 | total_put_ops * object_size + num_threads * SEGMENT_SIZE * 2; 61 | num_gc_threads = 0; 62 | } 63 | std::string db_path = std::string(PMEM_DIR) + "chameleondb"; 64 | std::filesystem::remove_all(db_path); 65 | std::filesystem::create_directory(db_path); 66 | 67 | db_ = new ChameleonDB(db_path, total_size, num_threads, num_gc_threads); 68 | } 69 | barrier.Wait(st.threads()); 70 | 71 | worker = db_->GetWorker(); 72 | } 73 | 74 | virtual void CloseDB(benchmark::State &st) override { 75 | worker.reset(); 76 | barrier.Wait(st.threads()); 77 | if (st.thread_index() == 0) { 78 | delete db_; 79 | db_ = nullptr; 80 | } 81 | } 82 | 83 | virtual bool Get(const Slice &key, std::string *value) override { 84 | return worker->Get(key, value); 85 | } 86 | 87 | virtual void Put(const Slice &key, const Slice &value) override { 88 | worker->Put(key, value); 89 | } 90 | 91 | virtual void PreSetUp(benchmark::State &st) override { 92 | // bind_core_on_numa(st.thread_index()); 93 | } 94 | }; 95 | 96 | BENCHMARK_DEFINE_F(ChameleonDBFixture, bench)(benchmark::State &st) { 97 | for (auto _ : st) { 98 | RunWorkload(st); 99 | } 100 | #ifdef LOG_BATCHING 101 | worker->FlushRemainAndUpdateIndex(); 102 | #endif 103 | assert(st.iterations() == 1); 104 | st.SetItemsProcessed(st.iterations() * actual_num_ops_per_thread); 105 | if (st.thread_index() == 0) { 106 | #ifdef MEASURE_LATENCY 107 | for (int i = 0; i < TypeEnumMax; i++) { 108 | HistogramData hist_data; 109 | for (int j = 1; j < st.threads(); j++) { 110 | latency_statistics[0].histograms[i].Merge( 111 | latency_statistics[j].histograms[i]); 112 | } 113 | latency_statistics[0].histograms[i].Data(&hist_data); 114 | std::string name = std::string("Lat_") + TypeStrings[i] + "_"; 115 | st.counters[name + "Avg"] = hist_data.average; 116 | st.counters[name + "P50"] = hist_data.median; 117 | st.counters[name + "P95"] = hist_data.percentile95; 118 | st.counters[name + "P99"] = hist_data.percentile99; 119 | } 120 | latency_statistics.reset(); 121 | #endif 122 | } 123 | } 124 | 125 | BENCHMARK_REGISTER_F(ChameleonDBFixture, bench) 126 | ->Arg(0) 127 | ->DenseRange(20, 90, 10) 128 | ->DenseThreadRange(1, 32, 1) 129 | ->Iterations(1) 130 | ->Unit(benchmark::kMicrosecond) 131 | ->UseRealTime(); 132 | 133 | BENCHMARK_MAIN(); 134 | -------------------------------------------------------------------------------- /benchmarks/other/ChameleonDB/log.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | #include "chameleon_db.h" 11 | #include "log.h" 12 | #include "log_gc.h" 13 | 14 | namespace CHAMELEONDB_NAMESPACE { 15 | 16 | Log::Log(std::string db_path, size_t log_size, ChameleonDB *db, int num_workers, 17 | int num_cleaners) 18 | : num_workers_(num_workers), 19 | num_cleaners_(num_cleaners), 20 | total_log_size_(log_size), 21 | 
num_segments_(log_size / SEGMENT_SIZE), 22 | free_list_lock_("free_list"), 23 | num_limit_free_segments_(num_workers * (10 - num_cleaners)) { 24 | #ifdef LOG_PERSISTENT 25 | std::string log_pool_path = db_path + "/log_pool"; 26 | int log_pool_fd = open(log_pool_path.c_str(), O_CREAT | O_RDWR, 0644); 27 | if (log_pool_fd < 0) { 28 | ERROR_EXIT("open file failed"); 29 | } 30 | if (fallocate(log_pool_fd, 0, 0, total_log_size_) != 0) { 31 | ERROR_EXIT("fallocate file failed"); 32 | } 33 | pool_start_ = (char *)mmap(NULL, total_log_size_, PROT_READ | PROT_WRITE, 34 | MAP_SHARED, log_pool_fd, 0); 35 | close(log_pool_fd); 36 | #else 37 | pool_start_ = (char *)mmap(NULL, total_log_size_, PROT_READ | PROT_WRITE, 38 | MAP_SHARED | MAP_ANONYMOUS, -1, 0); 39 | #endif 40 | if (pool_start_ == nullptr || pool_start_ == MAP_FAILED) { 41 | ERROR_EXIT("mmap failed"); 42 | } 43 | LOG("Log: pool_start %p total segments: %d cleaners: %d\n", pool_start_, 44 | num_segments_, num_cleaners_); 45 | all_segments_.resize(num_segments_, nullptr); 46 | int i = 0; 47 | for (i = 0; i < num_segments_ - num_cleaners; i++) { 48 | all_segments_[i] = 49 | new LogSegment(pool_start_ + i * SEGMENT_SIZE, SEGMENT_SIZE); 50 | free_segments_.push(all_segments_[i]); 51 | } 52 | num_free_segments_ = num_segments_ - num_cleaners_; 53 | 54 | log_cleaners_.resize(num_cleaners_, nullptr); 55 | for (int j = 0; i < num_segments_; i++, j++) { 56 | all_segments_[i] = 57 | new LogSegment(pool_start_ + i * SEGMENT_SIZE, SEGMENT_SIZE); 58 | log_cleaners_[j] = new LogGCer(db, j, this, all_segments_[i]); 59 | } 60 | for (int j = 0; j < num_cleaners_; j++) { 61 | log_cleaners_[j]->StartGCThread(); 62 | } 63 | 64 | if (num_cleaners_ == 0) { 65 | stop_flag_.store(true, std::memory_order_release); 66 | } 67 | } 68 | 69 | Log::~Log() { 70 | stop_flag_.store(true, std::memory_order_release); 71 | for (int i = 0; i < num_cleaners_; i++) { 72 | delete log_cleaners_[i]; 73 | } 74 | 75 | LOG("num_newSegment %d new_hot %d new_cold %d", num_new_segment_.load(), 76 | num_new_hot_.load(), num_new_cold_.load()); 77 | munmap(pool_start_, total_log_size_); 78 | free_list_lock_.report(); 79 | } 80 | 81 | LogSegment *Log::NewSegment(bool hot) { 82 | LogSegment *ret = nullptr; 83 | // uint64_t waiting_time = 0; 84 | // TIMER_START(waiting_time); 85 | while (true) { 86 | if (num_free_segments_ > 0) { 87 | std::lock_guard guard(free_list_lock_); 88 | if (!free_segments_.empty()) { 89 | ret = free_segments_.front(); 90 | free_segments_.pop(); 91 | --num_free_segments_; 92 | } 93 | } else { 94 | if (num_cleaners_ == 0) { 95 | ERROR_EXIT("No free segments and no cleaners"); 96 | } 97 | usleep(1); 98 | } 99 | if (ret) { 100 | break; 101 | } 102 | } 103 | // TIMER_STOP(waiting_time); 104 | 105 | COUNTER_ADD_LOGGING(num_new_segment_, 1); 106 | ret->StartUsing(hot); 107 | if (hot) { 108 | COUNTER_ADD_LOGGING(num_new_hot_, 1); 109 | } else { 110 | COUNTER_ADD_LOGGING(num_new_cold_, 1); 111 | } 112 | 113 | return ret; 114 | } 115 | 116 | void Log::FreezeSegment(LogSegment *old_segment) { 117 | if (old_segment && num_cleaners_ > 0) { 118 | old_segment->Close(); 119 | AddClosedSegment(old_segment); 120 | } 121 | } 122 | 123 | LogSegment *Log::GetSegment(int segment_id) { 124 | assert(segment_id < num_segments_); 125 | return all_segments_[segment_id]; 126 | } 127 | 128 | int Log::GetSegmentID(const char *addr) { 129 | assert(addr >= pool_start_); 130 | int seg_id = (addr - pool_start_) / SEGMENT_SIZE; 131 | assert(seg_id < num_segments_); 132 | return seg_id; 133 | } 134 | 135 | 
int Log::GetSegmentCleanerID(const char *addr) { 136 | return GetSegmentID(addr) % num_cleaners_; 137 | } 138 | 139 | void Log::AddClosedSegment(LogSegment *segment) { 140 | int cleaner_id = GetSegmentCleanerID(segment->get_segment_start()); 141 | log_cleaners_[cleaner_id]->AddClosedSegment(segment); 142 | } 143 | 144 | 145 | } // namespace CHAMELEONDB_NAMESPACE 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pacman 2 | 3 | This repository contains the artifact for the ATC'22 paper: "Pacman: An Efficient Compaction Approach for Log-Structured Key-Value Store on Persistent Memory". 4 | 5 | For ATC'22 Artifact Evaluation, please check out the Artifact README from the atc22ae hotcrp system. 6 | 7 | ## Directory structure 8 | 9 | ``` 10 | pacman 11 | |---- benchmarks # code of the benchmarks and other systems (Viper, ChameleonDB, PMem-RocksDB, and pmemkv) 12 | |---- include # public headers of pacman db 13 | |---- db # source code of pacman with PM-based log-structured KV store 14 | |---- util # utilities used for programming 15 | |---- example # a simple example of pacman db 16 | |---- scripts # main evaluation scripts 17 | ``` 18 | 19 | ## Functionality of code 20 | * `include/db.h, db/db.cpp`: DB interface 21 | * `db/index/*`: indexes used by DB, modified to utilize `shortcut` 22 | 23 | * `db/segment.h`: structure and basic functionality of log segment 24 | * `db/log_structured.[h|cpp]`: management of log segments in DB 25 | * `db/log_cleaner.[h|cpp]`: main functionality of segment compaction 26 | 27 | 28 | The major parts of Pacman's techniques are enabled by the macros `GC_SHORTCUT` (Sec 3.1), `REDUCE_PM_ACCESS` (Sec 3.2), `BATCH_COMPACTION` (Sec 3.3), and `HOT_COLD_SEPARATE` (Sec 3.4). Check out the source code for more details. 29 | 30 | ## Dependencies 31 | 32 | * libraries 33 | - [PMDK](https://github.com/pmem/pmdk) (libpmem & libpmemobj) 34 | - jemalloc 35 | 36 | * utilities for experiments: 37 | - numactl 38 | - cpupower 39 | 40 | If you're going to evaluate other systems (e.g., [PMem-RocksDB](https://github.com/pmem/pmem-rocksdb), [pmemkv](https://github.com/pmem/pmemkv)), please install their dependencies. You don't need to install the other systems manually, since the CMake configuration in this repository will fetch and build them automatically. 41 | 42 | ## System configuration 43 | 44 | ```shell 45 | 1. set Optane DCPMM to AppDirect mode 46 | $ sudo ipmctl create -f -goal persistentmemorytype=appdirect 47 | 48 | 2. configure PM device to fsdax mode 49 | $ sudo ndctl create-namespace -m fsdax 50 | 51 | 3. create and mount a file system with DAX 52 | $ sudo mkfs.ext4 -F /dev/pmem0 53 | $ sudo mount -o dax /dev/pmem0 /mnt/pmem0 54 | ``` 55 | 56 | 57 | ## Building the benchmarks 58 | 59 | **We recommend that you use the scripts in the [scripts](scripts) directory directly. The scripts will build and run the experiments automatically.** 60 | 61 | Quick start: 62 | 63 | ```shell 64 | $ mkdir -p build && cd build 65 | $ cmake -DCMAKE_BUILD_TYPE=Release -DINDEX_TYPE=1 -DIDX_PERSISTENT=OFF -DPACMAN=ON .. 66 | $ cmake --build . 67 | ``` 68 | 69 | This will build the benchmark for FlatStore-H with Pacman. You may need to set the path to pmem (`PMEM_DIR`) in `include/config.h.in`. 70 | 
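Other build configurations follow the same pattern: the cache variables declared in [CMakeLists.txt](CMakeLists.txt) (e.g. `INDEX_TYPE`, `WORKLOAD_TYPE`, `YCSB_TYPE`, `VALUE_SIZE`, `NUM_GC_THREADS`) can be overridden on the cmake command line and are materialized into `config.h` / `bench_config.h` at configure time. A minimal sketch, with illustrative values only (not a recommended setting):

```shell
# Illustrative example: FAST&FAIR index (INDEX_TYPE=2) with Pacman enabled,
# running YCSB-B with 48-byte values and 4 cleaner threads
$ cmake -DCMAKE_BUILD_TYPE=Release -DINDEX_TYPE=2 -DPACMAN=ON \
        -DWORKLOAD_TYPE=YCSB -DYCSB_TYPE=YCSB_B -DVALUE_SIZE=48 -DNUM_GC_THREADS=4 ..
$ cmake --build .
```

See the option list at the top of [CMakeLists.txt](CMakeLists.txt) for the full set of knobs (e.g. `NUM_KEYS`, `SKEW`, `MEASURE_LATENCY`).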
71 | [PMem-RocksDB](https://github.com/starkwj/pmem-rocksdb) and [pmemkv](https://github.com/pmem/pmemkv) will be downloaded and built by default. If you don't want to evaluate with them, pass `-DEVAL_OTHER_SYSTEMS=OFF` to cmake. 72 | 73 | 74 | To facilitate evaluation, we use cmake to set most configurations for both systems and benchmarks. Please check out the [CMakeLists.txt](CMakeLists.txt) for more details. 75 | 76 | 77 | ## Running experiments 78 | 79 | 80 | ``` 81 | scripts 82 | |---- kick_the_tires.sh # script of "Hello world"-sized examples 83 | |---- eval_utilization.sh # impact of capacity utilization (Sec 4.2.1) 84 | |---- eval_ycsb.sh # YCSB benchmarks (Sec 4.2.2) 85 | |---- eval_threads.sh # thread scalability (Sec 4.2.3) 86 | |---- eval_value_size.sh # different value sizes (Sec 4.2.3) 87 | |---- eval_write_ratio.sh # different write ratios (Sec 4.2.3) 88 | |---- eval_key_space.sh # different key spaces (Sec 4.2.3) 89 | |---- eval_breakdown.sh # breakdown of techniques in Pacman (Sec 4.3) 90 | |---- eval_etc.sh # Facebook ETC workload (Sec 4.4) 91 | |---- eval_recovery.sh # recovery test (Sec 4.5) 92 | |---- eval_case.sh # for evaluating other cases with customized configurations 93 | ``` 94 | 95 | ### Usage 96 | 97 | * `kick_the_tires.sh` and `eval_case.sh`: Execute directly with no arguments. 98 | 99 | * Usage for the other scripts is: 100 | 101 | ```shell 102 | $ ./script_name.sh # for eval_breakdown.sh and eval_recovery.sh 103 | $ ./script_name.sh # for the others 104 | ``` 105 | 106 | Please check out the `help` information at the top of the scripts for details. 107 | 108 | These scripts should be run inside the `scripts` directory. Each experiment will be run for several minutes. Evaluation on PMem-RocksDB and pmemkv will take much longer (PMem-RocksDB takes about 20 minutes on the Facebook ETC workload and may occasionally even abort). 109 | -------------------------------------------------------------------------------- /db/index/masstree/circular_int.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2014 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2014 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding.
15 | */ 16 | #ifndef KVDB_CIRCULAR_INT_HH 17 | #define KVDB_CIRCULAR_INT_HH 1 18 | #include "compiler.hh" 19 | 20 | template 21 | class circular_int { 22 | public: 23 | typedef typename mass::make_unsigned::type value_type; 24 | typedef typename mass::make_signed::type difference_type; 25 | 26 | circular_int() 27 | : v_() { 28 | } 29 | circular_int(T x) 30 | : v_(x) { 31 | } 32 | 33 | value_type value() const { 34 | return v_; 35 | } 36 | 37 | circular_int &operator++() { 38 | ++v_; 39 | return *this; 40 | } 41 | circular_int operator++(int) { 42 | ++v_; 43 | return circular_int(v_ - 1); 44 | } 45 | circular_int &operator--() { 46 | --v_; 47 | return *this; 48 | } 49 | circular_int operator--(int) { 50 | --v_; 51 | return circular_int(v_ + 1); 52 | } 53 | circular_int &operator+=(unsigned x) { 54 | v_ += x; 55 | return *this; 56 | } 57 | circular_int &operator+=(int x) { 58 | v_ += x; 59 | return *this; 60 | } 61 | circular_int &operator-=(unsigned x) { 62 | v_ -= x; 63 | return *this; 64 | } 65 | circular_int &operator-=(int x) { 66 | v_ -= x; 67 | return *this; 68 | } 69 | 70 | circular_int cmpxchg(circular_int expected, circular_int desired) { 71 | return ::cmpxchg(&v_, expected.v_, desired.v_); 72 | } 73 | circular_int cmpxchg(T expected, T desired) { 74 | return ::cmpxchg(&v_, expected, desired); 75 | } 76 | 77 | typedef value_type (circular_int::*unspecified_bool_type)() const; 78 | operator unspecified_bool_type() const { 79 | return v_ != 0 ? &circular_int::value : 0; 80 | } 81 | bool operator!() const { 82 | return v_ == 0; 83 | } 84 | 85 | circular_int operator+(unsigned x) const { 86 | return circular_int(v_ + x); 87 | } 88 | circular_int operator+(int x) const { 89 | return circular_int(v_ + x); 90 | } 91 | circular_int next_nonzero() const { 92 | value_type v = v_ + 1; 93 | return circular_int(v + !v); 94 | } 95 | static value_type next_nonzero(value_type x) { 96 | ++x; 97 | return x + !x; 98 | } 99 | circular_int operator-(unsigned x) const { 100 | return circular_int(v_ - x); 101 | } 102 | circular_int operator-(int x) const { 103 | return circular_int(v_ - x); 104 | } 105 | difference_type operator-(circular_int x) const { 106 | return v_ - x.v_; 107 | } 108 | 109 | bool operator==(circular_int x) const { 110 | return v_ == x.v_; 111 | } 112 | bool operator!=(circular_int x) const { 113 | return !(*this == x); 114 | } 115 | static bool less(value_type a, value_type b) { 116 | return difference_type(a - b) < 0; 117 | } 118 | static bool less_equal(value_type a, value_type b) { 119 | return difference_type(a - b) <= 0; 120 | } 121 | bool operator<(circular_int x) const { 122 | return less(v_, x.v_); 123 | } 124 | bool operator<=(circular_int x) const { 125 | return !less(x.v_, v_); 126 | } 127 | bool operator>=(circular_int x) const { 128 | return !less(v_, x.v_); 129 | } 130 | bool operator>(circular_int x) const { 131 | return less(x.v_, v_); 132 | } 133 | 134 | private: 135 | value_type v_; 136 | }; 137 | 138 | typedef circular_int kvepoch_t; 139 | 140 | template 141 | inline circular_int cmpxchg(circular_int *object, circular_int expected, 142 | circular_int desired) { 143 | return object->cmpxchg(expected, desired); 144 | } 145 | 146 | #endif 147 | -------------------------------------------------------------------------------- /db/index/CCEH/CCEH.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pair.h" 4 | #include "util/index_arena.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 
10 | 11 | 12 | namespace CCEH_NAMESPACE { 13 | 14 | #define f_seed 0xc70697UL 15 | #define s_seed 0xc70697UL 16 | 17 | // CAS 18 | #define CAS(_p, _u, _v) \ 19 | __atomic_compare_exchange_n(_p, _u, _v, false, __ATOMIC_ACQ_REL, \ 20 | __ATOMIC_ACQUIRE) 21 | 22 | constexpr size_t kSegmentBits = 8; 23 | constexpr size_t kMask = (1 << kSegmentBits) - 1; 24 | constexpr size_t kShift = kSegmentBits; 25 | constexpr size_t kSegmentSize = (1 << kSegmentBits) * 16 * 4; 26 | constexpr size_t kNumPairPerCacheLine = 4; 27 | constexpr size_t kNumCacheLine = 8; 28 | 29 | struct alignas(64) Segment { 30 | static const size_t kNumSlot = kSegmentSize / sizeof(Pair); 31 | 32 | Segment(void) : local_depth{0} {} 33 | 34 | Segment(size_t depth) : local_depth{depth} {} 35 | 36 | ~Segment(void) { 37 | is_deleted = true; 38 | } 39 | 40 | bool suspend(void) { 41 | int64_t val; 42 | do { 43 | val = sema; 44 | if (val < 0) 45 | return false; 46 | } while (!CAS(&sema, &val, -1)); 47 | 48 | int64_t wait = 0 - val - 1; 49 | while (val && sema != wait) { 50 | asm("nop"); 51 | } 52 | return true; 53 | } 54 | 55 | bool lock(void) { 56 | int64_t val = sema; 57 | while (val > -1) { 58 | if (CAS(&sema, &val, val + 1)) 59 | return true; 60 | val = sema; 61 | } 62 | return false; 63 | } 64 | 65 | void unlock(void) { 66 | int64_t val = sema; 67 | while (!CAS(&sema, &val, val - 1)) { 68 | val = sema; 69 | } 70 | } 71 | 72 | void *operator new(size_t size) { 73 | return g_index_allocator->Alloc(size); 74 | } 75 | 76 | void *operator new[](size_t size) { 77 | return g_index_allocator->Alloc(size); 78 | } 79 | 80 | void operator delete(void *ptr, size_t size) { 81 | g_index_allocator->Free(ptr, size); 82 | } 83 | void operator delete[](void *ptr, size_t size) { 84 | g_index_allocator->Free(ptr, size); 85 | } 86 | 87 | int Insert(Key_t &, Value_t, size_t, size_t); 88 | bool Insert4split(Key_t &, Value_t, size_t); 89 | bool Put(Key_t &, Value_t, size_t); 90 | Segment **Split(void); 91 | size_t numElem(void); 92 | 93 | Pair _[kNumSlot]; 94 | int64_t sema = 0; 95 | size_t local_depth; 96 | bool is_deleted = false; 97 | }; 98 | 99 | struct alignas(64) Directory { 100 | static const size_t kDefaultDepth = 10; 101 | Segment **_; 102 | int64_t sema = 0; 103 | size_t capacity; 104 | size_t depth; 105 | 106 | bool suspend(void) { 107 | int64_t val; 108 | do { 109 | val = sema; 110 | if (val < 0) 111 | return false; 112 | } while (!CAS(&sema, &val, -1)); 113 | 114 | int64_t wait = 0 - val - 1; 115 | while (val && sema != wait) { 116 | asm("nop"); 117 | } 118 | return true; 119 | } 120 | 121 | bool lock(void) { 122 | int64_t val = sema; 123 | while (val > -1) { 124 | if (CAS(&sema, &val, val + 1)) 125 | return true; 126 | val = sema; 127 | } 128 | return false; 129 | } 130 | 131 | void unlock(void) { 132 | int64_t val = sema; 133 | while (!CAS(&sema, &val, val - 1)) { 134 | val = sema; 135 | } 136 | } 137 | 138 | Directory(void) { 139 | depth = kDefaultDepth; 140 | capacity = pow(2, depth); 141 | _ = new Segment *[capacity]; 142 | memset(_, 0, sizeof(Segment *) * capacity); 143 | sema = 0; 144 | } 145 | 146 | Directory(size_t _depth) { 147 | depth = _depth; 148 | capacity = pow(2, depth); 149 | _ = new Segment *[capacity]; 150 | memset(_, 0, sizeof(Segment *) * capacity); 151 | sema = 0; 152 | } 153 | 154 | ~Directory(void) { 155 | delete[] _; 156 | } 157 | 158 | void *operator new(size_t size) { 159 | return g_index_allocator->Alloc(size); 160 | } 161 | 162 | void operator delete(void *ptr, size_t size) { 163 | g_index_allocator->Free(ptr, 
size); 164 | } 165 | 166 | void SanityCheck(void *); 167 | void LSBUpdate(int, int, int, int, Segment **); 168 | }; 169 | 170 | class CCEH { 171 | public: 172 | CCEH(void); 173 | CCEH(size_t); 174 | ~CCEH(void); 175 | void Insert(const Key_t &, LogEntryHelper &le_helper); 176 | // bool InsertOnly(const Key_t &, Value_t); 177 | bool TryGCUpdate(const Key_t &, LogEntryHelper &le_helper); 178 | bool Delete(const Key_t &); 179 | Value_t Get(const Key_t &); 180 | Value_t FindAnyway(const Key_t &); 181 | double Utilization(void); 182 | size_t Capacity(void); 183 | bool Recovery(void); 184 | 185 | // private: 186 | Directory *dir; 187 | }; 188 | 189 | } // namespace CCEH_NAMESPACE 190 | -------------------------------------------------------------------------------- /include/db_common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "config.h" 7 | #include "slice.h" 8 | #include "util/debug_helper.h" 9 | #include "util/persist.h" 10 | #include "util/index_arena.h" 11 | 12 | // key, value in Index 13 | using KeyType = uint64_t; 14 | using ValueType = uint64_t; 15 | static constexpr ValueType INVALID_VALUE = 0; 16 | 17 | static constexpr uint64_t SEGMENT_SIZE = 4ul << 20; 18 | 19 | // shortcut 20 | class __attribute__((__packed__)) Shortcut { 21 | public: 22 | Shortcut() : addr_(0), pos_(0) {} 23 | Shortcut(char *node_addr, int pos) { 24 | assert(((uint64_t)node_addr & 0xf) == 0); 25 | #ifdef IDX_PERSISTENT 26 | addr_ = g_index_allocator->ToPoolOffset(node_addr); 27 | #else 28 | addr_ = (uint64_t)node_addr >> 4; 29 | #endif 30 | pos_ = pos; 31 | } 32 | 33 | char *GetNodeAddr() const { 34 | #ifdef IDX_PERSISTENT 35 | return addr_ ? (char *)g_index_allocator->ToDirectPointer(addr_) : nullptr; 36 | #else 37 | return (char *)((uint64_t)addr_ << 4); 38 | #endif 39 | } 40 | 41 | int GetPos() const { return pos_; } 42 | 43 | bool None() { return addr_ == 0; } 44 | 45 | private: 46 | uint64_t addr_ : 43; 47 | uint8_t pos_ : 5; 48 | }; 49 | static_assert(sizeof(Shortcut) == 6); 50 | 51 | 52 | // KVItem: log entry 53 | struct KVItem { 54 | #ifdef REDUCE_PM_ACCESS 55 | uint16_t key_size; 56 | #else 57 | uint16_t key_size : 15; 58 | volatile uint16_t is_garbage : 1; 59 | #endif 60 | uint16_t val_size; 61 | // uint32_t checksum = 0; 62 | // uint64_t epoch; 63 | uint32_t epoch; 64 | // uint64_t magic = 0xDEADBEAF; 65 | uint8_t kv_pair[0]; 66 | 67 | KVItem() { 68 | memset(this, 0, sizeof(KVItem)); 69 | } 70 | 71 | KVItem(const Slice &_key, const Slice &_val, uint32_t _epoch) 72 | : key_size(_key.size()), val_size(_val.size()), epoch(_epoch) { 73 | #ifndef REDUCE_PM_ACCESS 74 | is_garbage = false; 75 | #endif 76 | assert(val_size >= 8); 77 | memcpy(kv_pair, _key.data(), key_size); 78 | memcpy(kv_pair + key_size, _val.data(), val_size); 79 | // CalcChecksum(); 80 | } 81 | 82 | Slice GetKey() { 83 | return Slice((char *)kv_pair, key_size); 84 | } 85 | 86 | Slice GetValue() { 87 | return Slice((char *)kv_pair + key_size, val_size); 88 | } 89 | 90 | void GetValue(std::string &value) { 91 | value.assign((char *)kv_pair + key_size, val_size); 92 | } 93 | 94 | // void CalcChecksum() { 95 | // // checksum = 0; 96 | // // uint64_t *p = (uint64_t *)this; 97 | // // uint64_t x_sum = 0; 98 | // // size_t sz = sizeof(KVItem) + key_size + val_size; 99 | // // for (size_t i = 0; i < sz / sizeof(uint64_t); i++) { 100 | // // x_sum ^= p[i]; 101 | // // } 102 | // // checksum = x_sum ^ (x_sum >> 32); 103 | // } 104 | 105 | // bool 
VerifyChecksum() { 106 | // // uint64_t *p = (uint64_t *)this; 107 | // // uint64_t x_sum = 0; 108 | // // size_t sz = sizeof(KVItem) + key_size + val_size; 109 | // // for (size_t i = 0; i < sz / sizeof(uint64_t); i++) { 110 | // // x_sum ^= p[i]; 111 | // // } 112 | // // uint32_t res = x_sum ^ (x_sum >> 32); 113 | // // return (res == 0); 114 | // return true; 115 | // } 116 | 117 | void Flush() { 118 | #ifdef LOG_PERSISTENT 119 | clwb_fence((char *)this, sizeof(KVItem) + key_size + val_size); 120 | #endif 121 | } 122 | }; 123 | 124 | 125 | // TaggedPointer 126 | struct TaggedPointer { 127 | union { 128 | uint64_t data = 0; 129 | struct { 130 | uint64_t addr : 48; 131 | uint64_t size : 16; 132 | }; 133 | }; 134 | 135 | TaggedPointer(char *ptr, uint64_t sz) { 136 | #ifdef REDUCE_PM_ACCESS 137 | addr = (uint64_t)ptr; 138 | size = sz <= 0xFFFF ? sz : 0; 139 | #else 140 | data = (uint64_t)ptr; 141 | #endif 142 | } 143 | TaggedPointer(ValueType val) : data(val) {} 144 | 145 | operator ValueType() { 146 | return (ValueType)data; 147 | } 148 | KVItem *GetKVItem() { 149 | return (KVItem *)(uint64_t)addr; 150 | } 151 | char *GetAddr() { 152 | return (char *)(uint64_t)addr; 153 | } 154 | }; 155 | static_assert(sizeof(TaggedPointer) == sizeof(ValueType)); 156 | 157 | 158 | struct LogEntryHelper { 159 | ValueType new_val = INVALID_VALUE; 160 | ValueType old_val = INVALID_VALUE; // in and out for gc put, out for db put 161 | Shortcut shortcut; 162 | char *index_entry = nullptr; 163 | bool fast_path = false; 164 | 165 | LogEntryHelper(ValueType _new_val) : new_val(_new_val) {} 166 | }; 167 | 168 | struct ValidItem { 169 | Slice key; 170 | ValueType old_val; 171 | ValueType new_val; 172 | uint32_t size; 173 | Shortcut shortcut; 174 | 175 | ValidItem(const Slice &key, ValueType old_val, ValueType new_val, 176 | uint32_t size, Shortcut shortcut) 177 | : key(key), 178 | old_val(old_val), 179 | new_val(new_val), 180 | size(size), 181 | shortcut(shortcut) {} 182 | 183 | // bool operator<(const ValidItem &other) const { 184 | // return shortcut < other.shortcut; 185 | // } 186 | }; 187 | -------------------------------------------------------------------------------- /db/index/masstree/memdebug.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2016 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2016 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 
15 | */ 16 | #ifndef MEMDEBUG_HH 17 | #define MEMDEBUG_HH 1 18 | #include "mtcounters.hh" 19 | #include 20 | 21 | struct memdebug { 22 | static inline void* make(void* ptr, size_t sz, memtag tag); 23 | static inline void set_landmark(void* ptr, const char* file, int line); 24 | static inline void* check_free(void* ptr, size_t sz, memtag tag); 25 | static inline void check_rcu(void* ptr, size_t sz, memtag tag); 26 | static inline void* check_free_after_rcu(void* ptr, memtag tag); 27 | static inline bool check_use(const void* ptr, memtag allowed); 28 | static inline void assert_use(const void* ptr, memtag allowed); 29 | 30 | #if HAVE_MEMDEBUG 31 | private: 32 | enum { 33 | magic_value = 389612313 /* = 0x17390319 */, 34 | magic_free_value = 2015593488 /* = 0x78238410 */ 35 | }; 36 | int magic; 37 | memtag tag; 38 | size_t size; 39 | int after_rcu; 40 | int line; 41 | const char* file; 42 | 43 | static void free_checks(const memdebug* m, size_t size, memtag tag, 44 | int after_rcu, const char* op) { 45 | if (m->magic != magic_value 46 | || m->tag != tag 47 | || (!after_rcu && m->size != size) 48 | || m->after_rcu != after_rcu) 49 | hard_free_checks(m, size, tag, after_rcu, op); 50 | } 51 | void landmark(char* buf, size_t size) const; 52 | static void hard_free_checks(const memdebug* m, size_t size, memtag tag, 53 | int after_rcu, const char* op); 54 | static void hard_assert_use(const void* ptr, memtag allowed); 55 | #endif 56 | }; 57 | 58 | enum { 59 | #if HAVE_MEMDEBUG 60 | memdebug_size = sizeof(memdebug) 61 | #else 62 | memdebug_size = 0 63 | #endif 64 | }; 65 | 66 | inline void* memdebug::make(void* ptr, size_t sz, memtag tag) { 67 | #if HAVE_MEMDEBUG 68 | if (ptr) { 69 | memdebug* m = reinterpret_cast(ptr); 70 | m->magic = magic_value; 71 | m->tag = tag; 72 | m->size = sz; 73 | m->after_rcu = 0; 74 | m->line = 0; 75 | m->file = 0; 76 | return m + 1; 77 | } else 78 | return ptr; 79 | #else 80 | (void) sz, (void) tag; 81 | return ptr; 82 | #endif 83 | } 84 | 85 | inline void memdebug::set_landmark(void* ptr, const char* file, int line) { 86 | #if HAVE_MEMDEBUG 87 | if (ptr) { 88 | memdebug* m = reinterpret_cast(ptr) - 1; 89 | m->file = file; 90 | m->line = line; 91 | } 92 | #else 93 | (void) ptr, (void) file, (void) line; 94 | #endif 95 | } 96 | 97 | inline void* memdebug::check_free(void* ptr, size_t sz, memtag tag) { 98 | #if HAVE_MEMDEBUG 99 | memdebug* m = reinterpret_cast(ptr) - 1; 100 | free_checks(m, sz, tag, false, "deallocate"); 101 | m->magic = magic_free_value; 102 | return m; 103 | #else 104 | (void) sz, (void) tag; 105 | return ptr; 106 | #endif 107 | } 108 | 109 | inline void memdebug::check_rcu(void* ptr, size_t sz, memtag tag) { 110 | #if HAVE_MEMDEBUG 111 | memdebug* m = reinterpret_cast(ptr) - 1; 112 | free_checks(m, sz, tag, false, "deallocate_rcu"); 113 | m->after_rcu = 1; 114 | #else 115 | (void) ptr, (void) sz, (void) tag; 116 | #endif 117 | } 118 | 119 | inline void* memdebug::check_free_after_rcu(void* ptr, memtag tag) { 120 | #if HAVE_MEMDEBUG 121 | memdebug* m = reinterpret_cast(ptr) - 1; 122 | free_checks(m, 0, tag, true, "free_after_rcu"); 123 | m->magic = magic_free_value; 124 | return m; 125 | #else 126 | (void) tag; 127 | return ptr; 128 | #endif 129 | } 130 | 131 | inline bool memdebug::check_use(const void* ptr, memtag allowed) { 132 | #if HAVE_MEMDEBUG 133 | const memdebug* m = reinterpret_cast(ptr) - 1; 134 | return m->magic == magic_value && (allowed == 0 || (m->tag ^ allowed) <= memtag_pool_mask); 135 | #else 136 | (void) ptr, (void) allowed; 137 | 
return true; 138 | #endif 139 | } 140 | 141 | inline void memdebug::assert_use(const void* ptr, memtag allowed) { 142 | #if HAVE_MEMDEBUG 143 | if (!check_use(ptr, allowed)) 144 | hard_assert_use(ptr, allowed); 145 | #else 146 | (void) ptr, (void) allowed; 147 | #endif 148 | } 149 | 150 | #endif 151 | -------------------------------------------------------------------------------- /benchmarks/benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include "bench_base.h" 2 | #include "db.h" 3 | #include "db_common.h" 4 | 5 | #include 6 | 7 | thread_local std::unique_ptr worker; 8 | 9 | class DBFixture : public BaseFixture { 10 | public: 11 | enum FixtureArg { Arg_init_util }; 12 | 13 | DBFixture() {} 14 | 15 | protected: 16 | DB *db_ = nullptr; 17 | 18 | virtual void OpenDB(benchmark::State &st) override { 19 | if (st.thread_index() == 0) { 20 | if (db_ != nullptr) { 21 | ERROR_EXIT("barrier error"); 22 | } 23 | const int num_threads = st.threads(); 24 | const int init_util = st.range(Arg_init_util); 25 | #ifdef USE_ALL_CORES 26 | int num_gc_threads = NUM_ALL_CORES - num_threads; 27 | #else 28 | int num_gc_threads = NUM_GC_THREADS; 29 | #endif 30 | 31 | double avg_val_size = VALUE_SIZE; 32 | if constexpr (benchmark_workload == ETC) { 33 | avg_val_size = ETC_AVG_VALUE_SIZE; 34 | } 35 | double object_size = sizeof(KVItem) + sizeof(KeyType) + avg_val_size; 36 | uint64_t total_size = 0; 37 | if (init_util > 0) { 38 | // double init_size = object_size * NUM_KEYS * SEGMENT_SIZE / 39 | // LogSegment::SEGMENT_DATA_SIZE; 40 | double init_size = object_size * NUM_KEYS; 41 | total_size = init_size * 100. / init_util; 42 | total_size = 43 | (total_size + SEGMENT_SIZE - 1) / SEGMENT_SIZE * SEGMENT_SIZE; 44 | if (total_size < init_size + num_threads * 3 * SEGMENT_SIZE) { 45 | printf("Warning: not enough space for free segment per thread\n"); 46 | total_size = init_size + num_threads * 3 * SEGMENT_SIZE; 47 | } 48 | } else { 49 | // infinity log space <=> no gc 50 | YCSB_Type type = ycsb_type; 51 | if constexpr (benchmark_workload == ETC) { 52 | type = YCSB_A; 53 | } 54 | uint64_t total_put_ops = 55 | NUM_KEYS + (uint64_t)actual_num_ops_per_thread * 56 | (YCSB_Put_Ratio[type] + 10) / 100 * num_threads; 57 | total_size = 58 | total_put_ops * object_size + num_threads * SEGMENT_SIZE * 2; 59 | num_gc_threads = 0; 60 | } 61 | printf( 62 | "Init capacity utilization %d%% threads of service / gc : %d / " 63 | "%d\n", 64 | init_util, num_threads, num_gc_threads); 65 | std::string db_path = std::string(PMEM_DIR) + "log_kvs"; 66 | std::filesystem::remove_all(db_path); 67 | std::filesystem::create_directory(db_path); 68 | 69 | db_ = new DB(db_path, total_size, num_threads, num_gc_threads); 70 | } 71 | 72 | barrier.Wait(st.threads()); 73 | worker = db_->GetWorker(); 74 | } 75 | 76 | virtual void CloseDB(benchmark::State &st) override { 77 | worker.reset(); 78 | barrier.Wait(st.threads()); 79 | if (st.thread_index() == 0) { 80 | delete db_; 81 | db_ = nullptr; 82 | } 83 | } 84 | 85 | virtual bool Get(const Slice &key, std::string *value) override { 86 | return worker->Get(key, value); 87 | } 88 | 89 | virtual void Put(const Slice &key, const Slice &value) override { 90 | worker->Put(key, value); 91 | } 92 | 93 | virtual size_t Scan(const Slice &key, int scan_length) override { 94 | return worker->Scan(key, scan_length); 95 | } 96 | 97 | virtual void PreSetUp(benchmark::State &st) override { 98 | // bind_core_on_numa(st.thread_index()); 99 | } 100 | }; 101 | 102 | 103 | 
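// (Editor's sketch, using hypothetical numbers rather than the project's
// configured NUM_KEYS/VALUE_SIZE.) The capacity sizing in OpenDB() above works
// as follows: with 100M keys, 8-byte keys and 48-byte values,
//   object_size = sizeof(KVItem) + 8 + 48 = 8 + 56 = 64 B
//   init_size   = 64 B * 100M             = 6.4 GB
// and at init_util = 50 the log is provisioned at
//   total_size  = 6.4 GB * 100 / 50       = 12.8 GB,
// rounded up to a multiple of SEGMENT_SIZE (4 MB), so the cleaner threads keep
// roughly (100 - init_util)% of the log as reclaimable slack.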
BENCHMARK_DEFINE_F(DBFixture, bench)(benchmark::State &st) { 104 | if (st.thread_index() == 0) { 105 | db_->StartCleanStatistics(); 106 | } 107 | #ifdef LOG_BATCHING 108 | worker->FlushRemainAndUpdateIndex(); 109 | #endif 110 | for (auto _ : st) { 111 | RunWorkload(st); 112 | } 113 | assert(st.iterations() == 1); 114 | st.SetItemsProcessed(st.iterations() * actual_num_ops_per_thread); 115 | if (st.thread_index() == 0) { 116 | double compaction_cpu_usage = db_->GetCompactionCPUUsage(); 117 | double compaction_tp = db_->GetCompactionThroughput(); 118 | st.counters["CompactionCPUUsage"] = compaction_cpu_usage; 119 | st.counters["CompactionThroughput"] = 120 | benchmark::Counter(compaction_tp, benchmark::Counter::kDefaults, 121 | benchmark::Counter::kIs1024); 122 | #ifdef MEASURE_LATENCY 123 | for (int i = 0; i < TypeEnumMax; i++) { 124 | HistogramData hist_data; 125 | for (int j = 1; j < st.threads(); j++) { 126 | latency_statistics[0].histograms[i].Merge( 127 | latency_statistics[j].histograms[i]); 128 | } 129 | latency_statistics[0].histograms[i].Data(&hist_data); 130 | std::string name = std::string("Lat_") + TypeStrings[i] + "_"; 131 | st.counters[name + "Avg"] = hist_data.average; 132 | st.counters[name + "P50"] = hist_data.median; 133 | st.counters[name + "P95"] = hist_data.percentile95; 134 | st.counters[name + "P99"] = hist_data.percentile99; 135 | } 136 | latency_statistics.reset(); 137 | #endif 138 | } 139 | } 140 | 141 | 142 | BENCHMARK_REGISTER_F(DBFixture, bench) 143 | ->Arg(0) 144 | ->DenseRange(50, 90, 10) 145 | ->DenseThreadRange(1, 32, 1) 146 | ->Iterations(1) 147 | ->Unit(benchmark::kMicrosecond) 148 | ->UseRealTime(); 149 | 150 | BENCHMARK_MAIN(); 151 | -------------------------------------------------------------------------------- /db/index/masstree/str.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2013 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2013 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 
15 | */ 16 | #ifndef STR_HH 17 | #define STR_HH 18 | #include "string_base.hh" 19 | #include 20 | #include 21 | namespace lcdf { 22 | 23 | struct Str : public String_base { 24 | typedef Str substring_type; 25 | typedef Str argument_type; 26 | 27 | const char *s; 28 | int len; 29 | 30 | Str() 31 | : s(0), len(0) { 32 | } 33 | template 34 | Str(const String_base& x) 35 | : s(x.data()), len(x.length()) { 36 | } 37 | Str(const char* s_) 38 | : s(s_), len(strlen(s_)) { 39 | } 40 | Str(const char* s_, int len_) 41 | : s(s_), len(len_) { 42 | } 43 | Str(const unsigned char* s_, int len_) 44 | : s(reinterpret_cast(s_)), len(len_) { 45 | } 46 | Str(const char *first, const char *last) 47 | : s(first), len(last - first) { 48 | precondition(first <= last); 49 | } 50 | Str(const unsigned char *first, const unsigned char *last) 51 | : s(reinterpret_cast(first)), len(last - first) { 52 | precondition(first <= last); 53 | } 54 | Str(const std::string& str) 55 | : s(str.data()), len(str.length()) { 56 | } 57 | Str(const uninitialized_type &unused) { 58 | (void) unused; 59 | } 60 | 61 | static const Str maxkey; 62 | 63 | void assign() { 64 | s = 0; 65 | len = 0; 66 | } 67 | template 68 | void assign(const String_base &x) { 69 | s = x.data(); 70 | len = x.length(); 71 | } 72 | void assign(const char *s_) { 73 | s = s_; 74 | len = strlen(s_); 75 | } 76 | void assign(const char *s_, int len_) { 77 | s = s_; 78 | len = len_; 79 | } 80 | 81 | const char *data() const { 82 | return s; 83 | } 84 | int length() const { 85 | return len; 86 | } 87 | char* mutable_data() { 88 | return const_cast(s); 89 | } 90 | 91 | Str prefix(int lenx) const { 92 | return Str(s, lenx < len ? lenx : len); 93 | } 94 | Str substring(const char *first, const char *last) const { 95 | if (first <= last && first >= s && last <= s + len) 96 | return Str(first, last); 97 | else 98 | return Str(); 99 | } 100 | Str substring(const unsigned char *first, const unsigned char *last) const { 101 | const unsigned char *u = reinterpret_cast(s); 102 | if (first <= last && first >= u && last <= u + len) 103 | return Str(first, last); 104 | else 105 | return Str(); 106 | } 107 | Str fast_substring(const char *first, const char *last) const { 108 | assert(begin() <= first && first <= last && last <= end()); 109 | return Str(first, last); 110 | } 111 | Str fast_substring(const unsigned char *first, const unsigned char *last) const { 112 | assert(ubegin() <= first && first <= last && last <= uend()); 113 | return Str(first, last); 114 | } 115 | Str ltrim() const { 116 | return String_generic::ltrim(*this); 117 | } 118 | Str rtrim() const { 119 | return String_generic::rtrim(*this); 120 | } 121 | Str trim() const { 122 | return String_generic::trim(*this); 123 | } 124 | 125 | long to_i() const { // XXX does not handle negative 126 | long x = 0; 127 | int p; 128 | for (p = 0; p < len && s[p] >= '0' && s[p] <= '9'; ++p) 129 | x = (x * 10) + s[p] - '0'; 130 | return p == len && p != 0 ? x : -1; 131 | } 132 | 133 | static Str snprintf(char *buf, size_t size, const char *fmt, ...) 
{ 134 | va_list val; 135 | va_start(val, fmt); 136 | int n = vsnprintf(buf, size, fmt, val); 137 | va_end(val); 138 | return Str(buf, n); 139 | } 140 | }; 141 | 142 | struct inline_string : public String_base { 143 | int len; 144 | char s[0]; 145 | 146 | const char *data() const { 147 | return s; 148 | } 149 | int length() const { 150 | return len; 151 | } 152 | 153 | size_t size() const { 154 | return sizeof(inline_string) + len; 155 | } 156 | static size_t size(int len) { 157 | return sizeof(inline_string) + len; 158 | } 159 | }; 160 | 161 | } // namespace lcdf 162 | 163 | LCDF_MAKE_STRING_HASH(lcdf::Str) 164 | LCDF_MAKE_STRING_HASH(lcdf::inline_string) 165 | #endif 166 | -------------------------------------------------------------------------------- /benchmarks/other/pmem_rocksdb_bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmarks/bench_base.h" 2 | #include "config.h" 3 | 4 | #include "rocksdb/db.h" 5 | #include "rocksdb/slice.h" 6 | #include "rocksdb/options.h" 7 | #include "rocksdb/table.h" 8 | #include "rocksdb/filter_policy.h" 9 | 10 | #include 11 | #include 12 | 13 | using namespace ROCKSDB_NAMESPACE; 14 | 15 | class PMEMRocksDBFixture : public BaseFixture { 16 | protected: 17 | virtual void OpenDB(benchmark::State &st) override { 18 | if (st.thread_index() == 0) { 19 | failed_cnt_ = 0; 20 | if (db != nullptr) { 21 | ERROR_EXIT("barrier error"); 22 | } 23 | 24 | Options options; 25 | #ifdef ON_DCPMM 26 | options.env = rocksdb::NewDCPMMEnv(rocksdb::DCPMMEnvOptions()); 27 | options.dcpmm_kvs_enable = false; 28 | options.dcpmm_compress_value = false; 29 | options.allow_mmap_reads = true; 30 | options.allow_mmap_writes = true; 31 | options.allow_dcpmm_writes = true; 32 | options.recycle_dcpmm_sst = true; 33 | printf("enable ON_DCPMM\n"); 34 | #endif 35 | rocksdb::BlockBasedTableOptions bbto; 36 | bbto.cache_index_and_filter_blocks_for_mmap_read = true; 37 | bbto.block_size = 256; 38 | bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false)); 39 | options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbto)); 40 | const int num_threads = st.threads(); 41 | #ifdef USE_ALL_CORES 42 | int num_gc_threads = NUM_ALL_CORES - num_threads; 43 | #else 44 | int num_gc_threads = NUM_GC_THREADS; 45 | #endif 46 | printf("threads of service / gc : %d / %d\n", num_threads, 47 | num_gc_threads); 48 | options.max_background_jobs = num_gc_threads; 49 | options.allow_concurrent_memtable_write = true; 50 | options.enable_pipelined_write = true; 51 | options.compression = kNoCompression; 52 | options.create_if_missing = true; 53 | options.error_if_exists = true; 54 | // options.statistics = rocksdb::CreateDBStatistics(); 55 | 56 | std::string db_path = std::string(PMEM_DIR) + "pmem_rocksdb"; 57 | std::filesystem::remove_all(db_path); 58 | std::filesystem::create_directory(db_path); 59 | Status s = DB::Open(options, db_path, &db); 60 | if (!s.ok()) { 61 | ERROR_EXIT("open failed: %s\n", s.ToString().c_str()); 62 | } 63 | } 64 | } 65 | 66 | virtual void CloseDB(benchmark::State &st) override { 67 | if (st.thread_index() == 0) { 68 | if (failed_cnt_ > 0) { 69 | printf("put failed count: %ld\n", failed_cnt_.load()); 70 | } 71 | delete db; 72 | db = nullptr; 73 | } 74 | } 75 | 76 | virtual bool Get(const ::Slice &key, std::string *value) override { 77 | Status s = 78 | db->Get(ReadOptions(), rocksdb::Slice(key.data(), key.size()), value); 79 | return s.ok(); 80 | } 81 | 82 | virtual void Put(const ::Slice &key, const ::Slice 
&value) override { 83 | Status s = db->Put(WriteOptions(), rocksdb::Slice(key.data(), key.size()), 84 | rocksdb::Slice(value.data(), value.size())); 85 | if (!s.ok()) { 86 | ERROR_EXIT("put failed"); 87 | // failed_cnt_++; 88 | } 89 | } 90 | 91 | // virtual void PreSetUp(benchmark::State &st) override { 92 | // // don't bind core, otherwise background threads also bind 93 | // // bind_core_on_numa(st.thread_index()); 94 | // } 95 | 96 | // virtual void PreTearDown(benchmark::State &st) override { 97 | // if (st.thread_index() == 0) { 98 | // std::string stat; 99 | // db->GetProperty("rocksdb.stats", &stat); 100 | // std::cout << stat << std::endl; 101 | 102 | // if (db->GetProperty("rocksdb.options-statistics", &stat)) { 103 | // std::cout << "options-statistics" << std::endl; 104 | // std::cout << stat << std::endl; 105 | // } else { 106 | // std::cout << "statistics is nullptr" << std::endl; 107 | // } 108 | // } 109 | // } 110 | 111 | private: 112 | DB *db = nullptr; 113 | std::atomic_int_fast64_t failed_cnt_{0}; 114 | }; 115 | 116 | 117 | BENCHMARK_DEFINE_F(PMEMRocksDBFixture, bench)(benchmark::State &st) { 118 | for (auto _ : st) { 119 | RunWorkload(st); 120 | } 121 | assert(st.iterations() == 1); 122 | st.SetItemsProcessed(st.iterations() * actual_num_ops_per_thread); 123 | #ifdef MEASURE_LATENCY 124 | if (st.thread_index() == 0) { 125 | for (int i = 0; i < TypeEnumMax; i++) { 126 | ::HistogramData hist_data; 127 | for (int j = 1; j < st.threads(); j++) { 128 | latency_statistics[0].histograms[i].Merge( 129 | latency_statistics[j].histograms[i]); 130 | } 131 | latency_statistics[0].histograms[i].Data(&hist_data); 132 | std::string name = std::string("Lat_") + TypeStrings[i] + "_"; 133 | st.counters[name + "Avg"] = hist_data.average; 134 | st.counters[name + "P50"] = hist_data.median; 135 | st.counters[name + "P95"] = hist_data.percentile95; 136 | st.counters[name + "P99"] = hist_data.percentile99; 137 | } 138 | latency_statistics.reset(); 139 | } 140 | #endif 141 | } 142 | 143 | BENCHMARK_REGISTER_F(PMEMRocksDBFixture, bench) 144 | ->DenseThreadRange(1, 32, 1) 145 | ->Iterations(1) 146 | ->Unit(benchmark::kMicrosecond) 147 | ->UseRealTime(); 148 | 149 | BENCHMARK_MAIN(); 150 | -------------------------------------------------------------------------------- /benchmarks/histogram.cpp: -------------------------------------------------------------------------------- 1 | #include "histogram.h" 2 | 3 | namespace HistogramInternal { 4 | 5 | HistogramBucketMapper::HistogramBucketMapper() { 6 | // If you change this, you also need to change 7 | // size of array buckets_ in HistogramImpl 8 | bucketValues_ = {1, 2}; 9 | valueIndexMap_ = {{1, 0}, {2, 1}}; 10 | double bucket_val = static_cast(bucketValues_.back()); 11 | while ((bucket_val = 1.5 * bucket_val) <= static_cast(UINT64_MAX)) { 12 | bucketValues_.push_back(static_cast(bucket_val)); 13 | // Extracts two most significant digits to make histogram buckets more 14 | // human-readable. E.g., 172 becomes 170. 
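// (Editor's note, derived from the loop above.) The generated bucket limits
// grow roughly geometrically, ~1.5x per step, truncated to two significant
// digits once they pass 100: 1, 2, 3, 4, 6, 10, 15, 22, 34, 51, 76, 110,
// 170, 250, ...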
15 | uint64_t pow_of_ten = 1; 16 | while (bucketValues_.back() / 10 > 10) { 17 | bucketValues_.back() /= 10; 18 | pow_of_ten *= 10; 19 | } 20 | bucketValues_.back() *= pow_of_ten; 21 | valueIndexMap_[bucketValues_.back()] = bucketValues_.size() - 1; 22 | } 23 | maxBucketValue_ = bucketValues_.back(); 24 | minBucketValue_ = bucketValues_.front(); 25 | } 26 | 27 | size_t HistogramBucketMapper::IndexForValue(uint64_t value) const { 28 | if (value >= maxBucketValue_) { 29 | return bucketValues_.size() - 1; 30 | } else if (value >= minBucketValue_) { 31 | std::map::const_iterator lowerBound = 32 | valueIndexMap_.lower_bound(value); 33 | if (lowerBound != valueIndexMap_.end()) { 34 | return static_cast(lowerBound->second); 35 | } else { 36 | return 0; 37 | } 38 | } else { 39 | return 0; 40 | } 41 | } 42 | 43 | } // namespace HistogramInternal 44 | 45 | namespace { 46 | const HistogramInternal::HistogramBucketMapper bucketMapper; 47 | } 48 | 49 | Histogram::Histogram() : num_buckets_(bucketMapper.BucketCount()) { 50 | assert(num_buckets_ == sizeof(buckets_) / sizeof(*buckets_)); 51 | Clear(); 52 | } 53 | 54 | void Histogram::Clear() { 55 | min_ = bucketMapper.LastValue(); 56 | max_ = 0; 57 | num_ = 0; 58 | sum_ = 0; 59 | sum_squares_ = 0; 60 | for (unsigned int b = 0; b < num_buckets_; b++) { 61 | buckets_[b] = 0; 62 | } 63 | } 64 | 65 | bool Histogram::Empty() const { return num() == 0; } 66 | 67 | void Histogram::Add(uint64_t value) { 68 | // This function is designed to be lock free, as it's in the critical path 69 | // of any operation. Each individual value is atomic and the order of 70 | // updates by concurrent threads is tolerable. 71 | const size_t index = bucketMapper.IndexForValue(value); 72 | assert(index < num_buckets_); 73 | buckets_[index] += 1; 74 | 75 | min_ = std::min(min_, value); 76 | max_ = std::max(max_, value); 77 | 78 | num_ += 1; 79 | sum_ += value; 80 | sum_squares_ += value * value; 81 | } 82 | 83 | void Histogram::Merge(const Histogram &other) { 84 | min_ = std::min(min_, other.min_); 85 | max_ = std::max(max_, other.max_); 86 | num_ += other.num_; 87 | sum_ += other.sum_; 88 | sum_squares_ += other.sum_squares_; 89 | for (unsigned int b = 0; b < num_buckets_; b++) { 90 | buckets_[b] += other.buckets_[b]; 91 | } 92 | } 93 | 94 | double Histogram::Median() const { return Percentile(50.0); } 95 | 96 | double Histogram::Percentile(double p) const { 97 | double threshold = num() * (p / 100.0); 98 | uint64_t cumulative_sum = 0; 99 | for (unsigned int b = 0; b < num_buckets_; b++) { 100 | uint64_t bucket_value = bucket_at(b); 101 | cumulative_sum += bucket_value; 102 | if (cumulative_sum >= threshold) { 103 | // Scale linearly within this bucket 104 | uint64_t left_point = (b == 0) ? 
0 : bucketMapper.BucketLimit(b - 1); 105 | uint64_t right_point = bucketMapper.BucketLimit(b); 106 | uint64_t left_sum = cumulative_sum - bucket_value; 107 | uint64_t right_sum = cumulative_sum; 108 | double pos = 0; 109 | uint64_t right_left_diff = right_sum - left_sum; 110 | if (right_left_diff != 0) { 111 | pos = (threshold - left_sum) / right_left_diff; 112 | } 113 | double r = left_point + (right_point - left_point) * pos; 114 | uint64_t cur_min = min(); 115 | uint64_t cur_max = max(); 116 | if (r < cur_min) 117 | r = static_cast(cur_min); 118 | if (r > cur_max) 119 | r = static_cast(cur_max); 120 | return r; 121 | } 122 | } 123 | return static_cast(max()); 124 | } 125 | 126 | double Histogram::Average() const { 127 | uint64_t cur_num = num(); 128 | uint64_t cur_sum = sum(); 129 | if (cur_num == 0) 130 | return 0; 131 | return static_cast(cur_sum) / static_cast(cur_num); 132 | } 133 | 134 | double Histogram::StandardDeviation() const { 135 | uint64_t cur_num = num(); 136 | uint64_t cur_sum = sum(); 137 | uint64_t cur_sum_squares = sum_squares(); 138 | if (cur_num == 0) 139 | return 0; 140 | double variance = 141 | static_cast(cur_sum_squares * cur_num - cur_sum * cur_sum) / 142 | static_cast(cur_num * cur_num); 143 | return std::sqrt(variance); 144 | } 145 | 146 | void Histogram::Data(HistogramData *const data) const { 147 | assert(data); 148 | data->median = Median(); 149 | data->percentile95 = Percentile(95); 150 | data->percentile99 = Percentile(99); 151 | data->max = static_cast(max()); 152 | data->average = Average(); 153 | data->standard_deviation = StandardDeviation(); 154 | data->count = num(); 155 | data->sum = sum(); 156 | data->min = static_cast(min()); 157 | } 158 | -------------------------------------------------------------------------------- /benchmarks/other/ChameleonDB/segment.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db_common.h" 4 | #include "util/util.h" 5 | 6 | // static constexpr int NUM_HEADERS = 1; 7 | static constexpr int HEADER_ALIGN_SIZE = 256; 8 | // weird slow when 4 * 64 9 | // rotating counter with multi logs reduce performance 10 | 11 | /** 12 | * segment header 13 | * tail pointer (offset): 4 bytes 14 | * status: free, in-used, close 15 | */ 16 | enum SegmentStatus { StatusAvailable, StatusUsing, StatusClosed }; 17 | class LogSegment { 18 | public: 19 | struct alignas(HEADER_ALIGN_SIZE) Header { 20 | uint32_t offset; // only valid when status is closed 21 | uint32_t status; 22 | uint32_t objects_tail_offset; 23 | 24 | void Flush() { 25 | #ifdef LOG_PERSISTENT 26 | clflushopt_fence(this, sizeof(Header)); 27 | #endif 28 | } 29 | }; 30 | 31 | static constexpr uint32_t HEADERS_SIZE = sizeof(Header); 32 | static constexpr uint32_t SEGMENT_DATA_SIZE = (SEGMENT_SIZE - HEADERS_SIZE); 33 | 34 | int cur_cnt_ = 0; 35 | 36 | LogSegment(char *start_addr, uint64_t size, bool init = true) 37 | : segment_start_(start_addr), 38 | data_start_(start_addr + HEADERS_SIZE), 39 | end_(start_addr + size) { 40 | assert(((uint64_t)header_ & (HEADER_ALIGN_SIZE - 1)) == 0); 41 | if (init) { 42 | Init(); 43 | } 44 | } 45 | 46 | void Init() { 47 | tail_ = data_start_; 48 | // header_->offset = 0; 49 | header_->objects_tail_offset = 0; 50 | header_->status = StatusAvailable; 51 | header_->Flush(); 52 | #ifdef LOG_BATCHING 53 | flush_tail_ = data_start_; 54 | #endif 55 | } 56 | 57 | bool HasSpaceFor(uint32_t sz) { 58 | char *tmp_end = tail_ + sz; 59 | return tmp_end <= end_; 60 | } 61 | 62 | char 
*AllocOne(size_t size) { 63 | char *ret = tail_; 64 | if (ret + size <= end_) { 65 | tail_ += size; 66 | ++cur_cnt_; 67 | return ret; 68 | } else { 69 | return nullptr; 70 | } 71 | } 72 | 73 | char *AllocSpace(size_t size) { 74 | char *ret = tail_; 75 | if (ret + size <= end_) { 76 | tail_ += size; 77 | return ret; 78 | } else { 79 | return nullptr; 80 | } 81 | } 82 | 83 | void StartUsing(bool is_hot, bool has_shortcut = false) { 84 | header_->status = StatusUsing; 85 | header_->Flush(); 86 | is_hot_ = is_hot; 87 | } 88 | 89 | void Close() { 90 | if (HasSpaceFor(sizeof(KVItem))) { 91 | KVItem *end = new (tail_) KVItem(); 92 | end->Flush(); 93 | tail_ += sizeof(KVItem); 94 | } 95 | 96 | close_time_ = NowMicros(); 97 | header_->offset = get_offset(); 98 | header_->status = StatusClosed; 99 | header_->objects_tail_offset = get_offset(); 100 | header_->Flush(); 101 | } 102 | 103 | void Clear() { 104 | tail_ = data_start_; 105 | is_hot_ = false; 106 | header_->status = StatusAvailable; 107 | header_->objects_tail_offset = 0; 108 | header_->Flush(); 109 | #ifdef LOG_BATCHING 110 | not_flushed_cnt_ = 0; 111 | flush_tail_ = data_start_; 112 | #endif 113 | cur_cnt_ = 0; 114 | } 115 | 116 | // append kv to log 117 | ValueType Append(const Slice &key, const Slice &value, uint32_t epoch) { 118 | uint32_t sz = sizeof(KVItem) + key.size() + value.size(); 119 | if (!HasSpaceFor(sz)) { 120 | return INVALID_VALUE; 121 | } 122 | KVItem *kv = new (tail_) KVItem(key, value, epoch); 123 | kv->Flush(); 124 | tail_ += sz; 125 | ++cur_cnt_; 126 | return TaggedPointer((char *)kv, sz); 127 | } 128 | 129 | #ifdef LOG_BATCHING 130 | int FlushRemain() { 131 | clwb_fence(flush_tail_, tail_ - flush_tail_); 132 | flush_tail_ = tail_; 133 | int persist_cnt = not_flushed_cnt_; 134 | not_flushed_cnt_ = 0; 135 | return persist_cnt; 136 | } 137 | 138 | ValueType AppendBatchFlush(const Slice &key, const Slice &value, 139 | uint32_t epoch, int *persist_cnt) { 140 | uint32_t sz = sizeof(KVItem) + key.size() + value.size(); 141 | if (!HasSpaceFor(sz)) { 142 | return INVALID_VALUE; 143 | } 144 | KVItem *kv = new (tail_) KVItem(key, value, epoch); 145 | ++not_flushed_cnt_; 146 | tail_ += sz; 147 | ++cur_cnt_; 148 | char *align_addr = (char *)((uint64_t)tail_ & ~(LOG_BATCHING_SIZE - 1)); 149 | if (align_addr - flush_tail_ >= LOG_BATCHING_SIZE) { 150 | clwb_fence(flush_tail_, align_addr - flush_tail_); 151 | flush_tail_ = align_addr; 152 | if (tail_ == align_addr) { 153 | *persist_cnt = not_flushed_cnt_; 154 | not_flushed_cnt_ = 0; 155 | } else { 156 | *persist_cnt = not_flushed_cnt_ - 1; 157 | not_flushed_cnt_ = 1; 158 | } 159 | } else { 160 | *persist_cnt = 0; 161 | } 162 | return TaggedPointer((char *)kv, sz); 163 | } 164 | #endif 165 | 166 | uint64_t get_close_time() { return close_time_; } 167 | 168 | bool IsHot() { return is_hot_; } 169 | 170 | uint32_t get_offset() { return tail_ - data_start_; } 171 | char *get_segment_start() { return segment_start_; } 172 | char *get_data_start() { return data_start_; } 173 | char *get_tail() { return tail_; } 174 | char *get_end() { return end_; } 175 | 176 | private: 177 | union { 178 | char *const segment_start_; // const 179 | Header *header_; 180 | }; 181 | char *const data_start_; 182 | char *const end_; // const 183 | char *tail_; 184 | uint64_t close_time_; 185 | bool is_hot_ = false; 186 | 187 | #ifdef LOG_BATCHING 188 | int not_flushed_cnt_ = 0; 189 | char *flush_tail_ = nullptr; 190 | #endif 191 | 192 | friend class LogGCer; 193 | DISALLOW_COPY_AND_ASSIGN(LogSegment); 194 | }; 
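// Minimal usage sketch (editor's illustration; `index`, `key`, `value` and
// `current_epoch` are assumed to exist elsewhere and are not defined in this
// file):
//
//   LogSegment seg(segment_start, SEGMENT_SIZE);
//   seg.StartUsing(/*is_hot=*/false);
//   ValueType v = seg.Append(key, value, current_epoch);
//   if (v == INVALID_VALUE) {
//     seg.Close();          // segment full: seal it and pick a new one
//   } else {
//     index->Put(key, v);   // v wraps the entry address (plus its size under REDUCE_PM_ACCESS)
//   }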
195 | -------------------------------------------------------------------------------- /db/index/masstree/masstree_get.hh: -------------------------------------------------------------------------------- 1 | /* Masstree 2 | * Eddie Kohler, Yandong Mao, Robert Morris 3 | * Copyright (c) 2012-2014 President and Fellows of Harvard College 4 | * Copyright (c) 2012-2014 Massachusetts Institute of Technology 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, subject to the conditions 9 | * listed in the Masstree LICENSE file. These conditions include: you must 10 | * preserve this copyright notice, and you cannot mention the copyright 11 | * holders in advertising related to the Software without their permission. 12 | * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This 13 | * notice is a summary of the Masstree LICENSE file; the license in that file 14 | * is legally binding. 15 | */ 16 | #ifndef MASSTREE_GET_HH 17 | #define MASSTREE_GET_HH 18 | #include "masstree_tcursor.hh" 19 | #include "masstree_key.hh" 20 | namespace Masstree { 21 | 22 | template 23 | bool unlocked_tcursor

<P>::find_unlocked(threadinfo& ti)
24 | {
25 |     int match;
26 |     key_indexed_position kx;
27 |     node_base<P>* root = const_cast<node_base<P>*>(root_);
28 |
29 |  retry:
30 |     n_ = root->reach_leaf(ka_, v_, ti);
31 |
32 |  forward:
33 |     if (v_.deleted())
34 |         goto retry;
35 |
36 |     n_->prefetch();
37 |     perm_ = n_->permutation();
38 |     kx = leaf<P>::bound_type::lower(ka_, *this);
39 |     if (kx.p >= 0) {
40 |         lv_ = n_->lv_[kx.p];
41 |         lv_.prefetch(n_->keylenx_[kx.p]);
42 |         match = n_->ksuf_matches(kx.p, ka_);
43 |     } else
44 |         match = 0;
45 |     if (n_->has_changed(v_)) {
46 |         ti.mark(threadcounter(tc_stable_leaf_insert + n_->simple_has_split(v_)));
47 |         n_ = n_->advance_to_key(ka_, v_, ti);
48 |         goto forward;
49 |     }
50 |
51 |     if (match < 0) {
52 |         ka_.shift_by(-match);
53 |         root = lv_.layer();
54 |         goto retry;
55 |     } else
56 |         return match;
57 | }
58 |
59 | template <typename P>
60 | inline bool basic_table<P>::get(Str key, value_type &value,
61 |                                 threadinfo& ti) const
62 | {
63 |     unlocked_tcursor<P> lp(*this, key);
64 |     bool found = lp.find_unlocked(ti);
65 |     if (found)
66 |         value = lp.value();
67 |     return found;
68 | }
69 |
70 | template <typename P>
71 | bool tcursor<P>::find_locked(threadinfo& ti)
72 | {
73 |     node_base<P>* root = const_cast<node_base<P>*>(root_);
74 |     nodeversion_type v;
75 |     permuter_type perm;
76 |
77 |  retry:
78 |     n_ = root->reach_leaf(ka_, v, ti);
79 |
80 |  forward:
81 |     if (v.deleted())
82 |         goto retry;
83 |
84 |     n_->prefetch();
85 |     perm = n_->permutation();
86 |     fence();
87 |     kx_ = leaf<P>::bound_type::lower(ka_, *n_);
88 |     if (kx_.p >= 0) {
89 |         leafvalue<P> lv = n_->lv_[kx_.p];
90 |         lv.prefetch(n_->keylenx_[kx_.p]);
91 |         state_ = n_->ksuf_matches(kx_.p, ka_);
92 |         if (state_ < 0 && !n_->has_changed(v) && lv.layer()->is_root()) {
93 |             ka_.shift_by(-state_);
94 |             root = lv.layer();
95 |             goto retry;
96 |         }
97 |     } else
98 |         state_ = 0;
99 |
100 |     n_->lock(v, ti.lock_fence(tc_leaf_lock));
101 |     if (n_->has_changed(v) || n_->permutation() != perm) {
102 |         ti.mark(threadcounter(tc_stable_leaf_insert + n_->simple_has_split(v)));
103 |         n_->unlock();
104 |         n_ = n_->advance_to_key(ka_, v, ti);
105 |         goto forward;
106 |     } else if (unlikely(state_ < 0)) {
107 |         ka_.shift_by(-state_);
108 |         n_->lv_[kx_.p] = root = n_->lv_[kx_.p].layer()->maybe_parent();
109 |         n_->unlock();
110 |         goto retry;
111 |     } else if (unlikely(n_->deleted_layer())) {
112 |         ka_.unshift_all();
113 |         root = const_cast<node_base<P>*>(root_);
114 |         n_->unlock();
115 |         goto retry;
116 |     }
117 |     return state_;
118 | }
119 |
120 | template <typename P>
121 | bool tcursor<P>::find_locked_with_shortcut(threadinfo& ti, LogEntryHelper &le_helper)
122 | {
123 |     node_base<P>* root = const_cast<node_base<P>*>(root_);
124 |     nodeversion_type v;
125 |     bool check_sibling = true;
126 |
127 |     v = n_->stable_annotated(ti.stable_fence());
128 |     if (v.deleted() || !v.isleaf()) {
129 |         goto fallback;
130 |     }
131 |
132 |  retry:
133 |     n_->prefetch();
134 |     if (kx_.p >= 0) {
135 |         leafvalue<P> lv = n_->lv_[kx_.p];
136 |         lv.prefetch(n_->keylenx_[kx_.p]);
137 |         state_ = n_->ksuf_matches_shortcut(kx_.p, ka_);
138 |         if (state_ <= 0) {
139 |             // check one sibling node
140 |             leaf<P> *next;
141 |             if (check_sibling && (next = n_->safe_next())
142 |                 && compare(ka_.ikey(), next->ikey_bound()) >= 0) {
143 |                 n_ = next;
144 |                 v = n_->stable_annotated(ti.stable_fence());
145 |                 if (v.deleted())
146 |                     goto fallback;
147 |                 kx_ = leaf<P>

::bound_type::lower(ka_, *n_); 148 | check_sibling = false; 149 | goto retry; 150 | } 151 | goto fallback; 152 | } 153 | } else { 154 | goto fallback; 155 | } 156 | 157 | n_->lock(v, ti.lock_fence(tc_leaf_lock)); 158 | if (unlikely(n_->deleted_layer())) { 159 | n_->unlock(); 160 | goto fallback; 161 | } 162 | le_helper.fast_path = true; 163 | return state_; 164 | 165 | fallback: 166 | return find_locked(ti); 167 | } 168 | 169 | } // namespace Masstree 170 | #endif 171 | -------------------------------------------------------------------------------- /db/index/masstree/masstree_wrapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include "config.h" 11 | #include "compiler.hh" 12 | 13 | #include "masstree.hh" 14 | #include "kvthread.hh" 15 | #include "masstree_tcursor.hh" 16 | #include "masstree_insert.hh" 17 | #include "masstree_print.hh" 18 | #include "masstree_remove.hh" 19 | #include "masstree_scan.hh" 20 | #include "string.hh" 21 | 22 | #include "db_common.h" 23 | 24 | class key_unparse_unsigned { 25 | public: 26 | static int unparse_key(Masstree::key key, char* buf, int buflen) { 27 | return snprintf(buf, buflen, "%" PRIu64, key.ikey()); 28 | } 29 | }; 30 | 31 | class MasstreeWrapper { 32 | public: 33 | static constexpr uint64_t insert_bound = 0xfffff; //0xffffff; 34 | struct table_params : public Masstree::nodeparams<15,15> { 35 | typedef uint64_t value_type; 36 | typedef Masstree::value_print value_print_type; 37 | typedef threadinfo threadinfo_type; 38 | typedef key_unparse_unsigned key_unparse_type; 39 | static constexpr ssize_t print_max_indent_depth = 12; 40 | }; 41 | 42 | typedef Masstree::Str Str; 43 | typedef Masstree::basic_table table_type; 44 | typedef Masstree::unlocked_tcursor unlocked_cursor_type; 45 | typedef Masstree::tcursor cursor_type; 46 | typedef Masstree::leaf leaf_type; 47 | typedef Masstree::internode internode_type; 48 | 49 | typedef typename table_type::node_type node_type; 50 | typedef typename unlocked_cursor_type::nodeversion_value_type nodeversion_value_type; 51 | 52 | struct Scanner { 53 | const int cnt; 54 | std::vector &vec; 55 | 56 | Scanner(int cnt, std::vector &v) 57 | : cnt(cnt), vec(v) { 58 | vec.reserve(cnt); 59 | } 60 | 61 | template 62 | void visit_leaf(const SS &, const K &, threadinfo &) {} 63 | 64 | bool visit_value(Str key, table_params::value_type val, threadinfo &) { 65 | vec.push_back(val); 66 | if (vec.size() == cnt) { 67 | return false; 68 | } 69 | return true; 70 | } 71 | }; 72 | 73 | static __thread typename table_params::threadinfo_type *ti; 74 | 75 | MasstreeWrapper() { 76 | this->table_init(); 77 | } 78 | 79 | void table_init() { 80 | if (ti == nullptr) 81 | ti = threadinfo::make(threadinfo::TI_MAIN, -1); 82 | table_.initialize(*ti); 83 | key_gen_ = 0; 84 | } 85 | 86 | void keygen_reset() { 87 | key_gen_ = 0; 88 | } 89 | 90 | static void thread_init(int thread_id) { 91 | if (ti == nullptr) 92 | ti = threadinfo::make(threadinfo::TI_PROCESS, thread_id); 93 | } 94 | 95 | void insert(uint64_t int_key, LogEntryHelper &le_helper) { 96 | uint64_t key_buf; 97 | Str key = make_key(int_key, key_buf); 98 | cursor_type lp(table_, key); 99 | bool found = lp.find_insert(*ti); 100 | if (found) { 101 | le_helper.old_val = lp.value(); 102 | } 103 | #ifdef GC_SHORTCUT 104 | le_helper.shortcut = Shortcut((char *)lp.node(), lp.offset()); 105 | #endif 106 | lp.value() = le_helper.new_val; 107 | fence(); 108 | 
lp.finish(1, *ti); 109 | } 110 | 111 | bool search(uint64_t int_key, uint64_t &value) { 112 | uint64_t key_buf; 113 | Str key = make_key(int_key, key_buf); 114 | bool found = table_.get(key, value, *ti); 115 | return found; 116 | } 117 | 118 | void scan(uint64_t int_key, int cnt, std::vector &vec) { 119 | uint64_t key_buf; 120 | Str key = make_key(int_key, key_buf); 121 | Scanner scanner(cnt, vec); 122 | table_.scan(key, true, scanner, *ti); 123 | } 124 | 125 | bool remove(uint64_t int_key) { 126 | uint64_t key_buf; 127 | Str key = make_key(int_key, key_buf); 128 | cursor_type lp(table_, key); 129 | bool found = lp.find_locked(*ti); 130 | lp.finish(-1, *ti); 131 | return true; 132 | } 133 | 134 | void gc_insert(uint64_t int_key, LogEntryHelper &le_helper) { 135 | uint64_t key_buf; 136 | Str key = make_key(int_key, key_buf); 137 | cursor_type lp(table_, key); 138 | bool found = lp.find_insert(*ti); 139 | assert(found); 140 | if (lp.value() == le_helper.old_val) { 141 | lp.value() = le_helper.new_val; 142 | } else { 143 | le_helper.old_val = le_helper.new_val; 144 | } 145 | fence(); 146 | lp.finish(1, *ti); 147 | } 148 | 149 | void gc_insert_with_shortcut(uint64_t int_key, LogEntryHelper &le_helper) { 150 | uint64_t key_buf; 151 | Str key = make_key(int_key, key_buf); 152 | cursor_type lp(table_, key, le_helper); 153 | bool found = lp.find_insert_with_shortcut(*ti, le_helper); 154 | assert(found); 155 | // #ifdef GC_SHORTCUT 156 | // le_helper.shortcut = Shortcut((char *)lp.node(), lp.offset()); 157 | // #endif 158 | if (lp.value() == le_helper.old_val) { 159 | lp.value() = le_helper.new_val; 160 | } else { 161 | le_helper.old_val = le_helper.new_val; 162 | } 163 | fence(); 164 | lp.finish(1, *ti); 165 | } 166 | 167 | private: 168 | table_type table_; 169 | uint64_t key_gen_; 170 | static bool stopping; 171 | static uint32_t printing; 172 | 173 | static inline Str make_key(uint64_t int_key, uint64_t& key_buf) { 174 | key_buf = __builtin_bswap64(int_key); 175 | return Str((const char *)&key_buf, sizeof(key_buf)); 176 | } 177 | }; 178 | 179 | --------------------------------------------------------------------------------
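Usage sketch (editor's addition, not a file from the repository): a minimal illustration of how a log-structured writer would drive the MasstreeWrapper above together with LogEntryHelper and TaggedPointer from db_common.h. The function names put_into_index/read_from_index and the surrounding glue are assumptions; only the MasstreeWrapper, LogEntryHelper, and KVItem calls come from the headers shown above.

#include <string>
#include "db_common.h"
#include "masstree_wrapper.h"

// Publish a freshly appended log entry (new_addr) in the index.
void put_into_index(MasstreeWrapper &index, uint64_t key, ValueType new_addr) {
  MasstreeWrapper::thread_init(0);   // per-worker threadinfo (no-op if already set)
  LogEntryHelper helper(new_addr);
  index.insert(key, helper);         // helper.old_val now holds the displaced address, if any
  if (helper.old_val != INVALID_VALUE) {
    // the previous log entry for this key is garbage; a cleaner may reclaim it
  }
}

// Look up a key and copy its value out of the log.
bool read_from_index(MasstreeWrapper &index, uint64_t key, std::string &out) {
  uint64_t addr;
  if (!index.search(key, addr)) {
    return false;
  }
  TaggedPointer(addr).GetKVItem()->GetValue(out);
  return true;
}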