├── test.sh ├── .gitignore ├── pic ├── key.png ├── wal.png ├── bloom.png ├── sst1.png ├── sst2.png ├── sst3.png ├── arena实现.png ├── arena流程.png ├── status.png ├── system.png ├── version.png └── SkipListPic.png ├── lsmkv ├── db │ ├── option.cc │ ├── version │ │ ├── merge.h │ │ ├── iterator_wrapper.h │ │ ├── merge.cc │ │ ├── version_edit.h │ │ ├── version_edit.cc │ │ └── version.h │ ├── format │ │ ├── dbformat.h │ │ ├── internal_key.cc │ │ └── internal_key.h │ ├── log │ │ ├── log_format.h │ │ ├── log_writer.h │ │ ├── log_reader.h │ │ ├── log_writer.cc │ │ └── log_reader.cc │ ├── memtable │ │ ├── 1.pu │ │ ├── arena.h │ │ ├── arena.cc │ │ ├── memtable.h │ │ ├── memtable.cc │ │ └── skiplist.h │ ├── sstable │ │ ├── block_reader.h │ │ ├── block_builder.h │ │ ├── table_cache.h │ │ ├── block_format.h │ │ ├── block_builder.cc │ │ ├── table_cache.cc │ │ ├── block_format.cc │ │ ├── sstable_reader.cc │ │ ├── block_reader.cc │ │ └── sstable_builder.cc │ ├── filter │ │ ├── filter_block.h │ │ ├── bloom.cc │ │ └── filter_block.cc │ ├── writebatch │ │ ├── writebatch_helper.h │ │ └── writebatch.cc │ └── dbimpl.h ├── CMakeLists.txt ├── include │ ├── comparator.h │ ├── db.h │ ├── writebatch.h │ ├── filter_policy.h │ ├── sstable_reader.h │ ├── sstable_builder.h │ ├── cache.h │ ├── iterator.h │ ├── status.h │ ├── env.h │ └── option.h └── util │ ├── filename.h │ ├── mutex.h │ ├── thread_annotations.h │ ├── status.cc │ ├── comparator.cc │ ├── MurmurHash3.h │ ├── coding.h │ ├── logger.h │ ├── coding.cc │ ├── filename.cc │ ├── iterator.cc │ ├── file.h │ ├── cache.cc │ └── MurmurHash3.cc ├── clean.sh ├── test ├── CMakeLists.txt ├── cache_test.cc ├── sstable_write_test.cc ├── block_test.cc ├── env_test.cc ├── example_test.cc ├── sstable_test.cc ├── coding_test.cc ├── filter_block_test.cc ├── memtable_test.cc └── db_test.cc ├── CMakeLists.txt └── README.md /test.sh: -------------------------------------------------------------------------------- 1 | cd build 2 | ctest 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | data/ 3 | lib/ 4 | .vscode/ 5 | build/ -------------------------------------------------------------------------------- /pic/key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/key.png -------------------------------------------------------------------------------- /pic/wal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/wal.png -------------------------------------------------------------------------------- /pic/bloom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/bloom.png -------------------------------------------------------------------------------- /pic/sst1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/sst1.png -------------------------------------------------------------------------------- /pic/sst2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/sst2.png -------------------------------------------------------------------------------- /pic/sst3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/sst3.png -------------------------------------------------------------------------------- /pic/arena实现.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/arena实现.png 
-------------------------------------------------------------------------------- /pic/arena流程.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/arena流程.png -------------------------------------------------------------------------------- /pic/status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/status.png -------------------------------------------------------------------------------- /pic/system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/system.png -------------------------------------------------------------------------------- /pic/version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/version.png -------------------------------------------------------------------------------- /pic/SkipListPic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leiyx/My_LSM_KVStore/HEAD/pic/SkipListPic.png -------------------------------------------------------------------------------- /lsmkv/db/option.cc: -------------------------------------------------------------------------------- 1 | #include "include/option.h" 2 | #include "include/env.h" 3 | #include "include/comparator.h" 4 | 5 | namespace lsmkv { 6 | Option::Option() 7 | : comparator(DefaultComparator()), 8 | env(DefaultEnv()) {} 9 | } -------------------------------------------------------------------------------- /lsmkv/db/version/merge.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_VERSION_MERGE_H_ 2 | #define STORAGE_XDB_DB_VERSION_MERGE_H_ 3 | 4 | #include "include/iterator.h" 5 | 
#include "include/comparator.h" 6 | 7 | namespace lsmkv { 8 | 9 | class MergedIterator; 10 | 11 | Iterator* NewMergedIterator(Iterator** list, size_t num, const Comparator* cmp); 12 | 13 | } 14 | #endif // STORAGE_XDB_DB_VERSION_MERGE_H_ 15 | -------------------------------------------------------------------------------- /lsmkv/db/format/dbformat.h: -------------------------------------------------------------------------------- 1 | #ifndef DBFORMAT_H 2 | #define DBFORMAT_H 3 | #include 4 | namespace lsmkv { 5 | 6 | namespace config { 7 | static constexpr int kNumLevels = 7; 8 | 9 | static constexpr int kL0CompactionThreshold = 4; 10 | 11 | static constexpr int kL0StopWriteThreshold = 12; 12 | 13 | static constexpr uint64_t kMaxSequenceNumber = ((0x1ull << 56) - 1); 14 | } // namespace config 15 | 16 | } // namespace lsmkv 17 | 18 | #endif // DBFORMAT_H 19 | -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | func(){ 2 | cd $1 3 | for file in `ls .` 4 | do 5 | if test -f $file 6 | then 7 | echo "removed `pwd`/"$file 8 | rm -rf $file 9 | elif test -d $file 10 | then 11 | echo "into path: `pwd`/"$file 12 | func $file 13 | fi 14 | done 15 | cd .. 
16 | } 17 | 18 | path="bin" 19 | func $path 20 | path="folder_for_test" 21 | func $path 22 | 23 | 24 | rm -rf build 25 | echo "removed `pwd`/build" 26 | rm -rf lib/* 27 | echo "removed `pwd`/lib/*" -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(add_test_exe src_name) 2 | add_executable(${src_name} ${src_name}.cc) 3 | target_link_libraries(${src_name} lsmkv gtest gtest_main) 4 | add_test(NAME ${src_name} COMMAND ${src_name}) 5 | endfunction() 6 | 7 | add_test_exe(block_test) 8 | add_test_exe(cache_test) 9 | add_test_exe(coding_test) 10 | add_test_exe(db_test) 11 | add_test_exe(env_test) 12 | add_test_exe(example_test) 13 | add_test_exe(filter_block_test) 14 | add_test_exe(memtable_test) 15 | add_test_exe(sstable_test) 16 | add_test_exe(sstable_write_test) -------------------------------------------------------------------------------- /lsmkv/db/log/log_format.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_LOG_LOG_FORMAT_H_ 2 | #define STORAGE_XDB_DB_LOG_LOG_FORMAT_H_ 3 | 4 | #include 5 | 6 | namespace lsmkv { 7 | namespace log { 8 | 9 | enum LogRecodeType{ 10 | KZeroType = 0, 11 | KFullType = 1, 12 | KFirstType = 2, 13 | KMiddleType = 3, 14 | KLastType = 4 15 | }; 16 | 17 | // 2 bytes max length 18 | 19 | static constexpr int KMaxType = 4; 20 | 21 | static constexpr int kBlockSize = 32768; 22 | 23 | // 32bits crc | 16bits length | 8bits Type 24 | static constexpr int KLogHeadSize = 4 + 2 + 1; 25 | 26 | } 27 | } 28 | 29 | #endif // STORAGE_XDB_DB_LOG_LOG_FORMAT_H_ 30 | -------------------------------------------------------------------------------- /test/cache_test.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "include/cache.h" 3 | namespace lsmkv { 4 | void 
NullDeleter(std::string_view, void* value) { 5 | 6 | } 7 | TEST(ExampleTest, InsertAndSearch) { 8 | 9 | size_t capacity = 1 << 8; 10 | Cache* cache = NewLRUCache(capacity); 11 | std::string val1{"test1"}; 12 | Cache::Handle* e = cache->Insert("test1",&val1, 5, &NullDeleter); 13 | cache->Release(e); 14 | e = cache->Lookup("test1"); 15 | std::string* result = reinterpret_cast(cache->Value(e)); 16 | cache->Release(e); 17 | ASSERT_EQ(*result, val1); 18 | delete cache; 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /lsmkv/db/memtable/1.pu: -------------------------------------------------------------------------------- 1 | @startuml 2 | class Arena { 3 | + Arena(); 4 | + ~Arena() 5 | + char* Allocate(size_t bytes); 6 | + char* AllocateAlign(size_t bytes); 7 | + size_t MemoryUsed() const; 8 | - char* AllocateFallBack(size_t bytes); 9 | - char* AllocateNewBlock(size_t bytes); 10 | - char* alloc_ptr_; 11 | - size_t alloc_bytes_remaining_; 12 | - std::vector blocks_; 13 | - std::atomic memory_used_; 14 | } 15 | struct Node { 16 | + Node* Next(int level) 17 | + void SetNext(int level, Node* x) 18 | + Key const key_; 19 | + std::atomic next_[1]; 20 | } 21 | class SkipList { 22 | 23 | } 24 | class MemTable { 25 | 26 | } 27 | MemTable "1" *-- "1" SkipList 28 | SkipList "1" *-- "1" Arena 29 | @enduml -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.20) 2 | project(MyLSMKV) 3 | 4 | 5 | set(CMAKE_BUILD_TYPE DEBUG) 6 | SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb") 7 | SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") 8 | 9 | set(CMAKE_C_STANDARD 11) 10 | set(CMAKE_C_STANDARD_REQUIRED OFF) 11 | set(CMAKE_C_COMPILER "/usr/bin/gcc") 12 | 13 | set(CMAKE_CXX_STANDARD 20) 14 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 15 | set(CMAKE_CXX_COMPILER 
"/usr/bin/g++") 16 | 17 | # 开启ctest 18 | enable_testing() 19 | 20 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) 21 | set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) 22 | 23 | include_directories(${PROJECT_SOURCE_DIR}/lsmkv) 24 | 25 | add_subdirectory(lsmkv) 26 | add_subdirectory(test) -------------------------------------------------------------------------------- /lsmkv/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB UTIL_SRC util/**.cc) 3 | 4 | set(DB_SRC 5 | "db/dbimpl.cc" 6 | "db/option.cc" 7 | "db/filter/filter_block.cc" 8 | "db/filter/bloom.cc" 9 | "db/format/internal_key.cc" 10 | "db/log/log_reader.cc" 11 | "db/log/log_writer.cc" 12 | "db/memtable/arena.cc" 13 | "db/memtable/memtable.cc" 14 | "db/sstable/block_builder.cc" 15 | "db/sstable/block_format.cc" 16 | "db/sstable/block_reader.cc" 17 | "db/sstable/sstable_builder.cc" 18 | "db/sstable/sstable_reader.cc" 19 | "db/sstable/table_cache.cc" 20 | "db/version/merge.cc" 21 | "db/version/version_edit.cc" 22 | "db/version/version.cc" 23 | "db/writebatch/writebatch.cc" 24 | ) 25 | 26 | 27 | set(SRCS 28 | ${UTIL_SRC} 29 | ${DB_SRC} 30 | ) 31 | 32 | add_library(lsmkv ${SRCS}) 33 | 34 | target_link_libraries(lsmkv snappy crc32c pthread) -------------------------------------------------------------------------------- /lsmkv/include/comparator.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_INCLUDE_COMPARATOR_H_ 2 | #define STORAGE_XDB_DB_INCLUDE_COMPARATOR_H_ 3 | 4 | #include 5 | 6 | #include 7 | namespace lsmkv { 8 | class Comparator { 9 | public: 10 | virtual ~Comparator() = default; 11 | 12 | virtual int Compare(std::string_view a, std::string_view b) const = 0; 13 | 14 | virtual const char* Name() const = 0; 15 | // find the shortest string between with start and limit. 
16 | virtual void FindShortestMiddle(std::string* start, 17 | std::string_view limit) const = 0; 18 | 19 | virtual void FindShortestBigger(std::string* start) const = 0; 20 | }; 21 | 22 | const Comparator* DefaultComparator(); 23 | 24 | } // namespace lsmkv 25 | 26 | #endif // STORAGE_XDB_DB_INCLUDE_COMPARATOR_H_ -------------------------------------------------------------------------------- /test/sstable_write_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "crc32c/crc32c.h" 5 | #include "gtest/gtest.h" 6 | #include "include/db.h" 7 | 8 | namespace lsmkv { 9 | TEST(ExampleTest, SSTableTest) { 10 | Option option; 11 | WriteOption write_option; 12 | DB* db; 13 | DestoryDB(option, "/home/lei/MyLSMKV/folder_for_test/sst_write_test"); 14 | Status s = 15 | DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/sst_write_test", &db); 16 | ASSERT_TRUE(s.ok()); 17 | for (int i = 0; i < 10000; i++) { 18 | s = db->Put(write_option, std::to_string(i), std::to_string(i)); 19 | ASSERT_TRUE(s.ok()); 20 | } 21 | delete db; 22 | db = nullptr; 23 | s = DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/sst_write_test", &db); 24 | ASSERT_TRUE(s.ok()); 25 | } 26 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/db/sstable/block_reader.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_SSTABLE_BLOCK_READER_H_ 2 | #define STORAGE_XDB_DB_SSTABLE_BLOCK_READER_H_ 3 | 4 | #include "include/iterator.h" 5 | #include "include/comparator.h" 6 | 7 | namespace lsmkv { 8 | 9 | struct BlockContents; 10 | 11 | class BlockReader { 12 | public: 13 | explicit BlockReader(const BlockContents& contents); 14 | 15 | ~BlockReader(); 16 | 17 | BlockReader(const BlockReader&) = delete; 18 | BlockReader& operator=(const BlockReader&) = delete; 19 | 20 | Iterator* NewIterator(const Comparator* cmp); 21 | 
private: 22 | class Iter; 23 | 24 | size_t NumRestarts() const; 25 | const char* data_; 26 | size_t size_; 27 | uint32_t restarts_offset_; 28 | bool owned_; 29 | }; 30 | 31 | } 32 | 33 | #endif // STORAGE_XDB_DB_SSTABLE_BLOCK_READER_H_ -------------------------------------------------------------------------------- /lsmkv/include/db.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_INCLUDE_DB_H_ 2 | #define STORAGE_XDB_INCLUDE_DB_H_ 3 | 4 | #include "include/status.h" 5 | #include "include/writebatch.h" 6 | #include "include/option.h" 7 | namespace lsmkv { 8 | 9 | class DB { 10 | public: 11 | virtual ~DB() = default; 12 | 13 | static Status Open(const Option& option, const std::string& name, DB** ptr); 14 | 15 | virtual Status Get(const ReadOption& option,std::string_view key, std::string* value) = 0; 16 | 17 | virtual Status Put(const WriteOption& option, std::string_view key, std::string_view value) = 0; 18 | 19 | virtual Status Delete(const WriteOption& option,std::string_view key) = 0; 20 | 21 | virtual Status Write(const WriteOption& option,WriteBatch* batch) = 0; 22 | }; 23 | 24 | Status DestoryDB(const Option& option, const std::string& name); 25 | } 26 | #endif // STORAGE_XDB_INCLUDE_DB_H -------------------------------------------------------------------------------- /lsmkv/include/writebatch.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_INCLUDE_WRITEBATCH_H_ 2 | #define STORAGE_XDB_INCLUDE_WRITEBATCH_H_ 3 | 4 | 5 | #include "util/coding.h" 6 | #include "include/status.h" 7 | namespace lsmkv { 8 | 9 | class WriteBatch { 10 | public: 11 | class Handle { 12 | public: 13 | virtual ~Handle() = default; 14 | virtual void Put(std::string_view key, std::string_view value) = 0; 15 | virtual void Delete(std::string_view key) = 0; 16 | }; 17 | WriteBatch(); 18 | 19 | ~WriteBatch() = default; 20 | 21 | WriteBatch(const WriteBatch&) = default; 22 | 
23 | WriteBatch& operator=(const WriteBatch&) = default; 24 | 25 | void Put(std::string_view key, std::string_view value); 26 | 27 | void Delete(std::string_view key); 28 | 29 | void Clear(); 30 | 31 | Status Iterate(Handle* handle) const; 32 | private: 33 | friend class WriteBatchHelper; 34 | 35 | std::string rep_; 36 | 37 | }; 38 | 39 | 40 | } 41 | 42 | 43 | #endif // STORAGE_XDB_INCLUDE_WRITEBATCH_H_ -------------------------------------------------------------------------------- /lsmkv/db/log/log_writer.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_LOG_LOG_WRITER_H_ 2 | #define STORAGE_XDB_DB_LOG_LOG_WRITER_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include "include/status.h" 8 | #include "db/log/log_format.h" 9 | 10 | namespace lsmkv { 11 | 12 | class WritableFile; 13 | 14 | namespace log { 15 | 16 | class Writer { 17 | public: 18 | explicit Writer(WritableFile* dest); 19 | 20 | // initialize the writer with file "dest" not empty 21 | // which has "dest_length" length. 
22 | Writer(WritableFile* dest, uint64_t dest_length); 23 | 24 | Writer(const Writer&) = delete; 25 | Writer& operator=(const Writer&) = delete; 26 | 27 | ~Writer() = default; 28 | 29 | Status AddRecord(std::string_view sv); 30 | private: 31 | Status WritePhysicalRecord(LogRecodeType type, const char* p, size_t len); 32 | WritableFile* dest_; 33 | size_t block_offset_; 34 | uint32_t type_crc_[KMaxType + 1]; 35 | }; 36 | 37 | }; 38 | 39 | } 40 | 41 | #endif // STORAGE_XDB_DB_LOG_LOG_WRITER_H_ -------------------------------------------------------------------------------- /lsmkv/include/filter_policy.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_INCLUDE_FILTER_POLICY_H_ 2 | #define STORAGE_XDB_INCLUDE_FILTER_POLICY_H_ 3 | #include 4 | 5 | #include 6 | namespace lsmkv { 7 | 8 | class FilterPolicy { 9 | public: 10 | virtual ~FilterPolicy() = default; 11 | 12 | virtual const char* Name() const = 0; 13 | 14 | // keys[0..n-1] contains a list of keys. 15 | // The keys are used to create the new filter, 16 | // and the filter should be appended to dst (encoded into a string). 17 | virtual void CreatFilter(std::string_view* keys, int n, 18 | std::string* dst) const = 0; 19 | 20 | // The filter was created by "CreatFilter". 21 | // Return true : if key is potentially contained 22 | // by the "keys" passed to "CreatFilter". 23 | // Return false : if key is definitely not contained in keys. 
24 | virtual bool KeyMayMatch(std::string_view key, std::string_view filter) const = 0; 25 | }; 26 | 27 | FilterPolicy* NewBloomFilterPolicy(int bits_per_key); 28 | } // namespace lsmkv 29 | #endif // STORAGE_XDB_INCLUDE_FILTER_POLICY_H_ -------------------------------------------------------------------------------- /lsmkv/db/sstable/block_builder.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_SSTABLE_BLOCK_BUILDER_H_ 2 | #define STORAGE_XDB_DB_SSTABLE_BLOCK_BUILDER_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include "include/option.h" 8 | namespace lsmkv { 9 | 10 | class BlockBuilder { 11 | public: 12 | explicit BlockBuilder(const Option* option); 13 | 14 | BlockBuilder(const BlockBuilder &) = delete; 15 | BlockBuilder &operator=(const BlockBuilder &) = delete; 16 | 17 | void Add(std::string_view key, std::string_view value); 18 | 19 | std::string_view Finish(); 20 | 21 | size_t ByteSize() const; 22 | 23 | void Reset(); 24 | 25 | bool Empty() const { return buffer_.empty(); }; 26 | private: 27 | const Option* option_; 28 | std::string buffer_; 29 | // A restart is a record whose key is stored at full length; 30 | // restarts are used to speed up searching. 31 | std::vector restarts_; 32 | // Number of keys added since the last restart. 33 | int counter_; 34 | // The last key that has been added. 
35 | std::string last_key_; 36 | bool finished_; 37 | }; 38 | } 39 | 40 | #endif -------------------------------------------------------------------------------- /test/block_test.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "db/sstable/block_reader.h" 3 | #include "db/sstable/block_builder.h" 4 | #include "db/sstable/block_format.h" 5 | 6 | namespace lsmkv { 7 | TEST(ExampleTest, LinearIterater) { 8 | Option option; 9 | BlockBuilder builder(&option); 10 | for (int i = 0; i < 1000; i++) { 11 | builder.Add(std::to_string(i),"VAL"); 12 | } 13 | std::string_view s = builder.Finish(); 14 | BlockContents contents; 15 | contents.data = s; 16 | contents.heap_allocated_ = false; 17 | contents.table_cache_ = false; 18 | BlockReader reader(contents); 19 | Iterator* iter = reader.NewIterator(DefaultComparator()); 20 | iter->SeekToFirst(); 21 | for (int i = 0; i < 1000; i++) { 22 | ASSERT_TRUE(iter->Valid()); 23 | ASSERT_EQ(iter->Key(),std::to_string(i)); 24 | ASSERT_EQ(iter->Value(),"VAL"); 25 | iter->Next(); 26 | } 27 | ASSERT_FALSE(iter->Valid()); 28 | delete iter; 29 | } 30 | 31 | } -------------------------------------------------------------------------------- /test/env_test.cc: -------------------------------------------------------------------------------- 1 | #include "include/env.h" 2 | 3 | #include "gtest/gtest.h" 4 | #include "util/file.h" 5 | 6 | namespace lsmkv { 7 | TEST(ExampleTest, WriteAndRead) { 8 | // Write file 9 | RandomReadFile* read_file; 10 | WritableFile* write_file; 11 | Env* env = DefaultEnv(); 12 | std::string filename{"/home/lei/MyLSMKV/folder_for_test/env_test"}; 13 | Status s = env->NewWritableFile(filename, &write_file); 14 | ASSERT_TRUE(s.ok()); 15 | std::string magic = "123456124365"; 16 | s = write_file->Append(magic); 17 | ASSERT_TRUE(s.ok()); 18 | s = write_file->Sync(); 19 | ASSERT_TRUE(s.ok()); 20 | uint64_t file_size; 21 | // read file and check 22 | s = 
env->NewRamdomReadFile(filename, &read_file); 23 | ASSERT_TRUE(s.ok()); 24 | s = env->FileSize(filename, &file_size); 25 | ASSERT_TRUE(s.ok()); 26 | env->RemoveFile(filename); 27 | std::string_view result; 28 | char buf[50]; 29 | s = read_file->Read(0, 12, &result, buf); 30 | ASSERT_TRUE(s.ok()); 31 | ASSERT_EQ(magic, result); 32 | delete read_file; 33 | delete write_file; 34 | } 35 | 36 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/db/sstable/table_cache.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_SSTABLE_TABLE_CACHE_H_ 2 | #define STORAGE_XDB_DB_SSTABLE_TABLE_CACHE_H_ 3 | 4 | 5 | #include "include/cache.h" 6 | #include "include/env.h" 7 | #include "include/option.h" 8 | 9 | namespace lsmkv { 10 | 11 | class Iterator; 12 | 13 | class TableCache { 14 | public: 15 | TableCache(const std::string name, const Option& option, size_t capacity) 16 | : name_(name), option_(option), 17 | env_(option.env), cache_(NewLRUCache(capacity)) {} 18 | 19 | ~TableCache() { delete cache_; } 20 | 21 | Status Get(const ReadOption& option, uint64_t file_number, uint64_t file_size, 22 | std::string_view key, void* arg, void (*handle_result)(void*, std::string_view, std::string_view)); 23 | 24 | void Evict(uint64_t file_number); 25 | 26 | Iterator* NewIterator(const ReadOption& option, uint64_t file_number, uint64_t file_size); 27 | 28 | private: 29 | Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle** handle); 30 | 31 | const std::string name_; 32 | const Option& option_; 33 | Env* env_; 34 | Cache* cache_; 35 | }; 36 | 37 | } 38 | 39 | #endif // STORAGE_XDB_DB_SSTABLE_TABLE_CACHE_H_ -------------------------------------------------------------------------------- /test/example_test.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "crc32c/crc32c.h" 3 | #include 
"snappy.h" 4 | #include 5 | namespace lsmkv { 6 | TEST(ExampleTest, AddTest) { 7 | EXPECT_EQ(0, 0); 8 | EXPECT_EQ(1, 1); 9 | } 10 | TEST(ExampleTest, crcTest) { 11 | const std::uint8_t buffer[] = {'1', '3', '2', '4'}; 12 | std::uint32_t result1,result2,result3; 13 | 14 | // Process a raw buffer. 15 | result1 = crc32c::Crc32c(buffer, 4); 16 | 17 | // Process a std::string. 18 | std::string string = "1324"; 19 | string.resize(4); 20 | result2 = crc32c::Crc32c(string); 21 | 22 | std::string string1 = "1323"; 23 | string1.resize(4); 24 | result3 = crc32c::Crc32c(string1); 25 | 26 | ASSERT_EQ(result1, result2); 27 | ASSERT_NE(result1, result3); 28 | } 29 | TEST(ExampleTest, snappyTest) { 30 | std::string buf{"12412415132512351324"}; 31 | std::string compress; 32 | snappy::Compress(buf.data(), buf.size(), &compress); 33 | std::string uncompress; 34 | snappy::Uncompress(compress.data(), compress.size(), &uncompress); 35 | ASSERT_EQ(buf, uncompress); 36 | } 37 | } -------------------------------------------------------------------------------- /lsmkv/db/log/log_reader.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_LOG_LOG_READER_H_ 2 | #define STORAGE_XDB_DB_LOG_LOG_READER_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include "include/status.h" 8 | #include "db/log/log_format.h" 9 | 10 | namespace lsmkv { 11 | class SequentialFile; 12 | 13 | namespace log { 14 | 15 | class Reader { 16 | public: 17 | Reader(SequentialFile* src, bool checksum, uint64_t initial_offset); 18 | 19 | ~Reader(); 20 | 21 | bool ReadRecord(std::string_view* record, std::string* buffer); 22 | 23 | uint64_t LastRecordOffset() { return last_record_offset_; } 24 | private: 25 | enum { 26 | // KEof : finish the record reading. 27 | KEof = KMaxType + 1, 28 | // KBadRecord : ignore the record and read next. 
29 | KBadRecord = KMaxType + 2 30 | }; 31 | unsigned int ReadPhysicalRecord(std::string_view* fragments); 32 | 33 | SequentialFile* src_; 34 | const bool checksum_; 35 | const uint64_t initial_offset_; 36 | std::string_view buffer_; 37 | char* const buffer_mem_; 38 | bool eof_; 39 | 40 | uint64_t last_record_offset_; 41 | uint64_t buffer_end_offset_; 42 | }; 43 | 44 | }; 45 | 46 | } 47 | 48 | #endif // STORAGE_XDB_DB_LOG_LOG_READER_H_ -------------------------------------------------------------------------------- /lsmkv/include/sstable_reader.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_INCLUDE_SSTABLE_READER_H_ 2 | #define STORAGE_XDB_INCLUDE_SSTABLE_READER_H_ 3 | 4 | #include "include/option.h" 5 | #include "util/file.h" 6 | namespace lsmkv { 7 | 8 | class Footer; 9 | class Iterator; 10 | 11 | class SSTableReader { 12 | public: 13 | ~SSTableReader(); 14 | 15 | SSTableReader(const SSTableReader&) = delete; 16 | SSTableReader operator=(const SSTableReader&) = delete; 17 | 18 | static Status Open(const Option& option, RandomReadFile* file, 19 | uint64_t file_size, SSTableReader** table); 20 | 21 | Iterator* NewIterator(const ReadOption& option) const; 22 | private: 23 | struct Rep; 24 | friend class TableCache; 25 | 26 | explicit SSTableReader(Rep* rep) : rep_(rep) {} 27 | 28 | void ReadFilterIndex(const Footer& footer); 29 | 30 | void ReadFilter(std::string_view handle_contents); 31 | 32 | Status InternalGet(const ReadOption& option, std::string_view key, void* arg, 33 | void (*handle_result)(void*, std::string_view, std::string_view)); 34 | 35 | static Iterator* ReadBlockHandle(void* arg, const ReadOption& option, std::string_view handle_contents); 36 | 37 | Rep* const rep_; 38 | }; 39 | 40 | } 41 | 42 | #endif // STORAGE_XDB_INCLUDE_SSTABLE_READER_H_ -------------------------------------------------------------------------------- /lsmkv/include/sstable_builder.h: 
-------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_INCLUDE_SSTABLE_BUILDER_H_ 2 | #define STORAGE_XDB_INCLUDE_SSTABLE_BUILDER_H_ 3 | 4 | #include "include/option.h" 5 | #include "util/file.h" 6 | #include "db/sstable/table_cache.h" 7 | 8 | namespace lsmkv { 9 | 10 | class BlockBuilder; 11 | class BlockHandle; 12 | class Iterator; 13 | class FileMeta; 14 | 15 | class SSTableBuilder { 16 | public: 17 | SSTableBuilder(const Option& option,WritableFile* file); 18 | 19 | SSTableBuilder(const SSTableBuilder&) = delete; 20 | SSTableBuilder& operator=(const SSTableBuilder&) = delete; 21 | 22 | ~SSTableBuilder(); 23 | 24 | void Add(std::string_view key, std::string_view value); 25 | 26 | void Flush(); 27 | 28 | Status Finish(); 29 | 30 | Status status() const; 31 | 32 | uint64_t FileSize() const; 33 | 34 | uint64_t NumEntries() const; 35 | private: 36 | bool ok() const { return status().ok(); } 37 | void WriteBlock(BlockBuilder* builder, BlockHandle* handle); 38 | 39 | void WriteRawBlock(std::string_view contents, 40 | CompressType type, BlockHandle* handle); 41 | struct Rep; 42 | Rep* rep_; 43 | }; 44 | 45 | Status BuildSSTable(const std::string name, const Option& option, 46 | TableCache* table_cache, Iterator* iter, FileMeta* meta); 47 | } 48 | 49 | #endif // STORAGE_XDB_INCLUDE_SSTABLE_BUILDER_H_ -------------------------------------------------------------------------------- /lsmkv/db/memtable/arena.h: -------------------------------------------------------------------------------- 1 | #ifndef ARENA_H 2 | #define ARENA_H 3 | /** 4 | * @file arena.h 5 | * @brief 内存分配器,管理跳表的节点的内存 6 | */ 7 | #include 8 | #include 9 | 10 | namespace lsmkv { 11 | 12 | class Arena { 13 | public: 14 | Arena() : alloc_ptr_(nullptr), alloc_bytes_remaining_(0), memory_used_(0) {} 15 | ~Arena() { 16 | for (char* block : blocks_) delete[] block; 17 | } 18 | 19 | /** 20 | * @brief 分配bytes个字节的连续内存块 21 | * @note 内存未对齐 22 | * @param[in] bytes 申请字节数 
23 | * @return char* 指向分配内存块的指针 24 | */ 25 | char* Allocate(size_t bytes); 26 | 27 | /** 28 | * @brief 分配bytes个字节的连续内存块 29 | * @note 内存已对齐 30 | * @param[in] bytes 申请字节数 31 | * @return char* 指向分配内存块的指针 32 | */ 33 | char* AllocateAlign(size_t bytes); 34 | 35 | /** 36 | * @brief 记录本内存分配器已使用的内存大小 37 | */ 38 | size_t MemoryUsed() const { 39 | return memory_used_.load(std::memory_order_relaxed); 40 | } 41 | 42 | private: 43 | /** 44 | * @brief 当前块中剩余可用内存不能满足需要时,调用该函数 45 | */ 46 | char* AllocateFallBack(size_t bytes); 47 | 48 | /** 49 | * @brief 由内存分配器向操作系统申请一个新内存块,由AllocateFallBack调用 50 | */ 51 | char* AllocateNewBlock(size_t bytes); 52 | 53 | private: 54 | /// 记录内存分配器当前使用的内存块 55 | char* alloc_ptr_; 56 | /// 记录当前内存块中剩余可用字节数 57 | size_t alloc_bytes_remaining_; 58 | /// 记录所有指向内存块的指针 59 | std::vector blocks_; 60 | /// 记录内存分配器使用的内存大小 61 | std::atomic memory_used_; 62 | }; 63 | 64 | } // namespace lsmkv 65 | 66 | #endif // ARENA_H -------------------------------------------------------------------------------- /lsmkv/util/filename.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_UTIL_FILENAME_H_ 2 | #define STORAGE_XDB_UTIL_FILENAME_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | namespace lsmkv { 9 | 10 | class Env; 11 | class Status; 12 | 13 | enum FileType { 14 | KLogFile = 0, 15 | KLockFile = 1, 16 | KCurrentFile = 2, 17 | KMetaFile = 3, 18 | KTmpFile = 4, 19 | KSSTableFile = 5, 20 | KLoggerFile = 6, 21 | }; 22 | std::string LogFileName(const std::string& dbname, uint64_t number); 23 | 24 | std::string LockFileName(const std::string& dbname); 25 | 26 | std::string LoggerFileName(const std::string& dbname); 27 | 28 | std::string MetaFileName(const std::string& dbname, uint64_t number); 29 | 30 | std::string TmpFileName(const std::string& dbname, uint64_t number); 31 | 32 | std::string SSTableFileName(const std::string& dbname, uint64_t number); 33 | 34 | std::string CurrentFileName(const std::string& dbname); 
class FilterPolicy;
/**
 * @brief Used by SSTBlockBuilder
 *
 * Collects keys (AddKey) between StartBlock() calls and returns the finished
 * filter block as a view from Finish(). The policy pointer is stored, not
 * owned by anything visible in this header.
 */
class FilterBlockBuilder {
 public:
  explicit FilterBlockBuilder(const FilterPolicy* policy);

  FilterBlockBuilder(const FilterBlockBuilder&) = delete;
  // NOTE(review): this deleted assignment is declared with a pointer return
  // type, unlike the `T& operator=(const T&) = delete;` form used elsewhere.
  // It is harmless because the operator is deleted, but worth normalising.
  FilterBlockBuilder* operator=(const FilterBlockBuilder&) = delete;

  void StartBlock(uint64_t block_offset);
  void AddKey(std::string_view key);
  // Returns a view of the built block; presumably it refers to result_, so it
  // would only stay valid while this builder is alive and unmodified --
  // TODO confirm against the implementation.
  std::string_view Finish();

 private:
  void GenerateFilter();

 private:
  const FilterPolicy* policy_;
  // NOTE(review): the element types of the three vectors below are not
  // visible in this copy of the file; confirm them against the repository.
  std::string key_buffer_;
  std::vector key_starts_;
  std::string result_;
  std::vector filter_offsets_;
  std::vector tmp_keys_;
};

/**
 * @brief Used by SSTBlockReader
 *
 * Wraps an already-built filter block (`contents`) and answers membership
 * queries for a key belonging to the data block at `block_offset`.
 */
class FilterBlockReader {
 public:
  FilterBlockReader(const FilterPolicy* policy, std::string_view contents);

  bool KeyMayMatch(uint64_t block_offset, std::string_view key);

 private:
  const FilterPolicy* policy_;
  // Raw pointers; presumably they point into the `contents` passed to the
  // constructor, which would then have to outlive this reader -- TODO confirm.
  const char* data_;
  const char* filter_offsets_start_;
  size_t filter_offsets_num_;
  size_t filter_block_size_length_;
};
class CondVar;

// Thin non-copyable wrapper around std::mutex carrying the thread-safety
// annotation macros from util/thread_annotations.h.
class LOCKABLE Mutex {
 public:
  Mutex() = default;
  ~Mutex() = default;
  Mutex(const Mutex&) = delete;
  Mutex& operator=(const Mutex&) = delete;

  void Lock() EXCLUSIVE_LOCK_FUNCTION() { mu_.lock(); }
  void Unlock() UNLOCK_FUNCTION() { mu_.unlock(); }
  // Has an empty body: it performs no runtime check and exists only to carry
  // the ASSERT_EXCLUSIVE_LOCK annotation.
  void AssertHeld() ASSERT_EXCLUSIVE_LOCK() {}
 private:
  // CondVar needs direct access to the underlying std::mutex in Wait().
  friend class CondVar;
  std::mutex mu_;
};

// Condition variable bound to one Mutex for its whole lifetime.
class CondVar {
 public:
  explicit CondVar(Mutex* mu) : mu_(mu) {
    assert( mu != nullptr);
  }
  ~CondVar() = default;
  CondVar(const CondVar&) = delete;
  CondVar& operator=(const CondVar&) = delete;

  // Precondition: the calling thread already holds *mu_ (std::adopt_lock
  // assumes ownership instead of locking).
  void Wait() {
    // Adopt the held mutex so condition_variable::wait can unlock and relock it.
    std::unique_lock l(mu_->mu_, std::adopt_lock);
    cv_.wait(l);
    // Detach without unlocking: the caller still owns the mutex on return.
    l.release();
  }
  void Signal() { cv_.notify_one(); }
  void SignalAll() { cv_.notify_all(); }
 private:
  std::condition_variable cv_;
  Mutex* const mu_;
};

// Scoped guard: locks the mutex in the constructor and unlocks it in the
// destructor.
class SCOPED_LOCKABLE MutexLock {
 public:
  explicit MutexLock(Mutex* mu) EXCLUSIVE_LOCK_FUNCTION(mu)
    : mu_(mu) { mu_->Lock(); }
  ~MutexLock() UNLOCK_FUNCTION() { mu_->Unlock(); }
 private:
  Mutex* const mu_;
};
class ReadOption;

// Abstract, non-copyable cursor over key/value entries. Concrete iterators
// implement the pure virtual positioning and accessor functions below.
class Iterator {
 public:
  Iterator();

  Iterator(const Iterator&) = delete;
  Iterator& operator=(const Iterator&) = delete;

  virtual ~Iterator();

  // True when the iterator is positioned on an entry.
  virtual bool Valid() const = 0;

  // NOTE(review): Key()/Value() return views; how long they stay valid is
  // decided by each implementation -- confirm before caching them.
  virtual std::string_view Key() const = 0;

  virtual std::string_view Value() const = 0;

  virtual void Next() = 0;

  virtual void Prev() = 0;

  virtual void Seek(std::string_view key) = 0;

  virtual void SeekToFirst() = 0;

  virtual void SeekToLast() = 0;

  virtual Status status() = 0;

  // Callback type registered through AppendCleanup; it is invoked with the two
  // opaque arguments it was registered with (see CleanupNode::Run).
  using CleanupFunction = void(*)(void* arg1, void* arg2);
  // NOTE(review): presumably the registered callbacks run when the iterator is
  // destroyed; the destructor is defined outside this header -- confirm.
  void AppendCleanup (CleanupFunction fun, void* arg1, void* arg2);
 private:
  // Singly linked list node holding one cleanup callback and its arguments.
  struct CleanupNode {
    void Run() {
      (*fun)(arg1, arg2);
    }
    CleanupFunction fun;
    void* arg1;
    void* arg2;
    CleanupNode* next;
  };
  // The first node is stored inline in the iterator.
  CleanupNode cleanup_head_;
};

Iterator* NewEmptyIterator();

Iterator* NewErrorIterator(Status status);

// Builds an iterator from an index iterator plus a callback that produces an
// iterator from (arg, option, handle_contents).
// NOTE(review): ownership of `index_iter` and of the returned iterator is not
// visible here -- confirm against the implementation.
Iterator* NewTwoLevelIterator(Iterator* index_iter,
    Iterator* (*block_funtion)(void* arg, const ReadOption& option, std::string_view handle_contents),
    void* arg, const ReadOption& option);
| namespace lsmkv { 8 | 9 | constexpr const size_t KHeaderSize = 12; 10 | 11 | class WriteBatchHelper { 12 | public: 13 | static SequenceNum GetSequenceNum(const WriteBatch *b) { return static_cast(DecodeFixed64(b->rep_.data())); } 14 | 15 | static void SetSequenceNum(WriteBatch *b, uint64_t val) { EncodeFixed64(&b->rep_[0],val); } 16 | 17 | static uint32_t GetCount(const WriteBatch *b) { return DecodeFixed32(b->rep_.data() + 8); } 18 | 19 | static void SetCount(WriteBatch *b, uint32_t val) { EncodeFixed32(&b->rep_[8],val); } 20 | 21 | static void SetContent(WriteBatch *b, std::string_view content); 22 | 23 | static std::string_view GetContent(WriteBatch *b); 24 | 25 | static void Append(WriteBatch *dst , const WriteBatch *src); 26 | 27 | static size_t GetSize(const WriteBatch *b) { return b->rep_.size(); } 28 | 29 | static Status InsertMemTable(const WriteBatch* b, MemTable* mem); 30 | }; 31 | 32 | class MemTableInserter : public WriteBatch::Handle { 33 | public: 34 | SequenceNum seq_; 35 | MemTable* mem_; 36 | void Put(std::string_view key, std::string_view value) override { 37 | mem_->Put(seq_, KTypeInsertion, key, value); 38 | seq_++; 39 | } 40 | void Delete(std::string_view key) override { 41 | mem_->Put(seq_, KTypeDeletion, key, ""); 42 | seq_++; 43 | } 44 | 45 | }; 46 | } 47 | 48 | #endif // STORAGE_XDB_DB_WRITEBATCH_WRITEBATCH_HELPER_H_ -------------------------------------------------------------------------------- /lsmkv/db/memtable/arena.cc: -------------------------------------------------------------------------------- 1 | #include "db/memtable/arena.h" 2 | 3 | #include 4 | 5 | namespace lsmkv { 6 | 7 | static constexpr int kBlockSize = 4096; 8 | 9 | char* Arena::AllocateNewBlock(size_t bytes) { 10 | char* block = new char[bytes]; 11 | blocks_.push_back(block); 12 | memory_used_.fetch_add(bytes + sizeof(char*), std::memory_order_release); 13 | return block; 14 | } 15 | 16 | char* Arena::Allocate(size_t bytes) { 17 | if (alloc_bytes_remaining_ < bytes) 
{ 18 | return AllocateFallBack(bytes); 19 | } 20 | char* result = alloc_ptr_; 21 | alloc_ptr_ += bytes; 22 | alloc_bytes_remaining_ -= bytes; 23 | return result; 24 | } 25 | 26 | char* Arena::AllocateAlign(size_t bytes) { 27 | const int align = (sizeof(void*) > 8) ? sizeof(void*) : 8; 28 | static_assert((align & (align - 1)) == 0, 29 | "Pointer size should be a power of 2"); 30 | size_t buf_low_bit = reinterpret_cast(alloc_ptr_) & (align - 1); 31 | size_t need = bytes + (align - buf_low_bit); 32 | char* result; 33 | if (need <= alloc_bytes_remaining_) { 34 | result = alloc_ptr_ + (align - buf_low_bit); 35 | alloc_ptr_ += need; 36 | alloc_bytes_remaining_ -= need; 37 | } else { 38 | result = AllocateFallBack(bytes); 39 | } 40 | assert((reinterpret_cast(result) & (align - 1)) == 0); 41 | return result; 42 | } 43 | 44 | char* Arena::AllocateFallBack(size_t bytes) { 45 | if (bytes > kBlockSize / 4) { 46 | return AllocateNewBlock(bytes); 47 | } 48 | alloc_ptr_ = AllocateNewBlock(kBlockSize); 49 | alloc_bytes_remaining_ = kBlockSize; 50 | char* result = alloc_ptr_; 51 | alloc_ptr_ += bytes; 52 | alloc_bytes_remaining_ -= bytes; 53 | return result; 54 | } 55 | 56 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/util/thread_annotations.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_UTIL_THREAD_ANNOTATIONS_H_ 2 | #define STORAGE_XDB_UTIL_THREAD_ANNOTATIONS_H_ 3 | 4 | #if !defined(THREAD_ANNOTATION_ATTRIBUTE__) 5 | 6 | #if defined(__clang__) 7 | #define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) 8 | #else 9 | #define THREAD_ANNOTATION_ATTRIBUTE__(x) 10 | #endif 11 | 12 | #endif // !define(THREAD_ANNOTATION_ATTRIBUTE__) 13 | 14 | #ifndef GUARDED_BY 15 | #define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) 16 | #endif 17 | 18 | #ifndef PT_GUARDED_BY 19 | #define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x)) 20 | 
#endif 21 | 22 | #ifndef EXCLUSIVE_LOCK_FUNCTION 23 | #define EXCLUSIVE_LOCK_FUNCTION(...) \ 24 | THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock_function(__VA_ARGS__)) 25 | #endif 26 | 27 | #ifndef SHARED_LOCK_FUNCTION 28 | #define SHARED_LOCK_FUNCTION(...) \ 29 | THREAD_ANNOTATION_ATTRIBUTE__(shared_lock_function(__VA_ARGS__)) 30 | #endif 31 | 32 | #ifndef UNLOCK_FUNCTION 33 | #define UNLOCK_FUNCTION(...) \ 34 | THREAD_ANNOTATION_ATTRIBUTE__(unlock_function(__VA_ARGS__)) 35 | #endif 36 | 37 | #ifndef EXCLUSIVE_LOCKS_REQUIRED 38 | #define EXCLUSIVE_LOCKS_REQUIRED(...) \ 39 | THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(__VA_ARGS__)) 40 | #endif 41 | 42 | #ifndef ASSERT_EXCLUSIVE_LOCK 43 | #define ASSERT_EXCLUSIVE_LOCK(...) \ 44 | THREAD_ANNOTATION_ATTRIBUTE__(assert_exclusive_lock(__VA_ARGS__)) 45 | #endif 46 | 47 | #ifndef ASSERT_SHARED_LOCK 48 | #define ASSERT_SHARED_LOCK(...) \ 49 | THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_lock(__VA_ARGS__)) 50 | #endif 51 | 52 | #ifndef LOCKABLE 53 | #define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable) 54 | #endif 55 | 56 | #ifndef SCOPED_LOCKABLE 57 | #define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) 58 | #endif 59 | 60 | #endif // STORAGE_XDB_UTIL_THREAD_ANNOTATIONS_H_ -------------------------------------------------------------------------------- /test/sstable_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "gtest/gtest.h" 4 | #include "include/env.h" 5 | #include "include/iterator.h" 6 | #include "include/sstable_builder.h" 7 | #include "include/sstable_reader.h" 8 | #include "util/file.h" 9 | 10 | namespace lsmkv { 11 | TEST(ExampleTest, LinearIterater) { 12 | Option option; 13 | option.compress_type = KUnCompress; 14 | RandomReadFile* read_file; 15 | WritableFile* write_file; 16 | Env* env = DefaultEnv(); 17 | std::string filename{"/home/lei/MyLSMKV/folder_for_test/sst_test"}; 18 | Status s = 
env->NewWritableFile(filename, &write_file); 19 | ASSERT_TRUE(s.ok()); 20 | SSTableBuilder builder(option, write_file); 21 | char buf[10]; 22 | for (int i = 0; i < 1000000; i++) { 23 | std::sprintf(buf, "%06d", i); 24 | builder.Add(std::string_view(buf, 6), "VAL"); 25 | } 26 | s = builder.Finish(); 27 | ASSERT_TRUE(s.ok()); 28 | s = write_file->Sync(); 29 | ASSERT_TRUE(s.ok()); 30 | 31 | uint64_t file_size; 32 | s = env->NewRamdomReadFile(filename, &read_file); 33 | ASSERT_TRUE(s.ok()); 34 | env->FileSize(filename, &file_size); 35 | std::cout << "builder.FileSize():" << builder.FileSize() << std::endl; 36 | ASSERT_EQ(file_size, builder.FileSize()); 37 | SSTableReader* reader; 38 | s = SSTableReader::Open(option, read_file, file_size, &reader); 39 | ASSERT_TRUE(s.ok()); 40 | ReadOption read_option; 41 | Iterator* iter = reader->NewIterator(read_option); 42 | iter->SeekToFirst(); 43 | for (int i = 0; i < 1000000; i++) { 44 | std::sprintf(buf, "%06d", i); 45 | std::string_view s = std::string_view(buf, 6); 46 | ASSERT_TRUE(iter->Valid()); 47 | ASSERT_EQ(iter->Key(), s); 48 | ASSERT_EQ(iter->Value(), "VAL"); 49 | iter->Next(); 50 | } 51 | ASSERT_FALSE(iter->Valid()); 52 | env->RemoveFile(filename); 53 | delete iter; 54 | delete reader; 55 | delete write_file; 56 | } 57 | 58 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/util/status.cc: -------------------------------------------------------------------------------- 1 | #include "include/status.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | namespace lsmkv { 8 | 9 | Status::Status(Code code, std::string_view msg1, std::string_view msg2) { 10 | assert(code != KOK); 11 | const uint32_t len1 = static_cast(msg1.size()); 12 | const uint32_t len2 = static_cast(msg2.size()); 13 | const uint32_t len = len1 + (len2 == 0 ? 
0 : len2 + 2); 14 | char* result = new char[len + 5]; 15 | std::memcpy(result, &len, sizeof(len)); 16 | result[4] = static_cast(code); 17 | std::memcpy(result + 5, msg1.data(), len1); 18 | if (len2 != 0) { 19 | result[5 + len1] = ':'; 20 | result[6 + len1] = ' '; 21 | std::memcpy(result + 7 + len1, msg2.data(), len2); 22 | } 23 | state_ = result; 24 | } 25 | 26 | const char* CopyState(const char* state) { 27 | uint32_t len; 28 | std::memcpy(&len, state, sizeof(len)); 29 | char* result = new char[len + 5]; 30 | std::memcpy(result, state, len + 5); 31 | return result; 32 | } 33 | 34 | std::string Status::ToString() const { 35 | if (state_ == nullptr) { 36 | return "OK"; 37 | } else { 38 | char tmp[30]; 39 | const char* type; 40 | switch (code()) { 41 | case KOK: 42 | type = "OK"; 43 | break; 44 | case KNotFound: 45 | type = "NotFound: "; 46 | break; 47 | case KIOError: 48 | type = "IO error: "; 49 | break; 50 | case KCorruption: 51 | type = "Corruption: "; 52 | break; 53 | default: 54 | std::snprintf(tmp, sizeof(tmp), 55 | "Unknown(%d): ", static_cast(code())); 56 | type = tmp; 57 | break; 58 | } 59 | std::string ret(type); 60 | uint32_t len; 61 | std::memcpy(&len, state_, sizeof(len)); 62 | ret.append(state_ + 5, len); 63 | return ret; 64 | } 65 | } 66 | 67 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/db/version/iterator_wrapper.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_VERSION_ITERATOR_WRAPPER_H_ 2 | #define STORAGE_XDB_DB_VERSION_ITERATOR_WRAPPER_H_ 3 | 4 | #include 5 | 6 | #include "include/comparator.h" 7 | #include "include/iterator.h" 8 | namespace lsmkv { 9 | 10 | class IteratorWrapper { 11 | public: 12 | IteratorWrapper() : valid_(false), iter_(nullptr) {} 13 | 14 | explicit IteratorWrapper(Iterator *iter) : iter_(nullptr) { Set(iter); } 15 | IteratorWrapper(const IteratorWrapper &) = delete; 16 | IteratorWrapper 
&operator=(const IteratorWrapper &) = delete; 17 | 18 | ~IteratorWrapper() { delete iter_; } 19 | 20 | void Set(Iterator *iter) { 21 | delete iter_; 22 | iter_ = iter; 23 | if (iter == nullptr) { 24 | valid_ = false; 25 | } else { 26 | Update(); 27 | } 28 | } 29 | void Update() { 30 | valid_ = iter_->Valid(); 31 | if (valid_) { 32 | key_ = iter_->Key(); 33 | } 34 | } 35 | bool Valid() const { return valid_; } 36 | void Next() { 37 | assert(iter_); 38 | iter_->Next(); 39 | Update(); 40 | } 41 | void Prev() { 42 | assert(iter_); 43 | iter_->Prev(); 44 | Update(); 45 | } 46 | void Seek(const std::string_view &key) { 47 | assert(iter_); 48 | iter_->Seek(key); 49 | Update(); 50 | } 51 | void SeekToFirst() { 52 | assert(iter_); 53 | iter_->SeekToFirst(); 54 | Update(); 55 | } 56 | void SeekToLast() { 57 | assert(iter_); 58 | iter_->SeekToLast(); 59 | Update(); 60 | } 61 | Status status() const { 62 | assert(iter_); 63 | return iter_->status(); 64 | } 65 | std::string_view Key() const { 66 | assert(iter_); 67 | return key_; 68 | } 69 | std::string_view Value() const { 70 | assert(iter_); 71 | return iter_->Value(); 72 | } 73 | 74 | private: 75 | bool valid_; 76 | Iterator *iter_; 77 | std::string_view key_; 78 | }; 79 | 80 | } // namespace lsmkv 81 | 82 | #endif // STORAGE_XDB_DB_VERSION_ITERATOR_WRAPPER_H_ 83 | -------------------------------------------------------------------------------- /lsmkv/util/comparator.cc: -------------------------------------------------------------------------------- 1 | #include "include/comparator.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace lsmkv { 9 | class ByteWiseComparator : public Comparator { 10 | public: 11 | int Compare(std::string_view a, std::string_view b) const override { 12 | return a.compare(b); 13 | } 14 | const char* Name() const override { return "lsmkv.ByteWiseComparator"; } 15 | void FindShortestMiddle(std::string* start, 16 | std::string_view limit) const override { 17 | int min_len = 
// Result of reading one block from a table file.
struct BlockContents {
  std::string_view data;
  // NOTE(review): the two flags presumably tell the caller whether `data`
  // points at heap memory it must free and whether the block may be cached;
  // confirm against ReadBlock's implementation.
  bool heap_allocated_;
  bool table_cache_;
};
// 1 byte compress type | 4 bytes crc
static const size_t KBlockTailSize = 5;

// Magic number associated with the table footer.
static const uint64_t KFooterMagicNum = 0xaf41de78ull;

// Location of a block inside a file: an (offset, size) pair.
struct BlockHandle {
 public:
  // Upper bound on the encoded size of a handle, used to size the footer.
  enum {KMaxEncodeLength = 20};

  BlockHandle();

  void SetOffset(uint64_t offset) { offset_ = offset; }

  uint64_t GetOffset() const { return offset_; }

  void SetSize(uint64_t size) { size_ = size; }

  uint64_t GetSize() const { return size_; }

  // Appends the encoded handle to *dst.
  void EncodeTo(std::string* dst);

  // Decodes a handle from *input. `input` is passed by pointer, so presumably
  // it is advanced past the consumed bytes -- TODO confirm.
  Status DecodeFrom(std::string_view* input);
 private:
  uint64_t offset_;
  uint64_t size_;
};

// Trailer of a table file holding the handles of the index block and the
// filter index block.
struct Footer {
 public:
  Footer() = default;

  // Two maximum-size block handles plus 8 more bytes (2 * 20 + 8 = 48).
  enum {KEncodeLength = 2 * BlockHandle::KMaxEncodeLength + 8};
  void SetIndexHandle(const BlockHandle& index_block_handle) {
    index_block_handle_ = index_block_handle;
  }

  BlockHandle GetIndexHandle() const { return index_block_handle_; }

  void SetFilterHandle(const BlockHandle& filter_index_handle) {
    filter_index_handle_ = filter_index_handle;
  }

  BlockHandle GetFilterHandle() const { return filter_index_handle_; }

  void EncodeTo(std::string* dst);

  Status DecodeFrom(std::string_view* input);
 private:
  BlockHandle index_block_handle_;
  BlockHandle filter_index_handle_;
};

// Reads the block described by `handle` from `file` into *result.
Status ReadBlock(const ReadOption& opt, RandomReadFile* file,
                 const BlockHandle& handle, BlockContents* result);
/**
 * @brief Result of an operation: success, or an error code plus a message.
 *
 * A null state_ means OK and costs no allocation. Otherwise state_ points to
 * a heap array, owned by this object, whose byte 4 holds the Code (see
 * code()); the bytes around it are written by the private constructor.
 */
class Status {
 public:
  /// Creates an OK status.
  Status() : state_(nullptr) {}

  ~Status() { delete[] state_; }

  /// Deep-copies the error state (if any) of `rhs`.
  Status(const Status& rhs);

  /// Steals the error state of `rhs`, leaving it OK. Without this
  /// constructor the user-declared copy constructor suppresses the implicit
  /// move, so every "move" of a Status would reallocate and copy the message.
  Status(Status&& rhs) noexcept : state_(rhs.state_) { rhs.state_ = nullptr; }

  Status& operator=(const Status& rhs);

  /// Swaps states with `rhs`; the previous state of *this is released when
  /// `rhs` is destroyed.
  Status& operator=(Status&& rhs) noexcept;

  static Status OK() { return Status(); }

  /// Error factories. When `msg2` is non-empty it is appended to `msg1`.
  static Status NotFound(std::string_view msg1,
                         std::string_view msg2 = std::string_view()) {
    return Status(KNotFound, msg1, msg2);
  }
  static Status IOError(std::string_view msg1,
                        std::string_view msg2 = std::string_view()) {
    return Status(KIOError, msg1, msg2);
  }
  static Status Corruption(std::string_view msg1,
                           std::string_view msg2 = std::string_view()) {
    return Status(KCorruption, msg1, msg2);
  }

  bool IsNotFound() const { return code() == KNotFound; }

  bool IsIOError() const { return code() == KIOError; }

  bool IsCorruption() const { return code() == KCorruption; }

  bool ok() const { return state_ == nullptr; }

  /// Human-readable description of the status.
  std::string ToString() const;

 private:
  enum Code { KOK = 0, KNotFound = 1, KIOError = 2, KCorruption = 3 };

  Code code() const {
    return (state_ == nullptr ? KOK : static_cast<Code>(state_[4]));
  }
  Status(Code code, std::string_view msg, std::string_view msg2);

  /// nullptr for OK, otherwise an owned new[]-allocated state array.
  const char* state_;
};

/// Returns a new[]-allocated copy of a non-null state array.
const char* CopyState(const char* state);

inline Status& Status::operator=(const Status& rhs) {
  // Comparing the pointers also covers self-assignment.
  if (rhs.state_ != state_) {
    delete[] state_;  // delete[] on nullptr is a no-op
    state_ = (rhs.state_ == nullptr ? nullptr : CopyState(rhs.state_));
  }
  return *this;
}

inline Status& Status::operator=(Status&& rhs) noexcept {
  std::swap(rhs.state_, state_);
  return *this;
}

inline Status::Status(const Status& rhs) {
  state_ = (rhs.state_ == nullptr ? nullptr : CopyState(rhs.state_));
}
SkipList; 74 | 75 | friend class MemTableIterator; 76 | 77 | ~MemTable() { assert(refs_ == 0); }; 78 | 79 | private: 80 | /// 比较器,用于key之间的比较 81 | KeyComparator comparator_; 82 | /// 内存分配器,为跳表分配内存 83 | Arena arena_; 84 | /// 存放数据的底层结构,跳表 85 | Table table_; 86 | /// 引用计数 87 | int refs_; 88 | }; 89 | 90 | } // namespace lsmkv 91 | 92 | #endif // MEMTABLE_H -------------------------------------------------------------------------------- /lsmkv/db/filter/bloom.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "include/filter_policy.h" 4 | #include "util/MurmurHash3.h" 5 | namespace lsmkv { 6 | 7 | static uint32_t BloomHash(std::string_view key) { 8 | return murmur3::MurmurHash3_x86_32(key.data(), key.size(), 0x789fed11); 9 | } 10 | 11 | class BloomFilterPolicy : public FilterPolicy { 12 | public: 13 | explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) { 14 | hash_num_ = static_cast(bits_per_key * 0.69); 15 | if (hash_num_ < 1) hash_num_ = 1; 16 | if (hash_num_ > 30) hash_num_ = 30; 17 | } 18 | 19 | const char* Name() const override { return "lsmkv.BloomFilterPolicy"; } 20 | 21 | void CreatFilter(std::string_view* keys, int n, 22 | std::string* dst) const override { 23 | int bits = n * bits_per_key_; 24 | 25 | if (bits < 64) bits = 64; 26 | size_t bytes = (bits + 7) / 8; 27 | bits = bytes * 8; 28 | 29 | const size_t old_size = dst->size(); 30 | dst->resize(old_size + bytes, 0); 31 | dst->push_back(hash_num_); 32 | char* array = &(*dst)[old_size]; 33 | for (int i = 0; i < n; i++) { 34 | uint32_t h = BloomHash(keys[i]); 35 | const uint32_t delta = (h >> 17) | (h << 15); 36 | for (size_t j = 0; j < hash_num_; j++) { 37 | uint32_t pos = h % bits; 38 | array[pos / 8] |= (1 << (pos % 8)); 39 | h += delta; 40 | } 41 | } 42 | } 43 | 44 | bool KeyMayMatch(std::string_view key, 45 | std::string_view filter) const override { 46 | const size_t len = filter.size(); 47 | if (len < 2) return false; 
48 | const char* array = filter.data(); 49 | const size_t bits = (len - 1) * 8; 50 | const size_t hash_num = array[len - 1]; 51 | if (hash_num > 30) { 52 | return true; 53 | } 54 | uint32_t h = BloomHash(key); 55 | const uint32_t delta = (h >> 17) | (h << 15); 56 | for (size_t i = 0; i < hash_num; i++) { 57 | uint32_t pos = h % bits; 58 | if ((array[pos / 8] & (1 << (pos % 8))) == 0) { 59 | return false; 60 | } 61 | h += delta; 62 | } 63 | return true; 64 | } 65 | 66 | private: 67 | size_t bits_per_key_; 68 | size_t hash_num_; 69 | }; 70 | 71 | FilterPolicy* NewBloomFilterPolicy(int bits_per_key) { 72 | return new BloomFilterPolicy(bits_per_key); 73 | } 74 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/util/MurmurHash3.h: -------------------------------------------------------------------------------- 1 | // This source file was originally from: 2 | // https://github.com/PeterScott/murmur3 3 | // 4 | // We've changed it for use with VoltDB: 5 | // - We changed the functions declared below to return their hash by 6 | // value, rather than accept a pointer to storage for the result 7 | 8 | //----------------------------------------------------------------------------- 9 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 10 | // domain. The author hereby disclaims copyright to this source code. 
11 | 12 | #ifndef _MURMURHASH3_H_ 13 | #define _MURMURHASH3_H_ 14 | 15 | #include 16 | 17 | namespace murmur3 { 18 | 19 | //----------------------------------------------------------------------------- 20 | // Platform-specific functions and macros 21 | 22 | // Microsoft Visual Studio 23 | 24 | #if defined(_MSC_VER) 25 | 26 | typedef unsigned char uint8_t; 27 | typedef unsigned long uint32_t; 28 | typedef unsigned __int64 uint64_t; 29 | 30 | // Other compilers 31 | 32 | #else // defined(_MSC_VER) 33 | 34 | #include 35 | 36 | #endif // !defined(_MSC_VER) 37 | 38 | //----------------------------------------------------------------------------- 39 | 40 | int32_t MurmurHash3_x64_128 (const void * key, int len, uint32_t seed ); 41 | 42 | inline int32_t MurmurHash3_x64_128(int32_t value, uint32_t seed) { 43 | return MurmurHash3_x64_128(&value, 4, seed); 44 | } 45 | inline int32_t MurmurHash3_x64_128(int32_t value) { 46 | return MurmurHash3_x64_128(&value, 4, 0); 47 | } 48 | 49 | inline int32_t MurmurHash3_x64_128(int64_t value, uint32_t seed) { 50 | return MurmurHash3_x64_128(&value, 8, seed); 51 | } 52 | inline int32_t MurmurHash3_x64_128(int64_t value) { 53 | return MurmurHash3_x64_128(&value, 8, 0); 54 | } 55 | 56 | inline int32_t MurmurHash3_x64_128(double value, uint32_t seed) { 57 | return MurmurHash3_x64_128(&value, 8, seed); 58 | } 59 | 60 | inline int32_t MurmurHash3_x64_128(std::string &value, uint32_t seed) { 61 | return MurmurHash3_x64_128(value.data(), static_cast(value.size()), seed); 62 | } 63 | 64 | uint32_t MurmurHash3_x86_32(const void* key, uint32_t len, uint32_t seed); 65 | 66 | void MurmurHash3_x64_128 ( const void * key, const int len, 67 | const uint32_t seed, void * out ); 68 | 69 | //----------------------------------------------------------------------------- 70 | 71 | } 72 | #endif // _MURMURHASH3_H_ 73 | -------------------------------------------------------------------------------- /lsmkv/db/version/merge.cc: 
-------------------------------------------------------------------------------- 1 | 2 | #include "db/version/merge.h" 3 | 4 | #include "db/version/iterator_wrapper.h" 5 | 6 | namespace lsmkv { 7 | 8 | class MergedIterator : public Iterator { 9 | public: 10 | MergedIterator(Iterator** list, size_t num, const Comparator* cmp) 11 | : list_(new IteratorWrapper[num]), 12 | num_(num), 13 | current_(nullptr), 14 | cmp_(cmp) { 15 | for (int i = 0; i < num; i++) { 16 | list_[i].Set(list[i]); 17 | } 18 | } 19 | 20 | ~MergedIterator() override { delete[] list_; } 21 | bool Valid() const override { return current_ != nullptr; } 22 | 23 | std::string_view Key() const override { 24 | assert(Valid()); 25 | return current_->Key(); 26 | } 27 | 28 | std::string_view Value() const override { 29 | assert(Valid()); 30 | return current_->Value(); 31 | } 32 | 33 | void Next() override { 34 | assert(Valid()); 35 | current_->Next(); 36 | FindSmallest(); 37 | } 38 | 39 | /// 不会调用该函数 40 | void Prev() override { assert(false); } 41 | 42 | void Seek(std::string_view key) override { 43 | for (int i = 0; i < num_; i++) { 44 | list_[i].Seek(key); 45 | } 46 | FindSmallest(); 47 | } 48 | 49 | void SeekToFirst() override { 50 | for (int i = 0; i < num_; i++) { 51 | list_[i].SeekToFirst(); 52 | } 53 | FindSmallest(); 54 | } 55 | void SeekToLast() override { assert(false); } 56 | 57 | Status status() override { 58 | Status status; 59 | for (int i = 0; i < num_; i++) { 60 | status = list_[i].status(); 61 | if (!status.ok()) { 62 | return status; 63 | } 64 | } 65 | return Status::OK(); 66 | } 67 | 68 | private: 69 | void FindSmallest(); 70 | 71 | private: 72 | IteratorWrapper* list_; 73 | size_t num_; 74 | IteratorWrapper* current_; 75 | const Comparator* cmp_; 76 | }; 77 | 78 | void MergedIterator::FindSmallest() { 79 | IteratorWrapper* smallest = nullptr; 80 | for (size_t i = 0; i < num_; i++) { 81 | IteratorWrapper* ptr = &list_[i]; 82 | if (ptr->Valid()) { 83 | if (smallest == nullptr || 84 | 
cmp_->Compare(ptr->Key(), smallest->Key()) < 0) { 85 | smallest = ptr; 86 | } 87 | } 88 | } 89 | current_ = smallest; 90 | } 91 | 92 | Iterator* NewMergedIterator(Iterator** list, size_t num, 93 | const Comparator* cmp) { 94 | return new MergedIterator(list, num, cmp); 95 | } 96 | 97 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/include/env.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_INCLUDE_ENV_H_ 2 | #define STORAGE_XDB_INCLUDE_ENV_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "util/mutex.h" 13 | #include "util/file.h" 14 | 15 | // allow mmap file if 64bits because of enough address capacity. 16 | 17 | namespace lsmkv { 18 | 19 | class FileLock { 20 | public: 21 | FileLock() = default; 22 | FileLock(const FileLock&) = delete; 23 | FileLock& operator=(const FileLock&) = delete; 24 | virtual ~FileLock() = default; 25 | }; 26 | 27 | class Logger { 28 | public: 29 | Logger() = default; 30 | Logger(const Logger&) = delete; 31 | Logger& operator=(const Logger&) = delete; 32 | virtual ~Logger() = default; 33 | 34 | virtual void Logv(const char* format, std::va_list ap) = 0; 35 | }; 36 | 37 | class Env { 38 | public: 39 | Env() = default; 40 | 41 | virtual Status NewSequentialFile(const std::string& filename, SequentialFile** result) = 0; 42 | 43 | virtual Status NewRamdomReadFile(const std::string& filename, RandomReadFile** result) = 0; 44 | 45 | virtual Status NewWritableFile(const std::string& filename, WritableFile** result) = 0; 46 | 47 | virtual Status NewAppendableFile(const std::string& filename, WritableFile** result) = 0; 48 | 49 | virtual Status CreatDir(const std::string& filename) = 0; 50 | 51 | virtual Status RemoveDir(const std::string& filename) = 0; 52 | 53 | virtual Status GetChildren(const std::string& dirname, std::vector* filenames) = 0; 54 | 55 | 
virtual Status FileSize(const std::string& filename, uint64_t* result) = 0; 56 | 57 | virtual Status RenameFile(const std::string& from, const std::string& to) = 0; 58 | 59 | virtual Status RemoveFile(const std::string& filename) = 0; 60 | 61 | virtual bool FileExist(const std::string& filename) = 0; 62 | 63 | virtual Status LockFile(const std::string& filename, FileLock** lock) = 0; 64 | 65 | virtual Status UnlockFile(FileLock* lock) = 0; 66 | 67 | virtual void Schedule(void (*function)(void *arg), void* arg) = 0; 68 | 69 | virtual void StartThread(void (*function)(void *arg), void* arg) = 0; 70 | 71 | virtual void SleepMicroseconds(int n) = 0; 72 | 73 | virtual Status NewLogger(const std::string& filename, Logger** result) = 0; 74 | }; 75 | 76 | void Log(Logger* logger, const char* format, ...); 77 | 78 | Env* DefaultEnv(); 79 | 80 | } 81 | #endif // STORAGE_XDB_INCLUDE_ENV_H_ -------------------------------------------------------------------------------- /lsmkv/include/option.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_INCLUDE_OPTION_H_ 2 | #define STORAGE_XDB_INCLUDE_OPTION_H_ 3 | 4 | #include "include/env.h" 5 | namespace lsmkv { 6 | 7 | class Comparator; 8 | class Env; 9 | class FilterPolicy; 10 | 11 | enum CompressType { 12 | KUnCompress = 0, 13 | KSnappyCompress = 1 14 | }; 15 | 16 | struct Option { 17 | Option(); 18 | 19 | // the method to compare two user keys. 20 | // default:: by byte wise compare 21 | const Comparator* comparator; 22 | 23 | // when access a record, use this is helpful to check if the 24 | // record is possible in the storage.It is helpful to reduce 25 | // the disk. 26 | const FilterPolicy* filter_policy = nullptr; 27 | 28 | // Compress the sstable use the compression algorithm. 29 | CompressType compress_type = KSnappyCompress; 30 | 31 | // include the methods that interact with the os. 32 | // such as start thread, open file, lock file, etc. 
// if true, data read from files is checked for completeness.
/// Writes `val` into dst[0..7] in little-endian order (least-significant
/// byte first). `dst` must have room for 8 bytes.
inline void EncodeFixed64(char* dst, uint64_t val) {
  uint8_t* const out = reinterpret_cast<uint8_t*>(dst);
  for (int byte = 0; byte < 8; ++byte) {
    out[byte] = static_cast<uint8_t>(val >> (8 * byte));
  }
}
PutFixed64(std::string* dst, uint64_t val) { 66 | char buf[sizeof(val)]; 67 | EncodeFixed64(buf, val); 68 | dst->append(buf, sizeof(buf)); 69 | } 70 | 71 | static const uint32_t KMaskValue = 0x1af289ae; 72 | 73 | uint32_t CrcMask(uint32_t crc); 74 | 75 | uint32_t CrcUnMask(uint32_t crc); 76 | 77 | } // namespace lsmkv 78 | #endif // STORAGE_XDB_UTIL_CODING_H_ -------------------------------------------------------------------------------- /lsmkv/db/version/version_edit.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_VERSION_VERSION_EDIT_H_ 2 | #define STORAGE_XDB_DB_VERSION_VERSION_EDIT_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "db/format/internal_key.h" 8 | 9 | namespace lsmkv { 10 | class VersionSet; 11 | 12 | struct FileMeta { 13 | FileMeta() : refs(0), file_size(0), allow_seeks(1 << 30) {} 14 | int refs; 15 | uint64_t number; 16 | uint64_t file_size; 17 | InternalKey smallest; 18 | InternalKey largest; 19 | int allow_seeks; 20 | }; 21 | 22 | class VersionEdit { 23 | public: 24 | VersionEdit() 25 | : log_number_(0), 26 | last_sequence_(0), 27 | next_file_number_(0), 28 | has_log_number_(false), 29 | has_last_sequence_(false), 30 | has_next_file_number_(false), 31 | has_comparator_name_(false) { 32 | new_files_.clear(); 33 | delete_files_.clear(); 34 | compaction_pointers_.clear(); 35 | } 36 | 37 | void SetLogNumber(uint64_t log_number) { 38 | has_log_number_ = true; 39 | log_number_ = log_number; 40 | } 41 | void SetLastSequence(SequenceNum last_sequence) { 42 | has_last_sequence_ = true; 43 | last_sequence_ = last_sequence; 44 | } 45 | void SetNextFileNumber(uint64_t next_file_number) { 46 | has_next_file_number_ = true; 47 | next_file_number_ = next_file_number; 48 | } 49 | void SetComparatorName(std::string_view name) { 50 | has_comparator_name_ = true; 51 | comparator_name_ = name; 52 | } 53 | void AddFile(int level, uint64_t number, uint64_t file_size, 54 | const InternalKey& smallest, 
const InternalKey& largest) { 55 | FileMeta meta; 56 | meta.number = number; 57 | meta.file_size = file_size; 58 | meta.smallest = smallest; 59 | meta.largest = largest; 60 | new_files_.emplace_back(level, meta); 61 | } 62 | void DeleteFile(int level, uint64_t file_number) { 63 | delete_files_.emplace(level, file_number); 64 | } 65 | void SetCompactionPointer(int level, InternalKey key) { 66 | compaction_pointers_.emplace_back(level, key); 67 | } 68 | void EncodeTo(std::string* dst); 69 | 70 | Status DecodeFrom(std::string_view src); 71 | 72 | private: 73 | friend class VersionSet; 74 | using DeleteSet = std::set>; 75 | 76 | std::vector> new_files_; 77 | DeleteSet delete_files_; 78 | std::vector> compaction_pointers_; 79 | uint64_t log_number_; 80 | SequenceNum last_sequence_; 81 | uint64_t next_file_number_; 82 | std::string comparator_name_; 83 | bool has_log_number_; 84 | bool has_last_sequence_; 85 | bool has_next_file_number_; 86 | bool has_comparator_name_; 87 | }; 88 | 89 | }; // namespace lsmkv 90 | 91 | #endif // STORAGE_XDB_DB_VERSION_VERSION_EDIT_H_ -------------------------------------------------------------------------------- /test/coding_test.cc: -------------------------------------------------------------------------------- 1 | #include "util/coding.h" 2 | 3 | #include "gtest/gtest.h" 4 | 5 | namespace lsmkv { 6 | 7 | TEST(CodingTest, Fixed32) { 8 | std::string str; 9 | for (uint32_t i = 0; i < 1000; i++) { 10 | PutFixed32(&str, i); 11 | } 12 | const char* data = str.data(); 13 | for (uint32_t i = 0; i < 1000; i++) { 14 | uint32_t val = DecodeFixed32(data); 15 | ASSERT_EQ(i, val); 16 | data += sizeof(uint32_t); 17 | } 18 | } 19 | 20 | TEST(CodingTest, Fixed64) { 21 | std::string str; 22 | for (uint64_t i = 0; i < 1000; i++) { 23 | PutFixed64(&str, i); 24 | } 25 | const char* data = str.data(); 26 | for (uint64_t i = 0; i < 1000; i++) { 27 | uint64_t val = DecodeFixed64(data); 28 | ASSERT_EQ(i, val); 29 | data += sizeof(uint64_t); 30 | } 31 | } 32 | 
33 | TEST(CodingTest, Varint32) { 34 | std::string str; 35 | for (uint32_t i = 0; i < (1 << 16); i++) { 36 | PutVarint32(&str, i); 37 | } 38 | std::string_view s(str); 39 | for (uint32_t i = 0; i < (1 << 16); i++) { 40 | uint32_t val; 41 | bool done = GetVarint32(&s, &val); 42 | ASSERT_EQ(i, val); 43 | ASSERT_EQ(done, true); 44 | } 45 | for (uint32_t i = 0; i < 100; i++) { 46 | uint32_t val; 47 | bool done = GetVarint32(&s, &val); 48 | ASSERT_EQ(done, false); 49 | } 50 | } 51 | 52 | TEST(CodingTest, Varint64) { 53 | std::string str; 54 | for (uint64_t i = 0; i < (1 << 16); i++) { 55 | PutVarint64(&str, i); 56 | } 57 | std::string_view s(str); 58 | for (uint64_t i = 0; i < (1 << 16); i++) { 59 | uint64_t val; 60 | bool done = GetVarint64(&s, &val); 61 | ASSERT_EQ(i, val); 62 | ASSERT_EQ(done, true); 63 | } 64 | for (uint32_t i = 0; i < 100; i++) { 65 | uint64_t val; 66 | bool done = GetVarint64(&s, &val); 67 | ASSERT_EQ(done, false); 68 | } 69 | } 70 | 71 | TEST(CodingTest, Varint32Overflow) { 72 | std::string_view s("\x81\x82\x83\x84\x85"); 73 | uint32_t val; 74 | ASSERT_EQ(DecodeVarint32(s.data(), s.data() + s.size(), &val), nullptr); 75 | } 76 | 77 | TEST(CodingTest, LengthPrefixed) { 78 | std::string str; 79 | PutLengthPrefixedSlice(&str, std::string_view("huangxuan")); 80 | PutLengthPrefixedSlice(&str, std::string_view("love")); 81 | PutLengthPrefixedSlice(&str, std::string_view("xiurui")); 82 | PutLengthPrefixedSlice(&str, std::string_view("qaq")); 83 | 84 | std::string_view sv(str); 85 | std::string_view result; 86 | ASSERT_TRUE(GetLengthPrefixedSlice(&sv, &result)); 87 | ASSERT_EQ("huangxuan", result); 88 | ASSERT_TRUE(GetLengthPrefixedSlice(&sv, &result)); 89 | ASSERT_EQ("love", result); 90 | ASSERT_TRUE(GetLengthPrefixedSlice(&sv, &result)); 91 | ASSERT_EQ("xiurui", result); 92 | ASSERT_TRUE(GetLengthPrefixedSlice(&sv, &result)); 93 | ASSERT_EQ("qaq", result); 94 | ASSERT_FALSE(GetLengthPrefixedSlice(&sv, &result)); 95 | } 96 | } // namespace lsmkv 
-------------------------------------------------------------------------------- /lsmkv/db/writebatch/writebatch.cc: -------------------------------------------------------------------------------- 1 | #include "db/writebatch/writebatch_helper.h" 2 | 3 | namespace lsmkv { 4 | 5 | void WriteBatchHelper::Append(WriteBatch *dst , const WriteBatch *src) { 6 | SetCount(dst, GetCount(dst) + GetCount(src)); 7 | dst->rep_.append(src->rep_.data() + KHeaderSize, src->rep_.size() - KHeaderSize); 8 | } 9 | void WriteBatch::Clear() { 10 | rep_.clear(); 11 | rep_.resize(KHeaderSize); 12 | } 13 | 14 | void WriteBatchHelper::SetContent(WriteBatch *b, std::string_view content) { 15 | assert(content.size() > KHeaderSize); 16 | b->rep_.assign(content.data(), content.size()); 17 | } 18 | 19 | std::string_view WriteBatchHelper::GetContent(WriteBatch *b) { 20 | return b->rep_; 21 | } 22 | WriteBatch::WriteBatch() { Clear(); } 23 | 24 | void WriteBatch::Put(std::string_view key, std::string_view value) { 25 | WriteBatchHelper::SetCount(this, WriteBatchHelper::GetCount(this) + 1); 26 | rep_.push_back(static_cast(KTypeInsertion)); 27 | PutLengthPrefixedSlice(&rep_,key); 28 | PutLengthPrefixedSlice(&rep_,value); 29 | } 30 | 31 | void WriteBatch::Delete(std::string_view key) { 32 | WriteBatchHelper::SetCount(this, WriteBatchHelper::GetCount(this) + 1); 33 | rep_.push_back(static_cast(KTypeDeletion)); 34 | PutLengthPrefixedSlice(&rep_,key); 35 | } 36 | 37 | Status WriteBatch::Iterate(Handle* handle) const { 38 | std::string_view input(rep_); 39 | if (input.size() < KHeaderSize) { 40 | return Status::Corruption("writebatch is too small ( < KHeaderSize)"); 41 | } 42 | input.remove_prefix(KHeaderSize); 43 | std::string_view key, value; 44 | int count = 0; 45 | 46 | while(!input.empty()) { 47 | count++; 48 | RecordType type = static_cast(input[0]); 49 | input.remove_prefix(1); 50 | switch (type) 51 | { 52 | case KTypeInsertion: 53 | if (GetLengthPrefixedSlice(&input, &key) && 54 | 
GetLengthPrefixedSlice(&input, &value)) { 55 | handle->Put(key, value); 56 | } else { 57 | return Status::Corruption("writebatch insert record bad"); 58 | } 59 | break; 60 | case KTypeDeletion: 61 | if (GetLengthPrefixedSlice(&input, &key)) { 62 | handle->Delete(key); 63 | } else { 64 | return Status::Corruption("writebatch delete record bad"); 65 | } 66 | break; 67 | } 68 | } 69 | if (count != WriteBatchHelper::GetCount(this)) { 70 | return Status::Corruption("writebatch count err"); 71 | } else { 72 | return Status::OK(); 73 | } 74 | } 75 | 76 | Status WriteBatchHelper::InsertMemTable(const WriteBatch* b, MemTable* mem) { 77 | MemTableInserter inserter; 78 | inserter.seq_ = WriteBatchHelper::GetSequenceNum(b); 79 | inserter.mem_ = mem; 80 | return b->Iterate(&inserter); 81 | } 82 | 83 | } -------------------------------------------------------------------------------- /lsmkv/db/log/log_writer.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "crc32c/crc32c.h" 3 | 4 | #include "db/log/log_writer.h" 5 | #include "util/file.h" 6 | #include "util/coding.h" 7 | #include 8 | 9 | namespace lsmkv { 10 | namespace log { 11 | static void InitTypeCrc(uint32_t* type_crc) { 12 | // Encode the type into the crc. 
13 | for (int i = 0; i <= KMaxType; i++) { 14 | char t = static_cast(i); 15 | type_crc[i] = crc32c::Crc32c(&t,1); 16 | } 17 | } 18 | Writer::Writer(WritableFile* dest) 19 | : dest_(dest),block_offset_(0) { 20 | InitTypeCrc(type_crc_); 21 | } 22 | 23 | Writer::Writer(WritableFile* dest, uint64_t dest_length) 24 | :dest_(dest), block_offset_(dest_length % kBlockSize) { 25 | InitTypeCrc(type_crc_); 26 | } 27 | Status Writer::AddRecord(std::string_view sv) { 28 | const char* ptr = sv.data(); 29 | size_t remain = sv.size(); 30 | 31 | Status s; 32 | bool begin = true; 33 | 34 | do { 35 | size_t block_left = kBlockSize - block_offset_; 36 | assert(block_left >= 0); 37 | if (block_left < KLogHeadSize) { 38 | if (block_left > 0) { 39 | dest_->Append(std::string_view("\x00\x00\x00\x00\x00\x00\x00", block_left)); 40 | } 41 | block_offset_ = 0; 42 | } 43 | size_t fragment_avail = kBlockSize - block_offset_ - KLogHeadSize; 44 | size_t fragment_length = (remain < fragment_avail) ? remain : fragment_avail; 45 | bool end = (remain == fragment_length); 46 | 47 | LogRecodeType type; 48 | if (begin && end) { 49 | type = KFullType; 50 | } else if(begin) { 51 | type = KFirstType; 52 | } else if (end) { 53 | type = KLastType; 54 | } else { 55 | type = KMiddleType; 56 | } 57 | 58 | s = WritePhysicalRecord(type, ptr, fragment_length); 59 | remain -= fragment_length; 60 | ptr += fragment_length; 61 | begin = false; 62 | } while(s.ok() && remain > 0); 63 | return s; 64 | } 65 | Status Writer::WritePhysicalRecord(LogRecodeType type, const char* p, size_t len) { 66 | char header[KLogHeadSize]; 67 | uint32_t crc = crc32c::Extend(type_crc_[type], reinterpret_cast(p), len); 68 | crc = CrcMask(crc); 69 | EncodeFixed32(header, crc); 70 | 71 | header[4] = static_cast(len & 0xff); 72 | header[5] = static_cast(len >> 8); 73 | header[6] = static_cast(type); 74 | 75 | Status s = dest_->Append(std::string_view(header, KLogHeadSize)); 76 | if (s.ok()) { 77 | s = dest_->Append(std::string_view(p, len)); 
78 | if(s.ok()) { 79 | s = dest_->Flush(); 80 | } 81 | } 82 | block_offset_ += KLogHeadSize + len; 83 | return s; 84 | } 85 | } 86 | 87 | } -------------------------------------------------------------------------------- /lsmkv/db/sstable/table_cache.cc: -------------------------------------------------------------------------------- 1 | #include "db/sstable/table_cache.h" 2 | 3 | #include 4 | 5 | #include "include/cache.h" 6 | #include "include/env.h" 7 | #include "include/iterator.h" 8 | #include "include/sstable_reader.h" 9 | #include "util/coding.h" 10 | #include "util/filename.h" 11 | namespace lsmkv { 12 | 13 | struct TableAndFile { 14 | SSTableReader* table; 15 | RandomReadFile* file; 16 | }; 17 | static void DeleteEntry(std::string_view key, void* value) { 18 | TableAndFile* tf = reinterpret_cast(value); 19 | delete tf->table; 20 | delete tf->file; 21 | delete tf; 22 | } 23 | 24 | static void UnrefEntry(void* arg1, void* arg2) { 25 | Cache* cache = reinterpret_cast(arg1); 26 | Cache::Handle* handle = reinterpret_cast(arg2); 27 | cache->Release(handle); 28 | } 29 | Status TableCache::Get(const ReadOption& option, uint64_t file_number, 30 | uint64_t file_size, std::string_view key, void* arg, 31 | void (*handle_result)(void*, std::string_view, 32 | std::string_view)) { 33 | Cache::Handle* handle = nullptr; 34 | Status s = FindTable(file_number, file_size, &handle); 35 | if (s.ok()) { 36 | SSTableReader* table = 37 | reinterpret_cast(cache_->Value(handle))->table; 38 | s = table->InternalGet(option, key, arg, handle_result); 39 | cache_->Release(handle); 40 | } 41 | return s; 42 | } 43 | 44 | void TableCache::Evict(uint64_t file_number) { 45 | char buf[sizeof(file_number)]; 46 | EncodeFixed64(buf, file_number); 47 | cache_->Erase(std::string_view(buf, sizeof(buf))); 48 | } 49 | 50 | Status TableCache::FindTable(uint64_t file_number, uint64_t file_size, 51 | Cache::Handle** handle) { 52 | Status s; 53 | char buf[sizeof(file_number)]; 54 | EncodeFixed64(buf, 
file_number); 55 | std::string_view key(buf, sizeof(buf)); 56 | *handle = cache_->Lookup(key); 57 | if (*handle == nullptr) { 58 | std::string filename = SSTableFileName(name_, file_number); 59 | RandomReadFile* file = nullptr; 60 | s = env_->NewRamdomReadFile(filename, &file); 61 | SSTableReader* table = nullptr; 62 | if (s.ok()) { 63 | s = SSTableReader::Open(option_, file, file_size, &table); 64 | } 65 | if (!s.ok()) { 66 | delete file; 67 | } else { 68 | TableAndFile* tf = new TableAndFile; 69 | tf->table = table; 70 | tf->file = file; 71 | *handle = cache_->Insert(key, tf, 1, &DeleteEntry); 72 | } 73 | } 74 | return s; 75 | } 76 | 77 | Iterator* TableCache::NewIterator(const ReadOption& option, 78 | uint64_t file_number, uint64_t file_size) { 79 | Cache::Handle* handle = nullptr; 80 | Status s = FindTable(file_number, file_size, &handle); 81 | if (!s.ok()) { 82 | return NewErrorIterator(s); 83 | } 84 | SSTableReader* table = 85 | reinterpret_cast(cache_->Value(handle))->table; 86 | Iterator* iter = table->NewIterator(option); 87 | iter->AppendCleanup(&UnrefEntry, cache_, handle); 88 | return iter; 89 | } 90 | 91 | } // namespace lsmkv 92 | -------------------------------------------------------------------------------- /lsmkv/db/filter/filter_block.cc: -------------------------------------------------------------------------------- 1 | #include "db/filter/filter_block.h" 2 | 3 | #include 4 | 5 | #include "util/coding.h" 6 | namespace lsmkv { 7 | 8 | // every FilterBlock has 2KB of data 9 | static const size_t KFilterBlockSizeLg = 11; 10 | 11 | static const size_t KFilterBlockSize = (1 << KFilterBlockSizeLg); 12 | 13 | FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy) 14 | : policy_(policy) {} 15 | void FilterBlockBuilder::StartBlock(uint64_t block_offset) { 16 | size_t filter_index = block_offset / KFilterBlockSize; 17 | assert(filter_index >= filter_offsets_.size()); 18 | while (filter_index > filter_offsets_.size()) { 19 | GenerateFilter(); 
20 | } 21 | } 22 | 23 | void FilterBlockBuilder::AddKey(std::string_view key) { 24 | key_starts_.push_back(key_buffer_.size()); 25 | key_buffer_.append(key.data(), key.size()); 26 | } 27 | 28 | std::string_view FilterBlockBuilder::Finish() { 29 | if (!key_starts_.empty()) { 30 | GenerateFilter(); 31 | } 32 | const uint32_t filter_offsets_start = result_.size(); 33 | for (size_t i = 0; i < filter_offsets_.size(); i++) { 34 | PutFixed32(&result_, filter_offsets_[i]); 35 | } 36 | PutFixed32(&result_, filter_offsets_start); 37 | result_.push_back(static_cast(KFilterBlockSizeLg)); 38 | return std::string_view(result_); 39 | } 40 | 41 | void FilterBlockBuilder::GenerateFilter() { 42 | size_t key_num = key_starts_.size(); 43 | if (key_num == 0) { 44 | filter_offsets_.push_back(result_.size()); 45 | return; 46 | } 47 | key_starts_.push_back(key_buffer_.size()); 48 | tmp_keys_.resize(key_num); 49 | for (size_t i = 0; i < key_num; i++) { 50 | const char* p = key_buffer_.data() + key_starts_[i]; 51 | const size_t len = key_starts_[i + 1] - key_starts_[i]; 52 | tmp_keys_[i] = std::string_view(p, len); 53 | } 54 | filter_offsets_.push_back(result_.size()); 55 | policy_->CreatFilter(&tmp_keys_[0], static_cast(key_num), &result_); 56 | 57 | tmp_keys_.clear(); 58 | key_starts_.clear(); 59 | key_buffer_.clear(); 60 | } 61 | 62 | FilterBlockReader::FilterBlockReader(const FilterPolicy* policy, 63 | std::string_view contents) 64 | : policy_(policy), 65 | data_(nullptr), 66 | filter_offsets_start_(nullptr), 67 | filter_offsets_num_(0), 68 | filter_block_size_length_(0) { 69 | size_t n = contents.size(); 70 | if (n < 5) return; 71 | filter_block_size_length_ = contents[n - 1]; 72 | uint32_t filter_offsets_start = DecodeFixed32(contents.data() + n - 5); 73 | if (filter_offsets_start > n - 5) return; 74 | data_ = contents.data(); 75 | filter_offsets_start_ = data_ + filter_offsets_start; 76 | filter_offsets_num_ = (n - 5 - filter_offsets_start) / 4; 77 | } 78 | 79 | bool 
FilterBlockReader::KeyMayMatch(uint64_t block_offset, 80 | std::string_view key) { 81 | size_t index = block_offset >> filter_block_size_length_; 82 | if (index < filter_offsets_num_) { 83 | uint32_t start = DecodeFixed32(filter_offsets_start_ + index * 4); 84 | uint32_t limit = DecodeFixed32(filter_offsets_start_ + index * 4 + 4); 85 | if (start < limit && 86 | limit <= static_cast(filter_offsets_start_ - data_)) { 87 | std::string_view filter(data_ + start, limit - start); 88 | return policy_->KeyMayMatch(key, filter); 89 | } else if (start == limit) { 90 | return false; 91 | } 92 | } 93 | return true; 94 | } 95 | 96 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/db/dbimpl.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_DBIMPL_H_ 2 | #define STORAGE_XDB_DB_DBIMPL_H_ 3 | 4 | #include 5 | 6 | #include "db/log/log_writer.h" 7 | #include "db/memtable/memtable.h" 8 | #include "db/sstable/table_cache.h" 9 | #include "db/version/version.h" 10 | #include "include/db.h" 11 | #include "include/env.h" 12 | 13 | namespace lsmkv { 14 | 15 | class DBImpl : public DB { 16 | public: 17 | DBImpl(const Option& option, const std::string& name); 18 | 19 | ~DBImpl() override; 20 | 21 | Status Get(const ReadOption& option, std::string_view key, 22 | std::string* value) override; 23 | 24 | Status Put(const WriteOption& option, std::string_view key, 25 | std::string_view value) override; 26 | 27 | Status Delete(const WriteOption& option, std::string_view key) override; 28 | 29 | Status Write(const WriteOption& option, WriteBatch* batch) override; 30 | 31 | private: 32 | friend class DB; 33 | struct Writer; 34 | struct CompactionState; 35 | 36 | WriteBatch* MergeBatchGroup(Writer** last_writer); 37 | 38 | Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mu_); 39 | 40 | Status Initialize(); 41 | 42 | Status MakeRoomForWrite() 
EXCLUSIVE_LOCKS_REQUIRED(mu_); 43 | 44 | Status RecoverLogFile(uint64_t number, SequenceNum* max_sequence, 45 | VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mu_); 46 | 47 | Status WriteLevel0SSTable(MemTable* mem, VersionEdit* edit) 48 | EXCLUSIVE_LOCKS_REQUIRED(mu_); 49 | 50 | void MayScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mu_); 51 | 52 | static void CompactionSchedule(void* db); 53 | 54 | void BackgroundCompactionCall(); 55 | 56 | void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mu_); 57 | 58 | void CompactionMemtable() EXCLUSIVE_LOCKS_REQUIRED(mu_); 59 | 60 | Status DoCompactionLevel(CompactionState* state) 61 | EXCLUSIVE_LOCKS_REQUIRED(mu_); 62 | 63 | Status LogCompactionResult(CompactionState* state) 64 | EXCLUSIVE_LOCKS_REQUIRED(mu_); 65 | 66 | void RecordBackgroundError(Status s) EXCLUSIVE_LOCKS_REQUIRED(mu_); 67 | 68 | void GarbageFilesClean() EXCLUSIVE_LOCKS_REQUIRED(mu_); 69 | 70 | Status FinishCompactionSSTable(CompactionState* state, Iterator* input); 71 | 72 | Status OpenCompactionSSTable(CompactionState* state); 73 | 74 | void CleanCompaction(CompactionState* state) EXCLUSIVE_LOCKS_REQUIRED(mu_); 75 | 76 | const std::string name_; 77 | const InternalKeyComparator internal_comparator_; 78 | const InteralKeyFilterPolicy internal_policy_; 79 | const Option option_; 80 | 81 | FileLock* file_lock_; 82 | Env* env_; 83 | // in memory cache and its write-ahead logger 84 | MemTable* mem_; 85 | MemTable* imm_; 86 | log::Writer* log_; 87 | WritableFile* logfile_; 88 | uint64_t logfile_number_ GUARDED_BY(mu_); 89 | SequenceNum last_seq_; 90 | Mutex mu_; 91 | 92 | CondVar background_cv_; 93 | Status background_status_ GUARDED_BY(mu_); 94 | bool background_scheduled_ GUARDED_BY(mu_); 95 | std::atomic closed_; 96 | std::atomic has_imm_; 97 | 98 | std::set files_writing_ GUARDED_BY(mu_); 99 | 100 | TableCache* table_cache_; 101 | VersionSet* vset_; 102 | WriteBatch* tmp_batch_ GUARDED_BY(mu_); 103 | std::deque writers_ GUARDED_BY(mu_); 104 | }; 105 | 106 | 
Option AdaptOption(const std::string& name, const InternalKeyComparator* icmp, 107 | const InteralKeyFilterPolicy* ipolicy, const Option& option); 108 | } // namespace lsmkv 109 | 110 | #endif // STORAGE_XDB_DB_DBIMPL_H_ -------------------------------------------------------------------------------- /lsmkv/db/log/log_reader.cc: -------------------------------------------------------------------------------- 1 | #include "db/log/log_reader.h" 2 | 3 | #include 4 | 5 | #include "crc32c/crc32c.h" 6 | #include "util/coding.h" 7 | #include "util/file.h" 8 | 9 | namespace lsmkv { 10 | namespace log { 11 | 12 | Reader::Reader(SequentialFile* src, bool checksum, uint64_t initial_offset) 13 | : src_(src), 14 | checksum_(checksum), 15 | initial_offset_(initial_offset), 16 | buffer_mem_(new char[kBlockSize]), 17 | eof_(false), 18 | last_record_offset_(0), 19 | buffer_end_offset_(0) {} 20 | Reader::~Reader() { delete[] buffer_mem_; } 21 | bool Reader::ReadRecord(std::string_view* record, std::string* buffer) { 22 | *record = ""; 23 | buffer->clear(); 24 | 25 | uint64_t first_fragment_offset; 26 | std::string_view fragment; 27 | while (true) { 28 | const unsigned int type = ReadPhysicalRecord(&fragment); 29 | uint64_t fragment_offset = 30 | buffer_end_offset_ - buffer_.size() - KLogHeadSize - fragment.size(); 31 | switch (type) { 32 | case KFullType: 33 | buffer->clear(); 34 | *record = fragment; 35 | last_record_offset_ = fragment_offset; 36 | return true; 37 | case KFirstType: 38 | first_fragment_offset = fragment_offset; 39 | buffer->assign(fragment.data(), fragment.size()); 40 | break; 41 | case KMiddleType: 42 | buffer->append(fragment.data(), fragment.size()); 43 | break; 44 | case KLastType: 45 | buffer->append(fragment.data(), fragment.size()); 46 | *record = std::string_view(*buffer); 47 | last_record_offset_ = first_fragment_offset; 48 | return true; 49 | case KBadRecord: 50 | // ignore the bad record. 
51 | break; 52 | case KEof: 53 | return false; 54 | } 55 | } 56 | return false; 57 | } 58 | 59 | unsigned int Reader::ReadPhysicalRecord(std::string_view* fragments) { 60 | while (true) { 61 | if (buffer_.size() < KLogHeadSize) { 62 | if (eof_) { 63 | buffer_ = ""; 64 | return KEof; 65 | } else { 66 | buffer_ = ""; 67 | Status s = src_->Read(kBlockSize, &buffer_, buffer_mem_); 68 | if (!s.ok()) { 69 | buffer_ = ""; 70 | eof_ = true; 71 | return KEof; 72 | } 73 | if (buffer_.size() < kBlockSize) { 74 | eof_ = true; 75 | } 76 | continue; 77 | } 78 | } 79 | 80 | const char* header = buffer_.data(); 81 | const uint32_t length_lo = static_cast(header[4]) & 0xff; 82 | const uint32_t length_hi = static_cast(header[5]) & 0xff; 83 | const unsigned int type = static_cast(header[6]); 84 | const uint32_t length = length_lo | (length_hi << 8); 85 | 86 | if (KLogHeadSize + length > buffer_.size()) { 87 | buffer_ = ""; 88 | if (!eof_) { 89 | return KBadRecord; 90 | } 91 | return KEof; 92 | } 93 | 94 | if (checksum_) { 95 | uint32_t record_crc = CrcUnMask(DecodeFixed32(header)); 96 | uint32_t expect_crc = crc32c::Crc32c(header + 6, length + 1); 97 | if (record_crc != expect_crc) { 98 | buffer_ = ""; 99 | return KBadRecord; 100 | } 101 | } 102 | buffer_.remove_prefix(KLogHeadSize + length); 103 | // read the record befor initial, just ignore this record 104 | if (buffer_end_offset_ - buffer_.size() - KLogHeadSize - length < 105 | initial_offset_) { 106 | *fragments = ""; 107 | return KBadRecord; 108 | } 109 | *fragments = std::string_view(header + KLogHeadSize, length); 110 | return type; 111 | } 112 | } 113 | 114 | }; // namespace log 115 | 116 | } // namespace lsmkv 117 | -------------------------------------------------------------------------------- /lsmkv/db/format/internal_key.cc: -------------------------------------------------------------------------------- 1 | #include "db/format/internal_key.h" 2 | 3 | #include 4 | namespace lsmkv { 5 | 6 | void 
AppendInternalKey(std::string* dst, const ParsedInternalKey& key) { 7 | dst->append(key.user_key_.data(), key.user_key_.size()); 8 | ; 9 | PutFixed64(dst, PackSequenceAndType(key.seq_, key.type_)); 10 | } 11 | 12 | bool ParseInternalKey(std::string_view internal_key, 13 | ParsedInternalKey* result) { 14 | const size_t n = internal_key.size(); 15 | if (n < 8) { 16 | return false; 17 | } 18 | uint64_t num = DecodeFixed64(internal_key.data() + n - 8); 19 | uint8_t type = num & 0xff; 20 | result->seq_ = num >> 8; 21 | result->type_ = static_cast(type); 22 | result->user_key_ = std::string_view(internal_key.data(), n - 8); 23 | return (type <= static_cast(KTypeInsertion)); 24 | } 25 | 26 | LookupKey::LookupKey(std::string_view user_key, SequenceNum seq) { 27 | size_t key_size = user_key.size(); 28 | size_t need = key_size + 13; 29 | if (need < sizeof(buf_)) { 30 | start_ = buf_; 31 | } else { 32 | start_ = new char[need]; 33 | } 34 | kstart_ = EncodeVarint32(start_, key_size + 8); 35 | std::memcpy(kstart_, user_key.data(), key_size); 36 | EncodeFixed64(kstart_ + key_size, (seq << 8) | KTypeLookup); 37 | end_ = kstart_ + key_size + 8; 38 | } 39 | 40 | LookupKey::~LookupKey() { 41 | if (start_ != buf_) { 42 | delete[] start_; 43 | } 44 | } 45 | 46 | int InternalKeyComparator::Compare(std::string_view a, 47 | std::string_view b) const { 48 | int r = user_cmp_->Compare(ExtractUserKey(a), ExtractUserKey(b)); 49 | if (r == 0) { 50 | // num is Sequence | RecodeType 51 | const uint64_t anum = DecodeFixed64(a.data() + a.size() - 8); 52 | const uint64_t bnum = DecodeFixed64(b.data() + b.size() - 8); 53 | if (anum > bnum) { 54 | r = -1; 55 | } else if (anum < bnum) { 56 | r = +1; 57 | } 58 | } 59 | return r; 60 | } 61 | 62 | void InternalKeyComparator::FindShortestMiddle(std::string* start, 63 | std::string_view limit) const { 64 | std::string_view start_user = ExtractUserKey(*start); 65 | std::string_view limit_user = ExtractUserKey(limit); 66 | std::string tmp(start_user.data(), 
start_user.size()); 67 | user_cmp_->FindShortestMiddle(&tmp, limit_user); 68 | if (tmp.size() < start_user.size() && 69 | user_cmp_->Compare(start_user, tmp) < 0) { 70 | PutFixed64(&tmp, PackSequenceAndType(KMaxSequenceNum, KTypeLookup)); 71 | start->swap(tmp); 72 | } 73 | } 74 | 75 | void InternalKeyComparator::FindShortestBigger(std::string* start) const { 76 | std::string_view start_user = ExtractUserKey(*start); 77 | std::string tmp(start->data(), start->size()); 78 | user_cmp_->FindShortestBigger(&tmp); 79 | if (tmp.size() < start_user.size() && 80 | user_cmp_->Compare(start_user, tmp) < 0) { 81 | PutFixed64(&tmp, PackSequenceAndType(KMaxSequenceNum, KTypeLookup)); 82 | start->swap(tmp); 83 | } 84 | } 85 | 86 | const char* InteralKeyFilterPolicy::Name() const { 87 | return user_policy_->Name(); 88 | } 89 | 90 | void InteralKeyFilterPolicy::CreatFilter(std::string_view* keys, int n, 91 | std::string* dst) const { 92 | std::string_view* user_keys = const_cast(keys); 93 | for (int i = 0; i < n; i++) { 94 | user_keys[i] = ExtractUserKey(keys[i]); 95 | } 96 | user_policy_->CreatFilter(user_keys, n, dst); 97 | } 98 | 99 | bool InteralKeyFilterPolicy::KeyMayMatch(std::string_view key, 100 | std::string_view filter) const { 101 | return user_policy_->KeyMayMatch(ExtractUserKey(key), filter); 102 | } 103 | 104 | } // namespace lsmkv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | - 系统环境:2023 x86_64 GNU/Linux 6.1.41-1-MANJARO 2 | - 编码风格:Google C++ Style 3 | - C++标准:C++20 4 | - 编译器:GCC13.1.0 5 | 6 | ```shell 7 | # 依赖安装 8 | sudo pacman -S snappy 9 | sudo pacman -S googletest 10 | sudo pacman -S benchmark 11 | yay -S google-crc32c 12 | ``` 13 | 14 | ```cpp 15 | # 编译、测试 16 | chmod +x clean.sh 17 | ./clean.sh 18 | chmod +x build.sh 19 | ./build.sh 20 | chmod +x test.sh 21 | ./test.sh 22 | ``` 23 | 24 | 使用该项目示例见test/example_test.cc文件。 25 | 26 | ## 
简介 27 | 28 | 项目架构图: 29 | 30 | ![项目架构图](./pic/system.png) 31 | 32 | 33 | 34 | - 内存部分:实现内存分配器Arena类和无锁跳表SkipList类,并以跳表为底层存储结构实现内存表MemTable类。 35 | - 磁盘部分:预写日志、SST文件格式的设计和SST文件compaction实现,并通过MVCC多版本并发控制解决读写冲突的问题。 36 | - 优化部分:通过Bloom Filter和缓存模块等手段提高读操作效率,同时将Bloom Filter和索引区等SST文件元信息储存在内存TableCache中,减少磁盘IO次数。 37 | 38 | 39 | 40 | ## 内容 41 | 42 | lsmkv/include/status.h中的Status类用于表示命令执行结果状态,其中数据成员state_结构: 43 | 44 | ![status](./pic/status.png) 45 | 46 | 47 | 48 | ### db/format 49 | 50 | 项目中关于key的概念: 51 | 52 | - user key 53 | - internal key 54 | - lookup key 55 | 56 | 57 | 58 | ![](./pic/key.png) 59 | 60 | ### db/log 61 | 62 | 预写日志是为了保证内存中数据的完整性,防止机器宕机等情况导致内存中MemTable的数据丢失。 63 | 64 | WAL(write ahead log)格式: 65 | 66 | ![wal](./pic/wal.png) 67 | 68 | 69 | 70 | ### db/format 71 | 72 | ### db/filter 73 | 74 | 采用布隆过滤器加速读操作,主要是对于一些不存在的key可及时返回。 75 | 76 | ![bloom](./pic/bloom.png) 77 | 78 | 通过Bloom Filter和二分查找提高搜索效率,同时将Bloom Filter和索引部分储存在内存TableCache中,减少磁盘IO。 79 | 80 | ### db/memtable 81 | 82 | 内存分配器Arena底层结构: 83 | 84 | ![arena实现](./pic/arena实现.png) 85 | 86 | 向Arena申请空间流程: 87 | 88 | ![arena流程](./pic/arena流程.png) 89 | 90 | MemTable的底层实现: 91 | 92 | ![](./pic/SkipListPic.png) 93 | 94 | 内存中保存两个跳表,一个用于写数据的MemTable,一个只读的Immutable MemTable,防止当MemTable写满时造成的写阻塞。 95 | 96 | 后台开启新线程执行Immutable MemTable的Minor Compaction,不影响主线程对MemTable的操作。 97 | 98 | ### db/sstable 99 | 100 | 磁盘中的SST文件由多个`{block, type, crc}`组成(最后还有48字节的`footer`): 101 | 102 | ![](./pic/sst1.png) 103 | 104 | 其中`{block, type, crc}`中的`block`可以是`data block`、`meta block`、`meta index block`、`index block`: 105 | 106 | ![](./pic/sst2.png) 107 | 108 | 其中`data block`格式: 109 | 110 | ![](./pic/sst3.png) 111 | 112 | ### db/version 113 | 114 | `Version`用于表示某次 compaction 或者打开/恢复的数据库状态。 115 | 116 | 针对共享的资源,这里通过MVCC进行处理。MVCC(Multiversion Concurrency Control,多版本并发控制)是数据库中常用的概念。每一个执行操作的用户,看到的都是数据库特定时刻的快照 (snapshot), writer 的任何未完成的修改都不会被其他的用户所看到;当对数据进行更新的时候并不是直接覆盖,而是先进行标记,然后在其他地方添加新的数据(这些变更存储在versionedit),从而形成一个新版本,此时再来读取的 reader 
看到的就是最新的版本了。所以这种处理策略维护了多个版本的数据,但只有一个是最新的(versionset中维护着全局最新的seqnum)。 117 | 118 | ![](./pic/version.png) 119 | 120 | 121 | ## 存储引擎接口流程 122 | 123 | ### 接口Get逻辑 124 | 125 | ``Status Get(const ReadOption& option, std::string_view key, 126 | std::string* value) override;`` 127 | 128 | 1. 内存中:查询cache。如果查到直接返回 129 | 2. 内存中:查询MemTable。如果查到,且value type为`KTypeDeletion`,则表示该key已删除,返回NotFound状态,否则直接返回val。 130 | 3. 内存中:查询Immutable MemTable,逻辑同2。 131 | 4. 磁盘上:查询TableCache(对应代码`Version::Get`),按L0层到Ln层、每层从SSTable0开始查找,如果找到直接返回,否则继续查找下一个SST文件。对于每个SST文件: 132 | 1. 判断key是否在该SST文件的键值范围`[min_key_, max_key_]`内,如果不是,则进入下一个SST文件的查找 133 | 2. 布隆过滤器查找该key,如果不存在,则进入下一个SST文件的查找 134 | 3. 进入该SST文件的索引区,拿到key和key的下一个键的偏移量offset。读取该SST文件取出key对应的val 135 | 136 | 137 | ### 接口Put逻辑 138 | 139 | ``Status Put(const WriteOption& option, std::string_view key, 140 | std::string_view value);`` 141 | 142 | 查询key是否已在MemTable,记录使用的内存容量,检测此次put操作是否会超过设定的容量阈值。 143 | 1. 如果不会,直接调用跳表的put接口 144 | 2. 如果会,将该MemTable转为Immutable MemTable,然后新生成一个MemTable,插入该键值对。 145 | 后台启动一个线程,负责将内存中的Immutable MemTable经过minor compaction到磁盘中L0层。 146 | 然后检测L0层的SST文件数量是否超过了2: 147 | 1. 如果超过,则进行L0层 -> L1层的major compaction。 148 | 2. 
// Kind of entry stored under an internal key. The value occupies the low
// 8 bits of the packed trailer. KTypeLookup deliberately shares the value of
// KTypeInsertion; it is the type packed into lookup/separator keys.
enum RecordType {
  KTypeDeletion = 0x0,
  KTypeInsertion = 0x1,
  KTypeLookup = 0x1
};

typedef uint64_t SequenceNum;

// Largest sequence number that fits in the 56 bits left beside the type byte.
inline constexpr SequenceNum KMaxSequenceNum = (0x1ull << 56) - 1;

// Packs a sequence number and a record type into the 8-byte trailer of an
// internal key: the sequence goes into the upper 56 bits, the type into the
// low 8 bits.
//
// Declared `inline` rather than `static`: this lives in a header, and a
// `static` definition gives every translation unit its own copy and triggers
// unused-function warnings wherever it is not called.
//
// @param seq   Sequence number; must not exceed KMaxSequenceNum, otherwise
//              its top bits would be shifted out.
// @param type  Record type; must be one of the RecordType enumerators.
// @return (seq << 8) | type.
inline uint64_t PackSequenceAndType(SequenceNum seq, RecordType type) {
  assert(seq <= KMaxSequenceNum);
  assert(type <= KTypeInsertion);
  return (seq << 8) | type;
}
| } 55 | ~InternalKey() = default; 56 | 57 | std::string_view user_key() const { return ExtractUserKey(rep_); } 58 | 59 | std::string_view Encode() const { return rep_; } 60 | 61 | void DecodeFrom(std::string_view s) { rep_.assign(s.data(), s.size()); } 62 | 63 | void SetFrom(const ParsedInternalKey& key) { 64 | rep_.clear(); 65 | AppendInternalKey(&rep_, key); 66 | } 67 | 68 | void Clear() { rep_.clear(); } 69 | 70 | private: 71 | std::string rep_; 72 | }; 73 | 74 | class InternalKeyComparator : public Comparator { 75 | public: 76 | explicit InternalKeyComparator(const Comparator* c) : user_cmp_(c) {} 77 | 78 | const char* Name() const override { return "lsmkv.InternalKeyComparator"; } 79 | 80 | int Compare(std::string_view a, std::string_view b) const override; 81 | 82 | const Comparator* UserComparator() const { return user_cmp_; } 83 | 84 | int Compare(const InternalKey& a, const InternalKey& b) const { 85 | return Compare(a.Encode(), b.Encode()); 86 | } 87 | 88 | void FindShortestMiddle(std::string* start, 89 | std::string_view limit) const override; 90 | 91 | void FindShortestBigger(std::string* start) const override; 92 | 93 | private: 94 | const Comparator* user_cmp_; 95 | }; 96 | 97 | class InteralKeyFilterPolicy : public FilterPolicy { 98 | public: 99 | InteralKeyFilterPolicy(const FilterPolicy* policy) : user_policy_(policy) {} 100 | const char* Name() const; 101 | void CreatFilter(std::string_view* keys, int n, std::string* dst) const; 102 | bool KeyMayMatch(std::string_view key, std::string_view filter) const; 103 | 104 | private: 105 | const FilterPolicy* user_policy_; 106 | }; 107 | 108 | class LookupKey { 109 | public: 110 | LookupKey(std::string_view user_key, SequenceNum seq); 111 | 112 | ~LookupKey(); 113 | 114 | LookupKey(const LookupKey&) = delete; 115 | LookupKey& operator=(const LookupKey&) = delete; 116 | 117 | std::string_view UserKey() const { 118 | return std::string_view(kstart_, end_ - kstart_ - 8); 119 | } 120 | 121 | std::string_view 
InternalKey() const { 122 | return std::string_view(kstart_, end_ - kstart_); 123 | } 124 | 125 | std::string_view FullKey() const { 126 | return std::string_view(start_, end_ - start_); 127 | } 128 | 129 | private: 130 | char* kstart_; 131 | char* end_; 132 | char* start_; 133 | char buf_[200]; 134 | }; 135 | 136 | } // namespace lsmkv 137 | 138 | #endif // INTERNAL_KEY_H -------------------------------------------------------------------------------- /lsmkv/db/memtable/memtable.cc: -------------------------------------------------------------------------------- 1 | #include "db/memtable/memtable.h" 2 | 3 | #include 4 | 5 | #include "include/iterator.h" 6 | #include "util/coding.h" 7 | namespace lsmkv { 8 | 9 | static std::string_view DecodeLengthPrefixedSlice(const char* p) { 10 | uint32_t len; 11 | const char* q = DecodeVarint32(p, p + 5, &len); 12 | return std::string_view(q, len); 13 | } 14 | 15 | static const char* MakeKey(std::string* buf, std::string_view s) { 16 | buf->clear(); 17 | PutVarint32(buf, s.size()); 18 | buf->append(s.data(), s.size()); 19 | return buf->data(); 20 | } 21 | 22 | int MemTable::KeyComparator::operator()(const char* a, const char* b) const { 23 | std::string_view as = DecodeLengthPrefixedSlice(a); 24 | std::string_view bs = DecodeLengthPrefixedSlice(b); 25 | return comparator.Compare(as, bs); 26 | } 27 | 28 | MemTable::MemTable(const InternalKeyComparator& cmp) 29 | : comparator_(cmp), table_(comparator_, &arena_), refs_(0) {} 30 | 31 | // Format : 32 | // Varint32 : key size + 8. 33 | // char[key size] : key 34 | // seq and type : Sequence | RecodeType 35 | // Varint32 : value size. 
36 | // char[value size] : value 37 | void MemTable::Put(SequenceNum seq, RecordType type, std::string_view key, 38 | std::string_view value) { 39 | uint64_t seq_and_type = PackSequenceAndType(seq, type); 40 | size_t key_size = key.size(); 41 | size_t val_size = value.size(); 42 | size_t internal_key_size = key_size + 8; 43 | size_t total_size = VarintLength(internal_key_size) + internal_key_size + 44 | VarintLength(val_size) + val_size; 45 | char* buf = arena_.Allocate(total_size); 46 | char* p = EncodeVarint32(buf, internal_key_size); 47 | std::memcpy(p, key.data(), key_size); 48 | p += key_size; 49 | EncodeFixed64(p, seq_and_type); 50 | p += 8; 51 | p = EncodeVarint32(p, val_size); 52 | std::memcpy(p, value.data(), val_size); 53 | table_.Insert(buf); 54 | } 55 | 56 | bool MemTable::Get(const LookupKey& key, std::string* result, Status* status) { 57 | Table::Iterator iter(&table_); 58 | std::string_view full_key = key.FullKey(); 59 | iter.Seek(full_key.data()); 60 | 61 | if (iter.Valid()) { 62 | const char* record = iter.key(); 63 | uint32_t key_size; 64 | const char* key_ptr = DecodeVarint32(record, record + 5, &key_size); 65 | if (comparator_.comparator.UserComparator()->Compare( 66 | key.UserKey(), std::string_view(key_ptr, key_size - 8)) == 0) { 67 | uint64_t seq_and_type = DecodeFixed64(key_ptr + key_size - 8); 68 | switch (static_cast(seq_and_type & 0xff)) { 69 | case KTypeInsertion: { 70 | std::string_view val = DecodeLengthPrefixedSlice(key_ptr + key_size); 71 | result->assign(val.data(), val.size()); 72 | return true; 73 | } 74 | case KTypeDeletion: 75 | *status = Status::NotFound(std::string_view()); 76 | return true; 77 | } 78 | } 79 | } 80 | return false; 81 | } 82 | 83 | class MemTableIterator : public Iterator { 84 | public: 85 | explicit MemTableIterator(MemTable::Table* table) : iter_(table) {} 86 | 87 | MemTableIterator(const MemTableIterator&) = delete; 88 | MemTableIterator& operator=(const MemTableIterator&) = delete; 89 | 90 | 
~MemTableIterator() = default; 91 | 92 | bool Valid() const { return iter_.Valid(); } 93 | 94 | std::string_view Key() const { 95 | return DecodeLengthPrefixedSlice(iter_.key()); 96 | } 97 | 98 | std::string_view Value() const { 99 | std::string_view key = DecodeLengthPrefixedSlice(iter_.key()); 100 | return DecodeLengthPrefixedSlice(key.data() + key.size()); 101 | } 102 | 103 | void Next() { iter_.Next(); } 104 | 105 | void Prev() { iter_.Prev(); } 106 | 107 | void Seek(std::string_view key) { iter_.Seek(MakeKey(&alloc_ptr_, key)); } 108 | 109 | void SeekToFirst() { iter_.SeekToFirst(); } 110 | 111 | void SeekToLast() { iter_.SeekToLast(); } 112 | 113 | Status status() { return Status::OK(); } 114 | 115 | private: 116 | MemTable::Table::Iterator iter_; 117 | std::string alloc_ptr_; 118 | }; 119 | 120 | Iterator* MemTable::NewIterator() { return new MemTableIterator(&table_); } 121 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/db/sstable/block_format.cc: -------------------------------------------------------------------------------- 1 | #include "crc32c/crc32c.h" 2 | #include "db/sstable/block_format.h" 3 | #include "util/coding.h" 4 | #include "include/option.h" 5 | #include "snappy.h" 6 | 7 | namespace lsmkv { 8 | 9 | BlockHandle::BlockHandle() 10 | : offset_(0),size_(0) {} 11 | 12 | Status BlockHandle::DecodeFrom(std::string_view* input) { 13 | if(GetVarint64(input, &offset_) && GetVarint64(input, &size_)) { 14 | return Status::OK(); 15 | } else { 16 | return Status::Corruption("bad block handle"); 17 | } 18 | } 19 | 20 | void BlockHandle::EncodeTo(std::string* dst) { 21 | PutVarint64(dst, offset_); 22 | PutVarint64(dst, size_); 23 | } 24 | 25 | Status Footer::DecodeFrom(std::string_view* input) { 26 | const char* magic_ptr = input->data() + KEncodeLength - 8; 27 | uint64_t magic = DecodeFixed64(magic_ptr); 28 | if (magic != KFooterMagicNum) { 29 | return Status::Corruption("not a sstable"); 30 | 
} 31 | Status s = index_block_handle_.DecodeFrom(input); 32 | if (s.ok()) { 33 | s = filter_index_handle_.DecodeFrom(input); 34 | } 35 | if (s.ok()) { 36 | const char* end = magic_ptr + 8; 37 | *input = std::string_view(end, input->data() + input->size() - end); 38 | } 39 | return s; 40 | } 41 | 42 | void Footer::EncodeTo(std::string* dst) { 43 | index_block_handle_.EncodeTo(dst); 44 | filter_index_handle_.EncodeTo(dst); 45 | dst->resize(2 * BlockHandle::KMaxEncodeLength); 46 | PutFixed64(dst, KFooterMagicNum); 47 | } 48 | 49 | Status ReadBlock(const ReadOption& option, RandomReadFile* file, 50 | const BlockHandle& handle, BlockContents* result) { 51 | result->data = std::string_view(); 52 | result->table_cache_ = false; 53 | result->heap_allocated_ = false; 54 | 55 | uint64_t n = handle.GetSize(); 56 | char* buf = new char[n + KBlockTailSize]; 57 | std::string_view contents; 58 | Status s = file->Read(handle.GetOffset(), n + KBlockTailSize, 59 | &contents, buf); 60 | if (contents.size() != n + KBlockTailSize) { 61 | delete[] buf; 62 | return Status::Corruption("file size is uncorrect"); 63 | } 64 | 65 | const char* data = contents.data(); 66 | if (option.check_crc) { 67 | const uint32_t crc = CrcUnMask(DecodeFixed32(data + n + 1)); 68 | const uint32_t actual = crc32c::Crc32c(data, n + 1); 69 | if (crc != actual) { 70 | delete[] buf; 71 | return Status::Corruption("crc check mismatch"); 72 | } 73 | } 74 | 75 | switch(data[n]) { 76 | case KUnCompress: 77 | if (data != buf) { 78 | // the mem of data is stored in other place. 79 | // don't need to cache and new heap buffer. 
80 | delete[] buf; 81 | result->data = std::string_view(data, n); 82 | result->heap_allocated_ = false; 83 | result->table_cache_ = false; 84 | } else { 85 | result->data = std::string_view(data, n); 86 | result->heap_allocated_ = true; 87 | result->table_cache_ = true; 88 | } 89 | break; 90 | case KSnappyCompress: { 91 | size_t uncompress_len = 0; 92 | if (!snappy::GetUncompressedLength(data, n, &uncompress_len)) { 93 | delete[] buf; 94 | return Status::Corruption("ReadBlock: snappy GetUncompressedLength error"); 95 | } 96 | char* uncompress_buf = new char[uncompress_len]; 97 | if(!snappy::RawUncompress(data, n, uncompress_buf)) { 98 | delete[] buf; 99 | delete[] uncompress_buf; 100 | return Status::Corruption("ReadBlock: snappy RawUncompress error"); 101 | } 102 | delete[] buf; 103 | result->data = std::string_view(uncompress_buf,uncompress_len); 104 | result->heap_allocated_ = true; 105 | result->table_cache_ = true; 106 | break; 107 | } 108 | default: 109 | delete[] buf; 110 | return Status::Corruption("ReadBlock: bad block record"); 111 | } 112 | return Status::OK(); 113 | } 114 | 115 | } -------------------------------------------------------------------------------- /lsmkv/util/logger.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_UTIL_LOGGER_H_ 2 | #define STORAGE_XDB_UTIL_LOGGER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "include/env.h" 10 | 11 | namespace lsmkv { 12 | class LoggerImpl : public Logger { 13 | public: 14 | explicit LoggerImpl(std::FILE* fp) : fp_(fp) {} 15 | ~LoggerImpl() override { std::fclose(fp_); } 16 | void Logv(const char* format, std::va_list ap) override { 17 | // Record the time as close to the Logv() call as possible. 
18 | struct ::timeval now_timeval; 19 | ::gettimeofday(&now_timeval, nullptr); 20 | const std::time_t now_seconds = now_timeval.tv_sec; 21 | struct std::tm now_components; 22 | ::localtime_r(&now_seconds, &now_components); 23 | 24 | // Record the thread ID. 25 | constexpr const int kMaxThreadIdSize = 32; 26 | std::ostringstream thread_stream; 27 | thread_stream << std::this_thread::get_id(); 28 | std::string thread_id = thread_stream.str(); 29 | if (thread_id.size() > kMaxThreadIdSize) { 30 | thread_id.resize(kMaxThreadIdSize); 31 | } 32 | 33 | // We first attempt to print into a stack-allocated buffer. If this attempt 34 | // fails, we make a second attempt with a dynamically allocated buffer. 35 | constexpr const int kStackBufferSize = 512; 36 | char stack_buffer[kStackBufferSize]; 37 | static_assert(sizeof(stack_buffer) == static_cast(kStackBufferSize), 38 | "sizeof(char) is expected to be 1 in C++"); 39 | 40 | int dynamic_buffer_size = 0; // Computed in the first iteration. 41 | for (int iteration = 0; iteration < 2; ++iteration) { 42 | const int buffer_size = 43 | (iteration == 0) ? kStackBufferSize : dynamic_buffer_size; 44 | char* const buffer = 45 | (iteration == 0) ? stack_buffer : new char[dynamic_buffer_size]; 46 | 47 | // Print the header into the buffer. 48 | int buffer_offset = std::snprintf( 49 | buffer, buffer_size, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %s ", 50 | now_components.tm_year + 1900, now_components.tm_mon + 1, 51 | now_components.tm_mday, now_components.tm_hour, now_components.tm_min, 52 | now_components.tm_sec, static_cast(now_timeval.tv_usec), 53 | thread_id.c_str()); 54 | 55 | // The header can be at most 28 characters (10 date + 15 time + 56 | // 3 delimiters) plus the thread ID, which should fit comfortably into the 57 | // static buffer. 
58 | assert(buffer_offset <= 28 + kMaxThreadIdSize); 59 | static_assert(28 + kMaxThreadIdSize < kStackBufferSize, 60 | "stack-allocated buffer may not fit the message header"); 61 | assert(buffer_offset < buffer_size); 62 | 63 | // Print the message into the buffer. 64 | std::va_list arguments_copy; 65 | va_copy(arguments_copy, ap); 66 | buffer_offset += 67 | std::vsnprintf(buffer + buffer_offset, buffer_size - buffer_offset, 68 | format, arguments_copy); 69 | va_end(arguments_copy); 70 | 71 | // The code below may append a newline at the end of the buffer, which 72 | // requires an extra character. 73 | if (buffer_offset >= buffer_size - 1) { 74 | // The message did not fit into the buffer. 75 | if (iteration == 0) { 76 | // Re-run the loop and use a dynamically-allocated buffer. The buffer 77 | // will be large enough for the log message, an extra newline and a 78 | // null terminator. 79 | dynamic_buffer_size = buffer_offset + 2; 80 | continue; 81 | } 82 | 83 | // The dynamically-allocated buffer was incorrectly sized. This should 84 | // not happen, assuming a correct implementation of std::(v)snprintf. 85 | // Fail in tests, recover by truncating the log message in production. 86 | assert(false); 87 | buffer_offset = buffer_size - 1; 88 | } 89 | 90 | // Add a newline if necessary. 
91 | if (buffer[buffer_offset - 1] != '\n') { 92 | buffer[buffer_offset] = '\n'; 93 | ++buffer_offset; 94 | } 95 | 96 | assert(buffer_offset <= buffer_size); 97 | std::fwrite(buffer, 1, buffer_offset, fp_); 98 | std::fflush(fp_); 99 | 100 | if (iteration != 0) { 101 | delete[] buffer; 102 | } 103 | break; 104 | } 105 | } 106 | private: 107 | std::FILE* const fp_; 108 | }; 109 | 110 | } 111 | 112 | #endif // STORAGE_XDB_UTIL_LOGGER_H_ -------------------------------------------------------------------------------- /test/filter_block_test.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "include/filter_policy.h" 3 | #include "db/filter/filter_block.h" 4 | #include "util/MurmurHash3.h" 5 | #include "util/coding.h" 6 | 7 | namespace lsmkv { 8 | class TestPolicy : public FilterPolicy { 9 | public: 10 | const char* Name() const override { 11 | return "TestPolicy"; 12 | } 13 | void CreatFilter(std::string_view* keys, int n, std::string* dst) const override { 14 | for (int i = 0; i < n; i++) { 15 | uint32_t h = murmur3::MurmurHash3_x86_32(keys[i].data(), keys[i].size(), 0x789fed11); 16 | PutFixed32(dst, h); 17 | } 18 | } 19 | bool KeyMayMatch(std::string_view key, std::string_view filter) const override { 20 | uint32_t h = murmur3::MurmurHash3_x86_32(key.data(), key.size(), 0x789fed11); 21 | for (size_t i = 0; i + 4 <= filter.size(); i += 4) { 22 | if (h == DecodeFixed32(filter.data() + i)) { 23 | return true; 24 | } 25 | } 26 | return false; 27 | } 28 | }; 29 | TEST(FilterTest, SimpleTest) { 30 | FilterPolicy* policy = NewBloomFilterPolicy(15); 31 | FilterBlockBuilder builder(policy); 32 | builder.StartBlock(0); 33 | builder.AddKey("xiao"); 34 | builder.AddKey("huang"); 35 | builder.AddKey("tong"); 36 | builder.AddKey("xue"); 37 | std::string_view content = builder.Finish(); 38 | FilterBlockReader reader(policy, content); 39 | ASSERT_TRUE(reader.KeyMayMatch(0,"xiao")); 40 | 
// Builds filters for blocks starting at offsets 0, 100, 3000 and 6000 and
// checks which keys each offset's filter reports. The assertions below expect
// offsets 0 and 100 to share one filter, and 3000 and 6000 to each have their
// own.
TEST(FilterTest, DifferentOffset) {
  FilterPolicy* policy = NewBloomFilterPolicy(10000);
  FilterBlockBuilder builder(policy);
  builder.StartBlock(0);
  builder.AddKey("xiao");
  builder.AddKey("huang");
  builder.AddKey("tong");
  builder.AddKey("xue");
  builder.StartBlock(100);
  builder.AddKey("da");
  builder.AddKey("huai");
  builder.StartBlock(3000);
  builder.AddKey("dan");
  builder.StartBlock(6000);
  builder.AddKey("xiu");
  std::string_view content = builder.Finish();
  FilterBlockReader reader(policy, content);
  // Keys added at offsets 0 and 100 are visible from both offsets.
  ASSERT_TRUE(reader.KeyMayMatch(0,"xiao"));
  ASSERT_TRUE(reader.KeyMayMatch(0,"huang"));
  ASSERT_TRUE(reader.KeyMayMatch(0,"tong"));
  ASSERT_TRUE(reader.KeyMayMatch(0,"xue"));
  ASSERT_TRUE(reader.KeyMayMatch(100,"xiao"));
  ASSERT_TRUE(reader.KeyMayMatch(100,"huang"));
  ASSERT_TRUE(reader.KeyMayMatch(100,"tong"));
  ASSERT_TRUE(reader.KeyMayMatch(100,"xue"));
  ASSERT_TRUE(reader.KeyMayMatch(0,"da"));
  ASSERT_TRUE(reader.KeyMayMatch(0,"huai"));
  ASSERT_TRUE(reader.KeyMayMatch(100,"da"));
  ASSERT_TRUE(reader.KeyMayMatch(100,"huai"));
  // Later offsets only see their own keys.
  ASSERT_TRUE(!reader.KeyMayMatch(3000,"da"));
  ASSERT_TRUE(!reader.KeyMayMatch(3000,"huai"));
  ASSERT_TRUE(reader.KeyMayMatch(3000,"dan"));
  ASSERT_TRUE(reader.KeyMayMatch(6000,"xiu"));
  ASSERT_TRUE(!reader.KeyMayMatch(3000,"do"));
  ASSERT_TRUE(!reader.KeyMayMatch(6000,"int"));
  // Release the policy, as SimpleTest does; it was previously leaked here.
  delete policy;
}
// Decodes a base-128 varint of up to 10 bytes from the range [p, limit).
//
// On success stores the value in `*result` and returns a pointer to the byte
// just past the varint. Returns nullptr, leaving `*result` untouched, when the
// range ends before a terminating byte (high bit clear) is seen or when no
// terminating byte appears within 10 bytes.
//
// The accumulator and the current byte are 64-bit: with 32-bit temporaries the
// value was truncated and any shift of 32 or more was undefined behaviour.
const char* DecodeVarint64(const char* p, const char* limit, uint64_t* result) {
  uint64_t buf = 0;
  for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) {
    const uint64_t byte = *(reinterpret_cast<const uint8_t*>(p));
    p++;
    if (byte & 128) {
      // Continuation byte: keep the low seven bits and read on.
      buf |= ((byte & 127) << shift);
    } else {
      // Terminating byte: the value is complete.
      buf |= (byte << shift);
      *result = buf;
      return p;
    }
  }
  return nullptr;
}
// Builds "<dbname>/<number>.<suffix>", with the number zero-padded to at least
// six decimal digits (wider numbers are printed in full).
static std::string MakeFileName(const std::string& dbname, uint64_t number, const char* suffix) {
  char tail[100];
  const auto printable_number = static_cast<unsigned long long>(number);
  std::snprintf(tail, sizeof(tail), "/%06llu.%s", printable_number, suffix);

  std::string path(dbname);
  path.append(tail);
  return path;
}
// Parses the run of decimal digits at the front of `*input` into `*num`.
//
// On success the digits are removed from `*input` and true is returned.
// If `*input` does not start with a digit, `*num` is set to 0, `*input` is
// left unchanged and false is returned.
// If the digits would overflow uint64_t, false is returned and neither
// `*num` nor `*input` is modified.
bool ParseNumder(std::string_view* input, uint64_t* num) {
  constexpr uint64_t kMax = std::numeric_limits<uint64_t>::max();
  constexpr uint64_t kMaxDiv10 = kMax / 10;
  constexpr uint64_t kMaxLastDigit = kMax % 10;

  const std::string_view text = *input;
  uint64_t parsed = 0;
  size_t digits = 0;
  while (digits < text.size()) {
    const unsigned char c = static_cast<unsigned char>(text[digits]);
    if (c < '0' || c > '9') {
      break;
    }
    const uint64_t digit = static_cast<uint64_t>(c - '0');
    // Appending this digit would exceed the uint64_t range.
    if (parsed > kMaxDiv10 || (parsed == kMaxDiv10 && digit > kMaxLastDigit)) {
      return false;
    }
    parsed = parsed * 10 + digit;
    ++digits;
  }

  *num = parsed;
  input->remove_prefix(digits);
  return digits > 0;
}
uint64_t number) { 98 | std::string content = MetaFileName(dbname, number); 99 | std::string_view meta_file_name = content; 100 | std::string tmp = TmpFileName(dbname, number); 101 | Status s = WriteStringToFileSync(env, meta_file_name, tmp); 102 | if (s.ok()) { 103 | s = env->RenameFile(tmp, CurrentFileName(dbname)); 104 | } else { 105 | env->RemoveFile(tmp); 106 | } 107 | return s; 108 | } 109 | 110 | Status ReadStringFromFile(Env* env, std::string* str, const std::string& filename) { 111 | str->clear(); 112 | SequentialFile* file; 113 | Status s = env->NewSequentialFile(filename, &file); 114 | if (!s.ok()) { 115 | return s; 116 | } 117 | static const size_t KBufSize = 4096; 118 | char* buf = new char[KBufSize]; 119 | while(true) { 120 | std::string_view sv; 121 | s = file->Read(KBufSize, &sv, buf); 122 | if (s.ok()) { 123 | str->append(sv.data(), sv.size()); 124 | } 125 | if (!s.ok() || sv.empty()) { 126 | break; 127 | } 128 | } 129 | delete[] buf; 130 | delete file; 131 | return s; 132 | } 133 | 134 | Status WriteStringToFileSync(Env* env, std::string_view str, const std::string& filename) { 135 | WritableFile* file; 136 | Status s = env->NewWritableFile(filename, &file); 137 | if (!s.ok()) { 138 | return s; 139 | } 140 | s = file->Append(str); 141 | if (s.ok()) { 142 | s = file->Sync(); 143 | } 144 | if (s.ok()) { 145 | s = file->Close(); 146 | } 147 | delete file; 148 | if (!s.ok()) { 149 | env->RemoveFile(filename); 150 | } 151 | return s; 152 | 153 | } 154 | } -------------------------------------------------------------------------------- /lsmkv/db/version/version_edit.cc: -------------------------------------------------------------------------------- 1 | #include "db/version/version_edit.h" 2 | 3 | #include "db/format/dbformat.h" 4 | #include "include/status.h" 5 | 6 | namespace lsmkv { 7 | 8 | enum Tag { 9 | KLogNumber = 1, 10 | KLastSequence = 2, 11 | KNextFileNumber = 3, 12 | KComparatorName = 4, 13 | KNewFiles = 5, 14 | KDeleteFiles = 6, 15 | 
KCompactionPointers = 7, 16 | }; 17 | 18 | void VersionEdit::EncodeTo(std::string* dst) { 19 | if (has_log_number_) { 20 | PutVarint32(dst, KLogNumber); 21 | PutVarint64(dst, log_number_); 22 | } 23 | if (has_last_sequence_) { 24 | PutVarint32(dst, KLastSequence); 25 | PutVarint64(dst, last_sequence_); 26 | } 27 | if (has_next_file_number_) { 28 | PutVarint32(dst, KNextFileNumber); 29 | PutVarint64(dst, next_file_number_); 30 | } 31 | if (has_comparator_name_) { 32 | PutVarint32(dst, KComparatorName); 33 | PutLengthPrefixedSlice(dst, comparator_name_); 34 | } 35 | for (size_t i = 0; i < new_files_.size(); i++) { 36 | const FileMeta& meta = new_files_[i].second; 37 | PutVarint32(dst, KNewFiles); 38 | PutVarint32(dst, new_files_[i].first); 39 | PutVarint64(dst, meta.number); 40 | PutVarint64(dst, meta.file_size); 41 | PutLengthPrefixedSlice(dst, meta.smallest.Encode()); 42 | PutLengthPrefixedSlice(dst, meta.largest.Encode()); 43 | } 44 | for (const auto& delete_file : delete_files_) { 45 | PutVarint32(dst, KDeleteFiles); 46 | PutVarint32(dst, delete_file.first); 47 | PutVarint64(dst, delete_file.second); 48 | } 49 | for (const auto& compaction_pointer : compaction_pointers_) { 50 | PutVarint32(dst, KCompactionPointers); 51 | PutVarint32(dst, compaction_pointer.first); 52 | PutLengthPrefixedSlice(dst, compaction_pointer.second.Encode()); 53 | } 54 | } 55 | 56 | bool GetLevel(std::string_view* input, int* value) { 57 | uint32_t v; 58 | if (GetVarint32(input, &v) && v < config::kNumLevels) { 59 | *value = v; 60 | return true; 61 | } 62 | return false; 63 | } 64 | 65 | bool GetInternalKey(std::string_view* input, InternalKey* key) { 66 | std::string_view str; 67 | if (GetLengthPrefixedSlice(input, &str)) { 68 | key->DecodeFrom(str); 69 | return true; 70 | } 71 | return false; 72 | } 73 | 74 | Status VersionEdit::DecodeFrom(std::string_view src) { 75 | std::string_view input = src; 76 | int level; 77 | uint32_t tag; 78 | uint64_t number; 79 | std::string_view str; 80 | 
FileMeta meta; 81 | InternalKey key; 82 | 83 | while (GetVarint32(&input, &tag)) { 84 | switch (tag) { 85 | case KComparatorName: 86 | if (!GetLengthPrefixedSlice(&input, &str)) { 87 | return Status::Corruption("VersionEdit DecodeFrom: comparator"); 88 | } 89 | comparator_name_ = str; 90 | has_comparator_name_ = true; 91 | break; 92 | case KLastSequence: 93 | if (!GetVarint64(&input, &number)) { 94 | return Status::Corruption("VersionEdit DecodeFrom: last_sequence"); 95 | } 96 | last_sequence_ = number; 97 | has_last_sequence_ = true; 98 | break; 99 | case KLogNumber: 100 | if (!GetVarint64(&input, &number)) { 101 | return Status::Corruption("VersionEdit DecodeFrom: log_number"); 102 | } 103 | log_number_ = number; 104 | has_log_number_ = true; 105 | break; 106 | case KNextFileNumber: 107 | if (!GetVarint64(&input, &number)) { 108 | return Status::Corruption("VersionEdit DecodeFrom: next_file_number"); 109 | } 110 | next_file_number_ = number; 111 | has_next_file_number_ = true; 112 | break; 113 | case KNewFiles: 114 | if (!GetLevel(&input, &level) || !GetVarint64(&input, &meta.number) || 115 | !GetVarint64(&input, &meta.file_size) || 116 | !GetInternalKey(&input, &meta.smallest) || 117 | !GetInternalKey(&input, &meta.largest)) { 118 | return Status::Corruption( 119 | "VersionEdit DecodeFrom: " 120 | "new_fhttps://github.com/ByteTech-7355608/douyin-server/pull/" 121 | "64iles"); 122 | } 123 | new_files_.emplace_back(level, meta); 124 | break; 125 | case KDeleteFiles: 126 | if (!GetLevel(&input, &level) || !GetVarint64(&input, &number)) { 127 | return Status::Corruption("VersionEdit DecodeFrom: delete_files"); 128 | } 129 | delete_files_.emplace(level, number); 130 | break; 131 | case KCompactionPointers: 132 | if (!GetLevel(&input, &level) || !GetInternalKey(&input, &key)) { 133 | return Status::Corruption( 134 | "VersionEdit DecodeFrom: compaction_pointer"); 135 | } 136 | compaction_pointers_.emplace_back(level, key); 137 | break; 138 | default: 139 | return 
Status::Corruption("VersionEdit DecodeFrom: unknown tag"); 140 | } 141 | } 142 | return Status::OK(); 143 | } 144 | 145 | }; // namespace lsmkv -------------------------------------------------------------------------------- /test/memtable_test.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "db/memtable/memtable.h" 3 | #include "include/env.h" 4 | #include "iostream" 5 | 6 | namespace lsmkv { 7 | 8 | TEST(MemTableTest, InternalKeyCmp) { 9 | InternalKeyComparator cmp(DefaultComparator()); 10 | InternalKey a(0, std::string_view("abc"), KTypeInsertion); 11 | InternalKey b(1, std::string_view("abc"), KTypeInsertion); 12 | ASSERT_EQ(cmp.Compare(a,b), +1); 13 | a = InternalKey(0, std::string_view("abc"), KTypeInsertion); 14 | b = InternalKey(1, std::string_view("bcd"), KTypeInsertion); 15 | ASSERT_LT(cmp.Compare(a,b), 0); 16 | } 17 | 18 | TEST(MemTableTest, MemtableInsertAndDelete) { 19 | InternalKeyComparator cmp(DefaultComparator()); 20 | MemTable* mem = new MemTable(cmp); 21 | mem->Ref(); 22 | mem->Put(0, KTypeInsertion, "a", "1"); 23 | mem->Put(1, KTypeInsertion, "a", "2"); 24 | mem->Put(2, KTypeInsertion, "a", "3"); 25 | mem->Put(3, KTypeInsertion, "a", "4"); 26 | // check the insert 27 | LookupKey key("a",4); 28 | std::string result; 29 | Status status; 30 | ASSERT_EQ(mem->Get(key,&result,&status), true); 31 | std::string str = result; 32 | ASSERT_EQ(str,std::string{"4"}); 33 | //check tje delete 34 | mem->Put(4, KTypeDeletion, "a", ""); 35 | ASSERT_EQ(mem->Get(key,&result,&status), true); 36 | ASSERT_EQ(status.IsNotFound(),true); 37 | // insert after delete 38 | LookupKey key1("a",5); 39 | mem->Put(5, KTypeInsertion, "a", "5"); 40 | ASSERT_EQ(mem->Get(key1,&result,&status), true); 41 | ASSERT_EQ(result,std::string{"5"}); 42 | // lower sequence get 43 | mem->Put(6, KTypeInsertion, "a", "6"); 44 | ASSERT_EQ(mem->Get(key1,&result,&status), true); 45 | ASSERT_EQ(result,std::string{"5"}); 
// Shared fixture for the memtable concurrency test: owns one MemTable, the
// expected value of every key, and the mutex/condition variable on which
// WaitDone() waits for done_num_ to reach a target.
class ConcurrencyTester {
public:
    // Creates a referenced MemTable and reserves N expected-value slots.
    ConcurrencyTester(int N)
        : rng_(std::random_device{}()),
        data_(N), cv_(&mu_), env(DefaultEnv()),done_num_(0) {
        InternalKeyComparator cmp(DefaultComparator());
        mem_ = new MemTable(cmp);
        mem_->Ref();
    }
    // Drops the reference taken in the constructor.
    ~ConcurrencyTester() { mem_->Unref(); }

    // Populates the memtable and records the expected result in data_.
    void Writer() {
        // Phase 1: key "i" gets a random value in [0, 100); the loop index is
        // passed as the first Put argument (presumably the sequence number).
        for(int i = 0; i < data_.size(); i++) {
            std::string val = std::to_string(rng_() % 100);
            data_[i] = val;
            mem_->Put(i, KTypeInsertion, std::to_string(i), val);
        }
        // Phase 2: delete size/10 randomly chosen keys (the same key may be
        // picked more than once). Deleted slots are marked "DELETE" in data_;
        // the first Put argument continues after the values used in phase 1.
        for(int i = 0; i < data_.size() / 10; i++) {
            uint32_t key_delete = rng_() % data_.size();
            data_[key_delete] = "DELETE";
            mem_->Put(i+data_.size(), KTypeDeletion, std::to_string(key_delete), "");
        }
    }

    // Blocks until done_num_ is at least n, waiting on cv_ while holding mu_.
    void WaitDone(int n) {
        mu_.Lock();
        while (done_num_ < n) {
            cv_.Wait();
        }
        mu_.Unlock();
    }
    MemTable* mem_;                  // memtable under test
    std::mt19937 rng_;               // source of random values, keys and delays
    std::vector<std::string> data_;  // expected value per key, or "DELETE"
    Mutex mu_;                       // declared before cv_, which is built from &mu_
    CondVar cv_;                     // waited on by WaitDone()
    Env* env;                        // set from DefaultEnv()
    int done_num_;                   // compared against n in WaitDone()
};
46 | ReadOption read_option; 47 | if (option.check_crc) { 48 | read_option.check_crc = true; 49 | } 50 | BlockContents index_block_contents; 51 | s = ReadBlock(read_option, file, footer.GetIndexHandle(), &index_block_contents); 52 | if(!s.ok()) return s; 53 | 54 | BlockReader* index_block = new BlockReader(index_block_contents); 55 | Rep* rep = new SSTableReader::Rep; 56 | rep->option = option; 57 | rep->file = file; 58 | rep->index_block = index_block; 59 | rep->filter_data = nullptr; 60 | rep->filter = nullptr; 61 | *table = new SSTableReader(rep); 62 | (*table)->ReadFilterIndex(footer); 63 | return s; 64 | } 65 | 66 | void SSTableReader::ReadFilterIndex(const Footer& footer) { 67 | if (rep_->option.filter_policy == nullptr) { 68 | return; 69 | } 70 | ReadOption read_option; 71 | if (rep_->option.check_crc) { 72 | read_option.check_crc = true; 73 | } 74 | BlockContents filter_index_contents; 75 | if (!ReadBlock(read_option,rep_->file, footer.GetFilterHandle(), 76 | &filter_index_contents).ok()) { 77 | return; 78 | } 79 | 80 | BlockReader* filter_index_block = new BlockReader(filter_index_contents); 81 | Iterator* iter = filter_index_block->NewIterator(DefaultComparator()); 82 | std::string key = "filter"; 83 | key.append(rep_->option.filter_policy->Name()); 84 | iter->Seek(key); 85 | 86 | if (iter->Valid() && iter->Key() == std::string_view(key)) { 87 | ReadFilter(iter->Value()); 88 | } 89 | 90 | delete filter_index_block; 91 | delete iter; 92 | } 93 | 94 | void SSTableReader::ReadFilter(std::string_view handle_contents) { 95 | std::string_view s = handle_contents; 96 | BlockHandle filter_handle; 97 | if(!filter_handle.DecodeFrom(&s).ok()) { 98 | return; 99 | } 100 | ReadOption read_option; 101 | if (rep_->option.check_crc) { 102 | read_option.check_crc = true; 103 | } 104 | BlockContents filter_contents; 105 | if (!ReadBlock(read_option,rep_->file, filter_handle, 106 | &filter_contents).ok()) { 107 | return; 108 | } 109 | if (!filter_contents.heap_allocated_) { 
110 | rep_->filter_data = filter_contents.data.data(); 111 | } 112 | rep_->filter = new FilterBlockReader(rep_->option.filter_policy, filter_contents.data); 113 | } 114 | 115 | static void DeleteBlock(void* arg, void* none) { 116 | delete reinterpret_cast(arg); 117 | } 118 | Iterator* SSTableReader::ReadBlockHandle(void* arg, const ReadOption& option, std::string_view handle_contents) { 119 | SSTableReader* table = reinterpret_cast(arg); 120 | BlockHandle handle; 121 | BlockReader* block = nullptr; 122 | BlockContents block_contents; 123 | std::string_view tmp = handle_contents; 124 | Status s = handle.DecodeFrom(&tmp); 125 | 126 | if (s.ok()) { 127 | s = ReadBlock(option, table->rep_->file, handle, &block_contents); 128 | if (s.ok()) { 129 | block = new BlockReader(block_contents); 130 | } 131 | } 132 | 133 | Iterator* iter; 134 | if (block == nullptr) { 135 | iter = NewErrorIterator(s); 136 | } else { 137 | iter = block->NewIterator(table->rep_->option.comparator); 138 | iter->AppendCleanup(&DeleteBlock, block, nullptr); 139 | } 140 | 141 | return iter; 142 | } 143 | 144 | Iterator* SSTableReader::NewIterator(const ReadOption& option) const { 145 | return NewTwoLevelIterator( 146 | rep_->index_block->NewIterator(rep_->option.comparator), 147 | &ReadBlockHandle, const_cast(this), option 148 | ); 149 | } 150 | 151 | Status SSTableReader::InternalGet(const ReadOption& option, std::string_view key, void* arg, 152 | void (*handle_result)(void*, std::string_view, std::string_view)) { 153 | Status s; 154 | Iterator* index_iter = rep_->index_block->NewIterator(rep_->option.comparator); 155 | index_iter->Seek(key); 156 | if (index_iter->Valid()) { 157 | std::string_view handle_content = index_iter->Value(); 158 | BlockHandle handle; 159 | FilterBlockReader* filter = rep_->filter; 160 | if (filter != nullptr && handle.DecodeFrom(&handle_content).ok() 161 | && filter->KeyMayMatch(handle.GetOffset(), key)) { 162 | // key is not found. 
163 | } else { 164 | Iterator* block_iter = ReadBlockHandle(this, option, handle_content); 165 | block_iter->Seek(key); 166 | if (block_iter->Valid()) { 167 | (*handle_result)(arg, block_iter->Key(), block_iter->Value()); 168 | } 169 | s = block_iter->status(); 170 | delete block_iter; 171 | } 172 | } 173 | if (s.ok()) { 174 | s = index_iter->status(); 175 | } 176 | delete index_iter; 177 | return s; 178 | } 179 | 180 | } 181 | 182 | -------------------------------------------------------------------------------- /lsmkv/util/iterator.cc: -------------------------------------------------------------------------------- 1 | #include "include/iterator.h" 2 | 3 | #include 4 | 5 | #include "include/option.h" 6 | 7 | namespace lsmkv { 8 | Iterator::Iterator() { 9 | cleanup_head_.fun = nullptr; 10 | cleanup_head_.next = nullptr; 11 | } 12 | 13 | void Iterator::AppendCleanup(CleanupFunction fun, void* arg1, void* arg2) { 14 | assert(fun != nullptr); 15 | CleanupNode* node; 16 | if (cleanup_head_.fun == nullptr) { 17 | node = &cleanup_head_; 18 | } else { 19 | node = new CleanupNode(); 20 | node->next = cleanup_head_.next; 21 | cleanup_head_.next = node; 22 | } 23 | node->fun = fun; 24 | node->arg1 = arg1; 25 | node->arg2 = arg2; 26 | } 27 | 28 | Iterator::~Iterator() { 29 | if (cleanup_head_.fun != nullptr) { 30 | cleanup_head_.Run(); 31 | CleanupNode* node = cleanup_head_.next; 32 | while (node != nullptr) { 33 | node->Run(); 34 | CleanupNode* next = node->next; 35 | delete node; 36 | node = next; 37 | } 38 | } 39 | } 40 | class EmptyIterator : public Iterator { 41 | public: 42 | EmptyIterator(const Status s) : status_(s) {} 43 | 44 | EmptyIterator(const EmptyIterator&) = delete; 45 | EmptyIterator& operator=(const EmptyIterator&) = delete; 46 | 47 | ~EmptyIterator() = default; 48 | 49 | bool Valid() const { return false; } 50 | void Next() { assert(false); } 51 | void Prev() { assert(false); } 52 | void Seek(std::string_view key) {} 53 | void SeekToFirst() {} 54 | void 
SeekToLast() {} 55 | Status status() { return status_; } 56 | std::string_view Key() const { 57 | assert(false); 58 | return std::string_view(); 59 | } 60 | std::string_view Value() const { 61 | assert(false); 62 | return std::string_view(); 63 | } 64 | 65 | private: 66 | Status status_; 67 | }; 68 | 69 | Iterator* NewEmptyIterator() { return new EmptyIterator(Status::OK()); } 70 | 71 | Iterator* NewErrorIterator(Status status) { return new EmptyIterator(status); } 72 | 73 | using BlockFunction = Iterator* (*)(void* arg, const ReadOption& option, 74 | std::string_view handle_contents); 75 | 76 | class TwoLevelIterator : public Iterator { 77 | public: 78 | TwoLevelIterator(Iterator* index_iter, BlockFunction block_funtion, void* arg, 79 | const ReadOption& option); 80 | ~TwoLevelIterator() override { 81 | delete data_iter_; 82 | delete index_iter_; 83 | } 84 | bool Valid() const override { 85 | return data_iter_ != nullptr && data_iter_->Valid(); 86 | } 87 | 88 | std::string_view Key() const override { 89 | assert(Valid()); 90 | return data_iter_->Key(); 91 | } 92 | 93 | std::string_view Value() const override { 94 | assert(Valid()); 95 | return data_iter_->Value(); 96 | } 97 | void Next() override; 98 | void Prev() override; 99 | void Seek(std::string_view key) override; 100 | void SeekToFirst() override; 101 | void SeekToLast() override; 102 | 103 | Status status() override { 104 | if (!index_iter_->status().ok()) { 105 | return index_iter_->status(); 106 | } else if (data_iter_ != nullptr && !data_iter_->status().ok()) { 107 | return data_iter_->status(); 108 | } 109 | return status_; 110 | } 111 | 112 | private: 113 | void UpdateDataBlock(); 114 | void SetDataIterator(Iterator* data_iter); 115 | void SaveError(const Status& s) { 116 | if (status_.ok() && !s.ok()) { 117 | status_ = s; 118 | } 119 | } 120 | 121 | void SkipEmptyDataBlock(bool is_forward); 122 | 123 | BlockFunction block_function_; 124 | Iterator* index_iter_; 125 | Iterator* data_iter_; 126 | void* 
args_; 127 | const ReadOption option_; 128 | Status status_; 129 | std::string data_block_handle_; 130 | }; 131 | Iterator* NewTwoLevelIterator( 132 | Iterator* index_iter, 133 | Iterator* (*block_funtion)(void* arg, const ReadOption& option, 134 | std::string_view handle_contents), 135 | void* arg, const ReadOption& option) { 136 | return new TwoLevelIterator(index_iter, block_funtion, arg, option); 137 | } 138 | 139 | TwoLevelIterator::TwoLevelIterator(Iterator* index_iter, 140 | BlockFunction block_funtion, void* arg, 141 | const ReadOption& option) 142 | : index_iter_(index_iter), 143 | data_iter_(nullptr), 144 | block_function_(block_funtion), 145 | args_(arg), 146 | option_(option) {} 147 | 148 | void TwoLevelIterator::SetDataIterator(Iterator* data_iter) { 149 | if (data_iter_ != nullptr) { 150 | SaveError(data_iter_->status()); 151 | } 152 | delete data_iter_; 153 | data_iter_ = data_iter; 154 | } 155 | void TwoLevelIterator::UpdateDataBlock() { 156 | if (!index_iter_->Valid()) { 157 | SetDataIterator(nullptr); 158 | } else { 159 | std::string_view handle = index_iter_->Value(); 160 | if (data_iter_ != nullptr && handle.compare(data_block_handle_) == 0) { 161 | return; 162 | } 163 | Iterator* iter = (*block_function_)(args_, option_, handle); 164 | data_block_handle_.assign(handle.data(), handle.size()); 165 | SetDataIterator(iter); 166 | } 167 | } 168 | 169 | void TwoLevelIterator::SkipEmptyDataBlock(bool is_forward) { 170 | while (data_iter_ == nullptr || !data_iter_->Valid()) { 171 | if (!index_iter_->Valid()) { 172 | SetDataIterator(nullptr); 173 | return; 174 | } 175 | if (is_forward) { 176 | index_iter_->Next(); 177 | UpdateDataBlock(); 178 | if (data_iter_ != nullptr) { 179 | data_iter_->SeekToFirst(); 180 | } 181 | } else { 182 | index_iter_->Prev(); 183 | UpdateDataBlock(); 184 | if (data_iter_ != nullptr) { 185 | data_iter_->SeekToLast(); 186 | } 187 | } 188 | } 189 | } 190 | 191 | void TwoLevelIterator::Next() { 192 | assert(Valid()); 193 | 
data_iter_->Next(); 194 | SkipEmptyDataBlock(true); 195 | } 196 | 197 | void TwoLevelIterator::Prev() { 198 | assert(Valid()); 199 | data_iter_->Prev(); 200 | SkipEmptyDataBlock(false); 201 | } 202 | 203 | void TwoLevelIterator::Seek(std::string_view key) { 204 | index_iter_->Seek(key); 205 | UpdateDataBlock(); 206 | if (data_iter_ != nullptr) { 207 | data_iter_->Seek(key); 208 | } 209 | SkipEmptyDataBlock(true); 210 | } 211 | 212 | void TwoLevelIterator::SeekToFirst() { 213 | index_iter_->SeekToFirst(); 214 | UpdateDataBlock(); 215 | if (data_iter_ != nullptr) { 216 | data_iter_->SeekToFirst(); 217 | } 218 | SkipEmptyDataBlock(true); 219 | } 220 | 221 | void TwoLevelIterator::SeekToLast() { 222 | index_iter_->SeekToLast(); 223 | UpdateDataBlock(); 224 | if (data_iter_ != nullptr) { 225 | data_iter_->SeekToLast(); 226 | } 227 | SkipEmptyDataBlock(false); 228 | } 229 | } // namespace lsmkv -------------------------------------------------------------------------------- /test/db_test.cc: -------------------------------------------------------------------------------- 1 | #include "include/db.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "crc32c/crc32c.h" 7 | #include "gtest/gtest.h" 8 | namespace lsmkv { 9 | 10 | TEST(DBTest, Sometest) { 11 | uint8_t buf[10] = {0x01, 0x02, 0x03}; 12 | uint32_t crc1 = crc32c::Extend(0, buf, 1); 13 | uint32_t crc2 = crc32c::Extend(0, buf, 3); 14 | uint32_t crc3 = crc32c::Extend(crc1, buf + 1, 2); 15 | ASSERT_EQ(crc2, crc3); 16 | } 17 | 18 | TEST(DBTest, OpenTest) { 19 | Option option; 20 | DB* db; 21 | DestoryDB(option, "/home/lei/MyLSMKV/folder_for_test/db_test"); 22 | DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/db_test", &db); 23 | delete db; 24 | db = nullptr; 25 | DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/db_test", &db); 26 | delete db; 27 | } 28 | 29 | TEST(DBTest, RecoverTest) { 30 | Option option; 31 | WriteOption write_option; 32 | ReadOption read_option; 33 | DB* db; 34 | DestoryDB(option, 
"/home/lei/MyLSMKV/folder_for_test/db_test"); 35 | DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/db_test", &db); 36 | db->Put(write_option, "a", "1"); 37 | db->Put(write_option, "a", "2"); 38 | db->Put(write_option, "a", "3"); 39 | db->Put(write_option, "a", "4"); 40 | db->Put(write_option, "a", "5"); 41 | db->Put(write_option, "a", "6"); 42 | std::string result; 43 | db->Get(read_option, "a", &result); 44 | ASSERT_EQ(result, "6"); 45 | delete db; 46 | db = nullptr; 47 | DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/db_test", &db); 48 | std::string result1; 49 | db->Get(read_option, "a", &result1); 50 | ASSERT_EQ(result1, "6"); 51 | delete db; 52 | } 53 | 54 | TEST(DBTest, SimpleTest) { 55 | Option option; 56 | WriteOption write_option; 57 | ReadOption read_option; 58 | DB* db; 59 | DestoryDB(option, "/home/lei/MyLSMKV/folder_for_test/db_test"); 60 | DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/db_test", &db); 61 | db->Put(write_option, "a", "1"); 62 | db->Put(write_option, "a", "2"); 63 | db->Put(write_option, "a", "3"); 64 | db->Put(write_option, "a", "4"); 65 | db->Put(write_option, "a", "5"); 66 | db->Put(write_option, "a", "6"); 67 | std::string result; 68 | db->Get(read_option, "a", &result); 69 | ASSERT_EQ(result, "6"); 70 | delete db; 71 | } 72 | TEST(DBTest, MutiKeyTest) { 73 | Option option; 74 | WriteOption write_option; 75 | ReadOption read_option; 76 | DB* db; 77 | DestoryDB(option, "/home/lei/MyLSMKV/folder_for_test/db_test"); 78 | DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/db_test", &db); 79 | std::mt19937 rng(std::random_device{}()); 80 | const size_t data_size = 1000; 81 | std::vector data(data_size); 82 | for (int iter = 0; iter < 10; iter++) { 83 | for (int i = 0; i < data_size; i++) { 84 | std::string val = std::to_string(rng() % data_size); 85 | db->Put(write_option, std::to_string(i), val); 86 | data[i] = val; 87 | } 88 | } 89 | std::string result; 90 | for (int i = 0; i < data_size; i++) { 91 | db->Get(read_option, 
// Writes 1000 keys ("0".."999") with random values, deletes the first 100, and
// checks that every key reads back as its expected value, with deleted keys
// expected to read back as the empty string.
TEST(DBTest, DeletionTest) {
  const char* const kDbPath = "/home/lei/MyLSMKV/folder_for_test/db_test";

  Option option;
  WriteOption write_option;
  ReadOption read_option;
  // Start from a known value so a failed Open cannot leave a wild pointer.
  DB* db = nullptr;
  DestoryDB(option, kDbPath);
  DB::Open(option, kDbPath, &db);
  ASSERT_TRUE(db != nullptr);

  std::mt19937 rng(std::random_device{}());
  std::vector<std::string> data(1000);
  for (int i = 0; i < 1000; i++) {
    std::string val = std::to_string(rng() % 1000);
    db->Put(write_option, std::to_string(i), val);
    data[i] = val;
  }

  for (int i = 0; i < 100; i++) {
    db->Delete(write_option, std::to_string(i));
    data[i] = "";
  }

  for (int i = 0; i < 1000; i++) {
    // Fresh string per lookup, so a value left over from an earlier Get
    // cannot make a missed or deleted key appear to match.
    std::string result;
    db->Get(read_option, std::to_string(i), &result);
    ASSERT_EQ(result, data[i]);
  }
  delete db;
}
std::memory_order_release); 168 | } 169 | void TestThread1(void* arg) { 170 | // std::cout<<"testthread "<< std::this_thread::get_id() <<"start" 171 | // <(arg); 173 | TestState* ts = state->state; 174 | WriteOption write_option; 175 | Status s; 176 | for (int i = 0; i < 100000; i++) { 177 | s = ts->db->Put(write_option, "1", "0"); 178 | } 179 | ts->done.fetch_add(1, std::memory_order_release); 180 | } 181 | TEST(DBTest, ConcurrencyTest) { 182 | Option option; 183 | WriteOption write_option; 184 | DB* db; 185 | DestoryDB(option, "/home/lei/MyLSMKV/folder_for_test/db_test"); 186 | DB::Open(option, "/home/lei/MyLSMKV/folder_for_test/db_test", &db); 187 | TestState test_state(db); 188 | ThreadState thread[KthreadNum]; 189 | // std::cout<<"ConcurrencyTest start"< sizeof(uint32_t)); 10 | return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); 11 | } 12 | 13 | BlockReader::BlockReader(const BlockContents& contents) 14 | : data_(contents.data.data()), 15 | size_(contents.data.size()), 16 | owned_(contents.heap_allocated_) { 17 | if (size_ < sizeof(uint32_t)) { 18 | size_ = 0; 19 | } else { 20 | size_t restarts_max = (size_ - sizeof(uint32_t)) / sizeof(uint32_t); 21 | if (restarts_max < NumRestarts()) { 22 | size_ = 0; 23 | } else { 24 | restarts_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); 25 | } 26 | } 27 | } 28 | BlockReader::~BlockReader() { 29 | if (owned_) { 30 | delete[] data_; 31 | } 32 | } 33 | class BlockReader::Iter : public Iterator { 34 | public: 35 | Iter(const Comparator* cmp, const char* data, uint32_t restart_offset, 36 | uint32_t num_restarts) 37 | : cmp_(cmp), 38 | data_(data), 39 | num_restarts_(num_restarts), 40 | restart_offset_(restart_offset), 41 | offset_(restart_offset), 42 | restart_index_(num_restarts) { 43 | assert(num_restarts_ > 0); 44 | } 45 | bool Valid() const override { return offset_ < restart_offset_; } 46 | 47 | std::string_view Key() const override { 48 | assert(Valid()); 49 | return key_; 50 | } 51 | 52 | 
std::string_view Value() const override { 53 | assert(Valid()); 54 | return value_; 55 | } 56 | 57 | void Next() override { 58 | assert(Valid()); 59 | SeekNextKey(); 60 | } 61 | 62 | void Prev() override { 63 | assert(Valid()); 64 | uint32_t old_offset = offset_; 65 | while (GetRestartOffset(restart_index_) >= old_offset) { 66 | // the entry is first entry, set Vaild() to false 67 | if (restart_index_ == 0) { 68 | offset_ = restart_offset_; 69 | restart_index_ = num_restarts_; 70 | return; 71 | } 72 | --restart_index_; 73 | } 74 | SeekToRestart(restart_index_); 75 | while (SeekNextKey() && NextEntryOffset() < old_offset) { 76 | } 77 | } 78 | 79 | void Seek(std::string_view key) override { 80 | uint32_t lo = 0; 81 | uint32_t hi = num_restarts_ - 1; 82 | 83 | int current_compare = 0; 84 | // if iter is valid, use current key to speed up. 85 | if (Valid()) { 86 | current_compare = cmp_->Compare(key_, key); 87 | if (current_compare < 0) { 88 | lo = restart_index_; 89 | } else if (current_compare > 0) { 90 | hi = restart_index_; 91 | } else { 92 | return; 93 | } 94 | } 95 | // binart search target: 96 | // find a max "lo" which key at "lo" is less than "key". 97 | // smaller "lo" is ok but bigger "lo" will cause uncorrect. 98 | while (lo < hi) { 99 | // in case of "lo == hi - 1", mid will be hi. 100 | // avoid dead loop when "lo == hi - 1 && cmp < 0". 101 | int mid = (lo + hi + 1) / 2; 102 | uint32_t restart_offset = GetRestartOffset(mid); 103 | uint32_t shared, non_shared, value_len; 104 | const char* p = 105 | ParsedEntry(data_ + restart_offset, data_ + restart_offset_, &shared, 106 | &non_shared, &value_len); 107 | if (p == nullptr || shared != 0) { 108 | Error(); 109 | return; 110 | } 111 | std::string_view mid_key(p, non_shared); 112 | if (cmp_->Compare(mid_key, key) < 0) { 113 | // key at "lo" is less than "key" 114 | // set lo to mid is safe. 115 | lo = mid; 116 | } else { 117 | // key at "hi" is >= than "key" 118 | // it is ok to set a small hi (even to -1). 
119 | hi = mid - 1; 120 | } 121 | } 122 | // if iter is valid and current key is less than "key" 123 | // skip the seek. 124 | if (!(lo == restart_index_ && current_compare < 0)) { 125 | SeekToRestart(lo); 126 | } 127 | // linear search the first key >= "key" 128 | while (true) { 129 | if (!SeekNextKey()) { 130 | return; 131 | } 132 | if (cmp_->Compare(key_, key) >= 0) { 133 | return; 134 | } 135 | } 136 | } 137 | 138 | void SeekToFirst() override { 139 | SeekToRestart(0); 140 | SeekNextKey(); 141 | } 142 | 143 | void SeekToLast() override { 144 | SeekToRestart(num_restarts_ - 1); 145 | while (SeekNextKey() && NextEntryOffset() < restart_offset_) { 146 | } 147 | } 148 | Status status() override { return status_; } 149 | 150 | private: 151 | uint32_t NextEntryOffset() const { 152 | return value_.data() + value_.size() - data_; 153 | } 154 | void Error() { 155 | offset_ = restart_offset_; 156 | restart_index_ = num_restarts_; 157 | status_ = Status::Corruption("bad entry in block"); 158 | key_.clear(); 159 | value_ = ""; 160 | } 161 | const char* ParsedEntry(const char* p, const char* limit, uint32_t* shared, 162 | uint32_t* non_shared, uint32_t* value_len) { 163 | if ((p = DecodeVarint32(p, limit, shared)) == nullptr) return nullptr; 164 | if ((p = DecodeVarint32(p, limit, non_shared)) == nullptr) return nullptr; 165 | if ((p = DecodeVarint32(p, limit, value_len)) == nullptr) return nullptr; 166 | 167 | if (static_cast(limit - p) < (*non_shared + *value_len)) { 168 | return nullptr; 169 | } 170 | return p; 171 | } 172 | uint32_t GetRestartOffset(uint32_t restart_index) const { 173 | assert(restart_index < num_restarts_); 174 | return DecodeFixed32(data_ + restart_offset_ + 175 | restart_index * sizeof(uint32_t)); 176 | } 177 | void SeekToRestart(uint32_t restart_index) { 178 | key_.clear(); 179 | restart_index_ = restart_index; 180 | uint32_t offset = GetRestartOffset(restart_index); 181 | value_ = std::string_view(data_ + offset, 0); 182 | } 183 | bool 
SeekNextKey() { 184 | offset_ = NextEntryOffset(); 185 | const char* p = data_ + offset_; 186 | const char* limit = data_ + restart_offset_; 187 | if (p >= limit) { 188 | // the entry is last entry, set Vaild() to false 189 | offset_ = restart_offset_; 190 | restart_index_ = num_restarts_; 191 | return false; 192 | } 193 | uint32_t shared, non_shared, value_len; 194 | p = ParsedEntry(p, limit, &shared, &non_shared, &value_len); 195 | if (p == nullptr || key_.size() < shared) { 196 | Error(); 197 | return false; 198 | } 199 | key_.resize(shared); 200 | key_.append(p, non_shared); 201 | value_ = std::string_view(p + non_shared, value_len); 202 | while (restart_index_ + 1 < num_restarts_ && 203 | GetRestartOffset(restart_index_ + 1) <= offset_) { 204 | ++restart_index_; 205 | } 206 | return true; 207 | } 208 | const Comparator* cmp_; 209 | const char* data_; 210 | const uint32_t num_restarts_; 211 | const uint32_t restart_offset_; 212 | uint32_t offset_; 213 | uint32_t restart_index_; 214 | std::string key_; 215 | std::string_view value_; 216 | Status status_; 217 | }; 218 | 219 | Iterator* BlockReader::NewIterator(const Comparator* cmp) { 220 | if (size_ < sizeof(uint32_t)) { 221 | return NewErrorIterator(Status::Corruption("bad block record")); 222 | } 223 | int num_restarts = NumRestarts(); 224 | if (num_restarts == 0) { 225 | return NewEmptyIterator(); 226 | } 227 | return new Iter(cmp, data_, restarts_offset_, num_restarts); 228 | } 229 | 230 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/db/memtable/skiplist.h: -------------------------------------------------------------------------------- 1 | #ifndef SKIPLIST_H 2 | #define SKIPLIST_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "db/memtable/arena.h" 10 | namespace lsmkv { 11 | 12 | template 13 | class SkipList { 14 | private: 15 | struct Node; 16 | 17 | public: 18 | explicit SkipList(Comparator cmp_, Arena* arena); 19 | 
20 | SkipList(const SkipList&) = delete; 21 | SkipList& operator=(const SkipList&) = delete; 22 | 23 | /** 24 | * @brief 插入一个跳表节点 25 | * @param[in] key 插入节点的key 26 | */ 27 | void Insert(const Key& key); 28 | 29 | /** 30 | * @brief 判断一个key在不在跳表中 31 | * @param[in] key 节点的key 32 | * @return 33 | @retval true 跳表包含该key 34 | @retval false 跳表不包含该key 35 | */ 36 | bool Contain(const Key& key); 37 | 38 | class Iterator { 39 | public: 40 | explicit Iterator(const SkipList* list) : list_(list), node_(nullptr) {} 41 | 42 | bool Valid() const { return node_ != nullptr; } 43 | 44 | const Key& key() const { 45 | assert(Valid()); 46 | return node_->key_; 47 | } 48 | 49 | void Next() { 50 | assert(Valid()); 51 | node_ = node_->next_[0]; 52 | } 53 | 54 | void Prev(); 55 | 56 | void Seek(const Key& key); 57 | 58 | void SeekToFirst(); 59 | 60 | void SeekToLast(); 61 | 62 | private: 63 | const SkipList* list_; 64 | Node* node_; 65 | }; 66 | 67 | private: 68 | /// 跳表中节点的最大高度 69 | static constexpr int KMaxHeight = 12; 70 | 71 | /** 72 | * @brief 生成一个新的跳表节点 73 | * @param[in] key 节点的key 74 | * @param[in] height 节点的高度 75 | * @return Node* 指向新节点的指针 76 | */ 77 | Node* NewNode(const Key& key, int height); 78 | 79 | /** 80 | * @brief 通过随机概率算法得到新跳表节点的高度 81 | */ 82 | int RandomHeight(); 83 | 84 | /** 85 | * @brief 获取跳表当前的最大高度 86 | */ 87 | int GetMaxHeight() const { 88 | return max_height_.load(std::memory_order_relaxed); 89 | } 90 | 91 | /** 92 | * @brief 找到比key大或相等的第一个节点 93 | */ 94 | Node* FindGreaterOrEqual(const Key& key, Node** prev) const; 95 | 96 | /** 97 | * @brief 找到比key小或相等,但最接近key的节点 98 | */ 99 | Node* FindLess(const Key& key) const; 100 | 101 | /** 102 | * @brief 返回跳表最后一个节点,即右下角的节点 103 | */ 104 | Node* FindLast() const; 105 | 106 | private: 107 | /// 节点之间的key比较器 108 | Comparator cmp_; 109 | /// 为跳表节点申请内存的内存分配器 110 | Arena* arena_; 111 | /// 跳表的头节点 112 | Node* head_; 113 | /// 跳表当前的最大高度 114 | std::atomic max_height_; 115 | /// 用于随机概率算法 116 | std::mt19937 rng_; 117 | }; 118 | 119 | 
template 120 | struct SkipList::Node { 121 | explicit Node(const Key& key) : key_(key) {} 122 | Node* Next(int level) { 123 | assert(level >= 0); 124 | return next_[level].load(std::memory_order_acquire); 125 | } 126 | void SetNext(int level, Node* x) { 127 | assert(level >= 0); 128 | next_[level].store(x, std::memory_order_release); 129 | } 130 | Node* RelaxedNext(int level) { 131 | assert(level >= 0); 132 | return next_[level].load(std::memory_order_relaxed); 133 | } 134 | void RelaxedSetNext(int level, Node* x) { 135 | assert(level >= 0); 136 | next_[level].store(x, std::memory_order_relaxed); 137 | } 138 | Key const key_; 139 | std::atomic next_[1]; 140 | }; 141 | 142 | template 143 | SkipList::SkipList(Comparator cmp, Arena* arena) 144 | : cmp_(cmp), 145 | arena_(arena), 146 | head_(NewNode(0, KMaxHeight)), 147 | max_height_(1), 148 | rng_(std::random_device{}()) { 149 | for (int i = 0; i < KMaxHeight; i++) head_->SetNext(i, nullptr); 150 | } 151 | 152 | template 153 | typename SkipList::Node* SkipList::NewNode( 154 | const Key& key, int height) { 155 | char* const node_memory = arena_->AllocateAlign( 156 | sizeof(Node) + sizeof(std::atomic) * (height - 1)); 157 | return new (node_memory) Node(key); 158 | } 159 | 160 | template 161 | int SkipList::RandomHeight() { 162 | int height = 1; 163 | static constexpr int KProbability = 4; 164 | while (height < KMaxHeight) { 165 | if ((rng_() % KProbability) != 0) break; 166 | height++; 167 | } 168 | return height; 169 | } 170 | template 171 | typename SkipList::Node* 172 | SkipList::FindGreaterOrEqual(const Key& key, 173 | Node** prev) const { 174 | Node* ret = head_; 175 | int height = GetMaxHeight() - 1; 176 | while (true) { 177 | Node* next = ret->next_[height]; 178 | if (next != nullptr && cmp_(key, next->key_) > 0) { 179 | ret = next; 180 | } else { 181 | if (prev != nullptr) prev[height] = ret; 182 | if (height == 0) return next; 183 | height--; 184 | } 185 | } 186 | } 187 | 188 | template 189 | typename 
SkipList::Node* SkipList::FindLess( 190 | const Key& key) const { 191 | Node* ret = head_; 192 | int height = GetMaxHeight() - 1; 193 | while (true) { 194 | Node* next = ret->next_[height]; 195 | if (next != nullptr && cmp_(key, next->key_) > 0) { 196 | ret = next; 197 | } else { 198 | if (height == 0) return ret; 199 | height--; 200 | } 201 | } 202 | } 203 | 204 | template 205 | typename SkipList::Node* SkipList::FindLast() 206 | const { 207 | Node* node = head_; 208 | int height = GetMaxHeight() - 1; 209 | while (true) { 210 | Node* next = node->Next(height); 211 | if (next != nullptr) { 212 | node = next; 213 | } else { 214 | if (height == 0) return node; 215 | height--; 216 | } 217 | } 218 | } 219 | 220 | template 221 | void SkipList::Insert(const Key& key) { 222 | Node* prev[KMaxHeight]; 223 | Node* next = FindGreaterOrEqual(key, prev); 224 | 225 | int height = RandomHeight(); 226 | Node* node = NewNode(key, height); 227 | 228 | if (height > GetMaxHeight()) { 229 | for (int i = GetMaxHeight(); i < height; i++) { 230 | prev[i] = head_; 231 | } 232 | max_height_.store(height, std::memory_order_relaxed); 233 | } 234 | // (1) RelaxedSetNext is ok, because the node hasn't been add to SkipList. 235 | // Other users won't see the change. 
236 | // (2) RelaxedNext is ok, "prev[i]->SetNext(i, node)" 237 | // will make other users' "prev[i]->RelaxedNext(i)" see the change; 238 | for (int i = 0; i < height; i++) { 239 | node->RelaxedSetNext(i, prev[i]->RelaxedNext(i)); 240 | prev[i]->SetNext(i, node); 241 | } 242 | } 243 | 244 | template 245 | bool SkipList::Contain(const Key& key) { 246 | Node* node = FindGreaterOrEqual(key, nullptr); 247 | return (node != nullptr) && cmp_(key, node->key_) == 0; 248 | } 249 | 250 | template 251 | void SkipList::Iterator::Prev() { 252 | assert(Valid()); 253 | node_ = list_->FindLess(node_->key_); 254 | if (node_ == list_->head_) { 255 | node_ = nullptr; 256 | } 257 | } 258 | 259 | template 260 | void SkipList::Iterator::SeekToFirst() { 261 | node_ = list_->head_->Next(0); 262 | } 263 | 264 | template 265 | void SkipList::Iterator::Seek(const Key& key) { 266 | node_ = list_->FindGreaterOrEqual(key, nullptr); 267 | } 268 | 269 | template 270 | void SkipList::Iterator::SeekToLast() { 271 | node_ = list_->FindLast(); 272 | if (node_ == list_->head_) { 273 | node_ = nullptr; 274 | } 275 | } 276 | 277 | } // namespace lsmkv 278 | 279 | #endif // SKIPLIST_H -------------------------------------------------------------------------------- /lsmkv/util/file.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_UTIL_FILE_H_ 2 | #define STORAGE_XDB_UTIL_FILE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "include/status.h" 14 | 15 | namespace lsmkv { 16 | 17 | constexpr const size_t KWritableFileBufferSize = 1 << 16; 18 | 19 | static Status SystemError(const std::string& msg, int error_num) { 20 | if (error_num == ENOENT) { 21 | return Status::NotFound(msg, std::strerror(error_num)); 22 | } else { 23 | return Status::IOError(msg, std::strerror(error_num)); 24 | } 25 | } 26 | 27 | class Limiter { 28 | public: 29 | Limiter(int max_acquire) : 
// Counting limiter for a bounded resource: hands out at most max_acquire
// slots at a time. Acquire() and Release() work on one atomic counter.
class Limiter {
 public:
  // max_acquire: number of slots that may be held simultaneously (>= 0).
  Limiter(int max_acquire) : acquire_remains_(max_acquire) {
    assert(max_acquire >= 0);
  }

  Limiter(const Limiter&) = delete;
  Limiter& operator=(const Limiter&) = delete;

  // Tries to take one slot. Returns true on success; when no slot was left
  // the counter is restored and false is returned.
  bool Acquire() {
    const int remaining_before =
        acquire_remains_.fetch_sub(1, std::memory_order_relaxed);
    if (remaining_before > 0) return true;
    // Nothing was available: undo the decrement made above.
    acquire_remains_.fetch_add(1, std::memory_order_relaxed);
    return false;
  }

  // Gives back one slot previously obtained through Acquire().
  void Release() { acquire_remains_.fetch_add(1, std::memory_order_relaxed); }

 private:
  // Number of slots still available.
  std::atomic<int> acquire_remains_;
};
length_(length), 101 | mmap_base_(mmap_base) {} 102 | ~MmapRandomReadFile() { 103 | ::munmap(static_cast(mmap_base_), length_); 104 | limiter_->Release(); 105 | } 106 | 107 | Status Read(uint64_t offset, uint64_t n, std::string_view* result, 108 | char* buffer) const override { 109 | if (offset + n > length_) { 110 | *result = std::string_view(); 111 | return SystemError(filename_, EINVAL); 112 | } 113 | *result = std::string_view(mmap_base_ + offset, n); 114 | return Status::OK(); 115 | } 116 | 117 | private: 118 | Limiter* const limiter_; 119 | const std::string filename_; 120 | const size_t length_; 121 | char* const mmap_base_; 122 | }; 123 | 124 | class PreadRamdomReadFile : public RandomReadFile { 125 | public: 126 | PreadRamdomReadFile(std::string filename, Limiter* limiter, int fd) 127 | : limiter_(limiter), 128 | filename_(std::move(filename)), 129 | has_fd_(limiter_->Acquire()), 130 | fd_(has_fd_ ? fd : -1) {} 131 | ~PreadRamdomReadFile() { 132 | if (has_fd_) { 133 | assert(fd_ != -1); 134 | ::close(fd_); 135 | limiter_->Release(); 136 | } 137 | } 138 | 139 | Status Read(uint64_t offset, uint64_t n, std::string_view* result, 140 | char* buffer) const override { 141 | int fd = fd_; 142 | Status status; 143 | if (!has_fd_) { 144 | fd = ::open(filename_.data(), O_RDONLY); 145 | if (fd < 0) { 146 | return SystemError(filename_, errno); 147 | } 148 | } 149 | ssize_t read_size = ::pread(fd, buffer, n, static_cast(offset)); 150 | if (read_size < 0) { 151 | status = SystemError(filename_, errno); 152 | } 153 | if (!has_fd_) { 154 | ::close(fd); 155 | } 156 | *result = std::string_view(buffer, read_size); 157 | return status; 158 | } 159 | 160 | private: 161 | Limiter* const limiter_; 162 | const std::string filename_; 163 | const bool has_fd_; 164 | const int fd_; 165 | }; 166 | class WritableFile { 167 | public: 168 | WritableFile(std::string filename, int fd) 169 | : pos_(0), 170 | fd_(fd), 171 | filename_(std::move(filename)), 172 | 
dirname_(Dirname(filename_)) {} 173 | ~WritableFile() { 174 | if (fd_ >= 0) { 175 | ::close(fd_); 176 | } 177 | } 178 | 179 | Status Append(std::string_view data) { 180 | size_t write_size = data.size(); 181 | const char* write_data = data.data(); 182 | 183 | // Append data to Buffer if buffer is enough 184 | size_t copy_size = std::min(write_size, KWritableFileBufferSize - pos_); 185 | std::memcpy(buffer_ + pos_, write_data, copy_size); 186 | write_data += copy_size; 187 | write_size -= copy_size; 188 | pos_ += copy_size; 189 | if (write_size == 0) { 190 | return Status::OK(); 191 | } 192 | 193 | Status s = Flush(); 194 | if (!s.ok()) { 195 | return s; 196 | } 197 | return WriteUnbuffer(write_data, write_size); 198 | } 199 | 200 | Status Flush() { 201 | Status status; 202 | status = WriteUnbuffer(buffer_, pos_); 203 | pos_ = 0; 204 | return status; 205 | } 206 | 207 | Status Sync() { 208 | Status status; 209 | status = Flush(); 210 | if (!status.ok()) { 211 | return status; 212 | } 213 | return SyncFd(fd_, filename_); 214 | } 215 | 216 | Status SyncDir() { 217 | Status status; 218 | int fd; 219 | if ((fd = ::open(dirname_.data(), O_RDONLY)) < 0) { 220 | return SystemError(dirname_, errno); 221 | } else { 222 | status = SyncFd(fd, dirname_); 223 | ::close(fd); 224 | } 225 | return status; 226 | } 227 | 228 | Status Close() { 229 | Status status; 230 | status = Flush(); 231 | if (!status.ok()) { 232 | return status; 233 | } 234 | if (::close(fd_) < 0) { 235 | return SystemError(filename_, errno); 236 | } 237 | fd_ = -1; 238 | return status; 239 | } 240 | 241 | private: 242 | std::string Dirname(const std::string& filename) const { 243 | auto seperator_pos = filename.rfind('/'); 244 | if (seperator_pos == std::string::npos) { 245 | return std::string("."); 246 | } 247 | return filename.substr(0, seperator_pos); 248 | } 249 | Status SyncFd(int fd, const std::string& filename) { 250 | if (::fsync(fd) != 0) { 251 | return SystemError(filename_, errno); 252 | } 253 | 
return Status::OK(); 254 | } 255 | Status WriteUnbuffer(const char* data, size_t size) { 256 | while (size > 0) { 257 | ssize_t write_size = ::write(fd_, data, size); 258 | if (write_size < 0) { 259 | if (errno == EINTR) { 260 | continue; 261 | } 262 | return SystemError(filename_, errno); 263 | } 264 | size -= write_size; 265 | data += write_size; 266 | } 267 | return Status::OK(); 268 | } 269 | 270 | char buffer_[KWritableFileBufferSize]; 271 | size_t pos_; 272 | int fd_; 273 | 274 | const std::string filename_; 275 | const std::string dirname_; 276 | }; 277 | 278 | } // namespace lsmkv 279 | #endif // STORAGE_XDB_INCLUDE_FILE_H_ 280 | -------------------------------------------------------------------------------- /lsmkv/db/version/version.h: -------------------------------------------------------------------------------- 1 | #ifndef STORAGE_XDB_DB_VERSION_VERSION_H_ 2 | #define STORAGE_XDB_DB_VERSION_VERSION_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "db/format/dbformat.h" 8 | #include "db/log/log_writer.h" 9 | #include "db/sstable/table_cache.h" 10 | #include "db/version/version_edit.h" 11 | #include "include/comparator.h" 12 | #include "include/iterator.h" 13 | #include "include/option.h" 14 | #include "util/mutex.h" 15 | 16 | namespace lsmkv { 17 | 18 | class VersionSet; 19 | class Compaction; 20 | 21 | // find first file that "largest >= internal_key", 22 | // the files must be sorted by largest and have no overlap(level > 0) 23 | size_t FindFile(const std::vector& files, std::string_view user_key, 24 | const Comparator* ucmp); 25 | 26 | class Version { 27 | public: 28 | // 存放seek file相关信息: seek_file_level、seek_file 29 | struct GetStats { 30 | int seek_file_level; 31 | FileMeta* seek_file; 32 | }; 33 | void Ref(); 34 | void Unref(); 35 | 36 | /** 37 | * @brief 内存中没有找到,从Version(SST 文件)中找 38 | * @details 由DBImpl::Get调用 39 | * @param[in] option 读操作选项 40 | * @param[in] key 查找的key 41 | * @param[out] result 存放Get结果 42 | * @param[out] stats 43 | * @return 
Status 表示执行状态 44 | */ 45 | Status Get(const ReadOption& option, const LookupKey& key, 46 | std::string* result, GetStats* stats); 47 | 48 | /** 49 | * @brief seek SST文件一次的相应处理 50 | * @details DBImpl::Get中被调用 51 | * @param[in] stats 52 | * @return 53 | * @retval true 触发major compaction 54 | * @retval false 未触发major compaction 55 | */ 56 | bool UpdateStats(const GetStats& stats); 57 | 58 | /** 59 | * @brief Get the Overlapping FileMeta 60 | * @details 被VersionSet::PickCompaction调用 61 | * @param[in] level 62 | * @param[in] smallest 63 | * @param[in] largest 64 | * @param[out] input 结果存放 65 | */ 66 | void GetOverlappingFiles(int level, const InternalKey& smallest, 67 | const InternalKey& largest, 68 | std::vector* input); 69 | 70 | private: 71 | friend class VersionSet; 72 | friend class Compaction; 73 | class LevelFileIterator; 74 | explicit Version(VersionSet* vset) 75 | : vset_(vset), 76 | next_(this), 77 | prev_(this), 78 | refs_(0), 79 | file_to_compact_level_(-1), 80 | file_to_compact_(nullptr), 81 | compaction_level(-1), 82 | compaction_score(-1) {} 83 | 84 | Version(const Version&) = delete; 85 | Version& operator=(const Version&) = delete; 86 | 87 | ~Version(); 88 | 89 | /** 90 | * @brief 91 | * @details 仅仅用于Version::Get 92 | * @param[in] user_key 93 | * @param[in] internal_key 94 | * @param[in] arg 95 | * @param[in] fun 96 | */ 97 | void ForEachFile(std::string_view user_key, std::string_view internal_key, 98 | void* arg, bool (*fun)(void*, int, FileMeta*)); 99 | 100 | private: 101 | VersionSet* vset_; 102 | Version* next_; 103 | Version* prev_; 104 | int refs_; 105 | 106 | std::vector files_[config::kNumLevels]; 107 | 108 | // compaction case 1: When file is seeked to many times during 109 | // "Version->Get", it should be compact. 110 | int file_to_compact_level_; 111 | FileMeta* file_to_compact_; 112 | 113 | // compaction case 2: When a level's usage is to much(file num 114 | // for level0, file total size for other level), compact this level. 
115 | // the score is setted by EvalCompactionScore() 116 | int compaction_level; 117 | double compaction_score; 118 | }; 119 | 120 | class VersionSet { 121 | public: 122 | VersionSet(const std::string name, const Option* option, TableCache* cache, 123 | const InternalKeyComparator* cmp); 124 | 125 | VersionSet(const VersionSet&) = delete; 126 | VersionSet& operator=(const VersionSet&) = delete; 127 | 128 | ~VersionSet(); 129 | 130 | Version* Current() { return current_; } 131 | 132 | Status LogAndApply(VersionEdit* edit, Mutex* mu) EXCLUSIVE_LOCKS_REQUIRED(mu); 133 | 134 | Status Recover(); 135 | 136 | uint64_t NextFileNumber() { return next_file_number_++; } 137 | 138 | uint64_t LastSequence() const { return last_sequence_; } 139 | 140 | uint64_t LogNumber() const { return log_number_; } 141 | 142 | uint64_t MetaFileNumber() const { return meta_file_number_; } 143 | 144 | void MarkFileNumberUsed(uint64_t number) { 145 | if (number >= next_file_number_) { 146 | next_file_number_ = number + 1; 147 | } 148 | } 149 | 150 | uint64_t LevelFileNum(int level) { 151 | assert(level >= 0 && level <= config::kNumLevels); 152 | return current_->files_[level].size(); 153 | } 154 | void SetLastSequence(uint64_t s) { 155 | assert(s >= last_sequence_); 156 | last_sequence_ = s; 157 | } 158 | 159 | void AddLiveFiles(std::set* live); 160 | 161 | Compaction* PickCompaction(); 162 | 163 | Iterator* MakeMergedIterator(Compaction* c); 164 | 165 | bool NeedCompaction() { 166 | Version* v = current_; 167 | return (v->compaction_score >= 1) || (v->file_to_compact_ != nullptr); 168 | } 169 | 170 | private: 171 | class Builder; 172 | 173 | friend class Version; 174 | friend class Compaction; 175 | 176 | Status WriteSnapShot(log::Writer* writer); 177 | 178 | void AppendVersion(Version* v); 179 | 180 | void EvalCompactionScore(Version* v); 181 | 182 | void GetRange(const std::vector& input, InternalKey* smallest, 183 | InternalKey* largest); 184 | 185 | void GetTwoRange(const std::vector& 
input1, 186 | const std::vector& input2, InternalKey* smallest, 187 | InternalKey* largest); 188 | 189 | private: 190 | const std::string name_; 191 | const Option* option_; 192 | Env* env_; 193 | const InternalKeyComparator icmp_; 194 | 195 | TableCache* table_cache_; 196 | Version dummy_head_; 197 | Version* current_; 198 | 199 | uint64_t log_number_; 200 | SequenceNum last_sequence_; 201 | uint64_t next_file_number_; 202 | uint64_t meta_file_number_; 203 | 204 | WritableFile* meta_log_file_; 205 | log::Writer* meta_log_writer_; 206 | 207 | std::string compactor_pointer_[config::kNumLevels]; 208 | }; 209 | 210 | class Compaction { 211 | public: 212 | Compaction(const Option* option, int level); 213 | 214 | ~Compaction() { 215 | if (input_version_ != nullptr) { 216 | input_version_->Unref(); 217 | input_version_ = nullptr; 218 | } 219 | } 220 | bool SingalMove() const; 221 | 222 | int level() { return level_; } 223 | 224 | FileMeta* input(int which, int i) { return input_[which][i]; } 225 | 226 | VersionEdit* edit() { return &edit_; } 227 | 228 | size_t InputFilesNum(int which) { return input_[which].size(); } 229 | 230 | bool StopBefore(std::string_view key); 231 | 232 | bool IsBaseLevelForKey(std::string_view key); 233 | 234 | uint64_t MaxOutputFileBytes() { return max_output_file_bytes_; } 235 | 236 | void ReleaseInput() { 237 | if (input_version_ != nullptr) { 238 | input_version_->Unref(); 239 | input_version_ = nullptr; 240 | } 241 | } 242 | 243 | void AddInputDeletions(VersionEdit* edit); 244 | 245 | std::string InputToString(int which) { 246 | std::string ret{"{"}; 247 | for (int i = 0; i < input_[which].size(); i++) { 248 | ret += std::to_string(input_[which][i]->number); 249 | if (i != input_[which].size() - 1) { 250 | ret += ", "; 251 | } 252 | } 253 | ret.push_back('}'); 254 | return ret; 255 | } 256 | 257 | private: 258 | friend class Version; 259 | friend class VersionSet; 260 | 261 | private: 262 | int level_; 263 | uint64_t max_output_file_bytes_; 
// One cache entry. It is allocated with extra room behind the struct so that
// key_data holds key_length bytes.
struct LRUHandle {
  void* value;
  void (*deleter)(std::string_view, void* value);
  LRUHandle* next_hash;  // next entry in the same hash bucket
  LRUHandle* next;
  LRUHandle* prev;
  size_t charge;
  size_t key_length;
  uint32_t refs;
  uint32_t hash;  // hash of key(), kept to avoid recomputing it
  bool in_cache;
  char key_data[1];  // first byte of the key

  std::string_view key() const {
    // A handle whose next points to itself is a list head and has no key.
    assert(next != this);
    return std::string_view(key_data, key_length);
  }
};

// Chained hash table from (key, hash) to LRUHandle*. The table links the
// handles through next_hash and does not own them; it only owns its bucket
// array, whose size is always a power of two.
class HandleTable {
 public:
  HandleTable() : list_(nullptr), element_num_(0), capacity_(0) { Resize(); }
  ~HandleTable() { delete[] list_; }

  // The bucket array is owned through a raw pointer; a copy would delete it
  // twice.
  HandleTable(const HandleTable&) = delete;
  HandleTable& operator=(const HandleTable&) = delete;

  // Returns the handle stored for (key, hash), or nullptr.
  LRUHandle* Lookup(std::string_view key, uint32_t hash) {
    return *Find(key, hash);
  }

  // Stores h. If a handle with the same key and hash was present it is
  // replaced and returned; otherwise nullptr is returned.
  LRUHandle* Insert(LRUHandle* h) {
    LRUHandle** slot = Find(h->key(), h->hash);
    LRUHandle* replaced = *slot;
    h->next_hash = (replaced == nullptr ? nullptr : replaced->next_hash);
    *slot = h;
    if (replaced == nullptr) {
      ++element_num_;
      // Grow once there are more entries than buckets.
      if (element_num_ > capacity_) {
        Resize();
      }
    }
    return replaced;
  }

  // Unlinks and returns the handle stored for (key, hash), or nullptr.
  LRUHandle* Remove(std::string_view key, uint32_t hash) {
    LRUHandle** slot = Find(key, hash);
    LRUHandle* removed = *slot;
    if (removed != nullptr) {
      *slot = removed->next_hash;
      --element_num_;
    }
    return removed;
  }

  // Rebuilds the bucket array with the smallest power-of-two size that is
  // at least 4 and not below the number of entries, relinking every handle.
  void Resize() {
    uint32_t new_capacity = 4;
    while (new_capacity < element_num_) {
      new_capacity *= 2;
    }
    // Value-initialisation sets every bucket to nullptr.
    LRUHandle** new_list = new LRUHandle*[new_capacity]();

    uint32_t moved = 0;
    for (uint32_t bucket = 0; bucket < capacity_; bucket++) {
      LRUHandle* h = list_[bucket];
      while (h != nullptr) {
        LRUHandle* following = h->next_hash;
        LRUHandle** head = &new_list[h->hash & (new_capacity - 1)];
        h->next_hash = *head;
        *head = h;
        h = following;
        moved++;
      }
    }
    assert(element_num_ == moved);
    (void)moved;  // only read by the assert
    delete[] list_;
    list_ = new_list;
    capacity_ = new_capacity;
  }

 private:
  // Returns the address of the link that points to the matching handle, or
  // of the trailing null link of the bucket when there is no match.
  LRUHandle** Find(std::string_view key, uint32_t hash) {
    LRUHandle** slot = &list_[hash & (capacity_ - 1)];
    while (*slot != nullptr &&
           ((*slot)->hash != hash || (*slot)->key() != key)) {
      slot = &(*slot)->next_hash;
    }
    return slot;
  }

  LRUHandle** list_;      // bucket array of capacity_ chains
  uint32_t element_num_;  // number of handles stored
  uint32_t capacity_;     // number of buckets
};
void (*deleter)(std::string_view key, void* value)); 120 | void Release(Cache::Handle* handle); 121 | void Erase(std::string_view key, uint32_t hash); 122 | 123 | private: 124 | void Ref(LRUHandle* e); 125 | void Unref(LRUHandle* e); 126 | void LRU_Append(LRUHandle* list, LRUHandle* e); 127 | void LRU_Remove(LRUHandle* e); 128 | void FinishErase(LRUHandle* e); 129 | 130 | Mutex mu_; 131 | size_t capacity_; 132 | size_t usage_ GUARDED_BY(mu_); 133 | LRUHandle lru_ GUARDED_BY(mu_); 134 | LRUHandle in_use_ GUARDED_BY(mu_); 135 | HandleTable table_ GUARDED_BY(mu_); 136 | }; 137 | 138 | LRUCache::~LRUCache() { 139 | LRUHandle* e = lru_.next; 140 | while (e != &lru_) { 141 | LRUHandle* next = e->next; 142 | assert(e->in_cache); 143 | e->in_cache = false; 144 | assert(e->refs == 1); 145 | Unref(e); 146 | e = next; 147 | } 148 | } 149 | Cache::Handle* LRUCache::Lookup(std::string_view key, uint32_t hash) { 150 | MutexLock lock(&mu_); 151 | LRUHandle* e = table_.Lookup(key, hash); 152 | if (e != nullptr) { 153 | Ref(e); 154 | } 155 | return reinterpret_cast(e); 156 | } 157 | 158 | Cache::Handle* LRUCache::Insert(std::string_view key, uint32_t hash, 159 | void* value, size_t charge, 160 | void (*deleter)(std::string_view key, 161 | void* value)) { 162 | MutexLock lock(&mu_); 163 | 164 | LRUHandle* handle = 165 | reinterpret_cast(malloc(sizeof(LRUHandle) - 1 + key.size())); 166 | handle->value = value; 167 | handle->deleter = deleter; 168 | handle->hash = hash; 169 | handle->charge = charge; 170 | handle->key_length = key.size(); 171 | handle->refs = 1; 172 | handle->in_cache = false; 173 | memcpy(handle->key_data, key.data(), key.size()); 174 | 175 | if (capacity_ > 0) { 176 | ++handle->refs; 177 | handle->in_cache = true; 178 | LRU_Append(&in_use_, handle); 179 | usage_ += charge; 180 | FinishErase(table_.Insert(handle)); 181 | } else { 182 | handle->next = nullptr; 183 | } 184 | 185 | while (usage_ > capacity_ && lru_.next != &lru_) { 186 | LRUHandle* old = lru_.next; 187 
| assert(old->refs == 1); 188 | FinishErase(table_.Remove(old->key(), old->hash)); 189 | } 190 | 191 | return reinterpret_cast(handle); 192 | } 193 | 194 | void LRUCache::Release(Cache::Handle* handle) { 195 | MutexLock lock(&mu_); 196 | Unref(reinterpret_cast(handle)); 197 | } 198 | 199 | void LRUCache::Erase(std::string_view key, uint32_t hash) { 200 | MutexLock lock(&mu_); 201 | FinishErase(table_.Remove(key, hash)); 202 | } 203 | 204 | void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) { 205 | e->next = list; 206 | e->prev = list->prev; 207 | e->next->prev = e; 208 | e->prev->next = e; 209 | } 210 | 211 | void LRUCache::LRU_Remove(LRUHandle* e) { 212 | e->next->prev = e->prev; 213 | e->prev->next = e->next; 214 | } 215 | 216 | void LRUCache::Ref(LRUHandle* e) { 217 | if (e->refs == 1 && e->in_cache) { 218 | LRU_Remove(e); 219 | LRU_Append(&in_use_, e); 220 | } 221 | ++e->refs; 222 | } 223 | 224 | void LRUCache::Unref(LRUHandle* e) { 225 | --e->refs; 226 | if (e->refs == 0) { 227 | assert(!e->in_cache); 228 | (*e->deleter)(e->key(), e->value); 229 | free(e); 230 | } else if (e->in_cache && e->refs == 1) { 231 | LRU_Remove(e); 232 | LRU_Append(&lru_, e); 233 | } 234 | } 235 | 236 | void LRUCache::FinishErase(LRUHandle* e) EXCLUSIVE_LOCKS_REQUIRED(mu_) { 237 | if (e != nullptr) { 238 | assert(e->in_cache); 239 | LRU_Remove(e); 240 | e->in_cache = false; 241 | usage_ -= e->charge; 242 | Unref(e); 243 | } 244 | } 245 | 246 | static constexpr int KNumShardBits = 1; 247 | static constexpr int KNumShard = 1 << KNumShardBits; 248 | 249 | class ShardLRUCache : public Cache { 250 | public: 251 | ShardLRUCache(size_t capacity) { 252 | size_t shard_capacity = (capacity + (KNumShard - 1)) / KNumShard; 253 | for (int i = 0; i < KNumShard; i++) { 254 | shard_[i].SetCapacity(shard_capacity); 255 | } 256 | } 257 | Handle* Lookup(std::string_view key) override { 258 | uint32_t hash = Hash(key); 259 | return shard_[Shard(hash)].Lookup(key, hash); 260 | } 261 | 262 | void 
Release(Handle* handle) override { 263 | LRUHandle* e = reinterpret_cast(handle); 264 | return shard_[Shard(e->hash)].Release(handle); 265 | } 266 | 267 | void Erase(std::string_view key) override { 268 | uint32_t hash = Hash(key); 269 | shard_[Shard(hash)].Erase(key, hash); 270 | } 271 | 272 | void* Value(Handle* handle) override { 273 | return reinterpret_cast(handle)->value; 274 | } 275 | 276 | Handle* Insert(std::string_view key, void* value, size_t charge, 277 | void (*deleter)(std::string_view key, void* value)) override { 278 | uint32_t hash = Hash(key); 279 | return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter); 280 | } 281 | 282 | private: 283 | uint32_t Hash(std::string_view key) { 284 | return murmur3::MurmurHash3_x86_32(key.data(), key.size(), 0); 285 | } 286 | uint32_t Shard(uint32_t hash) { return hash >> (32 - KNumShardBits); } 287 | LRUCache shard_[KNumShard]; 288 | }; 289 | 290 | Cache* NewLRUCache(size_t capacity) { return new ShardLRUCache(capacity); } 291 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/db/sstable/sstable_builder.cc: -------------------------------------------------------------------------------- 1 | #include "include/sstable_builder.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "crc32c/crc32c.h" 7 | #include "db/filter/filter_block.h" 8 | #include "db/sstable/block_builder.h" 9 | #include "db/sstable/block_format.h" 10 | #include "db/version/version_edit.h" 11 | #include "include/comparator.h" 12 | #include "include/iterator.h" 13 | #include "include/option.h" 14 | #include "snappy.h" 15 | #include "util/coding.h" 16 | #include "util/filename.h" 17 | 18 | namespace lsmkv { 19 | 20 | Status BuildSSTable(const std::string name, const Option& option, 21 | TableCache* table_cache, Iterator* iter, FileMeta* meta) { 22 | Status s; 23 | meta->file_size = 0; 24 | iter->SeekToFirst(); 25 | std::string filename = SSTableFileName(name, meta->number); 26 | 27 
| if (iter->Valid()) { 28 | WritableFile* file; 29 | s = option.env->NewWritableFile(filename, &file); 30 | if (!s.ok()) { 31 | return s; 32 | } 33 | SSTableBuilder* builder = new SSTableBuilder(option, file); 34 | meta->smallest.DecodeFrom(iter->Key()); 35 | std::string_view key; 36 | for (; iter->Valid(); iter->Next()) { 37 | key = iter->Key(); 38 | builder->Add(key, iter->Value()); 39 | } 40 | meta->largest.DecodeFrom(key); 41 | 42 | s = builder->Finish(); 43 | if (s.ok()) { 44 | meta->file_size = builder->FileSize(); 45 | } 46 | delete builder; 47 | 48 | if (s.ok()) { 49 | s = file->Sync(); 50 | } 51 | if (s.ok()) { 52 | s = file->Close(); 53 | } 54 | delete file; 55 | file = nullptr; 56 | } 57 | if (!iter->status().ok()) { 58 | s = iter->status(); 59 | } 60 | if (!s.ok() || meta->file_size == 0) { 61 | option.env->RemoveFile(filename); 62 | } 63 | return s; 64 | } 65 | struct SSTableBuilder::Rep { 66 | Rep(const Option& option, WritableFile* file) 67 | : closed_(false), 68 | data_block_option_(option), 69 | index_block_option_(option), 70 | file_(file), 71 | data_block_builder_(&data_block_option_), 72 | index_block_builder_(&index_block_option_), 73 | filter_block_builder_( 74 | option.filter_policy == nullptr 75 | ? nullptr 76 | : new FilterBlockBuilder(option.filter_policy)), 77 | num_entries_(0), 78 | offset_(0), 79 | data_block_over_(false) { 80 | // index_block is used for random access 81 | // using prefix compress will slow down the effiency. 82 | index_block_option_.block_restart_interval = 1; 83 | } 84 | bool closed_; 85 | Status status_; 86 | Option data_block_option_; 87 | Option index_block_option_; 88 | WritableFile* file_; 89 | BlockBuilder data_block_builder_; 90 | BlockBuilder index_block_builder_; 91 | FilterBlockBuilder* filter_block_builder_; 92 | std::string last_key_; 93 | uint64_t num_entries_; 94 | uint64_t offset_; 95 | // Set when a data block is flushed. 96 | // Used for index block building. 
97 | bool data_block_over_; 98 | BlockHandle data_block_handle_; 99 | std::string compress_output_; 100 | }; 101 | Status SSTableBuilder::status() const { return rep_->status_; } 102 | SSTableBuilder::SSTableBuilder(const Option& option, WritableFile* file) 103 | : rep_(new Rep(option, file)) { 104 | if (rep_->filter_block_builder_ != nullptr) { 105 | rep_->filter_block_builder_->StartBlock(0); 106 | } 107 | } 108 | 109 | SSTableBuilder::~SSTableBuilder() { 110 | delete rep_->filter_block_builder_; 111 | delete rep_; 112 | } 113 | 114 | void SSTableBuilder::Add(std::string_view key, std::string_view value) { 115 | assert(!rep_->closed_); 116 | if (!ok()) return; 117 | if (rep_->num_entries_ != 0) { 118 | assert(rep_->data_block_option_.comparator->Compare( 119 | key, std::string_view(rep_->last_key_)) > 0); 120 | } 121 | // When a data block is flushed over, 122 | // Add a record into the index block 123 | // key : a key greater than last data block's last key, 124 | // smaller than curr data block's first key 125 | // value : the block handle of last data block 126 | if (rep_->data_block_over_) { 127 | // find the shortest key between last_key and key 128 | // in order to decrease the key's length 129 | rep_->data_block_option_.comparator->FindShortestMiddle(&rep_->last_key_, 130 | key); 131 | std::string block_handele; 132 | rep_->data_block_handle_.EncodeTo(&block_handele); 133 | rep_->index_block_builder_.Add(rep_->last_key_, 134 | std::string_view(block_handele)); 135 | rep_->data_block_over_ = false; 136 | } 137 | if (rep_->filter_block_builder_ != nullptr) { 138 | rep_->filter_block_builder_->AddKey(key); 139 | } 140 | rep_->data_block_builder_.Add(key, value); 141 | rep_->last_key_ = key; 142 | rep_->num_entries_++; 143 | 144 | if (rep_->data_block_builder_.ByteSize() >= 145 | rep_->data_block_option_.block_size) { 146 | Flush(); 147 | } 148 | } 149 | 150 | void SSTableBuilder::Flush() { 151 | assert(!rep_->closed_); 152 | if 
(rep_->data_block_builder_.Empty()) return; 153 | WriteBlock(&rep_->data_block_builder_, &rep_->data_block_handle_); 154 | if (ok()) { 155 | rep_->data_block_over_ = true; 156 | rep_->status_ = rep_->file_->Flush(); 157 | } 158 | if (rep_->filter_block_builder_ != nullptr) { 159 | rep_->filter_block_builder_->StartBlock(rep_->offset_); 160 | } 161 | } 162 | 163 | // check the snappy compress is used. 164 | // generate the std::string_view contents 165 | void SSTableBuilder::WriteBlock(BlockBuilder* builder, BlockHandle* handle) { 166 | assert(ok()); 167 | std::string_view contents; 168 | std::string_view raw = builder->Finish(); 169 | CompressType type = rep_->data_block_option_.compress_type; 170 | 171 | if (type == KUnCompress) { 172 | contents = raw; 173 | } else if (type == KSnappyCompress) { 174 | std::string* compress = &rep_->compress_output_; 175 | if (snappy::Compress(raw.data(), raw.size(), compress)) { 176 | contents = *compress; 177 | } else { 178 | contents = raw; 179 | type = KUnCompress; 180 | } 181 | } 182 | WriteRawBlock(contents, type, handle); 183 | rep_->compress_output_.clear(); 184 | builder->Reset(); 185 | } 186 | 187 | void SSTableBuilder::WriteRawBlock(std::string_view contents, CompressType type, 188 | BlockHandle* handle) { 189 | handle->SetOffset(rep_->offset_); 190 | handle->SetSize(contents.size()); 191 | rep_->status_ = rep_->file_->Append(contents); 192 | if (ok()) { 193 | char tail[KBlockTailSize]; 194 | tail[0] = static_cast(type); 195 | uint32_t crc = crc32c::Crc32c(contents.data(), contents.size()); 196 | EncodeFixed32(tail + 1, CrcMask(crc)); 197 | crc = crc32c::Extend(crc, reinterpret_cast(tail), 1); 198 | rep_->status_ = rep_->file_->Append(std::string_view(tail, KBlockTailSize)); 199 | if (ok()) { 200 | rep_->offset_ += contents.size() + KBlockTailSize; 201 | } 202 | } 203 | } 204 | 205 | Status SSTableBuilder::Finish() { 206 | assert(!rep_->closed_); 207 | Flush(); 208 | rep_->closed_ = true; 209 | BlockHandle 
filter_block_handle, index_block_handle, filter_index_handle; 210 | if (ok() && rep_->filter_block_builder_ != nullptr) { 211 | WriteRawBlock(rep_->filter_block_builder_->Finish(), KUnCompress, 212 | &filter_block_handle); 213 | } 214 | if (ok()) { 215 | BlockBuilder filter_index_builder(&rep_->data_block_option_); 216 | if (rep_->filter_block_builder_ != nullptr) { 217 | std::string key = "filter"; 218 | key.append(rep_->data_block_option_.filter_policy->Name()); 219 | std::string val; 220 | filter_block_handle.EncodeTo(&val); 221 | filter_index_builder.Add(key, val); 222 | } 223 | WriteBlock(&filter_index_builder, &filter_index_handle); 224 | } 225 | if (ok()) { 226 | if (rep_->data_block_over_) { 227 | // find the shortest key bigger than last_key 228 | // in order to decrease the key's length 229 | rep_->data_block_option_.comparator->FindShortestBigger(&rep_->last_key_); 230 | std::string block_handele; 231 | rep_->data_block_handle_.EncodeTo(&block_handele); 232 | rep_->index_block_builder_.Add(rep_->last_key_, 233 | std::string_view(block_handele)); 234 | rep_->data_block_over_ = false; 235 | } 236 | WriteBlock(&rep_->index_block_builder_, &index_block_handle); 237 | } 238 | if (ok()) { 239 | Footer footer; 240 | footer.SetIndexHandle(index_block_handle); 241 | footer.SetFilterHandle(filter_block_handle); 242 | std::string footer_encode; 243 | footer.EncodeTo(&footer_encode); 244 | rep_->status_ = rep_->file_->Append(footer_encode); 245 | if (ok()) { 246 | rep_->offset_ += footer_encode.size(); 247 | } 248 | {} 249 | } 250 | return rep_->status_; 251 | } 252 | 253 | uint64_t SSTableBuilder::FileSize() const { return rep_->offset_; } 254 | 255 | uint64_t SSTableBuilder::NumEntries() const { return rep_->num_entries_; } 256 | 257 | } // namespace lsmkv -------------------------------------------------------------------------------- /lsmkv/util/MurmurHash3.cc: -------------------------------------------------------------------------------- 1 | // This source 
file was originally from: 2 | // https://github.com/PeterScott/murmur3 3 | // 4 | // We've changed it for use with VoltDB: 5 | // - We changed the top-level functions defined below to return 6 | // their hash by value, rather than accept a pointer to storage 7 | // for the result 8 | 9 | //----------------------------------------------------------------------------- 10 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 11 | // domain. The author hereby disclaims copyright to this source code. 12 | 13 | // Note - The x86 and x64 versions do _not_ produce the same results, as the 14 | // algorithms are optimized for their respective platforms. You can still 15 | // compile and run any of them on any platform, but your performance with the 16 | // non-native version will be less than optimal. 17 | 18 | #include "MurmurHash3.h" 19 | 20 | namespace murmur3 { 21 | //----------------------------------------------------------------------------- 22 | // Platform-specific functions and macros 23 | 24 | // Microsoft Visual Studio 25 | 26 | #if defined(_MSC_VER) 27 | 28 | #define FORCE_INLINE __forceinline 29 | 30 | #include 31 | 32 | #define ROTL64(x,y) _rotl64(x,y) 33 | #define ROTL32(x,y) _rotl32(x,y) 34 | 35 | #define BIG_CONSTANT(x) (x) 36 | 37 | // Other compilers 38 | 39 | #else // defined(_MSC_VER) 40 | 41 | #define FORCE_INLINE inline __attribute__((always_inline)) 42 | 43 | inline uint32_t rotl32 ( uint32_t x, int8_t r ) 44 | { 45 | return (x << r) | (x >> (32 - r)); 46 | } 47 | 48 | inline uint64_t rotl64 ( uint64_t x, int8_t r ) 49 | { 50 | return (x << r) | (x >> (64 - r)); 51 | } 52 | 53 | #define ROTL32(x,y) rotl32(x,y) 54 | #define ROTL64(x,y) rotl64(x,y) 55 | 56 | #define BIG_CONSTANT(x) (x##LLU) 57 | 58 | #endif // !defined(_MSC_VER) 59 | 60 | //----------------------------------------------------------------------------- 61 | // Block read - if your platform needs to do endian-swapping or can only 62 | // handle aligned reads, do the 
conversion here 63 | 64 | static FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i ) 65 | { 66 | return p[i]; 67 | } 68 | 69 | static FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) 70 | { 71 | return p[i]; 72 | } 73 | 74 | static FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) 75 | { 76 | return p[i]; 77 | } 78 | 79 | //----------------------------------------------------------------------------- 80 | // Finalization mix - force all bits of a hash block to avalanche 81 | 82 | static FORCE_INLINE uint32_t fmix32 ( uint32_t h ) 83 | { 84 | h ^= h >> 16; 85 | h *= 0x85ebca6b; 86 | h ^= h >> 13; 87 | h *= 0xc2b2ae35; 88 | h ^= h >> 16; 89 | 90 | return h; 91 | } 92 | 93 | static FORCE_INLINE uint64_t fmix ( uint64_t k ) 94 | { 95 | k ^= k >> 33; 96 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 97 | k ^= k >> 33; 98 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 99 | k ^= k >> 33; 100 | 101 | return k; 102 | } 103 | 104 | static FORCE_INLINE uint64_t fmix64 ( uint64_t k ) 105 | { 106 | k ^= k >> 33; 107 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 108 | k ^= k >> 33; 109 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 110 | k ^= k >> 33; 111 | 112 | return k; 113 | } 114 | 115 | //----------------------------------------------------------------------------- 116 | 117 | int32_t MurmurHash3_x64_128 ( const void * key, const int len, 118 | const uint32_t seed ) 119 | { 120 | const uint8_t * data = (const uint8_t*)key; 121 | const int nblocks = len / 16; 122 | 123 | uint64_t h1 = seed; 124 | uint64_t h2 = seed; 125 | 126 | const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); 127 | const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); 128 | 129 | //---------- 130 | // body 131 | 132 | const uint64_t * blocks = (const uint64_t *)(data); 133 | 134 | for(int i = 0; i < nblocks; i++) 135 | { 136 | uint64_t k1 = getblock(blocks,i*2+0); 137 | uint64_t k2 = getblock(blocks,i*2+1); 138 | 139 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 140 | 141 | h1 = 
ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; 142 | 143 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 144 | 145 | h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; 146 | } 147 | 148 | //---------- 149 | // tail 150 | 151 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 152 | 153 | uint64_t k1 = 0; 154 | uint64_t k2 = 0; 155 | 156 | switch(len & 15) 157 | { 158 | case 15: k2 ^= uint64_t(tail[14]) << 48; // fallthrough 159 | case 14: k2 ^= uint64_t(tail[13]) << 40; // fallthrough 160 | case 13: k2 ^= uint64_t(tail[12]) << 32; // fallthrough 161 | case 12: k2 ^= uint64_t(tail[11]) << 24; // fallthrough 162 | case 11: k2 ^= uint64_t(tail[10]) << 16; // fallthrough 163 | case 10: k2 ^= uint64_t(tail[ 9]) << 8; // fallthrough 164 | case 9: k2 ^= uint64_t(tail[ 8]) << 0; // fallthrough 165 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; // fallthrough 166 | 167 | case 8: k1 ^= uint64_t(tail[ 7]) << 56; // fallthrough 168 | case 7: k1 ^= uint64_t(tail[ 6]) << 48; // fallthrough 169 | case 6: k1 ^= uint64_t(tail[ 5]) << 40; // fallthrough 170 | case 5: k1 ^= uint64_t(tail[ 4]) << 32; // fallthrough 171 | case 4: k1 ^= uint64_t(tail[ 3]) << 24; // fallthrough 172 | case 3: k1 ^= uint64_t(tail[ 2]) << 16; // fallthrough 173 | case 2: k1 ^= uint64_t(tail[ 1]) << 8; // fallthrough 174 | case 1: k1 ^= uint64_t(tail[ 0]) << 0; // fallthrough 175 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; // fallthrough 176 | }; 177 | 178 | //---------- 179 | // finalization 180 | 181 | h1 ^= len; h2 ^= len; 182 | 183 | h1 += h2; 184 | h2 += h1; 185 | 186 | h1 = fmix(h1); 187 | h2 = fmix(h2); 188 | 189 | h1 += h2; 190 | h2 += h1; 191 | 192 | //Shift so that we use the higher order bits in case we want to use the lower order ones later 193 | //Also use the h1 higher order bits because it provided much better performance in voter, consistent too 194 | return static_cast(h1 >> 32); 195 | } 196 | 197 | uint32_t MurmurHash3_x86_32 ( const void * key, uint32_t len, 198 
| uint32_t seed ) 199 | { 200 | const uint8_t * data = (const uint8_t*)key; 201 | const int nblocks = len / 4; 202 | int i; 203 | 204 | uint32_t h1 = seed; 205 | 206 | uint32_t c1 = 0xcc9e2d51; 207 | uint32_t c2 = 0x1b873593; 208 | 209 | //---------- 210 | // body 211 | 212 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); 213 | 214 | for(i = -nblocks; i; i++) 215 | { 216 | uint32_t k1 = getblock32(blocks,i); 217 | 218 | k1 *= c1; 219 | k1 = ROTL32(k1,15); 220 | k1 *= c2; 221 | 222 | h1 ^= k1; 223 | h1 = ROTL32(h1,13); 224 | h1 = h1*5+0xe6546b64; 225 | } 226 | 227 | //---------- 228 | // tail 229 | 230 | const uint8_t * tail = (const uint8_t*)(data + nblocks*4); 231 | 232 | uint32_t k1 = 0; 233 | 234 | switch(len & 3) 235 | { 236 | case 3: k1 ^= tail[2] << 16; // fallthrough 237 | case 2: k1 ^= tail[1] << 8; // fallthrough 238 | case 1: k1 ^= tail[0]; // fallthrough 239 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 240 | }; 241 | 242 | //---------- 243 | // finalization 244 | 245 | h1 ^= len; 246 | 247 | h1 = fmix32(h1); 248 | 249 | return h1; 250 | } 251 | 252 | 253 | void MurmurHash3_x64_128 ( const void * key, const int len, 254 | const uint32_t seed, void * out ) 255 | { 256 | const uint8_t * data = (const uint8_t*)key; 257 | const int nblocks = len / 16; 258 | 259 | uint64_t h1 = seed; 260 | uint64_t h2 = seed; 261 | 262 | const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); 263 | const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); 264 | 265 | //---------- 266 | // body 267 | 268 | const uint64_t * blocks = (const uint64_t *)(data); 269 | 270 | for(int i = 0; i < nblocks; i++) 271 | { 272 | uint64_t k1 = getblock64(blocks,i*2+0); 273 | uint64_t k2 = getblock64(blocks,i*2+1); 274 | 275 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 276 | 277 | h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; 278 | 279 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 280 | 281 | h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; 282 | } 283 | 
284 | //---------- 285 | // tail 286 | 287 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 288 | 289 | uint64_t k1 = 0; 290 | uint64_t k2 = 0; 291 | 292 | switch(len & 15) 293 | { 294 | case 15: k2 ^= ((uint64_t)tail[14]) << 48; // fallthrough 295 | case 14: k2 ^= ((uint64_t)tail[13]) << 40; // fallthrough 296 | case 13: k2 ^= ((uint64_t)tail[12]) << 32; // fallthrough 297 | case 12: k2 ^= ((uint64_t)tail[11]) << 24; // fallthrough 298 | case 11: k2 ^= ((uint64_t)tail[10]) << 16; // fallthrough 299 | case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; // fallthrough 300 | case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; // fallthrough 301 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; // fallthrough 302 | 303 | case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; // fallthrough 304 | case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; // fallthrough 305 | case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; // fallthrough 306 | case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; // fallthrough 307 | case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; // fallthrough 308 | case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; // fallthrough 309 | case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; // fallthrough 310 | case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; // fallthrough 311 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 312 | }; 313 | 314 | //---------- 315 | // finalization 316 | 317 | h1 ^= len; h2 ^= len; 318 | 319 | h1 += h2; 320 | h2 += h1; 321 | 322 | h1 = fmix64(h1); 323 | h2 = fmix64(h2); 324 | 325 | h1 += h2; 326 | h2 += h1; 327 | 328 | ((uint64_t*)out)[0] = h1; 329 | ((uint64_t*)out)[1] = h2; 330 | } 331 | 332 | } 333 | //----------------------------------------------------------------------------- 334 | --------------------------------------------------------------------------------