├── test └── log_view.py ├── utils ├── log.cpp ├── timer_test.cpp ├── timer.cpp ├── timer.h └── log.h ├── src ├── comparator.cpp ├── debug.h ├── error.h ├── slice.cpp ├── skip_list_test.cpp ├── config.h ├── format.h ├── record.h ├── merge_heap_test.cpp ├── merge_heap.h ├── memtable_test.cpp ├── concurrent_queue.h ├── memtable.h ├── log_writer.h ├── db.cpp ├── sstable_builder.h ├── merge_heap.cpp ├── record.cpp ├── comparator.h ├── db_impl.h ├── memtable.cpp ├── sstable_test.cpp ├── file_util.h ├── log_writer.cpp ├── version.h ├── sstable.h ├── db_test.cpp ├── sstable.cpp ├── version.cpp ├── skiplist.h ├── file_util.cpp ├── sstable_builder.cpp └── db_impl.cpp ├── README.md ├── .gitignore ├── include ├── slice.h └── db.h ├── scripts └── data_generation.py └── CMakeLists.txt /test/log_view.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/log.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/11. 3 | // 4 | 5 | 6 | #include "log.h" 7 | 8 | LOG::LogLevel LOG::log_level=LOG::LogLevel::DEBUG; -------------------------------------------------------------------------------- /src/comparator.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/2. 3 | // 4 | 5 | #include "comparator.h" 6 | #include 7 | namespace minidb{ 8 | 9 | 10 | } -------------------------------------------------------------------------------- /src/debug.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/20. 
3 | // 4 | 5 | #ifndef MINIDB_DEBUG_H 6 | #define MINIDB_DEBUG_H 7 | 8 | 9 | 10 | #endif //MINIDB_DEBUG_H 11 | -------------------------------------------------------------------------------- /utils/timer_test.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/11. 3 | // 4 | 5 | #include "timer.h" 6 | 7 | int main(){ 8 | timer::reg("a"); 9 | timer::duration("a"); 10 | timer::print(); 11 | return 0; 12 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 概述 2 | 这是一个纯学习用的nosql。是在阅读完leveldb的源码后,自己从头实现的一个leveldb。实现逻辑上是差不多的,但是细节上会有些不同。代码只有不到3k行。现在只是单线程的,多线程的版本在学习开发中。 3 | 4 | # 性能 5 | 10000000数据。 6 | write qps: 50000 7 | read qps: 10000 8 | 9 | # TODO 10 | - [ ] compact拆分单独线程 11 | - [ ] 多线程读/写 12 | - [ ] 详细测试分析及优化 13 | -------------------------------------------------------------------------------- /utils/timer.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/11. 3 | // 4 | 5 | #include "timer.h" 6 | 7 | std::map>> timer::durations= std::map>>(); 8 | std::map timer::starts=std::map(); -------------------------------------------------------------------------------- /src/error.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #ifndef MINIDB_ERROR_H 6 | #define MINIDB_ERROR_H 7 | 8 | #include 9 | 10 | namespace minidb { 11 | template 12 | class KeyNotFound : public std::exception { 13 | const Key key_; 14 | public: 15 | KeyNotFound(const Key &key) : key_(key) {}; 16 | }; 17 | } 18 | #endif //MINIDB_ERROR_H 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /src/slice.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 3 | // 4 | 5 | 6 | #include "slice.h" 7 | 8 | namespace minidb{ 9 | Slice::Slice(const std::string &str) { 10 | data_=str; 11 | } 12 | Slice::Slice(const char *start, const char* end) { 13 | data_=std::string(start,end); 14 | } 15 | bool Slice::operator==(const minidb::Slice &ref) const { 16 | return data_==ref.data_; 17 | } 18 | 19 | Slice::Slice(int size):data_(size,0) {} 20 | } -------------------------------------------------------------------------------- /include/slice.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #ifndef MINIDB_SLICE_H 6 | #define MINIDB_SLICE_H 7 | 8 | #include 9 | 10 | namespace minidb{ 11 | 12 | class Slice{ 13 | std::string data_; 14 | public: 15 | Slice()= default; 16 | Slice(int size); 17 | explicit Slice(const std::string& str); 18 | Slice(const char* start,const char* end); 19 | inline int size(){return data_.size();} 20 | inline const char* data(){return data_.data();} 21 | bool operator==(const Slice& ref)const; 22 | }; 23 | } 24 | 25 | 26 | 27 | #endif //MINIDB_SLICE_H 28 | -------------------------------------------------------------------------------- /src/skip_list_test.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 3 | // 4 | 5 | 6 | #include "skiplist.h" 7 | #include 8 | using namespace std; 9 | 10 | int func(const std::shared_ptr& a,const std::shared_ptr& b){ 11 | if(ab)return 1; 14 | } 15 | int main(){ 16 | minidb::SkipList> skiplist(func); 17 | for(int i=1;i<100;i+=2){ 18 | skiplist.add(make_shared(i)); 19 | } 20 | for(int i=0;i<100;i+=2){ 21 | auto ret = skiplist.seek(make_shared(i)); 22 | cout< 10 | namespace minidb { 11 | class DBImpl; 12 | class DB { 13 | std::shared_ptr impl; 14 | DB()= default; 15 | public: 16 | static DB create(const std::string& db_name); 17 | static DB open(const std::string& db_name); 18 | static DB open_or_create(const std::string& db_name); 19 | void set(std::shared_ptr key,std::shared_ptr value); 20 | std::shared_ptr get(std::shared_ptr key); 21 | void remove(std::shared_ptr key); 22 | }; 23 | } 24 | 25 | 26 | #endif //MINIDB_DB_H 27 | -------------------------------------------------------------------------------- /src/format.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #ifndef MINIDB_FORMAT_H 6 | #define MINIDB_FORMAT_H 7 | 8 | #include 9 | #include 10 | #include 11 | namespace minidb{ 12 | template using ptr = std::shared_ptr; 13 | template using vec = std::vector; 14 | using LogSeqNumber = uint64_t; 15 | enum class KeyType:unsigned char{ 16 | INSERT, 17 | DELETE, 18 | OFFSET, 19 | LOOKUP 20 | }; 21 | using Checksum = std::uint32_t; 22 | template 23 | inline ptr make_ptr(Args ...args){ 24 | return std::make_shared(args...); 25 | } 26 | 27 | } 28 | #endif //MINIDB_FORMAT_H 29 | -------------------------------------------------------------------------------- /src/record.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 3 | // 4 | 5 | #ifndef MINIDB_RECORD_H 6 | #define MINIDB_RECORD_H 7 | 8 | #include "slice.h" 9 | #include "format.h" 10 | 11 | namespace minidb { 12 | class Record { 13 | ptr user_key_; 14 | LogSeqNumber lsn_; 15 | ptr value_; 16 | KeyType type_; 17 | Checksum checksum_; 18 | public: 19 | Record(ptr user_key, LogSeqNumber lsn, KeyType type, ptr value); 20 | Record(char *data, bool hash_checksum); 21 | ptr user_key(); 22 | ptr value(); 23 | LogSeqNumber lsn(); 24 | KeyType type(); 25 | Checksum checksum(); 26 | }; 27 | } 28 | #endif //MINIDB_RECORD_H 29 | -------------------------------------------------------------------------------- /src/merge_heap_test.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/19. 
3 | // 4 | 5 | #include "merge_heap.h" 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | using namespace minidb; 12 | 13 | 14 | int main(){ 15 | ptr sst1 = make_ptr("test_db",37); 16 | ptr sst2 = make_ptr("test_db",41); 17 | MergeHeap heap; 18 | heap.add_sst(sst1); 19 | heap.add_sst(sst2); 20 | heap.init(); 21 | for(;;){ 22 | auto ret = heap.pop(); 23 | if(strncmp(ret->user_key()->data(),"602253",6)==0){ 24 | break; 25 | } 26 | } 27 | while(!heap.empty()){ 28 | auto ret = heap.pop(); 29 | cout<user_key()->data()<,SSTable::Iterator>; 13 | vec heap_array; 14 | vec iter_end_flag; 15 | public: 16 | void add_sst(const ptr& sst); 17 | void init(); 18 | inline bool empty(); 19 | ptr pop(); 20 | }; 21 | bool MergeHeap::empty() { 22 | if(heap_array.empty()){ 23 | return true; 24 | } 25 | for(auto flag:iter_end_flag){ 26 | if(!flag){ 27 | return false; 28 | } 29 | } 30 | return true; 31 | } 32 | } 33 | 34 | #endif //MINIDB_MERGE_HEAP_H 35 | -------------------------------------------------------------------------------- /src/memtable_test.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #include "memtable.h" 6 | #include "slice.h" 7 | #include "format.h" 8 | using namespace minidb; 9 | using namespace std; 10 | int main(){ 11 | MemTable memTable; 12 | memTable.set(make_ptr("123"),1,KeyType::INSERT,make_ptr("234")); 13 | memTable.set(make_ptr("abc"),1,KeyType::INSERT,make_ptr("bcd")); 14 | memTable.set(make_ptr("123"),2,KeyType::DELETE, nullptr); 15 | memTable.set(make_ptr("abc"),3,KeyType::INSERT,make_ptr("asdfg")); 16 | memTable.set(make_ptr("123"),3,KeyType::INSERT,make_ptr("345")); 17 | 18 | 19 | auto ret = memTable.get(make_ptr("123"),3); 20 | cout<data()<("abc"),3); 22 | cout<data()< 9 | #include 10 | #include 11 | namespace minidb{ 12 | template 13 | class ConcurrentQueue{ 14 | std::mutex mut; 15 | std::queue queue_; 16 | public: 17 | ConcurrentQueue():queue_(){} 18 | void push(const Key& key){ 19 | std::unique_lock lck(mut); 20 | queue_.push(key); 21 | } 22 | Key& front(){ 23 | std::unique_lock lck(mut); 24 | return queue_.front(); 25 | } 26 | void pop(){ 27 | std::unique_lock lck(mut); 28 | queue_.pop(); 29 | } 30 | bool empty(){ 31 | return queue_.empty(); 32 | } 33 | }; 34 | } 35 | #endif //MINIDB_CONCURRENT_QUEUE_H 36 | -------------------------------------------------------------------------------- /src/memtable.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #ifndef MINIDB_MEMTABLE_H 6 | #define MINIDB_MEMTABLE_H 7 | 8 | #include "skiplist.h" 9 | #include "slice.h" 10 | #include "record.h" 11 | #include "comparator.h" 12 | namespace minidb{ 13 | class MemTable{ 14 | SkipList> skiplist_; 15 | int size_; 16 | public: 17 | MemTable(); 18 | int size(); 19 | void set(const ptr& user_key,LogSeqNumber lsn,KeyType type,const ptr& value); 20 | ptr get(const ptr& user_key,LogSeqNumber lsn); 21 | class Iterator{ 22 | SkipList>::Iterator iter; 23 | Iterator(SkipList>::Iterator iter); 24 | friend class MemTable; 25 | public: 26 | bool hash_next(); 27 | ptr next(); 28 | }; 29 | Iterator iterator(); 30 | }; 31 | 32 | } 33 | #endif //MINIDB_MEMTABLE_H 34 | -------------------------------------------------------------------------------- /src/log_writer.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/2. 3 | // 4 | 5 | #ifndef MINIDB_LOG_WRITER_H 6 | #define MINIDB_LOG_WRITER_H 7 | 8 | 9 | #include "format.h" 10 | #include "record.h" 11 | #include "config.h" 12 | #include "file_util.h" 13 | #include "memtable.h" 14 | namespace minidb{ 15 | class LogWriter{ 16 | ptr writer; 17 | int file_number_; 18 | LogSeqNumber max_lsn_; 19 | inline void buf_append(Checksum checksum); 20 | inline void buf_append(int size); 21 | inline void buf_append(const ptr& slice); 22 | inline void buf_append(LogSeqNumber lsn); 23 | inline void buf_append(KeyType type); 24 | inline void buf_append(const char* data,int size); 25 | public: 26 | LogWriter(const std::string& db_name,int file_number,bool create); 27 | void append(const ptr& record); 28 | int file_number(); 29 | LogSeqNumber max_lsn(); 30 | int flush(); 31 | int sync(); 32 | int remove(); 33 | }; 34 | } 35 | #endif //MINIDB_LOG_WRITER_H 36 | -------------------------------------------------------------------------------- /scripts/data_generation.py: 
-------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | import random 4 | 5 | max_key_len = 500 6 | max_val_len = 1000 7 | char_list="1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 8 | 9 | 10 | key_set = set() 11 | key_count = 10000 12 | data_count = 1000000 13 | 14 | delete_pct = 10 15 | print("gen key set") 16 | for _ in range(key_count): 17 | l = random.randint(1,max_key_len) 18 | t = "".join([random.choice(char_list) for _ in range(l)]) 19 | key_set.add(t) 20 | f = open("input2.txt","w") 21 | kv_map = {} 22 | key_list = list(key_set) 23 | print("gen value") 24 | for i in range(data_count): 25 | key = random.choice(key_list) 26 | if i%10000==0: 27 | print(i) 28 | if random.randint(1,100) 11 | #include "slice.h" 12 | #include "file_util.h" 13 | #include "format.h" 14 | #include "db_impl.h" 15 | namespace minidb{ 16 | DB DB::open(const std::string &db_name) { 17 | DB db; 18 | db.impl = DBImpl::open(db_name); 19 | return std::move(db); 20 | } 21 | DB DB::create(const std::string &db_name) { 22 | DB db; 23 | db.impl = DBImpl::create(db_name); 24 | return std::move(db); 25 | } 26 | DB DB::open_or_create(const std::string &db_name) { 27 | DB db; 28 | db.impl = DBImpl::open(db_name); 29 | if(!db.impl){ 30 | db.impl = DBImpl::create(db_name); 31 | } 32 | return db; 33 | } 34 | std::shared_ptr DB::get(std::shared_ptr key) { 35 | return impl->get(key); 36 | } 37 | void DB::set(std::shared_ptr key, std::shared_ptr value) { 38 | return impl->set(key,value); 39 | } 40 | void DB::remove(std::shared_ptr key) { 41 | return impl->remove(key); 42 | } 43 | } 44 | #endif //MINIDB_DP_CPP 45 | -------------------------------------------------------------------------------- /src/sstable_builder.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/2. 
3 | // 4 | 5 | #ifndef MINIDB_SSTABLE_BUILDER_H 6 | #define MINIDB_SSTABLE_BUILDER_H 7 | #include "format.h" 8 | #include "config.h" 9 | #include "record.h" 10 | #include "file_util.h" 11 | namespace minidb{ 12 | class BlockBuilder; 13 | class SSTableBuilder{ 14 | ptr min_user_key; 15 | ptr max_user_key; 16 | //多级index 17 | vec> index_block_list; 18 | ptr data_block; 19 | ptr writer; 20 | int add_index(const ptr& record,int index_level); 21 | ptr make_index(const ptr& block); 22 | public: 23 | SSTableBuilder(const std::string& db_name,int file_number); 24 | int add_record(const ptr& record); 25 | int finish(); 26 | uint64_t size(); 27 | }; 28 | class BlockBuilder{ 29 | vec> record_list; 30 | int size_; 31 | public: 32 | BlockBuilder(); 33 | int add_record(const ptr& record); 34 | int size(); 35 | bool empty(); 36 | int dump(const ptr& writer); 37 | int clear(); 38 | ptr max_record(); 39 | }; 40 | } 41 | #endif //MINIDB_SSTABLE_BUILDER_H 42 | -------------------------------------------------------------------------------- /src/merge_heap.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/19. 
3 | // 4 | 5 | #include "merge_heap.h" 6 | #include "comparator.h" 7 | #include "timer.h" 8 | #include "debug.h" 9 | namespace minidb { 10 | void MergeHeap::add_sst(const ptr &sst) { 11 | SSTable::Iterator iter = sst->iterator(); 12 | ptr record = iter.next(); 13 | heap_array.push_back(std::make_pair(record, iter)); 14 | } 15 | 16 | void MergeHeap::init() { 17 | for(int i=0;i MergeHeap::pop() { 24 | int index=-1; 25 | for(int i=0;i0){ 30 | index=i; 31 | } 32 | } 33 | auto ret = std::move(heap_array[index].first); 34 | heap_array[index].first=nullptr; 35 | if(!heap_array[index].second.has_next()){ 36 | iter_end_flag[index]=true; 37 | } 38 | else{ 39 | #ifdef DEBUG 40 | timer::start("sst iter next"); 41 | #endif 42 | heap_array[index].first=std::move(heap_array[index].second.next()); 43 | #ifdef DEBUG 44 | timer::end("sst iter next"); 45 | #endif 46 | } 47 | return ret; 48 | } 49 | } -------------------------------------------------------------------------------- /src/record.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/2. 
3 | // 4 | 5 | #include "record.h" 6 | 7 | #include 8 | namespace minidb{ 9 | Record::Record(char *data, bool hash_checksum) { 10 | if(hash_checksum){ 11 | checksum_ = *(Checksum*)data; 12 | data+= sizeof(Checksum); 13 | } 14 | int user_key_size = *(int*)data; 15 | data+=4; 16 | user_key_ = make_ptr(data,data+user_key_size); 17 | data+=user_key_size; 18 | lsn_ = *(LogSeqNumber*)data; 19 | data+=8; 20 | type_ =*(KeyType*)data; 21 | data+=sizeof(KeyType); 22 | if(type_!=KeyType::DELETE) { 23 | int value_key_size = *(int *) data; 24 | data += 4; 25 | value_ = make_ptr(data, data + value_key_size); 26 | } 27 | } 28 | Record::Record(minidb::ptr user_key, minidb::LogSeqNumber lsn, minidb::KeyType type, 29 | minidb::ptr value): 30 | user_key_(std::move(user_key)),lsn_(lsn),type_(type),value_(std::move(value)),checksum_(0){} 31 | ptr Record::value() { 32 | return value_; 33 | } 34 | KeyType Record::type() { 35 | return type_; 36 | } 37 | ptr Record::user_key() { 38 | return user_key_; 39 | } 40 | LogSeqNumber Record::lsn() { 41 | return lsn_; 42 | } 43 | Checksum Record::checksum() { 44 | //TODO 计算checksum 45 | return checksum_; 46 | } 47 | } -------------------------------------------------------------------------------- /src/comparator.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #ifndef MINIDB_COMPARATOR_H 6 | #define MINIDB_COMPARATOR_H 7 | 8 | #include "slice.h" 9 | #include "format.h" 10 | #include "record.h" 11 | namespace minidb{ 12 | inline int userkey_comparator(const ptr& a,const ptr& b); 13 | inline int record_comparator(const ptr& a,const ptr& b); 14 | inline int userkey_comparator(const ptr& a,const ptr& b){ 15 | int n = a->size(); 16 | int m = b->size(); 17 | if(nm){ 21 | return 1; 22 | } 23 | const char* x = a->data(); 24 | const char* y = b->data(); 25 | for(int i=0;iy[i]){ 30 | return 1; 31 | } 32 | } 33 | if(nm){ 37 | return 1; 38 | } 39 | return 0; 40 | } 41 | 42 | inline int record_comparator(const ptr& a,const ptr& b){ 43 | int ret = userkey_comparator(a->user_key(),b->user_key()); 44 | if(ret!=0){ 45 | return ret; 46 | } 47 | if(a->lsn()>b->lsn()){ 48 | return -1; 49 | } 50 | if(a->lsn()lsn()){ 51 | return 1; 52 | } 53 | return 0; 54 | } 55 | } 56 | #endif //MINIDB_COMPARATOR_H 57 | -------------------------------------------------------------------------------- /utils/timer.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/11. 
3 | // 4 | 5 | #ifndef MINIDB_TIMER_H 6 | #define MINIDB_TIMER_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | class timer{ 14 | static std::map>> durations; 15 | static std::map starts; 16 | public: 17 | static inline void start(const std::string& name){ 18 | starts[name]=std::chrono::steady_clock::now(); 19 | } 20 | static inline void end(const std::string& name){ 21 | if(starts.count(name)){ 22 | if(!durations.count(name)){ 23 | durations[name]=std::vector>(); 24 | } 25 | durations[name].push_back( 26 | std::chrono::duration_cast>(std::chrono::steady_clock::now()-starts[name])); 27 | starts.erase(name); 28 | } 29 | } 30 | static inline void print(){ 31 | for(auto & iter : timer::durations){ 32 | auto name = iter.first; 33 | auto& duration_list = iter.second; 34 | int cnt = duration_list.size(); 35 | std::chrono::duration total{}; 36 | for(auto dur:duration_list){ 37 | total+=dur; 38 | } 39 | std::chrono::duration avg = 1.0*total/cnt; 40 | printf("Name: %s\tcount: %d\ttotal time: %.4f\tavg time: %.4f\n",name.c_str(),cnt,total.count(),avg.count()); 41 | } 42 | } 43 | }; 44 | #endif //MINIDB_TIMER_H 45 | -------------------------------------------------------------------------------- /src/db_impl.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/2. 
3 | // 4 | 5 | #ifndef MINIDB_DB_IMPL_H 6 | #define MINIDB_DB_IMPL_H 7 | 8 | #include "slice.h" 9 | #include "memtable.h" 10 | #include "format.h" 11 | #include "log_writer.h" 12 | #include "sstable.h" 13 | #include "version.h" 14 | #include "concurrent_queue.h" 15 | #include 16 | #include 17 | #include 18 | #include 19 | namespace minidb{ 20 | class DBImpl:public std::enable_shared_from_this{ 21 | std::string db_name_; 22 | ptr memtable_; 23 | ptr immu_memtable_; 24 | ptr version_; 25 | int file_number_; 26 | LogSeqNumber lsn_; 27 | ConcurrentQueue compact_task_queue; 28 | //ConcurrentQueue write_task_queue; 29 | int minor_compact(const ptr& mem); 30 | int major_compact(int level); 31 | void make_write_room(); 32 | bool stop_; 33 | static void _start_compact_thread(ptr db); 34 | void write(const ptr& user_key,KeyType key_type,const ptr& value); 35 | int exchange_version(ptr new_ver,int new_ver_fn); 36 | public: 37 | explicit DBImpl(std::string dn_name); 38 | static ptr open(const std::string& db_name); 39 | static ptr create(const std::string& db_name); 40 | void set(const ptr& key,const ptr& value); 41 | ptr get(const ptr& key); 42 | void remove(const ptr& key); 43 | void stop(); 44 | void start_compact_thread(); 45 | void do_compact(bool loop); 46 | ~DBImpl(); 47 | 48 | }; 49 | } 50 | #endif //MINIDB_DB_IMPL_H 51 | -------------------------------------------------------------------------------- /src/memtable.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #include "memtable.h" 6 | #include "comparator.h" 7 | #include "error.h" 8 | namespace minidb { 9 | MemTable::MemTable() : size_(0), skiplist_(record_comparator) {} 10 | 11 | int MemTable::size() { 12 | return size_; 13 | } 14 | 15 | void MemTable::set(const minidb::ptr& user_key, minidb::LogSeqNumber lsn, minidb::KeyType type, 16 | const minidb::ptr& value) { 17 | size_+=user_key->size()+9; 18 | if(value){ 19 | size_+=value->size(); 20 | } 21 | //timer::start("skiplist add"); 22 | skiplist_.add(make_ptr(user_key,lsn,type,value)); 23 | //timer::end("skiplist add"); 24 | } 25 | ptr MemTable::get(const minidb::ptr& user_key, minidb::LogSeqNumber lsn) { 26 | ptr record = make_ptr(user_key,lsn,KeyType::LOOKUP, nullptr); 27 | ptr ret_record; 28 | try { 29 | ret_record = skiplist_.seek(record); 30 | }catch(const KeyNotFound>& err){ 31 | ret_record= nullptr; 32 | } 33 | if(ret_record && userkey_comparator(ret_record->user_key(),user_key)==0){ 34 | return ret_record; 35 | } 36 | return nullptr; 37 | } 38 | MemTable::Iterator MemTable::iterator() { 39 | return Iterator(skiplist_.iterator()); 40 | } 41 | MemTable::Iterator::Iterator(minidb::SkipList>::Iterator it):iter(it) {} 42 | bool MemTable::Iterator::hash_next() { 43 | return iter.hash_next(); 44 | } 45 | ptr MemTable::Iterator::next() { 46 | return iter.next(); 47 | } 48 | } -------------------------------------------------------------------------------- /src/sstable_test.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/7. 
3 | // 4 | 5 | #include "sstable_builder.h" 6 | #include "sstable.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace minidb; 14 | 15 | void test_write() { 16 | map data; 17 | for (int i = 0; i < 1000000; i++) { 18 | data[to_string(i)]=to_string(i*2); 19 | } 20 | SSTableBuilder sst_builder("sst_test", 1); 21 | for(map::iterator iter=data.begin();iter!=data.end();iter++){ 22 | LogSeqNumber lsn = 1; 23 | KeyType type = KeyType::INSERT; 24 | ptr record = make_ptr(make_ptr(iter->first), lsn, type, make_ptr(iter->second)); 25 | sst_builder.add_record(record); 26 | } 27 | sst_builder.finish(); 28 | } 29 | 30 | void test_read() { 31 | SSTable sst("sst_test/00000001.sst"); 32 | for (int i = 0; i < 1000000; i+=10) { 33 | string user_key = to_string(i); 34 | KeyType type = KeyType::LOOKUP; 35 | LogSeqNumber lsn = 1; 36 | ptr record = make_ptr(make_ptr(user_key), lsn, type, nullptr); 37 | ptr ret = sst.lower_bound(record); 38 | //printf("a:%5s\n", ret->value()->data()); 39 | //printf("b:%s\n", to_string(i * 2).c_str()); 40 | //assert(*(ret->value()) == *(make_ptr(to_string(i * 2)))); 41 | } 42 | } 43 | 44 | int main() { 45 | time_t s = time(NULL); 46 | test_write(); 47 | time_t e = time(NULL); 48 | printf("%d\n", e - s); 49 | time_t s2 = time(NULL); 50 | test_read(); 51 | time_t e2 = time(NULL); 52 | printf("%d\n", e2 - s2); 53 | time_t s3 = time(NULL); 54 | test_read(); 55 | time_t e3 = time(NULL); 56 | printf("%d\n", e3 - s3); 57 | return 0; 58 | } -------------------------------------------------------------------------------- /src/file_util.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #ifndef MINIDB_FILE_UTIL_H 6 | #define MINIDB_FILE_UTIL_H 7 | 8 | #include 9 | #include "config.h" 10 | 11 | namespace minidb{ 12 | void create_dir(const std::string& dir_name); 13 | int create_file(const std::string& file_name); 14 | int get_version_pointer(const std::string& db_name); 15 | int set_version_pointer(const std::string& db_name,int version_fd); 16 | std::string fn_fmt(int file_number); 17 | struct FileMeta{ 18 | std::string file_name; 19 | int file_number; 20 | int fd=-1; 21 | bool remove_flag=false; 22 | FileMeta()= default; 23 | FileMeta(const std::string& file_name,int file_number,int fd); 24 | ~FileMeta(); 25 | int remove_file(); 26 | }; 27 | 28 | class BufWriter{ 29 | char buf[config::BUFWRITER_BUF_SIZE]; 30 | int buf_offset; 31 | uint64_t size_; 32 | public: 33 | FileMeta filemeta; 34 | BufWriter(const std::string& file_name,bool end_with_magic,bool cover); 35 | int append(const char* data,int size); 36 | int append(void* data,int size); 37 | bool flush(); 38 | bool sync(); 39 | uint64_t size(); 40 | int remove(); 41 | int close(); 42 | }; 43 | class MmapReader{ 44 | char* data; 45 | int size_; 46 | int file_size; 47 | FileMeta filemeta; 48 | int offset_; 49 | 50 | public: 51 | MmapReader(const std::string& file_name,bool end_with_magic); 52 | int read(char* dest,int size); 53 | int read(void* dest,int size); 54 | int remove(); 55 | int seek(uint64_t offset); 56 | int size(); 57 | int remain(); 58 | char* base(); 59 | ~MmapReader(); 60 | }; 61 | } 62 | #endif //MINIDB_FILE_UTIL_H 63 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15) 2 | project(minidb) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) 7 | find_package(Threads) 8 | include_directories(./include ./test ./src ./utils) 9 | add_executable(minidb include/db.h 
src/slice.cpp include/slice.h src/db.cpp src/file_util.cpp src/file_util.h src/memtable.h src/skiplist.h src/format.h src/config.h src/memtable_test.cpp) 10 | add_executable(skip_list_test src/skip_list_test.cpp include/slice.h src/slice.cpp src/config.h src/format.h src/skiplist.h src/error.h) 11 | add_executable(memtable_test src/memtable_test.cpp src/memtable.cpp src/memtable.h src/slice.cpp src/record.h src/comparator.h src src/record.cpp src/comparator.cpp src/log_writer.h src/log_writer.cpp src/db_impl.cpp src/db_impl.h) 12 | add_executable(db_test src/db_test.cpp include/slice.h include/db.h src/record.h src/memtable.h src/memtable.cpp src/record.cpp src/slice.cpp src/skiplist.h 13 | src/db_impl.h src/db_impl.cpp src/db.cpp src/comparator.h 14 | src/comparator.cpp src/file_util.h src/file_util.cpp 15 | src/log_writer.cpp src/log_writer.h src/version.h 16 | src/version.cpp src/sstable.h src/sstable.cpp 17 | src/sstable_builder.cpp utils/timer.h utils/timer.cpp utils/log.h utils/log.cpp src/concurrent_queue.h src/merge_heap.h src/merge_heap.cpp src/debug.h) 18 | add_executable(sstable_test src/sstable_test.cpp src/sstable_builder.cpp src/sstable_builder.h src/file_util.cpp src/file_util.h src/format.h src/config.h src/record.cpp src/record.h src/slice.cpp include/slice.h src/sstable.cpp src/sstable.h src/comparator.h src/comparator.cpp) 19 | add_executable(merge_test src/file_util.h src/file_util.cpp utils/log.h utils/log.cpp src/comparator.h src/comparator.cpp src/record.h src/record.cpp include/slice.h src/slice.cpp src/merge_heap.h src/merge_heap.cpp src/sstable.h src/sstable.cpp src/merge_heap_test.cpp) 20 | target_link_libraries (db_test ${CMAKE_THREAD_LIBS_INIT}) 21 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3") -------------------------------------------------------------------------------- /src/log_writer.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/2. 
3 | // 4 | 5 | #include "log_writer.h" 6 | #include "file_util.h" 7 | namespace minidb{ 8 | LogWriter::LogWriter(const std::string& db_name,int file_number,bool create) { 9 | file_number_=file_number; 10 | writer = make_ptr(db_name+"/"+fn_fmt(file_number)+".log",true, create); 11 | } 12 | void LogWriter::append(const minidb::ptr& record) { 13 | buf_append(record->checksum()); 14 | buf_append(record->user_key()->size()); 15 | buf_append(record->user_key()); 16 | buf_append(record->lsn()); 17 | buf_append(record->type()); 18 | if(record->type()!=KeyType::DELETE) { 19 | buf_append(record->value()->size()); 20 | buf_append(record->value()); 21 | } 22 | max_lsn_=record->lsn(); 23 | } 24 | void LogWriter::buf_append(minidb::Checksum checksum) { 25 | buf_append((char*)&checksum,sizeof(Checksum)); 26 | } 27 | void LogWriter::buf_append(int size) { 28 | buf_append((char*)&size,sizeof(int)); 29 | } 30 | void LogWriter::buf_append(const minidb::ptr& slice) { 31 | buf_append(slice->data(),slice->size()); 32 | } 33 | void LogWriter::buf_append(minidb::LogSeqNumber lsn) { 34 | buf_append((char*)&lsn,sizeof(LogSeqNumber)); 35 | } 36 | void LogWriter::buf_append(minidb::KeyType type) { 37 | buf_append((char*)&type,sizeof(KeyType)); 38 | } 39 | void LogWriter::buf_append(const char *data, int size) { 40 | writer->append(data,size); 41 | } 42 | int LogWriter::flush() { 43 | return writer->flush(); 44 | } 45 | int LogWriter::sync() { 46 | return writer->sync(); 47 | } 48 | 49 | int LogWriter::file_number() { 50 | return file_number_; 51 | } 52 | 53 | LogSeqNumber LogWriter::max_lsn() { 54 | return max_lsn_; 55 | } 56 | 57 | int LogWriter::remove() { 58 | return writer->remove(); 59 | } 60 | } -------------------------------------------------------------------------------- /utils/log.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/11. 
3 | // 4 | 5 | #ifndef MINIDB_LOG_H 6 | #define MINIDB_LOG_H 7 | 8 | #include 9 | #include 10 | #include 11 | class LOG{ 12 | public: 13 | enum class LogLevel:uint8_t { 14 | DEBUG, 15 | INFO, 16 | WARNING, 17 | ERROR, 18 | OFF 19 | }; 20 | static LogLevel log_level; 21 | template 22 | static void info(const std::string& file, const char *func, int line, format fmt, Args ...args){ 23 | if(log_level>LogLevel::INFO){ 24 | return; 25 | } 26 | printf("\033[30m%s:%s:%d [INFO]:",file.c_str(),func,line); 27 | printf(fmt,args...); 28 | printf("\033[0m\n"); 29 | } 30 | template 31 | static void warning(const std::string& file,const char* func,int line,format fmt,Args ...args){ 32 | if(log_level>LogLevel::WARNING){ 33 | return; 34 | } 35 | printf("\033[34m%s:%s:%d [WARN]:",file.c_str(),func,line); 36 | printf(fmt,args...); 37 | printf("\033[0m\n"); 38 | } 39 | template 40 | static void error(const std::string& file,const char* func,int line,format fmt,Args ...args){ 41 | if(log_level>LogLevel::ERROR){ 42 | return; 43 | } 44 | printf("\033[31m%s:%s:%d [ERROR]:",file.c_str(),func,line); 45 | printf(fmt,args...); 46 | printf("\033[0m\n"); 47 | } 48 | template 49 | static void debug(const std::string& file,const char* func,int line,format fmt,Args ...args){ 50 | if(log_level>LogLevel::DEBUG){ 51 | return; 52 | } 53 | printf("\033[32m%s:%s:%d [DEBUG]:",file.c_str(),func,line); 54 | printf(fmt,args...); 55 | printf("\033[0m\n"); 56 | } 57 | }; 58 | #define log_info(...) LOG::info(__FILE__,__FUNCTION__,__LINE__,__VA_ARGS__) 59 | #define log_warn(...) LOG::warning(__FILE__,__FUNCTION__,__LINE__,__VA_ARGS__) 60 | #define log_error(...) LOG::error(__FILE__,__FUNCTION__,__LINE__,__VA_ARGS__) 61 | #define log_debug(...) 
LOG::debug(__FILE__,__FUNCTION__,__LINE__,__VA_ARGS__) 62 | 63 | 64 | 65 | #endif //MINIDB_LOG_H 66 | -------------------------------------------------------------------------------- /src/version.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/4. 3 | // 4 | 5 | #ifndef MINIDB_VERSION_H 6 | #define MINIDB_VERSION_H 7 | 8 | #include "format.h" 9 | #include "log_writer.h" 10 | #include "memtable.h" 11 | #include "sstable.h" 12 | #include "file_util.h" 13 | #include 14 | #include 15 | #include 16 | #include 17 | namespace minidb{ 18 | struct SSTableHasher{ 19 | size_t operator()(const ptr& sst)const { 20 | return sst->file_number(); 21 | } 22 | }; 23 | struct SSTableEqual{ 24 | bool operator()(const ptr& a,const ptr& b) const{ 25 | return a->file_number()==b->file_number(); 26 | } 27 | }; 28 | using SSTableSet = std::unordered_set,SSTableHasher,SSTableEqual>; 29 | using SstSetList = std::array; 30 | class VersionEdit; 31 | class DBImpl; 32 | /* 1. db_impl打开version,并恢复memtable 33 | * 2. db_impl新建空version 34 | * 3. 
db_impl将version_edit加到version上 35 | */ 36 | class Version{ 37 | FileMeta filemeta; 38 | ptr log_; 39 | ptr pre_log_; 40 | SstSetList sst_set_list_; 41 | LogSeqNumber lsn_; 42 | friend class VersionEdit; 43 | friend class DBImpl; 44 | public: 45 | //在内存中构建新的version 46 | Version(ptr log,ptr imm_log,SstSetList& sst_set_list,LogSeqNumber lsn,const std::string& db_name,int file_number,bool create); 47 | //新建version并修改version pointer 48 | ptr apply(const ptr& edit,const std::string& db_name,int file_number); 49 | void remove(); 50 | void print(); 51 | }; 52 | 53 | class VersionEdit{ 54 | ptr log_= nullptr; 55 | ptr pre_log_= nullptr; 56 | bool log_flag=false; 57 | bool pre_log_flag= false; 58 | SstSetList add_sst_; 59 | SstSetList remove_sst_; 60 | friend class Version; 61 | public: 62 | VersionEdit()= default; 63 | void set_log(ptr log); 64 | void set_pre_log(ptr log); 65 | void add_sst(const ptr& sst,int level); 66 | void remove_sst(const ptr& sst,int level); 67 | }; 68 | } 69 | #endif //MINIDB_VERSION_H 70 | -------------------------------------------------------------------------------- /src/sstable.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/7. 
3 | // 4 | 5 | #ifndef MINIDB_SSTABLE_H 6 | #define MINIDB_SSTABLE_H 7 | 8 | #include "file_util.h" 9 | #include "record.h" 10 | #include 11 | #include 12 | namespace minidb{ 13 | class Block:public std::enable_shared_from_this{ 14 | char* base_; 15 | uint16_t* record_offset_array; 16 | uint16_t record_offset_array_offset; 17 | uint16_t record_offset_array_size; 18 | public: 19 | Block(char* base); 20 | ptr lower_bound(const ptr& lookup); 21 | class Iterator{ 22 | ptr block; 23 | int index; 24 | Iterator(ptr blk); 25 | friend class Block; 26 | public: 27 | inline bool hash_next() { 28 | return indexrecord_offset_array_size; 29 | } 30 | inline ptr next() { 31 | return make_ptr(block->record_offset_array[index++]+block->base_, false); 32 | } 33 | }; 34 | Iterator iterator(){ 35 | return Iterator(this->shared_from_this()); 36 | } 37 | 38 | }; 39 | class SSTable:public std::enable_shared_from_this{ 40 | ptr reader; 41 | ptr root; 42 | int file_number_; 43 | int miss_times_=0; 44 | bool wait_compact_= false; 45 | SSTable(const std::string& file_name); 46 | public: 47 | ptr min_user_key; 48 | ptr max_user_key; 49 | int file_number(); 50 | int miss_times(); 51 | bool wait_compact(); 52 | SSTable(const std::string& db_name,int file_number); 53 | ptr lower_bound(const ptr& lookup); 54 | int remove(); 55 | class Iterator{ 56 | ptr sst; 57 | std::stack block_stack; 58 | Iterator(const ptr& sst); 59 | friend class SSTable; 60 | public: 61 | inline bool has_next(); 62 | ptr next(); 63 | }; 64 | Iterator iterator(){ 65 | return Iterator(this->shared_from_this()); 66 | } 67 | }; 68 | bool SSTable::Iterator::has_next() { 69 | return !block_stack.empty(); 70 | } 71 | } 72 | #endif //MINIDB_SSTABLE_H 73 | -------------------------------------------------------------------------------- /src/db_test.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/2. 
3 | // 4 | 5 | #include "db.h" 6 | #include "slice.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "timer.h" 12 | #include "log.h" 13 | 14 | using namespace std; 15 | using namespace minidb; 16 | int main(){ 17 | LOG::log_level=LOG::LogLevel::INFO; 18 | string key,value; 19 | DB db = DB::open("test_db"); 20 | // log_info("start set"); 21 | // timer::start("per 100000"); 22 | // ifstream fin("../scripts/input.txt"); 23 | // string line; 24 | 25 | // int i=0; 26 | // vector key_list; 27 | // vector value_list; 28 | // while(!fin.eof()){ 29 | // key_list.clear(); 30 | // value_list.clear(); 31 | // for(int i=0;i<100000&&!fin.eof();i++){ 32 | // fin>>key>>value; 33 | // key_list.emplace_back(key); 34 | // value_list.emplace_back(value); 35 | // } 36 | // timer::start("per 100000"); 37 | // 38 | // for(int i=0;i(key_list[i])); 41 | // } 42 | // else{ 43 | // db.set(make_shared(key_list[i]),make_shared(value_list[i])); 44 | // } 45 | // } 46 | // 47 | // timer::end("per 100000"); 48 | // timer::print(); 49 | // 50 | // i++; 51 | // } 52 | // fin.close(); 53 | log_debug("start get"); 54 | ifstream fin2("../scripts/output.txt"); 55 | unordered_map expire_set; 56 | while(!fin2.eof()){ 57 | fin2>>key>>value; 58 | expire_set[key]=value; 59 | } 60 | timer::start("get"); 61 | int cnt=0; 62 | for(auto iter=expire_set.begin();iter!=expire_set.end();iter++){ 63 | cnt++; 64 | auto ret = db.get(make_shared(iter->first)); 65 | // if((iter->second=="delete")){ 66 | // assert(ret== nullptr); 67 | // } 68 | // else{ 69 | // shared_ptr expire = make_shared(iter->second); 70 | // assert(*ret==*expire); 71 | // } 72 | if(cnt%10000==0){ 73 | timer::end("get"); 74 | log_info("get %d",cnt); 75 | timer::print(); 76 | timer::start("get"); 77 | } 78 | } 79 | return 0; 80 | } -------------------------------------------------------------------------------- /src/sstable.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created 
by cangfeng on 2019/12/7. 3 | // 4 | #include "sstable.h" 5 | #include "format.h" 6 | #include "config.h" 7 | #include "comparator.h" 8 | #include "debug.h" 9 | #include "timer.h" 10 | #include 11 | 12 | namespace minidb { 13 | int SSTable::remove() { 14 | return reader->remove(); 15 | } 16 | 17 | SSTable::SSTable(const std::string &file_name) { 18 | reader = make_ptr(file_name, true); 19 | int size = reader->size(); 20 | reader->seek(size - 8); 21 | uint64_t root_block_offset; 22 | uint64_t metadata_offset; 23 | reader->read(&metadata_offset, 8); 24 | reader->seek(metadata_offset); 25 | reader->read(&root_block_offset,8); 26 | int x; 27 | reader->read(&x,4); 28 | min_user_key = make_ptr(x); 29 | reader->read((void *) min_user_key->data(), x); 30 | reader->read(&x,4); 31 | max_user_key = make_ptr(x); 32 | reader->read((void*)max_user_key->data(),x); 33 | root = make_ptr((char *) (reader->base() + root_block_offset)); 34 | } 35 | 36 | SSTable::SSTable(const std::string &db_name, int file_number) : SSTable( 37 | db_name + "/" + fn_fmt(file_number) + ".sst") { 38 | file_number_=file_number; 39 | } 40 | int SSTable::file_number() { 41 | return file_number_; 42 | } 43 | ptr SSTable::lower_bound(const ptr& lookup) { 44 | if(userkey_comparator(lookup->user_key(),min_user_key)<0){ 45 | return nullptr; 46 | } 47 | if(userkey_comparator(lookup->user_key(),max_user_key)>0){ 48 | return nullptr; 49 | } 50 | ptr blk = root; 51 | ptr ret; 52 | for (;;) { 53 | ret = blk->lower_bound(lookup); 54 | if (ret && ret->type() == KeyType::OFFSET) { 55 | blk = make_ptr((char *) (reader->base() + *(uint64_t *) (ret->value()->data()))); 56 | } else { 57 | break; 58 | } 59 | } 60 | if(ret== nullptr){ 61 | miss_times_++; 62 | } 63 | return ret; 64 | } 65 | 66 | int SSTable::miss_times() { 67 | return miss_times_; 68 | } 69 | 70 | bool SSTable::wait_compact() { 71 | return wait_compact_; 72 | } 73 | 74 | SSTable::Iterator::Iterator(const minidb::ptr& sst) { 75 | this->sst=sst; 76 | 
block_stack.push(sst->root->iterator()); 77 | } 78 | 79 | ptr SSTable::Iterator::next() { 80 | for(;;) { 81 | Block::Iterator &iter = block_stack.top(); 82 | #ifdef DEBUG 83 | timer::start("block iter next"); 84 | #endif 85 | ptr ret = iter.next(); 86 | #ifdef DEBUG 87 | timer::end("block iter next"); 88 | #endif 89 | if(ret->type()==KeyType::OFFSET){ 90 | ptr blk = make_ptr((char *) (sst->reader->base() + *(uint64_t *) (ret->value()->data()))); 91 | block_stack.emplace(blk->iterator()); 92 | } 93 | else{ 94 | while(!block_stack.empty()&&!block_stack.top().hash_next()){ 95 | block_stack.pop(); 96 | } 97 | return ret; 98 | } 99 | } 100 | } 101 | Block::Block(char *base) { 102 | base_ = base; 103 | record_offset_array_offset = *(uint16_t *) (base + config::BLOCK_SIZE - 4); 104 | record_offset_array_size = *(uint16_t *) (base + config::BLOCK_SIZE - 2); 105 | record_offset_array = (uint16_t *) (base + record_offset_array_offset); 106 | } 107 | 108 | ptr Block::lower_bound(const ptr& lookup) { 109 | int L = 0; 110 | int R = record_offset_array_size - 1; 111 | ptr ret; 112 | while (L <= R) { 113 | int M = (L + R) >> 1; 114 | ptr record = make_ptr(record_offset_array[M] + base_, false); 115 | int cmp = record_comparator(record, lookup); 116 | if (cmp >= 0) { 117 | ret = record; 118 | R = M - 1; 119 | } else { 120 | L = M + 1; 121 | } 122 | } 123 | return ret; 124 | } 125 | Block::Iterator::Iterator(minidb::ptr blk) { 126 | block=std::move(blk); 127 | index=0; 128 | } 129 | 130 | } 131 | -------------------------------------------------------------------------------- /src/version.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/4. 
3 | // 4 | 5 | #include "version.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "file_util.h" 12 | #include "config.h" 13 | #include "log.h" 14 | namespace minidb { 15 | using std::max; 16 | void Version::remove() { 17 | filemeta.remove_file(); 18 | } 19 | Version::Version(minidb::ptr log, minidb::ptr imm_log, 20 | SstSetList& sst_set_list, LogSeqNumber lsn, const std::string &db_name, 21 | int file_number, bool create): 22 | sst_set_list_(std::move(sst_set_list)), 23 | lsn_(lsn), 24 | log_(std::move(log)), 25 | pre_log_(std::move(imm_log)){ 26 | filemeta.file_name=db_name + "/" + fn_fmt(file_number) + ".ver"; 27 | if (create) { 28 | BufWriter writer(db_name + "/" + fn_fmt(file_number) + ".ver", true, true); 29 | int x = log_ ? log_->file_number() : -1; 30 | writer.append(&x, 4); 31 | log_debug("[new version] log:%d ",x); 32 | x = pre_log_ ? pre_log_->file_number() : -1; 33 | writer.append(&x, 4); 34 | log_debug("[new version] pre log:%d",x); 35 | x=0; 36 | for(const auto& sst_set:sst_set_list_){ 37 | x+=sst_set.size(); 38 | } 39 | writer.append(&x, 4); 40 | log_debug("[new version] sst cnt:%d",x); 41 | for(int i=0;ifile_number(); 44 | log_debug("new version sst number:%d",x); 45 | writer.append(&x, 4); 46 | char level=(char)i; 47 | writer.append(&level, 1); 48 | } 49 | } 50 | writer.append(&lsn_, sizeof(LogSeqNumber)); 51 | writer.append(reinterpret_cast(&config::MAGIC), 8); 52 | writer.flush(); 53 | writer.sync(); 54 | writer.close(); 55 | } 56 | } 57 | 58 | ptr 59 | Version::apply(const ptr& edit, const std::string &db_name, int file_number) { 60 | LogSeqNumber lsn = this->lsn_; 61 | ptr log = edit->log_flag?edit->log_:log_; 62 | ptr pre_log = edit->pre_log_flag?edit->pre_log_:pre_log_; 63 | if (log) { 64 | lsn = max(lsn, log->max_lsn()); 65 | } 66 | if (pre_log) { 67 | lsn = max(lsn, pre_log->max_lsn()); 68 | } 69 | SstSetList sst_set_list; 70 | for(int i=0;iremove_sst_[i]){ 73 | sst_set_list[i].erase(rm_sst); 74 | 
rm_sst->remove(); 75 | } 76 | } 77 | for(int i=0;iadd_sst_[i].begin(),edit->add_sst_[i].end()); 79 | } 80 | ptr new_version = make_ptr(log, pre_log, sst_set_list, lsn, db_name, file_number, true); 81 | return new_version; 82 | } 83 | 84 | void Version::print() { 85 | printf("============================================================\n"); 86 | printf("Version: %s\n",filemeta.file_name.c_str()); 87 | if(log_)printf("LOG: %d\n",log_->file_number()); 88 | else printf("LOG: nullptr\n"); 89 | if(pre_log_)printf("PRE LOG: %d\n",pre_log_->file_number()); 90 | else printf("PRE LOG: nullptr\n"); 91 | for(int i=0;ifile_number()); 95 | } 96 | printf("\n"); 97 | } 98 | printf("============================================================\n"); 99 | } 100 | 101 | void VersionEdit::add_sst(const ptr& sst, int level) { 102 | add_sst_[level].insert(sst); 103 | } 104 | void VersionEdit::set_log(ptr log) { 105 | log_flag=true; 106 | log_=std::move(log); 107 | } 108 | void VersionEdit::set_pre_log(ptr log) { 109 | pre_log_flag=true; 110 | pre_log_=std::move(log); 111 | } 112 | void VersionEdit::remove_sst(const ptr& sst, int level) { 113 | remove_sst_[level].insert(sst); 114 | } 115 | } -------------------------------------------------------------------------------- /src/skiplist.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | 5 | #ifndef MINIDB_SKIPLIST_H 6 | #define MINIDB_SKIPLIST_H 7 | 8 | 9 | #include "format.h" 10 | #include "config.h" 11 | #include "error.h" 12 | #include "timer.h" 13 | #include 14 | #include 15 | #include 16 | #include 17 | namespace minidb { 18 | /** 19 | * SkipList是一个只支持插入和查找的跳表(线程不安全) 20 | * Key:数据类型 21 | */ 22 | template 23 | class SkipList { 24 | //Node相关 25 | struct Node { 26 | Key key; 27 | Node* right = nullptr; 28 | Node* down = nullptr; 29 | }; 30 | vec node_pool_list; 31 | Node* node_pool= nullptr; 32 | int node_pool_head=0; 33 | Node* alloc_node(){ 34 | if(node_pool== nullptr||node_pool_head==1024){ 35 | node_pool=new Node[1024]; 36 | node_pool_list.push_back(node_pool); 37 | node_pool_head=0; 38 | } 39 | Node* ret = &node_pool[node_pool_head++]; 40 | return ret; 41 | } 42 | //比较器 43 | std::function cmp_; 44 | vec pre_; 45 | 46 | //获取一个插入level 47 | unsigned int bits=0; 48 | inline unsigned int next_add_level() { 49 | bits++; 50 | unsigned int ret=0; 51 | while(ret>ret)&1)){ 52 | ret++; 53 | } 54 | if(ret==config::SKIPLIST_MAX_LEVEL){ 55 | ret--; 56 | bits=0; 57 | } 58 | return ret; 59 | } 60 | Node* _add(Node* pre, const Key &key, int cur_level, int ins_level) { 61 | if (cur_level < 0) { 62 | return nullptr; 63 | } 64 | Node* x = pre->right; 65 | int c = -1; 66 | while (x) { 67 | c = cmp_(x->key, key); 68 | if (c < 0) { 69 | pre = x; 70 | x = pre->right; 71 | continue; 72 | } else { 73 | break; 74 | } 75 | } 76 | assert(c != 0); 77 | Node* down = _add(pre->down, key, cur_level - 1, ins_level); 78 | if (cur_level <= ins_level) { 79 | Node* ret = alloc_node(); 80 | pre->right = ret; 81 | ret->down = down; 82 | ret->key = key; 83 | ret->right = x; 84 | return ret; 85 | } else { 86 | return nullptr; 87 | } 88 | } 89 | 90 | Node* _seek(Node* pre, const Key &key) { 91 | Node* x = pre->right; 92 | int c = -1; 93 | while (x) { 94 | c = cmp_(x->key, key); 95 | if (c < 0) { 96 | pre = x; 97 | x = pre->right; 98 | continue; 99 | } else { 100 | break; 
101 | } 102 | } 103 | if (pre->down) { 104 | return _seek(pre->down, key); 105 | } else { 106 | return x; 107 | } 108 | } 109 | 110 | public: 111 | SkipList() = delete; 112 | 113 | SkipList(std::function cmp) : cmp_(cmp), pre_(config::SKIPLIST_MAX_LEVEL){ 114 | for (int i = 0; i < config::SKIPLIST_MAX_LEVEL; i++) { 115 | pre_[i] = alloc_node(); 116 | 117 | } 118 | for (int i = 1; i < config::SKIPLIST_MAX_LEVEL; i++) { 119 | pre_[i]->down = pre_[i - 1]; 120 | } 121 | 122 | } 123 | 124 | //防止析构递归爆栈 125 | ~SkipList() { 126 | for(Node* np:node_pool_list){ 127 | delete [] np; 128 | } 129 | } 130 | 131 | 132 | 133 | void add(const Key &key) { 134 | //timer::start("nxt add level"); 135 | int ins_level = next_add_level(); 136 | //timer::end("nxt add level"); 137 | 138 | Node* pre = pre_.back(); 139 | //timer::start("skiplist real add"); 140 | _add(pre, key, config::SKIPLIST_MAX_LEVEL - 1, ins_level); 141 | //timer::end("skiplist real add"); 142 | } 143 | 144 | //查找第一个大于等于key的key 145 | Key seek(const Key &key) { 146 | Node* x = _seek(pre_.back(), key); 147 | if (x == nullptr) { 148 | throw KeyNotFound(key); 149 | } 150 | return x->key; 151 | } 152 | 153 | class Iterator { 154 | Node* current; 155 | 156 | friend class SkipList; 157 | 158 | Iterator(Node* x) { 159 | current = x; 160 | } 161 | 162 | public: 163 | bool hash_next() { 164 | return current != nullptr && current->right != nullptr; 165 | } 166 | 167 | Key next() { 168 | current = current->right; 169 | return current->key; 170 | } 171 | }; 172 | 173 | Iterator iterator() { 174 | Node* x = nullptr; 175 | if (pre_.size() > 0) { 176 | x = pre_.front(); 177 | } 178 | return Iterator(x); 179 | } 180 | }; 181 | } 182 | 183 | 184 | #endif //MINIDB_SKIPLIST_H 185 | -------------------------------------------------------------------------------- /src/file_util.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/1. 
3 | // 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "file_util.h" 10 | #include "config.h" 11 | #include "log.h" 12 | #include 13 | #include 14 | #include 15 | #include 16 | namespace minidb { 17 | void create_dir(const std::string &dir_name) { 18 | int result = mkdir(dir_name.c_str(), 0755); 19 | if (result == -1) { 20 | //TODO throw exception 21 | } 22 | } 23 | int get_version_pointer(const std::string& db_name){ 24 | int fd = open((db_name+"/version_pointer").c_str(),O_RDONLY); 25 | log_debug("[Version pointer] fd: %d",fd); 26 | if(fd==-1){ 27 | //TODO throw exception 28 | } 29 | int version_fn; 30 | uint64_t magic; 31 | read(fd,&version_fn,4); 32 | read(fd,&magic,8); 33 | assert(magic==config::MAGIC); 34 | close(fd); 35 | return version_fn; 36 | } 37 | int set_version_pointer(const std::string& db_name,int version_fn){ 38 | int fd=open((db_name+"/version_pointer").c_str(),O_WRONLY|O_CREAT|O_TRUNC,0644); 39 | log_debug("[Version pointer] fd: %d",fd); 40 | if(fd==-1){ 41 | //TODO throw exception 42 | } 43 | write(fd,&version_fn,4); 44 | write(fd,&config::MAGIC,8); 45 | ::fsync(fd); 46 | close(fd); 47 | return 0; 48 | } 49 | 50 | std::string fn_fmt(int file_number) { 51 | std::string s(8, '0'); 52 | for (int i = 7; i >= 0; i--) { 53 | s[i] = file_number % 10 + '0'; 54 | file_number /= 10; 55 | } 56 | return std::move(s); 57 | } 58 | 59 | FileMeta::FileMeta(const std::string &file_name, int file_number, int fd) { 60 | this->file_name = file_name; 61 | this->file_number = file_number; 62 | this->fd = fd; 63 | remove_flag = false; 64 | } 65 | FileMeta::~FileMeta(){ 66 | log_debug("[close fd] %d",fd); 67 | close(fd); 68 | if(remove_flag){ 69 | remove(file_name.c_str()); 70 | } 71 | } 72 | int FileMeta::remove_file() { 73 | remove_flag = true; 74 | return 0; 75 | } 76 | BufWriter::BufWriter(const std::string &file_name, bool end_with_magic, bool cover) { 77 | auto mod = O_WRONLY; 78 | if (cover) { 79 | mod |= O_CREAT | O_TRUNC; 80 | } 
81 | int fd = open(file_name.c_str(),mod,0644); 82 | log_debug("open fd:%d",fd); 83 | assert(fd!=-1); 84 | filemeta.file_name=file_name; 85 | filemeta.file_number=-1; 86 | filemeta.fd=fd; 87 | buf_offset=0; 88 | size_=0; 89 | } 90 | int BufWriter::remove() { 91 | log_debug("[buf writer] remove file %s",filemeta.file_name.c_str()); 92 | return filemeta.remove_file(); 93 | } 94 | bool BufWriter::sync() { 95 | fsync(filemeta.fd); 96 | return true; 97 | } 98 | uint64_t BufWriter::size() { 99 | return size_; 100 | } 101 | int BufWriter::append(void * data, int size) { 102 | return append((const char*)data,size); 103 | } 104 | int BufWriter::append(const char * data, int size) { 105 | int total=0; 106 | while(size>0){ 107 | int cnt = std::min(size,config::BUFWRITER_BUF_SIZE-buf_offset); 108 | memcpy(buf+buf_offset,data+total,cnt); 109 | size-=cnt; 110 | total+=cnt; 111 | buf_offset+=cnt; 112 | if(buf_offset==config::BUFWRITER_BUF_SIZE){ 113 | flush(); 114 | } 115 | } 116 | size_+=total; 117 | return total; 118 | } 119 | bool BufWriter::flush() { 120 | int cnt = write(filemeta.fd,buf,buf_offset); 121 | if(cnt!=buf_offset){ 122 | log_debug("[BufWriter]error no:%d",errno); 123 | } 124 | assert(cnt==buf_offset); 125 | buf_offset=0; 126 | return true; 127 | } 128 | int BufWriter::close() { 129 | if(buf_offset>0){ 130 | flush(); 131 | } 132 | sync(); 133 | return 0; 134 | } 135 | MmapReader::MmapReader(const std::string &file_name, bool end_with_magic) { 136 | filemeta.file_name=file_name; 137 | int fd = open(file_name.c_str(),O_RDONLY); 138 | log_debug("open fd:%d",fd); 139 | filemeta.fd=fd; 140 | if(fd==-1){ 141 | log_error("open file:%s failed",file_name.c_str()); 142 | } 143 | file_size = lseek(fd,0,SEEK_END); 144 | size_=file_size; 145 | data=(char*)mmap(nullptr,file_size,PROT_READ,MAP_SHARED,fd,0); 146 | if(end_with_magic){ 147 | size_-=8; 148 | assert(config::MAGIC==*(uint64_t*)(data+size_)); 149 | } 150 | offset_=0; 151 | } 152 | int MmapReader::size() { 153 | return 
size_; 154 | } 155 | int MmapReader::remove() { 156 | return filemeta.remove_file(); 157 | } 158 | char * MmapReader::base() { 159 | return data; 160 | } 161 | int MmapReader::seek(uint64_t offset) { 162 | if(offset=size_); 167 | return -1; 168 | } 169 | int MmapReader::read(void * dest, int size) { 170 | return read((char*)dest,size); 171 | } 172 | int MmapReader::read(char * dest, int size) { 173 | int i=0; 174 | for(i=0;i(file_name,true,true); 11 | data_block = make_ptr(); 12 | } 13 | int SSTableBuilder::add_index(const minidb::ptr& record,int index_level) { 14 | if(index_block_list.size()<=index_level){ 15 | index_block_list.emplace_back(make_ptr()); 16 | } 17 | auto& block = index_block_list[index_level]; 18 | int ret = block->add_record(record); 19 | if(ret==0){ 20 | return 0; 21 | } 22 | ptr index_record = make_index(block); 23 | block->dump(writer); 24 | block->clear(); 25 | block->add_record(record); 26 | add_index(index_record,index_level+1); 27 | return 0; 28 | } 29 | int SSTableBuilder::add_record(const ptr& record) { 30 | if(min_user_key== nullptr){ 31 | min_user_key=record->user_key(); 32 | } 33 | max_user_key=record->user_key(); 34 | int ret = data_block->add_record(record); 35 | if(ret==0){ 36 | return 0; 37 | } 38 | ptr index_record = make_index(data_block); 39 | data_block->dump(writer); 40 | data_block->clear(); 41 | data_block->add_record(record); 42 | add_index(index_record,0); 43 | return 0; 44 | } 45 | int SSTableBuilder::finish() { 46 | uint64_t root_offset; 47 | auto index_record = make_index(data_block); 48 | add_index(index_record,0); 49 | data_block->dump(writer); 50 | for(int i=0;iempty()){ 53 | continue; 54 | } 55 | if(i==index_block_list.size()-1){ 56 | root_offset = writer->size(); 57 | block->dump(writer); 58 | } 59 | else{ 60 | index_record = make_index(block); 61 | add_index(index_record,i+1); 62 | block->dump(writer); 63 | } 64 | } 65 | //记录根节点offset 66 | uint64_t metadata_offset=writer->size(); 67 | 
writer->append(&root_offset,8); 68 | //记录user_key 区间 69 | int x = min_user_key->size(); 70 | writer->append(&x,4); 71 | writer->append(min_user_key->data(),x); 72 | x = max_user_key->size(); 73 | writer->append(&x,4); 74 | writer->append(max_user_key->data(),x); 75 | writer->append(&metadata_offset,8); 76 | //写入magic 77 | writer->append((char*)(&config::MAGIC),8); 78 | writer->sync(); 79 | writer->close(); 80 | return 0; 81 | } 82 | ptr SSTableBuilder::make_index(const minidb::ptr& block) { 83 | ptr r = block->max_record(); 84 | uint64_t offset = writer->size(); 85 | char* data = (char*)&offset; 86 | ptr index_record = make_ptr(r->user_key(),r->lsn(),KeyType::OFFSET,make_ptr(data,data+8)); 87 | return index_record; 88 | } 89 | 90 | uint64_t SSTableBuilder::size() { 91 | return writer->size(); 92 | } 93 | 94 | bool BlockBuilder::empty() { 95 | return record_list.empty(); 96 | } 97 | ptr BlockBuilder::max_record() { 98 | return record_list.back(); 99 | } 100 | int BlockBuilder::size() { 101 | return size_; 102 | } 103 | int BlockBuilder::add_record(const ptr& record) { 104 | int need = record->user_key()->size(); 105 | need+=4+8+sizeof(KeyType)+4+2; 106 | if(record->value()){ 107 | need+=+record->value()->size(); 108 | } 109 | if(size_+need>config::BLOCK_SIZE){ 110 | return -1; 111 | } 112 | record_list.push_back(record); 113 | size_+=need; 114 | return 0; 115 | } 116 | int BlockBuilder::clear() { 117 | record_list.clear(); 118 | size_=4; 119 | return 0; 120 | } 121 | int BlockBuilder::dump(const minidb::ptr& writer) { 122 | uint16_t offset=0; 123 | vec record_offset_array; 124 | for(const auto& record:record_list){ 125 | int cnt=0; 126 | record_offset_array.push_back(offset); 127 | int size = record->user_key()->size(); 128 | writer->append(&size,4); 129 | cnt+=4; 130 | writer->append(record->user_key()->data(),record->user_key()->size()); 131 | cnt+=record->user_key()->size(); 132 | LogSeqNumber lsn = record->lsn(); 133 | writer->append(&lsn,8); 134 | cnt+=8; 135 | 
KeyType type = record->type(); 136 | writer->append(&type,sizeof(KeyType)); 137 | cnt+=sizeof(KeyType); 138 | if(record->value()) { 139 | size = record->value()->size(); 140 | writer->append(&size, 4); 141 | cnt += 4; 142 | writer->append(record->value()->data(), record->value()->size()); 143 | cnt += record->value()->size(); 144 | } 145 | offset+=cnt; 146 | } 147 | uint16_t record_offset_array_offset = offset; 148 | for(auto record_offset:record_offset_array){ 149 | writer->append(&record_offset,2); 150 | offset+=2; 151 | } 152 | std::string padding(config::BLOCK_SIZE-4-offset,0); 153 | writer->append(padding.data(),padding.size()); 154 | writer->append(&record_offset_array_offset,2); 155 | uint16_t array_size = record_list.size(); 156 | writer->append(&array_size,2); 157 | return 0; 158 | } 159 | BlockBuilder::BlockBuilder():size_(4){}//block末尾2字节存array的base,2字节存array的size 160 | } -------------------------------------------------------------------------------- /src/db_impl.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by cangfeng on 2019/12/2. 
3 | // 4 | 5 | #include "db_impl.h" 6 | #include "version.h" 7 | #include "file_util.h" 8 | #include "sstable_builder.h" 9 | #include "timer.h" 10 | #include "merge_heap.h" 11 | #include "log.h" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "debug.h" 17 | namespace minidb { 18 | DBImpl::DBImpl(std::string db_name) : db_name_(std::move(db_name)) { 19 | file_number_ = 1; 20 | lsn_ = 1; 21 | } 22 | 23 | ptr DBImpl::open(const std::string &db_name) { 24 | using std::max; 25 | ptr impl = make_ptr(db_name); 26 | int version_fn = get_version_pointer(db_name); 27 | log_info("load version:%d",version_fn); 28 | MmapReader reader(db_name + "/" + fn_fmt(version_fn) + ".ver", true); 29 | auto log2mem = [impl, db_name](int log_fn) { 30 | log_info("load log file %d",log_fn); 31 | MmapReader log_reader(db_name + "/" + fn_fmt(log_fn) + ".log", false); 32 | ptr mem=make_ptr(); 33 | Checksum checksum; 34 | int user_key_size; 35 | ptr user_key; 36 | LogSeqNumber lsn; 37 | KeyType key_type; 38 | int value_size; 39 | ptr value; 40 | int cnt; 41 | while (log_reader.remain() > 0) { 42 | cnt = log_reader.read(&checksum, sizeof(Checksum)); 43 | if (cnt != sizeof(Checksum))break; 44 | cnt = log_reader.read(&user_key_size, sizeof(int)); 45 | if (cnt != sizeof(int))break; 46 | user_key = make_ptr(user_key_size); 47 | cnt = log_reader.read((void *) user_key->data(), user_key_size); 48 | if (cnt != user_key_size)break; 49 | cnt = log_reader.read(&lsn, sizeof(LogSeqNumber)); 50 | if (cnt != sizeof(LogSeqNumber))break; 51 | cnt = log_reader.read(&key_type, sizeof(KeyType)); 52 | if (cnt != sizeof(KeyType))break; 53 | if(key_type==KeyType::INSERT) { 54 | cnt = log_reader.read(&value_size, sizeof(int)); 55 | if (cnt != sizeof(int))break; 56 | value = make_ptr(value_size); 57 | cnt = log_reader.read((void *) value->data(), value_size); 58 | if (cnt != value_size)break; 59 | } 60 | else{ 61 | value= nullptr; 62 | } 63 | ptr record = make_ptr(user_key, lsn, key_type, value); 64 
| if (record->checksum() != checksum)break; 65 | impl->lsn_ = lsn; 66 | mem->set(user_key, lsn, key_type, value); 67 | } 68 | log_debug("remain:%d",log_reader.remain()); 69 | assert(log_reader.remain()==0); 70 | return mem; 71 | }; 72 | int x; 73 | ptr log; 74 | ptr pre_log; 75 | SstSetList sst_set_list; 76 | //恢复memtable 77 | reader.read(&x, 4); 78 | if (x > 0) { 79 | impl->memtable_ = log2mem(x); 80 | log = make_ptr(db_name, x, false); 81 | impl->file_number_ = max(impl->file_number_, x); 82 | } 83 | //恢复immu_memtable 84 | reader.read(&x, 4); 85 | if (x > 0) { 86 | impl->immu_memtable_ = log2mem(x); 87 | pre_log = make_ptr(db_name, x, false); 88 | impl->file_number_ = max(impl->file_number_, x); 89 | } 90 | //恢复sst_list 91 | reader.read(&x,4); 92 | int cnt = x; 93 | char level; 94 | for(int i=0;i(db_name,x)); 98 | impl->file_number_ = max(impl->file_number_, x); 99 | } 100 | //获取version的最大lsn,避免出现log都为空的情况 101 | LogSeqNumber lsn; 102 | reader.read(&lsn,sizeof(LogSeqNumber)); 103 | impl->lsn_=max(impl->lsn_,lsn); 104 | 105 | //init version 106 | impl->version_=make_ptr(log,pre_log,sst_set_list,lsn,db_name,version_fn, false); 107 | //impl->start_compact_thread(); 108 | if(impl->immu_memtable_){ 109 | impl->compact_task_queue.push(-1); 110 | } 111 | return impl; 112 | 113 | } 114 | 115 | ptr DBImpl::create(const std::string &db_name) { 116 | create_dir(db_name); 117 | ptr impl = make_ptr(db_name); 118 | ptr log = make_ptr(db_name,impl->file_number_++,true); 119 | impl->version_ = make_ptr(log, nullptr,SstSetList(),impl->lsn_,db_name, impl->file_number_, true); 120 | set_version_pointer(db_name,impl->file_number_); 121 | impl->file_number_++; 122 | impl->memtable_ = make_ptr(); 123 | return impl; 124 | } 125 | 126 | void DBImpl::set(const minidb::ptr& key, const minidb::ptr& value) { 127 | write(key,KeyType::INSERT,value); 128 | } 129 | void DBImpl::make_write_room() { 130 | if(memtable_->size()>=config::MEMTABLE_MAX_SIZE){ 131 | assert(immu_memtable_==nullptr); 132 
| log_debug("make write room");
      // NOTE(review): the statements up to line 143 are the end of a definition that
      // starts before this excerpt. What is visible: the current memtable becomes the
      // immutable one, a version edit (current log recorded as pre-log, plus a new log)
      // is applied and swapped in, a fresh memtable is installed, and task -1 is queued
      // (do_compact treats -1 as a minor compaction).
133 |         immu_memtable_=memtable_;
134 |         ptr edit = make_ptr();
135 |         edit->set_pre_log(version_->log_);
136 |         edit->set_log(make_ptr(db_name_,file_number_++, true));
137 |         int new_ver_fn = file_number_++;
138 |         auto new_ver = version_->apply(edit,db_name_,new_ver_fn);
139 |         exchange_version(new_ver,new_ver_fn);
140 |         memtable_=make_ptr();
141 |         compact_task_queue.push(-1);
142 |     }
143 | }
      // Stores one record for user_key.
      // Steps: make_write_room(), build a record with sequence number lsn_ + 1, append it
      // to the current version's log and flush, apply it to the memtable, publish the new
      // lsn_, then run one pass over the pending compaction tasks.
      //   user_key - key of the record
      //   key_type - record kind (remove() passes KeyType::DELETE)
      //   value    - payload; remove() passes nullptr
144 | void DBImpl::write(const minidb::ptr& user_key, minidb::KeyType key_type,
145 |                    const minidb::ptr& value) {
146 |     make_write_room();
147 |     LogSeqNumber lsn = lsn_ + 1;
148 |     ptr record = make_ptr(user_key, lsn, key_type, value);
149 |     //timer::start(std::string("log"));
      // The log is appended and flushed before the memtable is touched.
150 |     version_->log_->append(record);
151 |     version_->log_->flush();
152 |     //timer::end(std::string("log"));
153 |     //timer::start(std::string("mem"));
154 |     memtable_->set(user_key, lsn, key_type, value);
155 |     //timer::end(std::string("mem"));
      // lsn_ is only advanced after both the log and the memtable have the record.
156 |     lsn_ = lsn;
157 |     do_compact(false);
158 |
159 | }
      // Looks up key and returns its value, or nullptr when no record is found or the
      // record found is a DELETE marker.
      // Search order: memtable, immutable memtable (if any), level-0 tables, then the
      // tables of the higher levels. A hit in either memtable returns immediately.
      // Ends with one pass over the pending compaction tasks.
160 | ptr DBImpl::get(const minidb::ptr& key) {
161 |     //TODO lock version
      // Local copies of the members that the rest of the lookup reads.
162 |     auto memtable = memtable_;
163 |     auto immu_memtable = immu_memtable_;
164 |     auto version = version_;
165 |     LogSeqNumber lsn = lsn_;
166 |     //release the lock
167 |
168 |     //memtable
169 |     ptr ret = memtable->get(key, lsn);
170 |     if(ret){return ret->type()==KeyType::DELETE? nullptr:ret->value();}
171 |     //immu memtable
172 |     if(immu_memtable){
173 |         ret = immu_memtable->get(key,lsn);
174 |         if(ret){return ret->type()==KeyType::DELETE? nullptr:ret->value();}
175 |     }
176 |     //search both level 0 and levels 1-n, and keep the hit with the larger lsn
177 |     ptr lookup = make_ptr(key,lsn,KeyType::LOOKUP, nullptr);
178 |     auto& sst_set_list = version->sst_set_list_;
179 |     //sst level 0
      // Every level-0 table is probed; among exact user-key matches the one with the
      // greatest lsn is kept in res.
180 |     ptr res;
181 |     for(const auto& sst:sst_set_list[0]){
182 |         auto tmp = sst->lower_bound(lookup);
183 |         //TODO incr miss count
      // NOTE(review): per the TODOs the table is not yet marked as waiting for compaction
      // here, so once the condition holds each probe appears to queue another task.
184 |         if(!sst->wait_compact()&&sst->miss_times()>=config::MAX_MISS_TIMES){
185 |             //TODO set sst wait compact
186 |             compact_task_queue.push(0);
187 |         }
188 |         if(tmp== nullptr){
189 |             continue;
190 |         }
      // lower_bound may return a record for a different key; only exact matches count.
191 |         if(userkey_comparator(tmp->user_key(),lookup->user_key())==0){
192 |             if(res==nullptr){
193 |                 res=tmp;
194 |             }else if(tmp->lsn()>res->lsn()){
195 |                 res = tmp;
196 |             }
197 |         }
198 |     }
199 |     //sst level 1-n
200 |     ptr res2;
      // NOTE(review): the next line is truncated in this copy (numbering jumps from 201
      // to 204); the loop bound and the inner loop header are missing. From the two
      // closing braces at 216/217 it is presumably an outer loop over levels i with an
      // inner loop over that level's tables - confirm against the original file.
201 |     for(int i=1;ilower_bound(lookup);
204 |             //TODO incr miss count
205 |             if(!sst->wait_compact()&&sst->miss_times()>=config::MAX_MISS_TIMES){
206 |                 //TODO set sst wait compact
207 |                 compact_task_queue.push(i);
208 |             }
209 |             if(tmp== nullptr){
210 |                 continue;
211 |             }
212 |             if(userkey_comparator(tmp->user_key(),lookup->user_key())==0){
      // This break leaves only the innermost loop.
      // NOTE(review): whether deeper levels are still visited afterwards depends on the
      // truncated header above - verify that an older record cannot overwrite res2.
213 |                 res2 = tmp;
214 |                 break;
215 |             }
216 |         }
217 |     }
      // Merge the two candidates: prefer the one with the larger lsn.
218 |     ret = res;
219 |     if(res2){
220 |         if(ret== nullptr){
221 |             ret = res2;
222 |         }else if(res2->lsn()>ret->lsn()){
223 |             ret = res2;
224 |         }
225 |     }
226 |     do_compact(false);
227 |     if(ret== nullptr||ret->type()==KeyType::DELETE){
228 |         return nullptr;
229 |     }
230 |     return ret->value();
231 | }
232 |
      // Deletes key by writing a DELETE record with a null value.
233 | void DBImpl::remove(const minidb::ptr& key) {
234 |     write(key,KeyType::DELETE, nullptr);
235 | }
      // Writes the contents of mem into a new table file and registers it at level 0.
      // Steps: copy every record from mem's iterator into an SSTableBuilder for a fresh
      // file number, finish the file, remove the version's pre-log, apply a version edit
      // (pre-log cleared, new table added at level 0), swap the version in, and clear
      // immu_memtable_. Queues a level-0 compaction when level 0 has reached
      // config::SSTABLE_MAX_FILE_COUNT tables. Always returns 0.
      // NOTE(review): mem and version_->pre_log_ are dereferenced without a null check -
      // confirm callers guarantee both are set.
236 | int DBImpl::minor_compact(const minidb::ptr& mem) {
237 |     log_debug("minor compact");
238 |     int fn = file_number_++;
239 |     SSTableBuilder sst(db_name_,fn);
240 |     auto iter = mem->iterator();
241 |     while(iter.hash_next()){
242 |         ptr record = iter.next();
243 |         sst.add_record(record);
244 |     }
245 |     sst.finish();
246 |     version_->pre_log_->remove();
247 |     ptr s = make_ptr(db_name_,fn);
248 |     ptr version_edit=make_ptr();
249 |     version_edit->set_pre_log(nullptr);
250 |     version_edit->add_sst(s,0);
251 |     int new_ver_fn = file_number_++;
252 |     auto new_ver = version_->apply(version_edit,db_name_,new_ver_fn);
253 |     exchange_version(new_ver,new_ver_fn);
254 |     immu_memtable_= nullptr;
      // NOTE(review): exchange_version already calls set_version_pointer with the same
      // arguments, so this call looks redundant - confirm before removing.
255 |     set_version_pointer(db_name_,new_ver_fn);
256 |     if(version_->sst_set_list_[0].size()>=config::SSTABLE_MAX_FILE_COUNT){
257 |         compact_task_queue.push(0);
258 |     }
259 |     return 0;
260 | }
      // Sets the stop flag that do_compact's outer loop checks.
261 | void DBImpl::stop() {
262 |     stop_= true;
263 | }
      // Calls stop() if it has not been called yet. Joining the compaction thread is
      // commented out.
264 | DBImpl::~DBImpl(){
265 |     if(!stop_){
266 |         stop();
267 |     }
268 |     //compact_thread.join();
269 | }
270 | // void DBImpl::start_compact_thread() {
271 | //     compact_thread = std::thread(_start_compact_thread,shared_from_this());
272 | // }
      // Entry point for the compaction loop on the given database: runs do_compact in
      // looping mode. Its only visible user, start_compact_thread, is commented out above.
273 | void DBImpl::_start_compact_thread(minidb::ptr db) {
274 |     db->do_compact(true);
275 | }
      // Drains compact_task_queue. Task -1 runs a minor compaction of immu_memtable_;
      // any other value is a level number passed to major_compact.
      //   loop - false: drain the queue once and return;
      //          true: keep going until stop_ is set.
276 | void DBImpl::do_compact(bool loop) {
277 |     while(!stop_){
278 |         if(loop&&compact_task_queue.empty()){
      // NOTE(review): with the sleep commented out this branch spins without pausing
      // while the queue is empty.
279 |             //std::this_thread::sleep_for(std::chrono::milliseconds(1000));
280 |             continue;
281 |         }
      // last remembers the previous major-compaction level so that an immediately
      // repeated request for the same level is skipped; -1 tasks do not update it.
282 |         int last=-2;
283 |         while(!compact_task_queue.empty()){
284 |             int x = compact_task_queue.front();
285 |             compact_task_queue.pop();
286 |             if(x==-1){
287 |                 minor_compact(immu_memtable_);
288 |                 continue;
289 |             }
290 |             if(x==last){
291 |                 continue;
292 |             }
293 |             else{
294 |                 major_compact(x);
295 |             }
296 |             last=x;
297 |         }
298 |         if(!loop){
299 |             break;
300 |         }
301 |     }
302 | }
      // Merges tables of `level` into new tables at level+1.
      // Steps: choose the input tables of `level`, add the level+1 tables whose key range
      // overlaps them, merge all inputs through a MergeHeap while writing only the first
      // record popped for each user key, then apply a version edit that removes the
      // inputs and adds the outputs. Queues a compaction of level+1 when it has reached
      // config::SSTABLE_MAX_FILE_COUNT tables.
      // Returns 0 in all cases, including the no-op for the last level.
303 | int DBImpl::major_compact(int level) {
304 |     //the last level cannot be compacted
305 |     if(level==config::SSTABLE_LEVEL-1){
306 |         return 0;
307 |     }
308 |     log_debug("major compact");
309 |     ptr version = version_;
310 |     ptr edit = make_ptr();
311 |     //collect the ssts of this level that are waiting for compaction
312 |     SSTableSet sst_set;
313 |     for(const auto& sst:version->sst_set_list_[level]){
314 |         if(sst->wait_compact()){
315 |             sst_set.insert(sst);
316 |             edit->remove_sst(sst,level);
317 |         }
318 |     }
      // No table is flagged: pick one table of the level at random instead.
      // NOTE(review): if the level holds no tables, size() is 0 and the modulo below is
      // a division by zero - confirm a task for an empty level can never be queued.
319 |     if(sst_set.empty()){
320 |         int x = rand()%version->sst_set_list_[level].size();
321 |         auto iter = version->sst_set_list_[level].begin();
322 |         while(x--){
323 |             iter++;
324 |         }
325 |         edit->remove_sst(*iter,level);
326 |         sst_set.insert(*iter);
327 |     }
328 |     //pick the level+1 ssts based on the key ranges of the chosen ssts
      // A level+1 table is skipped for a given input only when its range lies entirely
      // above or entirely below that input's range.
      // NOTE(review): sst_set is inserted into while it is being iterated, and there is
      // no break after a match, so remove_sst may be issued more than once for the same
      // table. Whether iteration stays valid depends on the SSTableSet type - verify.
329 |     for(const auto& sst:version->sst_set_list_[level+1]){
330 |         for(const auto& sst2:sst_set){
331 |             if(userkey_comparator(sst->min_user_key,sst2->max_user_key)==1||
332 |                userkey_comparator(sst->max_user_key,sst2->min_user_key)==-1){
333 |                 continue;
334 |             }
335 |             sst_set.insert(sst);
336 |             edit->remove_sst(sst,level+1);
337 |         }
338 |     }
339 |     //iterate over the ssts to be merged (merge sort)
340 |     MergeHeap heap;
341 |     for(const auto& sst:sst_set){
342 |         heap.add_sst(sst);
343 |     }
344 |     heap.init();
345 |     int sst_fn=file_number_++;
346 |     ptr sst_builder = make_ptr(db_name_,sst_fn);
      // last is the most recently written record; later records with the same user key
      // are dropped. Presumably the heap yields the newest record of a key first - TODO
      // confirm in MergeHeap.
347 |     ptr last;
348 |     timer::start("merge");
349 |     while(!heap.empty()){
350 | #ifdef DEBUG
351 |         timer::start("pop");
352 | #endif
353 |         auto nxt = heap.pop();
354 | #ifdef DEBUG
355 |         timer::end("pop");
356 | #endif
357 |         if(nxt== nullptr){
358 |             break;
359 |         }
360 |         if(last&&userkey_comparator(last->user_key(),nxt->user_key())==0){
361 |             continue;
362 |         }else{
      // Roll over to a new output file once the current one reaches the size configured
      // for level+1.
363 |             if(sst_builder->size()>=config::SSTABLE_FILE_SIZE[level+1]){
364 |                 sst_builder->finish();
365 |                 edit->add_sst(make_ptr(db_name_,sst_fn),level+1);
366 |                 sst_fn = file_number_++;
367 |                 sst_builder = make_ptr(db_name_,sst_fn);
368 |             }
369 | #ifdef DEBUG
370 |             timer::start("add record");
371 | #endif
372 |             sst_builder->add_record(nxt);
373 | #ifdef DEBUG
374 |             timer::end("add record");
375 | #endif
376 |             last = nxt;
377 |         }
378 |     }
      // The builder still open at this point is always finished and registered.
379 |     sst_builder->finish();
380 |     timer::end("merge");
381 |     edit->add_sst(make_ptr(db_name_,sst_fn),level+1);
382 |     int new_ver_fn = file_number_++;
383 |     auto new_ver = version_->apply(edit,db_name_,new_ver_fn);
384 |     exchange_version(new_ver,new_ver_fn);
385 |     if(version_->sst_set_list_[level+1].size()>=config::SSTABLE_MAX_FILE_COUNT){
386 |         compact_task_queue.push(level+1);
387 |     }
388 |     return 0;
389 | }
390 |
      // Replaces the current version: calls remove() on the old version_, takes ownership
      // of new_ver, and records new_ver_fn through set_version_pointer. Always returns 0.
391 | int DBImpl::exchange_version(ptr new_ver, int new_ver_fn) {
392 |     version_->remove();
393 |     version_=std::move(new_ver);
394 |     //version_->print();
395 |     set_version_pointer(db_name_,new_ver_fn);
396 |     return 0;
397 | }
398 |
399 | }
400 |
--------------------------------------------------------------------------------