├── README.md ├── go.mod ├── go.sum ├── utils └── comparator.go ├── internal ├── config.go ├── error.go ├── filename.go └── internal_key.go ├── memtable ├── memtable_test.go ├── iterator.go └── memtable.go ├── skiplist ├── node.go ├── skiplist_test.go ├── iterator.go └── skiplist.go ├── sstable ├── block │ ├── block.go │ ├── block_test.go │ ├── block_build.go │ └── iterator.go ├── sstable_test.go ├── sstable.go ├── block_handle.go ├── table_build.go └── iterator.go ├── version ├── version_test.go ├── table_cache.go ├── merger_iterator.go ├── version.go └── compaction.go ├── leveldb.go ├── db ├── db_test.go ├── bgwork.go └── db.go └── LICENSE /README.md: -------------------------------------------------------------------------------- 1 | # leveldb -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/merlin82/leveldb 2 | 3 | go 1.12 4 | 5 | require github.com/hashicorp/golang-lru v0.5.1 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= 2 | github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 3 | -------------------------------------------------------------------------------- /utils/comparator.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | // <0 , if a < b 4 | // =0 , if a == b 5 | // >0 , if a > b 6 | type Comparator func(a, b interface{}) int 7 | 8 | func IntComparator(a, b interface{}) int { 9 | aInt := a.(int) 10 | bInt := b.(int) 11 | return aInt - bInt 12 | } 13 | -------------------------------------------------------------------------------- /internal/config.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | const ( 4 | L0_CompactionTrigger = 4 5 | L0_SlowdownWritesTrigger = 8 6 | Write_buffer_size = 4 << 20 7 | NumLevels = 7 8 | MaxOpenFiles = 1000 9 | NumNonTableCacheFiles = 10 10 | MaxMemCompactLevel = 2 11 | MaxFileSize = 2 << 20 12 | ) 13 | -------------------------------------------------------------------------------- /internal/error.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | var ( 8 | ErrNotFound = errors.New("Not Found") 9 | ErrDeletion = errors.New("Type Deletion") 10 | ErrTableFileMagic = errors.New("not an sstable (bad magic number)") 11 | ErrTableFileTooShort = errors.New("file is too short to be an sstable") 12 | ) 13 | -------------------------------------------------------------------------------- /memtable/memtable_test.go: -------------------------------------------------------------------------------- 1 | package memtable 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/merlin82/leveldb/internal" 8 | ) 9 | 10 | func Test_MemTable(t *testing.T) { 11 | memTable := New() 12 | memTable.Add(1234567, internal.TypeValue, []byte("aadsa34a"), []byte("bb23b3423")) 13 | value, _ := memTable.Get([]byte("aadsa34a")) 14 | fmt.Println(string(value)) 15 | fmt.Println(memTable.ApproximateMemoryUsage()) 16 | } 17 | -------------------------------------------------------------------------------- /skiplist/node.go: -------------------------------------------------------------------------------- 1 | package skiplist 2 | 3 | type Node struct { 4 | key interface{} 5 | next []*Node 6 | } 7 | 8 | func newNode(key interface{}, height int) *Node { 9 | x := new(Node) 10 | x.key = key 11 | x.next = make([]*Node, height) 12 | 13 | return x 14 | } 15 | func (node *Node) getNext(level int) *Node { 16 | return node.next[level] 17 | } 18 | 19 | func (node *Node) setNext(level int, x *Node) { 20 | node.next[level] = x 21 | } 22 | -------------------------------------------------------------------------------- /skiplist/skiplist_test.go: -------------------------------------------------------------------------------- 1 | package skiplist 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "testing" 7 | 8 | "github.com/merlin82/leveldb/utils" 9 | ) 10 | 11 | func Test_Insert(t *testing.T) { 12 | skiplist := New(utils.IntComparator) 13 | for i := 0; i < 10; i++ { 14 | skiplist.Insert(rand.Int() % 10) 15 | } 16 | it := skiplist.NewIterator() 17 | for it.SeekToFirst(); it.Valid(); it.Next() { 18 | fmt.Println(it.Key()) 19 | } 20 | fmt.Println() 21 | for it.SeekToLast(); it.Valid(); it.Prev() { 22 | fmt.Println(it.Key()) 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /internal/filename.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | func makeFileName(dbname string, number uint64, suffix string) string { 8 | return fmt.Sprintf("%s/%06d.%s", dbname, number, suffix) 9 | } 10 | 11 | func TableFileName(dbname string, number uint64) string { 12 | return makeFileName(dbname, number, "ldb") 13 | } 14 | 15 | func DescriptorFileName(dbname string, number uint64) string { 16 | return fmt.Sprintf("%s/MANIFEST-%06d", dbname, number) 17 | } 18 | 19 | func CurrentFileName(dbname string) string { 20 | return dbname + "/CURRENT" 21 | } 22 | func TempFileName(dbname string, number uint64) string { 23 | return makeFileName(dbname, number, "dbtmp") 24 | } 25 | -------------------------------------------------------------------------------- /sstable/block/block.go: -------------------------------------------------------------------------------- 1 | package block 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | 7 | "github.com/merlin82/leveldb/internal" 8 | ) 9 | 10 | type Block struct { 11 | items []internal.InternalKey 12 | } 13 | 14 | func New(p []byte) *Block { 15 | var block Block 16 | data := bytes.NewBuffer(p) 17 | counter := binary.LittleEndian.Uint32(p[len(p)-4:]) 18 | 19 | for i := uint32(0); i < counter; i++ { 20 | var item internal.InternalKey 21 | err := item.DecodeFrom(data) 22 | if err != nil { 23 | return nil 24 | } 25 | block.items = append(block.items, item) 26 | } 27 | 28 | return &block 29 | } 30 | 31 | func (block *Block) NewIterator() *Iterator { 32 | return &Iterator{block: block} 33 | } 34 | -------------------------------------------------------------------------------- /sstable/block/block_test.go: -------------------------------------------------------------------------------- 1 | package block 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/merlin82/leveldb/internal" 7 | ) 8 | 9 | func Test_SsTable(t *testing.T) { 10 | var builder BlockBuilder 11 | 12 | item := internal.NewInternalKey(1, internal.TypeValue, []byte("123"), []byte("1234")) 13 | builder.Add(item) 14 | item = internal.NewInternalKey(2, internal.TypeValue, []byte("124"), []byte("1245")) 15 | builder.Add(item) 16 | item = internal.NewInternalKey(3, internal.TypeValue, []byte("125"), []byte("0245")) 17 | builder.Add(item) 18 | p := builder.Finish() 19 | 20 | block := New(p) 21 | it := block.NewIterator() 22 | 23 | it.Seek([]byte("1244")) 24 | if it.Valid() { 25 | if string(it.InternalKey().UserKey) != "125" { 26 | t.Fail() 27 | } 28 | 29 | } else { 30 | t.Fail() 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /sstable/block/block_build.go: -------------------------------------------------------------------------------- 1 | package block 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | 7 | "github.com/merlin82/leveldb/internal" 8 | ) 9 | 10 | type BlockBuilder struct { 11 | buf bytes.Buffer 12 | counter uint32 13 | } 14 | 15 | func (blockBuilder *BlockBuilder) Reset() { 16 | blockBuilder.counter = 0 17 | blockBuilder.buf.Reset() 18 | } 19 | 20 | func (blockBuilder *BlockBuilder) Add(item *internal.InternalKey) error { 21 | blockBuilder.counter++ 22 | return item.EncodeTo(&blockBuilder.buf) 23 | } 24 | 25 | func (blockBuilder *BlockBuilder) Finish() []byte { 26 | binary.Write(&blockBuilder.buf, binary.LittleEndian, blockBuilder.counter) 27 | return blockBuilder.buf.Bytes() 28 | } 29 | 30 | func (blockBuilder *BlockBuilder) CurrentSizeEstimate() int { 31 | return blockBuilder.buf.Len() 32 | } 33 | 34 | func (blockBuilder *BlockBuilder) Empty() bool { 35 | return blockBuilder.buf.Len() == 0 36 | } 37 | -------------------------------------------------------------------------------- /sstable/sstable_test.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "fmt" 5 | 6 | "testing" 7 | 8 | "github.com/merlin82/leveldb/internal" 9 | ) 10 | 11 | func Test_SsTable(t *testing.T) { 12 | builder := NewTableBuilder("D:\\000123.ldb") 13 | item := internal.NewInternalKey(1, internal.TypeValue, []byte("123"), []byte("1234")) 14 | builder.Add(item) 15 | item = internal.NewInternalKey(2, internal.TypeValue, []byte("124"), []byte("1245")) 16 | builder.Add(item) 17 | item = internal.NewInternalKey(3, internal.TypeValue, []byte("125"), []byte("0245")) 18 | builder.Add(item) 19 | builder.Finish() 20 | 21 | table, err := Open("D:\\000123.ldb") 22 | fmt.Println(err) 23 | if err == nil { 24 | fmt.Println(table.index) 25 | fmt.Println(table.footer) 26 | } 27 | it := table.NewIterator() 28 | it.Seek([]byte("1244")) 29 | if it.Valid() { 30 | if string(it.InternalKey().UserKey) != "125" { 31 | t.Fail() 32 | } 33 | } else { 34 | t.Fail() 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /version/version_test.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/merlin82/leveldb/internal" 8 | "github.com/merlin82/leveldb/memtable" 9 | ) 10 | 11 | func Test_Version_Get(t *testing.T) { 12 | v := New("D:\\") 13 | var f FileMetaData 14 | f.number = 123 15 | f.smallest = internal.NewInternalKey(1, internal.TypeValue, []byte("123"), nil) 16 | f.largest = internal.NewInternalKey(1, internal.TypeValue, []byte("125"), nil) 17 | v.files[0] = append(v.files[0], &f) 18 | 19 | value, err := v.Get([]byte("125")) 20 | fmt.Println(err, value) 21 | } 22 | 23 | func Test_Version_Load(t *testing.T) { 24 | v := New("D:\\leveldbtest") 25 | memTable := memtable.New() 26 | memTable.Add(1234567, internal.TypeValue, []byte("aadsa34a"), []byte("bb23b3423")) 27 | v.WriteLevel0Table(memTable) 28 | n, _ := v.Save() 29 | fmt.Println(v) 30 | 31 | v2, _ := Load("D:\\leveldbtest", n) 32 | fmt.Println(v2) 33 | value, err := v2.Get([]byte("aadsa34a")) 34 | fmt.Println(err, value) 35 | } 36 | -------------------------------------------------------------------------------- /leveldb.go: -------------------------------------------------------------------------------- 1 | package leveldb 2 | 3 | import ( 4 | "github.com/merlin82/leveldb/db" 5 | ) 6 | 7 | type LevelDb interface { 8 | Put(key, value []byte) error 9 | Get(key []byte) ([]byte, error) 10 | Delete(key []byte) error 11 | } 12 | 13 | type Iterator interface { 14 | // Returns true iff the iterator is positioned at a valid node. 15 | Valid() bool 16 | 17 | // Returns the key at the current position. 18 | // REQUIRES: Valid() 19 | Key() []byte 20 | 21 | // Return the value for the current entry. The underlying storage for 22 | // the returned slice is valid only until the next modification of 23 | // the iterator. 24 | // REQUIRES: Valid() 25 | Value() []byte 26 | 27 | // Advances to the next position. 28 | // REQUIRES: Valid() 29 | Next() 30 | 31 | // Advances to the previous position. 32 | // REQUIRES: Valid() 33 | Prev() 34 | 35 | // Advance to the first entry with a key >= target 36 | Seek(target []byte) 37 | 38 | // Position at the first entry in list. 39 | // Final state of iterator is Valid() iff list is not empty. 40 | SeekToFirst() 41 | 42 | // Position at the last entry in list. 43 | // Final state of iterator is Valid() iff list is not empty. 44 | SeekToLast() 45 | } 46 | 47 | func Open(dbName string) LevelDb { 48 | return db.Open(dbName) 49 | } 50 | -------------------------------------------------------------------------------- /memtable/iterator.go: -------------------------------------------------------------------------------- 1 | package memtable 2 | 3 | import ( 4 | "github.com/merlin82/leveldb/internal" 5 | "github.com/merlin82/leveldb/skiplist" 6 | ) 7 | 8 | type Iterator struct { 9 | listIter *skiplist.Iterator 10 | } 11 | 12 | // Returns true iff the iterator is positioned at a valid node. 13 | func (it *Iterator) Valid() bool { 14 | return it.listIter.Valid() 15 | } 16 | 17 | func (it *Iterator) InternalKey() *internal.InternalKey { 18 | return it.listIter.Key().(*internal.InternalKey) 19 | } 20 | 21 | // Advances to the next position. 22 | // REQUIRES: Valid() 23 | func (it *Iterator) Next() { 24 | it.listIter.Next() 25 | } 26 | 27 | // Advances to the previous position. 28 | // REQUIRES: Valid() 29 | func (it *Iterator) Prev() { 30 | it.listIter.Prev() 31 | } 32 | 33 | // Advance to the first entry with a key >= target 34 | func (it *Iterator) Seek(target interface{}) { 35 | it.listIter.Seek(target) 36 | } 37 | 38 | // Position at the first entry in list. 39 | // Final state of iterator is Valid() iff list is not empty. 40 | func (it *Iterator) SeekToFirst() { 41 | it.listIter.SeekToFirst() 42 | } 43 | 44 | // Position at the last entry in list. 45 | // Final state of iterator is Valid() iff list is not empty. 46 | func (it *Iterator) SeekToLast() { 47 | it.listIter.SeekToLast() 48 | } 49 | -------------------------------------------------------------------------------- /db/db_test.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | var r = rand.New(rand.NewSource(time.Now().UnixNano())) 11 | 12 | //生成随机字符串 13 | func GetRandomString(lenth int) []byte { 14 | str := "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 15 | bytes := []byte(str) 16 | result := []byte{} 17 | 18 | for i := 0; i < lenth; i++ { 19 | result = append(result, bytes[r.Intn(len(bytes))]) 20 | } 21 | return result 22 | } 23 | 24 | func Test_Db(t *testing.T) { 25 | db := Open("D:\\leveldbtest") 26 | db.Put([]byte("123"), []byte("456")) 27 | 28 | value, err := db.Get([]byte("123")) 29 | fmt.Println(string(value)) 30 | 31 | db.Delete([]byte("123")) 32 | value, err = db.Get([]byte("123")) 33 | fmt.Println(err) 34 | 35 | db.Put([]byte("123"), []byte("789")) 36 | value, _ = db.Get([]byte("123")) 37 | fmt.Println(string(value)) 38 | db.Close() 39 | } 40 | 41 | func Test_Db2(t *testing.T) { 42 | db := Open("D:\\leveldbtest") 43 | db.Put([]byte("123"), []byte("456")) 44 | 45 | for i := 0; i < 1000000; i++ { 46 | db.Put(GetRandomString(10), GetRandomString(10)) 47 | } 48 | value, err := db.Get([]byte("123")) 49 | fmt.Println("db:", err, string(value)) 50 | db.Close() 51 | 52 | db2 := Open("D:\\leveldbtest") 53 | value, err = db2.Get([]byte("123")) 54 | fmt.Println("db reopen:", err, string(value)) 55 | db2.Close() 56 | } 57 | -------------------------------------------------------------------------------- /memtable/memtable.go: -------------------------------------------------------------------------------- 1 | package memtable 2 | 3 | import ( 4 | "github.com/merlin82/leveldb/internal" 5 | "github.com/merlin82/leveldb/skiplist" 6 | ) 7 | 8 | type MemTable struct { 9 | table *skiplist.SkipList 10 | memoryUsage uint64 11 | } 12 | 13 | func New() *MemTable { 14 | var memTable MemTable 15 | memTable.table = skiplist.New(internal.InternalKeyComparator) 16 | return &memTable 17 | } 18 | 19 | func (memTable *MemTable) NewIterator() *Iterator { 20 | return &Iterator{listIter: memTable.table.NewIterator()} 21 | } 22 | 23 | func (memTable *MemTable) Add(seq uint64, valueType internal.ValueType, key, value []byte) { 24 | internalKey := internal.NewInternalKey(seq, valueType, key, value) 25 | 26 | memTable.memoryUsage += uint64(16 + len(key) + len(value)) 27 | memTable.table.Insert(internalKey) 28 | } 29 | 30 | func (memTable *MemTable) Get(key []byte) ([]byte, error) { 31 | lookupKey := internal.LookupKey(key) 32 | 33 | it := memTable.table.NewIterator() 34 | it.Seek(lookupKey) 35 | if it.Valid() { 36 | internalKey := it.Key().(*internal.InternalKey) 37 | if internal.UserKeyComparator(key, internalKey.UserKey) == 0 { 38 | // 判断valueType 39 | if internalKey.Type == internal.TypeValue { 40 | return internalKey.UserValue, nil 41 | } else { 42 | return nil, internal.ErrDeletion 43 | } 44 | } 45 | } 46 | return nil, internal.ErrNotFound 47 | } 48 | 49 | func (memTable *MemTable) ApproximateMemoryUsage() uint64 { 50 | return memTable.memoryUsage 51 | } 52 | -------------------------------------------------------------------------------- /version/table_cache.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/hashicorp/golang-lru" 7 | "github.com/merlin82/leveldb/internal" 8 | "github.com/merlin82/leveldb/sstable" 9 | ) 10 | 11 | type TableCache struct { 12 | mu sync.Mutex 13 | dbName string 14 | cache *lru.Cache 15 | } 16 | 17 | func NewTableCache(dbName string) *TableCache { 18 | var tableCache TableCache 19 | tableCache.dbName = dbName 20 | tableCache.cache, _ = lru.New(internal.MaxOpenFiles - internal.NumNonTableCacheFiles) 21 | return &tableCache 22 | } 23 | 24 | func (tableCache *TableCache) NewIterator(fileNum uint64) *sstable.Iterator { 25 | table, _ := tableCache.findTable(fileNum) 26 | if table != nil { 27 | return table.NewIterator() 28 | } 29 | return nil 30 | } 31 | func (tableCache *TableCache) Get(fileNum uint64, key []byte) ([]byte, error) { 32 | table, err := tableCache.findTable(fileNum) 33 | if table != nil { 34 | return table.Get(key) 35 | } 36 | 37 | return nil, err 38 | } 39 | 40 | func (tableCache *TableCache) Evict(fileNum uint64) { 41 | tableCache.cache.Remove(fileNum) 42 | } 43 | 44 | func (tableCache *TableCache) findTable(fileNum uint64) (*sstable.SsTable, error) { 45 | tableCache.mu.Lock() 46 | defer tableCache.mu.Unlock() 47 | table, ok := tableCache.cache.Get(fileNum) 48 | if ok { 49 | return table.(*sstable.SsTable), nil 50 | } else { 51 | ssTable, err := sstable.Open(internal.TableFileName(tableCache.dbName, fileNum)) 52 | tableCache.cache.Add(fileNum, ssTable) 53 | return ssTable, err 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /db/bgwork.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "strconv" 8 | 9 | "github.com/merlin82/leveldb/internal" 10 | ) 11 | 12 | func (db *Db) maybeScheduleCompaction() { 13 | if db.bgCompactionScheduled { 14 | return 15 | } 16 | db.bgCompactionScheduled = true 17 | go db.backgroundCall() 18 | } 19 | 20 | func (db *Db) backgroundCall() { 21 | db.mu.Lock() 22 | defer db.mu.Unlock() 23 | db.backgroundCompaction() 24 | db.bgCompactionScheduled = false 25 | db.cond.Broadcast() 26 | } 27 | 28 | func (db *Db) backgroundCompaction() { 29 | imm := db.imm 30 | version := db.current.Copy() 31 | db.mu.Unlock() 32 | 33 | // minor compaction 34 | if imm != nil { 35 | version.WriteLevel0Table(imm) 36 | } 37 | // major compaction 38 | for version.DoCompactionWork() { 39 | version.Log() 40 | } 41 | descriptorNumber, _ := version.Save() 42 | db.SetCurrentFile(descriptorNumber) 43 | db.mu.Lock() 44 | db.imm = nil 45 | db.current = version 46 | } 47 | 48 | func (db *Db) SetCurrentFile(descriptorNumber uint64) { 49 | tmp := internal.TempFileName(db.name, descriptorNumber) 50 | ioutil.WriteFile(tmp, []byte(fmt.Sprintf("%d", descriptorNumber)), 0600) 51 | os.Rename(tmp, internal.CurrentFileName(db.name)) 52 | } 53 | 54 | func (db *Db) ReadCurrentFile() uint64 { 55 | b, err := ioutil.ReadFile(internal.CurrentFileName(db.name)) 56 | if err != nil { 57 | return 0 58 | } 59 | descriptorNumber, err := strconv.ParseUint(string(b), 10, 64) 60 | if err != nil { 61 | return 0 62 | } 63 | return descriptorNumber 64 | } 65 | -------------------------------------------------------------------------------- /version/merger_iterator.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "github.com/merlin82/leveldb/internal" 5 | "github.com/merlin82/leveldb/sstable" 6 | ) 7 | 8 | type MergingIterator struct { 9 | list []*sstable.Iterator 10 | current *sstable.Iterator 11 | } 12 | 13 | func NewMergingIterator(list []*sstable.Iterator) *MergingIterator { 14 | var iter MergingIterator 15 | iter.list = list 16 | return &iter 17 | } 18 | 19 | // Returns true iff the iterator is positioned at a valid node. 20 | func (it *MergingIterator) Valid() bool { 21 | return it.current != nil && it.current.Valid() 22 | } 23 | 24 | func (it *MergingIterator) InternalKey() *internal.InternalKey { 25 | return it.current.InternalKey() 26 | } 27 | 28 | // Advances to the next position. 29 | // REQUIRES: Valid() 30 | func (it *MergingIterator) Next() { 31 | if it.current != nil { 32 | it.current.Next() 33 | } 34 | it.findSmallest() 35 | } 36 | 37 | // Position at the first entry in list. 38 | // Final state of iterator is Valid() iff list is not empty. 39 | func (it *MergingIterator) SeekToFirst() { 40 | for i := 0; i < len(it.list); i++ { 41 | it.list[i].SeekToFirst() 42 | } 43 | it.findSmallest() 44 | } 45 | 46 | func (it *MergingIterator) findSmallest() { 47 | var smallest *sstable.Iterator = nil 48 | for i := 0; i < len(it.list); i++ { 49 | if it.list[i].Valid() { 50 | if smallest == nil { 51 | smallest = it.list[i] 52 | } else if internal.InternalKeyComparator(smallest.InternalKey(), it.list[i].InternalKey()) > 0 { 53 | smallest = it.list[i] 54 | } 55 | } 56 | } 57 | it.current = smallest 58 | } 59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, merlin82 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /skiplist/iterator.go: -------------------------------------------------------------------------------- 1 | package skiplist 2 | 3 | type Iterator struct { 4 | list *SkipList 5 | node *Node 6 | } 7 | 8 | // Returns true iff the iterator is positioned at a valid node. 9 | func (it *Iterator) Valid() bool { 10 | return it.node != nil 11 | } 12 | 13 | // Returns the key at the current position. 14 | // REQUIRES: Valid() 15 | func (it *Iterator) Key() interface{} { 16 | return it.node.key 17 | } 18 | 19 | // Advances to the next position. 20 | // REQUIRES: Valid() 21 | func (it *Iterator) Next() { 22 | it.list.mu.RLock() 23 | defer it.list.mu.RUnlock() 24 | 25 | it.node = it.node.getNext(0) 26 | } 27 | 28 | // Advances to the previous position. 29 | // REQUIRES: Valid() 30 | func (it *Iterator) Prev() { 31 | it.list.mu.RLock() 32 | defer it.list.mu.RUnlock() 33 | 34 | it.node = it.list.findLessThan(it.node.key) 35 | if it.node == it.list.head { 36 | it.node = nil 37 | } 38 | } 39 | 40 | // Advance to the first entry with a key >= target 41 | func (it *Iterator) Seek(target interface{}) { 42 | it.list.mu.RLock() 43 | defer it.list.mu.RUnlock() 44 | 45 | it.node, _ = it.list.findGreaterOrEqual(target) 46 | } 47 | 48 | // Position at the first entry in list. 49 | // Final state of iterator is Valid() iff list is not empty. 50 | func (it *Iterator) SeekToFirst() { 51 | it.list.mu.RLock() 52 | defer it.list.mu.RUnlock() 53 | 54 | it.node = it.list.head.getNext(0) 55 | } 56 | 57 | // Position at the last entry in list. 58 | // Final state of iterator is Valid() iff list is not empty. 59 | func (it *Iterator) SeekToLast() { 60 | it.list.mu.RLock() 61 | defer it.list.mu.RUnlock() 62 | 63 | it.node = it.list.findlast() 64 | if it.node == it.list.head { 65 | it.node = nil 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /sstable/block/iterator.go: -------------------------------------------------------------------------------- 1 | package block 2 | 3 | import ( 4 | "github.com/merlin82/leveldb/internal" 5 | ) 6 | 7 | type Iterator struct { 8 | block *Block 9 | index int 10 | } 11 | 12 | // Returns true iff the iterator is positioned at a valid node. 13 | func (it *Iterator) Valid() bool { 14 | return it.index >= 0 && it.index < len(it.block.items) 15 | } 16 | 17 | func (it *Iterator) InternalKey() *internal.InternalKey { 18 | return &it.block.items[it.index] 19 | } 20 | 21 | // Advances to the next position. 22 | // REQUIRES: Valid() 23 | func (it *Iterator) Next() { 24 | it.index++ 25 | } 26 | 27 | // Advances to the previous position. 28 | // REQUIRES: Valid() 29 | func (it *Iterator) Prev() { 30 | it.index-- 31 | } 32 | 33 | // Advance to the first entry with a key >= target 34 | func (it *Iterator) Seek(target interface{}) { 35 | // 二分法查询 36 | left := 0 37 | right := len(it.block.items) - 1 38 | for left < right { 39 | mid := (left + right) / 2 40 | if internal.UserKeyComparator(it.block.items[mid].UserKey, target) < 0 { 41 | left = mid + 1 42 | } else { 43 | right = mid 44 | } 45 | } 46 | if left == len(it.block.items)-1 { 47 | if internal.UserKeyComparator(it.block.items[left].UserKey, target) < 0 { 48 | // not found 49 | left++ 50 | } 51 | } 52 | it.index = left 53 | } 54 | 55 | // Position at the first entry in list. 56 | // Final state of iterator is Valid() iff list is not empty. 57 | func (it *Iterator) SeekToFirst() { 58 | it.index = 0 59 | } 60 | 61 | // Position at the last entry in list. 62 | // Final state of iterator is Valid() iff list is not empty. 63 | func (it *Iterator) SeekToLast() { 64 | if len(it.block.items) > 0 { 65 | it.index = len(it.block.items) - 1 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /sstable/sstable.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "io" 5 | "os" 6 | 7 | "github.com/merlin82/leveldb/internal" 8 | "github.com/merlin82/leveldb/sstable/block" 9 | ) 10 | 11 | type SsTable struct { 12 | index *block.Block 13 | footer Footer 14 | file *os.File 15 | } 16 | 17 | func Open(fileName string) (*SsTable, error) { 18 | var table SsTable 19 | var err error 20 | table.file, err = os.Open(fileName) 21 | if err != nil { 22 | return nil, err 23 | } 24 | stat, _ := table.file.Stat() 25 | // Read the footer block 26 | footerSize := int64(table.footer.Size()) 27 | if stat.Size() < footerSize { 28 | return nil, internal.ErrTableFileTooShort 29 | } 30 | 31 | _, err = table.file.Seek(-footerSize, io.SeekEnd) 32 | if err != nil { 33 | return nil, err 34 | } 35 | err = table.footer.DecodeFrom(table.file) 36 | if err != nil { 37 | return nil, err 38 | } 39 | // Read the index block 40 | table.index = table.readBlock(table.footer.IndexHandle) 41 | return &table, nil 42 | } 43 | 44 | func (table *SsTable) NewIterator() *Iterator { 45 | var it Iterator 46 | it.table = table 47 | it.indexIter = table.index.NewIterator() 48 | return &it 49 | } 50 | 51 | func (table *SsTable) Get(key []byte) ([]byte, error) { 52 | it := table.NewIterator() 53 | it.Seek(key) 54 | if it.Valid() { 55 | internalKey := it.InternalKey() 56 | if internal.UserKeyComparator(key, internalKey.UserKey) == 0 { 57 | // 判断valueType 58 | if internalKey.Type == internal.TypeValue { 59 | return internalKey.UserValue, nil 60 | } else { 61 | return nil, internal.ErrDeletion 62 | } 63 | } 64 | } 65 | return nil, internal.ErrNotFound 66 | } 67 | 68 | func (table *SsTable) readBlock(blockHandle BlockHandle) *block.Block { 69 | p := make([]byte, blockHandle.Size) 70 | n, err := table.file.ReadAt(p, int64(blockHandle.Offset)) 71 | if err != nil || uint32(n) != blockHandle.Size { 72 | return nil 73 | } 74 | 75 | return block.New(p) 76 | } 77 | -------------------------------------------------------------------------------- /sstable/block_handle.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | 7 | "github.com/merlin82/leveldb/internal" 8 | ) 9 | 10 | const ( 11 | kTableMagicNumber uint64 = 0xdb4775248b80fb57 12 | ) 13 | 14 | type BlockHandle struct { 15 | Offset uint32 16 | Size uint32 17 | } 18 | 19 | func (blockHandle *BlockHandle) EncodeToBytes() []byte { 20 | p := make([]byte, 8) 21 | binary.LittleEndian.PutUint32(p, blockHandle.Offset) 22 | binary.LittleEndian.PutUint32(p[4:], blockHandle.Size) 23 | return p 24 | } 25 | 26 | func (blockHandle *BlockHandle) DecodeFromBytes(p []byte) { 27 | if len(p) == 8 { 28 | blockHandle.Offset = binary.LittleEndian.Uint32(p) 29 | blockHandle.Size = binary.LittleEndian.Uint32(p[4:]) 30 | } 31 | } 32 | 33 | type IndexBlockHandle struct { 34 | *internal.InternalKey 35 | } 36 | 37 | func (index *IndexBlockHandle) SetBlockHandle(blockHandle BlockHandle) { 38 | index.UserValue = blockHandle.EncodeToBytes() 39 | } 40 | 41 | func (index *IndexBlockHandle) GetBlockHandle() (blockHandle BlockHandle) { 42 | blockHandle.DecodeFromBytes(index.UserValue) 43 | return 44 | } 45 | 46 | type Footer struct { 47 | MetaIndexHandle BlockHandle 48 | IndexHandle BlockHandle 49 | } 50 | 51 | func (footer *Footer) Size() int { 52 | // add magic size 53 | return binary.Size(footer) + 8 54 | } 55 | 56 | func (footer *Footer) EncodeTo(w io.Writer) error { 57 | err := binary.Write(w, binary.LittleEndian, footer) 58 | if err != nil { 59 | return err 60 | } 61 | err = binary.Write(w, binary.LittleEndian, kTableMagicNumber) 62 | return err 63 | } 64 | 65 | func (footer *Footer) DecodeFrom(r io.Reader) error { 66 | err := binary.Read(r, binary.LittleEndian, footer) 67 | if err != nil { 68 | return err 69 | } 70 | var magic uint64 71 | err = binary.Read(r, binary.LittleEndian, &magic) 72 | if err != nil { 73 | return err 74 | } 75 | if magic != kTableMagicNumber { 76 | return internal.ErrTableFileMagic 77 | } 78 | return nil 79 | } 80 | -------------------------------------------------------------------------------- /internal/internal_key.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "io" 7 | "math" 8 | ) 9 | 10 | type ValueType int8 11 | 12 | const ( 13 | TypeDeletion ValueType = 0 14 | TypeValue ValueType = 1 15 | ) 16 | 17 | type InternalKey struct { 18 | Seq uint64 19 | Type ValueType 20 | UserKey []byte 21 | UserValue []byte 22 | } 23 | 24 | func NewInternalKey(seq uint64, valueType ValueType, key, value []byte) *InternalKey { 25 | var internalKey InternalKey 26 | internalKey.Seq = seq 27 | internalKey.Type = valueType 28 | 29 | internalKey.UserKey = make([]byte, len(key)) 30 | copy(internalKey.UserKey, key) 31 | internalKey.UserValue = make([]byte, len(value)) 32 | copy(internalKey.UserValue, value) 33 | 34 | return &internalKey 35 | } 36 | 37 | func (key *InternalKey) EncodeTo(w io.Writer) error { 38 | binary.Write(w, binary.LittleEndian, key.Seq) 39 | binary.Write(w, binary.LittleEndian, key.Type) 40 | binary.Write(w, binary.LittleEndian, int32(len(key.UserKey))) 41 | binary.Write(w, binary.LittleEndian, key.UserKey) 42 | binary.Write(w, binary.LittleEndian, int32(len(key.UserValue))) 43 | return binary.Write(w, binary.LittleEndian, key.UserValue) 44 | } 45 | 46 | func (key *InternalKey) DecodeFrom(r io.Reader) error { 47 | var tmp int32 48 | binary.Read(r, binary.LittleEndian, &key.Seq) 49 | binary.Read(r, binary.LittleEndian, &key.Type) 50 | binary.Read(r, binary.LittleEndian, &tmp) 51 | key.UserKey = make([]byte, tmp) 52 | binary.Read(r, binary.LittleEndian, key.UserKey) 53 | binary.Read(r, binary.LittleEndian, &tmp) 54 | key.UserValue = make([]byte, tmp) 55 | return binary.Read(r, binary.LittleEndian, key.UserValue) 56 | } 57 | 58 | func LookupKey(key []byte) *InternalKey { 59 | return NewInternalKey(math.MaxUint64, TypeValue, key, nil) 60 | } 61 | 62 | func InternalKeyComparator(a, b interface{}) int { 63 | // Order by: 64 | // increasing user key (according to user-supplied comparator) 65 | // decreasing sequence number 66 | // decreasing type (though sequence# should be enough to disambiguate) 67 | aKey := a.(*InternalKey) 68 | bKey := b.(*InternalKey) 69 | r := UserKeyComparator(aKey.UserKey, bKey.UserKey) 70 | if r == 0 { 71 | anum := aKey.Seq 72 | bnum := bKey.Seq 73 | if anum > bnum { 74 | r = -1 75 | } else if anum < bnum { 76 | r = +1 77 | } 78 | } 79 | return r 80 | } 81 | 82 | func UserKeyComparator(a, b interface{}) int { 83 | aKey := a.([]byte) 84 | bKey := b.([]byte) 85 | return bytes.Compare(aKey, bKey) 86 | } 87 | -------------------------------------------------------------------------------- /sstable/table_build.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/merlin82/leveldb/internal" 7 | "github.com/merlin82/leveldb/sstable/block" 8 | ) 9 | 10 | const ( 11 | MAX_BLOCK_SIZE = 4 * 1024 12 | ) 13 | 14 | type TableBuilder struct { 15 | file *os.File 16 | offset uint32 17 | numEntries int32 18 | dataBlockBuilder block.BlockBuilder 19 | indexBlockBuilder block.BlockBuilder 20 | pendingIndexEntry bool 21 | pendingIndexHandle IndexBlockHandle 22 | status error 23 | } 24 | 25 | func NewTableBuilder(fileName string) *TableBuilder { 26 | var builder TableBuilder 27 | var err error 28 | builder.file, err = os.Create(fileName) 29 | if err != nil { 30 | return nil 31 | } 32 | builder.pendingIndexEntry = false 33 | return &builder 34 | } 35 | 36 | func (builder *TableBuilder) FileSize() uint32 { 37 | return builder.offset 38 | } 39 | 40 | func (builder *TableBuilder) Add(internalKey *internal.InternalKey) { 41 | if builder.status != nil { 42 | return 43 | } 44 | if builder.pendingIndexEntry { 45 | builder.indexBlockBuilder.Add(builder.pendingIndexHandle.InternalKey) 46 | builder.pendingIndexEntry = false 47 | } 48 | // todo : filter block 49 | 50 | builder.pendingIndexHandle.InternalKey = internalKey 51 | 52 | builder.numEntries++ 53 | builder.dataBlockBuilder.Add(internalKey) 54 | if builder.dataBlockBuilder.CurrentSizeEstimate() > MAX_BLOCK_SIZE { 55 | builder.flush() 56 | } 57 | } 58 | func (builder *TableBuilder) flush() { 59 | if builder.dataBlockBuilder.Empty() { 60 | return 61 | } 62 | orgKey := builder.pendingIndexHandle.InternalKey 63 | builder.pendingIndexHandle.InternalKey = internal.NewInternalKey(orgKey.Seq, orgKey.Type, orgKey.UserKey, nil) 64 | builder.pendingIndexHandle.SetBlockHandle(builder.writeblock(&builder.dataBlockBuilder)) 65 | builder.pendingIndexEntry = true 66 | } 67 | 68 | func (builder *TableBuilder) Finish() error { 69 | // write data block 70 | builder.flush() 71 | // todo : filter block 72 | 73 | // write index block 74 | if builder.pendingIndexEntry { 75 | builder.indexBlockBuilder.Add(builder.pendingIndexHandle.InternalKey) 76 | builder.pendingIndexEntry = false 77 | } 78 | var footer Footer 79 | footer.IndexHandle = builder.writeblock(&builder.indexBlockBuilder) 80 | 81 | // write footer block 82 | footer.EncodeTo(builder.file) 83 | builder.file.Close() 84 | return nil 85 | } 86 | 87 | func (builder *TableBuilder) writeblock(blockBuilder *block.BlockBuilder) BlockHandle { 88 | content := blockBuilder.Finish() 89 | // todo : compress, crc 90 | var blockHandle BlockHandle 91 | blockHandle.Offset = builder.offset 92 | blockHandle.Size = uint32(len(content)) 93 | builder.offset += uint32(len(content)) 94 | _, builder.status = builder.file.Write(content) 95 | builder.file.Sync() 96 | blockBuilder.Reset() 97 | return blockHandle 98 | } 99 | -------------------------------------------------------------------------------- /db/db.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "sync" 5 | 6 | "time" 7 | 8 | "github.com/merlin82/leveldb/internal" 9 | "github.com/merlin82/leveldb/memtable" 10 | "github.com/merlin82/leveldb/version" 11 | ) 12 | 13 | type Db struct { 14 | name string 15 | mu sync.Mutex 16 | cond *sync.Cond 17 | mem *memtable.MemTable 18 | imm *memtable.MemTable 19 | current *version.Version 20 | bgCompactionScheduled bool 21 | } 22 | 23 | func Open(dbName string) *Db { 24 | var db Db 25 | db.name = dbName 26 | db.mem = memtable.New() 27 | db.imm = nil 28 | db.bgCompactionScheduled = false 29 | db.cond = sync.NewCond(&db.mu) 30 | num := db.ReadCurrentFile() 31 | if num > 0 { 32 | v, err := version.Load(dbName, num) 33 | if err != nil { 34 | return nil 35 | } 36 | db.current = v 37 | } else { 38 | db.current = version.New(dbName) 39 | } 40 | 41 | return &db 42 | } 43 | 44 | func (db *Db) Close() { 45 | db.mu.Lock() 46 | for db.bgCompactionScheduled { 47 | db.cond.Wait() 48 | } 49 | db.mu.Unlock() 50 | } 51 | 52 | func (db *Db) Put(key, value []byte) error { 53 | // May temporarily unlock and wait. 54 | seq, err := db.makeRoomForWrite() 55 | if err != nil { 56 | return err 57 | } 58 | 59 | // todo : add log 60 | 61 | db.mem.Add(seq, internal.TypeValue, key, value) 62 | return nil 63 | } 64 | 65 | func (db *Db) Get(key []byte) ([]byte, error) { 66 | db.mu.Lock() 67 | mem := db.mem 68 | imm := db.imm 69 | current := db.current 70 | db.mu.Unlock() 71 | value, err := mem.Get(key) 72 | if err != internal.ErrNotFound { 73 | return value, err 74 | } 75 | 76 | if imm != nil { 77 | value, err := imm.Get(key) 78 | if err != internal.ErrNotFound { 79 | return value, err 80 | } 81 | } 82 | 83 | value, err = current.Get(key) 84 | return value, err 85 | } 86 | 87 | func (db *Db) Delete(key []byte) error { 88 | seq, err := db.makeRoomForWrite() 89 | if err != nil { 90 | return err 91 | } 92 | db.mem.Add(seq, internal.TypeDeletion, key, nil) 93 | return nil 94 | } 95 | 96 | func (db *Db) makeRoomForWrite() (uint64, error) { 97 | db.mu.Lock() 98 | defer db.mu.Unlock() 99 | 100 | for true { 101 | if db.current.NumLevelFiles(0) >= internal.L0_SlowdownWritesTrigger { 102 | db.mu.Unlock() 103 | time.Sleep(time.Duration(1000) * time.Microsecond) 104 | db.mu.Lock() 105 | } else if db.mem.ApproximateMemoryUsage() <= internal.Write_buffer_size { 106 | return db.current.NextSeq(), nil 107 | } else if db.imm != nil { 108 | // Current memtable full; waiting 109 | db.cond.Wait() 110 | } else { 111 | // Attempt to switch to a new memtable and trigger compaction of old 112 | // todo : switch log 113 | db.imm = db.mem 114 | db.mem = memtable.New() 115 | db.maybeScheduleCompaction() 116 | } 117 | } 118 | 119 | return db.current.NextSeq(), nil 120 | } 121 | -------------------------------------------------------------------------------- /skiplist/skiplist.go: -------------------------------------------------------------------------------- 1 | package skiplist 2 | 3 | import ( 4 | "math/rand" 5 | "sync" 6 | 7 | "github.com/merlin82/leveldb/utils" 8 | ) 9 | 10 | const ( 11 | kMaxHeight = 12 12 | kBranching = 4 13 | ) 14 | 15 | type SkipList struct { 16 | maxHeight int 17 | head *Node 18 | comparator utils.Comparator 19 | mu sync.RWMutex 20 | } 21 | 22 | func New(comp utils.Comparator) *SkipList { 23 | var skiplist SkipList 24 | skiplist.head = newNode(nil, kMaxHeight) 25 | skiplist.maxHeight = 1 26 | skiplist.comparator = comp 27 | return &skiplist 28 | } 29 | 30 | func (list *SkipList) Insert(key interface{}) { 31 | list.mu.Lock() 32 | defer list.mu.Unlock() 33 | 34 | _, prev := list.findGreaterOrEqual(key) 35 | height := list.randomHeight() 36 | if height > list.maxHeight { 37 | for i := list.maxHeight; i < height; i++ { 38 | prev[i] = list.head 39 | } 40 | list.maxHeight = height 41 | } 42 | x := newNode(key, height) 43 | for i := 0; i < height; i++ { 44 | x.setNext(i, prev[i].getNext(i)) 45 | prev[i].setNext(i, x) 46 | } 47 | } 48 | 49 | func (list *SkipList) Contains(key interface{}) bool { 50 | list.mu.RLock() 51 | defer list.mu.RUnlock() 52 | x, _ := list.findGreaterOrEqual(key) 53 | if x != nil && list.comparator(x.key, key) == 0 { 54 | return true 55 | } 56 | return false 57 | } 58 | 59 | func (list *SkipList) NewIterator() *Iterator { 60 | var it Iterator 61 | it.list = list 62 | return &it 63 | } 64 | 65 | func (list *SkipList) randomHeight() int { 66 | height := 1 67 | for height < kMaxHeight && (rand.Intn(kBranching) == 0) { 68 | height++ 69 | } 70 | return height 71 | } 72 | 73 | func (list *SkipList) findGreaterOrEqual(key interface{}) (*Node, [kMaxHeight]*Node) { 74 | var prev [kMaxHeight]*Node 75 | x := list.head 76 | level := list.maxHeight - 1 77 | for true { 78 | next := x.getNext(level) 79 | if list.keyIsAfterNode(key, next) { 80 | x = next 81 | } else { 82 | prev[level] = x 83 | if level == 0 { 84 | return next, prev 85 | } else { 86 | // Switch to next list 87 | level-- 88 | } 89 | } 90 | } 91 | return nil, prev 92 | } 93 | 94 | func (list *SkipList) findLessThan(key interface{}) *Node { 95 | x := list.head 96 | level := list.maxHeight - 1 97 | for true { 98 | next := x.getNext(level) 99 | if next == nil || list.comparator(next.key, key) >= 0 { 100 | if level == 0 { 101 | return x 102 | } else { 103 | level-- 104 | } 105 | } else { 106 | x = next 107 | } 108 | } 109 | return nil 110 | } 111 | func (list *SkipList) findlast() *Node { 112 | x := list.head 113 | level := list.maxHeight - 1 114 | for true { 115 | next := x.getNext(level) 116 | if next == nil { 117 | if level == 0 { 118 | return x 119 | } else { 120 | level-- 121 | } 122 | } else { 123 | x = next 124 | } 125 | } 126 | return nil 127 | } 128 | 129 | func (list *SkipList) keyIsAfterNode(key interface{}, n *Node) bool { 130 | return (n != nil) && (list.comparator(n.key, key) < 0) 131 | } 132 | -------------------------------------------------------------------------------- /sstable/iterator.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "github.com/merlin82/leveldb/internal" 5 | "github.com/merlin82/leveldb/sstable/block" 6 | ) 7 | 8 | type Iterator struct { 9 | table *SsTable 10 | dataBlockHandle BlockHandle 11 | dataIter *block.Iterator 12 | indexIter *block.Iterator 13 | } 14 | 15 | // Returns true iff the iterator is positioned at a valid node. 16 | func (it *Iterator) Valid() bool { 17 | return it.dataIter != nil && it.dataIter.Valid() 18 | } 19 | 20 | func (it *Iterator) InternalKey() *internal.InternalKey { 21 | return it.dataIter.InternalKey() 22 | } 23 | 24 | func (it *Iterator) Key() []byte { 25 | return it.InternalKey().UserKey 26 | } 27 | 28 | func (it *Iterator) Value() []byte { 29 | return it.InternalKey().UserValue 30 | } 31 | 32 | // Advances to the next position. 33 | // REQUIRES: Valid() 34 | func (it *Iterator) Next() { 35 | it.dataIter.Next() 36 | it.skipEmptyDataBlocksForward() 37 | } 38 | 39 | // Advances to the previous position. 40 | // REQUIRES: Valid() 41 | func (it *Iterator) Prev() { 42 | it.dataIter.Prev() 43 | it.skipEmptyDataBlocksBackward() 44 | } 45 | 46 | // Advance to the first entry with a key >= target 47 | func (it *Iterator) Seek(target []byte) { 48 | // Index Block的block_data字段中,每一条记录的key都满足: 49 | // 大于等于Data Block的所有key,并且小于后面所有Data Block的key 50 | // 因为Seek是查找key>=target的第一条记录,所以当index_iter_找到时, 51 | // 该index_inter_对应的data_iter_所管理的Data Block中所有记录的 52 | // key都小于等于target,如果需要在下一个Data Block中seek,而下一个Data Block 53 | // 中的第一条记录就满足key>=target 54 | 55 | it.indexIter.Seek(target) 56 | it.initDataBlock() 57 | if it.dataIter != nil { 58 | 59 | it.dataIter.Seek(target) 60 | } 61 | it.skipEmptyDataBlocksForward() 62 | } 63 | 64 | // Position at the first entry in list. 65 | // Final state of iterator is Valid() iff list is not empty. 66 | func (it *Iterator) SeekToFirst() { 67 | it.indexIter.SeekToFirst() 68 | it.initDataBlock() 69 | if it.dataIter != nil { 70 | it.dataIter.SeekToFirst() 71 | } 72 | it.skipEmptyDataBlocksForward() 73 | } 74 | 75 | // Position at the last entry in list. 76 | // Final state of iterator is Valid() iff list is not empty. 77 | func (it *Iterator) SeekToLast() { 78 | it.indexIter.SeekToLast() 79 | it.initDataBlock() 80 | if it.dataIter != nil { 81 | it.dataIter.SeekToLast() 82 | } 83 | it.skipEmptyDataBlocksBackward() 84 | } 85 | 86 | func (it *Iterator) initDataBlock() { 87 | if !it.indexIter.Valid() { 88 | it.dataIter = nil 89 | } else { 90 | var index IndexBlockHandle 91 | index.InternalKey = it.indexIter.InternalKey() 92 | tmpBlockHandle := index.GetBlockHandle() 93 | 94 | if it.dataIter != nil && it.dataBlockHandle == tmpBlockHandle { 95 | // data_iter_ is already constructed with this iterator, so 96 | // no need to change anything 97 | } else { 98 | it.dataIter = it.table.readBlock(tmpBlockHandle).NewIterator() 99 | it.dataBlockHandle = tmpBlockHandle 100 | } 101 | } 102 | } 103 | 104 | func (it *Iterator) skipEmptyDataBlocksForward() { 105 | for it.dataIter == nil || !it.dataIter.Valid() { 106 | if !it.indexIter.Valid() { 107 | it.dataIter = nil 108 | return 109 | } 110 | it.indexIter.Next() 111 | it.initDataBlock() 112 | if it.dataIter != nil { 113 | it.dataIter.SeekToFirst() 114 | } 115 | } 116 | } 117 | 118 | func (it *Iterator) skipEmptyDataBlocksBackward() { 119 | for it.dataIter == nil || !it.dataIter.Valid() { 120 | if !it.indexIter.Valid() { 121 | it.dataIter = nil 122 | return 123 | } 124 | it.indexIter.Prev() 125 | it.initDataBlock() 126 | if it.dataIter != nil { 127 | it.dataIter.SeekToLast() 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "sort" 7 | 8 | "github.com/merlin82/leveldb/internal" 9 | ) 10 | 11 | type FileMetaData struct { 12 | allowSeeks uint64 13 | number uint64 14 | fileSize uint64 15 | smallest *internal.InternalKey 16 | largest *internal.InternalKey 17 | } 18 | 19 | type Version struct { 20 | tableCache *TableCache 21 | nextFileNumber uint64 22 | seq uint64 23 | files [internal.NumLevels][]*FileMetaData 24 | // Per-level key at which the next compaction at that level should start. 25 | // Either an empty string, or a valid InternalKey. 26 | compactPointer [internal.NumLevels]*internal.InternalKey 27 | } 28 | 29 | func New(dbName string) *Version { 30 | var v Version 31 | v.tableCache = NewTableCache(dbName) 32 | v.nextFileNumber = 1 33 | return &v 34 | } 35 | 36 | func Load(dbName string, number uint64) (*Version, error) { 37 | fileName := internal.DescriptorFileName(dbName, number) 38 | file, err := os.Open(fileName) 39 | if err != nil { 40 | return nil, err 41 | } 42 | defer file.Close() 43 | v := New(dbName) 44 | return v, v.DecodeFrom(file) 45 | } 46 | 47 | func (v *Version) Save() (uint64, error) { 48 | tmp := v.nextFileNumber 49 | fileName := internal.DescriptorFileName(v.tableCache.dbName, v.nextFileNumber) 50 | v.nextFileNumber++ 51 | file, err := os.Create(fileName) 52 | if err != nil { 53 | return tmp, err 54 | } 55 | defer file.Close() 56 | return tmp, v.EncodeTo(file) 57 | } 58 | func (v *Version) Log() { 59 | for level := 0; level < internal.NumLevels; level++ { 60 | for i := 0; i < len(v.files[level]); i++ { 61 | log.Printf("version[%d]: %d", level, v.files[level][i].number) 62 | } 63 | } 64 | } 65 | func (v *Version) Copy() *Version { 66 | var c Version 67 | 68 | c.tableCache = v.tableCache 69 | c.nextFileNumber = v.nextFileNumber 70 | c.seq = v.seq 71 | for level := 0; level < internal.NumLevels; level++ { 72 | c.files[level] = make([]*FileMetaData, len(v.files[level])) 73 | copy(c.files[level], v.files[level]) 74 | } 75 | return &c 76 | } 77 | func (v *Version) NextSeq() uint64 { 78 | v.seq++ 79 | return v.seq 80 | } 81 | 82 | func (v *Version) NumLevelFiles(l int) int { 83 | return len(v.files[l]) 84 | } 85 | 86 | func (v *Version) Get(key []byte) ([]byte, error) { 87 | var tmp []*FileMetaData 88 | var tmp2 [1]*FileMetaData 89 | var files []*FileMetaData 90 | // We can search level-by-level since entries never hop across 91 | // levels. Therefore we are guaranteed that if we find data 92 | // in an smaller level, later levels are irrelevant. 93 | for level := 0; level < internal.NumLevels; level++ { 94 | numFiles := len(v.files[level]) 95 | if numFiles == 0 { 96 | continue 97 | } 98 | if level == 0 { 99 | // Level-0 files may overlap each other. Find all files that 100 | // overlap user_key and process them in order from newest to oldest. 101 | for i := 0; i < numFiles; i++ { 102 | f := v.files[level][i] 103 | if internal.UserKeyComparator(key, f.smallest.UserKey) >= 0 && internal.UserKeyComparator(key, f.largest.UserKey) <= 0 { 104 | tmp = append(tmp, f) 105 | } 106 | } 107 | if len(tmp) == 0 { 108 | continue 109 | } 110 | sort.Slice(tmp, func(i, j int) bool { return tmp[i].number > tmp[j].number }) 111 | numFiles = len(tmp) 112 | files = tmp 113 | } else { 114 | index := v.findFile(v.files[level], key) 115 | if index >= numFiles { 116 | files = nil 117 | numFiles = 0 118 | } else { 119 | tmp2[0] = v.files[level][index] 120 | if internal.UserKeyComparator(key, tmp2[0].smallest.UserKey) < 0 { 121 | files = nil 122 | numFiles = 0 123 | } else { 124 | files = tmp2[:] 125 | numFiles = 1 126 | } 127 | } 128 | } 129 | for i := 0; i < numFiles; i++ { 130 | f := files[i] 131 | value, err := v.tableCache.Get(f.number, key) 132 | if err != internal.ErrNotFound { 133 | return value, err 134 | } 135 | } 136 | } 137 | return nil, internal.ErrNotFound 138 | } 139 | 140 | func (v *Version) findFile(files []*FileMetaData, key []byte) int { 141 | left := 0 142 | right := len(files) 143 | for left < right { 144 | mid := (left + right) / 2 145 | f := files[mid] 146 | if internal.UserKeyComparator(f.largest.UserKey, key) < 0 { 147 | // Key at "mid.largest" is < "target". Therefore all 148 | // files at or before "mid" are uninteresting. 149 | left = mid + 1 150 | } else { 151 | // Key at "mid.largest" is >= "target". Therefore all files 152 | // after "mid" are uninteresting. 153 | right = mid 154 | } 155 | } 156 | return right 157 | } 158 | -------------------------------------------------------------------------------- /version/compaction.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | "log" 7 | 8 | "github.com/merlin82/leveldb/internal" 9 | "github.com/merlin82/leveldb/memtable" 10 | "github.com/merlin82/leveldb/sstable" 11 | ) 12 | 13 | type Compaction struct { 14 | level int 15 | inputs [2][]*FileMetaData 16 | } 17 | 18 | func (c *Compaction) isTrivialMove() bool { 19 | return len(c.inputs[0]) == 1 && len(c.inputs[1]) == 0 20 | } 21 | 22 | func (c *Compaction) Log() { 23 | log.Printf("Compaction, level:%d", c.level) 24 | for i := 0; i < len(c.inputs[0]); i++ { 25 | log.Printf("inputs[0]: %d", c.inputs[0][i].number) 26 | } 27 | for i := 0; i < len(c.inputs[1]); i++ { 28 | log.Printf("inputs[1]: %d", c.inputs[1][i].number) 29 | } 30 | } 31 | 32 | func (meta *FileMetaData) EncodeTo(w io.Writer) error { 33 | binary.Write(w, binary.LittleEndian, meta.allowSeeks) 34 | binary.Write(w, binary.LittleEndian, meta.fileSize) 35 | binary.Write(w, binary.LittleEndian, meta.number) 36 | meta.smallest.EncodeTo(w) 37 | meta.largest.EncodeTo(w) 38 | return nil 39 | } 40 | 41 | func (meta *FileMetaData) DecodeFrom(r io.Reader) error { 42 | binary.Read(r, binary.LittleEndian, &meta.allowSeeks) 43 | binary.Read(r, binary.LittleEndian, &meta.fileSize) 44 | binary.Read(r, binary.LittleEndian, &meta.number) 45 | meta.smallest = new(internal.InternalKey) 46 | meta.smallest.DecodeFrom(r) 47 | meta.largest = new(internal.InternalKey) 48 | meta.largest.DecodeFrom(r) 49 | return nil 50 | } 51 | 52 | func (v *Version) EncodeTo(w io.Writer) error { 53 | binary.Write(w, binary.LittleEndian, v.nextFileNumber) 54 | binary.Write(w, binary.LittleEndian, v.seq) 55 | for level := 0; level < internal.NumLevels; level++ { 56 | numFiles := len(v.files[level]) 57 | binary.Write(w, binary.LittleEndian, int32(numFiles)) 58 | 59 | for i := 0; i < numFiles; i++ { 60 | v.files[level][i].EncodeTo(w) 61 | } 62 | } 63 | return nil 64 | } 65 | 66 | func (v *Version) DecodeFrom(r io.Reader) error { 67 | binary.Read(r, binary.LittleEndian, &v.nextFileNumber) 68 | binary.Read(r, binary.LittleEndian, &v.seq) 69 | var numFiles int32 70 | for level := 0; level < internal.NumLevels; level++ { 71 | binary.Read(r, binary.LittleEndian, &numFiles) 72 | v.files[level] = make([]*FileMetaData, numFiles) 73 | for i := 0; i < int(numFiles); i++ { 74 | var meta FileMetaData 75 | meta.DecodeFrom(r) 76 | v.files[level][i] = &meta 77 | } 78 | } 79 | return nil 80 | } 81 | 82 | func (v *Version) deleteFile(level int, meta *FileMetaData) { 83 | numFiles := len(v.files[level]) 84 | for i := 0; i < numFiles; i++ { 85 | if v.files[level][i].number == meta.number { 86 | v.files[level] = append(v.files[level][:i], v.files[level][i+1:]...) 87 | log.Printf("deleteFile, level:%d, num:%d", level, meta.number) 88 | break 89 | } 90 | } 91 | } 92 | 93 | func (v *Version) addFile(level int, meta *FileMetaData) { 94 | log.Printf("addFile, level:%d, num:%d, %s-%s", level, meta.number, string(meta.smallest.UserKey), string(meta.largest.UserKey)) 95 | if level == 0 { 96 | // 0层没有排序 97 | v.files[level] = append(v.files[level], meta) 98 | } else { 99 | numFiles := len(v.files[level]) 100 | index := v.findFile(v.files[level], meta.smallest.UserKey) 101 | if index >= numFiles { 102 | v.files[level] = append(v.files[level], meta) 103 | } else { 104 | var tmp []*FileMetaData 105 | tmp = append(tmp, v.files[level][:index]...) 106 | tmp = append(tmp, meta) 107 | v.files[level] = append(tmp, v.files[level][index:]...) 108 | } 109 | } 110 | } 111 | 112 | func (v *Version) WriteLevel0Table(imm *memtable.MemTable) { 113 | var meta FileMetaData 114 | meta.allowSeeks = 1 << 30 115 | meta.number = v.nextFileNumber 116 | v.nextFileNumber++ 117 | builder := sstable.NewTableBuilder((internal.TableFileName(v.tableCache.dbName, meta.number))) 118 | iter := imm.NewIterator() 119 | iter.SeekToFirst() 120 | if iter.Valid() { 121 | meta.smallest = iter.InternalKey() 122 | for ; iter.Valid(); iter.Next() { 123 | meta.largest = iter.InternalKey() 124 | builder.Add(iter.InternalKey()) 125 | } 126 | builder.Finish() 127 | meta.fileSize = uint64(builder.FileSize()) 128 | meta.smallest.UserValue = nil 129 | meta.largest.UserValue = nil 130 | } 131 | 132 | // 挑选合适的level 133 | level := 0 134 | if !v.overlapInLevel(0, meta.smallest.UserKey, meta.largest.UserKey) { 135 | for ; level < internal.MaxMemCompactLevel; level++ { 136 | if v.overlapInLevel(level+1, meta.smallest.UserKey, meta.largest.UserKey) { 137 | break 138 | } 139 | } 140 | } 141 | 142 | v.addFile(level, &meta) 143 | } 144 | 145 | func (v *Version) overlapInLevel(level int, smallestKey, largestKey []byte) bool { 146 | numFiles := len(v.files[level]) 147 | if numFiles == 0 { 148 | return false 149 | } 150 | if level == 0 { 151 | for i := 0; i < numFiles; i++ { 152 | f := v.files[level][i] 153 | if internal.UserKeyComparator(smallestKey, f.largest.UserKey) > 0 || internal.UserKeyComparator(f.smallest.UserKey, largestKey) > 0 { 154 | continue 155 | } else { 156 | return true 157 | } 158 | } 159 | } else { 160 | index := v.findFile(v.files[level], smallestKey) 161 | if index >= numFiles { 162 | return false 163 | } 164 | if internal.UserKeyComparator(largestKey, v.files[level][index].smallest.UserKey) > 0 { 165 | return true 166 | } 167 | } 168 | return false 169 | } 170 | 171 | func (v *Version) DoCompactionWork() bool { 172 | c := v.pickCompaction() 173 | if c == nil { 174 | return false 175 | } 176 | log.Printf("DoCompactionWork begin\n") 177 | defer log.Printf("DoCompactionWork end\n") 178 | c.Log() 179 | if c.isTrivialMove() { 180 | // Move file to next level 181 | v.deleteFile(c.level, c.inputs[0][0]) 182 | v.addFile(c.level+1, c.inputs[0][0]) 183 | return true 184 | } 185 | var list []*FileMetaData 186 | var current_key *internal.InternalKey 187 | iter := v.makeInputIterator(c) 188 | for iter.SeekToFirst(); iter.Valid(); iter.Next() { 189 | var meta FileMetaData 190 | meta.allowSeeks = 1 << 30 191 | meta.number = v.nextFileNumber 192 | v.nextFileNumber++ 193 | builder := sstable.NewTableBuilder((internal.TableFileName(v.tableCache.dbName, meta.number))) 194 | 195 | meta.smallest = iter.InternalKey() 196 | for ; iter.Valid(); iter.Next() { 197 | if current_key != nil { 198 | // 去除重复的记录 199 | ret := internal.UserKeyComparator(iter.InternalKey().UserKey, current_key.UserKey) 200 | if ret == 0 { 201 | continue 202 | } else if ret < 0 { 203 | log.Fatalf("%s < %s", string(iter.InternalKey().UserKey), string(current_key.UserKey)) 204 | } 205 | current_key = iter.InternalKey() 206 | } 207 | meta.largest = iter.InternalKey() 208 | builder.Add(iter.InternalKey()) 209 | if builder.FileSize() > internal.MaxFileSize { 210 | break 211 | } 212 | } 213 | builder.Finish() 214 | meta.fileSize = uint64(builder.FileSize()) 215 | meta.smallest.UserValue = nil 216 | meta.largest.UserValue = nil 217 | 218 | list = append(list, &meta) 219 | } 220 | 221 | for i := 0; i < len(c.inputs[0]); i++ { 222 | v.deleteFile(c.level, c.inputs[0][i]) 223 | } 224 | for i := 0; i < len(c.inputs[1]); i++ { 225 | v.deleteFile(c.level+1, c.inputs[1][i]) 226 | } 227 | for i := 0; i < len(list); i++ { 228 | v.addFile(c.level+1, list[i]) 229 | } 230 | return true 231 | } 232 | 233 | func (v *Version) makeInputIterator(c *Compaction) *MergingIterator { 234 | var list []*sstable.Iterator 235 | for i := 0; i < len(c.inputs[0]); i++ { 236 | list = append(list, v.tableCache.NewIterator(c.inputs[0][i].number)) 237 | } 238 | for i := 0; i < len(c.inputs[1]); i++ { 239 | list = append(list, v.tableCache.NewIterator(c.inputs[1][i].number)) 240 | } 241 | return NewMergingIterator(list) 242 | } 243 | 244 | func (v *Version) pickCompaction() *Compaction { 245 | var c Compaction 246 | c.level = v.pickCompactionLevel() 247 | if c.level < 0 { 248 | return nil 249 | } 250 | var smallest, largest *internal.InternalKey 251 | // Files in level 0 may overlap each other, so pick up all overlapping ones 252 | if c.level == 0 { 253 | c.inputs[0] = append(c.inputs[0], v.files[c.level]...) 254 | smallest = c.inputs[0][0].smallest 255 | largest = c.inputs[0][0].largest 256 | for i := 1; i < len(c.inputs[0]); i++ { 257 | f := c.inputs[0][i] 258 | if internal.InternalKeyComparator(f.largest, largest) > 0 { 259 | largest = f.largest 260 | } 261 | if internal.InternalKeyComparator(f.smallest, smallest) < 0 { 262 | smallest = f.smallest 263 | } 264 | } 265 | } else { 266 | // Pick the first file that comes after compact_pointer_[level] 267 | for i := 0; i < len(v.files[c.level]); i++ { 268 | f := v.files[c.level][i] 269 | if v.compactPointer[c.level] == nil || internal.InternalKeyComparator(f.largest, v.compactPointer[c.level]) > 0 { 270 | c.inputs[0] = append(c.inputs[0], f) 271 | break 272 | } 273 | } 274 | if len(c.inputs[0]) == 0 { 275 | c.inputs[0] = append(c.inputs[0], v.files[c.level][0]) 276 | } 277 | smallest = c.inputs[0][0].smallest 278 | largest = c.inputs[0][0].largest 279 | } 280 | 281 | for i := 0; i < len(v.files[c.level+1]); i++ { 282 | f := v.files[c.level+1][i] 283 | 284 | if internal.InternalKeyComparator(f.largest, smallest) < 0 || internal.InternalKeyComparator(f.smallest, largest) > 0 { 285 | // "f" is completely before specified range; skip it, // "f" is completely after specified range; skip it 286 | } else { 287 | c.inputs[1] = append(c.inputs[1], f) 288 | } 289 | } 290 | return &c 291 | } 292 | 293 | func (v *Version) pickCompactionLevel() int { 294 | // We treat level-0 specially by bounding the number of files 295 | // instead of number of bytes for two reasons: 296 | // 297 | // (1) With larger write-buffer sizes, it is nice not to do too 298 | // many level-0 compactions. 299 | // 300 | // (2) The files in level-0 are merged on every read and 301 | // therefore we wish to avoid too many files when the individual 302 | // file size is small (perhaps because of a small write-buffer 303 | // setting, or very high compression ratios, or lots of 304 | // overwrites/deletions). 305 | compactionLevel := -1 306 | bestScore := 1.0 307 | score := 0.0 308 | for level := 0; level < internal.NumLevels-1; level++ { 309 | if level == 0 { 310 | score = float64(len(v.files[0])) / float64(internal.L0_CompactionTrigger) 311 | } else { 312 | score = float64(totalFileSize(v.files[level])) / maxBytesForLevel(level) 313 | } 314 | 315 | if score > bestScore { 316 | bestScore = score 317 | compactionLevel = level 318 | } 319 | 320 | } 321 | return compactionLevel 322 | } 323 | 324 | func totalFileSize(files []*FileMetaData) uint64 { 325 | var sum uint64 326 | for i := 0; i < len(files); i++ { 327 | sum += files[i].fileSize 328 | } 329 | return sum 330 | } 331 | func maxBytesForLevel(level int) float64 { 332 | // Note: the result for level zero is not really used since we set 333 | // the level-0 compaction threshold based on number of files. 334 | 335 | // Result for both level-0 and level-1 336 | result := 10. * 1048576.0 337 | for level > 1 { 338 | result *= 10 339 | level-- 340 | } 341 | return result 342 | } 343 | --------------------------------------------------------------------------------