├── .gitignore ├── entity ├── entity.go └── entry.go ├── .github ├── ISSUE_TEMPLATE │ └── custom.md └── workflows │ └── go.yml ├── go.mod ├── options.go ├── util.go ├── README.md ├── go.sum ├── index └── index.go ├── db_test.go ├── db.go └── storage └── datafiles.go /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.dat -------------------------------------------------------------------------------- /entity/entity.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type Entity interface { 4 | Encode() []byte 5 | 6 | DecodePayload([]byte) 7 | 8 | DecodeMeta([]byte) 9 | 10 | Size() int64 11 | } 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module tiny-bitcask 2 | 3 | go 1.23.4 4 | 5 | require github.com/stretchr/testify v1.8.1 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /options.go: -------------------------------------------------------------------------------- 1 | package tiny_bitcask 2 | 3 | import "tiny-bitcask/storage" 4 | 5 | const ( 6 | DefaultSegmentSize = 256 * storage.MB 7 | ) 8 | 9 | var ( 10 | DefaultOptions = &Options{ 11 | Dir: "db", 12 | SegmentSize: DefaultSegmentSize, 13 | } 14 | ) 15 | 16 | type Options struct { 17 | Dir string 18 | SegmentSize int64 19 | } 20 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package tiny_bitcask 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | func isDirExist(dir string) (bool, error) { 8 | _, err := os.Stat(dir) 9 | if err == nil { 10 | return true, nil 11 | } 12 | if os.IsNotExist(err) { 13 | return false, nil 14 | } 15 | return false, err 16 | } 17 | 18 | func getSegmentSize(size int64) int64 { 19 | var fileSize int64 20 | if size <= 0 { 21 | fileSize = DefaultSegmentSize 22 | } else { 23 | fileSize = size 24 | } 25 | return fileSize 26 | } 27 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Go 5 | 6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | jobs: 13 | 14 | build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v4 21 | with: 22 | go-version: '1.20' 23 | 24 | - name: Build 25 | run: go build -v ./... 26 | 27 | - name: Test 28 | run: go test -v ./... 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## tiny-bitcask 2 | 3 | tiny-bitcask是对[bitcask论文](https://riak.com/assets/bitcask-intro.pdf)的简单实现,旨在提供用Go实现简单kv存储引擎的参考。后来我想把这个项目作为实现我各种想法的试验田。所以会持续迭代这个项目,迭代的结果就是master分支的代码。如果想看简单版本的实现可以通过以下方式拉取代码,并且切换到demo分支。 4 | 5 | ````shell 6 | git clone git@github.com:elliotchenzichang/tiny-bitcask.git 7 | cd tiny-bitcask 8 | git chckout demo 9 | ```` 10 | 11 | 另外我想实现做的实验和实现的想法会记录在项目TODO中。并切换分支进行相关迭代,有效果的部分会合并进入master分支。实践的相关文章会列在文章列表上。感谢各位的关注,希望各位都能从中学到一些东西。欢迎star,欢迎提PR。 12 | 13 | ## Todo 14 | 15 | - [ ] 实现HintFile 16 | - [ ] 探究对map的优化 17 | - [ ] 实现version control 18 | 19 | ## 文章list 20 | 21 | 1. [基于Bitcask实现简单的kv存储详细讲解](https://mp.weixin.qq.com/s?__biz=Mzg5MzU5NzQxMA==&mid=2247483844&idx=1&sn=2fc13cf8ce7c465dbd08690c56eaba69&chksm=c02d2249f75aab5f0955377c6ed29f8529c4a5f18bd53f27ab4cd88b85b3792af370f8a378ab#rd) 22 | 23 | ## 个人 24 | 25 | 下面是本人微信公众号,欢迎关注 26 | 27 | ![image](https://user-images.githubusercontent.com/92676541/226180799-973944bd-5c75-4a9b-8226-6c7e6e465d19.png) 28 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 5 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 6 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 7 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 8 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 9 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 10 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 11 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 12 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 14 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 15 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 16 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 17 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | -------------------------------------------------------------------------------- /index/index.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import "tiny-bitcask/entity" 4 | 5 | const ( 6 | KeyNotFound = "key not found" 7 | ) 8 | 9 | type Index interface { 10 | Find(key string) *DataPosition 11 | Delete(key string) 12 | Update(key string, dp *DataPosition) 13 | Add(key string, dp *DataPosition) 14 | } 15 | 16 | type indexer map[string]*DataPosition 17 | 18 | func newIndexer() indexer { 19 | return indexer{} 20 | } 21 | 22 | type KeyDir struct { 23 | Index indexer 24 | } 25 | 26 | func NewKD() *KeyDir { 27 | kd := &KeyDir{} 28 | kd.Index = newIndexer() 29 | return kd 30 | } 31 | 32 | func (kd *KeyDir) Add(key string, dp *DataPosition) { 33 | kd.Index[key] = dp 34 | } 35 | 36 | // Find searches an index in KeyDir 37 | func (kd *KeyDir) Find(key string) *DataPosition { 38 | dp := kd.Index[key] 39 | return dp 40 | } 41 | 42 | // Update inserts an index to KeyDir 43 | func (kd *KeyDir) Update(key string, dp *DataPosition) { 44 | kd.Index[key] = dp 45 | } 46 | 47 | // Delete deletes an index in KeyDir 48 | func (kd *KeyDir) Delete(key string) { 49 | delete(kd.Index, key) 50 | } 51 | 52 | // DataPosition means a certain position of an entity.Entry which stores in disk. 53 | type DataPosition struct { 54 | Fid int 55 | Off int64 56 | Timestamp uint64 57 | KeySize int 58 | ValueSize int 59 | } 60 | 61 | func (kd *KeyDir) AddIndexByData(hint *entity.Hint, entry *entity.Entry) { 62 | kd.AddIndexByRawInfo(hint.Fid, hint.Off, entry.Key, entry.Value) 63 | } 64 | 65 | func (kd *KeyDir) AddIndexByRawInfo(fid int, off int64, key, value []byte) { 66 | index := newDataPosition(fid, off, key, value) 67 | kd.Add(string(key), index) 68 | } 69 | 70 | func newDataPosition(fid int, off int64, key, value []byte) *DataPosition { 71 | dp := &DataPosition{} 72 | dp.Fid = fid 73 | dp.Off = off 74 | dp.KeySize = len(key) 75 | dp.ValueSize = len(value) 76 | return dp 77 | } 78 | 79 | func (i *DataPosition) IsEqualPos(fid int, off int64) bool { 80 | return i.Off == off && i.Fid == fid 81 | } 82 | -------------------------------------------------------------------------------- /db_test.go: -------------------------------------------------------------------------------- 1 | package tiny_bitcask 2 | 3 | import ( 4 | "fmt" 5 | "github.com/stretchr/testify/assert" 6 | "os" 7 | "testing" 8 | "tiny-bitcask/storage" 9 | ) 10 | 11 | func BitCaskTest(t *testing.T, opt *Options, test func(db *DB)) { 12 | if opt == nil { 13 | opt = DefaultOptions 14 | } 15 | db, err := NewDB(opt) 16 | assert.NoError(t, err) 17 | test(db) 18 | os.RemoveAll(opt.Dir) 19 | } 20 | 21 | func TestDB_Base(t *testing.T) { 22 | var test = func(db *DB) { 23 | err := db.Set([]byte("test_key"), []byte("test_value")) 24 | assert.NoError(t, err) 25 | value, err := db.Get([]byte("test_key")) 26 | assert.NoError(t, err) 27 | assert.Equal(t, "test_value", string(value)) 28 | 29 | err = db.Set([]byte("test_key"), []byte("test_value_2")) 30 | assert.NoError(t, err) 31 | 32 | value, err = db.Get([]byte("test_key")) 33 | assert.NoError(t, err) 34 | assert.Equal(t, "test_value_2", string(value)) 35 | } 36 | BitCaskTest(t, nil, test) 37 | } 38 | 39 | func TestDB_SegmentSize(t *testing.T) { 40 | opt := &Options{ 41 | Dir: "db", 42 | SegmentSize: 4 * storage.KB, 43 | } 44 | var test = func(db *DB) { 45 | for i := 0; i < 1000; i++ { 46 | key := fmt.Sprintf("test_key_%d", i) 47 | value := fmt.Sprintf("test_value_%d", i) 48 | err := db.Set([]byte(key), []byte(value)) 49 | assert.NoError(t, err) 50 | } 51 | } 52 | BitCaskTest(t, opt, test) 53 | } 54 | 55 | func TestDB_Merge(t *testing.T) { 56 | opt := &Options{ 57 | Dir: "db", 58 | SegmentSize: 4 * storage.KB, 59 | } 60 | var test = func(db *DB) { 61 | key := "test_key" 62 | for i := 0; i < 1000; i++ { 63 | value := fmt.Sprintf("test_value_%d", i) 64 | err := db.Set([]byte(key), []byte(value)) 65 | assert.NoError(t, err) 66 | } 67 | err := db.Merge() 68 | assert.NoError(t, err) 69 | value, err := db.Get([]byte("test_key")) 70 | assert.NoError(t, err) 71 | assert.Equal(t, "test_value_999", string(value)) 72 | } 73 | BitCaskTest(t, opt, test) 74 | } 75 | 76 | func TestDB_Delete(t *testing.T) { 77 | var test = func(db *DB) { 78 | err := db.Set([]byte("test_key"), []byte("test_value")) 79 | assert.NoError(t, err) 80 | value, err := db.Get([]byte("test_key")) 81 | 82 | assert.NoError(t, err) 83 | assert.Equal(t, "test_value", string(value)) 84 | 85 | err = db.Delete([]byte("test_key")) 86 | assert.NoError(t, err) 87 | 88 | value, err = db.Get([]byte("test_key")) 89 | assert.Nil(t, value) 90 | assert.ErrorAs(t, KeyNotFoundErr, &err) 91 | } 92 | BitCaskTest(t, nil, test) 93 | } 94 | 95 | func TestDB_Delete_Merge(t *testing.T) { 96 | opt := &Options{ 97 | Dir: "db", 98 | SegmentSize: 4 * storage.KB, 99 | } 100 | var test = func(db *DB) { 101 | key := "test_key" 102 | for i := 0; i < 1000; i++ { 103 | value := fmt.Sprintf("test_value_%d", i) 104 | err := db.Set([]byte(key), []byte(value)) 105 | assert.NoError(t, err) 106 | } 107 | err := db.Delete([]byte("test_key")) 108 | assert.NoError(t, err) 109 | err = db.Merge() 110 | assert.NoError(t, err) 111 | 112 | value, err := db.Get([]byte("test_key")) 113 | assert.Nil(t, value) 114 | assert.Equal(t, KeyNotFoundErr, err) 115 | } 116 | BitCaskTest(t, opt, test) 117 | } 118 | -------------------------------------------------------------------------------- /entity/entry.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import ( 4 | "encoding/binary" 5 | "hash/crc32" 6 | "time" 7 | ) 8 | 9 | const ( 10 | MetaSize = 29 11 | DeleteFlag = 1 12 | ) 13 | 14 | type Hint struct { 15 | Fid int 16 | Off int64 17 | } 18 | 19 | type Entry struct { 20 | Key []byte 21 | Value []byte 22 | Meta *Meta 23 | } 24 | 25 | type Meta struct { 26 | Crc uint32 27 | position uint64 28 | TimeStamp uint64 29 | KeySize uint32 30 | ValueSize uint32 31 | Flag uint8 32 | } 33 | 34 | func NewEntryWithData(key []byte, value []byte) *Entry { 35 | now := uint64(time.Now().Unix()) 36 | meta := NewMeta().WithTimeStamp(now).WithKeySize(uint32(len(key))).WithValueSize(uint32(len(value))) 37 | e := NewEntry().WithKey(key).WithValue(value).WithMeta(meta) 38 | return e 39 | } 40 | 41 | func (e *Entry) Encode() []byte { 42 | size := e.Size() 43 | buf := make([]byte, size) 44 | binary.LittleEndian.PutUint64(buf[4:12], e.Meta.position) 45 | binary.LittleEndian.PutUint64(buf[12:20], e.Meta.TimeStamp) 46 | binary.LittleEndian.PutUint32(buf[20:24], e.Meta.KeySize) 47 | binary.LittleEndian.PutUint32(buf[24:28], e.Meta.ValueSize) 48 | buf[28] = e.Meta.Flag 49 | if e.Meta.Flag != DeleteFlag { 50 | copy(buf[MetaSize:MetaSize+len(e.Key)], e.Key) 51 | copy(buf[MetaSize+len(e.Key):MetaSize+len(e.Key)+len(e.Value)], e.Value) 52 | } 53 | c32 := crc32.ChecksumIEEE(buf[4:]) 54 | binary.LittleEndian.PutUint32(buf[0:4], c32) 55 | return buf 56 | } 57 | 58 | func (e *Entry) DecodePayload(payload []byte) { 59 | keyHighBound := int(e.Meta.KeySize) 60 | valueHighBound := keyHighBound + int(e.Meta.ValueSize) 61 | e.Key = payload[0:keyHighBound] 62 | e.Value = payload[keyHighBound:valueHighBound] 63 | } 64 | 65 | func (e *Entry) DecodeMeta(bytes []byte) { 66 | e.Meta.Crc = binary.LittleEndian.Uint32(bytes[0:4]) 67 | e.Meta.position = binary.LittleEndian.Uint64(bytes[4:12]) 68 | e.Meta.TimeStamp = binary.LittleEndian.Uint64(bytes[12:20]) 69 | e.Meta.KeySize = binary.LittleEndian.Uint32(bytes[20:24]) 70 | e.Meta.ValueSize = binary.LittleEndian.Uint32(bytes[24:28]) 71 | } 72 | 73 | func (e *Entry) Size() int64 { 74 | return int64(MetaSize + e.Meta.KeySize + e.Meta.ValueSize) 75 | } 76 | 77 | func (e *Entry) GetCrc(buf []byte) uint32 { 78 | crc := crc32.ChecksumIEEE(buf[4:]) 79 | crc = crc32.Update(crc, crc32.IEEETable, e.Key) 80 | crc = crc32.Update(crc, crc32.IEEETable, e.Value) 81 | return crc 82 | } 83 | 84 | func NewEntry() *Entry { 85 | return new(Entry) 86 | } 87 | 88 | func (e *Entry) WithKey(key []byte) *Entry { 89 | e.Key = key 90 | return e 91 | } 92 | 93 | func (e *Entry) WithValue(value []byte) *Entry { 94 | e.Value = value 95 | return e 96 | } 97 | 98 | func (e *Entry) WithMeta(meta *Meta) *Entry { 99 | e.Meta = meta 100 | return e 101 | } 102 | 103 | func NewMeta() *Meta { 104 | return new(Meta) 105 | } 106 | 107 | func (m *Meta) WithPosition(pos uint64) *Meta { 108 | m.position = pos 109 | return m 110 | } 111 | 112 | func (m *Meta) WithTimeStamp(timestamp uint64) *Meta { 113 | m.TimeStamp = timestamp 114 | return m 115 | } 116 | 117 | func (m *Meta) WithKeySize(keySize uint32) *Meta { 118 | m.KeySize = keySize 119 | return m 120 | } 121 | 122 | func (m *Meta) WithValueSize(valueSize uint32) *Meta { 123 | m.ValueSize = valueSize 124 | return m 125 | } 126 | 127 | func (m *Meta) WithFlag(flag uint8) *Meta { 128 | m.Flag = flag 129 | return m 130 | } 131 | 132 | func NewHint() *Hint { 133 | return new(Hint) 134 | } 135 | 136 | func (h *Hint) WithFid(fid int) *Hint { 137 | h.Fid = fid 138 | return h 139 | } 140 | 141 | func (h *Hint) WithOff(off int64) *Hint { 142 | h.Off = off 143 | return h 144 | } 145 | -------------------------------------------------------------------------------- /db.go: -------------------------------------------------------------------------------- 1 | package tiny_bitcask 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "sort" 7 | "sync" 8 | "tiny-bitcask/entity" 9 | "tiny-bitcask/index" 10 | "tiny-bitcask/storage" 11 | ) 12 | 13 | var ( 14 | KeyNotFoundErr = errors.New("key not found") 15 | NoNeedToMergeErr = errors.New("no need to merge") 16 | ) 17 | 18 | type DB struct { 19 | rw sync.RWMutex 20 | kd *index.KeyDir 21 | storage *storage.DataFiles 22 | opt *Options 23 | } 24 | 25 | // NewDB create a new DB instance with Options 26 | func NewDB(opt *Options) (db *DB, err error) { 27 | db = &DB{} 28 | db.kd = index.NewKD() 29 | db.opt = opt 30 | if isExist, _ := isDirExist(opt.Dir); isExist { 31 | if err := db.recovery(opt); err != nil { 32 | return nil, err 33 | } 34 | return db, nil 35 | } 36 | var fileSize = getSegmentSize(opt.SegmentSize) 37 | db.storage, err = storage.NewDataFiles(opt.Dir, fileSize) 38 | if err != nil { 39 | return nil, err 40 | } 41 | return db, err 42 | } 43 | 44 | // Set sets a key-value pairs into DB 45 | func (db *DB) Set(key []byte, value []byte) error { 46 | db.rw.Lock() 47 | defer db.rw.Unlock() 48 | entry := entity.NewEntryWithData(key, value) 49 | h, err := db.storage.WriterEntity(entry) 50 | if err != nil { 51 | return err 52 | } 53 | db.kd.AddIndexByData(h, entry) 54 | return nil 55 | } 56 | 57 | // Get gets value by using key 58 | func (db *DB) Get(key []byte) (value []byte, err error) { 59 | db.rw.RLock() 60 | defer db.rw.RUnlock() 61 | i := db.kd.Find(string(key)) 62 | if i == nil { 63 | return nil, KeyNotFoundErr 64 | } 65 | entry, err := db.storage.ReadEntry(i) 66 | if err != nil { 67 | return nil, err 68 | } 69 | return entry.Value, nil 70 | } 71 | 72 | // Delete delete a key 73 | func (db *DB) Delete(key []byte) error { 74 | db.rw.Lock() 75 | defer db.rw.Unlock() 76 | keyStr := string(key) 77 | index := db.kd.Find(keyStr) 78 | if index == nil { 79 | return KeyNotFoundErr 80 | } 81 | e := entity.NewEntry() 82 | e.Meta.Flag = entity.DeleteFlag 83 | _, err := db.storage.WriterEntity(e) 84 | if err != nil { 85 | return err 86 | } 87 | db.kd.Delete(keyStr) 88 | return nil 89 | } 90 | 91 | // Merge clean the useless data 92 | func (db *DB) Merge() error { 93 | db.rw.Lock() 94 | defer db.rw.Unlock() 95 | fids := db.storage.GetOldFiles() 96 | if len(fids) < 2 { 97 | return NoNeedToMergeErr 98 | } 99 | sort.Ints(fids) 100 | for _, fid := range fids[:len(fids)-1] { 101 | var off int64 = 0 102 | reader := db.storage.GetOldFile(fid) 103 | for { 104 | entry, err := reader.ReadEntityWithOutLength(off) 105 | if err == nil { 106 | key := string(entry.Key) 107 | off += entry.Size() 108 | oldIndex := db.kd.Find(key) 109 | if oldIndex == nil { 110 | continue 111 | } 112 | if oldIndex.IsEqualPos(fid, off) { 113 | h, err := db.storage.WriterEntity(entry) 114 | if err != nil { 115 | return err 116 | } 117 | db.kd.AddIndexByData(h, entry) 118 | } 119 | } else { 120 | if err == io.EOF { 121 | break 122 | } 123 | return err 124 | } 125 | } 126 | err := db.storage.RemoveFile(fid) 127 | if err != nil { 128 | return err 129 | } 130 | } 131 | return nil 132 | } 133 | 134 | // recovery will rebuild a db from existing dir 135 | func (db *DB) recovery(opt *Options) (err error) { 136 | var fileSize = getSegmentSize(opt.SegmentSize) 137 | db.storage, err = storage.NewDataFileWithFiles(opt.Dir, fileSize) 138 | if err != nil { 139 | return err 140 | } 141 | fids := db.storage.GetOldFiles() 142 | sort.Ints(fids) 143 | for _, fid := range fids { 144 | var off int64 = 0 145 | reader := db.storage.GetOldFile(fid) 146 | for { 147 | entry, err := reader.ReadEntityWithOutLength(off) 148 | if err == nil { 149 | db.kd.AddIndexByRawInfo(fid, off, entry.Key, entry.Value) 150 | off += entry.Size() 151 | } else { 152 | if err == storage.DeleteEntryErr { 153 | continue 154 | } 155 | if err == io.EOF { 156 | break 157 | } 158 | return err 159 | } 160 | } 161 | } 162 | return err 163 | } 164 | -------------------------------------------------------------------------------- /storage/datafiles.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path" 9 | "strconv" 10 | "strings" 11 | "tiny-bitcask/entity" 12 | "tiny-bitcask/index" 13 | ) 14 | 15 | var ( 16 | ReadMissDataErr = errors.New("miss data during read") 17 | WriteMissDataErr = errors.New("miss data during write") 18 | DeleteEntryErr = errors.New("read an entry which had deleted") 19 | MissOldFileErr = errors.New("miss old file error") 20 | CrcErr = errors.New("crc error") 21 | ) 22 | 23 | const ( 24 | FileSuffix = ".dat" 25 | B = 1 26 | KB = 1024 * B 27 | MB = 1024 * KB 28 | GB = 1024 * MB 29 | ) 30 | 31 | type oldFiles map[int]*OldFile 32 | 33 | func newOldFiles() oldFiles { 34 | return oldFiles{} 35 | } 36 | 37 | type DataFiles struct { 38 | dir string 39 | oIds []int 40 | segmentSize int64 41 | active *ActiveFile 42 | olds map[int]*OldFile 43 | } 44 | 45 | func (dfs *DataFiles) GetOldFiles() []int { 46 | return dfs.oIds 47 | } 48 | 49 | func (dfs *DataFiles) RemoveReader(fid int) error { 50 | delete(dfs.olds, fid) 51 | return nil 52 | } 53 | 54 | func (dfs *DataFiles) AddReader(fid int) error { 55 | path := getFilePath(dfs.dir, fid) 56 | reader, err := NewOldFile(path) 57 | if err != nil { 58 | return err 59 | } 60 | dfs.olds[fid] = reader 61 | return nil 62 | } 63 | 64 | // NewDataFileWithFiles create a DataFiles with existing dir 65 | func NewDataFileWithFiles(dir string, segmentSize int64) (dfs *DataFiles, err error) { 66 | dfs = &DataFiles{ 67 | dir: dir, 68 | olds: newOldFiles(), 69 | segmentSize: segmentSize, 70 | } 71 | 72 | fids, err := getFids(dir) 73 | if err != nil { 74 | return nil, err 75 | } 76 | aFid := fids[len(fids)-1] 77 | dfs.active, err = NewActiveFile(dir, aFid) 78 | if err != nil { 79 | return nil, err 80 | } 81 | if len(fids) == 1 { 82 | return dfs, nil 83 | } 84 | oldFids := fids[:len(fids)-1] 85 | for _, fid := range oldFids { 86 | path := getFilePath(dir, fid) 87 | reader, err := NewOldFile(path) 88 | if err != nil { 89 | return nil, err 90 | } 91 | dfs.olds[fid] = reader 92 | } 93 | 94 | return dfs, nil 95 | } 96 | 97 | // NewDataFiles create a DataFiles Object with an empty dir 98 | func NewDataFiles(path string, segmentSize int64) (dfs *DataFiles, err error) { 99 | err = os.Mkdir(path, os.ModePerm) 100 | if err != nil { 101 | return nil, err 102 | } 103 | af, err := NewActiveFile(path, 1) 104 | if err != nil { 105 | return nil, err 106 | } 107 | dfs = &DataFiles{ 108 | dir: path, 109 | oIds: nil, 110 | active: af, 111 | olds: map[int]*OldFile{}, 112 | segmentSize: segmentSize, 113 | } 114 | return dfs, nil 115 | } 116 | 117 | func (dfs *DataFiles) rotate() error { 118 | aFid := dfs.active.fid 119 | path := getFilePath(dfs.dir, aFid) 120 | fd, err := os.OpenFile(path, os.O_RDONLY, os.ModePerm) 121 | if err != nil { 122 | return err 123 | } 124 | r := &OldFile{fd: fd} 125 | dfs.olds[dfs.active.fid] = r 126 | dfs.oIds = append(dfs.oIds, aFid) 127 | 128 | af, err := NewActiveFile(dfs.dir, aFid+1) 129 | if err != nil { 130 | return err 131 | } 132 | dfs.active = af 133 | return nil 134 | } 135 | 136 | func (dfs *DataFiles) ReadEntry(index *index.DataPosition) (e *entity.Entry, err error) { 137 | dataSize := entity.MetaSize + index.KeySize + index.ValueSize 138 | if index.Fid == dfs.active.fid { 139 | return dfs.active.ReadEntity(index.Off, dataSize) 140 | } 141 | of, exist := dfs.olds[index.Fid] 142 | if !exist { 143 | return nil, MissOldFileErr 144 | } 145 | return of.ReadEntity(index.Off, dataSize) 146 | } 147 | 148 | func (dfs *DataFiles) GetOldFile(fid int) *OldFile { 149 | return dfs.olds[fid] 150 | } 151 | 152 | func (dfs *DataFiles) RemoveFile(fid int) error { 153 | of := dfs.olds[fid] 154 | err := of.fd.Close() 155 | if err != nil { 156 | return err 157 | } 158 | path := getFilePath(dfs.dir, fid) 159 | err = os.Remove(path) 160 | if err != nil { 161 | return err 162 | } 163 | delete(dfs.olds, fid) 164 | return nil 165 | } 166 | 167 | func (dfs *DataFiles) WriterEntity(e entity.Entity) (h *entity.Hint, err error) { 168 | h, err = dfs.active.WriterEntity(e) 169 | if err != nil { 170 | return nil, err 171 | } 172 | if dfs.canRotate() { 173 | err := dfs.rotate() 174 | if err != nil { 175 | return nil, err 176 | } 177 | } 178 | return h, nil 179 | } 180 | 181 | func (dfs *DataFiles) canRotate() bool { 182 | return dfs.active.off > dfs.segmentSize 183 | } 184 | 185 | type ActiveFile struct { 186 | fid int 187 | fd *os.File 188 | off int64 189 | } 190 | 191 | func NewActiveFile(dir string, fid int) (af *ActiveFile, err error) { 192 | path := getFilePath(dir, fid) 193 | fd, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, os.ModePerm) 194 | if err != nil { 195 | return nil, err 196 | } 197 | fi, err := os.Stat(path) 198 | if err != nil { 199 | return nil, err 200 | } 201 | af = &ActiveFile{ 202 | fd: fd, 203 | off: fi.Size(), 204 | fid: fid, 205 | } 206 | return af, nil 207 | } 208 | 209 | func (af *ActiveFile) WriterEntity(e entity.Entity) (h *entity.Hint, err error) { 210 | buf := e.Encode() 211 | n, err := af.fd.WriteAt(buf, af.off) 212 | if n < len(buf) { 213 | return nil, WriteMissDataErr 214 | } 215 | if err != nil { 216 | return nil, err 217 | } 218 | h = entity.NewHint().WithFid(af.fid).WithOff(af.off) 219 | af.off += e.Size() 220 | return h, nil 221 | } 222 | 223 | func (af *ActiveFile) ReadEntity(off int64, length int) (e *entity.Entry, err error) { 224 | return readEntry(af.fd, off, length) 225 | } 226 | 227 | type OldFile struct { 228 | fd *os.File 229 | } 230 | 231 | func NewOldFile(path string) (of *OldFile, err error) { 232 | fd, err := os.OpenFile(path, os.O_RDONLY, os.ModePerm) 233 | if err != nil { 234 | return nil, err 235 | } 236 | of = &OldFile{fd: fd} 237 | return of, nil 238 | } 239 | 240 | func (of *OldFile) ReadEntity(off int64, length int) (e *entity.Entry, err error) { 241 | return readEntry(of.fd, off, length) 242 | } 243 | 244 | func (of *OldFile) ReadEntityWithOutLength(off int64) (e *entity.Entry, err error) { 245 | e = entity.NewEntry().WithMeta(entity.NewMeta()) 246 | buf := make([]byte, entity.MetaSize) 247 | n, err := of.fd.ReadAt(buf, off) 248 | if err != nil { 249 | return nil, err 250 | } 251 | if n < entity.MetaSize { 252 | return nil, ReadMissDataErr 253 | } 254 | off += entity.MetaSize 255 | e.DecodeMeta(buf) 256 | payloadSize := e.Meta.KeySize + e.Meta.ValueSize 257 | buf = make([]byte, payloadSize) 258 | n, err = of.fd.ReadAt(buf, off) 259 | if err != nil { 260 | return nil, err 261 | } 262 | if n < int(payloadSize) { 263 | return nil, ReadMissDataErr 264 | } 265 | e.DecodePayload(buf) 266 | return e, nil 267 | } 268 | 269 | func readEntry(fd *os.File, off int64, length int) (e *entity.Entry, err error) { 270 | buf := make([]byte, length) 271 | n, err := fd.ReadAt(buf, off) 272 | if n < length { 273 | return nil, ReadMissDataErr 274 | } 275 | if err != nil { 276 | return nil, err 277 | } 278 | e = entity.NewEntry() 279 | e.DecodeMeta(buf[:entity.MetaSize]) 280 | e.DecodePayload(buf[entity.MetaSize:]) 281 | return e, nil 282 | } 283 | 284 | func getFids(dir string) (fids []int, err error) { 285 | files, err := ioutil.ReadDir(dir) 286 | if err != nil { 287 | return nil, err 288 | } 289 | for _, f := range files { 290 | fileName := f.Name() 291 | filePath := path.Base(fileName) 292 | if path.Ext(filePath) == FileSuffix { 293 | filePrefix := strings.TrimSuffix(filePath, FileSuffix) 294 | fid, err := strconv.Atoi(filePrefix) 295 | if err != nil { 296 | return nil, err 297 | } 298 | fids = append(fids, fid) 299 | } 300 | } 301 | return fids, nil 302 | } 303 | 304 | func getFilePath(dir string, fid int) string { 305 | return fmt.Sprintf("%s/%d%s", dir, fid, FileSuffix) 306 | } 307 | --------------------------------------------------------------------------------