├── utils ├── cache │ ├── cache.s │ ├── cache_test.go │ ├── lru.go │ ├── cmSketch.go │ ├── s2lru.go │ ├── bloom.go │ └── cache.go ├── slice.go ├── tools.go ├── iterator.go ├── entry_test.go ├── closer.go ├── rand.go ├── mmap │ ├── mmap_darwin.go │ ├── mmap_linux.go │ ├── darwin.go │ └── linux.go ├── const.go ├── map.go ├── throttle.go ├── key.go ├── bloom.go ├── error.go ├── wal.go ├── file.go ├── bloom_test.go ├── value.go ├── skiplist_test.go ├── entry.go └── arena.go ├── .gitignore ├── README.md ├── go.mod ├── debug.sh ├── gen.sh ├── LICENSE ├── file ├── file.go ├── wal.go ├── sstable_linux.go ├── sstable_darwin.go ├── vlog.go ├── mmap_darwin.go ├── mmap_linux.go └── manifest.go ├── stats.go ├── lsm ├── cache.go ├── manifest_test.go ├── lsm.go ├── memtable.go ├── lsm_test.go ├── levels.go ├── table.go └── iterator.go ├── options.go ├── pb └── pb.proto ├── db_test.go ├── iterator.go ├── go.sum ├── vlog_test.go └── db.go /utils/cache/cache.s: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | work_test 3 | testdata 4 | .idea -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # corekv 2 | 3 | corekv 是一个用来高效率的验证kv引擎feature的项目。 -------------------------------------------------------------------------------- /utils/slice.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | // Slice holds a reusable buf, will reallocate if you request a larger size than ever before. 4 | // One problem is with n distinct sizes in random order it'll reallocate log(n) times. 
5 | type Slice struct { 6 | buf []byte 7 | } 8 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/hardcore-os/corekv 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/cespare/xxhash/v2 v2.1.2 7 | github.com/davecgh/go-spew v1.1.1 // indirect 8 | github.com/golang/protobuf v1.5.2 9 | github.com/pkg/errors v0.9.1 10 | github.com/stretchr/testify v1.7.0 11 | golang.org/x/sys v0.0.0-20210910150752-751e447fb3d0 12 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect 13 | google.golang.org/protobuf v1.27.1 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /debug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ### 3 | # Copyright 2021 logicrec Project Authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License") 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | ### 17 | 18 | dlv test -test.run=$1 -------------------------------------------------------------------------------- /utils/cache/cache_test.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestCacheBasicCRUD(t *testing.T) { 11 | cache := NewCache(5) 12 | for i := 0; i < 10; i++ { 13 | key := fmt.Sprintf("key%d", i) 14 | val := fmt.Sprintf("val%d", i) 15 | cache.Set(key, val) 16 | fmt.Printf("set %s: %s\n", key, cache) 17 | } 18 | 19 | for i := 0; i < 1000; i++ { 20 | key := fmt.Sprintf("key%d", i) 21 | val := fmt.Sprintf("val%d", i) 22 | res, ok := cache.Get(key) 23 | if ok { 24 | fmt.Printf("get %s: %s\n", key, cache) 25 | assert.Equal(t, val, res) 26 | continue 27 | } 28 | assert.Equal(t, res, nil) 29 | } 30 | fmt.Printf("at last: %s\n", cache) 31 | } 32 | -------------------------------------------------------------------------------- /gen.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ### Copyright hardcore-os Project Authors 3 | ### 4 | # Licensed under the Apache License, Version 2.0 (the "License") 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | protoDir="pb" 16 | outDir="pb" 17 | protoc -I ${protoDir}/ ${protoDir}/pb.proto --gofast_out=plugins=grpc:${outDir} -------------------------------------------------------------------------------- /utils/tools.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 bardcckre-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | func ValueSize(value []byte) int64 { 18 | return 0 19 | } 20 | 21 | // Copy copies a byte slice and returns the copied slice. 22 | func Copy(a []byte) []byte { 23 | b := make([]byte, len(a)) 24 | copy(b, a) 25 | return b 26 | } 27 | -------------------------------------------------------------------------------- /utils/iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | // Iterator 迭代器 18 | type Iterator interface { 19 | Next() 20 | Valid() bool 21 | Rewind() 22 | Item() Item 23 | Close() error 24 | Seek(key []byte) 25 | } 26 | 27 | // Item _ 28 | type Item interface { 29 | Entry() *Entry 30 | } 31 | 32 | // Options _ 33 | // TODO 可能被重构 34 | type Options struct { 35 | Prefix []byte 36 | IsAsc bool 37 | } 38 | -------------------------------------------------------------------------------- /utils/entry_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/stretchr/testify/assert" 21 | ) 22 | 23 | func TestValueStruct(t *testing.T) { 24 | v := ValueStruct{ 25 | Value: []byte("硬核课堂"), 26 | Meta: 2, 27 | ExpiresAt: 213123123123, 28 | } 29 | data := make([]byte, v.EncodedSize()) 30 | v.EncodeValue(data) 31 | var vv ValueStruct 32 | vv.DecodeValue(data) 33 | assert.Equal(t, vv, v) 34 | } 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 hardcore-os 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /file/file.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package file 16 | 17 | import "io" 18 | 19 | // Options 20 | type Options struct { 21 | FID uint64 22 | FileName string 23 | Dir string 24 | Path string 25 | Flag int 26 | MaxSz int 27 | } 28 | 29 | type CoreFile interface { 30 | Close() error 31 | Truncature(n int64) error 32 | ReName(name string) error 33 | NewReader(offset int) io.Reader 34 | Bytes(off, sz int) ([]byte, error) 35 | AllocateSlice(sz, offset int) ([]byte, int, error) 36 | Sync() error 37 | Delete() error 38 | Slice(offset int) []byte 39 | } 40 | -------------------------------------------------------------------------------- /stats.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 logicrec Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package corekv 16 | 17 | import "github.com/hardcore-os/corekv/utils" 18 | 19 | type Stats struct { 20 | closer *utils.Closer 21 | EntryNum int64 // 存储多少个kv数据 22 | } 23 | 24 | // Close 25 | func (s *Stats) close() error { 26 | return nil 27 | } 28 | 29 | // StartStats 30 | func (s *Stats) StartStats() { 31 | defer s.closer.Done() 32 | for { 33 | select { 34 | case <-s.closer.CloseSignal: 35 | return 36 | } 37 | // stats logic... 38 | } 39 | } 40 | 41 | // NewStats 42 | func newStats(opt *Options) *Stats { 43 | s := &Stats{} 44 | s.closer = utils.NewCloser() 45 | s.EntryNum = 1 // 这里直接写 46 | return s 47 | } 48 | -------------------------------------------------------------------------------- /utils/closer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import "sync" 18 | 19 | // Closer _用于资源回收的信号控制 20 | type Closer struct { 21 | waiting sync.WaitGroup 22 | CloseSignal chan struct{} 23 | } 24 | 25 | // NewCloser _ 26 | func NewCloser() *Closer { 27 | closer := &Closer{waiting: sync.WaitGroup{}} 28 | closer.CloseSignal = make(chan struct{}) 29 | return closer 30 | } 31 | 32 | // Close 上游通知下游协程进行资源回收,并等待协程通知回收完毕 33 | func (c *Closer) Close() { 34 | close(c.CloseSignal) 35 | c.waiting.Wait() 36 | } 37 | 38 | // Done 标示协程已经完成资源回收,通知上游正式关闭 39 | func (c *Closer) Done() { 40 | c.waiting.Done() 41 | } 42 | 43 | // Add 添加wait 计数 44 | func (c *Closer) Add(n int) { 45 | c.waiting.Add(n) 46 | } 47 | -------------------------------------------------------------------------------- /lsm/cache.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package lsm 16 | 17 | import ( 18 | coreCache "github.com/hardcore-os/corekv/utils/cache" 19 | ) 20 | 21 | type cache struct { 22 | indexs *coreCache.Cache // key fid, value table 23 | blocks *coreCache.Cache // key fid_blockOffset value block []byte 24 | } 25 | 26 | type blockBuffer struct { 27 | b []byte 28 | } 29 | 30 | const defaultCacheSize = 1024 31 | 32 | // close 33 | func (c *cache) close() error { 34 | return nil 35 | } 36 | 37 | // newCache 38 | func newCache(opt *Options) *cache { 39 | return &cache{indexs: coreCache.NewCache(defaultCacheSize), blocks: coreCache.NewCache(defaultCacheSize)} 40 | } 41 | 42 | // TODO fid 使用字符串是不是会有性能损耗 43 | func (c *cache) addIndex(fid uint64, t *table) { 44 | c.indexs.Set(fid, t) 45 | } 46 | -------------------------------------------------------------------------------- /options.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-o Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package corekv 16 | 17 | import "github.com/hardcore-os/corekv/utils" 18 | 19 | // Options corekv 总的配置文件 20 | type Options struct { 21 | ValueThreshold int64 22 | WorkDir string 23 | MemTableSize int64 24 | SSTableMaxSz int64 25 | MaxBatchCount int64 26 | MaxBatchSize int64 // max batch size in bytes 27 | ValueLogFileSize int 28 | VerifyValueChecksum bool 29 | ValueLogMaxEntries uint32 30 | LogRotatesToFlush int32 31 | MaxTableSize int64 32 | } 33 | 34 | // NewDefaultOptions 返回默认的options 35 | func NewDefaultOptions() *Options { 36 | opt := &Options{ 37 | WorkDir: "./work_test", 38 | MemTableSize: 1024, 39 | SSTableMaxSz: 1 << 30, 40 | } 41 | opt.ValueThreshold = utils.DefaultValueThreshold 42 | return opt 43 | } 44 | -------------------------------------------------------------------------------- /utils/cache/lru.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "container/list" 5 | "fmt" 6 | ) 7 | 8 | type windowLRU struct { 9 | data map[uint64]*list.Element 10 | cap int 11 | list *list.List 12 | } 13 | 14 | type storeItem struct { 15 | stage int 16 | key uint64 17 | conflict uint64 18 | value interface{} 19 | } 20 | 21 | func newWindowLRU(size int, data map[uint64]*list.Element) *windowLRU { 22 | return &windowLRU{ 23 | data: data, 24 | cap: size, 25 | list: list.New(), 26 | } 27 | } 28 | 29 | func (lru *windowLRU) add(newitem storeItem) (eitem storeItem, evicted bool) { 30 | // 如果 window 部分容量未满,直接插入 31 | if lru.list.Len() < lru.cap { 32 | lru.data[newitem.key] = lru.list.PushFront(&newitem) 33 | return storeItem{}, false 34 | } 35 | //如果 widow 部分容量已满,按照 lru 规则从尾部淘汰 36 | evictItem := lru.list.Back() 37 | item := evictItem.Value.(*storeItem) 38 | 39 | // 从 slice 中删除该条数据 40 | delete(lru.data, item.key) 41 | 42 | // 这里直接对 evictItem 和 *item 赋值,避免向runtime 再次申请空间 43 | eitem, *item = *item, newitem 44 | 45 | lru.data[item.key] = evictItem 46 | lru.list.MoveToFront(evictItem) 47 | return 
eitem, true 48 | } 49 | 50 | func (lru *windowLRU) get(v *list.Element) { 51 | lru.list.MoveToFront(v) 52 | } 53 | 54 | func (lru *windowLRU) String() string { 55 | var s string 56 | for e := lru.list.Front(); e != nil; e = e.Next() { 57 | s += fmt.Sprintf("%v,", e.Value.(*storeItem).value) 58 | } 59 | return s 60 | } 61 | -------------------------------------------------------------------------------- /utils/rand.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | var ( 11 | r = rand.New(rand.NewSource(time.Now().UnixNano())) 12 | mu sync.Mutex 13 | ) 14 | 15 | func Int63n(n int64) int64 { 16 | mu.Lock() 17 | res := r.Int63n(n) 18 | mu.Unlock() 19 | return res 20 | } 21 | 22 | func RandN(n int) int { 23 | mu.Lock() 24 | res := r.Intn(n) 25 | mu.Unlock() 26 | return res 27 | } 28 | 29 | func Float64() float64 { 30 | mu.Lock() 31 | res := r.Float64() 32 | mu.Unlock() 33 | return res 34 | } 35 | 36 | // 生成随机字符串作为key和value 37 | func randStr(length int) string { 38 | // 包括特殊字符,进行测试 39 | str := "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~=+%^*/()[]{}/!@#$?|©®😁😭🉑️🐂㎡硬核课堂" 40 | bytes := []byte(str) 41 | result := []byte{} 42 | rand.Seed(time.Now().UnixNano() + int64(rand.Intn(100))) 43 | for i := 0; i < length; i++ { 44 | result = append(result, bytes[rand.Intn(len(bytes))]) 45 | } 46 | return string(result) 47 | } 48 | 49 | // 构建entry对象 50 | func BuildEntry() *Entry { 51 | rand.Seed(time.Now().Unix()) 52 | key := []byte(fmt.Sprintf("%s%s", randStr(16), "12345678")) 53 | value := []byte(randStr(128)) 54 | // key := []byte(fmt.Sprintf("%s%s", "硬核课堂", "12345678")) 55 | // value := []byte("硬核😁课堂") 56 | expiresAt := uint64(time.Now().Add(12*time.Hour).UnixNano() / 1e6) 57 | return &Entry{ 58 | Key: key, 59 | Value: value, 60 | ExpiresAt: expiresAt, 61 | } 62 | } 63 | 
-------------------------------------------------------------------------------- /utils/mmap/mmap_darwin.go: -------------------------------------------------------------------------------- 1 | // +build darwin 2 | 3 | // Copyright 2021 hardcore-os Project Authors 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License") 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | // mmap api 17 | 18 | // Mmap uses the mmap system call to memory-map a file. If writable is true, 19 | // memory protection of the pages is set so that they may be written to as well. 20 | package mmap 21 | 22 | import ( 23 | "os" 24 | ) 25 | 26 | func Mmap(fd *os.File, writable bool, size int64) ([]byte, error) { 27 | return mmap(fd, writable, size) 28 | } 29 | 30 | // Munmap unmaps a previously mapped slice. 31 | func Munmap(b []byte) error { 32 | return munmap(b) 33 | } 34 | 35 | // Madvise uses the madvise system call to give advise about the use of memory 36 | // when using a slice that is memory-mapped to a file. Set the readahead flag to 37 | // false if page references are expected in random order. 38 | func Madvise(b []byte, readahead bool) error { 39 | return madvise(b, readahead) 40 | } 41 | 42 | // Msync would call sync on the mmapped data. 
43 | func Msync(b []byte) error { 44 | return msync(b) 45 | } 46 | -------------------------------------------------------------------------------- /utils/mmap/mmap_linux.go: -------------------------------------------------------------------------------- 1 | // +build linux 2 | 3 | // Copyright 2021 hardcore-os Project Authors 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License") 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | // mmap api 17 | 18 | // Mmap uses the mmap system call to memory-map a file. If writable is true, 19 | // memory protection of the pages is set so that they may be written to as well. 20 | package mmap 21 | 22 | import ( 23 | "os" 24 | ) 25 | 26 | func Mmap(fd *os.File, writable bool, size int64) ([]byte, error) { 27 | return mmap(fd, writable, size) 28 | } 29 | 30 | // Munmap unmaps a previously mapped slice. 31 | func Munmap(b []byte) error { 32 | return munmap(b) 33 | } 34 | 35 | // Madvise uses the madvise system call to give advise about the use of memory 36 | // when using a slice that is memory-mapped to a file. Set the readahead flag to 37 | // false if page references are expected in random order. 38 | func Madvise(b []byte, readahead bool) error { 39 | return madvise(b, readahead) 40 | } 41 | 42 | // Msync would call sync on the mmapped data. 
43 | func Msync(b []byte) error { 44 | return msync(b) 45 | } 46 | 47 | // Mremap unmmap and mmap 48 | func Mremap(data []byte, size int) ([]byte, error) { 49 | return mremap(data, size) 50 | } 51 | -------------------------------------------------------------------------------- /utils/mmap/darwin.go: -------------------------------------------------------------------------------- 1 | // +build darwin 2 | 3 | /* 4 | * Copyright 2019 Dgraph Labs, Inc. and Contributors 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package mmap 20 | 21 | import ( 22 | "os" 23 | "syscall" 24 | "unsafe" 25 | 26 | "golang.org/x/sys/unix" 27 | ) 28 | 29 | // Mmap uses the mmap system call to memory-map a file. If writable is true, 30 | // memory protection of the pages is set so that they may be written to as well. 31 | func mmap(fd *os.File, writable bool, size int64) ([]byte, error) { 32 | mtype := unix.PROT_READ 33 | if writable { 34 | mtype |= unix.PROT_WRITE 35 | } 36 | return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED) 37 | } 38 | 39 | // Munmap unmaps a previously mapped slice. 40 | func munmap(b []byte) error { 41 | return unix.Munmap(b) 42 | } 43 | 44 | // This is required because the unix package does not support the madvise system call on OS X. 
45 | func madvise(b []byte, readahead bool) error { 46 | advice := unix.MADV_NORMAL 47 | if !readahead { 48 | advice = unix.MADV_RANDOM 49 | } 50 | 51 | _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), 52 | uintptr(len(b)), uintptr(advice)) 53 | if e1 != 0 { 54 | return e1 55 | } 56 | return nil 57 | } 58 | 59 | func msync(b []byte) error { 60 | return unix.Msync(b, unix.MS_SYNC) 61 | } 62 | -------------------------------------------------------------------------------- /pb/pb.proto: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright hardcore-os Project Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License") 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | // Use protos/gen.sh to generate .pb.go files. 17 | syntax = "proto3"; 18 | 19 | package pb; 20 | 21 | message KV { 22 | bytes key = 1; 23 | bytes value = 2; 24 | bytes user_meta = 3; 25 | uint64 version = 4; 26 | uint64 expires_at = 5; 27 | bytes meta = 6; 28 | 29 | // Stream id is used to identify which stream the KV came from. 30 | uint32 stream_id = 10; 31 | } 32 | 33 | message KVList { 34 | repeated KV kv = 1; 35 | } 36 | 37 | message ManifestChangeSet { 38 | // A set of changes that are applied atomically. 
39 | repeated ManifestChange changes = 1; 40 | } 41 | 42 | message ManifestChange { 43 | uint64 Id = 1; 44 | enum Operation { 45 | CREATE = 0; 46 | DELETE = 1; 47 | } 48 | Operation Op = 2; 49 | uint32 Level = 3; // Only used for CREATE 50 | bytes Checksum = 4; // Only used for CREATE 51 | } 52 | message TableIndex{ 53 | repeated BlockOffset offsets = 1; 54 | bytes bloomFilter = 2; 55 | uint64 maxVersion = 3; 56 | uint32 keyCount = 4; 57 | uint32 staleDataSize = 5; 58 | } 59 | 60 | message BlockOffset{ 61 | bytes key = 1; 62 | uint32 offset = 2; 63 | uint32 len = 3; 64 | } -------------------------------------------------------------------------------- /utils/const.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import ( 18 | "hash/crc32" 19 | "math" 20 | "os" 21 | ) 22 | 23 | const ( 24 | // MaxLevelNum _ 25 | MaxLevelNum = 7 26 | // DefaultValueThreshold _ 27 | DefaultValueThreshold = 1024 28 | ) 29 | 30 | // file 31 | const ( 32 | ManifestFilename = "MANIFEST" 33 | ManifestRewriteFilename = "REWRITEMANIFEST" 34 | ManifestDeletionsRewriteThreshold = 10000 35 | ManifestDeletionsRatio = 10 36 | DefaultFileFlag = os.O_RDWR | os.O_CREATE | os.O_APPEND 37 | DefaultFileMode = 0666 38 | MaxValueLogSize = 10 << 20 39 | // This is O_DSYNC (datasync) on platforms that support it -- see file_unix.go 40 | datasyncFileFlag = 0x0 41 | // 基于可变长编码,其最可能的编码 42 | MaxHeaderSize = 21 43 | VlogHeaderSize = 0 44 | MaxVlogFileSize uint32 = math.MaxUint32 45 | Mi int64 = 1 << 20 46 | KVWriteChCapacity = 1000 47 | ) 48 | 49 | // meta 50 | const ( 51 | BitDelete byte = 1 << 0 // Set if the key has been deleted. 52 | BitValuePointer byte = 1 << 1 // Set if the value is NOT stored directly next to key. 53 | ) 54 | 55 | // codec 56 | var ( 57 | MagicText = [4]byte{'H', 'A', 'R', 'D'} 58 | MagicVersion = uint32(1) 59 | // CastagnoliCrcTable is a CRC32 polynomial table 60 | CastagnoliCrcTable = crc32.MakeTable(crc32.Castagnoli) 61 | ) 62 | -------------------------------------------------------------------------------- /utils/map.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | import ( 18 | "reflect" 19 | "sync" 20 | 21 | "github.com/pkg/errors" 22 | ) 23 | 24 | // CoreMap _ 25 | type CoreMap struct { 26 | m sync.Map 27 | } 28 | 29 | // NewMap _ 30 | func NewMap() *CoreMap { 31 | return &CoreMap{m: sync.Map{}} 32 | } 33 | 34 | // Get _ 35 | func (c *CoreMap) Get(key interface{}) (interface{}, bool) { 36 | hashKey := c.keyToHash(key) 37 | return c.m.Load(hashKey) 38 | } 39 | 40 | // Set _ 41 | func (c *CoreMap) Set(key, value interface{}) { 42 | hashKey := c.keyToHash(key) 43 | c.m.Store(hashKey, value) 44 | } 45 | 46 | // Del _ 47 | func (c *CoreMap) Del(key interface{}) { 48 | hashKey := c.keyToHash(key) 49 | c.m.Delete(hashKey) 50 | } 51 | 52 | // Range _ 53 | func (c *CoreMap) Range(f func(key, value interface{}) bool) { 54 | c.m.Range(f) 55 | } 56 | 57 | func (c *CoreMap) keyToHash(key interface{}) uint64 { 58 | if key == nil { 59 | return 0 60 | } 61 | switch k := key.(type) { 62 | case []byte: 63 | return MemHash(k) 64 | case uint32: 65 | return uint64(k) 66 | case string: 67 | return MemHashString(k) 68 | case uint64: 69 | return k 70 | case byte: 71 | return uint64(k) 72 | case int: 73 | return uint64(k) 74 | case int32: 75 | return uint64(k) 76 | 77 | case int64: 78 | return uint64(k) 79 | default: 80 | CondPanic(true, errors.Errorf("Key:[%+v] type not supported", reflect.TypeOf(k))) 81 | } 82 | return 0 83 | } 84 | -------------------------------------------------------------------------------- /lsm/manifest_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | package lsm 15 | 16 | import ( 17 | "os" 18 | "path/filepath" 19 | "testing" 20 | 21 | "github.com/hardcore-os/corekv/utils" 22 | "github.com/stretchr/testify/require" 23 | ) 24 | 25 | // TestBaseManifest manifest 文件整体性测试 26 | func TestBaseManifest(t *testing.T) { 27 | clearDir() 28 | recovery := func() { 29 | // 每次运行都是相当于意外重启 30 | lsm := buildLSM() 31 | // 测试正确性 32 | baseTest(t, lsm, 128) 33 | lsm.Close() 34 | } 35 | // 运行这个闭包5次进行测试 36 | runTest(5, recovery) 37 | } 38 | 39 | func TestManifestMagic(t *testing.T) { 40 | helpTestManifestFileCorruption(t, 3, "bad magic") 41 | } 42 | 43 | func TestManifestVersion(t *testing.T) { 44 | helpTestManifestFileCorruption(t, 4, "unsupported version") 45 | } 46 | 47 | func TestManifestChecksum(t *testing.T) { 48 | helpTestManifestFileCorruption(t, 15, "bad check sum") 49 | } 50 | 51 | func helpTestManifestFileCorruption(t *testing.T, off int64, errorContent string) { 52 | clearDir() 53 | // 创建lsm,然后再将其关闭 54 | { 55 | lsm := buildLSM() 56 | require.NoError(t, lsm.Close()) 57 | } 58 | fp, err := os.OpenFile(filepath.Join(opt.WorkDir, utils.ManifestFilename), os.O_RDWR, 0) 59 | require.NoError(t, err) 60 | // 写入一个错误的值 61 | _, err = fp.WriteAt([]byte{'X'}, off) 62 | require.NoError(t, err) 63 | require.NoError(t, fp.Close()) 64 | defer func() { 65 | if err := recover(); err != nil { 66 | require.Contains(t, err.(error).Error(), errorContent) 67 | } 68 | }() 69 | // 在此打开 lsm 此时会panic 70 | lsm := buildLSM() 71 | require.NoError(t, lsm.Close()) 72 | } 73 | 
-------------------------------------------------------------------------------- /db_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package corekv 16 | 17 | import ( 18 | "fmt" 19 | "testing" 20 | "time" 21 | 22 | "github.com/hardcore-os/corekv/utils" 23 | ) 24 | 25 | func TestAPI(t *testing.T) { 26 | clearDir() 27 | db := Open(opt) 28 | defer func() { _ = db.Close() }() 29 | // 写入 30 | for i := 0; i < 50; i++ { 31 | key, val := fmt.Sprintf("key%d", i), fmt.Sprintf("val%d", i) 32 | e := utils.NewEntry([]byte(key), []byte(val)).WithTTL(1000 * time.Second) 33 | if err := db.Set(e); err != nil { 34 | t.Fatal(err) 35 | } 36 | // 查询 37 | if entry, err := db.Get([]byte(key)); err != nil { 38 | t.Fatal(err) 39 | } else { 40 | t.Logf("db.Get key=%s, value=%s, expiresAt=%d", entry.Key, entry.Value, entry.ExpiresAt) 41 | } 42 | } 43 | 44 | for i := 0; i < 40; i++ { 45 | key, _ := fmt.Sprintf("key%d", i), fmt.Sprintf("val%d", i) 46 | if err := db.Del([]byte(key)); err != nil { 47 | t.Fatal(err) 48 | } 49 | } 50 | 51 | // 迭代器 52 | iter := db.NewIterator(&utils.Options{ 53 | Prefix: []byte("hello"), 54 | IsAsc: false, 55 | }) 56 | defer func() { _ = iter.Close() }() 57 | defer func() { _ = iter.Close() }() 58 | for iter.Rewind(); iter.Valid(); iter.Next() { 59 | it := iter.Item() 60 
| t.Logf("db.NewIterator key=%s, value=%s, expiresAt=%d", it.Entry().Key, it.Entry().Value, it.Entry().ExpiresAt) 61 | } 62 | t.Logf("db.Stats.EntryNum=%+v", db.Info().EntryNum) 63 | // 删除 64 | if err := db.Del([]byte("hello")); err != nil { 65 | t.Fatal(err) 66 | } 67 | 68 | for i := 0; i < 10; i++ { 69 | key, val := fmt.Sprintf("key%d", i), fmt.Sprintf("val%d", i) 70 | e := utils.NewEntry([]byte(key), []byte(val)).WithTTL(1000 * time.Second) 71 | if err := db.Set(e); err != nil { 72 | t.Fatal(err) 73 | } 74 | // 查询 75 | if entry, err := db.Get([]byte(key)); err != nil { 76 | t.Fatal(err) 77 | } else { 78 | t.Logf("db.Get key=%s, value=%s, expiresAt=%d", entry.Key, entry.Value, entry.ExpiresAt) 79 | } 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 logicrec Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package corekv 16 | 17 | import ( 18 | "github.com/hardcore-os/corekv/lsm" 19 | "github.com/hardcore-os/corekv/utils" 20 | ) 21 | 22 | type DBIterator struct { 23 | iitr utils.Iterator 24 | vlog *valueLog 25 | } 26 | type Item struct { 27 | e *utils.Entry 28 | } 29 | 30 | func (it *Item) Entry() *utils.Entry { 31 | return it.e 32 | } 33 | func (db *DB) NewIterator(opt *utils.Options) utils.Iterator { 34 | iters := make([]utils.Iterator, 0) 35 | iters = append(iters, db.lsm.NewIterators(opt)...) 36 | 37 | res := &DBIterator{ 38 | vlog: db.vlog, 39 | iitr: lsm.NewMergeIterator(iters, opt.IsAsc), 40 | } 41 | return res 42 | } 43 | 44 | func (iter *DBIterator) Next() { 45 | iter.iitr.Next() 46 | for ; iter.Valid() && iter.Item() == nil; iter.iitr.Next() { 47 | } 48 | } 49 | func (iter *DBIterator) Valid() bool { 50 | return iter.iitr.Valid() 51 | } 52 | func (iter *DBIterator) Rewind() { 53 | iter.iitr.Rewind() 54 | for ; iter.Valid() && iter.Item() == nil; iter.iitr.Next() { 55 | } 56 | } 57 | func (iter *DBIterator) Item() utils.Item { 58 | // 检查从lsm拿到的value是否是value ptr,是则从vlog中拿值 59 | e := iter.iitr.Item().Entry() 60 | var value []byte 61 | 62 | if e != nil && utils.IsValuePtr(e) { 63 | var vp utils.ValuePtr 64 | vp.Decode(e.Value) 65 | result, cb, err := iter.vlog.read(&vp) 66 | defer utils.RunCallback(cb) 67 | if err != nil { 68 | return nil 69 | } 70 | value = utils.SafeCopy(nil, result) 71 | } 72 | 73 | if e.IsDeletedOrExpired() || value == nil { 74 | return nil 75 | } 76 | 77 | res := &utils.Entry{ 78 | Key: e.Key, 79 | Value: value, 80 | ExpiresAt: e.ExpiresAt, 81 | Meta: e.Meta, 82 | Version: e.Version, 83 | Offset: e.Offset, 84 | Hlen: e.Hlen, 85 | ValThreshold: e.ValThreshold, 86 | } 87 | return res 88 | } 89 | func (iter *DBIterator) Close() error { 90 | return iter.iitr.Close() 91 | } 92 | func (iter *DBIterator) Seek(key []byte) { 93 | } 94 | -------------------------------------------------------------------------------- /utils/throttle.go: 
// Throttle bounds the number of workers running at once. It also collects
// errors reported by finished workers and lets callers wait for all of
// them to complete.
type Throttle struct {
	once      sync.Once
	wg        sync.WaitGroup
	ch        chan struct{}
	errCh     chan error
	finishErr error
}

// NewThrottle creates a Throttle that admits at most max concurrent workers.
func NewThrottle(max int) *Throttle {
	t := &Throttle{}
	t.ch = make(chan struct{}, max)
	t.errCh = make(chan error, max)
	return t
}

// Do must be called by a worker before it starts. It blocks while the
// maximum number of workers are already running. If a previously finished
// worker reported an error, Do returns that error instead of admitting.
func (t *Throttle) Do() error {
	for {
		select {
		case t.ch <- struct{}{}:
			t.wg.Add(1)
			return nil
		case err := <-t.errCh:
			if err != nil {
				return err
			}
		}
	}
}
56 | func (t *Throttle) Done(err error) { 57 | if err != nil { 58 | t.errCh <- err 59 | } 60 | select { 61 | case <-t.ch: 62 | default: 63 | panic("Throttle Do Done mismatch") 64 | } 65 | t.wg.Done() 66 | } 67 | 68 | // Finish waits until all workers have finished working. It would return any error passed by Done. 69 | // If Finish is called multiple time, it will wait for workers to finish only once(first time). 70 | // From next calls, it will return same error as found on first call. 71 | func (t *Throttle) Finish() error { 72 | t.once.Do(func() { 73 | t.wg.Wait() 74 | close(t.ch) 75 | close(t.errCh) 76 | for err := range t.errCh { 77 | if err != nil { 78 | t.finishErr = err 79 | return 80 | } 81 | } 82 | }) 83 | 84 | return t.finishErr 85 | } 86 | -------------------------------------------------------------------------------- /utils/cache/cmSketch.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "time" 7 | ) 8 | 9 | const ( 10 | cmDepth = 4 11 | ) 12 | 13 | type cmSketch struct { 14 | rows [cmDepth]cmRow 15 | seed [cmDepth]uint64 16 | mask uint64 17 | } 18 | 19 | func newCmSketch(numCounters int64) *cmSketch { 20 | if numCounters == 0 { 21 | panic("cmSketch: invalid numCounters") 22 | } 23 | 24 | // numCounters 一定是二次幂,也就一定是1后面有 n 个 0 25 | numCounters = next2Power(numCounters) 26 | // mask 一定是0111...111 27 | sketch := &cmSketch{mask: uint64(numCounters - 1)} 28 | source := rand.New(rand.NewSource(time.Now().UnixNano())) 29 | 30 | // 初始化4行 31 | // 0000,0000|0000,0000|0000,0000 32 | // 0000,0000|0000,0000|0000,0000 33 | // 0000,0000|0000,0000|0000,0000 34 | // 0000,0000|0000,0000|0000,0000 35 | 36 | for i := 0; i < cmDepth; i++ { 37 | sketch.seed[i] = source.Uint64() 38 | sketch.rows[i] = newCmRow(numCounters) 39 | } 40 | 41 | return sketch 42 | } 43 | 44 | func (s *cmSketch) Increment(hashed uint64) { 45 | // 对于每一行进行相同操作 46 | for i := range s.rows { 47 | 
s.rows[i].increment((hashed ^ s.seed[i]) & s.mask) 48 | } 49 | } 50 | 51 | func (s *cmSketch) Estimate(hashed uint64) int64 { 52 | min := byte(255) 53 | for i := range s.rows { 54 | val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask) 55 | if val < min { 56 | min = val 57 | } 58 | } 59 | 60 | return int64(min) 61 | } 62 | 63 | // Reset halves all counter values. 64 | func (s *cmSketch) Reset() { 65 | for _, r := range s.rows { 66 | r.reset() 67 | } 68 | } 69 | 70 | // Clear zeroes all counters. 71 | func (s *cmSketch) Clear() { 72 | for _, r := range s.rows { 73 | r.clear() 74 | } 75 | } 76 | 77 | // 快速计算大于 X,且最接近 X 的二次幂 78 | func next2Power(x int64) int64 { 79 | x-- 80 | x |= x >> 1 81 | x |= x >> 2 82 | x |= x >> 4 83 | x |= x >> 8 84 | x |= x >> 16 85 | x |= x >> 32 86 | x++ 87 | return x 88 | } 89 | 90 | type cmRow []byte 91 | 92 | func newCmRow(numCounters int64) cmRow { 93 | return make(cmRow, numCounters/2) 94 | } 95 | 96 | func (r cmRow) get(n uint64) byte { 97 | return r[n/2] >> ((n & 1) * 4) & 0x0f 98 | } 99 | 100 | func (r cmRow) increment(n uint64) { 101 | i := n / 2 102 | s := (n & 1) * 4 103 | v := (r[i] >> s) & 0x0f 104 | if v < 15 { 105 | r[i] += 1 << s 106 | } 107 | } 108 | 109 | func (r cmRow) reset() { 110 | for i := range r { 111 | r[i] = (r[i] >> 1) & 0x77 112 | } 113 | } 114 | 115 | func (r cmRow) clear() { 116 | for i := range r { 117 | r[i] = 0 118 | } 119 | } 120 | 121 | func (r cmRow) string() string { 122 | s := "" 123 | for i := uint64(0); i < uint64(len(r)*2); i++ { 124 | s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f) 125 | } 126 | s = s[:len(s)-1] 127 | return s 128 | } 129 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= 2 | github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 3 | 
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 7 | github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= 8 | github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 9 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 10 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 11 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 12 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 13 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 14 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 15 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 16 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 17 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 18 | golang.org/x/sys v0.0.0-20210910150752-751e447fb3d0 h1:xrCZDmdtoloIiooiA9q0OQb9r8HejIHYoHGhGCe1pGg= 19 | golang.org/x/sys v0.0.0-20210910150752-751e447fb3d0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 20 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 21 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= 22 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 23 | google.golang.org/protobuf v1.26.0-rc.1/go.mod 
// stringStruct mirrors the runtime's internal string header so that
// arbitrary byte/string data can be passed to runtime.memhash.
type stringStruct struct {
	str unsafe.Pointer
	len int
}

// memhash links directly to the Go runtime's hash function, which uses
// hardware AES instructions when available. NOTE(review): the seed changes
// per process, so results must never be persisted.
//go:noescape
//go:linkname memhash runtime.memhash
func memhash(p unsafe.Pointer, h, s uintptr) uintptr
35 | func ParseKey(key []byte) []byte { 36 | if len(key) < 8 { 37 | return key 38 | } 39 | 40 | return key[:len(key)-8] 41 | } 42 | 43 | // ParseTs parses the timestamp from the key bytes. 44 | func ParseTs(key []byte) uint64 { 45 | if len(key) <= 8 { 46 | return 0 47 | } 48 | return math.MaxUint64 - binary.BigEndian.Uint64(key[len(key)-8:]) 49 | } 50 | 51 | // SameKey checks for key equality ignoring the version timestamp suffix. 52 | func SameKey(src, dst []byte) bool { 53 | if len(src) != len(dst) { 54 | return false 55 | } 56 | return bytes.Equal(ParseKey(src), ParseKey(dst)) 57 | } 58 | 59 | // KeyWithTs generates a new key by appending ts to key. 60 | func KeyWithTs(key []byte, ts uint64) []byte { 61 | out := make([]byte, len(key)+8) 62 | copy(out, key) 63 | binary.BigEndian.PutUint64(out[len(key):], math.MaxUint64-ts) 64 | return out 65 | } 66 | 67 | // MemHash is the hash function used by go map, it utilizes available hardware instructions(behaves 68 | // as aeshash if aes instruction is available). 69 | // NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash. 70 | func MemHash(data []byte) uint64 { 71 | ss := (*stringStruct)(unsafe.Pointer(&data)) 72 | return uint64(memhash(ss.str, 0, uintptr(ss.len))) 73 | } 74 | 75 | // MemHashString is the hash function used by go map, it utilizes available hardware instructions 76 | // (behaves as aeshash if aes instruction is available). 77 | // NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash. 78 | func MemHashString(str string) uint64 { 79 | ss := (*stringStruct)(unsafe.Pointer(&str)) 80 | return uint64(memhash(ss.str, 0, uintptr(ss.len))) 81 | } 82 | 83 | // SafeCopy does append(a[:0], src...). 84 | func SafeCopy(a, src []byte) []byte { 85 | return append(a[:0], src...) 
86 | } 87 | 88 | func NewCurVersion() uint64 { 89 | return uint64(time.Now().UnixNano() / 1e9) 90 | } 91 | -------------------------------------------------------------------------------- /utils/cache/s2lru.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "container/list" 5 | "fmt" 6 | ) 7 | 8 | type segmentedLRU struct { 9 | data map[uint64]*list.Element 10 | stageOneCap, stageTwoCap int 11 | stageOne, stageTwo *list.List 12 | } 13 | 14 | const ( 15 | STAGE_ONE = iota + 1 16 | STAGE_TWO 17 | ) 18 | 19 | func newSLRU(data map[uint64]*list.Element, stageOneCap, stageTwoCap int) *segmentedLRU { 20 | return &segmentedLRU{ 21 | data: data, 22 | stageOneCap: stageOneCap, 23 | stageTwoCap: stageTwoCap, 24 | stageOne: list.New(), 25 | stageTwo: list.New(), 26 | } 27 | } 28 | 29 | func (slru *segmentedLRU) add(newitem storeItem) { 30 | // 先进来的都放 stageOne 31 | newitem.stage = 1 32 | 33 | // 如果 stageOne 没满,整个 LFU 区域也没满 34 | if slru.stageOne.Len() < slru.stageOneCap || slru.Len() < slru.stageOneCap+slru.stageTwoCap { 35 | slru.data[newitem.key] = slru.stageOne.PushFront(&newitem) 36 | return 37 | } 38 | 39 | //走到这里说明 StageOne 满了,或者整个 LFU都满了 40 | //那么需要从 StageOne 淘汰数据了 41 | e := slru.stageOne.Back() 42 | item := e.Value.(*storeItem) 43 | 44 | //这里淘汰就是真的淘汰了 45 | delete(slru.data, item.key) 46 | 47 | *item = newitem 48 | 49 | slru.data[item.key] = e 50 | slru.stageOne.MoveToFront(e) 51 | } 52 | 53 | func (slru *segmentedLRU) get(v *list.Element) { 54 | item := v.Value.(*storeItem) 55 | 56 | // 若访问的缓存数据,已经在 StageTwo,只需要按照 LRU 规则提前即可 57 | if item.stage == STAGE_TWO { 58 | slru.stageTwo.MoveToFront(v) 59 | return 60 | } 61 | 62 | // 若访问的数据还在 StageOne,那么再次被访问到,就需要提升到 StageTwo 阶段了 63 | if slru.stageTwo.Len() < slru.stageTwoCap { 64 | slru.stageOne.Remove(v) 65 | item.stage = STAGE_TWO 66 | slru.data[item.key] = slru.stageTwo.PushFront(item) 67 | return 68 | } 69 | 70 | // 新数据加入 StageTwo,需要淘汰旧数据 71 | // 
StageTwo 中淘汰的数据不会消失,会进入 StageOne 72 | // StageOne 中,访问频率更低的数据,有可能会被淘汰 73 | back := slru.stageTwo.Back() 74 | bitem := back.Value.(*storeItem) 75 | 76 | *bitem, *item = *item, *bitem 77 | 78 | bitem.stage = STAGE_TWO 79 | item.stage = STAGE_ONE 80 | 81 | slru.data[item.key] = v 82 | slru.data[bitem.key] = back 83 | 84 | slru.stageOne.MoveToFront(v) 85 | slru.stageTwo.MoveToFront(back) 86 | } 87 | 88 | func (slru *segmentedLRU) Len() int { 89 | return slru.stageTwo.Len() + slru.stageOne.Len() 90 | } 91 | 92 | func (slru *segmentedLRU) victim() *storeItem { 93 | //如果 slru 的容量未满,不需要淘汰 94 | if slru.Len() < slru.stageOneCap+slru.stageTwoCap { 95 | return nil 96 | } 97 | 98 | // 如果已经满了,则需要从20%的区域淘汰数据,这里直接从尾部拿最后一个元素即可 99 | v := slru.stageOne.Back() 100 | return v.Value.(*storeItem) 101 | } 102 | 103 | func (slru *segmentedLRU) String() string { 104 | var s string 105 | for e := slru.stageTwo.Front(); e != nil; e = e.Next() { 106 | s += fmt.Sprintf("%v,", e.Value.(*storeItem).value) 107 | } 108 | s += fmt.Sprintf(" | ") 109 | for e := slru.stageOne.Front(); e != nil; e = e.Next() { 110 | s += fmt.Sprintf("%v,", e.Value.(*storeItem).value) 111 | } 112 | return s 113 | } 114 | -------------------------------------------------------------------------------- /utils/mmap/linux.go: -------------------------------------------------------------------------------- 1 | // +build linux 2 | 3 | // Copyright 2021 logicrec Project Authors 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License") 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// mmap uses the mmap system call to memory-map a file. If writable is true,
// memory protection of the pages is set so that they may be written to as well.
func mmap(fd *os.File, writable bool, size int64) ([]byte, error) {
	mtype := unix.PROT_READ
	if writable {
		mtype |= unix.PROT_WRITE
	}
	// MAP_SHARED: writes through the mapping are carried back to the file.
	return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED)
}

// mremap is a Linux-specific system call to remap pages in memory. This can
// be used in place of munmap + mmap. On success the returned slice aliases
// the (possibly relocated) mapping with len and cap both set to size.
func mremap(data []byte, size int) ([]byte, error) {
	// MREMAP_MAYMOVE lets the kernel relocate the mapping when it cannot be
	// resized in place.
	const MREMAP_MAYMOVE = 0x1

	// Poke the new address/length directly into the slice header.
	header := (*reflect.SliceHeader)(unsafe.Pointer(&data))
	mmapAddr, _, errno := unix.Syscall6(
		unix.SYS_MREMAP,
		header.Data,
		uintptr(header.Len),
		uintptr(size),
		uintptr(MREMAP_MAYMOVE),
		0,
		0,
	)
	if errno != 0 {
		return nil, errno
	}

	header.Data = mmapAddr
	header.Cap = size
	header.Len = size
	return data, nil
}

// munmap unmaps a previously mapped slice.
//
// unix.Munmap maintains an internal list of mmapped addresses, and only calls
// munmap if the address is present in that list. If we use mremap, this list
// is not updated. To bypass this, we call munmap ourselves.
func munmap(data []byte) error {
	// Only a full, unsliced mapping may be unmapped.
	if len(data) == 0 || len(data) != cap(data) {
		return unix.EINVAL
	}
	_, _, errno := unix.Syscall(
		unix.SYS_MUNMAP,
		uintptr(unsafe.Pointer(&data[0])),
		uintptr(len(data)),
		0,
	)
	if errno != 0 {
		return errno
	}
	return nil
}

// madvise uses the madvise system call to give advise about the use of memory
// when using a slice that is memory-mapped to a file. Set the readahead flag
// to false if page references are expected in random order.
func madvise(b []byte, readahead bool) error {
	flags := unix.MADV_NORMAL
	if !readahead {
		flags = unix.MADV_RANDOM
	}
	return unix.Madvise(b, flags)
}

// msync writes any modified data in the mapping back to persistent storage.
func msync(b []byte) error {
	return unix.Msync(b, unix.MS_SYNC)
}
// Filter is a Bloom filter encoded as a byte slice: a bit array followed
// by one trailing byte that records the probe count k.
type Filter []byte

// MayContainKey reports whether the filter may contain key k.
func (f Filter) MayContainKey(k []byte) bool {
	return f.MayContain(Hash(k))
}

// MayContain reports whether the filter may contain the hashed key h.
// False positives are possible; false negatives are not.
func (f Filter) MayContain(h uint32) bool {
	if len(f) < 2 {
		return false
	}
	k := f[len(f)-1]
	if k > 30 {
		// Reserved for potentially new encodings of short Bloom filters;
		// treat such filters as matching everything.
		return true
	}
	nBits := uint32(8 * (len(f) - 1))
	delta := h>>17 | h<<15 // 32-bit rotate, used for double hashing
	for probe := uint8(0); probe < k; probe++ {
		pos := h % nBits
		if f[pos/8]&(1<<(pos%8)) == 0 {
			return false
		}
		h += delta
	}
	return true
}

// NewFilter encodes the pre-hashed keys into a Bloom filter using roughly
// bitsPerKey bits per key. A bitsPerKey of 10 yields ~1% false positives.
func NewFilter(keys []uint32, bitsPerKey int) Filter {
	return Filter(appendFilter(keys, bitsPerKey))
}

// BloomBitsPerKey derives the bits-per-key needed to achieve the false
// positive rate fp for numEntries keys.
func BloomBitsPerKey(numEntries int, fp float64) int {
	size := -1 * float64(numEntries) * math.Log(fp) / math.Pow(float64(0.69314718056), 2)
	locs := math.Ceil(size / float64(numEntries))
	return int(locs)
}

// appendFilter builds the filter bytes for keys using double hashing.
func appendFilter(keys []uint32, bitsPerKey int) []byte {
	if bitsPerKey < 0 {
		bitsPerKey = 0
	}
	// 0.69 approximates ln(2); k is the near-optimal probe count, clamped
	// to [1, 30].
	k := uint32(float64(bitsPerKey) * 0.69)
	switch {
	case k < 1:
		k = 1
	case k > 30:
		k = 30
	}

	nBits := len(keys) * bitsPerKey
	// Small key sets see a very high false positive rate; enforce a
	// minimum filter length to keep it sane.
	if nBits < 64 {
		nBits = 64
	}
	nBytes := (nBits + 7) / 8
	nBits = nBytes * 8
	filter := make([]byte, nBytes+1)

	for _, h := range keys {
		delta := h>>17 | h<<15
		for probe := uint32(0); probe < k; probe++ {
			pos := h % uint32(nBits)
			filter[pos/8] |= 1 << (pos % 8)
			h += delta
		}
	}

	// The final byte records k so MayContain can decode the filter.
	filter[nBytes] = uint8(k)
	return filter
}

// Hash implements a hashing algorithm similar to the Murmur hash.
func Hash(b []byte) uint32 {
	const (
		seed = 0xbc9f1d34
		m    = 0xc6a4a793
	)
	h := uint32(seed) ^ uint32(len(b))*m
	for ; len(b) >= 4; b = b[4:] {
		h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
		h *= m
		h ^= h >> 16
	}
	// Fold in the trailing 1-3 bytes.
	switch len(b) {
	case 3:
		h += uint32(b[2]) << 16
		fallthrough
	case 2:
		h += uint32(b[1]) << 8
		fallthrough
	case 1:
		h += uint32(b[0])
		h *= m
		h ^= h >> 24
	}
	return h
}
// Set writes entry into the LSM tree: first into the WAL-backed active
// memtable, rotating it to the immutable list when full, then flushes any
// pending immutables to level 0.
// Returns utils.ErrEmptyKey for a nil entry or empty key.
func (lsm *LSM) Set(entry *utils.Entry) (err error) {
	if entry == nil || len(entry.Key) == 0 {
		return utils.ErrEmptyKey
	}
	// Graceful shutdown: register this in-flight call with the closer so
	// Close() can wait for it to finish.
	lsm.closer.Add(1)
	defer lsm.closer.Done()
	// If the active memtable cannot absorb this entry's WAL encoding without
	// exceeding MemTableSize, rotate: push it onto immutables and start a
	// fresh memtable; otherwise write into the current one.
	if int64(lsm.memTable.wal.Size())+
		int64(utils.EstimateWalCodecSize(entry)) > lsm.option.MemTableSize {
		lsm.Rotate()
	}

	if err = lsm.memTable.set(entry); err != nil {
		return err
	}
	// Flush every pending immutable memtable to disk (level 0).
	// NOTE(review): this loop runs synchronously on the write path and is not
	// guarded by a lock — concurrent Set/Rotate can race on lsm.immutables.
	for _, immutable := range lsm.immutables {
		if err = lsm.levels.flush(immutable); err != nil {
			return err
		}
		// TODO: this is problematic — the immutable should be reclaimed via
		// reference counting rather than closed eagerly here (readers may
		// still hold it).
		err = immutable.close()
		utils.Panic(err)
	}
	if len(lsm.immutables) != 0 {
		// TODO: reset the immutables queue. Could be optimized to save memory,
		// e.g. by reusing the slice or capping the immutable list at a fixed size.
		lsm.immutables = make([]*memTable, 0)
	}
	return err
}
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Copyright 2021 hardcore-os Project Authors 16 | // 17 | // Licensed under the Apache License, Version 2.0 (the "License") 18 | // you may not use this file except in compliance with the License. 19 | // You may obtain a copy of the License at 20 | // 21 | // http://www.apache.org/licenses/LICENSE-2.0 22 | // 23 | // Unless required by applicable law or agreed to in writing, software 24 | // distributed under the License is distributed on an "AS IS" BASIS, 25 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 26 | // See the License for the specific language governing permissions and 27 | // limitations under the License. 28 | 29 | package utils 30 | 31 | import ( 32 | "errors" 33 | "fmt" 34 | "os" 35 | "path" 36 | "path/filepath" 37 | "runtime" 38 | "strconv" 39 | "strings" 40 | ) 41 | 42 | var ( 43 | gopath = path.Join(os.Getenv("GOPATH"), "src") + "/" 44 | ) 45 | 46 | // NotFoundKey 找不到key 47 | var ( 48 | // ErrKeyNotFound is returned when key isn't found on a txn.Get. 49 | ErrKeyNotFound = errors.New("Key not found") 50 | // ErrEmptyKey is returned if an empty key is passed on an update function. 51 | ErrEmptyKey = errors.New("Key cannot be empty") 52 | // ErrReWriteFailure reWrite failure 53 | ErrReWriteFailure = errors.New("reWrite failure") 54 | // ErrBadMagic bad magic 55 | ErrBadMagic = errors.New("bad magic") 56 | // ErrBadChecksum bad check sum 57 | ErrBadChecksum = errors.New("bad check sum") 58 | // ErrChecksumMismatch is returned at checksum mismatch. 
59 | ErrChecksumMismatch = errors.New("checksum mismatch") 60 | 61 | ErrTruncate = errors.New("Do truncate") 62 | ErrStop = errors.New("Stop") 63 | 64 | // compact 65 | ErrFillTables = errors.New("Unable to fill tables") 66 | 67 | ErrBlockedWrites = errors.New("Writes are blocked, possibly due to DropAll or Close") 68 | ErrTxnTooBig = errors.New("Txn is too big to fit into one request") 69 | ErrDeleteVlogFile = errors.New("Delete vlog file") 70 | ErrNoRoom = errors.New("No room for write") 71 | 72 | // ErrInvalidRequest is returned if the user request is invalid. 73 | ErrInvalidRequest = errors.New("Invalid request") 74 | // ErrNoRewrite is returned if a call for value log GC doesn't result in a log file rewrite. 75 | ErrNoRewrite = errors.New("Value log GC attempt didn't result in any cleanup") 76 | 77 | // ErrRejected is returned if a value log GC is called either while another GC is running, or 78 | // after DB::Close has been called. 79 | ErrRejected = errors.New("Value log GC request rejected") 80 | ) 81 | 82 | // Panic 如果err 不为nil 则panicc 83 | func Panic(err error) { 84 | if err != nil { 85 | panic(err) 86 | } 87 | } 88 | 89 | // Panic2 _ 90 | func Panic2(_ interface{}, err error) { 91 | Panic(err) 92 | } 93 | 94 | // Err err 95 | func Err(err error) error { 96 | if err != nil { 97 | fmt.Printf("%s %s\n", location(2, true), err) 98 | } 99 | return err 100 | } 101 | 102 | // WarpErr err 103 | func WarpErr(format string, err error) error { 104 | if err != nil { 105 | fmt.Printf("%s %s %s", format, location(2, true), err) 106 | } 107 | return err 108 | } 109 | func location(deep int, fullPath bool) string { 110 | _, file, line, ok := runtime.Caller(deep) 111 | if !ok { 112 | file = "???" 
113 | line = 0 114 | } 115 | 116 | if fullPath { 117 | if strings.HasPrefix(file, gopath) { 118 | file = file[len(gopath):] 119 | } 120 | } else { 121 | file = filepath.Base(file) 122 | } 123 | return file + ":" + strconv.Itoa(line) 124 | } 125 | 126 | // CondPanic e 127 | func CondPanic(condition bool, err error) { 128 | if condition { 129 | Panic(err) 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /utils/wal.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 logicrec Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// WalHeader is the fixed metadata written in front of every WAL record.
// All four fields are encoded as uvarints, so the on-disk size varies
// per entry (at most maxHeaderSize bytes).
type WalHeader struct {
	KeyLen    uint32
	ValueLen  uint32
	Meta      byte
	ExpiresAt uint64
}

// Encode serializes the header into out as four consecutive uvarints
// (key length, value length, meta byte, expiry) and returns the number
// of bytes written. out must be at least maxHeaderSize bytes long.
func (h WalHeader) Encode(out []byte) int {
	n := binary.PutUvarint(out, uint64(h.KeyLen))
	n += binary.PutUvarint(out[n:], uint64(h.ValueLen))
	n += binary.PutUvarint(out[n:], uint64(h.Meta))
	n += binary.PutUvarint(out[n:], h.ExpiresAt)
	return n
}
// HashReader wraps an io.Reader and maintains a running CRC32 (Castagnoli)
// checksum plus a count of bytes consumed, so WAL readers can verify the
// trailing checksum after decoding a record.
type HashReader struct {
	R         io.Reader
	H         hash.Hash32
	BytesRead int // Number of bytes read.
}

// NewHashReader returns a HashReader over r with a fresh Castagnoli CRC32.
func NewHashReader(r io.Reader) *HashReader {
	hash := crc32.New(CastagnoliCrcTable)
	return &HashReader{
		R: r,
		H: hash,
	}
}

// Read reads len(p) bytes from the reader. Returns the number of bytes read, error on failure.
// Every byte successfully read is also fed into the running hash and counted
// in BytesRead. On a read error the bytes are NOT hashed or counted.
func (t *HashReader) Read(p []byte) (int, error) {
	n, err := t.R.Read(p)
	if err != nil {
		return n, err
	}
	t.BytesRead += n
	// hash.Hash.Write never returns an error, so this returns (n, nil).
	return t.H.Write(p[:n])
}

// ReadByte reads exactly one byte from the reader. Returns error on failure.
// Satisfies io.ByteReader so binary.ReadUvarint can decode headers from t.
// NOTE(review): a reader that returns (0, nil) would yield a spurious zero
// byte here — presumably underlying readers never do; confirm.
func (t *HashReader) ReadByte() (byte, error) {
	b := make([]byte, 1)
	_, err := t.Read(b)
	return b[0], err
}

// Sum32 returns the sum32 of the underlying hash.
func (t *HashReader) Sum32() uint32 {
	return t.H.Sum32()
}
// VlogFilePath builds the path of a value-log file inside dirPath: the fid is
// zero-padded to five digits and given the ".vlog" extension, e.g. "00042.vlog".
func VlogFilePath(dirPath string, fid uint32) string {
	return fmt.Sprintf("%s%c%05d.vlog", dirPath, os.PathSeparator, fid)
}
71 | func SyncDir(dir string) error { 72 | f, err := openDir(dir) 73 | if err != nil { 74 | return errors.Wrapf(err, "While opening directory: %s.", dir) 75 | } 76 | err = f.Sync() 77 | closeErr := f.Close() 78 | if err != nil { 79 | return errors.Wrapf(err, "While syncing directory: %s.", dir) 80 | } 81 | return errors.Wrapf(closeErr, "While closing directory: %s.", dir) 82 | } 83 | 84 | // LoadIDMap Get the id of all sst files in the current folder 85 | func LoadIDMap(dir string) map[uint64]struct{} { 86 | fileInfos, err := ioutil.ReadDir(dir) 87 | Err(err) 88 | idMap := make(map[uint64]struct{}) 89 | for _, info := range fileInfos { 90 | if info.IsDir() { 91 | continue 92 | } 93 | fileID := FID(info.Name()) 94 | if fileID != 0 { 95 | idMap[fileID] = struct{}{} 96 | } 97 | } 98 | return idMap 99 | } 100 | 101 | // CompareKeys checks the key without timestamp and checks the timestamp if keyNoTs 102 | // is same. 103 | // a would be sorted higher than aa if we use bytes.compare 104 | // All keys should have timestamp. 
// CompareKeys checks the key without timestamp and checks the timestamp if keyNoTs
// is same.
// a would be sorted higher than aa if we use bytes.compare
// All keys should have timestamp.
// Both keys must carry an 8-byte timestamp suffix; anything 8 bytes or
// shorter panics (via CondPanic), since stripping the suffix would be invalid.
func CompareKeys(key1, key2 []byte) int {
	CondPanic((len(key1) <= 8 || len(key2) <= 8), fmt.Errorf("%s,%s < 8", string(key1), string(key2)))
	// Compare the user keys first (everything before the 8-byte suffix)...
	if cmp := bytes.Compare(key1[:len(key1)-8], key2[:len(key2)-8]); cmp != 0 {
		return cmp
	}
	// ...and only break ties on the timestamp suffix itself.
	return bytes.Compare(key1[len(key1)-8:], key2[len(key2)-8:])
}
14 | package utils 15 | 16 | import ( 17 | "testing" 18 | ) 19 | 20 | func (f Filter) String() string { 21 | s := make([]byte, 8*len(f)) 22 | for i, x := range f { 23 | for j := 0; j < 8; j++ { 24 | if x&(1<> 0) 81 | b[1] = uint8(uint32(i) >> 8) 82 | b[2] = uint8(uint32(i) >> 16) 83 | b[3] = uint8(uint32(i) >> 24) 84 | return b 85 | } 86 | 87 | nMediocreFilters, nGoodFilters := 0, 0 88 | loop: 89 | for length := 1; length <= 10000; length = nextLength(length) { 90 | keys := make([][]byte, 0, length) 91 | for i := 0; i < length; i++ { 92 | keys = append(keys, le32(i)) 93 | } 94 | var hashes []uint32 95 | for _, key := range keys { 96 | hashes = append(hashes, Hash(key)) 97 | } 98 | f := NewFilter(hashes, 10) 99 | 100 | if len(f) > (length*10/8)+40 { 101 | t.Errorf("length=%d: len(f)=%d is too large", length, len(f)) 102 | continue 103 | } 104 | 105 | // All added keys must match. 106 | for _, key := range keys { 107 | if !f.MayContainKey(key) { 108 | t.Errorf("length=%d: did not contain key %q", length, key) 109 | continue loop 110 | } 111 | } 112 | 113 | // Check false positive rate. 114 | nFalsePositive := 0 115 | for i := 0; i < 10000; i++ { 116 | if f.MayContainKey(le32(1e9 + i)) { 117 | nFalsePositive++ 118 | } 119 | } 120 | if nFalsePositive > 0.02*10000 { 121 | t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive) 122 | continue 123 | } 124 | if nFalsePositive > 0.0125*10000 { 125 | nMediocreFilters++ 126 | } else { 127 | nGoodFilters++ 128 | } 129 | } 130 | 131 | if nMediocreFilters > nGoodFilters/5 { 132 | t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters) 133 | } 134 | } 135 | 136 | func TestHash(t *testing.T) { 137 | // The magic want numbers come from running the C++ leveldb code in hash.cc. 
138 | testCases := []struct { 139 | s string 140 | want uint32 141 | }{ 142 | {"", 0xbc9f1d34}, 143 | {"g", 0xd04a8bda}, 144 | {"go", 0x3e0b0745}, 145 | {"gop", 0x0c326610}, 146 | {"goph", 0x8c9d6390}, 147 | {"gophe", 0x9bfd4b0a}, 148 | {"gopher", 0xa78edc7c}, 149 | {"I had a dream it would end this way.", 0xe14a9db9}, 150 | } 151 | for _, tc := range testCases { 152 | if got := Hash([]byte(tc.s)); got != tc.want { 153 | t.Errorf("s=%q: got 0x%08x, want 0x%08x", tc.s, got, tc.want) 154 | } 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /utils/value.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | import ( 18 | "encoding/binary" 19 | "reflect" 20 | "time" 21 | "unsafe" 22 | ) 23 | 24 | const ( 25 | // size of vlog header. 
const (
	// ValueLogHeaderSize is the size of the vlog file header:
	// +----------------+------------------+
	// | keyID(8 bytes) | baseIV(12 bytes)|
	// +----------------+------------------+
	ValueLogHeaderSize = 20
	vptrSize           = unsafe.Sizeof(ValuePtr{})
)

// ValuePtr locates a value inside the value log: which file (Fid), where the
// record starts (Offset) and how many bytes it spans (Len).
type ValuePtr struct {
	Len    uint32
	Offset uint32
	Fid    uint32
}

// Less reports whether p orders strictly before o, comparing by Fid, then
// Offset, then Len. A nil o never compares as greater.
func (p ValuePtr) Less(o *ValuePtr) bool {
	switch {
	case o == nil:
		return false
	case p.Fid != o.Fid:
		return p.Fid < o.Fid
	case p.Offset != o.Offset:
		return p.Offset < o.Offset
	default:
		return p.Len < o.Len
	}
}

// IsZero reports whether p is the zero pointer (no file, offset or length).
func (p ValuePtr) IsZero() bool {
	return p.Fid == 0 && p.Offset == 0 && p.Len == 0
}

// Encode encodes Pointer into byte buffer: the struct is copied verbatim
// (native layout) into a fresh slice of vptrSize bytes.
func (p ValuePtr) Encode() []byte {
	out := make([]byte, vptrSize)
	*(*ValuePtr)(unsafe.Pointer(&out[0])) = p
	return out
}
// BytesToU32 decodes the first four bytes of b as a big-endian uint32.
func BytesToU32(b []byte) uint32 {
	return binary.BigEndian.Uint32(b)
}

// BytesToU64 decodes the first eight bytes of b as a big-endian uint64.
func BytesToU64(b []byte) uint64 {
	return binary.BigEndian.Uint64(b)
}

// U32SliceToBytes reinterprets u32s as a byte slice without copying.
// The result aliases the same memory in native byte order; it is only
// valid while u32s is alive.
func U32SliceToBytes(u32s []uint32) []byte {
	if len(u32s) == 0 {
		return nil
	}
	var out []byte
	header := (*reflect.SliceHeader)(unsafe.Pointer(&out))
	header.Len = len(u32s) * 4
	header.Cap = header.Len
	header.Data = uintptr(unsafe.Pointer(&u32s[0]))
	return out
}

// U32ToBytes encodes v as four big-endian bytes.
func U32ToBytes(v uint32) []byte {
	var out [4]byte
	binary.BigEndian.PutUint32(out[:], v)
	return out[:]
}

// U64ToBytes encodes v as eight big-endian bytes.
func U64ToBytes(v uint64) []byte {
	var out [8]byte
	binary.BigEndian.PutUint64(out[:], v)
	return out[:]
}

// BytesToU32Slice reinterprets b as a []uint32 without copying.
// The result aliases b's memory in native byte order; b's length should be
// a multiple of four (any remainder bytes are dropped).
func BytesToU32Slice(b []byte) []uint32 {
	if len(b) == 0 {
		return nil
	}
	var out []uint32
	header := (*reflect.SliceHeader)(unsafe.Pointer(&out))
	header.Len = len(b) / 4
	header.Cap = header.Len
	header.Data = uintptr(unsafe.Pointer(&b[0]))
	return out
}
<= uint64(time.Now().Unix()) 143 | } 144 | 145 | func DiscardEntry(e, vs *Entry) bool { 146 | // TODO 版本这个信息应该被弱化掉 在后面上MVCC或者多版本查询的时候再考虑 147 | // if vs.Version != ParseTs(e.Key) { 148 | // // Version not found. Discard. 149 | // return true 150 | // } 151 | if IsDeletedOrExpired(vs.Meta, vs.ExpiresAt) { 152 | return true 153 | } 154 | if (vs.Meta & BitValuePointer) == 0 { 155 | // Key also stores the value in LSM. Discard. 156 | return true 157 | } 158 | return false 159 | } 160 | -------------------------------------------------------------------------------- /vlog_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | package corekv 15 | 16 | import ( 17 | "bytes" 18 | "math/rand" 19 | "os" 20 | "testing" 21 | 22 | "github.com/hardcore-os/corekv/utils" 23 | "github.com/stretchr/testify/require" 24 | ) 25 | 26 | var ( 27 | // 初始化opt 28 | opt = &Options{ 29 | WorkDir: "./work_test", 30 | SSTableMaxSz: 1 << 10, 31 | MemTableSize: 1 << 10, 32 | ValueLogFileSize: 1 << 20, 33 | ValueThreshold: 0, 34 | MaxBatchCount: 10, 35 | MaxBatchSize: 1 << 20, 36 | } 37 | ) 38 | 39 | func TestVlogBase(t *testing.T) { 40 | // 清理目录 41 | clearDir() 42 | // 打开DB 43 | db := Open(opt) 44 | defer db.Close() 45 | log := db.vlog 46 | var err error 47 | // 创建一个简单的kv entry对象 48 | const val1 = "sampleval012345678901234567890123" 49 | const val2 = "samplevalb012345678901234567890123" 50 | require.True(t, int64(len(val1)) >= db.opt.ValueThreshold) 51 | 52 | e1 := &utils.Entry{ 53 | Key: []byte("samplekey"), 54 | Value: []byte(val1), 55 | Meta: utils.BitValuePointer, 56 | } 57 | e2 := &utils.Entry{ 58 | Key: []byte("samplekeyb"), 59 | Value: []byte(val2), 60 | Meta: utils.BitValuePointer, 61 | } 62 | 63 | // 构建一个批量请求的request 64 | b := new(request) 65 | b.Entries = []*utils.Entry{e1, e2} 66 | 67 | // 直接写入vlog中 68 | log.write([]*request{b}) 69 | require.Len(t, b.Ptrs, 2) 70 | t.Logf("Pointer written: %+v %+v\n", b.Ptrs[0], b.Ptrs[1]) 71 | 72 | // 从vlog中使用 value ptr指针中查询写入的分段vlog文件 73 | buf1, lf1, err1 := log.readValueBytes(b.Ptrs[0]) 74 | buf2, lf2, err2 := log.readValueBytes(b.Ptrs[1]) 75 | require.NoError(t, err1) 76 | require.NoError(t, err2) 77 | // 关闭会调的锁 78 | defer utils.RunCallback(log.getUnlockCallback(lf1)) 79 | defer utils.RunCallback((log.getUnlockCallback(lf2))) 80 | e1, err = lf1.DecodeEntry(buf1, b.Ptrs[0].Offset) 81 | require.NoError(t, err) 82 | // 从vlog文件中通过指指针反序列化回 entry对象 83 | e2, err = lf1.DecodeEntry(buf2, b.Ptrs[1].Offset) 84 | require.NoError(t, err) 85 | 86 | // 比较entry对象是否相等 87 | readEntries := []utils.Entry{*e1, *e2} 88 | require.EqualValues(t, []utils.Entry{ 89 | { 90 | Key: 
[]byte("samplekey"), 91 | Value: []byte(val1), 92 | Meta: utils.BitValuePointer, 93 | Offset: b.Ptrs[0].Offset, 94 | }, 95 | { 96 | Key: []byte("samplekeyb"), 97 | Value: []byte(val2), 98 | Meta: utils.BitValuePointer, 99 | Offset: b.Ptrs[1].Offset, 100 | }, 101 | }, readEntries) 102 | } 103 | 104 | func clearDir() { 105 | _, err := os.Stat(opt.WorkDir) 106 | if err == nil { 107 | os.RemoveAll(opt.WorkDir) 108 | } 109 | os.Mkdir(opt.WorkDir, os.ModePerm) 110 | } 111 | 112 | func TestValueGC(t *testing.T) { 113 | clearDir() 114 | opt.ValueLogFileSize = 1 << 20 115 | kv := Open(opt) 116 | defer kv.Close() 117 | sz := 32 << 10 118 | kvList := []*utils.Entry{} 119 | for i := 0; i < 100; i++ { 120 | e := newRandEntry(sz) 121 | kvList = append(kvList, &utils.Entry{ 122 | Key: e.Key, 123 | Value: e.Value, 124 | Meta: e.Meta, 125 | ExpiresAt: e.ExpiresAt, 126 | }) 127 | require.NoError(t, kv.Set(e)) 128 | } 129 | kv.RunValueLogGC(0.9) 130 | for _, e := range kvList { 131 | item, err := kv.Get(e.Key) 132 | require.NoError(t, err) 133 | val := getItemValue(t, item) 134 | require.NotNil(t, val) 135 | require.True(t, bytes.Equal(item.Key, e.Key), "key not equal: e:%s, v:%s", e.Key, item.Key) 136 | require.True(t, bytes.Equal(item.Value, e.Value), "value not equal: e:%s, v:%s", e.Value, item.Key) 137 | } 138 | } 139 | 140 | func newRandEntry(sz int) *utils.Entry { 141 | v := make([]byte, sz) 142 | rand.Read(v[:rand.Intn(sz)]) 143 | e := utils.BuildEntry() 144 | e.Value = v 145 | return e 146 | } 147 | func getItemValue(t *testing.T, item *utils.Entry) (val []byte) { 148 | t.Helper() 149 | if item == nil { 150 | return nil 151 | } 152 | var v []byte 153 | v = append(v, item.Value...) 
154 | if v == nil { 155 | return nil 156 | } 157 | return v 158 | } 159 | -------------------------------------------------------------------------------- /utils/skiplist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | import ( 18 | "fmt" 19 | "strconv" 20 | "strings" 21 | "sync" 22 | "testing" 23 | 24 | "github.com/stretchr/testify/assert" 25 | "github.com/stretchr/testify/require" 26 | ) 27 | 28 | func RandString(len int) string { 29 | bytes := make([]byte, len) 30 | for i := 0; i < len; i++ { 31 | b := r.Intn(26) + 65 32 | bytes[i] = byte(b) 33 | } 34 | return string(bytes) 35 | } 36 | 37 | func TestSkipListBasicCRUD(t *testing.T) { 38 | list := NewSkiplist(1000) 39 | 40 | //Put & Get 41 | entry1 := NewEntry([]byte(RandString(10)), []byte("Val1")) 42 | list.Add(entry1) 43 | vs := list.Search(entry1.Key) 44 | assert.Equal(t, entry1.Value, vs.Value) 45 | 46 | entry2 := NewEntry([]byte(RandString(10)), []byte("Val2")) 47 | list.Add(entry2) 48 | vs = list.Search(entry2.Key) 49 | assert.Equal(t, entry2.Value, vs.Value) 50 | 51 | //Get a not exist entry 52 | assert.Nil(t, list.Search([]byte(RandString(10))).Value) 53 | 54 | //Update a entry 55 | entry2_new := NewEntry(entry1.Key, []byte("Val1+1")) 56 | list.Add(entry2_new) 57 | assert.Equal(t, 
entry2_new.Value, list.Search(entry2_new.Key).Value) 58 | } 59 | 60 | func Benchmark_SkipListBasicCRUD(b *testing.B) { 61 | list := NewSkiplist(100000000) 62 | key, val := "", "" 63 | maxTime := 1000 64 | for i := 0; i < maxTime; i++ { 65 | //number := rand.Intn(10000) 66 | key, val = RandString(10), fmt.Sprintf("Val%d", i) 67 | entry := NewEntry([]byte(key), []byte(val)) 68 | list.Add(entry) 69 | searchVal := list.Search([]byte(key)) 70 | assert.Equal(b, searchVal.Value, []byte(val)) 71 | } 72 | } 73 | 74 | func TestDrawList(t *testing.T) { 75 | list := NewSkiplist(1000) 76 | n := 12 77 | for i:=0; i>= 7 58 | if x == 0 { 59 | break 60 | } 61 | } 62 | return n 63 | } 64 | 65 | //Entry _ 最外层写入的结构体 66 | type Entry struct { 67 | Key []byte 68 | Value []byte 69 | ExpiresAt uint64 70 | 71 | Meta byte 72 | Version uint64 73 | Offset uint32 74 | Hlen int // Length of the header. 75 | ValThreshold int64 76 | } 77 | 78 | // NewEntry_ 79 | func NewEntry(key, value []byte) *Entry { 80 | return &Entry{ 81 | Key: key, 82 | Value: value, 83 | } 84 | } 85 | 86 | // Entry_ 87 | func (e *Entry) Entry() *Entry { 88 | return e 89 | } 90 | 91 | func (e *Entry) IsDeletedOrExpired() bool { 92 | if e.Value == nil { 93 | return true 94 | } 95 | 96 | if e.ExpiresAt == 0 { 97 | return false 98 | } 99 | 100 | return e.ExpiresAt <= uint64(time.Now().Unix()) 101 | } 102 | 103 | // WithTTL _ 104 | func (e *Entry) WithTTL(dur time.Duration) *Entry { 105 | e.ExpiresAt = uint64(time.Now().Add(dur).Unix()) 106 | return e 107 | } 108 | 109 | // EncodedSize is the size of the ValueStruct when encoded 110 | func (e *Entry) EncodedSize() uint32 { 111 | sz := len(e.Value) 112 | enc := sizeVarint(uint64(e.Meta)) 113 | enc += sizeVarint(e.ExpiresAt) 114 | return uint32(sz + enc) 115 | } 116 | 117 | // EstimateSize 118 | func (e *Entry) EstimateSize(threshold int) int { 119 | // TODO: 是否考虑 user meta? 
120 | if len(e.Value) < threshold { 121 | return len(e.Key) + len(e.Value) + 1 // Meta 122 | } 123 | return len(e.Key) + 12 + 1 // 12 for ValuePointer, 2 for meta. 124 | } 125 | 126 | // header 对象 127 | // header is used in value log as a header before Entry. 128 | type Header struct { 129 | KLen uint32 130 | VLen uint32 131 | ExpiresAt uint64 132 | Meta byte 133 | } 134 | 135 | // +------+----------+------------+--------------+-----------+ 136 | // | Meta | UserMeta | Key Length | Value Length | ExpiresAt | 137 | // +------+----------+------------+--------------+-----------+ 138 | func (h Header) Encode(out []byte) int { 139 | out[0] = h.Meta 140 | index := 1 141 | index += binary.PutUvarint(out[index:], uint64(h.KLen)) 142 | index += binary.PutUvarint(out[index:], uint64(h.VLen)) 143 | index += binary.PutUvarint(out[index:], h.ExpiresAt) 144 | return index 145 | } 146 | 147 | // Decode decodes the given header from the provided byte slice. 148 | // Returns the number of bytes read. 149 | func (h *Header) Decode(buf []byte) int { 150 | h.Meta = buf[0] 151 | index := 1 152 | klen, count := binary.Uvarint(buf[index:]) 153 | h.KLen = uint32(klen) 154 | index += count 155 | vlen, count := binary.Uvarint(buf[index:]) 156 | h.VLen = uint32(vlen) 157 | index += count 158 | h.ExpiresAt, count = binary.Uvarint(buf[index:]) 159 | return index + count 160 | } 161 | 162 | // DecodeFrom reads the header from the hashReader. 163 | // Returns the number of bytes read. 
164 | func (h *Header) DecodeFrom(reader *HashReader) (int, error) { 165 | var err error 166 | h.Meta, err = reader.ReadByte() 167 | if err != nil { 168 | return 0, err 169 | } 170 | klen, err := binary.ReadUvarint(reader) 171 | if err != nil { 172 | return 0, err 173 | } 174 | h.KLen = uint32(klen) 175 | vlen, err := binary.ReadUvarint(reader) 176 | if err != nil { 177 | return 0, err 178 | } 179 | h.VLen = uint32(vlen) 180 | h.ExpiresAt, err = binary.ReadUvarint(reader) 181 | if err != nil { 182 | return 0, err 183 | } 184 | return reader.BytesRead, nil 185 | } 186 | -------------------------------------------------------------------------------- /utils/cache/bloom.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package cache 16 | 17 | import "math" 18 | 19 | // Filter is an encoded set of []byte keys. 20 | type Filter []byte 21 | 22 | type BloomFilter struct { 23 | bitmap Filter 24 | k uint8 25 | } 26 | 27 | // MayContainKey _ 28 | func (f *BloomFilter) MayContainKey(k []byte) bool { 29 | return f.MayContain(Hash(k)) 30 | } 31 | 32 | // MayContain returns whether the filter may contain given key. False positives 33 | // are possible, where it returns true for keys not in the original set. 
34 | func (f *BloomFilter) MayContain(h uint32) bool { 35 | if f.Len() < 2 { 36 | return false 37 | } 38 | k := f.k 39 | if k > 30 { 40 | // This is reserved for potentially new encodings for short Bloom filters. 41 | // Consider it a match. 42 | return true 43 | } 44 | nBits := uint32(8 * (f.Len() - 1)) 45 | delta := h>>17 | h<<15 46 | for j := uint8(0); j < k; j++ { 47 | bitPos := h % nBits 48 | if f.bitmap[bitPos/8]&(1<<(bitPos%8)) == 0 { 49 | return false 50 | } 51 | h += delta 52 | } 53 | return true 54 | } 55 | 56 | func (f *BloomFilter) Len() int32 { 57 | return int32(len(f.bitmap)) 58 | } 59 | 60 | func (f *BloomFilter) InsertKey(k []byte) bool { 61 | return f.Insert(Hash(k)) 62 | } 63 | 64 | func (f *BloomFilter) Insert(h uint32) bool { 65 | if f.Len() < 2 { return false } // degenerate filter has no bit space; mirrors MayContain and avoids h % 0 panic below 66 | k := f.k 67 | if k > 30 { 68 | // Reserved for potentially new encodings for short Bloom filters; consider it a match. 69 | return true 70 | } 71 | nBits := uint32(8 * (f.Len() - 1)) 72 | delta := h>>17 | h<<15 73 | for j := uint8(0); j < k; j++ { 74 | bitPos := h % nBits 75 | f.bitmap[bitPos/8] |= 1 << (bitPos % 8) 76 | h += delta 77 | } 78 | return true 79 | } 80 | 81 | func (f *BloomFilter) AllowKey(k []byte) bool { 82 | if f == nil { 83 | return true 84 | } 85 | already := f.MayContainKey(k) 86 | if !already { 87 | f.InsertKey(k) 88 | } 89 | return already 90 | } 91 | 92 | func (f *BloomFilter) Allow(h uint32) bool { 93 | if f == nil { 94 | return true 95 | } 96 | already := f.MayContain(h) 97 | if !already { 98 | f.Insert(h) 99 | } 100 | return already 101 | } 102 | 103 | func (f *BloomFilter) reset() { 104 | if f == nil { 105 | return 106 | } 107 | for i := range f.bitmap { 108 | f.bitmap[i] = 0 109 | } 110 | } 111 | 112 | // NewFilter returns a new Bloom filter that encodes a set of []byte keys with 113 | // the given number of bits per key, approximately. 114 | // 115 | // A good bitsPerKey value is 10, which yields a filter with ~ 1% false 116 | // positive rate. 
117 | func newFilter(numEntries int, falsePositive float64) *BloomFilter { 118 | bitsPerKey := bloomBitsPerKey(numEntries, falsePositive) 119 | return initFilter(numEntries, bitsPerKey) 120 | } 121 | 122 | // BloomBitsPerKey returns the bits per key required by bloomfilter based on 123 | // the false positive rate. 124 | func bloomBitsPerKey(numEntries int, fp float64) int { 125 | size := -1 * float64(numEntries) * math.Log(fp) / math.Pow(float64(0.69314718056), 2) 126 | locs := math.Ceil(size / float64(numEntries)) 127 | return int(locs) 128 | } 129 | 130 | func initFilter(numEntries int, bitsPerKey int) *BloomFilter { 131 | bf := &BloomFilter{} 132 | if bitsPerKey < 0 { 133 | bitsPerKey = 0 134 | } 135 | // 0.69 is approximately ln(2). 136 | k := uint32(float64(bitsPerKey) * 0.69) 137 | if k < 1 { 138 | k = 1 139 | } 140 | if k > 30 { 141 | k = 30 142 | } 143 | bf.k = uint8(k) 144 | 145 | nBits := numEntries * int(bitsPerKey) 146 | // For small len(keys), we can see a very high false positive rate. Fix it 147 | // by enforcing a minimum bloom filter length. 148 | if nBits < 64 { 149 | nBits = 64 150 | } 151 | nBytes := (nBits + 7) / 8 152 | nBits = nBytes * 8 153 | filter := make([]byte, nBytes+1) 154 | 155 | //record the K value of this Bloom Filter 156 | filter[nBytes] = uint8(k) 157 | 158 | bf.bitmap = filter 159 | return bf 160 | } 161 | 162 | // Hash implements a hashing algorithm similar to the Murmur hash. 
163 | func Hash(b []byte) uint32 { 164 | const ( 165 | seed = 0xbc9f1d34 166 | m = 0xc6a4a793 167 | ) 168 | h := uint32(seed) ^ uint32(len(b))*m 169 | for ; len(b) >= 4; b = b[4:] { 170 | h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 171 | h *= m 172 | h ^= h >> 16 173 | } 174 | switch len(b) { 175 | case 3: 176 | h += uint32(b[2]) << 16 177 | fallthrough 178 | case 2: 179 | h += uint32(b[1]) << 8 180 | fallthrough 181 | case 1: 182 | h += uint32(b[0]) 183 | h *= m 184 | h ^= h >> 24 185 | } 186 | return h 187 | } 188 | -------------------------------------------------------------------------------- /file/wal.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package file 16 | 17 | import ( 18 | "bufio" 19 | "bytes" 20 | "fmt" 21 | "hash/crc32" 22 | "io" 23 | "os" 24 | "sync" 25 | 26 | "github.com/hardcore-os/corekv/utils" 27 | "github.com/pkg/errors" 28 | ) 29 | 30 | // WalFile _ 31 | type WalFile struct { 32 | lock *sync.RWMutex 33 | f *MmapFile 34 | opts *Options 35 | buf *bytes.Buffer 36 | size uint32 37 | writeAt uint32 38 | } 39 | 40 | // Fid _ 41 | func (wf *WalFile) Fid() uint64 { 42 | return wf.opts.FID 43 | } 44 | 45 | // Close _ 46 | func (wf *WalFile) Close() error { 47 | fileName := wf.f.Fd.Name() 48 | if err := wf.f.Close(); err != nil { 49 | return err 50 | } 51 | return os.Remove(fileName) 52 | } 53 | 54 | // Name _ 55 | func (wf *WalFile) Name() string { 56 | return wf.f.Fd.Name() 57 | } 58 | 59 | // Size 当前已经被写入的数据 60 | func (wf *WalFile) Size() uint32 { 61 | return wf.writeAt 62 | } 63 | 64 | // OpenWalFile _ 65 | func OpenWalFile(opt *Options) *WalFile { 66 | omf, err := OpenMmapFile(opt.FileName, os.O_CREATE|os.O_RDWR, opt.MaxSz) 67 | wf := &WalFile{f: omf, lock: &sync.RWMutex{}, opts: opt} 68 | wf.buf = &bytes.Buffer{} 69 | wf.size = uint32(len(wf.f.Data)) 70 | utils.Err(err) 71 | return wf 72 | } 73 | 74 | func (wf *WalFile) Write(entry *utils.Entry) error { 75 | // 落预写日志简单的同步写即可 76 | // 序列化为磁盘结构 77 | wf.lock.Lock() 78 | plen := utils.WalCodec(wf.buf, entry) 79 | buf := wf.buf.Bytes() 80 | utils.Panic(wf.f.AppendBuffer(wf.writeAt, buf)) 81 | wf.writeAt += uint32(plen) 82 | wf.lock.Unlock() 83 | return nil 84 | } 85 | 86 | // Iterate 从磁盘中遍历wal,获得数据 87 | func (wf *WalFile) Iterate(readOnly bool, offset uint32, fn utils.LogEntry) (uint32, error) { 88 | // For now, read directly from file, because it allows 89 | reader := bufio.NewReader(wf.f.NewReader(int(offset))) 90 | read := SafeRead{ 91 | K: make([]byte, 10), 92 | V: make([]byte, 10), 93 | RecordOffset: offset, 94 | LF: wf, 95 | } 96 | var validEndOffset uint32 = offset 97 | loop: 98 | for { 99 | e, err := read.MakeEntry(reader) 100 | 
switch { 101 | case err == io.EOF: 102 | break loop 103 | case err == io.ErrUnexpectedEOF || err == utils.ErrTruncate: 104 | break loop 105 | case err != nil: 106 | return 0, err 107 | case e.IsZero(): 108 | break loop 109 | } 110 | 111 | var vp utils.ValuePtr // 给kv分离的设计留下扩展,可以不用考虑其作用 112 | size := uint32(int(e.LogHeaderLen()) + len(e.Key) + len(e.Value) + crc32.Size) 113 | read.RecordOffset += size 114 | validEndOffset = read.RecordOffset 115 | if err := fn(e, &vp); err != nil { 116 | if err == utils.ErrStop { 117 | break 118 | } 119 | return 0, errors.WithMessage(err, "Iteration function") 120 | } 121 | } 122 | return validEndOffset, nil 123 | } 124 | 125 | // Truncate _ 126 | // TODO Truncate 函数 127 | func (wf *WalFile) Truncate(end int64) error { 128 | if end <= 0 { 129 | return nil 130 | } 131 | if fi, err := wf.f.Fd.Stat(); err != nil { 132 | return fmt.Errorf("while file.stat on file: %s, error: %v\n", wf.Name(), err) 133 | } else if fi.Size() == end { 134 | return nil 135 | } 136 | wf.size = uint32(end) 137 | return wf.f.Truncature(end) 138 | } 139 | 140 | // 封装kv分离的读操作 141 | type SafeRead struct { 142 | K []byte 143 | V []byte 144 | 145 | RecordOffset uint32 146 | LF *WalFile 147 | } 148 | 149 | // MakeEntry _ 150 | func (r *SafeRead) MakeEntry(reader io.Reader) (*utils.Entry, error) { 151 | tee := utils.NewHashReader(reader) 152 | var h utils.WalHeader 153 | hlen, err := h.Decode(tee) 154 | if err != nil { 155 | return nil, err 156 | } 157 | if h.KeyLen > uint32(1<<16) { // Key length must be below uint16. 
158 | return nil, utils.ErrTruncate 159 | } 160 | kl := int(h.KeyLen) 161 | if cap(r.K) < kl { 162 | r.K = make([]byte, 2*kl) 163 | } 164 | vl := int(h.ValueLen) 165 | if cap(r.V) < vl { 166 | r.V = make([]byte, 2*vl) 167 | } 168 | 169 | e := &utils.Entry{} 170 | e.Offset = r.RecordOffset 171 | e.Hlen = hlen 172 | buf := make([]byte, h.KeyLen+h.ValueLen) 173 | if _, err := io.ReadFull(tee, buf[:]); err != nil { 174 | if err == io.EOF { 175 | err = utils.ErrTruncate 176 | } 177 | return nil, err 178 | } 179 | e.Key = buf[:h.KeyLen] 180 | e.Value = buf[h.KeyLen:] 181 | var crcBuf [crc32.Size]byte 182 | if _, err := io.ReadFull(reader, crcBuf[:]); err != nil { 183 | if err == io.EOF { 184 | err = utils.ErrTruncate 185 | } 186 | return nil, err 187 | } 188 | crc := utils.BytesToU32(crcBuf[:]) 189 | if crc != tee.Sum32() { 190 | return nil, utils.ErrTruncate 191 | } 192 | e.ExpiresAt = h.ExpiresAt 193 | return e, nil 194 | } 195 | -------------------------------------------------------------------------------- /lsm/memtable.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package lsm 16 | 17 | import ( 18 | "bytes" 19 | "fmt" 20 | "io/ioutil" 21 | "os" 22 | "path/filepath" 23 | "sort" 24 | "strconv" 25 | "strings" 26 | "sync/atomic" 27 | 28 | "github.com/hardcore-os/corekv/file" 29 | "github.com/hardcore-os/corekv/utils" 30 | "github.com/pkg/errors" 31 | ) 32 | 33 | const walFileExt string = ".wal" 34 | 35 | // MemTable 36 | type memTable struct { 37 | lsm *LSM 38 | wal *file.WalFile 39 | sl *utils.Skiplist 40 | buf *bytes.Buffer 41 | maxVersion uint64 42 | } 43 | 44 | // NewMemtable _ 45 | func (lsm *LSM) NewMemtable() *memTable { 46 | newFid := atomic.AddUint64(&(lsm.levels.maxFID), 1) 47 | fileOpt := &file.Options{ 48 | Dir: lsm.option.WorkDir, 49 | Flag: os.O_CREATE | os.O_RDWR, 50 | MaxSz: int(lsm.option.MemTableSize), //TODO wal 要设置多大比较合理? 姑且跟sst一样大 51 | FID: newFid, 52 | FileName: mtFilePath(lsm.option.WorkDir, newFid), 53 | } 54 | return &memTable{wal: file.OpenWalFile(fileOpt), sl: utils.NewSkiplist(int64(1 << 20)), lsm: lsm} 55 | } 56 | 57 | // Close 58 | func (m *memTable) close() error { 59 | if err := m.wal.Close(); err != nil { 60 | return err 61 | } 62 | 63 | return nil 64 | } 65 | 66 | func (m *memTable) set(entry *utils.Entry) error { 67 | // 写到wal 日志中,防止崩溃 68 | if err := m.wal.Write(entry); err != nil { 69 | return err 70 | } 71 | // 写到memtable中 72 | m.sl.Add(entry) 73 | return nil 74 | } 75 | 76 | func (m *memTable) Get(key []byte) (*utils.Entry, error) { 77 | // 索引检查当前的key是否在表中 O(1) 的时间复杂度 78 | // 从内存表中获取数据 79 | vs := m.sl.Search(key) 80 | 81 | e := &utils.Entry{ 82 | Key: key, 83 | Value: vs.Value, 84 | ExpiresAt: vs.ExpiresAt, 85 | Meta: vs.Meta, 86 | Version: vs.Version, 87 | } 88 | 89 | return e, nil 90 | 91 | } 92 | 93 | func (m *memTable) Size() int64 { 94 | return m.sl.MemSize() 95 | } 96 | 97 | //recovery 98 | func (lsm *LSM) recovery() (*memTable, []*memTable) { 99 | // 从 工作目录中获取所有文件 100 | files, err := ioutil.ReadDir(lsm.option.WorkDir) 101 | if err != nil { 102 | utils.Panic(err) 103 | return 
nil, nil 104 | } 105 | var fids []uint64 106 | maxFid := lsm.levels.maxFID 107 | // 识别 后缀为.wal的文件 108 | for _, file := range files { 109 | if !strings.HasSuffix(file.Name(), walFileExt) { 110 | continue 111 | } 112 | fsz := len(file.Name()) 113 | fid, err := strconv.ParseUint(file.Name()[:fsz-len(walFileExt)], 10, 64) 114 | if err != nil { // check the parse error before fid is used; on failure fid is 0 and must not feed maxFid/fids 115 | utils.Panic(err) 116 | return nil, nil 117 | } 118 | // 考虑 wal文件的存在 更新maxFid 119 | if maxFid < fid { 120 | maxFid = fid 121 | } 122 | fids = append(fids, fid) 123 | } 124 | // 排序一下子 125 | sort.Slice(fids, func(i, j int) bool { 126 | return fids[i] < fids[j] 127 | }) 128 | imms := []*memTable{} 129 | // 遍历fid 做处理 130 | for _, fid := range fids { 131 | mt, err := lsm.openMemTable(fid) 132 | utils.CondPanic(err != nil, err) 133 | if mt.sl.MemSize() == 0 { 134 | // mt.DecrRef() 135 | continue 136 | } 137 | // TODO 如果最后一个跳表没写满会怎么样?这不就浪费空间了吗 138 | imms = append(imms, mt) 139 | } 140 | // 更新最终的maxfid,初始化一定是串行执行的,因此不需要原子操作 141 | lsm.levels.maxFID = maxFid 142 | return lsm.NewMemtable(), imms 143 | } 144 | 145 | func (lsm *LSM) openMemTable(fid uint64) (*memTable, error) { 146 | fileOpt := &file.Options{ 147 | Dir: lsm.option.WorkDir, 148 | Flag: os.O_CREATE | os.O_RDWR, 149 | MaxSz: int(lsm.option.MemTableSize), 150 | FID: fid, 151 | FileName: mtFilePath(lsm.option.WorkDir, fid), 152 | } 153 | s := utils.NewSkiplist(int64(1 << 20)) 154 | mt := &memTable{ 155 | sl: s, 156 | buf: &bytes.Buffer{}, 157 | lsm: lsm, 158 | } 159 | mt.wal = file.OpenWalFile(fileOpt) 160 | err := mt.UpdateSkipList() 161 | utils.CondPanic(err != nil, errors.WithMessage(err, "while updating skiplist")) 162 | return mt, nil 163 | } 164 | func mtFilePath(dir string, fid uint64) string { 165 | return filepath.Join(dir, fmt.Sprintf("%05d%s", fid, walFileExt)) 166 | } 167 | 168 | func (m *memTable) UpdateSkipList() error { 169 | if m.wal == nil || m.sl == nil { 170 | return nil 171 | } 172 | endOff, err := m.wal.Iterate(true, 0, m.replayFunction(m.lsm.option)) 173 
| if err != nil { 174 | return errors.WithMessage(err, fmt.Sprintf("while iterating wal: %s", m.wal.Name())) 175 | } 176 | // if endOff < m.wal.Size() { 177 | // return errors.WithMessage(utils.ErrTruncate, fmt.Sprintf("end offset: %d < size: %d", endOff, m.wal.Size())) 178 | // } 179 | return m.wal.Truncate(int64(endOff)) 180 | } 181 | 182 | func (m *memTable) replayFunction(opt *Options) func(*utils.Entry, *utils.ValuePtr) error { 183 | return func(e *utils.Entry, _ *utils.ValuePtr) error { // Function for replaying. 184 | if ts := utils.ParseTs(e.Key); ts > m.maxVersion { 185 | m.maxVersion = ts 186 | } 187 | m.sl.Add(e) 188 | return nil 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /file/sstable_linux.go: -------------------------------------------------------------------------------- 1 | // +build linux 2 | 3 | // Copyright 2021 hardcore-os Project Authors 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License") 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | package file 18 | 19 | import ( 20 | "io" 21 | "os" 22 | "sync" 23 | "syscall" 24 | "time" 25 | 26 | "github.com/golang/protobuf/proto" 27 | "github.com/hardcore-os/corekv/pb" 28 | "github.com/hardcore-os/corekv/utils" 29 | "github.com/pkg/errors" 30 | ) 31 | 32 | // SSTable 文件的内存封装 33 | type SSTable struct { 34 | lock *sync.RWMutex 35 | f *MmapFile 36 | maxKey []byte 37 | minKey []byte 38 | idxTables *pb.TableIndex 39 | hasBloomFilter bool 40 | idxLen int 41 | idxStart int 42 | fid uint64 43 | createdAt time.Time 44 | } 45 | 46 | // OpenSStable 打开一个 sst文件 47 | func OpenSStable(opt *Options) *SSTable { 48 | omf, err := OpenMmapFile(opt.FileName, os.O_CREATE|os.O_RDWR, opt.MaxSz) 49 | utils.Err(err) 50 | return &SSTable{f: omf, fid: opt.FID, lock: &sync.RWMutex{}} 51 | } 52 | 53 | // Init 初始化 54 | func (ss *SSTable) Init() error { 55 | var ko *pb.BlockOffset 56 | var err error 57 | if ko, err = ss.initTable(); err != nil { 58 | return err 59 | } 60 | // 从文件中获取创建时间 61 | stat, _ := ss.f.Fd.Stat() 62 | statType := stat.Sys().(*syscall.Stat_t) 63 | ss.createdAt = time.Unix(statType.Ctim.Sec, statType.Ctim.Nsec) 64 | // init min key 65 | keyBytes := ko.GetKey() 66 | minKey := make([]byte, len(keyBytes)) 67 | copy(minKey, keyBytes) 68 | ss.minKey = minKey 69 | ss.maxKey = minKey 70 | return nil 71 | } 72 | 73 | // SetMaxKey max 需要使用table的迭代器,来获取最后一个block的最后一个key 74 | func (ss *SSTable) SetMaxKey(maxKey []byte) { 75 | ss.maxKey = maxKey 76 | } 77 | func (ss *SSTable) initTable() (bo *pb.BlockOffset, err error) { 78 | readPos := len(ss.f.Data) 79 | 80 | // Read checksum len from the last 4 bytes. 81 | readPos -= 4 82 | buf := ss.readCheckError(readPos, 4) 83 | checksumLen := int(utils.BytesToU32(buf)) 84 | if checksumLen < 0 { 85 | return nil, errors.New("checksum length less than zero. Data corrupted") 86 | } 87 | 88 | // Read checksum. 
89 | readPos -= checksumLen 90 | expectedChk := ss.readCheckError(readPos, checksumLen) 91 | 92 | // Read index size from the footer. 93 | readPos -= 4 94 | buf = ss.readCheckError(readPos, 4) 95 | ss.idxLen = int(utils.BytesToU32(buf)) 96 | 97 | // Read index. 98 | readPos -= ss.idxLen 99 | ss.idxStart = readPos 100 | data := ss.readCheckError(readPos, ss.idxLen) 101 | if err := utils.VerifyChecksum(data, expectedChk); err != nil { 102 | return nil, errors.Wrapf(err, "failed to verify checksum for table: %s", ss.f.Fd.Name()) 103 | } 104 | indexTable := &pb.TableIndex{} 105 | if err := proto.Unmarshal(data, indexTable); err != nil { 106 | return nil, err 107 | } 108 | ss.idxTables = indexTable 109 | 110 | ss.hasBloomFilter = len(indexTable.BloomFilter) > 0 111 | if len(indexTable.GetOffsets()) > 0 { 112 | return indexTable.GetOffsets()[0], nil 113 | } 114 | return nil, errors.New("read index fail, offset is nil") 115 | } 116 | 117 | // Close 关闭 118 | func (ss *SSTable) Close() error { 119 | return ss.f.Close() 120 | } 121 | 122 | // Indexs _ 123 | func (ss *SSTable) Indexs() *pb.TableIndex { 124 | return ss.idxTables 125 | } 126 | 127 | // MaxKey 当前最大的key 128 | func (ss *SSTable) MaxKey() []byte { 129 | return ss.maxKey 130 | } 131 | 132 | // MinKey 当前最小的key 133 | func (ss *SSTable) MinKey() []byte { 134 | return ss.minKey 135 | } 136 | 137 | // FID 获取fid 138 | func (ss *SSTable) FID() uint64 { 139 | return ss.fid 140 | } 141 | 142 | // HasBloomFilter _ 143 | func (ss *SSTable) HasBloomFilter() bool { 144 | return ss.hasBloomFilter 145 | } 146 | 147 | func (ss *SSTable) read(off, sz int) ([]byte, error) { 148 | if len(ss.f.Data) > 0 { 149 | if len(ss.f.Data[off:]) < sz { 150 | return nil, io.EOF 151 | } 152 | return ss.f.Data[off : off+sz], nil 153 | } 154 | 155 | res := make([]byte, sz) 156 | _, err := ss.f.Fd.ReadAt(res, int64(off)) 157 | return res, err 158 | } 159 | func (ss *SSTable) readCheckError(off, sz int) []byte { 160 | buf, err := ss.read(off, sz) 161 | 
utils.Panic(err) 162 | return buf 163 | } 164 | 165 | // Bytes returns data starting from offset off of size sz. If there's not enough data, it would 166 | // return nil slice and io.EOF. 167 | func (ss *SSTable) Bytes(off, sz int) ([]byte, error) { 168 | return ss.f.Bytes(off, sz) 169 | } 170 | 171 | // Size 返回底层文件的尺寸 172 | func (ss *SSTable) Size() int64 { 173 | fileStats, err := ss.f.Fd.Stat() 174 | utils.Panic(err) 175 | return fileStats.Size() 176 | } 177 | 178 | // GetCreatedAt _ 179 | func (ss *SSTable) GetCreatedAt() *time.Time { 180 | return &ss.createdAt 181 | } 182 | 183 | // SetCreatedAt _ 184 | func (ss *SSTable) SetCreatedAt(t *time.Time) { 185 | ss.createdAt = *t 186 | } 187 | 188 | // Detele _ 189 | func (ss *SSTable) Detele() error { 190 | return ss.f.Delete() 191 | } 192 | 193 | // Truncature _ 194 | func (ss *SSTable) Truncature(size int64) error { 195 | return ss.f.Truncature(size) 196 | } 197 | -------------------------------------------------------------------------------- /file/sstable_darwin.go: -------------------------------------------------------------------------------- 1 | // +build darwin 2 | 3 | // Copyright 2021 hardcore-os Project Authors 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License") 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | package file 18 | 19 | import ( 20 | "io" 21 | "os" 22 | "sync" 23 | "syscall" 24 | "time" 25 | 26 | "github.com/golang/protobuf/proto" 27 | "github.com/hardcore-os/corekv/pb" 28 | "github.com/hardcore-os/corekv/utils" 29 | "github.com/pkg/errors" 30 | ) 31 | 32 | // SSTable 文件的内存封装 33 | type SSTable struct { 34 | lock *sync.RWMutex 35 | f *MmapFile 36 | maxKey []byte 37 | minKey []byte 38 | idxTables *pb.TableIndex 39 | hasBloomFilter bool 40 | idxLen int 41 | idxStart int 42 | fid uint64 43 | createdAt time.Time 44 | } 45 | 46 | // OpenSStable 打开一个 sst文件 47 | func OpenSStable(opt *Options) *SSTable { 48 | omf, err := OpenMmapFile(opt.FileName, os.O_CREATE|os.O_RDWR, opt.MaxSz) 49 | utils.Err(err) 50 | return &SSTable{f: omf, fid: opt.FID, lock: &sync.RWMutex{}} 51 | } 52 | 53 | // Init 初始化 54 | func (ss *SSTable) Init() error { 55 | var ko *pb.BlockOffset 56 | var err error 57 | if ko, err = ss.initTable(); err != nil { 58 | return err 59 | } 60 | // 从文件中获取创建时间 61 | stat, _ := ss.f.Fd.Stat() 62 | statType := stat.Sys().(*syscall.Stat_t) 63 | ss.createdAt = time.Unix(statType.Ctimespec.Sec, statType.Ctimespec.Nsec) // use ctime (Ctimespec) to match the linux build's Ctim; Atimespec is access time and changes on every read 64 | // init min key 65 | keyBytes := ko.GetKey() 66 | minKey := make([]byte, len(keyBytes)) 67 | copy(minKey, keyBytes) 68 | ss.minKey = minKey 69 | ss.maxKey = minKey 70 | return nil 71 | } 72 | 73 | // SetMaxKey max 需要使用table的迭代器,来获取最后一个block的最后一个key 74 | func (ss *SSTable) SetMaxKey(maxKey []byte) { 75 | ss.maxKey = maxKey 76 | } 77 | func (ss *SSTable) initTable() (bo *pb.BlockOffset, err error) { 78 | readPos := len(ss.f.Data) 79 | 80 | // Read checksum len from the last 4 bytes. 81 | readPos -= 4 82 | buf := ss.readCheckError(readPos, 4) 83 | checksumLen := int(utils.BytesToU32(buf)) 84 | if checksumLen < 0 { 85 | return nil, errors.New("checksum length less than zero. Data corrupted") 86 | } 87 | 88 | // Read checksum. 
89 | readPos -= checksumLen 90 | expectedChk := ss.readCheckError(readPos, checksumLen) 91 | 92 | // Read index size from the footer. 93 | readPos -= 4 94 | buf = ss.readCheckError(readPos, 4) 95 | ss.idxLen = int(utils.BytesToU32(buf)) 96 | 97 | // Read index. 98 | readPos -= ss.idxLen 99 | ss.idxStart = readPos 100 | data := ss.readCheckError(readPos, ss.idxLen) 101 | if err := utils.VerifyChecksum(data, expectedChk); err != nil { 102 | return nil, errors.Wrapf(err, "failed to verify checksum for table: %s", ss.f.Fd.Name()) 103 | } 104 | indexTable := &pb.TableIndex{} 105 | if err := proto.Unmarshal(data, indexTable); err != nil { 106 | return nil, err 107 | } 108 | ss.idxTables = indexTable 109 | 110 | ss.hasBloomFilter = len(indexTable.BloomFilter) > 0 111 | if len(indexTable.GetOffsets()) > 0 { 112 | return indexTable.GetOffsets()[0], nil 113 | } 114 | return nil, errors.New("read index fail, offset is nil") 115 | } 116 | 117 | // Close 关闭 118 | func (ss *SSTable) Close() error { 119 | return ss.f.Close() 120 | } 121 | 122 | // Indexs _ 123 | func (ss *SSTable) Indexs() *pb.TableIndex { 124 | return ss.idxTables 125 | } 126 | 127 | // MaxKey 当前最大的key 128 | func (ss *SSTable) MaxKey() []byte { 129 | return ss.maxKey 130 | } 131 | 132 | // MinKey 当前最小的key 133 | func (ss *SSTable) MinKey() []byte { 134 | return ss.minKey 135 | } 136 | 137 | // FID 获取fid 138 | func (ss *SSTable) FID() uint64 { 139 | return ss.fid 140 | } 141 | 142 | // HasBloomFilter _ 143 | func (ss *SSTable) HasBloomFilter() bool { 144 | return ss.hasBloomFilter 145 | } 146 | 147 | func (ss *SSTable) read(off, sz int) ([]byte, error) { 148 | if len(ss.f.Data) > 0 { 149 | if len(ss.f.Data[off:]) < sz { 150 | return nil, io.EOF 151 | } 152 | return ss.f.Data[off : off+sz], nil 153 | } 154 | 155 | res := make([]byte, sz) 156 | _, err := ss.f.Fd.ReadAt(res, int64(off)) 157 | return res, err 158 | } 159 | func (ss *SSTable) readCheckError(off, sz int) []byte { 160 | buf, err := ss.read(off, sz) 161 | 
utils.Panic(err) 162 | return buf 163 | } 164 | 165 | // Bytes returns data starting from offset off of size sz. If there's not enough data, it would 166 | // return nil slice and io.EOF. 167 | func (ss *SSTable) Bytes(off, sz int) ([]byte, error) { 168 | return ss.f.Bytes(off, sz) 169 | } 170 | 171 | // Size 返回底层文件的尺寸 172 | func (ss *SSTable) Size() int64 { 173 | fileStats, err := ss.f.Fd.Stat() 174 | utils.Panic(err) 175 | return fileStats.Size() 176 | } 177 | 178 | // GetCreatedAt _ 179 | func (ss *SSTable) GetCreatedAt() *time.Time { 180 | return &ss.createdAt 181 | } 182 | 183 | // SetCreatedAt _ 184 | func (ss *SSTable) SetCreatedAt(t *time.Time) { 185 | ss.createdAt = *t 186 | } 187 | 188 | // Detele _ 189 | func (ss *SSTable) Detele() error { 190 | return ss.f.Delete() 191 | } 192 | 193 | // Truncature _ 194 | func (ss *SSTable) Truncature(size int64) error { 195 | return ss.f.Truncature(size) 196 | } 197 | -------------------------------------------------------------------------------- /utils/arena.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Dgraph Labs, Inc. and Contributors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package utils 18 | 19 | import ( 20 | "log" 21 | "sync/atomic" 22 | "unsafe" 23 | 24 | "github.com/pkg/errors" 25 | ) 26 | 27 | const ( 28 | offsetSize = int(unsafe.Sizeof(uint32(0))) 29 | 30 | // Always align nodes on 64-bit boundaries, even on 32-bit architectures, 31 | // so that the node.value field is 64-bit aligned. This is necessary because 32 | // node.getValueOffset uses atomic.LoadUint64, which expects its input 33 | // pointer to be 64-bit aligned. 34 | nodeAlign = int(unsafe.Sizeof(uint64(0))) - 1 35 | 36 | MaxNodeSize = int(unsafe.Sizeof(node{})) 37 | ) 38 | 39 | // Arena should be lock-free. 40 | type Arena struct { 41 | n uint32 42 | shouldGrow bool 43 | buf []byte 44 | } 45 | 46 | // newArena returns a new arena. 47 | func newArena(n int64) *Arena { 48 | // Don't store data at position 0 in order to reserve offset=0 as a kind 49 | // of nil pointer. 50 | out := &Arena{ 51 | n: 1, 52 | buf: make([]byte, n), 53 | } 54 | return out 55 | } 56 | 57 | func (s *Arena) allocate(sz uint32) uint32 { 58 | offset := atomic.AddUint32(&s.n, sz) 59 | if !s.shouldGrow { 60 | AssertTrue(int(offset) <= len(s.buf)) 61 | return offset - sz 62 | } 63 | 64 | // We are keeping extra bytes in the end so that the checkptr doesn't fail. We apply some 65 | // intelligence to reduce the size of the node by only keeping towers upto valid height and not 66 | // maxHeight. This reduces the node's size, but checkptr doesn't know about its reduced size. 
67 | // checkptr tries to verify that the node of size MaxNodeSize resides on a single heap 68 | // allocation which causes this error: checkptr:converted pointer straddles multiple allocations 69 | if int(offset) > len(s.buf)-MaxNodeSize { 70 | growBy := uint32(len(s.buf)) 71 | if growBy > 1<<30 { 72 | growBy = 1 << 30 73 | } 74 | if growBy < sz { 75 | growBy = sz 76 | } 77 | newBuf := make([]byte, len(s.buf)+int(growBy)) 78 | AssertTrue(len(s.buf) == copy(newBuf, s.buf)) 79 | s.buf = newBuf 80 | // fmt.Print(len(s.buf), " ") 81 | } 82 | return offset - sz 83 | } 84 | 85 | func (s *Arena) size() int64 { 86 | return int64(atomic.LoadUint32(&s.n)) 87 | } 88 | 89 | // putNode allocates a node in the arena. The node is aligned on a pointer-sized 90 | // boundary. The arena offset of the node is returned. 91 | func (s *Arena) putNode(height int) uint32 { 92 | // Compute the amount of the tower that will never be used, since the height 93 | // is less than maxHeight. 94 | unusedSize := (maxHeight - height) * offsetSize 95 | 96 | // Pad the allocation with enough bytes to ensure pointer alignment. 97 | l := uint32(MaxNodeSize - unusedSize + nodeAlign) 98 | n := s.allocate(l) 99 | 100 | // Return the aligned offset. 101 | m := (n + uint32(nodeAlign)) & ^uint32(nodeAlign) 102 | return m 103 | } 104 | 105 | // Put will *copy* val into arena. To make better use of this, reuse your input 106 | // val buffer. Returns an offset into buf. User is responsible for remembering 107 | // size of val. We could also store this size inside arena but the encoding and 108 | // decoding will incur some overhead. 
109 | func (s *Arena) putVal(v ValueStruct) uint32 { 110 | l := uint32(v.EncodedSize()) 111 | offset := s.allocate(l) 112 | v.EncodeValue(s.buf[offset:]) 113 | return offset 114 | } 115 | 116 | func (s *Arena) putKey(key []byte) uint32 { 117 | keySz := uint32(len(key)) 118 | offset := s.allocate(keySz) 119 | buf := s.buf[offset : offset+keySz] 120 | AssertTrue(len(key) == copy(buf, key)) 121 | return offset 122 | } 123 | 124 | // getNode returns a pointer to the node located at offset. If the offset is 125 | // zero, then the nil node pointer is returned. 126 | func (s *Arena) getNode(offset uint32) *node { 127 | if offset == 0 { 128 | return nil 129 | } 130 | return (*node)(unsafe.Pointer(&s.buf[offset])) 131 | } 132 | 133 | // getKey returns byte slice at offset. 134 | func (s *Arena) getKey(offset uint32, size uint16) []byte { 135 | return s.buf[offset : offset+uint32(size)] 136 | } 137 | 138 | // getVal returns byte slice at offset. The given size should be just the value 139 | // size and should NOT include the meta bytes. 140 | func (s *Arena) getVal(offset uint32, size uint32) (ret ValueStruct) { 141 | ret.DecodeValue(s.buf[offset : offset+size]) 142 | return 143 | } 144 | 145 | // getNodeOffset returns the offset of node in the arena. If the node pointer is 146 | // nil, then the zero offset is returned. 147 | func (s *Arena) getNodeOffset(nd *node) uint32 { 148 | if nd == nil { 149 | return 0 //返回空指针 150 | } 151 | //implement me here!!! 152 | //获取某个节点,在 arena 当中的偏移量 153 | //unsafe.Pointer等价于void*,uintptr可以专门把void*的对于地址转化为数值型变量 154 | return uint32(uintptr(unsafe.Pointer(nd)) - uintptr(unsafe.Pointer(&s.buf[0]))) 155 | } 156 | 157 | // AssertTrue asserts that b is true. Otherwise, it would log fatal. 
158 | func AssertTrue(b bool) { 159 | if !b { 160 | log.Fatalf("%+v", errors.Errorf("Assert failed")) 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /utils/cache/cache.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "container/list" 5 | xxhash "github.com/cespare/xxhash/v2" 6 | "sync" 7 | "unsafe" 8 | ) 9 | 10 | type Cache struct { 11 | m sync.RWMutex 12 | lru *windowLRU 13 | slru *segmentedLRU 14 | door *BloomFilter 15 | c *cmSketch 16 | t int32 17 | threshold int32 18 | data map[uint64]*list.Element 19 | } 20 | 21 | type Options struct { 22 | lruPct uint8 23 | } 24 | 25 | // NewCache size 指的是要缓存的数据个数 26 | func NewCache(size int) *Cache { 27 | //定义 window 部分缓存所占百分比,这里定义为1% 28 | const lruPct = 1 29 | //计算出来 widow 部分的容量 30 | lruSz := (lruPct * size) / 100 31 | 32 | if lruSz < 1 { 33 | lruSz = 1 34 | } 35 | 36 | // 计算 LFU 部分的缓存容量 37 | slruSz := int(float64(size) * ((100 - lruPct) / 100.0)) 38 | 39 | if slruSz < 1 { 40 | slruSz = 1 41 | } 42 | 43 | //LFU 分为两部分,stageOne 部分占比20% 44 | slruO := int(0.2 * float64(slruSz)) 45 | 46 | if slruO < 1 { 47 | slruO = 1 48 | } 49 | 50 | data := make(map[uint64]*list.Element, size) 51 | 52 | return &Cache{ 53 | lru: newWindowLRU(lruSz, data), 54 | slru: newSLRU(data, slruO, slruSz-slruO), 55 | door: newFilter(size, 0.01), //布隆过滤器设置误差率为0.01 56 | c: newCmSketch(int64(size)), 57 | data: data, //共用同一个 map 存储数据 58 | } 59 | 60 | } 61 | 62 | func (c *Cache) Set(key interface{}, value interface{}) bool { 63 | c.m.Lock() 64 | defer c.m.Unlock() 65 | return c.set(key, value) 66 | } 67 | 68 | func (c *Cache) set(key, value interface{}) bool { 69 | // keyHash 用来快速定位,conflice 用来判断冲突 70 | keyHash, conflictHash := c.keyToHash(key) 71 | 72 | // 刚放进去的缓存都先放到 window lru 中,所以 stage = 0 73 | i := storeItem{ 74 | stage: 0, 75 | key: keyHash, 76 | conflict: conflictHash, 77 | value: value, 78 | } 79 | 80 | // 如果 window 
已满,要返回被淘汰的数据
	eitem, evicted := c.lru.add(i)

	// No eviction from the window: done.
	if !evicted {
		return true
	}

	// The window evicted an entry; pick a victim from the LFU's stageOne
	// segment and let the two entries compete for the slot.
	victim := c.slru.victim()

	// LFU is not full yet, so the window's evictee moves straight into stageOne.
	if victim == nil {
		c.slru.add(eitem)
		return true
	}

	// Only entries already seen by the bloom filter (i.e. accessed at least
	// twice) are allowed to compete for the slot.
	if !c.door.Allow(uint32(eitem.key)) {
		return true
	}

	// Estimate historical access frequency of both candidates; the more
	// frequently accessed entry is considered more worth keeping.
	vcount := c.c.Estimate(victim.key)
	ocount := c.c.Estimate(eitem.key)

	if ocount < vcount {
		return true
	}

	// The window's evictee won the comparison and enters stageOne.
	c.slru.add(eitem)
	return true
}

// Get looks up key, refreshing its recency/frequency bookkeeping.
//
// BUG FIX: this previously took only c.m.RLock(), but get mutates shared
// state (c.t, the count-min sketch, the bloom filter, and the LRU list
// positions via lru.get/slru.get), so concurrent Gets raced. A write lock
// is required.
func (c *Cache) Get(key interface{}) (interface{}, bool) {
	c.m.Lock()
	defer c.m.Unlock()
	return c.get(key)
}

// get is the inner lookup; callers must hold c.m.
func (c *Cache) get(key interface{}) (interface{}, bool) {
	// Periodically halve the frequency sketch and clear the bloom filter so
	// that stale access history decays.
	c.t++
	if c.t == c.threshold {
		c.c.Reset()
		c.door.reset()
		c.t = 0
	}

	keyHash, conflictHash := c.keyToHash(key)

	val, ok := c.data[keyHash]
	if !ok {
		// Record the miss so a future Set can win its admission contest.
		c.door.Allow(uint32(keyHash))
		c.c.Increment(keyHash)
		return nil, false
	}

	item := val.Value.(*storeItem)

	// Same keyHash but a different conflict hash: treat as a miss.
	if item.conflict != conflictHash {
		c.door.Allow(uint32(keyHash))
		c.c.Increment(keyHash)
		return nil, false
	}
	c.door.Allow(uint32(keyHash))
	c.c.Increment(item.key)

	v := item.value

	// Refresh recency in whichever segment currently holds the entry.
	if item.stage == 0 {
		c.lru.get(val)
	} else {
		c.slru.get(val)
	}

	return v, true
}

// Del removes key from the cache, returning its conflict hash on success.
func (c *Cache) Del(key interface{}) (interface{}, bool) {
	c.m.Lock()
	defer c.m.Unlock()
	return c.del(key)
}

// del is the inner delete; callers must hold c.m.
func (c *Cache) del(key interface{}) (interface{}, bool) {
	keyHash, conflictHash := c.keyToHash(key)

	val, ok := c.data[keyHash]
	if !ok {
		return 0, false
	}

	item := val.Value.(*storeItem)

	// A non-zero conflict hash that disagrees means this is a different key
	// that merely collided on keyHash.
	if conflictHash != 0 && (conflictHash != item.conflict) {
		return 0, false
	}

	delete(c.data, keyHash)
	return item.conflict, true
}

// keyToHash maps an arbitrary key to a (keyHash, conflictHash) pair.
// Integer-like keys hash to themselves with no conflict hash; strings and
// byte slices get a fast runtime memhash plus an xxhash for conflict
// detection. Unsupported key types panic.
func (c *Cache) keyToHash(key interface{}) (uint64, uint64) {
	if key == nil {
		return 0, 0
	}
	switch k := key.(type) {
	case uint64:
		return k, 0
	case string:
		return MemHashString(k), xxhash.Sum64String(k)
	case []byte:
		return MemHash(k), xxhash.Sum64(k)
	case byte:
		return uint64(k), 0
	case int:
		return uint64(k), 0
	case int32:
		return uint64(k), 0
	case uint32:
		return uint64(k), 0
	case int64:
		return uint64(k), 0
	default:
		panic("Key type not supported")
	}
}

// stringStruct mirrors the runtime's string header so memhash can be fed the
// data pointer and length directly.
type stringStruct struct {
	str unsafe.Pointer
	len int
}

//go:noescape
//go:linkname memhash runtime.memhash
func memhash(p unsafe.Pointer, h, s uintptr) uintptr

// MemHashString is the hash function used by go map, it utilizes available hardware instructions
// (behaves as aeshash if aes instruction is available).
// NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash.
225 | func MemHashString(str string) uint64 { 226 | ss := (*stringStruct)(unsafe.Pointer(&str)) 227 | return uint64(memhash(ss.str, 0, uintptr(ss.len))) 228 | } 229 | 230 | func MemHash(data []byte) uint64 { 231 | ss := (*stringStruct)(unsafe.Pointer(&data)) 232 | return uint64(memhash(ss.str, 0, uintptr(ss.len))) 233 | } 234 | 235 | func (c *Cache) String() string { 236 | var s string 237 | s += c.lru.String() + " | " + c.slru.String() 238 | return s 239 | } 240 | -------------------------------------------------------------------------------- /file/vlog.go: -------------------------------------------------------------------------------- 1 | package file 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "fmt" 7 | "hash/crc32" 8 | "io" 9 | "math" 10 | "os" 11 | "sync" 12 | "sync/atomic" 13 | 14 | "github.com/hardcore-os/corekv/utils" 15 | "github.com/pkg/errors" 16 | ) 17 | 18 | type LogFile struct { 19 | Lock sync.RWMutex 20 | FID uint32 21 | size uint32 22 | f *MmapFile 23 | } 24 | 25 | func (lf *LogFile) Open(opt *Options) error { 26 | var err error 27 | lf.FID = uint32(opt.FID) 28 | lf.Lock = sync.RWMutex{} 29 | lf.f, err = OpenMmapFile(opt.FileName, os.O_CREATE|os.O_RDWR, opt.MaxSz) 30 | utils.Panic2(nil, err) 31 | fi, err := lf.f.Fd.Stat() 32 | if err != nil { 33 | return utils.WarpErr("Unable to run file.Stat", err) 34 | } 35 | // 获取文件尺寸 36 | sz := fi.Size() 37 | utils.CondPanic(sz > math.MaxUint32, fmt.Errorf("file size: %d greater than %d", 38 | uint32(sz), uint32(math.MaxUint32))) 39 | lf.size = uint32(sz) 40 | // TODO 是否要在这里弄一个header放一些元数据呢? 41 | return nil 42 | } 43 | 44 | // Acquire lock on mmap/file if you are calling this 45 | func (lf *LogFile) Read(p *utils.ValuePtr) (buf []byte, err error) { 46 | offset := p.Offset 47 | // Do not convert size to uint32, because the lf.fmap can be of size 48 | // 4GB, which overflows the uint32 during conversion to make the size 0, 49 | // causing the read to fail with ErrEOF. See issue #585. 
50 | size := int64(len(lf.f.Data)) 51 | valsz := p.Len 52 | lfsz := atomic.LoadUint32(&lf.size) 53 | if int64(offset) >= size || int64(offset+valsz) > size || 54 | // Ensure that the read is within the file's actual size. It might be possible that 55 | // the offset+valsz length is beyond the file's actual size. This could happen when 56 | // dropAll and iterations are running simultaneously. 57 | int64(offset+valsz) > int64(lfsz) { 58 | err = io.EOF 59 | } else { 60 | buf, err = lf.f.Bytes(int(offset), int(valsz)) 61 | } 62 | return buf, err 63 | } 64 | 65 | func (lf *LogFile) DoneWriting(offset uint32) error { 66 | // Sync before acquiring lock. (We call this from write() and thus know we have shared access 67 | // to the fd.) 68 | if err := lf.f.Sync(); err != nil { 69 | return errors.Wrapf(err, "Unable to sync value log: %q", lf.FileName()) 70 | } 71 | 72 | // 写嘛 总是要锁一下的 73 | lf.Lock.Lock() 74 | defer lf.Lock.Unlock() 75 | 76 | // TODO: Confirm if we need to run a file sync after truncation. 77 | // Truncation must run after unmapping, otherwise Windows would crap itself. 78 | if err := lf.f.Truncature(int64(offset)); err != nil { 79 | return errors.Wrapf(err, "Unable to truncate file: %q", lf.FileName()) 80 | } 81 | 82 | // Reinitialize the log file. This will mmap the entire file. 83 | if err := lf.Init(); err != nil { 84 | return errors.Wrapf(err, "failed to initialize file %s", lf.FileName()) 85 | } 86 | 87 | // Previously we used to close the file after it was written and reopen it in read-only mode. 88 | // We no longer open files in read-only mode. We keep all vlog files open in read-write mode. 
89 | return nil 90 | } 91 | func (lf *LogFile) Write(offset uint32, buf []byte) (err error) { 92 | return lf.f.AppendBuffer(offset, buf) 93 | } 94 | func (lf *LogFile) Truncate(offset int64) error { 95 | return lf.f.Truncature(offset) 96 | } 97 | func (lf *LogFile) Close() error { 98 | return lf.f.Close() 99 | } 100 | 101 | func (lf *LogFile) Size() int64 { 102 | return int64(atomic.LoadUint32(&lf.size)) 103 | } 104 | func (lf *LogFile) AddSize(offset uint32) { 105 | atomic.StoreUint32(&lf.size, offset) 106 | } 107 | 108 | // 完成log文件的初始化 109 | func (lf *LogFile) Bootstrap() error { 110 | // TODO 是否需要初始化一些内容给vlog文件? 111 | return nil 112 | } 113 | 114 | func (lf *LogFile) Init() error { 115 | fstat, err := lf.f.Fd.Stat() 116 | if err != nil { 117 | return errors.Wrapf(err, "Unable to check stat for %q", lf.FileName()) 118 | } 119 | sz := fstat.Size() 120 | if sz == 0 { 121 | // File is empty. We don't need to mmap it. Return. 122 | return nil 123 | } 124 | utils.CondPanic(sz > math.MaxUint32, fmt.Errorf("[LogFile.Init] sz > math.MaxUint32")) 125 | lf.size = uint32(sz) 126 | return nil 127 | } 128 | func (lf *LogFile) FileName() string { 129 | return lf.f.Fd.Name() 130 | } 131 | 132 | func (lf *LogFile) Seek(offset int64, whence int) (ret int64, err error) { 133 | return lf.f.Fd.Seek(offset, whence) 134 | } 135 | 136 | func (lf *LogFile) FD() *os.File { 137 | return lf.f.Fd 138 | } 139 | 140 | // You must hold lf.lock to sync() 141 | func (lf *LogFile) Sync() error { 142 | return lf.f.Sync() 143 | } 144 | 145 | // encodeEntry will encode entry to the buf 146 | // layout of entry 147 | // +--------+-----+-------+-------+ 148 | // | header | key | value | crc32 | 149 | // +--------+-----+-------+-------+ 150 | func (lf *LogFile) EncodeEntry(e *utils.Entry, buf *bytes.Buffer, offset uint32) (int, error) { 151 | h := utils.Header{ 152 | KLen: uint32(len(e.Key)), 153 | VLen: uint32(len(e.Value)), 154 | ExpiresAt: e.ExpiresAt, 155 | Meta: e.Meta, 156 | } 157 | 158 | hash := 
crc32.New(utils.CastagnoliCrcTable) 159 | writer := io.MultiWriter(buf, hash) 160 | 161 | // encode header. 162 | var headerEnc [utils.MaxHeaderSize]byte 163 | sz := h.Encode(headerEnc[:]) 164 | utils.Panic2(writer.Write(headerEnc[:sz])) 165 | // Encryption is disabled so writing directly to the buffer. 166 | utils.Panic2(writer.Write(e.Key)) 167 | utils.Panic2(writer.Write(e.Value)) 168 | // write crc32 hash. 169 | var crcBuf [crc32.Size]byte 170 | binary.BigEndian.PutUint32(crcBuf[:], hash.Sum32()) 171 | utils.Panic2(buf.Write(crcBuf[:])) 172 | // return encoded length. 173 | return len(headerEnc[:sz]) + len(e.Key) + len(e.Value) + len(crcBuf), nil 174 | } 175 | func (lf *LogFile) DecodeEntry(buf []byte, offset uint32) (*utils.Entry, error) { 176 | var h utils.Header 177 | hlen := h.Decode(buf) 178 | kv := buf[hlen:] 179 | e := &utils.Entry{ 180 | Meta: h.Meta, 181 | ExpiresAt: h.ExpiresAt, 182 | Offset: offset, 183 | Key: kv[:h.KLen], 184 | Value: kv[h.KLen : h.KLen+h.VLen], 185 | } 186 | return e, nil 187 | } 188 | -------------------------------------------------------------------------------- /file/mmap_darwin.go: -------------------------------------------------------------------------------- 1 | //go:build darwin 2 | // +build darwin 3 | 4 | // Copyright 2021 hardcore-os Project Authors 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License") 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 
17 | 18 | package file 19 | 20 | import ( 21 | "encoding/binary" 22 | "fmt" 23 | "io" 24 | "os" 25 | "path/filepath" 26 | 27 | "github.com/hardcore-os/corekv/utils/mmap" 28 | "github.com/pkg/errors" 29 | ) 30 | 31 | // MmapFile represents an mmapd file and includes both the buffer to the data and the file descriptor. 32 | type MmapFile struct { 33 | Data []byte 34 | Fd *os.File 35 | } 36 | 37 | // OpenMmapFileUsing os 38 | func OpenMmapFileUsing(fd *os.File, sz int, writable bool) (*MmapFile, error) { 39 | filename := fd.Name() 40 | fi, err := fd.Stat() 41 | if err != nil { 42 | return nil, errors.Wrapf(err, "cannot stat file: %s", filename) 43 | } 44 | 45 | var rerr error 46 | fileSize := fi.Size() 47 | if sz > 0 && fileSize == 0 { 48 | // If file is empty, truncate it to sz. 49 | if err := fd.Truncate(int64(sz)); err != nil { 50 | return nil, errors.Wrapf(err, "error while truncation") 51 | } 52 | fileSize = int64(sz) 53 | } 54 | 55 | // fmt.Printf("Mmaping file: %s with writable: %v filesize: %d\n", fd.Name(), writable, fileSize) 56 | buf, err := mmap.Mmap(fd, writable, fileSize) // Mmap up to file size. 57 | if err != nil { 58 | return nil, errors.Wrapf(err, "while mmapping %s with size: %d", fd.Name(), fileSize) 59 | } 60 | 61 | if fileSize == 0 { 62 | dir, _ := filepath.Split(filename) 63 | go SyncDir(dir) 64 | } 65 | return &MmapFile{ 66 | Data: buf, 67 | Fd: fd, 68 | }, rerr 69 | } 70 | 71 | // OpenMmapFile opens an existing file or creates a new file. If the file is 72 | // created, it would truncate the file to maxSz. In both cases, it would mmap 73 | // the file to maxSz and returned it. In case the file is created, z.NewFile is 74 | // returned. 
75 | func OpenMmapFile(filename string, flag int, maxSz int) (*MmapFile, error) { 76 | // fmt.Printf("opening file %s with flag: %v\n", filename, flag) 77 | fd, err := os.OpenFile(filename, flag, 0666) 78 | if err != nil { 79 | return nil, errors.Wrapf(err, "unable to open: %s", filename) 80 | } 81 | writable := true 82 | if flag == os.O_RDONLY { 83 | writable = false 84 | } 85 | return OpenMmapFileUsing(fd, maxSz, writable) 86 | } 87 | 88 | type mmapReader struct { 89 | Data []byte 90 | offset int 91 | } 92 | 93 | func (mr *mmapReader) Read(buf []byte) (int, error) { 94 | if mr.offset > len(mr.Data) { 95 | return 0, io.EOF 96 | } 97 | n := copy(buf, mr.Data[mr.offset:]) 98 | mr.offset += n 99 | if n < len(buf) { 100 | return n, io.EOF 101 | } 102 | return n, nil 103 | } 104 | 105 | func (m *MmapFile) NewReader(offset int) io.Reader { 106 | return &mmapReader{ 107 | Data: m.Data, 108 | offset: offset, 109 | } 110 | } 111 | 112 | // Bytes returns data starting from offset off of size sz. If there's not enough data, it would 113 | // return nil slice and io.EOF. 114 | func (m *MmapFile) Bytes(off, sz int) ([]byte, error) { 115 | if len(m.Data[off:]) < sz { 116 | return nil, io.EOF 117 | } 118 | return m.Data[off : off+sz], nil 119 | } 120 | 121 | // Slice returns the slice at the given offset. 122 | func (m *MmapFile) Slice(offset int) []byte { 123 | sz := binary.BigEndian.Uint32(m.Data[offset:]) 124 | start := offset + 4 125 | next := start + int(sz) 126 | if next > len(m.Data) { 127 | return []byte{} 128 | } 129 | res := m.Data[start:next] 130 | return res 131 | } 132 | 133 | // AllocateSlice allocates a slice of the given size at the given offset. 134 | func (m *MmapFile) AllocateSlice(sz, offset int) ([]byte, int, error) { 135 | start := offset + 4 136 | 137 | // If the file is too small, double its size or increase it by 1GB, whichever is smaller. 
138 | if start+sz > len(m.Data) { 139 | const oneGB = 1 << 30 140 | growBy := len(m.Data) 141 | if growBy > oneGB { 142 | growBy = oneGB 143 | } 144 | if growBy < sz+4 { 145 | growBy = sz + 4 146 | } 147 | if err := m.Truncature(int64(len(m.Data) + growBy)); err != nil { 148 | return nil, 0, err 149 | } 150 | } 151 | 152 | binary.BigEndian.PutUint32(m.Data[offset:], uint32(sz)) 153 | return m.Data[start : start+sz], start + sz, nil 154 | } 155 | 156 | const oneGB = 1 << 30 157 | 158 | // AppendBuffer 向内存中追加一个buffer,如果空间不足则重新映射,扩大空间 159 | func (m *MmapFile) AppendBuffer(offset uint32, buf []byte) error { 160 | size := len(m.Data) 161 | needSize := len(buf) 162 | end := int(offset) + needSize 163 | if end > size { 164 | growBy := size 165 | if growBy > oneGB { 166 | growBy = oneGB 167 | } 168 | if growBy < needSize { 169 | growBy = needSize 170 | } 171 | if err := m.Truncature(int64(end)); err != nil { 172 | return err 173 | } 174 | } 175 | dLen := copy(m.Data[offset:end], buf) 176 | if dLen != needSize { 177 | return errors.Errorf("dLen != needSize AppendBuffer failed") 178 | } 179 | return nil 180 | } 181 | 182 | func (m *MmapFile) Sync() error { 183 | if m == nil { 184 | return nil 185 | } 186 | return mmap.Msync(m.Data) 187 | } 188 | 189 | func (m *MmapFile) Delete() error { 190 | if m.Fd == nil { 191 | return nil 192 | } 193 | 194 | if err := mmap.Munmap(m.Data); err != nil { 195 | return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) 196 | } 197 | m.Data = nil 198 | if err := m.Fd.Truncate(0); err != nil { 199 | return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) 200 | } 201 | if err := m.Fd.Close(); err != nil { 202 | return fmt.Errorf("while close file: %s, error: %v\n", m.Fd.Name(), err) 203 | } 204 | return os.Remove(m.Fd.Name()) 205 | } 206 | 207 | // Close would close the file. It would also truncate the file if maxSz >= 0. 
208 | func (m *MmapFile) Close() error { 209 | if m.Fd == nil { 210 | return nil 211 | } 212 | if err := m.Sync(); err != nil { 213 | return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) 214 | } 215 | if err := mmap.Munmap(m.Data); err != nil { 216 | return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) 217 | } 218 | return m.Fd.Close() 219 | } 220 | 221 | func SyncDir(dir string) error { 222 | df, err := os.Open(dir) 223 | if err != nil { 224 | return errors.Wrapf(err, "while opening %s", dir) 225 | } 226 | if err := df.Sync(); err != nil { 227 | return errors.Wrapf(err, "while syncing %s", dir) 228 | } 229 | if err := df.Close(); err != nil { 230 | return errors.Wrapf(err, "while closing %s", dir) 231 | } 232 | return nil 233 | } 234 | 235 | // Truncature 兼容接口 236 | func (m *MmapFile) Truncature(maxSz int64) error { 237 | if err := m.Sync(); err != nil { 238 | return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) 239 | } 240 | if err := mmap.Munmap(m.Data); err != nil { 241 | return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) 242 | } 243 | if err := m.Fd.Truncate(maxSz); err != nil { 244 | return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) 245 | } 246 | var err error 247 | m.Data, err = mmap.Mmap(m.Fd, true, maxSz) // Mmap up to max size. 248 | return err 249 | } 250 | 251 | // ReName 兼容接口 252 | func (m *MmapFile) ReName(name string) error { 253 | return nil 254 | } 255 | -------------------------------------------------------------------------------- /file/mmap_linux.go: -------------------------------------------------------------------------------- 1 | // +build linux 2 | 3 | // Copyright 2021 hardcore-os Project Authors 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License") 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package file 18 | 19 | import ( 20 | "encoding/binary" 21 | "fmt" 22 | "io" 23 | "os" 24 | "path/filepath" 25 | 26 | "github.com/hardcore-os/corekv/utils/mmap" 27 | "github.com/pkg/errors" 28 | ) 29 | 30 | // MmapFile represents an mmapd file and includes both the buffer to the data and the file descriptor. 31 | type MmapFile struct { 32 | Data []byte 33 | Fd *os.File 34 | } 35 | 36 | // OpenMmapFileUsing os 37 | func OpenMmapFileUsing(fd *os.File, sz int, writable bool) (*MmapFile, error) { 38 | filename := fd.Name() 39 | fi, err := fd.Stat() 40 | if err != nil { 41 | return nil, errors.Wrapf(err, "cannot stat file: %s", filename) 42 | } 43 | 44 | var rerr error 45 | fileSize := fi.Size() 46 | if sz > 0 && fileSize == 0 { 47 | // If file is empty, truncate it to sz. 48 | if err := fd.Truncate(int64(sz)); err != nil { 49 | return nil, errors.Wrapf(err, "error while truncation") 50 | } 51 | fileSize = int64(sz) 52 | } 53 | 54 | // fmt.Printf("Mmaping file: %s with writable: %v filesize: %d\n", fd.Name(), writable, fileSize) 55 | buf, err := mmap.Mmap(fd, writable, fileSize) // Mmap up to file size. 56 | if err != nil { 57 | return nil, errors.Wrapf(err, "while mmapping %s with size: %d", fd.Name(), fileSize) 58 | } 59 | 60 | if fileSize == 0 { 61 | dir, _ := filepath.Split(filename) 62 | go SyncDir(dir) 63 | } 64 | return &MmapFile{ 65 | Data: buf, 66 | Fd: fd, 67 | }, rerr 68 | } 69 | 70 | // OpenMmapFile opens an existing file or creates a new file. 
If the file is 71 | // created, it would truncate the file to maxSz. In both cases, it would mmap 72 | // the file to maxSz and returned it. In case the file is created, z.NewFile is 73 | // returned. 74 | func OpenMmapFile(filename string, flag int, maxSz int) (*MmapFile, error) { 75 | // fmt.Printf("opening file %s with flag: %v\n", filename, flag) 76 | fd, err := os.OpenFile(filename, flag, 0666) 77 | if err != nil { 78 | return nil, errors.Wrapf(err, "unable to open: %s", filename) 79 | } 80 | writable := true 81 | if flag == os.O_RDONLY { 82 | writable = false 83 | } 84 | // 如果 sst文件层被打开过,则使用其文件原来的大小 85 | if fileInfo, err := fd.Stat(); err == nil && fileInfo != nil && fileInfo.Size() > 0 { 86 | maxSz = int(fileInfo.Size()) 87 | } 88 | return OpenMmapFileUsing(fd, maxSz, writable) 89 | } 90 | 91 | type mmapReader struct { 92 | Data []byte 93 | offset int 94 | } 95 | 96 | func (mr *mmapReader) Read(buf []byte) (int, error) { 97 | if mr.offset > len(mr.Data) { 98 | return 0, io.EOF 99 | } 100 | n := copy(buf, mr.Data[mr.offset:]) 101 | mr.offset += n 102 | if n < len(buf) { 103 | return n, io.EOF 104 | } 105 | return n, nil 106 | } 107 | 108 | func (m *MmapFile) NewReader(offset int) io.Reader { 109 | return &mmapReader{ 110 | Data: m.Data, 111 | offset: offset, 112 | } 113 | } 114 | 115 | // Bytes returns data starting from offset off of size sz. If there's not enough data, it would 116 | // return nil slice and io.EOF. 117 | func (m *MmapFile) Bytes(off, sz int) ([]byte, error) { 118 | if len(m.Data[off:]) < sz { 119 | return nil, io.EOF 120 | } 121 | return m.Data[off : off+sz], nil 122 | } 123 | 124 | // Slice returns the slice at the given offset. 
125 | func (m *MmapFile) Slice(offset int) []byte { 126 | sz := binary.BigEndian.Uint32(m.Data[offset:]) 127 | start := offset + 4 128 | next := start + int(sz) 129 | if next > len(m.Data) { 130 | return []byte{} 131 | } 132 | res := m.Data[start:next] 133 | return res 134 | } 135 | 136 | // AllocateSlice allocates a slice of the given size at the given offset. 137 | func (m *MmapFile) AllocateSlice(sz, offset int) ([]byte, int, error) { 138 | start := offset + 4 139 | 140 | // If the file is too small, double its size or increase it by 1GB, whichever is smaller. 141 | if start+sz > len(m.Data) { 142 | const oneGB = 1 << 30 143 | growBy := len(m.Data) 144 | if growBy > oneGB { 145 | growBy = oneGB 146 | } 147 | if growBy < sz+4 { 148 | growBy = sz + 4 149 | } 150 | if err := m.Truncature(int64(len(m.Data) + growBy)); err != nil { 151 | return nil, 0, err 152 | } 153 | } 154 | 155 | binary.BigEndian.PutUint32(m.Data[offset:], uint32(sz)) 156 | return m.Data[start : start+sz], start + sz, nil 157 | } 158 | 159 | const oneGB = 1 << 30 160 | 161 | // AppendBuffer 向内存中追加一个buffer,如果空间不足则重新映射,扩大空间 162 | func (m *MmapFile) AppendBuffer(offset uint32, buf []byte) error { 163 | size := len(m.Data) 164 | needSize := len(buf) 165 | end := int(offset) + needSize 166 | if end > size { 167 | growBy := size 168 | if growBy > oneGB { 169 | growBy = oneGB 170 | } 171 | if growBy < needSize { 172 | growBy = needSize 173 | } 174 | if err := m.Truncature(int64(end)); err != nil { 175 | return err 176 | } 177 | } 178 | dLen := copy(m.Data[offset:end], buf) 179 | if dLen != needSize { 180 | return errors.Errorf("dLen != needSize AppendBuffer failed") 181 | } 182 | return nil 183 | } 184 | 185 | func (m *MmapFile) Sync() error { 186 | if m == nil { 187 | return nil 188 | } 189 | return mmap.Msync(m.Data) 190 | } 191 | 192 | func (m *MmapFile) Delete() error { 193 | if m.Fd == nil { 194 | return nil 195 | } 196 | 197 | if err := mmap.Munmap(m.Data); err != nil { 198 | return 
fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) 199 | } 200 | m.Data = nil 201 | if err := m.Fd.Truncate(0); err != nil { 202 | return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) 203 | } 204 | if err := m.Fd.Close(); err != nil { 205 | return fmt.Errorf("while close file: %s, error: %v\n", m.Fd.Name(), err) 206 | } 207 | return os.Remove(m.Fd.Name()) 208 | } 209 | 210 | // Close would close the file. It would also truncate the file if maxSz >= 0. 211 | func (m *MmapFile) Close() error { 212 | if m.Fd == nil { 213 | return nil 214 | } 215 | if err := m.Sync(); err != nil { 216 | return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) 217 | } 218 | if err := mmap.Munmap(m.Data); err != nil { 219 | return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) 220 | } 221 | return m.Fd.Close() 222 | } 223 | 224 | func SyncDir(dir string) error { 225 | df, err := os.Open(dir) 226 | if err != nil { 227 | return errors.Wrapf(err, "while opening %s", dir) 228 | } 229 | if err := df.Sync(); err != nil { 230 | return errors.Wrapf(err, "while syncing %s", dir) 231 | } 232 | if err := df.Close(); err != nil { 233 | return errors.Wrapf(err, "while closing %s", dir) 234 | } 235 | return nil 236 | } 237 | 238 | // Truncature 兼容接口 239 | func (m *MmapFile) Truncature(maxSz int64) error { 240 | if err := m.Sync(); err != nil { 241 | return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) 242 | } 243 | if err := m.Fd.Truncate(maxSz); err != nil { 244 | return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) 245 | } 246 | 247 | var err error 248 | m.Data, err = mmap.Mremap(m.Data, int(maxSz)) // Mmap up to max size. 
249 | return err 250 | } 251 | 252 | // ReName 兼容接口 253 | func (m *MmapFile) ReName(name string) error { 254 | return nil 255 | } 256 | -------------------------------------------------------------------------------- /lsm/lsm_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package lsm 16 | 17 | import ( 18 | "bytes" 19 | "fmt" 20 | "os" 21 | "testing" 22 | "time" 23 | 24 | "github.com/hardcore-os/corekv/utils" 25 | ) 26 | 27 | var ( 28 | // 初始化opt 29 | opt = &Options{ 30 | WorkDir: "../work_test", 31 | SSTableMaxSz: 1024, 32 | MemTableSize: 1024, 33 | BlockSize: 1024, 34 | BloomFalsePositive: 0, 35 | BaseLevelSize: 10 << 20, 36 | LevelSizeMultiplier: 10, 37 | BaseTableSize: 2 << 20, 38 | TableSizeMultiplier: 2, 39 | NumLevelZeroTables: 15, 40 | MaxLevelNum: 7, 41 | NumCompactors: 3, 42 | } 43 | ) 44 | 45 | // TestBase 正确性测试 46 | func TestBase(t *testing.T) { 47 | clearDir() 48 | lsm := buildLSM() 49 | test := func() { 50 | // 基准测试 51 | baseTest(t, lsm, 128) 52 | } 53 | // 运行N次测试多个sst的影响 54 | runTest(1, test) 55 | } 56 | 57 | // TestClose 测试优雅关闭 58 | func TestClose(t *testing.T) { 59 | clearDir() 60 | lsm := buildLSM() 61 | lsm.StartCompacter() 62 | test := func() { 63 | baseTest(t, lsm, 128) 64 | utils.Err(lsm.Close()) 65 | // 重启后可正常工作才算成功 66 | lsm = buildLSM() 67 | 
baseTest(t, lsm, 128) 68 | } 69 | // 运行N次测试多个sst的影响 70 | runTest(1, test) 71 | } 72 | 73 | // 命中不同存储介质的逻辑分支测试 74 | func TestHitStorage(t *testing.T) { 75 | clearDir() 76 | lsm := buildLSM() 77 | e := utils.BuildEntry() 78 | lsm.Set(e) 79 | // 命中内存表 80 | hitMemtable := func() { 81 | v, err := lsm.memTable.Get(e.Key) 82 | utils.Err(err) 83 | utils.CondPanic(!bytes.Equal(v.Value, e.Value), fmt.Errorf("[hitMemtable] !equal(v.Value, e.Value)")) 84 | } 85 | // 命中L0层 86 | hitL0 := func() { 87 | // baseTest的测试就包含 在命中L0的sst查询 88 | baseTest(t, lsm, 128) 89 | } 90 | // 命中非L0层 91 | hitNotL0 := func() { 92 | // 通过压缩将compact生成非L0数据, 会命中l6层 93 | lsm.levels.runOnce(0) 94 | baseTest(t, lsm, 128) 95 | } 96 | // 命中bf 97 | hitBloom := func() { 98 | ee := utils.BuildEntry() 99 | // 查询不存在的key 如果命中则说明一定不存在 100 | v, err := lsm.levels.levels[0].tables[0].Serach(ee.Key, &ee.Version) 101 | utils.CondPanic(v != nil, fmt.Errorf("[hitBloom] v != nil")) 102 | utils.CondPanic(err != utils.ErrKeyNotFound, fmt.Errorf("[hitBloom] err != utils.ErrKeyNotFound")) 103 | } 104 | 105 | runTest(1, hitMemtable, hitL0, hitNotL0, hitBloom) 106 | } 107 | 108 | // Testparameter 测试异常参数 109 | func TestPsarameter(t *testing.T) { 110 | clearDir() 111 | lsm := buildLSM() 112 | testNil := func() { 113 | utils.CondPanic(lsm.Set(nil) != utils.ErrEmptyKey, fmt.Errorf("[testNil] lsm.Set(nil) != err")) 114 | _, err := lsm.Get(nil) 115 | utils.CondPanic(err != utils.ErrEmptyKey, fmt.Errorf("[testNil] lsm.Set(nil) != err")) 116 | } 117 | // TODO p2 优先级的case先忽略 118 | runTest(1, testNil) 119 | } 120 | 121 | // TestCompact 测试L0到Lmax压缩 122 | func TestCompact(t *testing.T) { 123 | clearDir() 124 | lsm := buildLSM() 125 | ok := false 126 | l0TOLMax := func() { 127 | // 正常触发即可 128 | baseTest(t, lsm, 128) 129 | // 直接触发压缩执行 130 | fid := lsm.levels.maxFID + 1 131 | lsm.levels.runOnce(1) 132 | for _, t := range lsm.levels.levels[6].tables { 133 | if t.fid == fid { 134 | ok = true 135 | } 136 | } 137 | utils.CondPanic(!ok, 
fmt.Errorf("[l0TOLMax] fid not found")) 138 | } 139 | l0ToL0 := func() { 140 | // 先写一些数据进来 141 | baseTest(t, lsm, 128) 142 | fid := lsm.levels.maxFID + 1 143 | cd := buildCompactDef(lsm, 0, 0, 0) 144 | // 非常tricky的处理方法,为了能通过检查 145 | tricky(cd.thisLevel.tables) 146 | ok := lsm.levels.fillTablesL0ToL0(cd) 147 | utils.CondPanic(!ok, fmt.Errorf("[l0ToL0] lsm.levels.fillTablesL0ToL0(cd) ret == false")) 148 | err := lsm.levels.runCompactDef(0, 0, *cd) 149 | // 删除全局状态,便于下游测试逻辑 150 | lsm.levels.compactState.delete(*cd) 151 | utils.Err(err) 152 | ok = false 153 | for _, t := range lsm.levels.levels[0].tables { 154 | if t.fid == fid { 155 | ok = true 156 | } 157 | } 158 | utils.CondPanic(!ok, fmt.Errorf("[l0ToL0] fid not found")) 159 | } 160 | nextCompact := func() { 161 | baseTest(t, lsm, 128) 162 | fid := lsm.levels.maxFID + 1 163 | cd := buildCompactDef(lsm, 0, 0, 1) 164 | // 非常tricky的处理方法,为了能通过检查 165 | tricky(cd.thisLevel.tables) 166 | ok := lsm.levels.fillTables(cd) 167 | utils.CondPanic(!ok, fmt.Errorf("[nextCompact] lsm.levels.fillTables(cd) ret == false")) 168 | err := lsm.levels.runCompactDef(0, 0, *cd) 169 | // 删除全局状态,便于下游测试逻辑 170 | lsm.levels.compactState.delete(*cd) 171 | utils.Err(err) 172 | ok = false 173 | for _, t := range lsm.levels.levels[1].tables { 174 | if t.fid == fid { 175 | ok = true 176 | } 177 | } 178 | utils.CondPanic(!ok, fmt.Errorf("[nextCompact] fid not found")) 179 | } 180 | 181 | maxToMax := func() { 182 | baseTest(t, lsm, 128) 183 | fid := lsm.levels.maxFID + 1 184 | cd := buildCompactDef(lsm, 6, 6, 6) 185 | // 非常tricky的处理方法,为了能通过检查 186 | tricky(cd.thisLevel.tables) 187 | ok := lsm.levels.fillTables(cd) 188 | utils.CondPanic(!ok, fmt.Errorf("[maxToMax] lsm.levels.fillTables(cd) ret == false")) 189 | err := lsm.levels.runCompactDef(0, 6, *cd) 190 | // 删除全局状态,便于下游测试逻辑 191 | lsm.levels.compactState.delete(*cd) 192 | utils.Err(err) 193 | ok = false 194 | for _, t := range lsm.levels.levels[6].tables { 195 | if t.fid == fid { 196 | ok = true 197 | 
} 198 | } 199 | utils.CondPanic(!ok, fmt.Errorf("[maxToMax] fid not found")) 200 | } 201 | parallerCompact := func() { 202 | baseTest(t, lsm, 128) 203 | cd := buildCompactDef(lsm, 0, 0, 1) 204 | // 非常tricky的处理方法,为了能通过检查 205 | tricky(cd.thisLevel.tables) 206 | ok := lsm.levels.fillTables(cd) 207 | utils.CondPanic(!ok, fmt.Errorf("[parallerCompact] lsm.levels.fillTables(cd) ret == false")) 208 | // 构建完全相同两个压缩计划的执行,以便于百分比构建 压缩冲突 209 | go lsm.levels.runCompactDef(0, 0, *cd) 210 | lsm.levels.runCompactDef(0, 0, *cd) 211 | // 检查compact status状态查看是否在执行并行压缩 212 | isParaller := false 213 | for _, state := range lsm.levels.compactState.levels { 214 | if len(state.ranges) != 0 { 215 | isParaller = true 216 | } 217 | } 218 | utils.CondPanic(!isParaller, fmt.Errorf("[parallerCompact] not is paralle")) 219 | } 220 | // 运行N次测试多个sst的影响 221 | runTest(1, l0TOLMax, l0ToL0, nextCompact, maxToMax, parallerCompact) 222 | } 223 | 224 | // 正确性测试 225 | func baseTest(t *testing.T, lsm *LSM, n int) { 226 | // 用来跟踪调试的 227 | e := &utils.Entry{ 228 | Key: []byte("CRTS😁硬核课堂MrGSBtL12345678"), 229 | Value: []byte("我草了"), 230 | ExpiresAt: 123, 231 | } 232 | //caseList := make([]*utils.Entry, 0) 233 | //caseList = append(caseList, e) 234 | 235 | // 随机构建数据进行测试 236 | lsm.Set(e) 237 | for i := 1; i < n; i++ { 238 | ee := utils.BuildEntry() 239 | lsm.Set(ee) 240 | // caseList = append(caseList, ee) 241 | } 242 | // 从levels中进行GET 243 | v, err := lsm.Get(e.Key) 244 | utils.Panic(err) 245 | utils.CondPanic(!bytes.Equal(e.Value, v.Value), fmt.Errorf("lsm.Get(e.Key) value not equal !!!")) 246 | // TODO range功能待完善 247 | //retList := make([]*utils.Entry, 0) 248 | // testRange := func(isAsc bool) { 249 | // // Range 确保写入进去的每个lsm都可以被读取到 250 | // iter := lsm.NewIterator(&utils.Options{IsAsc: true}) 251 | // for iter.Rewind(); iter.Valid(); iter.Next() { 252 | // e := iter.Item().Entry() 253 | // retList = append(retList, e) 254 | // } 255 | // utils.CondPanic(len(retList) != len(caseList), 
fmt.Errorf("len(retList) != len(caseList)")) 256 | // sort.Slice(retList, func(i, j int) bool { 257 | // return utils.CompareKeys(retList[i].Key, retList[j].Key) > 1 258 | // }) 259 | // for i := 0; i < len(caseList); i++ { 260 | // a, b := caseList[i], retList[i] 261 | // if !equal(a.Key, b.Key) || !equal(a.Value, b.Value) || a.ExpiresAt != b.ExpiresAt { 262 | // utils.Panic(fmt.Errorf("lsm.Get(e.Key) kv disagreement !!!")) 263 | // } 264 | // } 265 | // } 266 | // // 测试升序 267 | // testRange(true) 268 | // // 测试降序 269 | // testRange(false) 270 | } 271 | 272 | // 驱动模块 273 | func buildLSM() *LSM { 274 | // init DB Basic Test 275 | c := make(chan map[uint32]int64, 16) 276 | opt.DiscardStatsCh = &c 277 | lsm := NewLSM(opt) 278 | return lsm 279 | } 280 | 281 | // 运行测试用例 282 | func runTest(n int, testFunList ...func()) { 283 | for _, f := range testFunList { 284 | for i := 0; i < n; i++ { 285 | f() 286 | } 287 | } 288 | } 289 | 290 | // 构建compactDef对象 291 | func buildCompactDef(lsm *LSM, id, thisLevel, nextLevel int) *compactDef { 292 | t := targets{ 293 | targetSz: []int64{0, 10485760, 10485760, 10485760, 10485760, 10485760, 10485760}, 294 | fileSz: []int64{1024, 2097152, 2097152, 2097152, 2097152, 2097152, 2097152}, 295 | baseLevel: nextLevel, 296 | } 297 | def := &compactDef{ 298 | compactorId: id, 299 | thisLevel: lsm.levels.levels[thisLevel], 300 | nextLevel: lsm.levels.levels[nextLevel], 301 | t: t, 302 | p: buildCompactionPriority(lsm, thisLevel, t), 303 | } 304 | return def 305 | } 306 | 307 | // 构建CompactionPriority对象 308 | func buildCompactionPriority(lsm *LSM, thisLevel int, t targets) compactionPriority { 309 | return compactionPriority{ 310 | level: thisLevel, 311 | score: 8.6, 312 | adjusted: 860, 313 | t: t, 314 | } 315 | } 316 | 317 | func tricky(tables []*table) { 318 | // 非常tricky的处理方法,为了能通过检查,检查所有逻辑分支 319 | for _, table := range tables { 320 | table.ss.Indexs().StaleDataSize = 10 << 20 321 | t, _ := time.Parse("2006-01-02 15:04:05", "1995-08-10 
00:00:00") 322 | table.ss.SetCreatedAt(&t) 323 | } 324 | } 325 | func clearDir() { 326 | _, err := os.Stat(opt.WorkDir) 327 | if err == nil { 328 | os.RemoveAll(opt.WorkDir) 329 | } 330 | os.Mkdir(opt.WorkDir, os.ModePerm) 331 | } 332 | -------------------------------------------------------------------------------- /db.go: -------------------------------------------------------------------------------- 1 | package corekv 2 | 3 | import ( 4 | "expvar" 5 | "fmt" 6 | "math" 7 | "sync" 8 | "sync/atomic" 9 | "time" 10 | 11 | "github.com/hardcore-os/corekv/lsm" 12 | "github.com/hardcore-os/corekv/utils" 13 | "github.com/pkg/errors" 14 | ) 15 | 16 | type ( 17 | // coreKV对外提供的功能集合 18 | CoreAPI interface { 19 | Set(data *utils.Entry) error 20 | Get(key []byte) (*utils.Entry, error) 21 | Del(key []byte) error 22 | NewIterator(opt *utils.Options) utils.Iterator 23 | Info() *Stats 24 | Close() error 25 | } 26 | 27 | // DB 对外暴露的接口对象 全局唯一,持有各种资源句柄 28 | DB struct { 29 | sync.RWMutex 30 | opt *Options 31 | lsm *lsm.LSM 32 | vlog *valueLog 33 | stats *Stats 34 | flushChan chan flushTask // For flushing memtables. 35 | writeCh chan *request 36 | blockWrites int32 37 | vhead *utils.ValuePtr 38 | logRotates int32 39 | } 40 | ) 41 | 42 | var ( 43 | head = []byte("!corekv!head") // For storing value offset for replay. 44 | ) 45 | 46 | /** 47 | SSTableMaxSz: 1024, 48 | MemTableSize: 1024, 49 | BlockSize: 1024, 50 | BloomFalsePositive: 0, 51 | BaseLevelSize: 10 << 20, 52 | LevelSizeMultiplier: 10, 53 | BaseTableSize: 2 << 20, 54 | TableSizeMultiplier: 2, 55 | NumLevelZeroTables: 15, 56 | MaxLevelNum: 7, 57 | NumCompactors: 3, 58 | */ 59 | // Open DB 60 | // TODO 这里是不是要上一个目录锁比较好,防止多个进程打开同一个目录? 
61 | func Open(opt *Options) *DB { 62 | c := utils.NewCloser() 63 | db := &DB{opt: opt} 64 | // 初始化vlog结构 65 | db.initVLog() 66 | // 初始化LSM结构 67 | db.lsm = lsm.NewLSM(&lsm.Options{ 68 | WorkDir: opt.WorkDir, 69 | MemTableSize: opt.MemTableSize, 70 | SSTableMaxSz: opt.SSTableMaxSz, 71 | BlockSize: 8 * 1024, 72 | BloomFalsePositive: 0, //0.01, 73 | BaseLevelSize: 10 << 20, 74 | LevelSizeMultiplier: 10, 75 | BaseTableSize: 5 << 20, 76 | TableSizeMultiplier: 2, 77 | NumLevelZeroTables: 15, 78 | MaxLevelNum: 7, 79 | NumCompactors: 1, 80 | DiscardStatsCh: &(db.vlog.lfDiscardStats.flushChan), 81 | }) 82 | // 初始化统计信息 83 | db.stats = newStats(opt) 84 | // 启动 sstable 的合并压缩过程 85 | go db.lsm.StartCompacter() 86 | // 准备vlog gc 87 | c.Add(1) 88 | db.writeCh = make(chan *request) 89 | db.flushChan = make(chan flushTask, 16) 90 | go db.doWrites(c) 91 | // 启动 info 统计过程 92 | go db.stats.StartStats() 93 | return db 94 | } 95 | 96 | func (db *DB) Close() error { 97 | db.vlog.lfDiscardStats.closer.Close() 98 | if err := db.lsm.Close(); err != nil { 99 | return err 100 | } 101 | if err := db.vlog.close(); err != nil { 102 | return err 103 | } 104 | if err := db.stats.close(); err != nil { 105 | return err 106 | } 107 | return nil 108 | } 109 | 110 | func (db *DB) Del(key []byte) error { 111 | // 写入一个值为nil的entry 作为墓碑消息实现删除 112 | return db.Set(&utils.Entry{ 113 | Key: key, 114 | Value: nil, 115 | ExpiresAt: 0, 116 | }) 117 | } 118 | func (db *DB) Set(data *utils.Entry) error { 119 | if data == nil || len(data.Key) == 0 { 120 | return utils.ErrEmptyKey 121 | } 122 | // 做一些必要性的检查 123 | // 如果value 大于一个阈值 则创建值指针,并将其写入vlog中 124 | var ( 125 | vp *utils.ValuePtr 126 | err error 127 | ) 128 | data.Key = utils.KeyWithTs(data.Key, math.MaxUint32) 129 | // 如果value不应该直接写入LSM 则先写入 vlog文件,这时必须保证vlog具有重放功能 130 | // 以便于崩溃后恢复数据 131 | if !db.shouldWriteValueToLSM(data) { 132 | if vp, err = db.vlog.newValuePtr(data); err != nil { 133 | return err 134 | } 135 | data.Meta |= utils.BitValuePointer 136 | 
data.Value = vp.Encode() 137 | } 138 | return db.lsm.Set(data) 139 | } 140 | func (db *DB) Get(key []byte) (*utils.Entry, error) { 141 | if len(key) == 0 { 142 | return nil, utils.ErrEmptyKey 143 | } 144 | 145 | originKey := key 146 | var ( 147 | entry *utils.Entry 148 | err error 149 | ) 150 | key = utils.KeyWithTs(key, math.MaxUint32) 151 | // 从LSM中查询entry,这时不确定entry是不是值指针 152 | if entry, err = db.lsm.Get(key); err != nil { 153 | return entry, err 154 | } 155 | // 检查从lsm拿到的value是否是value ptr,是则从vlog中拿值 156 | if entry != nil && utils.IsValuePtr(entry) { 157 | var vp utils.ValuePtr 158 | vp.Decode(entry.Value) 159 | result, cb, err := db.vlog.read(&vp) 160 | defer utils.RunCallback(cb) 161 | if err != nil { 162 | return nil, err 163 | } 164 | entry.Value = utils.SafeCopy(nil, result) 165 | } 166 | 167 | if lsm.IsDeletedOrExpired(entry) { 168 | return nil, utils.ErrKeyNotFound 169 | } 170 | entry.Key = originKey 171 | return entry, nil 172 | } 173 | 174 | func (db *DB) Info() *Stats { 175 | // 读取stats结构,打包数据并返回 176 | return db.stats 177 | } 178 | 179 | // RunValueLogGC triggers a value log garbage collection. 
180 | func (db *DB) RunValueLogGC(discardRatio float64) error { 181 | if discardRatio >= 1.0 || discardRatio <= 0.0 { 182 | return utils.ErrInvalidRequest 183 | } 184 | // Find head on disk 185 | headKey := utils.KeyWithTs(head, math.MaxUint64) 186 | val, err := db.lsm.Get(headKey) 187 | if err != nil { 188 | if err == utils.ErrKeyNotFound { 189 | val = &utils.Entry{ 190 | Key: headKey, 191 | Value: []byte{}, 192 | } 193 | } else { 194 | return errors.Wrap(err, "Retrieving head from on-disk LSM") 195 | } 196 | } 197 | 198 | // 内部key head 一定是value ptr 不需要检查内容 199 | var head utils.ValuePtr 200 | if len(val.Value) > 0 { 201 | head.Decode(val.Value) 202 | } 203 | 204 | // Pick a log file and run GC 205 | return db.vlog.runGC(discardRatio, &head) 206 | } 207 | 208 | func (db *DB) shouldWriteValueToLSM(e *utils.Entry) bool { 209 | return int64(len(e.Value)) < db.opt.ValueThreshold 210 | } 211 | 212 | func (db *DB) sendToWriteCh(entries []*utils.Entry) (*request, error) { 213 | if atomic.LoadInt32(&db.blockWrites) == 1 { 214 | return nil, utils.ErrBlockedWrites 215 | } 216 | var count, size int64 217 | for _, e := range entries { 218 | size += int64(e.EstimateSize(int(db.opt.ValueThreshold))) 219 | count++ 220 | } 221 | if count >= db.opt.MaxBatchCount || size >= db.opt.MaxBatchSize { 222 | return nil, utils.ErrTxnTooBig 223 | } 224 | 225 | // TODO 尝试使用对象复用,后面entry对象也应该使用 226 | req := requestPool.Get().(*request) 227 | req.reset() 228 | req.Entries = entries 229 | req.Wg.Add(1) 230 | req.IncrRef() // for db write 231 | db.writeCh <- req // Handled in doWrites. 
232 | return req, nil 233 | } 234 | 235 | // Check(kv.BatchSet(entries)) 236 | func (db *DB) batchSet(entries []*utils.Entry) error { 237 | req, err := db.sendToWriteCh(entries) 238 | if err != nil { 239 | return err 240 | } 241 | 242 | return req.Wait() 243 | } 244 | 245 | func (db *DB) doWrites(lc *utils.Closer) { 246 | defer lc.Done() 247 | pendingCh := make(chan struct{}, 1) 248 | 249 | writeRequests := func(reqs []*request) { 250 | if err := db.writeRequests(reqs); err != nil { 251 | utils.Err(fmt.Errorf("writeRequests: %v", err)) 252 | } 253 | <-pendingCh 254 | } 255 | 256 | // This variable tracks the number of pending writes. 257 | reqLen := new(expvar.Int) 258 | 259 | reqs := make([]*request, 0, 10) 260 | for { 261 | var r *request 262 | select { 263 | case r = <-db.writeCh: 264 | case <-lc.CloseSignal: 265 | goto closedCase 266 | } 267 | 268 | for { 269 | reqs = append(reqs, r) 270 | reqLen.Set(int64(len(reqs))) 271 | 272 | if len(reqs) >= 3*utils.KVWriteChCapacity { 273 | pendingCh <- struct{}{} // blocking. 274 | goto writeCase 275 | } 276 | 277 | select { 278 | // Either push to pending, or continue to pick from writeCh. 279 | case r = <-db.writeCh: 280 | case pendingCh <- struct{}{}: 281 | goto writeCase 282 | case <-lc.CloseSignal: 283 | goto closedCase 284 | } 285 | } 286 | 287 | closedCase: 288 | // All the pending request are drained. 289 | // Don't close the writeCh, because it has be used in several places. 290 | for { 291 | select { 292 | case r = <-db.writeCh: 293 | reqs = append(reqs, r) 294 | default: 295 | pendingCh <- struct{}{} // Push to pending before doing a write. 296 | writeRequests(reqs) 297 | return 298 | } 299 | } 300 | 301 | writeCase: 302 | go writeRequests(reqs) 303 | reqs = make([]*request, 0, 10) 304 | reqLen.Set(0) 305 | } 306 | } 307 | 308 | // writeRequests is called serially by only one goroutine. 
309 | func (db *DB) writeRequests(reqs []*request) error { 310 | if len(reqs) == 0 { 311 | return nil 312 | } 313 | 314 | done := func(err error) { 315 | for _, r := range reqs { 316 | r.Err = err 317 | r.Wg.Done() 318 | } 319 | } 320 | err := db.vlog.write(reqs) 321 | if err != nil { 322 | done(err) 323 | return err 324 | } 325 | var count int 326 | for _, b := range reqs { 327 | if len(b.Entries) == 0 { 328 | continue 329 | } 330 | count += len(b.Entries) 331 | if err != nil { 332 | done(err) 333 | return errors.Wrap(err, "writeRequests") 334 | } 335 | if err := db.writeToLSM(b); err != nil { 336 | done(err) 337 | return errors.Wrap(err, "writeRequests") 338 | } 339 | db.Lock() 340 | db.updateHead(b.Ptrs) 341 | db.Unlock() 342 | } 343 | done(nil) 344 | return nil 345 | } 346 | func (db *DB) writeToLSM(b *request) error { 347 | if len(b.Ptrs) != len(b.Entries) { 348 | return errors.Errorf("Ptrs and Entries don't match: %+v", b) 349 | } 350 | 351 | for i, entry := range b.Entries { 352 | if db.shouldWriteValueToLSM(entry) { // Will include deletion / tombstone case. 353 | entry.Meta = entry.Meta &^ utils.BitValuePointer 354 | } else { 355 | entry.Meta = entry.Meta | utils.BitValuePointer 356 | entry.Value = b.Ptrs[i].Encode() 357 | } 358 | db.lsm.Set(entry) 359 | } 360 | return nil 361 | } 362 | func (req *request) IncrRef() { 363 | atomic.AddInt32(&req.ref, 1) 364 | } 365 | 366 | func (req *request) DecrRef() { 367 | nRef := atomic.AddInt32(&req.ref, -1) 368 | if nRef > 0 { 369 | return 370 | } 371 | req.Entries = nil 372 | requestPool.Put(req) 373 | } 374 | 375 | func (req *request) Wait() error { 376 | req.Wg.Wait() 377 | err := req.Err 378 | req.DecrRef() // DecrRef after writing to DB. 379 | return err 380 | } 381 | 382 | // 结构体 383 | type flushTask struct { 384 | mt *utils.Skiplist 385 | vptr *utils.ValuePtr 386 | dropPrefixes [][]byte 387 | } 388 | 389 | func (db *DB) pushHead(ft flushTask) error { 390 | // Ensure we never push a zero valued head pointer. 
391 | if ft.vptr.IsZero() { 392 | return errors.New("Head should not be zero") 393 | } 394 | 395 | fmt.Printf("Storing value log head: %+v\n", ft.vptr) 396 | val := ft.vptr.Encode() 397 | 398 | // Pick the max commit ts, so in case of crash, our read ts would be higher than all the 399 | // commits. 400 | headTs := utils.KeyWithTs(head, uint64(time.Now().Unix()/1e9)) 401 | ft.mt.Add(&utils.Entry{ 402 | Key: headTs, 403 | Value: val, 404 | }) 405 | return nil 406 | } 407 | -------------------------------------------------------------------------------- /lsm/levels.go: -------------------------------------------------------------------------------- 1 | package lsm 2 | 3 | import ( 4 | "bytes" 5 | "sort" 6 | "sync" 7 | "sync/atomic" 8 | 9 | "github.com/hardcore-os/corekv/file" 10 | "github.com/hardcore-os/corekv/utils" 11 | ) 12 | 13 | // initLevelManager 初始化函数 14 | func (lsm *LSM) initLevelManager(opt *Options) *levelManager { 15 | lm := &levelManager{lsm: lsm} // 反引用 16 | lm.compactState = lsm.newCompactStatus() 17 | lm.opt = opt 18 | // 读取manifest文件构建管理器 19 | if err := lm.loadManifest(); err != nil { 20 | panic(err) 21 | } 22 | lm.build() 23 | return lm 24 | } 25 | 26 | type levelManager struct { 27 | maxFID uint64 // 已经分配出去的最大fid,只要创建了memtable 就算已分配 28 | opt *Options 29 | cache *cache 30 | manifestFile *file.ManifestFile 31 | levels []*levelHandler 32 | lsm *LSM 33 | compactState *compactStatus 34 | } 35 | 36 | func (lm *levelManager) close() error { 37 | if err := lm.cache.close(); err != nil { 38 | return err 39 | } 40 | if err := lm.manifestFile.Close(); err != nil { 41 | return err 42 | } 43 | for i := range lm.levels { 44 | if err := lm.levels[i].close(); err != nil { 45 | return err 46 | } 47 | } 48 | return nil 49 | } 50 | 51 | func (lm *levelManager) iterators() []utils.Iterator { 52 | 53 | itrs := make([]utils.Iterator, 0, len(lm.levels)) 54 | for _, level := range lm.levels { 55 | itrs = append(itrs, level.iterators()...) 
56 | } 57 | return itrs 58 | } 59 | 60 | func (lm *levelManager) Get(key []byte) (*utils.Entry, error) { 61 | var ( 62 | entry *utils.Entry 63 | err error 64 | ) 65 | // L0层查询 66 | if entry, err = lm.levels[0].Get(key); entry != nil { 67 | return entry, err 68 | } 69 | // L1-7层查询 70 | for level := 1; level < lm.opt.MaxLevelNum; level++ { 71 | ld := lm.levels[level] 72 | if entry, err = ld.Get(key); entry != nil { 73 | return entry, err 74 | } 75 | } 76 | return entry, utils.ErrKeyNotFound 77 | } 78 | 79 | func (lm *levelManager) loadCache() { 80 | 81 | } 82 | func (lm *levelManager) loadManifest() (err error) { 83 | lm.manifestFile, err = file.OpenManifestFile(&file.Options{Dir: lm.opt.WorkDir}) 84 | return err 85 | } 86 | func (lm *levelManager) build() error { 87 | lm.levels = make([]*levelHandler, 0, lm.opt.MaxLevelNum) 88 | for i := 0; i < lm.opt.MaxLevelNum; i++ { 89 | lm.levels = append(lm.levels, &levelHandler{ 90 | levelNum: i, 91 | tables: make([]*table, 0), 92 | lm: lm, 93 | }) 94 | } 95 | 96 | manifest := lm.manifestFile.GetManifest() 97 | // 对比manifest 文件的正确性 98 | if err := lm.manifestFile.RevertToManifest(utils.LoadIDMap(lm.opt.WorkDir)); err != nil { 99 | return err 100 | } 101 | // 逐一加载sstable 的index block 构建cache 102 | lm.cache = newCache(lm.opt) 103 | // TODO 初始化的时候index 结构放在了table中,相当于全部加载到了内存,减少了一次读磁盘,但增加了内存消耗 104 | var maxFID uint64 105 | for fID, tableInfo := range manifest.Tables { 106 | fileName := utils.FileNameSSTable(lm.opt.WorkDir, fID) 107 | if fID > maxFID { 108 | maxFID = fID 109 | } 110 | t := openTable(lm, fileName, nil) 111 | lm.levels[tableInfo.Level].add(t) 112 | lm.levels[tableInfo.Level].addSize(t) // 记录一个level的文件总大小 113 | } 114 | // 对每一层进行排序 115 | for i := 0; i < lm.opt.MaxLevelNum; i++ { 116 | lm.levels[i].Sort() 117 | } 118 | // 得到最大的fid值 119 | atomic.AddUint64(&lm.maxFID, maxFID) 120 | return nil 121 | } 122 | 123 | // 向L0层flush一个sstable 124 | func (lm *levelManager) flush(immutable *memTable) (err error) { 125 | // 分配一个fid 
126 | fid := immutable.wal.Fid() 127 | sstName := utils.FileNameSSTable(lm.opt.WorkDir, fid) 128 | 129 | // 构建一个 builder 130 | builder := newTableBuiler(lm.opt) 131 | iter := immutable.sl.NewSkipListIterator() 132 | for iter.Rewind(); iter.Valid(); iter.Next() { 133 | entry := iter.Item().Entry() 134 | builder.add(entry, false) 135 | } 136 | // 创建一个 table 对象 137 | table := openTable(lm, sstName, builder) 138 | err = lm.manifestFile.AddTableMeta(0, &file.TableMeta{ 139 | ID: fid, 140 | Checksum: []byte{'m', 'o', 'c', 'k'}, 141 | }) 142 | // manifest写入失败直接panic 143 | utils.Panic(err) 144 | // 更新manifest文件 145 | lm.levels[0].add(table) 146 | return 147 | } 148 | 149 | //--------- level处理器 ------- 150 | type levelHandler struct { 151 | sync.RWMutex 152 | levelNum int 153 | tables []*table 154 | totalSize int64 155 | totalStaleSize int64 156 | lm *levelManager 157 | } 158 | 159 | func (lh *levelHandler) close() error { 160 | for i := range lh.tables { 161 | if err := lh.tables[i].ss.Close(); err != nil { 162 | return err 163 | } 164 | } 165 | return nil 166 | } 167 | func (lh *levelHandler) add(t *table) { 168 | lh.Lock() 169 | defer lh.Unlock() 170 | lh.tables = append(lh.tables, t) 171 | } 172 | func (lh *levelHandler) addBatch(ts []*table) { 173 | lh.Lock() 174 | defer lh.Unlock() 175 | lh.tables = append(lh.tables, ts...) 
176 | } 177 | 178 | func (lh *levelHandler) getTotalSize() int64 { 179 | lh.RLock() 180 | defer lh.RUnlock() 181 | return lh.totalSize 182 | } 183 | 184 | func (lh *levelHandler) addSize(t *table) { 185 | lh.totalSize += t.Size() 186 | lh.totalStaleSize += int64(t.StaleDataSize()) 187 | } 188 | 189 | func (lh *levelHandler) subtractSize(t *table) { 190 | lh.totalSize -= t.Size() 191 | lh.totalStaleSize -= int64(t.StaleDataSize()) 192 | } 193 | 194 | func (lh *levelHandler) numTables() int { 195 | lh.RLock() 196 | defer lh.RUnlock() 197 | return len(lh.tables) 198 | } 199 | 200 | func (lh *levelHandler) Get(key []byte) (*utils.Entry, error) { 201 | // 如果是第0层文件则进行特殊处理 202 | if lh.levelNum == 0 { 203 | // TODO: logic... 204 | // 获取可能存在key的sst 205 | return lh.searchL0SST(key) 206 | } else { 207 | // TODO: logic... 208 | return lh.searchLNSST(key) 209 | } 210 | } 211 | 212 | func (lh *levelHandler) Sort() { 213 | lh.Lock() 214 | defer lh.Unlock() 215 | if lh.levelNum == 0 { 216 | // Key range will overlap. Just sort by fileID in ascending order 217 | // because newer tables are at the end of level 0. 218 | sort.Slice(lh.tables, func(i, j int) bool { 219 | return lh.tables[i].fid < lh.tables[j].fid 220 | }) 221 | } else { 222 | // Sort tables by keys. 
223 | sort.Slice(lh.tables, func(i, j int) bool { 224 | return utils.CompareKeys(lh.tables[i].ss.MinKey(), lh.tables[j].ss.MinKey()) < 0 225 | }) 226 | } 227 | } 228 | 229 | func (lh *levelHandler) searchL0SST(key []byte) (*utils.Entry, error) { 230 | var version uint64 231 | for _, table := range lh.tables { 232 | if entry, err := table.Serach(key, &version); err == nil { 233 | return entry, nil 234 | } 235 | } 236 | return nil, utils.ErrKeyNotFound 237 | } 238 | func (lh *levelHandler) searchLNSST(key []byte) (*utils.Entry, error) { 239 | table := lh.getTable(key) 240 | var version uint64 241 | if table == nil { 242 | return nil, utils.ErrKeyNotFound 243 | } 244 | if entry, err := table.Serach(key, &version); err == nil { 245 | return entry, nil 246 | } 247 | return nil, utils.ErrKeyNotFound 248 | } 249 | func (lh *levelHandler) getTable(key []byte) *table { 250 | if len(lh.tables) > 0 && (bytes.Compare(key, lh.tables[0].ss.MinKey()) < 0 || bytes.Compare(key, lh.tables[len(lh.tables)-1].ss.MaxKey()) > 0) { 251 | return nil 252 | } else { 253 | for i := len(lh.tables) - 1; i >= 0; i-- { 254 | if bytes.Compare(key, lh.tables[i].ss.MinKey()) > -1 && 255 | bytes.Compare(key, lh.tables[i].ss.MaxKey()) < 1 { 256 | return lh.tables[i] 257 | } 258 | } 259 | } 260 | return nil 261 | } 262 | func (lh *levelHandler) isLastLevel() bool { 263 | return lh.levelNum == lh.lm.opt.MaxLevelNum-1 264 | } 265 | 266 | type levelHandlerRLocked struct{} 267 | 268 | // overlappingTables returns the tables that intersect with key range. Returns a half-interval. 269 | // This function should already have acquired a read lock, and this is so important the caller must 270 | // pass an empty parameter declaring such. 
271 | func (lh *levelHandler) overlappingTables(_ levelHandlerRLocked, kr keyRange) (int, int) { 272 | if len(kr.left) == 0 || len(kr.right) == 0 { 273 | return 0, 0 274 | } 275 | left := sort.Search(len(lh.tables), func(i int) bool { 276 | return utils.CompareKeys(kr.left, lh.tables[i].ss.MaxKey()) <= 0 277 | }) 278 | right := sort.Search(len(lh.tables), func(i int) bool { 279 | return utils.CompareKeys(kr.right, lh.tables[i].ss.MaxKey()) < 0 280 | }) 281 | return left, right 282 | } 283 | 284 | // replaceTables will replace tables[left:right] with newTables. Note this EXCLUDES tables[right]. 285 | // You must call decr() to delete the old tables _after_ writing the update to the manifest. 286 | func (lh *levelHandler) replaceTables(toDel, toAdd []*table) error { 287 | // Need to re-search the range of tables in this level to be replaced as other goroutines might 288 | // be changing it as well. (They can't touch our tables, but if they add/remove other tables, 289 | // the indices get shifted around.) 290 | lh.Lock() // We s.Unlock() below. 291 | 292 | toDelMap := make(map[uint64]struct{}) 293 | for _, t := range toDel { 294 | toDelMap[t.fid] = struct{}{} 295 | } 296 | var newTables []*table 297 | for _, t := range lh.tables { 298 | _, found := toDelMap[t.fid] 299 | if !found { 300 | newTables = append(newTables, t) 301 | continue 302 | } 303 | lh.subtractSize(t) 304 | } 305 | 306 | // Increase totalSize first. 307 | for _, t := range toAdd { 308 | lh.addSize(t) 309 | t.IncrRef() 310 | newTables = append(newTables, t) 311 | } 312 | 313 | // Assign tables. 314 | lh.tables = newTables 315 | sort.Slice(lh.tables, func(i, j int) bool { 316 | return utils.CompareKeys(lh.tables[i].ss.MinKey(), lh.tables[i].ss.MinKey()) < 0 317 | }) 318 | lh.Unlock() // s.Unlock before we DecrRef tables -- that can be slow. 319 | return decrRefs(toDel) 320 | } 321 | 322 | // deleteTables remove tables idx0, ..., idx1-1. 
323 | func (lh *levelHandler) deleteTables(toDel []*table) error { 324 | lh.Lock() // s.Unlock() below 325 | 326 | toDelMap := make(map[uint64]struct{}) 327 | for _, t := range toDel { 328 | toDelMap[t.fid] = struct{}{} 329 | } 330 | 331 | // Make a copy as iterators might be keeping a slice of tables. 332 | var newTables []*table 333 | for _, t := range lh.tables { 334 | _, found := toDelMap[t.fid] 335 | if !found { 336 | newTables = append(newTables, t) 337 | continue 338 | } 339 | lh.subtractSize(t) 340 | } 341 | lh.tables = newTables 342 | 343 | lh.Unlock() // Unlock s _before_ we DecrRef our tables, which can be slow. 344 | 345 | return decrRefs(toDel) 346 | } 347 | 348 | func (lh *levelHandler) iterators() []utils.Iterator { 349 | lh.RLock() 350 | defer lh.RUnlock() 351 | topt := &utils.Options{IsAsc: true} 352 | if lh.levelNum == 0 { 353 | return iteratorsReversed(lh.tables, topt) 354 | } 355 | 356 | if len(lh.tables) == 0 { 357 | return nil 358 | } 359 | return []utils.Iterator{NewConcatIterator(lh.tables, topt)} 360 | } 361 | -------------------------------------------------------------------------------- /lsm/table.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package lsm 16 | 17 | import ( 18 | "encoding/binary" 19 | "fmt" 20 | "io" 21 | "math" 22 | "os" 23 | "sort" 24 | "strings" 25 | "sync/atomic" 26 | "time" 27 | 28 | "github.com/hardcore-os/corekv/file" 29 | "github.com/hardcore-os/corekv/pb" 30 | "github.com/hardcore-os/corekv/utils" 31 | "github.com/pkg/errors" 32 | ) 33 | 34 | type table struct { 35 | ss *file.SSTable 36 | lm *levelManager 37 | fid uint64 38 | ref int32 // For file garbage collection. Atomic. 39 | } 40 | 41 | func openTable(lm *levelManager, tableName string, builder *tableBuilder) *table { 42 | sstSize := int(lm.opt.SSTableMaxSz) 43 | if builder != nil { 44 | sstSize = int(builder.done().size) 45 | } 46 | var ( 47 | t *table 48 | err error 49 | ) 50 | fid := utils.FID(tableName) 51 | // 对builder存在的情况 把buf flush到磁盘 52 | if builder != nil { 53 | if t, err = builder.flush(lm, tableName); err != nil { 54 | utils.Err(err) 55 | return nil 56 | } 57 | } else { 58 | t = &table{lm: lm, fid: fid} 59 | // 如果没有builder 则创打开一个已经存在的sst文件 60 | t.ss = file.OpenSStable(&file.Options{ 61 | FileName: tableName, 62 | Dir: lm.opt.WorkDir, 63 | Flag: os.O_CREATE | os.O_RDWR, 64 | MaxSz: int(sstSize)}) 65 | } 66 | // 先要引用一下,否则后面使用迭代器会导致引用状态错误 67 | t.IncrRef() 68 | // 初始化sst文件,把index加载进来 69 | if err := t.ss.Init(); err != nil { 70 | utils.Err(err) 71 | return nil 72 | } 73 | 74 | // 获取sst的最大key 需要使用迭代器 75 | itr := t.NewIterator(&utils.Options{}) // 默认是降序 76 | defer itr.Close() 77 | // 定位到初始位置就是最大的key 78 | itr.Rewind() 79 | utils.CondPanic(!itr.Valid(), errors.Errorf("failed to read index, form maxKey")) 80 | maxKey := itr.Item().Entry().Key 81 | t.ss.SetMaxKey(maxKey) 82 | 83 | return t 84 | } 85 | 86 | // Serach 从table中查找key 87 | func (t *table) Serach(key []byte, maxVs *uint64) (entry *utils.Entry, err error) { 88 | t.IncrRef() 89 | defer t.DecrRef() 90 | // 获取索引 91 | idx := t.ss.Indexs() 92 | // 检查key是否存在 93 | bloomFilter := utils.Filter(idx.BloomFilter) 94 | if t.ss.HasBloomFilter() && 
!bloomFilter.MayContainKey(key) { 95 | return nil, utils.ErrKeyNotFound 96 | } 97 | iter := t.NewIterator(&utils.Options{}) 98 | defer iter.Close() 99 | 100 | iter.Seek(key) 101 | if !iter.Valid() { 102 | return nil, utils.ErrKeyNotFound 103 | } 104 | 105 | if utils.SameKey(key, iter.Item().Entry().Key) { 106 | if version := utils.ParseTs(iter.Item().Entry().Key); *maxVs < version { 107 | *maxVs = version 108 | return iter.Item().Entry(), nil 109 | } 110 | } 111 | return nil, utils.ErrKeyNotFound 112 | } 113 | 114 | func (t *table) indexKey() uint64 { 115 | return t.fid 116 | } 117 | func (t *table) getEntry(key, block []byte, idx int) (entry *utils.Entry, err error) { 118 | if len(block) == 0 { 119 | return nil, utils.ErrKeyNotFound 120 | } 121 | dataStr := string(block) 122 | blocks := strings.Split(dataStr, ",") 123 | if idx >= 0 && idx < len(blocks) { 124 | return &utils.Entry{ 125 | Key: key, 126 | Value: []byte(blocks[idx]), 127 | }, nil 128 | } 129 | return nil, utils.ErrKeyNotFound 130 | } 131 | 132 | // 去加载sst对应的block 133 | func (t *table) block(idx int) (*block, error) { 134 | utils.CondPanic(idx < 0, fmt.Errorf("idx=%d", idx)) 135 | if idx >= len(t.ss.Indexs().Offsets) { 136 | return nil, errors.New("block out of index") 137 | } 138 | var b *block 139 | key := t.blockCacheKey(idx) 140 | blk, ok := t.lm.cache.blocks.Get(key) 141 | if ok && blk != nil { 142 | b, _ = blk.(*block) 143 | return b, nil 144 | } 145 | 146 | var ko pb.BlockOffset 147 | utils.CondPanic(!t.offsets(&ko, idx), fmt.Errorf("block t.offset id=%d", idx)) 148 | b = &block{ 149 | offset: int(ko.GetOffset()), 150 | } 151 | 152 | var err error 153 | if b.data, err = t.read(b.offset, int(ko.GetLen())); err != nil { 154 | return nil, errors.Wrapf(err, 155 | "failed to read from sstable: %d at offset: %d, len: %d", 156 | t.ss.FID(), b.offset, ko.GetLen()) 157 | } 158 | 159 | readPos := len(b.data) - 4 // First read checksum length. 
160 | b.chkLen = int(utils.BytesToU32(b.data[readPos : readPos+4])) 161 | 162 | if b.chkLen > len(b.data) { 163 | return nil, errors.New("invalid checksum length. Either the data is " + 164 | "corrupted or the table options are incorrectly set") 165 | } 166 | 167 | readPos -= b.chkLen 168 | b.checksum = b.data[readPos : readPos+b.chkLen] 169 | 170 | b.data = b.data[:readPos] 171 | 172 | if err = b.verifyCheckSum(); err != nil { 173 | return nil, err 174 | } 175 | 176 | readPos -= 4 177 | numEntries := int(utils.BytesToU32(b.data[readPos : readPos+4])) 178 | entriesIndexStart := readPos - (numEntries * 4) 179 | entriesIndexEnd := entriesIndexStart + numEntries*4 180 | 181 | b.entryOffsets = utils.BytesToU32Slice(b.data[entriesIndexStart:entriesIndexEnd]) 182 | 183 | b.entriesIndexStart = entriesIndexStart 184 | 185 | t.lm.cache.blocks.Set(key, b) 186 | 187 | return b, nil 188 | } 189 | 190 | func (t *table) read(off, sz int) ([]byte, error) { 191 | return t.ss.Bytes(off, sz) 192 | } 193 | 194 | // blockCacheKey is used to store blocks in the block cache. 195 | func (t *table) blockCacheKey(idx int) []byte { 196 | utils.CondPanic(t.fid >= math.MaxUint32, fmt.Errorf("t.fid >= math.MaxUint32")) 197 | utils.CondPanic(uint32(idx) >= math.MaxUint32, fmt.Errorf("uint32(idx) >= math.MaxUint32")) 198 | 199 | buf := make([]byte, 8) 200 | // Assume t.ID does not overflow uint32. 
201 | binary.BigEndian.PutUint32(buf[:4], uint32(t.fid)) 202 | binary.BigEndian.PutUint32(buf[4:], uint32(idx)) 203 | return buf 204 | } 205 | 206 | type tableIterator struct { 207 | it utils.Item 208 | opt *utils.Options 209 | t *table 210 | blockPos int 211 | bi *blockIterator 212 | err error 213 | } 214 | 215 | func (t *table) NewIterator(options *utils.Options) utils.Iterator { 216 | t.IncrRef() 217 | return &tableIterator{ 218 | opt: options, 219 | t: t, 220 | bi: &blockIterator{}, 221 | } 222 | } 223 | func (it *tableIterator) Next() { 224 | it.err = nil 225 | 226 | if it.blockPos >= len(it.t.ss.Indexs().GetOffsets()) { 227 | it.err = io.EOF 228 | return 229 | } 230 | 231 | if len(it.bi.data) == 0 { 232 | block, err := it.t.block(it.blockPos) 233 | if err != nil { 234 | it.err = err 235 | return 236 | } 237 | it.bi.tableID = it.t.fid 238 | it.bi.blockID = it.blockPos 239 | it.bi.setBlock(block) 240 | it.bi.seekToFirst() 241 | it.err = it.bi.Error() 242 | return 243 | } 244 | 245 | it.bi.Next() 246 | if !it.bi.Valid() { 247 | it.blockPos++ 248 | it.bi.data = nil 249 | it.Next() 250 | return 251 | } 252 | it.it = it.bi.it 253 | } 254 | func (it *tableIterator) Valid() bool { 255 | return it.err != io.EOF // 如果没有的时候 则是EOF 256 | } 257 | func (it *tableIterator) Rewind() { 258 | if it.opt.IsAsc { 259 | it.seekToFirst() 260 | } else { 261 | it.seekToLast() 262 | } 263 | } 264 | func (it *tableIterator) Item() utils.Item { 265 | return it.it 266 | } 267 | func (it *tableIterator) Close() error { 268 | it.bi.Close() 269 | return it.t.DecrRef() 270 | } 271 | func (it *tableIterator) seekToFirst() { 272 | numBlocks := len(it.t.ss.Indexs().Offsets) 273 | if numBlocks == 0 { 274 | it.err = io.EOF 275 | return 276 | } 277 | it.blockPos = 0 278 | block, err := it.t.block(it.blockPos) 279 | if err != nil { 280 | it.err = err 281 | return 282 | } 283 | it.bi.tableID = it.t.fid 284 | it.bi.blockID = it.blockPos 285 | it.bi.setBlock(block) 286 | it.bi.seekToFirst() 287 | it.it 
= it.bi.Item() 288 | it.err = it.bi.Error() 289 | } 290 | 291 | func (it *tableIterator) seekToLast() { 292 | numBlocks := len(it.t.ss.Indexs().Offsets) 293 | if numBlocks == 0 { 294 | it.err = io.EOF 295 | return 296 | } 297 | it.blockPos = numBlocks - 1 298 | block, err := it.t.block(it.blockPos) 299 | if err != nil { 300 | it.err = err 301 | return 302 | } 303 | it.bi.tableID = it.t.fid 304 | it.bi.blockID = it.blockPos 305 | it.bi.setBlock(block) 306 | it.bi.seekToLast() 307 | it.it = it.bi.Item() 308 | it.err = it.bi.Error() 309 | } 310 | 311 | // Seek 312 | // 二分法搜索 offsets 313 | // 如果idx == 0 说明key只能在第一个block中 block[0].MinKey <= key 314 | // 否则 block[0].MinKey > key 315 | // 如果在 idx-1 的block中未找到key 那才可能在 idx 中 316 | // 如果都没有,则当前key不再此table 317 | func (it *tableIterator) Seek(key []byte) { 318 | var ko pb.BlockOffset 319 | idx := sort.Search(len(it.t.ss.Indexs().GetOffsets()), func(idx int) bool { 320 | utils.CondPanic(!it.t.offsets(&ko, idx), fmt.Errorf("tableutils.Seek idx < 0 || idx > len(index.GetOffsets()")) 321 | if idx == len(it.t.ss.Indexs().GetOffsets()) { 322 | return true 323 | } 324 | return utils.CompareKeys(ko.GetKey(), key) > 0 325 | }) 326 | if idx == 0 { 327 | it.seekHelper(0, key) 328 | return 329 | } 330 | it.seekHelper(idx-1, key) 331 | } 332 | 333 | func (it *tableIterator) seekHelper(blockIdx int, key []byte) { 334 | it.blockPos = blockIdx 335 | block, err := it.t.block(blockIdx) 336 | if err != nil { 337 | it.err = err 338 | return 339 | } 340 | it.bi.tableID = it.t.fid 341 | it.bi.blockID = it.blockPos 342 | it.bi.setBlock(block) 343 | it.bi.seek(key) 344 | it.err = it.bi.Error() 345 | it.it = it.bi.Item() 346 | } 347 | 348 | func (t *table) offsets(ko *pb.BlockOffset, i int) bool { 349 | index := t.ss.Indexs() 350 | if i < 0 || i > len(index.GetOffsets()) { 351 | return false 352 | } 353 | if i == len(index.GetOffsets()) { 354 | return true 355 | } 356 | *ko = *index.GetOffsets()[i] 357 | return true 358 | } 359 | 360 | // Size is its 
file size in bytes 361 | func (t *table) Size() int64 { return int64(t.ss.Size()) } 362 | 363 | // GetCreatedAt 364 | func (t *table) GetCreatedAt() *time.Time { 365 | return t.ss.GetCreatedAt() 366 | } 367 | func (t *table) Delete() error { 368 | return t.ss.Detele() 369 | } 370 | 371 | // StaleDataSize is the amount of stale data (that can be dropped by a compaction )in this SST. 372 | func (t *table) StaleDataSize() uint32 { return t.ss.Indexs().StaleDataSize } 373 | 374 | // DecrRef decrements the refcount and possibly deletes the table 375 | func (t *table) DecrRef() error { 376 | newRef := atomic.AddInt32(&t.ref, -1) 377 | if newRef == 0 { 378 | // TODO 从缓存中删除 379 | for i := 0; i < len(t.ss.Indexs().GetOffsets()); i++ { 380 | t.lm.cache.blocks.Del(t.blockCacheKey(i)) 381 | } 382 | if err := t.Delete(); err != nil { 383 | return err 384 | } 385 | } 386 | return nil 387 | } 388 | 389 | func (t *table) IncrRef() { 390 | atomic.AddInt32(&t.ref, 1) 391 | } 392 | func decrRefs(tables []*table) error { 393 | for _, table := range tables { 394 | if err := table.DecrRef(); err != nil { 395 | return err 396 | } 397 | } 398 | return nil 399 | } 400 | -------------------------------------------------------------------------------- /lsm/iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 logicrec Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package lsm 16 | 17 | import ( 18 | "bytes" 19 | "fmt" 20 | "sort" 21 | 22 | "github.com/hardcore-os/corekv/utils" 23 | ) 24 | 25 | type Iterator struct { 26 | it Item 27 | iters []utils.Iterator 28 | } 29 | type Item struct { 30 | e *utils.Entry 31 | } 32 | 33 | func (it *Item) Entry() *utils.Entry { 34 | return it.e 35 | } 36 | 37 | // 创建迭代器 38 | func (lsm *LSM) NewIterators(opt *utils.Options) []utils.Iterator { 39 | iter := &Iterator{} 40 | iter.iters = make([]utils.Iterator, 0) 41 | iter.iters = append(iter.iters, lsm.memTable.NewIterator(opt)) 42 | for _, imm := range lsm.immutables { 43 | iter.iters = append(iter.iters, imm.NewIterator(opt)) 44 | } 45 | iter.iters = append(iter.iters, lsm.levels.iterators()...) 46 | return iter.iters 47 | } 48 | func (iter *Iterator) Next() { 49 | iter.iters[0].Next() 50 | } 51 | func (iter *Iterator) Valid() bool { 52 | return iter.iters[0].Valid() 53 | } 54 | func (iter *Iterator) Rewind() { 55 | iter.iters[0].Rewind() 56 | } 57 | func (iter *Iterator) Item() utils.Item { 58 | return iter.iters[0].Item() 59 | } 60 | func (iter *Iterator) Close() error { 61 | return nil 62 | } 63 | 64 | func (iter *Iterator) Seek(key []byte) { 65 | } 66 | 67 | // 内存表迭代器 68 | type memIterator struct { 69 | innerIter utils.Iterator 70 | } 71 | 72 | func (m *memTable) NewIterator(opt *utils.Options) utils.Iterator { 73 | return &memIterator{innerIter: m.sl.NewSkipListIterator()} 74 | } 75 | func (iter *memIterator) Next() { 76 | iter.innerIter.Next() 77 | } 78 | func (iter *memIterator) Valid() bool { 79 | return iter.innerIter.Valid() 80 | } 81 | func (iter *memIterator) Rewind() { 82 | iter.innerIter.Rewind() 83 | } 84 | func (iter *memIterator) Item() utils.Item { 85 | return iter.innerIter.Item() 86 | } 87 | func (iter *memIterator) Close() error { 88 | return iter.innerIter.Close() 89 | } 90 | func (iter *memIterator) Seek(key []byte) { 91 | } 92 | 93 | // levelManager上的迭代器 94 | type levelIterator struct { 95 | it *utils.Item 96 
| iters []*Iterator 97 | } 98 | 99 | func (lm *levelManager) NewIterators(options *utils.Options) []utils.Iterator { 100 | return lm.iterators() 101 | } 102 | func (iter *levelIterator) Next() { 103 | } 104 | func (iter *levelIterator) Valid() bool { 105 | return false 106 | } 107 | func (iter *levelIterator) Rewind() { 108 | 109 | } 110 | func (iter *levelIterator) Item() utils.Item { 111 | return &Item{} 112 | } 113 | func (iter *levelIterator) Close() error { 114 | return nil 115 | } 116 | 117 | func (iter *levelIterator) Seek(key []byte) { 118 | } 119 | 120 | // ConcatIterator 将table 数组链接成一个迭代器,这样迭代效率更高 121 | type ConcatIterator struct { 122 | idx int // Which iterator is active now. 123 | cur utils.Iterator 124 | iters []utils.Iterator // Corresponds to tables. 125 | tables []*table // Disregarding reversed, this is in ascending order. 126 | options *utils.Options // Valid options are REVERSED and NOCACHE. 127 | } 128 | 129 | // NewConcatIterator creates a new concatenated iterator 130 | func NewConcatIterator(tbls []*table, opt *utils.Options) *ConcatIterator { 131 | iters := make([]utils.Iterator, len(tbls)) 132 | return &ConcatIterator{ 133 | options: opt, 134 | iters: iters, 135 | tables: tbls, 136 | idx: -1, // Not really necessary because s.it.Valid()=false, but good to have. 
137 | } 138 | } 139 | 140 | func (s *ConcatIterator) setIdx(idx int) { 141 | s.idx = idx 142 | if idx < 0 || idx >= len(s.iters) { 143 | s.cur = nil 144 | return 145 | } 146 | if s.iters[idx] == nil { 147 | s.iters[idx] = s.tables[idx].NewIterator(s.options) 148 | } 149 | s.cur = s.iters[s.idx] 150 | } 151 | 152 | // Rewind implements Interface 153 | func (s *ConcatIterator) Rewind() { 154 | if len(s.iters) == 0 { 155 | return 156 | } 157 | if !s.options.IsAsc { 158 | s.setIdx(0) 159 | } else { 160 | s.setIdx(len(s.iters) - 1) 161 | } 162 | s.cur.Rewind() 163 | } 164 | 165 | // Valid implements y.Interface 166 | func (s *ConcatIterator) Valid() bool { 167 | return s.cur != nil && s.cur.Valid() 168 | } 169 | 170 | // Item _ 171 | func (s *ConcatIterator) Item() utils.Item { 172 | return s.cur.Item() 173 | } 174 | 175 | // Seek brings us to element >= key if reversed is false. Otherwise, <= key. 176 | func (s *ConcatIterator) Seek(key []byte) { 177 | var idx int 178 | if s.options.IsAsc { 179 | idx = sort.Search(len(s.tables), func(i int) bool { 180 | return utils.CompareKeys(s.tables[i].ss.MaxKey(), key) >= 0 181 | }) 182 | } else { 183 | n := len(s.tables) 184 | idx = n - 1 - sort.Search(n, func(i int) bool { 185 | return utils.CompareKeys(s.tables[n-1-i].ss.MinKey(), key) <= 0 186 | }) 187 | } 188 | if idx >= len(s.tables) || idx < 0 { 189 | s.setIdx(-1) 190 | return 191 | } 192 | // For reversed=false, we know s.tables[i-1].Biggest() < key. Thus, the 193 | // previous table cannot possibly contain key. 194 | s.setIdx(idx) 195 | s.cur.Seek(key) 196 | } 197 | 198 | // Next advances our concat iterator. 199 | func (s *ConcatIterator) Next() { 200 | s.cur.Next() 201 | if s.cur.Valid() { 202 | // Nothing to do. Just stay with the current table. 203 | return 204 | } 205 | for { // In case there are empty tables. 206 | if !s.options.IsAsc { 207 | s.setIdx(s.idx + 1) 208 | } else { 209 | s.setIdx(s.idx - 1) 210 | } 211 | if s.cur == nil { 212 | // End of list. 
Valid will become false. 213 | return 214 | } 215 | s.cur.Rewind() 216 | if s.cur.Valid() { 217 | break 218 | } 219 | } 220 | } 221 | 222 | // Close implements y.Interface. 223 | func (s *ConcatIterator) Close() error { 224 | for _, it := range s.iters { 225 | if it == nil { 226 | continue 227 | } 228 | if err := it.Close(); err != nil { 229 | return fmt.Errorf("ConcatIterator:%+v", err) 230 | } 231 | } 232 | return nil 233 | } 234 | 235 | // MergeIterator 多路合并迭代器 236 | // NOTE: MergeIterator owns the array of iterators and is responsible for closing them. 237 | type MergeIterator struct { 238 | left node 239 | right node 240 | small *node 241 | 242 | curKey []byte 243 | reverse bool 244 | } 245 | 246 | type node struct { 247 | valid bool 248 | entry *utils.Entry 249 | iter utils.Iterator 250 | 251 | // The two iterators are type asserted from `y.Iterator`, used to inline more function calls. 252 | // Calling functions on concrete types is much faster (about 25-30%) than calling the 253 | // interface's function. 254 | merge *MergeIterator 255 | concat *ConcatIterator 256 | } 257 | 258 | func (n *node) setIterator(iter utils.Iterator) { 259 | n.iter = iter 260 | // It's okay if the type assertion below fails and n.merge/n.concat are set to nil. 261 | // We handle the nil values of merge and concat in all the methods. 
262 | n.merge, _ = iter.(*MergeIterator) 263 | n.concat, _ = iter.(*ConcatIterator) 264 | } 265 | 266 | func (n *node) setKey() { 267 | switch { 268 | case n.merge != nil: 269 | n.valid = n.merge.small.valid 270 | if n.valid { 271 | n.entry = n.merge.small.entry 272 | } 273 | case n.concat != nil: 274 | n.valid = n.concat.Valid() 275 | if n.valid { 276 | n.entry = n.concat.Item().Entry() 277 | } 278 | default: 279 | n.valid = n.iter.Valid() 280 | if n.valid { 281 | n.entry = n.iter.Item().Entry() 282 | } 283 | } 284 | } 285 | 286 | func (n *node) next() { 287 | switch { 288 | case n.merge != nil: 289 | n.merge.Next() 290 | case n.concat != nil: 291 | n.concat.Next() 292 | default: 293 | n.iter.Next() 294 | } 295 | n.setKey() 296 | } 297 | 298 | func (n *node) rewind() { 299 | n.iter.Rewind() 300 | n.setKey() 301 | } 302 | 303 | func (n *node) seek(key []byte) { 304 | n.iter.Seek(key) 305 | n.setKey() 306 | } 307 | 308 | func (mi *MergeIterator) fix() { 309 | if !mi.bigger().valid { 310 | return 311 | } 312 | if !mi.small.valid { 313 | mi.swapSmall() 314 | return 315 | } 316 | cmp := utils.CompareKeys(mi.small.entry.Key, mi.bigger().entry.Key) 317 | switch { 318 | case cmp == 0: // Both the keys are equal. 319 | // In case of same keys, move the right iterator ahead. 320 | mi.right.next() 321 | if &mi.right == mi.small { 322 | mi.swapSmall() 323 | } 324 | return 325 | case cmp < 0: // Small is less than bigger(). 326 | if mi.reverse { 327 | mi.swapSmall() 328 | } else { 329 | // we don't need to do anything. Small already points to the smallest. 330 | } 331 | return 332 | default: // bigger() is less than small. 333 | if mi.reverse { 334 | // Do nothing since we're iterating in reverse. Small currently points to 335 | // the bigger key and that's okay in reverse iteration. 
336 | } else { 337 | mi.swapSmall() 338 | } 339 | return 340 | } 341 | } 342 | 343 | func (mi *MergeIterator) bigger() *node { 344 | if mi.small == &mi.left { 345 | return &mi.right 346 | } 347 | return &mi.left 348 | } 349 | 350 | func (mi *MergeIterator) swapSmall() { 351 | if mi.small == &mi.left { 352 | mi.small = &mi.right 353 | return 354 | } 355 | if mi.small == &mi.right { 356 | mi.small = &mi.left 357 | return 358 | } 359 | } 360 | 361 | // Next returns the next element. If it is the same as the current key, ignore it. 362 | func (mi *MergeIterator) Next() { 363 | for mi.Valid() { 364 | if !bytes.Equal(mi.small.entry.Key, mi.curKey) { 365 | break 366 | } 367 | mi.small.next() 368 | mi.fix() 369 | } 370 | mi.setCurrent() 371 | } 372 | 373 | func (mi *MergeIterator) setCurrent() { 374 | utils.CondPanic(mi.small.entry == nil && mi.small.valid == true, fmt.Errorf("mi.small.entry is nil")) 375 | if mi.small.valid { 376 | mi.curKey = append(mi.curKey[:0], mi.small.entry.Key...) 377 | } 378 | } 379 | 380 | // Rewind seeks to first element (or last element for reverse iterator). 381 | func (mi *MergeIterator) Rewind() { 382 | mi.left.rewind() 383 | mi.right.rewind() 384 | mi.fix() 385 | mi.setCurrent() 386 | } 387 | 388 | // Seek brings us to element with key >= given key. 389 | func (mi *MergeIterator) Seek(key []byte) { 390 | mi.left.seek(key) 391 | mi.right.seek(key) 392 | mi.fix() 393 | mi.setCurrent() 394 | } 395 | 396 | // Valid returns whether the MergeIterator is at a valid element. 397 | func (mi *MergeIterator) Valid() bool { 398 | return mi.small.valid 399 | } 400 | 401 | // Key returns the key associated with the current iterator. 402 | func (mi *MergeIterator) Item() utils.Item { 403 | return mi.small.iter.Item() 404 | } 405 | 406 | // Close implements Iterator. 
407 | func (mi *MergeIterator) Close() error { 408 | err1 := mi.left.iter.Close() 409 | err2 := mi.right.iter.Close() 410 | if err1 != nil { 411 | return utils.WarpErr("MergeIterator", err1) 412 | } 413 | return utils.WarpErr("MergeIterator", err2) 414 | } 415 | 416 | // NewMergeIterator creates a merge iterator. 417 | func NewMergeIterator(iters []utils.Iterator, reverse bool) utils.Iterator { 418 | switch len(iters) { 419 | case 0: 420 | return &Iterator{} 421 | case 1: 422 | return iters[0] 423 | case 2: 424 | mi := &MergeIterator{ 425 | reverse: reverse, 426 | } 427 | mi.left.setIterator(iters[0]) 428 | mi.right.setIterator(iters[1]) 429 | // Assign left iterator randomly. This will be fixed when user calls rewind/seek. 430 | mi.small = &mi.left 431 | return mi 432 | } 433 | mid := len(iters) / 2 434 | return NewMergeIterator( 435 | []utils.Iterator{ 436 | NewMergeIterator(iters[:mid], reverse), 437 | NewMergeIterator(iters[mid:], reverse), 438 | }, reverse) 439 | } 440 | -------------------------------------------------------------------------------- /file/manifest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 hardcore-os Project Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License") 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package file 16 | 17 | import ( 18 | "bufio" 19 | "bytes" 20 | "encoding/binary" 21 | "fmt" 22 | "hash/crc32" 23 | "io" 24 | "os" 25 | "path/filepath" 26 | "sync" 27 | 28 | "github.com/hardcore-os/corekv/pb" 29 | "github.com/hardcore-os/corekv/utils" 30 | "github.com/pkg/errors" 31 | ) 32 | 33 | // ManifestFile 维护sst文件元信息的文件 34 | // manifest 比较特殊,不能使用mmap,需要保证实时的写入 35 | type ManifestFile struct { 36 | opt *Options 37 | f *os.File 38 | lock sync.Mutex 39 | deletionsRewriteThreshold int 40 | manifest *Manifest 41 | } 42 | 43 | // Manifest corekv 元数据状态维护 44 | type Manifest struct { 45 | Levels []levelManifest 46 | Tables map[uint64]TableManifest 47 | Creations int 48 | Deletions int 49 | } 50 | 51 | // TableManifest 包含sst的基本信息 52 | type TableManifest struct { 53 | Level uint8 54 | Checksum []byte // 方便今后扩展 55 | } 56 | type levelManifest struct { 57 | Tables map[uint64]struct{} // Set of table id's 58 | } 59 | 60 | //TableMeta sst 的一些元信息 61 | type TableMeta struct { 62 | ID uint64 63 | Checksum []byte 64 | } 65 | 66 | // OpenManifestFile 打开manifest文件 67 | func OpenManifestFile(opt *Options) (*ManifestFile, error) { 68 | path := filepath.Join(opt.Dir, utils.ManifestFilename) 69 | mf := &ManifestFile{lock: sync.Mutex{}, opt: opt} 70 | f, err := os.OpenFile(path, os.O_RDWR, 0) 71 | // 如果打开失败 则尝试创建一个新的 manifest file 72 | if err != nil { 73 | if !os.IsNotExist(err) { 74 | return mf, err 75 | } 76 | m := createManifest() 77 | fp, netCreations, err := helpRewrite(opt.Dir, m) 78 | utils.CondPanic(netCreations == 0, errors.Wrap(err, utils.ErrReWriteFailure.Error())) 79 | if err != nil { 80 | return mf, err 81 | } 82 | mf.f = fp 83 | f = fp 84 | mf.manifest = m 85 | return mf, nil 86 | } 87 | 88 | // 如果打开 则对manifest文件重放 89 | manifest, truncOffset, err := ReplayManifestFile(f) 90 | if err != nil { 91 | _ = f.Close() 92 | return mf, err 93 | } 94 | // Truncate file so we don't have a half-written entry at the end. 
95 | if err := f.Truncate(truncOffset); err != nil { 96 | _ = f.Close() 97 | return mf, err 98 | } 99 | if _, err = f.Seek(0, io.SeekEnd); err != nil { 100 | _ = f.Close() 101 | return mf, err 102 | } 103 | mf.f = f 104 | mf.manifest = manifest 105 | return mf, nil 106 | } 107 | 108 | // ReplayManifestFile 对已经存在的manifest文件重新应用所有状态变更 109 | func ReplayManifestFile(fp *os.File) (ret *Manifest, truncOffset int64, err error) { 110 | r := &bufReader{reader: bufio.NewReader(fp)} 111 | var magicBuf [8]byte 112 | if _, err := io.ReadFull(r, magicBuf[:]); err != nil { 113 | return &Manifest{}, 0, utils.ErrBadMagic 114 | } 115 | if !bytes.Equal(magicBuf[0:4], utils.MagicText[:]) { 116 | return &Manifest{}, 0, utils.ErrBadMagic 117 | } 118 | version := binary.BigEndian.Uint32(magicBuf[4:8]) 119 | if version != uint32(utils.MagicVersion) { 120 | return &Manifest{}, 0, 121 | fmt.Errorf("manifest has unsupported version: %d (we support %d)", version, utils.MagicVersion) 122 | } 123 | 124 | build := createManifest() 125 | var offset int64 126 | for { 127 | offset = r.count 128 | var lenCrcBuf [8]byte 129 | _, err := io.ReadFull(r, lenCrcBuf[:]) 130 | if err != nil { 131 | if err == io.EOF || err == io.ErrUnexpectedEOF { 132 | break 133 | } 134 | return &Manifest{}, 0, err 135 | } 136 | length := binary.BigEndian.Uint32(lenCrcBuf[0:4]) 137 | var buf = make([]byte, length) 138 | if _, err := io.ReadFull(r, buf); err != nil { 139 | if err == io.EOF || err == io.ErrUnexpectedEOF { 140 | break 141 | } 142 | return &Manifest{}, 0, err 143 | } 144 | if crc32.Checksum(buf, utils.CastagnoliCrcTable) != binary.BigEndian.Uint32(lenCrcBuf[4:8]) { 145 | return &Manifest{}, 0, utils.ErrBadChecksum 146 | } 147 | 148 | var changeSet pb.ManifestChangeSet 149 | if err := changeSet.Unmarshal(buf); err != nil { 150 | return &Manifest{}, 0, err 151 | } 152 | 153 | if err := applyChangeSet(build, &changeSet); err != nil { 154 | return &Manifest{}, 0, err 155 | } 156 | } 157 | 158 | return build, 
offset, err 159 | } 160 | 161 | // This is not a "recoverable" error -- opening the KV store fails because the MANIFEST file is 162 | // just plain broken. 163 | func applyChangeSet(build *Manifest, changeSet *pb.ManifestChangeSet) error { 164 | for _, change := range changeSet.Changes { 165 | if err := applyManifestChange(build, change); err != nil { 166 | return err 167 | } 168 | } 169 | return nil 170 | } 171 | 172 | func applyManifestChange(build *Manifest, tc *pb.ManifestChange) error { 173 | switch tc.Op { 174 | case pb.ManifestChange_CREATE: 175 | if _, ok := build.Tables[tc.Id]; ok { 176 | return fmt.Errorf("MANIFEST invalid, table %d exists", tc.Id) 177 | } 178 | build.Tables[tc.Id] = TableManifest{ 179 | Level: uint8(tc.Level), 180 | Checksum: append([]byte{}, tc.Checksum...), 181 | } 182 | for len(build.Levels) <= int(tc.Level) { 183 | build.Levels = append(build.Levels, levelManifest{make(map[uint64]struct{})}) 184 | } 185 | build.Levels[tc.Level].Tables[tc.Id] = struct{}{} 186 | build.Creations++ 187 | case pb.ManifestChange_DELETE: 188 | tm, ok := build.Tables[tc.Id] 189 | if !ok { 190 | return fmt.Errorf("MANIFEST removes non-existing table %d", tc.Id) 191 | } 192 | delete(build.Levels[tm.Level].Tables, tc.Id) 193 | delete(build.Tables, tc.Id) 194 | build.Deletions++ 195 | default: 196 | return fmt.Errorf("MANIFEST file has invalid manifestChange op") 197 | } 198 | return nil 199 | } 200 | 201 | func createManifest() *Manifest { 202 | levels := make([]levelManifest, 0) 203 | return &Manifest{ 204 | Levels: levels, 205 | Tables: make(map[uint64]TableManifest), 206 | } 207 | } 208 | 209 | type bufReader struct { 210 | reader *bufio.Reader 211 | count int64 212 | } 213 | 214 | func (r *bufReader) Read(p []byte) (n int, err error) { 215 | n, err = r.reader.Read(p) 216 | r.count += int64(n) 217 | return 218 | } 219 | 220 | // asChanges returns a sequence of changes that could be used to recreate the Manifest in its 221 | // present state. 
222 | func (m *Manifest) asChanges() []*pb.ManifestChange { 223 | changes := make([]*pb.ManifestChange, 0, len(m.Tables)) 224 | for id, tm := range m.Tables { 225 | changes = append(changes, newCreateChange(id, int(tm.Level), tm.Checksum)) 226 | } 227 | return changes 228 | } 229 | func newCreateChange(id uint64, level int, checksum []byte) *pb.ManifestChange { 230 | return &pb.ManifestChange{ 231 | Id: id, 232 | Op: pb.ManifestChange_CREATE, 233 | Level: uint32(level), 234 | Checksum: checksum, 235 | } 236 | } 237 | 238 | // Must be called while appendLock is held. 239 | func (mf *ManifestFile) rewrite() error { 240 | // In Windows the files should be closed before doing a Rename. 241 | if err := mf.f.Close(); err != nil { 242 | return err 243 | } 244 | fp, nextCreations, err := helpRewrite(mf.opt.Dir, mf.manifest) 245 | if err != nil { 246 | return err 247 | } 248 | mf.manifest.Creations = nextCreations 249 | mf.manifest.Deletions = 0 250 | mf.f = fp 251 | return nil 252 | } 253 | 254 | func helpRewrite(dir string, m *Manifest) (*os.File, int, error) { 255 | rewritePath := filepath.Join(dir, utils.ManifestRewriteFilename) 256 | // We explicitly sync. 257 | fp, err := os.OpenFile(rewritePath, utils.DefaultFileFlag, utils.DefaultFileMode) 258 | if err != nil { 259 | return nil, 0, err 260 | } 261 | 262 | buf := make([]byte, 8) 263 | copy(buf[0:4], utils.MagicText[:]) 264 | binary.BigEndian.PutUint32(buf[4:8], uint32(utils.MagicVersion)) 265 | 266 | netCreations := len(m.Tables) 267 | changes := m.asChanges() 268 | set := pb.ManifestChangeSet{Changes: changes} 269 | 270 | changeBuf, err := set.Marshal() 271 | if err != nil { 272 | fp.Close() 273 | return nil, 0, err 274 | } 275 | var lenCrcBuf [8]byte 276 | binary.BigEndian.PutUint32(lenCrcBuf[0:4], uint32(len(changeBuf))) 277 | binary.BigEndian.PutUint32(lenCrcBuf[4:8], crc32.Checksum(changeBuf, utils.CastagnoliCrcTable)) 278 | buf = append(buf, lenCrcBuf[:]...) 279 | buf = append(buf, changeBuf...) 
280 | if _, err := fp.Write(buf); err != nil { 281 | fp.Close() 282 | return nil, 0, err 283 | } 284 | if err := fp.Sync(); err != nil { 285 | fp.Close() 286 | return nil, 0, err 287 | } 288 | 289 | // In Windows the files should be closed before doing a Rename. 290 | if err = fp.Close(); err != nil { 291 | return nil, 0, err 292 | } 293 | manifestPath := filepath.Join(dir, utils.ManifestFilename) 294 | if err := os.Rename(rewritePath, manifestPath); err != nil { 295 | return nil, 0, err 296 | } 297 | fp, err = os.OpenFile(manifestPath, utils.DefaultFileFlag, utils.DefaultFileMode) 298 | if err != nil { 299 | return nil, 0, err 300 | } 301 | if _, err := fp.Seek(0, io.SeekEnd); err != nil { 302 | fp.Close() 303 | return nil, 0, err 304 | } 305 | if err := utils.SyncDir(dir); err != nil { 306 | fp.Close() 307 | return nil, 0, err 308 | } 309 | 310 | return fp, netCreations, nil 311 | } 312 | 313 | // Close 关闭文件 314 | func (mf *ManifestFile) Close() error { 315 | if err := mf.f.Close(); err != nil { 316 | return err 317 | } 318 | return nil 319 | } 320 | 321 | // AddChanges 对外暴露的写比那更丰富 322 | func (mf *ManifestFile) AddChanges(changesParam []*pb.ManifestChange) error { 323 | return mf.addChanges(changesParam) 324 | } 325 | func (mf *ManifestFile) addChanges(changesParam []*pb.ManifestChange) error { 326 | changes := pb.ManifestChangeSet{Changes: changesParam} 327 | buf, err := changes.Marshal() 328 | if err != nil { 329 | return err 330 | } 331 | 332 | // TODO 锁粒度可以优化 333 | mf.lock.Lock() 334 | defer mf.lock.Unlock() 335 | if err := applyChangeSet(mf.manifest, &changes); err != nil { 336 | return err 337 | } 338 | // Rewrite manifest if it'd shrink by 1/10 and it's big enough to care 339 | if mf.manifest.Deletions > utils.ManifestDeletionsRewriteThreshold && 340 | mf.manifest.Deletions > utils.ManifestDeletionsRatio*(mf.manifest.Creations-mf.manifest.Deletions) { 341 | if err := mf.rewrite(); err != nil { 342 | return err 343 | } 344 | } else { 345 | var lenCrcBuf 
[8]byte 346 | binary.BigEndian.PutUint32(lenCrcBuf[0:4], uint32(len(buf))) 347 | binary.BigEndian.PutUint32(lenCrcBuf[4:8], crc32.Checksum(buf, utils.CastagnoliCrcTable)) 348 | buf = append(lenCrcBuf[:], buf...) 349 | if _, err := mf.f.Write(buf); err != nil { 350 | return err 351 | } 352 | } 353 | err = mf.f.Sync() 354 | return err 355 | } 356 | 357 | // AddTableMeta 存储level表到manifest的level中 358 | func (mf *ManifestFile) AddTableMeta(levelNum int, t *TableMeta) (err error) { 359 | mf.addChanges([]*pb.ManifestChange{ 360 | newCreateChange(t.ID, levelNum, t.Checksum), 361 | }) 362 | return err 363 | } 364 | 365 | // RevertToManifest checks that all necessary table files exist and removes all table files not 366 | // referenced by the manifest. idMap is a set of table file id's that were read from the directory 367 | // listing. 368 | func (mf *ManifestFile) RevertToManifest(idMap map[uint64]struct{}) error { 369 | // 1. Check all files in manifest exist. 370 | for id := range mf.manifest.Tables { 371 | if _, ok := idMap[id]; !ok { 372 | return fmt.Errorf("file does not exist for table %d", id) 373 | } 374 | } 375 | 376 | // 2. Delete files that shouldn't exist. 377 | for id := range idMap { 378 | if _, ok := mf.manifest.Tables[id]; !ok { 379 | utils.Err(fmt.Errorf("Table file %d not referenced in MANIFEST", id)) 380 | filename := utils.FileNameSSTable(mf.opt.Dir, id) 381 | if err := os.Remove(filename); err != nil { 382 | return errors.Wrapf(err, "While removing table %d", id) 383 | } 384 | } 385 | } 386 | return nil 387 | } 388 | 389 | // GetManifest manifest 390 | func (mf *ManifestFile) GetManifest() *Manifest { 391 | return mf.manifest 392 | } 393 | --------------------------------------------------------------------------------