├── go.mod ├── fs ├── fs.go └── os_file.go ├── .gitignore ├── options.go ├── go.sum ├── .github └── workflows │ └── go.yml ├── examples └── main.go ├── benchmark └── bench_test.go ├── README.md ├── bucket.go ├── table_test.go ├── LICENSE └── table.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/rosedblabs/diskhash 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/spaolacci/murmur3 v1.1.0 7 | github.com/stretchr/testify v1.8.4 8 | ) 9 | 10 | require ( 11 | github.com/davecgh/go-spew v1.1.1 // indirect 12 | github.com/pmezard/go-difflib v1.0.0 // indirect 13 | gopkg.in/yaml.v3 v3.0.1 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /fs/fs.go: -------------------------------------------------------------------------------- 1 | package fs 2 | 3 | import "io" 4 | 5 | type File interface { 6 | io.Reader 7 | io.ReaderAt 8 | io.Writer 9 | io.WriterAt 10 | io.Closer 11 | Truncate(int64) error 12 | Size() int64 13 | Sync() error 14 | } 15 | 16 | type FileSystem = byte 17 | 18 | const ( 19 | OSFileSystem FileSystem = iota 20 | ) 21 | 22 | func Open(name string, fs FileSystem) (File, error) { 23 | switch fs { 24 | case OSFileSystem: 25 | return openOSFile(name) 26 | } 27 | return nil, nil 28 | } 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | -------------------------------------------------------------------------------- /options.go: -------------------------------------------------------------------------------- 1 | package diskhash 2 | 3 | import "os" 4 | 5 | // Options is used to create a new diskhash table. 6 | type Options struct { 7 | // DirPath is the directory path to store the hash table files. 8 | DirPath string 9 | 10 | // SlotValueLength is the length of the value in each slot. 11 | // Your value lenght must be equal to the value length you set when creating the table. 12 | SlotValueLength uint32 13 | 14 | // LoadFactor is the load factor of the hash table. 15 | // The load factor is the ratio of the number of elements in the hash table to the table size. 16 | // If the ratio is greater than the load factor, the hash table will be expanded automatically. 17 | // The default value is 0.7. 18 | LoadFactor float64 19 | } 20 | 21 | // DefaultOptions is the default options. 22 | var DefaultOptions = Options{ 23 | DirPath: os.TempDir(), 24 | SlotValueLength: 0, 25 | LoadFactor: 0.7, 26 | } 27 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= 6 | github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 7 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 8 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /fs/os_file.go: -------------------------------------------------------------------------------- 1 | package fs 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | type OSFile struct { 8 | fd *os.File 9 | size int64 10 | } 11 | 12 | func openOSFile(name string) (File, error) { 13 | fd, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0644) 14 | if err != nil { 15 | return nil, err 16 | } 17 | 18 | // get the file size 19 | stat, err := fd.Stat() 20 | if err != nil { 21 | return nil, err 22 | } 23 | return &OSFile{fd: fd, size: stat.Size()}, nil 24 | } 25 | 26 | func (of *OSFile) Read(p []byte) (n int, err error) { 27 | return of.fd.Read(p) 28 | } 29 | 30 | func (of *OSFile) ReadAt(b []byte, off int64) (n int, err error) { 31 | return of.fd.ReadAt(b, off) 32 | } 33 | 34 | func (of *OSFile) Write(p []byte) (n int, err error) { 35 | return of.fd.Write(p) 36 | } 37 | 38 | func (of *OSFile) WriteAt(b []byte, off int64) (n int, err error) { 39 | return of.fd.WriteAt(b, off) 40 | } 41 | 42 | func (of *OSFile) Truncate(size int64) error { 43 | err := of.fd.Truncate(of.size + size) 44 | if err != nil { 45 | return err 46 | } 47 | of.size += size 48 | return nil 49 | } 50 | 51 | func (of *OSFile) Size() int64 { 52 | return of.size 53 | } 54 | 55 | func (of *OSFile) Sync() error { 56 | return of.fd.Sync() 57 | } 58 | 59 | func (of *OSFile) Close() error { 60 | return of.fd.Close() 61 | } 62 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | ubuntu-test: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.19 20 | 21 | - name: Run Go Vet 22 | run: | 23 | go vet ./... 24 | 25 | - name: Run Go Fmt 26 | run: | 27 | files=$(go fmt ./...) 28 | if [ -n "$files" ]; then 29 | echo "Please run gofmt on these files ..." 30 | echo "$files" 31 | exit 1 32 | fi 33 | 34 | - name: Build 35 | run: go build -v 36 | 37 | - name: Run Unit Test 38 | run: go test -count 1 -v ./... 39 | 40 | windows-test: 41 | runs-on: windows-latest 42 | steps: 43 | - uses: actions/checkout@v2 44 | 45 | - name: Set up Go 46 | uses: actions/setup-go@v2 47 | with: 48 | go-version: 1.19 49 | 50 | - name: Run Go Vet 51 | run: | 52 | go vet ./... 53 | 54 | - name: Build 55 | run: go build -v 56 | 57 | - name: Run Unit Test 58 | run: go test -count 1 -v ./... 59 | -------------------------------------------------------------------------------- /examples/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/rosedblabs/diskhash" 6 | "strings" 7 | ) 8 | 9 | func main() { 10 | // open the table, specify the slot value length, 11 | // remember that you can't change it once you set it, and all values must be the same length. 12 | options := diskhash.DefaultOptions 13 | options.DirPath = "/tmp/diskhash-test" 14 | options.SlotValueLength = 10 15 | table, err := diskhash.Open(options) 16 | if err != nil { 17 | panic(err) 18 | } 19 | 20 | // don't forget to close the table!!! 21 | // some meta info will be saved when you close the table. 22 | defer func() { 23 | _ = table.Close() 24 | }() 25 | 26 | // put a key-value pair into the table. 27 | // the MatchKey function will be called when the key is matched. 28 | // Why we need the MatchKey function? 29 | // When we store the data in the hash table, we only store the hash value of the key, and the raw value. 30 | // So when we get the data from hash table, even if the hash value of the key matches, that doesn't mean 31 | // the key matches because of hash collision. 32 | // So we need to provide a function to determine whether the key of the slot matches the stored key. 33 | err = table.Put([]byte("key1"), []byte(strings.Repeat("v", 10)), func(slot diskhash.Slot) (bool, error) { 34 | return true, nil 35 | }) 36 | if err != nil { 37 | panic(err) 38 | } 39 | 40 | err = table.Get([]byte("key1"), func(slot diskhash.Slot) (bool, error) { 41 | fmt.Println("val =", string(slot.Value)) 42 | return true, nil 43 | }) 44 | if err != nil { 45 | panic(err) 46 | } 47 | 48 | err = table.Delete([]byte("key1"), func(slot diskhash.Slot) (bool, error) { 49 | return true, nil 50 | }) 51 | if err != nil { 52 | panic(err) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /benchmark/bench_test.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "fmt" 5 | "github.com/spaolacci/murmur3" 6 | "math/rand" 7 | "os" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/rosedblabs/diskhash" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func newTable() (*diskhash.Table, func()) { 16 | options := diskhash.DefaultOptions 17 | options.SlotValueLength = 20 18 | options.DirPath = "/tmp/diskhash-bench" 19 | table, err := diskhash.Open(options) 20 | if err != nil { 21 | panic(err) 22 | } 23 | 24 | return table, func() { 25 | _ = table.Close() 26 | _ = os.RemoveAll(options.DirPath) 27 | } 28 | } 29 | 30 | func BenchmarkPut(b *testing.B) { 31 | table, destroy := newTable() 32 | defer destroy() 33 | 34 | b.ResetTimer() 35 | b.ReportAllocs() 36 | 37 | value := []byte(strings.Repeat("d", 20)) 38 | for i := 0; i < b.N; i++ { 39 | err := table.Put(GetTestKey(i), value, func(slot diskhash.Slot) (bool, error) { 40 | return false, nil 41 | }) 42 | assert.Nil(b, err) 43 | } 44 | } 45 | 46 | func BenchmarkGet(b *testing.B) { 47 | table, destroy := newTable() 48 | defer destroy() 49 | 50 | value := []byte(strings.Repeat("d", 20)) 51 | for i := 0; i < 100000; i++ { 52 | err := table.Put(GetTestKey(i), value, func(slot diskhash.Slot) (bool, error) { 53 | return false, nil 54 | }) 55 | assert.Nil(b, err) 56 | } 57 | 58 | b.ResetTimer() 59 | b.ReportAllocs() 60 | 61 | for i := 0; i < b.N; i++ { 62 | key := GetTestKey(rand.Intn(100000)) 63 | err := table.Get(key, func(slot diskhash.Slot) (bool, error) { 64 | hash := murmur3.Sum32(key) 65 | if hash == slot.Hash { 66 | return true, nil 67 | } 68 | return false, nil 69 | }) 70 | assert.Nil(b, err) 71 | } 72 | } 73 | 74 | func GetTestKey(i int) []byte { 75 | return []byte(fmt.Sprintf("diskhash-test-key-%09d", i)) 76 | } 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # diskhash 2 | on-disk hash table index(mainly for WAL). 3 | 4 | ## When will you need it? 5 | If you are using [WAL](https://github.com/rosedblabs/wal) to store your data, 6 | 7 | > wal: https://github.com/rosedblabs/wal 8 | 9 | you will get the positions to get the data from WAL, the common way to store the positions is to use an in-memory index(like rosedb). 10 | 11 | But if you have a large amount of data, and it will take a lot of time to load the index into memory when you restart the system. 12 | 13 | So, you can use diskhash to store the index on disk. 14 | 15 | ## Can be used as a general hash table index(without wal)? 16 | 17 | yes, you can use it as an on-disk hash table index, but the restriction is that the value must be fixed size. 18 | you can set the value size when you create the index, and once you set the value size, you can't change it. 19 | 20 | But don't set the value size too large(1KB), the disk size maybe increase dramatically because of the write amplification. 21 | **it is suitable for storing some metadata of your system.** 22 | 23 | ## Design Overview 24 | The diskhash consists of two disk files: main and overflow. 25 | The file format is as follows: 26 | ``` 27 | File Format: 28 | +---------------+---------------+---------------+---------------+-----+----------------+ 29 | | (unused) | bucket0 | bucket1 | bucket2 | ... | bucketN | 30 | +---------------+---------------+---------------+---------------+-----+----------------+ 31 | ``` 32 | 33 | A file is divided into multiple buckets, if the table reaches the load factor, a new bucket will be appended to the end of the file. 34 | A bucket contains 31 slots, and an overflow offset which points to the overflow file buckets. 35 | ``` 36 | Bucket Format: 37 | +-------------+-------------+-------------+-------------+-----+--------------+-----------------+ 38 | | slot0 | slot1 | slot2 | slot3 | ... | slotN | overflow_offset | 39 | +-------------+-------------+-------------+-------------+-----+--------------+-----------------+ 40 | ``` 41 | 42 | A slot contains a key hash value, and user-defined value. 43 | ``` 44 | Slot Format: 45 | +-----------------------+--------------------------------+ 46 | | key_hash(4B) | value(N Bytes) | 47 | +-----------------------+--------------------------------+ 48 | ``` 49 | 50 | ## Getting Started 51 | ```go 52 | package main 53 | 54 | import ( 55 | "fmt" 56 | "github.com/rosedblabs/diskhash" 57 | "strings" 58 | ) 59 | 60 | func main() { 61 | // open the table, specify the slot value length, 62 | // remember that you can't change it once you set it, and all values must be the same length. 63 | options := diskhash.DefaultOptions 64 | options.DirPath = "/tmp/diskhash-test" 65 | options.SlotValueLength = 10 66 | table, err := diskhash.Open(options) 67 | if err != nil { 68 | panic(err) 69 | } 70 | 71 | // don't forget to close the table!!! 72 | // some meta info will be saved when you close the table. 73 | defer func() { 74 | _ = table.Close() 75 | }() 76 | 77 | // put a key-value pair into the table. 78 | // the MatchKey function will be called when the key is matched. 79 | // When we store the data in the hash table, we only store the hash value of the key, and the raw value. 80 | // So when we get the data from hash table, even if the hash value of the key matches, that doesn't mean 81 | // the key matches because of hash collision. 82 | // So we need to provide a function to determine whether the key of the slot matches the stored key. 83 | err = table.Put([]byte("key1"), []byte(strings.Repeat("v", 10)), func(slot diskhash.Slot) (bool, error) { 84 | return true, nil 85 | }) 86 | if err != nil { 87 | panic(err) 88 | } 89 | 90 | err = table.Get([]byte("key1"), func(slot diskhash.Slot) (bool, error) { 91 | fmt.Println("val =", string(slot.Value)) 92 | return true, nil 93 | }) 94 | if err != nil { 95 | panic(err) 96 | } 97 | 98 | err = table.Delete([]byte("key1"), func(slot diskhash.Slot) (bool, error) { 99 | return true, nil 100 | }) 101 | if err != nil { 102 | panic(err) 103 | } 104 | } 105 | ``` 106 | -------------------------------------------------------------------------------- /bucket.go: -------------------------------------------------------------------------------- 1 | package diskhash 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | 7 | "github.com/rosedblabs/diskhash/fs" 8 | ) 9 | 10 | // bucket is the basic unit of a file in diskhash. 11 | // each file contains 31 slots at most. 12 | type bucket struct { 13 | slots [slotsPerBucket]Slot // 31 slots now 14 | offset int64 // the offset of the bucket in the file 15 | nextOffset int64 // the offset of the next overflow bucket 16 | file fs.File // the file that contains the bucket 17 | bucketSize uint32 18 | } 19 | 20 | // bucketIterator is used to iterate all buckets in hash table. 21 | type bucketIterator struct { 22 | currentFile fs.File 23 | overflowFile fs.File 24 | offset int64 25 | 26 | slotValueLen uint32 27 | bucketSize uint32 28 | } 29 | 30 | // Slot is the basic unit of a bucket. 31 | // each slot contains a key hash and a value. 32 | type Slot struct { 33 | Hash uint32 // the hash of the key 34 | Value []byte // raw value 35 | } 36 | 37 | type slotWriter struct { 38 | currentBucket *bucket 39 | currentSlotIndex int 40 | prevBuckets []*bucket 41 | overwrite bool 42 | } 43 | 44 | func (t *Table) bucketOffset(bucketIndex uint32) int64 { 45 | return int64((bucketIndex + 1) * t.meta.BucketSize) 46 | } 47 | 48 | func (t *Table) newBucketIterator(startBucket uint32) *bucketIterator { 49 | return &bucketIterator{ 50 | currentFile: t.primaryFile, 51 | overflowFile: t.overflowFile, 52 | offset: t.bucketOffset(startBucket), 53 | slotValueLen: t.options.SlotValueLength, 54 | bucketSize: t.meta.BucketSize, 55 | } 56 | } 57 | 58 | func (bi *bucketIterator) next() (*bucket, error) { 59 | // we skip the first bucket size in both primary and overflow file, 60 | // so the bucket offset will never be 0. 61 | if bi.offset == 0 { 62 | return nil, io.EOF 63 | } 64 | 65 | // read the bucket and get all solts in it 66 | bucket, err := bi.readBucket() 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | // move to next overflow bucket 72 | bi.offset = bucket.nextOffset 73 | bi.currentFile = bi.overflowFile 74 | return bucket, nil 75 | } 76 | 77 | // readBucket reads a bucket from the current file. 78 | func (bi *bucketIterator) readBucket() (*bucket, error) { 79 | // read an entire bucket with all slots 80 | bucketBuf := make([]byte, bi.bucketSize) 81 | if _, err := bi.currentFile.ReadAt(bucketBuf, bi.offset); err != nil { 82 | return nil, err 83 | } 84 | 85 | b := &bucket{file: bi.currentFile, offset: bi.offset, bucketSize: bi.bucketSize} 86 | // parse and get slots in the bucket 87 | for i := 0; i < slotsPerBucket; i++ { 88 | _ = bucketBuf[hashLen+bi.slotValueLen] 89 | b.slots[i].Hash = binary.LittleEndian.Uint32(bucketBuf[:hashLen]) 90 | if b.slots[i].Hash != 0 { 91 | b.slots[i].Value = bucketBuf[hashLen : hashLen+bi.slotValueLen] 92 | } 93 | bucketBuf = bucketBuf[hashLen+bi.slotValueLen:] 94 | } 95 | 96 | // the last 8 bytes is the offset of next overflow bucket 97 | b.nextOffset = int64(binary.LittleEndian.Uint64(bucketBuf[:nextOffLen])) 98 | 99 | return b, nil 100 | } 101 | 102 | func (sw *slotWriter) insertSlot(sl Slot, t *Table) error { 103 | // if we exceed the slotsPerBucket, we need to create a new overflow bucket 104 | // and link it to the current bucket 105 | if sw.currentSlotIndex == slotsPerBucket { 106 | nextBucket, err := t.createOverflowBucket() 107 | if err != nil { 108 | return err 109 | } 110 | sw.currentBucket.nextOffset = nextBucket.offset 111 | sw.prevBuckets = append(sw.prevBuckets, sw.currentBucket) 112 | sw.currentBucket = nextBucket 113 | sw.currentSlotIndex = 0 114 | } 115 | 116 | sw.currentBucket.slots[sw.currentSlotIndex] = sl 117 | sw.currentSlotIndex++ 118 | return nil 119 | } 120 | 121 | func (sw *slotWriter) writeSlots() error { 122 | for i := len(sw.prevBuckets) - 1; i >= 0; i-- { 123 | if err := sw.prevBuckets[i].write(); err != nil { 124 | return err 125 | } 126 | } 127 | return sw.currentBucket.write() 128 | } 129 | 130 | // write all slots in the bucket to the file. 131 | func (b *bucket) write() error { 132 | buf := make([]byte, b.bucketSize) 133 | // write all slots to the buffer 134 | var index = 0 135 | for i := 0; i < slotsPerBucket; i++ { 136 | slot := b.slots[i] 137 | 138 | binary.LittleEndian.PutUint32(buf[index:index+hashLen], slot.Hash) 139 | copy(buf[index+hashLen:index+hashLen+len(slot.Value)], slot.Value) 140 | 141 | index += hashLen + len(slot.Value) 142 | } 143 | 144 | // write the offset of next overflow bucket 145 | binary.LittleEndian.PutUint64(buf[len(buf)-nextOffLen:], uint64(b.nextOffset)) 146 | 147 | _, err := b.file.WriteAt(buf, b.offset) 148 | return err 149 | } 150 | 151 | // remove a slot from the bucket, and move all slots after it forward 152 | // to fill the empty slot. 153 | func (b *bucket) removeSlot(slotIndex int) { 154 | i := slotIndex 155 | for ; i < slotsPerBucket-1; i++ { 156 | b.slots[i] = b.slots[i+1] 157 | } 158 | b.slots[i] = Slot{} 159 | } 160 | -------------------------------------------------------------------------------- /table_test.go: -------------------------------------------------------------------------------- 1 | package diskhash 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func destroyTable(t *Table) { 13 | _ = t.Close() 14 | _ = os.RemoveAll(t.options.DirPath) 15 | } 16 | 17 | func GetTestKey(i int) []byte { 18 | return []byte(fmt.Sprintf("diskhash-test-key-%09d", i)) 19 | } 20 | 21 | func TestOpen(t *testing.T) { 22 | dir, err := os.MkdirTemp("", "diskhash-test-open") 23 | assert.Nil(t, err) 24 | 25 | options := DefaultOptions 26 | options.DirPath = dir 27 | options.SlotValueLength = 10 28 | table, err := Open(options) 29 | assert.Nil(t, err) 30 | defer destroyTable(table) 31 | 32 | err = table.Close() 33 | assert.Nil(t, err) 34 | } 35 | 36 | func TestTable_Put(t *testing.T) { 37 | t.Run("16B-1000000", func(t *testing.T) { 38 | testTableBaisc(t, 16, 1000000, false, false, false) 39 | }) 40 | t.Run("20B-100000", func(t *testing.T) { 41 | testTableBaisc(t, 16, 1000000, false, false, false) 42 | }) 43 | t.Run("1K-50000", func(t *testing.T) { 44 | testTableBaisc(t, 1024, 50000, false, false, false) 45 | }) 46 | t.Run("4K-50000", func(t *testing.T) { 47 | testTableBaisc(t, 4*1024, 50000, false, false, false) 48 | }) 49 | } 50 | 51 | func TestTable_Get(t *testing.T) { 52 | t.Run("16B-1000000", func(t *testing.T) { 53 | testTableBaisc(t, 16, 1000000, true, false, false) 54 | }) 55 | t.Run("20B-1000000", func(t *testing.T) { 56 | testTableBaisc(t, 16, 1000000, true, false, false) 57 | }) 58 | t.Run("1K-50000", func(t *testing.T) { 59 | testTableBaisc(t, 1024, 50000, true, false, false) 60 | }) 61 | t.Run("4K-50000", func(t *testing.T) { 62 | testTableBaisc(t, 4*1024, 50000, true, false, false) 63 | }) 64 | } 65 | 66 | func TestTable_Delete(t *testing.T) { 67 | t.Run("16B-1000000", func(t *testing.T) { 68 | testTableBaisc(t, 16, 1000000, false, true, false) 69 | }) 70 | t.Run("20B-1000000", func(t *testing.T) { 71 | testTableBaisc(t, 16, 1000000, false, true, false) 72 | }) 73 | t.Run("1K-50000", func(t *testing.T) { 74 | testTableBaisc(t, 1024, 50000, false, true, false) 75 | }) 76 | t.Run("4K-50000", func(t *testing.T) { 77 | testTableBaisc(t, 4*1024, 50000, false, true, false) 78 | }) 79 | } 80 | 81 | func TestTable_Update(t *testing.T) { 82 | t.Run("16B-1000000", func(t *testing.T) { 83 | testTableBaisc(t, 16, 1000000, false, false, true) 84 | }) 85 | t.Run("20B-1000000", func(t *testing.T) { 86 | testTableBaisc(t, 16, 1000000, false, false, true) 87 | }) 88 | t.Run("1K-50000", func(t *testing.T) { 89 | testTableBaisc(t, 1024, 50000, false, false, true) 90 | }) 91 | t.Run("4K-50000", func(t *testing.T) { 92 | testTableBaisc(t, 4*1024, 50000, false, false, true) 93 | }) 94 | } 95 | 96 | func testTableBaisc(t *testing.T, valueLen uint32, count int, needGet, needDelete, needUpdate bool) { 97 | dir, err := os.MkdirTemp("", "diskhash-test") 98 | assert.Nil(t, err) 99 | 100 | options := DefaultOptions 101 | options.DirPath = dir 102 | options.SlotValueLength = valueLen 103 | table, err := Open(options) 104 | assert.Nil(t, err) 105 | defer destroyTable(table) 106 | 107 | value := []byte(strings.Repeat("D", int(valueLen))) 108 | for i := 0; i < count; i++ { 109 | key := GetTestKey(i) 110 | err = table.Put(key, value, func(slot Slot) (bool, error) { 111 | return false, nil 112 | }) 113 | assert.Nil(t, err) 114 | } 115 | 116 | getValue := func(target []byte) { 117 | for i := 0; i < count; i++ { 118 | key := GetTestKey(i) 119 | var res []byte 120 | matchKey := func(slot Slot) (bool, error) { 121 | if getKeyHash(key) == slot.Hash { 122 | res = make([]byte, len(slot.Value)) 123 | copy(res, slot.Value) 124 | return true, nil 125 | } 126 | return false, nil 127 | } 128 | err := table.Get(key, matchKey) 129 | assert.Equal(t, target, res) 130 | assert.Nil(t, err) 131 | } 132 | } 133 | 134 | if needGet { 135 | getValue(value) 136 | } 137 | 138 | if needDelete { 139 | assert.Equal(t, uint32(count), table.Size()) 140 | for i := 0; i < count; i++ { 141 | key := GetTestKey(i) 142 | matchKey := func(slot Slot) (bool, error) { 143 | if getKeyHash(key) == slot.Hash { 144 | return true, nil 145 | } 146 | return false, nil 147 | } 148 | err := table.Delete(key, matchKey) 149 | assert.Nil(t, err) 150 | } 151 | assert.Equal(t, uint32(0), table.Size()) 152 | getValue(nil) 153 | } 154 | 155 | if needUpdate { 156 | assert.Equal(t, uint32(count), table.Size()) 157 | newValue := []byte(strings.Repeat("H", int(valueLen))) 158 | for i := 0; i < count; i++ { 159 | key := GetTestKey(i) 160 | matchKey := func(slot Slot) (bool, error) { 161 | if getKeyHash(key) == slot.Hash { 162 | return true, nil 163 | } 164 | return false, nil 165 | } 166 | err := table.Put(key, newValue, matchKey) 167 | assert.Nil(t, err) 168 | } 169 | getValue(newValue) 170 | assert.Equal(t, uint32(count), table.Size()) 171 | } 172 | } 173 | 174 | func TestTableCRUD(t *testing.T) { 175 | dir, err := os.MkdirTemp("", "diskhash-test-crud") 176 | assert.Nil(t, err) 177 | 178 | options := DefaultOptions 179 | options.DirPath = dir 180 | options.SlotValueLength = 32 181 | table, err := Open(options) 182 | assert.Nil(t, err) 183 | defer destroyTable(table) 184 | 185 | for i := 0; i < 100; i++ { 186 | var cur []byte 187 | 188 | getFunc := func(slot Slot) (bool, error) { 189 | cur = slot.Value 190 | return false, nil 191 | } 192 | updateFunc := func(slot Slot) (bool, error) { 193 | return false, nil 194 | } 195 | 196 | key := GetTestKey(i) 197 | value := []byte(strings.Repeat("D", 32)) 198 | 199 | // put 200 | err = table.Put(key, value, updateFunc) 201 | assert.Nil(t, err) 202 | 203 | // get 204 | err = table.Get(key, getFunc) 205 | assert.Nil(t, err) 206 | assert.Equal(t, value, cur) 207 | 208 | // put different value 209 | value = []byte(strings.Repeat("A", 32)) 210 | err = table.Put(key, value, updateFunc) 211 | assert.Nil(t, err) 212 | 213 | // get after put different value 214 | err = table.Get(key, getFunc) 215 | assert.Nil(t, err) 216 | assert.Equal(t, value, cur) 217 | 218 | // delete 219 | err = table.Delete(key, updateFunc) 220 | assert.Nil(t, err) 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /table.go: -------------------------------------------------------------------------------- 1 | package diskhash 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | "sync" 10 | 11 | "github.com/rosedblabs/diskhash/fs" 12 | "github.com/spaolacci/murmur3" 13 | ) 14 | 15 | const ( 16 | primaryFileName = "HASH.PRIMARY" 17 | overflowFileName = "HASH.OVERFLOW" 18 | metaFileName = "HASH.META" 19 | slotsPerBucket = 31 20 | nextOffLen = 8 21 | hashLen = 4 22 | ) 23 | 24 | // MatchKeyFunc is used to determine whether the key of the slot matches the stored key. 25 | // And you must supply the function to the Put/Get/Delete methods. 26 | // 27 | // Why we need this function? 28 | // 29 | // When we store the data in the hash table, we only store the hash value of the key, and the raw value. 30 | // So when we get the data from hash table, even if the hash value of the key matches, that doesn't mean 31 | // the key matches because of hash collision. 32 | // So we need to provide a function to determine whether the key of the slot matches the stored key. 33 | type MatchKeyFunc func(Slot) (bool, error) 34 | 35 | // Table is a hash table that stores data on disk. 36 | // It consists of two files, the primary file and the overflow file. 37 | // Each file is divided into multiple buckets, each bucket contains multiple slots. 38 | // 39 | // The overview design of the hash table is as the Linear Hashing algorithm. 40 | // See more: 41 | // https://en.wikipedia.org/wiki/Linear_hashing 42 | // https://dsf.berkeley.edu/jmh/cs186/f02/lecs/lec18_2up.pdf 43 | type Table struct { 44 | primaryFile fs.File 45 | overflowFile fs.File 46 | metaFile fs.File // meta file stores the metadata of the hash table 47 | meta *tableMeta 48 | mu *sync.RWMutex // protect the table when multiple goroutines access it 49 | options Options 50 | } 51 | 52 | // tableMeta is the metadata of the hash table. 53 | type tableMeta struct { 54 | Level uint8 55 | SplitBucketIndex uint32 56 | NumBuckets uint32 57 | NumKeys uint32 58 | SlotValueLength uint32 59 | BucketSize uint32 60 | FreeBuckets []int64 61 | } 62 | 63 | // Open opens a hash table. 64 | // If the hash table does not exist, it will be created automatically. 65 | // It will open the primary file, the overflow file and the meta file. 66 | func Open(options Options) (*Table, error) { 67 | if err := checkOptions(options); err != nil { 68 | return nil, err 69 | } 70 | 71 | t := &Table{ 72 | mu: new(sync.RWMutex), 73 | options: options, 74 | } 75 | 76 | // create data directory if not exist 77 | if _, err := os.Stat(options.DirPath); err != nil { 78 | if err := os.MkdirAll(options.DirPath, os.ModePerm); err != nil { 79 | return nil, err 80 | } 81 | } 82 | 83 | // open meta file and read metadata info 84 | if err := t.readMeta(); err != nil { 85 | return nil, err 86 | } 87 | 88 | // open and init primary file 89 | primaryFile, err := t.openFile(primaryFileName) 90 | if err != nil { 91 | return nil, err 92 | } 93 | // init first bucket if the primary file is empty 94 | if primaryFile.Size() == int64(t.meta.BucketSize) { 95 | if err := primaryFile.Truncate(int64(t.meta.BucketSize)); err != nil { 96 | _ = primaryFile.Close() 97 | return nil, err 98 | } 99 | } 100 | t.primaryFile = primaryFile 101 | 102 | // open overflow file 103 | overflowFile, err := t.openFile(overflowFileName) 104 | if err != nil { 105 | return nil, err 106 | } 107 | t.overflowFile = overflowFile 108 | 109 | return t, nil 110 | } 111 | 112 | func checkOptions(options Options) error { 113 | if options.DirPath == "" { 114 | return errors.New("dir path cannot be empty") 115 | } 116 | if options.SlotValueLength <= 0 { 117 | return errors.New("slot value length must be greater than 0") 118 | } 119 | if options.LoadFactor < 0 || options.LoadFactor > 1 { 120 | return errors.New("load factor must be between 0 and 1") 121 | } 122 | return nil 123 | } 124 | 125 | // read the metadata info from the meta file. 126 | // if the file is empty, init the metadata info. 127 | func (t *Table) readMeta() error { 128 | file, err := fs.Open(filepath.Join(t.options.DirPath, metaFileName), fs.OSFileSystem) 129 | if err != nil { 130 | return err 131 | } 132 | t.metaFile = file 133 | t.meta = &tableMeta{} 134 | 135 | // init meta file if not exist 136 | if file.Size() == 0 { 137 | t.meta.NumBuckets = 1 138 | t.meta.SlotValueLength = t.options.SlotValueLength 139 | t.meta.BucketSize = slotsPerBucket*(hashLen+t.meta.SlotValueLength) + nextOffLen 140 | } else { 141 | decoder := json.NewDecoder(t.metaFile) 142 | if err := decoder.Decode(t.meta); err != nil { 143 | return err 144 | } 145 | // we require that the slot value length must be equal to the length specified in the options, 146 | // once the slot value length is set, it cannot be changed. 147 | if t.meta.SlotValueLength != t.options.SlotValueLength { 148 | return errors.New("slot value length mismatch") 149 | } 150 | } 151 | 152 | return nil 153 | } 154 | 155 | // write the metadata info to the meta file in json format. 156 | func (t *Table) writeMeta() error { 157 | encoder := json.NewEncoder(t.metaFile) 158 | return encoder.Encode(t.meta) 159 | } 160 | 161 | // Close closes the files of the hash table. 162 | func (t *Table) Close() error { 163 | t.mu.Lock() 164 | defer t.mu.Unlock() 165 | 166 | if err := t.writeMeta(); err != nil { 167 | return err 168 | } 169 | 170 | _ = t.primaryFile.Close() 171 | _ = t.overflowFile.Close() 172 | _ = t.metaFile.Close() 173 | return nil 174 | } 175 | 176 | // Sync flushes the data of the hash table to disk. 177 | func (t *Table) Sync() error { 178 | t.mu.Lock() 179 | defer t.mu.Unlock() 180 | 181 | if err := t.primaryFile.Sync(); err != nil { 182 | return err 183 | } 184 | 185 | if err := t.overflowFile.Sync(); err != nil { 186 | return err 187 | } 188 | 189 | return nil 190 | } 191 | 192 | // Put puts a new ke/value pair to the hash table. 193 | // the parameter matchKey is described in the MatchKeyFunc. 194 | func (t *Table) Put(key, value []byte, matchKey MatchKeyFunc) error { 195 | t.mu.Lock() 196 | defer t.mu.Unlock() 197 | 198 | // the value length must be equal to the length specified in the options 199 | if len(value) != int(t.meta.SlotValueLength) { 200 | return errors.New("value length must be equal to the length specified in the options") 201 | } 202 | 203 | // get the slot writer to write the new slot, 204 | // it will get the corresponding bucket according to the key hash, 205 | // and find an empty slot to insert. 206 | // If there are no empty slots, an overflow bucket will be created. 207 | keyHash := getKeyHash(key) 208 | slot := &Slot{Hash: keyHash, Value: value} 209 | sw, err := t.getSlotWriter(slot.Hash, matchKey) 210 | if err != nil { 211 | return err 212 | } 213 | 214 | // write the new slot to the bucket 215 | if err = sw.insertSlot(*slot, t); err != nil { 216 | return err 217 | } 218 | if err := sw.writeSlots(); err != nil { 219 | return err 220 | } 221 | // if the slot already exists, no need to update meta 222 | // because the number of keys has not changed 223 | if sw.overwrite { 224 | return nil 225 | } 226 | 227 | t.meta.NumKeys++ 228 | // split if the load factor is exceeded 229 | keyRatio := float64(t.meta.NumKeys) / float64(t.meta.NumBuckets*slotsPerBucket) 230 | if keyRatio > t.options.LoadFactor { 231 | if err := t.split(); err != nil { 232 | return err 233 | } 234 | } 235 | return nil 236 | } 237 | 238 | // find a free slot position to insert the new slot. 239 | // return the slot writer. 240 | func (t *Table) getSlotWriter(keyHash uint32, matchKey MatchKeyFunc) (*slotWriter, error) { 241 | sw := &slotWriter{} 242 | bi := t.newBucketIterator(t.getKeyBucket(keyHash)) 243 | // iterate all slots in the bucket and the overflow buckets, 244 | // find the slot to insert. 245 | for { 246 | b, err := bi.next() 247 | if err == io.EOF { 248 | return nil, errors.New("failed to put new slot") 249 | } 250 | if err != nil { 251 | return nil, err 252 | } 253 | 254 | sw.currentBucket = b 255 | // iterate all slots in current bucket, find the existing or empty slot 256 | for i, slot := range b.slots { 257 | // find an empty slot to insert 258 | if slot.Hash == 0 { 259 | sw.currentSlotIndex = i 260 | return sw, nil 261 | } 262 | // if the slot hash value is not equal to the key hash value, 263 | // which means the key will never be matched, so we can skip it. 264 | if slot.Hash != keyHash { 265 | continue 266 | } 267 | match, err := matchKey(slot) 268 | if err != nil { 269 | return nil, err 270 | } 271 | // key already exists, overwrite the value 272 | if match { 273 | sw.currentSlotIndex, sw.overwrite = i, true 274 | return sw, nil 275 | } 276 | } 277 | // no empty slot in the bucket and it's all overflow buckets, 278 | // create a new overflow bucket. 279 | if b.nextOffset == 0 { 280 | sw.currentSlotIndex = slotsPerBucket 281 | return sw, nil 282 | } 283 | } 284 | } 285 | 286 | // Get gets the value of the key from the hash table. 287 | // the parameter matchKey is described in the MatchKeyFunc. 288 | func (t *Table) Get(key []byte, matchKey MatchKeyFunc) error { 289 | t.mu.RLock() 290 | defer t.mu.RUnlock() 291 | 292 | // get the bucket according to the key hash 293 | keyHash := getKeyHash(key) 294 | startBucket := t.getKeyBucket(keyHash) 295 | bi := t.newBucketIterator(startBucket) 296 | // iterate all slots in the bucket and the overflow buckets, 297 | // find the slot to get. 298 | for { 299 | b, err := bi.next() 300 | if err == io.EOF { 301 | return nil 302 | } 303 | if err != nil { 304 | return err 305 | } 306 | for _, slot := range b.slots { 307 | // if the slot hash value is 0, which means the subsequent slots are all empty, 308 | // (why? when we write a new slot, we will iterate from the beginning of the bucket, find an empty slot to insert, 309 | // when we remove a slot, we will move the subsequent slots forward, so all non-empty slots will be continuous) 310 | // so we can skip the current bucket and move to the next bucket. 311 | if slot.Hash == 0 { 312 | break 313 | } 314 | // if the slot hash value is not equal to the key hash value, 315 | // which means the key will never be matched, so we can skip it. 316 | if slot.Hash != keyHash { 317 | continue 318 | } 319 | if match, err := matchKey(slot); match || err != nil { 320 | return err 321 | } 322 | } 323 | } 324 | } 325 | 326 | // Delete deletes the key from the hash table. 327 | // the parameter matchKey is described in the MatchKeyFunc. 328 | func (t *Table) Delete(key []byte, matchKey MatchKeyFunc) error { 329 | t.mu.Lock() 330 | defer t.mu.Unlock() 331 | 332 | // get the bucket according to the key hash 333 | keyHash := getKeyHash(key) 334 | bi := t.newBucketIterator(t.getKeyBucket(keyHash)) 335 | // iterate all slots in the bucket and the overflow buckets, 336 | // find the slot to delete. 337 | for { 338 | b, err := bi.next() 339 | if err == io.EOF { 340 | return nil 341 | } 342 | if err != nil { 343 | return err 344 | } 345 | 346 | // the following code is similar to the Get method 347 | for i, slot := range b.slots { 348 | if slot.Hash == 0 { 349 | break 350 | } 351 | if slot.Hash != keyHash { 352 | continue 353 | } 354 | match, err := matchKey(slot) 355 | if err != nil { 356 | return err 357 | } 358 | if !match { 359 | continue 360 | } 361 | // now we find the slot to delete, remove it from the bucket 362 | b.removeSlot(i) 363 | if err := b.write(); err != nil { 364 | return err 365 | } 366 | t.meta.NumKeys-- 367 | return nil 368 | } 369 | } 370 | } 371 | 372 | // Size returns the number of keys in the hash table. 373 | func (t *Table) Size() uint32 { 374 | t.mu.RLock() 375 | defer t.mu.RUnlock() 376 | return t.meta.NumKeys 377 | } 378 | 379 | // get the hash value according to the key 380 | func getKeyHash(key []byte) uint32 { 381 | return murmur3.Sum32(key) 382 | } 383 | 384 | // get the bucket according to the key 385 | func (t *Table) getKeyBucket(keyHash uint32) uint32 { 386 | bucketIndex := keyHash & ((1 << t.meta.Level) - 1) 387 | if bucketIndex < t.meta.SplitBucketIndex { 388 | return keyHash & ((1 << (t.meta.Level + 1)) - 1) 389 | } 390 | return bucketIndex 391 | } 392 | 393 | func (t *Table) openFile(name string) (fs.File, error) { 394 | file, err := fs.Open(filepath.Join(t.options.DirPath, name), fs.OSFileSystem) 395 | if err != nil { 396 | return nil, err 397 | } 398 | // init (dummy) file header 399 | // the first bucket size in the file is not used, so we just init it. 400 | if file.Size() == 0 { 401 | if err := file.Truncate(int64(t.meta.BucketSize)); err != nil { 402 | _ = file.Close() 403 | return nil, err 404 | } 405 | } 406 | return file, nil 407 | } 408 | 409 | // split the current bucket. 410 | // it will create a new bucket, and rewrite all slots in the current bucket and the overflow buckets. 411 | func (t *Table) split() error { 412 | // the bucket to be split 413 | splitBucketIndex := t.meta.SplitBucketIndex 414 | splitSlotWriter := &slotWriter{ 415 | currentBucket: &bucket{ 416 | file: t.primaryFile, 417 | offset: t.bucketOffset(splitBucketIndex), 418 | bucketSize: t.meta.BucketSize, 419 | }, 420 | } 421 | 422 | // create a new bucket 423 | newSlotWriter := &slotWriter{ 424 | currentBucket: &bucket{ 425 | file: t.primaryFile, 426 | offset: t.primaryFile.Size(), 427 | bucketSize: t.meta.BucketSize, 428 | }, 429 | } 430 | if err := t.primaryFile.Truncate(int64(t.meta.BucketSize)); err != nil { 431 | return err 432 | } 433 | 434 | // increase the split bucket index 435 | t.meta.SplitBucketIndex++ 436 | // if the split bucket index is equal to 1 << level, 437 | // reset the split bucket index to 0, and increase the level. 438 | if t.meta.SplitBucketIndex == 1< 0 { 478 | t.meta.FreeBuckets = append(t.meta.FreeBuckets, freeBuckets...) 479 | } 480 | 481 | // write all slots to the file 482 | if err := splitSlotWriter.writeSlots(); err != nil { 483 | return err 484 | } 485 | if err := newSlotWriter.writeSlots(); err != nil { 486 | return err 487 | } 488 | 489 | t.meta.NumBuckets++ 490 | return nil 491 | } 492 | 493 | // create overflow bucket. 494 | // If there are free buckets, it will reuse the free buckets. 495 | // Otherwise, it will create a new bucket in the overflow file. 496 | func (t *Table) createOverflowBucket() (*bucket, error) { 497 | var offset int64 498 | if len(t.meta.FreeBuckets) > 0 { 499 | offset = t.meta.FreeBuckets[0] 500 | t.meta.FreeBuckets = t.meta.FreeBuckets[1:] 501 | } else { 502 | offset = t.overflowFile.Size() 503 | err := t.overflowFile.Truncate(int64(t.meta.BucketSize)) 504 | if err != nil { 505 | return nil, err 506 | } 507 | } 508 | 509 | return &bucket{ 510 | file: t.overflowFile, 511 | offset: offset, 512 | bucketSize: t.meta.BucketSize, 513 | }, nil 514 | } 515 | --------------------------------------------------------------------------------