├── go.mod
├── fs
    ├── fs.go
    └── os_file.go
├── .gitignore
├── options.go
├── go.sum
├── .github
    └── workflows
    │   └── go.yml
├── examples
    └── main.go
├── benchmark
    └── bench_test.go
├── README.md
├── bucket.go
├── table_test.go
├── LICENSE
└── table.go


/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/rosedblabs/diskhash
 2 | 
 3 | go 1.19
 4 | 
 5 | require (
 6 | 	github.com/spaolacci/murmur3 v1.1.0
 7 | 	github.com/stretchr/testify v1.8.4
 8 | )
 9 | 
10 | require (
11 | 	github.com/davecgh/go-spew v1.1.1 // indirect
12 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
13 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
14 | )
15 | 


--------------------------------------------------------------------------------
/fs/fs.go:
--------------------------------------------------------------------------------
 1 | package fs
 2 | 
 3 | import "io"
 4 | 
 5 | type File interface {
 6 | 	io.Reader
 7 | 	io.ReaderAt
 8 | 	io.Writer
 9 | 	io.WriterAt
10 | 	io.Closer
11 | 	Truncate(int64) error
12 | 	Size() int64
13 | 	Sync() error
14 | }
15 | 
16 | type FileSystem = byte
17 | 
18 | const (
19 | 	OSFileSystem FileSystem = iota
20 | )
21 | 
22 | func Open(name string, fs FileSystem) (File, error) {
23 | 	switch fs {
24 | 	case OSFileSystem:
25 | 		return openOSFile(name)
26 | 	}
27 | 	return nil, nil
28 | }
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # If you prefer the allow list template instead of the deny list, see community template:
 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
 3 | #
 4 | # Binaries for programs and plugins
 5 | *.exe
 6 | *.exe~
 7 | *.dll
 8 | *.so
 9 | *.dylib
10 | 
11 | # Test binary, built with `go test -c`
12 | *.test
13 | 
14 | # Output of the go coverage tool, specifically when used with LiteIDE
15 | *.out
16 | 
17 | # Dependency directories (remove the comment below to include it)
18 | # vendor/
19 | 
20 | # Go workspace file
21 | go.work
22 | 


--------------------------------------------------------------------------------
/options.go:
--------------------------------------------------------------------------------
 1 | package diskhash
 2 | 
 3 | import "os"
 4 | 
 5 | // Options is used to create a new diskhash table.
 6 | type Options struct {
 7 | 	// DirPath is the directory path to store the hash table files.
 8 | 	DirPath string
 9 | 
10 | 	// SlotValueLength is the length of the value in each slot.
11 | 	// Your value lenght must be equal to the value length you set when creating the table.
12 | 	SlotValueLength uint32
13 | 
14 | 	// LoadFactor is the load factor of the hash table.
15 | 	// The load factor is the ratio of the number of elements in the hash table to the table size.
16 | 	// If the ratio is greater than the load factor, the hash table will be expanded automatically.
17 | 	// The default value is 0.7.
18 | 	LoadFactor float64
19 | }
20 | 
21 | // DefaultOptions is the default options.
22 | var DefaultOptions = Options{
23 | 	DirPath:         os.TempDir(),
24 | 	SlotValueLength: 0,
25 | 	LoadFactor:      0.7,
26 | }
27 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 5 | github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
 6 | github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
 7 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 8 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
13 | 


--------------------------------------------------------------------------------
/fs/os_file.go:
--------------------------------------------------------------------------------
 1 | package fs
 2 | 
 3 | import (
 4 | 	"os"
 5 | )
 6 | 
 7 | type OSFile struct {
 8 | 	fd   *os.File
 9 | 	size int64
10 | }
11 | 
12 | func openOSFile(name string) (File, error) {
13 | 	fd, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0644)
14 | 	if err != nil {
15 | 		return nil, err
16 | 	}
17 | 
18 | 	// get the file size
19 | 	stat, err := fd.Stat()
20 | 	if err != nil {
21 | 		return nil, err
22 | 	}
23 | 	return &OSFile{fd: fd, size: stat.Size()}, nil
24 | }
25 | 
26 | func (of *OSFile) Read(p []byte) (n int, err error) {
27 | 	return of.fd.Read(p)
28 | }
29 | 
30 | func (of *OSFile) ReadAt(b []byte, off int64) (n int, err error) {
31 | 	return of.fd.ReadAt(b, off)
32 | }
33 | 
34 | func (of *OSFile) Write(p []byte) (n int, err error) {
35 | 	return of.fd.Write(p)
36 | }
37 | 
38 | func (of *OSFile) WriteAt(b []byte, off int64) (n int, err error) {
39 | 	return of.fd.WriteAt(b, off)
40 | }
41 | 
42 | func (of *OSFile) Truncate(size int64) error {
43 | 	err := of.fd.Truncate(of.size + size)
44 | 	if err != nil {
45 | 		return err
46 | 	}
47 | 	of.size += size
48 | 	return nil
49 | }
50 | 
51 | func (of *OSFile) Size() int64 {
52 | 	return of.size
53 | }
54 | 
55 | func (of *OSFile) Sync() error {
56 | 	return of.fd.Sync()
57 | }
58 | 
59 | func (of *OSFile) Close() error {
60 | 	return of.fd.Close()
61 | }
62 | 


--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: Go
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | jobs:
10 | 
11 |   ubuntu-test:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v2
15 | 
16 |       - name: Set up Go
17 |         uses: actions/setup-go@v2
18 |         with:
19 |           go-version: 1.19
20 | 
21 |       - name: Run Go Vet
22 |         run: |
23 |           go vet ./...
24 | 
25 |       - name: Run Go Fmt
26 |         run: |
27 |           files=$(go fmt ./...)
28 |           if [ -n "$files" ]; then
29 |             echo "Please run gofmt on these files ..."
30 |             echo "$files"
31 |             exit 1
32 |           fi
33 | 
34 |       - name: Build
35 |         run: go build -v
36 | 
37 |       - name: Run Unit Test
38 |         run: go test -count 1 -v ./...
39 |         
40 |   windows-test:
41 |     runs-on: windows-latest
42 |     steps:
43 |       - uses: actions/checkout@v2
44 | 
45 |       - name: Set up Go
46 |         uses: actions/setup-go@v2
47 |         with:
48 |           go-version: 1.19
49 | 
50 |       - name: Run Go Vet
51 |         run: |
52 |           go vet ./...
53 | 
54 |       - name: Build
55 |         run: go build -v
56 | 
57 |       - name: Run Unit Test
58 |         run: go test -count 1 -v ./...
59 | 


--------------------------------------------------------------------------------
/examples/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"github.com/rosedblabs/diskhash"
 6 | 	"strings"
 7 | )
 8 | 
 9 | func main() {
10 | 	// open the table, specify the slot value length,
11 | 	// remember that you can't change it once you set it, and all values must be the same length.
12 | 	options := diskhash.DefaultOptions
13 | 	options.DirPath = "/tmp/diskhash-test"
14 | 	options.SlotValueLength = 10
15 | 	table, err := diskhash.Open(options)
16 | 	if err != nil {
17 | 		panic(err)
18 | 	}
19 | 
20 | 	// don't forget to close the table!!!
21 | 	// some meta info will be saved when you close the table.
22 | 	defer func() {
23 | 		_ = table.Close()
24 | 	}()
25 | 
26 | 	// put a key-value pair into the table.
27 | 	// the MatchKey function will be called when the key is matched.
28 | 	// Why we need the MatchKey function?
29 | 	// When we store the data in the hash table, we only store the hash value of the key, and the raw value.
30 | 	// So when we get the data from hash table, even if the hash value of the key matches, that doesn't mean
31 | 	// the key matches because of hash collision.
32 | 	// So we need to provide a function to determine whether the key of the slot matches the stored key.
33 | 	err = table.Put([]byte("key1"), []byte(strings.Repeat("v", 10)), func(slot diskhash.Slot) (bool, error) {
34 | 		return true, nil
35 | 	})
36 | 	if err != nil {
37 | 		panic(err)
38 | 	}
39 | 
40 | 	err = table.Get([]byte("key1"), func(slot diskhash.Slot) (bool, error) {
41 | 		fmt.Println("val =", string(slot.Value))
42 | 		return true, nil
43 | 	})
44 | 	if err != nil {
45 | 		panic(err)
46 | 	}
47 | 
48 | 	err = table.Delete([]byte("key1"), func(slot diskhash.Slot) (bool, error) {
49 | 		return true, nil
50 | 	})
51 | 	if err != nil {
52 | 		panic(err)
53 | 	}
54 | }
55 | 


--------------------------------------------------------------------------------
/benchmark/bench_test.go:
--------------------------------------------------------------------------------
 1 | package benchmark
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"github.com/spaolacci/murmur3"
 6 | 	"math/rand"
 7 | 	"os"
 8 | 	"strings"
 9 | 	"testing"
10 | 
11 | 	"github.com/rosedblabs/diskhash"
12 | 	"github.com/stretchr/testify/assert"
13 | )
14 | 
15 | func newTable() (*diskhash.Table, func()) {
16 | 	options := diskhash.DefaultOptions
17 | 	options.SlotValueLength = 20
18 | 	options.DirPath = "/tmp/diskhash-bench"
19 | 	table, err := diskhash.Open(options)
20 | 	if err != nil {
21 | 		panic(err)
22 | 	}
23 | 
24 | 	return table, func() {
25 | 		_ = table.Close()
26 | 		_ = os.RemoveAll(options.DirPath)
27 | 	}
28 | }
29 | 
30 | func BenchmarkPut(b *testing.B) {
31 | 	table, destroy := newTable()
32 | 	defer destroy()
33 | 
34 | 	b.ResetTimer()
35 | 	b.ReportAllocs()
36 | 
37 | 	value := []byte(strings.Repeat("d", 20))
38 | 	for i := 0; i < b.N; i++ {
39 | 		err := table.Put(GetTestKey(i), value, func(slot diskhash.Slot) (bool, error) {
40 | 			return false, nil
41 | 		})
42 | 		assert.Nil(b, err)
43 | 	}
44 | }
45 | 
46 | func BenchmarkGet(b *testing.B) {
47 | 	table, destroy := newTable()
48 | 	defer destroy()
49 | 
50 | 	value := []byte(strings.Repeat("d", 20))
51 | 	for i := 0; i < 100000; i++ {
52 | 		err := table.Put(GetTestKey(i), value, func(slot diskhash.Slot) (bool, error) {
53 | 			return false, nil
54 | 		})
55 | 		assert.Nil(b, err)
56 | 	}
57 | 
58 | 	b.ResetTimer()
59 | 	b.ReportAllocs()
60 | 
61 | 	for i := 0; i < b.N; i++ {
62 | 		key := GetTestKey(rand.Intn(100000))
63 | 		err := table.Get(key, func(slot diskhash.Slot) (bool, error) {
64 | 			hash := murmur3.Sum32(key)
65 | 			if hash == slot.Hash {
66 | 				return true, nil
67 | 			}
68 | 			return false, nil
69 | 		})
70 | 		assert.Nil(b, err)
71 | 	}
72 | }
73 | 
74 | func GetTestKey(i int) []byte {
75 | 	return []byte(fmt.Sprintf("diskhash-test-key-%09d", i))
76 | }
77 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # diskhash
  2 | on-disk hash table index(mainly for WAL).
  3 | 
  4 | ## When will you need it?
  5 | If you are using [WAL](https://github.com/rosedblabs/wal) to store your data,
  6 | 
  7 | > wal: https://github.com/rosedblabs/wal
  8 | 
  9 | you will get the positions to get the data from WAL, the common way to store the positions is to use an in-memory index(like rosedb).
 10 | 
 11 | But if you have a large amount of data, and it will take a lot of time to load the index into memory when you restart the system.
 12 | 
 13 | So, you can use diskhash to store the index on disk.
 14 | 
 15 | ## Can be used as a general hash table index(without wal)?
 16 | 
 17 | yes, you can use it as an on-disk hash table index, but the restriction is that the value must be fixed size.
 18 | you can set the value size when you create the index, and once you set the value size, you can't change it.
 19 | 
 20 | But don't set the value size too large(1KB), the disk size maybe increase dramatically because of the write amplification.
 21 | **it is suitable for storing some metadata of your system.**
 22 | 
 23 | ## Design Overview
 24 | The diskhash consists of two disk files: main and overflow.
 25 | The file format is as follows:
 26 | ```
 27 | File Format:
 28 | +---------------+---------------+---------------+---------------+-----+----------------+
 29 | |    (unused)   |    bucket0    |    bucket1    |    bucket2    | ... |     bucketN    |
 30 | +---------------+---------------+---------------+---------------+-----+----------------+
 31 | ```
 32 | 
 33 | A file is divided into multiple buckets, if the table reaches the load factor, a new bucket will be appended to the end of the file.
 34 | A bucket contains 31 slots, and an overflow offset which points to the overflow file buckets.
 35 | ```
 36 | Bucket Format:
 37 | +-------------+-------------+-------------+-------------+-----+--------------+-----------------+
 38 | |   slot0     |   slot1     |   slot2     |   slot3     | ... |    slotN     | overflow_offset |
 39 | +-------------+-------------+-------------+-------------+-----+--------------+-----------------+
 40 | ```
 41 | 
 42 | A slot contains a key hash value, and user-defined value.
 43 | ```
 44 | Slot Format:
 45 | +-----------------------+--------------------------------+
 46 | |      key_hash(4B)     |          value(N Bytes)        |
 47 | +-----------------------+--------------------------------+
 48 | ```
 49 | 
 50 | ## Getting Started
 51 | ```go
 52 | package main
 53 | 
 54 | import (
 55 | 	"fmt"
 56 | 	"github.com/rosedblabs/diskhash"
 57 | 	"strings"
 58 | )
 59 | 
 60 | func main() {
 61 | 	// open the table, specify the slot value length,
 62 | 	// remember that you can't change it once you set it, and all values must be the same length.
 63 | 	options := diskhash.DefaultOptions
 64 | 	options.DirPath = "/tmp/diskhash-test"
 65 | 	options.SlotValueLength = 10
 66 | 	table, err := diskhash.Open(options)
 67 | 	if err != nil {
 68 | 		panic(err)
 69 | 	}
 70 | 
 71 | 	// don't forget to close the table!!!
 72 | 	// some meta info will be saved when you close the table.
 73 | 	defer func() {
 74 | 		_ = table.Close()
 75 | 	}()
 76 | 
 77 | 	// put a key-value pair into the table.
 78 | 	// the MatchKey function will be called when the key is matched.
 79 | 	// When we store the data in the hash table, we only store the hash value of the key, and the raw value.
 80 | 	// So when we get the data from hash table, even if the hash value of the key matches, that doesn't mean
 81 | 	// the key matches because of hash collision.
 82 | 	// So we need to provide a function to determine whether the key of the slot matches the stored key.
 83 | 	err = table.Put([]byte("key1"), []byte(strings.Repeat("v", 10)), func(slot diskhash.Slot) (bool, error) {
 84 | 		return true, nil
 85 | 	})
 86 | 	if err != nil {
 87 | 		panic(err)
 88 | 	}
 89 | 
 90 | 	err = table.Get([]byte("key1"), func(slot diskhash.Slot) (bool, error) {
 91 | 		fmt.Println("val =", string(slot.Value))
 92 | 		return true, nil
 93 | 	})
 94 | 	if err != nil {
 95 | 		panic(err)
 96 | 	}
 97 | 
 98 | 	err = table.Delete([]byte("key1"), func(slot diskhash.Slot) (bool, error) {
 99 | 		return true, nil
100 | 	})
101 | 	if err != nil {
102 | 		panic(err)
103 | 	}
104 | }
105 | ```
106 | 


--------------------------------------------------------------------------------
/bucket.go:
--------------------------------------------------------------------------------
  1 | package diskhash
  2 | 
  3 | import (
  4 | 	"encoding/binary"
  5 | 	"io"
  6 | 
  7 | 	"github.com/rosedblabs/diskhash/fs"
  8 | )
  9 | 
 10 | // bucket is the basic unit of a file in diskhash.
 11 | // each file contains 31 slots at most.
 12 | type bucket struct {
 13 | 	slots      [slotsPerBucket]Slot // 31 slots now
 14 | 	offset     int64                // the offset of the bucket in the file
 15 | 	nextOffset int64                // the offset of the next overflow bucket
 16 | 	file       fs.File              // the file that contains the bucket
 17 | 	bucketSize uint32
 18 | }
 19 | 
 20 | // bucketIterator is used to iterate all buckets in hash table.
 21 | type bucketIterator struct {
 22 | 	currentFile  fs.File
 23 | 	overflowFile fs.File
 24 | 	offset       int64
 25 | 
 26 | 	slotValueLen uint32
 27 | 	bucketSize   uint32
 28 | }
 29 | 
 30 | // Slot is the basic unit of a bucket.
 31 | // each slot contains a key hash and a value.
 32 | type Slot struct {
 33 | 	Hash  uint32 // the hash of the key
 34 | 	Value []byte // raw value
 35 | }
 36 | 
 37 | type slotWriter struct {
 38 | 	currentBucket    *bucket
 39 | 	currentSlotIndex int
 40 | 	prevBuckets      []*bucket
 41 | 	overwrite        bool
 42 | }
 43 | 
 44 | func (t *Table) bucketOffset(bucketIndex uint32) int64 {
 45 | 	return int64((bucketIndex + 1) * t.meta.BucketSize)
 46 | }
 47 | 
 48 | func (t *Table) newBucketIterator(startBucket uint32) *bucketIterator {
 49 | 	return &bucketIterator{
 50 | 		currentFile:  t.primaryFile,
 51 | 		overflowFile: t.overflowFile,
 52 | 		offset:       t.bucketOffset(startBucket),
 53 | 		slotValueLen: t.options.SlotValueLength,
 54 | 		bucketSize:   t.meta.BucketSize,
 55 | 	}
 56 | }
 57 | 
 58 | func (bi *bucketIterator) next() (*bucket, error) {
 59 | 	// we skip the first bucket size in both primary and overflow file,
 60 | 	// so the bucket offset will never be 0.
 61 | 	if bi.offset == 0 {
 62 | 		return nil, io.EOF
 63 | 	}
 64 | 
 65 | 	// read the bucket and get all solts in it
 66 | 	bucket, err := bi.readBucket()
 67 | 	if err != nil {
 68 | 		return nil, err
 69 | 	}
 70 | 
 71 | 	// move to next overflow bucket
 72 | 	bi.offset = bucket.nextOffset
 73 | 	bi.currentFile = bi.overflowFile
 74 | 	return bucket, nil
 75 | }
 76 | 
 77 | // readBucket reads a bucket from the current file.
 78 | func (bi *bucketIterator) readBucket() (*bucket, error) {
 79 | 	// read an entire bucket with all slots
 80 | 	bucketBuf := make([]byte, bi.bucketSize)
 81 | 	if _, err := bi.currentFile.ReadAt(bucketBuf, bi.offset); err != nil {
 82 | 		return nil, err
 83 | 	}
 84 | 
 85 | 	b := &bucket{file: bi.currentFile, offset: bi.offset, bucketSize: bi.bucketSize}
 86 | 	// parse and get slots in the bucket
 87 | 	for i := 0; i < slotsPerBucket; i++ {
 88 | 		_ = bucketBuf[hashLen+bi.slotValueLen]
 89 | 		b.slots[i].Hash = binary.LittleEndian.Uint32(bucketBuf[:hashLen])
 90 | 		if b.slots[i].Hash != 0 {
 91 | 			b.slots[i].Value = bucketBuf[hashLen : hashLen+bi.slotValueLen]
 92 | 		}
 93 | 		bucketBuf = bucketBuf[hashLen+bi.slotValueLen:]
 94 | 	}
 95 | 
 96 | 	// the last 8 bytes is the offset of next overflow bucket
 97 | 	b.nextOffset = int64(binary.LittleEndian.Uint64(bucketBuf[:nextOffLen]))
 98 | 
 99 | 	return b, nil
100 | }
101 | 
102 | func (sw *slotWriter) insertSlot(sl Slot, t *Table) error {
103 | 	// if we exceed the slotsPerBucket, we need to create a new overflow bucket
104 | 	// and link it to the current bucket
105 | 	if sw.currentSlotIndex == slotsPerBucket {
106 | 		nextBucket, err := t.createOverflowBucket()
107 | 		if err != nil {
108 | 			return err
109 | 		}
110 | 		sw.currentBucket.nextOffset = nextBucket.offset
111 | 		sw.prevBuckets = append(sw.prevBuckets, sw.currentBucket)
112 | 		sw.currentBucket = nextBucket
113 | 		sw.currentSlotIndex = 0
114 | 	}
115 | 
116 | 	sw.currentBucket.slots[sw.currentSlotIndex] = sl
117 | 	sw.currentSlotIndex++
118 | 	return nil
119 | }
120 | 
121 | func (sw *slotWriter) writeSlots() error {
122 | 	for i := len(sw.prevBuckets) - 1; i >= 0; i-- {
123 | 		if err := sw.prevBuckets[i].write(); err != nil {
124 | 			return err
125 | 		}
126 | 	}
127 | 	return sw.currentBucket.write()
128 | }
129 | 
130 | // write all slots in the bucket to the file.
131 | func (b *bucket) write() error {
132 | 	buf := make([]byte, b.bucketSize)
133 | 	// write all slots to the buffer
134 | 	var index = 0
135 | 	for i := 0; i < slotsPerBucket; i++ {
136 | 		slot := b.slots[i]
137 | 
138 | 		binary.LittleEndian.PutUint32(buf[index:index+hashLen], slot.Hash)
139 | 		copy(buf[index+hashLen:index+hashLen+len(slot.Value)], slot.Value)
140 | 
141 | 		index += hashLen + len(slot.Value)
142 | 	}
143 | 
144 | 	// write the offset of next overflow bucket
145 | 	binary.LittleEndian.PutUint64(buf[len(buf)-nextOffLen:], uint64(b.nextOffset))
146 | 
147 | 	_, err := b.file.WriteAt(buf, b.offset)
148 | 	return err
149 | }
150 | 
151 | // remove a slot from the bucket, and move all slots after it forward
152 | // to fill the empty slot.
153 | func (b *bucket) removeSlot(slotIndex int) {
154 | 	i := slotIndex
155 | 	for ; i < slotsPerBucket-1; i++ {
156 | 		b.slots[i] = b.slots[i+1]
157 | 	}
158 | 	b.slots[i] = Slot{}
159 | }
160 | 


--------------------------------------------------------------------------------
/table_test.go:
--------------------------------------------------------------------------------
  1 | package diskhash
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"os"
  6 | 	"strings"
  7 | 	"testing"
  8 | 
  9 | 	"github.com/stretchr/testify/assert"
 10 | )
 11 | 
 12 | func destroyTable(t *Table) {
 13 | 	_ = t.Close()
 14 | 	_ = os.RemoveAll(t.options.DirPath)
 15 | }
 16 | 
 17 | func GetTestKey(i int) []byte {
 18 | 	return []byte(fmt.Sprintf("diskhash-test-key-%09d", i))
 19 | }
 20 | 
 21 | func TestOpen(t *testing.T) {
 22 | 	dir, err := os.MkdirTemp("", "diskhash-test-open")
 23 | 	assert.Nil(t, err)
 24 | 
 25 | 	options := DefaultOptions
 26 | 	options.DirPath = dir
 27 | 	options.SlotValueLength = 10
 28 | 	table, err := Open(options)
 29 | 	assert.Nil(t, err)
 30 | 	defer destroyTable(table)
 31 | 
 32 | 	err = table.Close()
 33 | 	assert.Nil(t, err)
 34 | }
 35 | 
 36 | func TestTable_Put(t *testing.T) {
 37 | 	t.Run("16B-1000000", func(t *testing.T) {
 38 | 		testTableBaisc(t, 16, 1000000, false, false, false)
 39 | 	})
 40 | 	t.Run("20B-100000", func(t *testing.T) {
 41 | 		testTableBaisc(t, 16, 1000000, false, false, false)
 42 | 	})
 43 | 	t.Run("1K-50000", func(t *testing.T) {
 44 | 		testTableBaisc(t, 1024, 50000, false, false, false)
 45 | 	})
 46 | 	t.Run("4K-50000", func(t *testing.T) {
 47 | 		testTableBaisc(t, 4*1024, 50000, false, false, false)
 48 | 	})
 49 | }
 50 | 
 51 | func TestTable_Get(t *testing.T) {
 52 | 	t.Run("16B-1000000", func(t *testing.T) {
 53 | 		testTableBaisc(t, 16, 1000000, true, false, false)
 54 | 	})
 55 | 	t.Run("20B-1000000", func(t *testing.T) {
 56 | 		testTableBaisc(t, 16, 1000000, true, false, false)
 57 | 	})
 58 | 	t.Run("1K-50000", func(t *testing.T) {
 59 | 		testTableBaisc(t, 1024, 50000, true, false, false)
 60 | 	})
 61 | 	t.Run("4K-50000", func(t *testing.T) {
 62 | 		testTableBaisc(t, 4*1024, 50000, true, false, false)
 63 | 	})
 64 | }
 65 | 
 66 | func TestTable_Delete(t *testing.T) {
 67 | 	t.Run("16B-1000000", func(t *testing.T) {
 68 | 		testTableBaisc(t, 16, 1000000, false, true, false)
 69 | 	})
 70 | 	t.Run("20B-1000000", func(t *testing.T) {
 71 | 		testTableBaisc(t, 16, 1000000, false, true, false)
 72 | 	})
 73 | 	t.Run("1K-50000", func(t *testing.T) {
 74 | 		testTableBaisc(t, 1024, 50000, false, true, false)
 75 | 	})
 76 | 	t.Run("4K-50000", func(t *testing.T) {
 77 | 		testTableBaisc(t, 4*1024, 50000, false, true, false)
 78 | 	})
 79 | }
 80 | 
 81 | func TestTable_Update(t *testing.T) {
 82 | 	t.Run("16B-1000000", func(t *testing.T) {
 83 | 		testTableBaisc(t, 16, 1000000, false, false, true)
 84 | 	})
 85 | 	t.Run("20B-1000000", func(t *testing.T) {
 86 | 		testTableBaisc(t, 16, 1000000, false, false, true)
 87 | 	})
 88 | 	t.Run("1K-50000", func(t *testing.T) {
 89 | 		testTableBaisc(t, 1024, 50000, false, false, true)
 90 | 	})
 91 | 	t.Run("4K-50000", func(t *testing.T) {
 92 | 		testTableBaisc(t, 4*1024, 50000, false, false, true)
 93 | 	})
 94 | }
 95 | 
 96 | func testTableBaisc(t *testing.T, valueLen uint32, count int, needGet, needDelete, needUpdate bool) {
 97 | 	dir, err := os.MkdirTemp("", "diskhash-test")
 98 | 	assert.Nil(t, err)
 99 | 
100 | 	options := DefaultOptions
101 | 	options.DirPath = dir
102 | 	options.SlotValueLength = valueLen
103 | 	table, err := Open(options)
104 | 	assert.Nil(t, err)
105 | 	defer destroyTable(table)
106 | 
107 | 	value := []byte(strings.Repeat("D", int(valueLen)))
108 | 	for i := 0; i < count; i++ {
109 | 		key := GetTestKey(i)
110 | 		err = table.Put(key, value, func(slot Slot) (bool, error) {
111 | 			return false, nil
112 | 		})
113 | 		assert.Nil(t, err)
114 | 	}
115 | 
116 | 	getValue := func(target []byte) {
117 | 		for i := 0; i < count; i++ {
118 | 			key := GetTestKey(i)
119 | 			var res []byte
120 | 			matchKey := func(slot Slot) (bool, error) {
121 | 				if getKeyHash(key) == slot.Hash {
122 | 					res = make([]byte, len(slot.Value))
123 | 					copy(res, slot.Value)
124 | 					return true, nil
125 | 				}
126 | 				return false, nil
127 | 			}
128 | 			err := table.Get(key, matchKey)
129 | 			assert.Equal(t, target, res)
130 | 			assert.Nil(t, err)
131 | 		}
132 | 	}
133 | 
134 | 	if needGet {
135 | 		getValue(value)
136 | 	}
137 | 
138 | 	if needDelete {
139 | 		assert.Equal(t, uint32(count), table.Size())
140 | 		for i := 0; i < count; i++ {
141 | 			key := GetTestKey(i)
142 | 			matchKey := func(slot Slot) (bool, error) {
143 | 				if getKeyHash(key) == slot.Hash {
144 | 					return true, nil
145 | 				}
146 | 				return false, nil
147 | 			}
148 | 			err := table.Delete(key, matchKey)
149 | 			assert.Nil(t, err)
150 | 		}
151 | 		assert.Equal(t, uint32(0), table.Size())
152 | 		getValue(nil)
153 | 	}
154 | 
155 | 	if needUpdate {
156 | 		assert.Equal(t, uint32(count), table.Size())
157 | 		newValue := []byte(strings.Repeat("H", int(valueLen)))
158 | 		for i := 0; i < count; i++ {
159 | 			key := GetTestKey(i)
160 | 			matchKey := func(slot Slot) (bool, error) {
161 | 				if getKeyHash(key) == slot.Hash {
162 | 					return true, nil
163 | 				}
164 | 				return false, nil
165 | 			}
166 | 			err := table.Put(key, newValue, matchKey)
167 | 			assert.Nil(t, err)
168 | 		}
169 | 		getValue(newValue)
170 | 		assert.Equal(t, uint32(count), table.Size())
171 | 	}
172 | }
173 | 
174 | func TestTableCRUD(t *testing.T) {
175 | 	dir, err := os.MkdirTemp("", "diskhash-test-crud")
176 | 	assert.Nil(t, err)
177 | 
178 | 	options := DefaultOptions
179 | 	options.DirPath = dir
180 | 	options.SlotValueLength = 32
181 | 	table, err := Open(options)
182 | 	assert.Nil(t, err)
183 | 	defer destroyTable(table)
184 | 
185 | 	for i := 0; i < 100; i++ {
186 | 		var cur []byte
187 | 
188 | 		getFunc := func(slot Slot) (bool, error) {
189 | 			cur = slot.Value
190 | 			return false, nil
191 | 		}
192 | 		updateFunc := func(slot Slot) (bool, error) {
193 | 			return false, nil
194 | 		}
195 | 
196 | 		key := GetTestKey(i)
197 | 		value := []byte(strings.Repeat("D", 32))
198 | 
199 | 		// put
200 | 		err = table.Put(key, value, updateFunc)
201 | 		assert.Nil(t, err)
202 | 
203 | 		// get
204 | 		err = table.Get(key, getFunc)
205 | 		assert.Nil(t, err)
206 | 		assert.Equal(t, value, cur)
207 | 
208 | 		// put different value
209 | 		value = []byte(strings.Repeat("A", 32))
210 | 		err = table.Put(key, value, updateFunc)
211 | 		assert.Nil(t, err)
212 | 
213 | 		// get after put different value
214 | 		err = table.Get(key, getFunc)
215 | 		assert.Nil(t, err)
216 | 		assert.Equal(t, value, cur)
217 | 
218 | 		// delete
219 | 		err = table.Delete(key, updateFunc)
220 | 		assert.Nil(t, err)
221 | 	}
222 | }
223 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/table.go:
--------------------------------------------------------------------------------
  1 | package diskhash
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"errors"
  6 | 	"io"
  7 | 	"os"
  8 | 	"path/filepath"
  9 | 	"sync"
 10 | 
 11 | 	"github.com/rosedblabs/diskhash/fs"
 12 | 	"github.com/spaolacci/murmur3"
 13 | )
 14 | 
 15 | const (
 16 | 	primaryFileName  = "HASH.PRIMARY"
 17 | 	overflowFileName = "HASH.OVERFLOW"
 18 | 	metaFileName     = "HASH.META"
 19 | 	slotsPerBucket   = 31
 20 | 	nextOffLen       = 8
 21 | 	hashLen          = 4
 22 | )
 23 | 
 24 | // MatchKeyFunc is used to determine whether the key of the slot matches the stored key.
 25 | // And you must supply the function to the Put/Get/Delete methods.
 26 | //
 27 | // Why we need this function?
 28 | //
 29 | // When we store the data in the hash table, we only store the hash value of the key, and the raw value.
 30 | // So when we get the data from hash table, even if the hash value of the key matches, that doesn't mean
 31 | // the key matches because of hash collision.
 32 | // So we need to provide a function to determine whether the key of the slot matches the stored key.
 33 | type MatchKeyFunc func(Slot) (bool, error)
 34 | 
 35 | // Table is a hash table that stores data on disk.
 36 | // It consists of two files, the primary file and the overflow file.
 37 | // Each file is divided into multiple buckets, each bucket contains multiple slots.
 38 | //
 39 | // The overview design of the hash table is as the Linear Hashing algorithm.
 40 | // See more:
 41 | // https://en.wikipedia.org/wiki/Linear_hashing
 42 | // https://dsf.berkeley.edu/jmh/cs186/f02/lecs/lec18_2up.pdf
 43 | type Table struct {
 44 | 	primaryFile  fs.File
 45 | 	overflowFile fs.File
 46 | 	metaFile     fs.File // meta file stores the metadata of the hash table
 47 | 	meta         *tableMeta
 48 | 	mu           *sync.RWMutex // protect the table when multiple goroutines access it
 49 | 	options      Options
 50 | }
 51 | 
 52 | // tableMeta is the metadata of the hash table.
 53 | type tableMeta struct {
 54 | 	Level            uint8
 55 | 	SplitBucketIndex uint32
 56 | 	NumBuckets       uint32
 57 | 	NumKeys          uint32
 58 | 	SlotValueLength  uint32
 59 | 	BucketSize       uint32
 60 | 	FreeBuckets      []int64
 61 | }
 62 | 
 63 | // Open opens a hash table.
 64 | // If the hash table does not exist, it will be created automatically.
 65 | // It will open the primary file, the overflow file and the meta file.
 66 | func Open(options Options) (*Table, error) {
 67 | 	if err := checkOptions(options); err != nil {
 68 | 		return nil, err
 69 | 	}
 70 | 
 71 | 	t := &Table{
 72 | 		mu:      new(sync.RWMutex),
 73 | 		options: options,
 74 | 	}
 75 | 
 76 | 	// create data directory if not exist
 77 | 	if _, err := os.Stat(options.DirPath); err != nil {
 78 | 		if err := os.MkdirAll(options.DirPath, os.ModePerm); err != nil {
 79 | 			return nil, err
 80 | 		}
 81 | 	}
 82 | 
 83 | 	// open meta file and read metadata info
 84 | 	if err := t.readMeta(); err != nil {
 85 | 		return nil, err
 86 | 	}
 87 | 
 88 | 	// open and init primary file
 89 | 	primaryFile, err := t.openFile(primaryFileName)
 90 | 	if err != nil {
 91 | 		return nil, err
 92 | 	}
 93 | 	// init first bucket if the primary file is empty
 94 | 	if primaryFile.Size() == int64(t.meta.BucketSize) {
 95 | 		if err := primaryFile.Truncate(int64(t.meta.BucketSize)); err != nil {
 96 | 			_ = primaryFile.Close()
 97 | 			return nil, err
 98 | 		}
 99 | 	}
100 | 	t.primaryFile = primaryFile
101 | 
102 | 	// open overflow file
103 | 	overflowFile, err := t.openFile(overflowFileName)
104 | 	if err != nil {
105 | 		return nil, err
106 | 	}
107 | 	t.overflowFile = overflowFile
108 | 
109 | 	return t, nil
110 | }
111 | 
112 | func checkOptions(options Options) error {
113 | 	if options.DirPath == "" {
114 | 		return errors.New("dir path cannot be empty")
115 | 	}
116 | 	if options.SlotValueLength <= 0 {
117 | 		return errors.New("slot value length must be greater than 0")
118 | 	}
119 | 	if options.LoadFactor < 0 || options.LoadFactor > 1 {
120 | 		return errors.New("load factor must be between 0 and 1")
121 | 	}
122 | 	return nil
123 | }
124 | 
125 | // read the metadata info from the meta file.
126 | // if the file is empty, init the metadata info.
127 | func (t *Table) readMeta() error {
128 | 	file, err := fs.Open(filepath.Join(t.options.DirPath, metaFileName), fs.OSFileSystem)
129 | 	if err != nil {
130 | 		return err
131 | 	}
132 | 	t.metaFile = file
133 | 	t.meta = &tableMeta{}
134 | 
135 | 	// init meta file if not exist
136 | 	if file.Size() == 0 {
137 | 		t.meta.NumBuckets = 1
138 | 		t.meta.SlotValueLength = t.options.SlotValueLength
139 | 		t.meta.BucketSize = slotsPerBucket*(hashLen+t.meta.SlotValueLength) + nextOffLen
140 | 	} else {
141 | 		decoder := json.NewDecoder(t.metaFile)
142 | 		if err := decoder.Decode(t.meta); err != nil {
143 | 			return err
144 | 		}
145 | 		// we require that the slot value length must be equal to the length specified in the options,
146 | 		// once the slot value length is set, it cannot be changed.
147 | 		if t.meta.SlotValueLength != t.options.SlotValueLength {
148 | 			return errors.New("slot value length mismatch")
149 | 		}
150 | 	}
151 | 
152 | 	return nil
153 | }
154 | 
155 | // write the metadata info to the meta file in json format.
156 | func (t *Table) writeMeta() error {
157 | 	encoder := json.NewEncoder(t.metaFile)
158 | 	return encoder.Encode(t.meta)
159 | }
160 | 
161 | // Close closes the files of the hash table.
162 | func (t *Table) Close() error {
163 | 	t.mu.Lock()
164 | 	defer t.mu.Unlock()
165 | 
166 | 	if err := t.writeMeta(); err != nil {
167 | 		return err
168 | 	}
169 | 
170 | 	_ = t.primaryFile.Close()
171 | 	_ = t.overflowFile.Close()
172 | 	_ = t.metaFile.Close()
173 | 	return nil
174 | }
175 | 
176 | // Sync flushes the data of the hash table to disk.
177 | func (t *Table) Sync() error {
178 | 	t.mu.Lock()
179 | 	defer t.mu.Unlock()
180 | 
181 | 	if err := t.primaryFile.Sync(); err != nil {
182 | 		return err
183 | 	}
184 | 
185 | 	if err := t.overflowFile.Sync(); err != nil {
186 | 		return err
187 | 	}
188 | 
189 | 	return nil
190 | }
191 | 
192 | // Put puts a new ke/value pair to the hash table.
193 | // the parameter matchKey is described in the MatchKeyFunc.
194 | func (t *Table) Put(key, value []byte, matchKey MatchKeyFunc) error {
195 | 	t.mu.Lock()
196 | 	defer t.mu.Unlock()
197 | 
198 | 	// the value length must be equal to the length specified in the options
199 | 	if len(value) != int(t.meta.SlotValueLength) {
200 | 		return errors.New("value length must be equal to the length specified in the options")
201 | 	}
202 | 
203 | 	// get the slot writer to write the new slot,
204 | 	// it will get the corresponding bucket according to the key hash,
205 | 	// and find an empty slot to insert.
206 | 	// If there are no empty slots, an overflow bucket will be created.
207 | 	keyHash := getKeyHash(key)
208 | 	slot := &Slot{Hash: keyHash, Value: value}
209 | 	sw, err := t.getSlotWriter(slot.Hash, matchKey)
210 | 	if err != nil {
211 | 		return err
212 | 	}
213 | 
214 | 	// write the new slot to the bucket
215 | 	if err = sw.insertSlot(*slot, t); err != nil {
216 | 		return err
217 | 	}
218 | 	if err := sw.writeSlots(); err != nil {
219 | 		return err
220 | 	}
221 | 	// if the slot already exists, no need to update meta
222 | 	// because the number of keys has not changed
223 | 	if sw.overwrite {
224 | 		return nil
225 | 	}
226 | 
227 | 	t.meta.NumKeys++
228 | 	// split if the load factor is exceeded
229 | 	keyRatio := float64(t.meta.NumKeys) / float64(t.meta.NumBuckets*slotsPerBucket)
230 | 	if keyRatio > t.options.LoadFactor {
231 | 		if err := t.split(); err != nil {
232 | 			return err
233 | 		}
234 | 	}
235 | 	return nil
236 | }
237 | 
238 | // find a free slot position to insert the new slot.
239 | // return the slot writer.
240 | func (t *Table) getSlotWriter(keyHash uint32, matchKey MatchKeyFunc) (*slotWriter, error) {
241 | 	sw := &slotWriter{}
242 | 	bi := t.newBucketIterator(t.getKeyBucket(keyHash))
243 | 	// iterate all slots in the bucket and the overflow buckets,
244 | 	// find the slot to insert.
245 | 	for {
246 | 		b, err := bi.next()
247 | 		if err == io.EOF {
248 | 			return nil, errors.New("failed to put new slot")
249 | 		}
250 | 		if err != nil {
251 | 			return nil, err
252 | 		}
253 | 
254 | 		sw.currentBucket = b
255 | 		// iterate all slots in current bucket, find the existing or empty slot
256 | 		for i, slot := range b.slots {
257 | 			// find an empty slot to insert
258 | 			if slot.Hash == 0 {
259 | 				sw.currentSlotIndex = i
260 | 				return sw, nil
261 | 			}
262 | 			// if the slot hash value is not equal to the key hash value,
263 | 			// which means the key will never be matched, so we can skip it.
264 | 			if slot.Hash != keyHash {
265 | 				continue
266 | 			}
267 | 			match, err := matchKey(slot)
268 | 			if err != nil {
269 | 				return nil, err
270 | 			}
271 | 			// key already exists, overwrite the value
272 | 			if match {
273 | 				sw.currentSlotIndex, sw.overwrite = i, true
274 | 				return sw, nil
275 | 			}
276 | 		}
277 | 		// no empty slot in the bucket and it's all overflow buckets,
278 | 		// create a new overflow bucket.
279 | 		if b.nextOffset == 0 {
280 | 			sw.currentSlotIndex = slotsPerBucket
281 | 			return sw, nil
282 | 		}
283 | 	}
284 | }
285 | 
286 | // Get gets the value of the key from the hash table.
287 | // the parameter matchKey is described in the MatchKeyFunc.
288 | func (t *Table) Get(key []byte, matchKey MatchKeyFunc) error {
289 | 	t.mu.RLock()
290 | 	defer t.mu.RUnlock()
291 | 
292 | 	// get the bucket according to the key hash
293 | 	keyHash := getKeyHash(key)
294 | 	startBucket := t.getKeyBucket(keyHash)
295 | 	bi := t.newBucketIterator(startBucket)
296 | 	// iterate all slots in the bucket and the overflow buckets,
297 | 	// find the slot to get.
298 | 	for {
299 | 		b, err := bi.next()
300 | 		if err == io.EOF {
301 | 			return nil
302 | 		}
303 | 		if err != nil {
304 | 			return err
305 | 		}
306 | 		for _, slot := range b.slots {
307 | 			// if the slot hash value is 0, which means the subsequent slots are all empty,
308 | 			// (why? when we write a new slot, we will iterate from the beginning of the bucket, find an empty slot to insert,
309 | 			// when we remove a slot, we will move the subsequent slots forward, so all non-empty slots will be continuous)
310 | 			// so we can skip the current bucket and move to the next bucket.
311 | 			if slot.Hash == 0 {
312 | 				break
313 | 			}
314 | 			// if the slot hash value is not equal to the key hash value,
315 | 			// which means the key will never be matched, so we can skip it.
316 | 			if slot.Hash != keyHash {
317 | 				continue
318 | 			}
319 | 			if match, err := matchKey(slot); match || err != nil {
320 | 				return err
321 | 			}
322 | 		}
323 | 	}
324 | }
325 | 
326 | // Delete deletes the key from the hash table.
327 | // the parameter matchKey is described in the MatchKeyFunc.
328 | func (t *Table) Delete(key []byte, matchKey MatchKeyFunc) error {
329 | 	t.mu.Lock()
330 | 	defer t.mu.Unlock()
331 | 
332 | 	// get the bucket according to the key hash
333 | 	keyHash := getKeyHash(key)
334 | 	bi := t.newBucketIterator(t.getKeyBucket(keyHash))
335 | 	// iterate all slots in the bucket and the overflow buckets,
336 | 	// find the slot to delete.
337 | 	for {
338 | 		b, err := bi.next()
339 | 		if err == io.EOF {
340 | 			return nil
341 | 		}
342 | 		if err != nil {
343 | 			return err
344 | 		}
345 | 
346 | 		// the following code is similar to the Get method
347 | 		for i, slot := range b.slots {
348 | 			if slot.Hash == 0 {
349 | 				break
350 | 			}
351 | 			if slot.Hash != keyHash {
352 | 				continue
353 | 			}
354 | 			match, err := matchKey(slot)
355 | 			if err != nil {
356 | 				return err
357 | 			}
358 | 			if !match {
359 | 				continue
360 | 			}
361 | 			// now we find the slot to delete, remove it from the bucket
362 | 			b.removeSlot(i)
363 | 			if err := b.write(); err != nil {
364 | 				return err
365 | 			}
366 | 			t.meta.NumKeys--
367 | 			return nil
368 | 		}
369 | 	}
370 | }
371 | 
372 | // Size returns the number of keys in the hash table.
373 | func (t *Table) Size() uint32 {
374 | 	t.mu.RLock()
375 | 	defer t.mu.RUnlock()
376 | 	return t.meta.NumKeys
377 | }
378 | 
379 | // get the hash value according to the key
380 | func getKeyHash(key []byte) uint32 {
381 | 	return murmur3.Sum32(key)
382 | }
383 | 
384 | // get the bucket according to the key
385 | func (t *Table) getKeyBucket(keyHash uint32) uint32 {
386 | 	bucketIndex := keyHash & ((1 << t.meta.Level) - 1)
387 | 	if bucketIndex < t.meta.SplitBucketIndex {
388 | 		return keyHash & ((1 << (t.meta.Level + 1)) - 1)
389 | 	}
390 | 	return bucketIndex
391 | }
392 | 
393 | func (t *Table) openFile(name string) (fs.File, error) {
394 | 	file, err := fs.Open(filepath.Join(t.options.DirPath, name), fs.OSFileSystem)
395 | 	if err != nil {
396 | 		return nil, err
397 | 	}
398 | 	// init (dummy) file header
399 | 	// the first bucket size in the file is not used, so we just init it.
400 | 	if file.Size() == 0 {
401 | 		if err := file.Truncate(int64(t.meta.BucketSize)); err != nil {
402 | 			_ = file.Close()
403 | 			return nil, err
404 | 		}
405 | 	}
406 | 	return file, nil
407 | }
408 | 
409 | // split the current bucket.
410 | // it will create a new bucket, and rewrite all slots in the current bucket and the overflow buckets.
411 | func (t *Table) split() error {
412 | 	// the bucket to be split
413 | 	splitBucketIndex := t.meta.SplitBucketIndex
414 | 	splitSlotWriter := &slotWriter{
415 | 		currentBucket: &bucket{
416 | 			file:       t.primaryFile,
417 | 			offset:     t.bucketOffset(splitBucketIndex),
418 | 			bucketSize: t.meta.BucketSize,
419 | 		},
420 | 	}
421 | 
422 | 	// create a new bucket
423 | 	newSlotWriter := &slotWriter{
424 | 		currentBucket: &bucket{
425 | 			file:       t.primaryFile,
426 | 			offset:     t.primaryFile.Size(),
427 | 			bucketSize: t.meta.BucketSize,
428 | 		},
429 | 	}
430 | 	if err := t.primaryFile.Truncate(int64(t.meta.BucketSize)); err != nil {
431 | 		return err
432 | 	}
433 | 
434 | 	// increase the split bucket index
435 | 	t.meta.SplitBucketIndex++
436 | 	// if the split bucket index is equal to 1 << level,
437 | 	// reset the split bucket index to 0, and increase the level.
438 | 	if t.meta.SplitBucketIndex == 1<<t.meta.Level {
439 | 		t.meta.Level++
440 | 		t.meta.SplitBucketIndex = 0
441 | 	}
442 | 
443 | 	// iterate all slots in the split bucket and the overflow buckets,
444 | 	// rewrite all slots.
445 | 	var freeBuckets []int64
446 | 	bi := t.newBucketIterator(splitBucketIndex)
447 | 	for {
448 | 		b, err := bi.next()
449 | 		if err == io.EOF {
450 | 			break
451 | 		}
452 | 		if err != nil {
453 | 			return err
454 | 		}
455 | 		for _, slot := range b.slots {
456 | 			if slot.Hash == 0 {
457 | 				break
458 | 			}
459 | 			var insertErr error
460 | 			if t.getKeyBucket(slot.Hash) == splitBucketIndex {
461 | 				insertErr = splitSlotWriter.insertSlot(slot, t)
462 | 			} else {
463 | 				insertErr = newSlotWriter.insertSlot(slot, t)
464 | 			}
465 | 			if insertErr != nil {
466 | 				return insertErr
467 | 			}
468 | 		}
469 | 		// if the splitBucket has overflow buckets, and these buckets are no longer used,
470 | 		// because all slots has been rewritten, so we can free these buckets.
471 | 		if b.nextOffset != 0 {
472 | 			freeBuckets = append(freeBuckets, b.nextOffset)
473 | 		}
474 | 	}
475 | 
476 | 	// collect the free buckets
477 | 	if len(freeBuckets) > 0 {
478 | 		t.meta.FreeBuckets = append(t.meta.FreeBuckets, freeBuckets...)
479 | 	}
480 | 
481 | 	// write all slots to the file
482 | 	if err := splitSlotWriter.writeSlots(); err != nil {
483 | 		return err
484 | 	}
485 | 	if err := newSlotWriter.writeSlots(); err != nil {
486 | 		return err
487 | 	}
488 | 
489 | 	t.meta.NumBuckets++
490 | 	return nil
491 | }
492 | 
493 | // create overflow bucket.
494 | // If there are free buckets, it will reuse the free buckets.
495 | // Otherwise, it will create a new bucket in the overflow file.
496 | func (t *Table) createOverflowBucket() (*bucket, error) {
497 | 	var offset int64
498 | 	if len(t.meta.FreeBuckets) > 0 {
499 | 		offset = t.meta.FreeBuckets[0]
500 | 		t.meta.FreeBuckets = t.meta.FreeBuckets[1:]
501 | 	} else {
502 | 		offset = t.overflowFile.Size()
503 | 		err := t.overflowFile.Truncate(int64(t.meta.BucketSize))
504 | 		if err != nil {
505 | 			return nil, err
506 | 		}
507 | 	}
508 | 
509 | 	return &bucket{
510 | 		file:       t.overflowFile,
511 | 		offset:     offset,
512 | 		bucketSize: t.meta.BucketSize,
513 | 	}, nil
514 | }
515 | 


--------------------------------------------------------------------------------