├── .github └── workflows │ ├── autodep.yml │ ├── check.yml │ ├── gotip.yml │ ├── test.yml │ └── version.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── cmd ├── gonudbadmin │ ├── info.go │ ├── main.go │ └── verify.go └── gonudbsample │ └── gonudbsample.go ├── go.mod ├── go.sum ├── internal ├── bucket.go ├── bucket_test.go ├── bucketcache.go ├── bucketcache_test.go ├── cache.go ├── const32.go ├── const64.go ├── context.go ├── error.go ├── field.go ├── file.go ├── file_test.go ├── format.go ├── hasher.go ├── pool.go ├── store.go ├── syscall.go ├── syscallunix.go ├── verify.go ├── version.go └── version_test.go └── store.go /.github/workflows/autodep.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | name: Update package dependencies 4 | jobs: 5 | dep_update: 6 | runs-on: ubuntu-latest 7 | timeout-minutes: 10 8 | 9 | steps: 10 | - name: Install Go 11 | uses: actions/setup-go@v3 12 | with: 13 | go-version: 1.20.x 14 | 15 | - name: Checkout 16 | uses: actions/checkout@v3 17 | 18 | - name: Update minor and patch-level dependencies 19 | run: go get -t -u ./... 20 | 21 | - name: Tidy 22 | run: go mod tidy 23 | 24 | - name: Create pull request 25 | uses: peter-evans/create-pull-request@v4 26 | with: 27 | title: "Update package dependencies + tidy" 28 | body: | 29 | This is a change initiated automatically on a weekly basis by a 30 | GitHub Action that updates the projects dependencies to their latest 31 | minor and patch-level versions. This lets us stay up to date 32 | incrementally so that updates are less effort to get merged compared 33 | to large monolithic updates, and gets us security updates more 34 | expediently. 35 | 36 | If the build passes, you are probably A-OK to merge and deploy this. 37 | If not, try to dig into what's not working and see if you can fix it 38 | so that the dep train stays on its rails. 39 | 40 | Note that although minor/patch level changes are handled 41 | automatically, notably major version changes like you'd find in 42 | stripe-go are not and those upgrades need to be performed manually. 43 | That should theoretically not be a problem if fixes are backported 44 | to all previous majors, but in practice they are often not, so it's 45 | worthwhile to occasionally look for new majors and integrate them. 46 | branch: "autodep" 47 | branch-suffix: "timestamp" 48 | commit-message: | 49 | Update package dependencies + tidy 50 | 51 | Weekly update to the project's package dependencies initiated by an 52 | automatic GitHub Action running on cron. Keeps upgrades less of a 53 | monolithic task and lets security-related patches trickle in more 54 | quickly. 
55 | author: "Bot " 56 | committer: "Bot " 57 | delete-branch: true 58 | draft: true 59 | reviewers: | 60 | iand 61 | assignees: | 62 | iand 63 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | pull_request: 4 | types: [opened, reopened, ready_for_review, synchronize] 5 | push: 6 | branches: 7 | - main 8 | - master 9 | name: Run checks 10 | jobs: 11 | check: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Install Go 15 | uses: actions/setup-go@v3 16 | with: 17 | go-version: 1.20.x 18 | - name: Get StaticCheck 19 | run: go install honnef.co/go/tools/cmd/staticcheck@v0.4.2 # Version 2023.1.2 (v0.4.2) 20 | - name: Checkout 21 | uses: actions/checkout@v3 22 | with: 23 | submodules: recursive 24 | - name: Gomod 25 | run: | 26 | go mod tidy 27 | if [[ -n $(git ls-files --other --exclude-standard --directory -- go.sum) ]]; then 28 | echo "go.sum was added by go mod tidy" 29 | exit 1 30 | fi 31 | git diff --exit-code -- go.sum go.mod 32 | - name: Gofmt 33 | if: ${{ success() || failure() }} # run this step even if the previous one failed 34 | run: | 35 | out=$(gofmt -s -l .) 36 | if [[ -n "$out" ]]; then 37 | echo $out | awk '{print "::error file=" $0 ",line=0,col=0::File is not gofmt-ed."}' 38 | exit 1 39 | fi 40 | - name: Vet 41 | if: ${{ success() || failure() }} # run this step even if the previous one failed 42 | run: go vet ./... 43 | - name: StaticCheck 44 | if: ${{ success() || failure() }} # run this step even if the previous one failed 45 | run: staticcheck ./... 46 | -------------------------------------------------------------------------------- /.github/workflows/gotip.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | pull_request: 4 | types: [opened, reopened, ready_for_review, synchronize] 5 | push: 6 | branches: 7 | - main 8 | - master 9 | name: Test Go tip 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Install Go 15 | uses: actions/setup-go@v3 16 | with: 17 | go-version: 1.20.x 18 | - name: Install Go tip 19 | run: | 20 | go install golang.org/dl/gotip@latest 21 | gotip download 22 | gotip version 23 | - name: Checkout 24 | uses: actions/checkout@v3 25 | with: 26 | submodules: recursive 27 | - id: Cache 28 | uses: actions/cache@v3 29 | with: 30 | path: | 31 | ~/go/pkg/mod # Module download cache 32 | ~/.cache/go-build # Build cache (Linux) 33 | key: ubuntu-go-${{ hashFiles('**/go.sum') }} 34 | restore-keys: | 35 | ubuntu-go- 36 | - name: Dependencies 37 | run: gotip mod download 38 | if: steps.cache.outputs.cache-hit != 'true' 39 | - name: Test 40 | run: gotip test ./... 41 | - name: Test 32 bit 42 | env: 43 | GOARCH: 386 44 | run: gotip test ./... 45 | - name: Test with race detector 46 | run: gotip test -race ./... 
47 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | pull_request: 4 | types: [opened, reopened, ready_for_review, synchronize] 5 | push: 6 | branches: 7 | - main 8 | - master 9 | name: Run tests 10 | jobs: 11 | test: 12 | strategy: 13 | matrix: 14 | go-version: [1.19.x, 1.20.x] 15 | os: ["ubuntu", "windows", "macos"] 16 | runs-on: ${{ matrix.os }}-latest 17 | steps: 18 | - name: Install Go 19 | uses: actions/setup-go@v3 20 | with: 21 | go-version: ${{ matrix.go-version }} 22 | - name: Checkout 23 | uses: actions/checkout@v3 24 | with: 25 | submodules: recursive 26 | - id: Cache 27 | uses: actions/cache@v3 28 | with: 29 | path: | 30 | ~/go/pkg/mod # Module download cache 31 | ~/.cache/go-build # Build cache (Linux) 32 | ~/Library/Caches/go-build # Build cache (Mac) 33 | '%LocalAppData%\go-build' # Build cache (Windows) 34 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 35 | restore-keys: | 36 | ${{ runner.os }}-go- 37 | - name: Dependencies 38 | run: go mod download 39 | if: steps.cache.outputs.cache-hit != 'true' 40 | - name: Test 41 | run: go test ./... 42 | - name: Test 32 bit 43 | if: ${{ matrix.os != 'macos' }} # can't run 32 bit tests on OSX. 44 | env: 45 | GOARCH: 386 46 | run: go test ./... 47 | - name: Test with race detector 48 | if: ${{ matrix.os == 'ubuntu' }} # speed things up. Windows and OSX VMs are slow 49 | run: go test -race ./... 50 | -------------------------------------------------------------------------------- /.github/workflows/version.yml: -------------------------------------------------------------------------------- 1 | name: Bump version 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Bump version and push tag 14 | id: tag_version 15 | uses: mathieudutour/github-tag-action@v6.1 16 | with: 17 | github_token: ${{ secrets.GITHUB_TOKEN }} 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | gonudbadmin 2 | !gonudbadmin/ 3 | gonudbsample 4 | !gonudbsample/ 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | These are changes that will probably be included in the next release. 
10 | 11 | ### Added 12 | 13 | ### Fixed 14 | 15 | ### Changed 16 | 17 | ### Removed 18 | 19 | ## [v0.2.1] - 2020-11-23 20 | 21 | ### Added 22 | * Add Exists and DataSize methods to Store 23 | 24 | ## [v0.2.0] - 2020-11-23 25 | 26 | ### Added 27 | 28 | * Support variable length keys 29 | * Compute number of records when store is opened 30 | 31 | ### Changed 32 | 33 | * Simplify method signatures of Fetch and FetchReader (breaking change) 34 | 35 | ## [v0.1.1] - 2020-11-18 36 | 37 | ### Fixed 38 | 39 | * Reduce scope of insert locking to unblock reads when inserts are throttled 40 | * Fix bucket scanner not detecting EOF 41 | 42 | ## [v0.1.0] - 2020-11-18 43 | 44 | Initial release 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gonudb 2 | 3 | Gonudb is an append-only key/value datastore written in Go. 4 | 5 | [![Check Status](https://github.com/iand/gonudb/actions/workflows/check.yml/badge.svg)](https://github.com/iand/gonudb/actions/workflows/check.yml) 6 | [![Test Status](https://github.com/iand/gonudb/actions/workflows/test.yml/badge.svg)](https://github.com/iand/gonudb/actions/workflows/test.yml) 7 | [![Go Report Card](https://goreportcard.com/badge/github.com/iand/gonudb)](https://goreportcard.com/report/github.com/iand/gonudb) 8 | [![go.dev reference](https://img.shields.io/badge/go.dev-reference-007d9c?logo=go&logoColor=white)](https://pkg.go.dev/github.com/iand/gonudb) 9 | 10 | ## Overview 11 | 12 | Gonudb is a port of [NuDB](https://github.com/CPPAlliance/NuDB), a C++ key/value store. 13 | 14 | A Gonudb datastore comprises a data file holding keys and values stored sequentially and an 15 | accompanying key file which forms an on-disk hash table indexing the values stored in the data 16 | file. 
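In code, the typical flow is to create the store files once, then open the store and insert or fetch records. The sketch below is based on the sample application in `cmd/gonudbsample`; error handling is trimmed, the file names are placeholders, and passing an empty `StoreOptions` instead of configuring a logger is an assumption made for brevity.

```go
package main

import (
	"fmt"
	"log"

	"github.com/iand/gonudb"
)

func main() {
	// Create the data, key and log files. Parameters mirror cmd/gonudbsample:
	// app number 1, a fresh salt, 4096-byte blocks and a 0.5 load factor.
	if err := gonudb.CreateStore("sample.dat", "sample.key", "sample.log", 1, gonudb.NewSalt(), 4096, 0.5); err != nil {
		log.Println("create (store may already exist):", err)
	}

	s, err := gonudb.OpenStore("sample.dat", "sample.key", "sample.log", &gonudb.StoreOptions{})
	if err != nil {
		log.Fatal(err)
	}
	defer s.Close()

	// Inserts are append-only; writing an existing key returns gonudb.ErrKeyExists.
	if err := s.Insert("key000000001", []byte("hello")); err != nil {
		log.Fatal(err)
	}

	data, err := s.Fetch("key000000001") // gonudb.ErrKeyNotFound for missing keys
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("key000000001 => %s\n", data)
}
```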
17 | 18 | During commits a log file is created to store bookkeeping information that may be used to repair the 19 | datastore in the event of a failure. 20 | 21 | The data file and key file are independent and a new key file may be rebuilt from the data file if 22 | necessary, potentially with an alternate hashing scheme. 23 | 24 | 25 | ## Installation 26 | 27 | Execute `go get github.com/iand/gonudb` within a Go module directory to add it to your module. 28 | 29 | ## Usage 30 | 31 | Gonudb is primarily a library. Import package `github.com/iand/gonudb` to use it. A sample application 32 | that demonstrates some simple inserts and fetches is provided in `cmd/gonudbsample`. 33 | 34 | An admin tool can be found in `cmd/gonudbadmin`, which provides some commands for inspecting and 35 | validating the files that comprise a store. 36 | 37 | Install by executing `go install github.com/iand/gonudb/cmd/gonudbadmin` from the root of the 38 | repository. 39 | 40 | - `gonudbadmin info` can be used to view characteristic information about any of the three files used by gonudb (data, key and log files). 41 | - `gonudbadmin verify` verifies the consistency of data and key files and shows some statistics on the data they hold. 42 | 43 | 44 | ## Design 45 | 46 | Gonudb shares the design ideals that motivated NuDB (but see Status below): 47 | 48 | 1. Writes should not block reads. 49 | 2. Reads should be limited only by the SSD's IOPS limit. 50 | 3. A read for a non-present key should require one IOP. 51 | 4. A read for a present key whose data can be read in a single IOP should only require two IOPs, one to figure out where it is and one to read it in. 52 | 53 | Keys and values are stored sequentially in an append-only data file. The data file begins with a 54 | header that contains characteristic information about the file such as the version of the encoding 55 | scheme, a datastore identifier and an application identifier. Data records follow immediately on 56 | from the header. Each record comprises the size of the value, followed by the size of the key, 57 | followed by the key, followed by the value data. The data file is considered to be immutable and 58 | there are no delete or mutate operations. 59 | 60 | Inserts are buffered in memory and periodically committed to disk. Clients are throttled based on 61 | the rate at which data is flushed to disk. Values are immediately discoverable via their key and 62 | may be read from memory or disk. 63 | 64 | Keys are hashed and written to buckets stored in the key file. As with the data file, the key file 65 | begins with a header containing characteristic information. The key file's version, datastore 66 | identifier and application identifier must match those in the data file header. Additionally, the 67 | key file header contains the hash salt, the block size of each bucket and the target load factor 68 | which determines when a bucket should be split. Buckets are a fixed size and written sequentially 69 | after the header, which enables them to be easily located by index. 70 | 71 | Each bucket is assigned a range of hash values and entries within a bucket are ordered by hash. When 72 | the number of entries in a bucket exceeds the load factor it undergoes a split and its entries are 73 | rehashed across the pair of buckets using the linear hashing algorithm. When a bucket exceeds its 74 | capacity it is spilled to the data file and replaced with an empty bucket containing a pointer to 75 | the spill record.
A spilled bucket may spill multiple times with the resulting spill records 76 | forming a linked list in the data file. 77 | 78 | In the best case, reading a record from the datastore requires one read from the key file to load the 79 | relevant bucket and a read from the data file to access the value. Additional reads from the data 80 | file may be required to resolve hash collisions and to load spill records. Read performance is 81 | independent of the size of the datastore, and the size of buckets in the key file may be tuned to 82 | the block size of the underlying physical media so that loading a bucket may only take a single IOP. 83 | 84 | ## Status 85 | 86 | Version 0.1.0 is an alpha-quality functional port of the original NuDB suitable for testing with 87 | expendable loads. Correctness and safety have been prioritised over performance. Locks are broad in scope 88 | and treat reads and writes with equal priority. Future work will tune the locking behaviour to 89 | better meet the goal of writes not blocking reads. 90 | 91 | High-priority tasks include: 92 | 93 | * Add recovery from partial writes 94 | * Add a rekey admin function 95 | * Tune the locking strategy 96 | 97 | Additional features under consideration: 98 | 99 | * Allow alternate hashing functions to be specified. 100 | 101 | ## Author 102 | 103 | Go port written by: 104 | 105 | * [Ian Davis](http://github.com/iand) - 106 | 107 | ## License 108 | 109 | Distributed under the Boost Software License, Version 1.0. (See accompanying file [LICENSE](LICENSE) 110 | or copy at http://www.boost.org/LICENSE_1_0.txt) 111 | -------------------------------------------------------------------------------- /cmd/gonudbadmin/info.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | 8 | "github.com/urfave/cli/v2" 9 | 10 | "github.com/iand/gonudb/internal" 11 | ) 12 | 13 | var infoCommand = &cli.Command{ 14 | Name: "info", 15 | Usage: "Report information about one or more gonudb files.", 16 | ArgsUsage: "...", 17 | Action: info, 18 | Flags: []cli.Flag{ 19 | logLevelFlag, 20 | }, 21 | } 22 | 23 | func info(cc *cli.Context) error { 24 | if err := initLogging(cc); err != nil { 25 | return cli.Exit(err.Error(), 1) 26 | } 27 | 28 | if cc.Args().Len() == 0 { 29 | cli.ShowAppHelpAndExit(cc, 1) 30 | } 31 | 32 | for i := 0; i < cc.Args().Len(); i++ { 33 | path := cc.Args().Get(i) 34 | Print(os.Stdout, &section{ 35 | label: path, 36 | rows: infoFile(path), 37 | }) 38 | } 39 | 40 | return nil 41 | } 42 | 43 | func infoFile(path string) []kv { 44 | f, err := os.Open(path) 45 | if err != nil { 46 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to open file: %w", err)}} 47 | } 48 | defer f.Close() 49 | 50 | fStat, err := f.Stat() 51 | if err != nil { 52 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to stat file: %w", err)}} 53 | } 54 | 55 | var typeHeader [8]byte 56 | 57 | if _, err := f.ReadAt(typeHeader[:], 0); err != nil { 58 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to read file type: %w", err)}} 59 | } 60 | 61 | switch string(typeHeader[:]) { 62 | case string(internal.DatFileHeaderType): 63 | var dh internal.DatFileHeader 64 | if err := dh.DecodeFrom(f); err != nil { 65 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to read data file header: %w", err)}} 66 | } 67 | 68 | return []kv{ 69 | {Key: "Type", Value: string(dh.Type[:])}, 70 | {Key: "Version", Value: dh.Version}, 71 | {Key: "UID", Value: dh.UID}, 72 | {Key: "AppNum", Value:
dh.AppNum}, 73 | {Key: "File size", Value: Bytes(fStat.Size())}, 74 | } 75 | case string(internal.KeyFileHeaderType): 76 | var kh internal.KeyFileHeader 77 | if err := kh.DecodeFrom(f, fStat.Size()); err != nil { 78 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to read key file header: %w", err)}} 79 | } 80 | 81 | return []kv{ 82 | {Key: "Type", Value: string(kh.Type[:])}, 83 | {Key: "Version", Value: kh.Version}, 84 | {Key: "UID", Value: kh.UID}, 85 | {Key: "AppNum", Value: kh.AppNum}, 86 | {Key: "Salt", Value: kh.Salt}, 87 | {Key: "Pepper", Value: kh.Pepper}, 88 | {Key: "BlockSize", Value: Bytes(kh.BlockSize)}, 89 | {Key: "Capacity", Value: kh.Capacity}, 90 | {Key: "Buckets", Value: kh.Buckets}, 91 | {Key: "Modulus", Value: kh.Modulus}, 92 | {Key: "File size", Value: Bytes(fStat.Size())}, 93 | } 94 | case string(internal.LogFileHeaderType): 95 | var lh internal.LogFileHeader 96 | if err := lh.DecodeFrom(f); err != nil { 97 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to read log file header: %w", err)}} 98 | } 99 | 100 | return []kv{ 101 | {Key: "Type", Value: string(lh.Type[:])}, 102 | {Key: "Version", Value: lh.Version}, 103 | {Key: "UID", Value: lh.UID}, 104 | {Key: "AppNum", Value: lh.AppNum}, 105 | {Key: "Salt", Value: lh.Salt}, 106 | {Key: "Pepper", Value: lh.Pepper}, 107 | {Key: "BlockSize", Value: Bytes(lh.BlockSize)}, 108 | {Key: "KeyFileSize", Value: Bytes(lh.KeyFileSize)}, 109 | {Key: "DatFileSize", Value: Bytes(lh.DatFileSize)}, 110 | {Key: "File size", Value: Bytes(fStat.Size())}, 111 | } 112 | default: 113 | return []kv{{Key: "Error", Value: fmt.Sprintf("unknown file type: %s", string(typeHeader[:]))}} 114 | } 115 | } 116 | 117 | type section struct { 118 | label string 119 | rows []kv 120 | } 121 | 122 | type kv struct { 123 | Key string 124 | Value interface{} 125 | } 126 | 127 | func Print(w io.Writer, s *section) { 128 | fmt.Fprintln(w, s.label) 129 | maxKeyLen := 0 130 | for _, r := range s.rows { 131 | if len(r.Key) > maxKeyLen { 132 | maxKeyLen = len(r.Key) 133 | } 134 | } 135 | 136 | fmtstr := fmt.Sprintf(" %%-%ds: %%v\n", maxKeyLen) 137 | for _, r := range s.rows { 138 | fmt.Fprintf(w, fmtstr, r.Key, r.Value) 139 | } 140 | fmt.Fprintln(w) 141 | } 142 | 143 | type Bytes int64 144 | 145 | func (b Bytes) String() string { 146 | return fmt.Sprintf("%d bytes", b) 147 | } 148 | -------------------------------------------------------------------------------- /cmd/gonudbadmin/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/go-logr/logr" 8 | "github.com/iand/logfmtr" 9 | "github.com/urfave/cli/v2" 10 | 11 | "github.com/iand/gonudb/internal" 12 | ) 13 | 14 | func main() { 15 | app := &cli.App{ 16 | Name: "gonudbadmin", 17 | HelpName: "gonudbadmin", 18 | Usage: "Administer a gonudb store", 19 | Flags: []cli.Flag{ 20 | logLevelFlag, 21 | }, 22 | Version: internal.Version(), 23 | Commands: []*cli.Command{ 24 | infoCommand, 25 | verifyCommand, 26 | }, 27 | } 28 | 29 | if err := app.Run(os.Args); err != nil { 30 | fmt.Fprintln(os.Stderr, err.Error()) 31 | os.Exit(1) 32 | } 33 | } 34 | 35 | var logLevelFlag = &cli.IntFlag{ 36 | Name: "log-level", 37 | Aliases: []string{"ll"}, 38 | Usage: "Set verbosity of logs to `LEVEL` (higher is more verbose)", 39 | Value: 0, 40 | } 41 | 42 | var logger = logr.Discard() 43 | 44 | func initLogging(cc *cli.Context) error { 45 | if cc.IsSet("log-level") { 46 | logfmtr.SetVerbosity(cc.Int("log-level")) 47 | 
loggerOpts := logfmtr.DefaultOptions() 48 | loggerOpts.Humanize = true 49 | loggerOpts.Colorize = true 50 | logfmtr.UseOptions(loggerOpts) 51 | logger = logfmtr.NewNamed("gonudb") 52 | } 53 | return nil 54 | } 55 | -------------------------------------------------------------------------------- /cmd/gonudbadmin/verify.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/urfave/cli/v2" 7 | 8 | "github.com/iand/gonudb/internal" 9 | ) 10 | 11 | var verifyCommand = &cli.Command{ 12 | Name: "verify", 13 | Usage: "Verify consistency of data and key files.", 14 | ArgsUsage: " ", 15 | Action: verify, 16 | Description: "" + 17 | "Verifies the consistency and integrity of a store by analysing its data and\n" + 18 | "key files. Reports statistical information about the data distribution and\n" + 19 | "efficiency of the store.", 20 | Flags: []cli.Flag{ 21 | logLevelFlag, 22 | }, 23 | } 24 | 25 | func verify(cc *cli.Context) error { 26 | if err := initLogging(cc); err != nil { 27 | return cli.Exit(err.Error(), 1) 28 | } 29 | 30 | if cc.Args().Len() == 0 { 31 | cli.ShowAppHelpAndExit(cc, 1) 32 | } 33 | 34 | if cc.Args().Len() != 2 { 35 | return cli.Exit("expecting paths to data and key files", 1) 36 | } 37 | datPath := cc.Args().Get(0) 38 | keyPath := cc.Args().Get(1) 39 | 40 | info, err := internal.VerifyStore(datPath, keyPath, logger) 41 | if err != nil { 42 | return cli.Exit(err.Error(), 1) 43 | } 44 | 45 | Print(os.Stdout, &section{ 46 | label: "Store metadata", 47 | rows: []kv{ 48 | {Key: "Version", Value: info.Version}, 49 | {Key: "UID", Value: info.UID}, 50 | {Key: "AppNum", Value: info.AppNum}, 51 | }, 52 | }) 53 | 54 | Print(os.Stdout, &section{ 55 | label: "Data file", 56 | rows: []kv{ 57 | {Key: "DatFileSize", Value: Bytes(info.DatFileSize)}, 58 | {Key: "ValueCountInUse", Value: info.ValueCountInUse}, 59 | {Key: "ValueCountTotal", Value: info.ValueCountTotal}, 60 | {Key: "ValueBytesInUse", Value: Bytes(info.ValueBytesInUse)}, 61 | {Key: "ValueBytesTotal", Value: Bytes(info.ValueBytesTotal)}, 62 | {Key: "RecordBytesInUse", Value: Bytes(info.RecordBytesInUse)}, 63 | {Key: "RecordBytesTotal", Value: Bytes(info.RecordBytesTotal)}, 64 | {Key: "SpillCountInUse", Value: info.SpillCountInUse}, 65 | {Key: "SpillCountTotal", Value: info.SpillCountTotal}, 66 | {Key: "SpillBytesInUse", Value: Bytes(info.SpillBytesInUse)}, 67 | {Key: "SpillBytesTotal", Value: Bytes(info.SpillBytesTotal)}, 68 | {Key: "AverageFetch", Value: info.AverageFetch}, 69 | {Key: "Waste", Value: info.Waste}, 70 | {Key: "Overhead", Value: info.Overhead}, 71 | {Key: "ActualLoad", Value: info.ActualLoad}, 72 | }, 73 | }) 74 | 75 | Print(os.Stdout, &section{ 76 | label: "Key file", 77 | rows: []kv{ 78 | {Key: "KeyFileSize", Value: Bytes(info.KeyFileSize)}, 79 | {Key: "Salt", Value: info.Salt}, 80 | {Key: "Pepper", Value: info.Pepper}, 81 | {Key: "BlockSize", Value: Bytes(info.BlockSize)}, 82 | {Key: "LoadFactor", Value: info.LoadFactor}, 83 | {Key: "Capacity", Value: info.Capacity}, 84 | {Key: "Buckets", Value: info.Buckets}, 85 | {Key: "BucketSize", Value: info.BucketSize}, 86 | {Key: "Modulus", Value: info.Modulus}, 87 | {Key: "KeyCount", Value: info.KeyCount}, 88 | }, 89 | }) 90 | 91 | return nil 92 | } 93 | -------------------------------------------------------------------------------- /cmd/gonudbsample/gonudbsample.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 |
"errors" 6 | "fmt" 7 | "math/rand" 8 | "os" 9 | "path/filepath" 10 | "sync" 11 | "time" 12 | 13 | "github.com/iand/logfmtr" 14 | "github.com/urfave/cli/v2" 15 | 16 | "github.com/iand/gonudb" 17 | ) 18 | 19 | func main() { 20 | app := &cli.App{ 21 | Name: "gonudbsample", 22 | HelpName: "gonudbsample", 23 | Usage: "Sample application for Gonudb", 24 | UsageText: "gonudbsample [options] ", 25 | ArgsUsage: "", 26 | Flags: []cli.Flag{ 27 | logLevelFlag, 28 | concurrentFlag, 29 | }, 30 | Action: run, 31 | Version: gonudb.Version(), 32 | HideHelpCommand: true, 33 | } 34 | 35 | if err := app.Run(os.Args); err != nil { 36 | fmt.Fprintln(os.Stderr, err.Error()) 37 | os.Exit(1) 38 | } 39 | } 40 | 41 | var logLevelFlag = &cli.IntFlag{ 42 | Name: "log-level", 43 | Aliases: []string{"ll"}, 44 | Usage: "Set verbosity of logs to `LEVEL` (higher is more verbose)", 45 | Value: 0, 46 | } 47 | 48 | var concurrentFlag = &cli.IntFlag{ 49 | Name: "concurrent", 50 | Aliases: []string{"c"}, 51 | Usage: "Perform some concurrent inserts and fetches for `SECONDS`.", 52 | Value: 0, 53 | } 54 | 55 | func run(cc *cli.Context) error { 56 | if cc.Args().Len() != 1 { 57 | return cli.Exit("Missing directory for Gonudb store", 1) 58 | } 59 | 60 | rand.Seed(time.Now().Unix()) 61 | 62 | path := cc.Args().Get(0) 63 | 64 | logfmtr.SetVerbosity(cc.Int("log-level")) 65 | loggerOpts := logfmtr.DefaultOptions() 66 | loggerOpts.Humanize = true 67 | loggerOpts.Colorize = true 68 | loggerOpts.AddCaller = true 69 | logfmtr.UseOptions(loggerOpts) 70 | 71 | datPath := filepath.Join(path, "gonudb.dat") 72 | keyPath := filepath.Join(path, "gonudb.key") 73 | logPath := filepath.Join(path, "gonudb.log") 74 | 75 | fmt.Printf("Creating store in directory %s\n", path) 76 | err := gonudb.CreateStore( 77 | datPath, 78 | keyPath, 79 | logPath, 80 | 1, 81 | gonudb.NewSalt(), 82 | 4096, 83 | 0.5, 84 | ) 85 | if err != nil { 86 | var pathErr *os.PathError 87 | if errors.As(err, &pathErr) && os.IsExist(pathErr) { 88 | fmt.Println("Store already exists") 89 | } else { 90 | return cli.Exit("Failed to create store: "+err.Error(), 1) 91 | } 92 | } 93 | 94 | fmt.Println("Opening store") 95 | s, err := gonudb.OpenStore(datPath, keyPath, logPath, &gonudb.StoreOptions{Logger: logfmtr.NewNamed("gonudb")}) 96 | if err != nil { 97 | return cli.Exit("Failed to open store: "+err.Error(), 1) 98 | } 99 | 100 | defer s.Close() 101 | 102 | keys := make([]string, 500) 103 | for i := range keys { 104 | keys[i] = fmt.Sprintf("key%09d", i) 105 | } 106 | 107 | fmt.Printf("Inserting %d samples\n", len(keys)) 108 | duplicates := 0 109 | for i := range keys { 110 | if err := s.Insert(keys[i], []byte(fmt.Sprintf("this is data for %05d", i))); err != nil { 111 | if errors.Is(err, gonudb.ErrKeyExists) { 112 | duplicates++ 113 | continue 114 | } 115 | return cli.Exit("Failed to insert: "+err.Error(), 1) 116 | } 117 | } 118 | fmt.Printf("Skipped %d duplicates\n", duplicates) 119 | 120 | fmt.Println("Finding random keys") 121 | for i := 0; i < len(keys)/25; i++ { 122 | key := keys[rand.Intn(len(keys))] 123 | data, err := s.Fetch(key) 124 | if err != nil { 125 | return cli.Exit("Failed to fetch "+key+": "+err.Error(), 1) 126 | } 127 | fmt.Printf("Found %s => %s\n", key, string(data)) 128 | } 129 | 130 | if cc.Int("concurrent") == 0 { 131 | return nil 132 | } 133 | fmt.Println("Running some concurrent inserts and fetches") 134 | ctx, cancel := context.WithTimeout(cc.Context, time.Duration(cc.Int("concurrent"))*time.Second) 135 | defer cancel() 136 | 137 | var wg sync.WaitGroup 138 | 
wg.Add(7) 139 | 140 | for i := 0; i < 2; i++ { 141 | go func(ctx context.Context, wg *sync.WaitGroup) { 142 | defer wg.Done() 143 | for { 144 | select { 145 | case <-ctx.Done(): 146 | return 147 | default: 148 | } 149 | for i := 0; i < 500; i++ { 150 | key := fmt.Sprintf("%08d", rand.Intn(10000000)) 151 | data := fmt.Sprintf("this is data for %s", key) 152 | if err := s.Insert(key, []byte(data)); err != nil && !errors.Is(err, gonudb.ErrKeyExists) { 153 | fmt.Printf("Failed to insert: %v\n", err) 154 | return 155 | } 156 | } 157 | fmt.Println("Wrote 500 records") 158 | time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) 159 | } 160 | }(ctx, &wg) 161 | } 162 | 163 | for i := 0; i < 5; i++ { 164 | go func(tx context.Context, wg *sync.WaitGroup) { 165 | defer wg.Done() 166 | for { 167 | select { 168 | case <-ctx.Done(): 169 | return 170 | default: 171 | } 172 | for i := 0; i < 500; i++ { 173 | key := fmt.Sprintf("%08d", rand.Intn(10000000)) 174 | _, err := s.Fetch(key) 175 | if err != nil && !errors.Is(err, gonudb.ErrKeyNotFound) { 176 | fmt.Printf("Failed to fetch: %v\n", err) 177 | return 178 | } 179 | } 180 | fmt.Println("Read 500 records") 181 | time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) 182 | } 183 | }(ctx, &wg) 184 | } 185 | 186 | wg.Wait() 187 | 188 | return nil 189 | } 190 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/iand/gonudb 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/OneOfOne/xxhash v1.2.8 7 | github.com/go-logr/logr v1.3.0 8 | github.com/iand/logfmtr v0.2.2 9 | github.com/urfave/cli/v2 v2.25.7 10 | golang.org/x/sys v0.14.0 11 | ) 12 | 13 | require ( 14 | github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect 15 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 16 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8= 2 | github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= 3 | github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= 4 | github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 5 | github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 6 | github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= 7 | github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 8 | github.com/iand/logfmtr v0.2.2 h1:Z9ZFUiDLAw2jv+fGocKJ1d7T14imolI/7RvsUSTcfJ0= 9 | github.com/iand/logfmtr v0.2.2/go.mod h1:SByvTWmBzMDZ/7pDjWTMhH/rbgu+UKFl76l0aTxuL3I= 10 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 11 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 12 | github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs= 13 | github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= 14 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= 15 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod 
h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= 16 | golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= 17 | golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 18 | -------------------------------------------------------------------------------- /internal/bucket.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "io" 7 | "sync" 8 | ) 9 | 10 | const ( 11 | // Bucket header 12 | BucketHeaderSize = SizeUint16 + // Count 13 | SizeUint48 // Spill 14 | 15 | // Bucket item 16 | BucketEntrySize = SizeUint48 + // Offset 17 | SizeUint48 + // Size 18 | SizeUint64 // Hash 19 | ) 20 | 21 | func BucketIndex(h uint64, buckets int, modulus uint64) int { 22 | n := h % modulus 23 | if n >= uint64(buckets) { 24 | n -= modulus / 2 25 | } 26 | return int(n) 27 | } 28 | 29 | // BucketSize returns the actual size of a bucket. 30 | // This can be smaller than the block size. 31 | func BucketSize(capacity int) int { 32 | // Bucket Record 33 | return SizeUint16 + // Count 34 | SizeUint48 + // Spill 35 | capacity*(SizeUint48+ // Offset 36 | SizeUint48+ // Size 37 | SizeUint64) // Hash 38 | } 39 | 40 | // BucketCapacity returns the number of entries that fit in a bucket 41 | func BucketCapacity(blockSize int) int { 42 | if blockSize < BucketEntrySize || blockSize < BucketHeaderSize { 43 | return 0 44 | } 45 | return (blockSize - BucketHeaderSize) / BucketEntrySize 46 | } 47 | 48 | type Entry struct { 49 | Offset int64 // 48 bits 50 | Size int64 // 48 bits 51 | Hash uint64 52 | } 53 | 54 | // TODO: evaluate tradeoffs of using a slice of Entry instead of blob 55 | type Bucket struct { 56 | // Read only 57 | blockSize int // Size of a key file block 58 | 59 | mu sync.Mutex // protects following including writes into blob slice 60 | count int // Current key count 61 | spill int64 // Offset of next spill record or 0 62 | blob []byte 63 | } 64 | 65 | // bucket takes ownership of blob 66 | func NewBucket(blockSize int, blob []byte) *Bucket { 67 | if len(blob) != blockSize { 68 | panic("bucket blob size must equal block size") 69 | } 70 | 71 | b := &Bucket{ 72 | blockSize: blockSize, 73 | blob: blob, 74 | } 75 | 76 | b.initFromHeader() 77 | 78 | return b 79 | } 80 | 81 | func (b *Bucket) Lock() { 82 | b.mu.Lock() 83 | } 84 | 85 | func (b *Bucket) Unlock() { 86 | b.mu.Unlock() 87 | } 88 | 89 | // LowerBound returns index of entry with hash 90 | // equal to or greater than the given hash. 
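// The entries are kept sorted by hash, so this is a binary search; if no entry has a hash greater
// than or equal to h, the bucket's entry count is returned.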
91 | func (b *Bucket) lowerBound(h uint64) int { 92 | // expects caller to hold lock 93 | const w = BucketEntrySize 94 | 95 | // offset to first hash 96 | const offset = BucketHeaderSize + 97 | // first bucket Entry 98 | SizeUint48 + // Offset 99 | SizeUint48 // Size 100 | 101 | first := 0 102 | count := b.count 103 | for count > 0 { 104 | step := count / 2 105 | i := first + step 106 | h1 := binary.BigEndian.Uint64(b.blob[offset+i*w : offset+i*w+SizeUint64]) 107 | if h1 < h { 108 | first = i + 1 109 | count -= step + 1 110 | } else { 111 | count = step 112 | } 113 | } 114 | 115 | return first 116 | } 117 | 118 | func (b *Bucket) Has(h uint64) bool { 119 | const w = BucketEntrySize 120 | 121 | // offset to first hash 122 | const offset = BucketHeaderSize + 123 | // first bucket Entry 124 | SizeUint48 + // Offset 125 | SizeUint48 // Size 126 | 127 | b.mu.Lock() 128 | defer b.mu.Unlock() 129 | 130 | first := 0 131 | count := b.count 132 | for count > 0 { 133 | step := count / 2 134 | i := first + step 135 | h1 := binary.BigEndian.Uint64(b.blob[offset+i*w : offset+i*w+SizeUint64]) 136 | if h1 == h { 137 | return true 138 | } else if h1 < h { 139 | first = i + 1 140 | count -= step + 1 141 | } else { 142 | count = step 143 | } 144 | } 145 | 146 | return false 147 | } 148 | 149 | // Count returns the number of entries in the bucket 150 | func (b *Bucket) Count() int { 151 | b.mu.Lock() 152 | defer b.mu.Unlock() 153 | return b.count 154 | } 155 | 156 | // ActualSize returns the serialized bucket size, excluding empty space 157 | func (b *Bucket) ActualSize() int { 158 | b.mu.Lock() 159 | defer b.mu.Unlock() 160 | return BucketSize(b.count) 161 | } 162 | 163 | func (b *Bucket) BlockSize() int { 164 | return b.blockSize 165 | } 166 | 167 | func (b *Bucket) IsEmpty() bool { 168 | b.mu.Lock() 169 | defer b.mu.Unlock() 170 | return b.count == 0 171 | } 172 | 173 | func (b *Bucket) IsFull() bool { 174 | b.mu.Lock() 175 | defer b.mu.Unlock() 176 | return b.count >= BucketCapacity(b.blockSize) 177 | } 178 | 179 | func (b *Bucket) Capacity() int { 180 | return BucketCapacity(b.blockSize) 181 | } 182 | 183 | // Spill returns offset of next spill record or 0 184 | func (b *Bucket) Spill() int64 { 185 | b.mu.Lock() 186 | defer b.mu.Unlock() 187 | return b.spill 188 | } 189 | 190 | // SetSpill sets the offset of next spill record 191 | func (b *Bucket) SetSpill(v int64) { 192 | b.mu.Lock() 193 | defer b.mu.Unlock() 194 | b.setSpill(v) 195 | } 196 | 197 | func (b *Bucket) LowestHash() uint64 { 198 | b.mu.Lock() 199 | defer b.mu.Unlock() 200 | if b.count == 0 { 201 | return 0 202 | } 203 | pos := BucketHeaderSize 204 | return DecodeUint64(b.blob[pos+SizeUint48*2 : pos+SizeUint48*2+SizeUint64]) 205 | } 206 | 207 | func (b *Bucket) HighestHash() uint64 { 208 | b.mu.Lock() 209 | defer b.mu.Unlock() 210 | if b.count == 0 { 211 | return 0 212 | } 213 | pos := BucketHeaderSize + (b.count-1)*BucketEntrySize 214 | return DecodeUint64(b.blob[pos+SizeUint48*2 : pos+SizeUint48*2+SizeUint64]) 215 | } 216 | 217 | func (b *Bucket) clear() { 218 | // expects caller to hold lock 219 | b.count = 0 220 | b.spill = 0 221 | for i := range b.blob { 222 | b.blob[i] = 0 223 | } 224 | } 225 | 226 | // Returns the record for a key entry without bounds checking. 
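// The caller is expected to hold the bucket lock and to pass an index less than Count();
// Entry performs no locking of its own.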
227 | func (b *Bucket) Entry(idx int) Entry { 228 | return b.entry(idx) 229 | } 230 | 231 | func (b *Bucket) entry(idx int) Entry { 232 | // expects caller to hold lock 233 | 234 | // Start position of item in blob 235 | pos := BucketHeaderSize + idx*BucketEntrySize 236 | 237 | return Entry{ 238 | Offset: int64(DecodeUint48(b.blob[pos : pos+SizeUint48])), 239 | Size: int64(DecodeUint48(b.blob[pos+SizeUint48 : pos+SizeUint48*2])), 240 | Hash: DecodeUint64(b.blob[pos+SizeUint48*2 : pos+SizeUint48*2+SizeUint64]), 241 | } 242 | } 243 | 244 | // Erase an entry by index 245 | func (b *Bucket) erase(idx int) { 246 | // expects caller to hold lock 247 | 248 | // Start position of item in blob 249 | pos := BucketHeaderSize + idx*BucketEntrySize 250 | // Start position of next item in blob 251 | next := BucketHeaderSize + (idx+1)*BucketEntrySize 252 | // Position immediately after last entry 253 | end := next + (b.count-idx-1)*BucketEntrySize 254 | 255 | b.count-- 256 | if b.count < 0 { 257 | panic("logic error: erase resulted in negative bucket count") 258 | } 259 | 260 | if idx < b.count { 261 | // Shift remainder down 262 | copy(b.blob[pos:], b.blob[next:end]) 263 | } 264 | 265 | // TODO: bounds checks 266 | zeroLower := BucketHeaderSize + b.count*BucketEntrySize 267 | zeroUpper := BucketHeaderSize + (b.count+1)*(BucketEntrySize) - 1 268 | 269 | if zeroLower < 0 || zeroLower > len(b.blob)-1 || zeroUpper < 0 || zeroUpper > len(b.blob)-1 { 270 | panic(fmt.Sprintf("logic error: zeroing [%d:%d] out of bounds of blob length %d", zeroLower, zeroUpper, len(b.blob))) 271 | } 272 | 273 | for i := zeroLower; i < zeroUpper; i++ { 274 | b.blob[i] = 0 275 | } 276 | 277 | b.update() 278 | } 279 | 280 | // Insert an entry 281 | func (b *Bucket) insert(offset int64, size int64, hash uint64) { 282 | // expects caller to hold lock 283 | 284 | idx := b.lowerBound(hash) 285 | 286 | // Position we want to insert the item in blob 287 | pos := BucketHeaderSize + idx*BucketEntrySize 288 | // Start position of next item in blob 289 | next := BucketHeaderSize + (idx+1)*BucketEntrySize 290 | // Position immediately after last entry 291 | end := next + (b.count-idx)*BucketEntrySize 292 | 293 | // Make room for the item 294 | copy(b.blob[next:], b.blob[pos:end]) 295 | b.count++ 296 | b.update() 297 | 298 | EncodeUint48(b.blob[pos:pos+SizeUint48], uint64(offset)) 299 | EncodeUint48(b.blob[pos+SizeUint48:pos+SizeUint48*2], uint64(size)) 300 | EncodeUint64(b.blob[pos+SizeUint48*2:pos+SizeUint48*2+SizeUint64], hash) 301 | } 302 | 303 | // update updates the bucket header 304 | func (b *Bucket) update() { 305 | // expects caller to hold lock 306 | EncodeUint16(b.blob[0:SizeUint16], uint16(b.count)) 307 | EncodeUint48(b.blob[SizeUint16:SizeUint16+SizeUint48], uint64(b.spill)) 308 | } 309 | 310 | func (b *Bucket) initFromHeader() { 311 | // expects caller to hold lock 312 | b.count = int(DecodeUint16(b.blob[0:SizeUint16])) 313 | b.spill = int64(DecodeUint48(b.blob[SizeUint16 : SizeUint16+SizeUint48])) 314 | } 315 | 316 | func (b *Bucket) CopyInto(b2 *Bucket) { 317 | copy(b2.blob, b.blob) 318 | b.initFromHeader() 319 | } 320 | 321 | // WriteTo writes data to w until all entries in the bucket are written or an error occurs. 
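// Only the occupied portion of the bucket, BucketSize(count) bytes, is written; the unused
// remainder of the block is omitted.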
322 | func (b *Bucket) WriteTo(w io.Writer) (int64, error) { 323 | b.mu.Lock() 324 | defer b.mu.Unlock() 325 | actualSize := BucketSize(b.count) 326 | n, err := w.Write(b.blob[:actualSize]) 327 | if err == nil && n != actualSize { 328 | err = io.ErrShortWrite 329 | } 330 | 331 | return int64(n), err 332 | } 333 | 334 | // LoadFrom reads data containing entries from r, padding the rest of the bucket with zero bytes. 335 | func (b *Bucket) LoadFrom(spill int64, s Spiller) error { 336 | b.mu.Lock() 337 | defer b.mu.Unlock() 338 | 339 | if spill == 0 { 340 | panic("attempt to load from zero spill") 341 | } 342 | 343 | if err := s.LoadBucketSpill(spill, b.blob); err != nil { 344 | return fmt.Errorf("load bucket spill (at %d): %w", spill, err) 345 | } 346 | b.initFromHeader() 347 | 348 | return nil 349 | } 350 | 351 | // StoreFullTo writes until the entire blob is written (including zero padding) or an error occurs. 352 | func (b *Bucket) storeFullTo(w io.Writer) (int64, error) { 353 | n, err := w.Write(b.blob) 354 | if err == nil && n != len(b.blob) { 355 | err = io.ErrShortWrite 356 | } 357 | return int64(n), err 358 | } 359 | 360 | // LoadFullFrom reads the entire blob from r 361 | func (b *Bucket) loadFullFrom(r io.Reader) error { 362 | _, err := io.ReadFull(r, b.blob) 363 | if err != nil { 364 | return err 365 | } 366 | b.initFromHeader() 367 | 368 | return nil 369 | } 370 | 371 | // MaybeSpill spills the bucket if full. Bucket is cleared after it spills. 372 | func (b *Bucket) maybeSpill(sp Spiller) (bool, error) { 373 | // expects caller to hold lock 374 | 375 | if b.count < BucketCapacity(b.blockSize) { 376 | return false, nil 377 | } 378 | 379 | actualSize := BucketSize(b.count) 380 | offset, err := sp.AppendBucketSpill(b.blob[:actualSize]) 381 | if err != nil { 382 | return true, fmt.Errorf("write bucket spill: %w", err) 383 | } 384 | 385 | b.clear() 386 | 387 | // Set the spill location to be the start of the blob so a bucket can simply be read in from that spot 388 | b.setSpill(offset) 389 | 390 | return true, nil 391 | } 392 | 393 | func (b *Bucket) setSpill(spill int64) { 394 | b.spill = spill 395 | EncodeUint48(b.blob[SizeUint16:SizeUint16+SizeUint48], uint64(b.spill)) 396 | } 397 | -------------------------------------------------------------------------------- /internal/bucket_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | ) 10 | 11 | func TestEntry(t *testing.T) { 12 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*2) 13 | 14 | entries := []Entry{ 15 | { 16 | Offset: 15555, 17 | Size: 14444, 18 | Hash: 19999, 19 | }, 20 | { 21 | Offset: 25555, 22 | Size: 24444, 23 | Hash: 29999, 24 | }, 25 | } 26 | 27 | pos := BucketHeaderSize 28 | for i := range entries { 29 | EncodeUint48(blob[pos:pos+SizeUint48], uint64(entries[i].Offset)) 30 | EncodeUint48(blob[pos+SizeUint48:pos+SizeUint48*2], uint64(entries[i].Size)) 31 | EncodeUint64(blob[pos+SizeUint48*2:pos+SizeUint48*2+SizeUint64], entries[i].Hash) 32 | pos += BucketEntrySize 33 | } 34 | 35 | b := Bucket{ 36 | blockSize: len(blob), 37 | blob: blob, 38 | } 39 | 40 | for i := range entries { 41 | got := b.entry(i) 42 | if got != entries[i] { 43 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 44 | } 45 | } 46 | } 47 | 48 | func TestInsert(t *testing.T) { 49 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*2) 50 | 51 | entries := []Entry{ 52 | { 53 | Offset: 15555, 54 
| Size: 14444, 55 | Hash: 19999, 56 | }, 57 | { 58 | Offset: 25555, 59 | Size: 24444, 60 | Hash: 29999, 61 | }, 62 | } 63 | 64 | b := Bucket{ 65 | blockSize: len(blob), 66 | blob: blob, 67 | } 68 | 69 | for i := range entries { 70 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 71 | } 72 | 73 | for i := range entries { 74 | got := b.entry(i) 75 | if got != entries[i] { 76 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 77 | } 78 | } 79 | } 80 | 81 | func TestErase(t *testing.T) { 82 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*2) 83 | 84 | entries := []Entry{ 85 | { 86 | Offset: 15555, 87 | Size: 14444, 88 | Hash: 19999, 89 | }, 90 | { 91 | Offset: 25555, 92 | Size: 24444, 93 | Hash: 29999, 94 | }, 95 | } 96 | 97 | b := Bucket{ 98 | blockSize: len(blob), 99 | blob: blob, 100 | } 101 | 102 | for i := range entries { 103 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 104 | } 105 | 106 | b.erase(0) 107 | 108 | for i := 1; i < len(entries); i++ { 109 | got := b.entry(i - 1) 110 | if got != entries[i] { 111 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 112 | } 113 | } 114 | } 115 | 116 | func TestWriteReadFull(t *testing.T) { 117 | entries := []Entry{ 118 | { 119 | Offset: 15555, 120 | Size: 14444, 121 | Hash: 19999, 122 | }, 123 | { 124 | Offset: 25555, 125 | Size: 24444, 126 | Hash: 29999, 127 | }, 128 | } 129 | size := BucketHeaderSize + BucketEntrySize*2 130 | 131 | b := Bucket{ 132 | blockSize: size, 133 | blob: make([]byte, size), 134 | } 135 | 136 | for i := range entries { 137 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 138 | } 139 | 140 | buf := &bytes.Buffer{} 141 | _, err := b.storeFullTo(buf) 142 | if err != nil { 143 | t.Fatalf("unexpected error during write: %v", err) 144 | } 145 | 146 | b2 := Bucket{ 147 | blockSize: size, 148 | blob: make([]byte, size), 149 | } 150 | 151 | err = b2.loadFullFrom(buf) 152 | if err != nil { 153 | t.Fatalf("unexpected error during read: %v", err) 154 | } 155 | 156 | for i := 0; i < len(entries); i++ { 157 | got := b2.entry(i) 158 | if got != entries[i] { 159 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 160 | } 161 | } 162 | } 163 | 164 | func TestBucketHas(t *testing.T) { 165 | entries := []Entry{ 166 | { 167 | Offset: 15555, 168 | Size: 14444, 169 | Hash: 19999, 170 | }, 171 | { 172 | Offset: 25555, 173 | Size: 24444, 174 | Hash: 29999, 175 | }, 176 | } 177 | size := BucketHeaderSize + BucketEntrySize*2 178 | 179 | b := Bucket{ 180 | blockSize: size, 181 | blob: make([]byte, size), 182 | } 183 | 184 | for i := range entries { 185 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 186 | } 187 | 188 | for i := range entries { 189 | if !b.Has(entries[i].Hash) { 190 | t.Errorf("did not find hash %d", entries[i].Hash) 191 | } 192 | } 193 | 194 | if b.Has(112233) { 195 | t.Errorf("unexpectedly found hash %d", 112233) 196 | } 197 | } 198 | 199 | func TestEntryDuplicateHashes(t *testing.T) { 200 | entries := []Entry{ 201 | { 202 | Offset: 15555, 203 | Size: 14444, 204 | Hash: 19999, 205 | }, 206 | { 207 | Offset: 25555, 208 | Size: 24444, 209 | Hash: 29999, 210 | }, 211 | { 212 | Offset: 35555, 213 | Size: 34444, 214 | Hash: 19999, 215 | }, 216 | { 217 | Offset: 45555, 218 | Size: 44444, 219 | Hash: 19999, 220 | }, 221 | } 222 | 223 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*len(entries)) 224 | b := Bucket{ 225 | blockSize: len(blob), 226 | blob: blob, 227 | } 228 | 229 | for i := range entries { 230 | b.insert(entries[i].Offset, 
entries[i].Size, entries[i].Hash) 231 | } 232 | 233 | testCases := []struct { 234 | hash uint64 235 | count int 236 | }{ 237 | { 238 | hash: 19999, 239 | count: 3, 240 | }, 241 | { 242 | hash: 29999, 243 | count: 1, 244 | }, 245 | { 246 | hash: 39999, 247 | count: 0, 248 | }, 249 | } 250 | 251 | for _, tc := range testCases { 252 | var got []Entry 253 | for i := b.lowerBound(tc.hash); i < b.count; i++ { 254 | entry := b.entry(i) 255 | if entry.Hash != tc.hash { 256 | break 257 | } 258 | got = append(got, entry) 259 | } 260 | 261 | if len(got) != tc.count { 262 | t.Errorf("%d: got %d, wanted %d", tc.hash, len(got), tc.count) 263 | } 264 | 265 | } 266 | } 267 | 268 | func TestBucketMaybeSpill(t *testing.T) { 269 | entries := []Entry{ 270 | { 271 | Offset: 15555, 272 | Size: 14444, 273 | Hash: 19999, 274 | }, 275 | { 276 | Offset: 25555, 277 | Size: 24444, 278 | Hash: 29999, 279 | }, 280 | } 281 | blockSize := BucketHeaderSize + BucketEntrySize*len(entries) 282 | 283 | tmpdir, err := os.MkdirTemp("", "gonudb.*") 284 | if err != nil { 285 | t.Fatalf("unexpected error creating temp directory: %v", err) 286 | } 287 | defer os.RemoveAll(tmpdir) 288 | 289 | t.Run("empty bucket does not spill", func(t *testing.T) { 290 | b := Bucket{ 291 | blockSize: blockSize, 292 | blob: make([]byte, blockSize), 293 | } 294 | tmpfile := filepath.Join(tmpdir, "empty") 295 | 296 | if err := CreateDataFile(tmpfile, 5, 6); err != nil { 297 | t.Fatalf("unexpected error creating data file: %v", err) 298 | } 299 | 300 | df, err := OpenDataFile(tmpfile) 301 | if err != nil { 302 | t.Fatalf("unexpected error opening data file: %v", err) 303 | } 304 | 305 | _, err = b.maybeSpill(df) 306 | if err != nil { 307 | t.Fatalf("unexpected error during write: %v", err) 308 | } 309 | df.Close() 310 | written, err := os.ReadFile(tmpfile) 311 | if err != nil { 312 | t.Fatalf("unexpected error reading data file: %v", err) 313 | } 314 | 315 | if len(written) != DatFileHeaderSize { 316 | t.Errorf("got %d bytes written, wanted %d", len(written), DatFileHeaderSize) 317 | } 318 | }) 319 | 320 | t.Run("half full bucket does not spill", func(t *testing.T) { 321 | b := Bucket{ 322 | blockSize: blockSize, 323 | blob: make([]byte, blockSize), 324 | } 325 | b.insert(entries[0].Offset, entries[0].Size, entries[0].Hash) 326 | 327 | tmpfile := filepath.Join(tmpdir, "half") 328 | 329 | if err := CreateDataFile(tmpfile, 5, 6); err != nil { 330 | t.Fatalf("unexpected error creating data file: %v", err) 331 | } 332 | 333 | df, err := OpenDataFile(tmpfile) 334 | if err != nil { 335 | t.Fatalf("unexpected error opening data file: %v", err) 336 | } 337 | 338 | _, err = b.maybeSpill(df) 339 | if err != nil { 340 | t.Fatalf("unexpected error during write: %v", err) 341 | } 342 | df.Close() 343 | written, err := os.ReadFile(tmpfile) 344 | if err != nil { 345 | t.Fatalf("unexpected error reading data file: %v", err) 346 | } 347 | 348 | if len(written) != DatFileHeaderSize { 349 | t.Errorf("got %d bytes written, wanted %d", len(written), DatFileHeaderSize) 350 | } 351 | }) 352 | 353 | t.Run("full bucket does spill", func(t *testing.T) { 354 | b := Bucket{ 355 | blockSize: blockSize, 356 | blob: make([]byte, blockSize), 357 | } 358 | b.insert(entries[0].Offset, entries[0].Size, entries[0].Hash) 359 | b.insert(entries[1].Offset, entries[1].Size, entries[1].Hash) 360 | 361 | tmpfile := filepath.Join(tmpdir, "full") 362 | 363 | if err := CreateDataFile(tmpfile, 5, 6); err != nil { 364 | t.Fatalf("unexpected error creating data file: %v", err) 365 | } 366 | 367 | 
df, err := OpenDataFile(tmpfile) 368 | if err != nil { 369 | t.Fatalf("unexpected error opening data file: %v", err) 370 | } 371 | 372 | _, err = b.maybeSpill(df) 373 | if err != nil { 374 | t.Fatalf("unexpected error during write: %v", err) 375 | } 376 | df.Close() 377 | 378 | written, err := os.ReadFile(tmpfile) 379 | if err != nil { 380 | t.Fatalf("unexpected error reading data file: %v", err) 381 | } 382 | 383 | if len(written) != DatFileHeaderSize+BucketHeaderSize+blockSize { 384 | t.Errorf("got %d bytes written, wanted %d", len(written), BucketHeaderSize+blockSize) 385 | } 386 | if b.Spill() != DatFileHeaderSize { 387 | t.Errorf("got spill %d, wanted %d", b.Spill(), DatFileHeaderSize) 388 | } 389 | if b.Count() != 0 { 390 | t.Errorf("got %d entries in bucket, wanted %d", b.Count(), 0) 391 | } 392 | 393 | marker := DecodeUint48(written[DatFileHeaderSize : DatFileHeaderSize+SizeUint48]) 394 | if marker != 0 { 395 | t.Errorf("got marker %x, wanted %x", marker, 0) 396 | } 397 | 398 | size := int64(DecodeUint16(written[DatFileHeaderSize+SizeUint48 : DatFileHeaderSize+SizeUint48+SizeUint16])) 399 | if size != int64(blockSize) { 400 | t.Errorf("got size %d, wanted %d", size, blockSize) 401 | } 402 | }) 403 | 404 | t.Run("read from spill", func(t *testing.T) { 405 | b := Bucket{ 406 | blockSize: blockSize, 407 | blob: make([]byte, blockSize), 408 | } 409 | b.insert(entries[0].Offset, entries[0].Size, entries[0].Hash) 410 | b.insert(entries[1].Offset, entries[1].Size, entries[1].Hash) 411 | 412 | tmpfile := filepath.Join(tmpdir, "read") 413 | 414 | if err := CreateDataFile(tmpfile, 5, 6); err != nil { 415 | t.Fatalf("unexpected error creating data file: %v", err) 416 | } 417 | 418 | df, err := OpenDataFile(tmpfile) 419 | if err != nil { 420 | t.Fatalf("unexpected error opening data file: %v", err) 421 | } 422 | 423 | _, err = b.maybeSpill(df) 424 | if err != nil { 425 | t.Fatalf("unexpected error during write: %v", err) 426 | } 427 | df.Close() 428 | written, err := os.ReadFile(tmpfile) 429 | if err != nil { 430 | t.Fatalf("unexpected error reading data file: %v", err) 431 | } 432 | 433 | r := io.NewSectionReader(bytes.NewReader(written), DatFileHeaderSize+SizeUint48+SizeUint16, int64(blockSize)) 434 | 435 | b2 := Bucket{ 436 | blockSize: blockSize, 437 | blob: make([]byte, blockSize), 438 | } 439 | 440 | err = b2.loadFullFrom(r) 441 | if err != nil { 442 | t.Fatalf("unexpected error during read: %v", err) 443 | } 444 | if b2.Count() != 2 { 445 | t.Errorf("got count %d, wanted %d", b.Count(), 2) 446 | } 447 | 448 | for i := 0; i < len(entries); i++ { 449 | got := b2.entry(i) 450 | if got != entries[i] { 451 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 452 | } 453 | } 454 | }) 455 | } 456 | -------------------------------------------------------------------------------- /internal/bucketcache.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "sync" 9 | 10 | "github.com/go-logr/logr" 11 | ) 12 | 13 | type Spiller interface { 14 | LoadBucketSpill(int64, []byte) error 15 | AppendBucketSpill([]byte) (int64, error) 16 | Flush() error 17 | } 18 | 19 | type BucketCache struct { 20 | mu sync.Mutex 21 | buckets []*Bucket 22 | dirty []bool 23 | 24 | modulus uint64 // hash modulus 25 | 26 | bucketSize int 27 | count int // number of entries 28 | threshold int // target average number of entries per bucket 29 | 30 | tlogger logr.Logger // trace logger 31 | } 32 | 33 | func 
(c *BucketCache) Insert(offset int64, size int64, hash uint64, df Spiller) error { 34 | c.mu.Lock() 35 | defer c.mu.Unlock() 36 | 37 | c.count++ 38 | if c.count/len(c.buckets) > c.threshold { 39 | 40 | if uint64(len(c.buckets)) == c.modulus { 41 | c.modulus *= 2 42 | } 43 | 44 | // Split using linear hashing 45 | idxSplit := len(c.buckets) - int(c.modulus/2) // index of the bucket to be split 46 | c.dirty[idxSplit] = true 47 | 48 | idxNew := len(c.buckets) // new bucket will be added at end of list 49 | c.buckets = append(c.buckets, NewBucket(c.bucketSize, make([]byte, c.bucketSize))) 50 | c.dirty = append(c.dirty, true) 51 | 52 | if err := c.split(idxSplit, idxNew, df); err != nil { 53 | return fmt.Errorf("split bucket: %w", err) 54 | } 55 | } 56 | 57 | idx := c.bucketIndex(hash) 58 | b := c.buckets[idx] 59 | spilled, err := b.maybeSpill(df) 60 | 61 | if spilled && c.tlogger.Enabled() { 62 | c.tlogger.Info("bucket spilled", "index", idx, "hash", hash, "buckets", len(c.buckets), "modulus", c.modulus, "spill", b.spill) 63 | } 64 | if err != nil { 65 | return fmt.Errorf("maybe spill: %w", err) 66 | } 67 | 68 | // If bucket spilled then it will be empty 69 | b.insert(offset, size, hash) 70 | 71 | c.dirty[idx] = true 72 | return nil 73 | } 74 | 75 | // assumes caller holds lock 76 | func (c *BucketCache) bucketIndex(h uint64) int { 77 | n := h % c.modulus 78 | if n >= uint64(len(c.buckets)) { 79 | n -= c.modulus / 2 80 | } 81 | return int(n) 82 | } 83 | 84 | // assumes caller holds lock 85 | func (c *BucketCache) split(idxSplit, idxNew int, df Spiller) error { 86 | bSplit := c.buckets[idxSplit] 87 | // Trivial case: split empty bucket 88 | if bSplit.count == 0 && bSplit.spill == 0 { 89 | return nil 90 | } 91 | 92 | bNew := c.buckets[idxNew] 93 | 94 | for i := 0; i < bSplit.count; { 95 | e := bSplit.entry(i) 96 | idx := c.bucketIndex(e.Hash) 97 | if c.tlogger.Enabled() { 98 | c.tlogger.Info("entry rehash", "hash", e.Hash, "rehash_index", idx) 99 | } 100 | 101 | if idx != idxSplit && idx != idxNew { 102 | // panic due to a logic error. Something very bad must have happened. 103 | panic(fmt.Sprintf("bucket index of rehashed key (hash=%d, bucket=%d) does not correspond to bucket being split (bucket=%d) or new bucket (bucket=%d), modulus=%d, buckets=%d", e.Hash, idx, idxSplit, idxNew, c.modulus, len(c.buckets))) 104 | } 105 | 106 | // If the entry falls into the new bucket then add it and remove from the splitting bucket 107 | if idx == idxNew { 108 | bNew.insert(e.Offset, e.Size, e.Hash) 109 | bSplit.erase(i) 110 | } else { 111 | i++ 112 | } 113 | } 114 | 115 | // Deal with any spills in the splitting bucket by rehashing the entries as well, walking the linked list 116 | // of spills. Potentially this can lead to the new bucket spilling. 117 | // Since spills are immutable we may leave orphaned entries that have been copied to the new bucket. 
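// The loop below flushes the data file, loads each spill record into a temporary bucket, rehashes its
// entries into either the splitting bucket or the new bucket (each of which may itself spill again),
// and follows the chain of spill offsets until it reaches zero.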
118 | 119 | spill := bSplit.spill 120 | bSplit.setSpill(0) 121 | 122 | tmp := NewBucket(c.bucketSize, make([]byte, c.bucketSize)) 123 | 124 | if spill > 0 { 125 | for { 126 | // Make sure any spills are on disk 127 | // TODO: figure out semantics of buffered writer here 128 | if err := df.Flush(); err != nil { 129 | return fmt.Errorf("flush data file: %w", err) 130 | } 131 | 132 | // Read the spill record from the data file into the temporary bucket 133 | if err := tmp.LoadFrom(spill, df); err != nil { 134 | return fmt.Errorf("load from spill (%d): %w", spill, err) 135 | } 136 | 137 | if c.tlogger.Enabled() { 138 | c.tlogger.Info("loaded bucket from spill", "spill", spill, "bucket_entry_count", tmp.count) 139 | } 140 | 141 | for i := 0; i < tmp.count; i++ { 142 | e := tmp.entry(i) 143 | idx := c.bucketIndex(e.Hash) 144 | if c.tlogger.Enabled() { 145 | c.tlogger.Info("spill entry rehash", "hash", e.Hash, "rehash_index", idx, "buckets", len(c.buckets), "modulus", c.modulus) 146 | } 147 | if idx != idxSplit && idx != idxNew { 148 | panic(fmt.Sprintf("bucket index of rehashed key (%d) does not correspond to bucket being split (%d) or new bucket (%d)", idx, idxSplit, idxNew)) 149 | } 150 | if idx == idxNew { 151 | spilled, err := bNew.maybeSpill(df) 152 | if spilled && c.tlogger.Enabled() { 153 | c.tlogger.Info("new bucket spilled during split", "index", idx, "spill", bNew.spill) 154 | } 155 | if err != nil { 156 | return fmt.Errorf("maybe spill: %w", err) 157 | } 158 | // we hold the lock on bNew 159 | bNew.insert(e.Offset, e.Size, e.Hash) 160 | } else { 161 | spilled, err := bSplit.maybeSpill(df) 162 | if spilled && c.tlogger.Enabled() { 163 | c.tlogger.Info("split bucket spilled during split", "index", idx, "spill", bSplit.spill) 164 | } 165 | if err != nil { 166 | return fmt.Errorf("maybe spill: %w", err) 167 | } 168 | bSplit.insert(e.Offset, e.Size, e.Hash) 169 | } 170 | 171 | } 172 | 173 | // Continue reading any further spills 174 | spill = tmp.spill 175 | 176 | if spill == 0 { 177 | break 178 | } 179 | } 180 | } 181 | 182 | return nil 183 | } 184 | 185 | // Exists reports whether a record with the given hash and key exists in the data file 186 | func (c *BucketCache) Exists(hash uint64, key string, df *DataFile) (bool, error) { 187 | _, err := c.FetchHeader(hash, key, df) 188 | if err == nil { 189 | return true, nil 190 | } 191 | if err == ErrKeyNotFound { 192 | return false, nil 193 | } 194 | return false, err 195 | } 196 | 197 | // Fetch returns a reader that can be used to read the data record associated with the key 198 | func (c *BucketCache) Fetch(hash uint64, key string, df *DataFile) (io.Reader, error) { 199 | c.mu.Lock() 200 | defer c.mu.Unlock() 201 | 202 | idx := c.bucketIndex(hash) 203 | b := c.buckets[idx] 204 | 205 | for { 206 | for i := b.lowerBound(hash); i < b.count; i++ { 207 | entry := b.entry(i) 208 | if entry.Hash != hash { 209 | break 210 | } 211 | 212 | dr, err := df.RecordDataReader(entry.Offset, key) 213 | if err != nil { 214 | if errors.Is(err, ErrKeyMismatch) { 215 | continue 216 | } 217 | if err != nil { 218 | return nil, fmt.Errorf("load data record: %w", err) 219 | } 220 | } 221 | 222 | // Found a matching record 223 | return dr, nil 224 | } 225 | 226 | if b.spill == 0 { 227 | break 228 | } 229 | 230 | spill := b.spill 231 | 232 | blockBuf := make([]byte, c.bucketSize) 233 | b = NewBucket(c.bucketSize, blockBuf) 234 | if err := b.LoadFrom(spill, df); err != nil { 235 | return nil, fmt.Errorf("read spill: %w", err) 236 | } 237 | 238 | } 239 | 240 | return 
nil, ErrKeyNotFound 241 | } 242 | 243 | // FetchHeader returns a record header for the data record associated with the key 244 | func (c *BucketCache) FetchHeader(hash uint64, key string, df *DataFile) (*DataRecordHeader, error) { 245 | c.mu.Lock() 246 | defer c.mu.Unlock() 247 | 248 | idx := c.bucketIndex(hash) 249 | b := c.buckets[idx] 250 | 251 | bkey := []byte(key) 252 | for { 253 | for i := b.lowerBound(hash); i < b.count; i++ { 254 | entry := b.entry(i) 255 | if entry.Hash != hash { 256 | break 257 | } 258 | 259 | rh, err := df.LoadRecordHeader(entry.Offset) 260 | if err != nil { 261 | return nil, fmt.Errorf("read data record header: %w", err) 262 | } 263 | if !bytes.Equal(bkey, rh.Key) { 264 | continue 265 | } 266 | 267 | // Found a matching record 268 | return rh, nil 269 | } 270 | 271 | if b.spill == 0 { 272 | break 273 | } 274 | 275 | spill := b.spill 276 | 277 | blockBuf := make([]byte, c.bucketSize) 278 | b = NewBucket(c.bucketSize, blockBuf) 279 | if err := b.LoadFrom(spill, df); err != nil { 280 | return nil, fmt.Errorf("read spill: %w", err) 281 | } 282 | 283 | } 284 | 285 | return nil, ErrKeyNotFound 286 | } 287 | 288 | // computeStats counts the number of entries in buckets and spills 289 | func (c *BucketCache) computeStats(df *DataFile) error { 290 | c.mu.Lock() 291 | defer c.mu.Unlock() 292 | 293 | blockBuf := make([]byte, c.bucketSize) 294 | for idx := range c.buckets { 295 | b := c.buckets[idx] 296 | for { 297 | c.count += b.count 298 | if b.spill == 0 { 299 | break 300 | } 301 | spill := b.spill 302 | 303 | b = NewBucket(c.bucketSize, blockBuf) 304 | if err := b.LoadFrom(spill, df); err != nil { 305 | return fmt.Errorf("read spill: %w", err) 306 | } 307 | 308 | } 309 | } 310 | return nil 311 | } 312 | 313 | // EntryCount returns the number of entries in the cache 314 | func (c *BucketCache) EntryCount() int { 315 | c.mu.Lock() 316 | defer c.mu.Unlock() 317 | return c.count 318 | } 319 | 320 | // BucketCount returns the number of buckets in the cache 321 | func (c *BucketCache) BucketCount() int { 322 | c.mu.Lock() 323 | defer c.mu.Unlock() 324 | return len(c.buckets) 325 | } 326 | 327 | // Get retrieves a copy of the bucket at index idx 328 | func (c *BucketCache) Get(idx int) *Bucket { 329 | c.mu.Lock() 330 | defer c.mu.Unlock() 331 | 332 | buf := make([]byte, c.bucketSize) 333 | copy(buf, c.buckets[idx].blob) 334 | return NewBucket(c.bucketSize, buf) 335 | } 336 | 337 | func (c *BucketCache) Has(hash uint64, sp Spiller) (bool, error) { 338 | c.mu.Lock() 339 | defer c.mu.Unlock() 340 | 341 | idx := c.bucketIndex(hash) 342 | b := c.buckets[idx] 343 | 344 | // tmp is only used if we need to read from spuill 345 | var tmp *Bucket 346 | 347 | for { 348 | idx := b.lowerBound(hash) 349 | if idx < b.count { 350 | entry := b.entry(idx) 351 | if entry.Hash == hash { 352 | // Found a matching record 353 | return true, nil 354 | } 355 | } 356 | 357 | if b.spill == 0 { 358 | break 359 | } 360 | 361 | if tmp == nil { 362 | tmp = NewBucket(c.bucketSize, make([]byte, c.bucketSize)) 363 | } 364 | if err := tmp.LoadFrom(int64(b.spill), sp); err != nil { 365 | return false, fmt.Errorf("read spill: %w", err) 366 | } 367 | b = tmp 368 | } 369 | 370 | return false, nil 371 | } 372 | 373 | func (c *BucketCache) WriteDirty(lf *LogFile, kf *KeyFile) (int64, error) { 374 | c.mu.Lock() 375 | defer c.mu.Unlock() 376 | 377 | work := int64(0) 378 | for idx := range c.dirty { 379 | if !c.dirty[idx] { 380 | continue 381 | } 382 | written, err := lf.AppendBucket(idx, c.buckets[idx]) 383 | work += 
written 384 | if err != nil { 385 | return work, fmt.Errorf("append bucket to log %d: %w", idx, err) 386 | } 387 | } 388 | 389 | if err := lf.Sync(); err != nil { 390 | return work, fmt.Errorf("sync log file: %w", err) 391 | } 392 | 393 | for idx := range c.dirty { 394 | if !c.dirty[idx] { 395 | continue 396 | } 397 | if err := kf.PutBucket(idx, c.buckets[idx]); err != nil { 398 | return work, fmt.Errorf("put bucket %d: %w", idx, err) 399 | } 400 | } 401 | 402 | if err := kf.Sync(); err != nil { 403 | return work, fmt.Errorf("sync key file: %w", err) 404 | } 405 | 406 | for idx := range c.dirty { 407 | c.dirty[idx] = false 408 | } 409 | 410 | return work, nil 411 | } 412 | -------------------------------------------------------------------------------- /internal/bucketcache_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "testing" 7 | 8 | "github.com/go-logr/logr" 9 | ) 10 | 11 | func TestBucketCacheInsert(t *testing.T) { 12 | bucketSize := BucketHeaderSize + BucketEntrySize*6 13 | 14 | n := 5000 15 | c := &BucketCache{ 16 | bucketSize: bucketSize, // capacity of 6 entries per bucket 17 | modulus: 1, 18 | buckets: []*Bucket{NewBucket(bucketSize, make([]byte, bucketSize))}, 19 | dirty: []bool{false}, 20 | threshold: 3, // aim for an average of 3 entries per bucket 21 | tlogger: logr.Discard(), 22 | } 23 | 24 | rng := rand.New(rand.NewSource(299792458)) 25 | 26 | sm := make(spillMap) 27 | hashes := make([]uint64, n) 28 | for i := range hashes { 29 | hashes[i] = rng.Uint64() 30 | err := c.Insert(5, 5, hashes[i], sm) 31 | if err != nil { 32 | t.Fatalf("unexpected error on insert (%d): %v", hashes[i], err) 33 | } 34 | } 35 | if c.EntryCount() != len(hashes) { 36 | t.Errorf("c.EntryCount()=%d, wanted %d", c.EntryCount(), len(hashes)) 37 | } 38 | 39 | for i := range hashes { 40 | has, err := c.Has(hashes[i], sm) 41 | if err != nil { 42 | t.Fatalf("unexpected error on has (%d): %v", hashes[i], err) 43 | } 44 | 45 | if !has { 46 | t.Errorf("did not find hash %d", hashes[i]) 47 | } 48 | } 49 | } 50 | 51 | type spillMap map[int64][]byte 52 | 53 | func (s spillMap) LoadBucketSpill(spill int64, buf []byte) error { 54 | data, ok := s[spill] 55 | if !ok { 56 | return fmt.Errorf("unknown spill: %d", spill) 57 | } 58 | copy(buf, data) 59 | return nil 60 | } 61 | 62 | func (s spillMap) AppendBucketSpill(buf []byte) (int64, error) { 63 | spill := int64(len(s)) + 1 64 | data := make([]byte, len(buf)) 65 | copy(data, buf) 66 | s[spill] = data 67 | return spill, nil 68 | } 69 | 70 | func (s spillMap) Flush() error { 71 | return nil 72 | } 73 | -------------------------------------------------------------------------------- /internal/cache.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | // CacheData is a read only view of a bucket cache. It is safe for concurrent use. 
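// An illustrative read-only lookup against a CacheData snapshot (the index value 3 is
// arbitrary; buckets are keyed by whatever indices the writer inserted):
//
//	if b, ok := data.Find(3); ok {
//		_ = b.Count() // number of entries in the cached bucket
//	}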
4 | type CacheData struct { 5 | index map[int]int 6 | buckets []BucketRecord 7 | } 8 | 9 | func (c *CacheData) Find(n int) (*Bucket, bool) { 10 | idx, exists := c.index[n] 11 | if !exists { 12 | return nil, false 13 | } 14 | return c.buckets[idx].bucket, true 15 | } 16 | 17 | func (c *CacheData) Has(n int) bool { 18 | _, exists := c.index[n] 19 | return exists 20 | } 21 | 22 | func (c *CacheData) Count() int { 23 | return len(c.buckets) 24 | } 25 | 26 | func (c *CacheData) WithBuckets(fn func(bs []BucketRecord) error) error { 27 | return fn(c.buckets) 28 | } 29 | 30 | // Cache is an in memory buffer of buckets. It is not safe for concurrent use. 31 | type Cache struct { 32 | keySize int 33 | blockSize int 34 | sizeHint int 35 | 36 | data *CacheData 37 | } 38 | 39 | func NewCache(keySize int, blockSize int, sizeHint int) *Cache { 40 | return &Cache{ 41 | keySize: keySize, 42 | blockSize: blockSize, 43 | sizeHint: sizeHint, 44 | data: &CacheData{ 45 | index: make(map[int]int, sizeHint), 46 | buckets: make([]BucketRecord, 0, sizeHint), 47 | }, 48 | } 49 | } 50 | 51 | func (c *Cache) Find(n int) (*Bucket, bool) { 52 | return c.data.Find(n) 53 | } 54 | 55 | func (c *Cache) Has(n int) bool { 56 | return c.data.Has(n) 57 | } 58 | 59 | func (c *Cache) Count() int { 60 | return c.data.Count() 61 | } 62 | 63 | func (c *Cache) WithBuckets(fn func(bs []BucketRecord) error) error { 64 | return c.data.WithBuckets(fn) 65 | } 66 | 67 | func (c *Cache) Insert(idx int, b *Bucket) { 68 | br := BucketRecord{ 69 | idx: idx, 70 | bucket: b, 71 | } 72 | 73 | c.data.buckets = append(c.data.buckets, br) 74 | c.data.index[idx] = len(c.data.buckets) - 1 75 | } 76 | 77 | func (c *Cache) Clear() { 78 | c.data = &CacheData{ 79 | index: make(map[int]int, c.sizeHint), 80 | buckets: make([]BucketRecord, 0, c.sizeHint), 81 | } 82 | } 83 | 84 | // TakeData takes ownership of the Cache's data. The Cache is cleared after. 
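// A minimal sketch of the intended handoff (keySize, blockSize and the bucket value b are
// placeholders; b would normally come from a key file load):
//
//	c := NewCache(keySize, blockSize, 16)
//	c.Insert(0, b)
//	snapshot := c.TakeData() // safe to share with concurrent readers
//	_ = snapshot             // c is now empty and may be reused by the writer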
85 | func (c *Cache) TakeData() *CacheData { 86 | data := c.data 87 | c.data = &CacheData{ 88 | index: make(map[int]int, c.sizeHint), 89 | buckets: make([]BucketRecord, 0, c.sizeHint), 90 | } 91 | return data 92 | } 93 | -------------------------------------------------------------------------------- /internal/const32.go: -------------------------------------------------------------------------------- 1 | //go:build 386 || arm || mips || mipsle 2 | 3 | package internal 4 | 5 | import ( 6 | "math" 7 | ) 8 | 9 | const ( 10 | MaxBlockSize = MaxUint16 // maximum length of a keyfile block in bytes (must not be larger than MaxKeySize due to on-disk representation) 11 | MaxKeySize = MaxUint16 // maximum length of a data record's key in bytes 12 | MaxDataSize = math.MaxInt32 - 1 // maximum length of a data record's value in bytes 13 | ) 14 | -------------------------------------------------------------------------------- /internal/const64.go: -------------------------------------------------------------------------------- 1 | //go:build amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm 2 | 3 | package internal 4 | 5 | const ( 6 | MaxBlockSize = MaxUint16 // maximum length of a keyfile block in bytes (must not be larger than MaxKeySize due to on-disk representation) 7 | MaxKeySize = MaxUint16 // maximum length of a data record's key in bytes 8 | MaxDataSize = MaxUint48 // maximum length of a data record's value in bytes 9 | ) 10 | -------------------------------------------------------------------------------- /internal/context.go: -------------------------------------------------------------------------------- 1 | package internal 2 | -------------------------------------------------------------------------------- /internal/error.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | var ( 8 | ErrAppNumMismatch = errors.New("appnum mismatch") 9 | ErrDataMissing = errors.New("data missing") 10 | ErrDataTooLarge = errors.New("data too large") 11 | ErrDifferentVersion = errors.New("different version") 12 | ErrHashMismatch = errors.New("hash mismatch") 13 | ErrInvalidBlockSize = errors.New("invalid block size") 14 | ErrInvalidBucketCount = errors.New("invalid bucket count") 15 | ErrInvalidCapacity = errors.New("invalid capacity") 16 | ErrInvalidDataRecord = errors.New("not a data record: contains spill marker") 17 | ErrInvalidKeySize = errors.New("invalid key size") 18 | ErrInvalidLoadFactor = errors.New("invalid load factor") 19 | ErrInvalidRecordSize = errors.New("invalid record size") 20 | ErrInvalidSpill = errors.New("not a spill record: missing spill marker") 21 | ErrKeyExists = errors.New("key exists") 22 | ErrKeyMismatch = errors.New("key mismatch") 23 | ErrKeyMissing = errors.New("key missing") 24 | ErrKeyNotFound = errors.New("key not found") 25 | ErrKeySizeMismatch = errors.New("key size mismatch") 26 | ErrKeyTooLarge = errors.New("key too large") 27 | ErrKeyWrongSize = errors.New("key wrong size") // deprecated: use ErrKeyMissing and ErrKeyTooLarge instead 28 | ErrNotDataFile = errors.New("not a data file") 29 | ErrNotKeyFile = errors.New("not a key file") 30 | ErrNotLogFile = errors.New("not a log file") 31 | ErrShortKeyFile = errors.New("short key file") 32 | ErrUIDMismatch = errors.New("uid mismatch") 33 | ) 34 | -------------------------------------------------------------------------------- /internal/field.go: 
-------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "encoding/binary" 5 | "math" 6 | ) 7 | 8 | const ( 9 | MaxUint16 = math.MaxUint16 10 | MaxUint24 = 0xffffff 11 | MaxUint32 = math.MaxUint32 12 | MaxUint48 = 0x0000ffffffffffff 13 | MaxUint64 = math.MaxUint64 14 | 15 | MaxInt16 = math.MaxInt16 16 | ) 17 | 18 | const ( 19 | SizeUint16 = 2 20 | SizeUint24 = 3 21 | SizeUint32 = 4 22 | SizeUint48 = 6 23 | SizeUint64 = 8 24 | ) 25 | 26 | func DecodeUint16(b []byte) uint16 { 27 | return binary.BigEndian.Uint16(b) 28 | } 29 | 30 | func DecodeUint32(b []byte) uint32 { 31 | return binary.BigEndian.Uint32(b) 32 | } 33 | 34 | func DecodeUint64(b []byte) uint64 { 35 | return binary.BigEndian.Uint64(b) 36 | } 37 | 38 | func DecodeUint48(b []byte) uint64 { 39 | _ = b[5] // bounds check hint to compiler; see golang.org/issue/14808 40 | return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | 41 | uint64(b[4])<<32 | uint64(b[5])<<40 42 | } 43 | 44 | func DecodeUint24(b []byte) uint32 { 45 | _ = b[2] // bounds check hint to compiler; see golang.org/issue/14808 46 | return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 47 | } 48 | 49 | func EncodeUint16(b []byte, v uint16) { 50 | binary.BigEndian.PutUint16(b, v) 51 | } 52 | 53 | func EncodeUint32(b []byte, v uint32) { 54 | binary.BigEndian.PutUint32(b, v) 55 | } 56 | 57 | func EncodeUint64(b []byte, v uint64) { 58 | binary.BigEndian.PutUint64(b, v) 59 | } 60 | 61 | func EncodeUint48(b []byte, v uint64) { 62 | _ = b[5] // early bounds check to guarantee safety of writes below 63 | b[0] = byte(v) 64 | b[1] = byte(v >> 8) 65 | b[2] = byte(v >> 16) 66 | b[3] = byte(v >> 24) 67 | b[4] = byte(v >> 32) 68 | b[5] = byte(v >> 40) 69 | } 70 | 71 | func EncodeUint24(b []byte, v uint32) { 72 | _ = b[3] // early bounds check to guarantee safety of writes below 73 | b[0] = byte(v) 74 | b[1] = byte(v >> 8) 75 | b[2] = byte(v >> 16) 76 | b[3] = byte(v >> 24) 77 | } 78 | 79 | /* 80 | 81 | // These metafunctions describe the binary format of fields on disk 82 | 83 | template 84 | struct field; 85 | 86 | template<> 87 | struct field 88 | { 89 | static std::size_t constexpr size = 1; 90 | static std::uint64_t constexpr max = 0xff; 91 | }; 92 | 93 | template<> 94 | struct field 95 | { 96 | static std::size_t constexpr size = 2; 97 | static std::uint64_t constexpr max = 0xffff; 98 | }; 99 | 100 | template<> 101 | struct field 102 | { 103 | static std::size_t constexpr size = 3; 104 | static std::uint64_t constexpr max = 0xffffff; 105 | }; 106 | 107 | template<> 108 | struct field 109 | { 110 | static std::size_t constexpr size = 4; 111 | static std::uint64_t constexpr max = 0xffffffff; 112 | }; 113 | 114 | template<> 115 | struct field 116 | { 117 | static std::size_t constexpr size = 6; 118 | static std::uint64_t constexpr max = 0x0000ffffffffffff; 119 | }; 120 | 121 | template<> 122 | struct field 123 | { 124 | static std::size_t constexpr size = 8; 125 | static std::uint64_t constexpr max = 0xffffffffffffffff; 126 | }; 127 | 128 | // read field from memory 129 | 130 | template::value>::type* = nullptr> 132 | void 133 | readp(void const* v, U& u) 134 | { 135 | auto p = reinterpret_cast(v); 136 | u = *p; 137 | } 138 | 139 | template::value>::type* = nullptr> 141 | void 142 | readp(void const* v, U& u) 143 | { 144 | auto p = reinterpret_cast(v); 145 | T t; 146 | t = T(*p++)<< 8; 147 | t = T(*p ) | t; 148 | u = t; 149 | } 150 | 151 | template::value>::type* = nullptr> 153 | void 154 | 
readp(void const* v, U& u) 155 | { 156 | auto p = reinterpret_cast(v); 157 | std::uint32_t t; 158 | t = std::uint32_t(*p++)<<16; 159 | t = (std::uint32_t(*p++)<< 8) | t; 160 | t = std::uint32_t(*p ) | t; 161 | u = t; 162 | } 163 | 164 | template::value>::type* = nullptr> 166 | void 167 | readp(void const* v, U& u) 168 | { 169 | auto const* p = reinterpret_cast(v); 170 | T t; 171 | t = T(*p++)<<24; 172 | t = (T(*p++)<<16) | t; 173 | t = (T(*p++)<< 8) | t; 174 | t = T(*p ) | t; 175 | u = t; 176 | } 177 | 178 | template::value>::type* = nullptr> 180 | void 181 | readp(void const* v, U& u) 182 | { 183 | auto p = reinterpret_cast(v); 184 | std::uint64_t t; 185 | t = (std::uint64_t(*p++)<<40); 186 | t = (std::uint64_t(*p++)<<32) | t; 187 | t = (std::uint64_t(*p++)<<24) | t; 188 | t = (std::uint64_t(*p++)<<16) | t; 189 | t = (std::uint64_t(*p++)<< 8) | t; 190 | t = std::uint64_t(*p ) | t; 191 | u = t; 192 | } 193 | 194 | template::value>::type* = nullptr> 196 | void 197 | readp(void const* v, U& u) 198 | { 199 | auto p = reinterpret_cast(v); 200 | T t; 201 | t = T(*p++)<<56; 202 | t = (T(*p++)<<48) | t; 203 | t = (T(*p++)<<40) | t; 204 | t = (T(*p++)<<32) | t; 205 | t = (T(*p++)<<24) | t; 206 | t = (T(*p++)<<16) | t; 207 | t = (T(*p++)<< 8) | t; 208 | t = T(*p ) | t; 209 | u = t; 210 | } 211 | 212 | // read field from istream 213 | 214 | template 215 | void 216 | read(istream& is, U& u) 217 | { 218 | readp(is.data(field::size), u); 219 | } 220 | 221 | inline 222 | void 223 | read_size48(istream& is, std::size_t& u) 224 | { 225 | std::uint64_t v; 226 | read(is, v); 227 | BOOST_ASSERT(v <= std::numeric_limits::max()); 228 | u = static_cast(v); 229 | } 230 | 231 | // write field to ostream 232 | 233 | template::value>::type* = nullptr> 235 | void 236 | write(ostream& os, U u) 237 | { 238 | BOOST_ASSERT(u <= field::max); 239 | std::uint8_t* p = os.data(field::size); 240 | *p = static_cast(u); 241 | } 242 | 243 | template::value>::type* = nullptr> 245 | void 246 | write(ostream& os, U u) 247 | { 248 | BOOST_ASSERT(u <= field::max); 249 | auto const t = static_cast(u); 250 | std::uint8_t* p = os.data(field::size); 251 | *p++ = (t>> 8)&0xff; 252 | *p = t &0xff; 253 | } 254 | 255 | template::value>::type* = nullptr> 257 | void 258 | write(ostream& os, U u) 259 | { 260 | BOOST_ASSERT(u <= field::max); 261 | auto const t = static_cast(u); 262 | std::uint8_t* p = os.data(field::size); 263 | *p++ = (t>>16)&0xff; 264 | *p++ = (t>> 8)&0xff; 265 | *p = t &0xff; 266 | } 267 | 268 | template::value>::type* = nullptr> 270 | void 271 | write(ostream& os, U u) 272 | { 273 | BOOST_ASSERT(u <= field::max); 274 | auto const t = static_cast(u); 275 | std::uint8_t* p = os.data(field::size); 276 | *p++ = (t>>24)&0xff; 277 | *p++ = (t>>16)&0xff; 278 | *p++ = (t>> 8)&0xff; 279 | *p = t &0xff; 280 | } 281 | 282 | template::value>::type* = nullptr> 284 | void 285 | write(ostream& os, U u) 286 | { 287 | BOOST_ASSERT(u <= field::max); 288 | auto const t = static_cast(u); 289 | std::uint8_t* p = os.data(field::size); 290 | *p++ = (t>>40)&0xff; 291 | *p++ = (t>>32)&0xff; 292 | *p++ = (t>>24)&0xff; 293 | *p++ = (t>>16)&0xff; 294 | *p++ = (t>> 8)&0xff; 295 | *p = t &0xff; 296 | } 297 | 298 | template::value>::type* = nullptr> 300 | void 301 | write(ostream& os, U u) 302 | { 303 | auto const t = static_cast(u); 304 | std::uint8_t* p = os.data(field::size); 305 | *p++ = (t>>56)&0xff; 306 | *p++ = (t>>48)&0xff; 307 | *p++ = (t>>40)&0xff; 308 | *p++ = (t>>32)&0xff; 309 | *p++ = (t>>24)&0xff; 310 | *p++ = (t>>16)&0xff; 311 | *p++ = (t>> 
8)&0xff; 312 | *p = t &0xff; 313 | } 314 | 315 | } // detail 316 | } // nudb 317 | 318 | #endif 319 | 320 | */ 321 | -------------------------------------------------------------------------------- /internal/file.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "math" 10 | "os" 11 | "sync" 12 | 13 | "github.com/go-logr/logr" 14 | ) 15 | 16 | func openFile(name string, flag int, perm os.FileMode, advice int) (*os.File, error) { 17 | f, err := os.OpenFile(name, flag, perm) 18 | if err != nil { 19 | return nil, fmt.Errorf("open: %w", err) 20 | } 21 | 22 | err = Fadvise(int(f.Fd()), 0, 0, advice) 23 | if err != nil { 24 | return nil, fmt.Errorf("fadvise: %w", err) 25 | } 26 | 27 | return f, nil 28 | } 29 | 30 | // openFileForScan creates a file for sequential reads 31 | func openFileForScan(name string) (*os.File, error) { 32 | return openFile(name, os.O_RDONLY, 0o644, FADV_SEQUENTIAL) 33 | } 34 | 35 | func block_size(path string) int { 36 | // A reasonable default for many SSD devices 37 | return 4096 38 | } 39 | 40 | type CountWriter interface { 41 | WriterFlusher 42 | 43 | // Offset returns the position in the file at which the next write will be made 44 | Offset() int64 45 | 46 | // Count returns the number of bytes written 47 | Count() int64 48 | } 49 | 50 | type WriterFlusher interface { 51 | io.Writer 52 | Flush() error 53 | } 54 | 55 | // DataFile assumes it has exclusive write access to the file 56 | type DataFile struct { 57 | Path string 58 | Header DatFileHeader 59 | 60 | offset int64 61 | file *os.File 62 | writer *bufio.Writer 63 | elogger logr.Logger 64 | } 65 | 66 | func CreateDataFile(path string, appnum, uid uint64) error { 67 | f, err := openFile(path, os.O_APPEND|os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644, FADV_RANDOM) 68 | if err != nil { 69 | return fmt.Errorf("create file: %w", err) 70 | } 71 | 72 | dh := DatFileHeader{ 73 | Version: currentVersion, 74 | UID: uid, 75 | AppNum: appnum, 76 | } 77 | 78 | if err := dh.EncodeTo(f); err != nil { 79 | f.Close() 80 | os.Remove(path) 81 | return fmt.Errorf("write header: %w", err) 82 | } 83 | 84 | if err := f.Sync(); err != nil { 85 | f.Close() 86 | os.Remove(path) 87 | return fmt.Errorf("sync: %w", err) 88 | } 89 | if err := f.Close(); err != nil { 90 | os.Remove(path) 91 | return fmt.Errorf("close: %w", err) 92 | } 93 | 94 | return nil 95 | } 96 | 97 | // OpenDataFile opens a data file for appending and random reads 98 | func OpenDataFile(path string) (*DataFile, error) { 99 | f, err := os.OpenFile(path, os.O_APPEND|os.O_RDWR|os.O_EXCL, 0o644) 100 | if err != nil { 101 | return nil, fmt.Errorf("open: %w", err) 102 | } 103 | 104 | err = Fadvise(int(f.Fd()), 0, 0, FADV_RANDOM) 105 | if err != nil { 106 | return nil, fmt.Errorf("fadvise: %w", err) 107 | } 108 | 109 | st, err := f.Stat() 110 | if err != nil { 111 | return nil, fmt.Errorf("stat data file: %w", err) 112 | } 113 | 114 | var dh DatFileHeader 115 | if err := dh.DecodeFrom(f); err != nil { 116 | return nil, fmt.Errorf("read header: %w", err) 117 | } 118 | if err := dh.Verify(); err != nil { 119 | return nil, fmt.Errorf("verify header: %w", err) 120 | } 121 | 122 | return &DataFile{ 123 | Path: path, 124 | Header: dh, 125 | file: f, 126 | offset: st.Size(), 127 | // Buffered writes to avoid write amplification 128 | writer: bufio.NewWriterSize(f, 32*block_size(path)), 129 | elogger: logr.Discard(), 130 | }, nil 131 | } 132 | 133 | func (d 
*DataFile) Offset() int64 { 134 | return d.offset 135 | } 136 | 137 | func (d *DataFile) Sync() error { 138 | if err := d.writer.Flush(); err != nil { 139 | return err 140 | } 141 | return d.file.Sync() 142 | } 143 | 144 | func (d *DataFile) Flush() error { 145 | return d.writer.Flush() 146 | } 147 | 148 | func (d *DataFile) Close() error { 149 | if err := d.writer.Flush(); err != nil { 150 | return err 151 | } 152 | return d.file.Close() 153 | } 154 | 155 | func (d *DataFile) Size() (int64, error) { 156 | st, err := d.file.Stat() 157 | if err != nil { 158 | return 0, err 159 | } 160 | return st.Size(), nil 161 | } 162 | 163 | // AppendRecord writes a record to the data file. It returns the position at which 164 | // the record was written. 165 | func (d *DataFile) AppendRecord(dr *DataRecord) (int64, error) { 166 | hdr := make([]byte, SizeUint48+SizeUint16) 167 | EncodeUint48(hdr[0:SizeUint48], uint64(len(dr.data))) 168 | EncodeUint16(hdr[SizeUint48:SizeUint48+SizeUint16], uint16(len(dr.key))) 169 | 170 | offset := d.offset 171 | 172 | n, err := d.file.Write(hdr[:]) 173 | d.offset += int64(n) 174 | if err != nil { 175 | return offset, err 176 | } 177 | if n != len(hdr) { 178 | return offset, io.ErrShortWrite 179 | } 180 | 181 | nk, err := d.file.Write([]byte(dr.key)) 182 | d.offset += int64(nk) 183 | if err != nil { 184 | return offset, err 185 | } 186 | if nk != len(dr.key) { 187 | return offset, io.ErrShortWrite 188 | } 189 | 190 | nd, err := d.file.Write(dr.data) 191 | d.offset += int64(nd) 192 | if err != nil { 193 | return offset, err 194 | } 195 | if nd != len(dr.data) { 196 | return offset, io.ErrShortWrite 197 | } 198 | 199 | return offset, nil 200 | } 201 | 202 | func (d *DataFile) LoadRecordHeader(offset int64) (*DataRecordHeader, error) { 203 | hdr := make([]byte, SizeUint48+SizeUint16) 204 | 205 | _, err := d.file.ReadAt(hdr, offset) 206 | if err != nil { 207 | return nil, fmt.Errorf("read data record header: %w", err) 208 | } 209 | 210 | dataSize := DecodeUint48(hdr[:SizeUint48]) 211 | if dataSize == 0 { 212 | // Data size 0 indicates a bucket spill follows 213 | return nil, ErrInvalidDataRecord 214 | } 215 | keySize := DecodeUint16(hdr[SizeUint48 : SizeUint48+SizeUint16]) 216 | if keySize == 0 { 217 | return nil, ErrInvalidDataRecord 218 | } 219 | 220 | key := make([]byte, keySize) 221 | _, err = d.file.ReadAt(key, offset+SizeUint48+SizeUint16) 222 | if err != nil { 223 | return nil, fmt.Errorf("read data record key: %w", err) 224 | } 225 | 226 | return &DataRecordHeader{ 227 | Key: key, 228 | DataSize: int64(dataSize), 229 | KeySize: keySize, 230 | }, nil 231 | } 232 | 233 | func (d *DataFile) RecordDataReader(offset int64, key string) (io.Reader, error) { 234 | rh, err := d.LoadRecordHeader(offset) 235 | if err != nil { 236 | return nil, fmt.Errorf("read data record header: %w", err) 237 | } 238 | if !bytes.Equal([]byte(key), rh.Key) { 239 | return nil, ErrKeyMismatch 240 | } 241 | 242 | return io.NewSectionReader(d.file, offset+rh.Size(), int64(rh.DataSize)), nil 243 | } 244 | 245 | func (d *DataFile) AppendBucketSpill(blob []byte) (int64, error) { 246 | offset := d.offset 247 | 248 | var hdr [SpillHeaderSize]byte 249 | // Initial Uint48 is zero to indicate this is a spill record 250 | EncodeUint16(hdr[SizeUint48:], uint16(len(blob))) 251 | 252 | hn, err := d.writer.Write(hdr[:]) 253 | d.offset += int64(hn) 254 | if err == nil && hn != len(hdr) { 255 | err = io.ErrShortWrite 256 | } 257 | if err != nil { 258 | if d.elogger.Enabled() && errors.Is(err, io.ErrShortWrite) { 
259 | d.elogger.Info("data file: short write on bucket header", "expected", len(hdr), "wrote", hn) 260 | } 261 | return offset, fmt.Errorf("write header: %w", err) 262 | } 263 | 264 | bn, err := d.writer.Write(blob) 265 | d.offset += int64(bn) 266 | if err == nil && bn != len(blob) { 267 | err = io.ErrShortWrite 268 | } 269 | 270 | if err != nil { 271 | if d.elogger.Enabled() && errors.Is(err, io.ErrShortWrite) { 272 | d.elogger.Info("data file: short write on bucket data", "expected", len(blob), "wrote", bn) 273 | } 274 | return offset, fmt.Errorf("write header: %w", err) 275 | } 276 | 277 | return offset, nil 278 | } 279 | 280 | func (d *DataFile) LoadBucketSpill(offset int64, blob []byte) error { 281 | var hdr [SpillHeaderSize]byte 282 | _, err := d.file.ReadAt(hdr[:], offset) 283 | if err != nil { 284 | return fmt.Errorf("read header: %w", err) 285 | } 286 | 287 | marker := DecodeUint48(hdr[:SizeUint48]) 288 | if marker != 0 { 289 | return ErrInvalidSpill 290 | } 291 | 292 | size := DecodeUint16(hdr[SizeUint48 : SizeUint48+SizeUint16]) 293 | 294 | sr := io.NewSectionReader(d.file, offset+int64(len(hdr)), int64(size)) 295 | off := 0 296 | for { 297 | n, err := sr.Read(blob[off:]) 298 | off += n 299 | if err == io.EOF { 300 | break 301 | } 302 | if err != nil { 303 | return fmt.Errorf("read bucket data: %w", err) 304 | } 305 | if off >= len(blob) { 306 | return io.ErrShortBuffer 307 | } 308 | } 309 | 310 | for i := off; i < len(blob); i++ { 311 | blob[i] = 0 312 | } 313 | 314 | return nil 315 | } 316 | 317 | // RecordScanner returns a RecordScanner that may be used to iterate over the records in the data file. 318 | func (d *DataFile) RecordScanner() *RecordScanner { 319 | f, err := openFileForScan(d.Path) 320 | if err != nil { 321 | return &RecordScanner{err: err} 322 | } 323 | 324 | r := bufio.NewReaderSize(f, 32*block_size(d.Path)) 325 | n, err := r.Discard(DatFileHeaderSize) 326 | 327 | return &RecordScanner{ 328 | err: err, 329 | r: r, 330 | closer: f, 331 | offset: int64(n), 332 | lr: io.LimitedReader{R: r, N: 0}, 333 | size: -1, 334 | isSpill: false, 335 | } 336 | } 337 | 338 | // RecordScanner implements a sequential scan through a data file. Successive calls to the Next method will step through 339 | // the records in the file. 340 | type RecordScanner struct { 341 | r *bufio.Reader 342 | closer io.Closer 343 | err error 344 | offset int64 345 | size int64 346 | key []byte 347 | isSpill bool 348 | lr io.LimitedReader 349 | } 350 | 351 | // Next reads the next bucket in sequence, including spills to the data store. It returns false 352 | // if it encounters an error or there are no more buckets to read. 
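// An illustrative scan over a data file (error handling condensed to a sketch):
//
//	s := df.RecordScanner()
//	defer s.Close()
//	for s.Next() {
//		if s.IsData() {
//			_, _ = io.Copy(io.Discard, s.Reader()) // or inspect s.Key() and s.Size()
//		}
//	}
//	if err := s.Err(); err != nil {
//		// handle the scan error
//	}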
353 | func (s *RecordScanner) Next() bool { 354 | if s.err != nil { 355 | return false 356 | } 357 | 358 | var n int 359 | 360 | // Skip any unread bytes 361 | n, s.err = s.r.Discard(int(s.lr.N)) 362 | if s.err != nil { 363 | return false 364 | } 365 | s.offset += int64(n) 366 | 367 | hdr := make([]byte, int64(SizeUint48+SizeUint16)) 368 | n, s.err = io.ReadFull(s.r, hdr) 369 | if s.err != nil { 370 | return false 371 | } 372 | s.offset += int64(n) 373 | 374 | s.size = int64(DecodeUint48(hdr[:SizeUint48])) 375 | if s.size == 0 { 376 | s.isSpill = true 377 | s.key = nil 378 | // Spill size is in the next 2 bytes 379 | s.size = int64(DecodeUint16(hdr[SizeUint48 : SizeUint48+SizeUint16])) 380 | if s.size == 0 { 381 | s.err = ErrInvalidRecordSize 382 | } 383 | } else { 384 | s.isSpill = false 385 | keySize := int(DecodeUint16(hdr[SizeUint48 : SizeUint48+SizeUint16])) 386 | key := make([]byte, keySize) 387 | n, s.err = io.ReadFull(s.r, key) 388 | s.offset += int64(n) 389 | if s.err != nil { 390 | return false 391 | } 392 | s.key = key 393 | } 394 | 395 | // Set the limited reader hard limit 396 | s.lr.N = s.size 397 | 398 | return s.err == nil 399 | } 400 | 401 | // Reader returns an io.Reader that may be used to read the data from the record. Should not be called until Next has been called. 402 | // The Reader is only valid for use until the next call to Next(). 403 | func (s *RecordScanner) Reader() io.Reader { 404 | if s.err != nil { 405 | return nil 406 | } 407 | return &s.lr 408 | } 409 | 410 | // IsSpill reports whether the current record is a bucket spill 411 | func (s *RecordScanner) IsSpill() bool { 412 | return s.isSpill 413 | } 414 | 415 | // IsData reports whether the current record is a data record 416 | func (s *RecordScanner) IsData() bool { 417 | return !s.isSpill 418 | } 419 | 420 | // Size returns the size of the current record's data in bytes 421 | func (s *RecordScanner) Size() int64 { 422 | return s.size 423 | } 424 | 425 | // RecordSize returns the number of bytes occupied by the current record including its header 426 | func (s *RecordScanner) RecordSize() int64 { 427 | if s.isSpill { 428 | return SizeUint48 + // holds marker 429 | SizeUint16 + // holds spill size 430 | s.size // spill data 431 | } 432 | return SizeUint48 + // holds data size 433 | SizeUint16 + // holds key size 434 | s.size + // data 435 | int64(len(s.key)) // key 436 | } 437 | 438 | // Key returns the key of the current record 439 | func (s *RecordScanner) Key() string { 440 | if s.key == nil { 441 | return "" 442 | } 443 | return string(s.key) 444 | } 445 | 446 | // Err returns the first non-EOF error that was encountered by the RecordScanner. 
447 | func (s *RecordScanner) Err() error { 448 | if s.err == io.EOF { 449 | return nil 450 | } 451 | return s.err 452 | } 453 | 454 | func (s *RecordScanner) Close() error { 455 | return s.closer.Close() 456 | } 457 | 458 | // KeyFile assumes it has exclusive write access to the file 459 | type KeyFile struct { 460 | Path string 461 | Header KeyFileHeader 462 | 463 | file *os.File 464 | hasher Hasher 465 | elogger logr.Logger 466 | 467 | // bucketLocks is a list of locks corresponding to each bucket in the file 468 | // the locks guard access to read/writes of that portion of the keyfile 469 | // blmu guards mutations to bucketLocks 470 | blmu sync.Mutex 471 | bucketLocks []*sync.Mutex 472 | } 473 | 474 | func CreateKeyFile(path string, uid uint64, appnum uint64, salt uint64, blockSize int, loadFactor float64) error { 475 | kf, err := openFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644, FADV_RANDOM) 476 | if err != nil { 477 | return fmt.Errorf("create file: %w", err) 478 | } 479 | abandon := func() { 480 | kf.Close() 481 | os.Remove(path) 482 | } 483 | 484 | kh := KeyFileHeader{ 485 | Version: currentVersion, 486 | UID: uid, 487 | AppNum: appnum, 488 | Salt: salt, 489 | Pepper: pepper(salt), 490 | BlockSize: uint16(blockSize), 491 | LoadFactor: uint16(math.Min((MaxUint16+1)*loadFactor, MaxUint16)), 492 | } 493 | 494 | if err := kh.EncodeTo(kf); err != nil { 495 | abandon() 496 | return fmt.Errorf("write header: %w", err) 497 | } 498 | 499 | buf := make([]byte, blockSize) 500 | b := NewBucket(blockSize, buf) 501 | 502 | sw := NewSectionWriter(kf, KeyFileHeaderSize, int64(kh.BlockSize)) 503 | n, err := b.storeFullTo(sw) 504 | if err != nil { 505 | abandon() 506 | return fmt.Errorf("write initial bucket: %w", err) 507 | } 508 | if n != int64(len(buf)) { 509 | abandon() 510 | return fmt.Errorf("write initial bucket (%d!=%d): %w", n, int64(len(buf)), io.ErrShortWrite) 511 | } 512 | 513 | if err := kf.Sync(); err != nil { 514 | abandon() 515 | return fmt.Errorf("sync: %w", err) 516 | } 517 | if err := kf.Close(); err != nil { 518 | abandon() 519 | return fmt.Errorf("close : %w", err) 520 | } 521 | return nil 522 | } 523 | 524 | // OpenKeyFile opens a key file for random reads and writes 525 | func OpenKeyFile(path string) (*KeyFile, error) { 526 | f, err := os.OpenFile(path, os.O_RDWR|os.O_EXCL, 0o644) 527 | if err != nil { 528 | return nil, fmt.Errorf("open: %w", err) 529 | } 530 | 531 | err = Fadvise(int(f.Fd()), 0, 0, FADV_RANDOM) 532 | if err != nil { 533 | return nil, fmt.Errorf("fadvise: %w", err) 534 | } 535 | 536 | st, err := f.Stat() 537 | if err != nil { 538 | return nil, fmt.Errorf("stat key file: %w", err) 539 | } 540 | 541 | var kh KeyFileHeader 542 | if err := kh.DecodeFrom(f, st.Size()); err != nil { 543 | return nil, fmt.Errorf("read key file header: %w", err) 544 | } 545 | if err := kh.Verify(); err != nil { 546 | return nil, fmt.Errorf("verify key file header: %w", err) 547 | } 548 | 549 | bucketLocks := make([]*sync.Mutex, int(kh.Buckets)) 550 | for i := 0; i < int(kh.Buckets); i++ { 551 | bucketLocks[i] = &sync.Mutex{} 552 | } 553 | 554 | return &KeyFile{ 555 | Path: path, 556 | Header: kh, 557 | file: f, 558 | hasher: Hasher(kh.Salt), 559 | elogger: logr.Discard(), 560 | bucketLocks: bucketLocks, 561 | }, nil 562 | } 563 | 564 | func (k *KeyFile) Sync() error { 565 | return k.file.Sync() 566 | } 567 | 568 | func (k *KeyFile) Close() error { 569 | return k.file.Close() 570 | } 571 | 572 | func (k *KeyFile) Size() (int64, error) { 573 | st, err := k.file.Stat() 574 | if 
err != nil { 575 | return 0, err 576 | } 577 | return st.Size(), nil 578 | } 579 | 580 | func (k *KeyFile) BlockSize() uint16 { 581 | return k.Header.BlockSize 582 | } 583 | 584 | func (k *KeyFile) Hash(key []byte) uint64 { 585 | return k.hasher.Hash(key) 586 | } 587 | 588 | func (k *KeyFile) HashString(key string) uint64 { 589 | return k.hasher.HashString(key) 590 | } 591 | 592 | func (k *KeyFile) LoadBucket(idx int) (*Bucket, error) { 593 | var bmu *sync.Mutex 594 | k.blmu.Lock() 595 | if len(k.bucketLocks) > idx { 596 | k.bucketLocks[idx].Lock() 597 | bmu = k.bucketLocks[idx] 598 | } 599 | k.blmu.Unlock() 600 | 601 | if bmu == nil { 602 | if k.elogger.Enabled() { 603 | k.elogger.Error(fmt.Errorf("unknown bucket index"), "attempt to load invalid bucket index", "index", idx, "bucket_count", k.Header.Buckets) 604 | } 605 | panic("attempt to load invalid bucket index") 606 | } 607 | defer bmu.Unlock() 608 | 609 | offset := KeyFileHeaderSize + int64(idx)*int64(k.Header.BlockSize) 610 | b := NewBucket(int(k.Header.BlockSize), make([]byte, int(k.Header.BlockSize))) 611 | 612 | sr := io.NewSectionReader(k.file, offset, int64(k.Header.BlockSize)) 613 | if err := b.loadFullFrom(sr); err != nil { 614 | return nil, fmt.Errorf("read bucket: %w", err) 615 | } 616 | return b, nil 617 | } 618 | 619 | // expects to have exclusive access to b 620 | func (k *KeyFile) PutBucket(idx int, b *Bucket) error { 621 | var bmu *sync.Mutex 622 | k.blmu.Lock() 623 | for idx > len(k.bucketLocks)-1 { 624 | k.bucketLocks = append(k.bucketLocks, &sync.Mutex{}) 625 | } 626 | k.bucketLocks[idx].Lock() 627 | bmu = k.bucketLocks[idx] 628 | k.blmu.Unlock() 629 | if bmu == nil { 630 | panic("attempt to put invalid bucket index") 631 | } 632 | defer bmu.Unlock() 633 | 634 | offset := KeyFileHeaderSize + int64(idx)*int64(k.Header.BlockSize) 635 | sw := NewSectionWriter(k.file, offset, int64(k.Header.BlockSize)) 636 | _, err := b.storeFullTo(sw) 637 | if err != nil { 638 | return fmt.Errorf("write bucket: %w", err) 639 | } 640 | return nil 641 | } 642 | 643 | // BucketScanner returns a BucketScanner that may be used to iterate over the buckets in the key file. 644 | func (k *KeyFile) BucketScanner(df *DataFile) *BucketScanner { 645 | f, err := openFileForScan(k.Path) 646 | if err != nil { 647 | return &BucketScanner{err: err} 648 | } 649 | 650 | r := bufio.NewReaderSize(f, 32*block_size(k.Path)) 651 | _, err = r.Discard(KeyFileHeaderSize) 652 | 653 | return &BucketScanner{ 654 | err: err, 655 | r: r, 656 | closer: f, 657 | bucket: NewBucket(int(k.Header.BlockSize), make([]byte, int(k.Header.BlockSize))), 658 | blockSize: int64(k.Header.BlockSize), 659 | index: -1, 660 | df: df, 661 | elogger: k.elogger, 662 | } 663 | } 664 | 665 | // BucketScanner implements a sequential scan through a key file. Successive calls to the Next method will step through 666 | // the buckets in the file, including spilled buckets in the data file. 667 | type BucketScanner struct { 668 | r *bufio.Reader 669 | closer io.Closer 670 | df *DataFile 671 | bucket *Bucket 672 | blockSize int64 673 | index int 674 | err error 675 | spill int64 // non-zero if next read is a spill to the data store 676 | isSpill bool // true if the current bucket was read from a spill 677 | elogger logr.Logger 678 | } 679 | 680 | // Next reads the next bucket in sequence, including spills to the data store. It returns false 681 | // if it encounters an error or there are no more buckets to read. 
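// A sketch of a full scan over the key file's buckets and their data-file spills
// (assumes kf and df are already open):
//
//	s := kf.BucketScanner(df)
//	defer s.Close()
//	for s.Next() {
//		b := s.Bucket()
//		_ = b.Count() // inspect entries; do not retain b across calls to Next
//	}
//	if err := s.Err(); err != nil {
//		// handle the scan error
//	}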
682 | func (b *BucketScanner) Next() bool { 683 | if b.err != nil { 684 | return false 685 | } 686 | // Is next bucket in a spill? 687 | if b.spill != 0 { 688 | b.err = b.bucket.LoadFrom(b.spill, b.df) 689 | b.isSpill = true 690 | if b.elogger.Enabled() && b.err != nil { 691 | b.elogger.Error(b.err, "reading spill", "index", b.index, "spill", b.spill) 692 | } 693 | } else { 694 | lr := io.LimitedReader{R: b.r, N: b.blockSize} 695 | b.err = b.bucket.loadFullFrom(&lr) 696 | b.isSpill = false 697 | b.index++ 698 | if b.elogger.Enabled() && b.err != nil && b.err != io.EOF { 699 | b.elogger.Error(b.err, "reading bucket", "index", b.index) 700 | } 701 | } 702 | 703 | if b.err == nil { 704 | b.spill = b.bucket.spill 705 | } 706 | 707 | return b.err == nil 708 | } 709 | 710 | // Index returns the index of the current bucket. Should not be called until Next has been called. Spill buckets 711 | // share an index with their parent. 712 | func (b *BucketScanner) Index() int { 713 | return b.index 714 | } 715 | 716 | // IsSpill reports whether the current bucket was read from a data store spill. 717 | func (b *BucketScanner) IsSpill() bool { 718 | return b.isSpill 719 | } 720 | 721 | // Bucket returns the current bucket. Should not be called until Next has been called. The bucket is backed by data 722 | // that may be overwritten with a call to Next so should not be retained. 723 | func (b *BucketScanner) Bucket() *Bucket { 724 | if b.err != nil { 725 | return nil 726 | } 727 | return b.bucket 728 | } 729 | 730 | // Err returns the first non-EOF error that was encountered by the BucketScanner. 731 | func (b *BucketScanner) Err() error { 732 | if b.err == io.EOF { 733 | return nil 734 | } 735 | return b.err 736 | } 737 | 738 | func (b *BucketScanner) Close() error { 739 | return b.closer.Close() 740 | } 741 | 742 | type LogFile struct { 743 | Path string 744 | Header LogFileHeader 745 | 746 | file *os.File 747 | writer *bufio.Writer 748 | elogger logr.Logger 749 | } 750 | 751 | // OpenLogFile opens a log file for appending, creating it if necessary.
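// The methods below suggest the following commit sequence; this is only a sketch of how a
// *BucketCache, a DataFile and a KeyFile fit together (lf, df, kf, cache and path are
// placeholders), not the store's full recovery protocol:
//
//	lf, _ := OpenLogFile(path)
//	_ = lf.Prepare(df, kf)          // record rollback sizes for the data and key files
//	_, _ = cache.WriteDirty(lf, kf) // dirty buckets go to the log, then the key file
//	_ = lf.Truncate()               // discard the log once the key file is synced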
752 | func OpenLogFile(path string) (*LogFile, error) { 753 | lf := &LogFile{ 754 | Path: path, 755 | elogger: logr.Discard(), 756 | } 757 | 758 | if err := lf.open(false); err != nil { 759 | return nil, err 760 | } 761 | 762 | return lf, nil 763 | } 764 | 765 | func (l *LogFile) open(reopen bool) error { 766 | flags := os.O_APPEND | os.O_RDWR | os.O_CREATE 767 | if !reopen { 768 | flags |= os.O_EXCL 769 | } 770 | f, err := os.OpenFile(l.Path, flags, 0o644) 771 | if err != nil { 772 | return fmt.Errorf("open: %w", err) 773 | } 774 | 775 | err = Fadvise(int(f.Fd()), 0, 0, FADV_RANDOM) 776 | if err != nil { 777 | return fmt.Errorf("fadvise: %w", err) 778 | } 779 | 780 | l.file = f 781 | if l.writer == nil { 782 | // Buffered writes to avoid write amplification 783 | l.writer = bufio.NewWriterSize(f, 32*block_size(l.Path)) 784 | } else { 785 | l.writer.Reset(l.file) 786 | } 787 | return nil 788 | } 789 | 790 | func (l *LogFile) Sync() error { 791 | if err := l.writer.Flush(); err != nil { 792 | return err 793 | } 794 | return l.file.Sync() 795 | } 796 | 797 | func (l *LogFile) Flush() error { 798 | return l.writer.Flush() 799 | } 800 | 801 | func (l *LogFile) Close() error { 802 | if err := l.writer.Flush(); err != nil { 803 | return err 804 | } 805 | return l.file.Close() 806 | } 807 | 808 | func (l *LogFile) Truncate() error { 809 | // file must be closed before truncate on windows 810 | if err := l.Close(); err != nil { 811 | return err 812 | } 813 | 814 | if err := os.Truncate(l.Path, 0); err != nil { 815 | return err 816 | } 817 | return l.open(true) 818 | } 819 | 820 | func (l *LogFile) Prepare(df *DataFile, kf *KeyFile) error { 821 | // Prepare rollback information 822 | lh := LogFileHeader{ 823 | Version: currentVersion, 824 | UID: kf.Header.UID, 825 | AppNum: kf.Header.AppNum, 826 | Salt: kf.Header.Salt, 827 | Pepper: pepper(kf.Header.Salt), 828 | BlockSize: kf.Header.BlockSize, 829 | } 830 | 831 | var err error 832 | lh.DatFileSize, err = df.Size() 833 | if err != nil { 834 | return fmt.Errorf("data file size: %w", err) 835 | } 836 | 837 | lh.KeyFileSize, err = kf.Size() 838 | if err != nil { 839 | return fmt.Errorf("key file size: %w", err) 840 | } 841 | 842 | if err := lh.EncodeTo(l.writer); err != nil { 843 | return fmt.Errorf("write log file header: %w", err) 844 | } 845 | 846 | // Checkpoint 847 | if err := l.Sync(); err != nil { 848 | return fmt.Errorf("sync: %w", err) 849 | } 850 | 851 | return nil 852 | } 853 | 854 | func (l *LogFile) AppendBucket(idx int, b *Bucket) (int64, error) { 855 | var idxBuf [SizeUint64]byte 856 | EncodeUint64(idxBuf[:], uint64(idx)) 857 | n, err := l.writer.Write(idxBuf[:]) 858 | if err == nil && n != len(idxBuf) { 859 | err = io.ErrShortWrite 860 | } 861 | if err != nil { 862 | return int64(n), fmt.Errorf("write index: %w", err) 863 | } 864 | 865 | bn, err := b.WriteTo(l.writer) 866 | if err != nil { 867 | if l.elogger.Enabled() && errors.Is(err, io.ErrShortWrite) { 868 | l.elogger.Info("log file: short write on bucket data", "expected", b.ActualSize(), "wrote", bn) 869 | } 870 | return bn + int64(n), fmt.Errorf("write data: %w", err) 871 | } 872 | 873 | return bn + int64(n), nil 874 | } 875 | 876 | // SectionWriter implements Write on a section of an underlying WriterAt 877 | type SectionWriter struct { 878 | w io.WriterAt 879 | offset int64 880 | limit int64 881 | } 882 | 883 | func NewSectionWriter(w io.WriterAt, offset int64, size int64) *SectionWriter { 884 | return &SectionWriter{ 885 | w: w, 886 | offset: offset, 887 | limit: offset + size, 
888 | } 889 | } 890 | 891 | func (s *SectionWriter) Write(v []byte) (int, error) { 892 | size := int64(len(v)) 893 | if size > s.limit-s.offset { 894 | size = s.limit - s.offset 895 | } 896 | 897 | n, err := s.w.WriteAt(v[:size], s.offset) 898 | s.offset += int64(n) 899 | if err == nil && n < len(v) { 900 | err = io.ErrShortWrite 901 | } 902 | return n, err 903 | } 904 | -------------------------------------------------------------------------------- /internal/file_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestCreateKeyFile(t *testing.T) { 9 | tmpdir, err := os.MkdirTemp("", "gonudb.*") 10 | if err != nil { 11 | t.Fatalf("unexpected error creating temp directory: %v", err) 12 | } 13 | defer os.RemoveAll(tmpdir) 14 | 15 | const blockSize = 256 16 | 17 | filename := tmpdir + "key" 18 | err = CreateKeyFile(filename, 121212, 222222, 333333, blockSize, 0.7) 19 | if err != nil { 20 | t.Errorf("CreateKeyFile: unexpected error: %v", err) 21 | } 22 | 23 | st, err := os.Stat(filename) 24 | if err != nil { 25 | if os.IsNotExist(err) { 26 | t.Fatalf("key file was not created") 27 | } 28 | t.Fatalf("Stat: unexpected error: %v", err) 29 | } 30 | 31 | wantSize := int64(KeyFileHeaderSize + blockSize) 32 | if st.Size() != wantSize { 33 | t.Errorf("got size %d, wanted %d", st.Size(), wantSize) 34 | } 35 | 36 | f, err := os.OpenFile(filename, os.O_RDONLY, 0o644) 37 | if err != nil { 38 | t.Fatalf("OpenFile: unexpected error: %v", err) 39 | } 40 | defer f.Close() 41 | 42 | var kh KeyFileHeader 43 | if err := kh.DecodeFrom(f, st.Size()); err != nil { 44 | t.Fatalf("DecodeFrom: unexpected error: %v", err) 45 | } 46 | if err := kh.Verify(); err != nil { 47 | t.Fatalf("Verify: unexpected error: %v", err) 48 | } 49 | 50 | if kh.UID != 121212 { 51 | t.Errorf("got uid %d, wanted %d", kh.UID, 121212) 52 | } 53 | if kh.AppNum != 222222 { 54 | t.Errorf("got appnum %d, wanted %d", kh.AppNum, 222222) 55 | } 56 | if kh.Salt != 333333 { 57 | t.Errorf("got salt %d, wanted %d", kh.Salt, 333333) 58 | } 59 | 60 | blob := make([]byte, blockSize) 61 | if _, err := f.ReadAt(blob, KeyFileHeaderSize); err != nil { 62 | t.Fatalf("ReadAt: unexpected error: %v", err) 63 | } 64 | 65 | for i, b := range blob { 66 | if b != 0 { 67 | t.Fatalf("non zero byte found in bucket blob at %d", i) 68 | } 69 | } 70 | } 71 | 72 | func TestTruncateLogFileWithoutError(t *testing.T) { 73 | tmpdir, err := os.MkdirTemp("", "gonudb.*") 74 | if err != nil { 75 | t.Fatalf("unexpected error creating temp directory: %v", err) 76 | } 77 | defer os.RemoveAll(tmpdir) 78 | 79 | filename := tmpdir + "log" 80 | lf, err := OpenLogFile(filename) 81 | if err != nil { 82 | t.Errorf("OpenLogFile: unexpected error: %v", err) 83 | } 84 | 85 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*2) 86 | b := &Bucket{ 87 | blockSize: len(blob), 88 | blob: blob, 89 | } 90 | 91 | entries := []Entry{ 92 | { 93 | Offset: 15555, 94 | Size: 14444, 95 | Hash: 19999, 96 | }, 97 | { 98 | Offset: 25555, 99 | Size: 24444, 100 | Hash: 29999, 101 | }, 102 | } 103 | 104 | for i := range entries { 105 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 106 | } 107 | 108 | if _, err := lf.AppendBucket(0, b); err != nil { 109 | t.Errorf("AppendBucket: unexpected error: %v", err) 110 | } 111 | 112 | if err := lf.Flush(); err != nil { 113 | t.Errorf("Flush: unexpected error: %v", err) 114 | } 115 | 116 | if err := lf.Truncate(); err != nil { 117 | 
t.Fatalf("Truncate: unexpected error: %v", err) 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /internal/format.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | ) 7 | 8 | // Format of the nudb files: 9 | 10 | /* 11 | 12 | Integer sizes 13 | 14 | block_size less than 32 bits (maybe restrict it to 16 bits) 15 | buckets more than 32 bits 16 | capacity (same as bucket index) 17 | file offsets 63 bits 18 | hash up to 64 bits (48 currently) 19 | item index less than 32 bits (index of item in bucket) 20 | modulus (same as buckets) 21 | value size up to 32 bits (or 32-bit builds can't read it) 22 | 23 | */ 24 | 25 | const currentVersion = 2 26 | 27 | const ( 28 | DatFileHeaderSize = SizeUint64 + // Type 29 | SizeUint16 + // Version 30 | SizeUint64 + // UID 31 | SizeUint64 + // Appnum 32 | SizeUint16 + // REMOVED in version 2, was KeySize 33 | 64 // (Reserved) 34 | 35 | KeyFileHeaderSize = 8 + // Type 36 | SizeUint16 + // Version 37 | SizeUint64 + // UID 38 | SizeUint64 + // Appnum 39 | SizeUint16 + // REMOVED in version 2, was KeySize 40 | SizeUint64 + // Salt 41 | SizeUint64 + // Pepper 42 | SizeUint16 + // BlockSize 43 | SizeUint16 + // LoadFactor 44 | 56 // (Reserved) 45 | 46 | LogFileHeaderSize = 8 + // Type 47 | SizeUint16 + // Version 48 | SizeUint64 + // UID 49 | SizeUint64 + // Appnum 50 | SizeUint16 + // REMOVED in version 2, was KeySize 51 | SizeUint64 + // Salt 52 | SizeUint64 + // Pepper 53 | SizeUint16 + // BlockSize 54 | SizeUint64 + // KeyFileSize 55 | SizeUint64 // DataFileSize 56 | 57 | SpillHeaderSize = SizeUint48 + // zero marker 58 | SizeUint16 // size 59 | ) 60 | 61 | var ( 62 | DatFileHeaderType = []byte("gonudbdt") 63 | KeyFileHeaderType = []byte("gonudbky") 64 | LogFileHeaderType = []byte("gonudblg") 65 | ) 66 | 67 | type DatFileHeader struct { 68 | Type [8]byte 69 | Version uint16 70 | UID uint64 71 | AppNum uint64 72 | } 73 | 74 | func (*DatFileHeader) Size() int { 75 | return DatFileHeaderSize 76 | } 77 | 78 | // DecodeFrom reads d from a reader 79 | func (d *DatFileHeader) DecodeFrom(r io.Reader) error { 80 | var data [DatFileHeaderSize]byte 81 | if _, err := io.ReadFull(r, data[:]); err != nil { 82 | return err 83 | } 84 | 85 | copy(d.Type[:], data[0:8]) 86 | d.Version = DecodeUint16(data[8:10]) 87 | d.UID = DecodeUint64(data[10:18]) 88 | d.AppNum = DecodeUint64(data[18:26]) 89 | // data[26:28] is unused, was key size 90 | 91 | return nil 92 | } 93 | 94 | // EncodeTo writes d to a writer 95 | func (d *DatFileHeader) EncodeTo(w io.Writer) error { 96 | var data [DatFileHeaderSize]byte 97 | 98 | copy(data[0:8], DatFileHeaderType) 99 | EncodeUint16(data[8:10], d.Version) 100 | EncodeUint64(data[10:18], d.UID) 101 | EncodeUint64(data[18:26], d.AppNum) 102 | // data[26:28] is unused, was key size 103 | 104 | n, err := w.Write(data[:]) 105 | if err != nil { 106 | return err 107 | } 108 | if n != len(data) { 109 | return io.ErrShortWrite 110 | } 111 | 112 | return nil 113 | } 114 | 115 | // Verify contents of data file header 116 | func (d *DatFileHeader) Verify() error { 117 | if !bytes.Equal(DatFileHeaderType, d.Type[:]) { 118 | return ErrNotDataFile 119 | } 120 | 121 | if d.Version != currentVersion { 122 | return ErrDifferentVersion 123 | } 124 | 125 | return nil 126 | } 127 | 128 | // VerifyMatchingKey makes sure key file and data file headers match 129 | func (d *DatFileHeader) VerifyMatchingKey(k *KeyFileHeader) error 
{ 130 | if k.UID != d.UID { 131 | return ErrUIDMismatch 132 | } 133 | if k.AppNum != d.AppNum { 134 | return ErrAppNumMismatch 135 | } 136 | 137 | return nil 138 | } 139 | 140 | type KeyFileHeader struct { 141 | Type [8]byte 142 | Version uint16 143 | UID uint64 144 | AppNum uint64 145 | 146 | Salt uint64 147 | Pepper uint64 148 | BlockSize uint16 149 | LoadFactor uint16 150 | 151 | // Computed values 152 | Capacity int // Entries per bucket 153 | Buckets int // Number of buckets 154 | Modulus uint64 // pow(2,ceil(log2(buckets))) 155 | } 156 | 157 | func (k *KeyFileHeader) Size() int { 158 | return KeyFileHeaderSize 159 | } 160 | 161 | func (k *KeyFileHeader) DecodeFrom(r io.Reader, fileSize int64) error { 162 | var data [KeyFileHeaderSize]byte 163 | if _, err := io.ReadFull(r, data[:]); err != nil { 164 | return err 165 | } 166 | 167 | copy(k.Type[:], data[0:8]) 168 | k.Version = DecodeUint16(data[8:10]) 169 | k.UID = DecodeUint64(data[10:18]) 170 | k.AppNum = DecodeUint64(data[18:26]) 171 | // data[26:28] is unused, was key size 172 | k.Salt = DecodeUint64(data[28:36]) 173 | k.Pepper = DecodeUint64(data[36:44]) 174 | k.BlockSize = DecodeUint16(data[44:46]) 175 | k.LoadFactor = DecodeUint16(data[46:48]) 176 | 177 | k.Capacity = BucketCapacity(int(k.BlockSize)) 178 | if fileSize > int64(k.BlockSize) { 179 | if k.BlockSize > 0 { 180 | k.Buckets = int((fileSize - int64(KeyFileHeaderSize)) / int64(k.BlockSize)) 181 | } else { 182 | // Corruption or logic error 183 | k.Buckets = 0 184 | } 185 | } else { 186 | k.Buckets = 0 187 | } 188 | 189 | k.Modulus = ceil_pow2(uint64(k.Buckets)) 190 | 191 | return nil 192 | } 193 | 194 | func (k *KeyFileHeader) EncodeTo(w io.Writer) error { 195 | var data [KeyFileHeaderSize]byte 196 | 197 | copy(data[0:8], KeyFileHeaderType) 198 | EncodeUint16(data[8:10], k.Version) 199 | EncodeUint64(data[10:18], k.UID) 200 | EncodeUint64(data[18:26], k.AppNum) 201 | // data[26:28] is unused, was key size 202 | EncodeUint64(data[28:36], k.Salt) 203 | EncodeUint64(data[36:44], k.Pepper) 204 | EncodeUint16(data[44:46], k.BlockSize) 205 | EncodeUint16(data[46:48], k.LoadFactor) 206 | 207 | n, err := w.Write(data[:]) 208 | if err != nil { 209 | return err 210 | } 211 | if n != len(data) { 212 | return io.ErrShortWrite 213 | } 214 | 215 | return nil 216 | } 217 | 218 | // Verify contents of key file header 219 | func (k *KeyFileHeader) Verify() error { 220 | if !bytes.Equal(KeyFileHeaderType, k.Type[:]) { 221 | return ErrNotKeyFile 222 | } 223 | 224 | if k.Version != currentVersion { 225 | return ErrDifferentVersion 226 | } 227 | 228 | if k.Pepper != pepper(k.Salt) { 229 | return ErrHashMismatch 230 | } 231 | 232 | if k.LoadFactor < 1 { 233 | return ErrInvalidLoadFactor 234 | } 235 | if k.Capacity < 1 { 236 | return ErrInvalidCapacity 237 | } 238 | if k.Buckets < 1 { 239 | return ErrInvalidBucketCount 240 | } 241 | 242 | return nil 243 | } 244 | 245 | type LogFileHeader struct { 246 | Type [8]byte 247 | Version uint16 248 | UID uint64 249 | AppNum uint64 250 | Salt uint64 251 | Pepper uint64 252 | BlockSize uint16 253 | KeyFileSize int64 254 | DatFileSize int64 255 | } 256 | 257 | func (l *LogFileHeader) Size() int { 258 | return LogFileHeaderSize 259 | } 260 | 261 | func (l *LogFileHeader) DecodeFrom(r io.Reader) error { 262 | var data [LogFileHeaderSize]byte 263 | if _, err := io.ReadFull(r, data[:]); err != nil { 264 | return err 265 | } 266 | 267 | copy(l.Type[:], data[0:8]) 268 | l.Version = DecodeUint16(data[8:10]) 269 | l.UID = DecodeUint64(data[10:18]) 270 | l.AppNum = 
DecodeUint64(data[18:26]) 271 | // data[26:28] was KeySize 272 | l.Salt = DecodeUint64(data[28:36]) 273 | l.Pepper = DecodeUint64(data[36:44]) 274 | l.BlockSize = DecodeUint16(data[44:46]) 275 | l.KeyFileSize = int64(DecodeUint64(data[46:54])) 276 | l.DatFileSize = int64(DecodeUint64(data[54:62])) 277 | 278 | return nil 279 | } 280 | 281 | func (l *LogFileHeader) EncodeTo(w io.Writer) error { 282 | var data [LogFileHeaderSize]byte 283 | 284 | copy(data[0:8], LogFileHeaderType) 285 | EncodeUint16(data[8:10], l.Version) 286 | EncodeUint64(data[10:18], l.UID) 287 | EncodeUint64(data[18:26], l.AppNum) 288 | // data[26:28] was KeySize 289 | EncodeUint64(data[28:36], l.Salt) 290 | EncodeUint64(data[36:44], l.Pepper) 291 | EncodeUint16(data[44:46], l.BlockSize) 292 | EncodeUint64(data[46:54], uint64(l.KeyFileSize)) 293 | EncodeUint64(data[54:62], uint64(l.DatFileSize)) 294 | 295 | n, err := w.Write(data[:]) 296 | if err != nil { 297 | return err 298 | } 299 | if n != len(data) { 300 | return io.ErrShortWrite 301 | } 302 | 303 | return nil 304 | } 305 | 306 | type DataRecord struct { 307 | hash uint64 308 | key string 309 | data []byte 310 | offset int64 311 | size int64 312 | } 313 | 314 | // DataRecordHeader is prepended to each record written to the data file. 315 | // Layout is: 316 | // 317 | // 6 bytes DataSize 318 | // 2 bytes KeySize 319 | // n bytes Key 320 | type DataRecordHeader struct { 321 | DataSize int64 322 | KeySize uint16 323 | Key []byte 324 | } 325 | 326 | // IsData reports whether the data record contains data 327 | func (d *DataRecordHeader) IsData() bool { 328 | return d.DataSize != 0 329 | } 330 | 331 | // IsSpill reports whether the data record is a bucket spill 332 | func (d *DataRecordHeader) IsSpill() bool { 333 | return d.DataSize == 0 334 | } 335 | 336 | // Size returns the size of the header in bytes 337 | func (d *DataRecordHeader) Size() int64 { 338 | return SizeUint48 + SizeUint16 + int64(len(d.Key)) 339 | } 340 | 341 | type BucketRecord struct { 342 | idx int 343 | bucket *Bucket 344 | } 345 | 346 | // ceil_pow2 returns the closest power of 2 not less than v 347 | func ceil_pow2(x uint64) uint64 { 348 | t := [6]uint64{ 349 | 0xFFFFFFFF00000000, 350 | 0x00000000FFFF0000, 351 | 0x000000000000FF00, 352 | 0x00000000000000F0, 353 | 0x000000000000000C, 354 | 0x0000000000000002, 355 | } 356 | 357 | var y int 358 | if (x & (x - 1)) != 0 { 359 | y = 1 360 | } 361 | var j int = 32 362 | 363 | for i := 0; i < 6; i++ { 364 | var k int 365 | if (x & t[i]) != 0 { 366 | k = j 367 | } 368 | y += k 369 | x >>= k 370 | j >>= 1 371 | } 372 | 373 | return 1 << y 374 | } 375 | -------------------------------------------------------------------------------- /internal/hasher.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "crypto/rand" 5 | "encoding/binary" 6 | 7 | "github.com/OneOfOne/xxhash" 8 | ) 9 | 10 | type Hasher uint64 11 | 12 | func (h Hasher) Hash(data []byte) uint64 { 13 | return xxhash.Checksum64S(data, uint64(h)) 14 | } 15 | 16 | func (h Hasher) HashString(data string) uint64 { 17 | return xxhash.ChecksumString64S(data, uint64(h)) 18 | } 19 | 20 | // pepper computes pepper from salt 21 | func pepper(salt uint64) uint64 { 22 | var data [8]byte 23 | binary.BigEndian.PutUint64(data[:], salt) 24 | return Hasher(salt).Hash(data[:]) 25 | } 26 | 27 | // NewSalt returns a random salt or panics if the system source of entropy 28 | // cannot be read 29 | func NewSalt() uint64 { 30 | var v uint64 31 | err 
:= binary.Read(rand.Reader, binary.BigEndian, &v) 32 | if err != nil { 33 | panic(err.Error()) 34 | } 35 | return v 36 | } 37 | 38 | // NewUID returns a random identifier or panics if the system source of entropy 39 | // cannot be read 40 | func NewUID() uint64 { 41 | var v uint64 42 | err := binary.Read(rand.Reader, binary.BigEndian, &v) 43 | if err != nil { 44 | panic(err.Error()) 45 | } 46 | return v 47 | } 48 | -------------------------------------------------------------------------------- /internal/pool.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // Buffers data records in a map 9 | type Pool struct { 10 | mu sync.RWMutex // guards index, records and dataSize 11 | index map[string]int 12 | records []DataRecord 13 | dataSize int 14 | } 15 | 16 | func NewPool(sizeHint int) *Pool { 17 | return &Pool{ 18 | index: make(map[string]int, sizeHint), 19 | records: make([]DataRecord, sizeHint), 20 | } 21 | } 22 | 23 | func (p *Pool) IsEmpty() bool { 24 | p.mu.RLock() 25 | defer p.mu.RUnlock() 26 | return len(p.records) == 0 27 | } 28 | 29 | // Count returns the number of data records in the pool 30 | func (p *Pool) Count() int { 31 | p.mu.RLock() 32 | defer p.mu.RUnlock() 33 | return len(p.records) 34 | } 35 | 36 | // DataSize returns the sum of data sizes in the pool 37 | func (p *Pool) DataSize() int { 38 | p.mu.RLock() 39 | defer p.mu.RUnlock() 40 | return p.dataSize 41 | } 42 | 43 | func (p *Pool) Clear() { 44 | p.mu.Lock() 45 | defer p.mu.Unlock() 46 | p.dataSize = 0 47 | p.records = p.records[:0] 48 | for k := range p.index { 49 | delete(p.index, k) 50 | } 51 | } 52 | 53 | func (p *Pool) Find(key string) ([]byte, bool) { 54 | p.mu.RLock() 55 | defer p.mu.RUnlock() 56 | idx, exists := p.index[key] 57 | if !exists { 58 | return nil, false 59 | } 60 | return p.records[idx].data, true 61 | } 62 | 63 | func (p *Pool) Has(key string) bool { 64 | p.mu.RLock() 65 | defer p.mu.RUnlock() 66 | _, exists := p.index[key] 67 | return exists 68 | } 69 | 70 | func (p *Pool) Insert(hash uint64, key string, value []byte) { 71 | p.mu.Lock() 72 | defer p.mu.Unlock() 73 | 74 | if _, exists := p.index[key]; exists { 75 | panic("duplicate key inserted: " + key) 76 | } 77 | 78 | // TODO: review need to make copy of value 79 | r := DataRecord{ 80 | hash: hash, 81 | key: key, 82 | data: make([]byte, len(value)), 83 | size: int64(len(value)), 84 | } 85 | copy(r.data, value) 86 | 87 | p.records = append(p.records, r) 88 | p.index[key] = len(p.records) - 1 89 | p.dataSize += len(value) 90 | } 91 | 92 | func (p *Pool) WithRecords(fn func([]DataRecord) error) error { 93 | p.mu.RLock() 94 | defer p.mu.RUnlock() 95 | return fn(p.records) 96 | } 97 | 98 | func (p *Pool) WriteRecords(df *DataFile) (int64, error) { 99 | p.mu.RLock() 100 | defer p.mu.RUnlock() 101 | 102 | written := int64(0) 103 | for i := range p.records { 104 | offset, err := df.AppendRecord(&p.records[i]) 105 | if err != nil { 106 | return written, fmt.Errorf("encode record: %w", err) 107 | } 108 | // if s.tlogger.Enabled() { 109 | // s.tlogger.Info("wrote p0 record", "index", i, "offset", offset, "record_key", rs[i].key, "record_size", rs[i].size) 110 | // } 111 | p.records[i].offset = offset 112 | written += p.records[i].size 113 | } 114 | 115 | return written, nil 116 | } 117 | -------------------------------------------------------------------------------- /internal/store.go: 
-------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "math" 9 | "os" 10 | "sync" 11 | "time" 12 | 13 | "github.com/go-logr/logr" 14 | ) 15 | 16 | type Store struct { 17 | // Fields written when open or close is called 18 | df *DataFile 19 | kf *KeyFile 20 | lf *LogFile 21 | 22 | // Currently imu guards all calls to p0 and bc methods 23 | imu sync.Mutex 24 | p0 *Pool 25 | bc *BucketCache 26 | 27 | rmu sync.Mutex // guards acess to rate and when 28 | rate float64 // rate at which data can be flushed 29 | when time.Time 30 | 31 | emu sync.Mutex // guards access to open and err 32 | open bool 33 | err error 34 | 35 | monitor chan struct{} 36 | 37 | elogger logr.Logger // error logger 38 | dlogger logr.Logger // diagnostics logger 39 | tlogger logr.Logger // trace logger 40 | } 41 | 42 | func CreateStore(datPath, keyPath, logPath string, appnum, uid, salt uint64, blockSize int, loadFactor float64) error { 43 | // TODO make this a constant MaxBlockSize 44 | if blockSize > MaxBlockSize { 45 | return ErrInvalidBlockSize 46 | } 47 | 48 | if loadFactor <= 0 || loadFactor >= 1 { 49 | return ErrInvalidLoadFactor 50 | } 51 | 52 | capacity := BucketCapacity(blockSize) 53 | if capacity < 1 { 54 | return ErrInvalidBlockSize 55 | } 56 | 57 | if err := CreateDataFile(datPath, appnum, uid); err != nil { 58 | return fmt.Errorf("create data file: %w", err) 59 | } 60 | 61 | if err := CreateKeyFile(keyPath, uid, appnum, salt, blockSize, loadFactor); err != nil { 62 | return fmt.Errorf("create key file: %w", err) 63 | } 64 | 65 | return nil 66 | } 67 | 68 | func OpenStore(datPath, keyPath, logPath string, syncInterval time.Duration, elogger logr.Logger, dlogger logr.Logger, tlogger logr.Logger) (*Store, error) { 69 | df, err := OpenDataFile(datPath) 70 | if err != nil { 71 | return nil, fmt.Errorf("open data file: %w", err) 72 | } 73 | 74 | abandon := func() { 75 | df.Close() // TODO: handle close error 76 | } 77 | 78 | kf, err := OpenKeyFile(keyPath) 79 | if err != nil { 80 | abandon() 81 | return nil, fmt.Errorf("open key file: %w", err) 82 | } 83 | 84 | abandon = func() { 85 | df.Close() // TODO: handle close error 86 | kf.Close() // TODO: handle close error 87 | } 88 | 89 | if err := df.Header.VerifyMatchingKey(&kf.Header); err != nil { 90 | abandon() 91 | return nil, fmt.Errorf("verify key file matches data file: %w", err) 92 | } 93 | 94 | if kf.Header.Buckets < 1 { 95 | abandon() 96 | return nil, ErrShortKeyFile 97 | } 98 | 99 | lf, err := OpenLogFile(logPath) 100 | if err != nil { 101 | abandon() 102 | 103 | var pathErr *os.PathError 104 | if errors.As(err, &pathErr) && os.IsExist(pathErr) { 105 | return nil, fmt.Errorf("log file exists, store requires recovery") 106 | } 107 | 108 | return nil, fmt.Errorf("open log file: %w", err) 109 | } 110 | 111 | df.elogger = elogger 112 | kf.elogger = elogger 113 | lf.elogger = elogger 114 | 115 | s := &Store{ 116 | when: time.Now(), 117 | df: df, 118 | kf: kf, 119 | lf: lf, 120 | 121 | p0: NewPool(0), 122 | bc: &BucketCache{ 123 | bucketSize: int(kf.Header.BlockSize), 124 | modulus: ceil_pow2(uint64(kf.Header.Buckets)), 125 | buckets: make([]*Bucket, int(kf.Header.Buckets)), 126 | dirty: make([]bool, int(kf.Header.Buckets)), 127 | threshold: (int(kf.Header.LoadFactor) * int(kf.Header.Capacity)) / 65536, 128 | tlogger: tlogger, 129 | }, 130 | 131 | open: true, 132 | monitor: make(chan struct{}), 133 | 134 | elogger: elogger, 135 | dlogger: dlogger, 136 | 
tlogger: tlogger, 137 | } 138 | 139 | for idx := range s.bc.buckets { 140 | b, err := kf.LoadBucket(idx) 141 | if err != nil { 142 | return nil, fmt.Errorf("read bucket: %w", err) 143 | } 144 | s.bc.buckets[idx] = b 145 | } 146 | s.bc.computeStats(df) 147 | 148 | // Flush writes automatically 149 | go func() { 150 | d := time.NewTicker(syncInterval) 151 | 152 | for { 153 | select { 154 | 155 | case <-s.monitor: 156 | d.Stop() 157 | select { 158 | case <-d.C: 159 | default: 160 | } 161 | return 162 | 163 | case <-d.C: 164 | if s.tlogger.Enabled() { 165 | s.tlogger.Info("Background flush") 166 | } 167 | s.Flush() 168 | 169 | } 170 | } 171 | }() 172 | 173 | return s, nil 174 | } 175 | 176 | func (s *Store) Close() error { 177 | s.emu.Lock() 178 | open := s.open 179 | s.open = false 180 | s.emu.Unlock() 181 | if !open { 182 | return nil 183 | } 184 | 185 | close(s.monitor) 186 | 187 | s.imu.Lock() 188 | defer s.imu.Unlock() 189 | 190 | if !s.p0.IsEmpty() { 191 | if _, err := s.commit(); err != nil { 192 | if s.elogger.Enabled() { 193 | s.elogger.Error(err, "commit") 194 | } 195 | s.setErr(err) 196 | } 197 | } 198 | 199 | // Return if the store is in an error state, such as from a failed flush 200 | if err := s.Err(); err != nil { 201 | return err 202 | } 203 | 204 | if s.lf != nil { 205 | if err := s.lf.Close(); err != nil { 206 | return fmt.Errorf("close log file: %w", err) 207 | } 208 | 209 | if err := os.Remove(s.lf.Path); err != nil { 210 | return fmt.Errorf("delete log file: %w", err) 211 | } 212 | } 213 | 214 | if err := s.kf.Close(); err != nil { 215 | return fmt.Errorf("close key file: %w", err) 216 | } 217 | 218 | if err := s.df.Close(); err != nil { 219 | return fmt.Errorf("close data file: %w", err) 220 | } 221 | 222 | return nil 223 | } 224 | 225 | // Err returns an error if the store is in an error state, nil otherwise 226 | func (s *Store) Err() error { 227 | s.emu.Lock() 228 | defer s.emu.Unlock() 229 | return s.err 230 | } 231 | 232 | func (s *Store) setErr(err error) { 233 | s.emu.Lock() 234 | s.err = err 235 | s.emu.Unlock() 236 | } 237 | 238 | func (s *Store) DataFile() *DataFile { return s.df } 239 | func (s *Store) KeyFile() *KeyFile { return s.kf } 240 | func (s *Store) LogFile() *LogFile { return s.lf } 241 | 242 | func (s *Store) RecordCount() int { 243 | return s.bc.EntryCount() 244 | } 245 | 246 | func (s *Store) Rate() float64 { 247 | s.rmu.Lock() 248 | defer s.rmu.Unlock() 249 | return s.rate 250 | } 251 | 252 | func (s *Store) Insert(key string, data []byte) error { 253 | if s.tlogger.Enabled() { 254 | s.tlogger.Info("Store.Insert", "key", key, "data_len", len(data)) 255 | } 256 | 257 | // Return if the store is in an error state, such as from a failed flush 258 | if err := s.Err(); err != nil { 259 | return err 260 | } 261 | if len(key) == 0 { 262 | return ErrKeyMissing 263 | } else if len(key) > MaxKeySize { 264 | return ErrKeyTooLarge 265 | } else if len(data) == 0 { 266 | return ErrDataMissing 267 | } else if len(data) > MaxDataSize { 268 | return ErrDataTooLarge 269 | } 270 | 271 | s.imu.Lock() 272 | err := s.insert(key, data) 273 | s.imu.Unlock() 274 | 275 | if err != nil { 276 | return err 277 | } 278 | 279 | // Calculate throttling 280 | now := time.Now() 281 | s.rmu.Lock() 282 | elapsed := now.Sub(s.when) 283 | work := s.p0.DataSize() + 3*s.p0.Count()*int(s.kf.Header.BlockSize) // TODO: move this calculation into Pool 284 | rate := math.Ceil(float64(work) / elapsed.Seconds()) 285 | sleep := s.rate > 0 && rate > s.rate 286 | s.rmu.Unlock() 287 | 288 | if 
s.dlogger.Enabled() { 289 | s.dlogger.Info("insert work rate", "rate", rate, "work", work, "time", elapsed.Seconds(), "throttle", sleep) 290 | } 291 | 292 | // The caller of insert must be blocked when the rate of insertion 293 | // (measured in approximate bytes per second) exceeds the maximum rate 294 | // that can be flushed. The precise sleep duration is not important. 295 | 296 | if sleep { 297 | time.Sleep(25 * time.Millisecond) 298 | } 299 | 300 | return nil 301 | } 302 | 303 | // insert expects caller to hold s.imu lock 304 | func (s *Store) insert(key string, data []byte) error { 305 | h := s.kf.HashString(key) 306 | if s.p0.Has(key) { 307 | return ErrKeyExists 308 | } 309 | 310 | found, err := s.bc.Exists(h, key, s.df) 311 | if err != nil { 312 | return fmt.Errorf("exists in bucket: %w", err) 313 | } 314 | if found { 315 | return ErrKeyExists 316 | } 317 | 318 | // Perform insert 319 | if s.tlogger.Enabled() { 320 | s.tlogger.Info("inserting into pool p1", "key", key, "size", len(data)) 321 | } 322 | s.p0.Insert(h, key, data) 323 | 324 | return nil 325 | } 326 | 327 | func (s *Store) Flush() { 328 | if s.tlogger.Enabled() { 329 | s.tlogger.Info("Store.Flush") 330 | } 331 | 332 | s.rmu.Lock() 333 | s.when = time.Now() 334 | s.rmu.Unlock() 335 | 336 | s.imu.Lock() 337 | defer s.imu.Unlock() 338 | 339 | if s.p0.IsEmpty() { 340 | // Nothing to flush 341 | return 342 | } 343 | 344 | work, err := s.commit() 345 | if err != nil { 346 | if s.elogger.Enabled() { 347 | s.elogger.Error(err, "flush") 348 | } 349 | s.setErr(err) 350 | return 351 | } 352 | 353 | now := time.Now() 354 | s.rmu.Lock() 355 | elapsed := now.Sub(s.when) 356 | s.rate = math.Ceil(float64(work) / elapsed.Seconds()) 357 | s.rmu.Unlock() 358 | 359 | if s.dlogger.Enabled() { 360 | s.dlogger.Info("flush work rate", "rate", s.rate, "work", work, "time", elapsed.Seconds()) 361 | } 362 | } 363 | 364 | // Currently expects s.imu to be held 365 | func (s *Store) commit() (int64, error) { 366 | if s.tlogger.Enabled() { 367 | s.tlogger.Info("Store.commit") 368 | } 369 | 370 | if err := s.lf.Prepare(s.df, s.kf); err != nil { 371 | return 0, fmt.Errorf("prepare log: %w", err) 372 | } 373 | 374 | // Append data and spills to data file 375 | 376 | work, err := s.p0.WriteRecords(s.df) 377 | if err != nil { 378 | return 0, fmt.Errorf("write data file: %w", err) 379 | } 380 | 381 | if err := s.p0.WithRecords(func(rs []DataRecord) error { 382 | for i := range rs { 383 | err := s.bc.Insert(rs[i].offset, rs[i].size, rs[i].hash, s.df) 384 | if err != nil { 385 | return fmt.Errorf("bucket cache insert: %w", err) 386 | } 387 | } 388 | return nil 389 | }); err != nil { 390 | return 0, fmt.Errorf("write to buckets: %w", err) 391 | } 392 | 393 | // Ensure any data written to data file is on disk. 
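// Note on ordering (inferred from the sequence below rather than documented
// elsewhere): data records are flushed to the data file before the dirty
// buckets that reference them are written out via the log and key files, and
// the log file is truncated and synced only after the data file has been
// synced, so an interrupted commit should still be recoverable from the log.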
394 | if err := s.df.Flush(); err != nil { 395 | return 0, fmt.Errorf("flush data file: %w", err) 396 | } 397 | // work += int(s.kf.Header.BlockSize) * (2*mutatedBuckets.Count() + newBuckets.Count()) 398 | 399 | s.p0.Clear() 400 | 401 | written, err := s.bc.WriteDirty(s.lf, s.kf) 402 | work += written 403 | if err != nil { 404 | return work, fmt.Errorf("write dirty buckets: %w", err) 405 | } 406 | 407 | // Finalize the commit 408 | if err := s.df.Sync(); err != nil { 409 | return 0, fmt.Errorf("sync data file: %w", err) 410 | } 411 | 412 | if err := s.lf.Truncate(); err != nil { 413 | return 0, fmt.Errorf("trunc log file: %w", err) 414 | } 415 | 416 | if err := s.lf.Sync(); err != nil { 417 | return 0, fmt.Errorf("sync log file: %w", err) 418 | } 419 | 420 | return work, nil 421 | } 422 | 423 | func (s *Store) FetchReader(key string) (io.Reader, error) { 424 | if s.tlogger.Enabled() { 425 | s.tlogger.Info("Store.FetchReader", "key", key) 426 | } 427 | if err := s.Err(); err != nil { 428 | return nil, err 429 | } 430 | 431 | h := s.kf.HashString(key) 432 | 433 | if s.tlogger.Enabled() { 434 | s.tlogger.Info("looking for data in pool p0", "key", key) 435 | } 436 | 437 | s.imu.Lock() 438 | defer s.imu.Unlock() 439 | 440 | if data, exists := s.p0.Find(key); exists { 441 | return bytes.NewReader(data), nil 442 | } 443 | 444 | r, err := s.bc.Fetch(h, key, s.df) 445 | if err != nil { 446 | return nil, fmt.Errorf("read bucket: %w", err) 447 | } 448 | return r, nil 449 | } 450 | 451 | func (s *Store) Exists(key string) (bool, error) { 452 | if s.tlogger.Enabled() { 453 | s.tlogger.Info("Store.Exists", "key", key) 454 | } 455 | if err := s.Err(); err != nil { 456 | return false, err 457 | } 458 | 459 | if s.p0.Has(key) { 460 | return true, nil 461 | } 462 | 463 | h := s.kf.HashString(key) 464 | return s.bc.Exists(h, key, s.df) 465 | } 466 | 467 | func (s *Store) DataSize(key string) (int64, error) { 468 | if s.tlogger.Enabled() { 469 | s.tlogger.Info("Store.DataSize", "key", key) 470 | } 471 | if err := s.Err(); err != nil { 472 | return 0, err 473 | } 474 | 475 | if data, exists := s.p0.Find(key); exists { 476 | return int64(len(data)), nil 477 | } 478 | 479 | h := s.kf.HashString(key) 480 | rh, err := s.bc.FetchHeader(h, key, s.df) 481 | if err != nil { 482 | return 0, fmt.Errorf("fetch header: %w", err) 483 | } 484 | 485 | return rh.DataSize, nil 486 | } 487 | -------------------------------------------------------------------------------- /internal/syscall.go: -------------------------------------------------------------------------------- 1 | //go:build !(linux && amd64) 2 | 3 | package internal 4 | 5 | const ( 6 | FADV_NORMAL = 0x0 7 | FADV_RANDOM = 0x1 8 | FADV_SEQUENTIAL = 0x2 9 | FADV_WILLNEED = 0x3 10 | ) 11 | 12 | func Fadvise(fd int, offset int64, length int64, advice int) error { 13 | // noop on non unix platforms 14 | return nil 15 | } 16 | -------------------------------------------------------------------------------- /internal/syscallunix.go: -------------------------------------------------------------------------------- 1 | //go:build linux && amd64 2 | 3 | package internal 4 | 5 | import ( 6 | "golang.org/x/sys/unix" 7 | ) 8 | 9 | const ( 10 | FADV_NORMAL = 0x0 11 | FADV_RANDOM = 0x1 12 | FADV_SEQUENTIAL = 0x2 13 | FADV_WILLNEED = 0x3 14 | ) 15 | 16 | func Fadvise(fd int, offset int64, length int64, advice int) error { 17 | return unix.Fadvise(fd, offset, length, advice) 18 | } 19 | -------------------------------------------------------------------------------- 
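The two files above select between a real posix_fadvise call and a no-op stub via build tags. Below is a minimal sketch of how the wrapper can be exercised from inside the internal package; the file name and test are hypothetical and not part of the repository.

// fadvise_example_test.go (hypothetical): exercises the Fadvise wrapper.
package internal

import (
	"os"
	"testing"
)

// TestFadviseSequentialSketch advises the kernel that the whole file will be
// read sequentially (offset 0 with length 0 means "to end of file"). On
// platforms where the stub implementation is selected this is a no-op.
func TestFadviseSequentialSketch(t *testing.T) {
	f, err := os.CreateTemp(t.TempDir(), "fadvise")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	if err := Fadvise(int(f.Fd()), 0, 0, FADV_SEQUENTIAL); err != nil {
		t.Fatalf("Fadvise: %v", err)
	}
}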
/internal/verify.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/go-logr/logr" 7 | ) 8 | 9 | // VerifyStore verifies consistency of the data and key files. 10 | func VerifyStore(datPath, keyPath string, logger logr.Logger) (*VerifyResult, error) { 11 | df, err := OpenDataFile(datPath) 12 | if err != nil { 13 | return nil, fmt.Errorf("open data file: %w", err) 14 | } 15 | defer df.Close() 16 | df.elogger = logger 17 | logger.Info("opened data file", "version", df.Header.Version, "uid", df.Header.UID, "appnum", df.Header.AppNum) 18 | 19 | kf, err := OpenKeyFile(keyPath) 20 | if err != nil { 21 | return nil, fmt.Errorf("open key file: %w", err) 22 | } 23 | defer kf.Close() 24 | kf.elogger = logger 25 | logger.Info("opened key file", "version", kf.Header.Version, "uid", kf.Header.UID, "appnum", kf.Header.AppNum, "buckets", kf.Header.Buckets, "block_size", kf.Header.BlockSize, "load_factor", kf.Header.LoadFactor) 26 | 27 | if err := df.Header.VerifyMatchingKey(&kf.Header); err != nil { 28 | return nil, fmt.Errorf("key file and data file have incompatible metadata: %w", err) 29 | } 30 | 31 | kfSize, err := kf.Size() 32 | if err != nil { 33 | return nil, fmt.Errorf("failed to read key file size: %w", err) 34 | } 35 | 36 | if kf.Header.Buckets < 1 { 37 | return nil, fmt.Errorf("possibly corrupt key file: file should contain at least one bucket") 38 | } 39 | 40 | expectedFileSize := int64(KeyFileHeaderSize) + (int64(kf.Header.BlockSize) * int64(kf.Header.Buckets)) 41 | if kfSize != expectedFileSize { 42 | return nil, fmt.Errorf("possibly corrupt key file: file size %d does not match expected %d", kfSize, expectedFileSize) 43 | } 44 | 45 | if kfSize < int64(kf.Header.BlockSize) { 46 | return nil, fmt.Errorf("possibly corrupt key file, file smaller than a single block") 47 | } 48 | 49 | res := &VerifyResult{ 50 | DatPath: datPath, 51 | KeyPath: keyPath, 52 | Version: df.Header.Version, 53 | UID: df.Header.UID, 54 | AppNum: df.Header.AppNum, 55 | Salt: kf.Header.Salt, 56 | Pepper: kf.Header.Pepper, 57 | BlockSize: kf.Header.BlockSize, 58 | LoadFactor: float64(kf.Header.LoadFactor) / float64(MaxUint16), 59 | Capacity: kf.Header.Capacity, 60 | Buckets: kf.Header.Buckets, 61 | Modulus: kf.Header.Modulus, 62 | } 63 | 64 | res.DatFileSize, err = df.Size() 65 | if err != nil { 66 | return nil, fmt.Errorf("reading data file size: %w", err) 67 | } 68 | res.KeyFileSize, err = kf.Size() 69 | if err != nil { 70 | return nil, fmt.Errorf("reading key file size: %w", err) 71 | } 72 | 73 | // Verify records 74 | rs := df.RecordScanner() 75 | defer rs.Close() 76 | totalFetches := 0 77 | for rs.Next() { 78 | res.RecordBytesTotal += rs.RecordSize() 79 | if rs.IsData() { 80 | res.ValueCountTotal++ 81 | res.ValueBytesTotal += rs.Size() 82 | } else { 83 | res.SpillCountTotal++ 84 | res.SpillBytesTotal += rs.Size() 85 | } 86 | 87 | fetches, err := countFetches(rs.Key(), df, kf) 88 | if err != nil { 89 | return nil, fmt.Errorf("counting fetches: %w", err) 90 | } 91 | totalFetches += fetches 92 | 93 | } 94 | if rs.Err() != nil { 95 | return nil, fmt.Errorf("scanning data file: %w", rs.Err()) 96 | } 97 | 98 | if res.DatFileSize != res.RecordBytesTotal+DatFileHeaderSize { 99 | return nil, fmt.Errorf("data file size mismatch: file size is %d, size of records is %d (diff: %d)", res.DatFileSize, res.RecordBytesTotal+DatFileHeaderSize, res.DatFileSize-(res.RecordBytesTotal+DatFileHeaderSize)) 100 | } 101 | 102 | // Verify 
buckets 103 | bs := kf.BucketScanner(df) 104 | defer bs.Close() 105 | for bs.Next() { 106 | b := bs.Bucket() 107 | res.KeyCount += int64(b.Count()) 108 | if bs.IsSpill() { 109 | res.SpillCountInUse++ 110 | res.SpillBytesInUse += SpillHeaderSize + int64(b.ActualSize()) 111 | res.RecordBytesInUse += SpillHeaderSize + int64(b.ActualSize()) 112 | } 113 | 114 | for i := 0; i < b.Count(); i++ { 115 | e := b.entry(i) 116 | ehdr, err := df.LoadRecordHeader(e.Offset) 117 | if err != nil { 118 | return nil, fmt.Errorf("load record header at offset %d: %w", e.Offset, err) 119 | } 120 | 121 | if !ehdr.IsData() { 122 | return nil, fmt.Errorf("record type mismatch at offset %d, key file expects data record", e.Offset) 123 | } 124 | 125 | if ehdr.DataSize != e.Size { 126 | return nil, fmt.Errorf("record size mismatch at offset %d, data file record size %d, key file expects size %d", e.Offset, ehdr.DataSize, e.Size) 127 | } 128 | 129 | hash := kf.Hash(ehdr.Key) 130 | if hash != e.Hash { 131 | return nil, fmt.Errorf("record key hash mismatch at offset %d, data file record hash %d, key file expects hash %d", e.Offset, hash, e.Hash) 132 | } 133 | 134 | res.ValueCountInUse++ 135 | res.ValueBytesInUse += ehdr.DataSize 136 | res.RecordBytesInUse += ehdr.Size() + ehdr.DataSize 137 | } 138 | 139 | } 140 | if bs.Err() != nil { 141 | return nil, fmt.Errorf("scanning key file (index: %d): %w", bs.Index(), bs.Err()) 142 | } 143 | 144 | res.Waste = float64(res.SpillBytesTotal-res.SpillBytesInUse) / float64(res.DatFileSize) 145 | res.ActualLoad = float64(res.KeyCount) / float64(res.Capacity*res.Buckets) 146 | 147 | if res.ValueCountInUse > 0 { 148 | res.Overhead = float64(res.KeyFileSize+res.DatFileSize) / float64(res.RecordBytesTotal) 149 | res.AverageFetch = float64(totalFetches) / float64(res.ValueCountInUse) 150 | } 151 | 152 | return res, nil 153 | } 154 | 155 | func countFetches(key string, df *DataFile, kf *KeyFile) (int, error) { 156 | fetches := 0 157 | h := kf.HashString(key) 158 | 159 | idx := BucketIndex(h, kf.Header.Buckets, kf.Header.Modulus) 160 | tmpb, err := kf.LoadBucket(idx) 161 | if err != nil { 162 | return fetches, fmt.Errorf("read bucket: %w", err) 163 | } 164 | fetches++ 165 | for { 166 | for i := tmpb.lowerBound(h); i < tmpb.count; i++ { 167 | entry := tmpb.entry(i) 168 | if entry.Hash != h { 169 | break 170 | } 171 | 172 | ehdr, err := df.LoadRecordHeader(entry.Offset) 173 | if err != nil { 174 | return fetches, fmt.Errorf("read data record: %w", err) 175 | } 176 | fetches++ 177 | 178 | if string(ehdr.Key) != key { 179 | continue 180 | } 181 | 182 | return fetches, nil 183 | } 184 | 185 | spill := tmpb.Spill() 186 | 187 | if spill == 0 { 188 | break 189 | } 190 | 191 | blockBuf := make([]byte, kf.Header.BlockSize) 192 | tmpb = NewBucket(int(kf.Header.BlockSize), blockBuf) 193 | if err := tmpb.LoadFrom(int64(spill), df); err != nil { 194 | return fetches, fmt.Errorf("read spill: %w", err) 195 | } 196 | fetches++ 197 | 198 | } 199 | 200 | // record not reachable from the key file so don't count it as a fetch 201 | return 0, nil 202 | } 203 | 204 | type VerifyResult struct { 205 | DatPath string // The path to the data file 206 | KeyPath string // The path to the key file 207 | Version uint16 // The API version used to create the database 208 | UID uint64 // The unique identifier 209 | AppNum uint64 // The application-defined constant 210 | Salt uint64 // The salt used in the key file 211 | Pepper uint64 // The salt fingerprint 212 | BlockSize uint16 // The block size used in the key file
213 | LoadFactor float64 // The target load factor used in the key file 214 | 215 | KeyFileSize int64 // The size of the key file in bytes 216 | DatFileSize int64 // The size of the data file in bytes 217 | Capacity int // The maximum number of keys each bucket can hold 218 | Buckets int // The number of buckets in the key file 219 | BucketSize int64 // The size of a bucket in bytes 220 | Modulus uint64 221 | 222 | KeyCount int64 // The number of keys found 223 | ValueCountInUse int64 // The number of values found that are referenced by a key 224 | ValueCountTotal int64 // The number of values found 225 | ValueBytesInUse int64 // The total number of bytes occupied by values that are referenced by a key 226 | ValueBytesTotal int64 // The total number of bytes occupied by values 227 | RecordBytesInUse int64 // The total number of bytes occupied by records (header + value) that are referenced by a key 228 | RecordBytesTotal int64 // The total number of bytes occupied by records (header + value) 229 | SpillCountInUse int64 // The number of spill records in use 230 | SpillCountTotal int64 // The total number of spill records 231 | SpillBytesInUse int64 // The number of bytes occupied by spill records in use 232 | SpillBytesTotal int64 // The number of bytes occupied by all spill records 233 | AverageFetch float64 // Average number of key file reads per fetch 234 | Waste float64 // The fraction of the data file that is wasted 235 | Overhead float64 // The data amplification ratio (size of data files compared to the size of the underlying data and keys) 236 | ActualLoad float64 // The measured bucket load fraction (number of keys as a fraction of the total capacity) 237 | } 238 | -------------------------------------------------------------------------------- /internal/version.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import "regexp" 4 | 5 | var GitVersion string = "unknown" 6 | 7 | var reVersion = regexp.MustCompile(`^(v\d+\.\d+.\d+)(?:-)?(.+)?$`) 8 | 9 | // String formats the version in semver format, see semver.org 10 | func Version() string { 11 | m := reVersion.FindStringSubmatch(GitVersion) 12 | if m == nil || len(m) < 3 { 13 | return "v0.0.0+" + GitVersion 14 | } 15 | 16 | if m[2] == "" { 17 | return m[1] 18 | } 19 | return m[1] + "+" + m[2] 20 | } 21 | -------------------------------------------------------------------------------- /internal/version_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestString(t *testing.T) { 8 | testCases := map[string]string{ 9 | "f176923-dirty": "v0.0.0+f176923-dirty", 10 | "f176923": "v0.0.0+f176923", 11 | "v0.1.3-1-g518f694": "v0.1.3+1-g518f694", 12 | "v0.1.3-1-g518f694-dirty": "v0.1.3+1-g518f694-dirty", 13 | "v0.1.3": "v0.1.3", 14 | "v10.31.93": "v10.31.93", 15 | } 16 | 17 | for v, want := range testCases { 18 | GitVersion = v 19 | if Version() != want { 20 | t.Errorf("got %q, want %q", Version(), want) 21 | } 22 | 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /store.go: -------------------------------------------------------------------------------- 1 | package gonudb 2 | 3 | import ( 4 | "io" 5 | "time" 6 | 7 | "github.com/go-logr/logr" 8 | 9 | "github.com/iand/gonudb/internal" 10 | ) 11 | 12 | func CreateStore(datPath, keyPath, logPath string, appnum, salt uint64, blockSize int, loadFactor float64) error { 13 | 
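	// The parameters are validated by internal.CreateStore: blockSize must not
	// exceed the internal MaxBlockSize and must allow at least one entry per
	// bucket, and loadFactor must lie strictly between 0 and 1. A random UID
	// for the new store is generated here via internal.NewUID().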
return internal.CreateStore(datPath, keyPath, logPath, appnum, internal.NewUID(), salt, blockSize, loadFactor) 14 | } 15 | 16 | func OpenStore(datPath, keyPath, logPath string, options *StoreOptions) (*Store, error) { 17 | if options == nil { 18 | options = &StoreOptions{} 19 | } 20 | if options.Logger.GetSink() == nil { options.Logger = logr.Discard() } 21 | 22 | if options.BackgroundSyncInterval < time.Second { 23 | options.BackgroundSyncInterval = time.Second 24 | } 25 | 26 | store, err := internal.OpenStore( 27 | datPath, 28 | keyPath, 29 | logPath, 30 | options.BackgroundSyncInterval, 31 | options.Logger, 32 | options.Logger.V(LogLevelDiagnostics), 33 | options.Logger.V(LogLevelTrace), 34 | ) 35 | if err != nil { 36 | return nil, err 37 | } 38 | return &Store{store: store}, nil 39 | } 40 | 41 | type StoreOptions struct { 42 | Logger logr.Logger 43 | BackgroundSyncInterval time.Duration 44 | } 45 | 46 | type Store struct { 47 | store *internal.Store 48 | } 49 | 50 | func (s *Store) Close() error { 51 | return s.store.Close() 52 | } 53 | 54 | // Insert adds a key/value pair to the store. Zero length values are not supported. 55 | func (s *Store) Insert(key string, value []byte) error { 56 | return s.store.Insert(key, value) 57 | } 58 | 59 | func (s *Store) Flush() error { 60 | s.store.Flush() 61 | return s.store.Err() 62 | } 63 | 64 | // Fetch fetches the value associated with key from the store. 65 | func (s *Store) Fetch(key string) ([]byte, error) { 66 | r, err := s.store.FetchReader(key) 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | d, err := io.ReadAll(r) 72 | if err != nil { 73 | return nil, err 74 | } 75 | return d, nil 76 | } 77 | 78 | // FetchReader returns a reader that may be used to read the value associated with a key. 79 | func (s *Store) FetchReader(key string) (io.Reader, error) { 80 | return s.store.FetchReader(key) 81 | } 82 | 83 | // Exists reports whether a data record is associated with a key. 84 | func (s *Store) Exists(key string) (bool, error) { 85 | return s.store.Exists(key) 86 | } 87 | 88 | // DataSize returns the size of the data record associated with a key. 89 | func (s *Store) DataSize(key string) (int64, error) { 90 | return s.store.DataSize(key) 91 | } 92 | 93 | // Err returns an error if the store is in an error state, nil otherwise 94 | func (s *Store) Err() error { 95 | return s.store.Err() 96 | } 97 | 98 | // RecordScanner returns a scanner that may be used to iterate the datastore's values. The caller is responsible 99 | // for calling Close on the scanner after use. 100 | func (s *Store) RecordScanner() *RecordScanner { 101 | return &RecordScanner{scanner: s.store.DataFile().RecordScanner()} 102 | } 103 | 104 | // BucketScanner returns a scanner that may be used to iterate the datastore's index of keys. The caller is responsible 105 | // for calling Close on the scanner after use. 106 | func (s *Store) BucketScanner() *BucketScanner { 107 | return &BucketScanner{ 108 | scanner: s.store.KeyFile().BucketScanner(s.store.DataFile()), 109 | bucket: internal.NewBucket(int(s.store.KeyFile().BlockSize()), make([]byte, int(s.store.KeyFile().BlockSize()))), 110 | } 111 | } 112 | 113 | // Version returns the version number of the store's data format. 114 | func (s *Store) Version() uint16 { 115 | return s.store.DataFile().Header.Version 116 | } 117 | 118 | // UID returns the store's unique identifier that was generated on creation.
119 | func (s *Store) UID() uint64 { 120 | return s.store.DataFile().Header.UID 121 | } 122 | 123 | // AppNum returns the store's application-defined integer constant. 124 | func (s *Store) AppNum() uint64 { 125 | return s.store.DataFile().Header.AppNum 126 | } 127 | 128 | // BlockSize returns the physical size of a key file bucket. 129 | func (s *Store) BlockSize() uint16 { 130 | return s.store.KeyFile().Header.BlockSize 131 | } 132 | 133 | // RecordCount returns the number of data records in the store. 134 | func (s *Store) RecordCount() int { 135 | return s.store.RecordCount() 136 | } 137 | 138 | // Rate returns the data write rate in bytes per second. 139 | func (s *Store) Rate() float64 { 140 | return s.store.Rate() 141 | } 142 | 143 | // RecordScanner implements a sequential scan through a store's data file. Successive calls to the Next method will step through 144 | // the records in the file. Note that the scanner does not include data buffered in memory. Call Flush to ensure all 145 | // written data is visible to the scanner. 146 | type RecordScanner struct { 147 | scanner *internal.RecordScanner 148 | } 149 | 150 | // Next reads the next record in sequence, including spill records. It returns false 151 | // if it encounters an error or there are no more records to read. 152 | func (s *RecordScanner) Next() bool { 153 | return s.scanner.Next() 154 | } 155 | 156 | // Reader returns an io.Reader that may be used to read the data from the record. Should not be called until Next has been called. 157 | // The Reader is only valid for use until the next call to Next(). 158 | func (s *RecordScanner) Reader() io.Reader { 159 | return s.scanner.Reader() 160 | } 161 | 162 | // IsSpill reports whether the current record is a bucket spill 163 | func (s *RecordScanner) IsSpill() bool { 164 | return s.scanner.IsSpill() 165 | } 166 | 167 | // IsData reports whether the current record is a data record 168 | func (s *RecordScanner) IsData() bool { 169 | return s.scanner.IsData() 170 | } 171 | 172 | // Size returns the size of the current record's data in bytes 173 | func (s *RecordScanner) Size() int64 { 174 | return s.scanner.Size() 175 | } 176 | 177 | // RecordSize returns the number of bytes occupied by the current record including its header 178 | func (s *RecordScanner) RecordSize() int64 { 179 | return s.scanner.RecordSize() 180 | } 181 | 182 | // Key returns the key of the current record 183 | func (s *RecordScanner) Key() string { 184 | return s.scanner.Key() 185 | } 186 | 187 | // Err returns the first non-EOF error that was encountered by the RecordScanner. 188 | func (s *RecordScanner) Err() error { 189 | return s.scanner.Err() 190 | } 191 | 192 | func (s *RecordScanner) Close() error { 193 | return s.scanner.Close() 194 | } 195 | 196 | // BucketScanner implements a sequential scan through a key file. Successive calls to the Next method will step through 197 | // the buckets in the file, including spilled buckets in the data file. 198 | type BucketScanner struct { 199 | scanner *internal.BucketScanner 200 | bucket *internal.Bucket 201 | } 202 | 203 | // Next reads the next bucket in sequence, including spills to the data store. It returns false 204 | // if it encounters an error or there are no more buckets to read. 205 | func (s *BucketScanner) Next() bool { 206 | return s.scanner.Next() 207 | } 208 | 209 | // Index returns the index of the current bucket. Should not be called until Next has been called. Spill buckets
211 | func (s *BucketScanner) Index() int { 212 | return s.scanner.Index() 213 | } 214 | 215 | // IsSpill reports whether the current bucket was read from a data store spill. 216 | func (s *BucketScanner) IsSpill() bool { 217 | return s.scanner.IsSpill() 218 | } 219 | 220 | // Bucket returns the current bucket. Should not be called until Next has been called. The bucket is backed by data 221 | // that may be overwritten with a call to Next so should not be retained. 222 | func (s *BucketScanner) Bucket() *Bucket { 223 | s.scanner.Bucket().CopyInto(s.bucket) 224 | return &Bucket{bucket: s.bucket} 225 | } 226 | 227 | // Err returns the first non-EOF error that was encountered by the BucketScanner. 228 | func (s *BucketScanner) Err() error { 229 | return s.scanner.Err() 230 | } 231 | 232 | // Close closes the underlying reader used by the scanner. 233 | func (s *BucketScanner) Close() error { 234 | return s.scanner.Close() 235 | } 236 | 237 | // A Bucket contains a set of key entries that form part of the data store's index. 238 | type Bucket struct { 239 | bucket *internal.Bucket 240 | } 241 | 242 | // Has reports whether the bucket contains an entry with the given hash. 243 | func (b *Bucket) Has(h uint64) bool { 244 | return b.bucket.Has(h) 245 | } 246 | 247 | // Count returns the number of key entries in the bucket 248 | func (b *Bucket) Count() int { 249 | return b.bucket.Count() 250 | } 251 | 252 | // ActualSize returns the serialized bucket size, excluding empty space 253 | func (b *Bucket) ActualSize() int { 254 | return b.bucket.ActualSize() 255 | } 256 | 257 | // BlockSize returns the physical size of a key file bucket. 258 | func (b *Bucket) BlockSize() int { 259 | return b.bucket.BlockSize() 260 | } 261 | 262 | // IsEmpty reports whether the bucket has no key entries. 263 | func (b *Bucket) IsEmpty() bool { 264 | return b.bucket.IsEmpty() 265 | } 266 | 267 | // Capacity returns the maximum number of key entries that can be held in the bucket. 268 | func (b *Bucket) Capacity() int { 269 | return b.bucket.Capacity() 270 | } 271 | 272 | // Spill returns the offset in the store's data file of the next spill record, or 0 if there is no spill. 273 | func (b *Bucket) Spill() int64 { 274 | return b.bucket.Spill() 275 | } 276 | 277 | // HashRange returns the range of hashed keys that are contained in the bucket. 278 | func (b *Bucket) HashRange() (uint64, uint64) { 279 | return b.bucket.LowestHash(), b.bucket.HighestHash() 280 | } 281 | 282 | // Entry returns the record for a key entry 283 | func (b *Bucket) Entry(idx int) BucketEntry { 284 | // TODO: bounds check 285 | e := b.bucket.Entry(idx) 286 | return BucketEntry{ 287 | Offset: e.Offset, 288 | Size: e.Size, 289 | Hash: e.Hash, 290 | } 291 | } 292 | 293 | type BucketEntry struct { 294 | // Offset is the position in the store's data file of the data record. 295 | Offset int64 296 | 297 | // Size is the size of the data value within the data record. 298 | Size int64 299 | 300 | // Hash is the hashed version of the key used to insert the data value.
301 | Hash uint64 302 | } 303 | 304 | func NewSalt() uint64 { 305 | return internal.NewSalt() 306 | } 307 | 308 | func Version() string { 309 | return internal.Version() 310 | } 311 | 312 | var ( 313 | ErrAppNumMismatch = internal.ErrAppNumMismatch 314 | ErrDataMissing = internal.ErrDataMissing 315 | ErrDataTooLarge = internal.ErrDataTooLarge 316 | ErrDifferentVersion = internal.ErrDifferentVersion 317 | ErrHashMismatch = internal.ErrHashMismatch 318 | ErrInvalidBlockSize = internal.ErrInvalidBlockSize 319 | ErrInvalidBucketCount = internal.ErrInvalidBucketCount 320 | ErrInvalidCapacity = internal.ErrInvalidCapacity 321 | ErrInvalidDataRecord = internal.ErrInvalidDataRecord 322 | ErrInvalidKeySize = internal.ErrInvalidKeySize 323 | ErrInvalidLoadFactor = internal.ErrInvalidLoadFactor 324 | ErrInvalidRecordSize = internal.ErrInvalidRecordSize 325 | ErrInvalidSpill = internal.ErrInvalidSpill 326 | ErrKeyExists = internal.ErrKeyExists 327 | ErrKeyMismatch = internal.ErrKeyMismatch 328 | ErrKeyMissing = internal.ErrKeyMissing 329 | ErrKeyNotFound = internal.ErrKeyNotFound 330 | ErrKeySizeMismatch = internal.ErrKeySizeMismatch 331 | ErrKeyTooLarge = internal.ErrKeyTooLarge 332 | ErrKeyWrongSize = internal.ErrKeyWrongSize // deprecated: use ErrKeyMissing and ErrKeyTooLarge instead 333 | ErrNotDataFile = internal.ErrNotDataFile 334 | ErrNotKeyFile = internal.ErrNotKeyFile 335 | ErrNotLogFile = internal.ErrNotLogFile 336 | ErrShortKeyFile = internal.ErrShortKeyFile 337 | ErrUIDMismatch = internal.ErrUIDMismatch 338 | ) 339 | 340 | const ( 341 | LogLevelDiagnostics = 1 // log level increment for diagnostics logging 342 | LogLevelTrace = 2 // log level increment for verbose tracing 343 | ) 344 | --------------------------------------------------------------------------------
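To close, a minimal end-to-end sketch of the public API defined in store.go above (CreateStore, OpenStore, Insert, Flush, Fetch, Close). It is illustrative only: the file names, the block size of 4096 and the load factor of 0.5 are assumed example values, not recommendations taken from the repository.

// gonudbexample.go (hypothetical, not part of the repository).
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/iand/gonudb"
)

func main() {
	const appnum = 1 // application-defined constant stored in the file headers

	// Create the three files that make up a store: data, key and log.
	err := gonudb.CreateStore("sample.dat", "sample.key", "sample.log", appnum, gonudb.NewSalt(), 4096, 0.5)
	if err != nil {
		log.Fatalf("create store: %v", err)
	}

	s, err := gonudb.OpenStore("sample.dat", "sample.key", "sample.log", &gonudb.StoreOptions{
		BackgroundSyncInterval: time.Second,
	})
	if err != nil {
		log.Fatalf("open store: %v", err)
	}
	defer s.Close()

	if err := s.Insert("greeting", []byte("hello nudb")); err != nil {
		log.Fatalf("insert: %v", err)
	}

	// Flush so the record is committed and visible to the scanners.
	if err := s.Flush(); err != nil {
		log.Fatalf("flush: %v", err)
	}

	v, err := s.Fetch("greeting")
	if err != nil {
		log.Fatalf("fetch: %v", err)
	}
	fmt.Printf("%s\n", v)
}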