├── .github └── workflows │ ├── autodep.yml │ ├── check.yml │ ├── gotip.yml │ ├── test.yml │ └── version.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── cmd ├── gonudbadmin │ ├── info.go │ ├── main.go │ └── verify.go └── gonudbsample │ └── gonudbsample.go ├── go.mod ├── go.sum ├── internal ├── bucket.go ├── bucket_test.go ├── bucketcache.go ├── bucketcache_test.go ├── cache.go ├── const32.go ├── const64.go ├── context.go ├── error.go ├── field.go ├── file.go ├── file_test.go ├── format.go ├── hasher.go ├── pool.go ├── store.go ├── syscall.go ├── syscallunix.go ├── verify.go ├── version.go └── version_test.go └── store.go /.github/workflows/autodep.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | name: Update package dependencies 4 | jobs: 5 | dep_update: 6 | runs-on: ubuntu-latest 7 | timeout-minutes: 10 8 | 9 | steps: 10 | - name: Install Go 11 | uses: actions/setup-go@v3 12 | with: 13 | go-version: 1.20.x 14 | 15 | - name: Checkout 16 | uses: actions/checkout@v3 17 | 18 | - name: Update minor and patch-level dependencies 19 | run: go get -t -u ./... 20 | 21 | - name: Tidy 22 | run: go mod tidy 23 | 24 | - name: Create pull request 25 | uses: peter-evans/create-pull-request@v4 26 | with: 27 | title: "Update package dependencies + tidy" 28 | body: | 29 | This is a change initiated automatically on a weekly basis by a 30 | GitHub Action that updates the projects dependencies to their latest 31 | minor and patch-level versions. This lets us stay up to date 32 | incrementally so that updates are less effort to get merged compared 33 | to large monolithic updates, and gets us security updates more 34 | expediently. 35 | 36 | If the build passes, you are probably A-OK to merge and deploy this. 37 | If not, try to dig into what's not working and see if you can fix it 38 | so that the dep train stays on its rails. 39 | 40 | Note that although minor/patch level changes are handled 41 | automatically, notably major version changes like you'd find in 42 | stripe-go are not and those upgrades need to be performed manually. 43 | That should theoretically not be a problem if fixes are backported 44 | to all previous majors, but in practice they are often not, so it's 45 | worthwhile to occasionally look for new majors and integrate them. 46 | branch: "autodep" 47 | branch-suffix: "timestamp" 48 | commit-message: | 49 | Update package dependencies + tidy 50 | 51 | Weekly update to the project's package dependencies initiated by an 52 | automatic GitHub Action running on cron. Keeps upgrades less of a 53 | monolithic task and lets security-related patches trickle in more 54 | quickly. 
55 | author: "Bot " 56 | committer: "Bot " 57 | delete-branch: true 58 | draft: true 59 | reviewers: | 60 | iand 61 | assignees: | 62 | iand 63 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | pull_request: 4 | types: [opened, reopened, ready_for_review, synchronize] 5 | push: 6 | branches: 7 | - main 8 | - master 9 | name: Run checks 10 | jobs: 11 | check: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Install Go 15 | uses: actions/setup-go@v3 16 | with: 17 | go-version: 1.20.x 18 | - name: Get StaticCheck 19 | run: go install honnef.co/go/tools/cmd/staticcheck@v0.4.2 # Version 2023.1.2 (v0.4.2) 20 | - name: Checkout 21 | uses: actions/checkout@v3 22 | with: 23 | submodules: recursive 24 | - name: Gomod 25 | run: | 26 | go mod tidy 27 | if [[ -n $(git ls-files --other --exclude-standard --directory -- go.sum) ]]; then 28 | echo "go.sum was added by go mod tidy" 29 | exit 1 30 | fi 31 | git diff --exit-code -- go.sum go.mod 32 | - name: Gofmt 33 | if: ${{ success() || failure() }} # run this step even if the previous one failed 34 | run: | 35 | out=$(gofmt -s -l .) 36 | if [[ -n "$out" ]]; then 37 | echo $out | awk '{print "::error file=" $0 ",line=0,col=0::File is not gofmt-ed."}' 38 | exit 1 39 | fi 40 | - name: Vet 41 | if: ${{ success() || failure() }} # run this step even if the previous one failed 42 | run: go vet ./... 43 | - name: StaticCheck 44 | if: ${{ success() || failure() }} # run this step even if the previous one failed 45 | run: staticcheck ./... 46 | -------------------------------------------------------------------------------- /.github/workflows/gotip.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | pull_request: 4 | types: [opened, reopened, ready_for_review, synchronize] 5 | push: 6 | branches: 7 | - main 8 | - master 9 | name: Test Go tip 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Install Go 15 | uses: actions/setup-go@v3 16 | with: 17 | go-version: 1.20.x 18 | - name: Install Go tip 19 | run: | 20 | go install golang.org/dl/gotip@latest 21 | gotip download 22 | gotip version 23 | - name: Checkout 24 | uses: actions/checkout@v3 25 | with: 26 | submodules: recursive 27 | - id: Cache 28 | uses: actions/cache@v3 29 | with: 30 | path: | 31 | ~/go/pkg/mod # Module download cache 32 | ~/.cache/go-build # Build cache (Linux) 33 | key: ubuntu-go-${{ hashFiles('**/go.sum') }} 34 | restore-keys: | 35 | ubuntu-go- 36 | - name: Dependencies 37 | run: gotip mod download 38 | if: steps.cache.outputs.cache-hit != 'true' 39 | - name: Test 40 | run: gotip test ./... 41 | - name: Test 32 bit 42 | env: 43 | GOARCH: 386 44 | run: gotip test ./... 45 | - name: Test with race detector 46 | run: gotip test -race ./... 
47 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | pull_request: 4 | types: [opened, reopened, ready_for_review, synchronize] 5 | push: 6 | branches: 7 | - main 8 | - master 9 | name: Run tests 10 | jobs: 11 | test: 12 | strategy: 13 | matrix: 14 | go-version: [1.19.x, 1.20.x] 15 | os: ["ubuntu", "windows", "macos"] 16 | runs-on: ${{ matrix.os }}-latest 17 | steps: 18 | - name: Install Go 19 | uses: actions/setup-go@v3 20 | with: 21 | go-version: ${{ matrix.go-version }} 22 | - name: Checkout 23 | uses: actions/checkout@v3 24 | with: 25 | submodules: recursive 26 | - id: Cache 27 | uses: actions/cache@v3 28 | with: 29 | path: | 30 | ~/go/pkg/mod # Module download cache 31 | ~/.cache/go-build # Build cache (Linux) 32 | ~/Library/Caches/go-build # Build cache (Mac) 33 | '%LocalAppData%\go-build' # Build cache (Windows) 34 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 35 | restore-keys: | 36 | ${{ runner.os }}-go- 37 | - name: Dependencies 38 | run: go mod download 39 | if: steps.cache.outputs.cache-hit != 'true' 40 | - name: Test 41 | run: go test ./... 42 | - name: Test 32 bit 43 | if: ${{ matrix.os != 'macos' }} # can't run 32 bit tests on OSX. 44 | env: 45 | GOARCH: 386 46 | run: go test ./... 47 | - name: Test with race detector 48 | if: ${{ matrix.os == 'ubuntu' }} # speed things up. Windows and OSX VMs are slow 49 | run: go test -race ./... 50 | -------------------------------------------------------------------------------- /.github/workflows/version.yml: -------------------------------------------------------------------------------- 1 | name: Bump version 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Bump version and push tag 14 | id: tag_version 15 | uses: mathieudutour/github-tag-action@v6.1 16 | with: 17 | github_token: ${{ secrets.GITHUB_TOKEN }} 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | gonudbadmin 2 | !gonudbadmin/ 3 | gonudbsample 4 | !gonudbsample/ 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | These are changes that will probably be included in the next release. 
10 | 11 | ### Added 12 | 13 | ### Fixed 14 | 15 | ### Changed 16 | 17 | ### Removed 18 | 19 | ## [v0.2.1] - 2020-11-23 20 | 21 | ### Added 22 | * Add Exists and DataSize methods to Store 23 | 24 | ## [v0.2.0] - 2020-11-23 25 | 26 | ### Added 27 | 28 | * Support variable length keys 29 | * Compute number of records when store is opened 30 | 31 | ### Changed 32 | 33 | * Simplify method signatures of Fetch and FetchReader (breaking change) 34 | 35 | ## [v0.1.1] - 2020-11-18 36 | 37 | ### Fixed 38 | 39 | * Reduce scope of insert locking to unblock reads when inserts are throttled 40 | * Fix bucket scanner not detecting EOF 41 | 42 | ## [v0.1.0] - 2020-11-18 43 | 44 | Initial release 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gonudb 2 | 3 | Gonudb is an append-only key/value datastore written in Go. 4 | 5 | [![Check Status](https://github.com/iand/gonudb/actions/workflows/check.yml/badge.svg)](https://github.com/iand/gonudb/actions/workflows/check.yml) 6 | [![Test Status](https://github.com/iand/gonudb/actions/workflows/test.yml/badge.svg)](https://github.com/iand/gonudb/actions/workflows/test.yml) 7 | [![Go Report Card](https://goreportcard.com/badge/github.com/iand/gonudb)](https://goreportcard.com/report/github.com/iand/gonudb) 8 | [![go.dev reference](https://img.shields.io/badge/go.dev-reference-007d9c?logo=go&logoColor=white)](https://pkg.go.dev/github.com/iand/gonudb) 9 | 10 | ## Overview 11 | 12 | Gonudb is a port of [NuDB](https://github.com/CPPAlliance/NuDB), a C++ key/value store. 13 | 14 | A Gonudb datastore comprises a data file holding keys and values stored sequentially and an 15 | accompanying key file which forms an on-disk hash table indexing the values stored in the data 16 | file. 
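In code, the typical flow is to create the store files once, then open the store and insert or fetch records. The sketch below is based on the sample application in `cmd/gonudbsample`; error handling is trimmed, the file names are placeholders, and passing an empty `StoreOptions` instead of configuring a logger is an assumption made for brevity.

```go
package main

import (
	"fmt"
	"log"

	"github.com/iand/gonudb"
)

func main() {
	// Create the data, key and log files. Parameters mirror cmd/gonudbsample:
	// app number 1, a fresh salt, 4096-byte blocks and a 0.5 load factor.
	if err := gonudb.CreateStore("sample.dat", "sample.key", "sample.log", 1, gonudb.NewSalt(), 4096, 0.5); err != nil {
		log.Println("create (store may already exist):", err)
	}

	s, err := gonudb.OpenStore("sample.dat", "sample.key", "sample.log", &gonudb.StoreOptions{})
	if err != nil {
		log.Fatal(err)
	}
	defer s.Close()

	// Inserts are append-only; writing an existing key returns gonudb.ErrKeyExists.
	if err := s.Insert("key000000001", []byte("hello")); err != nil {
		log.Fatal(err)
	}

	data, err := s.Fetch("key000000001") // gonudb.ErrKeyNotFound for missing keys
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("key000000001 => %s\n", data)
}
```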
17 | 18 | During commits a log file is created to store bookkeeping information that may be used to repair the 19 | datastore in the event of a failure. 20 | 21 | The data file and key file are independent and a new key file may be rebuilt from the data file if 22 | necessary, potentially with an alternate hashing scheme. 23 | 24 | 25 | ## Installation 26 | 27 | Execute `go get github.com/iand/gonudb` within a Go module directory to add it to your module. 28 | 29 | ## Usage 30 | 31 | Gonudb is primarily a library. Import package `github.com/iand/gonudb` to use it. A sample application 32 | that demonstrates some simple inserts and fetches is provided in `cmd/gonudbsample`. 33 | 34 | An admin tool can be found in `cmd/gonudbadmin`, which provides some commands for inspecting and 35 | validating the files that comprise a store. 36 | 37 | Install by executing `go install github.com/iand/gonudb/cmd/gonudbadmin` from the root of the 38 | repository. 39 | 40 | - `gonudbadmin info` can be used to view characteristic information about any of the three files used by gonudb (data, key and log files). 41 | - `gonudbadmin verify` verifies the consistency of data and key files and shows some statistics on the data they hold. 42 | 43 | 44 | ## Design 45 | 46 | Gonudb shares the design ideals that motivated NuDB (but see Status below): 47 | 48 | 1. Writes should not block reads. 49 | 2. Reads should be limited only by the SSD's IOPS limit. 50 | 3. A read for a non-present key should require one IOP. 51 | 4. A read for a present key whose data can be read in a single IOP should only require two IOPs, one to figure out where it is and one to read it in. 52 | 53 | Keys and values are stored sequentially in an append-only data file. The data file begins with a 54 | header that contains characteristic information about the file such as the version of the encoding 55 | scheme, a datastore identifier and an application identifier. Data records follow immediately on 56 | from the header. Each record comprises the size of the value, followed by the size of the key, 57 | followed by the key, followed by the value data. The data file is considered to be immutable and 58 | there are no delete or mutate operations. 59 | 60 | Inserts are buffered in memory and periodically committed to disk. Clients are throttled based on 61 | the rate at which data is flushed to disk. Values are immediately discoverable via their key and 62 | may be read from memory or disk. 63 | 64 | Keys are hashed and written to buckets stored in the key file. As with the data file, the key file 65 | begins with a header containing characteristic information. The key file's version, datastore 66 | identifier and application identifier must match those in the data file header. Additionally, the 67 | key file header contains the hash salt, the block size of each bucket and the target load factor 68 | which determines when a bucket should be split. Buckets are a fixed size and written sequentially 69 | after the header, which enables them to be easily located by index. 70 | 71 | Each bucket is assigned a range of hash values and entries within a bucket are ordered by hash. When 72 | the number of entries in a bucket exceeds the load factor it undergoes a split and its entries are 73 | rehashed across the pair of buckets using the linear hashing algorithm. When a bucket exceeds its 74 | capacity it is spilled to the data file and replaced with an empty bucket containing a pointer to 75 | the spill record.
A spilled bucket may spill multiple times with the resulting spill records 76 | forming a linked list in the data file. 77 | 78 | In the best case, reading a record from the datastore requires one read from the key file to load the 79 | relevant bucket and a read from the data file to access the value. Additional reads from the data 80 | file may be required to resolve hash collisions and to load spill records. Read performance is 81 | independent of the size of the datastore, and the size of buckets in the key file may be tuned to 82 | the block size of the underlying physical media so that loading a bucket may only take a single IOP. 83 | 84 | ## Status 85 | 86 | Version 0.1.0 is an alpha-quality functional port of the original NuDB suitable for testing with 87 | expendable loads. Correctness and safety have been prioritised over performance. Locks are broad in scope 88 | and treat reads and writes with equal priority. Future work will tune the locking behaviour to 89 | better meet the goal of writes not blocking reads. 90 | 91 | High-priority tasks include: 92 | 93 | * Add recovery from partial writes 94 | * Add a rekey admin function 95 | * Tune the locking strategy 96 | 97 | Additional features under consideration: 98 | 99 | * Allow alternate hashing functions to be specified. 100 | 101 | ## Author 102 | 103 | Go port written by: 104 | 105 | * [Ian Davis](http://github.com/iand) - 106 | 107 | ## License 108 | 109 | Distributed under the Boost Software License, Version 1.0. (See accompanying file [LICENSE](LICENSE) 110 | or copy at http://www.boost.org/LICENSE_1_0.txt) 111 | -------------------------------------------------------------------------------- /cmd/gonudbadmin/info.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | 8 | "github.com/urfave/cli/v2" 9 | 10 | "github.com/iand/gonudb/internal" 11 | ) 12 | 13 | var infoCommand = &cli.Command{ 14 | Name: "info", 15 | Usage: "Report information about one or more gonudb files.", 16 | ArgsUsage: "...", 17 | Action: info, 18 | Flags: []cli.Flag{ 19 | logLevelFlag, 20 | }, 21 | } 22 | 23 | func info(cc *cli.Context) error { 24 | if err := initLogging(cc); err != nil { 25 | return cli.Exit(err.Error(), 1) 26 | } 27 | 28 | if cc.Args().Len() == 0 { 29 | cli.ShowAppHelpAndExit(cc, 1) 30 | } 31 | 32 | for i := 0; i < cc.Args().Len(); i++ { 33 | path := cc.Args().Get(i) 34 | Print(os.Stdout, &section{ 35 | label: path, 36 | rows: infoFile(path), 37 | }) 38 | } 39 | 40 | return nil 41 | } 42 | 43 | func infoFile(path string) []kv { 44 | f, err := os.Open(path) 45 | if err != nil { 46 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to open file: %w", err)}} 47 | } 48 | defer f.Close() 49 | 50 | fStat, err := f.Stat() 51 | if err != nil { 52 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to stat file: %w", err)}} 53 | } 54 | 55 | var typeHeader [8]byte 56 | 57 | if _, err := f.ReadAt(typeHeader[:], 0); err != nil { 58 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to read file type: %w", err)}} 59 | } 60 | 61 | switch string(typeHeader[:]) { 62 | case string(internal.DatFileHeaderType): 63 | var dh internal.DatFileHeader 64 | if err := dh.DecodeFrom(f); err != nil { 65 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to read data file header: %w", err)}} 66 | } 67 | 68 | return []kv{ 69 | {Key: "Type", Value: string(dh.Type[:])}, 70 | {Key: "Version", Value: dh.Version}, 71 | {Key: "UID", Value: dh.UID}, 72 | {Key: "AppNum", Value:
dh.AppNum}, 73 | {Key: "File size", Value: Bytes(fStat.Size())}, 74 | } 75 | case string(internal.KeyFileHeaderType): 76 | var kh internal.KeyFileHeader 77 | if err := kh.DecodeFrom(f, fStat.Size()); err != nil { 78 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to read key file header: %w", err)}} 79 | } 80 | 81 | return []kv{ 82 | {Key: "Type", Value: string(kh.Type[:])}, 83 | {Key: "Version", Value: kh.Version}, 84 | {Key: "UID", Value: kh.UID}, 85 | {Key: "AppNum", Value: kh.AppNum}, 86 | {Key: "Salt", Value: kh.Salt}, 87 | {Key: "Pepper", Value: kh.Pepper}, 88 | {Key: "BlockSize", Value: Bytes(kh.BlockSize)}, 89 | {Key: "Capacity", Value: kh.Capacity}, 90 | {Key: "Buckets", Value: kh.Buckets}, 91 | {Key: "Modulus", Value: kh.Modulus}, 92 | {Key: "File size", Value: Bytes(fStat.Size())}, 93 | } 94 | case string(internal.LogFileHeaderType): 95 | var lh internal.LogFileHeader 96 | if err := lh.DecodeFrom(f); err != nil { 97 | return []kv{{Key: "Error", Value: fmt.Errorf("failed to read log file header: %w", err)}} 98 | } 99 | 100 | return []kv{ 101 | {Key: "Type", Value: string(lh.Type[:])}, 102 | {Key: "Version", Value: lh.Version}, 103 | {Key: "UID", Value: lh.UID}, 104 | {Key: "AppNum", Value: lh.AppNum}, 105 | {Key: "Salt", Value: lh.Salt}, 106 | {Key: "Pepper", Value: lh.Pepper}, 107 | {Key: "BlockSize", Value: Bytes(lh.BlockSize)}, 108 | {Key: "KeyFileSize", Value: Bytes(lh.KeyFileSize)}, 109 | {Key: "DatFileSize", Value: Bytes(lh.DatFileSize)}, 110 | {Key: "File size", Value: Bytes(fStat.Size())}, 111 | } 112 | default: 113 | return []kv{{Key: "Error", Value: fmt.Sprintf("unknown file type: %s", string(typeHeader[:]))}} 114 | } 115 | } 116 | 117 | type section struct { 118 | label string 119 | rows []kv 120 | } 121 | 122 | type kv struct { 123 | Key string 124 | Value interface{} 125 | } 126 | 127 | func Print(w io.Writer, s *section) { 128 | fmt.Fprintln(w, s.label) 129 | maxKeyLen := 0 130 | for _, r := range s.rows { 131 | if len(r.Key) > maxKeyLen { 132 | maxKeyLen = len(r.Key) 133 | } 134 | } 135 | 136 | fmtstr := fmt.Sprintf(" %%-%ds: %%v\n", maxKeyLen) 137 | for _, r := range s.rows { 138 | fmt.Fprintf(w, fmtstr, r.Key, r.Value) 139 | } 140 | fmt.Fprintln(w) 141 | } 142 | 143 | type Bytes int64 144 | 145 | func (b Bytes) String() string { 146 | return fmt.Sprintf("%d bytes", b) 147 | } 148 | -------------------------------------------------------------------------------- /cmd/gonudbadmin/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/go-logr/logr" 8 | "github.com/iand/logfmtr" 9 | "github.com/urfave/cli/v2" 10 | 11 | "github.com/iand/gonudb/internal" 12 | ) 13 | 14 | func main() { 15 | app := &cli.App{ 16 | Name: "gonudbadmin", 17 | HelpName: "gonudbadmin", 18 | Usage: "Administer a gonudb store", 19 | Flags: []cli.Flag{ 20 | logLevelFlag, 21 | }, 22 | Version: internal.Version(), 23 | Commands: []*cli.Command{ 24 | infoCommand, 25 | verifyCommand, 26 | }, 27 | } 28 | 29 | if err := app.Run(os.Args); err != nil { 30 | fmt.Fprintln(os.Stderr, err.Error()) 31 | os.Exit(1) 32 | } 33 | } 34 | 35 | var logLevelFlag = &cli.IntFlag{ 36 | Name: "log-level", 37 | Aliases: []string{"ll"}, 38 | Usage: "Set verbosity of logs to `LEVEL` (higher is more verbose)", 39 | Value: 0, 40 | } 41 | 42 | var logger = logr.Discard() 43 | 44 | func initLogging(cc *cli.Context) error { 45 | if cc.IsSet("log-level") { 46 | logfmtr.SetVerbosity(cc.Int("log-level")) 47 | 
loggerOpts := logfmtr.DefaultOptions() 48 | loggerOpts.Humanize = true 49 | loggerOpts.Colorize = true 50 | logfmtr.UseOptions(loggerOpts) 51 | logger = logfmtr.NewNamed("gonudb") 52 | } 53 | return nil 54 | } 55 | -------------------------------------------------------------------------------- /cmd/gonudbadmin/verify.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/urfave/cli/v2" 7 | 8 | "github.com/iand/gonudb/internal" 9 | ) 10 | 11 | var verifyCommand = &cli.Command{ 12 | Name: "verify", 13 | Usage: "Verify consistency of data and key files.", 14 | ArgsUsage: " ", 15 | Action: verify, 16 | Description: "" + 17 | "Verifies the consistency and integrity of a store by analysing its data and\n" + 18 | "key files. Reports statistical information about the data distribution and\n" + 19 | "efficiency of the store.", 20 | Flags: []cli.Flag{ 21 | logLevelFlag, 22 | }, 23 | } 24 | 25 | func verify(cc *cli.Context) error { 26 | if err := initLogging(cc); err != nil { 27 | return cli.Exit(err.Error(), 1) 28 | } 29 | 30 | if cc.Args().Len() == 0 { 31 | cli.ShowAppHelpAndExit(cc, 1) 32 | } 33 | 34 | if cc.Args().Len() != 2 { 35 | return cli.Exit("expecting paths to data and key files", 1) 36 | } 37 | datPath := cc.Args().Get(0) 38 | keyPath := cc.Args().Get(1) 39 | 40 | info, err := internal.VerifyStore(datPath, keyPath, logger) 41 | if err != nil { 42 | return cli.Exit(err.Error(), 1) 43 | } 44 | 45 | Print(os.Stdout, &section{ 46 | label: "Store metadata", 47 | rows: []kv{ 48 | {Key: "Version", Value: info.Version}, 49 | {Key: "UID", Value: info.UID}, 50 | {Key: "AppNum", Value: info.AppNum}, 51 | }, 52 | }) 53 | 54 | Print(os.Stdout, &section{ 55 | label: "Data file", 56 | rows: []kv{ 57 | {Key: "DatFileSize", Value: Bytes(info.DatFileSize)}, 58 | {Key: "ValueCountInUse", Value: info.ValueCountInUse}, 59 | {Key: "ValueCountTotal", Value: info.ValueCountTotal}, 60 | {Key: "ValueBytesInUse", Value: Bytes(info.ValueBytesInUse)}, 61 | {Key: "ValueBytesTotal", Value: Bytes(info.ValueBytesTotal)}, 62 | {Key: "RecordBytesInUse", Value: Bytes(info.RecordBytesInUse)}, 63 | {Key: "RecordBytesTotal", Value: Bytes(info.RecordBytesTotal)}, 64 | {Key: "SpillCountInUse", Value: info.SpillCountInUse}, 65 | {Key: "SpillCountTotal", Value: info.SpillCountTotal}, 66 | {Key: "SpillBytesInUse", Value: Bytes(info.SpillBytesInUse)}, 67 | {Key: "SpillBytesTotal", Value: Bytes(info.SpillBytesTotal)}, 68 | {Key: "AverageFetch", Value: info.AverageFetch}, 69 | {Key: "Waste", Value: info.Waste}, 70 | {Key: "Overhead", Value: info.Overhead}, 71 | {Key: "ActualLoad", Value: info.ActualLoad}, 72 | }, 73 | }) 74 | 75 | Print(os.Stdout, &section{ 76 | label: "Key file", 77 | rows: []kv{ 78 | {Key: "KeyFileSize", Value: Bytes(info.KeyFileSize)}, 79 | {Key: "Salt", Value: info.Salt}, 80 | {Key: "Pepper", Value: info.Pepper}, 81 | {Key: "BlockSize", Value: Bytes(info.BlockSize)}, 82 | {Key: "LoadFactor", Value: info.LoadFactor}, 83 | {Key: "Capacity", Value: info.Capacity}, 84 | {Key: "Buckets", Value: info.Buckets}, 85 | {Key: "BucketSize", Value: info.BucketSize}, 86 | {Key: "Modulus", Value: info.Modulus}, 87 | {Key: "KeyCount", Value: info.KeyCount}, 88 | }, 89 | }) 90 | 91 | return nil 92 | } 93 | -------------------------------------------------------------------------------- /cmd/gonudbsample/gonudbsample.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 |
"errors" 6 | "fmt" 7 | "math/rand" 8 | "os" 9 | "path/filepath" 10 | "sync" 11 | "time" 12 | 13 | "github.com/iand/logfmtr" 14 | "github.com/urfave/cli/v2" 15 | 16 | "github.com/iand/gonudb" 17 | ) 18 | 19 | func main() { 20 | app := &cli.App{ 21 | Name: "gonudbsample", 22 | HelpName: "gonudbsample", 23 | Usage: "Sample application for Gonudb", 24 | UsageText: "gonudbsample [options] ", 25 | ArgsUsage: "", 26 | Flags: []cli.Flag{ 27 | logLevelFlag, 28 | concurrentFlag, 29 | }, 30 | Action: run, 31 | Version: gonudb.Version(), 32 | HideHelpCommand: true, 33 | } 34 | 35 | if err := app.Run(os.Args); err != nil { 36 | fmt.Fprintln(os.Stderr, err.Error()) 37 | os.Exit(1) 38 | } 39 | } 40 | 41 | var logLevelFlag = &cli.IntFlag{ 42 | Name: "log-level", 43 | Aliases: []string{"ll"}, 44 | Usage: "Set verbosity of logs to `LEVEL` (higher is more verbose)", 45 | Value: 0, 46 | } 47 | 48 | var concurrentFlag = &cli.IntFlag{ 49 | Name: "concurrent", 50 | Aliases: []string{"c"}, 51 | Usage: "Perform some concurrent inserts and fetches for `SECONDS`.", 52 | Value: 0, 53 | } 54 | 55 | func run(cc *cli.Context) error { 56 | if cc.Args().Len() != 1 { 57 | return cli.Exit("Missing directory for Gonudb store", 1) 58 | } 59 | 60 | rand.Seed(time.Now().Unix()) 61 | 62 | path := cc.Args().Get(0) 63 | 64 | logfmtr.SetVerbosity(cc.Int("log-level")) 65 | loggerOpts := logfmtr.DefaultOptions() 66 | loggerOpts.Humanize = true 67 | loggerOpts.Colorize = true 68 | loggerOpts.AddCaller = true 69 | logfmtr.UseOptions(loggerOpts) 70 | 71 | datPath := filepath.Join(path, "gonudb.dat") 72 | keyPath := filepath.Join(path, "gonudb.key") 73 | logPath := filepath.Join(path, "gonudb.log") 74 | 75 | fmt.Printf("Creating store in directory %s\n", path) 76 | err := gonudb.CreateStore( 77 | datPath, 78 | keyPath, 79 | logPath, 80 | 1, 81 | gonudb.NewSalt(), 82 | 4096, 83 | 0.5, 84 | ) 85 | if err != nil { 86 | var pathErr *os.PathError 87 | if errors.As(err, &pathErr) && os.IsExist(pathErr) { 88 | fmt.Println("Store already exists") 89 | } else { 90 | return cli.Exit("Failed to create store: "+err.Error(), 1) 91 | } 92 | } 93 | 94 | fmt.Println("Opening store") 95 | s, err := gonudb.OpenStore(datPath, keyPath, logPath, &gonudb.StoreOptions{Logger: logfmtr.NewNamed("gonudb")}) 96 | if err != nil { 97 | return cli.Exit("Failed to open store: "+err.Error(), 1) 98 | } 99 | 100 | defer s.Close() 101 | 102 | keys := make([]string, 500) 103 | for i := range keys { 104 | keys[i] = fmt.Sprintf("key%09d", i) 105 | } 106 | 107 | fmt.Printf("Inserting %d samples\n", len(keys)) 108 | duplicates := 0 109 | for i := range keys { 110 | if err := s.Insert(keys[i], []byte(fmt.Sprintf("this is data for %05d", i))); err != nil { 111 | if errors.Is(err, gonudb.ErrKeyExists) { 112 | duplicates++ 113 | continue 114 | } 115 | return cli.Exit("Failed to insert: "+err.Error(), 1) 116 | } 117 | } 118 | fmt.Printf("Skipped %d duplicates\n", duplicates) 119 | 120 | fmt.Println("Finding random keys") 121 | for i := 0; i < len(keys)/25; i++ { 122 | key := keys[rand.Intn(len(keys))] 123 | data, err := s.Fetch(key) 124 | if err != nil { 125 | return cli.Exit("Failed to fetch "+key+": "+err.Error(), 1) 126 | } 127 | fmt.Printf("Found %s => %s\n", key, string(data)) 128 | } 129 | 130 | if cc.Int("concurrent") == 0 { 131 | return nil 132 | } 133 | fmt.Println("Running some concurrent inserts and fetches") 134 | ctx, cancel := context.WithTimeout(cc.Context, time.Duration(cc.Int("concurrent"))*time.Second) 135 | defer cancel() 136 | 137 | var wg sync.WaitGroup 138 | 
wg.Add(7) 139 | 140 | for i := 0; i < 2; i++ { 141 | go func(ctx context.Context, wg *sync.WaitGroup) { 142 | defer wg.Done() 143 | for { 144 | select { 145 | case <-ctx.Done(): 146 | return 147 | default: 148 | } 149 | for i := 0; i < 500; i++ { 150 | key := fmt.Sprintf("%08d", rand.Intn(10000000)) 151 | data := fmt.Sprintf("this is data for %s", key) 152 | if err := s.Insert(key, []byte(data)); err != nil && !errors.Is(err, gonudb.ErrKeyExists) { 153 | fmt.Printf("Failed to insert: %v\n", err) 154 | return 155 | } 156 | } 157 | fmt.Println("Wrote 500 records") 158 | time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) 159 | } 160 | }(ctx, &wg) 161 | } 162 | 163 | for i := 0; i < 5; i++ { 164 | go func(tx context.Context, wg *sync.WaitGroup) { 165 | defer wg.Done() 166 | for { 167 | select { 168 | case <-ctx.Done(): 169 | return 170 | default: 171 | } 172 | for i := 0; i < 500; i++ { 173 | key := fmt.Sprintf("%08d", rand.Intn(10000000)) 174 | _, err := s.Fetch(key) 175 | if err != nil && !errors.Is(err, gonudb.ErrKeyNotFound) { 176 | fmt.Printf("Failed to fetch: %v\n", err) 177 | return 178 | } 179 | } 180 | fmt.Println("Read 500 records") 181 | time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) 182 | } 183 | }(ctx, &wg) 184 | } 185 | 186 | wg.Wait() 187 | 188 | return nil 189 | } 190 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/iand/gonudb 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/OneOfOne/xxhash v1.2.8 7 | github.com/go-logr/logr v1.3.0 8 | github.com/iand/logfmtr v0.2.2 9 | github.com/urfave/cli/v2 v2.25.7 10 | golang.org/x/sys v0.14.0 11 | ) 12 | 13 | require ( 14 | github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect 15 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 16 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8= 2 | github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= 3 | github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= 4 | github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 5 | github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 6 | github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= 7 | github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 8 | github.com/iand/logfmtr v0.2.2 h1:Z9ZFUiDLAw2jv+fGocKJ1d7T14imolI/7RvsUSTcfJ0= 9 | github.com/iand/logfmtr v0.2.2/go.mod h1:SByvTWmBzMDZ/7pDjWTMhH/rbgu+UKFl76l0aTxuL3I= 10 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 11 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 12 | github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs= 13 | github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= 14 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= 15 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod 
h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= 16 | golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= 17 | golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 18 | -------------------------------------------------------------------------------- /internal/bucket.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "io" 7 | "sync" 8 | ) 9 | 10 | const ( 11 | // Bucket header 12 | BucketHeaderSize = SizeUint16 + // Count 13 | SizeUint48 // Spill 14 | 15 | // Bucket item 16 | BucketEntrySize = SizeUint48 + // Offset 17 | SizeUint48 + // Size 18 | SizeUint64 // Hash 19 | ) 20 | 21 | func BucketIndex(h uint64, buckets int, modulus uint64) int { 22 | n := h % modulus 23 | if n >= uint64(buckets) { 24 | n -= modulus / 2 25 | } 26 | return int(n) 27 | } 28 | 29 | // BucketSize returns the actual size of a bucket. 30 | // This can be smaller than the block size. 31 | func BucketSize(capacity int) int { 32 | // Bucket Record 33 | return SizeUint16 + // Count 34 | SizeUint48 + // Spill 35 | capacity*(SizeUint48+ // Offset 36 | SizeUint48+ // Size 37 | SizeUint64) // Hash 38 | } 39 | 40 | // BucketCapacity returns the number of entries that fit in a bucket 41 | func BucketCapacity(blockSize int) int { 42 | if blockSize < BucketEntrySize || blockSize < BucketHeaderSize { 43 | return 0 44 | } 45 | return (blockSize - BucketHeaderSize) / BucketEntrySize 46 | } 47 | 48 | type Entry struct { 49 | Offset int64 // 48 bits 50 | Size int64 // 48 bits 51 | Hash uint64 52 | } 53 | 54 | // TODO: evaluate tradeoffs of using a slice of Entry instead of blob 55 | type Bucket struct { 56 | // Read only 57 | blockSize int // Size of a key file block 58 | 59 | mu sync.Mutex // protects following including writes into blob slice 60 | count int // Current key count 61 | spill int64 // Offset of next spill record or 0 62 | blob []byte 63 | } 64 | 65 | // bucket takes ownership of blob 66 | func NewBucket(blockSize int, blob []byte) *Bucket { 67 | if len(blob) != blockSize { 68 | panic("bucket blob size must equal block size") 69 | } 70 | 71 | b := &Bucket{ 72 | blockSize: blockSize, 73 | blob: blob, 74 | } 75 | 76 | b.initFromHeader() 77 | 78 | return b 79 | } 80 | 81 | func (b *Bucket) Lock() { 82 | b.mu.Lock() 83 | } 84 | 85 | func (b *Bucket) Unlock() { 86 | b.mu.Unlock() 87 | } 88 | 89 | // LowerBound returns index of entry with hash 90 | // equal to or greater than the given hash. 
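// The entries are kept sorted by hash, so this is a binary search; if no entry has a hash greater
// than or equal to h, the bucket's entry count is returned.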
91 | func (b *Bucket) lowerBound(h uint64) int { 92 | // expects caller to hold lock 93 | const w = BucketEntrySize 94 | 95 | // offset to first hash 96 | const offset = BucketHeaderSize + 97 | // first bucket Entry 98 | SizeUint48 + // Offset 99 | SizeUint48 // Size 100 | 101 | first := 0 102 | count := b.count 103 | for count > 0 { 104 | step := count / 2 105 | i := first + step 106 | h1 := binary.BigEndian.Uint64(b.blob[offset+i*w : offset+i*w+SizeUint64]) 107 | if h1 < h { 108 | first = i + 1 109 | count -= step + 1 110 | } else { 111 | count = step 112 | } 113 | } 114 | 115 | return first 116 | } 117 | 118 | func (b *Bucket) Has(h uint64) bool { 119 | const w = BucketEntrySize 120 | 121 | // offset to first hash 122 | const offset = BucketHeaderSize + 123 | // first bucket Entry 124 | SizeUint48 + // Offset 125 | SizeUint48 // Size 126 | 127 | b.mu.Lock() 128 | defer b.mu.Unlock() 129 | 130 | first := 0 131 | count := b.count 132 | for count > 0 { 133 | step := count / 2 134 | i := first + step 135 | h1 := binary.BigEndian.Uint64(b.blob[offset+i*w : offset+i*w+SizeUint64]) 136 | if h1 == h { 137 | return true 138 | } else if h1 < h { 139 | first = i + 1 140 | count -= step + 1 141 | } else { 142 | count = step 143 | } 144 | } 145 | 146 | return false 147 | } 148 | 149 | // Count returns the number of entries in the bucket 150 | func (b *Bucket) Count() int { 151 | b.mu.Lock() 152 | defer b.mu.Unlock() 153 | return b.count 154 | } 155 | 156 | // ActualSize returns the serialized bucket size, excluding empty space 157 | func (b *Bucket) ActualSize() int { 158 | b.mu.Lock() 159 | defer b.mu.Unlock() 160 | return BucketSize(b.count) 161 | } 162 | 163 | func (b *Bucket) BlockSize() int { 164 | return b.blockSize 165 | } 166 | 167 | func (b *Bucket) IsEmpty() bool { 168 | b.mu.Lock() 169 | defer b.mu.Unlock() 170 | return b.count == 0 171 | } 172 | 173 | func (b *Bucket) IsFull() bool { 174 | b.mu.Lock() 175 | defer b.mu.Unlock() 176 | return b.count >= BucketCapacity(b.blockSize) 177 | } 178 | 179 | func (b *Bucket) Capacity() int { 180 | return BucketCapacity(b.blockSize) 181 | } 182 | 183 | // Spill returns offset of next spill record or 0 184 | func (b *Bucket) Spill() int64 { 185 | b.mu.Lock() 186 | defer b.mu.Unlock() 187 | return b.spill 188 | } 189 | 190 | // SetSpill sets the offset of next spill record 191 | func (b *Bucket) SetSpill(v int64) { 192 | b.mu.Lock() 193 | defer b.mu.Unlock() 194 | b.setSpill(v) 195 | } 196 | 197 | func (b *Bucket) LowestHash() uint64 { 198 | b.mu.Lock() 199 | defer b.mu.Unlock() 200 | if b.count == 0 { 201 | return 0 202 | } 203 | pos := BucketHeaderSize 204 | return DecodeUint64(b.blob[pos+SizeUint48*2 : pos+SizeUint48*2+SizeUint64]) 205 | } 206 | 207 | func (b *Bucket) HighestHash() uint64 { 208 | b.mu.Lock() 209 | defer b.mu.Unlock() 210 | if b.count == 0 { 211 | return 0 212 | } 213 | pos := BucketHeaderSize + (b.count-1)*BucketEntrySize 214 | return DecodeUint64(b.blob[pos+SizeUint48*2 : pos+SizeUint48*2+SizeUint64]) 215 | } 216 | 217 | func (b *Bucket) clear() { 218 | // expects caller to hold lock 219 | b.count = 0 220 | b.spill = 0 221 | for i := range b.blob { 222 | b.blob[i] = 0 223 | } 224 | } 225 | 226 | // Returns the record for a key entry without bounds checking. 
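// The caller is expected to hold the bucket lock and to pass an index less than Count();
// Entry performs no locking of its own.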
227 | func (b *Bucket) Entry(idx int) Entry { 228 | return b.entry(idx) 229 | } 230 | 231 | func (b *Bucket) entry(idx int) Entry { 232 | // expects caller to hold lock 233 | 234 | // Start position of item in blob 235 | pos := BucketHeaderSize + idx*BucketEntrySize 236 | 237 | return Entry{ 238 | Offset: int64(DecodeUint48(b.blob[pos : pos+SizeUint48])), 239 | Size: int64(DecodeUint48(b.blob[pos+SizeUint48 : pos+SizeUint48*2])), 240 | Hash: DecodeUint64(b.blob[pos+SizeUint48*2 : pos+SizeUint48*2+SizeUint64]), 241 | } 242 | } 243 | 244 | // Erase an entry by index 245 | func (b *Bucket) erase(idx int) { 246 | // expects caller to hold lock 247 | 248 | // Start position of item in blob 249 | pos := BucketHeaderSize + idx*BucketEntrySize 250 | // Start position of next item in blob 251 | next := BucketHeaderSize + (idx+1)*BucketEntrySize 252 | // Position immediately after last entry 253 | end := next + (b.count-idx-1)*BucketEntrySize 254 | 255 | b.count-- 256 | if b.count < 0 { 257 | panic("logic error: erase resulted in negative bucket count") 258 | } 259 | 260 | if idx < b.count { 261 | // Shift remainder down 262 | copy(b.blob[pos:], b.blob[next:end]) 263 | } 264 | 265 | // TODO: bounds checks 266 | zeroLower := BucketHeaderSize + b.count*BucketEntrySize 267 | zeroUpper := BucketHeaderSize + (b.count+1)*(BucketEntrySize) - 1 268 | 269 | if zeroLower < 0 || zeroLower > len(b.blob)-1 || zeroUpper < 0 || zeroUpper > len(b.blob)-1 { 270 | panic(fmt.Sprintf("logic error: zeroing [%d:%d] out of bounds of blob length %d", zeroLower, zeroUpper, len(b.blob))) 271 | } 272 | 273 | for i := zeroLower; i < zeroUpper; i++ { 274 | b.blob[i] = 0 275 | } 276 | 277 | b.update() 278 | } 279 | 280 | // Insert an entry 281 | func (b *Bucket) insert(offset int64, size int64, hash uint64) { 282 | // expects caller to hold lock 283 | 284 | idx := b.lowerBound(hash) 285 | 286 | // Position we want to insert the item in blob 287 | pos := BucketHeaderSize + idx*BucketEntrySize 288 | // Start position of next item in blob 289 | next := BucketHeaderSize + (idx+1)*BucketEntrySize 290 | // Position immediately after last entry 291 | end := next + (b.count-idx)*BucketEntrySize 292 | 293 | // Make room for the item 294 | copy(b.blob[next:], b.blob[pos:end]) 295 | b.count++ 296 | b.update() 297 | 298 | EncodeUint48(b.blob[pos:pos+SizeUint48], uint64(offset)) 299 | EncodeUint48(b.blob[pos+SizeUint48:pos+SizeUint48*2], uint64(size)) 300 | EncodeUint64(b.blob[pos+SizeUint48*2:pos+SizeUint48*2+SizeUint64], hash) 301 | } 302 | 303 | // update updates the bucket header 304 | func (b *Bucket) update() { 305 | // expects caller to hold lock 306 | EncodeUint16(b.blob[0:SizeUint16], uint16(b.count)) 307 | EncodeUint48(b.blob[SizeUint16:SizeUint16+SizeUint48], uint64(b.spill)) 308 | } 309 | 310 | func (b *Bucket) initFromHeader() { 311 | // expects caller to hold lock 312 | b.count = int(DecodeUint16(b.blob[0:SizeUint16])) 313 | b.spill = int64(DecodeUint48(b.blob[SizeUint16 : SizeUint16+SizeUint48])) 314 | } 315 | 316 | func (b *Bucket) CopyInto(b2 *Bucket) { 317 | copy(b2.blob, b.blob) 318 | b.initFromHeader() 319 | } 320 | 321 | // WriteTo writes data to w until all entries in the bucket are written or an error occurs. 
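// Only the occupied portion of the bucket, BucketSize(count) bytes, is written; the unused
// remainder of the block is omitted.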
322 | func (b *Bucket) WriteTo(w io.Writer) (int64, error) { 323 | b.mu.Lock() 324 | defer b.mu.Unlock() 325 | actualSize := BucketSize(b.count) 326 | n, err := w.Write(b.blob[:actualSize]) 327 | if err == nil && n != actualSize { 328 | err = io.ErrShortWrite 329 | } 330 | 331 | return int64(n), err 332 | } 333 | 334 | // LoadFrom reads data containing entries from r, padding the rest of the bucket with zero bytes. 335 | func (b *Bucket) LoadFrom(spill int64, s Spiller) error { 336 | b.mu.Lock() 337 | defer b.mu.Unlock() 338 | 339 | if spill == 0 { 340 | panic("attempt to load from zero spill") 341 | } 342 | 343 | if err := s.LoadBucketSpill(spill, b.blob); err != nil { 344 | return fmt.Errorf("load bucket spill (at %d): %w", spill, err) 345 | } 346 | b.initFromHeader() 347 | 348 | return nil 349 | } 350 | 351 | // StoreFullTo writes until the entire blob is written (including zero padding) or an error occurs. 352 | func (b *Bucket) storeFullTo(w io.Writer) (int64, error) { 353 | n, err := w.Write(b.blob) 354 | if err == nil && n != len(b.blob) { 355 | err = io.ErrShortWrite 356 | } 357 | return int64(n), err 358 | } 359 | 360 | // LoadFullFrom reads the entire blob from r 361 | func (b *Bucket) loadFullFrom(r io.Reader) error { 362 | _, err := io.ReadFull(r, b.blob) 363 | if err != nil { 364 | return err 365 | } 366 | b.initFromHeader() 367 | 368 | return nil 369 | } 370 | 371 | // MaybeSpill spills the bucket if full. Bucket is cleared after it spills. 372 | func (b *Bucket) maybeSpill(sp Spiller) (bool, error) { 373 | // expects caller to hold lock 374 | 375 | if b.count < BucketCapacity(b.blockSize) { 376 | return false, nil 377 | } 378 | 379 | actualSize := BucketSize(b.count) 380 | offset, err := sp.AppendBucketSpill(b.blob[:actualSize]) 381 | if err != nil { 382 | return true, fmt.Errorf("write bucket spill: %w", err) 383 | } 384 | 385 | b.clear() 386 | 387 | // Set the spill location to be the start of the blob so a bucket can simply be read in from that spot 388 | b.setSpill(offset) 389 | 390 | return true, nil 391 | } 392 | 393 | func (b *Bucket) setSpill(spill int64) { 394 | b.spill = spill 395 | EncodeUint48(b.blob[SizeUint16:SizeUint16+SizeUint48], uint64(b.spill)) 396 | } 397 | -------------------------------------------------------------------------------- /internal/bucket_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | ) 10 | 11 | func TestEntry(t *testing.T) { 12 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*2) 13 | 14 | entries := []Entry{ 15 | { 16 | Offset: 15555, 17 | Size: 14444, 18 | Hash: 19999, 19 | }, 20 | { 21 | Offset: 25555, 22 | Size: 24444, 23 | Hash: 29999, 24 | }, 25 | } 26 | 27 | pos := BucketHeaderSize 28 | for i := range entries { 29 | EncodeUint48(blob[pos:pos+SizeUint48], uint64(entries[i].Offset)) 30 | EncodeUint48(blob[pos+SizeUint48:pos+SizeUint48*2], uint64(entries[i].Size)) 31 | EncodeUint64(blob[pos+SizeUint48*2:pos+SizeUint48*2+SizeUint64], entries[i].Hash) 32 | pos += BucketEntrySize 33 | } 34 | 35 | b := Bucket{ 36 | blockSize: len(blob), 37 | blob: blob, 38 | } 39 | 40 | for i := range entries { 41 | got := b.entry(i) 42 | if got != entries[i] { 43 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 44 | } 45 | } 46 | } 47 | 48 | func TestInsert(t *testing.T) { 49 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*2) 50 | 51 | entries := []Entry{ 52 | { 53 | Offset: 15555, 54 
| Size: 14444, 55 | Hash: 19999, 56 | }, 57 | { 58 | Offset: 25555, 59 | Size: 24444, 60 | Hash: 29999, 61 | }, 62 | } 63 | 64 | b := Bucket{ 65 | blockSize: len(blob), 66 | blob: blob, 67 | } 68 | 69 | for i := range entries { 70 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 71 | } 72 | 73 | for i := range entries { 74 | got := b.entry(i) 75 | if got != entries[i] { 76 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 77 | } 78 | } 79 | } 80 | 81 | func TestErase(t *testing.T) { 82 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*2) 83 | 84 | entries := []Entry{ 85 | { 86 | Offset: 15555, 87 | Size: 14444, 88 | Hash: 19999, 89 | }, 90 | { 91 | Offset: 25555, 92 | Size: 24444, 93 | Hash: 29999, 94 | }, 95 | } 96 | 97 | b := Bucket{ 98 | blockSize: len(blob), 99 | blob: blob, 100 | } 101 | 102 | for i := range entries { 103 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 104 | } 105 | 106 | b.erase(0) 107 | 108 | for i := 1; i < len(entries); i++ { 109 | got := b.entry(i - 1) 110 | if got != entries[i] { 111 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 112 | } 113 | } 114 | } 115 | 116 | func TestWriteReadFull(t *testing.T) { 117 | entries := []Entry{ 118 | { 119 | Offset: 15555, 120 | Size: 14444, 121 | Hash: 19999, 122 | }, 123 | { 124 | Offset: 25555, 125 | Size: 24444, 126 | Hash: 29999, 127 | }, 128 | } 129 | size := BucketHeaderSize + BucketEntrySize*2 130 | 131 | b := Bucket{ 132 | blockSize: size, 133 | blob: make([]byte, size), 134 | } 135 | 136 | for i := range entries { 137 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 138 | } 139 | 140 | buf := &bytes.Buffer{} 141 | _, err := b.storeFullTo(buf) 142 | if err != nil { 143 | t.Fatalf("unexpected error during write: %v", err) 144 | } 145 | 146 | b2 := Bucket{ 147 | blockSize: size, 148 | blob: make([]byte, size), 149 | } 150 | 151 | err = b2.loadFullFrom(buf) 152 | if err != nil { 153 | t.Fatalf("unexpected error during read: %v", err) 154 | } 155 | 156 | for i := 0; i < len(entries); i++ { 157 | got := b2.entry(i) 158 | if got != entries[i] { 159 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 160 | } 161 | } 162 | } 163 | 164 | func TestBucketHas(t *testing.T) { 165 | entries := []Entry{ 166 | { 167 | Offset: 15555, 168 | Size: 14444, 169 | Hash: 19999, 170 | }, 171 | { 172 | Offset: 25555, 173 | Size: 24444, 174 | Hash: 29999, 175 | }, 176 | } 177 | size := BucketHeaderSize + BucketEntrySize*2 178 | 179 | b := Bucket{ 180 | blockSize: size, 181 | blob: make([]byte, size), 182 | } 183 | 184 | for i := range entries { 185 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 186 | } 187 | 188 | for i := range entries { 189 | if !b.Has(entries[i].Hash) { 190 | t.Errorf("did not find hash %d", entries[i].Hash) 191 | } 192 | } 193 | 194 | if b.Has(112233) { 195 | t.Errorf("unexpectedly found hash %d", 112233) 196 | } 197 | } 198 | 199 | func TestEntryDuplicateHashes(t *testing.T) { 200 | entries := []Entry{ 201 | { 202 | Offset: 15555, 203 | Size: 14444, 204 | Hash: 19999, 205 | }, 206 | { 207 | Offset: 25555, 208 | Size: 24444, 209 | Hash: 29999, 210 | }, 211 | { 212 | Offset: 35555, 213 | Size: 34444, 214 | Hash: 19999, 215 | }, 216 | { 217 | Offset: 45555, 218 | Size: 44444, 219 | Hash: 19999, 220 | }, 221 | } 222 | 223 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*len(entries)) 224 | b := Bucket{ 225 | blockSize: len(blob), 226 | blob: blob, 227 | } 228 | 229 | for i := range entries { 230 | b.insert(entries[i].Offset, 
entries[i].Size, entries[i].Hash) 231 | } 232 | 233 | testCases := []struct { 234 | hash uint64 235 | count int 236 | }{ 237 | { 238 | hash: 19999, 239 | count: 3, 240 | }, 241 | { 242 | hash: 29999, 243 | count: 1, 244 | }, 245 | { 246 | hash: 39999, 247 | count: 0, 248 | }, 249 | } 250 | 251 | for _, tc := range testCases { 252 | var got []Entry 253 | for i := b.lowerBound(tc.hash); i < b.count; i++ { 254 | entry := b.entry(i) 255 | if entry.Hash != tc.hash { 256 | break 257 | } 258 | got = append(got, entry) 259 | } 260 | 261 | if len(got) != tc.count { 262 | t.Errorf("%d: got %d, wanted %d", tc.hash, len(got), tc.count) 263 | } 264 | 265 | } 266 | } 267 | 268 | func TestBucketMaybeSpill(t *testing.T) { 269 | entries := []Entry{ 270 | { 271 | Offset: 15555, 272 | Size: 14444, 273 | Hash: 19999, 274 | }, 275 | { 276 | Offset: 25555, 277 | Size: 24444, 278 | Hash: 29999, 279 | }, 280 | } 281 | blockSize := BucketHeaderSize + BucketEntrySize*len(entries) 282 | 283 | tmpdir, err := os.MkdirTemp("", "gonudb.*") 284 | if err != nil { 285 | t.Fatalf("unexpected error creating temp directory: %v", err) 286 | } 287 | defer os.RemoveAll(tmpdir) 288 | 289 | t.Run("empty bucket does not spill", func(t *testing.T) { 290 | b := Bucket{ 291 | blockSize: blockSize, 292 | blob: make([]byte, blockSize), 293 | } 294 | tmpfile := filepath.Join(tmpdir, "empty") 295 | 296 | if err := CreateDataFile(tmpfile, 5, 6); err != nil { 297 | t.Fatalf("unexpected error creating data file: %v", err) 298 | } 299 | 300 | df, err := OpenDataFile(tmpfile) 301 | if err != nil { 302 | t.Fatalf("unexpected error opening data file: %v", err) 303 | } 304 | 305 | _, err = b.maybeSpill(df) 306 | if err != nil { 307 | t.Fatalf("unexpected error during write: %v", err) 308 | } 309 | df.Close() 310 | written, err := os.ReadFile(tmpfile) 311 | if err != nil { 312 | t.Fatalf("unexpected error reading data file: %v", err) 313 | } 314 | 315 | if len(written) != DatFileHeaderSize { 316 | t.Errorf("got %d bytes written, wanted %d", len(written), DatFileHeaderSize) 317 | } 318 | }) 319 | 320 | t.Run("half full bucket does not spill", func(t *testing.T) { 321 | b := Bucket{ 322 | blockSize: blockSize, 323 | blob: make([]byte, blockSize), 324 | } 325 | b.insert(entries[0].Offset, entries[0].Size, entries[0].Hash) 326 | 327 | tmpfile := filepath.Join(tmpdir, "half") 328 | 329 | if err := CreateDataFile(tmpfile, 5, 6); err != nil { 330 | t.Fatalf("unexpected error creating data file: %v", err) 331 | } 332 | 333 | df, err := OpenDataFile(tmpfile) 334 | if err != nil { 335 | t.Fatalf("unexpected error opening data file: %v", err) 336 | } 337 | 338 | _, err = b.maybeSpill(df) 339 | if err != nil { 340 | t.Fatalf("unexpected error during write: %v", err) 341 | } 342 | df.Close() 343 | written, err := os.ReadFile(tmpfile) 344 | if err != nil { 345 | t.Fatalf("unexpected error reading data file: %v", err) 346 | } 347 | 348 | if len(written) != DatFileHeaderSize { 349 | t.Errorf("got %d bytes written, wanted %d", len(written), DatFileHeaderSize) 350 | } 351 | }) 352 | 353 | t.Run("full bucket does spill", func(t *testing.T) { 354 | b := Bucket{ 355 | blockSize: blockSize, 356 | blob: make([]byte, blockSize), 357 | } 358 | b.insert(entries[0].Offset, entries[0].Size, entries[0].Hash) 359 | b.insert(entries[1].Offset, entries[1].Size, entries[1].Hash) 360 | 361 | tmpfile := filepath.Join(tmpdir, "full") 362 | 363 | if err := CreateDataFile(tmpfile, 5, 6); err != nil { 364 | t.Fatalf("unexpected error creating data file: %v", err) 365 | } 366 | 367 | 
df, err := OpenDataFile(tmpfile) 368 | if err != nil { 369 | t.Fatalf("unexpected error opening data file: %v", err) 370 | } 371 | 372 | _, err = b.maybeSpill(df) 373 | if err != nil { 374 | t.Fatalf("unexpected error during write: %v", err) 375 | } 376 | df.Close() 377 | 378 | written, err := os.ReadFile(tmpfile) 379 | if err != nil { 380 | t.Fatalf("unexpected error reading data file: %v", err) 381 | } 382 | 383 | if len(written) != DatFileHeaderSize+BucketHeaderSize+blockSize { 384 | t.Errorf("got %d bytes written, wanted %d", len(written), BucketHeaderSize+blockSize) 385 | } 386 | if b.Spill() != DatFileHeaderSize { 387 | t.Errorf("got spill %d, wanted %d", b.Spill(), DatFileHeaderSize) 388 | } 389 | if b.Count() != 0 { 390 | t.Errorf("got %d entries in bucket, wanted %d", b.Count(), 0) 391 | } 392 | 393 | marker := DecodeUint48(written[DatFileHeaderSize : DatFileHeaderSize+SizeUint48]) 394 | if marker != 0 { 395 | t.Errorf("got marker %x, wanted %x", marker, 0) 396 | } 397 | 398 | size := int64(DecodeUint16(written[DatFileHeaderSize+SizeUint48 : DatFileHeaderSize+SizeUint48+SizeUint16])) 399 | if size != int64(blockSize) { 400 | t.Errorf("got size %d, wanted %d", size, blockSize) 401 | } 402 | }) 403 | 404 | t.Run("read from spill", func(t *testing.T) { 405 | b := Bucket{ 406 | blockSize: blockSize, 407 | blob: make([]byte, blockSize), 408 | } 409 | b.insert(entries[0].Offset, entries[0].Size, entries[0].Hash) 410 | b.insert(entries[1].Offset, entries[1].Size, entries[1].Hash) 411 | 412 | tmpfile := filepath.Join(tmpdir, "read") 413 | 414 | if err := CreateDataFile(tmpfile, 5, 6); err != nil { 415 | t.Fatalf("unexpected error creating data file: %v", err) 416 | } 417 | 418 | df, err := OpenDataFile(tmpfile) 419 | if err != nil { 420 | t.Fatalf("unexpected error opening data file: %v", err) 421 | } 422 | 423 | _, err = b.maybeSpill(df) 424 | if err != nil { 425 | t.Fatalf("unexpected error during write: %v", err) 426 | } 427 | df.Close() 428 | written, err := os.ReadFile(tmpfile) 429 | if err != nil { 430 | t.Fatalf("unexpected error reading data file: %v", err) 431 | } 432 | 433 | r := io.NewSectionReader(bytes.NewReader(written), DatFileHeaderSize+SizeUint48+SizeUint16, int64(blockSize)) 434 | 435 | b2 := Bucket{ 436 | blockSize: blockSize, 437 | blob: make([]byte, blockSize), 438 | } 439 | 440 | err = b2.loadFullFrom(r) 441 | if err != nil { 442 | t.Fatalf("unexpected error during read: %v", err) 443 | } 444 | if b2.Count() != 2 { 445 | t.Errorf("got count %d, wanted %d", b.Count(), 2) 446 | } 447 | 448 | for i := 0; i < len(entries); i++ { 449 | got := b2.entry(i) 450 | if got != entries[i] { 451 | t.Errorf("%d: got %+v, wanted %+v", i, got, entries[i]) 452 | } 453 | } 454 | }) 455 | } 456 | -------------------------------------------------------------------------------- /internal/bucketcache.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "sync" 9 | 10 | "github.com/go-logr/logr" 11 | ) 12 | 13 | type Spiller interface { 14 | LoadBucketSpill(int64, []byte) error 15 | AppendBucketSpill([]byte) (int64, error) 16 | Flush() error 17 | } 18 | 19 | type BucketCache struct { 20 | mu sync.Mutex 21 | buckets []*Bucket 22 | dirty []bool 23 | 24 | modulus uint64 // hash modulus 25 | 26 | bucketSize int 27 | count int // number of entries 28 | threshold int // target average number of entries per bucket 29 | 30 | tlogger logr.Logger // trace logger 31 | } 32 | 33 | func 
(c *BucketCache) Insert(offset int64, size int64, hash uint64, df Spiller) error { 34 | c.mu.Lock() 35 | defer c.mu.Unlock() 36 | 37 | c.count++ 38 | if c.count/len(c.buckets) > c.threshold { 39 | 40 | if uint64(len(c.buckets)) == c.modulus { 41 | c.modulus *= 2 42 | } 43 | 44 | // Split using linear hashing 45 | idxSplit := len(c.buckets) - int(c.modulus/2) // index of the bucket to be split 46 | c.dirty[idxSplit] = true 47 | 48 | idxNew := len(c.buckets) // new bucket will be added at end of list 49 | c.buckets = append(c.buckets, NewBucket(c.bucketSize, make([]byte, c.bucketSize))) 50 | c.dirty = append(c.dirty, true) 51 | 52 | if err := c.split(idxSplit, idxNew, df); err != nil { 53 | return fmt.Errorf("split bucket: %w", err) 54 | } 55 | } 56 | 57 | idx := c.bucketIndex(hash) 58 | b := c.buckets[idx] 59 | spilled, err := b.maybeSpill(df) 60 | 61 | if spilled && c.tlogger.Enabled() { 62 | c.tlogger.Info("bucket spilled", "index", idx, "hash", hash, "buckets", len(c.buckets), "modulus", c.modulus, "spill", b.spill) 63 | } 64 | if err != nil { 65 | return fmt.Errorf("maybe spill: %w", err) 66 | } 67 | 68 | // If bucket spilled then it will be empty 69 | b.insert(offset, size, hash) 70 | 71 | c.dirty[idx] = true 72 | return nil 73 | } 74 | 75 | // assumes caller holds lock 76 | func (c *BucketCache) bucketIndex(h uint64) int { 77 | n := h % c.modulus 78 | if n >= uint64(len(c.buckets)) { 79 | n -= c.modulus / 2 80 | } 81 | return int(n) 82 | } 83 | 84 | // assumes caller holds lock 85 | func (c *BucketCache) split(idxSplit, idxNew int, df Spiller) error { 86 | bSplit := c.buckets[idxSplit] 87 | // Trivial case: split empty bucket 88 | if bSplit.count == 0 && bSplit.spill == 0 { 89 | return nil 90 | } 91 | 92 | bNew := c.buckets[idxNew] 93 | 94 | for i := 0; i < bSplit.count; { 95 | e := bSplit.entry(i) 96 | idx := c.bucketIndex(e.Hash) 97 | if c.tlogger.Enabled() { 98 | c.tlogger.Info("entry rehash", "hash", e.Hash, "rehash_index", idx) 99 | } 100 | 101 | if idx != idxSplit && idx != idxNew { 102 | // panic due to a logic error. Something very bad must have happened. 103 | panic(fmt.Sprintf("bucket index of rehashed key (hash=%d, bucket=%d) does not correspond to bucket being split (bucket=%d) or new bucket (bucket=%d), modulus=%d, buckets=%d", e.Hash, idx, idxSplit, idxNew, c.modulus, len(c.buckets))) 104 | } 105 | 106 | // If the entry falls into the new bucket then add it and remove from the splitting bucket 107 | if idx == idxNew { 108 | bNew.insert(e.Offset, e.Size, e.Hash) 109 | bSplit.erase(i) 110 | } else { 111 | i++ 112 | } 113 | } 114 | 115 | // Deal with any spills in the splitting bucket by rehashing the entries as well, walking the linked list 116 | // of spills. Potentially this can lead to the new bucket spilling. 117 | // Since spills are immutable we may leave orphaned entries that have been copied to the new bucket. 
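// The loop below flushes the data file, loads each spill record into a temporary bucket, rehashes its
// entries into either the splitting bucket or the new bucket (each of which may itself spill again),
// and follows the chain of spill offsets until it reaches zero.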
118 | 119 | spill := bSplit.spill 120 | bSplit.setSpill(0) 121 | 122 | tmp := NewBucket(c.bucketSize, make([]byte, c.bucketSize)) 123 | 124 | if spill > 0 { 125 | for { 126 | // Make sure any spills are on disk 127 | // TODO: figure out semantics of buffered writer here 128 | if err := df.Flush(); err != nil { 129 | return fmt.Errorf("flush data file: %w", err) 130 | } 131 | 132 | // Read the spill record from the data file into the temporary bucket 133 | if err := tmp.LoadFrom(spill, df); err != nil { 134 | return fmt.Errorf("load from spill (%d): %w", spill, err) 135 | } 136 | 137 | if c.tlogger.Enabled() { 138 | c.tlogger.Info("loaded bucket from spill", "spill", spill, "bucket_entry_count", tmp.count) 139 | } 140 | 141 | for i := 0; i < tmp.count; i++ { 142 | e := tmp.entry(i) 143 | idx := c.bucketIndex(e.Hash) 144 | if c.tlogger.Enabled() { 145 | c.tlogger.Info("spill entry rehash", "hash", e.Hash, "rehash_index", idx, "buckets", len(c.buckets), "modulus", c.modulus) 146 | } 147 | if idx != idxSplit && idx != idxNew { 148 | panic(fmt.Sprintf("bucket index of rehashed key (%d) does not correspond to bucket being split (%d) or new bucket (%d)", idx, idxSplit, idxNew)) 149 | } 150 | if idx == idxNew { 151 | spilled, err := bNew.maybeSpill(df) 152 | if spilled && c.tlogger.Enabled() { 153 | c.tlogger.Info("new bucket spilled during split", "index", idx, "spill", bNew.spill) 154 | } 155 | if err != nil { 156 | return fmt.Errorf("maybe spill: %w", err) 157 | } 158 | // we hold the lock on bNew 159 | bNew.insert(e.Offset, e.Size, e.Hash) 160 | } else { 161 | spilled, err := bSplit.maybeSpill(df) 162 | if spilled && c.tlogger.Enabled() { 163 | c.tlogger.Info("split bucket spilled during split", "index", idx, "spill", bSplit.spill) 164 | } 165 | if err != nil { 166 | return fmt.Errorf("maybe spill: %w", err) 167 | } 168 | bSplit.insert(e.Offset, e.Size, e.Hash) 169 | } 170 | 171 | } 172 | 173 | // Continue reading any further spills 174 | spill = tmp.spill 175 | 176 | if spill == 0 { 177 | break 178 | } 179 | } 180 | } 181 | 182 | return nil 183 | } 184 | 185 | // Exists reports whether a record with the given hash and key exists in the data file 186 | func (c *BucketCache) Exists(hash uint64, key string, df *DataFile) (bool, error) { 187 | _, err := c.FetchHeader(hash, key, df) 188 | if err == nil { 189 | return true, nil 190 | } 191 | if err == ErrKeyNotFound { 192 | return false, nil 193 | } 194 | return false, err 195 | } 196 | 197 | // Fetch returns a reader that can be used to read the data record associated with the key 198 | func (c *BucketCache) Fetch(hash uint64, key string, df *DataFile) (io.Reader, error) { 199 | c.mu.Lock() 200 | defer c.mu.Unlock() 201 | 202 | idx := c.bucketIndex(hash) 203 | b := c.buckets[idx] 204 | 205 | for { 206 | for i := b.lowerBound(hash); i < b.count; i++ { 207 | entry := b.entry(i) 208 | if entry.Hash != hash { 209 | break 210 | } 211 | 212 | dr, err := df.RecordDataReader(entry.Offset, key) 213 | if err != nil { 214 | if errors.Is(err, ErrKeyMismatch) { 215 | continue 216 | } 217 | if err != nil { 218 | return nil, fmt.Errorf("load data record: %w", err) 219 | } 220 | } 221 | 222 | // Found a matching record 223 | return dr, nil 224 | } 225 | 226 | if b.spill == 0 { 227 | break 228 | } 229 | 230 | spill := b.spill 231 | 232 | blockBuf := make([]byte, c.bucketSize) 233 | b = NewBucket(c.bucketSize, blockBuf) 234 | if err := b.LoadFrom(spill, df); err != nil { 235 | return nil, fmt.Errorf("read spill: %w", err) 236 | } 237 | 238 | } 239 | 240 | return 
nil, ErrKeyNotFound 241 | } 242 | 243 | // FetchHeader returns a record header for the data record associated with the key 244 | func (c *BucketCache) FetchHeader(hash uint64, key string, df *DataFile) (*DataRecordHeader, error) { 245 | c.mu.Lock() 246 | defer c.mu.Unlock() 247 | 248 | idx := c.bucketIndex(hash) 249 | b := c.buckets[idx] 250 | 251 | bkey := []byte(key) 252 | for { 253 | for i := b.lowerBound(hash); i < b.count; i++ { 254 | entry := b.entry(i) 255 | if entry.Hash != hash { 256 | break 257 | } 258 | 259 | rh, err := df.LoadRecordHeader(entry.Offset) 260 | if err != nil { 261 | return nil, fmt.Errorf("read data record header: %w", err) 262 | } 263 | if !bytes.Equal(bkey, rh.Key) { 264 | continue 265 | } 266 | 267 | // Found a matching record 268 | return rh, nil 269 | } 270 | 271 | if b.spill == 0 { 272 | break 273 | } 274 | 275 | spill := b.spill 276 | 277 | blockBuf := make([]byte, c.bucketSize) 278 | b = NewBucket(c.bucketSize, blockBuf) 279 | if err := b.LoadFrom(spill, df); err != nil { 280 | return nil, fmt.Errorf("read spill: %w", err) 281 | } 282 | 283 | } 284 | 285 | return nil, ErrKeyNotFound 286 | } 287 | 288 | // computeStats counts the number of entries in buckets and spills 289 | func (c *BucketCache) computeStats(df *DataFile) error { 290 | c.mu.Lock() 291 | defer c.mu.Unlock() 292 | 293 | blockBuf := make([]byte, c.bucketSize) 294 | for idx := range c.buckets { 295 | b := c.buckets[idx] 296 | for { 297 | c.count += b.count 298 | if b.spill == 0 { 299 | break 300 | } 301 | spill := b.spill 302 | 303 | b = NewBucket(c.bucketSize, blockBuf) 304 | if err := b.LoadFrom(spill, df); err != nil { 305 | return fmt.Errorf("read spill: %w", err) 306 | } 307 | 308 | } 309 | } 310 | return nil 311 | } 312 | 313 | // EntryCount returns the number of entries in the cache 314 | func (c *BucketCache) EntryCount() int { 315 | c.mu.Lock() 316 | defer c.mu.Unlock() 317 | return c.count 318 | } 319 | 320 | // BucketCount returns the number of buckets in the cache 321 | func (c *BucketCache) BucketCount() int { 322 | c.mu.Lock() 323 | defer c.mu.Unlock() 324 | return len(c.buckets) 325 | } 326 | 327 | // Get retrieves a copy of the bucket at index idx 328 | func (c *BucketCache) Get(idx int) *Bucket { 329 | c.mu.Lock() 330 | defer c.mu.Unlock() 331 | 332 | buf := make([]byte, c.bucketSize) 333 | copy(buf, c.buckets[idx].blob) 334 | return NewBucket(c.bucketSize, buf) 335 | } 336 | 337 | func (c *BucketCache) Has(hash uint64, sp Spiller) (bool, error) { 338 | c.mu.Lock() 339 | defer c.mu.Unlock() 340 | 341 | idx := c.bucketIndex(hash) 342 | b := c.buckets[idx] 343 | 344 | // tmp is only used if we need to read from spuill 345 | var tmp *Bucket 346 | 347 | for { 348 | idx := b.lowerBound(hash) 349 | if idx < b.count { 350 | entry := b.entry(idx) 351 | if entry.Hash == hash { 352 | // Found a matching record 353 | return true, nil 354 | } 355 | } 356 | 357 | if b.spill == 0 { 358 | break 359 | } 360 | 361 | if tmp == nil { 362 | tmp = NewBucket(c.bucketSize, make([]byte, c.bucketSize)) 363 | } 364 | if err := tmp.LoadFrom(int64(b.spill), sp); err != nil { 365 | return false, fmt.Errorf("read spill: %w", err) 366 | } 367 | b = tmp 368 | } 369 | 370 | return false, nil 371 | } 372 | 373 | func (c *BucketCache) WriteDirty(lf *LogFile, kf *KeyFile) (int64, error) { 374 | c.mu.Lock() 375 | defer c.mu.Unlock() 376 | 377 | work := int64(0) 378 | for idx := range c.dirty { 379 | if !c.dirty[idx] { 380 | continue 381 | } 382 | written, err := lf.AppendBucket(idx, c.buckets[idx]) 383 | work += 
written 384 | if err != nil { 385 | return work, fmt.Errorf("append bucket to log %d: %w", idx, err) 386 | } 387 | } 388 | 389 | if err := lf.Sync(); err != nil { 390 | return work, fmt.Errorf("sync log file: %w", err) 391 | } 392 | 393 | for idx := range c.dirty { 394 | if !c.dirty[idx] { 395 | continue 396 | } 397 | if err := kf.PutBucket(idx, c.buckets[idx]); err != nil { 398 | return work, fmt.Errorf("put bucket %d: %w", idx, err) 399 | } 400 | } 401 | 402 | if err := kf.Sync(); err != nil { 403 | return work, fmt.Errorf("sync key file: %w", err) 404 | } 405 | 406 | for idx := range c.dirty { 407 | c.dirty[idx] = false 408 | } 409 | 410 | return work, nil 411 | } 412 | -------------------------------------------------------------------------------- /internal/bucketcache_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "testing" 7 | 8 | "github.com/go-logr/logr" 9 | ) 10 | 11 | func TestBucketCacheInsert(t *testing.T) { 12 | bucketSize := BucketHeaderSize + BucketEntrySize*6 13 | 14 | n := 5000 15 | c := &BucketCache{ 16 | bucketSize: bucketSize, // capacity of 6 entries per bucket 17 | modulus: 1, 18 | buckets: []*Bucket{NewBucket(bucketSize, make([]byte, bucketSize))}, 19 | dirty: []bool{false}, 20 | threshold: 3, // aim for an average of 3 entries per bucket 21 | tlogger: logr.Discard(), 22 | } 23 | 24 | rng := rand.New(rand.NewSource(299792458)) 25 | 26 | sm := make(spillMap) 27 | hashes := make([]uint64, n) 28 | for i := range hashes { 29 | hashes[i] = rng.Uint64() 30 | err := c.Insert(5, 5, hashes[i], sm) 31 | if err != nil { 32 | t.Fatalf("unexpected error on insert (%d): %v", hashes[i], err) 33 | } 34 | } 35 | if c.EntryCount() != len(hashes) { 36 | t.Errorf("c.EntryCount()=%d, wanted %d", c.EntryCount(), len(hashes)) 37 | } 38 | 39 | for i := range hashes { 40 | has, err := c.Has(hashes[i], sm) 41 | if err != nil { 42 | t.Fatalf("unexpected error on has (%d): %v", hashes[i], err) 43 | } 44 | 45 | if !has { 46 | t.Errorf("did not find hash %d", hashes[i]) 47 | } 48 | } 49 | } 50 | 51 | type spillMap map[int64][]byte 52 | 53 | func (s spillMap) LoadBucketSpill(spill int64, buf []byte) error { 54 | data, ok := s[spill] 55 | if !ok { 56 | return fmt.Errorf("unknown spill: %d", spill) 57 | } 58 | copy(buf, data) 59 | return nil 60 | } 61 | 62 | func (s spillMap) AppendBucketSpill(buf []byte) (int64, error) { 63 | spill := int64(len(s)) + 1 64 | data := make([]byte, len(buf)) 65 | copy(data, buf) 66 | s[spill] = data 67 | return spill, nil 68 | } 69 | 70 | func (s spillMap) Flush() error { 71 | return nil 72 | } 73 | -------------------------------------------------------------------------------- /internal/cache.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | // CacheData is a read only view of a bucket cache. It is safe for concurrent use. 
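// An illustrative read-only lookup against a CacheData snapshot (the index value 3 is
// arbitrary; buckets are keyed by whatever indices the writer inserted):
//
//	if b, ok := data.Find(3); ok {
//		_ = b.Count() // number of entries in the cached bucket
//	}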
4 | type CacheData struct { 5 | index map[int]int 6 | buckets []BucketRecord 7 | } 8 | 9 | func (c *CacheData) Find(n int) (*Bucket, bool) { 10 | idx, exists := c.index[n] 11 | if !exists { 12 | return nil, false 13 | } 14 | return c.buckets[idx].bucket, true 15 | } 16 | 17 | func (c *CacheData) Has(n int) bool { 18 | _, exists := c.index[n] 19 | return exists 20 | } 21 | 22 | func (c *CacheData) Count() int { 23 | return len(c.buckets) 24 | } 25 | 26 | func (c *CacheData) WithBuckets(fn func(bs []BucketRecord) error) error { 27 | return fn(c.buckets) 28 | } 29 | 30 | // Cache is an in memory buffer of buckets. It is not safe for concurrent use. 31 | type Cache struct { 32 | keySize int 33 | blockSize int 34 | sizeHint int 35 | 36 | data *CacheData 37 | } 38 | 39 | func NewCache(keySize int, blockSize int, sizeHint int) *Cache { 40 | return &Cache{ 41 | keySize: keySize, 42 | blockSize: blockSize, 43 | sizeHint: sizeHint, 44 | data: &CacheData{ 45 | index: make(map[int]int, sizeHint), 46 | buckets: make([]BucketRecord, 0, sizeHint), 47 | }, 48 | } 49 | } 50 | 51 | func (c *Cache) Find(n int) (*Bucket, bool) { 52 | return c.data.Find(n) 53 | } 54 | 55 | func (c *Cache) Has(n int) bool { 56 | return c.data.Has(n) 57 | } 58 | 59 | func (c *Cache) Count() int { 60 | return c.data.Count() 61 | } 62 | 63 | func (c *Cache) WithBuckets(fn func(bs []BucketRecord) error) error { 64 | return c.data.WithBuckets(fn) 65 | } 66 | 67 | func (c *Cache) Insert(idx int, b *Bucket) { 68 | br := BucketRecord{ 69 | idx: idx, 70 | bucket: b, 71 | } 72 | 73 | c.data.buckets = append(c.data.buckets, br) 74 | c.data.index[idx] = len(c.data.buckets) - 1 75 | } 76 | 77 | func (c *Cache) Clear() { 78 | c.data = &CacheData{ 79 | index: make(map[int]int, c.sizeHint), 80 | buckets: make([]BucketRecord, 0, c.sizeHint), 81 | } 82 | } 83 | 84 | // TakeData takes ownership of the Cache's data. The Cache is cleared after. 
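// A minimal sketch of the intended handoff (keySize, blockSize and the bucket value b are
// placeholders; b would normally come from a key file load):
//
//	c := NewCache(keySize, blockSize, 16)
//	c.Insert(0, b)
//	snapshot := c.TakeData() // safe to share with concurrent readers
//	_ = snapshot             // c is now empty and may be reused by the writer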
85 | func (c *Cache) TakeData() *CacheData { 86 | data := c.data 87 | c.data = &CacheData{ 88 | index: make(map[int]int, c.sizeHint), 89 | buckets: make([]BucketRecord, 0, c.sizeHint), 90 | } 91 | return data 92 | } 93 | -------------------------------------------------------------------------------- /internal/const32.go: -------------------------------------------------------------------------------- 1 | //go:build 386 || arm || mips || mipsle 2 | 3 | package internal 4 | 5 | import ( 6 | "math" 7 | ) 8 | 9 | const ( 10 | MaxBlockSize = MaxUint16 // maximum length of a keyfile block in bytes (must not be larger than MaxKeySize due to on-disk representation) 11 | MaxKeySize = MaxUint16 // maximum length of a data record's key in bytes 12 | MaxDataSize = math.MaxInt32 - 1 // maximum length of a data record's value in bytes 13 | ) 14 | -------------------------------------------------------------------------------- /internal/const64.go: -------------------------------------------------------------------------------- 1 | //go:build amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm 2 | 3 | package internal 4 | 5 | const ( 6 | MaxBlockSize = MaxUint16 // maximum length of a keyfile block in bytes (must not be larger than MaxKeySize due to on-disk representation) 7 | MaxKeySize = MaxUint16 // maximum length of a data record's key in bytes 8 | MaxDataSize = MaxUint48 // maximum length of a data record's value in bytes 9 | ) 10 | -------------------------------------------------------------------------------- /internal/context.go: -------------------------------------------------------------------------------- 1 | package internal 2 | -------------------------------------------------------------------------------- /internal/error.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | var ( 8 | ErrAppNumMismatch = errors.New("appnum mismatch") 9 | ErrDataMissing = errors.New("data missing") 10 | ErrDataTooLarge = errors.New("data too large") 11 | ErrDifferentVersion = errors.New("different version") 12 | ErrHashMismatch = errors.New("hash mismatch") 13 | ErrInvalidBlockSize = errors.New("invalid block size") 14 | ErrInvalidBucketCount = errors.New("invalid bucket count") 15 | ErrInvalidCapacity = errors.New("invalid capacity") 16 | ErrInvalidDataRecord = errors.New("not a data record: contains spill marker") 17 | ErrInvalidKeySize = errors.New("invalid key size") 18 | ErrInvalidLoadFactor = errors.New("invalid load factor") 19 | ErrInvalidRecordSize = errors.New("invalid record size") 20 | ErrInvalidSpill = errors.New("not a spill record: missing spill marker") 21 | ErrKeyExists = errors.New("key exists") 22 | ErrKeyMismatch = errors.New("key mismatch") 23 | ErrKeyMissing = errors.New("key missing") 24 | ErrKeyNotFound = errors.New("key not found") 25 | ErrKeySizeMismatch = errors.New("key size mismatch") 26 | ErrKeyTooLarge = errors.New("key too large") 27 | ErrKeyWrongSize = errors.New("key wrong size") // deprecated: use ErrKeyMissing and ErrKeyTooLarge instead 28 | ErrNotDataFile = errors.New("not a data file") 29 | ErrNotKeyFile = errors.New("not a key file") 30 | ErrNotLogFile = errors.New("not a log file") 31 | ErrShortKeyFile = errors.New("short key file") 32 | ErrUIDMismatch = errors.New("uid mismatch") 33 | ) 34 | -------------------------------------------------------------------------------- /internal/field.go: 
-------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "encoding/binary" 5 | "math" 6 | ) 7 | 8 | const ( 9 | MaxUint16 = math.MaxUint16 10 | MaxUint24 = 0xffffff 11 | MaxUint32 = math.MaxUint32 12 | MaxUint48 = 0x0000ffffffffffff 13 | MaxUint64 = math.MaxUint64 14 | 15 | MaxInt16 = math.MaxInt16 16 | ) 17 | 18 | const ( 19 | SizeUint16 = 2 20 | SizeUint24 = 3 21 | SizeUint32 = 4 22 | SizeUint48 = 6 23 | SizeUint64 = 8 24 | ) 25 | 26 | func DecodeUint16(b []byte) uint16 { 27 | return binary.BigEndian.Uint16(b) 28 | } 29 | 30 | func DecodeUint32(b []byte) uint32 { 31 | return binary.BigEndian.Uint32(b) 32 | } 33 | 34 | func DecodeUint64(b []byte) uint64 { 35 | return binary.BigEndian.Uint64(b) 36 | } 37 | 38 | func DecodeUint48(b []byte) uint64 { 39 | _ = b[5] // bounds check hint to compiler; see golang.org/issue/14808 40 | return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | 41 | uint64(b[4])<<32 | uint64(b[5])<<40 42 | } 43 | 44 | func DecodeUint24(b []byte) uint32 { 45 | _ = b[2] // bounds check hint to compiler; see golang.org/issue/14808 46 | return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 47 | } 48 | 49 | func EncodeUint16(b []byte, v uint16) { 50 | binary.BigEndian.PutUint16(b, v) 51 | } 52 | 53 | func EncodeUint32(b []byte, v uint32) { 54 | binary.BigEndian.PutUint32(b, v) 55 | } 56 | 57 | func EncodeUint64(b []byte, v uint64) { 58 | binary.BigEndian.PutUint64(b, v) 59 | } 60 | 61 | func EncodeUint48(b []byte, v uint64) { 62 | _ = b[5] // early bounds check to guarantee safety of writes below 63 | b[0] = byte(v) 64 | b[1] = byte(v >> 8) 65 | b[2] = byte(v >> 16) 66 | b[3] = byte(v >> 24) 67 | b[4] = byte(v >> 32) 68 | b[5] = byte(v >> 40) 69 | } 70 | 71 | func EncodeUint24(b []byte, v uint32) { 72 | _ = b[3] // early bounds check to guarantee safety of writes below 73 | b[0] = byte(v) 74 | b[1] = byte(v >> 8) 75 | b[2] = byte(v >> 16) 76 | b[3] = byte(v >> 24) 77 | } 78 | 79 | /* 80 | 81 | // These metafunctions describe the binary format of fields on disk 82 | 83 | template 84 | struct field; 85 | 86 | template<> 87 | struct field 88 | { 89 | static std::size_t constexpr size = 1; 90 | static std::uint64_t constexpr max = 0xff; 91 | }; 92 | 93 | template<> 94 | struct field 95 | { 96 | static std::size_t constexpr size = 2; 97 | static std::uint64_t constexpr max = 0xffff; 98 | }; 99 | 100 | template<> 101 | struct field 102 | { 103 | static std::size_t constexpr size = 3; 104 | static std::uint64_t constexpr max = 0xffffff; 105 | }; 106 | 107 | template<> 108 | struct field 109 | { 110 | static std::size_t constexpr size = 4; 111 | static std::uint64_t constexpr max = 0xffffffff; 112 | }; 113 | 114 | template<> 115 | struct field 116 | { 117 | static std::size_t constexpr size = 6; 118 | static std::uint64_t constexpr max = 0x0000ffffffffffff; 119 | }; 120 | 121 | template<> 122 | struct field 123 | { 124 | static std::size_t constexpr size = 8; 125 | static std::uint64_t constexpr max = 0xffffffffffffffff; 126 | }; 127 | 128 | // read field from memory 129 | 130 | template::value>::type* = nullptr> 132 | void 133 | readp(void const* v, U& u) 134 | { 135 | auto p = reinterpret_cast(v); 136 | u = *p; 137 | } 138 | 139 | template::value>::type* = nullptr> 141 | void 142 | readp(void const* v, U& u) 143 | { 144 | auto p = reinterpret_cast(v); 145 | T t; 146 | t = T(*p++)<< 8; 147 | t = T(*p ) | t; 148 | u = t; 149 | } 150 | 151 | template::value>::type* = nullptr> 153 | void 154 | 
readp(void const* v, U& u) 155 | { 156 | auto p = reinterpret_cast(v); 157 | std::uint32_t t; 158 | t = std::uint32_t(*p++)<<16; 159 | t = (std::uint32_t(*p++)<< 8) | t; 160 | t = std::uint32_t(*p ) | t; 161 | u = t; 162 | } 163 | 164 | template::value>::type* = nullptr> 166 | void 167 | readp(void const* v, U& u) 168 | { 169 | auto const* p = reinterpret_cast(v); 170 | T t; 171 | t = T(*p++)<<24; 172 | t = (T(*p++)<<16) | t; 173 | t = (T(*p++)<< 8) | t; 174 | t = T(*p ) | t; 175 | u = t; 176 | } 177 | 178 | template::value>::type* = nullptr> 180 | void 181 | readp(void const* v, U& u) 182 | { 183 | auto p = reinterpret_cast(v); 184 | std::uint64_t t; 185 | t = (std::uint64_t(*p++)<<40); 186 | t = (std::uint64_t(*p++)<<32) | t; 187 | t = (std::uint64_t(*p++)<<24) | t; 188 | t = (std::uint64_t(*p++)<<16) | t; 189 | t = (std::uint64_t(*p++)<< 8) | t; 190 | t = std::uint64_t(*p ) | t; 191 | u = t; 192 | } 193 | 194 | template::value>::type* = nullptr> 196 | void 197 | readp(void const* v, U& u) 198 | { 199 | auto p = reinterpret_cast(v); 200 | T t; 201 | t = T(*p++)<<56; 202 | t = (T(*p++)<<48) | t; 203 | t = (T(*p++)<<40) | t; 204 | t = (T(*p++)<<32) | t; 205 | t = (T(*p++)<<24) | t; 206 | t = (T(*p++)<<16) | t; 207 | t = (T(*p++)<< 8) | t; 208 | t = T(*p ) | t; 209 | u = t; 210 | } 211 | 212 | // read field from istream 213 | 214 | template 215 | void 216 | read(istream& is, U& u) 217 | { 218 | readp(is.data(field::size), u); 219 | } 220 | 221 | inline 222 | void 223 | read_size48(istream& is, std::size_t& u) 224 | { 225 | std::uint64_t v; 226 | read(is, v); 227 | BOOST_ASSERT(v <= std::numeric_limits::max()); 228 | u = static_cast(v); 229 | } 230 | 231 | // write field to ostream 232 | 233 | template::value>::type* = nullptr> 235 | void 236 | write(ostream& os, U u) 237 | { 238 | BOOST_ASSERT(u <= field::max); 239 | std::uint8_t* p = os.data(field::size); 240 | *p = static_cast(u); 241 | } 242 | 243 | template::value>::type* = nullptr> 245 | void 246 | write(ostream& os, U u) 247 | { 248 | BOOST_ASSERT(u <= field::max); 249 | auto const t = static_cast(u); 250 | std::uint8_t* p = os.data(field::size); 251 | *p++ = (t>> 8)&0xff; 252 | *p = t &0xff; 253 | } 254 | 255 | template::value>::type* = nullptr> 257 | void 258 | write(ostream& os, U u) 259 | { 260 | BOOST_ASSERT(u <= field::max); 261 | auto const t = static_cast(u); 262 | std::uint8_t* p = os.data(field::size); 263 | *p++ = (t>>16)&0xff; 264 | *p++ = (t>> 8)&0xff; 265 | *p = t &0xff; 266 | } 267 | 268 | template::value>::type* = nullptr> 270 | void 271 | write(ostream& os, U u) 272 | { 273 | BOOST_ASSERT(u <= field::max); 274 | auto const t = static_cast(u); 275 | std::uint8_t* p = os.data(field::size); 276 | *p++ = (t>>24)&0xff; 277 | *p++ = (t>>16)&0xff; 278 | *p++ = (t>> 8)&0xff; 279 | *p = t &0xff; 280 | } 281 | 282 | template::value>::type* = nullptr> 284 | void 285 | write(ostream& os, U u) 286 | { 287 | BOOST_ASSERT(u <= field::max); 288 | auto const t = static_cast(u); 289 | std::uint8_t* p = os.data(field::size); 290 | *p++ = (t>>40)&0xff; 291 | *p++ = (t>>32)&0xff; 292 | *p++ = (t>>24)&0xff; 293 | *p++ = (t>>16)&0xff; 294 | *p++ = (t>> 8)&0xff; 295 | *p = t &0xff; 296 | } 297 | 298 | template::value>::type* = nullptr> 300 | void 301 | write(ostream& os, U u) 302 | { 303 | auto const t = static_cast(u); 304 | std::uint8_t* p = os.data(field::size); 305 | *p++ = (t>>56)&0xff; 306 | *p++ = (t>>48)&0xff; 307 | *p++ = (t>>40)&0xff; 308 | *p++ = (t>>32)&0xff; 309 | *p++ = (t>>24)&0xff; 310 | *p++ = (t>>16)&0xff; 311 | *p++ = (t>> 
8)&0xff; 312 | *p = t &0xff; 313 | } 314 | 315 | } // detail 316 | } // nudb 317 | 318 | #endif 319 | 320 | */ 321 | -------------------------------------------------------------------------------- /internal/file.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "math" 10 | "os" 11 | "sync" 12 | 13 | "github.com/go-logr/logr" 14 | ) 15 | 16 | func openFile(name string, flag int, perm os.FileMode, advice int) (*os.File, error) { 17 | f, err := os.OpenFile(name, flag, perm) 18 | if err != nil { 19 | return nil, fmt.Errorf("open: %w", err) 20 | } 21 | 22 | err = Fadvise(int(f.Fd()), 0, 0, advice) 23 | if err != nil { 24 | return nil, fmt.Errorf("fadvise: %w", err) 25 | } 26 | 27 | return f, nil 28 | } 29 | 30 | // openFileForScan creates a file for sequential reads 31 | func openFileForScan(name string) (*os.File, error) { 32 | return openFile(name, os.O_RDONLY, 0o644, FADV_SEQUENTIAL) 33 | } 34 | 35 | func block_size(path string) int { 36 | // A reasonable default for many SSD devices 37 | return 4096 38 | } 39 | 40 | type CountWriter interface { 41 | WriterFlusher 42 | 43 | // Offset returns the position in the file at which the next write will be made 44 | Offset() int64 45 | 46 | // Count returns the number of bytes written 47 | Count() int64 48 | } 49 | 50 | type WriterFlusher interface { 51 | io.Writer 52 | Flush() error 53 | } 54 | 55 | // DataFile assumes it has exclusive write access to the file 56 | type DataFile struct { 57 | Path string 58 | Header DatFileHeader 59 | 60 | offset int64 61 | file *os.File 62 | writer *bufio.Writer 63 | elogger logr.Logger 64 | } 65 | 66 | func CreateDataFile(path string, appnum, uid uint64) error { 67 | f, err := openFile(path, os.O_APPEND|os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644, FADV_RANDOM) 68 | if err != nil { 69 | return fmt.Errorf("create file: %w", err) 70 | } 71 | 72 | dh := DatFileHeader{ 73 | Version: currentVersion, 74 | UID: uid, 75 | AppNum: appnum, 76 | } 77 | 78 | if err := dh.EncodeTo(f); err != nil { 79 | f.Close() 80 | os.Remove(path) 81 | return fmt.Errorf("write header: %w", err) 82 | } 83 | 84 | if err := f.Sync(); err != nil { 85 | f.Close() 86 | os.Remove(path) 87 | return fmt.Errorf("sync: %w", err) 88 | } 89 | if err := f.Close(); err != nil { 90 | os.Remove(path) 91 | return fmt.Errorf("close: %w", err) 92 | } 93 | 94 | return nil 95 | } 96 | 97 | // OpenDataFile opens a data file for appending and random reads 98 | func OpenDataFile(path string) (*DataFile, error) { 99 | f, err := os.OpenFile(path, os.O_APPEND|os.O_RDWR|os.O_EXCL, 0o644) 100 | if err != nil { 101 | return nil, fmt.Errorf("open: %w", err) 102 | } 103 | 104 | err = Fadvise(int(f.Fd()), 0, 0, FADV_RANDOM) 105 | if err != nil { 106 | return nil, fmt.Errorf("fadvise: %w", err) 107 | } 108 | 109 | st, err := f.Stat() 110 | if err != nil { 111 | return nil, fmt.Errorf("stat data file: %w", err) 112 | } 113 | 114 | var dh DatFileHeader 115 | if err := dh.DecodeFrom(f); err != nil { 116 | return nil, fmt.Errorf("read header: %w", err) 117 | } 118 | if err := dh.Verify(); err != nil { 119 | return nil, fmt.Errorf("verify header: %w", err) 120 | } 121 | 122 | return &DataFile{ 123 | Path: path, 124 | Header: dh, 125 | file: f, 126 | offset: st.Size(), 127 | // Buffered writes to avoid write amplification 128 | writer: bufio.NewWriterSize(f, 32*block_size(path)), 129 | elogger: logr.Discard(), 130 | }, nil 131 | } 132 | 133 | func (d 
*DataFile) Offset() int64 { 134 | return d.offset 135 | } 136 | 137 | func (d *DataFile) Sync() error { 138 | if err := d.writer.Flush(); err != nil { 139 | return err 140 | } 141 | return d.file.Sync() 142 | } 143 | 144 | func (d *DataFile) Flush() error { 145 | return d.writer.Flush() 146 | } 147 | 148 | func (d *DataFile) Close() error { 149 | if err := d.writer.Flush(); err != nil { 150 | return err 151 | } 152 | return d.file.Close() 153 | } 154 | 155 | func (d *DataFile) Size() (int64, error) { 156 | st, err := d.file.Stat() 157 | if err != nil { 158 | return 0, err 159 | } 160 | return st.Size(), nil 161 | } 162 | 163 | // AppendRecord writes a record to the data file. It returns the position at which 164 | // the record was written. 165 | func (d *DataFile) AppendRecord(dr *DataRecord) (int64, error) { 166 | hdr := make([]byte, SizeUint48+SizeUint16) 167 | EncodeUint48(hdr[0:SizeUint48], uint64(len(dr.data))) 168 | EncodeUint16(hdr[SizeUint48:SizeUint48+SizeUint16], uint16(len(dr.key))) 169 | 170 | offset := d.offset 171 | 172 | n, err := d.file.Write(hdr[:]) 173 | d.offset += int64(n) 174 | if err != nil { 175 | return offset, err 176 | } 177 | if n != len(hdr) { 178 | return offset, io.ErrShortWrite 179 | } 180 | 181 | nk, err := d.file.Write([]byte(dr.key)) 182 | d.offset += int64(nk) 183 | if err != nil { 184 | return offset, err 185 | } 186 | if nk != len(dr.key) { 187 | return offset, io.ErrShortWrite 188 | } 189 | 190 | nd, err := d.file.Write(dr.data) 191 | d.offset += int64(nd) 192 | if err != nil { 193 | return offset, err 194 | } 195 | if nd != len(dr.data) { 196 | return offset, io.ErrShortWrite 197 | } 198 | 199 | return offset, nil 200 | } 201 | 202 | func (d *DataFile) LoadRecordHeader(offset int64) (*DataRecordHeader, error) { 203 | hdr := make([]byte, SizeUint48+SizeUint16) 204 | 205 | _, err := d.file.ReadAt(hdr, offset) 206 | if err != nil { 207 | return nil, fmt.Errorf("read data record header: %w", err) 208 | } 209 | 210 | dataSize := DecodeUint48(hdr[:SizeUint48]) 211 | if dataSize == 0 { 212 | // Data size 0 indicates a bucket spill follows 213 | return nil, ErrInvalidDataRecord 214 | } 215 | keySize := DecodeUint16(hdr[SizeUint48 : SizeUint48+SizeUint16]) 216 | if keySize == 0 { 217 | return nil, ErrInvalidDataRecord 218 | } 219 | 220 | key := make([]byte, keySize) 221 | _, err = d.file.ReadAt(key, offset+SizeUint48+SizeUint16) 222 | if err != nil { 223 | return nil, fmt.Errorf("read data record key: %w", err) 224 | } 225 | 226 | return &DataRecordHeader{ 227 | Key: key, 228 | DataSize: int64(dataSize), 229 | KeySize: keySize, 230 | }, nil 231 | } 232 | 233 | func (d *DataFile) RecordDataReader(offset int64, key string) (io.Reader, error) { 234 | rh, err := d.LoadRecordHeader(offset) 235 | if err != nil { 236 | return nil, fmt.Errorf("read data record header: %w", err) 237 | } 238 | if !bytes.Equal([]byte(key), rh.Key) { 239 | return nil, ErrKeyMismatch 240 | } 241 | 242 | return io.NewSectionReader(d.file, offset+rh.Size(), int64(rh.DataSize)), nil 243 | } 244 | 245 | func (d *DataFile) AppendBucketSpill(blob []byte) (int64, error) { 246 | offset := d.offset 247 | 248 | var hdr [SpillHeaderSize]byte 249 | // Initial Uint48 is zero to indicate this is a spill record 250 | EncodeUint16(hdr[SizeUint48:], uint16(len(blob))) 251 | 252 | hn, err := d.writer.Write(hdr[:]) 253 | d.offset += int64(hn) 254 | if err == nil && hn != len(hdr) { 255 | err = io.ErrShortWrite 256 | } 257 | if err != nil { 258 | if d.elogger.Enabled() && errors.Is(err, io.ErrShortWrite) { 
259 | d.elogger.Info("data file: short write on bucket header", "expected", len(hdr), "wrote", hn) 260 | } 261 | return offset, fmt.Errorf("write header: %w", err) 262 | } 263 | 264 | bn, err := d.writer.Write(blob) 265 | d.offset += int64(bn) 266 | if err == nil && bn != len(blob) { 267 | err = io.ErrShortWrite 268 | } 269 | 270 | if err != nil { 271 | if d.elogger.Enabled() && errors.Is(err, io.ErrShortWrite) { 272 | d.elogger.Info("data file: short write on bucket data", "expected", len(blob), "wrote", bn) 273 | } 274 | return offset, fmt.Errorf("write header: %w", err) 275 | } 276 | 277 | return offset, nil 278 | } 279 | 280 | func (d *DataFile) LoadBucketSpill(offset int64, blob []byte) error { 281 | var hdr [SpillHeaderSize]byte 282 | _, err := d.file.ReadAt(hdr[:], offset) 283 | if err != nil { 284 | return fmt.Errorf("read header: %w", err) 285 | } 286 | 287 | marker := DecodeUint48(hdr[:SizeUint48]) 288 | if marker != 0 { 289 | return ErrInvalidSpill 290 | } 291 | 292 | size := DecodeUint16(hdr[SizeUint48 : SizeUint48+SizeUint16]) 293 | 294 | sr := io.NewSectionReader(d.file, offset+int64(len(hdr)), int64(size)) 295 | off := 0 296 | for { 297 | n, err := sr.Read(blob[off:]) 298 | off += n 299 | if err == io.EOF { 300 | break 301 | } 302 | if err != nil { 303 | return fmt.Errorf("read bucket data: %w", err) 304 | } 305 | if off >= len(blob) { 306 | return io.ErrShortBuffer 307 | } 308 | } 309 | 310 | for i := off; i < len(blob); i++ { 311 | blob[i] = 0 312 | } 313 | 314 | return nil 315 | } 316 | 317 | // RecordScanner returns a RecordScanner that may be used to iterate over the records in the data file. 318 | func (d *DataFile) RecordScanner() *RecordScanner { 319 | f, err := openFileForScan(d.Path) 320 | if err != nil { 321 | return &RecordScanner{err: err} 322 | } 323 | 324 | r := bufio.NewReaderSize(f, 32*block_size(d.Path)) 325 | n, err := r.Discard(DatFileHeaderSize) 326 | 327 | return &RecordScanner{ 328 | err: err, 329 | r: r, 330 | closer: f, 331 | offset: int64(n), 332 | lr: io.LimitedReader{R: r, N: 0}, 333 | size: -1, 334 | isSpill: false, 335 | } 336 | } 337 | 338 | // RecordScanner implements a sequential scan through a data file. Successive calls to the Next method will step through 339 | // the records in the file. 340 | type RecordScanner struct { 341 | r *bufio.Reader 342 | closer io.Closer 343 | err error 344 | offset int64 345 | size int64 346 | key []byte 347 | isSpill bool 348 | lr io.LimitedReader 349 | } 350 | 351 | // Next reads the next bucket in sequence, including spills to the data store. It returns false 352 | // if it encounters an error or there are no more buckets to read. 
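// An illustrative scan over a data file (error handling condensed to a sketch):
//
//	s := df.RecordScanner()
//	defer s.Close()
//	for s.Next() {
//		if s.IsData() {
//			_, _ = io.Copy(io.Discard, s.Reader()) // or inspect s.Key() and s.Size()
//		}
//	}
//	if err := s.Err(); err != nil {
//		// handle the scan error
//	}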
353 | func (s *RecordScanner) Next() bool { 354 | if s.err != nil { 355 | return false 356 | } 357 | 358 | var n int 359 | 360 | // Skip any unread bytes 361 | n, s.err = s.r.Discard(int(s.lr.N)) 362 | if s.err != nil { 363 | return false 364 | } 365 | s.offset += int64(n) 366 | 367 | hdr := make([]byte, int64(SizeUint48+SizeUint16)) 368 | n, s.err = io.ReadFull(s.r, hdr) 369 | if s.err != nil { 370 | return false 371 | } 372 | s.offset += int64(n) 373 | 374 | s.size = int64(DecodeUint48(hdr[:SizeUint48])) 375 | if s.size == 0 { 376 | s.isSpill = true 377 | s.key = nil 378 | // Spill size is in the next 2 bytes 379 | s.size = int64(DecodeUint16(hdr[SizeUint48 : SizeUint48+SizeUint16])) 380 | if s.size == 0 { 381 | s.err = ErrInvalidRecordSize 382 | } 383 | } else { 384 | s.isSpill = false 385 | keySize := int(DecodeUint16(hdr[SizeUint48 : SizeUint48+SizeUint16])) 386 | key := make([]byte, keySize) 387 | n, s.err = io.ReadFull(s.r, key) 388 | s.offset += int64(n) 389 | if s.err != nil { 390 | return false 391 | } 392 | s.key = key 393 | } 394 | 395 | // Set the limited reader hard limit 396 | s.lr.N = s.size 397 | 398 | return s.err == nil 399 | } 400 | 401 | // Reader returns an io.Reader that may be used to read the data from the record. Should not be called until Next has been called. 402 | // The Reader is only valid for use until the next call to Next(). 403 | func (s *RecordScanner) Reader() io.Reader { 404 | if s.err != nil { 405 | return nil 406 | } 407 | return &s.lr 408 | } 409 | 410 | // IsSpill reports whether the current record is a bucket spill 411 | func (s *RecordScanner) IsSpill() bool { 412 | return s.isSpill 413 | } 414 | 415 | // IsData reports whether the current record is a data record 416 | func (s *RecordScanner) IsData() bool { 417 | return !s.isSpill 418 | } 419 | 420 | // Size returns the size of the current record's data in bytes 421 | func (s *RecordScanner) Size() int64 { 422 | return s.size 423 | } 424 | 425 | // RecordSize returns the number of bytes occupied by the current record including its header 426 | func (s *RecordScanner) RecordSize() int64 { 427 | if s.isSpill { 428 | return SizeUint48 + // holds marker 429 | SizeUint16 + // holds spill size 430 | s.size // spill data 431 | } 432 | return SizeUint48 + // holds data size 433 | SizeUint16 + // holds key size 434 | s.size + // data 435 | int64(len(s.key)) // key 436 | } 437 | 438 | // Key returns the key of the current record 439 | func (s *RecordScanner) Key() string { 440 | if s.key == nil { 441 | return "" 442 | } 443 | return string(s.key) 444 | } 445 | 446 | // Err returns the first non-EOF error that was encountered by the RecordScanner. 
447 | func (s *RecordScanner) Err() error { 448 | if s.err == io.EOF { 449 | return nil 450 | } 451 | return s.err 452 | } 453 | 454 | func (s *RecordScanner) Close() error { 455 | return s.closer.Close() 456 | } 457 | 458 | // KeyFile assumes it has exclusive write access to the file 459 | type KeyFile struct { 460 | Path string 461 | Header KeyFileHeader 462 | 463 | file *os.File 464 | hasher Hasher 465 | elogger logr.Logger 466 | 467 | // bucketLocks is a list of locks corresponding to each bucket in the file 468 | // the locks guard access to read/writes of that portion of the keyfile 469 | // blmu guards mutations to bucketLocks 470 | blmu sync.Mutex 471 | bucketLocks []*sync.Mutex 472 | } 473 | 474 | func CreateKeyFile(path string, uid uint64, appnum uint64, salt uint64, blockSize int, loadFactor float64) error { 475 | kf, err := openFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644, FADV_RANDOM) 476 | if err != nil { 477 | return fmt.Errorf("create file: %w", err) 478 | } 479 | abandon := func() { 480 | kf.Close() 481 | os.Remove(path) 482 | } 483 | 484 | kh := KeyFileHeader{ 485 | Version: currentVersion, 486 | UID: uid, 487 | AppNum: appnum, 488 | Salt: salt, 489 | Pepper: pepper(salt), 490 | BlockSize: uint16(blockSize), 491 | LoadFactor: uint16(math.Min((MaxUint16+1)*loadFactor, MaxUint16)), 492 | } 493 | 494 | if err := kh.EncodeTo(kf); err != nil { 495 | abandon() 496 | return fmt.Errorf("write header: %w", err) 497 | } 498 | 499 | buf := make([]byte, blockSize) 500 | b := NewBucket(blockSize, buf) 501 | 502 | sw := NewSectionWriter(kf, KeyFileHeaderSize, int64(kh.BlockSize)) 503 | n, err := b.storeFullTo(sw) 504 | if err != nil { 505 | abandon() 506 | return fmt.Errorf("write initial bucket: %w", err) 507 | } 508 | if n != int64(len(buf)) { 509 | abandon() 510 | return fmt.Errorf("write initial bucket (%d!=%d): %w", n, int64(len(buf)), io.ErrShortWrite) 511 | } 512 | 513 | if err := kf.Sync(); err != nil { 514 | abandon() 515 | return fmt.Errorf("sync: %w", err) 516 | } 517 | if err := kf.Close(); err != nil { 518 | abandon() 519 | return fmt.Errorf("close : %w", err) 520 | } 521 | return nil 522 | } 523 | 524 | // OpenKeyFile opens a key file for random reads and writes 525 | func OpenKeyFile(path string) (*KeyFile, error) { 526 | f, err := os.OpenFile(path, os.O_RDWR|os.O_EXCL, 0o644) 527 | if err != nil { 528 | return nil, fmt.Errorf("open: %w", err) 529 | } 530 | 531 | err = Fadvise(int(f.Fd()), 0, 0, FADV_RANDOM) 532 | if err != nil { 533 | return nil, fmt.Errorf("fadvise: %w", err) 534 | } 535 | 536 | st, err := f.Stat() 537 | if err != nil { 538 | return nil, fmt.Errorf("stat key file: %w", err) 539 | } 540 | 541 | var kh KeyFileHeader 542 | if err := kh.DecodeFrom(f, st.Size()); err != nil { 543 | return nil, fmt.Errorf("read key file header: %w", err) 544 | } 545 | if err := kh.Verify(); err != nil { 546 | return nil, fmt.Errorf("verify key file header: %w", err) 547 | } 548 | 549 | bucketLocks := make([]*sync.Mutex, int(kh.Buckets)) 550 | for i := 0; i < int(kh.Buckets); i++ { 551 | bucketLocks[i] = &sync.Mutex{} 552 | } 553 | 554 | return &KeyFile{ 555 | Path: path, 556 | Header: kh, 557 | file: f, 558 | hasher: Hasher(kh.Salt), 559 | elogger: logr.Discard(), 560 | bucketLocks: bucketLocks, 561 | }, nil 562 | } 563 | 564 | func (k *KeyFile) Sync() error { 565 | return k.file.Sync() 566 | } 567 | 568 | func (k *KeyFile) Close() error { 569 | return k.file.Close() 570 | } 571 | 572 | func (k *KeyFile) Size() (int64, error) { 573 | st, err := k.file.Stat() 574 | if 
err != nil { 575 | return 0, err 576 | } 577 | return st.Size(), nil 578 | } 579 | 580 | func (k *KeyFile) BlockSize() uint16 { 581 | return k.Header.BlockSize 582 | } 583 | 584 | func (k *KeyFile) Hash(key []byte) uint64 { 585 | return k.hasher.Hash(key) 586 | } 587 | 588 | func (k *KeyFile) HashString(key string) uint64 { 589 | return k.hasher.HashString(key) 590 | } 591 | 592 | func (k *KeyFile) LoadBucket(idx int) (*Bucket, error) { 593 | var bmu *sync.Mutex 594 | k.blmu.Lock() 595 | if len(k.bucketLocks) > idx { 596 | k.bucketLocks[idx].Lock() 597 | bmu = k.bucketLocks[idx] 598 | } 599 | k.blmu.Unlock() 600 | 601 | if bmu == nil { 602 | if k.elogger.Enabled() { 603 | k.elogger.Error(fmt.Errorf("unknown bucket index"), "attempt to load invalid bucket index", "index", idx, "bucket_count", k.Header.Buckets) 604 | } 605 | panic("attempt to load invalid bucket index") 606 | } 607 | defer bmu.Unlock() 608 | 609 | offset := KeyFileHeaderSize + int64(idx)*int64(k.Header.BlockSize) 610 | b := NewBucket(int(k.Header.BlockSize), make([]byte, int(k.Header.BlockSize))) 611 | 612 | sr := io.NewSectionReader(k.file, offset, int64(k.Header.BlockSize)) 613 | if err := b.loadFullFrom(sr); err != nil { 614 | return nil, fmt.Errorf("read bucket: %w", err) 615 | } 616 | return b, nil 617 | } 618 | 619 | // expects to have exclusive access to b 620 | func (k *KeyFile) PutBucket(idx int, b *Bucket) error { 621 | var bmu *sync.Mutex 622 | k.blmu.Lock() 623 | for idx > len(k.bucketLocks)-1 { 624 | k.bucketLocks = append(k.bucketLocks, &sync.Mutex{}) 625 | } 626 | k.bucketLocks[idx].Lock() 627 | bmu = k.bucketLocks[idx] 628 | k.blmu.Unlock() 629 | if bmu == nil { 630 | panic("attempt to put invalid bucket index") 631 | } 632 | defer bmu.Unlock() 633 | 634 | offset := KeyFileHeaderSize + int64(idx)*int64(k.Header.BlockSize) 635 | sw := NewSectionWriter(k.file, offset, int64(k.Header.BlockSize)) 636 | _, err := b.storeFullTo(sw) 637 | if err != nil { 638 | return fmt.Errorf("write bucket: %w", err) 639 | } 640 | return nil 641 | } 642 | 643 | // BucketScanner returns a BucketScanner that may be used to iterate over the buckets in the key file. 644 | func (k *KeyFile) BucketScanner(df *DataFile) *BucketScanner { 645 | f, err := openFileForScan(k.Path) 646 | if err != nil { 647 | return &BucketScanner{err: err} 648 | } 649 | 650 | r := bufio.NewReaderSize(f, 32*block_size(k.Path)) 651 | _, err = r.Discard(KeyFileHeaderSize) 652 | 653 | return &BucketScanner{ 654 | err: err, 655 | r: r, 656 | closer: f, 657 | bucket: NewBucket(int(k.Header.BlockSize), make([]byte, int(k.Header.BlockSize))), 658 | blockSize: int64(k.Header.BlockSize), 659 | index: -1, 660 | df: df, 661 | elogger: k.elogger, 662 | } 663 | } 664 | 665 | // BucketScanner implements a sequential scan through a key file. Successive calls to the Next method will step through 666 | // the buckets in the file, including spilled buckets in the data file. 667 | type BucketScanner struct { 668 | r *bufio.Reader 669 | closer io.Closer 670 | df *DataFile 671 | bucket *Bucket 672 | blockSize int64 673 | index int 674 | err error 675 | spill int64 // non-zero if next read is a spill to the data store 676 | isSpill bool // true if the current bucket was read from a spill 677 | elogger logr.Logger 678 | } 679 | 680 | // Next reads the next bucket in sequence, including spills to the data store. It returns false 681 | // if it encounters an error or there are no more buckets to read. 
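// A sketch of a full scan over the key file's buckets and their data-file spills
// (assumes kf and df are already open):
//
//	s := kf.BucketScanner(df)
//	defer s.Close()
//	for s.Next() {
//		b := s.Bucket()
//		_ = b.Count() // inspect entries; do not retain b across calls to Next
//	}
//	if err := s.Err(); err != nil {
//		// handle the scan error
//	}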
682 | func (b *BucketScanner) Next() bool { 683 | if b.err != nil { 684 | return false 685 | } 686 | // Is next bucket in a spill? 687 | if b.spill != 0 { 688 | b.err = b.bucket.LoadFrom(b.spill, b.df) 689 | b.isSpill = true 690 | if b.elogger.Enabled() && b.err != nil { 691 | b.elogger.Error(b.err, "reading spill", "index", b.index, "spill", b.spill) 692 | } 693 | } else { 694 | lr := io.LimitedReader{R: b.r, N: b.blockSize} 695 | b.err = b.bucket.loadFullFrom(&lr) 696 | b.isSpill = false 697 | b.index++ 698 | if b.elogger.Enabled() && b.err != nil && b.err != io.EOF { 699 | b.elogger.Error(b.err, "reading bucket", "index", b.index) 700 | } 701 | } 702 | 703 | if b.err == nil { 704 | b.spill = b.bucket.spill 705 | } 706 | 707 | return b.err == nil 708 | } 709 | 710 | // Index returns the index of the current bucket. Should not be called until Next has been called. Spill buckets 711 | // share an index with their parent. 712 | func (b *BucketScanner) Index() int { 713 | return b.index 714 | } 715 | 716 | // IsSpill reports whether the current bucket was read from a data store spill. 717 | func (b *BucketScanner) IsSpill() bool { 718 | return b.isSpill 719 | } 720 | 721 | // Bucket returns the current bucket. Should not be called until Next has been called. The bucket is backed by data 722 | // that may be overwritten with a call to Next so should not be retained. 723 | func (b *BucketScanner) Bucket() *Bucket { 724 | if b.err != nil { 725 | return nil 726 | } 727 | return b.bucket 728 | } 729 | 730 | // Err returns the first non-EOF error that was encountered by the BucketScanner. 731 | func (b *BucketScanner) Err() error { 732 | if b.err == io.EOF { 733 | return nil 734 | } 735 | return b.err 736 | } 737 | 738 | func (b *BucketScanner) Close() error { 739 | return b.closer.Close() 740 | } 741 | 742 | type LogFile struct { 743 | Path string 744 | Header LogFileHeader 745 | 746 | file *os.File 747 | writer *bufio.Writer 748 | elogger logr.Logger 749 | } 750 | 751 | // OpenLogFile opens a log file for appending, creating it if necessary.
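// The methods below suggest the following commit sequence; this is only a sketch of how a
// *BucketCache, a DataFile and a KeyFile fit together (lf, df, kf, cache and path are
// placeholders), not the store's full recovery protocol:
//
//	lf, _ := OpenLogFile(path)
//	_ = lf.Prepare(df, kf)          // record rollback sizes for the data and key files
//	_, _ = cache.WriteDirty(lf, kf) // dirty buckets go to the log, then the key file
//	_ = lf.Truncate()               // discard the log once the key file is synced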
752 | func OpenLogFile(path string) (*LogFile, error) { 753 | lf := &LogFile{ 754 | Path: path, 755 | elogger: logr.Discard(), 756 | } 757 | 758 | if err := lf.open(false); err != nil { 759 | return nil, err 760 | } 761 | 762 | return lf, nil 763 | } 764 | 765 | func (l *LogFile) open(reopen bool) error { 766 | flags := os.O_APPEND | os.O_RDWR | os.O_CREATE 767 | if !reopen { 768 | flags |= os.O_EXCL 769 | } 770 | f, err := os.OpenFile(l.Path, flags, 0o644) 771 | if err != nil { 772 | return fmt.Errorf("open: %w", err) 773 | } 774 | 775 | err = Fadvise(int(f.Fd()), 0, 0, FADV_RANDOM) 776 | if err != nil { 777 | return fmt.Errorf("fadvise: %w", err) 778 | } 779 | 780 | l.file = f 781 | if l.writer == nil { 782 | // Buffered writes to avoid write amplification 783 | l.writer = bufio.NewWriterSize(f, 32*block_size(l.Path)) 784 | } else { 785 | l.writer.Reset(l.file) 786 | } 787 | return nil 788 | } 789 | 790 | func (l *LogFile) Sync() error { 791 | if err := l.writer.Flush(); err != nil { 792 | return err 793 | } 794 | return l.file.Sync() 795 | } 796 | 797 | func (l *LogFile) Flush() error { 798 | return l.writer.Flush() 799 | } 800 | 801 | func (l *LogFile) Close() error { 802 | if err := l.writer.Flush(); err != nil { 803 | return err 804 | } 805 | return l.file.Close() 806 | } 807 | 808 | func (l *LogFile) Truncate() error { 809 | // file must be closed before truncate on windows 810 | if err := l.Close(); err != nil { 811 | return err 812 | } 813 | 814 | if err := os.Truncate(l.Path, 0); err != nil { 815 | return err 816 | } 817 | return l.open(true) 818 | } 819 | 820 | func (l *LogFile) Prepare(df *DataFile, kf *KeyFile) error { 821 | // Prepare rollback information 822 | lh := LogFileHeader{ 823 | Version: currentVersion, 824 | UID: kf.Header.UID, 825 | AppNum: kf.Header.AppNum, 826 | Salt: kf.Header.Salt, 827 | Pepper: pepper(kf.Header.Salt), 828 | BlockSize: kf.Header.BlockSize, 829 | } 830 | 831 | var err error 832 | lh.DatFileSize, err = df.Size() 833 | if err != nil { 834 | return fmt.Errorf("data file size: %w", err) 835 | } 836 | 837 | lh.KeyFileSize, err = kf.Size() 838 | if err != nil { 839 | return fmt.Errorf("key file size: %w", err) 840 | } 841 | 842 | if err := lh.EncodeTo(l.writer); err != nil { 843 | return fmt.Errorf("write log file header: %w", err) 844 | } 845 | 846 | // Checkpoint 847 | if err := l.Sync(); err != nil { 848 | return fmt.Errorf("sync: %w", err) 849 | } 850 | 851 | return nil 852 | } 853 | 854 | func (l *LogFile) AppendBucket(idx int, b *Bucket) (int64, error) { 855 | var idxBuf [SizeUint64]byte 856 | EncodeUint64(idxBuf[:], uint64(idx)) 857 | n, err := l.writer.Write(idxBuf[:]) 858 | if err == nil && n != len(idxBuf) { 859 | err = io.ErrShortWrite 860 | } 861 | if err != nil { 862 | return int64(n), fmt.Errorf("write index: %w", err) 863 | } 864 | 865 | bn, err := b.WriteTo(l.writer) 866 | if err != nil { 867 | if l.elogger.Enabled() && errors.Is(err, io.ErrShortWrite) { 868 | l.elogger.Info("log file: short write on bucket data", "expected", b.ActualSize(), "wrote", bn) 869 | } 870 | return bn + int64(n), fmt.Errorf("write data: %w", err) 871 | } 872 | 873 | return bn + int64(n), nil 874 | } 875 | 876 | // SectionWriter implements Write on a section of an underlying WriterAt 877 | type SectionWriter struct { 878 | w io.WriterAt 879 | offset int64 880 | limit int64 881 | } 882 | 883 | func NewSectionWriter(w io.WriterAt, offset int64, size int64) *SectionWriter { 884 | return &SectionWriter{ 885 | w: w, 886 | offset: offset, 887 | limit: offset + size, 
888 | } 889 | } 890 | 891 | func (s *SectionWriter) Write(v []byte) (int, error) { 892 | size := int64(len(v)) 893 | if size > s.limit-s.offset { 894 | size = s.limit - s.offset 895 | } 896 | 897 | n, err := s.w.WriteAt(v[:size], s.offset) 898 | s.offset += int64(n) 899 | if err == nil && n < len(v) { 900 | err = io.ErrShortWrite 901 | } 902 | return n, err 903 | } 904 | -------------------------------------------------------------------------------- /internal/file_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestCreateKeyFile(t *testing.T) { 9 | tmpdir, err := os.MkdirTemp("", "gonudb.*") 10 | if err != nil { 11 | t.Fatalf("unexpected error creating temp directory: %v", err) 12 | } 13 | defer os.RemoveAll(tmpdir) 14 | 15 | const blockSize = 256 16 | 17 | filename := tmpdir + "key" 18 | err = CreateKeyFile(filename, 121212, 222222, 333333, blockSize, 0.7) 19 | if err != nil { 20 | t.Errorf("CreateKeyFile: unexpected error: %v", err) 21 | } 22 | 23 | st, err := os.Stat(filename) 24 | if err != nil { 25 | if os.IsNotExist(err) { 26 | t.Fatalf("key file was not created") 27 | } 28 | t.Fatalf("Stat: unexpected error: %v", err) 29 | } 30 | 31 | wantSize := int64(KeyFileHeaderSize + blockSize) 32 | if st.Size() != wantSize { 33 | t.Errorf("got size %d, wanted %d", st.Size(), wantSize) 34 | } 35 | 36 | f, err := os.OpenFile(filename, os.O_RDONLY, 0o644) 37 | if err != nil { 38 | t.Fatalf("OpenFile: unexpected error: %v", err) 39 | } 40 | defer f.Close() 41 | 42 | var kh KeyFileHeader 43 | if err := kh.DecodeFrom(f, st.Size()); err != nil { 44 | t.Fatalf("DecodeFrom: unexpected error: %v", err) 45 | } 46 | if err := kh.Verify(); err != nil { 47 | t.Fatalf("Verify: unexpected error: %v", err) 48 | } 49 | 50 | if kh.UID != 121212 { 51 | t.Errorf("got uid %d, wanted %d", kh.UID, 121212) 52 | } 53 | if kh.AppNum != 222222 { 54 | t.Errorf("got appnum %d, wanted %d", kh.AppNum, 222222) 55 | } 56 | if kh.Salt != 333333 { 57 | t.Errorf("got salt %d, wanted %d", kh.Salt, 333333) 58 | } 59 | 60 | blob := make([]byte, blockSize) 61 | if _, err := f.ReadAt(blob, KeyFileHeaderSize); err != nil { 62 | t.Fatalf("ReadAt: unexpected error: %v", err) 63 | } 64 | 65 | for i, b := range blob { 66 | if b != 0 { 67 | t.Fatalf("non zero byte found in bucket blob at %d", i) 68 | } 69 | } 70 | } 71 | 72 | func TestTruncateLogFileWithoutError(t *testing.T) { 73 | tmpdir, err := os.MkdirTemp("", "gonudb.*") 74 | if err != nil { 75 | t.Fatalf("unexpected error creating temp directory: %v", err) 76 | } 77 | defer os.RemoveAll(tmpdir) 78 | 79 | filename := tmpdir + "log" 80 | lf, err := OpenLogFile(filename) 81 | if err != nil { 82 | t.Errorf("OpenLogFile: unexpected error: %v", err) 83 | } 84 | 85 | blob := make([]byte, BucketHeaderSize+BucketEntrySize*2) 86 | b := &Bucket{ 87 | blockSize: len(blob), 88 | blob: blob, 89 | } 90 | 91 | entries := []Entry{ 92 | { 93 | Offset: 15555, 94 | Size: 14444, 95 | Hash: 19999, 96 | }, 97 | { 98 | Offset: 25555, 99 | Size: 24444, 100 | Hash: 29999, 101 | }, 102 | } 103 | 104 | for i := range entries { 105 | b.insert(entries[i].Offset, entries[i].Size, entries[i].Hash) 106 | } 107 | 108 | if _, err := lf.AppendBucket(0, b); err != nil { 109 | t.Errorf("AppendBucket: unexpected error: %v", err) 110 | } 111 | 112 | if err := lf.Flush(); err != nil { 113 | t.Errorf("Flush: unexpected error: %v", err) 114 | } 115 | 116 | if err := lf.Truncate(); err != nil { 117 | 
t.Fatalf("Truncate: unexpected error: %v", err) 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /internal/format.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | ) 7 | 8 | // Format of the nudb files: 9 | 10 | /* 11 | 12 | Integer sizes 13 | 14 | block_size less than 32 bits (maybe restrict it to 16 bits) 15 | buckets more than 32 bits 16 | capacity (same as bucket index) 17 | file offsets 63 bits 18 | hash up to 64 bits (48 currently) 19 | item index less than 32 bits (index of item in bucket) 20 | modulus (same as buckets) 21 | value size up to 32 bits (or 32-bit builds can't read it) 22 | 23 | */ 24 | 25 | const currentVersion = 2 26 | 27 | const ( 28 | DatFileHeaderSize = SizeUint64 + // Type 29 | SizeUint16 + // Version 30 | SizeUint64 + // UID 31 | SizeUint64 + // Appnum 32 | SizeUint16 + // REMOVED in version 2, was KeySize 33 | 64 // (Reserved) 34 | 35 | KeyFileHeaderSize = 8 + // Type 36 | SizeUint16 + // Version 37 | SizeUint64 + // UID 38 | SizeUint64 + // Appnum 39 | SizeUint16 + // REMOVED in version 2, was KeySize 40 | SizeUint64 + // Salt 41 | SizeUint64 + // Pepper 42 | SizeUint16 + // BlockSize 43 | SizeUint16 + // LoadFactor 44 | 56 // (Reserved) 45 | 46 | LogFileHeaderSize = 8 + // Type 47 | SizeUint16 + // Version 48 | SizeUint64 + // UID 49 | SizeUint64 + // Appnum 50 | SizeUint16 + // REMOVED in version 2, was KeySize 51 | SizeUint64 + // Salt 52 | SizeUint64 + // Pepper 53 | SizeUint16 + // BlockSize 54 | SizeUint64 + // KeyFileSize 55 | SizeUint64 // DataFileSize 56 | 57 | SpillHeaderSize = SizeUint48 + // zero marker 58 | SizeUint16 // size 59 | ) 60 | 61 | var ( 62 | DatFileHeaderType = []byte("gonudbdt") 63 | KeyFileHeaderType = []byte("gonudbky") 64 | LogFileHeaderType = []byte("gonudblg") 65 | ) 66 | 67 | type DatFileHeader struct { 68 | Type [8]byte 69 | Version uint16 70 | UID uint64 71 | AppNum uint64 72 | } 73 | 74 | func (*DatFileHeader) Size() int { 75 | return DatFileHeaderSize 76 | } 77 | 78 | // DecodeFrom reads d from a reader 79 | func (d *DatFileHeader) DecodeFrom(r io.Reader) error { 80 | var data [DatFileHeaderSize]byte 81 | if _, err := io.ReadFull(r, data[:]); err != nil { 82 | return err 83 | } 84 | 85 | copy(d.Type[:], data[0:8]) 86 | d.Version = DecodeUint16(data[8:10]) 87 | d.UID = DecodeUint64(data[10:18]) 88 | d.AppNum = DecodeUint64(data[18:26]) 89 | // data[26:28] is unused, was key size 90 | 91 | return nil 92 | } 93 | 94 | // EncodeTo writes d to a writer 95 | func (d *DatFileHeader) EncodeTo(w io.Writer) error { 96 | var data [DatFileHeaderSize]byte 97 | 98 | copy(data[0:8], DatFileHeaderType) 99 | EncodeUint16(data[8:10], d.Version) 100 | EncodeUint64(data[10:18], d.UID) 101 | EncodeUint64(data[18:26], d.AppNum) 102 | // data[26:28] is unused, was key size 103 | 104 | n, err := w.Write(data[:]) 105 | if err != nil { 106 | return err 107 | } 108 | if n != len(data) { 109 | return io.ErrShortWrite 110 | } 111 | 112 | return nil 113 | } 114 | 115 | // Verify contents of data file header 116 | func (d *DatFileHeader) Verify() error { 117 | if !bytes.Equal(DatFileHeaderType, d.Type[:]) { 118 | return ErrNotDataFile 119 | } 120 | 121 | if d.Version != currentVersion { 122 | return ErrDifferentVersion 123 | } 124 | 125 | return nil 126 | } 127 | 128 | // VerifyMatchingKey makes sure key file and data file headers match 129 | func (d *DatFileHeader) VerifyMatchingKey(k *KeyFileHeader) error 
{ 130 | if k.UID != d.UID { 131 | return ErrUIDMismatch 132 | } 133 | if k.AppNum != d.AppNum { 134 | return ErrAppNumMismatch 135 | } 136 | 137 | return nil 138 | } 139 | 140 | type KeyFileHeader struct { 141 | Type [8]byte 142 | Version uint16 143 | UID uint64 144 | AppNum uint64 145 | 146 | Salt uint64 147 | Pepper uint64 148 | BlockSize uint16 149 | LoadFactor uint16 150 | 151 | // Computed values 152 | Capacity int // Entries per bucket 153 | Buckets int // Number of buckets 154 | Modulus uint64 // pow(2,ceil(log2(buckets))) 155 | } 156 | 157 | func (k *KeyFileHeader) Size() int { 158 | return KeyFileHeaderSize 159 | } 160 | 161 | func (k *KeyFileHeader) DecodeFrom(r io.Reader, fileSize int64) error { 162 | var data [KeyFileHeaderSize]byte 163 | if _, err := io.ReadFull(r, data[:]); err != nil { 164 | return err 165 | } 166 | 167 | copy(k.Type[:], data[0:8]) 168 | k.Version = DecodeUint16(data[8:10]) 169 | k.UID = DecodeUint64(data[10:18]) 170 | k.AppNum = DecodeUint64(data[18:26]) 171 | // data[26:28] is unused, was key size 172 | k.Salt = DecodeUint64(data[28:36]) 173 | k.Pepper = DecodeUint64(data[36:44]) 174 | k.BlockSize = DecodeUint16(data[44:46]) 175 | k.LoadFactor = DecodeUint16(data[46:48]) 176 | 177 | k.Capacity = BucketCapacity(int(k.BlockSize)) 178 | if fileSize > int64(k.BlockSize) { 179 | if k.BlockSize > 0 { 180 | k.Buckets = int((fileSize - int64(KeyFileHeaderSize)) / int64(k.BlockSize)) 181 | } else { 182 | // Corruption or logic error 183 | k.Buckets = 0 184 | } 185 | } else { 186 | k.Buckets = 0 187 | } 188 | 189 | k.Modulus = ceil_pow2(uint64(k.Buckets)) 190 | 191 | return nil 192 | } 193 | 194 | func (k *KeyFileHeader) EncodeTo(w io.Writer) error { 195 | var data [KeyFileHeaderSize]byte 196 | 197 | copy(data[0:8], KeyFileHeaderType) 198 | EncodeUint16(data[8:10], k.Version) 199 | EncodeUint64(data[10:18], k.UID) 200 | EncodeUint64(data[18:26], k.AppNum) 201 | // data[26:28] is unused, was key size 202 | EncodeUint64(data[28:36], k.Salt) 203 | EncodeUint64(data[36:44], k.Pepper) 204 | EncodeUint16(data[44:46], k.BlockSize) 205 | EncodeUint16(data[46:48], k.LoadFactor) 206 | 207 | n, err := w.Write(data[:]) 208 | if err != nil { 209 | return err 210 | } 211 | if n != len(data) { 212 | return io.ErrShortWrite 213 | } 214 | 215 | return nil 216 | } 217 | 218 | // Verify contents of key file header 219 | func (k *KeyFileHeader) Verify() error { 220 | if !bytes.Equal(KeyFileHeaderType, k.Type[:]) { 221 | return ErrNotKeyFile 222 | } 223 | 224 | if k.Version != currentVersion { 225 | return ErrDifferentVersion 226 | } 227 | 228 | if k.Pepper != pepper(k.Salt) { 229 | return ErrHashMismatch 230 | } 231 | 232 | if k.LoadFactor < 1 { 233 | return ErrInvalidLoadFactor 234 | } 235 | if k.Capacity < 1 { 236 | return ErrInvalidCapacity 237 | } 238 | if k.Buckets < 1 { 239 | return ErrInvalidBucketCount 240 | } 241 | 242 | return nil 243 | } 244 | 245 | type LogFileHeader struct { 246 | Type [8]byte 247 | Version uint16 248 | UID uint64 249 | AppNum uint64 250 | Salt uint64 251 | Pepper uint64 252 | BlockSize uint16 253 | KeyFileSize int64 254 | DatFileSize int64 255 | } 256 | 257 | func (l *LogFileHeader) Size() int { 258 | return LogFileHeaderSize 259 | } 260 | 261 | func (l *LogFileHeader) DecodeFrom(r io.Reader) error { 262 | var data [LogFileHeaderSize]byte 263 | if _, err := io.ReadFull(r, data[:]); err != nil { 264 | return err 265 | } 266 | 267 | copy(l.Type[:], data[0:8]) 268 | l.Version = DecodeUint16(data[8:10]) 269 | l.UID = DecodeUint64(data[10:18]) 270 | l.AppNum = 
DecodeUint64(data[18:26]) 271 | // data[26:28] was KeySize 272 | l.Salt = DecodeUint64(data[28:36]) 273 | l.Pepper = DecodeUint64(data[36:44]) 274 | l.BlockSize = DecodeUint16(data[44:46]) 275 | l.KeyFileSize = int64(DecodeUint64(data[46:54])) 276 | l.DatFileSize = int64(DecodeUint64(data[54:62])) 277 | 278 | return nil 279 | } 280 | 281 | func (l *LogFileHeader) EncodeTo(w io.Writer) error { 282 | var data [LogFileHeaderSize]byte 283 | 284 | copy(data[0:8], LogFileHeaderType) 285 | EncodeUint16(data[8:10], l.Version) 286 | EncodeUint64(data[10:18], l.UID) 287 | EncodeUint64(data[18:26], l.AppNum) 288 | // data[26:28] was KeySize 289 | EncodeUint64(data[28:36], l.Salt) 290 | EncodeUint64(data[36:44], l.Pepper) 291 | EncodeUint16(data[44:46], l.BlockSize) 292 | EncodeUint64(data[46:54], uint64(l.KeyFileSize)) 293 | EncodeUint64(data[54:62], uint64(l.DatFileSize)) 294 | 295 | n, err := w.Write(data[:]) 296 | if err != nil { 297 | return err 298 | } 299 | if n != len(data) { 300 | return io.ErrShortWrite 301 | } 302 | 303 | return nil 304 | } 305 | 306 | type DataRecord struct { 307 | hash uint64 308 | key string 309 | data []byte 310 | offset int64 311 | size int64 312 | } 313 | 314 | // DataRecordHeader is prepended to each record written to the data file. 315 | // Layout is: 316 | // 317 | // 6 bytes DataSize 318 | // 2 bytes KeySize 319 | // n bytes Key 320 | type DataRecordHeader struct { 321 | DataSize int64 322 | KeySize uint16 323 | Key []byte 324 | } 325 | 326 | // IsData reports whether the data record contains data 327 | func (d *DataRecordHeader) IsData() bool { 328 | return d.DataSize != 0 329 | } 330 | 331 | // IsSpill reports whether the data record is a bucket spill 332 | func (d *DataRecordHeader) IsSpill() bool { 333 | return d.DataSize == 0 334 | } 335 | 336 | // Size returns the size of the header in bytes 337 | func (d *DataRecordHeader) Size() int64 { 338 | return SizeUint48 + SizeUint16 + int64(len(d.Key)) 339 | } 340 | 341 | type BucketRecord struct { 342 | idx int 343 | bucket *Bucket 344 | } 345 | 346 | // ceil_pow2 returns the closest power of 2 not less than v 347 | func ceil_pow2(x uint64) uint64 { 348 | t := [6]uint64{ 349 | 0xFFFFFFFF00000000, 350 | 0x00000000FFFF0000, 351 | 0x000000000000FF00, 352 | 0x00000000000000F0, 353 | 0x000000000000000C, 354 | 0x0000000000000002, 355 | } 356 | 357 | var y int 358 | if (x & (x - 1)) != 0 { 359 | y = 1 360 | } 361 | var j int = 32 362 | 363 | for i := 0; i < 6; i++ { 364 | var k int 365 | if (x & t[i]) != 0 { 366 | k = j 367 | } 368 | y += k 369 | x >>= k 370 | j >>= 1 371 | } 372 | 373 | return 1 << y 374 | } 375 | -------------------------------------------------------------------------------- /internal/hasher.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "crypto/rand" 5 | "encoding/binary" 6 | 7 | "github.com/OneOfOne/xxhash" 8 | ) 9 | 10 | type Hasher uint64 11 | 12 | func (h Hasher) Hash(data []byte) uint64 { 13 | return xxhash.Checksum64S(data, uint64(h)) 14 | } 15 | 16 | func (h Hasher) HashString(data string) uint64 { 17 | return xxhash.ChecksumString64S(data, uint64(h)) 18 | } 19 | 20 | // pepper computes pepper from salt 21 | func pepper(salt uint64) uint64 { 22 | var data [8]byte 23 | binary.BigEndian.PutUint64(data[:], salt) 24 | return Hasher(salt).Hash(data[:]) 25 | } 26 | 27 | // NewSalt returns a random salt or panics if the system source of entropy 28 | // cannot be read 29 | func NewSalt() uint64 { 30 | var v uint64 31 | err 
:= binary.Read(rand.Reader, binary.BigEndian, &v) 32 | if err != nil { 33 | panic(err.Error()) 34 | } 35 | return v 36 | } 37 | 38 | // NewUID returns a random identifier or panics if the system source of entropy 39 | // cannot be read 40 | func NewUID() uint64 { 41 | var v uint64 42 | err := binary.Read(rand.Reader, binary.BigEndian, &v) 43 | if err != nil { 44 | panic(err.Error()) 45 | } 46 | return v 47 | } 48 | -------------------------------------------------------------------------------- /internal/pool.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // Buffers data records in a map 9 | type Pool struct { 10 | mu sync.RWMutex // guards index, records and dataSize 11 | index map[string]int 12 | records []DataRecord 13 | dataSize int 14 | } 15 | 16 | func NewPool(sizeHint int) *Pool { 17 | return &Pool{ 18 | index: make(map[string]int, sizeHint), 19 | records: make([]DataRecord, sizeHint), 20 | } 21 | } 22 | 23 | func (p *Pool) IsEmpty() bool { 24 | p.mu.RLock() 25 | defer p.mu.RUnlock() 26 | return len(p.records) == 0 27 | } 28 | 29 | // Count returns the number of data records in the pool 30 | func (p *Pool) Count() int { 31 | p.mu.RLock() 32 | defer p.mu.RUnlock() 33 | return len(p.records) 34 | } 35 | 36 | // DataSize returns the sum of data sizes in the pool 37 | func (p *Pool) DataSize() int { 38 | p.mu.RLock() 39 | defer p.mu.RUnlock() 40 | return p.dataSize 41 | } 42 | 43 | func (p *Pool) Clear() { 44 | p.mu.Lock() 45 | defer p.mu.Unlock() 46 | p.dataSize = 0 47 | p.records = p.records[:0] 48 | for k := range p.index { 49 | delete(p.index, k) 50 | } 51 | } 52 | 53 | func (p *Pool) Find(key string) ([]byte, bool) { 54 | p.mu.RLock() 55 | defer p.mu.RUnlock() 56 | idx, exists := p.index[key] 57 | if !exists { 58 | return nil, false 59 | } 60 | return p.records[idx].data, true 61 | } 62 | 63 | func (p *Pool) Has(key string) bool { 64 | p.mu.RLock() 65 | defer p.mu.RUnlock() 66 | _, exists := p.index[key] 67 | return exists 68 | } 69 | 70 | func (p *Pool) Insert(hash uint64, key string, value []byte) { 71 | p.mu.Lock() 72 | defer p.mu.Unlock() 73 | 74 | if _, exists := p.index[key]; exists { 75 | panic("duplicate key inserted: " + key) 76 | } 77 | 78 | // TODO: review need to make copy of value 79 | r := DataRecord{ 80 | hash: hash, 81 | key: key, 82 | data: make([]byte, len(value)), 83 | size: int64(len(value)), 84 | } 85 | copy(r.data, value) 86 | 87 | p.records = append(p.records, r) 88 | p.index[key] = len(p.records) - 1 89 | p.dataSize += len(value) 90 | } 91 | 92 | func (p *Pool) WithRecords(fn func([]DataRecord) error) error { 93 | p.mu.RLock() 94 | defer p.mu.RUnlock() 95 | return fn(p.records) 96 | } 97 | 98 | func (p *Pool) WriteRecords(df *DataFile) (int64, error) { 99 | p.mu.RLock() 100 | defer p.mu.RUnlock() 101 | 102 | written := int64(0) 103 | for i := range p.records { 104 | offset, err := df.AppendRecord(&p.records[i]) 105 | if err != nil { 106 | return written, fmt.Errorf("encode record: %w", err) 107 | } 108 | // if s.tlogger.Enabled() { 109 | // s.tlogger.Info("wrote p0 record", "index", i, "offset", offset, "record_key", rs[i].key, "record_size", rs[i].size) 110 | // } 111 | p.records[i].offset = offset 112 | written += p.records[i].size 113 | } 114 | 115 | return written, nil 116 | } 117 | -------------------------------------------------------------------------------- /internal/store.go: 
-------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "math" 9 | "os" 10 | "sync" 11 | "time" 12 | 13 | "github.com/go-logr/logr" 14 | ) 15 | 16 | type Store struct { 17 | // Fields written when open or close is called 18 | df *DataFile 19 | kf *KeyFile 20 | lf *LogFile 21 | 22 | // Currently imu guards all calls to p0 and bc methods 23 | imu sync.Mutex 24 | p0 *Pool 25 | bc *BucketCache 26 | 27 | rmu sync.Mutex // guards acess to rate and when 28 | rate float64 // rate at which data can be flushed 29 | when time.Time 30 | 31 | emu sync.Mutex // guards access to open and err 32 | open bool 33 | err error 34 | 35 | monitor chan struct{} 36 | 37 | elogger logr.Logger // error logger 38 | dlogger logr.Logger // diagnostics logger 39 | tlogger logr.Logger // trace logger 40 | } 41 | 42 | func CreateStore(datPath, keyPath, logPath string, appnum, uid, salt uint64, blockSize int, loadFactor float64) error { 43 | // TODO make this a constant MaxBlockSize 44 | if blockSize > MaxBlockSize { 45 | return ErrInvalidBlockSize 46 | } 47 | 48 | if loadFactor <= 0 || loadFactor >= 1 { 49 | return ErrInvalidLoadFactor 50 | } 51 | 52 | capacity := BucketCapacity(blockSize) 53 | if capacity < 1 { 54 | return ErrInvalidBlockSize 55 | } 56 | 57 | if err := CreateDataFile(datPath, appnum, uid); err != nil { 58 | return fmt.Errorf("create data file: %w", err) 59 | } 60 | 61 | if err := CreateKeyFile(keyPath, uid, appnum, salt, blockSize, loadFactor); err != nil { 62 | return fmt.Errorf("create key file: %w", err) 63 | } 64 | 65 | return nil 66 | } 67 | 68 | func OpenStore(datPath, keyPath, logPath string, syncInterval time.Duration, elogger logr.Logger, dlogger logr.Logger, tlogger logr.Logger) (*Store, error) { 69 | df, err := OpenDataFile(datPath) 70 | if err != nil { 71 | return nil, fmt.Errorf("open data file: %w", err) 72 | } 73 | 74 | abandon := func() { 75 | df.Close() // TODO: handle close error 76 | } 77 | 78 | kf, err := OpenKeyFile(keyPath) 79 | if err != nil { 80 | abandon() 81 | return nil, fmt.Errorf("open key file: %w", err) 82 | } 83 | 84 | abandon = func() { 85 | df.Close() // TODO: handle close error 86 | kf.Close() // TODO: handle close error 87 | } 88 | 89 | if err := df.Header.VerifyMatchingKey(&kf.Header); err != nil { 90 | abandon() 91 | return nil, fmt.Errorf("verify key file matches data file: %w", err) 92 | } 93 | 94 | if kf.Header.Buckets < 1 { 95 | abandon() 96 | return nil, ErrShortKeyFile 97 | } 98 | 99 | lf, err := OpenLogFile(logPath) 100 | if err != nil { 101 | abandon() 102 | 103 | var pathErr *os.PathError 104 | if errors.As(err, &pathErr) && os.IsExist(pathErr) { 105 | return nil, fmt.Errorf("log file exists, store requires recovery") 106 | } 107 | 108 | return nil, fmt.Errorf("open log file: %w", err) 109 | } 110 | 111 | df.elogger = elogger 112 | kf.elogger = elogger 113 | lf.elogger = elogger 114 | 115 | s := &Store{ 116 | when: time.Now(), 117 | df: df, 118 | kf: kf, 119 | lf: lf, 120 | 121 | p0: NewPool(0), 122 | bc: &BucketCache{ 123 | bucketSize: int(kf.Header.BlockSize), 124 | modulus: ceil_pow2(uint64(kf.Header.Buckets)), 125 | buckets: make([]*Bucket, int(kf.Header.Buckets)), 126 | dirty: make([]bool, int(kf.Header.Buckets)), 127 | threshold: (int(kf.Header.LoadFactor) * int(kf.Header.Capacity)) / 65536, 128 | tlogger: tlogger, 129 | }, 130 | 131 | open: true, 132 | monitor: make(chan struct{}), 133 | 134 | elogger: elogger, 135 | dlogger: dlogger, 136 | 
tlogger: tlogger, 137 | } 138 | 139 | for idx := range s.bc.buckets { 140 | b, err := kf.LoadBucket(idx) 141 | if err != nil { 142 | return nil, fmt.Errorf("read bucket: %w", err) 143 | } 144 | s.bc.buckets[idx] = b 145 | } 146 | s.bc.computeStats(df) 147 | 148 | // Flush writes automatically 149 | go func() { 150 | d := time.NewTicker(syncInterval) 151 | 152 | for { 153 | select { 154 | 155 | case <-s.monitor: 156 | d.Stop() 157 | select { 158 | case <-d.C: 159 | default: 160 | } 161 | return 162 | 163 | case <-d.C: 164 | if s.tlogger.Enabled() { 165 | s.tlogger.Info("Background flush") 166 | } 167 | s.Flush() 168 | 169 | } 170 | } 171 | }() 172 | 173 | return s, nil 174 | } 175 | 176 | func (s *Store) Close() error { 177 | s.emu.Lock() 178 | open := s.open 179 | s.open = false 180 | s.emu.Unlock() 181 | if !open { 182 | return nil 183 | } 184 | 185 | close(s.monitor) 186 | 187 | s.imu.Lock() 188 | defer s.imu.Unlock() 189 | 190 | if !s.p0.IsEmpty() { 191 | if _, err := s.commit(); err != nil { 192 | if s.elogger.Enabled() { 193 | s.elogger.Error(err, "commit") 194 | } 195 | s.setErr(err) 196 | } 197 | } 198 | 199 | // Return if the store is in an error state, such as from a failed flush 200 | if err := s.Err(); err != nil { 201 | return err 202 | } 203 | 204 | if s.lf != nil { 205 | if err := s.lf.Close(); err != nil { 206 | return fmt.Errorf("close log file: %w", err) 207 | } 208 | 209 | if err := os.Remove(s.lf.Path); err != nil { 210 | return fmt.Errorf("delete log file: %w", err) 211 | } 212 | } 213 | 214 | if err := s.kf.Close(); err != nil { 215 | return fmt.Errorf("close key file: %w", err) 216 | } 217 | 218 | if err := s.df.Close(); err != nil { 219 | return fmt.Errorf("close data file: %w", err) 220 | } 221 | 222 | return nil 223 | } 224 | 225 | // Err returns an error if the store is in an error state, nil otherwise 226 | func (s *Store) Err() error { 227 | s.emu.Lock() 228 | defer s.emu.Unlock() 229 | return s.err 230 | } 231 | 232 | func (s *Store) setErr(err error) { 233 | s.emu.Lock() 234 | s.err = err 235 | s.emu.Unlock() 236 | } 237 | 238 | func (s *Store) DataFile() *DataFile { return s.df } 239 | func (s *Store) KeyFile() *KeyFile { return s.kf } 240 | func (s *Store) LogFile() *LogFile { return s.lf } 241 | 242 | func (s *Store) RecordCount() int { 243 | return s.bc.EntryCount() 244 | } 245 | 246 | func (s *Store) Rate() float64 { 247 | s.rmu.Lock() 248 | defer s.rmu.Unlock() 249 | return s.rate 250 | } 251 | 252 | func (s *Store) Insert(key string, data []byte) error { 253 | if s.tlogger.Enabled() { 254 | s.tlogger.Info("Store.Insert", "key", key, "data_len", len(data)) 255 | } 256 | 257 | // Return if the store is in an error state, such as from a failed flush 258 | if err := s.Err(); err != nil { 259 | return err 260 | } 261 | if len(key) == 0 { 262 | return ErrKeyMissing 263 | } else if len(key) > MaxKeySize { 264 | return ErrKeyTooLarge 265 | } else if len(data) == 0 { 266 | return ErrDataMissing 267 | } else if len(data) > MaxDataSize { 268 | return ErrDataTooLarge 269 | } 270 | 271 | s.imu.Lock() 272 | err := s.insert(key, data) 273 | s.imu.Unlock() 274 | 275 | if err != nil { 276 | return err 277 | } 278 | 279 | // Calculate throttling 280 | now := time.Now() 281 | s.rmu.Lock() 282 | elapsed := now.Sub(s.when) 283 | work := s.p0.DataSize() + 3*s.p0.Count()*int(s.kf.Header.BlockSize) // TODO: move this calculation into Pool 284 | rate := math.Ceil(float64(work) / elapsed.Seconds()) 285 | sleep := s.rate > 0 && rate > s.rate 286 | s.rmu.Unlock() 287 | 288 | if 
s.dlogger.Enabled() { 289 | s.dlogger.Info("insert work rate", "rate", rate, "work", work, "time", elapsed.Seconds(), "throttle", sleep) 290 | } 291 | 292 | // The caller of insert must be blocked when the rate of insertion 293 | // (measured in approximate bytes per second) exceeds the maximum rate 294 | // that can be flushed. The precise sleep duration is not important. 295 | 296 | if sleep { 297 | time.Sleep(25 * time.Millisecond) 298 | } 299 | 300 | return nil 301 | } 302 | 303 | // insert expects caller to hold s.imu lock 304 | func (s *Store) insert(key string, data []byte) error { 305 | h := s.kf.HashString(key) 306 | if s.p0.Has(key) { 307 | return ErrKeyExists 308 | } 309 | 310 | found, err := s.bc.Exists(h, key, s.df) 311 | if err != nil { 312 | return fmt.Errorf("exists in bucket: %w", err) 313 | } 314 | if found { 315 | return ErrKeyExists 316 | } 317 | 318 | // Perform insert 319 | if s.tlogger.Enabled() { 320 | s.tlogger.Info("inserting into pool p1", "key", key, "size", len(data)) 321 | } 322 | s.p0.Insert(h, key, data) 323 | 324 | return nil 325 | } 326 | 327 | func (s *Store) Flush() { 328 | if s.tlogger.Enabled() { 329 | s.tlogger.Info("Store.Flush") 330 | } 331 | 332 | s.rmu.Lock() 333 | s.when = time.Now() 334 | s.rmu.Unlock() 335 | 336 | s.imu.Lock() 337 | defer s.imu.Unlock() 338 | 339 | if s.p0.IsEmpty() { 340 | // Nothing to flush 341 | return 342 | } 343 | 344 | work, err := s.commit() 345 | if err != nil { 346 | if s.elogger.Enabled() { 347 | s.elogger.Error(err, "flush") 348 | } 349 | s.setErr(err) 350 | return 351 | } 352 | 353 | now := time.Now() 354 | s.rmu.Lock() 355 | elapsed := now.Sub(s.when) 356 | s.rate = math.Ceil(float64(work) / elapsed.Seconds()) 357 | s.rmu.Unlock() 358 | 359 | if s.dlogger.Enabled() { 360 | s.dlogger.Info("flush work rate", "rate", s.rate, "work", work, "time", elapsed.Seconds()) 361 | } 362 | } 363 | 364 | // Currently expects s.imu to be held 365 | func (s *Store) commit() (int64, error) { 366 | if s.tlogger.Enabled() { 367 | s.tlogger.Info("Store.commit") 368 | } 369 | 370 | if err := s.lf.Prepare(s.df, s.kf); err != nil { 371 | return 0, fmt.Errorf("prepare log: %w", err) 372 | } 373 | 374 | // Append data and spills to data file 375 | 376 | work, err := s.p0.WriteRecords(s.df) 377 | if err != nil { 378 | return 0, fmt.Errorf("write data file: %w", err) 379 | } 380 | 381 | if err := s.p0.WithRecords(func(rs []DataRecord) error { 382 | for i := range rs { 383 | err := s.bc.Insert(rs[i].offset, rs[i].size, rs[i].hash, s.df) 384 | if err != nil { 385 | return fmt.Errorf("bucket cache insert: %w", err) 386 | } 387 | } 388 | return nil 389 | }); err != nil { 390 | return 0, fmt.Errorf("write to buckets: %w", err) 391 | } 392 | 393 | // Ensure any data written to data file is on disk. 
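// Note on ordering (inferred from the sequence below rather than documented
// elsewhere): data records are flushed to the data file before the dirty
// buckets that reference them are written out via the log and key files, and
// the log file is truncated and synced only after the data file has been
// synced, so an interrupted commit should still be recoverable from the log.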
394 | if err := s.df.Flush(); err != nil { 395 | return 0, fmt.Errorf("flush data file: %w", err) 396 | } 397 | // work += int(s.kf.Header.BlockSize) * (2*mutatedBuckets.Count() + newBuckets.Count()) 398 | 399 | s.p0.Clear() 400 | 401 | written, err := s.bc.WriteDirty(s.lf, s.kf) 402 | work += written 403 | if err != nil { 404 | return work, fmt.Errorf("write dirty buckets: %w", err) 405 | } 406 | 407 | // Finalize the commit 408 | if err := s.df.Sync(); err != nil { 409 | return 0, fmt.Errorf("sync data file: %w", err) 410 | } 411 | 412 | if err := s.lf.Truncate(); err != nil { 413 | return 0, fmt.Errorf("trunc log file: %w", err) 414 | } 415 | 416 | if err := s.lf.Sync(); err != nil { 417 | return 0, fmt.Errorf("sync log file: %w", err) 418 | } 419 | 420 | return work, nil 421 | } 422 | 423 | func (s *Store) FetchReader(key string) (io.Reader, error) { 424 | if s.tlogger.Enabled() { 425 | s.tlogger.Info("Store.FetchReader", "key", key) 426 | } 427 | if err := s.Err(); err != nil { 428 | return nil, err 429 | } 430 | 431 | h := s.kf.HashString(key) 432 | 433 | if s.tlogger.Enabled() { 434 | s.tlogger.Info("looking for data in pool p0", "key", key) 435 | } 436 | 437 | s.imu.Lock() 438 | defer s.imu.Unlock() 439 | 440 | if data, exists := s.p0.Find(key); exists { 441 | return bytes.NewReader(data), nil 442 | } 443 | 444 | r, err := s.bc.Fetch(h, key, s.df) 445 | if err != nil { 446 | return nil, fmt.Errorf("read bucket: %w", err) 447 | } 448 | return r, nil 449 | } 450 | 451 | func (s *Store) Exists(key string) (bool, error) { 452 | if s.tlogger.Enabled() { 453 | s.tlogger.Info("Store.Exists", "key", key) 454 | } 455 | if err := s.Err(); err != nil { 456 | return false, err 457 | } 458 | 459 | if s.p0.Has(key) { 460 | return true, nil 461 | } 462 | 463 | h := s.kf.HashString(key) 464 | return s.bc.Exists(h, key, s.df) 465 | } 466 | 467 | func (s *Store) DataSize(key string) (int64, error) { 468 | if s.tlogger.Enabled() { 469 | s.tlogger.Info("Store.DataSize", "key", key) 470 | } 471 | if err := s.Err(); err != nil { 472 | return 0, err 473 | } 474 | 475 | if data, exists := s.p0.Find(key); exists { 476 | return int64(len(data)), nil 477 | } 478 | 479 | h := s.kf.HashString(key) 480 | rh, err := s.bc.FetchHeader(h, key, s.df) 481 | if err != nil { 482 | return 0, fmt.Errorf("fetch header: %w", err) 483 | } 484 | 485 | return rh.DataSize, nil 486 | } 487 | -------------------------------------------------------------------------------- /internal/syscall.go: -------------------------------------------------------------------------------- 1 | //go:build !(linux && amd64) 2 | 3 | package internal 4 | 5 | const ( 6 | FADV_NORMAL = 0x0 7 | FADV_RANDOM = 0x1 8 | FADV_SEQUENTIAL = 0x2 9 | FADV_WILLNEED = 0x3 10 | ) 11 | 12 | func Fadvise(fd int, offset int64, length int64, advice int) error { 13 | // noop on non unix platforms 14 | return nil 15 | } 16 | -------------------------------------------------------------------------------- /internal/syscallunix.go: -------------------------------------------------------------------------------- 1 | //go:build linux && amd64 2 | 3 | package internal 4 | 5 | import ( 6 | "golang.org/x/sys/unix" 7 | ) 8 | 9 | const ( 10 | FADV_NORMAL = 0x0 11 | FADV_RANDOM = 0x1 12 | FADV_SEQUENTIAL = 0x2 13 | FADV_WILLNEED = 0x3 14 | ) 15 | 16 | func Fadvise(fd int, offset int64, length int64, advice int) error { 17 | return unix.Fadvise(fd, offset, length, advice) 18 | } 19 | -------------------------------------------------------------------------------- 
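The two files above select between a real posix_fadvise call and a no-op stub via build tags. Below is a minimal sketch of how the wrapper can be exercised from inside the internal package; the file name and test are hypothetical and not part of the repository.

// fadvise_example_test.go (hypothetical): exercises the Fadvise wrapper.
package internal

import (
	"os"
	"testing"
)

// TestFadviseSequentialSketch advises the kernel that the whole file will be
// read sequentially (offset 0 with length 0 means "to end of file"). On
// platforms where the stub implementation is selected this is a no-op.
func TestFadviseSequentialSketch(t *testing.T) {
	f, err := os.CreateTemp(t.TempDir(), "fadvise")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	if err := Fadvise(int(f.Fd()), 0, 0, FADV_SEQUENTIAL); err != nil {
		t.Fatalf("Fadvise: %v", err)
	}
}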
/internal/verify.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/go-logr/logr" 7 | ) 8 | 9 | // VerifyStore verifies consistency of the data and key files. 10 | func VerifyStore(datPath, keyPath string, logger logr.Logger) (*VerifyResult, error) { 11 | df, err := OpenDataFile(datPath) 12 | if err != nil { 13 | return nil, fmt.Errorf("open data file: %w", err) 14 | } 15 | defer df.Close() 16 | df.elogger = logger 17 | logger.Info("opened data file", "version", df.Header.Version, "uid", df.Header.UID, "appnum", df.Header.AppNum) 18 | 19 | kf, err := OpenKeyFile(keyPath) 20 | if err != nil { 21 | return nil, fmt.Errorf("open key file: %w", err) 22 | } 23 | defer kf.Close() 24 | kf.elogger = logger 25 | logger.Info("opened key file", "version", kf.Header.Version, "uid", kf.Header.UID, "appnum", kf.Header.AppNum, "buckets", kf.Header.Buckets, "block_size", kf.Header.BlockSize, "load_factor", kf.Header.LoadFactor) 26 | 27 | if err := df.Header.VerifyMatchingKey(&kf.Header); err != nil { 28 | return nil, fmt.Errorf("key file and data file have incompatible metadata: %w", err) 29 | } 30 | 31 | kfSize, err := kf.Size() 32 | if err != nil { 33 | return nil, fmt.Errorf("failed to read key file size: %w", err) 34 | } 35 | 36 | if kf.Header.Buckets < 1 { 37 | return nil, fmt.Errorf("possibly corrupt key file: file should contain at least one bucket") 38 | } 39 | 40 | expectedFileSize := int64(KeyFileHeaderSize) + (int64(kf.Header.BlockSize) * int64(kf.Header.Buckets)) 41 | if kfSize != expectedFileSize { 42 | return nil, fmt.Errorf("possibly corrupt key file: file size %d does not match expected %d", kfSize, expectedFileSize) 43 | } 44 | 45 | if kfSize < int64(kf.Header.BlockSize) { 46 | return nil, fmt.Errorf("possibly corrupt key file, file smaller than a single block") 47 | } 48 | 49 | res := &VerifyResult{ 50 | DatPath: datPath, 51 | KeyPath: keyPath, 52 | Version: df.Header.Version, 53 | UID: df.Header.UID, 54 | AppNum: df.Header.AppNum, 55 | Salt: kf.Header.Salt, 56 | Pepper: kf.Header.Pepper, 57 | BlockSize: kf.Header.BlockSize, 58 | LoadFactor: float64(kf.Header.LoadFactor) / float64(MaxUint16), 59 | Capacity: kf.Header.Capacity, 60 | Buckets: kf.Header.Buckets, 61 | Modulus: kf.Header.Modulus, 62 | } 63 | 64 | res.DatFileSize, err = df.Size() 65 | if err != nil { 66 | return nil, fmt.Errorf("reading data file size: %w", err) 67 | } 68 | res.KeyFileSize, err = kf.Size() 69 | if err != nil { 70 | return nil, fmt.Errorf("reading key file size: %w", err) 71 | } 72 | 73 | // Verify records 74 | rs := df.RecordScanner() 75 | defer rs.Close() 76 | totalFetches := 0 77 | for rs.Next() { 78 | res.RecordBytesTotal += rs.RecordSize() 79 | if rs.IsData() { 80 | res.ValueCountTotal++ 81 | res.ValueBytesTotal += rs.Size() 82 | } else { 83 | res.SpillCountTotal++ 84 | res.SpillBytesTotal += rs.Size() 85 | } 86 | 87 | fetches, err := countFetches(rs.Key(), df, kf) 88 | if err != nil { 89 | return nil, fmt.Errorf("counting fetches: %w", err) 90 | } 91 | totalFetches += fetches 92 | 93 | } 94 | if rs.Err() != nil { 95 | return nil, fmt.Errorf("scanning data file: %w", rs.Err()) 96 | } 97 | 98 | if res.DatFileSize != res.RecordBytesTotal+DatFileHeaderSize { 99 | return nil, fmt.Errorf("data file size mismatch: file size is %d, size of records is %d (diff: %d)", res.DatFileSize, res.RecordBytesTotal+DatFileHeaderSize, res.DatFileSize-(res.RecordBytesTotal+DatFileHeaderSize)) 100 | } 101 | 102 | // Verify 
buckets 103 | bs := kf.BucketScanner(df) 104 | defer bs.Close() 105 | for bs.Next() { 106 | b := bs.Bucket() 107 | res.KeyCount += int64(b.Count()) 108 | if bs.IsSpill() { 109 | res.SpillCountInUse++ 110 | res.SpillBytesInUse += SpillHeaderSize + int64(b.ActualSize()) 111 | res.RecordBytesInUse += SpillHeaderSize + int64(b.ActualSize()) 112 | } 113 | 114 | for i := 0; i < b.Count(); i++ { 115 | e := b.entry(i) 116 | ehdr, err := df.LoadRecordHeader(e.Offset) 117 | if err != nil { 118 | return nil, fmt.Errorf("load record header at offset %d: %w", e.Offset, err) 119 | } 120 | 121 | if !ehdr.IsData() { 122 | return nil, fmt.Errorf("record type mismatch at offset %d, key file expects data record", e.Offset) 123 | } 124 | 125 | if ehdr.DataSize != e.Size { 126 | return nil, fmt.Errorf("record size mismatch at offset %d, data file record size %d, key file expects size %d", e.Offset, ehdr.DataSize, e.Size) 127 | } 128 | 129 | hash := kf.Hash(ehdr.Key) 130 | if hash != e.Hash { 131 | return nil, fmt.Errorf("record key hash mismatch at offset %d, data file record hash %d, key file expects hash %d", e.Offset, hash, e.Hash) 132 | } 133 | 134 | res.ValueCountInUse++ 135 | res.ValueBytesInUse += ehdr.DataSize 136 | res.RecordBytesInUse += ehdr.Size() + ehdr.DataSize 137 | } 138 | 139 | } 140 | if bs.Err() != nil { 141 | return nil, fmt.Errorf("scanning key file (index: %d): %w", bs.Index(), bs.Err()) 142 | } 143 | 144 | res.Waste = float64(res.SpillBytesTotal-res.SpillBytesInUse) / float64(res.DatFileSize) 145 | res.ActualLoad = float64(res.KeyCount) / float64(res.Capacity*res.Buckets) 146 | 147 | if res.ValueCountInUse > 0 { 148 | res.Overhead = float64(res.KeyFileSize+res.DatFileSize) / float64(res.RecordBytesTotal) 149 | res.AverageFetch = float64(totalFetches) / float64(res.ValueCountInUse) 150 | } 151 | 152 | return res, nil 153 | } 154 | 155 | func countFetches(key string, df *DataFile, kf *KeyFile) (int, error) { 156 | fetches := 0 157 | h := kf.HashString(key) 158 | 159 | idx := BucketIndex(h, kf.Header.Buckets, kf.Header.Modulus) 160 | tmpb, err := kf.LoadBucket(idx) 161 | if err != nil { 162 | return fetches, fmt.Errorf("read bucket: %w", err) 163 | } 164 | fetches++ 165 | for { 166 | for i := tmpb.lowerBound(h); i < tmpb.count; i++ { 167 | entry := tmpb.entry(i) 168 | if entry.Hash != h { 169 | break 170 | } 171 | 172 | ehdr, err := df.LoadRecordHeader(entry.Offset) 173 | if err != nil { 174 | return fetches, fmt.Errorf("read data record: %w", err) 175 | } 176 | fetches++ 177 | 178 | if string(ehdr.Key) != key { 179 | continue 180 | } 181 | 182 | return fetches, nil 183 | } 184 | 185 | spill := tmpb.Spill() 186 | 187 | if spill == 0 { 188 | break 189 | } 190 | 191 | blockBuf := make([]byte, kf.Header.BlockSize) 192 | tmpb = NewBucket(int(kf.Header.BlockSize), blockBuf) 193 | if err := tmpb.LoadFrom(int64(spill), df); err != nil { 194 | return fetches, fmt.Errorf("read spill: %w", err) 195 | } 196 | fetches++ 197 | 198 | } 199 | 200 | // record not reachable from the key file so don't count it as a fetch 201 | return 0, nil 202 | } 203 | 204 | type VerifyResult struct { 205 | DatPath string // The path to the data file 206 | KeyPath string // The path to the key file 207 | Version uint16 // The API version used to create the database 208 | UID uint64 // The unique identifier 209 | AppNum uint64 // The application-defined constant 210 | Salt uint64 // The salt used in the key file 211 | Pepper uint64 // The salt fingerprint 212 | BlockSize uint16 // The block size used in the key file
213 | LoadFactor float64 // The target load factor used in the key file 214 | 215 | KeyFileSize int64 // The size of the key file in bytes 216 | DatFileSize int64 // The size of the data file in bytes 217 | Capacity int // The maximum number of keys each bucket can hold 218 | Buckets int // The number of buckets in the key file 219 | BucketSize int64 // The size of a bucket in bytes 220 | Modulus uint64 221 | 222 | KeyCount int64 // The number of keys found 223 | ValueCountInUse int64 // The number of values found that are referenced by a key 224 | ValueCountTotal int64 // The number of values found 225 | ValueBytesInUse int64 // The total number of bytes occupied by values that are referenced by a key 226 | ValueBytesTotal int64 // The total number of bytes occupied by values 227 | RecordBytesInUse int64 // The total number of bytes occupied by records (header + value) that are referenced by a key 228 | RecordBytesTotal int64 // The total number of bytes occupied by records (header + value) 229 | SpillCountInUse int64 // The number of spill records in use 230 | SpillCountTotal int64 // The total number of spill records 231 | SpillBytesInUse int64 // The number of bytes occupied by spill records in use 232 | SpillBytesTotal int64 // The number of bytes occupied by all spill records 233 | AverageFetch float64 // Average number of key file reads per fetch 234 | Waste float64 // The fraction of the data file that is wasted 235 | Overhead float64 // The data amplification ratio (size of data files compared to the size of the underlying data and keys) 236 | ActualLoad float64 // The measured bucket load fraction (number of keys as a fraction of the total capacity) 237 | } 238 | -------------------------------------------------------------------------------- /internal/version.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import "regexp" 4 | 5 | var GitVersion string = "unknown" 6 | 7 | var reVersion = regexp.MustCompile(`^(v\d+\.\d+.\d+)(?:-)?(.+)?$`) 8 | 9 | // String formats the version in semver format, see semver.org 10 | func Version() string { 11 | m := reVersion.FindStringSubmatch(GitVersion) 12 | if m == nil || len(m) < 3 { 13 | return "v0.0.0+" + GitVersion 14 | } 15 | 16 | if m[2] == "" { 17 | return m[1] 18 | } 19 | return m[1] + "+" + m[2] 20 | } 21 | -------------------------------------------------------------------------------- /internal/version_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestString(t *testing.T) { 8 | testCases := map[string]string{ 9 | "f176923-dirty": "v0.0.0+f176923-dirty", 10 | "f176923": "v0.0.0+f176923", 11 | "v0.1.3-1-g518f694": "v0.1.3+1-g518f694", 12 | "v0.1.3-1-g518f694-dirty": "v0.1.3+1-g518f694-dirty", 13 | "v0.1.3": "v0.1.3", 14 | "v10.31.93": "v10.31.93", 15 | } 16 | 17 | for v, want := range testCases { 18 | GitVersion = v 19 | if Version() != want { 20 | t.Errorf("got %q, want %q", Version(), want) 21 | } 22 | 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /store.go: -------------------------------------------------------------------------------- 1 | package gonudb 2 | 3 | import ( 4 | "io" 5 | "time" 6 | 7 | "github.com/go-logr/logr" 8 | 9 | "github.com/iand/gonudb/internal" 10 | ) 11 | 12 | func CreateStore(datPath, keyPath, logPath string, appnum, salt uint64, blockSize int, loadFactor float64) error { 13 | 
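	// The parameters are validated by internal.CreateStore: blockSize must not
	// exceed the internal MaxBlockSize and must allow at least one entry per
	// bucket, and loadFactor must lie strictly between 0 and 1. A random UID
	// for the new store is generated here via internal.NewUID().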
return internal.CreateStore(datPath, keyPath, logPath, appnum, internal.NewUID(), salt, blockSize, loadFactor) 14 | } 15 | 16 | func OpenStore(datPath, keyPath, logPath string, options *StoreOptions) (*Store, error) { 17 | if options == nil { 18 | options = &StoreOptions{} 19 | } 20 | if options.Logger.GetSink() == nil { options.Logger = logr.Discard() } 21 | 22 | if options.BackgroundSyncInterval < time.Second { 23 | options.BackgroundSyncInterval = time.Second 24 | } 25 | 26 | store, err := internal.OpenStore( 27 | datPath, 28 | keyPath, 29 | logPath, 30 | options.BackgroundSyncInterval, 31 | options.Logger, 32 | options.Logger.V(LogLevelDiagnostics), 33 | options.Logger.V(LogLevelTrace), 34 | ) 35 | if err != nil { 36 | return nil, err 37 | } 38 | return &Store{store: store}, nil 39 | } 40 | 41 | type StoreOptions struct { 42 | Logger logr.Logger 43 | BackgroundSyncInterval time.Duration 44 | } 45 | 46 | type Store struct { 47 | store *internal.Store 48 | } 49 | 50 | func (s *Store) Close() error { 51 | return s.store.Close() 52 | } 53 | 54 | // Insert adds a key/value pair to the store. Zero length values are not supported. 55 | func (s *Store) Insert(key string, value []byte) error { 56 | return s.store.Insert(key, value) 57 | } 58 | 59 | func (s *Store) Flush() error { 60 | s.store.Flush() 61 | return s.store.Err() 62 | } 63 | 64 | // Fetch fetches the value associated with key from the store. 65 | func (s *Store) Fetch(key string) ([]byte, error) { 66 | r, err := s.store.FetchReader(key) 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | d, err := io.ReadAll(r) 72 | if err != nil { 73 | return nil, err 74 | } 75 | return d, nil 76 | } 77 | 78 | // FetchReader returns a reader that may be used to read the value associated with a key. 79 | func (s *Store) FetchReader(key string) (io.Reader, error) { 80 | return s.store.FetchReader(key) 81 | } 82 | 83 | // Exists reports whether a data record is associated with a key. 84 | func (s *Store) Exists(key string) (bool, error) { 85 | return s.store.Exists(key) 86 | } 87 | 88 | // DataSize returns the size of the data record associated with a key. 89 | func (s *Store) DataSize(key string) (int64, error) { 90 | return s.store.DataSize(key) 91 | } 92 | 93 | // Err returns an error if the store is in an error state, nil otherwise 94 | func (s *Store) Err() error { 95 | return s.store.Err() 96 | } 97 | 98 | // RecordScanner returns a scanner that may be used to iterate the datastore's values. The caller is responsible 99 | // for calling Close on the scanner after use. 100 | func (s *Store) RecordScanner() *RecordScanner { 101 | return &RecordScanner{scanner: s.store.DataFile().RecordScanner()} 102 | } 103 | 104 | // BucketScanner returns a scanner that may be used to iterate the datastore's index of keys. The caller is responsible 105 | // for calling Close on the scanner after use. 106 | func (s *Store) BucketScanner() *BucketScanner { 107 | return &BucketScanner{ 108 | scanner: s.store.KeyFile().BucketScanner(s.store.DataFile()), 109 | bucket: internal.NewBucket(int(s.store.KeyFile().BlockSize()), make([]byte, int(s.store.KeyFile().BlockSize()))), 110 | } 111 | } 112 | 113 | // Version returns the version number of the store's data format. 114 | func (s *Store) Version() uint16 { 115 | return s.store.DataFile().Header.Version 116 | } 117 | 118 | // UID returns the store's unique identifier that was generated on creation.
119 | func (s *Store) UID() uint64 { 120 | return s.store.DataFile().Header.UID 121 | } 122 | 123 | // AppNum returns the store's application-defined integer constant. 124 | func (s *Store) AppNum() uint64 { 125 | return s.store.DataFile().Header.AppNum 126 | } 127 | 128 | // BlockSize returns the physical size of a key file bucket. 129 | func (s *Store) BlockSize() uint16 { 130 | return s.store.KeyFile().Header.BlockSize 131 | } 132 | 133 | // RecordCount returns the number of data records in the store. 134 | func (s *Store) RecordCount() int { 135 | return s.store.RecordCount() 136 | } 137 | 138 | // Rate returns the data write rate in bytes per second. 139 | func (s *Store) Rate() float64 { 140 | return s.store.Rate() 141 | } 142 | 143 | // RecordScanner implements a sequential scan through a store's data file. Successive calls to the Next method will step through 144 | // the records in the file. Note that the scanner does not include data buffered in memory. Call Flush to ensure all 145 | // written data is visible to the scanner. 146 | type RecordScanner struct { 147 | scanner *internal.RecordScanner 148 | } 149 | 150 | // Next reads the next record in sequence, including spill records. It returns false 151 | // if it encounters an error or there are no more records to read. 152 | func (s *RecordScanner) Next() bool { 153 | return s.scanner.Next() 154 | } 155 | 156 | // Reader returns an io.Reader that may be used to read the data from the record. Should not be called until Next has been called. 157 | // The Reader is only valid for use until the next call to Next(). 158 | func (s *RecordScanner) Reader() io.Reader { 159 | return s.scanner.Reader() 160 | } 161 | 162 | // IsSpill reports whether the current record is a bucket spill 163 | func (s *RecordScanner) IsSpill() bool { 164 | return s.scanner.IsSpill() 165 | } 166 | 167 | // IsData reports whether the current record is a data record 168 | func (s *RecordScanner) IsData() bool { 169 | return s.scanner.IsData() 170 | } 171 | 172 | // Size returns the size of the current record's data in bytes 173 | func (s *RecordScanner) Size() int64 { 174 | return s.scanner.Size() 175 | } 176 | 177 | // RecordSize returns the number of bytes occupied by the current record including its header 178 | func (s *RecordScanner) RecordSize() int64 { 179 | return s.scanner.RecordSize() 180 | } 181 | 182 | // Key returns the key of the current record 183 | func (s *RecordScanner) Key() string { 184 | return s.scanner.Key() 185 | } 186 | 187 | // Err returns the first non-EOF error that was encountered by the RecordScanner. 188 | func (s *RecordScanner) Err() error { 189 | return s.scanner.Err() 190 | } 191 | 192 | func (s *RecordScanner) Close() error { 193 | return s.scanner.Close() 194 | } 195 | 196 | // BucketScanner implements a sequential scan through a key file. Successive calls to the Next method will step through 197 | // the buckets in the file, including spilled buckets in the data file. 198 | type BucketScanner struct { 199 | scanner *internal.BucketScanner 200 | bucket *internal.Bucket 201 | } 202 | 203 | // Next reads the next bucket in sequence, including spills to the data store. It returns false 204 | // if it encounters an error or there are no more buckets to read. 205 | func (s *BucketScanner) Next() bool { 206 | return s.scanner.Next() 207 | } 208 | 209 | // Index returns the index of the current bucket. Should not be called until Next has been called. Spill buckets
211 | func (s *BucketScanner) Index() int { 212 | return s.scanner.Index() 213 | } 214 | 215 | // IsSpill reports whether the current bucket was read from a data store spill. 216 | func (s *BucketScanner) IsSpill() bool { 217 | return s.scanner.IsSpill() 218 | } 219 | 220 | // Bucket returns the current bucket. Should not be called until Next has been called. The bucket is backed by data 221 | // that may be overwritten with a call to Next so should not be retained. 222 | func (s *BucketScanner) Bucket() *Bucket { 223 | s.scanner.Bucket().CopyInto(s.bucket) 224 | return &Bucket{bucket: s.bucket} 225 | } 226 | 227 | // Err returns the first non-EOF error that was encountered by the BucketScanner. 228 | func (s *BucketScanner) Err() error { 229 | return s.scanner.Err() 230 | } 231 | 232 | // Close closes the underlying reader used by the scanner. 233 | func (s *BucketScanner) Close() error { 234 | return s.scanner.Close() 235 | } 236 | 237 | // A Bucket contains a set of key entries that form part of the data store's index. 238 | type Bucket struct { 239 | bucket *internal.Bucket 240 | } 241 | 242 | // Has reports whether the bucket contains an entry with the given hash. 243 | func (b *Bucket) Has(h uint64) bool { 244 | return b.bucket.Has(h) 245 | } 246 | 247 | // Count returns the number of key entries in the bucket 248 | func (b *Bucket) Count() int { 249 | return b.bucket.Count() 250 | } 251 | 252 | // ActualSize returns the serialized bucket size, excluding empty space 253 | func (b *Bucket) ActualSize() int { 254 | return b.bucket.ActualSize() 255 | } 256 | 257 | // BlockSize returns the physical size of a key file bucket. 258 | func (b *Bucket) BlockSize() int { 259 | return b.bucket.BlockSize() 260 | } 261 | 262 | // IsEmpty reports whether the bucket has no key entries. 263 | func (b *Bucket) IsEmpty() bool { 264 | return b.bucket.IsEmpty() 265 | } 266 | 267 | // Capacity returns the maximum number of key entries that can be held in the bucket. 268 | func (b *Bucket) Capacity() int { 269 | return b.bucket.Capacity() 270 | } 271 | 272 | // Spill returns the offset in the store's data file of the next spill record, or 0 if there is no spill. 273 | func (b *Bucket) Spill() int64 { 274 | return b.bucket.Spill() 275 | } 276 | 277 | // HashRange returns the range of hashed keys that are contained in the bucket. 278 | func (b *Bucket) HashRange() (uint64, uint64) { 279 | return b.bucket.LowestHash(), b.bucket.HighestHash() 280 | } 281 | 282 | // Entry returns the record for a key entry 283 | func (b *Bucket) Entry(idx int) BucketEntry { 284 | // TODO: bounds check 285 | e := b.bucket.Entry(idx) 286 | return BucketEntry{ 287 | Offset: e.Offset, 288 | Size: e.Size, 289 | Hash: e.Hash, 290 | } 291 | } 292 | 293 | type BucketEntry struct { 294 | // Offset is the position in the store's data file of the data record. 295 | Offset int64 296 | 297 | // Size is the size of the data value within the data record. 298 | Size int64 299 | 300 | // Hash is the hashed version of the key used to insert the data value.
301 | Hash uint64 302 | } 303 | 304 | func NewSalt() uint64 { 305 | return internal.NewSalt() 306 | } 307 | 308 | func Version() string { 309 | return internal.Version() 310 | } 311 | 312 | var ( 313 | ErrAppNumMismatch = internal.ErrAppNumMismatch 314 | ErrDataMissing = internal.ErrDataMissing 315 | ErrDataTooLarge = internal.ErrDataTooLarge 316 | ErrDifferentVersion = internal.ErrDifferentVersion 317 | ErrHashMismatch = internal.ErrHashMismatch 318 | ErrInvalidBlockSize = internal.ErrInvalidBlockSize 319 | ErrInvalidBucketCount = internal.ErrInvalidBucketCount 320 | ErrInvalidCapacity = internal.ErrInvalidCapacity 321 | ErrInvalidDataRecord = internal.ErrInvalidDataRecord 322 | ErrInvalidKeySize = internal.ErrInvalidKeySize 323 | ErrInvalidLoadFactor = internal.ErrInvalidLoadFactor 324 | ErrInvalidRecordSize = internal.ErrInvalidRecordSize 325 | ErrInvalidSpill = internal.ErrInvalidSpill 326 | ErrKeyExists = internal.ErrKeyExists 327 | ErrKeyMismatch = internal.ErrKeyMismatch 328 | ErrKeyMissing = internal.ErrKeyMissing 329 | ErrKeyNotFound = internal.ErrKeyNotFound 330 | ErrKeySizeMismatch = internal.ErrKeySizeMismatch 331 | ErrKeyTooLarge = internal.ErrKeyTooLarge 332 | ErrKeyWrongSize = internal.ErrKeyWrongSize // deprecated: use ErrKeyMissing and ErrKeyTooLarge instead 333 | ErrNotDataFile = internal.ErrNotDataFile 334 | ErrNotKeyFile = internal.ErrNotKeyFile 335 | ErrNotLogFile = internal.ErrNotLogFile 336 | ErrShortKeyFile = internal.ErrShortKeyFile 337 | ErrUIDMismatch = internal.ErrUIDMismatch 338 | ) 339 | 340 | const ( 341 | LogLevelDiagnostics = 1 // log level increment for diagnostics logging 342 | LogLevelTrace = 2 // log level increment for verbose tracing 343 | ) 344 | --------------------------------------------------------------------------------
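To close, a minimal end-to-end sketch of the public API defined in store.go above (CreateStore, OpenStore, Insert, Flush, Fetch, Close). It is illustrative only: the file names, the block size of 4096 and the load factor of 0.5 are assumed example values, not recommendations taken from the repository.

// gonudbexample.go (hypothetical, not part of the repository).
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/iand/gonudb"
)

func main() {
	const appnum = 1 // application-defined constant stored in the file headers

	// Create the three files that make up a store: data, key and log.
	err := gonudb.CreateStore("sample.dat", "sample.key", "sample.log", appnum, gonudb.NewSalt(), 4096, 0.5)
	if err != nil {
		log.Fatalf("create store: %v", err)
	}

	s, err := gonudb.OpenStore("sample.dat", "sample.key", "sample.log", &gonudb.StoreOptions{
		BackgroundSyncInterval: time.Second,
	})
	if err != nil {
		log.Fatalf("open store: %v", err)
	}
	defer s.Close()

	if err := s.Insert("greeting", []byte("hello nudb")); err != nil {
		log.Fatalf("insert: %v", err)
	}

	// Flush so the record is committed and visible to the scanners.
	if err := s.Flush(); err != nil {
		log.Fatalf("flush: %v", err)
	}

	v, err := s.Fetch("greeting")
	if err != nil {
		log.Fatalf("fetch: %v", err)
	}
	fmt.Printf("%s\n", v)
}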