├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── actionlint.yml ├── renovate.json └── workflows │ ├── cd-badger.yml │ ├── ci-badger-bank-tests-nightly.yml │ ├── ci-badger-bank-tests.yml │ ├── ci-badger-tests.yml │ └── ci-dgraph-tests.yml ├── .gitignore ├── .trunk ├── .gitignore ├── configs │ ├── .checkov.yaml │ ├── .markdownlint.json │ ├── .prettierrc │ ├── .shellcheckrc │ ├── .yamllint.yaml │ └── svgo.config.mjs └── trunk.yaml ├── .vscode ├── extensions.json └── settings.json ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── VERSIONING.md ├── backup.go ├── backup_test.go ├── badger ├── .gitignore ├── Makefile ├── cmd │ ├── backup.go │ ├── bank.go │ ├── bench.go │ ├── flatten.go │ ├── info.go │ ├── pick_table_bench.go │ ├── read_bench.go │ ├── restore.go │ ├── root.go │ ├── rotate.go │ ├── rotate_test.go │ ├── stream.go │ └── write_bench.go └── main.go ├── batch.go ├── batch_test.go ├── changes.sh ├── compaction.go ├── db.go ├── db2_test.go ├── db_test.go ├── dir_other.go ├── dir_plan9.go ├── dir_unix.go ├── dir_windows.go ├── discard.go ├── discard_test.go ├── doc.go ├── errors.go ├── fb ├── BlockOffset.go ├── TableIndex.go ├── flatbuffer.fbs ├── gen.sh └── install_flatbuffers.sh ├── go.mod ├── go.sum ├── histogram.go ├── histogram_test.go ├── images ├── benchmarks-rocksdb.png └── diggy-shadow.png ├── integration └── testgc │ ├── .gitignore │ └── main.go ├── iterator.go ├── iterator_test.go ├── key_registry.go ├── key_registry_test.go ├── level_handler.go ├── levels.go ├── levels_test.go ├── logger.go ├── logger_test.go ├── managed_db.go ├── managed_db_test.go ├── manifest.go ├── manifest_test.go ├── memtable.go ├── merge.go ├── merge_test.go ├── metrics_test.go ├── options.go ├── options └── options.go ├── options_test.go ├── pb ├── badgerpb4.pb.go ├── badgerpb4.proto ├── gen.sh └── protos_test.go ├── publisher.go ├── publisher_test.go ├── skl ├── README.md ├── arena.go ├── skl.go └── skl_test.go ├── stream.go ├── stream_test.go ├── stream_writer.go ├── stream_writer_test.go ├── structs.go ├── structs_test.go ├── table ├── README.md ├── builder.go ├── builder_test.go ├── iterator.go ├── merge_iterator.go ├── merge_iterator_test.go ├── table.go └── table_test.go ├── test.sh ├── test_extensions.go ├── trie ├── trie.go └── trie_test.go ├── txn.go ├── txn_test.go ├── util.go ├── value.go ├── value_test.go ├── watermark_edge_test.go └── y ├── bloom.go ├── bloom_test.go ├── checksum.go ├── encrypt.go ├── encrypt_test.go ├── error.go ├── error_test.go ├── event_log.go ├── file_dsync.go ├── file_nodsync.go ├── iterator.go ├── metrics.go ├── watermark.go ├── y.go ├── y_test.go └── zstd.go /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # CODEOWNERS info: https://help.github.com/en/articles/about-code-owners 2 | # Owners are automatically requested for review for PRs that change code 3 | # that they own. 4 | * @hypermodeinc/database 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "" 5 | labels: bug 6 | assignees: "" 7 | --- 8 | 9 | ## Describe the bug 10 | 11 | A clear and concise description of what the bug is.
12 | 13 | ## To Reproduce 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. Go to '...' 18 | 2. Click on '....' 19 | 3. Scroll down to '....' 20 | 4. See error 21 | 22 | ## Expected behavior 23 | 24 | A clear and concise description of what you expected to happen. 25 | 26 | ## Screenshots 27 | 28 | If applicable, add screenshots to help explain your problem. 29 | 30 | ## Environment 31 | 32 | - OS: [e.g. macOS, Windows, Ubuntu] 33 | - Language: [e.g. AssemblyScript, Go] 34 | - Version: [e.g. v0.xx] 35 | 36 | ## Additional context 37 | 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Badger Community Support 4 | url: https://discord.hypermode.com 5 | about: Please ask and answer questions here 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | ## Is your feature request related to a problem? Please describe 10 | 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | ## Describe the solution you'd like 14 | 15 | A clear and concise description of what you want to happen. 16 | 17 | ## Describe alternatives you've considered 18 | 19 | A clear and concise description of any alternative solutions or features you've considered. 20 | 21 | ## Additional context 22 | 23 | Add any other context or screenshots about the feature request here. 24 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **Description** 2 | 3 | Please explain the changes you made here. 4 | 5 | **Checklist** 6 | 7 | - [ ] Code compiles correctly and linting passes locally 8 | - [ ] For all _code_ changes, an entry added to the `CHANGELOG.md` file describing and linking to 9 | this PR 10 | - [ ] Tests added for new functionality, or regression tests for bug fixes added as applicable 11 | - [ ] For public APIs, new features, etc., PR on [docs repo](https://github.com/hypermodeinc/docs) 12 | staged and linked here 13 | 14 | **Instructions** 15 | 16 | - The PR title should follow the [Conventional Commits](https://www.conventionalcommits.org/) 17 | syntax, leading with `fix:`, `feat:`, `chore:`, `ci:`, etc. 18 | - The description should briefly explain what the PR is about. In the case of a bugfix, describe or 19 | link to the bug. 20 | - In the checklist section, check the boxes that are applicable, using `[x]` syntax. 21 | - If not applicable, remove the entire line. Only leave the box unchecked if you intend to come 22 | back and check the box later. 23 | - Delete the `Instructions` line and everything below it, to indicate you have read and are 24 | following these instructions. 🙂 25 | 26 | Thank you for your contribution to Badger!
27 | -------------------------------------------------------------------------------- /.github/actionlint.yml: -------------------------------------------------------------------------------- 1 | self-hosted-runner: 2 | # Labels of self-hosted runner in array of string 3 | labels: 4 | - warp-ubuntu-latest-x64-4x 5 | - warp-ubuntu-latest-arm64-4x 6 | - warp-ubuntu-latest-x64-16x 7 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["local>hypermodeinc/renovate-config"], 4 | "rangeStrategy": "widen" 5 | } 6 | -------------------------------------------------------------------------------- /.github/workflows/cd-badger.yml: -------------------------------------------------------------------------------- 1 | name: cd-badger 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | releasetag: 7 | description: releasetag 8 | required: true 9 | type: string 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | badger-build-amd64: 16 | runs-on: warp-ubuntu-latest-x64-4x 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | ref: "${{ github.event.inputs.releasetag }}" 21 | - name: Set up Go 22 | uses: actions/setup-go@v5 23 | with: 24 | go-version-file: go.mod 25 | - name: Set Badger Release Version 26 | run: | 27 | #!/bin/bash 28 | GIT_TAG_NAME='${{ github.event.inputs.releasetag }}' 29 | if [[ "$GIT_TAG_NAME" == "v"* ]]; 30 | then 31 | echo "this is a release tag" 32 | else 33 | echo "this is NOT a release tag" 34 | exit 1 35 | fi 36 | BADGER_RELEASE_VERSION='${{ github.event.inputs.releasetag }}' 37 | echo "making a new release for "$BADGER_RELEASE_VERSION 38 | echo "BADGER_RELEASE_VERSION=$BADGER_RELEASE_VERSION" >> $GITHUB_ENV 39 | - name: Fetch dependencies 40 | run: sudo apt-get update && sudo apt-get -y install build-essential 41 | - name: Build badger linux/amd64 42 | run: make badger 43 | - name: Generate SHA for Linux Build 44 | run: 45 | cd badger && sha256sum badger-linux-amd64 | cut -c-64 > badger-checksum-linux-amd64.sha256 46 | - name: Tar Archive for Linux Build 47 | run: cd badger && tar -zcvf badger-linux-amd64.tar.gz badger-linux-amd64 48 | - name: Upload Badger Binary Build Artifacts 49 | uses: actions/upload-artifact@v4 50 | with: 51 | name: badger-linux-amd64-${{ github.run_id }}-${{ github.job }} 52 | path: | 53 | badger/badger-checksum-linux-amd64.sha256 54 | badger/badger-linux-amd64.tar.gz 55 | 56 | badger-build-arm64: 57 | runs-on: warp-ubuntu-latest-arm64-4x 58 | steps: 59 | - uses: actions/checkout@v4 60 | with: 61 | ref: "${{ github.event.inputs.releasetag }}" 62 | - name: Set up Go 63 | uses: actions/setup-go@v5 64 | with: 65 | go-version-file: go.mod 66 | - name: Set Badger Release Version 67 | run: | 68 | #!/bin/bash 69 | GIT_TAG_NAME='${{ github.event.inputs.releasetag }}' 70 | if [[ "$GIT_TAG_NAME" == "v"* ]]; 71 | then 72 | echo "this is a release tag" 73 | else 74 | echo "this is NOT a release tag" 75 | exit 1 76 | fi 77 | BADGER_RELEASE_VERSION='${{ github.event.inputs.releasetag }}' 78 | echo "making a new release for "$BADGER_RELEASE_VERSION 79 | echo "BADGER_RELEASE_VERSION=$BADGER_RELEASE_VERSION" >> $GITHUB_ENV 80 | - name: Fetch dependencies 81 | run: sudo apt-get -y install build-essential 82 | - name: Build badger linux/arm64 83 | run: make badger 84 | - name: Generate SHA for Linux Build 85 | run: 86 | cd badger && 
sha256sum badger-linux-arm64 | cut -c-64 > badger-checksum-linux-arm64.sha256 87 | - name: Tar Archive for Linux Build 88 | run: cd badger && tar -zcvf badger-linux-arm64.tar.gz badger-linux-arm64 89 | - name: List Artifacts 90 | run: ls -al badger/ 91 | - name: Upload Badger Binary Build Artifacts 92 | uses: actions/upload-artifact@v4 93 | with: 94 | name: badger-linux-arm64-${{ github.run_id }}-${{ github.job }} 95 | path: | 96 | badger/badger-checksum-linux-arm64.sha256 97 | badger/badger-linux-arm64.tar.gz 98 | -------------------------------------------------------------------------------- /.github/workflows/ci-badger-bank-tests-nightly.yml: -------------------------------------------------------------------------------- 1 | name: ci-badger-bank-tests-nightly 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - "**.md" 7 | - docs/** 8 | - images/** 9 | branches: 10 | - main 11 | - release/v* 12 | schedule: 13 | - cron: 1 3 * * * 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | badger-bank: 20 | runs-on: warp-ubuntu-latest-x64-4x 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Setup Go 24 | uses: actions/setup-go@v5 25 | with: 26 | go-version-file: go.mod 27 | - name: Install Dependencies 28 | run: make dependency 29 | - name: Install jemalloc 30 | run: make jemalloc 31 | - name: Install Badger 32 | run: cd badger && go install --race --tags=jemalloc . 33 | - name: Run Badger Bank Test 34 | run: | 35 | #!/bin/bash -x 36 | set -o pipefail 37 | # get 16 random bytes from /dev/urandom 38 | hexdump -vn16 -e'4/4 "%08X" 1 "\n"' /dev/urandom > badgerkey16bytes 39 | badger bank test --dir=. --encryption-key "badgerkey16bytes" -d=4h 2>&1 | tee badgerbanktest.log | grep -v 'Moved $5' 40 | if [ $? -ne 0 ]; then 41 | if grep -qi 'data race' badgerbanktest.log; then 42 | echo "Detected data race via grep..." 43 | cat badgerbanktest.log | grep -v 'Moved $5' 44 | else 45 | echo "No data race detected via grep. Assuming txn violation..." 46 | tail -1000 badgerbanktest.log 47 | badger bank disect --dir=. --decryption-key "badgerkey16bytes" 48 | fi 49 | exit 1 50 | fi 51 | echo 'Bank test finished with no issues.' 52 | -------------------------------------------------------------------------------- /.github/workflows/ci-badger-bank-tests.yml: -------------------------------------------------------------------------------- 1 | name: ci-badger-bank-tests 2 | 3 | on: 4 | pull_request: 5 | paths-ignore: 6 | - "**.md" 7 | - docs/** 8 | - images/** 9 | branches: 10 | - main 11 | - release/v* 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | badger-bank: 18 | runs-on: warp-ubuntu-latest-x64-4x 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Setup Go 22 | uses: actions/setup-go@v5 23 | with: 24 | go-version-file: go.mod 25 | - name: Install Dependencies 26 | run: make dependency 27 | - name: Install jemalloc 28 | run: make jemalloc 29 | - name: Install Badger 30 | run: cd badger && go install --race --tags=jemalloc . 31 | - name: Run Badger Bank Test 32 | run: | 33 | #!/bin/bash 34 | mkdir bank && cd bank 35 | badger bank test -v --dir=. 
-d=20m 36 | -------------------------------------------------------------------------------- /.github/workflows/ci-badger-tests.yml: -------------------------------------------------------------------------------- 1 | name: ci-badger-tests 2 | 3 | on: 4 | pull_request: 5 | paths-ignore: 6 | - "**.md" 7 | - docs/** 8 | - images/** 9 | branches: 10 | - main 11 | - release/v* 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | badger-tests: 18 | runs-on: warp-ubuntu-latest-x64-4x 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Setup Go 22 | uses: actions/setup-go@v5 23 | with: 24 | go-version-file: go.mod 25 | - name: Install Dependencies 26 | run: make dependency 27 | - name: Run Badger Tests 28 | run: make test 29 | -------------------------------------------------------------------------------- /.github/workflows/ci-dgraph-tests.yml: -------------------------------------------------------------------------------- 1 | name: ci-dgraph-tests 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - "**.md" 7 | - docs/** 8 | - images/** 9 | branches: 10 | - main 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | dgraph-tests: 17 | runs-on: warp-ubuntu-latest-x64-16x 18 | steps: 19 | - name: Checkout Dgraph repo 20 | uses: actions/checkout@v4 21 | with: 22 | repository: hypermodeinc/dgraph 23 | ref: main 24 | - name: Set up Go 25 | uses: actions/setup-go@v5 26 | with: 27 | go-version-file: go.mod 28 | - name: Fetch latest Badger version 29 | run: | 30 | go get github.com/dgraph-io/badger/v4@main 31 | - name: Set up Node 32 | uses: actions/setup-node@v4 33 | with: 34 | node-version: 16 35 | - name: Install protobuf-compiler 36 | run: sudo apt update && sudo apt install -y protobuf-compiler 37 | - name: Check protobuf 38 | run: | 39 | cd ./protos 40 | go mod tidy 41 | make regenerate 42 | git diff --exit-code -- . 43 | - name: Make Linux Build and Docker Image 44 | run: make docker-image 45 | - name: Build Test Binary 46 | run: | 47 | #!/bin/bash 48 | # build the test binary 49 | cd t; go build . 
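# the resulting ./t binary is reused by the later steps to clean up containers and run the suite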
50 | - name: Clean Up Environment 51 | run: | 52 | #!/bin/bash 53 | # clean cache 54 | go clean -testcache 55 | # clean up docker containers before test execution 56 | cd t; ./t -r 57 | - name: Run Unit Tests 58 | run: | 59 | #!/bin/bash 60 | # go env settings 61 | export GOPATH=~/go 62 | # move the binary 63 | cp dgraph/dgraph ~/go/bin/dgraph 64 | # run the tests 65 | cd t; ./t 66 | # clean up docker containers after test execution 67 | ./t -r 68 | # sleep 69 | sleep 5 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | badger/badger-* 8 | 9 | # Test binary, build with `go test -c` 10 | *.test 11 | badger-test*/ 12 | 13 | # Output of the go coverage tool 14 | *.out 15 | 16 | #darwin 17 | .DS_Store 18 | 19 | -------------------------------------------------------------------------------- /.trunk/.gitignore: -------------------------------------------------------------------------------- 1 | *out 2 | *logs 3 | *actions 4 | *notifications 5 | *tools 6 | plugins 7 | user_trunk.yaml 8 | user.yaml 9 | tmp 10 | -------------------------------------------------------------------------------- /.trunk/configs/.checkov.yaml: -------------------------------------------------------------------------------- 1 | skip-check: 2 | - CKV_GHA_7 3 | -------------------------------------------------------------------------------- /.trunk/configs/.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "line-length": { "line_length": 150, "tables": false }, 3 | "no-inline-html": false, 4 | "no-bare-urls": false, 5 | "no-space-in-emphasis": false, 6 | "no-emphasis-as-heading": false, 7 | "first-line-heading": false 8 | } 9 | -------------------------------------------------------------------------------- /.trunk/configs/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": false, 3 | "proseWrap": "always", 4 | "printWidth": 100 5 | } 6 | -------------------------------------------------------------------------------- /.trunk/configs/.shellcheckrc: -------------------------------------------------------------------------------- 1 | enable=all 2 | source-path=SCRIPTDIR 3 | disable=SC2154 4 | 5 | # If you're having issues with shellcheck following source, disable the errors via: 6 | # disable=SC1090 7 | # disable=SC1091 8 | -------------------------------------------------------------------------------- /.trunk/configs/.yamllint.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | quoted-strings: 3 | required: only-when-needed 4 | extra-allowed: ["{|}"] 5 | key-duplicates: {} 6 | octal-values: 7 | forbid-implicit-octal: true 8 | -------------------------------------------------------------------------------- /.trunk/configs/svgo.config.mjs: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: [ 3 | { 4 | name: "preset-default", 5 | params: { 6 | overrides: { 7 | removeViewBox: false, // https://github.com/svg/svgo/issues/1128 8 | sortAttrs: true, 9 | removeOffCanvasPaths: true, 10 | }, 11 | }, 12 | }, 13 | ], 14 | } 15 | -------------------------------------------------------------------------------- /.trunk/trunk.yaml: 
-------------------------------------------------------------------------------- 1 | # This file controls the behavior of Trunk: https://docs.trunk.io/cli 2 | # To learn more about the format of this file, see https://docs.trunk.io/reference/trunk-yaml 3 | version: 0.1 4 | 5 | cli: 6 | version: 1.22.10 7 | 8 | # Trunk provides extensibility via plugins. (https://docs.trunk.io/plugins) 9 | plugins: 10 | sources: 11 | - id: trunk 12 | ref: v1.6.7 13 | uri: https://github.com/trunk-io/plugins 14 | 15 | # Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes) 16 | runtimes: 17 | enabled: 18 | - go@1.23.5 19 | - node@18.20.5 20 | - python@3.10.8 21 | 22 | # This is the section where you manage your linters. (https://docs.trunk.io/check/configuration) 23 | lint: 24 | ignore: 25 | - linters: [ALL] 26 | paths: 27 | - "*.pb.go" 28 | enabled: 29 | - trivy@0.59.1 30 | - renovate@39.169.3 31 | - actionlint@1.7.7 32 | - checkov@3.2.369 33 | - git-diff-check 34 | - gofmt@1.20.4 35 | - golangci-lint@1.63.4 36 | - markdownlint@0.44.0 37 | - osv-scanner@1.9.2 38 | - oxipng@9.1.3 39 | - prettier@3.5.0 40 | - shellcheck@0.10.0 41 | - shfmt@3.6.0 42 | - svgo@3.3.2 43 | - taplo@0.9.3 44 | - trufflehog@3.88.7 45 | - yamllint@1.35.1 46 | actions: 47 | enabled: 48 | - trunk-announce 49 | - trunk-check-pre-push 50 | - trunk-fmt-pre-commit 51 | - trunk-upgrade-available 52 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["trunk.io"] 3 | } 4 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | "editor.defaultFormatter": "trunk.io", 4 | "editor.trimAutoWhitespace": true, 5 | "trunk.autoInit": false 6 | } 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our community a 6 | harassment-free experience for everyone, regardless of age, body size, visible or invisible 7 | disability, ethnicity, sex characteristics, gender identity and expression, level of experience, 8 | education, socio-economic status, nationality, personal appearance, race, religion, or sexual 9 | identity and orientation. 10 | 11 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and 12 | healthy community. 
13 | 14 | ## Our Standards 15 | 16 | Examples of behavior that contributes to a positive environment for our community include: 17 | 18 | - Demonstrating empathy and kindness toward other people 19 | - Being respectful of differing opinions, viewpoints, and experiences 20 | - Giving and gracefully accepting constructive feedback 21 | - Accepting responsibility and apologizing to those affected by our mistakes, and learning from the 22 | experience 23 | - Focusing on what is best not just for us as individuals, but for the overall community 24 | 25 | Examples of unacceptable behavior include: 26 | 27 | - The use of sexualized language or imagery, and sexual attention or advances of any kind 28 | - Trolling, insulting or derogatory comments, and personal or political attacks 29 | - Public or private harassment 30 | - Publishing others' private information, such as a physical or email address, without their 31 | explicit permission 32 | - Other conduct which could reasonably be considered inappropriate in a professional setting 33 | 34 | ## Enforcement Responsibilities 35 | 36 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior 37 | and will take appropriate and fair corrective action in response to any behavior that they deem 38 | inappropriate, threatening, offensive, or harmful. 39 | 40 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, 41 | code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and 42 | will communicate reasons for moderation decisions when appropriate. 43 | 44 | ## Scope 45 | 46 | This Code of Conduct applies within all community spaces, and also applies when an individual is 47 | officially representing the community in public spaces. Examples of representing our community 48 | include using an official e-mail address, posting via an official social media account, or acting as 49 | an appointed representative at an online or offline event. 50 | 51 | ## Enforcement 52 | 53 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community 54 | leaders responsible for enforcement at hello@hypermode.com. All complaints will be reviewed and 55 | investigated promptly and fairly. 56 | 57 | All community leaders are obligated to respect the privacy and security of the reporter of any 58 | incident. 59 | 60 | ## Enforcement Guidelines 61 | 62 | Community leaders will follow these Community Impact Guidelines in determining the consequences for 63 | any action they deem in violation of this Code of Conduct: 64 | 65 | ### 1. Correction 66 | 67 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or 68 | unwelcome in the community. 69 | 70 | **Consequence**: A private, written warning from community leaders, providing clarity around the 71 | nature of the violation and an explanation of why the behavior was inappropriate. A public apology 72 | may be requested. 73 | 74 | ### 2. Warning 75 | 76 | **Community Impact**: A violation through a single incident or series of actions. 77 | 78 | **Consequence**: A warning with consequences for continued behavior. No interaction with the people 79 | involved, including unsolicited interaction with those enforcing the Code of Conduct, for a 80 | specified period of time. This includes avoiding interactions in community spaces as well as 81 | external channels like social media. 
Violating these terms may lead to a temporary or permanent ban. 82 | 83 | ### 3. Temporary Ban 84 | 85 | **Community Impact**: A serious violation of community standards, including sustained inappropriate 86 | behavior. 87 | 88 | **Consequence**: A temporary ban from any sort of interaction or public communication with the 89 | community for a specified period of time. No public or private interaction with the people involved, 90 | including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this 91 | period. Violating these terms may lead to a permanent ban. 92 | 93 | ### 4. Permanent Ban 94 | 95 | **Community Impact**: Demonstrating a pattern of violation of community standards, including 96 | sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement 97 | of classes of individuals. 98 | 99 | **Consequence**: A permanent ban from any sort of public interaction within the community. 100 | 101 | ## Attribution 102 | 103 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at 104 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 105 | 106 | Community Impact Guidelines were inspired by 107 | [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). 108 | 109 | [homepage]: https://www.contributor-covenant.org 110 | 111 | For answers to common questions about this code of conduct, see the FAQ at 112 | https://www.contributor-covenant.org/faq. Translations are available at 113 | https://www.contributor-covenant.org/translations. 114 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guide 2 | 3 | - [Before you get started](#before-you-get-started) 4 | - [Code of Conduct](#code-of-conduct) 5 | - [Your First Contribution](#your-first-contribution) 6 | - [Find a good first topic](#find-a-good-first-topic) 7 | - [Setting up your development environment](#setting-up-your-development-environment) 8 | - [Fork the project](#fork-the-project) 9 | - [Clone the project](#clone-the-project) 10 | - [New branch for new code](#new-branch-for-new-code) 11 | - [Test](#test) 12 | - [Commit and push](#commit-and-push) 13 | - [Create a Pull Request](#create-a-pull-request) 14 | - [Sign the CLA](#sign-the-cla) 15 | - [Get a code review](#get-a-code-review) 16 | 17 | ## Before you get started 18 | 19 | ### Code of Conduct 20 | 21 | Please make sure to read and observe our [Code of Conduct](./CODE_OF_CONDUCT.md). 22 | 23 | ## Your First Contribution 24 | 25 | ### Find a good first topic 26 | 27 | You can start by finding an existing issue with the 28 | [good first issue](https://github.com/hypermodeinc/badger/labels/good%20first%20issue) or 29 | [help wanted](https://github.com/hypermodeinc/badger/labels/help%20wanted) labels. These issues are 30 | well suited for new contributors. 31 | 32 | ## Setting up your development environment 33 | 34 | Badger uses [`Go Modules`](https://github.com/golang/go/wiki/Modules) to manage dependencies. Use a 35 | Go version that satisfies the minimum specified in the repository's `go.mod` file.
36 | 37 | ### Fork the project 38 | 39 | - Visit https://github.com/hypermodeinc/badger 40 | - Click the `Fork` button (top right) to create a fork of the repository 41 | 42 | ### Clone the project 43 | 44 | ```sh 45 | git clone https://github.com/$GITHUB_USER/badger 46 | cd badger 47 | git remote add upstream git@github.com:hypermodeinc/badger.git 48 | 49 | # Never push to the upstream main branch 50 | git remote set-url --push upstream no_push 51 | ``` 52 | 53 | ### New branch for new code 54 | 55 | Get your local main branch up to date: 56 | 57 | ```sh 58 | git fetch upstream 59 | git checkout main 60 | git rebase upstream/main 61 | ``` 62 | 63 | Create a new branch from main: 64 | 65 | ```sh 66 | git checkout -b my_new_feature 67 | ``` 68 | 69 | Now you can add your changes to the project. 70 | 71 | ### Test 72 | 73 | Build and run all tests: 74 | 75 | ```sh 76 | ./test.sh 77 | ``` 78 | 79 | ### Commit and push 80 | 81 | Commit your changes: 82 | 83 | ```sh 84 | git commit 85 | ``` 86 | 87 | When the changes are ready to review: 88 | 89 | ```sh 90 | git push origin my_new_feature 91 | ``` 92 | 93 | ### Create a Pull Request 94 | 95 | Just open `https://github.com/$GITHUB_USER/badger/pull/new/my_new_feature` and fill in the PR 96 | description. 97 | 98 | ### Sign the CLA 99 | 100 | Click the **Sign in with GitHub to agree** button to sign the CLA. 101 | [An example](https://cla-assistant.io/hypermodeinc/badger?pullRequest=1377). 102 | 103 | ### Get a code review 104 | 105 | Once your pull request (PR) is opened, it will be assigned to one or more reviewers, who will 106 | review the code. 107 | 108 | To address review comments, you should commit the changes to the same branch of the PR on your fork. 109 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # SPDX-FileCopyrightText: © Hypermode Inc. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | USER_ID = $(shell id -u) 7 | HAS_JEMALLOC = $(shell test -f /usr/local/lib/libjemalloc.a && echo "jemalloc") 8 | JEMALLOC_URL = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2" 9 | 10 | 11 | .PHONY: all badger test jemalloc dependency 12 | 13 | badger: jemalloc 14 | @echo "Compiling Badger binary..." 15 | @$(MAKE) -C badger badger 16 | @echo "Badger binary located in badger directory." 17 | 18 | test: jemalloc 19 | @echo "Running Badger tests..." 20 | @./test.sh 21 | 22 | jemalloc: 23 | @if [ -z "$(HAS_JEMALLOC)" ] ; then \ 24 | mkdir -p /tmp/jemalloc-temp && cd /tmp/jemalloc-temp ; \ 25 | echo "Downloading jemalloc..." ; \ 26 | curl -s -L ${JEMALLOC_URL} -o jemalloc.tar.bz2 ; \ 27 | tar xjf ./jemalloc.tar.bz2 ; \ 28 | cd jemalloc-5.3.0 ; \ 29 | ./configure --with-jemalloc-prefix='je_' --with-malloc-conf='background_thread:true,metadata_thp:auto'; \ 30 | make ; \ 31 | if [ "$(USER_ID)" -eq "0" ]; then \ 32 | make install ; \ 33 | else \ 34 | echo "==== Need sudo access to install jemalloc" ; \ 35 | sudo make install ; \ 36 | fi \ 37 | fi 38 | 39 | dependency: 40 | @echo "Installing dependencies..."
41 | @sudo apt-get update 42 | @sudo apt-get -y install \ 43 | ca-certificates \ 44 | curl \ 45 | gnupg \ 46 | lsb-release \ 47 | build-essential \ 48 | protobuf-compiler 49 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting Security Concerns 2 | 3 | We take the security of Badger very seriously. If you believe you have found a security 4 | vulnerability in Badger, we encourage you to let us know right away. 5 | 6 | We will investigate all legitimate reports and do our best to quickly fix the problem. Please report 7 | any issues or vulnerabilities via GitHub Security Advisories instead of posting a public issue in 8 | GitHub. You can also send security communications to security@hypermode.com. 9 | 10 | Please include the version identifier and details on how the vulnerability can be exploited. 11 | -------------------------------------------------------------------------------- /VERSIONING.md: -------------------------------------------------------------------------------- 1 | # Serialization Versioning: Semantic Versioning for databases 2 | 3 | Semantic Versioning, commonly known as SemVer, is a great idea that has been very widely adopted as 4 | a way to decide how to name software versions. The whole concept is very well summarized on 5 | semver.org with the following lines: 6 | 7 | > Given a version number MAJOR.MINOR.PATCH, increment the: 8 | > 9 | > 1. MAJOR version when you make incompatible API changes, 10 | > 2. MINOR version when you add functionality in a backwards-compatible manner, and 11 | > 3. PATCH version when you make backwards-compatible bug fixes. 12 | > 13 | > Additional labels for pre-release and build metadata are available as extensions to the 14 | > MAJOR.MINOR.PATCH format. 15 | 16 | Unfortunately, API changes are not the most important changes for libraries that serialize data for 17 | later consumption. For these libraries, such as BadgerDB, changes to the API are much easier to 18 | handle than changes to the data format used to store data on disk. 19 | 20 | ## Serialization Version specification 21 | 22 | Serialization Versioning, like Semantic Versioning, uses 3 numbers and also calls them 23 | MAJOR.MINOR.PATCH, but the semantics of the numbers are slightly modified: 24 | 25 | Given a version number MAJOR.MINOR.PATCH, increment the: 26 | 27 | - MAJOR version when you make changes that require a transformation of the dataset before it can be 28 | used again. 29 | - MINOR version when old datasets are still readable but the API might have changed in 30 | backwards-compatible or incompatible ways. 31 | - PATCH version when you make backwards-compatible bug fixes. 32 | 33 | Additional labels for pre-release and build metadata are available as extensions to the 34 | MAJOR.MINOR.PATCH format. 35 | 36 | Following this naming strategy, migration from v1.x to v2.x requires a migration strategy for your 37 | existing dataset, and as such has to be carefully planned. Migrations in between different minor 38 | versions (e.g. v1.5.x and v1.6.x) might break your build, as the API _might_ have changed, but once 39 | your code compiles there's no need for any data migration. Lastly, changes in between two different 40 | patch versions should never break your build or dataset.
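To make the MAJOR case concrete, here is a minimal sketch of one possible migration path using
Badger's own backup/restore API, which encodes data in a version-agnostic manner (see
`badger/cmd/backup.go`). The directory paths and the `badger.bak` file name are placeholders; in a
real migration, the backup half would run against a binary built with the old MAJOR release and the
restore half against one built with the new release.

```go
package main

import (
	"os"

	badger "github.com/dgraph-io/badger/v4"
)

func main() {
	if err := migrate(); err != nil {
		panic(err)
	}
}

// migrate copies the dataset out of the old-format DB and loads it into a
// freshly created DB. In practice, split the two halves across binaries
// built against the old and new MAJOR versions respectively.
func migrate() error {
	// Back up with the old version.
	oldDB, err := badger.Open(badger.DefaultOptions("/path/to/old-db"))
	if err != nil {
		return err
	}
	defer oldDB.Close()

	bak, err := os.Create("badger.bak")
	if err != nil {
		return err
	}
	defer bak.Close()

	// since=0 requests a full backup of every key-value pair.
	if _, err := oldDB.Backup(bak, 0); err != nil {
		return err
	}
	if err := bak.Sync(); err != nil {
		return err
	}

	// Restore with the new version, into an empty directory.
	in, err := os.Open("badger.bak")
	if err != nil {
		return err
	}
	defer in.Close()

	newDB, err := badger.Open(badger.DefaultOptions("/path/to/new-db"))
	if err != nil {
		return err
	}
	defer newDB.Close()

	// 256 matches the default max pending writes used by the restore command.
	return newDB.Load(in, 256)
}
```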
41 | 42 | For more background on our decision to adopt Serialization Versioning, read the blog post [Semantic 43 | Versioning, Go Modules, and Databases][blog] and the original proposal on [this comment on Dgraph's 44 | Discuss forum][discuss]. 45 | 46 | [blog]: https://open.dgraph.io/post/serialization-versioning/ 47 | [discuss]: https://discuss.dgraph.io/t/go-modules-on-badger-and-dgraph/4662/7 48 | -------------------------------------------------------------------------------- /badger/.gitignore: -------------------------------------------------------------------------------- 1 | /badger 2 | -------------------------------------------------------------------------------- /badger/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # SPDX-FileCopyrightText: © Hypermode Inc. 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | GOOS ?= $(shell go env GOOS) 7 | GOARCH ?= $(shell go env GOARCH) 8 | 9 | .PHONY: badger 10 | 11 | all: badger 12 | 13 | badger: 14 | # build badger binary 15 | @go build --tags=jemalloc -o badger-$(GOOS)-$(GOARCH) . 16 | -------------------------------------------------------------------------------- /badger/cmd/backup.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package cmd 7 | 8 | import ( 9 | "bufio" 10 | "math" 11 | "os" 12 | 13 | "github.com/spf13/cobra" 14 | 15 | "github.com/dgraph-io/badger/v4" 16 | ) 17 | 18 | var bo = struct { 19 | backupFile string 20 | numVersions int 21 | }{} 22 | 23 | // backupCmd represents the backup command 24 | var backupCmd = &cobra.Command{ 25 | Use: "backup", 26 | Short: "Backup Badger database.", 27 | Long: `Backup Badger database to a file in a version-agnostic manner. 28 | 29 | Iterates over each key-value pair, encodes it along with its metadata and 30 | version in protocol buffers and writes them to a file. This file can later be 31 | used by the restore command to create an identical copy of the 32 | database.`, 33 | RunE: doBackup, 34 | } 35 | 36 | func init() { 37 | RootCmd.AddCommand(backupCmd) 38 | backupCmd.Flags().StringVarP(&bo.backupFile, "backup-file", "f", 39 | "badger.bak", "File to backup to") 40 | backupCmd.Flags().IntVarP(&bo.numVersions, "num-versions", "n", 41 | 0, "Number of versions to keep. A value <= 0 means keep all versions.") 42 | } 43 | 44 | func doBackup(cmd *cobra.Command, args []string) error { 45 | opt := badger.DefaultOptions(sstDir). 46 | WithValueDir(vlogDir). 47 | WithNumVersionsToKeep(math.MaxInt32) 48 | 49 | if bo.numVersions > 0 { 50 | opt.NumVersionsToKeep = bo.numVersions 51 | } 52 | 53 | // Open DB 54 | db, err := badger.Open(opt) 55 | if err != nil { 56 | return err 57 | } 58 | defer db.Close() 59 | 60 | // Create File 61 | f, err := os.Create(bo.backupFile) 62 | if err != nil { 63 | return err 64 | } 65 | 66 | bw := bufio.NewWriterSize(f, 64<<20) 67 | if _, err = db.Backup(bw, 0); err != nil { 68 | return err 69 | } 70 | 71 | if err = bw.Flush(); err != nil { 72 | return err 73 | } 74 | 75 | if err = f.Sync(); err != nil { 76 | return err 77 | } 78 | 79 | return f.Close() 80 | } 81 | -------------------------------------------------------------------------------- /badger/cmd/bench.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package cmd 7 | 8 | import ( 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var benchCmd = &cobra.Command{ 13 | Use: "benchmark", 14 | Short: "Benchmark Badger database.", 15 | Long: `This command will benchmark Badger for different usecases. 16 | Useful for testing and performance analysis.`, 17 | } 18 | 19 | func init() { 20 | RootCmd.AddCommand(benchCmd) 21 | } 22 | -------------------------------------------------------------------------------- /badger/cmd/flatten.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package cmd 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "math" 12 | 13 | "github.com/spf13/cobra" 14 | 15 | "github.com/dgraph-io/badger/v4" 16 | "github.com/dgraph-io/badger/v4/options" 17 | ) 18 | 19 | var flattenCmd = &cobra.Command{ 20 | Use: "flatten", 21 | Short: "Flatten the LSM tree.", 22 | Long: ` 23 | This command would compact all the LSM tables into one level. 24 | `, 25 | RunE: flatten, 26 | } 27 | 28 | var fo = struct { 29 | keyPath string 30 | numWorkers int 31 | numVersions int 32 | compressionType uint32 33 | }{} 34 | 35 | func init() { 36 | RootCmd.AddCommand(flattenCmd) 37 | flattenCmd.Flags().IntVarP(&fo.numWorkers, "num-workers", "w", 1, 38 | "Number of concurrent compactors to run. More compactors would use more"+ 39 | " server resources to potentially achieve faster compactions.") 40 | flattenCmd.Flags().IntVarP(&fo.numVersions, "num_versions", "", 0, 41 | "Option to configure the maximum number of versions per key. "+ 42 | "Values <= 0 will be considered to have the max number of versions.") 43 | flattenCmd.Flags().StringVar(&fo.keyPath, "encryption-key-file", "", 44 | "Path of the encryption key file.") 45 | flattenCmd.Flags().Uint32VarP(&fo.compressionType, "compression", "", 1, 46 | "Option to configure the compression type in output DB. "+ 47 | "0 to disable, 1 for Snappy, and 2 for ZSTD.") 48 | } 49 | 50 | func flatten(cmd *cobra.Command, args []string) error { 51 | if fo.numVersions <= 0 { 52 | // Keep all versions. 53 | fo.numVersions = math.MaxInt32 54 | } 55 | encKey, err := getKey(fo.keyPath) 56 | if err != nil { 57 | return err 58 | } 59 | if fo.compressionType > 2 { 60 | return errors.New( 61 | "compression value must be one of 0 (disabled), 1 (Snappy), or 2 (ZSTD)") 62 | } 63 | opt := badger.DefaultOptions(sstDir). 64 | WithValueDir(vlogDir). 65 | WithNumVersionsToKeep(fo.numVersions). 66 | WithNumCompactors(0). 67 | WithBlockCacheSize(100 << 20). 68 | WithIndexCacheSize(200 << 20). 69 | WithCompression(options.CompressionType(fo.compressionType)). 70 | WithEncryptionKey(encKey) 71 | fmt.Printf("Opening badger with options = %+v\n", opt) 72 | db, err := badger.Open(opt) 73 | if err != nil { 74 | return err 75 | } 76 | defer db.Close() 77 | 78 | return db.Flatten(fo.numWorkers) 79 | } 80 | -------------------------------------------------------------------------------- /badger/cmd/restore.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package cmd 7 | 8 | import ( 9 | "errors" 10 | "math" 11 | "os" 12 | "path/filepath" 13 | 14 | "github.com/spf13/cobra" 15 | 16 | "github.com/dgraph-io/badger/v4" 17 | ) 18 | 19 | var restoreFile string 20 | var maxPendingWrites int 21 | 22 | // restoreCmd represents the restore command 23 | var restoreCmd = &cobra.Command{ 24 | Use: "restore", 25 | Short: "Restore Badger database.", 26 | Long: `Restore Badger database from a file. 27 | 28 | It reads a file generated using the backup command (or by calling the 29 | DB.Backup() API method) and writes each key-value pair found in the file to 30 | the Badger database. 31 | 32 | Restore creates a new database, and currently does not work on an already 33 | existing database.`, 34 | RunE: doRestore, 35 | } 36 | 37 | func init() { 38 | RootCmd.AddCommand(restoreCmd) 39 | restoreCmd.Flags().StringVarP(&restoreFile, "backup-file", "f", 40 | "badger.bak", "File to restore from") 41 | // Default value for maxPendingWrites is 256, to minimise memory usage 42 | // and overall finish time. 43 | restoreCmd.Flags().IntVarP(&maxPendingWrites, "max-pending-writes", "w", 44 | 256, "Max number of pending writes at any time while restore") 45 | } 46 | 47 | func doRestore(cmd *cobra.Command, args []string) error { 48 | // Check if the DB already exists 49 | manifestFile := filepath.Join(sstDir, badger.ManifestFilename) 50 | if _, err := os.Stat(manifestFile); err == nil { // No error. File already exists. 51 | return errors.New("Cannot restore to an already existing database") 52 | } else if os.IsNotExist(err) { 53 | // pass 54 | } else { // Return an error if anything other than the error above 55 | return err 56 | } 57 | 58 | // Open DB 59 | db, err := badger.Open(badger.DefaultOptions(sstDir). 60 | WithValueDir(vlogDir). 61 | WithNumVersionsToKeep(math.MaxInt32)) 62 | if err != nil { 63 | return err 64 | } 65 | defer db.Close() 66 | 67 | // Open File 68 | f, err := os.Open(restoreFile) 69 | if err != nil { 70 | return err 71 | } 72 | defer f.Close() 73 | 74 | // Run restore 75 | return db.Load(f, maxPendingWrites) 76 | } 77 | -------------------------------------------------------------------------------- /badger/cmd/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package cmd 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "os" 12 | "strings" 13 | 14 | "github.com/spf13/cobra" 15 | ) 16 | 17 | var sstDir, vlogDir string 18 | 19 | // RootCmd represents the base command when called without any subcommands 20 | var RootCmd = &cobra.Command{ 21 | Use: "badger", 22 | Short: "Tools to manage Badger database.", 23 | PersistentPreRunE: validateRootCmdArgs, 24 | } 25 | 26 | // Execute adds all child commands to the root command and sets flags appropriately. 27 | // This is called by main.main(). It only needs to happen once to the rootCmd. 28 | func Execute() { 29 | if err := RootCmd.Execute(); err != nil { 30 | fmt.Println(err) 31 | os.Exit(1) 32 | } 33 | } 34 | 35 | func init() { 36 | RootCmd.PersistentFlags().StringVar(&sstDir, "dir", "", 37 | "Directory where the LSM tree files are located. 
(required)") 38 | 39 | RootCmd.PersistentFlags().StringVar(&vlogDir, "vlog-dir", "", 40 | "Directory where the value log files are located, if different from --dir") 41 | } 42 | 43 | func validateRootCmdArgs(cmd *cobra.Command, args []string) error { 44 | if strings.HasPrefix(cmd.Use, "help ") { // No need to validate if it is help 45 | return nil 46 | } 47 | if sstDir == "" { 48 | return errors.New("--dir not specified") 49 | } 50 | if vlogDir == "" { 51 | vlogDir = sstDir 52 | } 53 | return nil 54 | } 55 | -------------------------------------------------------------------------------- /badger/cmd/rotate.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package cmd 7 | 8 | import ( 9 | "io" 10 | "os" 11 | "time" 12 | 13 | "github.com/spf13/cobra" 14 | 15 | "github.com/dgraph-io/badger/v4" 16 | ) 17 | 18 | var oldKeyPath string 19 | var newKeyPath string 20 | var rotateCmd = &cobra.Command{ 21 | Use: "rotate", 22 | Short: "Rotate encryption key.", 23 | Long: "Rotate will replace the old key with a new encryption key.", 24 | RunE: doRotate, 25 | } 26 | 27 | func init() { 28 | RootCmd.AddCommand(rotateCmd) 29 | rotateCmd.Flags().StringVarP(&oldKeyPath, "old-key-path", "o", 30 | "", "Path of the old key") 31 | rotateCmd.Flags().StringVarP(&newKeyPath, "new-key-path", "n", 32 | "", "Path of the new key") 33 | } 34 | 35 | func doRotate(cmd *cobra.Command, args []string) error { 36 | oldKey, err := getKey(oldKeyPath) 37 | if err != nil { 38 | return err 39 | } 40 | opt := badger.KeyRegistryOptions{ 41 | Dir: sstDir, 42 | ReadOnly: true, 43 | EncryptionKey: oldKey, 44 | EncryptionKeyRotationDuration: 10 * 24 * time.Hour, 45 | } 46 | kr, err := badger.OpenKeyRegistry(opt) 47 | if err != nil { 48 | return err 49 | } 50 | newKey, err := getKey(newKeyPath) 51 | if err != nil { 52 | return err 53 | } 54 | opt.EncryptionKey = newKey 55 | err = badger.WriteKeyRegistry(kr, opt) 56 | if err != nil { 57 | return err 58 | } 59 | return nil 60 | } 61 | 62 | func getKey(path string) ([]byte, error) { 63 | if path == "" { 64 | // An empty key means plain text (used when converting to or from encryption). 65 | return []byte{}, nil 66 | } 67 | fp, err := os.Open(path) 68 | if err != nil { 69 | return nil, err 70 | } 71 | defer fp.Close() 72 | return io.ReadAll(fp) 73 | } 74 | -------------------------------------------------------------------------------- /badger/cmd/rotate_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package cmd 7 | 8 | import ( 9 | "math/rand" 10 | "os" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/require" 14 | 15 | "github.com/dgraph-io/badger/v4" 16 | "github.com/dgraph-io/badger/v4/y" 17 | ) 18 | 19 | func TestRotate(t *testing.T) { 20 | dir, err := os.MkdirTemp("", "badger-test") 21 | require.NoError(t, err) 22 | defer os.RemoveAll(dir) 23 | 24 | // Creating sample key. 25 | key := make([]byte, 32) 26 | _, err = rand.Read(key) 27 | require.NoError(t, err) 28 | 29 | fp, err := os.CreateTemp("", "*.key") 30 | require.NoError(t, err) 31 | _, err = fp.Write(key) 32 | require.NoError(t, err) 33 | defer fp.Close() 34 | 35 | // Opening DB with the encryption key.
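// A 32-byte key selects AES-256; Badger also accepts 16- and 24-byte keys (AES-128/192).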
36 | opts := badger.DefaultOptions(dir) 37 | opts.EncryptionKey = key 38 | opts.BlockCacheSize = 1 << 20 39 | 40 | db, err := badger.Open(opts) 41 | require.NoError(t, err) 42 | // Closing the db. 43 | require.NoError(t, db.Close()) 44 | 45 | // Opening the db again to verify it opens successfully. 46 | db, err = badger.Open(opts) 47 | require.NoError(t, err) 48 | // Closing so that we can open another db 49 | require.NoError(t, db.Close()) 50 | 51 | // Creating another sample key. 52 | key2 := make([]byte, 32) 53 | _, err = rand.Read(key2) 54 | require.NoError(t, err) 55 | fp2, err := os.CreateTemp("", "*.key") 56 | require.NoError(t, err) 57 | _, err = fp2.Write(key2) 58 | require.NoError(t, err) 59 | defer fp2.Close() 60 | oldKeyPath = fp2.Name() 61 | sstDir = dir 62 | 63 | // Check whether we are able to rotate the key with some random key. We should get a mismatch 64 | // error. 65 | require.EqualError(t, doRotate(nil, []string{}), badger.ErrEncryptionKeyMismatch.Error()) 66 | 67 | // Rotating the key with the proper old key. 68 | oldKeyPath = fp.Name() 69 | newKeyPath = fp2.Name() 70 | require.NoError(t, doRotate(nil, []string{})) 71 | 72 | // Checking whether db opens with the new key. 73 | opts.EncryptionKey = key2 74 | db, err = badger.Open(opts) 75 | require.NoError(t, err) 76 | require.NoError(t, db.Close()) 77 | 78 | // Checking for plain text rotation. 79 | oldKeyPath = newKeyPath 80 | newKeyPath = "" 81 | require.NoError(t, doRotate(nil, []string{})) 82 | opts.EncryptionKey = []byte{} 83 | db, err = badger.Open(opts) 84 | require.NoError(t, err) 85 | defer db.Close() 86 | } 87 | 88 | // This test shows that the rotate tool can be used to enable encryption. 89 | func TestRotatePlainTextToEncrypted(t *testing.T) { 90 | dir, err := os.MkdirTemp("", "badger-test") 91 | require.NoError(t, err) 92 | defer os.RemoveAll(dir) 93 | 94 | // Open DB without encryption. 95 | opts := badger.DefaultOptions(dir) 96 | db, err := badger.Open(opts) 97 | require.NoError(t, err) 98 | 99 | require.NoError(t, db.Update(func(txn *badger.Txn) error { 100 | return txn.Set([]byte("foo"), []byte("bar")) 101 | })) 102 | 103 | require.NoError(t, db.Close()) 104 | 105 | // Create an encryption key. 106 | key := make([]byte, 32) 107 | y.Check2(rand.Read(key)) 108 | fp, err := os.CreateTemp("", "*.key") 109 | require.NoError(t, err) 110 | _, err = fp.Write(key) 111 | require.NoError(t, err) 112 | defer fp.Close() 113 | 114 | oldKeyPath = "" 115 | newKeyPath = fp.Name() 116 | sstDir = dir 117 | 118 | // Enable encryption using the key at newKeyPath. 119 | require.Nil(t, doRotate(nil, []string{})) 120 | 121 | // Try opening DB without the key. 122 | opts.BlockCacheSize = 1 << 20 123 | _, err = badger.Open(opts) 124 | require.EqualError(t, err, badger.ErrEncryptionKeyMismatch.Error()) 125 | 126 | // Check whether db opens with the new key. 127 | opts.EncryptionKey = key 128 | db, err = badger.Open(opts) 129 | require.NoError(t, err) 130 | 131 | require.NoError(t, db.View(func(txn *badger.Txn) error { 132 | iopt := badger.DefaultIteratorOptions 133 | it := txn.NewIterator(iopt) 134 | defer it.Close() 135 | count := 0 136 | for it.Rewind(); it.Valid(); it.Next() { 137 | count++ 138 | } 139 | require.Equal(t, 1, count) 140 | return nil 141 | })) 142 | require.NoError(t, db.Close()) 143 | } 144 | -------------------------------------------------------------------------------- /badger/cmd/stream.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc.
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package cmd 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "io" 12 | "math" 13 | "os" 14 | 15 | "github.com/spf13/cobra" 16 | 17 | "github.com/dgraph-io/badger/v4" 18 | "github.com/dgraph-io/badger/v4/options" 19 | "github.com/dgraph-io/badger/v4/y" 20 | ) 21 | 22 | var streamCmd = &cobra.Command{ 23 | Use: "stream", 24 | Short: "Stream DB into another DB with different options", 25 | Long: ` 26 | This command streams the contents of this DB into another DB with the given options. 27 | `, 28 | RunE: stream, 29 | } 30 | 31 | var so = struct { 32 | outDir string 33 | outFile string 34 | compressionType uint32 35 | numVersions int 36 | readOnly bool 37 | keyPath string 38 | }{} 39 | 40 | func init() { 41 | // TODO: Add more options. 42 | RootCmd.AddCommand(streamCmd) 43 | streamCmd.Flags().StringVarP(&so.outDir, "out", "o", "", 44 | "Path to output DB. The directory should be empty.") 45 | streamCmd.Flags().StringVarP(&so.outFile, "", "f", "", 46 | "Run a backup to this file.") 47 | streamCmd.Flags().BoolVarP(&so.readOnly, "read_only", "", true, 48 | "Option to open input DB in read-only mode") 49 | streamCmd.Flags().IntVarP(&so.numVersions, "num_versions", "", 0, 50 | "Option to configure the maximum number of versions per key. "+ 51 | "Values <= 0 will be considered to have the max number of versions.") 52 | streamCmd.Flags().Uint32VarP(&so.compressionType, "compression", "", 1, 53 | "Option to configure the compression type in output DB. "+ 54 | "0 to disable, 1 for Snappy, and 2 for ZSTD.") 55 | streamCmd.Flags().StringVarP(&so.keyPath, "encryption-key-file", "e", "", 56 | "Path of the encryption key file.") 57 | } 58 | 59 | func stream(cmd *cobra.Command, args []string) error { 60 | // Options for input DB. 61 | if so.numVersions <= 0 { 62 | so.numVersions = math.MaxInt32 63 | } 64 | encKey, err := getKey(so.keyPath) 65 | if err != nil { 66 | return err 67 | } 68 | inOpt := badger.DefaultOptions(sstDir). 69 | WithReadOnly(so.readOnly). 70 | WithValueThreshold(1 << 10 /* 1KB */). 71 | WithNumVersionsToKeep(so.numVersions). 72 | WithBlockCacheSize(100 << 20). 73 | WithIndexCacheSize(200 << 20). 74 | WithEncryptionKey(encKey) 75 | 76 | // Options for output DB. 77 | if so.compressionType > 2 { 78 | return errors.New( 79 | "compression value must be one of 0 (disabled), 1 (Snappy), or 2 (ZSTD)") 80 | } 81 | inDB, err := badger.OpenManaged(inOpt) 82 | if err != nil { 83 | return y.Wrapf(err, "cannot open DB at %s", sstDir) 84 | } 85 | defer inDB.Close() 86 | 87 | stream := inDB.NewStreamAt(math.MaxUint64) 88 | 89 | if len(so.outDir) > 0 { 90 | if _, err := os.Stat(so.outDir); err == nil { 91 | f, err := os.Open(so.outDir) 92 | if err != nil { 93 | return err 94 | } 95 | defer f.Close() 96 | 97 | _, err = f.Readdirnames(1) 98 | if err != io.EOF { 99 | return fmt.Errorf( 100 | "cannot run stream tool on non-empty output directory %s", so.outDir) 101 | } 102 | } 103 | 104 | stream.LogPrefix = "DB.Stream" 105 | outOpt := inOpt. 106 | WithDir(so.outDir). 107 | WithValueDir(so.outDir). 108 | WithNumVersionsToKeep(so.numVersions). 109 | WithCompression(options.CompressionType(so.compressionType)). 110 | WithEncryptionKey(encKey). 
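// The output DB must be writable even when the input DB was opened read-only.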
111 | WithReadOnly(false) 112 | err = inDB.StreamDB(outOpt) 113 | 114 | } else if len(so.outFile) > 0 { 115 | stream.LogPrefix = "DB.Backup" 116 | f, err := os.OpenFile(so.outFile, os.O_RDWR|os.O_CREATE, 0666) 117 | y.Check(err) 118 | _, err = stream.Backup(f, 0) 119 | y.Check(err) 120 | } 121 | fmt.Println("Done.") 122 | return err 123 | } 124 | -------------------------------------------------------------------------------- /badger/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package main 7 | 8 | import ( 9 | "fmt" 10 | "net/http" 11 | _ "net/http/pprof" //nolint:gosec 12 | "runtime" 13 | 14 | "github.com/dustin/go-humanize" 15 | "go.opentelemetry.io/contrib/zpages" 16 | 17 | "github.com/dgraph-io/badger/v4/badger/cmd" 18 | "github.com/dgraph-io/ristretto/v2/z" 19 | ) 20 | 21 | func main() { 22 | go func() { 23 | for i := 8080; i < 9080; i++ { 24 | fmt.Printf("Listening for /debug HTTP requests at port: %d\n", i) 25 | if err := http.ListenAndServe(fmt.Sprintf("0.0.0.0:%d", i), nil); err != nil { 26 | fmt.Println("Port busy. Trying another one...") 27 | continue 28 | 29 | } 30 | } 31 | }() 32 | http.DefaultServeMux.Handle("/z", zpages.NewTracezHandler(zpages.NewSpanProcessor())) 33 | runtime.SetBlockProfileRate(100) 34 | runtime.GOMAXPROCS(128) 35 | 36 | out := z.CallocNoRef(1, "Badger.Main") 37 | fmt.Printf("jemalloc enabled: %v\n", len(out) > 0) 38 | z.StatsPrint() 39 | z.Free(out) 40 | 41 | cmd.Execute() 42 | fmt.Printf("Num Allocated Bytes at program end: %s\n", 43 | humanize.IBytes(uint64(z.NumAllocBytes()))) 44 | if z.NumAllocBytes() > 0 { 45 | fmt.Println(z.Leaks()) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /batch_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "fmt" 10 | "os" 11 | "testing" 12 | "time" 13 | 14 | "github.com/stretchr/testify/require" 15 | 16 | "github.com/dgraph-io/badger/v4/y" 17 | ) 18 | 19 | func TestWriteBatch(t *testing.T) { 20 | key := func(i int) []byte { 21 | return []byte(fmt.Sprintf("%10d", i)) 22 | } 23 | val := func(i int) []byte { 24 | return []byte(fmt.Sprintf("%128d", i)) 25 | } 26 | 27 | test := func(t *testing.T, db *DB) { 28 | wb := db.NewWriteBatch() 29 | defer wb.Cancel() 30 | 31 | // Sanity check for SetEntryAt. 
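// SetEntryAt is part of the managed-mode API, so a normal WriteBatch must reject it.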
require.Error(t, wb.SetEntryAt(&Entry{}, 12))

N, M := 50000, 1000
start := time.Now()

for i := 0; i < N; i++ {
    require.NoError(t, wb.Set(key(i), val(i)))
}
for i := 0; i < M; i++ {
    require.NoError(t, wb.Delete(key(i)))
}
require.NoError(t, wb.Flush())
t.Logf("Time taken for %d writes (w/ test options): %s\n", N+M, time.Since(start))

err := db.View(func(txn *Txn) error {
    itr := txn.NewIterator(DefaultIteratorOptions)
    defer itr.Close()

    i := M
    for itr.Rewind(); itr.Valid(); itr.Next() {
        item := itr.Item()
        require.Equal(t, string(key(i)), string(item.Key()))
        valcopy, err := item.ValueCopy(nil)
        require.NoError(t, err)
        require.Equal(t, val(i), valcopy)
        i++
    }
    require.Equal(t, N, i)
    return nil
})
require.NoError(t, err)
}
t.Run("disk mode", func(t *testing.T) {
    opt := getTestOptions("")
    // Set value threshold to 32 bytes; otherwise the write batch generates
    // too many files and we crash with a "too many open files" error.
    opt.ValueThreshold = 32
    runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
        test(t, db)
    })
    t.Logf("Disk mode done\n")
})
t.Run("InMemory mode", func(t *testing.T) {
    t.Skipf("TODO(ibrahim): Please fix this")
    opt := getTestOptions("")
    opt.InMemory = true
    db, err := Open(opt)
    require.NoError(t, err)
    test(t, db)
    t.Logf("InMemory mode done\n")
    require.NoError(t, db.Close())
})
}

// This test ensures we don't end up in deadlock in case of an empty writebatch.
func TestEmptyWriteBatch(t *testing.T) {
    t.Run("normal mode", func(t *testing.T) {
        runBadgerTest(t, nil, func(t *testing.T, db *DB) {
            wb := db.NewWriteBatch()
            require.NoError(t, wb.Flush())
            wb = db.NewWriteBatch()
            require.NoError(t, wb.Flush())
            wb = db.NewWriteBatch()
            // Flush commits the inner txn and starts a new one in its place.
            // Thus we need to save it to check whether it was discarded.
            txn := wb.txn
            require.NoError(t, wb.Flush())
            // Check that the flushed txn was discarded and cannot be reused.
            require.True(t, txn.discarded)
        })
    })
    t.Run("managed mode", func(t *testing.T) {
        opt := getTestOptions("")
        opt.managedTxns = true
        runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
            t.Run("WriteBatchAt", func(t *testing.T) {
                wb := db.NewWriteBatchAt(2)
                require.NoError(t, wb.Flush())
                wb = db.NewWriteBatchAt(208)
                require.NoError(t, wb.Flush())
                wb = db.NewWriteBatchAt(31)
                require.NoError(t, wb.Flush())
            })
            t.Run("ManagedWriteBatch", func(t *testing.T) {
                wb := db.NewManagedWriteBatch()
                require.NoError(t, wb.Flush())
                wb = db.NewManagedWriteBatch()
                require.NoError(t, wb.Flush())
                wb = db.NewManagedWriteBatch()
                require.NoError(t, wb.Flush())
            })
        })
    })
}

// This test ensures we don't panic during flush.
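// Flushing a batch that was already flushed or cancelled should surface
// y.ErrCommitAfterFinish as an ordinary error instead of panicking.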
// See issue: https://github.com/dgraph-io/badger/issues/1394
func TestFlushPanic(t *testing.T) {
    t.Run("flush after flush", func(t *testing.T) {
        runBadgerTest(t, nil, func(t *testing.T, db *DB) {
            wb := db.NewWriteBatch()
            wb.Flush()
            require.ErrorIs(t, wb.Flush(), y.ErrCommitAfterFinish)
        })
    })
    t.Run("flush after cancel", func(t *testing.T) {
        runBadgerTest(t, nil, func(t *testing.T, db *DB) {
            wb := db.NewWriteBatch()
            wb.Cancel()
            require.ErrorIs(t, wb.Flush(), y.ErrCommitAfterFinish)
        })
    })
}

func TestBatchErrDeadlock(t *testing.T) {
    dir, err := os.MkdirTemp("", "badger-test")
    require.NoError(t, err)
    defer removeDir(dir)

    opt := DefaultOptions(dir)
    db, err := OpenManaged(opt)
    require.NoError(t, err)

    wb := db.NewManagedWriteBatch()
    require.NoError(t, wb.SetEntryAt(&Entry{Key: []byte("foo")}, 0))
    require.Error(t, wb.Flush())
    require.NoError(t, db.Close())
}
-------------------------------------------------------------------------------- /changes.sh: --------------------------------------------------------------------------------
#!/bin/bash

set -e
GHORG=${GHORG:-hypermodeinc}
GHREPO=${GHREPO:-badger}
cat <<EOF
-------------------------------------------------------------------------------- /dir_other.go: --------------------------------------------------------------------------------
//go:build js || wasip1
// +build js wasip1

/*
 * SPDX-FileCopyrightText: © Hypermode Inc.
 * SPDX-License-Identifier: Apache-2.0
 */

package badger

import (
    "fmt"
    "os"
    "path/filepath"

    "github.com/dgraph-io/badger/v4/y"
)

// directoryLockGuard holds a lock on a directory and a pid file inside. The pid file isn't part
// of the locking mechanism, it's just advisory.
type directoryLockGuard struct {
    // File handle on the directory, which we've flocked.
    f *os.File
    // The absolute path to our pid file.
    path string
    // Was this a shared lock for a read-only database?
    readOnly bool
}

// acquireDirectoryLock gets a lock on the directory (using flock). If
// this is not read-only, it will also write our pid to
// dirPath/pidFileName for convenience.
func acquireDirectoryLock(dirPath string, pidFileName string, readOnly bool) (
    *directoryLockGuard, error) {
    // Convert to absolute path so that Release still works even if we do an unbalanced
    // chdir in the meantime.
    absPidFilePath, err := filepath.Abs(filepath.Join(dirPath, pidFileName))
    if err != nil {
        return nil, y.Wrapf(err, "cannot get absolute path for pid lock file")
    }
    f, err := os.Open(dirPath)
    if err != nil {
        return nil, y.Wrapf(err, "cannot open directory %q", dirPath)
    }

    // NOTE: Here is where we would normally call flock.
    // This is not supported in js / wasm, so skip it.

    if !readOnly {
        // Yes, we happily overwrite a pre-existing pid file. We're the
        // only read-write badger process using this directory.
        err = os.WriteFile(absPidFilePath, []byte(fmt.Sprintf("%d\n", os.Getpid())), 0666)
        if err != nil {
            f.Close()
            return nil, y.Wrapf(err,
                "Cannot write pid file %q", absPidFilePath)
        }
    }

    return &directoryLockGuard{f, absPidFilePath, readOnly}, nil
}

// Release deletes the pid file and releases our lock on the directory.
func (guard *directoryLockGuard) release() error {
    var err error
    if !guard.readOnly {
        // It's important that we remove the pid file first.
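// If we closed the directory handle first, another read-write process could
// take over the directory and write its own pid file, which we would then
// delete by mistake.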
68 | err = os.Remove(guard.path) 69 | } 70 | 71 | if closeErr := guard.f.Close(); err == nil { 72 | err = closeErr 73 | } 74 | guard.path = "" 75 | guard.f = nil 76 | 77 | return err 78 | } 79 | 80 | // openDir opens a directory for syncing. 81 | func openDir(path string) (*os.File, error) { return os.Open(path) } 82 | 83 | // When you create or delete a file, you have to ensure the directory entry for the file is synced 84 | // in order to guarantee the file is visible (if the system crashes). (See the man page for fsync, 85 | // or see https://github.com/coreos/etcd/issues/6368 for an example.) 86 | func syncDir(dir string) error { 87 | f, err := openDir(dir) 88 | if err != nil { 89 | return y.Wrapf(err, "While opening directory: %s.", dir) 90 | } 91 | 92 | err = f.Sync() 93 | closeErr := f.Close() 94 | if err != nil { 95 | return y.Wrapf(err, "While syncing directory: %s.", dir) 96 | } 97 | return y.Wrapf(closeErr, "While closing directory: %s.", dir) 98 | } 99 | -------------------------------------------------------------------------------- /dir_plan9.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "fmt" 10 | "os" 11 | "path/filepath" 12 | "strings" 13 | 14 | "github.com/dgraph-io/badger/v4/y" 15 | ) 16 | 17 | // directoryLockGuard holds a lock on a directory and a pid file inside. The pid file isn't part 18 | // of the locking mechanism, it's just advisory. 19 | type directoryLockGuard struct { 20 | // File handle on the directory, which we've locked. 21 | f *os.File 22 | // The absolute path to our pid file. 23 | path string 24 | } 25 | 26 | // acquireDirectoryLock gets a lock on the directory. 27 | // It will also write our pid to dirPath/pidFileName for convenience. 28 | // readOnly is not supported on Plan 9. 29 | func acquireDirectoryLock(dirPath string, pidFileName string, readOnly bool) ( 30 | *directoryLockGuard, error) { 31 | if readOnly { 32 | return nil, ErrPlan9NotSupported 33 | } 34 | 35 | // Convert to absolute path so that Release still works even if we do an unbalanced 36 | // chdir in the meantime. 37 | absPidFilePath, err := filepath.Abs(filepath.Join(dirPath, pidFileName)) 38 | if err != nil { 39 | return nil, y.Wrap(err, "cannot get absolute path for pid lock file") 40 | } 41 | 42 | // If the file was unpacked or created by some other program, it might not 43 | // have the ModeExclusive bit set. Set it before we call OpenFile, so that we 44 | // can be confident that a successful OpenFile implies exclusive use. 45 | // 46 | // OpenFile fails if the file ModeExclusive bit set *and* the file is already open. 47 | // So, if the file is closed when the DB crashed, we're fine. When the process 48 | // that was managing the DB crashes, the OS will close the file for us. 
49 | // 50 | // This bit of code is copied from Go's lockedfile internal package: 51 | // https://github.com/golang/go/blob/go1.15rc1/src/cmd/go/internal/lockedfile/lockedfile_plan9.go#L58 52 | if fi, err := os.Stat(absPidFilePath); err == nil { 53 | if fi.Mode()&os.ModeExclusive == 0 { 54 | if err := os.Chmod(absPidFilePath, fi.Mode()|os.ModeExclusive); err != nil { 55 | return nil, y.Wrapf(err, "could not set exclusive mode bit") 56 | } 57 | } 58 | } else if !os.IsNotExist(err) { 59 | return nil, err 60 | } 61 | f, err := os.OpenFile(absPidFilePath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0666|os.ModeExclusive) 62 | if err != nil { 63 | if isLocked(err) { 64 | return nil, y.Wrapf(err, 65 | "Cannot open pid lock file %q. Another process is using this Badger database", 66 | absPidFilePath) 67 | } 68 | return nil, y.Wrapf(err, "Cannot open pid lock file %q", absPidFilePath) 69 | } 70 | 71 | if _, err = fmt.Fprintf(f, "%d\n", os.Getpid()); err != nil { 72 | f.Close() 73 | return nil, y.Wrapf(err, "could not write pid") 74 | } 75 | return &directoryLockGuard{f, absPidFilePath}, nil 76 | } 77 | 78 | // Release deletes the pid file and releases our lock on the directory. 79 | func (guard *directoryLockGuard) release() error { 80 | // It's important that we remove the pid file first. 81 | err := os.Remove(guard.path) 82 | 83 | if closeErr := guard.f.Close(); err == nil { 84 | err = closeErr 85 | } 86 | guard.path = "" 87 | guard.f = nil 88 | 89 | return err 90 | } 91 | 92 | // openDir opens a directory for syncing. 93 | func openDir(path string) (*os.File, error) { return os.Open(path) } 94 | 95 | // When you create or delete a file, you have to ensure the directory entry for the file is synced 96 | // in order to guarantee the file is visible (if the system crashes). (See the man page for fsync, 97 | // or see https://github.com/coreos/etcd/issues/6368 for an example.) 98 | func syncDir(dir string) error { 99 | f, err := openDir(dir) 100 | if err != nil { 101 | return y.Wrapf(err, "While opening directory: %s.", dir) 102 | } 103 | 104 | err = f.Sync() 105 | closeErr := f.Close() 106 | if err != nil { 107 | return y.Wrapf(err, "While syncing directory: %s.", dir) 108 | } 109 | return y.Wrapf(closeErr, "While closing directory: %s.", dir) 110 | } 111 | 112 | // Opening an exclusive-use file returns an error. 113 | // The expected error strings are: 114 | // 115 | // - "open/create -- file is locked" (cwfs, kfs) 116 | // - "exclusive lock" (fossil) 117 | // - "exclusive use file already open" (ramfs) 118 | // 119 | // See https://github.com/golang/go/blob/go1.15rc1/src/cmd/go/internal/lockedfile/lockedfile_plan9.go#L16 120 | var lockedErrStrings = [...]string{ 121 | "file is locked", 122 | "exclusive lock", 123 | "exclusive use file already open", 124 | } 125 | 126 | // Even though plan9 doesn't support the Lock/RLock/Unlock functions to 127 | // manipulate already-open files, IsLocked is still meaningful: os.OpenFile 128 | // itself may return errors that indicate that a file with the ModeExclusive bit 129 | // set is already open. 
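// A minimal usage sketch (hypothetical caller; the real call site is
// acquireDirectoryLock above):
//
//	_, err := os.OpenFile(pidPath, os.O_WRONLY, 0666|os.ModeExclusive)
//	if err != nil && isLocked(err) {
//		// Another process already holds this Badger directory.
//	}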
130 | func isLocked(err error) bool { 131 | s := err.Error() 132 | 133 | for _, frag := range lockedErrStrings { 134 | if strings.Contains(s, frag) { 135 | return true 136 | } 137 | } 138 | return false 139 | } 140 | -------------------------------------------------------------------------------- /dir_unix.go: -------------------------------------------------------------------------------- 1 | //go:build !windows && !plan9 && !js && !wasip1 2 | // +build !windows,!plan9,!js,!wasip1 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package badger 10 | 11 | import ( 12 | "fmt" 13 | "os" 14 | "path/filepath" 15 | 16 | "golang.org/x/sys/unix" 17 | 18 | "github.com/dgraph-io/badger/v4/y" 19 | ) 20 | 21 | // directoryLockGuard holds a lock on a directory and a pid file inside. The pid file isn't part 22 | // of the locking mechanism, it's just advisory. 23 | type directoryLockGuard struct { 24 | // File handle on the directory, which we've flocked. 25 | f *os.File 26 | // The absolute path to our pid file. 27 | path string 28 | // Was this a shared lock for a read-only database? 29 | readOnly bool 30 | } 31 | 32 | // acquireDirectoryLock gets a lock on the directory (using flock). If 33 | // this is not read-only, it will also write our pid to 34 | // dirPath/pidFileName for convenience. 35 | func acquireDirectoryLock(dirPath string, pidFileName string, readOnly bool) ( 36 | *directoryLockGuard, error) { 37 | // Convert to absolute path so that Release still works even if we do an unbalanced 38 | // chdir in the meantime. 39 | absPidFilePath, err := filepath.Abs(filepath.Join(dirPath, pidFileName)) 40 | if err != nil { 41 | return nil, y.Wrapf(err, "cannot get absolute path for pid lock file") 42 | } 43 | f, err := os.Open(dirPath) 44 | if err != nil { 45 | return nil, y.Wrapf(err, "cannot open directory %q", dirPath) 46 | } 47 | opts := unix.LOCK_EX | unix.LOCK_NB 48 | if readOnly { 49 | opts = unix.LOCK_SH | unix.LOCK_NB 50 | } 51 | 52 | err = unix.Flock(int(f.Fd()), opts) 53 | if err != nil { 54 | f.Close() 55 | return nil, y.Wrapf(err, 56 | "Cannot acquire directory lock on %q. Another process is using this Badger database.", 57 | dirPath) 58 | } 59 | 60 | if !readOnly { 61 | // Yes, we happily overwrite a pre-existing pid file. We're the 62 | // only read-write badger process using this directory. 63 | err = os.WriteFile(absPidFilePath, []byte(fmt.Sprintf("%d\n", os.Getpid())), 0666) 64 | if err != nil { 65 | f.Close() 66 | return nil, y.Wrapf(err, 67 | "Cannot write pid file %q", absPidFilePath) 68 | } 69 | } 70 | return &directoryLockGuard{f, absPidFilePath, readOnly}, nil 71 | } 72 | 73 | // Release deletes the pid file and releases our lock on the directory. 74 | func (guard *directoryLockGuard) release() error { 75 | var err error 76 | if !guard.readOnly { 77 | // It's important that we remove the pid file first. 78 | err = os.Remove(guard.path) 79 | } 80 | 81 | if closeErr := guard.f.Close(); err == nil { 82 | err = closeErr 83 | } 84 | guard.path = "" 85 | guard.f = nil 86 | 87 | return err 88 | } 89 | 90 | // openDir opens a directory for syncing. 91 | func openDir(path string) (*os.File, error) { return os.Open(path) } 92 | 93 | // When you create or delete a file, you have to ensure the directory entry for the file is synced 94 | // in order to guarantee the file is visible (if the system crashes). (See the man page for fsync, 95 | // or see https://github.com/coreos/etcd/issues/6368 for an example.) 
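// A sketch of the intended calling pattern (illustrative only; the file name
// and the discarded errors are placeholders):
//
//	f, _ := os.Create(filepath.Join(dir, "000001.sst"))
//	_ = f.Sync()     // make the file's contents durable
//	_ = f.Close()
//	_ = syncDir(dir) // make the directory entry itself durable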
96 | func syncDir(dir string) error { 97 | f, err := openDir(dir) 98 | if err != nil { 99 | return y.Wrapf(err, "While opening directory: %s.", dir) 100 | } 101 | 102 | err = f.Sync() 103 | closeErr := f.Close() 104 | if err != nil { 105 | return y.Wrapf(err, "While syncing directory: %s.", dir) 106 | } 107 | return y.Wrapf(closeErr, "While closing directory: %s.", dir) 108 | } 109 | -------------------------------------------------------------------------------- /dir_windows.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | // +build windows 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package badger 10 | 11 | // OpenDir opens a directory in windows with write access for syncing. 12 | import ( 13 | "os" 14 | "path/filepath" 15 | "syscall" 16 | 17 | "github.com/dgraph-io/badger/v4/y" 18 | ) 19 | 20 | // FILE_ATTRIBUTE_TEMPORARY - A file that is being used for temporary storage. 21 | // FILE_FLAG_DELETE_ON_CLOSE - The file is to be deleted immediately after all of its handles are 22 | // closed, which includes the specified handle and any other open or duplicated handles. 23 | // See: https://docs.microsoft.com/en-us/windows/desktop/FileIO/file-attribute-constants 24 | // NOTE: Added here to avoid importing golang.org/x/sys/windows 25 | const ( 26 | FILE_ATTRIBUTE_TEMPORARY = 0x00000100 27 | FILE_FLAG_DELETE_ON_CLOSE = 0x04000000 28 | ) 29 | 30 | func openDir(path string) (*os.File, error) { 31 | fd, err := openDirWin(path) 32 | if err != nil { 33 | return nil, err 34 | } 35 | return os.NewFile(uintptr(fd), path), nil 36 | } 37 | 38 | func openDirWin(path string) (fd syscall.Handle, err error) { 39 | if len(path) == 0 { 40 | return syscall.InvalidHandle, syscall.ERROR_FILE_NOT_FOUND 41 | } 42 | pathp, err := syscall.UTF16PtrFromString(path) 43 | if err != nil { 44 | return syscall.InvalidHandle, err 45 | } 46 | access := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE) 47 | sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE) 48 | createmode := uint32(syscall.OPEN_EXISTING) 49 | fl := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS) 50 | return syscall.CreateFile(pathp, access, sharemode, nil, createmode, fl, 0) 51 | } 52 | 53 | // DirectoryLockGuard holds a lock on the directory. 54 | type directoryLockGuard struct { 55 | h syscall.Handle 56 | path string 57 | } 58 | 59 | // AcquireDirectoryLock acquires exclusive access to a directory. 60 | func acquireDirectoryLock(dirPath string, pidFileName string, readOnly bool) (*directoryLockGuard, error) { 61 | if readOnly { 62 | return nil, ErrWindowsNotSupported 63 | } 64 | 65 | // Convert to absolute path so that Release still works even if we do an unbalanced 66 | // chdir in the meantime. 67 | absLockFilePath, err := filepath.Abs(filepath.Join(dirPath, pidFileName)) 68 | if err != nil { 69 | return nil, y.Wrap(err, "Cannot get absolute path for pid lock file") 70 | } 71 | 72 | // This call creates a file handler in memory that only one process can use at a time. When 73 | // that process ends, the file is deleted by the system. 74 | // FILE_ATTRIBUTE_TEMPORARY is used to tell Windows to try to create the handle in memory. 75 | // FILE_FLAG_DELETE_ON_CLOSE is not specified in syscall_windows.go but tells Windows to delete 76 | // the file when all processes holding the handler are closed. 77 | // XXX: this works but it's a bit klunky. i'd prefer to use LockFileEx but it needs unsafe pkg. 
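// (A LockFileEx-based lock via golang.org/x/sys/windows would avoid the
// temp-file trick, at the cost of pulling in that dependency.)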
78 | h, err := syscall.CreateFile( 79 | syscall.StringToUTF16Ptr(absLockFilePath), 0, 0, nil, 80 | syscall.OPEN_ALWAYS, 81 | uint32(FILE_ATTRIBUTE_TEMPORARY|FILE_FLAG_DELETE_ON_CLOSE), 82 | 0) 83 | if err != nil { 84 | return nil, y.Wrapf(err, 85 | "Cannot create lock file %q. Another process is using this Badger database", 86 | absLockFilePath) 87 | } 88 | 89 | return &directoryLockGuard{h: h, path: absLockFilePath}, nil 90 | } 91 | 92 | // Release removes the directory lock. 93 | func (g *directoryLockGuard) release() error { 94 | g.path = "" 95 | return syscall.CloseHandle(g.h) 96 | } 97 | 98 | // Windows doesn't support syncing directories to the file system. See 99 | // https://github.com/hypermodeinc/badger/issues/699#issuecomment-504133587 for more details. 100 | func syncDir(dir string) error { return nil } 101 | -------------------------------------------------------------------------------- /discard.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "encoding/binary" 10 | "os" 11 | "path/filepath" 12 | "sort" 13 | "sync" 14 | 15 | "github.com/dgraph-io/badger/v4/y" 16 | "github.com/dgraph-io/ristretto/v2/z" 17 | ) 18 | 19 | // discardStats keeps track of the amount of data that could be discarded for 20 | // a given logfile. 21 | type discardStats struct { 22 | sync.Mutex 23 | 24 | *z.MmapFile 25 | opt Options 26 | nextEmptySlot int 27 | } 28 | 29 | const discardFname string = "DISCARD" 30 | 31 | func InitDiscardStats(opt Options) (*discardStats, error) { 32 | fname := filepath.Join(opt.ValueDir, discardFname) 33 | 34 | // 1MB file can store 65.536 discard entries. Each entry is 16 bytes. 35 | mf, err := z.OpenMmapFile(fname, os.O_CREATE|os.O_RDWR, 1<<20) 36 | lf := &discardStats{ 37 | MmapFile: mf, 38 | opt: opt, 39 | } 40 | if err == z.NewFile { 41 | // We don't need to zero out the entire 1MB. 42 | lf.zeroOut() 43 | 44 | } else if err != nil { 45 | return nil, y.Wrapf(err, "while opening file: %s\n", discardFname) 46 | } 47 | 48 | for slot := 0; slot < lf.maxSlot(); slot++ { 49 | if lf.get(16*slot) == 0 { 50 | lf.nextEmptySlot = slot 51 | break 52 | } 53 | } 54 | sort.Sort(lf) 55 | opt.Infof("Discard stats nextEmptySlot: %d\n", lf.nextEmptySlot) 56 | return lf, nil 57 | } 58 | 59 | func (lf *discardStats) Len() int { 60 | return lf.nextEmptySlot 61 | } 62 | func (lf *discardStats) Less(i, j int) bool { 63 | return lf.get(16*i) < lf.get(16*j) 64 | } 65 | func (lf *discardStats) Swap(i, j int) { 66 | left := lf.Data[16*i : 16*i+16] 67 | right := lf.Data[16*j : 16*j+16] 68 | var tmp [16]byte 69 | copy(tmp[:], left) 70 | copy(left, right) 71 | copy(right, tmp[:]) 72 | } 73 | 74 | // offset is not slot. 75 | func (lf *discardStats) get(offset int) uint64 { 76 | return binary.BigEndian.Uint64(lf.Data[offset : offset+8]) 77 | } 78 | func (lf *discardStats) set(offset int, val uint64) { 79 | binary.BigEndian.PutUint64(lf.Data[offset:offset+8], val) 80 | } 81 | 82 | // zeroOut would zero out the next slot. 83 | func (lf *discardStats) zeroOut() { 84 | lf.set(lf.nextEmptySlot*16, 0) 85 | lf.set(lf.nextEmptySlot*16+8, 0) 86 | } 87 | 88 | func (lf *discardStats) maxSlot() int { 89 | return len(lf.Data) / 16 90 | } 91 | 92 | // Update would update the discard stats for the given file id. If discard is 93 | // 0, it would return the current value of discard for the file. 
If discard is 94 | // < 0, it would set the current value of discard to zero for the file. 95 | func (lf *discardStats) Update(fidu uint32, discard int64) int64 { 96 | fid := uint64(fidu) 97 | lf.Lock() 98 | defer lf.Unlock() 99 | 100 | idx := sort.Search(lf.nextEmptySlot, func(slot int) bool { 101 | return lf.get(slot*16) >= fid 102 | }) 103 | if idx < lf.nextEmptySlot && lf.get(idx*16) == fid { 104 | off := idx*16 + 8 105 | curDisc := lf.get(off) 106 | if discard == 0 { 107 | return int64(curDisc) 108 | } 109 | if discard < 0 { 110 | lf.set(off, 0) 111 | return 0 112 | } 113 | lf.set(off, curDisc+uint64(discard)) 114 | return int64(curDisc + uint64(discard)) 115 | } 116 | if discard <= 0 { 117 | // No need to add a new entry. 118 | return 0 119 | } 120 | 121 | // Could not find the fid. Add the entry. 122 | idx = lf.nextEmptySlot 123 | lf.set(idx*16, fid) 124 | lf.set(idx*16+8, uint64(discard)) 125 | 126 | // Move to next slot. 127 | lf.nextEmptySlot++ 128 | for lf.nextEmptySlot >= lf.maxSlot() { 129 | y.Check(lf.Truncate(2 * int64(len(lf.Data)))) 130 | } 131 | lf.zeroOut() 132 | 133 | sort.Sort(lf) 134 | return discard 135 | } 136 | 137 | func (lf *discardStats) Iterate(f func(fid, stats uint64)) { 138 | for slot := 0; slot < lf.nextEmptySlot; slot++ { 139 | idx := 16 * slot 140 | f(lf.get(idx), lf.get(idx+8)) 141 | } 142 | } 143 | 144 | // MaxDiscard returns the file id with maximum discard bytes. 145 | func (lf *discardStats) MaxDiscard() (uint32, int64) { 146 | lf.Lock() 147 | defer lf.Unlock() 148 | 149 | var maxFid, maxVal uint64 150 | lf.Iterate(func(fid, val uint64) { 151 | if maxVal < val { 152 | maxVal = val 153 | maxFid = fid 154 | } 155 | }) 156 | return uint32(maxFid), int64(maxVal) 157 | } 158 | -------------------------------------------------------------------------------- /discard_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "os" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestDiscardStats(t *testing.T) { 16 | dir, err := os.MkdirTemp("", "badger-test") 17 | require.NoError(t, err) 18 | defer removeDir(dir) 19 | 20 | opt := DefaultOptions(dir) 21 | ds, err := InitDiscardStats(opt) 22 | require.NoError(t, err) 23 | require.Zero(t, ds.nextEmptySlot) 24 | fid, _ := ds.MaxDiscard() 25 | require.Zero(t, fid) 26 | 27 | for i := uint32(0); i < 20; i++ { 28 | require.Equal(t, int64(i*100), ds.Update(i, int64(i*100))) 29 | } 30 | ds.Iterate(func(id, val uint64) { 31 | require.Equal(t, id*100, val) 32 | }) 33 | for i := uint32(0); i < 10; i++ { 34 | require.Equal(t, 0, int(ds.Update(i, -1))) 35 | } 36 | ds.Iterate(func(id, val uint64) { 37 | if id < 10 { 38 | require.Zero(t, val) 39 | return 40 | } 41 | require.Equal(t, int(id*100), int(val)) 42 | }) 43 | } 44 | 45 | func TestReloadDiscardStats(t *testing.T) { 46 | dir, err := os.MkdirTemp("", "badger-test") 47 | require.NoError(t, err) 48 | defer removeDir(dir) 49 | 50 | opt := DefaultOptions(dir) 51 | db, err := Open(opt) 52 | require.NoError(t, err) 53 | ds := db.vlog.discardStats 54 | 55 | ds.Update(uint32(1), 1) 56 | ds.Update(uint32(2), 1) 57 | ds.Update(uint32(1), -1) 58 | require.NoError(t, db.Close()) 59 | 60 | // Reopen the DB, discard stats should be same. 
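// They persist across restarts because they live in the mmapped DISCARD
// file inside ValueDir rather than in memory.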
61 | db2, err := Open(opt) 62 | require.NoError(t, err) 63 | ds2 := db2.vlog.discardStats 64 | require.Zero(t, ds2.Update(uint32(1), 0)) 65 | require.Equal(t, 1, int(ds2.Update(uint32(2), 0))) 66 | } 67 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package badger implements an embeddable, simple and fast key-value database, 3 | written in pure Go. It is designed to be highly performant for both reads and 4 | writes simultaneously. Badger uses Multi-Version Concurrency Control (MVCC), and 5 | supports transactions. It runs transactions concurrently, with serializable 6 | snapshot isolation guarantees. 7 | 8 | Badger uses an LSM tree along with a value log to separate keys from values, 9 | hence reducing both write amplification and the size of the LSM tree. This 10 | allows LSM tree to be served entirely from RAM, while the values are served 11 | from SSD. 12 | 13 | # Usage 14 | 15 | Badger has the following main types: DB, Txn, Item and Iterator. DB contains 16 | keys that are associated with values. It must be opened with the appropriate 17 | options before it can be accessed. 18 | 19 | All operations happen inside a Txn. Txn represents a transaction, which can 20 | be read-only or read-write. Read-only transactions can read values for a 21 | given key (which are returned inside an Item), or iterate over a set of 22 | key-value pairs using an Iterator (which are returned as Item type values as 23 | well). Read-write transactions can also update and delete keys from the DB. 24 | 25 | See the examples for more usage details. 26 | */ 27 | package badger 28 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | stderrors "errors" 10 | "math" 11 | ) 12 | 13 | const ( 14 | // ValueThresholdLimit is the maximum permissible value of opt.ValueThreshold. 15 | ValueThresholdLimit = math.MaxUint16 - 16 + 1 16 | ) 17 | 18 | var ( 19 | // ErrValueLogSize is returned when opt.ValueLogFileSize option is not within the valid 20 | // range. 21 | ErrValueLogSize = stderrors.New("Invalid ValueLogFileSize, must be in range [1MB, 2GB)") 22 | 23 | // ErrKeyNotFound is returned when key isn't found on a txn.Get. 24 | ErrKeyNotFound = stderrors.New("Key not found") 25 | 26 | // ErrTxnTooBig is returned if too many writes are fit into a single transaction. 27 | ErrTxnTooBig = stderrors.New("Txn is too big to fit into one request") 28 | 29 | // ErrConflict is returned when a transaction conflicts with another transaction. This can 30 | // happen if the read rows had been updated concurrently by another transaction. 31 | ErrConflict = stderrors.New("Transaction Conflict. Please retry") 32 | 33 | // ErrReadOnlyTxn is returned if an update function is called on a read-only transaction. 34 | ErrReadOnlyTxn = stderrors.New("No sets or deletes are allowed in a read-only transaction") 35 | 36 | // ErrDiscardedTxn is returned if a previously discarded transaction is re-used. 37 | ErrDiscardedTxn = stderrors.New("This transaction has been discarded. Create a new one") 38 | 39 | // ErrEmptyKey is returned if an empty key is passed on an update function. 
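// For example, both txn.Set(nil, value) and txn.Set([]byte{}, value) fail
// with this error.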
40 | ErrEmptyKey = stderrors.New("Key cannot be empty") 41 | 42 | // ErrInvalidKey is returned if the key has a special !badger! prefix, 43 | // reserved for internal usage. 44 | ErrInvalidKey = stderrors.New("Key is using a reserved !badger! prefix") 45 | 46 | // ErrBannedKey is returned if the read/write key belongs to any banned namespace. 47 | ErrBannedKey = stderrors.New("Key is using the banned prefix") 48 | 49 | // ErrThresholdZero is returned if threshold is set to zero, and value log GC is called. 50 | // In such a case, GC can't be run. 51 | ErrThresholdZero = stderrors.New( 52 | "Value log GC can't run because threshold is set to zero") 53 | 54 | // ErrNoRewrite is returned if a call for value log GC doesn't result in a log file rewrite. 55 | ErrNoRewrite = stderrors.New( 56 | "Value log GC attempt didn't result in any cleanup") 57 | 58 | // ErrRejected is returned if a value log GC is called either while another GC is running, or 59 | // after DB::Close has been called. 60 | ErrRejected = stderrors.New("Value log GC request rejected") 61 | 62 | // ErrInvalidRequest is returned if the user request is invalid. 63 | ErrInvalidRequest = stderrors.New("Invalid request") 64 | 65 | // ErrManagedTxn is returned if the user tries to use an API which isn't 66 | // allowed due to external management of transactions, when using ManagedDB. 67 | ErrManagedTxn = stderrors.New( 68 | "Invalid API request. Not allowed to perform this action using ManagedDB") 69 | 70 | // ErrNamespaceMode is returned if the user tries to use an API which is allowed only when 71 | // NamespaceOffset is non-negative. 72 | ErrNamespaceMode = stderrors.New( 73 | "Invalid API request. Not allowed to perform this action when NamespaceMode is not set.") 74 | 75 | // ErrInvalidDump if a data dump made previously cannot be loaded into the database. 76 | ErrInvalidDump = stderrors.New("Data dump cannot be read") 77 | 78 | // ErrZeroBandwidth is returned if the user passes in zero bandwidth for sequence. 79 | ErrZeroBandwidth = stderrors.New("Bandwidth must be greater than zero") 80 | 81 | // ErrWindowsNotSupported is returned when opt.ReadOnly is used on Windows 82 | ErrWindowsNotSupported = stderrors.New("Read-only mode is not supported on Windows") 83 | 84 | // ErrPlan9NotSupported is returned when opt.ReadOnly is used on Plan 9 85 | ErrPlan9NotSupported = stderrors.New("Read-only mode is not supported on Plan 9") 86 | 87 | // ErrTruncateNeeded is returned when the value log gets corrupt, and requires truncation of 88 | // corrupt data to allow Badger to run properly. 89 | ErrTruncateNeeded = stderrors.New( 90 | "Log truncate required to run DB. This might result in data loss") 91 | 92 | // ErrBlockedWrites is returned if the user called DropAll. During the process of dropping all 93 | // data from Badger, we stop accepting new writes, by returning this error. 94 | ErrBlockedWrites = stderrors.New("Writes are blocked, possibly due to DropAll or Close") 95 | 96 | // ErrNilCallback is returned when subscriber's callback is nil. 97 | ErrNilCallback = stderrors.New("Callback cannot be nil") 98 | 99 | // ErrEncryptionKeyMismatch is returned when the storage key is not 100 | // matched with the key previously given. 101 | ErrEncryptionKeyMismatch = stderrors.New("Encryption key mismatch") 102 | 103 | // ErrInvalidDataKeyID is returned if the datakey id is invalid. 104 | ErrInvalidDataKeyID = stderrors.New("Invalid datakey id") 105 | 106 | // ErrInvalidEncryptionKey is returned if length of encryption keys is invalid. 
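// The three accepted lengths correspond to AES-128, AES-192, and AES-256.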
ErrInvalidEncryptionKey = stderrors.New("Encryption key's length should be " +
    "either 16, 24, or 32 bytes")

// ErrGCInMemoryMode is returned when db.RunValueLogGC is called in in-memory mode.
ErrGCInMemoryMode = stderrors.New("Cannot run value log GC when DB is opened in InMemory mode")

// ErrDBClosed is returned when a get operation is performed after closing the DB.
ErrDBClosed = stderrors.New("DB Closed")
)
-------------------------------------------------------------------------------- /fb/BlockOffset.go: --------------------------------------------------------------------------------
// Code generated by the FlatBuffers compiler. DO NOT EDIT.

package fb

import (
    flatbuffers "github.com/google/flatbuffers/go"
)

type BlockOffset struct {
    _tab flatbuffers.Table
}

func GetRootAsBlockOffset(buf []byte, offset flatbuffers.UOffsetT) *BlockOffset {
    n := flatbuffers.GetUOffsetT(buf[offset:])
    x := &BlockOffset{}
    x.Init(buf, n+offset)
    return x
}

func (rcv *BlockOffset) Init(buf []byte, i flatbuffers.UOffsetT) {
    rcv._tab.Bytes = buf
    rcv._tab.Pos = i
}

func (rcv *BlockOffset) Table() flatbuffers.Table {
    return rcv._tab
}

func (rcv *BlockOffset) Key(j int) byte {
    o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
    if o != 0 {
        a := rcv._tab.Vector(o)
        return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
    }
    return 0
}

func (rcv *BlockOffset) KeyLength() int {
    o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
    if o != 0 {
        return rcv._tab.VectorLen(o)
    }
    return 0
}

func (rcv *BlockOffset) KeyBytes() []byte {
    o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
    if o != 0 {
        return rcv._tab.ByteVector(o + rcv._tab.Pos)
    }
    return nil
}

func (rcv *BlockOffset) MutateKey(j int, n byte) bool {
    o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
    if o != 0 {
        a := rcv._tab.Vector(o)
        return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
    }
    return false
}

func (rcv *BlockOffset) Offset() uint32 {
    o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
    if o != 0 {
        return rcv._tab.GetUint32(o + rcv._tab.Pos)
    }
    return 0
}

func (rcv *BlockOffset) MutateOffset(n uint32) bool {
    return rcv._tab.MutateUint32Slot(6, n)
}

func (rcv *BlockOffset) Len() uint32 {
    o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
    if o != 0 {
        return rcv._tab.GetUint32(o + rcv._tab.Pos)
    }
    return 0
}

func (rcv *BlockOffset) MutateLen(n uint32) bool {
    return rcv._tab.MutateUint32Slot(8, n)
}

func BlockOffsetStart(builder *flatbuffers.Builder) {
    builder.StartObject(3)
}
func BlockOffsetAddKey(builder *flatbuffers.Builder, key flatbuffers.UOffsetT) {
    builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(key), 0)
}
func BlockOffsetStartKeyVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
    return builder.StartVector(1, numElems, 1)
}
func BlockOffsetAddOffset(builder *flatbuffers.Builder, offset uint32) {
    builder.PrependUint32Slot(1, offset, 0)
}
func BlockOffsetAddLen(builder *flatbuffers.Builder, len uint32) {
    builder.PrependUint32Slot(2, len, 0)
}
func BlockOffsetEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT
{ 103 | return builder.EndObject() 104 | } 105 | -------------------------------------------------------------------------------- /fb/TableIndex.go: -------------------------------------------------------------------------------- 1 | // Code generated by the FlatBuffers compiler. DO NOT EDIT. 2 | 3 | package fb 4 | 5 | import ( 6 | flatbuffers "github.com/google/flatbuffers/go" 7 | ) 8 | 9 | type TableIndex struct { 10 | _tab flatbuffers.Table 11 | } 12 | 13 | func GetRootAsTableIndex(buf []byte, offset flatbuffers.UOffsetT) *TableIndex { 14 | n := flatbuffers.GetUOffsetT(buf[offset:]) 15 | x := &TableIndex{} 16 | x.Init(buf, n+offset) 17 | return x 18 | } 19 | 20 | func (rcv *TableIndex) Init(buf []byte, i flatbuffers.UOffsetT) { 21 | rcv._tab.Bytes = buf 22 | rcv._tab.Pos = i 23 | } 24 | 25 | func (rcv *TableIndex) Table() flatbuffers.Table { 26 | return rcv._tab 27 | } 28 | 29 | func (rcv *TableIndex) Offsets(obj *BlockOffset, j int) bool { 30 | o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) 31 | if o != 0 { 32 | x := rcv._tab.Vector(o) 33 | x += flatbuffers.UOffsetT(j) * 4 34 | x = rcv._tab.Indirect(x) 35 | obj.Init(rcv._tab.Bytes, x) 36 | return true 37 | } 38 | return false 39 | } 40 | 41 | func (rcv *TableIndex) OffsetsLength() int { 42 | o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) 43 | if o != 0 { 44 | return rcv._tab.VectorLen(o) 45 | } 46 | return 0 47 | } 48 | 49 | func (rcv *TableIndex) BloomFilter(j int) byte { 50 | o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) 51 | if o != 0 { 52 | a := rcv._tab.Vector(o) 53 | return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1)) 54 | } 55 | return 0 56 | } 57 | 58 | func (rcv *TableIndex) BloomFilterLength() int { 59 | o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) 60 | if o != 0 { 61 | return rcv._tab.VectorLen(o) 62 | } 63 | return 0 64 | } 65 | 66 | func (rcv *TableIndex) BloomFilterBytes() []byte { 67 | o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) 68 | if o != 0 { 69 | return rcv._tab.ByteVector(o + rcv._tab.Pos) 70 | } 71 | return nil 72 | } 73 | 74 | func (rcv *TableIndex) MutateBloomFilter(j int, n byte) bool { 75 | o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) 76 | if o != 0 { 77 | a := rcv._tab.Vector(o) 78 | return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n) 79 | } 80 | return false 81 | } 82 | 83 | func (rcv *TableIndex) MaxVersion() uint64 { 84 | o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) 85 | if o != 0 { 86 | return rcv._tab.GetUint64(o + rcv._tab.Pos) 87 | } 88 | return 0 89 | } 90 | 91 | func (rcv *TableIndex) MutateMaxVersion(n uint64) bool { 92 | return rcv._tab.MutateUint64Slot(8, n) 93 | } 94 | 95 | func (rcv *TableIndex) KeyCount() uint32 { 96 | o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) 97 | if o != 0 { 98 | return rcv._tab.GetUint32(o + rcv._tab.Pos) 99 | } 100 | return 0 101 | } 102 | 103 | func (rcv *TableIndex) MutateKeyCount(n uint32) bool { 104 | return rcv._tab.MutateUint32Slot(10, n) 105 | } 106 | 107 | func (rcv *TableIndex) UncompressedSize() uint32 { 108 | o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) 109 | if o != 0 { 110 | return rcv._tab.GetUint32(o + rcv._tab.Pos) 111 | } 112 | return 0 113 | } 114 | 115 | func (rcv *TableIndex) MutateUncompressedSize(n uint32) bool { 116 | return rcv._tab.MutateUint32Slot(12, n) 117 | } 118 | 119 | func (rcv *TableIndex) OnDiskSize() uint32 { 120 | o := flatbuffers.UOffsetT(rcv._tab.Offset(14)) 121 | if o != 0 { 122 | return rcv._tab.GetUint32(o + rcv._tab.Pos) 123 | } 124 | return 0 125 | } 126 | 127 | func (rcv *TableIndex) MutateOnDiskSize(n uint32) 
bool { 128 | return rcv._tab.MutateUint32Slot(14, n) 129 | } 130 | 131 | func (rcv *TableIndex) StaleDataSize() uint32 { 132 | o := flatbuffers.UOffsetT(rcv._tab.Offset(16)) 133 | if o != 0 { 134 | return rcv._tab.GetUint32(o + rcv._tab.Pos) 135 | } 136 | return 0 137 | } 138 | 139 | func (rcv *TableIndex) MutateStaleDataSize(n uint32) bool { 140 | return rcv._tab.MutateUint32Slot(16, n) 141 | } 142 | 143 | func TableIndexStart(builder *flatbuffers.Builder) { 144 | builder.StartObject(7) 145 | } 146 | func TableIndexAddOffsets(builder *flatbuffers.Builder, offsets flatbuffers.UOffsetT) { 147 | builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(offsets), 0) 148 | } 149 | func TableIndexStartOffsetsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT { 150 | return builder.StartVector(4, numElems, 4) 151 | } 152 | func TableIndexAddBloomFilter(builder *flatbuffers.Builder, bloomFilter flatbuffers.UOffsetT) { 153 | builder.PrependUOffsetTSlot(1, flatbuffers.UOffsetT(bloomFilter), 0) 154 | } 155 | func TableIndexStartBloomFilterVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT { 156 | return builder.StartVector(1, numElems, 1) 157 | } 158 | func TableIndexAddMaxVersion(builder *flatbuffers.Builder, maxVersion uint64) { 159 | builder.PrependUint64Slot(2, maxVersion, 0) 160 | } 161 | func TableIndexAddKeyCount(builder *flatbuffers.Builder, keyCount uint32) { 162 | builder.PrependUint32Slot(3, keyCount, 0) 163 | } 164 | func TableIndexAddUncompressedSize(builder *flatbuffers.Builder, uncompressedSize uint32) { 165 | builder.PrependUint32Slot(4, uncompressedSize, 0) 166 | } 167 | func TableIndexAddOnDiskSize(builder *flatbuffers.Builder, onDiskSize uint32) { 168 | builder.PrependUint32Slot(5, onDiskSize, 0) 169 | } 170 | func TableIndexAddStaleDataSize(builder *flatbuffers.Builder, staleDataSize uint32) { 171 | builder.PrependUint32Slot(6, staleDataSize, 0) 172 | } 173 | func TableIndexEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT { 174 | return builder.EndObject() 175 | } 176 | -------------------------------------------------------------------------------- /fb/flatbuffer.fbs: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | namespace fb; 7 | 8 | table TableIndex { 9 | offsets:[BlockOffset]; 10 | bloom_filter:[ubyte]; 11 | max_version:uint64; 12 | key_count:uint32; 13 | uncompressed_size:uint32; 14 | on_disk_size:uint32; 15 | stale_data_size:uint32; 16 | } 17 | 18 | table BlockOffset { 19 | key:[ubyte]; 20 | offset:uint; 21 | len:uint; 22 | } 23 | 24 | root_type TableIndex; 25 | root_type BlockOffset; 26 | -------------------------------------------------------------------------------- /fb/gen.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | ## Install flatc if not present 6 | ## ref. https://google.github.io/flatbuffers/flatbuffers_guide_building.html 7 | command -v flatc >/dev/null || { ./install_flatbuffers.sh; } 8 | 9 | flatc --go flatbuffer.fbs 10 | # Move files to the correct directory. 
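# (flatc nests the generated code in a fb/ subdirectory because of the
# schema's `namespace fb;`, so hoist the .go files up beside this script.)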
mv fb/* ./
rmdir fb
-------------------------------------------------------------------------------- /fb/install_flatbuffers.sh: --------------------------------------------------------------------------------
#!/usr/bin/env bash

set -e

install_mac() {
    command -v brew >/dev/null ||
        {
            echo "[ERROR]: 'brew' command not found. Exiting" 1>&2
            exit 1
        }
    brew install flatbuffers
}

install_linux() {
    for CMD in curl cmake g++ make; do
        command -v "${CMD}" >/dev/null ||
            {
                echo "[ERROR]: '${CMD}' command not found. Exiting" 1>&2
                exit 1
            }
    done

    ## Create Temp Build Directory
    BUILD_DIR=$(mktemp -d)
    pushd "${BUILD_DIR}"

    ## Fetch Latest Tarball
    LATEST_VERSION=$(curl -s https://api.github.com/repos/google/flatbuffers/releases/latest | grep -oP '(?<=tag_name": ")[^"]+')
    curl -sLO https://github.com/google/flatbuffers/archive/"${LATEST_VERSION}".tar.gz
    tar xf "${LATEST_VERSION}".tar.gz

    ## Build Binaries
    cd flatbuffers-"${LATEST_VERSION#v}"
    cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release
    make
    ./flattests
    cp flatc /usr/local/bin/flatc

    ## Cleanup Temp Build Directory
    popd
    rm -rf "${BUILD_DIR}"
}

SYSTEM=$(uname -s)

case ${SYSTEM,,} in
linux)
    sudo bash -c "$(declare -f install_linux); install_linux"
    ;;
darwin)
    install_mac
    ;;
esac
-------------------------------------------------------------------------------- /go.mod: --------------------------------------------------------------------------------
module github.com/dgraph-io/badger/v4

go 1.23.0

toolchain go1.24.3

require (
    github.com/cespare/xxhash/v2 v2.3.0
    github.com/dgraph-io/ristretto/v2 v2.2.0
    github.com/dustin/go-humanize v1.0.1
    github.com/google/flatbuffers v25.2.10+incompatible
    github.com/klauspost/compress v1.18.0
    github.com/spf13/cobra v1.9.1
    github.com/stretchr/testify v1.10.0
    go.opentelemetry.io/contrib/zpages v0.61.0
    go.opentelemetry.io/otel v1.36.0
    golang.org/x/net v0.40.0
    golang.org/x/sys v0.33.0
    google.golang.org/protobuf v1.36.6
)

require (
    github.com/davecgh/go-spew v1.1.1 // indirect
    github.com/go-logr/logr v1.4.2 // indirect
    github.com/go-logr/stdr v1.2.2 // indirect
    github.com/google/uuid v1.6.0 // indirect
    github.com/inconshreveable/mousetrap v1.1.0 // indirect
    github.com/pmezard/go-difflib v1.0.0 // indirect
    github.com/spf13/pflag v1.0.6 // indirect
    go.opentelemetry.io/auto/sdk v1.1.0 // indirect
    go.opentelemetry.io/otel/metric v1.36.0 // indirect
    go.opentelemetry.io/otel/sdk v1.36.0 // indirect
    go.opentelemetry.io/otel/trace v1.36.0 // indirect
    gopkg.in/yaml.v3 v3.0.1 // indirect
)

retract v4.0.0 // see #1888 and #1889

retract v4.3.0 // see #2113 and #2121
-------------------------------------------------------------------------------- /go.sum: --------------------------------------------------------------------------------
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.1
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= 7 | github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= 8 | github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= 9 | github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= 10 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 11 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 12 | github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 13 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 14 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 15 | github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 16 | github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 17 | github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= 18 | github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= 19 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 20 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 21 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 22 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 23 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 24 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 25 | github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= 26 | github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 27 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 28 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 29 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 30 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 31 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 32 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 33 | github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= 34 | github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= 35 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 36 | github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= 37 | github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= 38 | github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= 39 | github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 40 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 41 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 42 | go.opentelemetry.io/auto/sdk v1.1.0 
h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= 43 | go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= 44 | go.opentelemetry.io/contrib/zpages v0.61.0 h1:tYvUj377Dn3k1wf1le/f8YWSNQ8k0byS3jK8PiIXu9Y= 45 | go.opentelemetry.io/contrib/zpages v0.61.0/go.mod h1:MFNPHMJOGA1P6m5501ANjOJDp4A9BUQja1Y53CDL8LQ= 46 | go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= 47 | go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= 48 | go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= 49 | go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= 50 | go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs= 51 | go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY= 52 | go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= 53 | go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= 54 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 55 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 56 | golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= 57 | golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= 58 | golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= 59 | golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 60 | google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= 61 | google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= 62 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 63 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 64 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 65 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 66 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 67 | -------------------------------------------------------------------------------- /histogram.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "fmt" 10 | "math" 11 | ) 12 | 13 | // PrintHistogram builds and displays the key-value size histogram. 
// When keyPrefix is set, only the keys that have the prefix "keyPrefix" are
// considered for creating the histogram.
func (db *DB) PrintHistogram(keyPrefix []byte) {
    if db == nil {
        fmt.Println("\nCannot build histogram: DB is nil.")
        return
    }
    histogram := db.buildHistogram(keyPrefix)
    fmt.Printf("Histogram of key sizes (in bytes)\n")
    histogram.keySizeHistogram.printHistogram()
    fmt.Printf("Histogram of value sizes (in bytes)\n")
    histogram.valueSizeHistogram.printHistogram()
}

// histogramData stores information about a histogram.
type histogramData struct {
    bins        []int64
    countPerBin []int64
    totalCount  int64
    min         int64
    max         int64
    sum         int64
}

// sizeHistogram contains a key-size histogram and a value-size histogram.
type sizeHistogram struct {
    keySizeHistogram, valueSizeHistogram histogramData
}

// newSizeHistogram returns a new instance of sizeHistogram with
// properly initialized fields.
func newSizeHistogram() *sizeHistogram {
    // TODO(ibrahim): find appropriate bin size.
    keyBins := createHistogramBins(1, 16)
    valueBins := createHistogramBins(1, 30)
    return &sizeHistogram{
        keySizeHistogram: histogramData{
            bins:        keyBins,
            countPerBin: make([]int64, len(keyBins)+1),
            max:         math.MinInt64,
            min:         math.MaxInt64,
            sum:         0,
        },
        valueSizeHistogram: histogramData{
            bins:        valueBins,
            countPerBin: make([]int64, len(valueBins)+1),
            max:         math.MinInt64,
            min:         math.MaxInt64,
            sum:         0,
        },
    }
}

// createHistogramBins creates bins for a histogram. The bin sizes are powers
// of two of the form [2^min_exponent, ..., 2^max_exponent].
func createHistogramBins(minExponent, maxExponent uint32) []int64 {
    var bins []int64
    for i := minExponent; i <= maxExponent; i++ {
        bins = append(bins, int64(1)<<i)
    }
    return bins
}

// Update updates the min and max fields if value is smaller or larger than
// the current values, and increments the count of the bin that value falls
// into.
func (histogram *histogramData) Update(value int64) {
    if value > histogram.max {
        histogram.max = value
    }
    if value < histogram.min {
        histogram.min = value
    }

    histogram.sum += value
    histogram.totalCount++

    for index := 0; index <= len(histogram.bins); index++ {
        // Allocate value in the last bucket if we reached the end of the bins array.
        if index == len(histogram.bins) {
            histogram.countPerBin[index]++
            break
        }

        // Check if the value should be added to the "index" bin.
        if value < histogram.bins[index] {
            histogram.countPerBin[index]++
            break
        }
    }
}

// buildHistogram builds the key-value size histogram.
// When keyPrefix is set, only the keys that have the prefix "keyPrefix" are
// considered for creating the histogram.
func (db *DB) buildHistogram(keyPrefix []byte) *sizeHistogram {
    txn := db.NewTransaction(false)
    defer txn.Discard()

    itr := txn.NewIterator(DefaultIteratorOptions)
    defer itr.Close()

    badgerHistogram := newSizeHistogram()

    // Collect key and value sizes.
    for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() {
        item := itr.Item()
        badgerHistogram.keySizeHistogram.Update(item.KeySize())
        badgerHistogram.valueSizeHistogram.Update(item.ValueSize())
    }
    return badgerHistogram
}

// printHistogram prints the histogram data in a human-readable format.
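// For example, for 40 entries whose keys are all one byte long, the output
// would look something like this (illustrative; exact spacing follows the
// Printf verbs below):
//
//	Total count: 40
//	Min value: 1
//	Max value: 1
//	Mean: 1.00
//	                   Range     Count
//	[         0,          2)        40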
127 | func (histogram histogramData) printHistogram() { 128 | fmt.Printf("Total count: %d\n", histogram.totalCount) 129 | fmt.Printf("Min value: %d\n", histogram.min) 130 | fmt.Printf("Max value: %d\n", histogram.max) 131 | fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount)) 132 | fmt.Printf("%24s %9s\n", "Range", "Count") 133 | 134 | numBins := len(histogram.bins) 135 | for index, count := range histogram.countPerBin { 136 | if count == 0 { 137 | continue 138 | } 139 | 140 | // The last bin covers the range from the largest bin boundary up to 141 | // infinity, so it's processed differently from the 142 | // other bins. 143 | if index == len(histogram.countPerBin)-1 { 144 | lowerBound := int(histogram.bins[numBins-1]) 145 | fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count) 146 | continue 147 | } 148 | 149 | upperBound := int(histogram.bins[index]) 150 | lowerBound := 0 151 | if index > 0 { 152 | lowerBound = int(histogram.bins[index-1]) 153 | } 154 | 155 | fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count) 156 | } 157 | fmt.Println() 158 | } 159 | -------------------------------------------------------------------------------- /histogram_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "testing" 10 | 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestBuildKeyValueSizeHistogram(t *testing.T) { 15 | t.Run("All same size key-values", func(t *testing.T) { 16 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 17 | entries := int64(40) 18 | err := db.Update(func(txn *Txn) error { 19 | for i := rune(0); i < rune(entries); i++ { 20 | err := txn.SetEntry(NewEntry([]byte(string(i)), []byte("B"))) 21 | if err != nil { 22 | return err 23 | } 24 | } 25 | return nil 26 | }) 27 | require.NoError(t, err) 28 | 29 | histogram := db.buildHistogram(nil) 30 | keyHistogram := histogram.keySizeHistogram 31 | valueHistogram := histogram.valueSizeHistogram 32 | 33 | require.Equal(t, entries, keyHistogram.totalCount) 34 | require.Equal(t, entries, valueHistogram.totalCount) 35 | 36 | // Each entry is of size one. So the sum of sizes should be the same 37 | // as the number of entries. 38 | require.Equal(t, entries, valueHistogram.sum) 39 | require.Equal(t, entries, keyHistogram.sum) 40 | 41 | // All value sizes are the same. The first bin should have all the values.
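// (Keys and values of size 1 fall below the first bin boundary of 2^1 = 2, which is why everything lands in countPerBin[0].)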
42 | require.Equal(t, entries, valueHistogram.countPerBin[0]) 43 | require.Equal(t, entries, keyHistogram.countPerBin[0]) 44 | 45 | require.Equal(t, int64(1), keyHistogram.max) 46 | require.Equal(t, int64(1), keyHistogram.min) 47 | require.Equal(t, int64(1), valueHistogram.max) 48 | require.Equal(t, int64(1), valueHistogram.min) 49 | }) 50 | }) 51 | 52 | t.Run("different size key-values", func(t *testing.T) { 53 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 54 | entries := int64(3) 55 | err := db.Update(func(txn *Txn) error { 56 | if err := txn.SetEntry(NewEntry([]byte("A"), []byte("B"))); err != nil { 57 | return err 58 | } 59 | 60 | if err := txn.SetEntry(NewEntry([]byte("AA"), []byte("BB"))); err != nil { 61 | return err 62 | } 63 | 64 | return txn.SetEntry(NewEntry([]byte("AAA"), []byte("BBB"))) 65 | }) 66 | require.NoError(t, err) 67 | 68 | histogram := db.buildHistogram(nil) 69 | keyHistogram := histogram.keySizeHistogram 70 | valueHistogram := histogram.valueSizeHistogram 71 | 72 | require.Equal(t, entries, keyHistogram.totalCount) 73 | require.Equal(t, entries, valueHistogram.totalCount) 74 | 75 | // The keys and values have sizes 1, 2 and 3, so each sum of sizes 76 | // should be 6. 77 | require.Equal(t, int64(6), valueHistogram.sum) 78 | require.Equal(t, int64(6), keyHistogram.sum) 79 | 80 | // The length-1 key is in the first bucket; lengths 2 and 3 are in the 81 | // second bucket. 82 | require.Equal(t, int64(1), valueHistogram.countPerBin[0]) 83 | require.Equal(t, int64(2), valueHistogram.countPerBin[1]) 84 | require.Equal(t, int64(1), keyHistogram.countPerBin[0]) 85 | require.Equal(t, int64(2), keyHistogram.countPerBin[1]) 86 | 87 | require.Equal(t, int64(3), keyHistogram.max) 88 | require.Equal(t, int64(1), keyHistogram.min) 89 | require.Equal(t, int64(3), valueHistogram.max) 90 | require.Equal(t, int64(1), valueHistogram.min) 91 | }) 92 | }) 93 | } 94 | -------------------------------------------------------------------------------- /images/benchmarks-rocksdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypermodeinc/badger/98a3f1ef7de9558b84e368cb5684fad67492aa8b/images/benchmarks-rocksdb.png -------------------------------------------------------------------------------- /images/diggy-shadow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypermodeinc/badger/98a3f1ef7de9558b84e368cb5684fad67492aa8b/images/diggy-shadow.png -------------------------------------------------------------------------------- /integration/testgc/.gitignore: -------------------------------------------------------------------------------- 1 | /testgc 2 | -------------------------------------------------------------------------------- /integration/testgc/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc.
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package main 7 | 8 | import ( 9 | "encoding/binary" 10 | "fmt" 11 | "log" 12 | "math/rand" 13 | "net/http" 14 | _ "net/http/pprof" //nolint:gosec 15 | "os" 16 | "sync" 17 | "sync/atomic" 18 | "time" 19 | 20 | "github.com/dgraph-io/badger/v4" 21 | "github.com/dgraph-io/badger/v4/y" 22 | "github.com/dgraph-io/ristretto/v2/z" 23 | ) 24 | 25 | var maxValue int64 = 10000000 26 | var suffix = make([]byte, 128) 27 | 28 | type testSuite struct { 29 | sync.Mutex 30 | vals map[uint64]uint64 31 | 32 | count atomic.Uint64 // Not under mutex lock. 33 | } 34 | 35 | func encoded(i uint64) []byte { 36 | out := make([]byte, 8) 37 | binary.BigEndian.PutUint64(out, i) 38 | return out 39 | } 40 | 41 | func (s *testSuite) write(db *badger.DB) error { 42 | return db.Update(func(txn *badger.Txn) error { 43 | for i := 0; i < 10; i++ { 44 | // These keys would be overwritten. 45 | keyi := uint64(rand.Int63n(maxValue)) 46 | key := encoded(keyi) 47 | vali := s.count.Add(1) 48 | val := encoded(vali) 49 | val = append(val, suffix...) 50 | if err := txn.SetEntry(badger.NewEntry(key, val)); err != nil { 51 | return err 52 | } 53 | } 54 | for i := 0; i < 20; i++ { 55 | // These keys would be new and never overwritten. 56 | keyi := s.count.Add(1) 57 | if keyi%1000000 == 0 { 58 | log.Printf("Count: %d\n", keyi) 59 | } 60 | key := encoded(keyi) 61 | val := append(key, suffix...) 62 | if err := txn.SetEntry(badger.NewEntry(key, val)); err != nil { 63 | return err 64 | } 65 | } 66 | return nil 67 | }) 68 | } 69 | 70 | func (s *testSuite) read(db *badger.DB) error { 71 | max := int64(s.count.Load()) 72 | keyi := uint64(rand.Int63n(max)) 73 | key := encoded(keyi) 74 | 75 | err := db.View(func(txn *badger.Txn) error { 76 | item, err := txn.Get(key) 77 | if err != nil { 78 | return err 79 | } 80 | val, err := item.ValueCopy(nil) 81 | if err != nil { 82 | return err 83 | } 84 | y.AssertTruef(len(val) == len(suffix)+8, "Found val of len: %d\n", len(val)) 85 | vali := binary.BigEndian.Uint64(val[0:8]) 86 | s.Lock() 87 | expected := s.vals[keyi] 88 | if vali < expected { 89 | log.Fatalf("Expected: %d. Found: %d. Key: %d\n", expected, vali, keyi) 90 | } else if vali == expected { 91 | // pass 92 | } else { 93 | s.vals[keyi] = vali 94 | } 95 | s.Unlock() 96 | return nil 97 | }) 98 | if err == badger.ErrKeyNotFound { 99 | return nil 100 | } 101 | return err 102 | } 103 | 104 | func main() { 105 | fmt.Println("Badger Integration test for value log GC.") 106 | 107 | dir := "/mnt/drive/badgertest" 108 | os.RemoveAll(dir) 109 | 110 | db, err := badger.Open(badger.DefaultOptions(dir). 111 | WithSyncWrites(false)) 112 | if err != nil { 113 | log.Fatal(err) 114 | } 115 | defer db.Close() 116 | 117 | go func() { 118 | _ = http.ListenAndServe("localhost:8080", nil) 119 | }() 120 | 121 | closer := z.NewCloser(11) 122 | go func() { 123 | // Run value log GC. 
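// RunValueLogGC rewrites at most one value log file per call, picking one whose discardable fraction exceeds the given ratio (0.1 here); a nil error means a file was rewritten, so we immediately try again for more.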
124 | defer closer.Done() 125 | var count int 126 | ticker := time.NewTicker(5 * time.Second) 127 | defer ticker.Stop() 128 | for range ticker.C { 129 | again: 130 | select { 131 | case <-closer.HasBeenClosed(): 132 | log.Printf("Num times value log GC was successful: %d\n", count) 133 | return 134 | default: 135 | } 136 | log.Printf("Starting a value log GC") 137 | err := db.RunValueLogGC(0.1) 138 | log.Printf("Result of value log GC: %v\n", err) 139 | if err == nil { 140 | count++ 141 | goto again 142 | } 143 | } 144 | }() 145 | 146 | s := testSuite{vals: make(map[uint64]uint64)} 147 | s.count.Store(uint64(maxValue)) 148 | var numLoops atomic.Uint64 149 | ticker := time.NewTicker(5 * time.Second) 150 | for i := 0; i < 10; i++ { 151 | go func() { 152 | defer closer.Done() 153 | for { 154 | if err := s.write(db); err != nil { 155 | log.Fatal(err) 156 | } 157 | for j := 0; j < 10; j++ { 158 | if err := s.read(db); err != nil { 159 | log.Fatal(err) 160 | } 161 | } 162 | nl := numLoops.Add(1) 163 | select { 164 | case <-closer.HasBeenClosed(): 165 | return 166 | case <-ticker.C: 167 | log.Printf("Num loops: %d\n", nl) 168 | default: 169 | } 170 | } 171 | }() 172 | } 173 | time.Sleep(5 * time.Minute) 174 | log.Println("Signaling...") 175 | closer.SignalAndWait() 176 | log.Println("Wait done. Now iterating over everything.") 177 | 178 | err = db.View(func(txn *badger.Txn) error { 179 | iopts := badger.DefaultIteratorOptions 180 | itr := txn.NewIterator(iopts) 181 | defer itr.Close() 182 | 183 | var total, tested int 184 | for itr.Rewind(); itr.Valid(); itr.Next() { 185 | item := itr.Item() 186 | key := item.Key() 187 | keyi := binary.BigEndian.Uint64(key) 188 | total++ 189 | 190 | val, err := item.ValueCopy(nil) 191 | if err != nil { 192 | return err 193 | } 194 | if len(val) < 8 { 195 | log.Printf("Unexpected value: %x\n", val) 196 | continue 197 | } 198 | vali := binary.BigEndian.Uint64(val[0:8]) 199 | 200 | expected, ok := s.vals[keyi] // Not all keys must be in vals map. 201 | if ok { 202 | tested++ 203 | if vali < expected { 204 | // vali must be equal or greater than what's in the map. 205 | log.Fatalf("Expected: %d. Got: %d. Key: %d\n", expected, vali, keyi) 206 | } 207 | } 208 | } 209 | log.Printf("Total iterated: %d. Tested values: %d\n", total, tested) 210 | return nil 211 | }) 212 | if err != nil { 213 | log.Fatalf("Error while iterating: %v", err) 214 | } 215 | log.Println("Iteration done. Test successful.") 216 | time.Sleep(time.Minute) // Time to do some poking around. 217 | } 218 | -------------------------------------------------------------------------------- /key_registry_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "math/rand" 10 | "os" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func getRegistryTestOptions(dir string, key []byte) KeyRegistryOptions { 17 | return KeyRegistryOptions{ 18 | Dir: dir, 19 | EncryptionKey: key, 20 | ReadOnly: false, 21 | } 22 | } 23 | func TestBuildRegistry(t *testing.T) { 24 | encryptionKey := make([]byte, 32) 25 | dir, err := os.MkdirTemp("", "badger-test") 26 | require.NoError(t, err) 27 | defer removeDir(dir) 28 | 29 | _, err = rand.Read(encryptionKey) 30 | require.NoError(t, err) 31 | opt := getRegistryTestOptions(dir, encryptionKey) 32 | 33 | kr, err := OpenKeyRegistry(opt) 34 | require.NoError(t, err) 35 | dk, err := kr.LatestDataKey() 36 | require.NoError(t, err) 37 | // We reset the last-created timestamp so that the next call creates a 38 | // new datakey. 39 | kr.lastCreated = 0 40 | dk1, err := kr.LatestDataKey() 41 | // We generated two keys, so check the length. 42 | require.Equal(t, 2, len(kr.dataKeys)) 43 | require.NoError(t, err) 44 | require.NoError(t, kr.Close()) 45 | 46 | kr2, err := OpenKeyRegistry(opt) 47 | require.NoError(t, err) 48 | require.Equal(t, 2, len(kr2.dataKeys)) 49 | // Asserting the correctness of the datakeys after reopening the registry. 50 | require.Equal(t, dk.Data, kr.dataKeys[dk.KeyId].Data) 51 | require.Equal(t, dk1.Data, kr.dataKeys[dk1.KeyId].Data) 52 | require.NoError(t, kr2.Close()) 53 | } 54 | 55 | func TestRewriteRegistry(t *testing.T) { 56 | encryptionKey := make([]byte, 32) 57 | dir, err := os.MkdirTemp("", "badger-test") 58 | require.NoError(t, err) 59 | defer removeDir(dir) 60 | _, err = rand.Read(encryptionKey) 61 | require.NoError(t, err) 62 | opt := getRegistryTestOptions(dir, encryptionKey) 63 | kr, err := OpenKeyRegistry(opt) 64 | require.NoError(t, err) 65 | _, err = kr.LatestDataKey() 66 | require.NoError(t, err) 67 | // We reset the last-created timestamp so that the next call creates a 68 | // new datakey. 69 | kr.lastCreated = 0 70 | _, err = kr.LatestDataKey() 71 | require.NoError(t, err) 72 | require.NoError(t, kr.Close()) 73 | delete(kr.dataKeys, 1) 74 | require.NoError(t, WriteKeyRegistry(kr, opt)) 75 | kr2, err := OpenKeyRegistry(opt) 76 | require.NoError(t, err) 77 | require.Equal(t, 1, len(kr2.dataKeys)) 78 | require.NoError(t, kr2.Close()) 79 | } 80 | 81 | func TestMismatch(t *testing.T) { 82 | encryptionKey := make([]byte, 32) 83 | dir, err := os.MkdirTemp("", "badger-test") 84 | require.NoError(t, err) 85 | defer removeDir(dir) 86 | _, err = rand.Read(encryptionKey) 87 | require.NoError(t, err) 88 | opt := getRegistryTestOptions(dir, encryptionKey) 89 | kr, err := OpenKeyRegistry(opt) 90 | require.NoError(t, err) 91 | require.NoError(t, kr.Close()) 92 | // Opening with the same key and asserting. 93 | kr, err = OpenKeyRegistry(opt) 94 | require.NoError(t, err) 95 | require.NoError(t, kr.Close()) 96 | // Opening with a different, invalid key and asserting.
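// A registry created under one encryption key must refuse to open under a different key, failing with ErrEncryptionKeyMismatch.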
97 | encryptionKey = make([]byte, 32) 98 | _, err = rand.Read(encryptionKey) 99 | require.NoError(t, err) 100 | opt.EncryptionKey = encryptionKey 101 | _, err = OpenKeyRegistry(opt) 102 | require.Error(t, err) 103 | require.EqualError(t, err, ErrEncryptionKeyMismatch.Error()) 104 | } 105 | 106 | func TestEncryptionAndDecryption(t *testing.T) { 107 | encryptionKey := make([]byte, 32) 108 | dir, err := os.MkdirTemp("", "badger-test") 109 | require.NoError(t, err) 110 | defer removeDir(dir) 111 | _, err = rand.Read(encryptionKey) 112 | require.NoError(t, err) 113 | opt := getRegistryTestOptions(dir, encryptionKey) 114 | kr, err := OpenKeyRegistry(opt) 115 | require.NoError(t, err) 116 | dk, err := kr.LatestDataKey() 117 | require.NoError(t, err) 118 | require.NoError(t, kr.Close()) 119 | // Checking the correctness of the datakey after closing and 120 | // reopening the key registry. 121 | kr, err = OpenKeyRegistry(opt) 122 | require.NoError(t, err) 123 | dk1, err := kr.DataKey(dk.GetKeyId()) 124 | require.NoError(t, err) 125 | require.Equal(t, dk.Data, dk1.Data) 126 | require.NoError(t, kr.Close()) 127 | } 128 | 129 | func TestKeyRegistryInMemory(t *testing.T) { 130 | encryptionKey := make([]byte, 32) 131 | _, err := rand.Read(encryptionKey) 132 | require.NoError(t, err) 133 | 134 | opt := getRegistryTestOptions("", encryptionKey) 135 | opt.InMemory = true 136 | 137 | kr, err := OpenKeyRegistry(opt) 138 | require.NoError(t, err) 139 | _, err = kr.LatestDataKey() 140 | require.NoError(t, err) 141 | // We reset the last-created timestamp so that the next call creates a 142 | // new datakey. 143 | kr.lastCreated = 0 144 | _, err = kr.LatestDataKey() 145 | // We generated two keys, so check the length. 146 | require.Equal(t, 2, len(kr.dataKeys)) 147 | require.NoError(t, err) 148 | require.NoError(t, kr.Close()) 149 | } 150 | -------------------------------------------------------------------------------- /logger.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "log" 10 | "os" 11 | ) 12 | 13 | // Logger is implemented by any logging system that is used for standard logs. 14 | type Logger interface { 15 | Errorf(string, ...interface{}) 16 | Warningf(string, ...interface{}) 17 | Infof(string, ...interface{}) 18 | Debugf(string, ...interface{}) 19 | } 20 | 21 | // Errorf logs an ERROR log message to the logger specified in opts. It is a 22 | // no-op if no logger is set in opts. 23 | func (opt *Options) Errorf(format string, v ...interface{}) { 24 | if opt.Logger == nil { 25 | return 26 | } 27 | opt.Logger.Errorf(format, v...) 28 | } 29 | 30 | // Infof logs an INFO message to the logger specified in opts. 31 | func (opt *Options) Infof(format string, v ...interface{}) { 32 | if opt.Logger == nil { 33 | return 34 | } 35 | opt.Logger.Infof(format, v...) 36 | } 37 | 38 | // Warningf logs a WARNING message to the logger specified in opts. 39 | func (opt *Options) Warningf(format string, v ...interface{}) { 40 | if opt.Logger == nil { 41 | return 42 | } 43 | opt.Logger.Warningf(format, v...) 44 | } 45 | 46 | // Debugf logs a DEBUG message to the logger specified in opts. 47 | func (opt *Options) Debugf(format string, v ...interface{}) { 48 | if opt.Logger == nil { 49 | return 50 | } 51 | opt.Logger.Debugf(format, v...)
52 | } 53 | 54 | type loggingLevel int 55 | 56 | const ( 57 | DEBUG loggingLevel = iota 58 | INFO 59 | WARNING 60 | ERROR 61 | ) 62 | 63 | type defaultLog struct { 64 | *log.Logger 65 | level loggingLevel 66 | } 67 | 68 | func defaultLogger(level loggingLevel) *defaultLog { 69 | return &defaultLog{Logger: log.New(os.Stderr, "badger ", log.LstdFlags), level: level} 70 | } 71 | 72 | func (l *defaultLog) Errorf(f string, v ...interface{}) { 73 | if l.level <= ERROR { 74 | l.Printf("ERROR: "+f, v...) 75 | } 76 | } 77 | 78 | func (l *defaultLog) Warningf(f string, v ...interface{}) { 79 | if l.level <= WARNING { 80 | l.Printf("WARNING: "+f, v...) 81 | } 82 | } 83 | 84 | func (l *defaultLog) Infof(f string, v ...interface{}) { 85 | if l.level <= INFO { 86 | l.Printf("INFO: "+f, v...) 87 | } 88 | } 89 | 90 | func (l *defaultLog) Debugf(f string, v ...interface{}) { 91 | if l.level <= DEBUG { 92 | l.Printf("DEBUG: "+f, v...) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /logger_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "fmt" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | type mockLogger struct { 16 | output string 17 | } 18 | 19 | func (l *mockLogger) Errorf(f string, v ...interface{}) { 20 | l.output = fmt.Sprintf("ERROR: "+f, v...) 21 | } 22 | 23 | func (l *mockLogger) Infof(f string, v ...interface{}) { 24 | l.output = fmt.Sprintf("INFO: "+f, v...) 25 | } 26 | 27 | func (l *mockLogger) Warningf(f string, v ...interface{}) { 28 | l.output = fmt.Sprintf("WARNING: "+f, v...) 29 | } 30 | 31 | func (l *mockLogger) Debugf(f string, v ...interface{}) { 32 | l.output = fmt.Sprintf("DEBUG: "+f, v...) 33 | } 34 | 35 | // Test that the DB-specific log is used instead of the global log. 36 | func TestDbLog(t *testing.T) { 37 | l := &mockLogger{} 38 | opt := Options{Logger: l} 39 | 40 | opt.Errorf("test") 41 | require.Equal(t, "ERROR: test", l.output) 42 | opt.Infof("test") 43 | require.Equal(t, "INFO: test", l.output) 44 | opt.Warningf("test") 45 | require.Equal(t, "WARNING: test", l.output) 46 | } 47 | 48 | // Test that the global logger is used when no logger is specified in Options. 49 | func TestNoDbLog(t *testing.T) { 50 | l := &mockLogger{} 51 | opt := Options{} 52 | opt.Logger = l 53 | 54 | opt.Errorf("test") 55 | require.Equal(t, "ERROR: test", l.output) 56 | opt.Infof("test") 57 | require.Equal(t, "INFO: test", l.output) 58 | opt.Warningf("test") 59 | require.Equal(t, "WARNING: test", l.output) 60 | } 61 | -------------------------------------------------------------------------------- /managed_db.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | // OpenManaged returns a new DB, which allows more control over setting 9 | // transaction timestamps, aka managed mode. 10 | // 11 | // This is only useful for databases built on top of Badger (like Dgraph), and 12 | // can be ignored by most users. 13 | func OpenManaged(opts Options) (*DB, error) { 14 | opts.managedTxns = true 15 | return Open(opts) 16 | } 17 | 18 | // NewTransactionAt follows the same logic as DB.NewTransaction(), but uses the 19 | // provided read timestamp. 
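// For example (an illustrative sketch with a made-up timestamp): txn := db.NewTransactionAt(42, false) opens a read-only transaction that only observes versions committed at ts <= 42, and must still be finished with txn.Discard().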
20 | // 21 | // This is only useful for databases built on top of Badger (like Dgraph), and 22 | // can be ignored by most users. 23 | func (db *DB) NewTransactionAt(readTs uint64, update bool) *Txn { 24 | if !db.opt.managedTxns { 25 | panic("Cannot use NewTransactionAt with managedDB=false. Use NewTransaction instead.") 26 | } 27 | txn := db.newTransaction(update, true) 28 | txn.readTs = readTs 29 | return txn 30 | } 31 | 32 | // NewWriteBatchAt is similar to NewWriteBatch but it allows user to set the commit timestamp. 33 | // NewWriteBatchAt is supposed to be used only in the managed mode. 34 | func (db *DB) NewWriteBatchAt(commitTs uint64) *WriteBatch { 35 | if !db.opt.managedTxns { 36 | panic("cannot use NewWriteBatchAt with managedDB=false. Use NewWriteBatch instead") 37 | } 38 | 39 | wb := db.newWriteBatch(true) 40 | wb.commitTs = commitTs 41 | wb.txn.commitTs = commitTs 42 | return wb 43 | } 44 | func (db *DB) NewManagedWriteBatch() *WriteBatch { 45 | if !db.opt.managedTxns { 46 | panic("cannot use NewManagedWriteBatch with managedDB=false. Use NewWriteBatch instead") 47 | } 48 | 49 | wb := db.newWriteBatch(true) 50 | return wb 51 | } 52 | 53 | // CommitAt commits the transaction, following the same logic as Commit(), but 54 | // at the given commit timestamp. This will panic if not used with managed transactions. 55 | // 56 | // This is only useful for databases built on top of Badger (like Dgraph), and 57 | // can be ignored by most users. 58 | func (txn *Txn) CommitAt(commitTs uint64, callback func(error)) error { 59 | if !txn.db.opt.managedTxns { 60 | panic("Cannot use CommitAt with managedDB=false. Use Commit instead.") 61 | } 62 | txn.commitTs = commitTs 63 | if callback == nil { 64 | return txn.Commit() 65 | } 66 | txn.CommitWith(callback) 67 | return nil 68 | } 69 | 70 | // SetDiscardTs sets a timestamp at or below which, any invalid or deleted 71 | // versions can be discarded from the LSM tree, and thence from the value log to 72 | // reclaim disk space. Can only be used with managed transactions. 73 | func (db *DB) SetDiscardTs(ts uint64) { 74 | if !db.opt.managedTxns { 75 | panic("Cannot use SetDiscardTs with managedDB=false.") 76 | } 77 | db.orc.setDiscardTs(ts) 78 | } 79 | -------------------------------------------------------------------------------- /merge.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | stderrors "errors" 10 | "sync" 11 | "time" 12 | 13 | "github.com/dgraph-io/badger/v4/y" 14 | "github.com/dgraph-io/ristretto/v2/z" 15 | ) 16 | 17 | // MergeOperator represents a Badger merge operator. 18 | type MergeOperator struct { 19 | sync.RWMutex 20 | f MergeFunc 21 | db *DB 22 | key []byte 23 | closer *z.Closer 24 | } 25 | 26 | // MergeFunc accepts two byte slices, one representing an existing value, and 27 | // another representing a new value that needs to be ‘merged’ into it. MergeFunc 28 | // contains the logic to perform the ‘merge’ and return an updated value. 29 | // MergeFunc could perform operations like integer addition, list appends etc. 30 | // Note that the ordering of the operands is maintained. 31 | type MergeFunc func(existingVal, newVal []byte) []byte 32 | 33 | // GetMergeOperator creates a new MergeOperator for a given key and returns a 34 | // pointer to it. 
It also fires off a goroutine that periodically performs a compaction using 35 | // the merge function, at the interval specified by dur. 36 | func (db *DB) GetMergeOperator(key []byte, 37 | f MergeFunc, dur time.Duration) *MergeOperator { 38 | op := &MergeOperator{ 39 | f: f, 40 | db: db, 41 | key: key, 42 | closer: z.NewCloser(1), 43 | } 44 | 45 | go op.runCompactions(dur) 46 | return op 47 | } 48 | 49 | var errNoMerge = stderrors.New("No need for merge") 50 | 51 | func (op *MergeOperator) iterateAndMerge() (newVal []byte, latest uint64, err error) { 52 | txn := op.db.NewTransaction(false) 53 | defer txn.Discard() 54 | opt := DefaultIteratorOptions 55 | opt.AllVersions = true 56 | it := txn.NewKeyIterator(op.key, opt) 57 | defer it.Close() 58 | 59 | var numVersions int 60 | for it.Rewind(); it.Valid(); it.Next() { 61 | item := it.Item() 62 | if item.IsDeletedOrExpired() { 63 | break 64 | } 65 | numVersions++ 66 | if numVersions == 1 { 67 | // This should be the newVal, considering this is the latest version. 68 | newVal, err = item.ValueCopy(newVal) 69 | if err != nil { 70 | return nil, 0, err 71 | } 72 | latest = item.Version() 73 | } else { 74 | if err := item.Value(func(oldVal []byte) error { 75 | // The merge should always be on the newVal considering it has the merge result of 76 | // the latest version. The value read should be the oldVal. 77 | newVal = op.f(oldVal, newVal) 78 | return nil 79 | }); err != nil { 80 | return nil, 0, err 81 | } 82 | } 83 | if item.DiscardEarlierVersions() { 84 | break 85 | } 86 | } 87 | if numVersions == 0 { 88 | return nil, latest, ErrKeyNotFound 89 | } else if numVersions == 1 { 90 | return newVal, latest, errNoMerge 91 | } 92 | return newVal, latest, nil 93 | } 94 | 95 | func (op *MergeOperator) compact() error { 96 | op.Lock() 97 | defer op.Unlock() 98 | val, version, err := op.iterateAndMerge() 99 | if err == ErrKeyNotFound || err == errNoMerge { 100 | return nil 101 | } else if err != nil { 102 | return err 103 | } 104 | entries := []*Entry{ 105 | { 106 | Key: y.KeyWithTs(op.key, version), 107 | Value: val, 108 | meta: bitDiscardEarlierVersions, 109 | }, 110 | } 111 | // Write value back to the DB. It is important that we do not set the bitMergeEntry bit 112 | // here. When compaction happens, all the older merged entries will be removed. 113 | return op.db.batchSetAsync(entries, func(err error) { 114 | if err != nil { 115 | op.db.opt.Errorf("failed to insert the result of merge compaction: %s", err) 116 | } 117 | }) 118 | } 119 | 120 | func (op *MergeOperator) runCompactions(dur time.Duration) { 121 | ticker := time.NewTicker(dur) 122 | defer op.closer.Done() 123 | var stop bool 124 | for { 125 | select { 126 | case <-op.closer.HasBeenClosed(): 127 | stop = true 128 | case <-ticker.C: // wait for tick 129 | } 130 | if err := op.compact(); err != nil { 131 | op.db.opt.Errorf("failure while running merge operation: %s", err) 132 | } 133 | if stop { 134 | ticker.Stop() 135 | break 136 | } 137 | } 138 | } 139 | 140 | // Add records a value in Badger which will eventually be merged by a background 141 | // routine into the values that were recorded by previous invocations to Add(). 142 | func (op *MergeOperator) Add(val []byte) error { 143 | return op.db.Update(func(txn *Txn) error { 144 | return txn.SetEntry(NewEntry(op.key, val).withMergeBit()) 145 | }) 146 | } 147 | 148 | // Get returns the latest value for the merge operator, which is derived by 149 | // applying the merge function to all the values added so far.
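// A minimal usage sketch (add and uint64ToBytes are the helpers from the tests below, shown here purely for illustration): m := db.GetMergeOperator([]byte("counter"), add, time.Second); defer m.Stop(); _ = m.Add(uint64ToBytes(1)); _ = m.Add(uint64ToBytes(2)); sum, _ := m.Get() // sum decodes to 3.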
150 | // 151 | // If Add has not been called even once, Get will return ErrKeyNotFound. 152 | func (op *MergeOperator) Get() ([]byte, error) { 153 | op.RLock() 154 | defer op.RUnlock() 155 | var existing []byte 156 | err := op.db.View(func(txn *Txn) (err error) { 157 | existing, _, err = op.iterateAndMerge() 158 | return err 159 | }) 160 | if err == errNoMerge { 161 | return existing, nil 162 | } 163 | return existing, err 164 | } 165 | 166 | // Stop waits for any pending merge to complete and then stops the background 167 | // goroutine. 168 | func (op *MergeOperator) Stop() { 169 | op.closer.SignalAndWait() 170 | } 171 | -------------------------------------------------------------------------------- /merge_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "encoding/binary" 10 | "os" 11 | "testing" 12 | "time" 13 | 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | func TestGetMergeOperator(t *testing.T) { 18 | t.Run("Get before Add", func(t *testing.T) { 19 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 20 | m := db.GetMergeOperator([]byte("merge"), add, 200*time.Millisecond) 21 | defer m.Stop() 22 | 23 | val, err := m.Get() 24 | require.Equal(t, ErrKeyNotFound, err) 25 | require.Nil(t, val) 26 | }) 27 | }) 28 | t.Run("Add and Get", func(t *testing.T) { 29 | key := []byte("merge") 30 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 31 | m := db.GetMergeOperator(key, add, 200*time.Millisecond) 32 | defer m.Stop() 33 | 34 | require.NoError(t, m.Add(uint64ToBytes(1))) 35 | require.NoError(t, m.Add(uint64ToBytes(2))) 36 | require.NoError(t, m.Add(uint64ToBytes(3))) 37 | 38 | res, err := m.Get() 39 | require.NoError(t, err) 40 | require.Equal(t, uint64(6), bytesToUint64(res)) 41 | }) 42 | 43 | }) 44 | t.Run("Add and Get slices", func(t *testing.T) { 45 | // Merge function to merge two byte slices 46 | add := func(originalValue, newValue []byte) []byte { 47 | return append(originalValue, newValue...) 
48 | } 49 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 50 | m := db.GetMergeOperator([]byte("fooprefix"), add, 2*time.Millisecond) 51 | defer m.Stop() 52 | 53 | require.NoError(t, m.Add([]byte("A"))) 54 | require.NoError(t, m.Add([]byte("B"))) 55 | require.NoError(t, m.Add([]byte("C"))) 56 | 57 | value, err := m.Get() 58 | require.NoError(t, err) 59 | require.Equal(t, "ABC", string(value)) 60 | }) 61 | }) 62 | t.Run("Get Before Compact", func(t *testing.T) { 63 | key := []byte("merge") 64 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 65 | m := db.GetMergeOperator(key, add, 500*time.Millisecond) 66 | defer m.Stop() 67 | 68 | require.NoError(t, m.Add(uint64ToBytes(1))) 69 | require.NoError(t, m.Add(uint64ToBytes(2))) 70 | require.NoError(t, m.Add(uint64ToBytes(3))) 71 | 72 | res, err := m.Get() 73 | require.NoError(t, err) 74 | require.Equal(t, uint64(6), bytesToUint64(res)) 75 | }) 76 | }) 77 | 78 | t.Run("Get after Delete", func(t *testing.T) { 79 | key := []byte("merge") 80 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 81 | m := db.GetMergeOperator(key, add, 200*time.Millisecond) 82 | 83 | require.NoError(t, m.Add(uint64ToBytes(1))) 84 | require.NoError(t, m.Add(uint64ToBytes(2))) 85 | require.NoError(t, m.Add(uint64ToBytes(3))) 86 | 87 | m.Stop() 88 | res, err := m.Get() 89 | require.NoError(t, err) 90 | require.Equal(t, uint64(6), bytesToUint64(res)) 91 | 92 | require.NoError(t, db.Update(func(txn *Txn) error { 93 | return txn.Delete(key) 94 | })) 95 | 96 | m = db.GetMergeOperator(key, add, 200*time.Millisecond) 97 | require.NoError(t, m.Add(uint64ToBytes(1))) 98 | m.Stop() 99 | 100 | res, err = m.Get() 101 | require.NoError(t, err) 102 | require.Equal(t, uint64(1), bytesToUint64(res)) 103 | }) 104 | }) 105 | 106 | t.Run("Get after Stop", func(t *testing.T) { 107 | key := []byte("merge") 108 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 109 | m := db.GetMergeOperator(key, add, 1*time.Second) 110 | 111 | require.NoError(t, m.Add(uint64ToBytes(1))) 112 | require.NoError(t, m.Add(uint64ToBytes(2))) 113 | require.NoError(t, m.Add(uint64ToBytes(3))) 114 | 115 | m.Stop() 116 | res, err := m.Get() 117 | require.NoError(t, err) 118 | require.Equal(t, uint64(6), bytesToUint64(res)) 119 | }) 120 | }) 121 | t.Run("Old keys should be removed after compaction", func(t *testing.T) { 122 | dir, err := os.MkdirTemp("", "badger-test") 123 | require.NoError(t, err) 124 | defer removeDir(dir) 125 | 126 | // This test relies on CompactL0OnClose 127 | opts := getTestOptions(dir).WithCompactL0OnClose(true) 128 | db, err := Open(opts) 129 | require.NoError(t, err) 130 | mergeKey := []byte("foo") 131 | m := db.GetMergeOperator(mergeKey, add, 2*time.Millisecond) 132 | 133 | count := 5000 // This will cause compaction from L0->L1 134 | for i := 0; i < count; i++ { 135 | require.NoError(t, m.Add(uint64ToBytes(1))) 136 | } 137 | value, err := m.Get() 138 | require.Nil(t, err) 139 | require.Equal(t, uint64(count), bytesToUint64(value)) 140 | m.Stop() 141 | 142 | // Force compaction by closing DB. 
The compaction should discard all the old merged values 143 | require.Nil(t, db.Close()) 144 | db, err = Open(opts) 145 | require.NoError(t, err) 146 | defer db.Close() 147 | 148 | keyCount := 0 149 | txn := db.NewTransaction(false) 150 | defer txn.Discard() 151 | iopt := DefaultIteratorOptions 152 | iopt.AllVersions = true 153 | it := txn.NewKeyIterator(mergeKey, iopt) 154 | defer it.Close() 155 | for it.Rewind(); it.Valid(); it.Next() { 156 | keyCount++ 157 | } 158 | // We should have only one key in badger. All the other keys should've been removed by 159 | // compaction 160 | require.Equal(t, 1, keyCount) 161 | }) 162 | 163 | } 164 | 165 | func uint64ToBytes(i uint64) []byte { 166 | var buf [8]byte 167 | binary.BigEndian.PutUint64(buf[:], i) 168 | return buf[:] 169 | } 170 | 171 | func bytesToUint64(b []byte) uint64 { 172 | return binary.BigEndian.Uint64(b) 173 | } 174 | 175 | // Merge function to add two uint64 numbers 176 | func add(existing, latest []byte) []byte { 177 | return uint64ToBytes(bytesToUint64(existing) + bytesToUint64(latest)) 178 | } 179 | -------------------------------------------------------------------------------- /metrics_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "expvar" 10 | "math/rand" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func clearAllMetrics() { 17 | expvar.Do(func(kv expvar.KeyValue) { 18 | // Reset the value of each expvar variable based on its type 19 | switch v := kv.Value.(type) { 20 | case *expvar.Int: 21 | v.Set(0) 22 | case *expvar.Float: 23 | v.Set(0) 24 | case *expvar.Map: 25 | v.Init() 26 | case *expvar.String: 27 | v.Set("") 28 | } 29 | }) 30 | } 31 | 32 | func TestWriteMetrics(t *testing.T) { 33 | opt := getTestOptions("") 34 | opt.managedTxns = true 35 | opt.CompactL0OnClose = true 36 | runBadgerTest(t, &opt, func(t *testing.T, db *DB) { 37 | clearAllMetrics() 38 | num := 10 39 | val := make([]byte, 1<<12) 40 | key := make([]byte, 40) 41 | for i := 0; i < num; i++ { 42 | _, err := rand.Read(key) 43 | require.NoError(t, err) 44 | _, err = rand.Read(val) 45 | require.NoError(t, err) 46 | 47 | writer := db.NewManagedWriteBatch() 48 | require.NoError(t, writer.SetEntryAt(NewEntry(key, val), 1)) 49 | writer.Flush() 50 | } 51 | 52 | expectedSize := int64(len(val)) + 48 + 2 // 48 := size of key (40 + 8(ts)), 2 := meta 53 | write_metric := expvar.Get("badger_write_bytes_user") 54 | require.Equal(t, expectedSize*int64(num), write_metric.(*expvar.Int).Value()) 55 | 56 | put_metric := expvar.Get("badger_put_num_user") 57 | require.Equal(t, int64(num), put_metric.(*expvar.Int).Value()) 58 | 59 | lsm_metric := expvar.Get("badger_write_bytes_l0") 60 | require.Equal(t, expectedSize*int64(num), lsm_metric.(*expvar.Int).Value()) 61 | 62 | compactionMetric := expvar.Get("badger_write_bytes_compaction").(*expvar.Map) 63 | require.Equal(t, nil, compactionMetric.Get("l6")) 64 | 65 | // Force compaction 66 | db.Close() 67 | 68 | _, err := OpenManaged(opt) 69 | require.NoError(t, err) 70 | 71 | compactionMetric = expvar.Get("badger_write_bytes_compaction").(*expvar.Map) 72 | require.GreaterOrEqual(t, expectedSize*int64(num)+int64(num*200), compactionMetric.Get("l6").(*expvar.Int).Value()) 73 | // Because we have random values, compression is not able to do much, so we incur a cost on total size 74 | }) 75 | } 76 | 77 | func 
TestVlogMetrics(t *testing.T) { 78 | opt := getTestOptions("") 79 | opt.managedTxns = true 80 | opt.CompactL0OnClose = true 81 | runBadgerTest(t, &opt, func(t *testing.T, db *DB) { 82 | clearAllMetrics() 83 | num := 10 84 | val := make([]byte, 1<<20) // Large Value 85 | key := make([]byte, 40) 86 | for i := 0; i < num; i++ { 87 | _, err := rand.Read(key) 88 | require.NoError(t, err) 89 | _, err = rand.Read(val) 90 | require.NoError(t, err) 91 | 92 | writer := db.NewManagedWriteBatch() 93 | require.NoError(t, writer.SetEntryAt(NewEntry(key, val), 1)) 94 | writer.Flush() 95 | } 96 | 97 | expectedSize := int64(len(val)) + 200 // vlog expected size 98 | 99 | totalWrites := expvar.Get("badger_write_num_vlog") 100 | require.Equal(t, int64(num), totalWrites.(*expvar.Int).Value()) 101 | 102 | bytesWritten := expvar.Get("badger_write_bytes_vlog") 103 | require.GreaterOrEqual(t, expectedSize*int64(num), bytesWritten.(*expvar.Int).Value()) 104 | 105 | txn := db.NewTransactionAt(2, false) 106 | item, err := txn.Get(key) 107 | require.NoError(t, err) 108 | require.Equal(t, uint64(1), item.Version()) 109 | 110 | err = item.Value(func(val []byte) error { 111 | totalReads := expvar.Get("badger_read_num_vlog") 112 | bytesRead := expvar.Get("badger_read_bytes_vlog") 113 | require.Equal(t, int64(1), totalReads.(*expvar.Int).Value()) 114 | require.GreaterOrEqual(t, expectedSize, bytesRead.(*expvar.Int).Value()) 115 | return nil 116 | }) 117 | 118 | require.NoError(t, err) 119 | }) 120 | } 121 | 122 | func TestReadMetrics(t *testing.T) { 123 | opt := getTestOptions("") 124 | opt.managedTxns = true 125 | opt.CompactL0OnClose = true 126 | runBadgerTest(t, &opt, func(t *testing.T, db *DB) { 127 | clearAllMetrics() 128 | num := 10 129 | val := make([]byte, 1<<15) 130 | keys := [][]byte{} 131 | writer := db.NewManagedWriteBatch() 132 | for i := 0; i < num; i++ { 133 | keyB := key("byte", 1) 134 | keys = append(keys, []byte(keyB)) 135 | 136 | _, err := rand.Read(val) 137 | require.NoError(t, err) 138 | 139 | require.NoError(t, writer.SetEntryAt(NewEntry([]byte(keyB), val), 1)) 140 | } 141 | writer.Flush() 142 | 143 | txn := db.NewTransactionAt(2, false) 144 | item, err := txn.Get(keys[0]) 145 | require.NoError(t, err) 146 | require.Equal(t, uint64(1), item.Version()) 147 | 148 | totalGets := expvar.Get("badger_get_num_user") 149 | require.Equal(t, int64(1), totalGets.(*expvar.Int).Value()) 150 | 151 | totalMemtableReads := expvar.Get("badger_get_num_memtable") 152 | require.Equal(t, int64(1), totalMemtableReads.(*expvar.Int).Value()) 153 | 154 | totalLSMGets := expvar.Get("badger_get_num_lsm") 155 | require.Nil(t, totalLSMGets.(*expvar.Map).Get("l6")) 156 | 157 | // Force compaction 158 | db.Close() 159 | 160 | db, err = OpenManaged(opt) 161 | require.NoError(t, err) 162 | 163 | txn = db.NewTransactionAt(2, false) 164 | item, err = txn.Get(keys[0]) 165 | require.NoError(t, err) 166 | require.Equal(t, uint64(1), item.Version()) 167 | 168 | _, err = txn.Get([]byte(key("abdbyte", 1000))) // val should be far enough that bloom filter doesn't hit 169 | require.Error(t, err) 170 | 171 | totalLSMGets = expvar.Get("badger_get_num_lsm") 172 | require.Equal(t, int64(0x1), totalLSMGets.(*expvar.Map).Get("l6").(*expvar.Int).Value()) 173 | 174 | totalBloom := expvar.Get("badger_hit_num_lsm_bloom_filter") 175 | require.Equal(t, int64(0x1), totalBloom.(*expvar.Map).Get("l6").(*expvar.Int).Value()) 176 | require.Equal(t, int64(0x1), totalBloom.(*expvar.Map).Get("DoesNotHave_HIT").(*expvar.Int).Value()) 177 | require.Equal(t, 
int64(0x2), totalBloom.(*expvar.Map).Get("DoesNotHave_ALL").(*expvar.Int).Value()) 178 | 179 | bytesLSM := expvar.Get("badger_read_bytes_lsm") 180 | require.Equal(t, int64(len(val)), bytesLSM.(*expvar.Int).Value()) 181 | 182 | getWithResult := expvar.Get("badger_get_with_result_num_user") 183 | require.Equal(t, int64(2), getWithResult.(*expvar.Int).Value()) 184 | 185 | iterOpts := DefaultIteratorOptions 186 | iter := txn.NewKeyIterator(keys[0], iterOpts) 187 | iter.Seek(keys[0]) 188 | 189 | rangeQueries := expvar.Get("badger_iterator_num_user") 190 | require.Equal(t, int64(1), rangeQueries.(*expvar.Int).Value()) 191 | }) 192 | } 193 | -------------------------------------------------------------------------------- /options/options.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package options 7 | 8 | // ChecksumVerificationMode tells when the DB should verify checksums for SSTable blocks. 9 | type ChecksumVerificationMode int 10 | 11 | const ( 12 | // NoVerification indicates DB should not verify checksum for SSTable blocks. 13 | NoVerification ChecksumVerificationMode = iota 14 | // OnTableRead indicates checksum should be verified while opening an SSTable. 15 | OnTableRead 16 | // OnBlockRead indicates checksum should be verified on every SSTable block read. 17 | OnBlockRead 18 | // OnTableAndBlockRead indicates checksum should be verified 19 | // on SSTable opening and on every block read. 20 | OnTableAndBlockRead 21 | ) 22 | 23 | // CompressionType specifies how a block should be compressed. 24 | type CompressionType uint32 25 | 26 | const ( 27 | // None mode indicates that a block is not compressed. 28 | None CompressionType = 0 29 | // Snappy mode indicates that a block is compressed using the Snappy algorithm. 30 | Snappy CompressionType = 1 31 | // ZSTD mode indicates that a block is compressed using the ZSTD algorithm. 32 | ZSTD CompressionType = 2 33 | ) 34 | -------------------------------------------------------------------------------- /options_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc.
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "reflect" 10 | "testing" 11 | 12 | "github.com/dgraph-io/badger/v4/options" 13 | ) 14 | 15 | func TestOptions(t *testing.T) { 16 | t.Run("default options", func(t *testing.T) { 17 | // copy all the default options over to a big SuperFlag string 18 | defaultSuperFlag := generateSuperFlag(DefaultOptions("")) 19 | // fill an empty Options with values from the SuperFlag 20 | generated := Options{}.FromSuperFlag(defaultSuperFlag) 21 | // make sure they're equal 22 | if !optionsEqual(DefaultOptions(""), generated) { 23 | t.Fatal("generated default SuperFlag != default Options") 24 | } 25 | // check that values are overwritten properly 26 | overwritten := DefaultOptions("").FromSuperFlag("numgoroutines=1234") 27 | if overwritten.NumGoroutines != 1234 { 28 | t.Fatal("Option value not overwritten by SuperFlag value") 29 | } 30 | }) 31 | 32 | t.Run("special flags", func(t *testing.T) { 33 | o1 := DefaultOptions("") 34 | o1.NamespaceOffset = 10 35 | o1.Compression = options.ZSTD 36 | o1.ZSTDCompressionLevel = 2 37 | o1.NumGoroutines = 20 38 | 39 | o2 := DefaultOptions("") 40 | o2.NamespaceOffset = 10 41 | o2 = o2.FromSuperFlag("compression=zstd:2; numgoroutines=20;") 42 | 43 | // make sure they're equal 44 | if !optionsEqual(o1, o2) { 45 | t.Fatal("generated superFlag != expected options") 46 | } 47 | }) 48 | } 49 | 50 | // optionsEqual just compares the values of two Options structs 51 | func optionsEqual(o1, o2 Options) bool { 52 | o1v := reflect.ValueOf(&o1).Elem() 53 | o2v := reflect.ValueOf(&o2).Elem() 54 | for i := 0; i < o1v.NumField(); i++ { 55 | if o1v.Field(i).CanInterface() { 56 | kind := o1v.Field(i).Kind() 57 | // compare values 58 | switch kind { 59 | case reflect.Bool: 60 | if o1v.Field(i).Bool() != o2v.Field(i).Bool() { 61 | return false 62 | } 63 | case reflect.Int, reflect.Int64: 64 | if o1v.Field(i).Int() != o2v.Field(i).Int() { 65 | return false 66 | } 67 | case reflect.Uint32, reflect.Uint64: 68 | if o1v.Field(i).Uint() != o2v.Field(i).Uint() { 69 | return false 70 | } 71 | case reflect.Float64: 72 | if o1v.Field(i).Float() != o2v.Field(i).Float() { 73 | return false 74 | } 75 | case reflect.String: 76 | if o1v.Field(i).String() != o2v.Field(i).String() { 77 | return false 78 | } 79 | } 80 | } 81 | } 82 | return true 83 | } 84 | -------------------------------------------------------------------------------- /pb/badgerpb4.proto: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | // Use protos/gen.sh to generate .pb.go files. 7 | syntax = "proto3"; 8 | 9 | package badgerpb4; 10 | 11 | option go_package = "github.com/dgraph-io/badger/v4/pb"; 12 | 13 | message KV { 14 | bytes key = 1; 15 | bytes value = 2; 16 | bytes user_meta = 3; 17 | uint64 version = 4; 18 | uint64 expires_at = 5; 19 | bytes meta = 6; 20 | 21 | // Stream id is used to identify which stream the KV came from. 22 | uint32 stream_id = 10; 23 | // Stream done is used to indicate end of stream. 24 | bool stream_done = 11; 25 | } 26 | 27 | message KVList { 28 | repeated KV kv = 1; 29 | 30 | // alloc_ref used internally for memory management. 31 | uint64 alloc_ref = 10; 32 | } 33 | 34 | message ManifestChangeSet { 35 | // A set of changes that are applied atomically. 
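// (For instance, a compaction that moves a table between levels is recorded as a DELETE at one level plus a CREATE at another; applying both atomically keeps the manifest consistent.)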
36 | repeated ManifestChange changes = 1; 37 | } 38 | 39 | enum EncryptionAlgo { 40 | aes = 0; 41 | } 42 | 43 | message ManifestChange { 44 | uint64 Id = 1; // Table ID. 45 | enum Operation { 46 | CREATE = 0; 47 | DELETE = 1; 48 | } 49 | Operation Op = 2; 50 | uint32 Level = 3; // Only used for CREATE. 51 | uint64 key_id = 4; 52 | EncryptionAlgo encryption_algo = 5; 53 | uint32 compression = 6; // Only used for CREATE Op. 54 | } 55 | 56 | message Checksum { 57 | enum Algorithm { 58 | CRC32C = 0; 59 | XXHash64 = 1; 60 | } 61 | Algorithm algo = 1; // For storing type of Checksum algorithm used 62 | uint64 sum = 2; 63 | } 64 | 65 | message DataKey { 66 | uint64 key_id = 1; 67 | bytes data = 2; 68 | bytes iv = 3; 69 | int64 created_at = 4; 70 | } 71 | 72 | message Match { 73 | bytes prefix = 1; 74 | string ignore_bytes = 2; // Comma separated with dash to represent ranges "1, 2-3, 4-7, 9" 75 | } 76 | -------------------------------------------------------------------------------- /pb/gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run this script from its directory, so that badgerpb4.proto is where it's expected to 4 | # be. 5 | 6 | go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.31.0 7 | protoc --go_out=. --go_opt=paths=source_relative badgerpb4.proto 8 | -------------------------------------------------------------------------------- /pb/protos_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package pb 7 | 8 | import ( 9 | "os/exec" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func Exec(argv ...string) error { 16 | cmd := exec.Command(argv[0], argv[1:]...) 17 | 18 | if err := cmd.Start(); err != nil { 19 | return err 20 | } 21 | return cmd.Wait() 22 | } 23 | 24 | func TestProtosRegenerate(t *testing.T) { 25 | err := Exec("./gen.sh") 26 | require.NoError(t, err, "Got error while regenerating protos: %v\n", err) 27 | 28 | generatedProtos := "badgerpb4.pb.go" 29 | err = Exec("git", "diff", "--quiet", "--", generatedProtos) 30 | require.NoError(t, err, "badgerpb4.pb.go changed after regenerating") 31 | } 32 | -------------------------------------------------------------------------------- /publisher.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "sync" 10 | "sync/atomic" 11 | 12 | "github.com/dgraph-io/badger/v4/pb" 13 | "github.com/dgraph-io/badger/v4/trie" 14 | "github.com/dgraph-io/badger/v4/y" 15 | "github.com/dgraph-io/ristretto/v2/z" 16 | ) 17 | 18 | type subscriber struct { 19 | id uint64 20 | matches []pb.Match 21 | sendCh chan *pb.KVList 22 | subCloser *z.Closer 23 | // active is an atomic flag used to track whether 24 | // the subscriber is still active. 25 | active *atomic.Uint64 26 | } 27 | 28 | type publisher struct { 29 | sync.Mutex 30 | pubCh chan requests 31 | subscribers map[uint64]subscriber 32 | nextID uint64 33 | indexer *trie.Trie 34 | } 35 | 36 | func newPublisher() *publisher { 37 | return &publisher{ 38 | pubCh: make(chan requests, 1000), 39 | subscribers: make(map[uint64]subscriber), 40 | nextID: 0, 41 | indexer: trie.NewTrie(), 42 | } 43 | } 44 | 45 | func (p *publisher) listenForUpdates(c *z.Closer) { 46 | defer func() { 47 | p.cleanSubscribers() 48 | c.Done() 49 | }() 50 | slurp := func(batch requests) { 51 | for { 52 | select { 53 | case reqs := <-p.pubCh: 54 | batch = append(batch, reqs...) 55 | default: 56 | p.publishUpdates(batch) 57 | return 58 | } 59 | } 60 | } 61 | for { 62 | select { 63 | case <-c.HasBeenClosed(): 64 | return 65 | case reqs := <-p.pubCh: 66 | slurp(reqs) 67 | } 68 | } 69 | } 70 | 71 | func (p *publisher) publishUpdates(reqs requests) { 72 | p.Lock() 73 | defer func() { 74 | p.Unlock() 75 | // Release all the requests. 76 | reqs.DecrRef() 77 | }() 78 | batchedUpdates := make(map[uint64]*pb.KVList) 79 | for _, req := range reqs { 80 | for _, e := range req.Entries { 81 | ids := p.indexer.Get(e.Key) 82 | if len(ids) == 0 { 83 | continue 84 | } 85 | k := y.SafeCopy(nil, e.Key) 86 | kv := &pb.KV{ 87 | Key: y.ParseKey(k), 88 | Value: y.SafeCopy(nil, e.Value), 89 | Meta: []byte{e.UserMeta}, 90 | ExpiresAt: e.ExpiresAt, 91 | Version: y.ParseTs(k), 92 | } 93 | for id := range ids { 94 | if _, ok := batchedUpdates[id]; !ok { 95 | batchedUpdates[id] = &pb.KVList{} 96 | } 97 | batchedUpdates[id].Kv = append(batchedUpdates[id].Kv, kv) 98 | } 99 | } 100 | } 101 | 102 | for id, kvs := range batchedUpdates { 103 | if p.subscribers[id].active.Load() == 1 { 104 | p.subscribers[id].sendCh <- kvs 105 | } 106 | } 107 | } 108 | 109 | func (p *publisher) newSubscriber(c *z.Closer, matches []pb.Match) (subscriber, error) { 110 | p.Lock() 111 | defer p.Unlock() 112 | ch := make(chan *pb.KVList, 1000) 113 | id := p.nextID 114 | // Increment next ID. 115 | p.nextID++ 116 | s := subscriber{ 117 | id: id, 118 | matches: matches, 119 | sendCh: ch, 120 | subCloser: c, 121 | active: new(atomic.Uint64), 122 | } 123 | s.active.Store(1) 124 | 125 | p.subscribers[id] = s 126 | for _, m := range matches { 127 | if err := p.indexer.AddMatch(m, id); err != nil { 128 | return subscriber{}, err 129 | } 130 | } 131 | return s, nil 132 | } 133 | 134 | // cleanSubscribers stops all the subscribers. Ideally, it should be called while closing the DB.
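// Each subscriber's closer is signalled and then waited on, so all subscriber goroutines have exited by the time this returns.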
135 | func (p *publisher) cleanSubscribers() { 136 | p.Lock() 137 | defer p.Unlock() 138 | for id, s := range p.subscribers { 139 | for _, m := range s.matches { 140 | _ = p.indexer.DeleteMatch(m, id) 141 | } 142 | delete(p.subscribers, id) 143 | s.subCloser.SignalAndWait() 144 | } 145 | } 146 | 147 | func (p *publisher) deleteSubscriber(id uint64) { 148 | p.Lock() 149 | defer p.Unlock() 150 | if s, ok := p.subscribers[id]; ok { 151 | for _, m := range s.matches { 152 | _ = p.indexer.DeleteMatch(m, id) 153 | } 154 | } 155 | delete(p.subscribers, id) 156 | } 157 | 158 | func (p *publisher) sendUpdates(reqs requests) { 159 | if p.noOfSubscribers() != 0 { 160 | reqs.IncrRef() 161 | p.pubCh <- reqs 162 | } 163 | } 164 | 165 | func (p *publisher) noOfSubscribers() int { 166 | p.Lock() 167 | defer p.Unlock() 168 | return len(p.subscribers) 169 | } 170 | -------------------------------------------------------------------------------- /publisher_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "context" 10 | "errors" 11 | "fmt" 12 | "runtime" 13 | "sync" 14 | "sync/atomic" 15 | "testing" 16 | 17 | "github.com/stretchr/testify/require" 18 | 19 | "github.com/dgraph-io/badger/v4/pb" 20 | ) 21 | 22 | // This test will result in deadlock for commits before this. 23 | // Exiting this test gracefully will be the proof that the 24 | // publisher is no longer stuck in deadlock. 25 | func TestPublisherDeadlock(t *testing.T) { 26 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 27 | var subWg sync.WaitGroup 28 | subWg.Add(1) 29 | 30 | var firstUpdate sync.WaitGroup 31 | firstUpdate.Add(1) 32 | 33 | var allUpdatesDone sync.WaitGroup 34 | allUpdatesDone.Add(1) 35 | var subDone sync.WaitGroup 36 | subDone.Add(1) 37 | go func() { 38 | subWg.Done() 39 | match := pb.Match{Prefix: []byte("ke"), IgnoreBytes: ""} 40 | err := db.Subscribe(context.Background(), func(kvs *pb.KVList) error { 41 | firstUpdate.Done() 42 | // Before exiting Subscribe process, we will wait until each of the 43 | // 1110 updates (defined below) have been completed. 44 | allUpdatesDone.Wait() 45 | return errors.New("error returned") 46 | }, []pb.Match{match}) 47 | require.Error(t, err, errors.New("error returned")) 48 | subDone.Done() 49 | }() 50 | subWg.Wait() 51 | go func() { 52 | err := db.Update(func(txn *Txn) error { 53 | e := NewEntry([]byte(fmt.Sprintf("key%d", 0)), []byte(fmt.Sprintf("value%d", 0))) 54 | return txn.SetEntry(e) 55 | }) 56 | require.NoError(t, err) 57 | }() 58 | 59 | firstUpdate.Wait() 60 | var req atomic.Int64 61 | for i := 1; i < 1110; i++ { 62 | go func(i int) { 63 | err := db.Update(func(txn *Txn) error { 64 | e := NewEntry([]byte(fmt.Sprintf("key%d", i)), []byte(fmt.Sprintf("value%d", i))) 65 | return txn.SetEntry(e) 66 | }) 67 | require.NoError(t, err) 68 | req.Add(1) 69 | }(i) 70 | } 71 | for { 72 | if req.Load() == 1109 { 73 | break 74 | } 75 | // FYI: This does the same as "thread.yield()" from other languages. 76 | // In other words, it tells the go-routine scheduler to switch 77 | // to another go-routine. This is strongly preferred over 78 | // time.Sleep(...). 79 | runtime.Gosched() 80 | } 81 | // Free up the subscriber, which is waiting for updates to finish. 82 | allUpdatesDone.Done() 83 | // Exit when the subscription process has been exited. 
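// (subDone is only marked Done after db.Subscribe has returned, so waiting on it proves the subscription exited.)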
84 | subDone.Wait() 85 | }) 86 | } 87 | 88 | func TestPublisherOrdering(t *testing.T) { 89 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 90 | order := []string{} 91 | var wg sync.WaitGroup 92 | wg.Add(1) 93 | var subWg sync.WaitGroup 94 | subWg.Add(1) 95 | go func() { 96 | subWg.Done() 97 | updates := 0 98 | match := pb.Match{Prefix: []byte("ke"), IgnoreBytes: ""} 99 | err := db.Subscribe(context.Background(), func(kvs *pb.KVList) error { 100 | updates += len(kvs.GetKv()) 101 | for _, kv := range kvs.GetKv() { 102 | order = append(order, string(kv.Value)) 103 | } 104 | if updates == 5 { 105 | wg.Done() 106 | } 107 | return nil 108 | }, []pb.Match{match}) 109 | if err != nil { 110 | require.Equal(t, err.Error(), context.Canceled.Error()) 111 | } 112 | }() 113 | subWg.Wait() 114 | for i := 0; i < 5; i++ { 115 | require.NoError(t, db.Update(func(txn *Txn) error { 116 | e := NewEntry([]byte(fmt.Sprintf("key%d", i)), []byte(fmt.Sprintf("value%d", i))) 117 | return txn.SetEntry(e) 118 | })) 119 | } 120 | wg.Wait() 121 | for i := 0; i < 5; i++ { 122 | require.Equal(t, fmt.Sprintf("value%d", i), order[i]) 123 | } 124 | }) 125 | } 126 | 127 | func TestMultiplePrefix(t *testing.T) { 128 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 129 | var wg sync.WaitGroup 130 | wg.Add(1) 131 | var subWg sync.WaitGroup 132 | subWg.Add(1) 133 | go func() { 134 | subWg.Done() 135 | updates := 0 136 | match1 := pb.Match{Prefix: []byte("ke"), IgnoreBytes: ""} 137 | match2 := pb.Match{Prefix: []byte("hel"), IgnoreBytes: ""} 138 | err := db.Subscribe(context.Background(), func(kvs *pb.KVList) error { 139 | updates += len(kvs.GetKv()) 140 | for _, kv := range kvs.GetKv() { 141 | if string(kv.Key) == "key" { 142 | require.Equal(t, string(kv.Value), "value") 143 | } else { 144 | require.Equal(t, string(kv.Value), "badger") 145 | } 146 | } 147 | if updates == 2 { 148 | wg.Done() 149 | } 150 | return nil 151 | }, []pb.Match{match1, match2}) 152 | if err != nil { 153 | require.Equal(t, err.Error(), context.Canceled.Error()) 154 | } 155 | }() 156 | subWg.Wait() 157 | require.NoError(t, db.Update(func(txn *Txn) error { 158 | return txn.SetEntry(NewEntry([]byte("key"), []byte("value"))) 159 | })) 160 | require.NoError(t, db.Update(func(txn *Txn) error { 161 | return txn.SetEntry(NewEntry([]byte("hello"), []byte("badger"))) 162 | })) 163 | wg.Wait() 164 | }) 165 | } 166 | -------------------------------------------------------------------------------- /skl/README.md: -------------------------------------------------------------------------------- 1 | This is much better than `skiplist` and `slist`. 
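(In the benchmarks below, `frac_i` is presumably the read fraction i/10 used by BenchmarkReadWrite, so `frac_0` is write-only and `frac_10` is read-only.)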
2 | 3 | ```sh 4 | BenchmarkReadWrite/frac_0-8 3000000 537 ns/op 5 | BenchmarkReadWrite/frac_1-8 3000000 503 ns/op 6 | BenchmarkReadWrite/frac_2-8 3000000 492 ns/op 7 | BenchmarkReadWrite/frac_3-8 3000000 475 ns/op 8 | BenchmarkReadWrite/frac_4-8 3000000 440 ns/op 9 | BenchmarkReadWrite/frac_5-8 5000000 442 ns/op 10 | BenchmarkReadWrite/frac_6-8 5000000 380 ns/op 11 | BenchmarkReadWrite/frac_7-8 5000000 338 ns/op 12 | BenchmarkReadWrite/frac_8-8 5000000 294 ns/op 13 | BenchmarkReadWrite/frac_9-8 10000000 268 ns/op 14 | BenchmarkReadWrite/frac_10-8 100000000 26.3 ns/op 15 | ``` 16 | 17 | And even better than a simple map with read-write lock: 18 | 19 | ```sh 20 | BenchmarkReadWriteMap/frac_0-8 2000000 774 ns/op 21 | BenchmarkReadWriteMap/frac_1-8 2000000 647 ns/op 22 | BenchmarkReadWriteMap/frac_2-8 3000000 605 ns/op 23 | BenchmarkReadWriteMap/frac_3-8 3000000 603 ns/op 24 | BenchmarkReadWriteMap/frac_4-8 3000000 556 ns/op 25 | BenchmarkReadWriteMap/frac_5-8 3000000 472 ns/op 26 | BenchmarkReadWriteMap/frac_6-8 3000000 476 ns/op 27 | BenchmarkReadWriteMap/frac_7-8 3000000 457 ns/op 28 | BenchmarkReadWriteMap/frac_8-8 5000000 444 ns/op 29 | BenchmarkReadWriteMap/frac_9-8 5000000 361 ns/op 30 | BenchmarkReadWriteMap/frac_10-8 10000000 212 ns/op 31 | ``` 32 | 33 | # Node Pooling 34 | 35 | Command used: 36 | 37 | ```sh 38 | rm -Rf tmp && /usr/bin/time -l ./populate -keys_mil 10 39 | ``` 40 | 41 | For pprof results, we run without using /usr/bin/time. There are two runs below: one before and one after node pooling. 42 | 43 | Results seem to vary quite a bit between runs. 44 | 45 | ## Before node pooling 46 | 47 | ```sh 48 | 1311.53MB of 1338.69MB total (97.97%) 49 | Dropped 30 nodes (cum <= 6.69MB) 50 | Showing top 10 nodes out of 37 (cum >= 12.50MB) 51 | flat flat% sum% cum cum% 52 | 523.04MB 39.07% 39.07% 523.04MB 39.07% github.com/dgraph-io/badger/skl.(*Skiplist).Put 53 | 184.51MB 13.78% 52.85% 184.51MB 13.78% runtime.stringtoslicebyte 54 | 166.01MB 12.40% 65.25% 689.04MB 51.47% github.com/dgraph-io/badger/mem.(*Table).Put 55 | 165MB 12.33% 77.58% 165MB 12.33% runtime.convT2E 56 | 116.92MB 8.73% 86.31% 116.92MB 8.73% bytes.makeSlice 57 | 62.50MB 4.67% 90.98% 62.50MB 4.67% main.newValue 58 | 34.50MB 2.58% 93.56% 34.50MB 2.58% github.com/dgraph-io/badger/table.(*BlockIterator).parseKV 59 | 25.50MB 1.90% 95.46% 100.06MB 7.47% github.com/dgraph-io/badger/y.(*MergeIterator).Next 60 | 21.06MB 1.57% 97.04% 21.06MB 1.57% github.com/dgraph-io/badger/table.(*Table).read 61 | 12.50MB 0.93% 97.97% 12.50MB 0.93% github.com/dgraph-io/badger/table.header.Encode 62 | 63 | 128.31 real 329.37 user 17.11 sys 64 | 3355660288 maximum resident set size 65 | 0 average shared memory size 66 | 0 average unshared data size 67 | 0 average unshared stack size 68 | 2203080 page reclaims 69 | 764 page faults 70 | 0 swaps 71 | 275 block input operations 72 | 76 block output operations 73 | 0 messages sent 74 | 0 messages received 75 | 0 signals received 76 | 49173 voluntary context switches 77 | 599922 involuntary context switches 78 | ``` 79 | 80 | ## After node pooling 81 | 82 | ```sh 83 | 1963.13MB of 2026.09MB total (96.89%) 84 | Dropped 29 nodes (cum <= 10.13MB) 85 | Showing top 10 nodes out of 41 (cum >= 185.62MB) 86 | flat flat% sum% cum cum% 87 | 658.05MB 32.48% 32.48% 658.05MB 32.48% github.com/dgraph-io/badger/skl.glob..func1 88 | 297.51MB 14.68% 47.16% 297.51MB 14.68% runtime.convT2E 89 | 257.51MB 12.71% 59.87% 257.51MB 12.71% runtime.stringtoslicebyte 90 | 249.01MB 12.29% 72.16% 1007.06MB 49.70% github.com/dgraph-io/badger/mem.(*Table).Put 91 |
142.43MB 7.03% 79.19% 142.43MB 7.03% bytes.makeSlice 92 | 100MB 4.94% 84.13% 758.05MB 37.41% github.com/dgraph-io/badger/skl.newNode 93 | 99.50MB 4.91% 89.04% 99.50MB 4.91% main.newValue 94 | 75MB 3.70% 92.74% 75MB 3.70% github.com/dgraph-io/badger/table.(*BlockIterator).parseKV 95 | 44.62MB 2.20% 94.94% 44.62MB 2.20% github.com/dgraph-io/badger/table.(*Table).read 96 | 39.50MB 1.95% 96.89% 185.62MB 9.16% github.com/dgraph-io/badger/y.(*MergeIterator).Next 97 | 98 | 135.58 real 374.29 user 17.65 sys 99 | 3740614656 maximum resident set size 100 | 0 average shared memory size 101 | 0 average unshared data size 102 | 0 average unshared stack size 103 | 2276566 page reclaims 104 | 770 page faults 105 | 0 swaps 106 | 128 block input operations 107 | 90 block output operations 108 | 0 messages sent 109 | 0 messages received 110 | 0 signals received 111 | 46434 voluntary context switches 112 | 597049 involuntary context switches 113 | ``` 114 | -------------------------------------------------------------------------------- /skl/arena.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package skl 7 | 8 | import ( 9 | "sync/atomic" 10 | "unsafe" 11 | 12 | "github.com/dgraph-io/badger/v4/y" 13 | ) 14 | 15 | const ( 16 | offsetSize = int(unsafe.Sizeof(uint32(0))) 17 | 18 | // Always align nodes on 64-bit boundaries, even on 32-bit architectures, 19 | // so that the node.value field is 64-bit aligned. This is necessary because 20 | // node.getValueOffset uses atomic.LoadUint64, which expects its input 21 | // pointer to be 64-bit aligned. 22 | nodeAlign = int(unsafe.Sizeof(uint64(0))) - 1 23 | ) 24 | 25 | // Arena should be lock-free. 26 | type Arena struct { 27 | n atomic.Uint32 28 | buf []byte 29 | } 30 | 31 | // newArena returns a new arena. 32 | func newArena(n int64) *Arena { 33 | // Don't store data at position 0 in order to reserve offset=0 as a kind 34 | // of nil pointer. 35 | out := &Arena{buf: make([]byte, n)} 36 | out.n.Store(1) 37 | return out 38 | } 39 | 40 | func (s *Arena) size() int64 { 41 | return int64(s.n.Load()) 42 | } 43 | 44 | // putNode allocates a node in the arena. The node is aligned on a pointer-sized 45 | // boundary. The arena offset of the node is returned. 46 | func (s *Arena) putNode(height int) uint32 { 47 | // Compute the amount of the tower that will never be used, since the height 48 | // is less than maxHeight. 49 | unusedSize := (maxHeight - height) * offsetSize 50 | 51 | // Pad the allocation with enough bytes to ensure pointer alignment. 52 | l := uint32(MaxNodeSize - unusedSize + nodeAlign) 53 | n := s.n.Add(l) 54 | y.AssertTruef(int(n) <= len(s.buf), 55 | "Arena too small, toWrite:%d newTotal:%d limit:%d", 56 | l, n, len(s.buf)) 57 | 58 | // Return the aligned offset. 59 | m := (n - l + uint32(nodeAlign)) & ^uint32(nodeAlign) 60 | return m 61 | } 62 | 63 | // Put will *copy* val into arena. To make better use of this, reuse your input 64 | // val buffer. Returns an offset into buf. User is responsible for remembering 65 | // size of val. We could also store this size inside arena but the encoding and 66 | // decoding will incur some overhead. 
67 | func (s *Arena) putVal(v y.ValueStruct) uint32 { 68 | l := v.EncodedSize() 69 | n := s.n.Add(l) 70 | y.AssertTruef(int(n) <= len(s.buf), 71 | "Arena too small, toWrite:%d newTotal:%d limit:%d", 72 | l, n, len(s.buf)) 73 | m := n - l 74 | v.Encode(s.buf[m:]) 75 | return m 76 | } 77 | 78 | func (s *Arena) putKey(key []byte) uint32 { 79 | l := uint32(len(key)) 80 | n := s.n.Add(l) 81 | y.AssertTruef(int(n) <= len(s.buf), 82 | "Arena too small, toWrite:%d newTotal:%d limit:%d", 83 | l, n, len(s.buf)) 84 | // m is the offset where you should write. 85 | // n = new len - key len give you the offset at which you should write. 86 | m := n - l 87 | // Copy to buffer from m:n 88 | y.AssertTrue(len(key) == copy(s.buf[m:n], key)) 89 | return m 90 | } 91 | 92 | // getNode returns a pointer to the node located at offset. If the offset is 93 | // zero, then the nil node pointer is returned. 94 | func (s *Arena) getNode(offset uint32) *node { 95 | if offset == 0 { 96 | return nil 97 | } 98 | 99 | return (*node)(unsafe.Pointer(&s.buf[offset])) 100 | } 101 | 102 | // getKey returns byte slice at offset. 103 | func (s *Arena) getKey(offset uint32, size uint16) []byte { 104 | return s.buf[offset : offset+uint32(size)] 105 | } 106 | 107 | // getVal returns byte slice at offset. The given size should be just the value 108 | // size and should NOT include the meta bytes. 109 | func (s *Arena) getVal(offset uint32, size uint32) (ret y.ValueStruct) { 110 | ret.Decode(s.buf[offset : offset+size]) 111 | return 112 | } 113 | 114 | // getNodeOffset returns the offset of node in the arena. If the node pointer is 115 | // nil, then the zero offset is returned. 116 | func (s *Arena) getNodeOffset(nd *node) uint32 { 117 | if nd == nil { 118 | return 0 119 | } 120 | 121 | return uint32(uintptr(unsafe.Pointer(nd)) - uintptr(unsafe.Pointer(&s.buf[0]))) 122 | } 123 | -------------------------------------------------------------------------------- /structs_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "math" 10 | "reflect" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | // Regression test for github.com/hypermodeinc/badger/pull/1800 17 | func TestLargeEncode(t *testing.T) { 18 | var headerEnc [maxHeaderSize]byte 19 | h := header{math.MaxUint32, math.MaxUint32, math.MaxUint64, math.MaxUint8, math.MaxUint8} 20 | require.NotPanics(t, func() { _ = h.Encode(headerEnc[:]) }) 21 | } 22 | 23 | func TestNumFieldsHeader(t *testing.T) { 24 | // maxHeaderSize must correspond with any changes made to header 25 | require.Equal(t, 5, reflect.TypeOf(header{}).NumField()) 26 | } 27 | -------------------------------------------------------------------------------- /table/README.md: -------------------------------------------------------------------------------- 1 | Size of table is 123,217,667 bytes for all benchmarks. 2 | 3 | # BenchmarkRead 4 | 5 | ```sh 6 | $ go test -bench ^BenchmarkRead$ -run ^$ -count 3 7 | goos: linux 8 | goarch: amd64 9 | pkg: github.com/dgraph-io/badger/table 10 | BenchmarkRead-16 10 154074944 ns/op 11 | BenchmarkRead-16 10 154340411 ns/op 12 | BenchmarkRead-16 10 151914489 ns/op 13 | PASS 14 | ok github.com/dgraph-io/badger/table 22.467s 15 | ``` 16 | 17 | Size of table is 123,217,667 bytes, which is ~118MB. 18 | 19 | The rate is ~762MB/s using LoadToRAM (when table is in RAM). 
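(As a worked check of that figure, assuming each benchmark iteration reads the full table once: 123,217,667 B / 0.154 s ≈ 800 MB/s, which is ~762 MiB/s — consistent with the rate quoted above.)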
20 | 21 | To read a 64MB table, this would take ~0.084s, which is negligible. 22 | 23 | # BenchmarkReadAndBuild 24 | 25 | ```sh 26 | $ go test -bench BenchmarkReadAndBuild -run ^$ -count 3 27 | goos: linux 28 | goarch: amd64 29 | pkg: github.com/dgraph-io/badger/table 30 | BenchmarkReadAndBuild-16 1 1026755231 ns/op 31 | BenchmarkReadAndBuild-16 1 1009543316 ns/op 32 | BenchmarkReadAndBuild-16 1 1039920546 ns/op 33 | PASS 34 | ok github.com/dgraph-io/badger/table 12.081s 35 | ``` 36 | 37 | The rate is ~123MB/s. To build a 64MB table, this would take ~0.56s. Note that this does NOT include 38 | the flushing of the table to disk. All we are doing above is reading one table (which is in RAM) and 39 | writing one table in memory. 40 | 41 | The table building alone therefore takes 0.56 - 0.084 ≈ 0.48s. 42 | 43 | # BenchmarkReadMerged 44 | 45 | Below, we merge 5 tables. The total size remains unchanged at ~122M. 46 | 47 | ```sh 48 | $ go test -bench ReadMerged -run ^$ -count 3 49 | goos: linux 50 | goarch: amd64 51 | pkg: github.com/dgraph-io/badger/table 52 | BenchmarkReadMerged-16 2 977588975 ns/op 53 | BenchmarkReadMerged-16 2 982140738 ns/op 54 | BenchmarkReadMerged-16 2 962046017 ns/op 55 | PASS 56 | ok github.com/dgraph-io/badger/table 27.433s 57 | ``` 58 | 59 | The rate is ~120MB/s. To read a 64MB table using merge iterator, this would take ~0.53s. 60 | 61 | # BenchmarkRandomRead 62 | 63 | ```sh 64 | go test -bench BenchmarkRandomRead$ -run ^$ -count 3 65 | goos: linux 66 | goarch: amd64 67 | pkg: github.com/dgraph-io/badger/table 68 | BenchmarkRandomRead-16 500000 2645 ns/op 69 | BenchmarkRandomRead-16 500000 2648 ns/op 70 | BenchmarkRandomRead-16 500000 2614 ns/op 71 | PASS 72 | ok github.com/dgraph-io/badger/table 50.850s 73 | ``` 74 | 75 | For random read benchmarking, we are randomly reading a key and verifying its value. 76 | 77 | # DB Open benchmark 78 | 79 | 1. Create badger DB with 2 billion key-value pairs (about 380GB of data) 80 | 81 | ```sh 82 | badger fill -m 2000 --dir="/tmp/data" --sorted 83 | ``` 84 | 85 | 2. Clear buffers and swap memory 86 | 87 | ```sh 88 | free -mh && sync && echo 3 | sudo tee /proc/sys/vm/drop_caches && sudo swapoff -a && sudo swapon -a && free -mh 89 | ``` 90 | 91 | Also flush disk buffers 92 | 93 | ```sh 94 | blockdev --flushbufs /dev/nvme0n1p4 95 | ``` 96 | 97 | 3. Run the benchmark 98 | 99 | ```sh 100 | go test -run=^$ github.com/dgraph-io/badger -bench ^BenchmarkDBOpen$ -benchdir="/tmp/data" -v 101 | 102 | badger 2019/06/04 17:15:56 INFO: 126 tables out of 1028 opened in 3.017s 103 | badger 2019/06/04 17:15:59 INFO: 257 tables out of 1028 opened in 6.014s 104 | badger 2019/06/04 17:16:02 INFO: 387 tables out of 1028 opened in 9.017s 105 | badger 2019/06/04 17:16:05 INFO: 516 tables out of 1028 opened in 12.025s 106 | badger 2019/06/04 17:16:08 INFO: 645 tables out of 1028 opened in 15.013s 107 | badger 2019/06/04 17:16:11 INFO: 775 tables out of 1028 opened in 18.008s 108 | badger 2019/06/04 17:16:14 INFO: 906 tables out of 1028 opened in 21.003s 109 | badger 2019/06/04 17:16:17 INFO: All 1028 tables opened in 23.851s 110 | badger 2019/06/04 17:16:17 INFO: Replaying file id: 1998 at offset: 332000 111 | badger 2019/06/04 17:16:17 INFO: Replay took: 9.81µs 112 | goos: linux 113 | goarch: amd64 114 | pkg: github.com/dgraph-io/badger 115 | BenchmarkDBOpen-16 1 23930082140 ns/op 116 | PASS 117 | ok github.com/dgraph-io/badger 24.076s 118 | 119 | ``` 120 | 121 | It takes about 23.851s to open a DB with 2 billion sorted key-value entries.
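The random-read pattern described above ("randomly read a key and verify its value") can also be reproduced against the public API. The sketch below is illustrative only: it exercises `db.View`/`txn.Get` rather than the internal table iterator the benchmark actually uses, and the key count and layout are assumptions made for the example.

```go
// Illustrative random-read benchmark against the public Badger API.
// NOT the table-level benchmark reported above; the key count and
// layout are arbitrary choices for this sketch.
package badger_test

import (
	"fmt"
	"math/rand"
	"testing"

	badger "github.com/dgraph-io/badger/v4"
)

func BenchmarkRandomReadSketch(b *testing.B) {
	db, err := badger.Open(badger.DefaultOptions(b.TempDir()))
	if err != nil {
		b.Fatal(err)
	}
	defer db.Close()

	// Populate n keys via a write batch.
	const n = 100_000
	wb := db.NewWriteBatch()
	for i := 0; i < n; i++ {
		k := []byte(fmt.Sprintf("key-%06d", i))
		v := []byte(fmt.Sprintf("val-%06d", i))
		if err := wb.Set(k, v); err != nil {
			b.Fatal(err)
		}
	}
	if err := wb.Flush(); err != nil {
		b.Fatal(err)
	}

	r := rand.New(rand.NewSource(42))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		id := r.Intn(n)
		err := db.View(func(txn *badger.Txn) error {
			item, err := txn.Get([]byte(fmt.Sprintf("key-%06d", id)))
			if err != nil {
				return err
			}
			// Verify the value, mirroring the benchmark description.
			return item.Value(func(val []byte) error {
				if string(val) != fmt.Sprintf("val-%06d", id) {
					return fmt.Errorf("unexpected value %q", val)
				}
				return nil
			})
		})
		if err != nil {
			b.Fatal(err)
		}
	}
}
```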
122 | -------------------------------------------------------------------------------- /table/merge_iterator.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package table 7 | 8 | import ( 9 | "bytes" 10 | 11 | "github.com/dgraph-io/badger/v4/y" 12 | ) 13 | 14 | // MergeIterator merges multiple iterators. 15 | // NOTE: MergeIterator owns the array of iterators and is responsible for closing them. 16 | type MergeIterator struct { 17 | left node 18 | right node 19 | small *node 20 | 21 | curKey []byte 22 | reverse bool 23 | } 24 | 25 | type node struct { 26 | valid bool 27 | key []byte 28 | iter y.Iterator 29 | 30 | // The two iterators are type asserted from `y.Iterator`, used to inline more function calls. 31 | // Calling functions on concrete types is much faster (about 25-30%) than calling the 32 | // interface's function. 33 | merge *MergeIterator 34 | concat *ConcatIterator 35 | } 36 | 37 | func (n *node) setIterator(iter y.Iterator) { 38 | n.iter = iter 39 | // It's okay if the type assertion below fails and n.merge/n.concat are set to nil. 40 | // We handle the nil values of merge and concat in all the methods. 41 | n.merge, _ = iter.(*MergeIterator) 42 | n.concat, _ = iter.(*ConcatIterator) 43 | } 44 | 45 | func (n *node) setKey() { 46 | switch { 47 | case n.merge != nil: 48 | n.valid = n.merge.small.valid 49 | if n.valid { 50 | n.key = n.merge.small.key 51 | } 52 | case n.concat != nil: 53 | n.valid = n.concat.Valid() 54 | if n.valid { 55 | n.key = n.concat.Key() 56 | } 57 | default: 58 | n.valid = n.iter.Valid() 59 | if n.valid { 60 | n.key = n.iter.Key() 61 | } 62 | } 63 | } 64 | 65 | func (n *node) next() { 66 | switch { 67 | case n.merge != nil: 68 | n.merge.Next() 69 | case n.concat != nil: 70 | n.concat.Next() 71 | default: 72 | n.iter.Next() 73 | } 74 | n.setKey() 75 | } 76 | 77 | func (n *node) rewind() { 78 | n.iter.Rewind() 79 | n.setKey() 80 | } 81 | 82 | func (n *node) seek(key []byte) { 83 | n.iter.Seek(key) 84 | n.setKey() 85 | } 86 | 87 | func (mi *MergeIterator) fix() { 88 | if !mi.bigger().valid { 89 | return 90 | } 91 | if !mi.small.valid { 92 | mi.swapSmall() 93 | return 94 | } 95 | cmp := y.CompareKeys(mi.small.key, mi.bigger().key) 96 | switch { 97 | case cmp == 0: // Both the keys are equal. 98 | // In case of same keys, move the right iterator ahead. 99 | mi.right.next() 100 | if &mi.right == mi.small { 101 | mi.swapSmall() 102 | } 103 | return 104 | case cmp < 0: // Small is less than bigger(). 105 | if mi.reverse { 106 | mi.swapSmall() 107 | } else { //nolint:staticcheck 108 | // we don't need to do anything. Small already points to the smallest. 109 | } 110 | return 111 | default: // bigger() is less than small. 112 | if mi.reverse { 113 | // Do nothing since we're iterating in reverse. Small currently points to 114 | // the bigger key and that's okay in reverse iteration. 115 | } else { 116 | mi.swapSmall() 117 | } 118 | return 119 | } 120 | } 121 | 122 | func (mi *MergeIterator) bigger() *node { 123 | if mi.small == &mi.left { 124 | return &mi.right 125 | } 126 | return &mi.left 127 | } 128 | 129 | func (mi *MergeIterator) swapSmall() { 130 | if mi.small == &mi.left { 131 | mi.small = &mi.right 132 | return 133 | } 134 | if mi.small == &mi.right { 135 | mi.small = &mi.left 136 | return 137 | } 138 | } 139 | 140 | // Next returns the next element. If it is the same as the current key, ignore it. 
141 | func (mi *MergeIterator) Next() { 142 | for mi.Valid() { 143 | if !bytes.Equal(mi.small.key, mi.curKey) { 144 | break 145 | } 146 | mi.small.next() 147 | mi.fix() 148 | } 149 | mi.setCurrent() 150 | } 151 | 152 | func (mi *MergeIterator) setCurrent() { 153 | mi.curKey = append(mi.curKey[:0], mi.small.key...) 154 | } 155 | 156 | // Rewind seeks to first element (or last element for reverse iterator). 157 | func (mi *MergeIterator) Rewind() { 158 | mi.left.rewind() 159 | mi.right.rewind() 160 | mi.fix() 161 | mi.setCurrent() 162 | } 163 | 164 | // Seek brings us to element with key >= given key. 165 | func (mi *MergeIterator) Seek(key []byte) { 166 | mi.left.seek(key) 167 | mi.right.seek(key) 168 | mi.fix() 169 | mi.setCurrent() 170 | } 171 | 172 | // Valid returns whether the MergeIterator is at a valid element. 173 | func (mi *MergeIterator) Valid() bool { 174 | return mi.small.valid 175 | } 176 | 177 | // Key returns the key associated with the current iterator. 178 | func (mi *MergeIterator) Key() []byte { 179 | return mi.small.key 180 | } 181 | 182 | // Value returns the value associated with the iterator. 183 | func (mi *MergeIterator) Value() y.ValueStruct { 184 | return mi.small.iter.Value() 185 | } 186 | 187 | // Close implements y.Iterator. 188 | func (mi *MergeIterator) Close() error { 189 | err1 := mi.left.iter.Close() 190 | err2 := mi.right.iter.Close() 191 | if err1 != nil { 192 | return y.Wrap(err1, "MergeIterator") 193 | } 194 | return y.Wrap(err2, "MergeIterator") 195 | } 196 | 197 | // NewMergeIterator creates a merge iterator. 198 | func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator { 199 | switch len(iters) { 200 | case 0: 201 | return nil 202 | case 1: 203 | return iters[0] 204 | case 2: 205 | mi := &MergeIterator{ 206 | reverse: reverse, 207 | } 208 | mi.left.setIterator(iters[0]) 209 | mi.right.setIterator(iters[1]) 210 | // Assign the left iterator arbitrarily. This will be fixed when the user calls rewind/seek. 211 | mi.small = &mi.left 212 | return mi 213 | } 214 | mid := len(iters) / 2 215 | return NewMergeIterator( 216 | []y.Iterator{ 217 | NewMergeIterator(iters[:mid], reverse), 218 | NewMergeIterator(iters[mid:], reverse), 219 | }, reverse) 220 | } 221 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | go version 6 | 7 | # Check if GitHub Actions is running 8 | if [ "$CI" = "true" ]; then 9 | # Enable code coverage 10 | # export because tests run in a subprocess 11 | export covermode="-covermode=atomic" 12 | export coverprofile="-coverprofile=cover_tmp.out" 13 | echo "mode: atomic" >>cover.out 14 | fi 15 | 16 | # Run `go list` BEFORE setting GOFLAGS so that the output is in the right 17 | # format for grep. 18 | # export packages because the test will run in a sub process. 19 | export packages=$(go list ./... | grep "github.com/dgraph-io/badger/v4/") 20 | 21 | tags="-tags=jemalloc" 22 | 23 | # Compile the Badger binary 24 | pushd badger 25 | go build -v $tags . 26 | popd 27 | 28 | # Run the memory intensive tests.
29 | manual() { 30 | timeout="-timeout 2m" 31 | echo "==> Running package tests for $packages" 32 | set -e 33 | for pkg in $packages; do 34 | echo "===> Testing $pkg" 35 | go test $tags -timeout=25m $covermode $coverprofile -failfast -race -parallel 16 $pkg && write_coverage || return 1 36 | done 37 | echo "==> DONE package tests" 38 | 39 | echo "==> Running manual tests" 40 | # Run the special Truncate test. 41 | rm -rf p 42 | set -e 43 | go test $tags $timeout $covermode $coverprofile -run='TestTruncateVlogNoClose$' -failfast --manual=true && write_coverage || return 1 44 | truncate --size=4096 p/000000.vlog 45 | go test $tags $timeout $covermode $coverprofile -run='TestTruncateVlogNoClose2$' -failfast --manual=true && write_coverage || return 1 46 | go test $tags $timeout $covermode $coverprofile -run='TestTruncateVlogNoClose3$' -failfast --manual=true && write_coverage || return 1 47 | rm -rf p 48 | 49 | # TODO(ibrahim): Let's make these tests have Manual prefix. 50 | # go test $tags -run='TestManual' --manual=true --parallel=2 51 | # TestWriteBatch 52 | # TestValueGCManaged 53 | # TestDropPrefix 54 | # TestDropAllManaged 55 | go test $tags $timeout $covermode $coverprofile -failfast -run='TestBigKeyValuePairs$' --manual=true && write_coverage || return 1 56 | go test $tags $timeout $covermode $coverprofile -failfast -run='TestPushValueLogLimit' --manual=true && write_coverage || return 1 57 | go test $tags $timeout $covermode $coverprofile -failfast -run='TestKeyCount' --manual=true && write_coverage || return 1 58 | go test $tags $timeout $covermode $coverprofile -failfast -run='TestIteratePrefix' --manual=true && write_coverage || return 1 59 | go test $tags $timeout $covermode $coverprofile -failfast -run='TestIterateParallel' --manual=true && write_coverage || return 1 60 | go test $tags $timeout $covermode $coverprofile -failfast -run='TestBigStream' --manual=true && write_coverage || return 1 61 | go test $tags $timeout $covermode $coverprofile -failfast -run='TestGoroutineLeak' --manual=true && write_coverage || return 1 62 | go test $tags $timeout $covermode $coverprofile -failfast -run='TestGetMore' --manual=true && write_coverage || return 1 63 | 64 | echo "==> DONE manual tests" 65 | } 66 | 67 | root() { 68 | # Run the normal tests. 69 | # go test -timeout=25m -v -race github.com/dgraph-io/badger/v4/... 70 | 71 | echo "==> Running root level tests." 72 | go test $tags -v -race -parallel=16 -timeout=25m -failfast $covermode $coverprofile . && write_coverage || return 1 73 | echo "==> DONE root level tests" 74 | } 75 | 76 | stream() { 77 | set -eo pipefail 78 | pushd badger 79 | baseDir=$(mktemp -d -p .) 80 | ./badger benchmark write -s --dir=$baseDir/test | tee $baseDir/log.txt 81 | ./badger benchmark read --dir=$baseDir/test --full-scan | tee --append $baseDir/log.txt 82 | ./badger benchmark read --dir=$baseDir/test -d=30s | tee --append $baseDir/log.txt 83 | ./badger stream --dir=$baseDir/test -o "$baseDir/test2" | tee --append $baseDir/log.txt 84 | count=$(cat "$baseDir/log.txt" | grep "at program end: 0 B" | wc -l) 85 | rm -rf $baseDir 86 | if [ $count -ne 4 ]; then 87 | echo "LEAK detected in Badger stream." 
88 | return 1 89 | fi 90 | echo "==> DONE stream test" 91 | popd 92 | return 0 93 | } 94 | 95 | write_coverage() { 96 | if [[ $CI == "true" ]]; then 97 | if [[ -f cover_tmp.out ]]; then 98 | sed -i '1d' cover_tmp.out 99 | cat cover_tmp.out >>cover.out && rm cover_tmp.out 100 | fi 101 | fi 102 | 103 | } 104 | 105 | # parallel tests currently not working 106 | # parallel --halt now,fail=1 --progress --line-buffer ::: stream manual root 107 | # run tests in sequence 108 | root 109 | stream 110 | manual 111 | -------------------------------------------------------------------------------- /test_extensions.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | // Important: Do NOT import the "testing" package, as otherwise, that 9 | // will pull in imports into the production build that we do not want. 10 | 11 | // TODO: Consider using this with specific compilation tags so that it only 12 | // shows up when performing testing (e.g., specify build tag=unit). 13 | // We are not yet ready to do that, as it may impact customer usage as 14 | // well as requiring us to update the CI build flags. Moreover, the 15 | // current model does not actually incur any significant cost. 16 | // If we do this, we will also want to introduce a parallel file that 17 | // overrides some of these structs and functions with empty contents. 18 | 19 | // String constants for messages to be pushed to syncChan. 20 | const ( 21 | updateDiscardStatsMsg = "updateDiscardStats iteration done" 22 | endVLogInitMsg = "End: vlog.init(db)" 23 | ) 24 | 25 | // testOnlyOptions specifies an extension to the type Options that we want to 26 | // use only in the context of testing. 27 | type testOnlyOptions struct { 28 | // syncChan is used to listen for specific messages related to activities 29 | // that can occur in a DB instance. Currently, this is only used in 30 | // testing activities. 31 | syncChan chan string 32 | } 33 | 34 | // testOnlyDBExtensions specifies an extension to the type DB that we want to 35 | // use only in the context of testing. 36 | type testOnlyDBExtensions struct { 37 | syncChan chan string 38 | 39 | // onCloseDiscardCapture will be populated by a DB instance during the 40 | // process of performing the Close operation. Currently, we only consider 41 | // using this during testing. 42 | onCloseDiscardCapture map[uint64]uint64 43 | } 44 | 45 | // logToSyncChan sends a message to the DB's syncChan. Note that we expect 46 | // that the DB never closes this channel; the responsibility for 47 | // allocating and closing the channel belongs to the test module. 48 | // If db.syncChan is nil or has never been initialized, this will be 49 | // silently ignored. 50 | func (db *DB) logToSyncChan(msg string) { 51 | if db.syncChan != nil { 52 | db.syncChan <- msg 53 | } 54 | } 55 | 56 | // captureDiscardStats will copy the contents of the discardStats file 57 | // maintained by vlog to the onCloseDiscardCapture map specified by 58 | // db.opt. Of course, if db.opt.onCloseDiscardCapture is nil (as expected 59 | // for a production system as opposed to a test system), this is a no-op.
60 | func (db *DB) captureDiscardStats() { 61 | if db.onCloseDiscardCapture != nil { 62 | db.vlog.discardStats.Lock() 63 | db.vlog.discardStats.Iterate(func(id, val uint64) { 64 | db.onCloseDiscardCapture[id] = val 65 | }) 66 | db.vlog.discardStats.Unlock() 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /trie/trie_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package trie 7 | 8 | import ( 9 | "sort" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | 14 | "github.com/dgraph-io/badger/v4/pb" 15 | ) 16 | 17 | func TestGet(t *testing.T) { 18 | trie := NewTrie() 19 | trie.Add([]byte("hello"), 1) 20 | trie.Add([]byte("hello"), 3) 21 | trie.Add([]byte("hello"), 4) 22 | trie.Add([]byte("hel"), 20) 23 | trie.Add([]byte("he"), 20) 24 | trie.Add([]byte("badger"), 30) 25 | 26 | trie.Add(nil, 10) 27 | require.Equal(t, map[uint64]struct{}{10: {}}, trie.Get([]byte("A"))) 28 | 29 | ids := trie.Get([]byte("hel")) 30 | require.Equal(t, 2, len(ids)) 31 | require.Equal(t, map[uint64]struct{}{10: {}, 20: {}}, ids) 32 | 33 | ids = trie.Get([]byte("badger")) 34 | require.Equal(t, 2, len(ids)) 35 | require.Equal(t, map[uint64]struct{}{10: {}, 30: {}}, ids) 36 | 37 | ids = trie.Get([]byte("hello")) 38 | require.Equal(t, 5, len(ids)) 39 | require.Equal(t, map[uint64]struct{}{10: {}, 1: {}, 3: {}, 4: {}, 20: {}}, ids) 40 | 41 | trie.Add([]byte{}, 11) 42 | require.Equal(t, map[uint64]struct{}{10: {}, 11: {}}, trie.Get([]byte("A"))) 43 | } 44 | 45 | func TestTrieDelete(t *testing.T) { 46 | trie := NewTrie() 47 | t.Logf("Num nodes: %d", numNodes(trie.root)) 48 | require.Equal(t, 1, numNodes(trie.root)) 49 | 50 | trie.Add([]byte("hello"), 1) 51 | trie.Add([]byte("hello"), 3) 52 | trie.Add([]byte("hello"), 4) 53 | trie.Add(nil, 5) 54 | 55 | t.Logf("Num nodes: %d", numNodes(trie.root)) 56 | 57 | require.NoError(t, trie.Delete([]byte("hello"), 4)) 58 | t.Logf("Num nodes: %d", numNodes(trie.root)) 59 | 60 | require.Equal(t, map[uint64]struct{}{5: {}, 1: {}, 3: {}}, trie.Get([]byte("hello"))) 61 | 62 | require.NoError(t, trie.Delete(nil, 5)) 63 | t.Logf("Num nodes: %d", numNodes(trie.root)) 64 | require.Equal(t, map[uint64]struct{}{1: {}, 3: {}}, trie.Get([]byte("hello"))) 65 | 66 | require.NoError(t, trie.Delete([]byte("hello"), 1)) 67 | require.NoError(t, trie.Delete([]byte("hello"), 3)) 68 | require.NoError(t, trie.Delete([]byte("hello"), 4)) 69 | require.NoError(t, trie.Delete([]byte("hello"), 5)) 70 | require.NoError(t, trie.Delete([]byte("hello"), 6)) 71 | 72 | require.Equal(t, 1, numNodes(trie.root)) 73 | t.Logf("Num nodes: %d", numNodes(trie.root)) 74 | 75 | require.Equal(t, true, trie.root.isEmpty()) 76 | require.Equal(t, map[uint64]struct{}{}, trie.Get([]byte("hello"))) 77 | } 78 | 79 | func TestParseIgnoreBytes(t *testing.T) { 80 | out, err := parseIgnoreBytes("1") 81 | require.NoError(t, err) 82 | require.Equal(t, []bool{false, true}, out) 83 | 84 | out, err = parseIgnoreBytes("0") 85 | require.NoError(t, err) 86 | require.Equal(t, []bool{true}, out) 87 | 88 | out, err = parseIgnoreBytes("0, 3 - 5, 7") 89 | require.NoError(t, err) 90 | require.Equal(t, []bool{true, false, false, true, true, true, false, true}, out) 91 | } 92 | 93 | func TestPrefixMatchWithHoles(t *testing.T) { 94 | trie := NewTrie() 95 | 96 | add := func(prefix, ignore string, id uint64) { 97 | m := pb.Match{ 98 | Prefix: 
[]byte(prefix), 99 | IgnoreBytes: ignore, 100 | } 101 | require.NoError(t, trie.AddMatch(m, id)) 102 | } 103 | 104 | add("", "", 1) 105 | add("aaaa", "", 2) 106 | add("aaaaaa", "2-10", 3) 107 | add("aaaaaaaaa", "0, 4 - 6, 8", 4) 108 | 109 | get := func(k string) []uint64 { 110 | var ids []uint64 111 | m := trie.Get([]byte(k)) 112 | for id := range m { 113 | ids = append(ids, id) 114 | } 115 | sort.Slice(ids, func(i, j int) bool { 116 | return ids[i] < ids[j] 117 | }) 118 | return ids 119 | } 120 | 121 | // Everything matches 1. 122 | require.Equal(t, []uint64{1}, get("")) 123 | require.Equal(t, []uint64{1}, get("aax")) 124 | 125 | // aaaaa would match 2, but not 3 because 3's length is 6. 126 | require.Equal(t, []uint64{1, 2}, get("aaaaa")) 127 | 128 | // An "aa" prefix with enough length is sufficient to match 3. 129 | require.Equal(t, []uint64{1, 3}, get("aabbbbbbbb")) 130 | 131 | // Has differences only in the ignored positions, so it matches 4. 132 | require.Equal(t, []uint64{1, 4}, get("baaabbbabba")) 133 | 134 | // Even with differences, matches everything. 135 | require.Equal(t, []uint64{1, 2, 3, 4}, get("aaaabbbabba")) 136 | 137 | t.Logf("Num nodes: %d", numNodes(trie.root)) 138 | 139 | del := func(prefix, ignore string, id uint64) { 140 | m := pb.Match{ 141 | Prefix: []byte(prefix), 142 | IgnoreBytes: ignore, 143 | } 144 | require.NoError(t, trie.DeleteMatch(m, id)) 145 | } 146 | 147 | del("aaaaaaaaa", "0, 4 - 6, 8", 5) 148 | t.Logf("Num nodes: %d", numNodes(trie.root)) 149 | 150 | del("aaaaaaaaa", "0, 4 - 6, 8", 4) 151 | t.Logf("Num nodes: %d", numNodes(trie.root)) 152 | 153 | del("aaaaaa", "2-10", 3) 154 | t.Logf("Num nodes: %d", numNodes(trie.root)) 155 | 156 | del("aaaa", "", 2) 157 | t.Logf("Num nodes: %d", numNodes(trie.root)) 158 | 159 | del("", "", 1) 160 | t.Logf("Num nodes: %d", numNodes(trie.root)) 161 | 162 | del("abracadabra", "", 4) 163 | t.Logf("Num nodes: %d", numNodes(trie.root)) 164 | 165 | require.Equal(t, 1, numNodes(trie.root)) 166 | } 167 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "encoding/hex" 10 | "fmt" 11 | "math/rand" 12 | "os" 13 | "time" 14 | 15 | "github.com/dgraph-io/badger/v4/table" 16 | "github.com/dgraph-io/badger/v4/y" 17 | ) 18 | 19 | func (s *levelsController) validate() error { 20 | for _, l := range s.levels { 21 | if err := l.validate(); err != nil { 22 | return y.Wrap(err, "Levels Controller") 23 | } 24 | } 25 | return nil 26 | } 27 | 28 | // validate does a sanity check on one level of data or the in-memory index.
29 | func (s *levelHandler) validate() error { 30 | if s.level == 0 { 31 | return nil 32 | } 33 | 34 | s.RLock() 35 | defer s.RUnlock() 36 | numTables := len(s.tables) 37 | for j := 1; j < numTables; j++ { 38 | if j >= len(s.tables) { 39 | return fmt.Errorf("Level %d, j=%d numTables=%d", s.level, j, numTables) 40 | } 41 | 42 | if y.CompareKeys(s.tables[j-1].Biggest(), s.tables[j].Smallest()) >= 0 { 43 | return fmt.Errorf( 44 | "Inter: Biggest(j-1)[%d] \n%s\n vs Smallest(j)[%d]: \n%s\n: "+ 45 | "level=%d j=%d numTables=%d", 46 | s.tables[j-1].ID(), hex.Dump(s.tables[j-1].Biggest()), s.tables[j].ID(), 47 | hex.Dump(s.tables[j].Smallest()), s.level, j, numTables) 48 | } 49 | 50 | if y.CompareKeys(s.tables[j].Smallest(), s.tables[j].Biggest()) > 0 { 51 | return fmt.Errorf( 52 | "Intra: \n%s\n vs \n%s\n: level=%d j=%d numTables=%d", 53 | hex.Dump(s.tables[j].Smallest()), hex.Dump(s.tables[j].Biggest()), s.level, j, numTables) 54 | } 55 | } 56 | return nil 57 | } 58 | 59 | // func (s *KV) debugPrintMore() { s.lc.debugPrintMore() } 60 | 61 | // // debugPrintMore shows key ranges of each level. 62 | // func (s *levelsController) debugPrintMore() { 63 | // s.Lock() 64 | // defer s.Unlock() 65 | // for i := 0; i < s.kv.opt.MaxLevels; i++ { 66 | // s.levels[i].debugPrintMore() 67 | // } 68 | // } 69 | 70 | // func (s *levelHandler) debugPrintMore() { 71 | // s.RLock() 72 | // defer s.RUnlock() 73 | // s.elog.Printf("Level %d:", s.level) 74 | // for _, t := range s.tables { 75 | // y.Printf(" [%s, %s]", t.Smallest(), t.Biggest()) 76 | // } 77 | // y.Printf("\n") 78 | // } 79 | 80 | // reserveFileID reserves a unique file id. 81 | func (s *levelsController) reserveFileID() uint64 { 82 | id := s.nextFileID.Add(1) 83 | return id - 1 84 | } 85 | 86 | func getIDMap(dir string) map[uint64]struct{} { 87 | fileInfos, err := os.ReadDir(dir) 88 | y.Check(err) 89 | idMap := make(map[uint64]struct{}) 90 | for _, info := range fileInfos { 91 | if info.IsDir() { 92 | continue 93 | } 94 | fileID, ok := table.ParseFileID(info.Name()) 95 | if !ok { 96 | continue 97 | } 98 | idMap[fileID] = struct{}{} 99 | } 100 | return idMap 101 | } 102 | 103 | func init() { 104 | rand.Seed(time.Now().UnixNano()) 105 | } 106 | -------------------------------------------------------------------------------- /watermark_edge_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package badger 7 | 8 | import ( 9 | "crypto/rand" 10 | "errors" 11 | "fmt" 12 | "math/big" 13 | "sync" 14 | "testing" 15 | "time" 16 | ) 17 | 18 | func TestWaterMarkEdgeCase(t *testing.T) { 19 | const N = 1_000 20 | runBadgerTest(t, nil, func(t *testing.T, db *DB) { 21 | eg := make(chan error, N) 22 | defer close(eg) 23 | 24 | var wg sync.WaitGroup 25 | wg.Add(N) 26 | for i := 0; i < N; i++ { 27 | go func(j int) { 28 | defer wg.Done() 29 | if err := doWork(db, j); errors.Is(err, ErrConflict) { 30 | eg <- nil 31 | } else { 32 | eg <- fmt.Errorf("expected conflict not found, err: %v, i = %v", err, j) 33 | } 34 | }(i) 35 | } 36 | wg.Wait() 37 | 38 | for i := 0; i < N; i++ { 39 | if err := <-eg; err != nil { 40 | t.Fatal(err) 41 | } 42 | } 43 | }) 44 | } 45 | 46 | func doWork(db *DB, i int) error { 47 | delay() 48 | 49 | key1 := fmt.Sprintf("v:%d:%s", i, generateRandomBytes()) 50 | key2 := fmt.Sprintf("v:%d:%s", i, generateRandomBytes()) 51 | 52 | tx1 := db.NewTransaction(true) 53 | defer tx1.Discard() 54 | tx2 := db.NewTransaction(true) 55 | defer tx2.Discard() 56 | 57 | getValue(tx2, key1) 58 | getValue(tx2, key2) 59 | getValue(tx1, key1) 60 | getValue(tx2, key1) 61 | setValue(tx2, key1, "value1") 62 | setValue(tx2, key2, "value2") 63 | 64 | if err := tx2.Commit(); err != nil { 65 | return fmt.Errorf("tx2 failed: %w (key1 = %s, key2 = %s)", err, key1, key2) 66 | } 67 | 68 | setValue(tx1, key1, "value1-second") 69 | getValue(tx1, key1) 70 | setValue(tx1, key1, "value1-third") 71 | 72 | delay() 73 | if err := tx1.Commit(); err != nil { 74 | return fmt.Errorf("tx1 failed: %w (key1 = %s, key2 = %s)", err, key1, key2) 75 | } 76 | 77 | return nil 78 | } 79 | 80 | func generateRandomBytes() []byte { 81 | b := make([]byte, 20) 82 | if _, err := rand.Read(b); err != nil { 83 | panic(err) 84 | } 85 | return b 86 | } 87 | 88 | func getValue(txn *Txn, key string) { 89 | if _, err := txn.Get([]byte(key)); err != nil { 90 | if !errors.Is(err, ErrKeyNotFound) { 91 | panic(err) 92 | } 93 | } 94 | } 95 | 96 | func setValue(txn *Txn, key, value string) { 97 | if err := txn.Set([]byte(key), []byte(value)); err != nil { 98 | panic(err) 99 | } 100 | } 101 | 102 | func delay() { 103 | jitter, err := rand.Int(rand.Reader, big.NewInt(100)) 104 | if err != nil { 105 | panic(err) 106 | } 107 | <-time.After(time.Duration(jitter.Int64()) * time.Millisecond) 108 | } 109 | -------------------------------------------------------------------------------- /y/bloom.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package y 6 | 7 | import "math" 8 | 9 | // Filter is an encoded set of []byte keys. 10 | type Filter []byte 11 | 12 | func (f Filter) MayContainKey(k []byte) bool { 13 | return f.MayContain(Hash(k)) 14 | } 15 | 16 | // MayContain returns whether the filter may contain given key. False positives 17 | // are possible, where it returns true for keys not in the original set. 18 | func (f Filter) MayContain(h uint32) bool { 19 | if len(f) < 2 { 20 | return false 21 | } 22 | k := f[len(f)-1] 23 | if k > 30 { 24 | // This is reserved for potentially new encodings for short Bloom filters. 25 | // Consider it a match. 
26 | return true 27 | } 28 | nBits := uint32(8 * (len(f) - 1)) 29 | delta := h>>17 | h<<15 30 | for j := uint8(0); j < k; j++ { 31 | bitPos := h % nBits 32 | if f[bitPos/8]&(1<<(bitPos%8)) == 0 { 33 | return false 34 | } 35 | h += delta 36 | } 37 | return true 38 | } 39 | 40 | // NewFilter returns a new Bloom filter that encodes a set of []byte keys with 41 | // the given number of bits per key, approximately. 42 | // 43 | // A good bitsPerKey value is 10, which yields a filter with ~ 1% false 44 | // positive rate. 45 | func NewFilter(keys []uint32, bitsPerKey int) Filter { 46 | return Filter(appendFilter(nil, keys, bitsPerKey)) 47 | } 48 | 49 | // BloomBitsPerKey returns the bits per key required by bloomfilter based on 50 | // the false positive rate. 51 | func BloomBitsPerKey(numEntries int, fp float64) int { 52 | size := -1 * float64(numEntries) * math.Log(fp) / math.Pow(float64(0.69314718056), 2) 53 | locs := math.Ceil(float64(0.69314718056) * size / float64(numEntries)) 54 | return int(locs) 55 | } 56 | 57 | func appendFilter(buf []byte, keys []uint32, bitsPerKey int) []byte { 58 | if bitsPerKey < 0 { 59 | bitsPerKey = 0 60 | } 61 | // 0.69 is approximately ln(2). 62 | k := uint32(float64(bitsPerKey) * 0.69) 63 | if k < 1 { 64 | k = 1 65 | } 66 | if k > 30 { 67 | k = 30 68 | } 69 | 70 | nBits := len(keys) * bitsPerKey 71 | // For small len(keys), we can see a very high false positive rate. Fix it 72 | // by enforcing a minimum bloom filter length. 73 | if nBits < 64 { 74 | nBits = 64 75 | } 76 | nBytes := (nBits + 7) / 8 77 | nBits = nBytes * 8 78 | buf, filter := extend(buf, nBytes+1) 79 | 80 | for _, h := range keys { 81 | delta := h>>17 | h<<15 82 | for j := uint32(0); j < k; j++ { 83 | bitPos := h % uint32(nBits) 84 | filter[bitPos/8] |= 1 << (bitPos % 8) 85 | h += delta 86 | } 87 | } 88 | filter[nBytes] = uint8(k) 89 | 90 | return buf 91 | } 92 | 93 | // extend appends n zero bytes to b. It returns the overall slice (of length 94 | // n+len(originalB)) and the slice of n trailing zeroes. 95 | func extend(b []byte, n int) (overall, trailer []byte) { 96 | want := n + len(b) 97 | if want <= cap(b) { 98 | overall = b[:want] 99 | trailer = overall[len(b):] 100 | for i := range trailer { 101 | trailer[i] = 0 102 | } 103 | } else { 104 | // Grow the capacity exponentially, with a 1KiB minimum. 105 | c := 1024 106 | for c < want { 107 | c += c / 4 108 | } 109 | overall = make([]byte, want, c) 110 | trailer = overall[len(b):] 111 | copy(overall, b) 112 | } 113 | return overall, trailer 114 | } 115 | 116 | // hash implements a hashing algorithm similar to the Murmur hash. 117 | func Hash(b []byte) uint32 { 118 | const ( 119 | seed = 0xbc9f1d34 120 | m = 0xc6a4a793 121 | ) 122 | h := uint32(seed) ^ uint32(len(b))*m 123 | for ; len(b) >= 4; b = b[4:] { 124 | h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 125 | h *= m 126 | h ^= h >> 16 127 | } 128 | switch len(b) { 129 | case 3: 130 | h += uint32(b[2]) << 16 131 | fallthrough 132 | case 2: 133 | h += uint32(b[1]) << 8 134 | fallthrough 135 | case 1: 136 | h += uint32(b[0]) 137 | h *= m 138 | h ^= h >> 24 139 | } 140 | return h 141 | } 142 | 143 | // FilterPolicy implements the db.FilterPolicy interface from the leveldb/db 144 | // package. 145 | // 146 | // The integer value is the approximate number of bits used per key. A good 147 | // value is 10, which yields a filter with ~ 1% false positive rate. 
148 | // 149 | // It is valid to use the other API in this package (leveldb/bloom) without 150 | // using this type or the leveldb/db package. 151 | 152 | // type FilterPolicy int 153 | 154 | // // Name implements the db.FilterPolicy interface. 155 | // func (p FilterPolicy) Name() string { 156 | // // This string looks arbitrary, but its value is written to LevelDB .ldb 157 | // // files, and should be this exact value to be compatible with those files 158 | // // and with the C++ LevelDB code. 159 | // return "leveldb.BuiltinBloomFilter2" 160 | // } 161 | 162 | // // AppendFilter implements the db.FilterPolicy interface. 163 | // func (p FilterPolicy) AppendFilter(dst []byte, keys [][]byte) []byte { 164 | // return appendFilter(dst, keys, int(p)) 165 | // } 166 | 167 | // // MayContain implements the db.FilterPolicy interface. 168 | // func (p FilterPolicy) MayContain(filter, key []byte) bool { 169 | // return Filter(filter).MayContain(key) 170 | // } 171 | -------------------------------------------------------------------------------- /y/bloom_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package y 6 | 7 | import ( 8 | "testing" 9 | ) 10 | 11 | func (f Filter) String() string { 12 | s := make([]byte, 8*len(f)) 13 | for i, x := range f { 14 | for j := 0; j < 8; j++ { 15 | if x&(1<> 0) 72 | b[1] = uint8(uint32(i) >> 8) 73 | b[2] = uint8(uint32(i) >> 16) 74 | b[3] = uint8(uint32(i) >> 24) 75 | return b 76 | } 77 | 78 | nMediocreFilters, nGoodFilters := 0, 0 79 | loop: 80 | for length := 1; length <= 10000; length = nextLength(length) { 81 | keys := make([][]byte, 0, length) 82 | for i := 0; i < length; i++ { 83 | keys = append(keys, le32(i)) 84 | } 85 | var hashes []uint32 86 | for _, key := range keys { 87 | hashes = append(hashes, Hash(key)) 88 | } 89 | f := NewFilter(hashes, 10) 90 | 91 | if len(f) > (length*10/8)+40 { 92 | t.Errorf("length=%d: len(f)=%d is too large", length, len(f)) 93 | continue 94 | } 95 | 96 | // All added keys must match. 97 | for _, key := range keys { 98 | if !f.MayContainKey(key) { 99 | t.Errorf("length=%d: did not contain key %q", length, key) 100 | continue loop 101 | } 102 | } 103 | 104 | // Check false positive rate. 105 | nFalsePositive := 0 106 | for i := 0; i < 10000; i++ { 107 | if f.MayContainKey(le32(1e9 + i)) { 108 | nFalsePositive++ 109 | } 110 | } 111 | if nFalsePositive > 0.02*10000 { 112 | t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive) 113 | continue 114 | } 115 | if nFalsePositive > 0.0125*10000 { 116 | nMediocreFilters++ 117 | } else { 118 | nGoodFilters++ 119 | } 120 | } 121 | 122 | if nMediocreFilters > nGoodFilters/5 { 123 | t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters) 124 | } 125 | } 126 | 127 | func TestHash(t *testing.T) { 128 | // The magic want numbers come from running the C++ leveldb code in hash.cc. 
129 | testCases := []struct { 130 | s string 131 | want uint32 132 | }{ 133 | {"", 0xbc9f1d34}, 134 | {"g", 0xd04a8bda}, 135 | {"go", 0x3e0b0745}, 136 | {"gop", 0x0c326610}, 137 | {"goph", 0x8c9d6390}, 138 | {"gophe", 0x9bfd4b0a}, 139 | {"gopher", 0xa78edc7c}, 140 | {"I had a dream it would end this way.", 0xe14a9db9}, 141 | } 142 | for _, tc := range testCases { 143 | if got := Hash([]byte(tc.s)); got != tc.want { 144 | t.Errorf("s=%q: got 0x%08x, want 0x%08x", tc.s, got, tc.want) 145 | } 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /y/checksum.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package y 7 | 8 | import ( 9 | stderrors "errors" 10 | "hash/crc32" 11 | 12 | "github.com/cespare/xxhash/v2" 13 | 14 | "github.com/dgraph-io/badger/v4/pb" 15 | ) 16 | 17 | // ErrChecksumMismatch is returned at checksum mismatch. 18 | var ErrChecksumMismatch = stderrors.New("checksum mismatch") 19 | 20 | // CalculateChecksum calculates checksum for data using ct checksum type. 21 | func CalculateChecksum(data []byte, ct pb.Checksum_Algorithm) uint64 { 22 | switch ct { 23 | case pb.Checksum_CRC32C: 24 | return uint64(crc32.Checksum(data, CastagnoliCrcTable)) 25 | case pb.Checksum_XXHash64: 26 | return xxhash.Sum64(data) 27 | default: 28 | panic("checksum type not supported") 29 | } 30 | } 31 | 32 | // VerifyChecksum validates the checksum for the data against the given expected checksum. 33 | func VerifyChecksum(data []byte, expected *pb.Checksum) error { 34 | actual := CalculateChecksum(data, expected.Algo) 35 | if actual != expected.Sum { 36 | return Wrapf(ErrChecksumMismatch, "actual: %d, expected: %d", actual, expected.Sum) 37 | } 38 | return nil 39 | } 40 | -------------------------------------------------------------------------------- /y/encrypt.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package y 7 | 8 | import ( 9 | "bytes" 10 | "crypto/aes" 11 | "crypto/cipher" 12 | "crypto/rand" 13 | "io" 14 | ) 15 | 16 | // XORBlock encrypts the given data with AES and XOR's with IV. 17 | // Can be used for both encryption and decryption. IV is of 18 | // AES block size. 19 | func XORBlock(dst, src, key, iv []byte) error { 20 | block, err := aes.NewCipher(key) 21 | if err != nil { 22 | return err 23 | } 24 | stream := cipher.NewCTR(block, iv) 25 | stream.XORKeyStream(dst, src) 26 | return nil 27 | } 28 | 29 | func XORBlockAllocate(src, key, iv []byte) ([]byte, error) { 30 | block, err := aes.NewCipher(key) 31 | if err != nil { 32 | return nil, err 33 | } 34 | stream := cipher.NewCTR(block, iv) 35 | dst := make([]byte, len(src)) 36 | stream.XORKeyStream(dst, src) 37 | return dst, nil 38 | } 39 | 40 | func XORBlockStream(w io.Writer, src, key, iv []byte) error { 41 | block, err := aes.NewCipher(key) 42 | if err != nil { 43 | return err 44 | } 45 | stream := cipher.NewCTR(block, iv) 46 | sw := cipher.StreamWriter{S: stream, W: w} 47 | _, err = io.Copy(sw, bytes.NewReader(src)) 48 | return Wrapf(err, "XORBlockStream") 49 | } 50 | 51 | // GenerateIV generates IV. 
52 | func GenerateIV() ([]byte, error) { 53 | iv := make([]byte, aes.BlockSize) 54 | _, err := rand.Read(iv) 55 | return iv, err 56 | } 57 | -------------------------------------------------------------------------------- /y/encrypt_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package y 7 | 8 | import ( 9 | "crypto/aes" 10 | "crypto/rand" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func TestXORBlock(t *testing.T) { 17 | key := make([]byte, 32) 18 | _, _ = rand.Read(key) 19 | 20 | var iv []byte 21 | { 22 | b, err := aes.NewCipher(key) 23 | require.NoError(t, err) 24 | iv = make([]byte, b.BlockSize()) 25 | _, _ = rand.Read(iv) 26 | t.Logf("Using %d size IV\n", len(iv)) 27 | } 28 | 29 | src := make([]byte, 1024) 30 | _, _ = rand.Read(src) 31 | 32 | dst := make([]byte, 1024) 33 | err := XORBlock(dst, src, key, iv) 34 | require.NoError(t, err) 35 | 36 | act := make([]byte, 1024) 37 | err = XORBlock(act, dst, key, iv) 38 | require.NoError(t, err) 39 | require.Equal(t, src, act) 40 | 41 | // Now check if we can use the same byte slice as src and dst. While this is useful to know that 42 | // we can use src and dst as the same slice, this isn't applicable to Badger because we're 43 | // reading data right off mmap. We should not modify that data, so we have to use a different 44 | // slice for dst anyway. 45 | cp := append([]byte{}, src...) 46 | err = XORBlock(cp, cp, key, iv) 47 | require.NoError(t, err) 48 | require.Equal(t, dst, cp) 49 | 50 | err = XORBlock(cp, cp, key, iv) 51 | require.NoError(t, err) 52 | require.Equal(t, src, cp) 53 | } 54 | -------------------------------------------------------------------------------- /y/error.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package y 7 | 8 | // This file contains some functions for error handling. Note that we are moving 9 | // towards using x.Trace, i.e., rpc tracing using net/trace. But for now, these 10 | // functions are useful for simple checks logged on one machine. 11 | // Some common use cases are: 12 | // (1) You receive an error from an external lib, and would like to check/log fatal. 13 | // For this, use x.Check, x.Checkf. These will check for err != nil, which is 14 | // more common in Go. If you want to check for a boolean being true, use 15 | // x.Assert, x.Assertf. 16 | // (2) You receive an error from an external lib, and would like to pass it on with some 17 | // stack trace information. In this case, use x.Wrap or x.Wrapf. 18 | // (3) You want to generate a new error with stack trace info. Use x.Errorf. 19 | 20 | import ( 21 | "errors" 22 | "fmt" 23 | "log" 24 | ) 25 | 26 | var debugMode = false 27 | 28 | // Check logs fatal if err != nil. 29 | func Check(err error) { 30 | if err != nil { 31 | log.Fatalf("%+v", Wrap(err, "")) 32 | } 33 | } 34 | 35 | // Check2 acts as a convenience wrapper around Check, using the 2nd argument as error. 36 | func Check2(_ interface{}, err error) { 37 | Check(err) 38 | } 39 | 40 | // AssertTrue asserts that b is true. Otherwise, it logs fatal. 41 | func AssertTrue(b bool) { 42 | if !b { 43 | log.Fatalf("%+v", errors.New("Assert failed")) 44 | } 45 | } 46 | 47 | // AssertTruef is AssertTrue with extra info.
48 | func AssertTruef(b bool, format string, args ...interface{}) { 49 | if !b { 50 | log.Fatalf("%+v", fmt.Errorf(format, args...)) 51 | } 52 | } 53 | 54 | // Wrap wraps errors from external lib. 55 | func Wrap(err error, msg string) error { 56 | if !debugMode { 57 | if err == nil { 58 | return nil 59 | } 60 | return fmt.Errorf("%s err: %+v", msg, err) 61 | } 62 | return fmt.Errorf("%s: %w", msg, err) 63 | } 64 | 65 | // Wrapf is Wrap with extra info. 66 | func Wrapf(err error, format string, args ...interface{}) error { 67 | return Wrap(err, fmt.Sprintf(format, args...)) 68 | } 69 | 70 | func CombineErrors(one, other error) error { 71 | if one != nil && other != nil { 72 | return fmt.Errorf("%v; %v", one, other) 73 | } 74 | if one != nil && other == nil { 75 | return fmt.Errorf("%v", one) 76 | } 77 | if one == nil && other != nil { 78 | return fmt.Errorf("%v", other) 79 | } 80 | return nil 81 | } 82 | -------------------------------------------------------------------------------- /y/error_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package y 7 | 8 | import ( 9 | "errors" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestCombineWithBothErrorsPresent(t *testing.T) { 16 | combinedError := CombineErrors(errors.New("one"), errors.New("two")) 17 | require.Equal(t, "one; two", combinedError.Error()) 18 | } 19 | 20 | func TestCombineErrorsWithOneErrorPresent(t *testing.T) { 21 | combinedError := CombineErrors(errors.New("one"), nil) 22 | require.Equal(t, "one", combinedError.Error()) 23 | } 24 | 25 | func TestCombineErrorsWithOtherErrorPresent(t *testing.T) { 26 | combinedError := CombineErrors(nil, errors.New("other")) 27 | require.Equal(t, "other", combinedError.Error()) 28 | } 29 | 30 | func TestCombineErrorsWithBothErrorsAsNil(t *testing.T) { 31 | combinedError := CombineErrors(nil, nil) 32 | require.NoError(t, combinedError) 33 | } 34 | -------------------------------------------------------------------------------- /y/event_log.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package y 7 | 8 | import "golang.org/x/net/trace" 9 | 10 | var ( 11 | NoEventLog trace.EventLog = nilEventLog{} 12 | ) 13 | 14 | type nilEventLog struct{} 15 | 16 | func (nel nilEventLog) Printf(format string, a ...interface{}) {} 17 | 18 | func (nel nilEventLog) Errorf(format string, a ...interface{}) {} 19 | 20 | func (nel nilEventLog) Finish() {} 21 | -------------------------------------------------------------------------------- /y/file_dsync.go: -------------------------------------------------------------------------------- 1 | //go:build !dragonfly && !freebsd && !windows && !plan9 && !js && !wasip1 2 | // +build !dragonfly,!freebsd,!windows,!plan9,!js,!wasip1 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 
6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package y 10 | 11 | import "golang.org/x/sys/unix" 12 | 13 | func init() { 14 | datasyncFileFlag = unix.O_DSYNC 15 | } 16 | -------------------------------------------------------------------------------- /y/file_nodsync.go: -------------------------------------------------------------------------------- 1 | //go:build dragonfly || freebsd || windows || plan9 2 | // +build dragonfly freebsd windows plan9 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package y 10 | 11 | import "syscall" 12 | 13 | func init() { 14 | datasyncFileFlag = syscall.O_SYNC 15 | } 16 | -------------------------------------------------------------------------------- /y/iterator.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package y 7 | 8 | import ( 9 | "bytes" 10 | "encoding/binary" 11 | ) 12 | 13 | // ValueStruct represents the value info that can be associated with a key, but also the internal 14 | // Meta field. 15 | type ValueStruct struct { 16 | Meta byte 17 | UserMeta byte 18 | ExpiresAt uint64 19 | Value []byte 20 | 21 | Version uint64 // This field is not serialized. Only for internal usage. 22 | } 23 | 24 | func sizeVarint(x uint64) (n int) { 25 | for { 26 | n++ 27 | x >>= 7 28 | if x == 0 { 29 | break 30 | } 31 | } 32 | return n 33 | } 34 | 35 | // EncodedSize is the size of the ValueStruct when encoded 36 | func (v *ValueStruct) EncodedSize() uint32 { 37 | sz := len(v.Value) + 2 // meta, usermeta. 38 | enc := sizeVarint(v.ExpiresAt) 39 | return uint32(sz + enc) 40 | } 41 | 42 | // Decode uses the length of the slice to infer the length of the Value field. 43 | func (v *ValueStruct) Decode(b []byte) { 44 | v.Meta = b[0] 45 | v.UserMeta = b[1] 46 | var sz int 47 | v.ExpiresAt, sz = binary.Uvarint(b[2:]) 48 | v.Value = b[2+sz:] 49 | } 50 | 51 | // Encode expects a slice of length at least v.EncodedSize(). 52 | func (v *ValueStruct) Encode(b []byte) uint32 { 53 | b[0] = v.Meta 54 | b[1] = v.UserMeta 55 | sz := binary.PutUvarint(b[2:], v.ExpiresAt) 56 | n := copy(b[2+sz:], v.Value) 57 | return uint32(2 + sz + n) 58 | } 59 | 60 | // EncodeTo should be kept in sync with the Encode function above. The reason 61 | // this function exists is to avoid creating byte arrays per key-value pair in 62 | // table/builder.go. 63 | func (v *ValueStruct) EncodeTo(buf *bytes.Buffer) { 64 | buf.WriteByte(v.Meta) 65 | buf.WriteByte(v.UserMeta) 66 | var enc [binary.MaxVarintLen64]byte 67 | sz := binary.PutUvarint(enc[:], v.ExpiresAt) 68 | 69 | buf.Write(enc[:sz]) 70 | buf.Write(v.Value) 71 | } 72 | 73 | // Iterator is an interface for a basic iterator. 74 | type Iterator interface { 75 | Next() 76 | Rewind() 77 | Seek(key []byte) 78 | Key() []byte 79 | Value() ValueStruct 80 | Valid() bool 81 | 82 | // All iterators should be closed so that file garbage collection works. 83 | Close() error 84 | } 85 | -------------------------------------------------------------------------------- /y/zstd.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package y 7 | 8 | import ( 9 | "sync" 10 | 11 | "github.com/klauspost/compress/zstd" 12 | ) 13 | 14 | var ( 15 | decoder *zstd.Decoder 16 | encoder *zstd.Encoder 17 | 18 | encOnce, decOnce sync.Once 19 | ) 20 | 21 | // ZSTDDecompress decompresses a block using ZSTD algorithm. 22 | func ZSTDDecompress(dst, src []byte) ([]byte, error) { 23 | decOnce.Do(func() { 24 | var err error 25 | decoder, err = zstd.NewReader(nil) 26 | Check(err) 27 | }) 28 | return decoder.DecodeAll(src, dst[:0]) 29 | } 30 | 31 | // ZSTDCompress compresses a block using ZSTD algorithm. 32 | func ZSTDCompress(dst, src []byte, compressionLevel int) ([]byte, error) { 33 | encOnce.Do(func() { 34 | var err error 35 | level := zstd.EncoderLevelFromZstd(compressionLevel) 36 | encoder, err = zstd.NewWriter(nil, zstd.WithEncoderLevel(level)) 37 | Check(err) 38 | }) 39 | return encoder.EncodeAll(src, dst[:0]), nil 40 | } 41 | 42 | // ZSTDCompressBound returns the worst case size needed for a destination buffer. 43 | // Klauspost ZSTD library does not provide any API for Compression Bound. This 44 | // calculation is based on the DataDog ZSTD library. 45 | // See https://pkg.go.dev/github.com/DataDog/zstd#CompressBound 46 | func ZSTDCompressBound(srcSize int) int { 47 | lowLimit := 128 << 10 // 128 kB 48 | var margin int 49 | if srcSize < lowLimit { 50 | margin = (lowLimit - srcSize) >> 11 51 | } 52 | return srcSize + (srcSize >> 8) + margin 53 | } 54 | --------------------------------------------------------------------------------
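As a usage sketch of the zstd helpers above: a compress/decompress round trip that pre-sizes the destination buffer with `ZSTDCompressBound`. The sample payload and compression level are arbitrary choices for this sketch; only the three `y` functions themselves come from this package.

```go
// Round-trip sketch for y.ZSTDCompress / y.ZSTDDecompress.
// Payload and level are arbitrary; the buffer is pre-sized with
// y.ZSTDCompressBound so EncodeAll can reuse it without growing.
package main

import (
	"bytes"
	"fmt"

	"github.com/dgraph-io/badger/v4/y"
)

func main() {
	src := bytes.Repeat([]byte("badger "), 1024)

	// Worst-case sized destination buffer (capacity only; length 0).
	dst := make([]byte, 0, y.ZSTDCompressBound(len(src)))
	compressed, err := y.ZSTDCompress(dst, src, 3) // level 3 is an arbitrary choice
	if err != nil {
		panic(err)
	}

	out, err := y.ZSTDDecompress(nil, compressed)
	if err != nil {
		panic(err)
	}
	fmt.Printf("src=%d compressed=%d roundtrip_ok=%v\n",
		len(src), len(compressed), bytes.Equal(src, out))
}
```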