├── .cargo └── config.toml ├── .editorconfig ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches ├── atomics_benchmark.rs ├── common.rs ├── int_benchmark.rs ├── large_values_benchmark.rs ├── lmdb_benchmark.rs ├── multithreaded_insert_benchmark.rs ├── savepoint_benchmark.rs ├── syscall_benchmark.rs └── userspace_cache_benchmark.rs ├── build.rs ├── clippy.toml ├── deny.toml ├── docs └── design.md ├── examples ├── bincode_keys.rs ├── int_keys.rs ├── multithread.rs └── special_values.rs ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ ├── common.rs │ └── fuzz_redb.rs ├── justfile ├── py_publish.sh ├── pyproject.toml ├── rust-toolchain ├── rustfmt.toml ├── src ├── backends.rs ├── complex_types.rs ├── db.rs ├── error.rs ├── lib.rs ├── multimap_table.rs ├── python.rs ├── sealed.rs ├── table.rs ├── transaction_tracker.rs ├── transactions.rs ├── tree_store │ ├── btree.rs │ ├── btree_base.rs │ ├── btree_iters.rs │ ├── btree_mutator.rs │ ├── mod.rs │ ├── page_store │ │ ├── base.rs │ │ ├── bitmap.rs │ │ ├── buddy_allocator.rs │ │ ├── cached_file.rs │ │ ├── file_backend │ │ │ ├── fallback.rs │ │ │ ├── mod.rs │ │ │ ├── unix.rs │ │ │ └── windows.rs │ │ ├── header.rs │ │ ├── in_memory_backend.rs │ │ ├── layout.rs │ │ ├── lru_cache.rs │ │ ├── mod.rs │ │ ├── page_manager.rs │ │ ├── region.rs │ │ ├── savepoint.rs │ │ └── xxh3.rs │ ├── table_tree.rs │ └── table_tree_base.rs ├── tuple_types.rs └── types.rs ├── test └── __init__.py └── tests ├── backward_compatibility.rs ├── basic_tests.rs ├── integration_tests.rs ├── multimap_tests.rs └── multithreading_tests.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.wasm32-wasip1] 2 | runner = "wasmtime --dir=/tmp" 3 | 4 | [target.wasm32-wasip1-threads] 5 | runner = "wasmtime --dir=/tmp -W threads=y -S threads=y" 6 | 7 | [target.wasm32-wasip2] 8 | runner = "wasmtime --dir=/tmp" 9 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 4 7 | indent_style = space 8 | insert_final_newline = true 9 | max_line_length = 100 10 | trim_trailing_whitespace = true 11 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: # required for actions/cache to work 6 | branches: 7 | - master 8 | 9 | jobs: 10 | ci: 11 | strategy: 12 | matrix: 13 | os: ["ubuntu-latest", "macos-latest", "windows-latest", "ubuntu-24.04-arm"] 14 | 15 | runs-on: ${{ matrix.os }} 16 | 17 | env: 18 | RUSTFLAGS: --deny warnings 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Cache 23 | id: rust-cache 24 | uses: actions/cache@v4 25 | with: 26 | path: | 27 | ~/.cargo/bin/ 28 | ~/.cargo/registry/index/ 29 | ~/.cargo/registry/cache/ 30 | ~/.cargo/git/db/ 31 | target/ 32 | key: ${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/Cargo.toml', '.github/workflows/*.yml', 'rust-toolchain') }} 33 | 34 | - name: Check for forbidden words 35 | if: runner.os != 'Windows' 36 | run: "! grep --include='*.rs' -RE 'to_be_bytes|from_be_bytes|dbg!' ." 
37 | 38 | - name: Install Python 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: "3.12" 42 | 43 | - name: Install Rust 44 | run: | 45 | rustup component add rustfmt 46 | rustup component add clippy 47 | 48 | - name: OSX x86 rust 49 | if: startsWith(matrix.os, 'macos') 50 | run: | 51 | # For some reason this is required to run the fuzzer on OSX 52 | rustup target add x86_64-apple-darwin 53 | 54 | - name: Setup wasmtime 55 | uses: bytecodealliance/actions/wasmtime/setup@v1 56 | with: 57 | version: "24.0.0" 58 | 59 | - name: Install cargo-deny 60 | if: steps.rust-cache.outputs.cache-hit != 'true' 61 | run: cargo install --force --version 0.16.2 cargo-deny --locked 62 | 63 | - name: Install cargo-fuzz 64 | if: steps.rust-cache.outputs.cache-hit != 'true' 65 | run: cargo install --force --version 0.12.0 cargo-fuzz --locked 66 | 67 | - name: Install just 68 | if: steps.rust-cache.outputs.cache-hit != 'true' 69 | run: cargo install --force --version 1.36.0 just --locked 70 | 71 | - name: Compile 72 | run: cargo build --all-targets --all-features 73 | 74 | - name: Fuzzer 75 | if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos') 76 | run: just fuzz_ci 77 | 78 | - name: Run tests 79 | run: just build test 80 | 81 | - name: Clippy 82 | run: cargo clippy --all --all-targets -- -Dwarnings 83 | 84 | - name: Format 85 | run: cargo fmt --all -- --check 86 | 87 | - name: Run CPython wrapper tests 88 | if: runner.os != 'Windows' 89 | run: | 90 | python3 -m venv venv 91 | source venv/bin/activate 92 | pip3 install --upgrade pip 93 | pip3 install maturin 94 | just test_py 95 | 96 | - name: Run CPython wrapper tests 97 | if: runner.os == 'Windows' 98 | run: | 99 | python3 -m venv venv 100 | venv\Scripts\activate 101 | pip3 install --upgrade pip 102 | pip3 install maturin 103 | just test_py 104 | 105 | - name: Run WASI tests 106 | if: runner.os != 'Windows' 107 | run: | 108 | RUSTFLAGS="" just test_wasi 109 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | # IntelliJ 13 | .idea/ 14 | **/*.iml 15 | 16 | 17 | **/__pycache__ 18 | **/*.pyc 19 | 20 | # Profiling 21 | perf.data* 22 | flamegraph.svg 23 | 24 | # benchmark and test temporary files 25 | /.tmp* 26 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "redb" 3 | description = "Rust Embedded DataBase" 4 | homepage = "https://www.redb.org" 5 | repository = "https://github.com/cberner/redb" 6 | readme = "README.md" 7 | license = "MIT OR Apache-2.0" 8 | version = "2.6.0" 9 | edition = "2024" 10 | rust-version = "1.85" 11 | authors = ["Christopher Berner "] 12 | exclude = ["fuzz/"] 13 | 14 | [lib] 15 | crate-type = ["cdylib", "rlib"] 16 | 17 | [build-dependencies] 18 | pyo3-build-config = { version = "0.24.1", optional = true } 19 | 20 | [dependencies] 21 | log = { version = "0.4.17", optional = true } 22 | pyo3 = { version = "0.24.1", features=["extension-module", "abi3-py37"], optional = true 
} 23 | 24 | [target.'cfg(unix)'.dependencies] 25 | libc = "0.2.104" 26 | 27 | # Common test/bench dependencies 28 | [dev-dependencies] 29 | rand = "0.9" 30 | tempfile = "3.5.0" 31 | # for backwards compatibility testing - pin at 2.6.0 32 | redb2_6 = { version = "=2.6.0", package = "redb" } 33 | bincode = "2.0.1" 34 | walkdir = "2.5.0" 35 | byte-unit = "5.1.6" 36 | fastrand = "2.0.0" 37 | sled = "0.34.7" 38 | libc = "0.2.99" 39 | 40 | # Just benchmarking dependencies, which don't build on wasi 41 | [target.'cfg(not(target_os = "wasi"))'.dev-dependencies] 42 | ctrlc = "3.2.3" 43 | heed = "0.20" 44 | sanakirja = "=1.4.1" 45 | sanakirja-core = "=1.4.1" 46 | rocksdb = { version = "0.22.0", default-features = false, features = ["lz4"] } 47 | fjall = "=2.10" 48 | comfy-table = "7.0.1" 49 | env_logger = "0.11" 50 | 51 | [target.'cfg(target_os = "linux")'.dev-dependencies] 52 | io-uring = "0.7.4" 53 | 54 | [features] 55 | # This feature is still experimental, and is not considered stable 56 | python = ["dep:pyo3", "dep:pyo3-build-config"] 57 | # Enables log messages 58 | logging = ["dep:log"] 59 | # Enable cache hit metrics 60 | cache_metrics = [] 61 | 62 | [profile.bench] 63 | debug = true 64 | 65 | [[bench]] 66 | name = "atomics_benchmark" 67 | harness = false 68 | 69 | [[bench]] 70 | name = "multithreaded_insert_benchmark" 71 | harness = false 72 | 73 | [[bench]] 74 | name = "userspace_cache_benchmark" 75 | harness = false 76 | 77 | [[bench]] 78 | name = "savepoint_benchmark" 79 | harness = false 80 | 81 | [[bench]] 82 | name = "lmdb_benchmark" 83 | harness = false 84 | 85 | [[bench]] 86 | name = "large_values_benchmark" 87 | harness = false 88 | 89 | [[bench]] 90 | name = "int_benchmark" 91 | harness = false 92 | 93 | [[bench]] 94 | name = "syscall_benchmark" 95 | harness = false 96 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 Christopher Berner 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # redb 2 | 3 | ![CI](https://github.com/cberner/redb/actions/workflows/ci.yml/badge.svg) 4 | [![Crates.io](https://img.shields.io/crates/v/redb.svg)](https://crates.io/crates/redb) 5 | [![Documentation](https://docs.rs/redb/badge.svg)](https://docs.rs/redb) 6 | [![License](https://img.shields.io/crates/l/redb)](https://crates.io/crates/redb) 7 | [![dependency status](https://deps.rs/repo/github/cberner/redb/status.svg)](https://deps.rs/repo/github/cberner/redb) 8 | 9 | A simple, portable, high-performance, ACID, embedded key-value store. 10 | 11 | redb is written in pure Rust and is loosely inspired by [lmdb](http://www.lmdb.tech/doc/). Data is stored in a collection 12 | of copy-on-write B-trees. For more details, see the [design doc](docs/design.md) 13 | 14 | ```rust 15 | use redb::{Database, Error, ReadableTable, TableDefinition}; 16 | 17 | const TABLE: TableDefinition<&str, u64> = TableDefinition::new("my_data"); 18 | 19 | fn main() -> Result<(), Error> { 20 | let db = Database::create("my_db.redb")?; 21 | let write_txn = db.begin_write()?; 22 | { 23 | let mut table = write_txn.open_table(TABLE)?; 24 | table.insert("my_key", &123)?; 25 | } 26 | write_txn.commit()?; 27 | 28 | let read_txn = db.begin_read()?; 29 | let table = read_txn.open_table(TABLE)?; 30 | assert_eq!(table.get("my_key")?.unwrap().value(), 123); 31 | 32 | Ok(()) 33 | } 34 | ``` 35 | 36 | ## Status 37 | redb is undergoing active development, and should be considered beta quality. The file format is stable, 38 | but redb has not been widely deployed in production systems (at least to my knowledge). 
39 | 40 | ## Features 41 | * Zero-copy, thread-safe, `BTreeMap` based API 42 | * Fully ACID-compliant transactions 43 | * MVCC support for concurrent readers & writer, without blocking 44 | * Crash-safe by default 45 | * Savepoints and rollbacks 46 | 47 | ## Development 48 | To run all the tests and benchmarks a few extra dependencies are required: 49 | * `cargo install cargo-deny --locked` 50 | * `cargo install cargo-fuzz --locked` 51 | * `apt install libclang-dev` 52 | 53 | ## Benchmarks 54 | redb has similar performance to other top embedded key-value stores such as lmdb and rocksdb 55 | 56 | | | redb | lmdb | rocksdb | sled | sanakirja | 57 | |---------------------------|------------|------------|----------------|------------|-------------| 58 | | bulk load | 2689ms | 1247ms | 5330ms | 5892ms | **1187ms** | 59 | | individual writes | **226ms** | 419ms | 703ms | 816ms | 398ms | 60 | | batch writes | 2522ms | 2070ms | **1047ms** | 1867ms | 2776ms | 61 | | len() | **0ms** | **0ms** | 304ms | 444ms | 64ms | 62 | | random reads | 860ms | **624ms** | 2432ms | 1596ms | 875ms | 63 | | random reads | 866ms | **624ms** | 2464ms | 1588ms | 842ms | 64 | | random range reads | 2347ms | **1179ms** | 4436ms | 4907ms | 1367ms | 65 | | random range reads | 2322ms | **1207ms** | 4465ms | 4732ms | 1373ms | 66 | | random reads (4 threads) | 337ms | **158ms** | 732ms | 488ms | 349ms | 67 | | random reads (8 threads) | 185ms | **81ms** | 433ms | 259ms | 277ms | 68 | | random reads (16 threads) | 116ms | **49ms** | 238ms | 165ms | 1708ms | 69 | | random reads (32 threads) | 100ms | **44ms** | 203ms | 142ms | 4714ms | 70 | | removals | 1889ms | **803ms** | 2038ms | 2371ms | 1170ms | 71 | | uncompacted size | 1.00 GiB | 582.22 MiB | **206.38 MiB** | 457.01 MiB | 4.00 GiB | 72 | | compacted size | 311.23 MiB | 284.46 MiB | **106.26 MiB** | N/A | N/A | 73 | 74 | Source code for benchmark [here](./benches/lmdb_benchmark.rs). Results collected on a Ryzen 5900X with Samsung 980 PRO NVMe. 75 | 76 | ## License 77 | 78 | Licensed under either of 79 | 80 | * [Apache License, Version 2.0](LICENSE-APACHE) 81 | * [MIT License](LICENSE-MIT) 82 | 83 | at your option. 84 | 85 | ### Contribution 86 | 87 | Unless you explicitly state otherwise, any contribution intentionally 88 | submitted for inclusion in the work by you, as defined in the Apache-2.0 89 | license, shall be dual licensed as above, without any additional terms or 90 | conditions. 
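
## Savepoint example (illustrative)

The savepoint feature listed under Features above is exercised in [`benches/savepoint_benchmark.rs`](./benches/savepoint_benchmark.rs). The sketch below is a minimal illustration of that same persistent-savepoint API; the file name `savepoint_example.redb` and the keys are made up for this example, and the error handling simply mirrors the quick-start at the top of this README:

```rust
use redb::{Database, Error, TableDefinition};

const TABLE: TableDefinition<&str, u64> = TableDefinition::new("my_data");

fn main() -> Result<(), Error> {
    // Hypothetical file name, used only for this sketch
    let db = Database::create("savepoint_example.redb")?;

    // Commit some data we want to keep
    let write_txn = db.begin_write()?;
    {
        let mut table = write_txn.open_table(TABLE)?;
        table.insert("kept", &1)?;
    }
    write_txn.commit()?;

    // Capture the current state in a persistent savepoint,
    // created in a fresh transaction as the benchmark does
    let write_txn = db.begin_write()?;
    let savepoint_id = write_txn.persistent_savepoint()?;
    write_txn.commit()?;

    // Make a change we later decide to roll back
    let write_txn = db.begin_write()?;
    {
        let mut table = write_txn.open_table(TABLE)?;
        table.insert("discarded", &2)?;
    }
    write_txn.commit()?;

    // Restore the savepoint and commit the rollback
    let mut write_txn = db.begin_write()?;
    let savepoint = write_txn.get_persistent_savepoint(savepoint_id)?;
    write_txn.restore_savepoint(&savepoint)?;
    write_txn.commit()?;

    // Clean up the savepoint once it is no longer needed
    let write_txn = db.begin_write()?;
    write_txn.delete_persistent_savepoint(savepoint_id)?;
    write_txn.commit()?;

    let read_txn = db.begin_read()?;
    let table = read_txn.open_table(TABLE)?;
    assert_eq!(table.get("kept")?.unwrap().value(), 1);
    assert!(table.get("discarded")?.is_none());
    Ok(())
}
```

Note that the sketch creates and restores the savepoint in transactions that have not opened any tables, mirroring the pattern used in the benchmark.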
91 | -------------------------------------------------------------------------------- /benches/atomics_benchmark.rs: -------------------------------------------------------------------------------- 1 | use std::hint::black_box; 2 | use std::sync::atomic::{AtomicU64, Ordering}; 3 | use std::sync::{Mutex, RwLock}; 4 | use std::thread; 5 | use std::time::SystemTime; 6 | 7 | const ITERATIONS: usize = 1000 * 1000; 8 | 9 | fn baseline(num_threads: usize) { 10 | let start = SystemTime::now(); 11 | for _ in 0..num_threads { 12 | thread::scope(|s| { 13 | s.spawn(|| { 14 | let mut value = 0u64; 15 | for _ in 0..ITERATIONS { 16 | let value = black_box(&mut value); 17 | *value += 1; 18 | } 19 | for _ in 0..ITERATIONS { 20 | let value = black_box(&mut value); 21 | *value -= 1; 22 | } 23 | }); 24 | }); 25 | } 26 | 27 | let end = SystemTime::now(); 28 | let duration = end.duration_since(start).unwrap(); 29 | println!( 30 | "baseline (NOT atomic) ({} threads): {} ops in {}ms", 31 | num_threads, 32 | 2 * ITERATIONS, 33 | duration.as_millis(), 34 | ); 35 | } 36 | 37 | fn atomics(num_threads: usize) { 38 | let start = SystemTime::now(); 39 | let value = AtomicU64::new(0); 40 | for _ in 0..num_threads { 41 | thread::scope(|s| { 42 | s.spawn(|| { 43 | for _ in 0..ITERATIONS { 44 | let value = black_box(&value); 45 | value.fetch_add(1, Ordering::Release); 46 | } 47 | for _ in 0..ITERATIONS { 48 | let value = black_box(&value); 49 | value.fetch_sub(1, Ordering::Release); 50 | } 51 | }); 52 | }); 53 | } 54 | assert_eq!(0, value.load(Ordering::Acquire)); 55 | 56 | let end = SystemTime::now(); 57 | let duration = end.duration_since(start).unwrap(); 58 | println!( 59 | "atomics ({} threads): {} ops in {}ms", 60 | num_threads, 61 | 2 * ITERATIONS, 62 | duration.as_millis(), 63 | ); 64 | } 65 | 66 | fn mutex(num_threads: usize) { 67 | let start = SystemTime::now(); 68 | let value = Mutex::new(0u64); 69 | for _ in 0..num_threads { 70 | thread::scope(|s| { 71 | s.spawn(|| { 72 | for _ in 0..ITERATIONS { 73 | let value = black_box(&value); 74 | *value.lock().unwrap() += 1; 75 | } 76 | for _ in 0..ITERATIONS { 77 | let value = black_box(&value); 78 | *value.lock().unwrap() -= 1; 79 | } 80 | }); 81 | }); 82 | } 83 | assert_eq!(0u64, *value.lock().unwrap()); 84 | 85 | let end = SystemTime::now(); 86 | let duration = end.duration_since(start).unwrap(); 87 | println!( 88 | "mutex ({} threads): {} ops in {}ms", 89 | num_threads, 90 | 2 * ITERATIONS, 91 | duration.as_millis(), 92 | ); 93 | } 94 | 95 | fn rw_lock(num_threads: usize) { 96 | let start = SystemTime::now(); 97 | let value = RwLock::new(0u64); 98 | for _ in 0..num_threads { 99 | thread::scope(|s| { 100 | s.spawn(|| { 101 | for _ in 0..ITERATIONS { 102 | let value = black_box(&value); 103 | *value.write().unwrap() += 1; 104 | } 105 | for _ in 0..ITERATIONS { 106 | let value = black_box(&value); 107 | *value.write().unwrap() -= 1; 108 | } 109 | }); 110 | }); 111 | } 112 | assert_eq!(0u64, *value.read().unwrap()); 113 | 114 | let end = SystemTime::now(); 115 | let duration = end.duration_since(start).unwrap(); 116 | println!( 117 | "rwlock ({} threads): {} ops in {}ms", 118 | num_threads, 119 | 2 * ITERATIONS, 120 | duration.as_millis(), 121 | ); 122 | } 123 | 124 | fn main() { 125 | for threads in [1, 2, 4, 8, 16, 32, 64] { 126 | baseline(threads); 127 | atomics(threads); 128 | mutex(threads); 129 | rw_lock(threads); 130 | println!(); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /benches/int_benchmark.rs: 
-------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use std::env::current_dir; 4 | use std::fs; 5 | use tempfile::{NamedTempFile, TempDir}; 6 | 7 | mod common; 8 | use common::*; 9 | 10 | use rand::rngs::StdRng; 11 | use rand::{Rng, SeedableRng}; 12 | use std::time::{Duration, Instant}; 13 | 14 | const ELEMENTS: usize = 1_000_000; 15 | 16 | /// Returns pairs of key, value 17 | fn random_data(count: usize) -> Vec<(u32, u64)> { 18 | let mut rng = StdRng::seed_from_u64(0); 19 | let mut pairs = vec![]; 20 | for _ in 0..count { 21 | pairs.push(rng.random()); 22 | } 23 | pairs 24 | } 25 | 26 | fn benchmark<T: BenchDatabase>(db: T) -> Vec<(&'static str, Duration)> { 27 | let mut results = Vec::new(); 28 | let pairs = random_data(1_000_000); 29 | let mut written = 0; 30 | 31 | let start = Instant::now(); 32 | let mut txn = db.write_transaction(); 33 | let mut inserter = txn.get_inserter(); 34 | { 35 | for _ in 0..ELEMENTS { 36 | let len = pairs.len(); 37 | let (key, value) = pairs[written % len]; 38 | inserter 39 | .insert(&key.to_le_bytes(), &value.to_le_bytes()) 40 | .unwrap(); 41 | written += 1; 42 | } 43 | } 44 | drop(inserter); 45 | txn.commit().unwrap(); 46 | 47 | let end = Instant::now(); 48 | let duration = end - start; 49 | println!( 50 | "{}: Bulk loaded {} (u32, u64) pairs in {}ms", 51 | T::db_type_name(), 52 | ELEMENTS, 53 | duration.as_millis() 54 | ); 55 | results.push(("bulk load", duration)); 56 | 57 | results 58 | } 59 | 60 | fn main() { 61 | let _ = env_logger::try_init(); 62 | 63 | let redb_results = { 64 | let tmpfile: NamedTempFile = NamedTempFile::new_in(current_dir().unwrap()).unwrap(); 65 | let mut db = redb::Database::create(tmpfile.path()).unwrap(); 66 | let table = RedbBenchDatabase::new(&mut db); 67 | benchmark(table) 68 | }; 69 | 70 | let lmdb_results = { 71 | let tmpfile: TempDir = tempfile::tempdir_in(current_dir().unwrap()).unwrap(); 72 | let env = unsafe { 73 | heed::EnvOpenOptions::new() 74 | .map_size(10 * 4096 * 1024 * 1024) 75 | .open(tmpfile.path()) 76 | .unwrap() 77 | }; 78 | let table = HeedBenchDatabase::new(env); 79 | benchmark(table) 80 | }; 81 | 82 | let rocksdb_results = { 83 | let tmpfile: TempDir = tempfile::tempdir_in(current_dir().unwrap()).unwrap(); 84 | 85 | let mut bb = rocksdb::BlockBasedOptions::default(); 86 | bb.set_block_cache(&rocksdb::Cache::new_lru_cache(4 * 1_024 * 1_024 * 1_024)); 87 | bb.set_bloom_filter(10.0, false); 88 | 89 | let mut opts = rocksdb::Options::default(); 90 | opts.set_block_based_table_factory(&bb); 91 | opts.create_if_missing(true); 92 | opts.increase_parallelism( 93 | std::thread::available_parallelism().map_or(1, |n| n.get()) as i32 94 | ); 95 | 96 | let db = rocksdb::OptimisticTransactionDB::open(&opts, tmpfile.path()).unwrap(); 97 | let table = RocksdbBenchDatabase::new(&db); 98 | benchmark(table) 99 | }; 100 | 101 | let sled_results = { 102 | let tmpfile: TempDir = tempfile::tempdir_in(current_dir().unwrap()).unwrap(); 103 | let db = sled::Config::new().path(tmpfile.path()).open().unwrap(); 104 | let table = SledBenchDatabase::new(&db, tmpfile.path()); 105 | benchmark(table) 106 | }; 107 | 108 | let sanakirja_results = { 109 | let tmpfile: NamedTempFile = NamedTempFile::new_in(current_dir().unwrap()).unwrap(); 110 | fs::remove_file(tmpfile.path()).unwrap(); 111 | let db = sanakirja::Env::new(tmpfile.path(), 4096 * 1024 * 1024, 2).unwrap(); 112 | let table = SanakirjaBenchDatabase::new(&db, tmpfile.path()); 113 | benchmark(table) 114 | }; 115 | 116 | let mut rows =
Vec::new(); 117 | 118 | for (benchmark, _duration) in &redb_results { 119 | rows.push(vec![benchmark.to_string()]); 120 | } 121 | 122 | for results in [ 123 | redb_results, 124 | lmdb_results, 125 | rocksdb_results, 126 | sled_results, 127 | sanakirja_results, 128 | ] { 129 | for (i, (_benchmark, duration)) in results.iter().enumerate() { 130 | rows[i].push(format!("{}ms", duration.as_millis())); 131 | } 132 | } 133 | 134 | let mut table = comfy_table::Table::new(); 135 | table.set_width(100); 136 | table.set_header(["", "redb", "lmdb", "rocksdb", "sled", "sanakirja"]); 137 | for row in rows { 138 | table.add_row(row); 139 | } 140 | 141 | println!(); 142 | println!("{table}"); 143 | } 144 | -------------------------------------------------------------------------------- /benches/large_values_benchmark.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use std::env::current_dir; 4 | use tempfile::{NamedTempFile, TempDir}; 5 | 6 | mod common; 7 | use common::*; 8 | 9 | use rand::RngCore; 10 | use std::time::{Duration, Instant}; 11 | 12 | const ELEMENTS: usize = 1_000_000; 13 | 14 | /// Returns pairs of key, value 15 | fn random_data(count: usize, key_size: usize, value_size: usize) -> Vec<(Vec<u8>, Vec<u8>)> { 16 | let mut pairs = vec![]; 17 | 18 | for _ in 0..count { 19 | let mut key = vec![0; key_size]; 20 | rand::rng().fill_bytes(&mut key); 21 | let mut value = vec![0; value_size]; 22 | rand::rng().fill_bytes(&mut value); 23 | pairs.push((key, value)); 24 | } 25 | 26 | pairs 27 | } 28 | 29 | fn benchmark<T: BenchDatabase>(db: T) -> Vec<(&'static str, Duration)> { 30 | let mut results = Vec::new(); 31 | let mut pairs = random_data(1_000_000, 24, 150); 32 | let mut written = 0; 33 | 34 | let mut bigpairs = random_data(100, 24, 2_000_000); 35 | let bigelements = 4000; 36 | 37 | let start = Instant::now(); 38 | let mut txn = db.write_transaction(); 39 | let mut inserter = txn.get_inserter(); 40 | { 41 | for _ in 0..bigelements { 42 | let len = bigpairs.len(); 43 | let (key, value) = &mut bigpairs[written % len]; 44 | key[16..].copy_from_slice(&(written as u64).to_le_bytes()); 45 | inserter.insert(key, value).unwrap(); 46 | written += 1; 47 | } 48 | for _ in 0..ELEMENTS { 49 | let len = pairs.len(); 50 | let (key, value) = &mut pairs[written % len]; 51 | key[16..].copy_from_slice(&(written as u64).to_le_bytes()); 52 | inserter.insert(key, value).unwrap(); 53 | written += 1; 54 | } 55 | } 56 | drop(inserter); 57 | txn.commit().unwrap(); 58 | 59 | let end = Instant::now(); 60 | let duration = end - start; 61 | println!( 62 | "{}: Bulk loaded {} 2MB items and {} small items in {}ms", 63 | T::db_type_name(), 64 | bigelements, 65 | ELEMENTS, 66 | duration.as_millis() 67 | ); 68 | results.push(("bulk load (2MB values)", duration)); 69 | 70 | results 71 | } 72 | 73 | fn main() { 74 | let _ = env_logger::try_init(); 75 | 76 | let redb_latency_results = { 77 | let tmpfile: NamedTempFile = NamedTempFile::new_in(current_dir().unwrap()).unwrap(); 78 | let mut db = redb::Database::builder().create(tmpfile.path()).unwrap(); 79 | let table = RedbBenchDatabase::new(&mut db); 80 | benchmark(table) 81 | }; 82 | 83 | let lmdb_results = { 84 | let tmpfile: TempDir = tempfile::tempdir_in(current_dir().unwrap()).unwrap(); 85 | let env = unsafe { 86 | heed::EnvOpenOptions::new() 87 | .map_size(10 * 4096 * 1024 * 1024) 88 | .open(tmpfile.path()) 89 | .unwrap() 90 | }; 91 | let table = HeedBenchDatabase::new(env); 92 | benchmark(table) 93 | }; 94 | 95 | let rocksdb_results = {
96 | let tmpfile: TempDir = tempfile::tempdir_in(current_dir().unwrap()).unwrap(); 97 | 98 | let mut bb = rocksdb::BlockBasedOptions::default(); 99 | bb.set_block_cache(&rocksdb::Cache::new_lru_cache(4 * 1_024 * 1_024 * 1_024)); 100 | bb.set_bloom_filter(10.0, false); 101 | 102 | let mut opts = rocksdb::Options::default(); 103 | opts.set_block_based_table_factory(&bb); 104 | opts.create_if_missing(true); 105 | opts.increase_parallelism( 106 | std::thread::available_parallelism().map_or(1, |n| n.get()) as i32 107 | ); 108 | 109 | let db = rocksdb::OptimisticTransactionDB::open(&opts, tmpfile.path()).unwrap(); 110 | let table = RocksdbBenchDatabase::new(&db); 111 | benchmark(table) 112 | }; 113 | 114 | let sled_results = { 115 | let tmpfile: TempDir = tempfile::tempdir_in(current_dir().unwrap()).unwrap(); 116 | let db = sled::Config::new().path(tmpfile.path()).open().unwrap(); 117 | let table = SledBenchDatabase::new(&db, tmpfile.path()); 118 | benchmark(table) 119 | }; 120 | 121 | let mut rows = Vec::new(); 122 | 123 | for (benchmark, _duration) in &redb_latency_results { 124 | rows.push(vec![benchmark.to_string()]); 125 | } 126 | 127 | for results in [ 128 | redb_latency_results, 129 | lmdb_results, 130 | rocksdb_results, 131 | sled_results, 132 | ] { 133 | for (i, (_benchmark, duration)) in results.iter().enumerate() { 134 | rows[i].push(format!("{}ms", duration.as_millis())); 135 | } 136 | } 137 | 138 | let mut table = comfy_table::Table::new(); 139 | table.set_width(100); 140 | table.set_header(["", "redb", "lmdb", "rocksdb", "sled"]); 141 | for row in rows { 142 | table.add_row(row); 143 | } 144 | 145 | println!(); 146 | println!("{table}"); 147 | } 148 | -------------------------------------------------------------------------------- /benches/lmdb_benchmark.rs: -------------------------------------------------------------------------------- 1 | use std::env::current_dir; 2 | use std::path::Path; 3 | use std::sync::Arc; 4 | use std::{fs, process, thread}; 5 | use tempfile::{NamedTempFile, TempDir}; 6 | 7 | mod common; 8 | use common::*; 9 | 10 | use std::time::{Duration, Instant}; 11 | 12 | const READ_ITERATIONS: usize = 2; 13 | const BULK_ELEMENTS: usize = 1_000_000; 14 | const INDIVIDUAL_WRITES: usize = 1_000; 15 | const BATCH_WRITES: usize = 100; 16 | const BATCH_SIZE: usize = 1000; 17 | const SCAN_ITERATIONS: usize = 2; 18 | const NUM_READS: usize = 1_000_000; 19 | const NUM_SCANS: usize = 500_000; 20 | const SCAN_LEN: usize = 10; 21 | const KEY_SIZE: usize = 24; 22 | const VALUE_SIZE: usize = 150; 23 | const RNG_SEED: u64 = 3; 24 | 25 | const CACHE_SIZE: usize = 4 * 1_024 * 1_024 * 1_024; // 4GB 26 | 27 | /// Returns pairs of key, value 28 | fn random_pair(rng: &mut fastrand::Rng) -> ([u8; KEY_SIZE], Vec<u8>) { 29 | let mut key = [0u8; KEY_SIZE]; 30 | rng.fill(&mut key); 31 | let mut value = vec![0u8; VALUE_SIZE]; 32 | rng.fill(&mut value); 33 | 34 | (key, value) 35 | } 36 | 37 | fn make_rng() -> fastrand::Rng { 38 | fastrand::Rng::with_seed(RNG_SEED) 39 | } 40 | 41 | fn make_rng_shards(shards: usize, elements: usize) -> Vec<fastrand::Rng> { 42 | let mut rngs = vec![]; 43 | let elements_per_shard = elements / shards; 44 | for i in 0..shards { 45 | let mut rng = make_rng(); 46 | for _ in 0..(i * elements_per_shard) { 47 | random_pair(&mut rng); 48 | } 49 | rngs.push(rng); 50 | } 51 | 52 | rngs 53 | } 54 | 55 | fn benchmark<T: BenchDatabase + Send + Sync>(db: T, path: &Path) -> Vec<(String, ResultType)> { 56 | let mut rng = make_rng(); 57 | let mut results = Vec::new(); 58 | let mut db = Arc::new(db); 59 | 60 | let start =
Instant::now(); 61 | let mut txn = db.write_transaction(); 62 | let mut inserter = txn.get_inserter(); 63 | { 64 | for _ in 0..BULK_ELEMENTS { 65 | let (key, value) = random_pair(&mut rng); 66 | inserter.insert(&key, &value).unwrap(); 67 | } 68 | } 69 | drop(inserter); 70 | txn.commit().unwrap(); 71 | 72 | let end = Instant::now(); 73 | let duration = end - start; 74 | println!( 75 | "{}: Bulk loaded {} items in {}ms", 76 | T::db_type_name(), 77 | BULK_ELEMENTS, 78 | duration.as_millis() 79 | ); 80 | results.push(("bulk load".to_string(), ResultType::Duration(duration))); 81 | 82 | let start = Instant::now(); 83 | { 84 | for _ in 0..INDIVIDUAL_WRITES { 85 | let mut txn = db.write_transaction(); 86 | let mut inserter = txn.get_inserter(); 87 | let (key, value) = random_pair(&mut rng); 88 | inserter.insert(&key, &value).unwrap(); 89 | drop(inserter); 90 | txn.commit().unwrap(); 91 | } 92 | } 93 | 94 | let end = Instant::now(); 95 | let duration = end - start; 96 | println!( 97 | "{}: Wrote {} individual items in {}ms", 98 | T::db_type_name(), 99 | INDIVIDUAL_WRITES, 100 | duration.as_millis() 101 | ); 102 | results.push(( 103 | "individual writes".to_string(), 104 | ResultType::Duration(duration), 105 | )); 106 | 107 | let start = Instant::now(); 108 | { 109 | for _ in 0..BATCH_WRITES { 110 | let mut txn = db.write_transaction(); 111 | let mut inserter = txn.get_inserter(); 112 | for _ in 0..BATCH_SIZE { 113 | let (key, value) = random_pair(&mut rng); 114 | inserter.insert(&key, &value).unwrap(); 115 | } 116 | drop(inserter); 117 | txn.commit().unwrap(); 118 | } 119 | } 120 | 121 | let end = Instant::now(); 122 | let duration = end - start; 123 | println!( 124 | "{}: Wrote {} batches of {} items in {}ms", 125 | T::db_type_name(), 126 | BATCH_WRITES, 127 | BATCH_SIZE, 128 | duration.as_millis() 129 | ); 130 | results.push(("batch writes".to_string(), ResultType::Duration(duration))); 131 | 132 | let elements = BULK_ELEMENTS + INDIVIDUAL_WRITES + BATCH_SIZE * BATCH_WRITES; 133 | let txn = db.read_transaction(); 134 | { 135 | { 136 | let start = Instant::now(); 137 | let len = txn.get_reader().len(); 138 | assert_eq!(len, elements as u64); 139 | let end = Instant::now(); 140 | let duration = end - start; 141 | println!("{}: len() in {}ms", T::db_type_name(), duration.as_millis()); 142 | results.push(("len()".to_string(), ResultType::Duration(duration))); 143 | } 144 | 145 | for _ in 0..READ_ITERATIONS { 146 | let mut rng = make_rng(); 147 | let start = Instant::now(); 148 | let mut checksum = 0u64; 149 | let mut expected_checksum = 0u64; 150 | let reader = txn.get_reader(); 151 | for _ in 0..NUM_READS { 152 | let (key, value) = random_pair(&mut rng); 153 | let result = reader.get(&key).unwrap(); 154 | checksum += result.as_ref()[0] as u64; 155 | expected_checksum += value[0] as u64; 156 | } 157 | assert_eq!(checksum, expected_checksum); 158 | let end = Instant::now(); 159 | let duration = end - start; 160 | println!( 161 | "{}: Random read {} items in {}ms", 162 | T::db_type_name(), 163 | NUM_READS, 164 | duration.as_millis() 165 | ); 166 | results.push(("random reads".to_string(), ResultType::Duration(duration))); 167 | } 168 | 169 | for _ in 0..SCAN_ITERATIONS { 170 | let mut rng = make_rng(); 171 | let start = Instant::now(); 172 | let reader = txn.get_reader(); 173 | let mut value_sum = 0; 174 | for _ in 0..NUM_SCANS { 175 | let (key, _value) = random_pair(&mut rng); 176 | let mut iter = reader.range_from(&key); 177 | for _ in 0..SCAN_LEN { 178 | if let Some((_, value)) = iter.next() { 179 
| value_sum += value.as_ref()[0]; 180 | } else { 181 | break; 182 | } 183 | } 184 | } 185 | assert!(value_sum > 0); 186 | let end = Instant::now(); 187 | let duration = end - start; 188 | println!( 189 | "{}: Random range read {} x {} elements in {}ms", 190 | T::db_type_name(), 191 | NUM_SCANS, 192 | SCAN_LEN, 193 | duration.as_millis() 194 | ); 195 | results.push(( 196 | "random range reads".to_string(), 197 | ResultType::Duration(duration), 198 | )); 199 | } 200 | } 201 | drop(txn); 202 | 203 | for num_threads in [4, 8, 16, 32] { 204 | let barrier = Arc::new(std::sync::Barrier::new(num_threads)); 205 | let mut rngs = make_rng_shards(num_threads, elements); 206 | let start = Instant::now(); 207 | 208 | thread::scope(|s| { 209 | for _ in 0..num_threads { 210 | let barrier = barrier.clone(); 211 | let db2 = db.clone(); 212 | let rng = rngs.pop().unwrap(); 213 | s.spawn(move || { 214 | barrier.wait(); 215 | let txn = db2.read_transaction(); 216 | let mut checksum = 0u64; 217 | let mut expected_checksum = 0u64; 218 | let reader = txn.get_reader(); 219 | let mut rng = rng.clone(); 220 | for _ in 0..(elements / num_threads) { 221 | let (key, value) = random_pair(&mut rng); 222 | let result = reader.get(&key).unwrap(); 223 | checksum += result.as_ref()[0] as u64; 224 | expected_checksum += value[0] as u64; 225 | } 226 | assert_eq!(checksum, expected_checksum); 227 | }); 228 | } 229 | }); 230 | 231 | let end = Instant::now(); 232 | let duration = end - start; 233 | println!( 234 | "{}: Random read ({} threads) {} items in {}ms", 235 | T::db_type_name(), 236 | num_threads, 237 | elements, 238 | duration.as_millis() 239 | ); 240 | results.push(( 241 | format!("random reads ({num_threads} threads)"), 242 | ResultType::Duration(duration), 243 | )); 244 | } 245 | 246 | let start = Instant::now(); 247 | let deletes = elements / 2; 248 | { 249 | let mut rng = make_rng(); 250 | let mut txn = db.write_transaction(); 251 | let mut inserter = txn.get_inserter(); 252 | for _ in 0..deletes { 253 | let (key, _value) = random_pair(&mut rng); 254 | inserter.remove(&key).unwrap(); 255 | } 256 | drop(inserter); 257 | txn.commit().unwrap(); 258 | } 259 | 260 | let end = Instant::now(); 261 | let duration = end - start; 262 | println!( 263 | "{}: Removed {} items in {}ms", 264 | T::db_type_name(), 265 | deletes, 266 | duration.as_millis() 267 | ); 268 | results.push(("removals".to_string(), ResultType::Duration(duration))); 269 | 270 | let uncompacted_size = database_size(path); 271 | results.push(( 272 | "uncompacted size".to_string(), 273 | ResultType::SizeInBytes(uncompacted_size), 274 | )); 275 | let start = Instant::now(); 276 | if Arc::get_mut(&mut db).unwrap().compact() { 277 | let end = Instant::now(); 278 | let duration = end - start; 279 | println!( 280 | "{}: Compacted in {}ms", 281 | T::db_type_name(), 282 | duration.as_millis() 283 | ); 284 | { 285 | let mut txn = db.write_transaction(); 286 | let mut inserter = txn.get_inserter(); 287 | let (key, value) = random_pair(&mut rng); 288 | inserter.insert(&key, &value).unwrap(); 289 | drop(inserter); 290 | txn.commit().unwrap(); 291 | } 292 | let compacted_size = database_size(path); 293 | results.push(( 294 | "compacted size".to_string(), 295 | ResultType::SizeInBytes(compacted_size), 296 | )); 297 | } else { 298 | results.push(("compacted size".to_string(), ResultType::NA)); 299 | } 300 | 301 | results 302 | } 303 | 304 | fn database_size(path: &Path) -> u64 { 305 | let mut size = 0u64; 306 | for result in walkdir::WalkDir::new(path) { 307 | let entry = 
result.unwrap(); 308 | size += entry.metadata().unwrap().len(); 309 | } 310 | size 311 | } 312 | 313 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] 314 | enum ResultType { 315 | Duration(Duration), 316 | SizeInBytes(u64), 317 | NA, 318 | } 319 | 320 | impl std::fmt::Display for ResultType { 321 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 322 | use byte_unit::{Byte, UnitType}; 323 | 324 | match self { 325 | ResultType::NA => write!(f, "N/A"), 326 | ResultType::Duration(d) => write!(f, "{:?}ms", d.as_millis()), 327 | ResultType::SizeInBytes(s) => { 328 | let b = Byte::from_u64(*s).get_appropriate_unit(UnitType::Binary); 329 | write!(f, "{b:.2}") 330 | } 331 | } 332 | } 333 | } 334 | 335 | fn main() { 336 | let _ = env_logger::try_init(); 337 | let tmpdir = current_dir().unwrap().join(".benchmark"); 338 | fs::create_dir(&tmpdir).unwrap(); 339 | 340 | let tmpdir2 = tmpdir.clone(); 341 | ctrlc::set_handler(move || { 342 | fs::remove_dir_all(&tmpdir2).unwrap(); 343 | process::exit(1); 344 | }) 345 | .unwrap(); 346 | 347 | let redb_latency_results = { 348 | let tmpfile: NamedTempFile = NamedTempFile::new_in(&tmpdir).unwrap(); 349 | let mut db = redb::Database::builder() 350 | .set_cache_size(CACHE_SIZE) 351 | .create(tmpfile.path()) 352 | .unwrap(); 353 | let table = RedbBenchDatabase::new(&mut db); 354 | benchmark(table, tmpfile.path()) 355 | }; 356 | 357 | let lmdb_results = { 358 | let tempdir: TempDir = tempfile::tempdir_in(&tmpdir).unwrap(); 359 | let env = unsafe { 360 | heed::EnvOpenOptions::new() 361 | .map_size(4096 * 1024 * 1024) 362 | .open(tempdir.path()) 363 | .unwrap() 364 | }; 365 | let table = HeedBenchDatabase::new(env); 366 | benchmark(table, tempdir.path()) 367 | }; 368 | 369 | let rocksdb_results = { 370 | let tmpfile: TempDir = tempfile::tempdir_in(&tmpdir).unwrap(); 371 | 372 | let mut bb = rocksdb::BlockBasedOptions::default(); 373 | bb.set_block_cache(&rocksdb::Cache::new_lru_cache(CACHE_SIZE)); 374 | bb.set_bloom_filter(10.0, false); 375 | 376 | let mut opts = rocksdb::Options::default(); 377 | opts.set_block_based_table_factory(&bb); 378 | opts.create_if_missing(true); 379 | opts.increase_parallelism( 380 | std::thread::available_parallelism().map_or(1, |n| n.get()) as i32 381 | ); 382 | 383 | let db = rocksdb::OptimisticTransactionDB::open(&opts, tmpfile.path()).unwrap(); 384 | let table = RocksdbBenchDatabase::new(&db); 385 | benchmark(table, tmpfile.path()) 386 | }; 387 | 388 | let sled_results = { 389 | let tmpfile: TempDir = tempfile::tempdir_in(&tmpdir).unwrap(); 390 | 391 | let db = sled::Config::new() 392 | .path(tmpfile.path()) 393 | .cache_capacity(CACHE_SIZE as u64) 394 | .open() 395 | .unwrap(); 396 | 397 | let table = SledBenchDatabase::new(&db, tmpfile.path()); 398 | benchmark(table, tmpfile.path()) 399 | }; 400 | 401 | let sanakirja_results = { 402 | let tmpfile: NamedTempFile = NamedTempFile::new_in(&tmpdir).unwrap(); 403 | fs::remove_file(tmpfile.path()).unwrap(); 404 | let db = sanakirja::Env::new(tmpfile.path(), 4096 * 1024 * 1024, 2).unwrap(); 405 | let table = SanakirjaBenchDatabase::new(&db, &tmpdir); 406 | benchmark(table, tmpfile.path()) 407 | }; 408 | 409 | let fjall_results = { 410 | let tmpfile: TempDir = tempfile::tempdir_in(&tmpdir).unwrap(); 411 | 412 | let mut db = fjall::Config::new(tmpfile.path()) 413 | .cache_size(CACHE_SIZE.try_into().unwrap()) 414 | .open_transactional() 415 | .unwrap(); 416 | 417 | let table = FjallBenchDatabase::new(&mut db); 418 | benchmark(table, tmpfile.path()) 419 | }; 420 | 
421 | fs::remove_dir_all(&tmpdir).unwrap(); 422 | 423 | let mut rows = Vec::new(); 424 | 425 | for (benchmark, _duration) in &redb_latency_results { 426 | rows.push(vec![benchmark.to_string()]); 427 | } 428 | 429 | let results = [ 430 | redb_latency_results, 431 | lmdb_results, 432 | rocksdb_results, 433 | sled_results, 434 | sanakirja_results, 435 | fjall_results, 436 | ]; 437 | 438 | let mut identified_smallests = vec![vec![false; results.len()]; rows.len()]; 439 | for (i, identified_smallests_row) in identified_smallests.iter_mut().enumerate() { 440 | let mut smallest = None; 441 | for (j, _) in identified_smallests_row.iter().enumerate() { 442 | let (_, rt) = &results[j][i]; 443 | smallest = match smallest { 444 | Some((_, prev)) if rt < prev => Some((j, rt)), 445 | Some((pi, prev)) => Some((pi, prev)), 446 | None => Some((j, rt)), 447 | }; 448 | } 449 | let (j, _rt) = smallest.unwrap(); 450 | identified_smallests_row[j] = true; 451 | } 452 | 453 | for (j, results) in results.iter().enumerate() { 454 | for (i, (_benchmark, result_type)) in results.iter().enumerate() { 455 | rows[i].push(if identified_smallests[i][j] { 456 | format!("**{result_type}**") 457 | } else { 458 | result_type.to_string() 459 | }); 460 | } 461 | } 462 | 463 | let mut table = comfy_table::Table::new(); 464 | table.load_preset(comfy_table::presets::ASCII_MARKDOWN); 465 | table.set_width(100); 466 | table.set_header(["", "redb", "lmdb", "rocksdb", "sled", "sanakirja", "fjall"]); 467 | for row in rows { 468 | table.add_row(row); 469 | } 470 | 471 | println!(); 472 | println!("{table}"); 473 | } 474 | -------------------------------------------------------------------------------- /benches/multithreaded_insert_benchmark.rs: -------------------------------------------------------------------------------- 1 | use std::env::current_dir; 2 | use std::{fs, process, thread}; 3 | use tempfile::NamedTempFile; 4 | 5 | use rand::rngs::StdRng; 6 | use rand::{Rng, SeedableRng}; 7 | use redb::{Database, ReadableTableMetadata, TableDefinition}; 8 | use std::time::Instant; 9 | 10 | const ELEMENTS: u64 = 1_000_000; 11 | const RNG_SEED: u64 = 3; 12 | 13 | const TABLE1: TableDefinition<u128, u128> = TableDefinition::new("x"); 14 | const TABLE2: TableDefinition<u128, u128> = TableDefinition::new("y"); 15 | 16 | #[inline(never)] 17 | fn single_threaded(values: &[u128]) { 18 | let tmpfile: NamedTempFile = NamedTempFile::new_in(current_dir().unwrap()).unwrap(); 19 | let db = Database::builder().create(tmpfile.path()).unwrap(); 20 | 21 | let start = Instant::now(); 22 | let write_txn = db.begin_write().unwrap(); 23 | { 24 | let mut table1 = write_txn.open_table(TABLE1).unwrap(); 25 | let mut table2 = write_txn.open_table(TABLE2).unwrap(); 26 | 27 | for value in values.iter() { 28 | table1.insert(value, value).unwrap(); 29 | table2.insert(value, value).unwrap(); 30 | } 31 | } 32 | write_txn.commit().unwrap(); 33 | let end = Instant::now(); 34 | let duration = end - start; 35 | println!( 36 | "single threaded load: {} pairs in {}ms", 37 | 2 * ELEMENTS, 38 | duration.as_millis() 39 | ); 40 | let read_txn = db.begin_read().unwrap(); 41 | let table = read_txn.open_table(TABLE1).unwrap(); 42 | assert_eq!(table.len().unwrap(), ELEMENTS); 43 | let table = read_txn.open_table(TABLE2).unwrap(); 44 | assert_eq!(table.len().unwrap(), ELEMENTS); 45 | } 46 | 47 | #[inline(never)] 48 | fn multi_threaded(values: &[u128]) { 49 | let tmpfile: NamedTempFile = NamedTempFile::new_in(current_dir().unwrap()).unwrap(); 50 | let db = Database::builder().create(tmpfile.path()).unwrap();
51 | 52 | let start = Instant::now(); 53 | let write_txn = db.begin_write().unwrap(); 54 | { 55 | let mut table1 = write_txn.open_table(TABLE1).unwrap(); 56 | let mut table2 = write_txn.open_table(TABLE2).unwrap(); 57 | 58 | thread::scope(|s| { 59 | s.spawn(|| { 60 | for value in values.iter() { 61 | table1.insert(value, value).unwrap(); 62 | } 63 | }); 64 | s.spawn(|| { 65 | for value in values.iter() { 66 | table2.insert(value, value).unwrap(); 67 | } 68 | }); 69 | }); 70 | } 71 | write_txn.commit().unwrap(); 72 | let end = Instant::now(); 73 | let duration = end - start; 74 | println!( 75 | "2 threaded load: {} pairs in {}ms", 76 | 2 * ELEMENTS, 77 | duration.as_millis() 78 | ); 79 | let read_txn = db.begin_read().unwrap(); 80 | let table = read_txn.open_table(TABLE1).unwrap(); 81 | assert_eq!(table.len().unwrap(), ELEMENTS); 82 | let table = read_txn.open_table(TABLE2).unwrap(); 83 | assert_eq!(table.len().unwrap(), ELEMENTS); 84 | } 85 | 86 | // TODO: multi-threaded inserts are slower. Probably due to lock contention checking dirty pages 87 | fn main() { 88 | let mut rng = StdRng::seed_from_u64(RNG_SEED); 89 | let mut values = vec![]; 90 | for _ in 0..ELEMENTS { 91 | values.push(rng.random()); 92 | } 93 | 94 | let tmpdir = current_dir().unwrap().join(".benchmark"); 95 | fs::create_dir(&tmpdir).unwrap(); 96 | 97 | let tmpdir2 = tmpdir.clone(); 98 | ctrlc::set_handler(move || { 99 | fs::remove_dir_all(&tmpdir2).unwrap(); 100 | process::exit(1); 101 | }) 102 | .unwrap(); 103 | 104 | single_threaded(&values); 105 | 106 | multi_threaded(&values); 107 | 108 | fs::remove_dir_all(&tmpdir).unwrap(); 109 | } 110 | -------------------------------------------------------------------------------- /benches/savepoint_benchmark.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use std::env::current_dir; 4 | use tempfile::NamedTempFile; 5 | 6 | use rand::Rng; 7 | use redb::{Database, TableDefinition}; 8 | use std::time::{Duration, Instant}; 9 | 10 | const TABLE: TableDefinition<&[u8], &[u8]> = TableDefinition::new("x"); 11 | 12 | const VALUE_SIZE: usize = 3_000; 13 | const SAVEPOINT_WINDOW: usize = 10; 14 | 15 | struct Timing { 16 | insert: Duration, 17 | savepoint_creation: Duration, 18 | savepoint_restore: Duration, 19 | } 20 | 21 | /// Returns pairs of key, value 22 | fn random_data(count: usize, key_size: usize, value_size: usize) -> Vec<(Vec<u8>, Vec<u8>)> { 23 | let mut pairs = vec![]; 24 | 25 | for _ in 0..count { 26 | let key: Vec<u8> = (0..key_size).map(|_| rand::rng().random()).collect(); 27 | let value: Vec<u8> = (0..value_size).map(|_| rand::rng().random()).collect(); 28 | pairs.push((key, value)); 29 | } 30 | 31 | pairs 32 | } 33 | 34 | fn benchmark(db: &Database, insertions: usize) -> Timing { 35 | let mut pairs = random_data(insertions, 24, VALUE_SIZE); 36 | let mut written = 0; 37 | 38 | let mut total_savepoint_creation = Duration::from_micros(0); 39 | let mut total_insert = Duration::from_micros(0); 40 | let mut first_savepoint = None; 41 | for _ in 0..SAVEPOINT_WINDOW { 42 | let txn = db.begin_write().unwrap(); 43 | let mut table = txn.open_table(TABLE).unwrap(); 44 | let start = Instant::now(); 45 | { 46 | for _ in 0..(insertions / SAVEPOINT_WINDOW) { 47 | let len = pairs.len(); 48 | let (key, value) = &mut pairs[written % len]; 49 | key[16..].copy_from_slice(&(written as u64).to_le_bytes()); 50 | table.insert(key.as_slice(), value.as_slice()).unwrap(); 51 | written += 1; 52 | } 53 | } 54 | let end = Instant::now(); 55 |
total_insert += end - start; 56 | drop(table); 57 | txn.commit().unwrap(); 58 | 59 | let txn = db.begin_write().unwrap(); 60 | let start = Instant::now(); 61 | let savepoint_id = txn.persistent_savepoint().unwrap(); 62 | if first_savepoint.is_none() { 63 | first_savepoint = Some(savepoint_id); 64 | } 65 | let end = Instant::now(); 66 | total_savepoint_creation += end - start; 67 | txn.commit().unwrap(); 68 | } 69 | 70 | let mut txn = db.begin_write().unwrap(); 71 | let savepoint = txn 72 | .get_persistent_savepoint(first_savepoint.unwrap()) 73 | .unwrap(); 74 | let start = Instant::now(); 75 | txn.restore_savepoint(&savepoint).unwrap(); 76 | let end = Instant::now(); 77 | let restore_duration = end - start; 78 | txn.abort().unwrap(); 79 | 80 | let txn = db.begin_write().unwrap(); 81 | for id in txn.list_persistent_savepoints().unwrap() { 82 | txn.delete_persistent_savepoint(id).unwrap(); 83 | } 84 | txn.commit().unwrap(); 85 | 86 | Timing { 87 | insert: total_insert / insertions as u32, 88 | savepoint_creation: total_savepoint_creation / SAVEPOINT_WINDOW as u32, 89 | savepoint_restore: restore_duration, 90 | } 91 | } 92 | 93 | fn main() { 94 | let tmpfile: NamedTempFile = NamedTempFile::new_in(current_dir().unwrap()).unwrap(); 95 | let db = Database::builder().create(tmpfile.path()).unwrap(); 96 | 97 | let mut table = comfy_table::Table::new(); 98 | table.set_width(100); 99 | table.set_header([ 100 | "DB size", 101 | "insert()", 102 | "persistent_savepoint()", 103 | "restore_savepoint()", 104 | ]); 105 | for inserts in [ 106 | 10_000, 20_000, 40_000, 80_000, 160_000, 320_000, 640_000, 1_280_000, 2_560_000, 5_120_000, 107 | ] { 108 | let timing = benchmark(&db, inserts); 109 | let len = tmpfile.as_file().metadata().unwrap().len(); 110 | let row = vec![ 111 | format!("{}MiB", len / 1024 / 1024), 112 | format!("{}ns", timing.insert.as_nanos()), 113 | format!("{}us", timing.savepoint_creation.as_micros()), 114 | format!("{}us", timing.savepoint_restore.as_micros()), 115 | ]; 116 | table.add_row(row); 117 | } 118 | 119 | println!(); 120 | println!("{table}"); 121 | } 122 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-check-cfg=cfg(fuzzing)"); 3 | 4 | if std::env::var("CARGO_CFG_FUZZING").is_ok() 5 | && std::env::var("CARGO_CFG_TARGET_OS").as_deref() == Ok("macos") 6 | { 7 | println!("cargo:rustc-cdylib-link-arg=-undefined"); 8 | println!("cargo:rustc-cdylib-link-arg=dynamic_lookup"); 9 | } 10 | 11 | #[cfg(feature = "python")] 12 | pyo3_build_config::add_extension_module_link_args(); 13 | } 14 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | disallowed-methods = [ 2 | "usize::to_le_bytes", 3 | ] 4 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | # This template contains all of the possible sections and their default values 2 | 3 | # Note that all fields that take a lint level have these possible values: 4 | # * deny - An error will be produced and the check will fail 5 | # * warn - A warning will be produced, but the check will not fail 6 | # * allow - No warning or error will be produced, though in some cases a note 7 | # will be 8 | 9 | # The values provided in this 
template are the default values that will be used 10 | # when any section or field is not specified in your own configuration 11 | 12 | # This section is considered when running `cargo deny check advisories` 13 | # More documentation for the advisories section can be found here: 14 | # https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html 15 | [advisories] 16 | # The path where the advisory database is cloned/fetched into 17 | db-path = "~/.cargo/advisory-db" 18 | # The url of the advisory database to use 19 | db-urls = ["https://github.com/rustsec/advisory-db"] 20 | # The lint level for crates that have been yanked from their source registry 21 | yanked = "warn" 22 | # A list of advisory IDs to ignore. Note that ignored advisories will still 23 | # output a note when they are encountered. 24 | ignore = [ 25 | #"RUSTSEC-0000-0000", 26 | ] 27 | # Threshold for security vulnerabilities, any vulnerability with a CVSS score 28 | # lower than the range specified will be ignored. Note that ignored advisories 29 | # will still output a note when they are encountered. 30 | # * None - CVSS Score 0.0 31 | # * Low - CVSS Score 0.1 - 3.9 32 | # * Medium - CVSS Score 4.0 - 6.9 33 | # * High - CVSS Score 7.0 - 8.9 34 | # * Critical - CVSS Score 9.0 - 10.0 35 | #severity-threshold = 36 | 37 | # This section is considered when running `cargo deny check licenses` 38 | # More documentation for the licenses section can be found here: 39 | # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html 40 | [licenses] 41 | # List of explictly allowed licenses 42 | # See https://spdx.org/licenses/ for list of possible licenses 43 | # [possible values: any SPDX 3.7 short identifier (+ optional exception)]. 44 | allow = [ 45 | "MIT", 46 | # "BSD-2-Clause", 47 | # "BSD-3-Clause", 48 | "Apache-2.0", 49 | "Apache-2.0 WITH LLVM-exception", 50 | ] 51 | # The confidence threshold for detecting a license from license text. 52 | # The higher the value, the more closely the license text must be to the 53 | # canonical license text of a valid SPDX license file. 54 | # [possible values: any between 0.0 and 1.0]. 55 | confidence-threshold = 0.8 56 | # Allow 1 or more licenses on a per-crate basis, so that particular licenses 57 | # aren't accepted for every possible crate as with the normal allow list 58 | exceptions = [ 59 | # Each entry is the crate and version constraint, and its specific allow 60 | # list 61 | { allow = ["Unicode-3.0"], name = "unicode-ident", version = "*" }, # Used only by comfy-table in the benchmarks 62 | # { allow = ["ISC"], name = "libloading", version = "*" }, # Used only by rocksdb in the benchmarks 63 | ] 64 | 65 | # Some crates don't have (easily) machine readable licensing information, 66 | # adding a clarification entry for it allows you to manually specify the 67 | # licensing information 68 | #[[licenses.clarify]] 69 | # The name of the crate the clarification applies to 70 | #name = "ring" 71 | # THe optional version constraint for the crate 72 | #version = "*" 73 | # The SPDX expression for the license requirements of the crate 74 | #expression = "MIT AND ISC AND OpenSSL" 75 | # One or more files in the crate's source used as the "source of truth" for 76 | # the license expression. 
If the contents match, the clarification will be used 77 | # when running the license check, otherwise the clarification will be ignored 78 | # and the crate will be checked normally, which may produce warnings or errors 79 | # depending on the rest of your configuration 80 | #license-files = [ 81 | # Each entry is a crate relative path, and the (opaque) hash of its contents 82 | #{ path = "LICENSE", hash = 0xbd0eed23 } 83 | #] 84 | 85 | [licenses.private] 86 | # If true, ignores workspace crates that aren't published, or are only 87 | # published to private registries 88 | ignore = false 89 | # One or more private registries that you might publish crates to, if a crate 90 | # is only published to private registries, and ignore is true, the crate will 91 | # not have its license(s) checked 92 | registries = [ 93 | #"https://sekretz.com/registry 94 | ] 95 | 96 | # This section is considered when running `cargo deny check bans`. 97 | # More documentation about the 'bans' section can be found here: 98 | # https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html 99 | [bans] 100 | # Lint level for when multiple versions of the same crate are detected 101 | multiple-versions = "warn" 102 | # The graph highlighting used when creating dotgraphs for crates 103 | # with multiple versions 104 | # * lowest-version - The path to the lowest versioned duplicate is highlighted 105 | # * simplest-path - The path to the version with the fewest edges is highlighted 106 | # * all - Both lowest-version and simplest-path are used 107 | highlight = "all" 108 | # List of crates that are allowed. Use with care! 109 | allow = [ 110 | #{ name = "ansi_term", version = "=0.11.0" }, 111 | ] 112 | # List of crates to deny 113 | deny = [ 114 | # Each entry the name of a crate and a version range. If version is 115 | # not specified, all versions will be matched. 116 | #{ name = "ansi_term", version = "=0.11.0" }, 117 | ] 118 | # Certain crates/versions that will be skipped when doing duplicate detection. 119 | skip = [ 120 | #{ name = "ansi_term", version = "=0.11.0" }, 121 | ] 122 | # Similarly to `skip` allows you to skip certain crates during duplicate 123 | # detection. Unlike skip, it also includes the entire tree of transitive 124 | # dependencies starting at the specified crate, up to a certain depth, which is 125 | # by default infinite 126 | skip-tree = [ 127 | #{ name = "ansi_term", version = "=0.11.0", depth = 20 }, 128 | ] 129 | 130 | # This section is considered when running `cargo deny check sources`. 131 | # More documentation about the 'sources' section can be found here: 132 | # https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html 133 | [sources] 134 | # Lint level for what to happen when a crate from a crate registry that is not 135 | # in the allow list is encountered 136 | unknown-registry = "warn" 137 | # Lint level for what to happen when a crate from a git repository that is not 138 | # in the allow list is encountered 139 | unknown-git = "warn" 140 | # List of URLs for allowed crate registries. Defaults to the crates.io index 141 | # if not specified. If it is specified but empty, no registries are allowed. 
142 | allow-registry = ["https://github.com/rust-lang/crates.io-index"] 143 | # List of URLs for allowed Git repositories 144 | allow-git = [] 145 | -------------------------------------------------------------------------------- /examples/bincode_keys.rs: -------------------------------------------------------------------------------- 1 | use std::any::type_name; 2 | use std::cmp::Ordering; 3 | use std::fmt::Debug; 4 | 5 | use bincode::{Decode, Encode, decode_from_slice, encode_to_vec}; 6 | use redb::{Database, Error, Key, Range, TableDefinition, TypeName, Value}; 7 | 8 | #[derive(Debug, Decode, Encode, PartialEq, Eq, PartialOrd, Ord)] 9 | struct SomeKey { 10 | foo: String, 11 | bar: i32, 12 | } 13 | 14 | #[derive(Debug, Decode, Encode, PartialEq)] 15 | struct SomeValue { 16 | foo: [f64; 3], 17 | bar: bool, 18 | } 19 | 20 | const TABLE: TableDefinition, Bincode> = 21 | TableDefinition::new("my_data"); 22 | 23 | #[allow(clippy::result_large_err)] 24 | fn main() -> Result<(), Error> { 25 | let some_key = SomeKey { 26 | foo: "hello world".to_string(), 27 | bar: 42, 28 | }; 29 | let some_value = SomeValue { 30 | foo: [1., 2., 3.], 31 | bar: true, 32 | }; 33 | let lower = SomeKey { 34 | foo: "a".to_string(), 35 | bar: 42, 36 | }; 37 | let upper = SomeKey { 38 | foo: "z".to_string(), 39 | bar: 42, 40 | }; 41 | 42 | let db = Database::create("bincode_keys.redb")?; 43 | let write_txn = db.begin_write()?; 44 | { 45 | let mut table = write_txn.open_table(TABLE)?; 46 | 47 | table.insert(&some_key, &some_value).unwrap(); 48 | } 49 | write_txn.commit()?; 50 | 51 | let read_txn = db.begin_read()?; 52 | let table = read_txn.open_table(TABLE)?; 53 | 54 | let mut iter: Range, Bincode> = table.range(lower..upper).unwrap(); 55 | assert_eq!(iter.next().unwrap().unwrap().1.value(), some_value); 56 | assert!(iter.next().is_none()); 57 | 58 | Ok(()) 59 | } 60 | 61 | /// Wrapper type to handle keys and values using bincode serialization 62 | #[derive(Debug)] 63 | pub struct Bincode(pub T); 64 | 65 | impl Value for Bincode 66 | where 67 | T: Debug + Encode + Decode<()>, 68 | { 69 | type SelfType<'a> 70 | = T 71 | where 72 | Self: 'a; 73 | 74 | type AsBytes<'a> 75 | = Vec 76 | where 77 | Self: 'a; 78 | 79 | fn fixed_width() -> Option { 80 | None 81 | } 82 | 83 | fn from_bytes<'a>(data: &'a [u8]) -> Self::SelfType<'a> 84 | where 85 | Self: 'a, 86 | { 87 | decode_from_slice(data, bincode::config::standard()) 88 | .unwrap() 89 | .0 90 | } 91 | 92 | fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Self::AsBytes<'a> 93 | where 94 | Self: 'a, 95 | Self: 'b, 96 | { 97 | encode_to_vec(value, bincode::config::standard()).unwrap() 98 | } 99 | 100 | fn type_name() -> TypeName { 101 | TypeName::new(&format!("Bincode<{}>", type_name::())) 102 | } 103 | } 104 | 105 | impl Key for Bincode 106 | where 107 | T: Debug + Decode<()> + Encode + Ord, 108 | { 109 | fn compare(data1: &[u8], data2: &[u8]) -> Ordering { 110 | Self::from_bytes(data1).cmp(&Self::from_bytes(data2)) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /examples/int_keys.rs: -------------------------------------------------------------------------------- 1 | use redb::{Database, Error, TableDefinition}; 2 | 3 | const TABLE: TableDefinition = TableDefinition::new("my_data"); 4 | 5 | #[allow(clippy::result_large_err)] 6 | fn main() -> Result<(), Error> { 7 | let db = Database::create("int_keys.redb")?; 8 | let write_txn = db.begin_write()?; 9 | { 10 | let mut table = write_txn.open_table(TABLE)?; 11 | 
table.insert(0, 0)?; 12 | } 13 | write_txn.commit()?; 14 | 15 | let read_txn = db.begin_read()?; 16 | let table = read_txn.open_table(TABLE)?; 17 | assert_eq!(table.get(0)?.unwrap().value(), 0); 18 | 19 | Ok(()) 20 | } 21 | -------------------------------------------------------------------------------- /examples/multithread.rs: -------------------------------------------------------------------------------- 1 | use redb::TableHandle; 2 | use redb::{Database, Error, TableDefinition}; 3 | use std::time::Instant; 4 | use std::{sync::Arc, time::Duration}; 5 | 6 | #[allow(clippy::result_large_err)] 7 | fn main() -> Result<(), Error> { 8 | let db = Database::create("my_db.redb")?; 9 | let definition: TableDefinition<&str, u32> = TableDefinition::new("my_data"); 10 | 11 | let db = Arc::new(db); 12 | // Seed the database with some information 13 | let write_txn = db.begin_write()?; 14 | { 15 | let mut table = write_txn.open_table(definition)?; 16 | table.insert(&0.to_string().as_str(), 0_u32)?; 17 | // The resulting table should have a different "a" value each time this example is run 18 | table.insert("a".to_string().as_str(), 0_u32)?; 19 | } 20 | write_txn.commit()?; 21 | 22 | let read_threads = 8; 23 | let write_threads = 2; 24 | let mut handles = Vec::with_capacity(read_threads + write_threads); 25 | for i in 0..read_threads { 26 | let db = db.clone(); 27 | let h = std::thread::spawn(move || -> Result<(), Error> { 28 | let start = Instant::now(); 29 | while start.elapsed() < Duration::from_millis(100) { 30 | let read_txn = db.begin_read()?; 31 | // Print every (key, value) pair in the table 32 | let table = read_txn.open_table(definition)?; 33 | for (k, v) in table.range("0"..)?.flatten() { 34 | println!("From read_thread #{}: {:?}, {:?}", i, k.value(), v.value()); 35 | } 36 | } 37 | Ok(()) 38 | }); 39 | handles.push(h); 40 | } 41 | 42 | for i in 0..write_threads { 43 | let db = db.clone(); 44 | let h = std::thread::spawn(move || -> Result<(), Error> { 45 | let start = Instant::now(); 46 | while start.elapsed() < Duration::from_millis(100) { 47 | let write_txn = db.begin_write()?; 48 | { 49 | let mut table = write_txn.open_table(definition)?; 50 | table.insert(&i.to_string().as_str(), i as u32)?; 51 | // The resulting table should have a different "a" value each time this example is run 52 | table.insert("a".to_string().as_str(), i as u32)?; 53 | println!("Inserted data from write_thread #{i}"); 54 | } 55 | write_txn.commit()?; 56 | } 57 | Ok(()) 58 | }); 59 | handles.push(h); 60 | } 61 | 62 | // See if there any errors were returned from the threads 63 | for handle in handles { 64 | if let Err(e) = handle.join() { 65 | println!("{e:?}"); 66 | } 67 | } 68 | 69 | // Check that the `Database` has the table (and only the table) that we created 70 | let read_txn = db.begin_read()?; 71 | let tables = read_txn.list_tables()?; 72 | for table in tables { 73 | println!("Table: {}", table.name()); 74 | let _d = TableDefinition::<&str, u32>::new(table.name()); 75 | } 76 | 77 | // Print every (key, value) pair in the table 78 | let table = read_txn.open_table(definition)?; 79 | for (k, v) in table.range("0"..)?.flatten() { 80 | println!("{:?}, {:?}", k.value(), v.value()); 81 | } 82 | 83 | Ok(()) 84 | } 85 | -------------------------------------------------------------------------------- /examples/special_values.rs: -------------------------------------------------------------------------------- 1 | use redb::{ 2 | Database, Error, Key, ReadableTable, Table, TableDefinition, TableHandle, Value, 3 | 
WriteTransaction, 4 | }; 5 | use std::fs::{File, OpenOptions}; 6 | use std::io::{Read, Seek, SeekFrom, Write}; 7 | use std::marker::PhantomData; 8 | 9 | const TABLE: TableDefinition = TableDefinition::new("my_data"); 10 | 11 | struct SpecialValuesDb { 12 | database: Database, 13 | file: File, 14 | } 15 | 16 | impl SpecialValuesDb { 17 | fn new() -> Self { 18 | SpecialValuesDb { 19 | database: Database::create("index.redb").unwrap(), 20 | file: OpenOptions::new() 21 | .write(true) 22 | .truncate(true) 23 | .create(true) 24 | .read(true) 25 | .open("values.dat") 26 | .unwrap(), 27 | } 28 | } 29 | 30 | fn begin_txn(&mut self) -> SpecialValuesTransaction { 31 | SpecialValuesTransaction { 32 | inner: self.database.begin_write().unwrap(), 33 | file: &mut self.file, 34 | } 35 | } 36 | } 37 | 38 | struct SpecialValuesTransaction<'db> { 39 | inner: WriteTransaction, 40 | file: &'db mut File, 41 | } 42 | 43 | impl SpecialValuesTransaction<'_> { 44 | fn open_table( 45 | &mut self, 46 | table: TableDefinition, 47 | ) -> SpecialValuesTable { 48 | let def: TableDefinition = TableDefinition::new(table.name()); 49 | SpecialValuesTable { 50 | inner: self.inner.open_table(def).unwrap(), 51 | file: self.file, 52 | _value_type: Default::default(), 53 | } 54 | } 55 | 56 | fn commit(self) { 57 | self.file.sync_all().unwrap(); 58 | self.inner.commit().unwrap(); 59 | } 60 | } 61 | 62 | struct SpecialValuesTable<'txn, K: Key + 'static, V: Value + 'static> { 63 | inner: Table<'txn, K, (u64, u64)>, 64 | file: &'txn mut File, 65 | _value_type: PhantomData, 66 | } 67 | 68 | impl SpecialValuesTable<'_, K, V> { 69 | fn insert(&mut self, key: K::SelfType<'_>, value: V::SelfType<'_>) { 70 | // Append to end of file 71 | let offset = self.file.seek(SeekFrom::End(0)).unwrap(); 72 | let value = V::as_bytes(&value); 73 | self.file.write_all(value.as_ref()).unwrap(); 74 | self.inner 75 | .insert(key, (offset, value.as_ref().len() as u64)) 76 | .unwrap(); 77 | } 78 | 79 | fn get(&mut self, key: K::SelfType<'_>) -> ValueAccessor { 80 | let (offset, length) = self.inner.get(key).unwrap().unwrap().value(); 81 | self.file.seek(SeekFrom::Start(offset)).unwrap(); 82 | let mut data = vec![0u8; length as usize]; 83 | self.file.read_exact(data.as_mut_slice()).unwrap(); 84 | ValueAccessor { 85 | data, 86 | _value_type: Default::default(), 87 | } 88 | } 89 | } 90 | 91 | struct ValueAccessor { 92 | data: Vec, 93 | _value_type: PhantomData, 94 | } 95 | 96 | impl ValueAccessor { 97 | fn value(&self) -> V::SelfType<'_> { 98 | V::from_bytes(&self.data) 99 | } 100 | } 101 | 102 | /// redb is not designed to support very large values, or values with special requirements (such as alignment or mutability). 103 | /// There's a hard limit of slightly less than 4GiB per value, and performance is likely to be poor when mutating values above a few megabytes. 104 | /// Additionally, because redb is copy-on-write, mutating a value in-place is not possible, and therefore mutating large values is slow. 105 | /// Storing values with alignment requirements is also not supported. 106 | /// 107 | /// This example demonstrates one way to handle such values, via a sidecar file. 
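/// In the sketch below, the redb table stores only `(offset, length)` pairs that index
/// into a separate `values.dat` file; `commit()` calls `sync_all()` on the data file
/// before committing the redb transaction, so a committed index entry always refers to
/// bytes that are already durable in the sidecar file.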
108 | #[allow(clippy::result_large_err)] 109 | fn main() -> Result<(), Error> { 110 | let mut db = SpecialValuesDb::new(); 111 | let mut txn = db.begin_txn(); 112 | { 113 | let mut table = txn.open_table(TABLE); 114 | table.insert(0, "hello world"); 115 | } 116 | txn.commit(); 117 | 118 | let mut txn = db.begin_txn(); 119 | let mut table = txn.open_table(TABLE); 120 | assert_eq!(table.get(0).value(), "hello world"); 121 | 122 | Ok(()) 123 | } 124 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | coverage/ 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "redb-fuzz" 3 | version = "0.0.0" 4 | authors = ["Automatically generated"] 5 | publish = false 6 | edition = "2018" 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | arbitrary = { version = "1.1.0", features = ["derive"] } 13 | libfuzzer-sys = { version = "0.4.0", features = ["arbitrary-derive"] } 14 | tempfile = "3.2.0" 15 | rand = "0.8.5" 16 | rand_distr = "0.4.3" 17 | 18 | [dependencies.redb] 19 | path = ".." 20 | 21 | # Prevent this from interfering with workspaces 22 | [workspace] 23 | members = ["."] 24 | 25 | [[bin]] 26 | name = "fuzz_redb" 27 | path = "fuzz_targets/fuzz_redb.rs" 28 | test = false 29 | doc = false 30 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/common.rs: -------------------------------------------------------------------------------- 1 | use std::mem::size_of; 2 | use arbitrary::Unstructured; 3 | use libfuzzer_sys::arbitrary::Arbitrary; 4 | use rand_distr::{Binomial, Distribution}; 5 | use rand::rngs::StdRng; 6 | use rand::SeedableRng; 7 | 8 | const MAX_CRASH_OPS: u64 = 20; 9 | const MAX_CACHE_SIZE: usize = 100_000_000; 10 | // Limit values to 100KiB 11 | const MAX_VALUE_SIZE: usize = 100_000; 12 | const KEY_SPACE: u64 = 1_000_000; 13 | pub const MAX_SAVEPOINTS: usize = 6; 14 | 15 | #[derive(Debug, Clone)] 16 | pub(crate) struct BoundedU64 { 17 | pub value: u64 18 | } 19 | 20 | impl Arbitrary<'_> for BoundedU64 { 21 | fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { 22 | let value: u64 = u.int_in_range(0..=(N - 1))?; 23 | Ok(Self { 24 | value 25 | }) 26 | } 27 | 28 | fn size_hint(_depth: usize) -> (usize, Option) { 29 | (size_of::(), Some(size_of::())) 30 | } 31 | } 32 | 33 | #[derive(Debug, Clone)] 34 | pub(crate) struct U64Between { 35 | pub value: u64 36 | } 37 | 38 | impl Arbitrary<'_> for U64Between { 39 | fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { 40 | let value: u64 = u.int_in_range(MIN..=MAX)?; 41 | Ok(Self { 42 | value 43 | }) 44 | } 45 | 46 | fn size_hint(_depth: usize) -> (usize, Option) { 47 | (size_of::(), Some(size_of::())) 48 | } 49 | } 50 | 51 | #[derive(Debug, Clone)] 52 | pub(crate) struct BinomialDifferenceBoundedUSize { 53 | pub value: usize 54 | } 55 | 56 | impl Arbitrary<'_> for BinomialDifferenceBoundedUSize { 57 | fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { 58 | let seed: u64 = u.arbitrary()?; 59 | let mut rng = StdRng::seed_from_u64(seed); 60 | // Distribution which is the difference from the median of B(N, 0.5) 61 | let distribution = Binomial::new(N as u64, 0.5).unwrap(); 62 | let value = distribution.sample(&mut rng) as isize; 
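        // The fold on the next line takes the absolute distance from the median N/2,
        // so generated sizes cluster near zero (roughly sqrt(N) scale) with rarer,
        // larger outliers bounded by N/2.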
63 | let value = (value - N as isize / 2).abs() as usize; 64 | Ok(Self { 65 | value 66 | }) 67 | } 68 | 69 | fn size_hint(_depth: usize) -> (usize, Option) { 70 | (size_of::(), Some(size_of::())) 71 | } 72 | } 73 | 74 | #[derive(Debug, Clone)] 75 | pub(crate) struct PowerOfTwoBetween { 76 | pub value: usize 77 | } 78 | 79 | impl Arbitrary<'_> for PowerOfTwoBetween { 80 | fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { 81 | let value: u32 = u.int_in_range(M..=N)?; 82 | Ok(Self { 83 | value: 2usize.pow(value) 84 | }) 85 | } 86 | 87 | fn size_hint(_depth: usize) -> (usize, Option) { 88 | (size_of::(), Some(size_of::())) 89 | } 90 | } 91 | 92 | #[derive(Debug, Clone)] 93 | pub(crate) struct BoundedUSize { 94 | pub value: usize 95 | } 96 | 97 | impl Arbitrary<'_> for BoundedUSize { 98 | fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { 99 | let value: usize = u.int_in_range(0..=(N - 1))?; 100 | Ok(Self { 101 | value 102 | }) 103 | } 104 | 105 | fn size_hint(_depth: usize) -> (usize, Option) { 106 | (size_of::(), Some(size_of::())) 107 | } 108 | } 109 | 110 | #[derive(Arbitrary, Debug, Clone)] 111 | pub(crate) enum FuzzOperation { 112 | Get { 113 | key: BoundedU64, 114 | }, 115 | Insert { 116 | key: BoundedU64, 117 | value_size: BinomialDifferenceBoundedUSize, 118 | }, 119 | InsertReserve { 120 | key: BoundedU64, 121 | value_size: BinomialDifferenceBoundedUSize, 122 | }, 123 | Remove { 124 | key: BoundedU64, 125 | }, 126 | RemoveOne { 127 | key: BoundedU64, 128 | value_size: BinomialDifferenceBoundedUSize, 129 | }, 130 | Len { 131 | }, 132 | PopFirst { 133 | }, 134 | PopLast { 135 | }, 136 | Retain { 137 | modulus: U64Between<1, 8>, 138 | }, 139 | RetainIn { 140 | start_key: BoundedU64, 141 | len: BoundedU64, 142 | modulus: U64Between<1, 8>, 143 | }, 144 | ExtractIf { 145 | modulus: U64Between<1, 8>, 146 | take: BoundedUSize<10>, 147 | reversed: bool, 148 | }, 149 | ExtractFromIf { 150 | start_key: BoundedU64, 151 | range_len: BoundedU64, 152 | take: BoundedUSize<10>, 153 | modulus: U64Between<1, 8>, 154 | reversed: bool, 155 | }, 156 | Range { 157 | start_key: BoundedU64, 158 | len: BoundedU64, 159 | reversed: bool, 160 | }, 161 | } 162 | 163 | #[derive(Arbitrary, Debug, Clone)] 164 | pub(crate) struct FuzzTransaction { 165 | pub ops: Vec, 166 | pub durable: bool, 167 | pub quick_repair: bool, 168 | pub commit: bool, 169 | pub close_db: bool, 170 | pub create_ephemeral_savepoint: bool, 171 | pub create_persistent_savepoint: bool, 172 | pub restore_savepoint: Option>, 173 | } 174 | 175 | #[derive(Arbitrary, Debug, Clone)] 176 | pub(crate) struct FuzzConfig { 177 | pub multimap_table: bool, 178 | pub cache_size: BoundedUSize, 179 | pub crash_after_ops: BoundedU64, 180 | pub transactions: Vec, 181 | pub page_size: PowerOfTwoBetween<9, 14>, 182 | // Must not be too small, otherwise persistent savepoints won't fit into a region 183 | pub region_size: PowerOfTwoBetween<20, 30>, 184 | } 185 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | build: pre 2 | cargo build --all-targets 3 | cargo doc 4 | 5 | pre: 6 | cargo deny --all-features check licenses 7 | cargo fmt --all -- --check 8 | cargo clippy --all --all-targets 9 | 10 | release: pre 11 | cargo build --release 12 | 13 | flamegraph: 14 | cargo flamegraph --bench lmdb_benchmark 15 | firefox ./flamegraph.svg 16 | 17 | publish_py: test_py 18 | docker pull quay.io/pypa/manylinux2014_x86_64 19 | 
MATURIN_PYPI_TOKEN=$(cat ~/.pypi/redb_token) docker run -it --rm -e "MATURIN_PYPI_TOKEN" -v `pwd`:/redb-ro:ro quay.io/pypa/manylinux2014_x86_64 /redb-ro/publish_py.sh 20 | 21 | test_py: install_py 22 | python3 -m unittest discover 23 | 24 | install_py: pre 25 | maturin develop 26 | 27 | test: pre 28 | RUST_BACKTRACE=1 cargo test 29 | 30 | test_wasi: 31 | rustup install nightly-2025-05-04 --target wasm32-wasip1-threads 32 | cargo +nightly-2025-05-04 test --target=wasm32-wasip1-threads -- --nocapture 33 | 34 | bench bench='lmdb_benchmark': pre 35 | cargo bench --bench {{bench}} 36 | 37 | watch +args='test': 38 | cargo watch --clear --exec "{{args}}" 39 | 40 | fuzz: pre 41 | cargo fuzz run --sanitizer=none fuzz_redb -- -max_len=10000 42 | 43 | fuzz_cmin: 44 | cargo fuzz cmin --sanitizer=none fuzz_redb -- -max_len=10000 45 | 46 | fuzz_ci: pre 47 | cargo fuzz run --sanitizer=none fuzz_redb -- -max_len=10000 -max_total_time=60 48 | 49 | fuzz_coverage: pre 50 | #!/usr/bin/env bash 51 | set -euxo pipefail 52 | RUST_SYSROOT=`cargo rustc -- --print sysroot 2>/dev/null` 53 | LLVM_COV=`find $RUST_SYSROOT -name llvm-cov` 54 | echo $LLVM_COV 55 | rustup component add llvm-tools-preview 56 | cargo fuzz coverage --sanitizer=none fuzz_redb 57 | $LLVM_COV show fuzz/target/*/release/fuzz_redb --format html \ 58 | -instr-profile=fuzz/coverage/fuzz_redb/coverage.profdata \ 59 | -ignore-filename-regex='.*(cargo/registry|redb/fuzz|rustc).*' > fuzz/coverage/coverage_report.html 60 | $LLVM_COV report fuzz/target/*/release/fuzz_redb \ 61 | -instr-profile=fuzz/coverage/fuzz_redb/coverage.profdata \ 62 | -ignore-filename-regex='.*(cargo/registry|redb/fuzz|rustc).*' 63 | firefox ./fuzz/coverage/coverage_report.html 64 | -------------------------------------------------------------------------------- /py_publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PYTHON3=/opt/python/cp311-cp311/bin/python3 4 | 5 | cp -r /redb-ro /redb 6 | cd /redb 7 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=1.61.0 8 | source $HOME/.cargo/env 9 | 10 | cd /tmp 11 | $PYTHON3 -m venv venv 12 | cd /redb 13 | source /tmp/venv/bin/activate 14 | python3 -m pip install --upgrade pip 15 | python3 -m pip install maturin 16 | 17 | python3 -m maturin publish 18 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "redb" 3 | requires-python = ">=3.7" 4 | dynamic = ["version"] 5 | classifier = ["Development Status :: 4 - Beta", 6 | "License :: OSI Approved :: MIT License", 7 | "License :: OSI Approved :: Apache Software License", 8 | "Programming Language :: Python", 9 | "Programming Language :: Python :: 3", 10 | "Programming Language :: Python :: 3 :: Only", 11 | "Programming Language :: Rust"] 12 | 13 | [build-system] 14 | requires = ["maturin>=1.0,<2.0"] 15 | build-backend = "maturin" 16 | 17 | [tool.maturin] 18 | compatibility = "manylinux2014" 19 | features = ["python"] 20 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | 1.85 2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 100 2 | 
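A typical local workflow driven by the justfile above, assuming `just`, `cargo-deny`, and `cargo-fuzz` are installed, looks roughly like this (a sketch, not an exhaustive list of recipes):

    just build test    # runs the `pre` checks (cargo-deny license check, rustfmt, clippy), builds all targets, runs the test suite
    just fuzz_ci       # time-bounded libFuzzer run of the fuzz_redb target (-max_total_time=60)
    just bench         # comparative benchmark, defaults to lmdb_benchmark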
-------------------------------------------------------------------------------- /src/backends.rs: --------------------------------------------------------------------------------
1 | pub use crate::tree_store::InMemoryBackend;
2 | pub use crate::tree_store::file_backend::FileBackend;
3 |
-------------------------------------------------------------------------------- /src/complex_types.rs: --------------------------------------------------------------------------------
1 | use crate::types::{TypeName, Value};
2 |
3 | // Encode len as a varint and store it at the end of output
4 | fn encode_varint_len(len: usize, output: &mut Vec<u8>) {
5 |     if len < 254 {
6 |         output.push(len.try_into().unwrap());
7 |     } else if len <= u16::MAX.into() {
8 |         let u16_len: u16 = len.try_into().unwrap();
9 |         output.push(254);
10 |         output.extend_from_slice(&u16_len.to_le_bytes());
11 |     } else {
12 |         let u32_len: u32 = len.try_into().unwrap();
13 |         output.push(255);
14 |         output.extend_from_slice(&u32_len.to_le_bytes());
15 |     }
16 | }
17 |
18 | // Decode a variable length int starting at the beginning of data
19 | // Returns (decoded length, length consumed of `data`)
20 | fn decode_varint_len(data: &[u8]) -> (usize, usize) {
21 |     match data[0] {
22 |         0..=253 => (data[0] as usize, 1),
23 |         254 => (
24 |             u16::from_le_bytes(data[1..3].try_into().unwrap()) as usize,
25 |             3,
26 |         ),
27 |         255 => (
28 |             u32::from_le_bytes(data[1..5].try_into().unwrap()) as usize,
29 |             5,
30 |         ),
31 |     }
32 | }
33 |
34 | impl<T: Value> Value for Vec<T> {
35 |     type SelfType<'a>
36 |         = Vec<T::SelfType<'a>>
37 |     where
38 |         Self: 'a;
39 |     type AsBytes<'a>
40 |         = Vec<u8>
41 |     where
42 |         Self: 'a;
43 |
44 |     fn fixed_width() -> Option<usize> {
45 |         None
46 |     }
47 |
48 |     fn from_bytes<'a>(data: &'a [u8]) -> Vec<T::SelfType<'a>>
49 |     where
50 |         Self: 'a,
51 |     {
52 |         let (elements, mut offset) = decode_varint_len(data);
53 |         let mut result = Vec::with_capacity(elements);
54 |         for _ in 0..elements {
55 |             let element_len = if let Some(len) = T::fixed_width() {
56 |                 len
57 |             } else {
58 |                 let (len, consumed) = decode_varint_len(&data[offset..]);
59 |                 offset += consumed;
60 |                 len
61 |             };
62 |             result.push(T::from_bytes(&data[offset..(offset + element_len)]));
63 |             offset += element_len;
64 |         }
65 |         assert_eq!(offset, data.len());
66 |         result
67 |     }
68 |
69 |     fn as_bytes<'a, 'b: 'a>(value: &'a Vec<T::SelfType<'b>>) -> Vec<u8>
70 |     where
71 |         Self: 'b,
72 |     {
73 |         let mut result = if let Some(width) = T::fixed_width() {
74 |             Vec::with_capacity(value.len() * width + 5)
75 |         } else {
76 |             Vec::with_capacity(value.len() * 2 + 5)
77 |         };
78 |         encode_varint_len(value.len(), &mut result);
79 |
80 |         for element in value {
81 |             let serialized = T::as_bytes(element);
82 |             if T::fixed_width().is_none() {
83 |                 encode_varint_len(serialized.as_ref().len(), &mut result);
84 |             }
85 |             result.extend_from_slice(serialized.as_ref());
86 |         }
87 |         result
88 |     }
89 |
90 |     fn type_name() -> TypeName {
91 |         TypeName::internal(&format!("Vec<{}>", T::type_name().name()))
92 |     }
93 | }
94 |
-------------------------------------------------------------------------------- /src/lib.rs: --------------------------------------------------------------------------------
1 | #![deny(clippy::all, clippy::pedantic, clippy::disallowed_methods)]
2 | // TODO: revisit this list and see if we can enable some
3 | #![allow(
4 |     let_underscore_drop,
5 |     clippy::default_trait_access,
6 |     clippy::if_not_else,
7 |     clippy::inline_always,
8 |     clippy::iter_not_returning_iterator,
9 |     clippy::manual_let_else,
10 |     clippy::missing_errors_doc,
11 |     clippy::missing_panics_doc,
12 |
clippy::module_name_repetitions, 13 | clippy::must_use_candidate, 14 | clippy::needless_pass_by_value, 15 | clippy::option_option, 16 | clippy::redundant_closure_for_method_calls, 17 | clippy::similar_names, 18 | clippy::too_many_lines, 19 | clippy::unnecessary_wraps, 20 | clippy::unreadable_literal, 21 | clippy::wildcard_imports 22 | )] 23 | // TODO remove this once wasi no longer requires nightly 24 | #![cfg_attr(target_os = "wasi", feature(wasi_ext))] 25 | 26 | //! # redb 27 | //! 28 | //! A simple, portable, high-performance, ACID, embedded key-value store. 29 | //! 30 | //! redb is written in pure Rust and is loosely inspired by [lmdb][lmdb]. Data is stored in a collection 31 | //! of copy-on-write B-trees. For more details, see the [design doc][design]. 32 | //! 33 | //! # Features 34 | //! 35 | //! - Zero-copy, thread-safe, `BTreeMap` based API 36 | //! - Fully ACID-compliant transactions 37 | //! - MVCC support for concurrent readers & writer, without blocking 38 | //! - Crash-safe by default 39 | //! - Savepoints and rollbacks 40 | //! 41 | //! # Example 42 | //! 43 | //! ``` 44 | //! use redb::{Database, Error, ReadableTable, TableDefinition}; 45 | //! 46 | //! const TABLE: TableDefinition<&str, u64> = TableDefinition::new("my_data"); 47 | //! 48 | //! #[cfg(not(target_os = "wasi"))] 49 | //! fn main() -> Result<(), Error> { 50 | //! let file = tempfile::NamedTempFile::new().unwrap(); 51 | //! let db = Database::create(file.path())?; 52 | //! let write_txn = db.begin_write()?; 53 | //! { 54 | //! let mut table = write_txn.open_table(TABLE)?; 55 | //! table.insert("my_key", &123)?; 56 | //! } 57 | //! write_txn.commit()?; 58 | //! 59 | //! let read_txn = db.begin_read()?; 60 | //! let table = read_txn.open_table(TABLE)?; 61 | //! assert_eq!(table.get("my_key")?.unwrap().value(), 123); 62 | //! 63 | //! Ok(()) 64 | //! } 65 | //! ``` 66 | //! 67 | //! [lmdb]: https://www.lmdb.tech/doc/ 68 | //! 
[design]: https://github.com/cberner/redb/blob/master/docs/design.md 69 | 70 | pub use db::{ 71 | Builder, CacheStats, Database, MultimapTableDefinition, MultimapTableHandle, RepairSession, 72 | StorageBackend, TableDefinition, TableHandle, UntypedMultimapTableHandle, UntypedTableHandle, 73 | }; 74 | pub use error::{ 75 | CommitError, CompactionError, DatabaseError, Error, SavepointError, SetDurabilityError, 76 | StorageError, TableError, TransactionError, 77 | }; 78 | pub use multimap_table::{ 79 | MultimapRange, MultimapTable, MultimapValue, ReadOnlyMultimapTable, 80 | ReadOnlyUntypedMultimapTable, ReadableMultimapTable, 81 | }; 82 | pub use table::{ 83 | ExtractIf, Range, ReadOnlyTable, ReadOnlyUntypedTable, ReadableTable, ReadableTableMetadata, 84 | Table, TableStats, 85 | }; 86 | pub use transactions::{DatabaseStats, Durability, ReadTransaction, WriteTransaction}; 87 | pub use tree_store::{AccessGuard, AccessGuardMutInPlace, Savepoint}; 88 | pub use types::{Key, MutInPlaceValue, TypeName, Value}; 89 | 90 | pub type Result = std::result::Result; 91 | 92 | #[cfg(feature = "python")] 93 | pub use crate::python::redb; 94 | 95 | pub mod backends; 96 | mod complex_types; 97 | mod db; 98 | mod error; 99 | mod multimap_table; 100 | #[cfg(feature = "python")] 101 | mod python; 102 | mod sealed; 103 | mod table; 104 | mod transaction_tracker; 105 | mod transactions; 106 | mod tree_store; 107 | mod tuple_types; 108 | mod types; 109 | 110 | #[cfg(test)] 111 | fn create_tempfile() -> tempfile::NamedTempFile { 112 | if cfg!(target_os = "wasi") { 113 | tempfile::NamedTempFile::new_in("/tmp").unwrap() 114 | } else { 115 | tempfile::NamedTempFile::new().unwrap() 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/python.rs: -------------------------------------------------------------------------------- 1 | use pyo3::prelude::*; 2 | 3 | #[pymodule] 4 | pub fn redb(_m: &Bound<'_, PyModule>) -> PyResult<()> { 5 | Ok(()) 6 | } 7 | -------------------------------------------------------------------------------- /src/sealed.rs: -------------------------------------------------------------------------------- 1 | pub trait Sealed {} 2 | -------------------------------------------------------------------------------- /src/transaction_tracker.rs: -------------------------------------------------------------------------------- 1 | use crate::tree_store::TransactionalMemory; 2 | use crate::{Key, Result, Savepoint, TypeName, Value}; 3 | #[cfg(feature = "logging")] 4 | use log::debug; 5 | use std::cmp::Ordering; 6 | use std::collections::btree_map::BTreeMap; 7 | use std::mem::size_of; 8 | use std::sync::{Condvar, Mutex}; 9 | 10 | #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)] 11 | pub(crate) struct TransactionId(u64); 12 | 13 | impl TransactionId { 14 | pub(crate) fn new(value: u64) -> TransactionId { 15 | Self(value) 16 | } 17 | 18 | pub(crate) fn raw_id(self) -> u64 { 19 | self.0 20 | } 21 | 22 | pub(crate) fn next(self) -> TransactionId { 23 | TransactionId(self.0 + 1) 24 | } 25 | 26 | pub(crate) fn increment(&mut self) -> TransactionId { 27 | let next = self.next(); 28 | *self = next; 29 | next 30 | } 31 | 32 | pub(crate) fn parent(self) -> Option { 33 | if self.0 == 0 { 34 | None 35 | } else { 36 | Some(TransactionId(self.0 - 1)) 37 | } 38 | } 39 | } 40 | 41 | #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Debug)] 42 | pub(crate) struct SavepointId(pub u64); 43 | 44 | impl SavepointId { 45 | pub(crate) fn next(self) -> 
SavepointId { 46 | SavepointId(self.0 + 1) 47 | } 48 | } 49 | 50 | impl Value for SavepointId { 51 | type SelfType<'a> = SavepointId; 52 | type AsBytes<'a> = [u8; size_of::()]; 53 | 54 | fn fixed_width() -> Option { 55 | Some(size_of::()) 56 | } 57 | 58 | fn from_bytes<'a>(data: &'a [u8]) -> Self::SelfType<'a> 59 | where 60 | Self: 'a, 61 | { 62 | SavepointId(u64::from_le_bytes(data.try_into().unwrap())) 63 | } 64 | 65 | fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Self::AsBytes<'a> 66 | where 67 | Self: 'b, 68 | { 69 | value.0.to_le_bytes() 70 | } 71 | 72 | fn type_name() -> TypeName { 73 | TypeName::internal("redb::SavepointId") 74 | } 75 | } 76 | 77 | impl Key for SavepointId { 78 | fn compare(data1: &[u8], data2: &[u8]) -> Ordering { 79 | Self::from_bytes(data1).0.cmp(&Self::from_bytes(data2).0) 80 | } 81 | } 82 | 83 | struct State { 84 | next_savepoint_id: SavepointId, 85 | // reference count of read transactions per transaction id 86 | live_read_transactions: BTreeMap, 87 | next_transaction_id: TransactionId, 88 | live_write_transaction: Option, 89 | valid_savepoints: BTreeMap, 90 | // Non-durable commits that are still in-memory, and waiting for a durable commit to get flushed 91 | // We need to make sure that the freed-table does not get processed for these, since they are not durable yet 92 | // Therefore, we hold a read transaction on their parent 93 | pending_non_durable_commits: Vec, 94 | } 95 | 96 | pub(crate) struct TransactionTracker { 97 | state: Mutex, 98 | live_write_transaction_available: Condvar, 99 | } 100 | 101 | impl TransactionTracker { 102 | pub(crate) fn new(next_transaction_id: TransactionId) -> Self { 103 | Self { 104 | state: Mutex::new(State { 105 | next_savepoint_id: SavepointId(0), 106 | live_read_transactions: Default::default(), 107 | next_transaction_id, 108 | live_write_transaction: None, 109 | valid_savepoints: Default::default(), 110 | pending_non_durable_commits: Default::default(), 111 | }), 112 | live_write_transaction_available: Condvar::new(), 113 | } 114 | } 115 | 116 | pub(crate) fn start_write_transaction(&self) -> TransactionId { 117 | let mut state = self.state.lock().unwrap(); 118 | while state.live_write_transaction.is_some() { 119 | state = self.live_write_transaction_available.wait(state).unwrap(); 120 | } 121 | assert!(state.live_write_transaction.is_none()); 122 | let transaction_id = state.next_transaction_id.increment(); 123 | #[cfg(feature = "logging")] 124 | debug!("Beginning write transaction id={:?}", transaction_id); 125 | state.live_write_transaction = Some(transaction_id); 126 | 127 | transaction_id 128 | } 129 | 130 | pub(crate) fn end_write_transaction(&self, id: TransactionId) { 131 | let mut state = self.state.lock().unwrap(); 132 | assert_eq!(state.live_write_transaction.unwrap(), id); 133 | state.live_write_transaction = None; 134 | self.live_write_transaction_available.notify_one(); 135 | } 136 | 137 | pub(crate) fn clear_pending_non_durable_commits(&self) { 138 | let mut state = self.state.lock().unwrap(); 139 | let ids: Vec = state.pending_non_durable_commits.drain(..).collect(); 140 | for id in ids { 141 | if let Some(parent) = id.parent() { 142 | let ref_count = state.live_read_transactions.get_mut(&parent).unwrap(); 143 | *ref_count -= 1; 144 | if *ref_count == 0 { 145 | state.live_read_transactions.remove(&parent); 146 | } 147 | } 148 | } 149 | } 150 | 151 | pub(crate) fn register_non_durable_commit(&self, id: TransactionId) { 152 | let mut state = self.state.lock().unwrap(); 153 | if let Some(parent) 
= id.parent() { 154 | state 155 | .live_read_transactions 156 | .entry(parent) 157 | .and_modify(|x| *x += 1) 158 | .or_insert(1); 159 | } 160 | state.pending_non_durable_commits.push(id); 161 | } 162 | 163 | pub(crate) fn restore_savepoint_counter_state(&self, next_savepoint: SavepointId) { 164 | let mut state = self.state.lock().unwrap(); 165 | assert!(state.valid_savepoints.is_empty()); 166 | state.next_savepoint_id = next_savepoint; 167 | } 168 | 169 | pub(crate) fn register_persistent_savepoint(&self, savepoint: &Savepoint) { 170 | let mut state = self.state.lock().unwrap(); 171 | state 172 | .live_read_transactions 173 | .entry(savepoint.get_transaction_id()) 174 | .and_modify(|x| *x += 1) 175 | .or_insert(1); 176 | state 177 | .valid_savepoints 178 | .insert(savepoint.get_id(), savepoint.get_transaction_id()); 179 | } 180 | 181 | pub(crate) fn register_read_transaction( 182 | &self, 183 | mem: &TransactionalMemory, 184 | ) -> Result { 185 | let mut state = self.state.lock()?; 186 | let id = mem.get_last_committed_transaction_id()?; 187 | state 188 | .live_read_transactions 189 | .entry(id) 190 | .and_modify(|x| *x += 1) 191 | .or_insert(1); 192 | 193 | Ok(id) 194 | } 195 | 196 | pub(crate) fn deallocate_read_transaction(&self, id: TransactionId) { 197 | let mut state = self.state.lock().unwrap(); 198 | let ref_count = state.live_read_transactions.get_mut(&id).unwrap(); 199 | *ref_count -= 1; 200 | if *ref_count == 0 { 201 | state.live_read_transactions.remove(&id); 202 | } 203 | } 204 | 205 | pub(crate) fn any_savepoint_exists(&self) -> bool { 206 | !self.state.lock().unwrap().valid_savepoints.is_empty() 207 | } 208 | 209 | pub(crate) fn allocate_savepoint(&self, transaction_id: TransactionId) -> SavepointId { 210 | let mut state = self.state.lock().unwrap(); 211 | let id = state.next_savepoint_id.next(); 212 | state.next_savepoint_id = id; 213 | state.valid_savepoints.insert(id, transaction_id); 214 | id 215 | } 216 | 217 | // Deallocates the given savepoint and its matching reference count on the transcation 218 | pub(crate) fn deallocate_savepoint(&self, savepoint: SavepointId, transaction: TransactionId) { 219 | self.state 220 | .lock() 221 | .unwrap() 222 | .valid_savepoints 223 | .remove(&savepoint); 224 | self.deallocate_read_transaction(transaction); 225 | } 226 | 227 | pub(crate) fn is_valid_savepoint(&self, id: SavepointId) -> bool { 228 | self.state 229 | .lock() 230 | .unwrap() 231 | .valid_savepoints 232 | .contains_key(&id) 233 | } 234 | 235 | pub(crate) fn invalidate_savepoints_after(&self, id: SavepointId) { 236 | self.state 237 | .lock() 238 | .unwrap() 239 | .valid_savepoints 240 | .retain(|x, _| *x <= id); 241 | } 242 | 243 | pub(crate) fn oldest_savepoint(&self) -> Option<(SavepointId, TransactionId)> { 244 | self.state 245 | .lock() 246 | .unwrap() 247 | .valid_savepoints 248 | .first_key_value() 249 | .map(|x| (*x.0, *x.1)) 250 | } 251 | 252 | pub(crate) fn oldest_live_read_transaction(&self) -> Option { 253 | self.state 254 | .lock() 255 | .unwrap() 256 | .live_read_transactions 257 | .keys() 258 | .next() 259 | .copied() 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /src/tree_store/mod.rs: -------------------------------------------------------------------------------- 1 | mod btree; 2 | mod btree_base; 3 | mod btree_iters; 4 | mod btree_mutator; 5 | mod page_store; 6 | mod table_tree; 7 | mod table_tree_base; 8 | 9 | pub(crate) use btree::{ 10 | Btree, BtreeMut, BtreeStats, PagePath, RawBtree, 
UntypedBtree, UntypedBtreeMut, btree_stats, 11 | }; 12 | pub use btree_base::{AccessGuard, AccessGuardMutInPlace}; 13 | pub(crate) use btree_base::{ 14 | BRANCH, BranchAccessor, BranchMutator, BtreeHeader, Checksum, DEFERRED, LEAF, LeafAccessor, 15 | LeafMutator, RawLeafBuilder, 16 | }; 17 | pub(crate) use btree_iters::{AllPageNumbersBtreeIter, BtreeExtractIf, BtreeRangeIter}; 18 | pub(crate) use page_store::{ 19 | FILE_FORMAT_VERSION3, MAX_PAIR_LENGTH, MAX_VALUE_LENGTH, PAGE_SIZE, Page, PageHint, PageNumber, 20 | PageTrackerPolicy, SerializedSavepoint, TransactionalMemory, 21 | }; 22 | pub use page_store::{InMemoryBackend, Savepoint, file_backend}; 23 | pub(crate) use table_tree::{PageListMut, TableTree, TableTreeMut}; 24 | pub(crate) use table_tree_base::{InternalTableDefinition, TableType}; 25 | -------------------------------------------------------------------------------- /src/tree_store/page_store/base.rs: -------------------------------------------------------------------------------- 1 | use crate::tree_store::page_store::cached_file::WritablePage; 2 | use crate::tree_store::page_store::page_manager::MAX_MAX_PAGE_ORDER; 3 | use std::cmp::Ordering; 4 | #[cfg(debug_assertions)] 5 | use std::collections::HashMap; 6 | use std::collections::HashSet; 7 | use std::fmt::{Debug, Formatter}; 8 | use std::mem; 9 | use std::ops::Range; 10 | use std::sync::Arc; 11 | #[cfg(debug_assertions)] 12 | use std::sync::Mutex; 13 | 14 | pub(crate) const MAX_VALUE_LENGTH: usize = 3 * 1024 * 1024 * 1024; 15 | pub(crate) const MAX_PAIR_LENGTH: usize = 3 * 1024 * 1024 * 1024 + 768 * 1024 * 1024; 16 | pub(crate) const MAX_PAGE_INDEX: u32 = 0x000F_FFFF; 17 | 18 | // On-disk format is: 19 | // lowest 20bits: page index within the region 20 | // second 20bits: region number 21 | // 19bits: reserved 22 | // highest 5bits: page order exponent 23 | // 24 | // Assuming a reasonable page size, like 4kiB, this allows for 4kiB * 2^20 * 2^20 = 4PiB of usable space 25 | #[derive(Copy, Clone, Eq, PartialEq, Hash)] 26 | pub(crate) struct PageNumber { 27 | pub(crate) region: u32, 28 | pub(crate) page_index: u32, 29 | pub(crate) page_order: u8, 30 | } 31 | 32 | // PageNumbers are ordered as determined by their starting address in the database file 33 | impl Ord for PageNumber { 34 | fn cmp(&self, other: &Self) -> Ordering { 35 | match self.region.cmp(&other.region) { 36 | Ordering::Less => Ordering::Less, 37 | Ordering::Equal => { 38 | let self_order0 = self.page_index * 2u32.pow(self.page_order.into()); 39 | let other_order0 = other.page_index * 2u32.pow(other.page_order.into()); 40 | assert!( 41 | self_order0 != other_order0 || self.page_order == other.page_order, 42 | "{self:?} overlaps {other:?}, but is not equal" 43 | ); 44 | self_order0.cmp(&other_order0) 45 | } 46 | Ordering::Greater => Ordering::Greater, 47 | } 48 | } 49 | } 50 | 51 | impl PartialOrd for PageNumber { 52 | fn partial_cmp(&self, other: &Self) -> Option { 53 | Some(self.cmp(other)) 54 | } 55 | } 56 | 57 | impl PageNumber { 58 | #[inline(always)] 59 | pub(crate) const fn serialized_size() -> usize { 60 | 8 61 | } 62 | 63 | pub(crate) fn new(region: u32, page_index: u32, page_order: u8) -> Self { 64 | debug_assert!(region <= 0x000F_FFFF); 65 | debug_assert!(page_index <= MAX_PAGE_INDEX); 66 | debug_assert!(page_order <= MAX_MAX_PAGE_ORDER); 67 | Self { 68 | region, 69 | page_index, 70 | page_order, 71 | } 72 | } 73 | 74 | pub(crate) fn to_le_bytes(self) -> [u8; 8] { 75 | let mut temp = 0x000F_FFFF & u64::from(self.page_index); 76 | temp |= (0x000F_FFFF & 
u64::from(self.region)) << 20; 77 | temp |= (0b0001_1111 & u64::from(self.page_order)) << 59; 78 | temp.to_le_bytes() 79 | } 80 | 81 | pub(crate) fn from_le_bytes(bytes: [u8; 8]) -> Self { 82 | let temp = u64::from_le_bytes(bytes); 83 | let index = (temp & 0x000F_FFFF) as u32; 84 | let region = ((temp >> 20) & 0x000F_FFFF) as u32; 85 | let order = (temp >> 59) as u8; 86 | 87 | Self { 88 | region, 89 | page_index: index, 90 | page_order: order, 91 | } 92 | } 93 | 94 | #[cfg(test)] 95 | pub(crate) fn to_order0(self) -> Vec { 96 | let mut pages = vec![self]; 97 | loop { 98 | let mut progress = false; 99 | let mut new_pages = vec![]; 100 | for page in pages { 101 | if page.page_order == 0 { 102 | new_pages.push(page); 103 | } else { 104 | progress = true; 105 | new_pages.push(PageNumber::new( 106 | page.region, 107 | page.page_index * 2, 108 | page.page_order - 1, 109 | )); 110 | new_pages.push(PageNumber::new( 111 | page.region, 112 | page.page_index * 2 + 1, 113 | page.page_order - 1, 114 | )); 115 | } 116 | } 117 | pages = new_pages; 118 | if !progress { 119 | break; 120 | } 121 | } 122 | 123 | pages 124 | } 125 | 126 | pub(crate) fn address_range( 127 | &self, 128 | data_section_offset: u64, 129 | region_size: u64, 130 | region_pages_start: u64, 131 | page_size: u32, 132 | ) -> Range { 133 | let regional_start = 134 | region_pages_start + u64::from(self.page_index) * self.page_size_bytes(page_size); 135 | debug_assert!(regional_start < region_size); 136 | let region_base = u64::from(self.region) * region_size; 137 | let start = data_section_offset + region_base + regional_start; 138 | let end = start + self.page_size_bytes(page_size); 139 | start..end 140 | } 141 | 142 | pub(crate) fn page_size_bytes(&self, page_size: u32) -> u64 { 143 | let pages = 1u64 << self.page_order; 144 | pages * u64::from(page_size) 145 | } 146 | } 147 | 148 | impl Debug for PageNumber { 149 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 150 | write!( 151 | f, 152 | "r{}.{}/{}", 153 | self.region, self.page_index, self.page_order 154 | ) 155 | } 156 | } 157 | 158 | pub(crate) trait Page { 159 | fn memory(&self) -> &[u8]; 160 | 161 | fn get_page_number(&self) -> PageNumber; 162 | } 163 | 164 | pub struct PageImpl { 165 | pub(super) mem: Arc<[u8]>, 166 | pub(super) page_number: PageNumber, 167 | #[cfg(debug_assertions)] 168 | pub(super) open_pages: Arc>>, 169 | } 170 | 171 | impl PageImpl { 172 | pub(crate) fn to_arc(&self) -> Arc<[u8]> { 173 | self.mem.clone() 174 | } 175 | } 176 | 177 | impl Debug for PageImpl { 178 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 179 | f.write_fmt(format_args!("PageImpl: page_number={:?}", self.page_number)) 180 | } 181 | } 182 | 183 | #[cfg(debug_assertions)] 184 | impl Drop for PageImpl { 185 | fn drop(&mut self) { 186 | let mut open_pages = self.open_pages.lock().unwrap(); 187 | let value = open_pages.get_mut(&self.page_number).unwrap(); 188 | assert!(*value > 0); 189 | *value -= 1; 190 | if *value == 0 { 191 | open_pages.remove(&self.page_number); 192 | } 193 | } 194 | } 195 | 196 | impl Page for PageImpl { 197 | fn memory(&self) -> &[u8] { 198 | self.mem.as_ref() 199 | } 200 | 201 | fn get_page_number(&self) -> PageNumber { 202 | self.page_number 203 | } 204 | } 205 | 206 | impl Clone for PageImpl { 207 | fn clone(&self) -> Self { 208 | #[cfg(debug_assertions)] 209 | { 210 | *self 211 | .open_pages 212 | .lock() 213 | .unwrap() 214 | .get_mut(&self.page_number) 215 | .unwrap() += 1; 216 | } 217 | Self { 218 | mem: self.mem.clone(), 219 | 
page_number: self.page_number, 220 | #[cfg(debug_assertions)] 221 | open_pages: self.open_pages.clone(), 222 | } 223 | } 224 | } 225 | 226 | pub(crate) struct PageMut { 227 | pub(super) mem: WritablePage, 228 | pub(super) page_number: PageNumber, 229 | #[cfg(debug_assertions)] 230 | pub(super) open_pages: Arc>>, 231 | } 232 | 233 | impl PageMut { 234 | pub(crate) fn memory_mut(&mut self) -> &mut [u8] { 235 | self.mem.mem_mut() 236 | } 237 | } 238 | 239 | impl Page for PageMut { 240 | fn memory(&self) -> &[u8] { 241 | self.mem.mem() 242 | } 243 | 244 | fn get_page_number(&self) -> PageNumber { 245 | self.page_number 246 | } 247 | } 248 | 249 | #[cfg(debug_assertions)] 250 | impl Drop for PageMut { 251 | fn drop(&mut self) { 252 | assert!(self.open_pages.lock().unwrap().remove(&self.page_number)); 253 | } 254 | } 255 | 256 | #[derive(Copy, Clone)] 257 | pub(crate) enum PageHint { 258 | None, 259 | Clean, 260 | } 261 | 262 | pub(crate) enum PageTrackerPolicy { 263 | Ignore, 264 | Track(HashSet), 265 | Closed, 266 | } 267 | 268 | impl PageTrackerPolicy { 269 | pub(crate) fn new_tracking() -> Self { 270 | PageTrackerPolicy::Track(HashSet::new()) 271 | } 272 | 273 | pub(crate) fn is_empty(&self) -> bool { 274 | match self { 275 | PageTrackerPolicy::Ignore | PageTrackerPolicy::Closed => true, 276 | PageTrackerPolicy::Track(x) => x.is_empty(), 277 | } 278 | } 279 | 280 | pub(super) fn remove(&mut self, page: PageNumber) { 281 | match self { 282 | PageTrackerPolicy::Ignore => {} 283 | PageTrackerPolicy::Track(x) => { 284 | assert!(x.remove(&page)); 285 | } 286 | PageTrackerPolicy::Closed => { 287 | panic!("Page tracker is closed"); 288 | } 289 | } 290 | } 291 | 292 | pub(super) fn insert(&mut self, page: PageNumber) { 293 | match self { 294 | PageTrackerPolicy::Ignore => {} 295 | PageTrackerPolicy::Track(x) => { 296 | assert!(x.insert(page)); 297 | } 298 | PageTrackerPolicy::Closed => { 299 | panic!("Page tracker is closed"); 300 | } 301 | } 302 | } 303 | 304 | pub(crate) fn close(&mut self) -> HashSet { 305 | let old = mem::replace(self, PageTrackerPolicy::Closed); 306 | match old { 307 | PageTrackerPolicy::Ignore => HashSet::new(), 308 | PageTrackerPolicy::Track(x) => x, 309 | PageTrackerPolicy::Closed => { 310 | panic!("Page tracker is closed"); 311 | } 312 | } 313 | } 314 | } 315 | 316 | #[cfg(test)] 317 | mod test { 318 | use crate::tree_store::PageNumber; 319 | 320 | #[test] 321 | fn last_page() { 322 | let region_data_size = 2u64.pow(32); 323 | let page_size = 4096; 324 | let pages_per_region = region_data_size / page_size; 325 | let region_header_size = 2u64.pow(16); 326 | let last_page_index = pages_per_region - 1; 327 | let page_number = PageNumber::new(1, last_page_index.try_into().unwrap(), 0); 328 | page_number.address_range( 329 | 4096, 330 | region_data_size + region_header_size, 331 | region_header_size, 332 | page_size.try_into().unwrap(), 333 | ); 334 | } 335 | } 336 | -------------------------------------------------------------------------------- /src/tree_store/page_store/bitmap.rs: -------------------------------------------------------------------------------- 1 | use std::mem::size_of; 2 | 3 | const HEIGHT_OFFSET: usize = 0; 4 | const END_OFFSETS: usize = HEIGHT_OFFSET + size_of::(); 5 | 6 | pub(crate) struct BtreeBitmap { 7 | heights: Vec, 8 | } 9 | 10 | // Stores a 64-way bit-tree of allocated ids. 11 | // 12 | // Data structure format: 13 | // height: u32 14 | // layer_ends: array of u32, ending offset in bytes of layers. 
15 | // layer data: u64s 16 | // ...consecutive layers. Except for the last level, all sub-trees of the root must be complete 17 | impl BtreeBitmap { 18 | pub(crate) fn count_unset(&self) -> u32 { 19 | self.get_level(self.get_height() - 1).count_unset() 20 | } 21 | 22 | pub(crate) fn has_unset(&self) -> bool { 23 | self.get_level(self.get_height() - 1).any_unset() 24 | } 25 | 26 | pub(crate) fn get(&self, i: u32) -> bool { 27 | self.get_level(self.get_height() - 1).get(i) 28 | } 29 | 30 | pub(crate) fn capacity(&self) -> u32 { 31 | self.get_level(self.get_height() - 1).capacity() 32 | } 33 | 34 | pub(crate) fn len(&self) -> u32 { 35 | self.get_level(self.get_height() - 1).len() 36 | } 37 | 38 | pub(crate) fn find_first_unset(&self) -> Option { 39 | if let Some(mut entry) = self.get_level(0).first_unset(0, 64) { 40 | let mut height = 0; 41 | 42 | while height < self.get_height() - 1 { 43 | height += 1; 44 | entry *= 64; 45 | entry = self 46 | .get_level(height) 47 | .first_unset(entry, entry + 64) 48 | .unwrap(); 49 | } 50 | 51 | Some(entry) 52 | } else { 53 | None 54 | } 55 | } 56 | 57 | fn get_level(&self, i: u32) -> &U64GroupedBitmap { 58 | assert!(i < self.get_height()); 59 | &self.heights[i as usize] 60 | } 61 | 62 | fn get_height(&self) -> u32 { 63 | self.heights.len().try_into().unwrap() 64 | } 65 | 66 | pub(crate) fn to_vec(&self) -> Vec { 67 | let mut result = vec![]; 68 | let height: u32 = self.heights.len().try_into().unwrap(); 69 | result.extend(height.to_le_bytes()); 70 | 71 | let vecs: Vec> = self.heights.iter().map(|x| x.to_vec()).collect(); 72 | let mut data_offset = END_OFFSETS + self.heights.len() * size_of::(); 73 | let end_metadata = data_offset; 74 | for serialized in &vecs { 75 | data_offset += serialized.len(); 76 | let offset_u32: u32 = data_offset.try_into().unwrap(); 77 | result.extend(offset_u32.to_le_bytes()); 78 | } 79 | 80 | assert_eq!(end_metadata, result.len()); 81 | for serialized in &vecs { 82 | result.extend(serialized); 83 | } 84 | 85 | result 86 | } 87 | 88 | pub(crate) fn from_bytes(data: &[u8]) -> Self { 89 | let height = u32::from_le_bytes( 90 | data[HEIGHT_OFFSET..(HEIGHT_OFFSET + size_of::())] 91 | .try_into() 92 | .unwrap(), 93 | ); 94 | 95 | let mut metadata = END_OFFSETS; 96 | let mut data_start = END_OFFSETS + (height as usize) * size_of::(); 97 | 98 | let mut heights = vec![]; 99 | for _ in 0..height { 100 | let data_end = u32::from_le_bytes( 101 | data[metadata..(metadata + size_of::())] 102 | .try_into() 103 | .unwrap(), 104 | ) as usize; 105 | heights.push(U64GroupedBitmap::from_bytes(&data[data_start..data_end])); 106 | data_start = data_end; 107 | metadata += size_of::(); 108 | } 109 | 110 | Self { heights } 111 | } 112 | 113 | // Initializes a new allocator, with no ids free 114 | pub(crate) fn new(mut capacity: u32) -> Self { 115 | let mut heights = vec![]; 116 | 117 | // Build from the leaf to root 118 | loop { 119 | heights.push(U64GroupedBitmap::new_full(capacity, capacity)); 120 | if capacity <= 64 { 121 | break; 122 | } 123 | capacity = capacity.div_ceil(64); 124 | } 125 | 126 | // Reverse so that the root as index 0 127 | heights.reverse(); 128 | 129 | Self { heights } 130 | } 131 | 132 | // Returns the first unset id, and sets it 133 | pub(crate) fn alloc(&mut self) -> Option { 134 | let entry = self.find_first_unset()?; 135 | self.set(entry); 136 | Some(entry) 137 | } 138 | 139 | pub(crate) fn set(&mut self, i: u32) { 140 | let full = self.get_level_mut(self.get_height() - 1).set(i); 141 | self.update_to_root(i, full); 142 | } 
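    // A hedged usage sketch (illustration only, not part of the original file): `new()`
    // starts with every id marked allocated, `clear()` releases an id, and `set()`/`alloc()`
    // claim one, propagating "group full" information up toward the root level so that
    // `find_first_unset` can skip exhausted subtrees. The numbers below are made up and
    // only the methods defined in this impl are assumed.
    //
    //     let mut ids = BtreeBitmap::new(130);   // two levels: a root word over a 130-bit leaf layer
    //     ids.clear(7);
    //     ids.clear(65);
    //     assert_eq!(ids.find_first_unset(), Some(7));
    //     assert_eq!(ids.alloc(), Some(7));      // sets bit 7 and updates the parent level
    //     assert_eq!(ids.alloc(), Some(65));
    //     assert_eq!(ids.alloc(), None);         // nothing left unset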
143 | 144 | pub(crate) fn clear(&mut self, i: u32) { 145 | self.get_level_mut(self.get_height() - 1).clear(i); 146 | self.update_to_root(i, false); 147 | } 148 | 149 | fn get_level_mut(&mut self, i: u32) -> &mut U64GroupedBitmap { 150 | assert!(i < self.get_height()); 151 | &mut self.heights[i as usize] 152 | } 153 | 154 | // Recursively update to the root, starting at the given entry in the given height 155 | // full parameter must be set if all bits in the entry's group of u64 are full 156 | fn update_to_root(&mut self, i: u32, mut full: bool) { 157 | if self.get_height() == 1 { 158 | return; 159 | } 160 | 161 | let mut parent_height = self.get_height() - 2; 162 | let mut parent_entry = i / 64; 163 | loop { 164 | full = if full { 165 | self.get_level_mut(parent_height).set(parent_entry) 166 | } else { 167 | self.get_level_mut(parent_height).clear(parent_entry); 168 | false 169 | }; 170 | 171 | if parent_height == 0 { 172 | break; 173 | } 174 | parent_height -= 1; 175 | parent_entry /= 64; 176 | } 177 | } 178 | } 179 | 180 | #[cfg(any(test, fuzzing))] 181 | pub(crate) struct U64GroupedBitmapIter<'a> { 182 | len: u32, 183 | data: &'a [u64], 184 | data_index: usize, 185 | current: u64, 186 | } 187 | 188 | #[cfg(any(test, fuzzing))] 189 | impl<'a> U64GroupedBitmapIter<'a> { 190 | fn new(len: u32, data: &'a [u64]) -> Self { 191 | Self { 192 | len, 193 | data, 194 | data_index: 0, 195 | current: data[0], 196 | } 197 | } 198 | } 199 | 200 | #[cfg(any(test, fuzzing))] 201 | impl Iterator for U64GroupedBitmapIter<'_> { 202 | type Item = u32; 203 | 204 | fn next(&mut self) -> Option { 205 | let data_index_u32: u32 = self.data_index.try_into().unwrap(); 206 | if data_index_u32 * u64::BITS >= self.len { 207 | return None; 208 | } 209 | if self.current != 0 { 210 | let mut result: u32 = self.data_index.try_into().unwrap(); 211 | result *= u64::BITS; 212 | let bit = self.current.trailing_zeros(); 213 | result += bit; 214 | self.current &= !U64GroupedBitmap::select_mask(bit as usize); 215 | if result >= self.len { 216 | return None; 217 | } 218 | return Some(result); 219 | } 220 | self.data_index += 1; 221 | while self.data_index < self.data.len() { 222 | let next = self.data[self.data_index]; 223 | if next != 0 { 224 | self.current = next; 225 | return self.next(); 226 | } 227 | self.data_index += 1; 228 | } 229 | None 230 | } 231 | } 232 | 233 | // A bitmap which groups consecutive groups of 64bits together 234 | pub(crate) struct U64GroupedBitmap { 235 | len: u32, 236 | data: Vec, 237 | } 238 | 239 | impl U64GroupedBitmap { 240 | fn required_words(elements: u32) -> usize { 241 | let words = elements.div_ceil(64); 242 | words as usize 243 | } 244 | 245 | pub fn new_full(len: u32, capacity: u32) -> Self { 246 | let data = vec![u64::MAX; Self::required_words(capacity)]; 247 | Self { len, data } 248 | } 249 | 250 | pub fn new_empty(len: u32, capacity: u32) -> Self { 251 | let data = vec![0; Self::required_words(capacity)]; 252 | Self { len, data } 253 | } 254 | 255 | // Format: 256 | // 4 bytes: number of elements 257 | // n bytes: serialized groups 258 | pub fn to_vec(&self) -> Vec { 259 | let mut result = vec![]; 260 | result.extend(self.len.to_le_bytes()); 261 | for x in &self.data { 262 | result.extend(x.to_le_bytes()); 263 | } 264 | result 265 | } 266 | 267 | pub fn from_bytes(serialized: &[u8]) -> Self { 268 | assert_eq!(0, (serialized.len() - size_of::()) % size_of::()); 269 | let mut data = vec![]; 270 | let len = u32::from_le_bytes(serialized[..size_of::()].try_into().unwrap()); 271 | let words 
= (serialized.len() - size_of::()) / size_of::(); 272 | for i in 0..words { 273 | let start = size_of::() + i * size_of::(); 274 | let value = u64::from_le_bytes( 275 | serialized[start..(start + size_of::())] 276 | .try_into() 277 | .unwrap(), 278 | ); 279 | data.push(value); 280 | } 281 | 282 | Self { len, data } 283 | } 284 | 285 | fn data_index_of(bit: u32) -> (usize, usize) { 286 | ((bit as usize) / 64, (bit as usize) % 64) 287 | } 288 | 289 | fn select_mask(bit: usize) -> u64 { 290 | 1u64 << (bit as u64) 291 | } 292 | 293 | fn count_unset(&self) -> u32 { 294 | self.data.iter().map(|x| x.count_zeros()).sum() 295 | } 296 | 297 | #[cfg(any(test, fuzzing))] 298 | pub fn iter(&self) -> U64GroupedBitmapIter { 299 | U64GroupedBitmapIter::new(self.len, &self.data) 300 | } 301 | 302 | pub fn capacity(&self) -> u32 { 303 | let len: u32 = self.data.len().try_into().unwrap(); 304 | len * u64::BITS 305 | } 306 | 307 | fn any_unset(&self) -> bool { 308 | self.data.iter().any(|x| x.count_zeros() > 0) 309 | } 310 | 311 | fn first_unset(&self, start_bit: u32, end_bit: u32) -> Option { 312 | assert_eq!(end_bit, (start_bit - start_bit % 64) + 64); 313 | 314 | let (index, bit) = Self::data_index_of(start_bit); 315 | let mask = (1 << bit) - 1; 316 | let group = self.data[index]; 317 | let group = group | mask; 318 | match group.trailing_ones() { 319 | 64 => None, 320 | x => Some(start_bit + x - u32::try_from(bit).unwrap()), 321 | } 322 | } 323 | 324 | pub fn len(&self) -> u32 { 325 | self.len 326 | } 327 | 328 | // TODO: thread this through up to BuddyAllocator 329 | #[allow(dead_code)] 330 | pub fn resize(&mut self, new_len: u32) { 331 | assert!(new_len < self.capacity()); 332 | self.len = new_len; 333 | } 334 | 335 | pub fn get(&self, bit: u32) -> bool { 336 | assert!(bit < self.len); 337 | let (index, bit_index) = Self::data_index_of(bit); 338 | let group = self.data[index]; 339 | group & U64GroupedBitmap::select_mask(bit_index) != 0 340 | } 341 | 342 | // Returns true iff the bit's group is all set 343 | pub fn set(&mut self, bit: u32) -> bool { 344 | assert!(bit < self.len); 345 | let (index, bit_index) = Self::data_index_of(bit); 346 | let mut group = self.data[index]; 347 | group |= Self::select_mask(bit_index); 348 | self.data[index] = group; 349 | 350 | group == u64::MAX 351 | } 352 | 353 | pub fn clear(&mut self, bit: u32) { 354 | assert!(bit < self.len, "{bit} must be less than {}", self.len); 355 | let (index, bit_index) = Self::data_index_of(bit); 356 | self.data[index] &= !Self::select_mask(bit_index); 357 | } 358 | } 359 | 360 | #[cfg(test)] 361 | mod test { 362 | use crate::tree_store::page_store::bitmap::{BtreeBitmap, U64GroupedBitmap}; 363 | use rand::prelude::IteratorRandom; 364 | use rand::rngs::StdRng; 365 | use rand::{Rng, SeedableRng}; 366 | use std::collections::HashSet; 367 | 368 | #[test] 369 | fn alloc() { 370 | let num_pages = 2; 371 | let mut allocator = BtreeBitmap::new(num_pages); 372 | for i in 0..num_pages { 373 | allocator.clear(i); 374 | } 375 | for i in 0..num_pages { 376 | assert_eq!(i, allocator.alloc().unwrap()); 377 | } 378 | assert!(allocator.alloc().is_none()); 379 | } 380 | 381 | #[test] 382 | fn record_alloc() { 383 | let mut allocator = BtreeBitmap::new(2); 384 | allocator.clear(0); 385 | allocator.clear(1); 386 | allocator.set(0); 387 | assert_eq!(1, allocator.alloc().unwrap()); 388 | assert!(allocator.alloc().is_none()); 389 | } 390 | 391 | #[test] 392 | fn free() { 393 | let mut allocator = BtreeBitmap::new(1); 394 | allocator.clear(0); 395 | assert_eq!(0, 
allocator.alloc().unwrap()); 396 | assert!(allocator.alloc().is_none()); 397 | allocator.clear(0); 398 | assert_eq!(0, allocator.alloc().unwrap()); 399 | } 400 | 401 | #[test] 402 | fn reuse_lowest() { 403 | let num_pages = 65; 404 | let mut allocator = BtreeBitmap::new(num_pages); 405 | for i in 0..num_pages { 406 | allocator.clear(i); 407 | } 408 | for i in 0..num_pages { 409 | assert_eq!(i, allocator.alloc().unwrap()); 410 | } 411 | allocator.clear(5); 412 | allocator.clear(15); 413 | assert_eq!(5, allocator.alloc().unwrap()); 414 | assert_eq!(15, allocator.alloc().unwrap()); 415 | assert!(allocator.alloc().is_none()); 416 | } 417 | 418 | #[test] 419 | fn all_space_used() { 420 | let num_pages = 65; 421 | let mut allocator = BtreeBitmap::new(num_pages); 422 | for i in 0..num_pages { 423 | allocator.clear(i); 424 | } 425 | // Allocate everything 426 | while allocator.alloc().is_some() {} 427 | // The last u64 must be used, since the leaf layer is compact 428 | assert_eq!( 429 | u64::MAX, 430 | *allocator.heights.last().unwrap().data.last().unwrap() 431 | ); 432 | } 433 | 434 | #[test] 435 | fn find_free() { 436 | let num_pages = 129; 437 | let mut allocator = BtreeBitmap::new(num_pages); 438 | assert!(allocator.find_first_unset().is_none()); 439 | allocator.clear(128); 440 | assert_eq!(allocator.find_first_unset().unwrap(), 128); 441 | allocator.clear(65); 442 | assert_eq!(allocator.find_first_unset().unwrap(), 65); 443 | allocator.clear(8); 444 | assert_eq!(allocator.find_first_unset().unwrap(), 8); 445 | allocator.clear(0); 446 | assert_eq!(allocator.find_first_unset().unwrap(), 0); 447 | } 448 | 449 | #[test] 450 | fn iter() { 451 | let num_pages = 129; 452 | let mut bitmap = U64GroupedBitmap::new_empty(num_pages, num_pages); 453 | let values = [0, 1, 33, 63, 64, 65, 90, 126, 127, 128]; 454 | for x in values { 455 | bitmap.set(x); 456 | } 457 | for (i, x) in bitmap.iter().enumerate() { 458 | assert_eq!(values[i], x); 459 | } 460 | assert_eq!(bitmap.iter().count(), values.len()); 461 | } 462 | 463 | #[test] 464 | fn random_pattern() { 465 | let seed = rand::rng().random(); 466 | // Print the seed to debug for reproducibility, in case this test fails 467 | println!("seed={seed}"); 468 | let mut rng = StdRng::seed_from_u64(seed); 469 | 470 | let num_pages = rng.random_range(2..10000); 471 | let mut allocator = BtreeBitmap::new(num_pages); 472 | for i in 0..num_pages { 473 | allocator.clear(i); 474 | } 475 | let mut allocated = HashSet::new(); 476 | 477 | for _ in 0..(num_pages * 2) { 478 | if rng.random_bool(0.75) { 479 | if let Some(page) = allocator.alloc() { 480 | allocated.insert(page); 481 | } else { 482 | assert_eq!(allocated.len(), num_pages as usize); 483 | } 484 | } else if let Some(to_free) = allocated.iter().choose(&mut rng).copied() { 485 | allocator.clear(to_free); 486 | allocated.remove(&to_free); 487 | } 488 | } 489 | 490 | for _ in allocated.len()..(num_pages as usize) { 491 | allocator.alloc().unwrap(); 492 | } 493 | assert!(allocator.alloc().is_none()); 494 | 495 | for i in 0..num_pages { 496 | allocator.clear(i); 497 | } 498 | 499 | for _ in 0..num_pages { 500 | allocator.alloc().unwrap(); 501 | } 502 | assert!(allocator.alloc().is_none()); 503 | } 504 | } 505 | -------------------------------------------------------------------------------- /src/tree_store/page_store/file_backend/fallback.rs: -------------------------------------------------------------------------------- 1 | use crate::{DatabaseError, Result, StorageBackend}; 2 | use std::fs::File; 3 | use std::io; 4 
| use std::io::{Read, Seek, SeekFrom, Write};
5 | use std::sync::Mutex;
6 | 
7 | /// Stores a database as a file on-disk.
8 | #[derive(Debug)]
9 | pub struct FileBackend {
10 |     file: Mutex<File>,
11 | }
12 | 
13 | impl FileBackend {
14 |     /// Creates a new backend which stores data to the given file.
15 |     pub fn new(file: File) -> Result<Self, DatabaseError> {
16 |         Ok(Self {
17 |             file: Mutex::new(file),
18 |         })
19 |     }
20 | }
21 | 
22 | impl StorageBackend for FileBackend {
23 |     fn len(&self) -> Result<u64, io::Error> {
24 |         Ok(self.file.lock().unwrap().metadata()?.len())
25 |     }
26 | 
27 |     fn read(&self, offset: u64, len: usize) -> Result<Vec<u8>, io::Error> {
28 |         let mut result = vec![0; len];
29 |         let mut file = self.file.lock().unwrap();
30 |         file.seek(SeekFrom::Start(offset))?;
31 |         file.read_exact(&mut result)?;
32 |         Ok(result)
33 |     }
34 | 
35 |     fn set_len(&self, len: u64) -> Result<(), io::Error> {
36 |         self.file.lock().unwrap().set_len(len)
37 |     }
38 | 
39 |     fn sync_data(&self, _eventual: bool) -> Result<(), io::Error> {
40 |         self.file.lock().unwrap().sync_data()
41 |     }
42 | 
43 |     fn write(&self, offset: u64, data: &[u8]) -> Result<(), io::Error> {
44 |         let mut file = self.file.lock().unwrap();
45 |         file.seek(SeekFrom::Start(offset))?;
46 |         file.write_all(data)
47 |     }
48 | }
49 | 
--------------------------------------------------------------------------------
/src/tree_store/page_store/file_backend/mod.rs:
--------------------------------------------------------------------------------
1 | #[cfg(any(unix, target_os = "wasi"))]
2 | mod unix;
3 | #[cfg(any(unix, target_os = "wasi"))]
4 | pub use unix::FileBackend;
5 | 
6 | #[cfg(windows)]
7 | mod windows;
8 | #[cfg(windows)]
9 | pub use windows::FileBackend;
10 | 
11 | #[cfg(not(any(windows, unix, target_os = "wasi")))]
12 | mod fallback;
13 | #[cfg(not(any(windows, unix, target_os = "wasi")))]
14 | pub use fallback::FileBackend;
15 | 
--------------------------------------------------------------------------------
/src/tree_store/page_store/file_backend/unix.rs:
--------------------------------------------------------------------------------
1 | // TODO once Rust's libc has flock implemented for WASI, this file needs to be revisited.
2 | // What needs to be changed is commented below.
3 | // See also: https://github.com/WebAssembly/wasi-filesystem/issues/2
4 | 
5 | // Remove this line once wasi-libc has flock
6 | #![cfg_attr(target_os = "wasi", allow(unused_imports))]
7 | 
8 | use crate::{DatabaseError, Result, StorageBackend};
9 | use std::fs::File;
10 | use std::io;
11 | 
12 | #[cfg(unix)]
13 | use std::os::unix::{fs::FileExt, io::AsRawFd};
14 | 
15 | #[cfg(target_os = "wasi")]
16 | use std::os::wasi::{fs::FileExt, io::AsRawFd};
17 | 
18 | /// Stores a database as a file on-disk.
19 | #[derive(Debug)]
20 | pub struct FileBackend {
21 |     file: File,
22 | }
23 | 
24 | impl FileBackend {
25 |     /// Creates a new backend which stores data to the given file.
26 |     // This is a no-op until we get flock in wasi-libc.
27 |     // Delete this function when we get flock.
28 |     #[cfg(target_os = "wasi")]
29 |     pub fn new(file: File) -> Result<Self, DatabaseError> {
30 |         Ok(Self { file })
31 |     }
32 | 
33 |     /// Creates a new backend which stores data to the given file.
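    /// Takes an exclusive advisory `flock` on the file and returns
    /// `DatabaseError::DatabaseAlreadyOpen` if another process already holds it.
    ///
    /// A hedged usage sketch (illustrative only; it assumes the crate's public
    /// `redb::backends::FileBackend` re-export and the `Builder::create_with_backend`
    /// method, and the file path is made up):
    /// ```no_run
    /// let file = std::fs::OpenOptions::new()
    ///     .read(true)
    ///     .write(true)
    ///     .create(true)
    ///     .open("example.redb")
    ///     .unwrap();
    /// let backend = redb::backends::FileBackend::new(file).unwrap();
    /// let _db = redb::Database::builder()
    ///     .create_with_backend(backend)
    ///     .unwrap();
    /// ```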
34 | #[cfg(unix)] // remove this line when wasi-libc gets flock 35 | pub fn new(file: File) -> Result { 36 | let fd = file.as_raw_fd(); 37 | let result = unsafe { libc::flock(fd, libc::LOCK_EX | libc::LOCK_NB) }; 38 | if result != 0 { 39 | let err = io::Error::last_os_error(); 40 | if err.kind() == io::ErrorKind::WouldBlock { 41 | Err(DatabaseError::DatabaseAlreadyOpen) 42 | } else { 43 | Err(err.into()) 44 | } 45 | } else { 46 | Ok(Self { file }) 47 | } 48 | } 49 | } 50 | 51 | impl StorageBackend for FileBackend { 52 | fn len(&self) -> Result { 53 | Ok(self.file.metadata()?.len()) 54 | } 55 | 56 | fn read(&self, offset: u64, len: usize) -> Result, io::Error> { 57 | let mut buffer = vec![0; len]; 58 | self.file.read_exact_at(&mut buffer, offset)?; 59 | Ok(buffer) 60 | } 61 | 62 | fn set_len(&self, len: u64) -> Result<(), io::Error> { 63 | self.file.set_len(len) 64 | } 65 | 66 | #[cfg(not(target_os = "macos"))] 67 | fn sync_data(&self, _: bool) -> Result<(), io::Error> { 68 | self.file.sync_data() 69 | } 70 | 71 | #[cfg(target_os = "macos")] 72 | fn sync_data(&self, eventual: bool) -> Result<(), io::Error> { 73 | if eventual { 74 | let code = unsafe { libc::fcntl(self.file.as_raw_fd(), libc::F_BARRIERFSYNC) }; 75 | if code == -1 { 76 | Err(io::Error::last_os_error()) 77 | } else { 78 | Ok(()) 79 | } 80 | } else { 81 | self.file.sync_data() 82 | } 83 | } 84 | 85 | fn write(&self, offset: u64, data: &[u8]) -> Result<(), io::Error> { 86 | self.file.write_all_at(data, offset) 87 | } 88 | 89 | #[cfg(unix)] // remove this line when wasi-libc gets flock 90 | fn close(&self) -> Result<(), io::Error> { 91 | unsafe { libc::flock(self.file.as_raw_fd(), libc::LOCK_UN) }; 92 | 93 | Ok(()) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/tree_store/page_store/file_backend/windows.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::upper_case_acronyms)] 2 | 3 | use crate::{DatabaseError, Result, StorageBackend}; 4 | use std::fs::File; 5 | use std::io; 6 | use std::io::Error; 7 | use std::os::windows::fs::FileExt; 8 | use std::os::windows::io::AsRawHandle; 9 | use std::os::windows::io::RawHandle; 10 | 11 | const ERROR_LOCK_VIOLATION: i32 = 0x21; 12 | const ERROR_IO_PENDING: i32 = 997; 13 | 14 | unsafe extern "system" { 15 | /// 16 | fn LockFile( 17 | file: RawHandle, 18 | offset_low: u32, 19 | offset_high: u32, 20 | length_low: u32, 21 | length_high: u32, 22 | ) -> i32; 23 | 24 | /// 25 | fn UnlockFile( 26 | file: RawHandle, 27 | offset_low: u32, 28 | offset_high: u32, 29 | length_low: u32, 30 | length_high: u32, 31 | ) -> i32; 32 | } 33 | 34 | /// Stores a database as a file on-disk. 35 | #[derive(Debug)] 36 | pub struct FileBackend { 37 | file: File, 38 | } 39 | 40 | impl FileBackend { 41 | /// Creates a new backend which stores data to the given file. 
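    /// The constructor takes an exclusive `LockFile` range lock over the whole file, so a
    /// second backend opened on the same file is expected to fail while the first one is
    /// alive. A hedged sketch of that behaviour (illustrative only; the path is made up and
    /// the `redb::backends` re-export is assumed):
    /// ```no_run
    /// # use redb::backends::FileBackend;
    /// # use redb::DatabaseError;
    /// let f1 = std::fs::OpenOptions::new().read(true).write(true).create(true).open("example.redb").unwrap();
    /// let f2 = std::fs::OpenOptions::new().read(true).write(true).open("example.redb").unwrap();
    /// let _held = FileBackend::new(f1).unwrap();
    /// assert!(matches!(FileBackend::new(f2), Err(DatabaseError::DatabaseAlreadyOpen)));
    /// ```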
42 | pub fn new(file: File) -> Result { 43 | let handle = file.as_raw_handle(); 44 | unsafe { 45 | let result = LockFile(handle, 0, 0, u32::MAX, u32::MAX); 46 | 47 | if result == 0 { 48 | let err = io::Error::last_os_error(); 49 | return if err.raw_os_error() == Some(ERROR_IO_PENDING) 50 | || err.raw_os_error() == Some(ERROR_LOCK_VIOLATION) 51 | { 52 | Err(DatabaseError::DatabaseAlreadyOpen) 53 | } else { 54 | Err(err.into()) 55 | }; 56 | } 57 | }; 58 | 59 | Ok(Self { file }) 60 | } 61 | } 62 | 63 | impl StorageBackend for FileBackend { 64 | fn len(&self) -> Result { 65 | Ok(self.file.metadata()?.len()) 66 | } 67 | 68 | fn read(&self, mut offset: u64, len: usize) -> Result, io::Error> { 69 | let mut buffer = vec![0; len]; 70 | let mut data_offset = 0; 71 | while data_offset < buffer.len() { 72 | let read = self.file.seek_read(&mut buffer[data_offset..], offset)?; 73 | offset += read as u64; 74 | data_offset += read; 75 | } 76 | Ok(buffer) 77 | } 78 | 79 | fn set_len(&self, len: u64) -> Result<(), io::Error> { 80 | self.file.set_len(len) 81 | } 82 | 83 | fn sync_data(&self, _: bool) -> Result<(), io::Error> { 84 | self.file.sync_data() 85 | } 86 | 87 | fn write(&self, mut offset: u64, data: &[u8]) -> Result<(), io::Error> { 88 | let mut data_offset = 0; 89 | while data_offset < data.len() { 90 | let written = self.file.seek_write(&data[data_offset..], offset)?; 91 | offset += written as u64; 92 | data_offset += written; 93 | } 94 | Ok(()) 95 | } 96 | 97 | fn close(&self) -> Result<(), Error> { 98 | unsafe { UnlockFile(self.file.as_raw_handle(), 0, 0, u32::MAX, u32::MAX) }; 99 | 100 | Ok(()) 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/tree_store/page_store/in_memory_backend.rs: -------------------------------------------------------------------------------- 1 | use crate::StorageBackend; 2 | use std::io; 3 | use std::sync::*; 4 | 5 | /// Acts as temporal in-memory database storage. 6 | #[derive(Debug, Default)] 7 | pub struct InMemoryBackend(RwLock>); 8 | 9 | impl InMemoryBackend { 10 | fn out_of_range() -> io::Error { 11 | io::Error::new(io::ErrorKind::InvalidInput, "Index out-of-range.") 12 | } 13 | } 14 | 15 | impl InMemoryBackend { 16 | /// Creates a new, empty memory backend. 17 | pub fn new() -> Self { 18 | Self::default() 19 | } 20 | 21 | /// Gets a read guard for this backend. 22 | fn read(&self) -> RwLockReadGuard<'_, Vec> { 23 | self.0.read().expect("Could not acquire read lock.") 24 | } 25 | 26 | /// Gets a write guard for this backend. 
27 | fn write(&self) -> RwLockWriteGuard<'_, Vec> { 28 | self.0.write().expect("Could not acquire write lock.") 29 | } 30 | } 31 | 32 | impl StorageBackend for InMemoryBackend { 33 | fn len(&self) -> Result { 34 | Ok(self.read().len() as u64) 35 | } 36 | 37 | fn read(&self, offset: u64, len: usize) -> Result, io::Error> { 38 | let guard = self.read(); 39 | let offset = usize::try_from(offset).map_err(|_| Self::out_of_range())?; 40 | if offset + len <= guard.len() { 41 | Ok(guard[offset..offset + len].to_owned()) 42 | } else { 43 | Err(Self::out_of_range()) 44 | } 45 | } 46 | 47 | fn set_len(&self, len: u64) -> Result<(), io::Error> { 48 | let mut guard = self.write(); 49 | let len = usize::try_from(len).map_err(|_| Self::out_of_range())?; 50 | if guard.len() < len { 51 | let additional = len - guard.len(); 52 | guard.reserve(additional); 53 | for _ in 0..additional { 54 | guard.push(0); 55 | } 56 | } else { 57 | guard.truncate(len); 58 | } 59 | 60 | Ok(()) 61 | } 62 | 63 | fn sync_data(&self, _: bool) -> Result<(), io::Error> { 64 | Ok(()) 65 | } 66 | 67 | fn write(&self, offset: u64, data: &[u8]) -> Result<(), io::Error> { 68 | let mut guard = self.write(); 69 | let offset = usize::try_from(offset).map_err(|_| Self::out_of_range())?; 70 | if offset + data.len() <= guard.len() { 71 | guard[offset..offset + data.len()].copy_from_slice(data); 72 | Ok(()) 73 | } else { 74 | Err(Self::out_of_range()) 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/tree_store/page_store/layout.rs: -------------------------------------------------------------------------------- 1 | use crate::tree_store::page_store::region::RegionHeader; 2 | use std::ops::Range; 3 | 4 | fn round_up_to_multiple_of(value: u64, multiple: u64) -> u64 { 5 | if value % multiple == 0 { 6 | value 7 | } else { 8 | value + multiple - value % multiple 9 | } 10 | } 11 | 12 | // Regions are laid out starting with the allocator state header, followed by the pages aligned 13 | // to the next page 14 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 15 | pub(super) struct RegionLayout { 16 | num_pages: u32, 17 | // Offset where data pages start 18 | header_pages: u32, 19 | page_size: u32, 20 | } 21 | 22 | impl RegionLayout { 23 | pub(super) fn new(num_pages: u32, header_pages: u32, page_size: u32) -> Self { 24 | assert!(num_pages > 0); 25 | Self { 26 | num_pages, 27 | header_pages, 28 | page_size, 29 | } 30 | } 31 | 32 | pub(super) fn calculate( 33 | desired_usable_bytes: u64, 34 | page_capacity: u32, 35 | page_size: u32, 36 | ) -> RegionLayout { 37 | assert!(desired_usable_bytes <= u64::from(page_capacity) * u64::from(page_size)); 38 | let header_pages = RegionHeader::header_pages_expensive(page_size, page_capacity); 39 | let num_pages = 40 | round_up_to_multiple_of(desired_usable_bytes, page_size.into()) / u64::from(page_size); 41 | 42 | Self { 43 | num_pages: num_pages.try_into().unwrap(), 44 | header_pages, 45 | page_size, 46 | } 47 | } 48 | 49 | fn full_region_layout(page_capacity: u32, page_size: u32) -> RegionLayout { 50 | let header_pages = RegionHeader::header_pages_expensive(page_size, page_capacity); 51 | 52 | Self { 53 | num_pages: page_capacity, 54 | header_pages, 55 | page_size, 56 | } 57 | } 58 | 59 | pub(super) fn data_section(&self) -> Range { 60 | let header_bytes = u64::from(self.header_pages) * u64::from(self.page_size); 61 | header_bytes..(header_bytes + self.usable_bytes()) 62 | } 63 | 64 | pub(super) fn get_header_pages(&self) -> u32 { 65 | self.header_pages 66 
| } 67 | 68 | pub(super) fn num_pages(&self) -> u32 { 69 | self.num_pages 70 | } 71 | 72 | pub(super) fn page_size(&self) -> u32 { 73 | self.page_size 74 | } 75 | 76 | pub(super) fn len(&self) -> u64 { 77 | u64::from(self.header_pages) * u64::from(self.page_size) + self.usable_bytes() 78 | } 79 | 80 | pub(super) fn usable_bytes(&self) -> u64 { 81 | u64::from(self.page_size) * u64::from(self.num_pages) 82 | } 83 | } 84 | 85 | #[derive(Clone, Copy, Debug)] 86 | pub(crate) struct DatabaseLayout { 87 | full_region_layout: RegionLayout, 88 | num_full_regions: u32, 89 | trailing_partial_region: Option, 90 | } 91 | 92 | impl DatabaseLayout { 93 | pub(super) fn new( 94 | full_regions: u32, 95 | full_region: RegionLayout, 96 | trailing_region: Option, 97 | ) -> Self { 98 | Self { 99 | full_region_layout: full_region, 100 | num_full_regions: full_regions, 101 | trailing_partial_region: trailing_region, 102 | } 103 | } 104 | 105 | pub(super) fn reduce_last_region(&mut self, pages: u32) { 106 | if let Some(ref mut trailing) = self.trailing_partial_region { 107 | assert!(pages <= trailing.num_pages); 108 | trailing.num_pages -= pages; 109 | if trailing.num_pages == 0 { 110 | self.trailing_partial_region = None; 111 | } 112 | } else { 113 | self.num_full_regions -= 1; 114 | let full_layout = self.full_region_layout; 115 | if full_layout.num_pages > pages { 116 | self.trailing_partial_region = Some(RegionLayout::new( 117 | full_layout.num_pages - pages, 118 | full_layout.header_pages, 119 | full_layout.page_size, 120 | )); 121 | } 122 | } 123 | } 124 | 125 | pub(super) fn recalculate( 126 | file_len: u64, 127 | region_header_pages_u32: u32, 128 | region_max_data_pages_u32: u32, 129 | page_size_u32: u32, 130 | ) -> Self { 131 | let page_size = u64::from(page_size_u32); 132 | let region_header_pages = u64::from(region_header_pages_u32); 133 | let region_max_data_pages = u64::from(region_max_data_pages_u32); 134 | // Super-header 135 | let mut remaining = file_len - page_size; 136 | let full_region_size = (region_header_pages + region_max_data_pages) * page_size; 137 | let full_regions = remaining / full_region_size; 138 | remaining -= full_regions * full_region_size; 139 | let trailing = if remaining >= (region_header_pages + 1) * page_size { 140 | remaining -= region_header_pages * page_size; 141 | let remaining: u32 = remaining.try_into().unwrap(); 142 | let data_pages = remaining / page_size_u32; 143 | assert!(data_pages < region_max_data_pages_u32); 144 | Some(RegionLayout::new( 145 | data_pages, 146 | region_header_pages_u32, 147 | page_size_u32, 148 | )) 149 | } else { 150 | None 151 | }; 152 | let full_layout = RegionLayout::new( 153 | region_max_data_pages_u32, 154 | region_header_pages_u32, 155 | page_size_u32, 156 | ); 157 | 158 | Self { 159 | full_region_layout: full_layout, 160 | num_full_regions: full_regions.try_into().unwrap(), 161 | trailing_partial_region: trailing, 162 | } 163 | } 164 | 165 | pub(super) fn calculate(desired_usable_bytes: u64, page_capacity: u32, page_size: u32) -> Self { 166 | let full_region_layout = RegionLayout::full_region_layout(page_capacity, page_size); 167 | if desired_usable_bytes <= full_region_layout.usable_bytes() { 168 | // Single region layout 169 | let region_layout = 170 | RegionLayout::calculate(desired_usable_bytes, page_capacity, page_size); 171 | DatabaseLayout { 172 | full_region_layout, 173 | num_full_regions: 0, 174 | trailing_partial_region: Some(region_layout), 175 | } 176 | } else { 177 | // Multi region layout 178 | let full_regions = 
desired_usable_bytes / full_region_layout.usable_bytes(); 179 | let remaining_desired = 180 | desired_usable_bytes - full_regions * full_region_layout.usable_bytes(); 181 | assert!(full_regions > 0); 182 | let trailing_region = if remaining_desired > 0 { 183 | Some(RegionLayout::calculate( 184 | remaining_desired, 185 | page_capacity, 186 | page_size, 187 | )) 188 | } else { 189 | None 190 | }; 191 | if let Some(ref region) = trailing_region { 192 | // All regions must have the same header size 193 | assert_eq!(region.header_pages, full_region_layout.header_pages); 194 | } 195 | DatabaseLayout { 196 | full_region_layout, 197 | num_full_regions: full_regions.try_into().unwrap(), 198 | trailing_partial_region: trailing_region, 199 | } 200 | } 201 | } 202 | 203 | pub(super) fn full_region_layout(&self) -> &RegionLayout { 204 | &self.full_region_layout 205 | } 206 | 207 | pub(super) fn trailing_region_layout(&self) -> Option<&RegionLayout> { 208 | self.trailing_partial_region.as_ref() 209 | } 210 | 211 | pub(super) fn num_full_regions(&self) -> u32 { 212 | self.num_full_regions 213 | } 214 | 215 | pub(super) fn num_regions(&self) -> u32 { 216 | if self.trailing_partial_region.is_some() { 217 | self.num_full_regions + 1 218 | } else { 219 | self.num_full_regions 220 | } 221 | } 222 | 223 | pub(super) fn len(&self) -> u64 { 224 | let last = self.num_regions() - 1; 225 | self.region_base_address(last) + self.region_layout(last).len() 226 | } 227 | 228 | pub(super) fn usable_bytes(&self) -> u64 { 229 | let trailing = self 230 | .trailing_partial_region 231 | .as_ref() 232 | .map(RegionLayout::usable_bytes) 233 | .unwrap_or_default(); 234 | u64::from(self.num_full_regions) * self.full_region_layout.usable_bytes() + trailing 235 | } 236 | 237 | pub(super) fn region_base_address(&self, region: u32) -> u64 { 238 | assert!(region < self.num_regions()); 239 | u64::from(self.full_region_layout.page_size()) 240 | + u64::from(region) * self.full_region_layout.len() 241 | } 242 | 243 | pub(super) fn region_layout(&self, region: u32) -> RegionLayout { 244 | assert!(region < self.num_regions()); 245 | if region == self.num_full_regions { 246 | self.trailing_partial_region.unwrap() 247 | } else { 248 | self.full_region_layout 249 | } 250 | } 251 | } 252 | 253 | #[cfg(test)] 254 | mod test { 255 | use crate::tree_store::page_store::layout::RegionLayout; 256 | 257 | #[test] 258 | fn full_layout() { 259 | let layout = RegionLayout::full_region_layout(512, 4096); 260 | assert_eq!(layout.num_pages, 512); 261 | assert_eq!(layout.page_size, 4096); 262 | } 263 | } 264 | -------------------------------------------------------------------------------- /src/tree_store/page_store/lru_cache.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, VecDeque}; 2 | use std::sync::atomic::{AtomicBool, Ordering}; 3 | 4 | #[derive(Default)] 5 | pub struct LRUCache { 6 | // AtomicBool is the second chance flag 7 | cache: HashMap, 8 | lru_queue: VecDeque, 9 | } 10 | 11 | impl LRUCache { 12 | pub(crate) fn new() -> Self { 13 | Self { 14 | cache: Default::default(), 15 | lru_queue: Default::default(), 16 | } 17 | } 18 | 19 | pub(crate) fn len(&self) -> usize { 20 | self.cache.len() 21 | } 22 | 23 | pub(crate) fn insert(&mut self, key: u64, value: T) -> Option { 24 | let result = self 25 | .cache 26 | .insert(key, (value, AtomicBool::new(false))) 27 | .map(|(x, _)| x); 28 | if result.is_none() { 29 | self.lru_queue.push_back(key); 30 | } 31 | result 32 | } 33 | 34 | 
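    // Eviction is second-chance (clock) rather than strict LRU: every entry carries an
    // AtomicBool that `get`/`get_mut` set to true, and `pop_lowest_priority` re-queues an
    // entry whose flag is set (clearing it) instead of evicting it, so recently touched
    // keys survive one extra pass. A hedged sketch of the observable behaviour
    // (illustration only; the keys and values are made up):
    //
    //     let mut cache = LRUCache::new();
    //     cache.insert(1, "a");
    //     cache.insert(2, "b");
    //     cache.get(1);                                   // marks key 1 as recently used
    //     assert_eq!(cache.pop_lowest_priority(), Some((2, "b")));
    //
    // `remove` below also cycles two entries off the front of the queue whenever the queue
    // has grown past twice the map size, dropping queue slots whose keys are no longer
    // cached, so the lru_queue stays bounded even though removals leave stale slots behind.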
pub(crate) fn remove(&mut self, key: u64) -> Option { 35 | if let Some((value, _)) = self.cache.remove(&key) { 36 | if self.lru_queue.len() > 2 * self.cache.len() { 37 | // Cycle two elements of the LRU queue to ensure it doesn't grow without bound 38 | for _ in 0..2 { 39 | if let Some(removed_key) = self.lru_queue.pop_front() { 40 | if let Some((_, second_chance)) = self.cache.get(&removed_key) { 41 | second_chance.store(false, Ordering::Release); 42 | self.lru_queue.push_back(removed_key); 43 | } 44 | } 45 | } 46 | } 47 | Some(value) 48 | } else { 49 | None 50 | } 51 | } 52 | 53 | pub(crate) fn get(&self, key: u64) -> Option<&T> { 54 | if let Some((value, second_chance)) = self.cache.get(&key) { 55 | second_chance.store(true, Ordering::Release); 56 | Some(value) 57 | } else { 58 | None 59 | } 60 | } 61 | 62 | pub(crate) fn get_mut(&mut self, key: u64) -> Option<&mut T> { 63 | if let Some((value, second_chance)) = self.cache.get_mut(&key) { 64 | second_chance.store(true, Ordering::Release); 65 | Some(value) 66 | } else { 67 | None 68 | } 69 | } 70 | 71 | pub(crate) fn iter(&self) -> impl ExactSizeIterator { 72 | self.cache.iter().map(|(k, (v, _))| (k, v)) 73 | } 74 | 75 | pub(crate) fn iter_mut(&mut self) -> impl ExactSizeIterator { 76 | self.cache.iter_mut().map(|(k, (v, _))| (k, v)) 77 | } 78 | 79 | pub(crate) fn pop_lowest_priority(&mut self) -> Option<(u64, T)> { 80 | while let Some(key) = self.lru_queue.pop_front() { 81 | if let Some((_, second_chance)) = self.cache.get(&key) { 82 | if second_chance 83 | .compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire) 84 | .is_ok() 85 | { 86 | self.lru_queue.push_back(key); 87 | } else { 88 | let (value, _) = self.cache.remove(&key).unwrap(); 89 | return Some((key, value)); 90 | } 91 | } 92 | } 93 | None 94 | } 95 | 96 | pub(crate) fn clear(&mut self) { 97 | self.cache.clear(); 98 | self.lru_queue.clear(); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/tree_store/page_store/mod.rs: -------------------------------------------------------------------------------- 1 | mod base; 2 | mod bitmap; 3 | mod buddy_allocator; 4 | mod cached_file; 5 | pub mod file_backend; 6 | mod header; 7 | mod in_memory_backend; 8 | mod layout; 9 | mod lru_cache; 10 | mod page_manager; 11 | mod region; 12 | mod savepoint; 13 | #[allow(clippy::pedantic, dead_code)] 14 | mod xxh3; 15 | 16 | pub(crate) use base::{ 17 | MAX_PAIR_LENGTH, MAX_VALUE_LENGTH, Page, PageHint, PageNumber, PageTrackerPolicy, 18 | }; 19 | pub(crate) use header::PAGE_SIZE; 20 | pub use in_memory_backend::InMemoryBackend; 21 | pub(crate) use page_manager::{FILE_FORMAT_VERSION3, TransactionalMemory, xxh3_checksum}; 22 | pub use savepoint::Savepoint; 23 | pub(crate) use savepoint::SerializedSavepoint; 24 | 25 | pub(super) use base::{PageImpl, PageMut}; 26 | pub(super) use xxh3::hash128_with_seed; 27 | -------------------------------------------------------------------------------- /src/tree_store/page_store/region.rs: -------------------------------------------------------------------------------- 1 | #[cfg(any(test, fuzzing))] 2 | use crate::tree_store::PageNumber; 3 | use crate::tree_store::page_store::bitmap::BtreeBitmap; 4 | use crate::tree_store::page_store::buddy_allocator::BuddyAllocator; 5 | use crate::tree_store::page_store::layout::DatabaseLayout; 6 | use crate::tree_store::page_store::page_manager::{INITIAL_REGIONS, MAX_MAX_PAGE_ORDER}; 7 | use crate::tree_store::page_store::xxh3_checksum; 8 | use std::cmp::{self, max}; 
9 | use std::mem::size_of; 10 | 11 | // Tracks the page orders that MAY BE free in each region. This data structure is optimistic, so 12 | // a region may not actually have a page free for a given order 13 | pub(crate) struct RegionTracker { 14 | order_trackers: Vec, 15 | } 16 | 17 | impl RegionTracker { 18 | pub(crate) fn new(regions: u32, orders: u8) -> Self { 19 | let mut data = vec![]; 20 | for _ in 0..orders { 21 | data.push(BtreeBitmap::new(regions)); 22 | } 23 | Self { 24 | order_trackers: data, 25 | } 26 | } 27 | 28 | // Format: 29 | // num_orders: u32 number of order allocators 30 | // allocator_len: u32 length of each allocator 31 | // data: BtreeBitmap data for each order 32 | pub(super) fn to_vec(&self) -> Vec { 33 | let mut result = vec![]; 34 | let orders: u32 = self.order_trackers.len().try_into().unwrap(); 35 | let allocator_len: u32 = self.order_trackers[0].to_vec().len().try_into().unwrap(); 36 | result.extend(orders.to_le_bytes()); 37 | result.extend(allocator_len.to_le_bytes()); 38 | for order in &self.order_trackers { 39 | result.extend(&order.to_vec()); 40 | } 41 | result 42 | } 43 | 44 | // May contain trailing data 45 | pub(super) fn from_page(page: &[u8]) -> Self { 46 | let orders = u32::from_le_bytes(page[..size_of::()].try_into().unwrap()); 47 | let allocator_len = u32::from_le_bytes( 48 | page[size_of::()..2 * size_of::()] 49 | .try_into() 50 | .unwrap(), 51 | ) as usize; 52 | let mut data = vec![]; 53 | let mut start = 2 * size_of::(); 54 | for _ in 0..orders { 55 | data.push(BtreeBitmap::from_bytes( 56 | &page[start..(start + allocator_len)], 57 | )); 58 | start += allocator_len; 59 | } 60 | 61 | Self { 62 | order_trackers: data, 63 | } 64 | } 65 | 66 | pub(crate) fn find_free(&self, order: u8) -> Option { 67 | self.order_trackers[order as usize].find_first_unset() 68 | } 69 | 70 | pub(crate) fn mark_free(&mut self, order: u8, region: u32) { 71 | let order: usize = order.into(); 72 | for i in 0..=order { 73 | self.order_trackers[i].clear(region); 74 | } 75 | } 76 | 77 | pub(crate) fn mark_full(&mut self, order: u8, region: u32) { 78 | let order: usize = order.into(); 79 | assert!(order < self.order_trackers.len()); 80 | for i in order..self.order_trackers.len() { 81 | self.order_trackers[i].set(region); 82 | } 83 | } 84 | 85 | fn expand(&mut self, new_capacity: u32) { 86 | let mut new_trackers = vec![]; 87 | for order in 0..self.order_trackers.len() { 88 | let mut new_bitmap = BtreeBitmap::new(new_capacity); 89 | for region in 0..self.order_trackers[order].len() { 90 | if !self.order_trackers[order].get(region) { 91 | new_bitmap.clear(region); 92 | } 93 | } 94 | new_trackers.push(new_bitmap); 95 | } 96 | 97 | self.order_trackers = new_trackers; 98 | } 99 | 100 | fn capacity(&self) -> u32 { 101 | self.order_trackers[0].capacity() 102 | } 103 | 104 | fn len(&self) -> u32 { 105 | self.order_trackers[0].len() 106 | } 107 | } 108 | 109 | pub(super) struct Allocators { 110 | pub(super) region_tracker: RegionTracker, 111 | pub(super) region_allocators: Vec, 112 | } 113 | 114 | impl Allocators { 115 | pub(super) fn new(layout: DatabaseLayout) -> Self { 116 | let mut region_allocators = vec![]; 117 | let initial_regions = max(INITIAL_REGIONS, layout.num_regions()); 118 | let mut region_tracker = RegionTracker::new(initial_regions, MAX_MAX_PAGE_ORDER + 1); 119 | for i in 0..layout.num_regions() { 120 | let region_layout = layout.region_layout(i); 121 | let allocator = BuddyAllocator::new( 122 | region_layout.num_pages(), 123 | layout.full_region_layout().num_pages(), 
124 | ); 125 | let max_order = allocator.get_max_order(); 126 | region_tracker.mark_free(max_order, i); 127 | region_allocators.push(allocator); 128 | } 129 | 130 | Self { 131 | region_tracker, 132 | region_allocators, 133 | } 134 | } 135 | 136 | #[cfg(any(test, fuzzing))] 137 | pub(super) fn all_allocated(&self) -> Vec { 138 | let mut pages = vec![]; 139 | for (i, allocator) in self.region_allocators.iter().enumerate() { 140 | allocator.get_allocated_pages(i.try_into().unwrap(), &mut pages); 141 | } 142 | pages 143 | } 144 | 145 | pub(crate) fn xxh3_hash(&self) -> u128 { 146 | // Ignore the region tracker because it is an optimistic cache, and so may not match 147 | // between repairs of the allocators 148 | let mut result = 0; 149 | for allocator in &self.region_allocators { 150 | result ^= xxh3_checksum(&allocator.to_vec()); 151 | } 152 | result 153 | } 154 | 155 | pub(super) fn resize_to(&mut self, new_layout: DatabaseLayout) { 156 | let shrink = match (new_layout.num_regions() as usize).cmp(&self.region_allocators.len()) { 157 | cmp::Ordering::Less => true, 158 | cmp::Ordering::Equal => { 159 | let allocator = self.region_allocators.last().unwrap(); 160 | let last_region = new_layout 161 | .trailing_region_layout() 162 | .unwrap_or_else(|| new_layout.full_region_layout()); 163 | match last_region.num_pages().cmp(&allocator.len()) { 164 | cmp::Ordering::Less => true, 165 | cmp::Ordering::Equal => { 166 | // No-op 167 | return; 168 | } 169 | cmp::Ordering::Greater => false, 170 | } 171 | } 172 | cmp::Ordering::Greater => false, 173 | }; 174 | 175 | if shrink { 176 | // Drop all regions that were removed 177 | for i in new_layout.num_regions()..(self.region_allocators.len().try_into().unwrap()) { 178 | self.region_tracker.mark_full(0, i); 179 | } 180 | self.region_allocators 181 | .drain((new_layout.num_regions() as usize)..); 182 | 183 | // Resize the last region 184 | let last_region = new_layout 185 | .trailing_region_layout() 186 | .unwrap_or_else(|| new_layout.full_region_layout()); 187 | let allocator = self.region_allocators.last_mut().unwrap(); 188 | if allocator.len() > last_region.num_pages() { 189 | allocator.resize(last_region.num_pages()); 190 | } 191 | } else { 192 | let old_num_regions = self.region_allocators.len(); 193 | for i in 0..new_layout.num_regions() { 194 | let new_region = new_layout.region_layout(i); 195 | if (i as usize) < old_num_regions { 196 | let allocator = &mut self.region_allocators[i as usize]; 197 | assert!(new_region.num_pages() >= allocator.len()); 198 | if new_region.num_pages() != allocator.len() { 199 | allocator.resize(new_region.num_pages()); 200 | let highest_free = allocator.highest_free_order().unwrap(); 201 | self.region_tracker.mark_free(highest_free, i); 202 | } 203 | } else { 204 | // brand new region 205 | let allocator = BuddyAllocator::new( 206 | new_region.num_pages(), 207 | new_layout.full_region_layout().num_pages(), 208 | ); 209 | let highest_free = allocator.highest_free_order().unwrap(); 210 | // TODO: we should be calling .capacity(), and resizing if possible 211 | if i >= self.region_tracker.len() { 212 | self.region_tracker 213 | .expand(self.region_tracker.capacity() * 2); 214 | } 215 | self.region_tracker.mark_free(highest_free, i); 216 | self.region_allocators.push(allocator); 217 | } 218 | } 219 | } 220 | } 221 | } 222 | 223 | // Region header 224 | // Note: unused as of v3 file format 225 | pub(crate) struct RegionHeader {} 226 | 227 | impl RegionHeader { 228 | pub(crate) fn header_pages_expensive(page_size: u32, 
pages_per_region: u32) -> u32 { 229 | let page_size = u64::from(page_size); 230 | // TODO: this is kind of expensive. Maybe it should be cached 231 | let allocator = BuddyAllocator::new(pages_per_region, pages_per_region); 232 | let result = 8u64 + allocator.to_vec().len() as u64; 233 | result.div_ceil(page_size).try_into().unwrap() 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /src/tree_store/page_store/savepoint.rs: -------------------------------------------------------------------------------- 1 | use crate::transaction_tracker::{SavepointId, TransactionId, TransactionTracker}; 2 | use crate::tree_store::page_store::page_manager::FILE_FORMAT_VERSION3; 3 | use crate::tree_store::{BtreeHeader, TransactionalMemory}; 4 | use crate::{TypeName, Value}; 5 | use std::fmt::Debug; 6 | use std::mem::size_of; 7 | use std::sync::Arc; 8 | 9 | // on-disk format: 10 | // * 1 byte: version 11 | // * 8 bytes: savepoint id 12 | // * 8 bytes: transaction id 13 | // * 1 byte: user root not-null 14 | // * 8 bytes: user root page 15 | // * 8 bytes: user root checksum 16 | pub struct Savepoint { 17 | version: u8, 18 | id: SavepointId, 19 | // Each savepoint has an associated read transaction id to ensure that any pages it references 20 | // are not freed 21 | transaction_id: TransactionId, 22 | user_root: Option, 23 | transaction_tracker: Arc, 24 | ephemeral: bool, 25 | } 26 | 27 | impl Savepoint { 28 | #[allow(clippy::too_many_arguments)] 29 | pub(crate) fn new_ephemeral( 30 | mem: &TransactionalMemory, 31 | transaction_tracker: Arc, 32 | id: SavepointId, 33 | transaction_id: TransactionId, 34 | user_root: Option, 35 | ) -> Self { 36 | Self { 37 | id, 38 | transaction_id, 39 | version: mem.get_version(), 40 | user_root, 41 | transaction_tracker, 42 | ephemeral: true, 43 | } 44 | } 45 | 46 | pub(crate) fn get_version(&self) -> u8 { 47 | self.version 48 | } 49 | 50 | pub(crate) fn get_id(&self) -> SavepointId { 51 | self.id 52 | } 53 | 54 | pub(crate) fn get_transaction_id(&self) -> TransactionId { 55 | self.transaction_id 56 | } 57 | 58 | pub(crate) fn get_user_root(&self) -> Option { 59 | self.user_root 60 | } 61 | 62 | pub(crate) fn db_address(&self) -> *const TransactionTracker { 63 | std::ptr::from_ref(self.transaction_tracker.as_ref()) 64 | } 65 | 66 | pub(crate) fn set_persistent(&mut self) { 67 | self.ephemeral = false; 68 | } 69 | } 70 | 71 | impl Drop for Savepoint { 72 | fn drop(&mut self) { 73 | if self.ephemeral { 74 | self.transaction_tracker 75 | .deallocate_savepoint(self.get_id(), self.get_transaction_id()); 76 | } 77 | } 78 | } 79 | 80 | #[derive(Debug)] 81 | pub(crate) enum SerializedSavepoint<'a> { 82 | Ref(&'a [u8]), 83 | Owned(Vec), 84 | } 85 | 86 | impl SerializedSavepoint<'_> { 87 | pub(crate) fn from_savepoint(savepoint: &Savepoint) -> Self { 88 | assert_eq!(savepoint.version, FILE_FORMAT_VERSION3); 89 | let mut result = vec![savepoint.version]; 90 | result.extend(savepoint.id.0.to_le_bytes()); 91 | result.extend(savepoint.transaction_id.raw_id().to_le_bytes()); 92 | 93 | if let Some(header) = savepoint.user_root { 94 | result.push(1); 95 | result.extend(header.to_le_bytes()); 96 | } else { 97 | result.push(0); 98 | result.extend([0; BtreeHeader::serialized_size()]); 99 | } 100 | 101 | Self::Owned(result) 102 | } 103 | 104 | fn data(&self) -> &[u8] { 105 | match self { 106 | SerializedSavepoint::Ref(x) => x, 107 | SerializedSavepoint::Owned(x) => x.as_slice(), 108 | } 109 | } 110 | 111 | pub(crate) fn to_savepoint(&self, 
transaction_tracker: Arc) -> Savepoint { 112 | let data = self.data(); 113 | let mut offset = 0; 114 | let version = data[offset]; 115 | assert_eq!(version, FILE_FORMAT_VERSION3); 116 | offset += size_of::(); 117 | 118 | let id = u64::from_le_bytes( 119 | data[offset..(offset + size_of::())] 120 | .try_into() 121 | .unwrap(), 122 | ); 123 | offset += size_of::(); 124 | 125 | let transaction_id = u64::from_le_bytes( 126 | data[offset..(offset + size_of::())] 127 | .try_into() 128 | .unwrap(), 129 | ); 130 | offset += size_of::(); 131 | 132 | let not_null = data[offset]; 133 | assert!(not_null == 0 || not_null == 1); 134 | offset += 1; 135 | let user_root = if not_null == 1 { 136 | Some(BtreeHeader::from_le_bytes( 137 | data[offset..(offset + BtreeHeader::serialized_size())] 138 | .try_into() 139 | .unwrap(), 140 | )) 141 | } else { 142 | None 143 | }; 144 | offset += BtreeHeader::serialized_size(); 145 | assert_eq!(offset, data.len()); 146 | 147 | Savepoint { 148 | version, 149 | id: SavepointId(id), 150 | transaction_id: TransactionId::new(transaction_id), 151 | user_root, 152 | transaction_tracker, 153 | ephemeral: false, 154 | } 155 | } 156 | } 157 | 158 | impl Value for SerializedSavepoint<'_> { 159 | type SelfType<'a> 160 | = SerializedSavepoint<'a> 161 | where 162 | Self: 'a; 163 | type AsBytes<'a> 164 | = &'a [u8] 165 | where 166 | Self: 'a; 167 | 168 | fn fixed_width() -> Option { 169 | None 170 | } 171 | 172 | fn from_bytes<'a>(data: &'a [u8]) -> Self::SelfType<'a> 173 | where 174 | Self: 'a, 175 | { 176 | SerializedSavepoint::Ref(data) 177 | } 178 | 179 | fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Self::AsBytes<'a> 180 | where 181 | Self: 'b, 182 | { 183 | value.data() 184 | } 185 | 186 | fn type_name() -> TypeName { 187 | TypeName::internal("redb::SerializedSavepoint") 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/tuple_types.rs: -------------------------------------------------------------------------------- 1 | use crate::types::{Key, TypeName, Value}; 2 | use std::borrow::Borrow; 3 | use std::cmp::Ordering; 4 | use std::mem::size_of; 5 | 6 | fn serialize_tuple_elements_variable(slices: &[&[u8]]) -> Vec { 7 | let total_len: usize = slices.iter().map(|x| x.len()).sum(); 8 | let mut output = Vec::with_capacity((slices.len() - 1) * size_of::() + total_len); 9 | for len in slices.iter().map(|x| x.len()).take(slices.len() - 1) { 10 | output.extend_from_slice(&(u32::try_from(len).unwrap()).to_le_bytes()); 11 | } 12 | 13 | for slice in slices { 14 | output.extend_from_slice(slice); 15 | } 16 | 17 | output 18 | } 19 | 20 | fn serialize_tuple_elements_fixed(slices: &[&[u8]]) -> Vec { 21 | let total_len: usize = slices.iter().map(|x| x.len()).sum(); 22 | let mut output = Vec::with_capacity(total_len); 23 | for slice in slices { 24 | output.extend_from_slice(slice); 25 | } 26 | output 27 | } 28 | 29 | fn parse_lens(data: &[u8]) -> [usize; N] { 30 | let mut result = [0; N]; 31 | for i in 0..N { 32 | result[i] = u32::from_le_bytes(data[4 * i..4 * (i + 1)].try_into().unwrap()) as usize; 33 | } 34 | result 35 | } 36 | 37 | fn not_equal(data1: &[u8], data2: &[u8]) -> Option { 38 | match T::compare(data1, data2) { 39 | Ordering::Less => Some(Ordering::Less), 40 | Ordering::Equal => None, 41 | Ordering::Greater => Some(Ordering::Greater), 42 | } 43 | } 44 | 45 | macro_rules! 
fixed_width_impl { 46 | ( $( $t:ty ),+ ) => { 47 | { 48 | let mut sum = 0; 49 | $( 50 | sum += <$t>::fixed_width()?; 51 | )+ 52 | Some(sum) 53 | } 54 | }; 55 | } 56 | 57 | macro_rules! as_bytes_impl { 58 | ( $value:expr, $( $t:ty, $i:tt ),+ ) => {{ 59 | if Self::fixed_width().is_some() { 60 | serialize_tuple_elements_fixed(&[ 61 | $( 62 | <$t>::as_bytes($value.$i.borrow()).as_ref(), 63 | )+ 64 | ]) 65 | } else { 66 | serialize_tuple_elements_variable(&[ 67 | $( 68 | <$t>::as_bytes($value.$i.borrow()).as_ref(), 69 | )+ 70 | ]) 71 | } 72 | }}; 73 | } 74 | 75 | macro_rules! type_name_impl { 76 | ( $head:ty $(,$tail:ty)+ ) => { 77 | { 78 | let mut result = String::new(); 79 | result.push('('); 80 | result.push_str(&<$head>::type_name().name()); 81 | $( 82 | result.push(','); 83 | result.push_str(&<$tail>::type_name().name()); 84 | )+ 85 | result.push(')'); 86 | 87 | TypeName::internal(&result) 88 | } 89 | }; 90 | } 91 | 92 | macro_rules! from_bytes_variable_impl { 93 | ( $data:expr $(,$t:ty, $v:ident, $i:literal )+ | $t_last:ty, $v_last:ident, $i_last:literal ) => { 94 | #[allow(clippy::manual_bits)] 95 | { 96 | let lens: [usize; $i_last] = parse_lens($data); 97 | let mut offset = $i_last * size_of::(); 98 | $( 99 | let len = lens[$i]; 100 | let $v = <$t>::from_bytes(&$data[offset..(offset + len)]); 101 | offset += len; 102 | )+ 103 | let $v_last = <$t_last>::from_bytes(&$data[offset..]); 104 | ($( 105 | $v, 106 | )+ 107 | $v_last 108 | ) 109 | } 110 | }; 111 | } 112 | 113 | macro_rules! from_bytes_fixed_impl { 114 | ( $data:expr $(,$t:ty, $v:ident )+ ) => { 115 | { 116 | let mut offset = 0; 117 | $( 118 | let len = <$t>::fixed_width().unwrap(); 119 | let $v = <$t>::from_bytes(&$data[offset..(offset + len)]); 120 | #[allow(unused_assignments)] 121 | { 122 | offset += len; 123 | } 124 | )+ 125 | 126 | ($( 127 | $v, 128 | )+) 129 | } 130 | }; 131 | } 132 | 133 | macro_rules! compare_variable_impl { 134 | ( $data0:expr, $data1:expr $(,$t:ty, $i:literal )+ | $t_last:ty, $i_last:literal ) => { 135 | #[allow(clippy::manual_bits)] 136 | { 137 | let lens0: [usize; $i_last] = parse_lens($data0); 138 | let lens1: [usize; $i_last] = parse_lens($data1); 139 | let mut offset0 = $i_last * size_of::(); 140 | let mut offset1 = $i_last * size_of::(); 141 | $( 142 | let index = $i; 143 | let len0 = lens0[index]; 144 | let len1 = lens1[index]; 145 | if let Some(order) = not_equal::<$t>( 146 | &$data0[offset0..(offset0 + len0)], 147 | &$data1[offset1..(offset1 + len1)], 148 | ) { 149 | return order; 150 | } 151 | offset0 += len0; 152 | offset1 += len1; 153 | )+ 154 | 155 | <$t_last>::compare(&$data0[offset0..], &$data1[offset1..]) 156 | } 157 | }; 158 | } 159 | 160 | macro_rules! compare_fixed_impl { 161 | ( $data0:expr, $data1:expr, $($t:ty),+ ) => { 162 | { 163 | let mut offset0 = 0; 164 | let mut offset1 = 0; 165 | $( 166 | let len = <$t>::fixed_width().unwrap(); 167 | if let Some(order) = not_equal::<$t>( 168 | &$data0[offset0..(offset0 + len)], 169 | &$data1[offset1..(offset1 + len)], 170 | ) { 171 | return order; 172 | } 173 | #[allow(unused_assignments)] 174 | { 175 | offset0 += len; 176 | offset1 += len; 177 | } 178 | )+ 179 | 180 | Ordering::Equal 181 | } 182 | }; 183 | } 184 | 185 | macro_rules! 
tuple_impl { 186 | ( $($t:ident, $v:ident, $i:tt ),+ | $t_last:ident, $v_last:ident, $i_last:tt ) => { 187 | impl<$($t: Value,)+ $t_last: Value> Value for ($($t,)+ $t_last) { 188 | type SelfType<'a> = ( 189 | $(<$t>::SelfType<'a>,)+ 190 | <$t_last>::SelfType<'a>, 191 | ) 192 | where 193 | Self: 'a; 194 | type AsBytes<'a> = Vec 195 | where 196 | Self: 'a; 197 | 198 | fn fixed_width() -> Option { 199 | fixed_width_impl!($($t,)+ $t_last) 200 | } 201 | 202 | fn from_bytes<'a>(data: &'a [u8]) -> Self::SelfType<'a> 203 | where 204 | Self: 'a, 205 | { 206 | if Self::fixed_width().is_some() { 207 | from_bytes_fixed_impl!(data $(,$t,$v)+, $t_last, $v_last) 208 | } else { 209 | from_bytes_variable_impl!(data $(,$t,$v,$i)+ | $t_last, $v_last, $i_last) 210 | } 211 | } 212 | 213 | fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Vec 214 | where 215 | Self: 'a, 216 | Self: 'b, 217 | { 218 | as_bytes_impl!(value, $($t,$i,)+ $t_last, $i_last) 219 | } 220 | 221 | fn type_name() -> TypeName { 222 | type_name_impl!($($t,)+ $t_last) 223 | } 224 | } 225 | 226 | impl<$($t: Key,)+ $t_last: Key> Key for ($($t,)+ $t_last) { 227 | fn compare(data1: &[u8], data2: &[u8]) -> Ordering { 228 | if Self::fixed_width().is_some() { 229 | compare_fixed_impl!(data1, data2, $($t,)+ $t_last) 230 | } else { 231 | compare_variable_impl!(data1, data2 $(,$t,$i)+ | $t_last, $i_last) 232 | } 233 | } 234 | } 235 | }; 236 | } 237 | 238 | impl Value for (T,) { 239 | type SelfType<'a> 240 | = (T::SelfType<'a>,) 241 | where 242 | Self: 'a; 243 | type AsBytes<'a> 244 | = T::AsBytes<'a> 245 | where 246 | Self: 'a; 247 | 248 | fn fixed_width() -> Option { 249 | T::fixed_width() 250 | } 251 | 252 | fn from_bytes<'a>(data: &'a [u8]) -> Self::SelfType<'a> 253 | where 254 | Self: 'a, 255 | { 256 | (T::from_bytes(data),) 257 | } 258 | 259 | fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Self::AsBytes<'a> 260 | where 261 | Self: 'a, 262 | Self: 'b, 263 | { 264 | T::as_bytes(&value.0) 265 | } 266 | 267 | fn type_name() -> TypeName { 268 | TypeName::internal(&format!("({},)", T::type_name().name())) 269 | } 270 | } 271 | 272 | impl Key for (T,) { 273 | fn compare(data1: &[u8], data2: &[u8]) -> Ordering { 274 | T::compare(data1, data2) 275 | } 276 | } 277 | 278 | tuple_impl! { 279 | T0, t0, 0 280 | | T1, t1, 1 281 | } 282 | 283 | tuple_impl! { 284 | T0, t0, 0, 285 | T1, t1, 1 286 | | T2, t2, 2 287 | } 288 | 289 | tuple_impl! { 290 | T0, t0, 0, 291 | T1, t1, 1, 292 | T2, t2, 2 293 | | T3, t3, 3 294 | } 295 | 296 | tuple_impl! { 297 | T0, t0, 0, 298 | T1, t1, 1, 299 | T2, t2, 2, 300 | T3, t3, 3 301 | | T4, t4, 4 302 | } 303 | 304 | tuple_impl! { 305 | T0, t0, 0, 306 | T1, t1, 1, 307 | T2, t2, 2, 308 | T3, t3, 3, 309 | T4, t4, 4 310 | | T5, t5, 5 311 | } 312 | 313 | tuple_impl! { 314 | T0, t0, 0, 315 | T1, t1, 1, 316 | T2, t2, 2, 317 | T3, t3, 3, 318 | T4, t4, 4, 319 | T5, t5, 5 320 | | T6, t6, 6 321 | } 322 | 323 | tuple_impl! { 324 | T0, t0, 0, 325 | T1, t1, 1, 326 | T2, t2, 2, 327 | T3, t3, 3, 328 | T4, t4, 4, 329 | T5, t5, 5, 330 | T6, t6, 6 331 | | T7, t7, 7 332 | } 333 | 334 | tuple_impl! { 335 | T0, t0, 0, 336 | T1, t1, 1, 337 | T2, t2, 2, 338 | T3, t3, 3, 339 | T4, t4, 4, 340 | T5, t5, 5, 341 | T6, t6, 6, 342 | T7, t7, 7 343 | | T8, t8, 8 344 | } 345 | 346 | tuple_impl! { 347 | T0, t0, 0, 348 | T1, t1, 1, 349 | T2, t2, 2, 350 | T3, t3, 3, 351 | T4, t4, 4, 352 | T5, t5, 5, 353 | T6, t6, 6, 354 | T7, t7, 7, 355 | T8, t8, 8 356 | | T9, t9, 9 357 | } 358 | 359 | tuple_impl! 
360 | T0, t0, 0,
361 | T1, t1, 1,
362 | T2, t2, 2,
363 | T3, t3, 3,
364 | T4, t4, 4,
365 | T5, t5, 5,
366 | T6, t6, 6,
367 | T7, t7, 7,
368 | T8, t8, 8,
369 | T9, t9, 9
370 | | T10, t10, 10
371 | }
372 |
373 | tuple_impl! {
374 | T0, t0, 0,
375 | T1, t1, 1,
376 | T2, t2, 2,
377 | T3, t3, 3,
378 | T4, t4, 4,
379 | T5, t5, 5,
380 | T6, t6, 6,
381 | T7, t7, 7,
382 | T8, t8, 8,
383 | T9, t9, 9,
384 | T10, t10, 10
385 | | T11, t11, 11
386 | }
387 |
388 | #[cfg(test)]
389 | mod test {
390 | use crate::types::Value;
391 |
392 | #[test]
393 | fn width() {
394 | assert!(<(&str, u8)>::fixed_width().is_none());
395 | assert!(<(u16, u8, &str, u128)>::fixed_width().is_none());
396 | assert_eq!(<(u16,)>::fixed_width().unwrap(), 2);
397 | assert_eq!(<(u16, u8)>::fixed_width().unwrap(), 3);
398 | assert_eq!(<(u16, u8, u128)>::fixed_width().unwrap(), 19);
399 | assert_eq!(<(u16, u8, i8, u128)>::fixed_width().unwrap(), 20);
400 | }
401 | }
402 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | from unittest import TestCase
4 |
5 |
6 | class TableTestCase(TestCase):
7 |     def test_import(self):
8 |         import redb
--------------------------------------------------------------------------------
/tests/backward_compatibility.rs:
--------------------------------------------------------------------------------
1 | use redb::ReadableTableMetadata;
2 |
3 | const ELEMENTS: usize = 3;
4 |
5 | trait TestData: redb::Value + redb2_6::Value {
6 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS]
7 | where
8 | Self: 'a;
9 |
10 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS]
11 | where
12 | Self: 'a;
13 | }
14 |
15 | impl TestData for u8 {
16 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
17 | [0, 1, 2]
18 | }
19 |
20 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
21 | [0, 1, 2]
22 | }
23 | }
24 |
25 | impl TestData for u16 {
26 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
27 | [0, 1, 2]
28 | }
29 |
30 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
31 | [0, 1, 2]
32 | }
33 | }
34 |
35 | impl TestData for u32 {
36 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
37 | [0, 1, 2]
38 | }
39 |
40 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
41 | [0, 1, 2]
42 | }
43 | }
44 |
45 | impl TestData for u64 {
46 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
47 | [0, 1, 2]
48 | }
49 |
50 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
51 | [0, 1, 2]
52 | }
53 | }
54 |
55 | impl TestData for u128 {
56 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
57 | [0, 1, 2]
58 | }
59 |
60 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
61 | [0, 1, 2]
62 | }
63 | }
64 |
65 | impl TestData for i8 {
66 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
67 | [-1, 1, 2]
68 | }
69 |
70 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
71 | [-1, 1, 2]
72 | }
73 | }
74 |
75 | impl TestData for i16 {
76 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
77 | [-1, 1, 2]
78 | }
79 |
80 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
81 | [-1, 1, 2]
82 | }
83 | }
84 |
85 | impl TestData for i32 {
86 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
87 | [-1, 1, 2]
88 | }
89 |
90 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
91 | [-1, 1, 2]
92 | }
93 | }
94 |
95 | impl TestData for i64 {
96 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
97 | [-1, 1, 2]
98 | }
99 |
100 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
101 | [-1, 1, 2]
102 | }
103 | }
104 |
105 | impl TestData for i128 {
106 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
107 | [-1, 1, 2]
108 | }
109 |
110 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
111 | [-1, 1, 2]
112 | }
113 | }
114 |
115 | impl TestData for f32 {
116 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
117 | [f32::NAN, f32::INFINITY, f32::MIN_POSITIVE]
118 | }
119 |
120 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
121 | [f32::NAN, f32::INFINITY, f32::MIN_POSITIVE]
122 | }
123 | }
124 |
125 | impl TestData for f64 {
126 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
127 | [f64::MIN, f64::NEG_INFINITY, f64::MAX]
128 | }
129 |
130 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
131 | [f64::MIN, f64::NEG_INFINITY, f64::MAX]
132 | }
133 | }
134 |
135 | impl TestData for () {
136 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
137 | [(), (), ()]
138 | }
139 |
140 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
141 | [(), (), ()]
142 | }
143 | }
144 |
145 | impl TestData for &'static str {
146 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
147 | ["hello", "world1", "hi"]
148 | }
149 |
150 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
151 | ["hello", "world1", "hi"]
152 | }
153 | }
154 |
155 | impl TestData for &'static [u8] {
156 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
157 | [b"test", b"bytes", b"now"]
158 | }
159 |
160 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
161 | [b"test", b"bytes", b"now"]
162 | }
163 | }
164 |
165 | impl TestData for &'static [u8; 5] {
166 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
167 | [b"test1", b"bytes", b"now12"]
168 | }
169 |
170 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
171 | [b"test1", b"bytes", b"now12"]
172 | }
173 | }
174 |
175 | impl TestData for [&str; 3] {
176 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS]
177 | where
178 | Self: 'a,
179 | {
180 | [
181 | ["test1", "hi", "world"],
182 | ["test2", "hi", "world"],
183 | ["test3", "hi", "world"],
184 | ]
185 | }
186 |
187 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS]
188 | where
189 | Self: 'a,
190 | {
191 | [
192 | ["test1", "hi", "world"],
193 | ["test2", "hi", "world"],
194 | ["test3", "hi", "world"],
195 | ]
196 | }
197 | }
198 |
199 | impl TestData for [u128; 3] {
200 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
201 | [[1, 2, 3], [3, 2, 1], [300, 200, 100]]
202 | }
203 |
204 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
205 | [[1, 2, 3], [3, 2, 1], [300, 200, 100]]
206 | }
207 | }
208 |
209 | impl TestData for Vec<&str> {
210 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS]
211 | where
212 | Self: 'a,
213 | {
214 | [
215 | vec!["test1", "hi", "world"],
216 | vec!["test2", "hi", "world"],
217 | vec!["test3", "hi", "world"],
218 | ]
219 | }
220 |
221 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS]
222 | where
223 | Self: 'a,
224 | {
225 | [
226 | vec!["test1", "hi", "world"],
227 | vec!["test2", "hi", "world"],
228 | vec!["test3", "hi", "world"],
229 | ]
230 | }
231 | }
232 |
233 | impl TestData for Option {
234 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
235 | [None, Some(0), Some(7)]
236 | }
237 |
238 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
239 | [None, Some(0), Some(7)]
240 | }
241 | }
242 |
243 | impl TestData for (u64, &'static str) {
244 | fn make_data_v2_6<'a>() -> [<Self as redb2_6::Value>::SelfType<'a>; ELEMENTS] {
245 | [(0, "hi"), (1, "bye"), (2, "byte")]
246 | }
247 |
248 | fn make_data<'a>() -> [<Self as redb::Value>::SelfType<'a>; ELEMENTS] {
249 | [(0, "hi"), (1, "bye"), (2, "byte")]
250 | }
251 | }
252 |
253 | fn create_tempfile() -> tempfile::NamedTempFile {
254 | if cfg!(target_os = "wasi") {
255 | tempfile::NamedTempFile::new_in("/tmp").unwrap()
256 | } else {
257 | tempfile::NamedTempFile::new().unwrap()
258 | }
259 | }
260 |
261 | fn test_helper<K: TestData + redb::Key + redb2_6::Key + 'static, V: TestData + 'static>() {
262 | {
263 | let tmpfile = create_tempfile();
264 | let db = redb2_6::Database::builder()
265 | .create_with_file_format_v3(true)
266 | .create(tmpfile.path())
267 | .unwrap();
268 | let table_def: redb2_6::TableDefinition<K, V> = redb2_6::TableDefinition::new("table");
269 | let write_txn = db.begin_write().unwrap();
270 | {
271 | let mut table = write_txn.open_table(table_def).unwrap();
272 | for i in 0..ELEMENTS {
273 | table
274 | .insert(&K::make_data_v2_6()[i], &V::make_data_v2_6()[i])
275 | .unwrap();
276 | }
277 | }
278 | write_txn.commit().unwrap();
279 | drop(db);
280 |
281 | let db = redb::Database::open(tmpfile.path()).unwrap();
282 | let read_txn = db.begin_read().unwrap();
283 | let table_def: redb::TableDefinition<K, V> = redb::TableDefinition::new("table");
284 | let table = read_txn.open_table(table_def).unwrap();
285 | assert_eq!(table.len().unwrap(), ELEMENTS as u64);
286 | for i in 0..ELEMENTS {
287 | let result = table.get(&K::make_data()[i]).unwrap().unwrap();
288 | let value = result.value();
289 | let bytes = <V as redb::Value>::as_bytes(&value);
290 | let expected = &V::make_data()[i];
291 | let expected_bytes = <V as redb::Value>::as_bytes(expected);
292 | assert_eq!(bytes.as_ref(), expected_bytes.as_ref());
293 | }
294 | }
295 | }
296 |
297 | #[test]
298 | fn primitive_types() {
299 | test_helper::();
300 | test_helper::();
301 | test_helper::();
302 | test_helper::();
303 | test_helper::();
304 | test_helper::();
305 | test_helper::();
306 | test_helper::();
307 | test_helper::();
308 | test_helper::();
309 | test_helper::();
310 | test_helper::();
311 | test_helper::<&str, &str>();
312 | test_helper::();
313 | }
314 |
315 | #[test]
316 | fn container_types() {
317 | test_helper::<&[u8], &[u8]>();
318 | test_helper::<&[u8; 5], &[u8; 5]>();
319 | test_helper::>();
320 | test_helper::<(u64, &str), &str>();
321 | test_helper::<[&str; 3], [u128; 3]>();
322 | test_helper::>();
323 | }
324 |
325 | #[test]
326 | fn mixed_width() {
327 | test_helper::();
328 | test_helper::<&[u8; 5], &str>();
329 | }
330 |
--------------------------------------------------------------------------------
/tests/multimap_tests.rs:
--------------------------------------------------------------------------------
1 | use redb::{
2 | Database, MultimapTableDefinition, ReadableMultimapTable, ReadableTableMetadata, TableError,
3 | };
4 |
5 | const STR_TABLE: MultimapTableDefinition<&str, &str> = MultimapTableDefinition::new("str_to_str");
6 | const SLICE_U64_TABLE: MultimapTableDefinition<&[u8], u64> =
7 | MultimapTableDefinition::new("slice_to_u64");
8 | const U64_TABLE: MultimapTableDefinition<u64, u64> = MultimapTableDefinition::new("u64");
9 |
10 | fn create_tempfile() -> tempfile::NamedTempFile {
11 | if cfg!(target_os = "wasi") {
12 | tempfile::NamedTempFile::new_in("/tmp").unwrap()
13 | } else {
14 | tempfile::NamedTempFile::new().unwrap()
15 | }
16 | }
17 |
18 | fn get_vec(
19 | table: &impl ReadableMultimapTable<&'static str, &'static str>,
20 | key: &str,
21 | ) -> Vec<String> {
22 | let mut result = vec![];
23 | let mut iter = table.get(key).unwrap();
24 | loop {
25 | let item = iter.next();
26 | if let Some(item_value) = item {
27 |
result.push(item_value.unwrap().value().to_string()); 28 | } else { 29 | return result; 30 | } 31 | } 32 | } 33 | 34 | #[test] 35 | fn len() { 36 | let tmpfile = create_tempfile(); 37 | let db = Database::create(tmpfile.path()).unwrap(); 38 | let write_txn = db.begin_write().unwrap(); 39 | { 40 | let mut table = write_txn.open_multimap_table(STR_TABLE).unwrap(); 41 | table.insert("hello", "world").unwrap(); 42 | table.insert("hello", "world2").unwrap(); 43 | table.insert("hi", "world").unwrap(); 44 | } 45 | write_txn.commit().unwrap(); 46 | 47 | let read_txn = db.begin_read().unwrap(); 48 | let table = read_txn.open_multimap_table(STR_TABLE).unwrap(); 49 | assert_eq!(table.len().unwrap(), 3); 50 | let untyped_table = read_txn.open_untyped_multimap_table(STR_TABLE).unwrap(); 51 | assert_eq!(untyped_table.len().unwrap(), 3); 52 | } 53 | 54 | #[test] 55 | fn is_empty() { 56 | let tmpfile = create_tempfile(); 57 | let db = Database::create(tmpfile.path()).unwrap(); 58 | 59 | let write_txn = db.begin_write().unwrap(); 60 | { 61 | let mut table = write_txn.open_multimap_table(STR_TABLE).unwrap(); 62 | table.insert("hello", "world").unwrap(); 63 | } 64 | write_txn.commit().unwrap(); 65 | 66 | let read_txn = db.begin_read().unwrap(); 67 | let table = read_txn.open_multimap_table(STR_TABLE).unwrap(); 68 | assert!(!table.is_empty().unwrap()); 69 | } 70 | 71 | #[test] 72 | fn insert() { 73 | let tmpfile = create_tempfile(); 74 | let db = Database::create(tmpfile.path()).unwrap(); 75 | let write_txn = db.begin_write().unwrap(); 76 | { 77 | let mut table = write_txn.open_multimap_table(STR_TABLE).unwrap(); 78 | assert!(!table.insert("hello", "world").unwrap()); 79 | assert!(!table.insert("hello", "world2").unwrap()); 80 | assert!(table.insert("hello", "world2").unwrap()); 81 | } 82 | write_txn.commit().unwrap(); 83 | 84 | let read_txn = db.begin_read().unwrap(); 85 | let table = read_txn.open_multimap_table(STR_TABLE).unwrap(); 86 | assert_eq!( 87 | vec!["world".to_string(), "world2".to_string()], 88 | get_vec(&table, "hello") 89 | ); 90 | assert_eq!(table.len().unwrap(), 2); 91 | } 92 | 93 | #[test] 94 | fn range_query() { 95 | let tmpfile = create_tempfile(); 96 | let db = Database::create(tmpfile.path()).unwrap(); 97 | let write_txn = db.begin_write().unwrap(); 98 | { 99 | let mut table = write_txn.open_multimap_table(SLICE_U64_TABLE).unwrap(); 100 | for i in 0..5 { 101 | table.insert(b"0".as_slice(), &i).unwrap(); 102 | } 103 | for i in 5..10 { 104 | table.insert(b"1".as_slice(), &i).unwrap(); 105 | } 106 | for i in 10..15 { 107 | table.insert(b"2".as_slice(), &i).unwrap(); 108 | } 109 | } 110 | write_txn.commit().unwrap(); 111 | 112 | let read_txn = db.begin_read().unwrap(); 113 | let table = read_txn.open_multimap_table(SLICE_U64_TABLE).unwrap(); 114 | let start = b"0".as_ref(); 115 | let end = b"1".as_ref(); 116 | let mut iter = table.range(start..=end).unwrap(); 117 | 118 | { 119 | let (key, mut values) = iter.next().unwrap().unwrap(); 120 | for i in 0..5 { 121 | assert_eq!(b"0", key.value()); 122 | let value = values.next().unwrap().unwrap(); 123 | assert_eq!(i, value.value()); 124 | } 125 | } 126 | { 127 | let (key, mut values) = iter.next().unwrap().unwrap(); 128 | for i in 5..10 { 129 | assert_eq!(b"1", key.value()); 130 | let value = values.next().unwrap().unwrap(); 131 | assert_eq!(i, value.value()); 132 | } 133 | } 134 | assert!(iter.next().is_none()); 135 | 136 | let mut total: u64 = 0; 137 | for item in table.range(start..=end).unwrap() { 138 | let (_, values) = item.unwrap(); 139 | 
total += values.map(|x| x.unwrap().value()).sum::<u64>();
140 | }
141 | assert_eq!(total, 45);
142 | }
143 |
144 | #[test]
145 | fn range_lifetime() {
146 | let tmpfile = create_tempfile();
147 | let db = Database::create(tmpfile.path()).unwrap();
148 |
149 | let definition: MultimapTableDefinition<&str, &str> = MultimapTableDefinition::new("x");
150 |
151 | let write_txn = db.begin_write().unwrap();
152 | {
153 | let mut table = write_txn.open_multimap_table(definition).unwrap();
154 | table.insert("hello", "world").unwrap();
155 | }
156 | write_txn.commit().unwrap();
157 |
158 | let read_txn = db.begin_read().unwrap();
159 | let table = read_txn.open_multimap_table(definition).unwrap();
160 |
161 | let mut iter = {
162 | let start = "hello".to_string();
163 | table.range::<&str>(start.as_str()..).unwrap()
164 | };
165 | assert_eq!(
166 | iter.next()
167 | .unwrap()
168 | .unwrap()
169 | .1
170 | .next()
171 | .unwrap()
172 | .unwrap()
173 | .value(),
174 | "world"
175 | );
176 | assert!(iter.next().is_none());
177 | }
178 |
179 | #[test]
180 | fn range_arc_lifetime() {
181 | let tmpfile = create_tempfile();
182 | let db = Database::create(tmpfile.path()).unwrap();
183 |
184 | let definition: MultimapTableDefinition<&str, &str> = MultimapTableDefinition::new("x");
185 |
186 | let write_txn = db.begin_write().unwrap();
187 | {
188 | let mut table = write_txn.open_multimap_table(definition).unwrap();
189 | table.insert("hello", "world").unwrap();
190 | }
191 | write_txn.commit().unwrap();
192 |
193 | let mut iter = {
194 | let read_txn = db.begin_read().unwrap();
195 | let table = read_txn.open_multimap_table(definition).unwrap();
196 | let start = "hello".to_string();
197 | table.range::<&str>(start.as_str()..).unwrap()
198 | };
199 | assert_eq!(
200 | iter.next()
201 | .unwrap()
202 | .unwrap()
203 | .1
204 | .next()
205 | .unwrap()
206 | .unwrap()
207 | .value(),
208 | "world"
209 | );
210 | assert!(iter.next().is_none());
211 | }
212 |
213 | #[test]
214 | fn get_arc_lifetime() {
215 | let tmpfile = create_tempfile();
216 | let db = Database::create(tmpfile.path()).unwrap();
217 |
218 | let definition: MultimapTableDefinition<&str, &str> = MultimapTableDefinition::new("x");
219 |
220 | let write_txn = db.begin_write().unwrap();
221 | {
222 | let mut table = write_txn.open_multimap_table(definition).unwrap();
223 | table.insert("hello", "world").unwrap();
224 | }
225 | write_txn.commit().unwrap();
226 |
227 | let mut iter = {
228 | let read_txn = db.begin_read().unwrap();
229 | let table = read_txn.open_multimap_table(definition).unwrap();
230 | let start = "hello".to_string();
231 | table.get(start.as_str()).unwrap()
232 | };
233 | assert_eq!(iter.next().unwrap().unwrap().value(), "world");
234 | assert!(iter.next().is_none());
235 | }
236 |
237 | #[test]
238 | fn delete() {
239 | let tmpfile = create_tempfile();
240 | let db = Database::create(tmpfile.path()).unwrap();
241 | let write_txn = db.begin_write().unwrap();
242 | {
243 | let mut table = write_txn.open_multimap_table(STR_TABLE).unwrap();
244 | table.insert("hello", "world").unwrap();
245 | table.insert("hello", "world2").unwrap();
246 | table.insert("hello", "world3").unwrap();
247 | }
248 | write_txn.commit().unwrap();
249 |
250 | let read_txn = db.begin_read().unwrap();
251 | let table = read_txn.open_multimap_table(STR_TABLE).unwrap();
252 | assert_eq!(3, table.get("hello").unwrap().len());
253 | assert_eq!(
254 | vec![
255 | "world".to_string(),
256 | "world2".to_string(),
257 | "world3".to_string()
258 | ],
259 | get_vec(&table, "hello")
260 | );
261 | assert_eq!(table.len().unwrap(), 3);
262 |
263 | let write_txn = db.begin_write().unwrap();
264 | {
265 | let mut table = write_txn.open_multimap_table(STR_TABLE).unwrap();
266 | table.remove("hello", "world2").unwrap();
267 | }
268 | write_txn.commit().unwrap();
269 |
270 | let read_txn = db.begin_read().unwrap();
271 | let table = read_txn.open_multimap_table(STR_TABLE).unwrap();
272 | assert_eq!(
273 | vec!["world".to_string(), "world3".to_string()],
274 | get_vec(&table, "hello")
275 | );
276 | assert_eq!(table.len().unwrap(), 2);
277 |
278 | let write_txn = db.begin_write().unwrap();
279 | {
280 | let mut table = write_txn.open_multimap_table(STR_TABLE).unwrap();
281 | let mut iter = table.remove_all("hello").unwrap();
282 | assert_eq!("world", iter.next().unwrap().unwrap().value());
283 | assert_eq!("world3", iter.next().unwrap().unwrap().value());
284 | assert!(iter.next().is_none());
285 | }
286 | write_txn.commit().unwrap();
287 |
288 | let read_txn = db.begin_read().unwrap();
289 | let table = read_txn.open_multimap_table(STR_TABLE).unwrap();
290 | assert!(table.is_empty().unwrap());
291 | let empty: Vec<String> = vec![];
292 | assert_eq!(empty, get_vec(&table, "hello"));
293 | }
294 |
295 | #[test]
296 | fn wrong_types() {
297 | let tmpfile = create_tempfile();
298 | let db = Database::create(tmpfile.path()).unwrap();
299 |
300 | let definition: MultimapTableDefinition = MultimapTableDefinition::new("x");
301 | let wrong_definition: MultimapTableDefinition = MultimapTableDefinition::new("x");
302 |
303 | let txn = db.begin_write().unwrap();
304 | txn.open_multimap_table(definition).unwrap();
305 | txn.commit().unwrap();
306 |
307 | let txn = db.begin_write().unwrap();
308 | assert!(matches!(
309 | txn.open_multimap_table(wrong_definition),
310 | Err(TableError::TableTypeMismatch { .. })
311 | ));
312 | txn.abort().unwrap();
313 |
314 | let txn = db.begin_read().unwrap();
315 | txn.open_multimap_table(definition).unwrap();
316 | assert!(matches!(
317 | txn.open_multimap_table(wrong_definition),
318 | Err(TableError::TableTypeMismatch { ..
}) 319 | )); 320 | } 321 | 322 | #[test] 323 | fn efficient_storage() { 324 | let tmpfile = create_tempfile(); 325 | let expected_max_size = 1024 * 1024; 326 | // Write enough values that big_key.len() * entries > db_size to check that duplicate key data is not stored 327 | // and entries * sizeof(u32) > page_size to validate that large numbers of values can be stored per key 328 | let entries = 10000; 329 | let db = Database::create(tmpfile.path()).unwrap(); 330 | let table_def: MultimapTableDefinition<&[u8], u32> = MultimapTableDefinition::new("x"); 331 | let write_txn = db.begin_write().unwrap(); 332 | { 333 | let mut table = write_txn.open_multimap_table(table_def).unwrap(); 334 | let big_key = [0u8; 1000]; 335 | for i in 0..entries { 336 | table.insert(big_key.as_slice(), &i).unwrap(); 337 | } 338 | } 339 | assert!(write_txn.stats().unwrap().stored_bytes() <= expected_max_size); 340 | write_txn.commit().unwrap(); 341 | 342 | let read_txn = db.begin_read().unwrap(); 343 | let table = read_txn.open_multimap_table(table_def).unwrap(); 344 | assert_eq!(table.len().unwrap(), entries as u64); 345 | } 346 | 347 | #[test] 348 | fn reopen_table() { 349 | let tmpfile = create_tempfile(); 350 | let db = Database::create(tmpfile.path()).unwrap(); 351 | let write_txn = db.begin_write().unwrap(); 352 | { 353 | let mut table = write_txn.open_multimap_table(STR_TABLE).unwrap(); 354 | table.insert("0", "0").unwrap(); 355 | } 356 | { 357 | let mut table = write_txn.open_multimap_table(STR_TABLE).unwrap(); 358 | table.insert("1", "1").unwrap(); 359 | } 360 | write_txn.commit().unwrap(); 361 | } 362 | 363 | #[test] 364 | fn iter() { 365 | let tmpfile = create_tempfile(); 366 | let db = Database::create(tmpfile.path()).unwrap(); 367 | let write_txn = db.begin_write().unwrap(); 368 | { 369 | let mut table = write_txn.open_multimap_table(U64_TABLE).unwrap(); 370 | for i in 0..10 { 371 | for j in 0..10 { 372 | table.insert(&i, &j).unwrap(); 373 | } 374 | } 375 | } 376 | write_txn.commit().unwrap(); 377 | 378 | let read_txn = db.begin_read().unwrap(); 379 | let table = read_txn.open_multimap_table(U64_TABLE).unwrap(); 380 | let mut iter = table.iter().unwrap(); 381 | for i in 0..10 { 382 | let (k, mut values) = iter.next().unwrap().unwrap(); 383 | assert_eq!(k.value(), i); 384 | for j in 0..10 { 385 | assert_eq!(values.next().unwrap().unwrap().value(), j); 386 | } 387 | } 388 | } 389 | 390 | #[test] 391 | fn multimap_signature_lifetimes() { 392 | let tmpfile = create_tempfile(); 393 | let db = Database::create(tmpfile.path()).unwrap(); 394 | 395 | let def: MultimapTableDefinition<&str, u64> = MultimapTableDefinition::new("x"); 396 | 397 | let write_txn = db.begin_write().unwrap(); 398 | { 399 | let mut table = write_txn.open_multimap_table(def).unwrap(); 400 | table.insert("bye", 0).unwrap(); 401 | 402 | let _ = { 403 | let key = "hi".to_string(); 404 | table.get(key.as_str()).unwrap() 405 | }; 406 | 407 | let _ = { 408 | let key = "hi".to_string(); 409 | table.range(key.as_str()..).unwrap() 410 | }; 411 | 412 | let _ = { 413 | let key = "hi".to_string(); 414 | table.remove_all(key.as_str()).unwrap() 415 | }; 416 | } 417 | write_txn.commit().unwrap(); 418 | } 419 | -------------------------------------------------------------------------------- /tests/multithreading_tests.rs: -------------------------------------------------------------------------------- 1 | #[cfg(not(target_os = "wasi"))] 2 | mod multithreading_test { 3 | use redb::{Database, ReadableTable, ReadableTableMetadata, TableDefinition}; 4 | use 
std::sync::Arc; 5 | use std::thread; 6 | 7 | fn create_tempfile() -> tempfile::NamedTempFile { 8 | if cfg!(target_os = "wasi") { 9 | tempfile::NamedTempFile::new_in("/tmp").unwrap() 10 | } else { 11 | tempfile::NamedTempFile::new().unwrap() 12 | } 13 | } 14 | 15 | const TABLE: TableDefinition<&str, &str> = TableDefinition::new("x"); 16 | #[test] 17 | fn len() { 18 | let tmpfile = create_tempfile(); 19 | let db = Database::create(tmpfile.path()).unwrap(); 20 | let db = Arc::new(db); 21 | let write_txn = db.begin_write().unwrap(); 22 | { 23 | let mut table = write_txn.open_table(TABLE).unwrap(); 24 | table.insert("hello", "world").unwrap(); 25 | table.insert("hello2", "world2").unwrap(); 26 | table.insert("hi", "world").unwrap(); 27 | } 28 | write_txn.commit().unwrap(); 29 | 30 | let db2 = db.clone(); 31 | let t = thread::spawn(move || { 32 | let read_txn = db2.begin_read().unwrap(); 33 | let table = read_txn.open_table(TABLE).unwrap(); 34 | assert_eq!(table.len().unwrap(), 3); 35 | }); 36 | t.join().unwrap(); 37 | 38 | let read_txn = db.begin_read().unwrap(); 39 | let table = read_txn.open_table(TABLE).unwrap(); 40 | assert_eq!(table.len().unwrap(), 3); 41 | } 42 | 43 | #[test] 44 | fn multithreaded_insert() { 45 | let tmpfile = create_tempfile(); 46 | let db = Database::create(tmpfile.path()).unwrap(); 47 | 48 | const DEF1: TableDefinition<&str, &str> = TableDefinition::new("x"); 49 | const DEF2: TableDefinition<&str, &str> = TableDefinition::new("y"); 50 | let write_txn = db.begin_write().unwrap(); 51 | { 52 | let mut table1 = write_txn.open_table(DEF1).unwrap(); 53 | let mut table2 = write_txn.open_table(DEF2).unwrap(); 54 | 55 | thread::scope(|s| { 56 | s.spawn(|| { 57 | table2.insert("hello", "world").unwrap(); 58 | table2.insert("hello2", "world2").unwrap(); 59 | }); 60 | }); 61 | 62 | table1.insert("hello", "world").unwrap(); 63 | table1.insert("hello2", "world2").unwrap(); 64 | } 65 | write_txn.commit().unwrap(); 66 | 67 | let read_txn = db.begin_read().unwrap(); 68 | let table = read_txn.open_table(DEF1).unwrap(); 69 | assert_eq!(table.len().unwrap(), 2); 70 | let table = read_txn.open_table(DEF2).unwrap(); 71 | assert_eq!(table.len().unwrap(), 2); 72 | } 73 | 74 | #[test] 75 | fn multithreaded_re_read() { 76 | let tmpfile = create_tempfile(); 77 | let db = Database::create(tmpfile.path()).unwrap(); 78 | 79 | const DEF1: TableDefinition<&str, &str> = TableDefinition::new("x"); 80 | const DEF2: TableDefinition<&str, &str> = TableDefinition::new("y"); 81 | const DEF3: TableDefinition<&str, &str> = TableDefinition::new("z"); 82 | let write_txn = db.begin_write().unwrap(); 83 | { 84 | let mut table1 = write_txn.open_table(DEF1).unwrap(); 85 | let mut table2 = write_txn.open_table(DEF2).unwrap(); 86 | let mut table3 = write_txn.open_table(DEF3).unwrap(); 87 | table1.insert("hello", "world").unwrap(); 88 | 89 | thread::scope(|s| { 90 | s.spawn(|| { 91 | let value = table1.get("hello").unwrap().unwrap(); 92 | table2.insert("hello2", value.value()).unwrap(); 93 | }); 94 | }); 95 | thread::scope(|s| { 96 | s.spawn(|| { 97 | let value = table1.get("hello").unwrap().unwrap(); 98 | table3.insert("hello2", value.value()).unwrap(); 99 | }); 100 | }); 101 | 102 | assert_eq!(table2.get("hello2").unwrap().unwrap().value(), "world"); 103 | assert_eq!(table3.get("hello2").unwrap().unwrap().value(), "world"); 104 | } 105 | write_txn.commit().unwrap(); 106 | 107 | let read_txn = db.begin_read().unwrap(); 108 | let table = read_txn.open_table(DEF1).unwrap(); 109 | assert_eq!(table.len().unwrap(), 1); 
110 | let table = read_txn.open_table(DEF2).unwrap(); 111 | assert_eq!(table.len().unwrap(), 1); 112 | let table = read_txn.open_table(DEF3).unwrap(); 113 | assert_eq!(table.len().unwrap(), 1); 114 | } 115 | } 116 | --------------------------------------------------------------------------------
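
The tuple implementations in src/tuple_types.rs above are what make composite tuple keys usable through redb's public table API, in the same way the tests use (u64, &str). A minimal sketch of how that looks from the caller's side, assuming the crate's documented Database/TableDefinition API; the table name "events", the file path, and the inserted values are illustrative, not taken from this repository:

use redb::{Database, Error, ReadableTable, TableDefinition};

// Composite key: (u64, &str). The Key/Value impls generated by tuple_impl!
// in src/tuple_types.rs provide the serialization and element-wise ordering.
const EVENTS: TableDefinition<(u64, &str), u32> = TableDefinition::new("events");

fn main() -> Result<(), Error> {
    let db = Database::create("events.redb")?;

    let write_txn = db.begin_write()?;
    {
        let mut table = write_txn.open_table(EVENTS)?;
        // Keys are compared field by field, so (1, "a") < (1, "b") < (2, "a").
        table.insert((1_u64, "a"), 10_u32)?;
        table.insert((1_u64, "b"), 20_u32)?;
        table.insert((2_u64, "a"), 30_u32)?;
    }
    write_txn.commit()?;

    let read_txn = db.begin_read()?;
    let table = read_txn.open_table(EVENTS)?;
    assert_eq!(table.get((1_u64, "b"))?.unwrap().value(), 20);

    // Element-wise ordering means a range over (1, "")..(2, "") yields exactly
    // the entries whose first key element is 1.
    let mut first_group_total = 0;
    for entry in table.range((1_u64, "")..(2_u64, ""))? {
        let (key, value) = entry?;
        assert_eq!(key.value().0, 1);
        first_group_total += value.value();
    }
    assert_eq!(first_group_total, 30);
    Ok(())
}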