├── .cargo └── config.toml ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ └── feature-request.yml ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── ci_binding_js.yml │ ├── ci_binding_python.yml │ ├── ci_wasm.yml │ ├── doc.yml │ └── python_release.yml ├── .gitignore ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── common.rs ├── criterion │ └── writes.rs ├── read_bench.rs └── write_bench.rs ├── bindings ├── js │ ├── Cargo.toml │ ├── examples │ │ └── db │ │ │ ├── index.js │ │ │ ├── package.json │ │ │ ├── schema.js │ │ │ └── webpack.config.js │ └── src │ │ ├── datatype.rs │ │ ├── db.rs │ │ ├── fs.rs │ │ ├── lib.rs │ │ ├── options.rs │ │ ├── range.rs │ │ ├── transaction.rs │ │ └── utils.rs └── python │ ├── Cargo.toml │ ├── README.md │ ├── example │ ├── README.md │ ├── declare.py │ ├── fusion_storage.py │ ├── multiple_instance.py │ ├── record.py │ ├── transaction.py │ └── write_batch.py │ ├── pyproject.toml │ ├── python │ └── tonbo │ │ ├── __init__.py │ │ ├── __init__.pyi │ │ ├── error.pyi │ │ └── fs.pyi │ ├── src │ ├── column.rs │ ├── datatype.rs │ ├── db.rs │ ├── error.rs │ ├── fs.rs │ ├── lib.rs │ ├── options.rs │ ├── range.rs │ ├── record.rs │ ├── record_batch.rs │ ├── stream.rs │ ├── transaction.rs │ └── utils.rs │ └── tests │ ├── bench │ ├── test_write_async_benchmark.py │ └── test_write_benchmark.py │ ├── conftest.py │ ├── test_db.py │ ├── test_flush.py │ ├── test_s3.py │ ├── test_table_level.py │ └── test_txn.py ├── clippy.toml ├── examples ├── README.md ├── datafusion.rs ├── declare.rs └── dynamic.rs ├── guide ├── book.toml └── src │ ├── SUMMARY.md │ ├── contribution │ ├── build.md │ ├── index.md │ ├── pr.md │ └── testing.md │ ├── examples │ ├── declare.md │ ├── index.md │ └── wasm.md │ ├── introduction.md │ ├── start.md │ ├── tonbolite │ ├── build.md │ ├── index.md │ ├── start.md │ └── usage.md │ └── usage │ ├── advance.md │ ├── conf.md │ ├── faq.md │ ├── index.md │ ├── python.md │ └── tonbo.md ├── parquet-lru ├── Cargo.toml └── src │ ├── dyn.rs │ ├── foyer.rs │ └── lib.rs ├── rust-toolchain.toml ├── rustfmt.toml ├── src ├── compaction │ ├── leveled.rs │ └── mod.rs ├── context.rs ├── executor.rs ├── fs │ ├── manager.rs │ └── mod.rs ├── inmem │ ├── immutable.rs │ ├── mod.rs │ └── mutable.rs ├── lib.rs ├── magic.rs ├── ondisk │ ├── arrows.rs │ ├── mod.rs │ ├── scan.rs │ └── sstable.rs ├── option.rs ├── record │ ├── key │ │ ├── mod.rs │ │ ├── num.rs │ │ └── str.rs │ ├── mod.rs │ ├── option.rs │ ├── runtime │ │ ├── array.rs │ │ ├── mod.rs │ │ ├── record.rs │ │ ├── record_ref.rs │ │ ├── schema.rs │ │ └── value.rs │ └── test.rs ├── scope.rs ├── snapshot.rs ├── stream │ ├── level.rs │ ├── mem_projection.rs │ ├── merge.rs │ ├── mod.rs │ ├── package.rs │ └── record_batch.rs ├── timestamp │ ├── mod.rs │ └── timestamped.rs ├── transaction.rs ├── trigger.rs ├── version │ ├── cleaner.rs │ ├── edit.rs │ ├── mod.rs │ └── set.rs └── wal │ ├── log.rs │ └── mod.rs ├── tests ├── data_integrity.rs ├── fail │ ├── 01-missing-primary-key.rs │ └── 01-missing-primary-key.stderr ├── macros_correctness.rs ├── success │ └── 01-simple-record.rs └── wasm.rs └── tonbo_macros ├── Cargo.toml └── src ├── data_type.rs ├── keys.rs ├── lib.rs ├── record.rs ├── schema_model.rs └── utils ├── ident_generator.rs └── mod.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(target_arch = "wasm32")'] 2 | rustflags = ['--cfg', 'getrandom_backend="wasm_js"'] 
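A note on the config above: the `--cfg getrandom_backend="wasm_js"` flag selects getrandom's JavaScript backend when building for wasm32 targets, and it pairs with the `wasm_js` feature enabled on the `getrandom` dependency in the root Cargo.toml. A minimal sketch of the equivalent one-off invocation, assuming the config file is bypassed:

```sh
# RUSTFLAGS set in the environment takes the place of .cargo/config.toml rustflags
RUSTFLAGS='--cfg getrandom_backend="wasm_js"' cargo build --target wasm32-unknown-unknown
```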
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | description: Create a report to help us improve 3 | labels: bug 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: More details about the bug 8 | validations: 9 | required: true 10 | - type: textarea 11 | attributes: 12 | label: What is your expected behavior? 13 | - type: textarea 14 | attributes: 15 | label: How to reproduce? 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea for this project 3 | labels: enhancement 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: What is the current? 8 | validations: 9 | required: true 10 | - type: textarea 11 | attributes: 12 | label: What will it become? 13 | validations: 14 | required: true 15 | - type: textarea 16 | attributes: 17 | label: What are the benefits? 18 | validations: 19 | required: true 20 | - type: textarea 21 | attributes: 22 | label: Are there any alternatives? 23 | validations: 24 | required: true 25 | - type: textarea 26 | attributes: 27 | label: Does it have a reference? 28 | validations: 29 | required: true 30 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | version: 2 19 | updates: 20 | # Maintain dependencies for GitHub Actions 21 | - package-ecosystem: "github-actions" 22 | directory: "/" 23 | schedule: 24 | interval: "monthly" 25 | 26 | # Maintain dependencies for rust 27 | - package-ecosystem: "cargo" 28 | directory: "/" 29 | schedule: 30 | interval: "monthly" 31 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | CARGO_REGISTRIES_MY_REGISTRY_INDEX: https://github.com/rust-lang/crates.io-index 11 | 12 | jobs: 13 | # 1 14 | check: 15 | name: Rust project check 16 | permissions: 17 | id-token: write 18 | contents: read 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | matrix: 22 | os: 23 | - ubuntu-latest 24 | - macos-latest 25 | - windows-latest 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: Install latest stable 29 | uses: actions-rs/toolchain@v1 30 | with: 31 | toolchain: stable 32 | override: true 33 | components: rustfmt, clippy 34 | 35 | # `cargo check` command here will use the installed `stable` toolchain 36 | # as it is set as an "override" for the current directory 37 | 38 | - name: Run cargo clippy 39 | uses: actions-rs/cargo@v1 40 | with: 41 | command: check 42 | 43 | - name: Run cargo build 44 | uses: actions-rs/cargo@v1 45 | with: 46 | command: build 47 | 48 | - name: configure aws credentials 49 | if: ${{ (github.event_name == 'push' && github.ref_name == 'main') || startsWith(github.ref, 'refs/tags/') }} 50 | uses: aws-actions/configure-aws-credentials@v4 51 | with: 52 | role-to-assume: ${{ env.ROLE_TO_ASSUME }} 53 | role-session-name: samplerolesession 54 | aws-region: ${{ env.AWS_REGION }} 55 | env: 56 | AWS_REGION: ap-southeast-1 57 | ROLE_TO_ASSUME: arn:aws:iam::232814779190:role/github 58 | - name: Run cargo test 59 | uses: actions-rs/cargo@v1 60 | with: 61 | command: test 62 | args: --workspace 63 | env: 64 | BUCKET_NAME: tonbo-test 65 | # 2 66 | fmt: 67 | name: Rust fmt 68 | runs-on: ubuntu-latest 69 | steps: 70 | - uses: actions/checkout@v4 71 | - name: Install latest nightly 72 | uses: actions-rs/toolchain@v1 73 | with: 74 | toolchain: nightly 75 | override: true 76 | components: rustfmt, clippy 77 | 78 | # `cargo fmt` command here will use the installed `nightly` toolchain 79 | # as it is set as an "override" for the current directory 80 | 81 | - name: Run cargo fmt 82 | uses: actions-rs/cargo@v1 83 | with: 84 | command: fmt 85 | args: -- --check 86 | 87 | examples: 88 | name: Rust examples 89 | runs-on: ubuntu-latest 90 | steps: 91 | - uses: actions/checkout@v4 92 | - name: Run datafusion example 93 | uses: actions-rs/cargo@v1 94 | with: 95 | command: run 96 | args: --example datafusion --features=datafusion 97 | 98 | - name: Run declare example 99 | uses: actions-rs/cargo@v1 100 | with: 101 | command: run 102 | args: --example declare --features bytes,tokio 103 | 104 | # benchmark: 105 | # name: Rust benchmark 106 | # runs-on: self-hosted 107 | # permissions: 108 | # contents: write 109 | # pull-requests: write 110 | # repository-projects: write 111 | # if: github.event_name == 'pull_request' 112 | # steps: 113 | # - uses: actions/checkout@v4 114 | # - name: Install latest nightly 115 | # uses: actions-rs/toolchain@v1 116 | # with: 117 | # toolchain: stable 118 | # override: true 119 | 120 | # # `cargo check` command here will use installed `nightly` 121 | # # as it is set
as an "override" for current directory 122 | 123 | # - name: Run cargo bench 124 | # uses: actions-rs/cargo@v1 125 | # with: 126 | # command: bench 127 | # args: --features bench 128 | 129 | # - name: Comment on PR using GitHub CLI 130 | # env: 131 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 132 | # run: | 133 | # gh pr comment ${{ github.event.pull_request.number }} --body-file "read_benchmark.md" 134 | # gh pr comment ${{ github.event.pull_request.number }} --body-file "write_benchmark.md" 135 | coverage: 136 | name: Code coverage 137 | runs-on: ubuntu-latest 138 | steps: 139 | - uses: actions/checkout@v4 140 | - name: Install Rust 141 | run: rustup update stable 142 | - name: Install cargo-llvm-cov 143 | uses: taiki-e/install-action@cargo-llvm-cov 144 | - name: Generate code coverage 145 | run: cargo llvm-cov --workspace --lcov --output-path lcov.info 146 | - name: Upload coverage to Codecov 147 | uses: codecov/codecov-action@v5 148 | with: 149 | token: ${{ secrets.CODECOV_TOKEN }} 150 | files: lcov.info 151 | fail_ci_if_error: true 152 | -------------------------------------------------------------------------------- /.github/workflows/ci_binding_js.yml: -------------------------------------------------------------------------------- 1 | name: JavaScript Binding CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | 8 | jobs: 9 | check: 10 | name: JavaScript binding check 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: 15 | - ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Install Rust toolchain 19 | uses: actions-rs/toolchain@v1 20 | with: 21 | toolchain: stable 22 | override: true 23 | components: rustfmt, clippy 24 | 25 | - name: Run cargo clippy 26 | uses: actions-rs/cargo@v1 27 | with: 28 | command: check 29 | 30 | - name: Setup for wasm32 31 | run: | 32 | rustup target add wasm32-unknown-unknown 33 | 34 | # - name: Run cargo build 35 | # uses: actions-rs/cargo@v1 36 | # with: 37 | # working-directory: "bindings/js" 38 | # command: build 39 | # args: --target wasm32-unknown-unknown 40 | 41 | - name: Install Chrome Environment 42 | run: | 43 | mkdir -p /tmp/chrome 44 | wget $(curl https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json | jq -r '.versions | sort_by(.version) | reverse | .[0] | .downloads.chrome | .[] | select(.platform == "linux64") | .url') 45 | wget $(curl https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json | jq -r '.versions | sort_by(.version) | reverse | .[0] | .downloads.chromedriver | .[] | select(.platform == "linux64") | .url') 46 | unzip chromedriver-linux64.zip 47 | unzip chrome-linux64.zip 48 | cp -r chrome-linux64/ /tmp/chrome/ 49 | cp -r chromedriver-linux64 /tmp/chrome/chromedriver 50 | 51 | - name: Setup wasm-pack 52 | run: | 53 | cargo install wasm-pack 54 | 55 | - name: Run wasm-pack test 56 | run: | 57 | export PATH=$PATH:/tmp/chrome/chrome-linux64/:/tmp/chrome/chromedriver-linux64/ 58 | cd bindings/js 59 | wasm-pack test --chrome --headless 60 | -------------------------------------------------------------------------------- /.github/workflows/ci_binding_python.yml: -------------------------------------------------------------------------------- 1 | name: Python Binding CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | 8 | jobs: 9 | check: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Install Rust toolchain 14 | uses: actions-rs/toolchain@v1 15 | 
with: 16 | toolchain: stable 17 | override: true 18 | components: rustfmt, clippy 19 | 20 | - name: Run cargo clippy 21 | uses: actions-rs/cargo@v1 22 | with: 23 | command: check 24 | 25 | - uses: actions/setup-python@v5 26 | with: 27 | python-version: "3.11" 28 | - name: Build with maturin and run 29 | shell: bash 30 | working-directory: "bindings/python" 31 | run: | 32 | python -m venv venv 33 | source venv/bin/activate 34 | pip install maturin 35 | maturin develop -E test 36 | pytest --ignore=tests/bench -v . 37 | -------------------------------------------------------------------------------- /.github/workflows/ci_wasm.yml: -------------------------------------------------------------------------------- 1 | name: WASM CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | CARGO_REGISTRIES_MY_REGISTRY_INDEX: https://github.com/rust-lang/crates.io-index 11 | BUCKET_NAME: tonbo-test 12 | 13 | jobs: 14 | check: 15 | name: Rust project wasm check 16 | permissions: 17 | id-token: write 18 | contents: read 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | matrix: 22 | os: 23 | - ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v4 26 | - name: Install Rust toolchain 27 | uses: actions-rs/toolchain@v1 28 | with: 29 | toolchain: stable 30 | override: true 31 | components: rustfmt, clippy 32 | 33 | - name: Run cargo clippy 34 | uses: actions-rs/cargo@v1 35 | with: 36 | command: check 37 | 38 | - name: configure aws credentials 39 | if: ${{ (github.event_name == 'push' && github.ref_name == 'main') || startsWith(github.ref, 'refs/tags/') }} 40 | uses: aws-actions/configure-aws-credentials@v4 41 | with: 42 | role-to-assume: ${{ env.ROLE_TO_ASSUME }} 43 | role-session-name: samplerolesession 44 | aws-region: ${{ env.AWS_REGION }} 45 | env: 46 | AWS_REGION: ap-southeast-1 47 | ROLE_TO_ASSUME: arn:aws:iam::232814779190:role/github 48 | - name: Setup for wasm32 49 | run: | 50 | rustup target add wasm32-unknown-unknown 51 | 52 | - name: Run cargo build 53 | uses: actions-rs/cargo@v1 54 | with: 55 | command: build 56 | args: --target wasm32-unknown-unknown --no-default-features --features bytes,wasm 57 | 58 | - name: Install Chrome Environment 59 | run: | 60 | mkdir -p /tmp/chrome 61 | wget $(curl https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json | jq -r '.versions | sort_by(.version) | reverse | .[0] | .downloads.chrome | .[] | select(.platform == "linux64") | .url') 62 | wget $(curl https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json | jq -r '.versions | sort_by(.version) | reverse | .[0] | .downloads.chromedriver | .[] | select(.platform == "linux64") | .url') 63 | unzip chromedriver-linux64.zip 64 | unzip chrome-linux64.zip 65 | cp -r chrome-linux64/ /tmp/chrome/ 66 | cp -r chromedriver-linux64 /tmp/chrome/chromedriver 67 | 68 | - name: Setup wasm-pack 69 | run: | 70 | cargo install wasm-pack 71 | 72 | - name: Run wasm-pack test 73 | env: 74 | BUCKET_NAME: tonbo-test 75 | run: | 76 | echo "bucket: $BUCKET_NAME" 77 | export PATH=$PATH:/tmp/chrome/chrome-linux64/:/tmp/chrome/chromedriver-linux64/ 78 | wasm-pack test --chrome --headless --test wasm --no-default-features --features bytes,wasm,wasm-http 79 | -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: Doc CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - 
main 7 | paths: 8 | - "guide/**" 9 | jobs: 10 | build: 11 | name: Build, Test and Deploy 12 | runs-on: ubuntu-latest 13 | permissions: 14 | contents: write # To push a branch 15 | pull-requests: write # To create a PR from that branch 16 | steps: 17 | - name: Checkout️ 18 | uses: actions/checkout@v4 19 | 20 | - name: Install and Build 21 | run: | 22 | cd guide 23 | curl -L https://github.com/rust-lang/mdBook/releases/download/v0.4.28/mdbook-v0.4.28-x86_64-unknown-linux-gnu.tar.gz | tar xzf - 24 | echo $PWD >> $GITHUB_PATH 25 | cargo install mdbook-toc 26 | ./mdbook build 27 | - name: Deploy 28 | uses: JamesIves/github-pages-deploy-action@v4 29 | with: 30 | branch: gh-pages 31 | folder: guide/book # The folder the action should deploy. 32 | -------------------------------------------------------------------------------- /.github/workflows/python_release.yml: -------------------------------------------------------------------------------- 1 | name: Release Python 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | pull_request: 8 | branches: 9 | - main 10 | paths: 11 | - ".github/workflows/python_release.yml" 12 | workflow_dispatch: 13 | 14 | permissions: 15 | contents: read 16 | 17 | jobs: 18 | windows: 19 | runs-on: ${{ matrix.platform.runner }} 20 | strategy: 21 | matrix: 22 | platform: 23 | - runner: windows-latest 24 | target: x64 25 | - runner: windows-latest 26 | target: x86 27 | steps: 28 | - uses: actions/checkout@v4 29 | - uses: actions/setup-python@v5 30 | with: 31 | python-version: 3.x 32 | architecture: ${{ matrix.platform.target }} 33 | - name: Build wheels 34 | uses: PyO3/maturin-action@v1 35 | with: 36 | target: ${{ matrix.platform.target }} 37 | args: --release --out dist --bindings pyo3 --features=pyo3/extension-module 38 | sccache: "true" 39 | working-directory: "bindings/python" 40 | - name: Upload wheels 41 | uses: actions/upload-artifact@v4 42 | with: 43 | name: wheels-windows-${{ matrix.platform.target }} 44 | path: bindings/python/dist 45 | 46 | macos: 47 | runs-on: ${{ matrix.platform.runner }} 48 | strategy: 49 | matrix: 50 | platform: 51 | - runner: macos-13 52 | target: x86_64 53 | - runner: macos-14 54 | target: aarch64 55 | steps: 56 | - uses: actions/checkout@v4 57 | - uses: actions/setup-python@v5 58 | with: 59 | python-version: 3.x 60 | - name: Build wheels 61 | uses: PyO3/maturin-action@v1 62 | with: 63 | target: ${{ matrix.platform.target }} 64 | args: --release --out dist --bindings pyo3 --features=pyo3/extension-module 65 | sccache: "true" 66 | working-directory: "bindings/python" 67 | - name: Upload wheels 68 | uses: actions/upload-artifact@v4 69 | with: 70 | name: wheels-macos-${{ matrix.platform.target }} 71 | path: bindings/python/dist 72 | 73 | release: 74 | name: Release 75 | environment: release 76 | runs-on: ubuntu-latest 77 | if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} 78 | needs: [windows, macos] 79 | permissions: 80 | # Use to sign the release artifacts 81 | id-token: write 82 | # Used to upload release artifacts 83 | contents: write 84 | # Used to generate artifact attestation 85 | attestations: write 86 | steps: 87 | - uses: actions/download-artifact@v4 88 | - name: Generate artifact attestation 89 | uses: actions/attest-build-provenance@v1 90 | with: 91 | subject-path: "wheels-*/*" 92 | - name: Publish to PyPI 93 | if: "startsWith(github.ref, 'refs/tags/')" 94 | uses: PyO3/maturin-action@v1 95 | with: 96 | command: upload 97 | args: --non-interactive --skip-existing wheels-*/* 98 | 
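A note on reproducing the wheel builds above locally: the `PyO3/maturin-action` steps reduce to a plain `maturin build` with the same arguments; a minimal sketch (the venv name is illustrative):

```sh
cd bindings/python
python -m venv .env && source .env/bin/activate
pip install maturin
# same arguments the workflow passes to PyO3/maturin-action
maturin build --release --out dist --bindings pyo3 --features=pyo3/extension-module
```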
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | /.idea 3 | Cargo.lock 4 | db_path 5 | guide/book 6 | __pycache__ 7 | 8 | .DS_Store 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Get Started 4 | 5 | This is a Rust project, so [rustup](https://rustup.rs/) is the best place to start. 6 | 7 | Since this is a pure Rust project, only `cargo` is needed for everyday development. 8 | 9 | - `cargo check` to analyze the current package and report errors. 10 | - `cargo +nightly fmt` to format the current code. 11 | - `cargo build` to compile the current package. 12 | - `cargo clippy` to catch common mistakes and improve code. 13 | - `cargo test` to run unit tests. 14 | - `cargo bench` to run benchmark tests. -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | workspace = { members = ["parquet-lru", "tonbo_macros"] } 2 | 3 | [package] 4 | description = "An embedded persistent KV database in Rust." 5 | documentation = "https://docs.rs/tonbo" 6 | edition = "2021" 7 | license = "Apache-2.0" 8 | name = "tonbo" 9 | readme = "README.md" 10 | resolver = "2" 11 | version = "0.3.2" 12 | 13 | [package.metadata] 14 | msrv = "1.79.0" 15 | 16 | [features] 17 | aws = ["fusio-dispatch/aws", "fusio-log/aws", "fusio/aws"] 18 | bench = ["redb", "rocksdb", "sled"] 19 | bytes = ["dep:bytes"] 20 | datafusion = ["dep:async-trait", "dep:datafusion"] 21 | default = ["aws", "bytes", "tokio", "tokio-http"] 22 | load_tbl = [] 23 | object-store = ["fusio/object_store"] 24 | opfs = [ 25 | "dep:wasm-bindgen-futures", 26 | "fusio-dispatch/opfs", 27 | "fusio-log/web", 28 | "fusio-parquet/web", 29 | "fusio/opfs", 30 | ] 31 | redb = ["dep:redb"] 32 | rocksdb = ["dep:rocksdb"] 33 | sled = ["dep:sled"] 34 | sync = ["fusio/sync"] 35 | tokio = [ 36 | "fusio-dispatch/tokio", 37 | "fusio-log/tokio", 38 | "fusio-parquet/tokio", 39 | "fusio/tokio", 40 | "parquet/default", 41 | "tokio/fs", 42 | "tokio/rt-multi-thread", 43 | ] 44 | tokio-http = ["fusio/tokio-http", "fusio-log/tokio-http"] 45 | wasm = ["aws", "bytes", "opfs", "wasm-http"] 46 | wasm-http = ["fusio/wasm-http", "fusio-log/web-http"] 47 | 48 | [[example]] 49 | name = "declare" 50 | required-features = ["bytes", "tokio"] 51 | 52 | [[example]] 53 | name = "datafusion" 54 | required-features = ["datafusion"] 55 | 56 | [[bench]] 57 | harness = false 58 | name = "write_bench" 59 | required-features = ["bench"] 60 | 61 | [[bench]] 62 | harness = false 63 | name = "read_bench" 64 | required-features = ["bench"] 65 | 66 | [[bench]] 67 | harness = false 68 | name = "common" 69 | required-features = ["bench"] 70 | 71 | [[bench]] 72 | harness = false 73 | name = "writes" 74 | path = "benches/criterion/writes.rs" 75 | required-features = ["sled"] 76 | 77 | [dependencies] 78 | arrow = "55" 79 | async-lock = "3" 80 | async-stream = "0.3" 81 | async-trait = { version = "0.1", optional = true } 82 | bytes = { version = "1.7", optional = true } 83 | crc32fast = "1" 84 | crossbeam-skiplist = "0.1" 85 | datafusion = { version = "47", optional = true } 86 | flume = { version = "0.11", features = ["async"] } 87 | fusio = { git = "https://github.com/tonbo-io/fusio", rev = 
"278eb79091b24df29eb9f3ac78ae6c3305ea3ee6", version = "0.3.8", package = "fusio", features = [ 88 | "dyn", 89 | "fs", 90 | ] } 91 | fusio-dispatch = { git = "https://github.com/tonbo-io/fusio", rev = "278eb79091b24df29eb9f3ac78ae6c3305ea3ee6", version = "0.3.8", package = "fusio-dispatch" } 92 | fusio-log = { git = "https://github.com/tonbo-io/fusio", rev = "278eb79091b24df29eb9f3ac78ae6c3305ea3ee6", version = "0.3.8", package = "fusio-log", default-features = false, features = [ 93 | "bytes", 94 | ] } 95 | fusio-parquet = { git = "https://github.com/tonbo-io/fusio", rev = "278eb79091b24df29eb9f3ac78ae6c3305ea3ee6", version = "0.3.8", package = "fusio-parquet" } 96 | futures-core = "0.3" 97 | futures-util = "0.3" 98 | lockable = "0.1.1" 99 | once_cell = "1" 100 | parquet = { version = "55", default-features = false, features = [ 101 | "async", 102 | "base64", 103 | "brotli", 104 | "flate2", 105 | "lz4", 106 | "snap", 107 | ] } 108 | parquet-lru = { version = "0.3.0", path = "parquet-lru" } 109 | pin-project-lite = "0.2" 110 | thiserror = "2.0.3" 111 | tokio = { version = "1", features = ["io-util"], default-features = false } 112 | tonbo_macros = { version = "0.3.1", path = "tonbo_macros" } 113 | tracing = "0.1" 114 | ulid = { version = "1", features = ["serde"] } 115 | 116 | # Only used for benchmarks 117 | log = "0.4.22" 118 | redb = { version = "2", optional = true } 119 | rocksdb = { version = "0.23", optional = true } 120 | sled = { version = "0.34", optional = true } 121 | 122 | [target.'cfg(target_arch = "wasm32")'.dependencies] 123 | getrandom = { version = "0.3.1", features = ["wasm_js"] } 124 | wasm-bindgen = "0.2.95" 125 | wasm-bindgen-futures = { version = "0.4.45", optional = true } 126 | 127 | [dev-dependencies] 128 | bincode = "1" 129 | fastrand = "2" 130 | futures = { version = "0.3" } 131 | serde = "1" 132 | tempfile = "3" 133 | trybuild = "1.0" 134 | 135 | [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] 136 | comfy-table = "7" 137 | criterion = { version = "0.5", features = ["async_tokio", "html_reports"] } 138 | mimalloc = "0.1" 139 | tokio = { version = "1", features = ["full"] } 140 | 141 | [target.'cfg(target_arch = "wasm32")'.dev-dependencies] 142 | wasm-bindgen = "0.2.95" 143 | wasm-bindgen-test = "0.3.9" 144 | web-sys = { version = "0.3", features = ["console"] } 145 | 146 | [target.'cfg(unix)'.dev-dependencies] 147 | pprof = { version = "0.14", features = ["criterion", "flamegraph"] } 148 | 149 | [profile.release] 150 | codegen-units = 1 151 | lto = "thin" 152 | 153 | [profile.bench] 154 | debug = true 155 | inherits = "release" 156 | -------------------------------------------------------------------------------- /benches/criterion/writes.rs: -------------------------------------------------------------------------------- 1 | use std::{iter::repeat_with, sync::Arc}; 2 | 3 | use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; 4 | use mimalloc::MiMalloc; 5 | use tonbo::{executor::tokio::TokioExecutor, DbOption, Record, DB}; 6 | 7 | #[global_allocator] 8 | static GLOBAL: MiMalloc = MiMalloc; 9 | 10 | #[derive(Record, Debug)] 11 | pub struct KV { 12 | #[record(primary_key)] 13 | key: String, 14 | value: String, 15 | } 16 | 17 | #[inline(never)] 18 | async fn tonbo_write(db: &DB, batch_size: usize) { 19 | let mut kvs = Vec::with_capacity(128); 20 | for _ in 0..batch_size { 21 | let key = repeat_with(fastrand::alphanumeric).take(256).collect(); 22 | let value = repeat_with(fastrand::alphanumeric).take(256).collect(); 23 | let kv = KV 
{ key, value }; 24 | kvs.push(kv); 25 | } 26 | 27 | db.insert_batch(kvs.into_iter()).await.unwrap(); 28 | } 29 | 30 | #[inline(never)] 31 | async fn sled_write(db: &sled::Db, batch_size: usize) { 32 | let mut kvs = Vec::with_capacity(128); 33 | for _ in 0..batch_size { 34 | let key: String = repeat_with(fastrand::alphanumeric).take(256).collect(); 35 | let value: String = repeat_with(fastrand::alphanumeric).take(256).collect(); 36 | kvs.push((key, value)); 37 | } 38 | 39 | for (key, value) in kvs { 40 | db.insert(&key, &*value).unwrap(); 41 | } 42 | } 43 | 44 | fn single_write(c: &mut Criterion) { 45 | let runtime = Arc::new( 46 | tokio::runtime::Builder::new_multi_thread() 47 | .worker_threads(8) 48 | .enable_all() 49 | .build() 50 | .unwrap(), 51 | ); 52 | 53 | let mut group = c.benchmark_group("write"); 54 | 55 | let batches = [1, 16, 128]; 56 | 57 | let _ = std::fs::remove_dir_all("/tmp/tonbo"); 58 | let _ = std::fs::create_dir_all("/tmp/tonbo"); 59 | 60 | for batch in batches { 61 | let option = DbOption::from(fusio::path::Path::from_filesystem_path("/tmp/tonbo").unwrap()) 62 | .disable_wal(); 63 | let db = runtime 64 | .block_on(async { DB::new(option, TokioExecutor::current(), KVSchema).await }) 65 | .unwrap(); 66 | 67 | group.bench_with_input(BenchmarkId::new("Tonbo", batch), &batch, |b, batch| { 68 | let r = runtime.clone(); 69 | b.to_async(&*r) 70 | .iter(|| async { tonbo_write(&db, *batch).await }); 71 | }); 72 | let _ = std::fs::remove_dir_all("/tmp/tonbo"); 73 | let _ = std::fs::create_dir_all("/tmp/tonbo"); 74 | } 75 | 76 | let _ = std::fs::remove_dir_all("/tmp/sled"); 77 | let _ = std::fs::create_dir_all("/tmp/sled"); 78 | 79 | for batch in batches { 80 | let sled = sled::open("/tmp/sled").unwrap(); 81 | group.bench_with_input(BenchmarkId::new("Sled", batch), &batch, |b, batch| { 82 | let r = runtime.clone(); 83 | b.to_async(&*r) 84 | .iter(|| async { sled_write(&sled, *batch).await }); 85 | }); 86 | let _ = std::fs::remove_dir_all("/tmp/sled"); 87 | let _ = std::fs::create_dir_all("/tmp/sled"); 88 | } 89 | 90 | group.finish(); 91 | } 92 | 93 | criterion_group!(benches, single_write); 94 | criterion_main!(benches); 95 | -------------------------------------------------------------------------------- /bindings/js/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2021" 3 | name = "tonbo-js" 4 | version = "0.1.0" 5 | 6 | [lib] 7 | crate-type = ["cdylib", "rlib"] 8 | 9 | [workspace] 10 | 11 | [dependencies] 12 | futures = { version = "0.3" } 13 | js-sys = { version = "0.3.72" } 14 | tonbo = { version = "0.3.2", path = "../../", default-features = false, features = [ 15 | "bytes", 16 | "wasm", 17 | ] } 18 | 19 | wasm-bindgen = "0.2.95" 20 | wasm-bindgen-futures = { version = "0.4.45" } 21 | wasm-streams = "0.4.2" 22 | 23 | [dev-dependencies] 24 | wasm-bindgen = "0.2.95" 25 | wasm-bindgen-futures = { version = "0.4.45" } 26 | wasm-bindgen-test = "0.3.9" 27 | wasm-streams = "0.4.2" 28 | web-sys = { version = "0.3", features = ["console"] } 29 | fusio = { git = "https://github.com/tonbo-io/fusio", rev = "278eb79091b24df29eb9f3ac78ae6c3305ea3ee6", version = "0.3.7", default-features = false, features = [ 30 | "dyn", 31 | "bytes", 32 | "opfs", 33 | ] } 34 | 35 | [package.metadata.wasm-pack.profile.release] 36 | wasm-opt = false 37 | -------------------------------------------------------------------------------- /bindings/js/examples/db/index.js: 
-------------------------------------------------------------------------------- 1 | import { userSchema } from "./schema"; 2 | import init, { TonboDB, DbOption } from "./pkg/tonbo_js"; 3 | 4 | async function main() { 5 | // Initialize the WASM module 6 | await init(); 7 | 8 | const option = new DbOption("store_dir"); 9 | const db = await new TonboDB(option, userSchema); 10 | 11 | await db.insert({ id: 0, name: "Alice", price: 123.45 }); 12 | 13 | const record = await db.get(0, (val) => val); 14 | console.log("Retrieved record:", record); 15 | 16 | await db.transaction(async (txn) => { 17 | txn.insert({ id: 1, name: "Bob" }); 18 | const record1 = await txn.get(1, ["id", "price"]); 19 | const record2 = await txn.get(0, ["id", "price"]); 20 | console.log(record1); 21 | console.log(record2); 22 | // cannot read uncommitted changes 23 | const uncommitted_name = await db.get(1, (val) => val.name); 24 | console.log("read uncommitted name: ", uncommitted_name); 25 | await txn.commit(); 26 | const name = await db.get(1, (val) => val.name); 27 | console.log("read committed name: ", name); 28 | }); 29 | } 30 | 31 | main().catch(console.error); 32 | -------------------------------------------------------------------------------- /bindings/js/examples/db/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "build": "wasm-pack build --target web", 4 | "serve": "cp -r ../../pkg ./ && webpack serve" 5 | }, 6 | "devDependencies": { 7 | "@wasm-tool/wasm-pack-plugin": "1.5.0", 8 | "html-webpack-plugin": "^5.6.0", 9 | "webpack": "^5.93.0", 10 | "webpack-cli": "^5.1.4", 11 | "webpack-dev-server": "^5.0.4" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /bindings/js/examples/db/schema.js: -------------------------------------------------------------------------------- 1 | export const userSchema = { 2 | id: { 3 | primary: true, 4 | type: "UInt8", 5 | nullable: false, 6 | }, 7 | name: { 8 | type: "String", 9 | nullable: true, 10 | }, 11 | price: { 12 | type: "Float64", 13 | nullable: true, 14 | }, 15 | }; 16 | -------------------------------------------------------------------------------- /bindings/js/examples/db/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const HtmlWebpackPlugin = require("html-webpack-plugin"); 3 | const webpack = require("webpack"); 4 | const WasmPackPlugin = require("@wasm-tool/wasm-pack-plugin"); 5 | 6 | module.exports = { 7 | entry: "./index.js", 8 | output: { 9 | path: path.resolve(__dirname, "dist"), 10 | filename: "index.js", 11 | }, 12 | mode: "development", 13 | plugins: [ 14 | new HtmlWebpackPlugin(), 15 | new WasmPackPlugin({ 16 | crateDirectory: path.resolve(__dirname, "."), 17 | }), 18 | ], 19 | experiments: { 20 | asyncWebAssembly: true, 21 | }, 22 | }; 23 | -------------------------------------------------------------------------------- /bindings/js/src/datatype.rs: -------------------------------------------------------------------------------- 1 | use wasm_bindgen::prelude::wasm_bindgen; 2 | 3 | #[wasm_bindgen] 4 | #[repr(u8)] 5 | #[derive(Copy, Clone, Debug)] 6 | pub enum DataType { 7 | UInt8 = 0, 8 | UInt16 = 1, 9 | UInt32 = 2, 10 | UInt64 = 3, 11 | Int8 = 4, 12 | Int16 = 5, 13 | Int32 = 6, 14 | Int64 = 7, 15 | String = 8, 16 | Boolean = 9, 17 | Bytes = 10, 18 | Float32 = 11, 19 | Float64 = 12, 20 | } 21 | 22 | impl From<DataType> for tonbo::record::DataType { 23 | fn 
from(datatype: DataType) -> Self { 24 | match datatype { 25 | DataType::UInt8 => tonbo::record::DataType::UInt8, 26 | DataType::UInt16 => tonbo::record::DataType::UInt16, 27 | DataType::UInt32 => tonbo::record::DataType::UInt32, 28 | DataType::UInt64 => tonbo::record::DataType::UInt64, 29 | DataType::Int8 => tonbo::record::DataType::Int8, 30 | DataType::Int16 => tonbo::record::DataType::Int16, 31 | DataType::Int32 => tonbo::record::DataType::Int32, 32 | DataType::Int64 => tonbo::record::DataType::Int64, 33 | DataType::String => tonbo::record::DataType::String, 34 | DataType::Boolean => tonbo::record::DataType::Boolean, 35 | DataType::Float32 => tonbo::record::DataType::Float32, 36 | DataType::Float64 => tonbo::record::DataType::Float64, 37 | _ => todo!(), 38 | } 39 | } 40 | } 41 | 42 | pub(crate) fn to_datatype(datatype: &str) -> tonbo::record::DataType { 43 | match datatype { 44 | "UInt8" => tonbo::record::DataType::UInt8, 45 | "UInt16" => tonbo::record::DataType::UInt16, 46 | "UInt32" => tonbo::record::DataType::UInt32, 47 | "UInt64" => tonbo::record::DataType::UInt64, 48 | "Int8" => tonbo::record::DataType::Int8, 49 | "Int16" => tonbo::record::DataType::Int16, 50 | "Int32" => tonbo::record::DataType::Int32, 51 | "Int64" => tonbo::record::DataType::Int64, 52 | "String" => tonbo::record::DataType::String, 53 | "Boolean" => tonbo::record::DataType::Boolean, 54 | "Float32" => tonbo::record::DataType::Float32, 55 | "Float64" => tonbo::record::DataType::Float64, 56 | _ => todo!(), 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /bindings/js/src/fs.rs: -------------------------------------------------------------------------------- 1 | use tonbo::option::Path; 2 | use wasm_bindgen::{prelude::wasm_bindgen, JsValue}; 3 | 4 | #[wasm_bindgen] 5 | #[derive(Debug, Clone)] 6 | pub struct AwsCredential { 7 | #[wasm_bindgen(skip)] 8 | pub key_id: String, 9 | #[wasm_bindgen(skip)] 10 | pub secret_key: String, 11 | #[wasm_bindgen(skip)] 12 | pub token: Option<String>, 13 | } 14 | 15 | impl From<AwsCredential> for tonbo::option::AwsCredential { 16 | fn from(cred: AwsCredential) -> Self { 17 | tonbo::option::AwsCredential { 18 | key_id: cred.key_id, 19 | secret_key: cred.secret_key, 20 | token: cred.token, 21 | } 22 | } 23 | } 24 | 25 | #[wasm_bindgen] 26 | impl AwsCredential { 27 | #[wasm_bindgen(constructor)] 28 | pub fn new(key_id: String, secret_key: String, token: Option<String>) -> Self { 29 | Self { 30 | key_id, 31 | secret_key, 32 | token, 33 | } 34 | } 35 | } 36 | 37 | #[wasm_bindgen] 38 | #[derive(Debug, Clone)] 39 | pub struct FsOptions { 40 | inner: FsOptionsInner, 41 | } 42 | 43 | impl FsOptions { 44 | pub(crate) fn path(&self, path: String) -> Result<Path, JsValue> { 45 | match self.inner { 46 | FsOptionsInner::Local => { 47 | Path::from_opfs_path(&path).map_err(|err| JsValue::from(err.to_string())) 48 | } 49 | FsOptionsInner::S3 { .. 
} => { 50 | Path::from_url_path(&path).map_err(|err| JsValue::from(err.to_string())) 51 | } 52 | } 53 | } 54 | } 55 | 56 | #[derive(Debug, Clone)] 57 | enum FsOptionsInner { 58 | Local, 59 | S3 { 60 | bucket: String, 61 | credential: Option<AwsCredential>, 62 | region: Option<String>, 63 | sign_payload: Option<bool>, 64 | checksum: Option<bool>, 65 | endpoint: Option<String>, 66 | }, 67 | } 68 | 69 | #[wasm_bindgen] 70 | pub struct S3Builder { 71 | bucket: String, 72 | credential: Option<AwsCredential>, 73 | region: Option<String>, 74 | sign_payload: Option<bool>, 75 | checksum: Option<bool>, 76 | endpoint: Option<String>, 77 | } 78 | 79 | #[wasm_bindgen] 80 | impl S3Builder { 81 | #[wasm_bindgen(constructor)] 82 | pub fn new(bucket: String) -> Self { 83 | Self { 84 | bucket, 85 | credential: None, 86 | region: None, 87 | sign_payload: None, 88 | checksum: None, 89 | endpoint: None, 90 | } 91 | } 92 | 93 | pub fn credential(self, credential: AwsCredential) -> Self { 94 | Self { 95 | credential: Some(credential), 96 | ..self 97 | } 98 | } 99 | 100 | pub fn region(self, region: String) -> Self { 101 | Self { 102 | region: Some(region), 103 | ..self 104 | } 105 | } 106 | 107 | pub fn sign_payload(self, sign_payload: bool) -> Self { 108 | Self { 109 | sign_payload: Some(sign_payload), 110 | ..self 111 | } 112 | } 113 | 114 | pub fn checksum(self, checksum: bool) -> Self { 115 | Self { 116 | checksum: Some(checksum), 117 | ..self 118 | } 119 | } 120 | 121 | pub fn endpoint(self, endpoint: String) -> Self { 122 | Self { 123 | endpoint: Some(endpoint), 124 | ..self 125 | } 126 | } 127 | 128 | pub fn build(self) -> FsOptions { 129 | let S3Builder { 130 | bucket, 131 | credential, 132 | region, 133 | sign_payload, 134 | checksum, 135 | endpoint, 136 | } = self; 137 | 138 | FsOptions { 139 | inner: FsOptionsInner::S3 { 140 | bucket, 141 | credential, 142 | region, 143 | sign_payload, 144 | checksum, 145 | endpoint, 146 | }, 147 | } 148 | } 149 | } 150 | 151 | #[wasm_bindgen] 152 | impl FsOptions { 153 | pub fn local() -> Self { 154 | Self { 155 | inner: FsOptionsInner::Local, 156 | } 157 | } 158 | } 159 | 160 | impl FsOptions { 161 | pub(crate) fn into_fs_options(self) -> tonbo::option::FsOptions { 162 | match self.inner { 163 | FsOptionsInner::Local => tonbo::option::FsOptions::Local, 164 | FsOptionsInner::S3 { 165 | bucket, 166 | credential, 167 | region, 168 | sign_payload, 169 | checksum, 170 | endpoint, 171 | } => tonbo::option::FsOptions::S3 { 172 | bucket, 173 | credential: credential.map(tonbo::option::AwsCredential::from), 174 | endpoint, 175 | region, 176 | sign_payload, 177 | checksum, 178 | }, 179 | } 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /bindings/js/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod datatype; 2 | pub mod db; 3 | pub mod options; 4 | pub mod transaction; 5 | mod utils; 6 | pub use db::*; 7 | pub use transaction::*; 8 | pub mod range; 9 | pub use range::*; 10 | pub mod fs; 11 | pub use fs::*; 12 | -------------------------------------------------------------------------------- /bindings/js/src/options.rs: -------------------------------------------------------------------------------- 1 | use tonbo::{option::Path, record::Schema}; 2 | use wasm_bindgen::{prelude::wasm_bindgen, JsValue}; 3 | 4 | use crate::FsOptions; 5 | 6 | pub(crate) const MAX_LEVEL: usize = 7; 7 | 8 | #[wasm_bindgen] 9 | #[derive(Debug, Clone)] 10 | pub struct DbOption { 11 | /// cached message size in parquet cleaner 12 | clean_channel_buffer: usize, 13 | /// len 
threshold of `immutables` when minor compaction is triggered 14 | immutable_chunk_num: usize, 15 | /// magnification that triggers major compaction between different levels 16 | level_sst_magnification: usize, 17 | major_default_oldest_table_num: usize, 18 | /// threshold for the number of `parquet` when major compaction is triggered 19 | major_threshold_with_sst_size: usize, 20 | /// Maximum size of each parquet 21 | max_sst_file_size: usize, 22 | version_log_snapshot_threshold: u32, 23 | use_wal: bool, 24 | /// Maximum size of the WAL buffer 25 | wal_buffer_size: usize, 26 | /// build the `DB` storage directory based on the passed path 27 | path: String, 28 | base_fs: FsOptions, 29 | level_paths: Vec<Option<(String, FsOptions)>>, 30 | } 31 | 32 | #[wasm_bindgen] 33 | impl DbOption { 34 | #[wasm_bindgen(constructor)] 35 | pub fn new(path: String) -> Result<DbOption, JsValue> { 36 | let path = Path::from_opfs_path(path) 37 | .map_err(|err| JsValue::from(err.to_string()))? 38 | .to_string(); 39 | Ok(Self { 40 | clean_channel_buffer: 10, 41 | immutable_chunk_num: 3, 42 | level_sst_magnification: 10, 43 | major_default_oldest_table_num: 3, 44 | major_threshold_with_sst_size: 4, 45 | max_sst_file_size: 256 * 1024 * 1024, 46 | version_log_snapshot_threshold: 200, 47 | use_wal: true, 48 | wal_buffer_size: 4 * 1024, 49 | path, 50 | base_fs: FsOptions::local(), 51 | level_paths: vec![None; MAX_LEVEL], 52 | }) 53 | } 54 | 55 | pub fn level_path( 56 | mut self, 57 | level: usize, 58 | path: String, 59 | fs_options: FsOptions, 60 | ) -> Result<DbOption, JsValue> { 61 | self.level_paths[level] = Some((path.to_string(), fs_options)); 62 | Ok(self) 63 | } 64 | } 65 | 66 | impl DbOption { 67 | pub(crate) fn into_option<S: Schema>(self, schema: &S) -> tonbo::DbOption { 68 | let mut opt = tonbo::DbOption::new(Path::from(self.path), schema) 69 | .clean_channel_buffer(self.clean_channel_buffer) 70 | .immutable_chunk_num(self.immutable_chunk_num) 71 | .level_sst_magnification(self.level_sst_magnification) 72 | .major_default_oldest_table_num(self.major_default_oldest_table_num) 73 | .major_threshold_with_sst_size(self.major_threshold_with_sst_size) 74 | .max_sst_file_size(self.max_sst_file_size) 75 | .version_log_snapshot_threshold(self.version_log_snapshot_threshold) 76 | .wal_buffer_size(self.wal_buffer_size) 77 | .base_fs(self.base_fs.into_fs_options()); 78 | 79 | for (level, path) in self.level_paths.into_iter().enumerate() { 80 | if let Some((path, fs_options)) = path { 81 | let path = fs_options.path(path).unwrap(); 82 | opt = opt 83 | .level_path(level, path, fs_options.into_fs_options()) 84 | .unwrap(); 85 | } 86 | } 87 | if !self.use_wal { 88 | opt = opt.disable_wal() 89 | } 90 | opt 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /bindings/js/src/range.rs: -------------------------------------------------------------------------------- 1 | use tonbo::record::{Value, ValueDesc}; 2 | use wasm_bindgen::{prelude::wasm_bindgen, JsValue}; 3 | 4 | use crate::utils::parse_key; 5 | 6 | #[wasm_bindgen] 7 | pub struct Bound { 8 | inner: BoundInner, 9 | } 10 | 11 | enum BoundInner { 12 | Included(JsValue), 13 | Excluded(JsValue), 14 | Unbounded, 15 | } 16 | 17 | #[wasm_bindgen] 18 | impl Bound { 19 | /// Represents the inclusive bound of a range; null or undefined are identical to [`Bound::unbounded`] 20 | pub fn included(key: JsValue) -> Self { 21 | if key.is_null() || key.is_undefined() { 22 | return Self { 23 | inner: BoundInner::Unbounded, 24 | }; 25 | } 26 | Self { 27 | inner: BoundInner::Included(key), 28 | } 29 | } 30 | 31 | /// 
Represents the exclusive bound of a range; null or undefined are identical to [`Bound::unbounded`] 32 | pub fn excluded(key: JsValue) -> Self { 33 | if key.is_null() || key.is_undefined() { 34 | return Self { 35 | inner: BoundInner::Unbounded, 36 | }; 37 | } 38 | Self { 39 | inner: BoundInner::Excluded(key), 40 | } 41 | } 42 | 43 | pub fn unbounded() -> Self { 44 | Self { 45 | inner: BoundInner::Unbounded, 46 | } 47 | } 48 | } 49 | 50 | impl Bound { 51 | pub(crate) fn into_bound(self, desc: &ValueDesc) -> Result<std::ops::Bound<Value>, JsValue> { 52 | Ok(match self.inner { 53 | BoundInner::Included(key) => std::ops::Bound::Included(parse_key(desc, key, true)?), 54 | BoundInner::Excluded(key) => std::ops::Bound::Excluded(parse_key(desc, key, true)?), 55 | BoundInner::Unbounded => std::ops::Bound::Unbounded, 56 | }) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /bindings/python/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2021" 3 | name = "tonbo-python" 4 | version = "0.2.0" 5 | license = "Apache-2.0" 6 | repository = "https://github.com/tonbo-io/tonbo" 7 | readme = "README.md" 8 | 9 | [lib] 10 | crate-type = ["cdylib"] 11 | 12 | [workspace] 13 | 14 | [dependencies] 15 | futures = { version = "0.3" } 16 | pyo3 = { version = "0.25", features = [ 17 | "abi3", 18 | "abi3-py310", 19 | "extension-module", 20 | ] } 21 | pyo3-async-runtimes = { version = "0.25", features = [ 22 | "attributes", 23 | "tokio-runtime", 24 | ] } 25 | tokio = { version = "1", features = ["rt-multi-thread"] } 26 | tonbo = { version = "0.3.2", path = "../../" } 27 | -------------------------------------------------------------------------------- /bindings/python/README.md: -------------------------------------------------------------------------------- 1 | # Tonbo Python Binding 2 | 3 | This package provides a native Python binding for [Tonbo](https://github.com/tonbo-io/tonbo). 4 | 5 | Tonbo's Python bindings can be used to build data-intensive applications, including other types of databases. 
6 | 7 | ## Installation 8 | 9 | ```sh 10 | pip install tonbo 11 | ``` 12 | 13 | ## Example 14 | 15 | ```py 16 | from tonbo import DbOption, Column, DataType, Record, TonboDB, Bound 17 | from tonbo.fs import from_filesystem_path 18 | import asyncio 19 | import os 20 | 21 | # define a Tonbo record 22 | @Record 23 | class User: 24 | age = Column(DataType.Int8, name="age", primary_key=True) 25 | height = Column(DataType.Int16, name="height", nullable=True) 26 | weight = Column(DataType.Int8, name="weight", nullable=False) 27 | 28 | async def main(): 29 | 30 | if not os.path.exists("db_path/users"): 31 | os.makedirs("db_path/users") 32 | db = TonboDB(DbOption(from_filesystem_path("db_path/users")), User()) 33 | 34 | await db.insert(User(age=18, height=175, weight=60)) 35 | record = await db.get(18) 36 | assert record == {"age": 18, "height": 175, "weight": 60} 37 | 38 | txn = await db.transaction() 39 | txn.insert(User(age=19, height=195, weight=75)) 40 | result = await txn.get(19) 41 | assert result == {"age": 19, "height": 195, "weight": 75} 42 | 43 | # commit transaction 44 | await txn.commit() 45 | 46 | txn = await db.transaction() 47 | # range scan, supports pushing down and limit 48 | scan = await txn.scan( 49 | Bound.Excluded(18), None, limit=100, projection=["age", "weight"] 50 | ) 51 | async for record in scan: 52 | print(record) 53 | 54 | asyncio.run(main()) 55 | ``` 56 | 57 | See [examples](example/README.md) for more information. 58 | 59 | ### Roadmap 60 | 61 | - Remote storage API mapping and test 62 | - Integrate with other Arrow analytical tools 63 | 64 | ## Development 65 | 66 | This assumes that you have Rust and cargo installed. We use [pyo3](https://github.com/PyO3/pyo3) to generate a native Python module and [maturin](https://github.com/PyO3/maturin) to build Rust-based Python packages. 67 | 68 | First, follow the commands below to build a new Python virtualenv, and install maturin into the virtualenv using Python's package manager, pip: 69 | 70 | ```bash 71 | # setup virtualenv 72 | python -m venv .env 73 | # activate venv 74 | source .env/bin/activate 75 | 76 | # install maturin 77 | pip install maturin 78 | # build bindings 79 | maturin develop 80 | ``` 81 | 82 | Whenever the Rust code changes, run: 83 | 84 | ```bash 85 | maturin develop 86 | ``` 87 | 88 | Run tests: 89 | 90 | ```bash 91 | maturin develop -E test 92 | python -m pytest 93 | ``` 94 | -------------------------------------------------------------------------------- /bindings/python/example/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | 3 | Here are some examples that can help you use the Python binding of [Tonbo](https://github.com/tonbo-io/tonbo) 4 | 5 | ### Declaration 6 | 7 | - The [record.py](record.py) file shows how to define a `Tonbo` record 8 | - The [write_batch.py](write_batch.py) file shows how to insert records in batches 9 | - The [transaction.py](transaction.py) file shows how to use transactions 10 | - The [declare.py](declare.py) file shows how to use `Tonbo` for simple data operations. 11 | - The [multiple_instance.py](multiple_instance.py) file shows how to combine different `Tonbo` records with multiple `TonboDB` instances. 12 | 13 | ### Fusion storage 14 | 15 | The [fusion_storage.py](fusion_storage.py) file shows how to store data across different storage backends. 
16 | -------------------------------------------------------------------------------- /bindings/python/example/declare.py: -------------------------------------------------------------------------------- 1 | from tonbo import DbOption, Column, DataType, Record, TonboDB, Bound 2 | import asyncio 3 | import tempfile 4 | 5 | @Record 6 | class User: 7 | id = Column(DataType.Int64, name="id", primary_key=True) 8 | age = Column(DataType.Int16, name="age", nullable=True) 9 | name = Column(DataType.String, name="name", nullable=False) 10 | email = Column(DataType.String, name="email", nullable=True) 11 | data = Column(DataType.Bytes, name="data", nullable=True) 12 | grade = Column(DataType.Float, name="grade", nullable=True) 13 | 14 | 15 | async def main(): 16 | temp_dir = tempfile.TemporaryDirectory() 17 | 18 | db = TonboDB(DbOption(temp_dir.name), User()) 19 | await db.insert(User(id=18, age=175, name="Alice", grade = 1.23)) 20 | 21 | record = await db.get(18) 22 | assert record == { 23 | "id": 18, 24 | "age": 175, 25 | "name": "Alice", 26 | "email": None, 27 | "data": None, 28 | "grade": 1.23, 29 | } 30 | 31 | txn = await db.transaction() 32 | result = await txn.get(18) 33 | assert result == { 34 | "id": 18, 35 | "age": 175, 36 | "name": "Alice", 37 | "email": None, 38 | "data": None, 39 | "grade": 1.23, 40 | } 41 | 42 | txn.insert( 43 | User( 44 | id=19, 45 | age=195, 46 | name="Bob", 47 | data=b"Hello Tonbo!", 48 | email="contact@tonbo.io", 49 | grade = 2.23, 50 | ) 51 | ) 52 | result = await txn.get(19) 53 | assert result == { 54 | "id": 19, 55 | "age": 195, 56 | "name": "Bob", 57 | "email": "contact@tonbo.io", 58 | "data": b"Hello Tonbo!", 59 | "grade": 2.23, 60 | } 61 | 62 | await txn.commit() 63 | txn = await db.transaction() 64 | scan = await txn.scan( 65 | Bound.Excluded(18), 66 | None, 67 | limit=100, 68 | projection=["id", "email", "data", "grade"], 69 | ) 70 | async for record in scan: 71 | assert record["age"] is None 72 | print(record) 73 | await txn.commit() 74 | 75 | 76 | asyncio.run(main()) 77 | -------------------------------------------------------------------------------- /bindings/python/example/fusion_storage.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from tonbo import DbOption, Column, DataType, Record, TonboDB 5 | from tonbo.fs import from_filesystem_path, FsOptions 6 | 7 | 8 | @Record 9 | class User: 10 | id = Column(DataType.Int64, name="id", primary_key=True) 11 | name = Column(DataType.String, name="name") 12 | email = Column(DataType.String, name="email", nullable=True) 13 | age = Column(DataType.UInt8, name="age") 14 | data = Column(DataType.Bytes, name="data") 15 | 16 | 17 | async def main(): 18 | if not os.path.exists("db_path/user/l0"): 19 | os.makedirs("db_path/user/l0") 20 | if not os.path.exists("db_path/user/l1"): 21 | os.makedirs("db_path/user/l1") 22 | 23 | option = DbOption(from_filesystem_path("db_path/user")) 24 | option.level_path(0, from_filesystem_path("db_path/user/l0"), FsOptions.Local()) 25 | option.level_path(1, from_filesystem_path("db_path/user/l1"), FsOptions.Local()) 26 | 27 | option.immutable_chunk_num = 1 28 | option.major_threshold_with_sst_size = 3 29 | option.level_sst_magnification = 1 30 | option.max_sst_file_size = 1 * 1024 31 | 32 | db = TonboDB(option, User()) 33 | for i in range(0, 1000): 34 | if i % 50 == 0: 35 | await db.flush() 36 | await db.insert( 37 | User( 38 | id=i, 39 | age=i % 128, 40 | name=str(i * 10), 41 | email=str(i * 20), 42 | 
data=b"Hello Tonbo!", 43 | ) 44 | ) 45 | 46 | for i in range(0, 1000): 47 | user = await db.get(i) 48 | assert user == { 49 | "id": i, 50 | "name": str(i * 10), 51 | "email": str(i * 20), 52 | "age": i % 128, 53 | "data": b"Hello Tonbo!", 54 | } 55 | 56 | 57 | asyncio.run(main()) 58 | -------------------------------------------------------------------------------- /bindings/python/example/multiple_instance.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import tempfile 3 | 4 | from tonbo import Column, DataType, TonboDB, RecordBatch, DbOption, Record 5 | from tonbo.fs import from_filesystem_path 6 | 7 | 8 | @Record 9 | class User: 10 | id = Column(DataType.UInt64, name="id", primary_key=True) 11 | age = Column(datatype=DataType.UInt8, name="age", nullable=True) 12 | name = Column(DataType.String, name="name") 13 | email = Column(DataType.String, name="email") 14 | 15 | 16 | @Record 17 | class Pet: 18 | id = Column(DataType.UInt64, name="id", primary_key=True) 19 | type = Column(DataType.String, name="type") 20 | name = Column(DataType.String, name="name") 21 | master = Column(datatype=DataType.UInt64, name="master", nullable=True) 22 | 23 | 24 | @Record 25 | class FoundRecord: 26 | user_id = Column(DataType.UInt64, name="user_id", primary_key=True) 27 | pet_id = Column(DataType.UInt64, name="pet_id") 28 | email = Column(DataType.String, name="email") 29 | status = Column(DataType.Boolean, name="status") 30 | 31 | 32 | async def main(): 33 | user_temp_dir = tempfile.TemporaryDirectory() 34 | pet_temp_dir = tempfile.TemporaryDirectory() 35 | found_temp_dir = tempfile.TemporaryDirectory() 36 | user_db = TonboDB(DbOption(from_filesystem_path(user_temp_dir.name)), User()) 37 | pet_db = TonboDB(DbOption(from_filesystem_path(pet_temp_dir.name)), Pet()) 38 | found_db = TonboDB(DbOption(from_filesystem_path(found_temp_dir.name)), FoundRecord()) 39 | 40 | user_txn = await user_db.transaction() 41 | for i in range(0, 100): 42 | user_txn.insert(User(id=i, age=i, name=str(i * 20), email=str(i * 100))) 43 | await user_txn.commit() 44 | 45 | pet_txn = await pet_db.transaction() 46 | pet_txn.insert(Pet(id=1, type="dog", name="Hachi", master=10)) 47 | pet_txn.insert(Pet(id=2, type="cat", name="Kitty", master=21)) 48 | pet_txn.insert(Pet(id=3, type="dragonfly", name="Tonbo", master=11)) 49 | 50 | await pet_txn.commit() 51 | 52 | user_txn = await user_db.transaction() 53 | found_txn = await found_db.transaction() 54 | user_scan = await user_txn.scan(lower=None, high=None, projection=["id", "name", "email"]) 55 | 56 | async for user in user_scan: 57 | pet_txn = await pet_db.transaction() 58 | pet_scan = await pet_txn.scan(lower=None, high=None, projection=["id", "master", "name"]) 59 | async for pet in pet_scan: 60 | if user["id"] == pet["master"]: 61 | found_txn.insert(FoundRecord(user_id=user["id"], pet_id=pet["id"], email=user["email"], status=False)) 62 | 63 | await found_txn.commit() 64 | 65 | found_txn = await found_db.transaction() 66 | found_scan = await found_txn.scan(lower=None, high=None) 67 | async for found in found_scan: 68 | print(found) 69 | 70 | 71 | asyncio.run(main()) 72 | -------------------------------------------------------------------------------- /bindings/python/example/record.py: -------------------------------------------------------------------------------- 1 | from tonbo import Record, Column, DataType 2 | 3 | 4 | @Record 5 | class User: 6 | id = Column(DataType.Int64, name="id", primary_key=True) 7 | age = 
Column(datatype=DataType.UInt8, name="age", nullable=True) 8 | # nullable is `False` by default 9 | name = Column(DataType.String, name="name") 10 | height = Column(DataType.Int16, name="height") 11 | enabled = Column(DataType.Boolean, name="enabled") 12 | email = Column(DataType.String, name="email", nullable=True) 13 | data = Column(DataType.Bytes, name="data", nullable=True) 14 | 15 | -------------------------------------------------------------------------------- /bindings/python/example/transaction.py: -------------------------------------------------------------------------------- 1 | from tonbo import DbOption, Column, DataType, Record, TonboDB, Bound 2 | import asyncio 3 | import tempfile 4 | 5 | 6 | @Record 7 | class User: 8 | id = Column(DataType.Int64, name="id", primary_key=True) 9 | height = Column(DataType.Int16, name="height", nullable=True) 10 | name = Column(DataType.String, name="name", nullable=False) 11 | email = Column(DataType.String, name="email", nullable=True) 12 | data = Column(DataType.Bytes, name="data", nullable=True) 13 | 14 | 15 | async def main(): 16 | temp_dir = tempfile.TemporaryDirectory() 17 | 18 | db = TonboDB(DbOption(temp_dir.name), User()) 19 | # create a new transaction 20 | txn = await db.transaction() 21 | 22 | # insert with class 23 | txn.insert( 24 | User( 25 | id=19, 26 | height=195, 27 | name="Bob", 28 | data=b"Hello Tonbo!", 29 | email="contact@tonbo.io", 30 | ) 31 | ) 32 | result = await txn.get(19) 33 | assert result == { 34 | "id": 19, 35 | "height": 195, 36 | "name": "Bob", 37 | "email": "contact@tonbo.io", 38 | "data": b"Hello Tonbo!", 39 | } 40 | 41 | # commit a transaction 42 | await txn.commit() 43 | 44 | txn = await db.transaction() 45 | # support push down limit, filter and projection 46 | scan = await txn.scan( 47 | Bound.Excluded(18), 48 | None, 49 | limit=100, 50 | projection=["id", "email", "data"], 51 | ) 52 | async for record in scan: 53 | assert record["height"] is None 54 | print(record) 55 | await txn.commit() 56 | 57 | 58 | asyncio.run(main()) 59 | -------------------------------------------------------------------------------- /bindings/python/example/write_batch.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import tempfile 3 | 4 | from tonbo import Column, DataType, TonboDB, RecordBatch, DbOption, Record 5 | from tonbo.fs import from_filesystem_path 6 | 7 | 8 | @Record 9 | class User: 10 | id = Column(DataType.Int64, name="id", primary_key=True) 11 | age = Column(datatype=DataType.UInt8, name="age", nullable=True) 12 | name = Column(DataType.String, name="name") 13 | email = Column(DataType.String, name="email", nullable=True) 14 | data = Column(DataType.Bytes, name="data", nullable=True) 15 | 16 | 17 | async def main(): 18 | temp_dir = tempfile.TemporaryDirectory() 19 | db = TonboDB(DbOption(from_filesystem_path(temp_dir.name)), User()) 20 | batch = RecordBatch() 21 | for i in range(0, 100): 22 | # you must add record to `RecordBatch` one by one 23 | batch.append(User(id= i, age=i, name=str(i * 20))) 24 | 25 | await db.insert_batch(batch) 26 | 27 | for i in range(0, 100): 28 | user = await db.get(i) 29 | assert user == { "id": i, "age": i, "name": str(i * 20), "email": None, "data": None } 30 | 31 | 32 | asyncio.run(main()) 33 | -------------------------------------------------------------------------------- /bindings/python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = 
["maturin>=1.7,<2.0"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "tonbo" 7 | readme = "README.md" 8 | requires-python = ">=3.10" 9 | classifiers = [ 10 | "Programming Language :: Rust", 11 | "Programming Language :: Python :: Implementation :: CPython", 12 | "Programming Language :: Python :: Implementation :: PyPy", 13 | ] 14 | description = "Tonbo Python Binding" 15 | license = { text = "Apache-2.0" } 16 | dynamic = ["version"] 17 | 18 | [project.optional-dependencies] 19 | test = ["pytest", "pytest-asyncio"] 20 | bench = ["pytest", "pytest-asyncio", "pytest-benchmark", "duckdb"] 21 | docs = ["pdoc"] 22 | 23 | [tool.maturin] 24 | features = ["pyo3/extension-module"] 25 | 26 | module-name = "tonbo._tonbo" 27 | python-source = "python" 28 | strip = true 29 | -------------------------------------------------------------------------------- /bindings/python/python/tonbo/__init__.py: -------------------------------------------------------------------------------- 1 | from ._tonbo import * 2 | 3 | __doc__ = _tonbo.__doc__ 4 | __all__ = _tonbo.__all__ 5 | -------------------------------------------------------------------------------- /bindings/python/python/tonbo/__init__.pyi: -------------------------------------------------------------------------------- 1 | from typing import Any, AsyncIterable, final 2 | from enum import Enum, auto 3 | from tonbo import error as error 4 | from tonbo.fs import FsOptions 5 | 6 | @final 7 | class Record: 8 | def __call__(self) -> None: ... 9 | 10 | @final 11 | class Bound(Enum): 12 | """Tonbo range for scan. None for unbounded""" 13 | 14 | @staticmethod 15 | def Included(key: Any) -> Bound: ... 16 | @staticmethod 17 | def Excluded(key: Any) -> Bound: ... 18 | 19 | @final 20 | class DataType(Enum): 21 | """Tonbo data type.""" 22 | 23 | UInt8 = auto() 24 | UInt16 = auto() 25 | UInt32 = auto() 26 | UInt64 = auto() 27 | Int8 = auto() 28 | Int16 = auto() 29 | Int32 = auto() 30 | Int64 = auto() 31 | String = auto() 32 | Boolean = auto() 33 | Bytes = auto() 34 | Float = auto() 35 | 36 | @final 37 | class Column: 38 | """Column represents properties of a field.""" 39 | 40 | name: str 41 | datatype: DataType 42 | nullable: bool 43 | primary_key: bool 44 | def __init__( 45 | self, 46 | datatype: DataType, 47 | name: str, 48 | nullable: bool = False, 49 | primary_key: bool = False, 50 | ) -> None: ... 51 | 52 | @final 53 | class RecordBatch: 54 | """Column represents properties of a field.""" 55 | def __init__(self) -> None: ... 56 | def append(self, record: object) -> None: ... 57 | 58 | class DbOption: 59 | """Tonbo configurations.""" 60 | 61 | clean_channel_buffer: int 62 | immutable_chunk_num: int 63 | level_sst_magnification: int 64 | major_default_oldest_table_num: int 65 | major_threshold_with_sst_size: int 66 | max_sst_file_size: int 67 | version_log_snapshot_threshold: int 68 | use_wal: bool 69 | wal_buffer_size: int 70 | path: str 71 | 72 | def __init__(self, path: str) -> None: 73 | """Create a new :py:class:`DbOption` with the given path. Note: the path must exist""" 74 | ... 75 | 76 | def level_path(self, level: int, path: str, fs_options: FsOptions) -> None: 77 | """Set path for assigned level 78 | 79 | Args: 80 | level: Level for output. 81 | path: Path for output 82 | fs_options: Local or S3 83 | """ 84 | ... 85 | 86 | @final 87 | class Transaction: 88 | """Tonbo transaction.""" 89 | 90 | async def get( 91 | self, key: Any, projection: list[str] = ["*"] 92 | ) -> dict[str, Any] | None: 93 | """Get record from db. 
94 | 95 | Args: 96 | key: Primary key of record. 97 | projection: fields to project 98 | """ 99 | ... 100 | def insert(self, record: object) -> None: 101 | """Insert record to db.""" 102 | ... 103 | def remove(self, key: Any) -> None: 104 | """Remove record from db. 105 | Args: 106 | key: Primary key of record. 107 | """ 108 | ... 109 | async def scan( 110 | self, 111 | lower: Bound | None, 112 | high: Bound | None, 113 | limit: int | None, 114 | projection: list[str] = ["*"], 115 | ) -> AsyncIterable[dict[str, Any]]: 116 | """Create an async stream for scanning. 117 | 118 | Args: 119 | lower: Lower bound of range. Use None to represent unbounded. 120 | high: Upper bound of range. Use None to represent unbounded. 121 | limit: maximum number of records to scan 122 | projection: fields to project 123 | """ 124 | ... 125 | async def commit(self) -> None: 126 | """Commit :py:class:`Transaction`.""" 127 | ... 128 | 129 | @final 130 | class TonboDB: 131 | def __init__(self, option: DbOption, schema: Any) -> None: 132 | """Create a new :py:class:`TonboDB` with the given configuration options. 133 | 134 | Args: 135 | option: Configuration options. 136 | schema: Schema of record. 137 | """ 138 | ... 139 | async def get(self, key: Any) -> dict[str, Any] | None: 140 | """Get record from db. 141 | 142 | Args: 143 | key: Primary key of record. 144 | """ 145 | ... 146 | async def insert(self, record: object) -> None: 147 | """Insert a record to db.""" 148 | ... 149 | async def insert_batch(self, record_batch: RecordBatch) -> None: 150 | """Insert :py:class:`RecordBatch` to db.""" 151 | ... 152 | async def remove(self, key: Any) -> None: 153 | """Remove record from db. 154 | 155 | Args: 156 | key: Primary key of record. 157 | """ 158 | ... 159 | async def transaction(self) -> Transaction: 160 | """Create a new :py:class:`Transaction`.""" 161 | ... 162 | async def flush(self) -> None: 163 | """Try to execute compaction.""" 164 | ... 165 | async def flush_wal(self) -> None: 166 | """Flush the WAL manually.""" 167 | ... 168 | -------------------------------------------------------------------------------- /bindings/python/python/tonbo/error.pyi: -------------------------------------------------------------------------------- 1 | class CommitError(Exception): 2 | """Commit error""" 3 | pass 4 | 5 | class DbError(Exception): 6 | """DB error""" 7 | pass 8 | 9 | class DecodeError(Exception): 10 | """Decode error""" 11 | pass 12 | 13 | class RecoverError(Exception): 14 | """Recover error""" 15 | pass 16 | 17 | class ExceedsMaxLevelError(Exception): 18 | """Exceeds max level""" 19 | pass 20 | 21 | class WriteConflictError(Exception): 22 | """Write conflict""" 23 | pass 24 | 25 | class RepeatedCommitError(Exception): 26 | """Repeated commit of the same transaction""" 27 | pass 28 | 29 | class PathParseError(Exception): 30 | """Parse path error""" 31 | pass -------------------------------------------------------------------------------- /bindings/python/python/tonbo/fs.pyi: -------------------------------------------------------------------------------- 1 | from typing import final 2 | 3 | @final 4 | class AwsCredential: 5 | key_id: str 6 | secret_key: str 7 | token: str | None 8 | 9 | def __init__( 10 | self, key_id: str, secret_key: str, token: str | None = None 11 | ) -> None: ... 12 | 13 | @final 14 | class FsOptions: 15 | class Local: ... 
16 | 17 | class S3: 18 | bucket: str 19 | credential: AwsCredential | None 20 | region: str | None 21 | sign_payload: bool | None 22 | checksum: bool | None 23 | 24 | def __init__( 25 | self, 26 | bucket: str, 27 | credential: AwsCredential | None = None, 28 | region: str | None = None, 29 | sign_payload: bool | None = None, 30 | checksum: bool | None = None, 31 | endpoint: str | None = None, 32 | ) -> None: ... 33 | 34 | def parse(path: str) -> str: ... 35 | 36 | def from_filesystem_path(path: str) -> str: 37 | """Parse path to a filesystem relative path""" 38 | ... 39 | def from_absolute_path(path: str) -> str: 40 | """Parse path to a filesystem absolute path""" 41 | ... 42 | def from_url_path(path: str) -> str: 43 | """Parse a URL path""" 44 | ... 45 | -------------------------------------------------------------------------------- /bindings/python/src/column.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | any::Any, 3 | fmt::{Display, Formatter}, 4 | sync::Arc, 5 | }; 6 | 7 | use pyo3::{pyclass, pymethods}; 8 | use tonbo::record::{DataType as TonboDataType, Value, ValueDesc}; 9 | 10 | use crate::datatype::DataType; 11 | 12 | #[pyclass] 13 | #[derive(Clone)] 14 | pub struct Column { 15 | pub name: String, 16 | pub datatype: DataType, 17 | pub nullable: bool, 18 | pub primary_key: bool, 19 | pub(crate) value: Arc<dyn Any>, 20 | } 21 | 22 | unsafe impl Send for Column {} 23 | unsafe impl Sync for Column {} 24 | 25 | impl Column {} 26 | 27 | #[pymethods] 28 | impl Column { 29 | #[new] 30 | #[pyo3(signature= (datatype, name, nullable=false, primary_key=false))] 31 | pub fn new(datatype: DataType, name: String, nullable: bool, primary_key: bool) -> Self { 32 | if primary_key && nullable { 33 | panic!("Primary key should not be nullable!") 34 | } 35 | let value = datatype.none_value(); 36 | Self { 37 | name, 38 | datatype, 39 | nullable, 40 | primary_key, 41 | value, 42 | } 43 | } 44 | 45 | fn __str__(&self) -> String { 46 | format!("{}", &self) 47 | } 48 | } 49 | 50 | impl Display for Column { 51 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 52 | f.debug_struct("Column") 53 | .field("name", &self.name) 54 | .field("type", &self.datatype) 55 | .field("nullable", &self.nullable) 56 | .field("primary_key", &self.primary_key) 57 | .finish() 58 | } 59 | } 60 | 61 | impl From<Column> for ValueDesc { 62 | fn from(col: Column) -> Self { 63 | let datatype = TonboDataType::from(col.datatype); 64 | ValueDesc::new(col.name, datatype, col.nullable) 65 | } 66 | } 67 | impl From<Column> for Value { 68 | fn from(col: Column) -> Self { 69 | let datatype = TonboDataType::from(col.datatype); 70 | Value::new(datatype, col.name, col.value, col.nullable) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /bindings/python/src/datatype.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | any::Any, 3 | fmt::{Debug, Formatter}, 4 | sync::Arc, 5 | }; 6 | 7 | use pyo3::pyclass; 8 | use tonbo::record::{DataType as TonboDataType, F64}; 9 | 10 | #[pyclass] 11 | #[derive(PartialEq, Clone)] 12 | pub enum DataType { 13 | UInt8, 14 | UInt16, 15 | UInt32, 16 | UInt64, 17 | Int8, 18 | Int16, 19 | Int32, 20 | Int64, 21 | String, 22 | Boolean, 23 | Bytes, 24 | Float, 25 | } 26 | 27 | impl Debug for DataType { 28 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 29 | match self { 30 | DataType::UInt8 => f.write_str("u8"), 31 | DataType::UInt16 => f.write_str("u16"), 32 | 
DataType::UInt32 => f.write_str("u32"), 33 | DataType::UInt64 => f.write_str("u64"), 34 | DataType::Int8 => f.write_str("i8"), 35 | DataType::Int16 => f.write_str("i16"), 36 | DataType::Int32 => f.write_str("i32"), 37 | DataType::Int64 => f.write_str("i64"), 38 | DataType::String => f.write_str("str"), 39 | DataType::Boolean => f.write_str("bool"), 40 | DataType::Bytes => f.write_str("bytes"), 41 | DataType::Float => f.write_str("float"), 42 | } 43 | } 44 | } 45 | 46 | impl DataType { 47 | pub(crate) fn none_value(&self) -> Arc<dyn Any> { 48 | match self { 49 | DataType::UInt8 => Arc::new(Option::<u8>::None), 50 | DataType::UInt16 => Arc::new(Option::<u16>::None), 51 | DataType::UInt32 => Arc::new(Option::<u32>::None), 52 | DataType::UInt64 => Arc::new(Option::<u64>::None), 53 | DataType::Int8 => Arc::new(Option::<i8>::None), 54 | DataType::Int16 => Arc::new(Option::<i16>::None), 55 | DataType::Int32 => Arc::new(Option::<i32>::None), 56 | DataType::Int64 => Arc::new(Option::<i64>::None), 57 | DataType::String => Arc::new(Option::<String>::None), 58 | DataType::Boolean => Arc::new(Option::<bool>::None), 59 | DataType::Bytes => Arc::new(Option::<Vec<u8>>::None), 60 | DataType::Float => Arc::new(Option::<F64>::None), 61 | } 62 | } 63 | } 64 | 65 | impl From<DataType> for TonboDataType { 66 | fn from(datatype: DataType) -> Self { 67 | match datatype { 68 | DataType::UInt8 => TonboDataType::UInt8, 69 | DataType::UInt16 => TonboDataType::UInt16, 70 | DataType::UInt32 => TonboDataType::UInt32, 71 | DataType::UInt64 => TonboDataType::UInt64, 72 | DataType::Int8 => TonboDataType::Int8, 73 | DataType::Int16 => TonboDataType::Int16, 74 | DataType::Int32 => TonboDataType::Int32, 75 | DataType::Int64 => TonboDataType::Int64, 76 | DataType::String => TonboDataType::String, 77 | DataType::Boolean => TonboDataType::Boolean, 78 | DataType::Bytes => TonboDataType::Bytes, 79 | DataType::Float => TonboDataType::Float64, 80 | } 81 | } 82 | } 83 | 84 | impl From<&DataType> for TonboDataType { 85 | fn from(datatype: &DataType) -> Self { 86 | match datatype { 87 | DataType::UInt8 => TonboDataType::UInt8, 88 | DataType::UInt16 => TonboDataType::UInt16, 89 | DataType::UInt32 => TonboDataType::UInt32, 90 | DataType::UInt64 => TonboDataType::UInt64, 91 | DataType::Int8 => TonboDataType::Int8, 92 | DataType::Int16 => TonboDataType::Int16, 93 | DataType::Int32 => TonboDataType::Int32, 94 | DataType::Int64 => TonboDataType::Int64, 95 | DataType::String => TonboDataType::String, 96 | DataType::Boolean => TonboDataType::Boolean, 97 | DataType::Bytes => TonboDataType::Bytes, 98 | DataType::Float => TonboDataType::Float64, 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /bindings/python/src/error.rs: -------------------------------------------------------------------------------- 1 | use pyo3::{ 2 | create_exception, 3 | exceptions::{PyException, PyIOError, PyValueError}, 4 | pyclass, PyErr, 5 | }; 6 | use tonbo::record::DynRecord; 7 | create_exception!(tonbo, DecodeError, PyException, "Decode exception"); 8 | create_exception!(tonbo, RecoverError, PyException, "Recover exception"); 9 | create_exception!( 10 | tonbo, 11 | ExceedsMaxLevelError, 12 | PyException, 13 | "Exceeds max level exception" 14 | ); 15 | 16 | create_exception!( 17 | tonbo, 18 | WriteConflictError, 19 | PyException, 20 | "Write conflict exception" 21 | ); 22 | create_exception!(tonbo, InnerError, PyException, "Inner exception"); 23 | 24 | create_exception!( 25 | tonbo, 26 | RepeatedCommitError, 27 | PyException, 28 | "Repeated commit exception" 29 | ); 30 | 31 | 
create_exception!(tonbo, PathParseError, PyException, "Path parse exception"); 32 | 33 | pub(crate) fn repeated_commit_err() -> PyErr { 34 | RepeatedCommitError::new_err("Transaction has been committed!") 35 | } 36 | 37 | #[pyclass] 38 | pub(crate) struct DbError(tonbo::DbError<DynRecord>); 39 | 40 | #[pyclass] 41 | pub(crate) struct CommitError(tonbo::transaction::CommitError<DynRecord>); 42 | 43 | impl From<DbError> for PyErr { 44 | fn from(err: DbError) -> Self { 45 | match err.0 { 46 | tonbo::DbError::Io(err) => PyIOError::new_err(err.to_string()), 47 | tonbo::DbError::Version(err) => PyValueError::new_err(err.to_string()), 48 | tonbo::DbError::Parquet(err) => InnerError::new_err(err.to_string()), 49 | tonbo::DbError::UlidDecode(err) => DecodeError::new_err(err.to_string()), 50 | tonbo::DbError::Fusio(err) => InnerError::new_err(err.to_string()), 51 | tonbo::DbError::Recover(err) => RecoverError::new_err(err.to_string()), 52 | tonbo::DbError::WalWrite(err) => PyIOError::new_err(err.to_string()), 53 | tonbo::DbError::ExceedsMaxLevel => ExceedsMaxLevelError::new_err("Exceeds max level"), 54 | tonbo::DbError::Logger(err) => PyIOError::new_err(err.to_string()), 55 | } 56 | } 57 | } 58 | 59 | impl From<CommitError> for PyErr { 60 | fn from(err: CommitError) -> Self { 61 | match err.0 { 62 | tonbo::transaction::CommitError::Io(err) => PyIOError::new_err(err.to_string()), 63 | tonbo::transaction::CommitError::Parquet(err) => InnerError::new_err(err.to_string()), 64 | tonbo::transaction::CommitError::Database(err) => DbError::from(err).into(), 65 | tonbo::transaction::CommitError::WriteConflict(key) => { 66 | WriteConflictError::new_err(key.name()) 67 | } 68 | tonbo::transaction::CommitError::SendCompactTaskError(err) => { 69 | InnerError::new_err(err.to_string()) 70 | } 71 | tonbo::transaction::CommitError::ChannelClose => InnerError::new_err("channel close"), 72 | } 73 | } 74 | } 75 | 76 | impl From<tonbo::DbError<DynRecord>> for DbError { 77 | fn from(err: tonbo::DbError<DynRecord>) -> Self { 78 | DbError(err) 79 | } 80 | } 81 | 82 | impl From<tonbo::transaction::CommitError<DynRecord>> for CommitError { 83 | fn from(err: tonbo::transaction::CommitError<DynRecord>) -> Self { 84 | CommitError(err) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /bindings/python/src/fs.rs: -------------------------------------------------------------------------------- 1 | use pyo3::{pyclass, pyfunction, pymethods, types::PyString, Bound, PyResult, Python}; 2 | use tonbo::option::Path; 3 | 4 | use crate::PathParseError; 5 | 6 | #[pyclass(get_all, set_all)] 7 | #[derive(Debug, Clone)] 8 | pub struct AwsCredential { 9 | pub key_id: String, 10 | pub secret_key: String, 11 | pub token: Option<String>, 12 | } 13 | 14 | impl From<AwsCredential> for tonbo::option::AwsCredential { 15 | fn from(cred: AwsCredential) -> Self { 16 | tonbo::option::AwsCredential { 17 | key_id: cred.key_id, 18 | secret_key: cred.secret_key, 19 | token: cred.token, 20 | } 21 | } 22 | } 23 | 24 | #[pymethods] 25 | impl AwsCredential { 26 | #[new] 27 | fn new(key_id: String, secret_key: String, token: Option<String>) -> Self { 28 | Self { 29 | key_id, 30 | secret_key, 31 | token, 32 | } 33 | } 34 | } 35 | 36 | #[pyclass] 37 | #[derive(Debug, Clone)] 38 | pub enum FsOptions { 39 | Local {}, 40 | S3 { 41 | bucket: String, 42 | credential: Option<AwsCredential>, 43 | region: Option<String>, 44 | sign_payload: Option<bool>, 45 | checksum: Option<bool>, 46 | endpoint: Option<String>, 47 | }, 48 | } 49 | 50 | impl From<FsOptions> for tonbo::option::FsOptions { 51 | fn from(opt: FsOptions) -> Self { 52 | match opt { 53 | FsOptions::Local {} => tonbo::option::FsOptions::Local, 54 | FsOptions::S3 { 55 | bucket, 56 | 
credential, 57 | region, 58 | sign_payload, 59 | checksum, 60 | endpoint, 61 | } => tonbo::option::FsOptions::S3 { 62 | bucket, 63 | credential: credential.map(tonbo::option::AwsCredential::from), 64 | region, 65 | sign_payload, 66 | checksum, 67 | endpoint, 68 | }, 69 | } 70 | } 71 | } 72 | 73 | #[pyfunction] 74 | pub fn parse(path: String, py: Python) -> PyResult<Bound<PyString>> { 75 | let path = Path::parse(path).map_err(|e| PathParseError::new_err(e.to_string()))?; 76 | Ok(PyString::new(py, path.as_ref())) 77 | } 78 | 79 | #[pyfunction] 80 | pub fn from_filesystem_path(path: String, py: Python) -> PyResult<Bound<PyString>> { 81 | let path = 82 | Path::from_filesystem_path(path).map_err(|e| PathParseError::new_err(e.to_string()))?; 83 | Ok(PyString::new(py, path.as_ref())) 84 | } 85 | 86 | #[pyfunction] 87 | pub fn from_absolute_path(path: String, py: Python) -> PyResult<Bound<PyString>> { 88 | let path = 89 | Path::from_absolute_path(path).map_err(|e| PathParseError::new_err(e.to_string()))?; 90 | Ok(PyString::new(py, path.as_ref())) 91 | } 92 | 93 | #[pyfunction] 94 | pub fn from_url_path(path: String, py: Python) -> PyResult<Bound<PyString>> { 95 | let path = Path::from_url_path(path).map_err(|e| PathParseError::new_err(e.to_string()))?; 96 | Ok(PyString::new(py, path.as_ref())) 97 | } 98 | -------------------------------------------------------------------------------- /bindings/python/src/lib.rs: -------------------------------------------------------------------------------- 1 | use error::*; 2 | use pyo3::prelude::*; 3 | use record_batch::RecordBatch; 4 | 5 | use crate::record::Record; 6 | 7 | mod column; 8 | mod datatype; 9 | mod db; 10 | mod error; 11 | mod fs; 12 | mod options; 13 | mod range; 14 | mod record; 15 | mod record_batch; 16 | mod stream; 17 | mod transaction; 18 | mod utils; 19 | 20 | pub use column::*; 21 | pub use datatype::*; 22 | pub use db::*; 23 | pub use fs::*; 24 | pub use options::*; 25 | pub use stream::*; 26 | pub use transaction::*; 27 | 28 | use crate::error::{DecodeError, WriteConflictError}; 29 | 30 | /// Tonbo Python binding 31 | /// 32 | /// ## Usage 33 | /// ```python 34 | /// from tonbo import DbOption, Column, DataType, Record, TonboDB, Bound 35 | /// from tonbo.fs import from_filesystem_path 36 | /// import asyncio 37 | /// 38 | /// @Record 39 | /// class User: 40 | ///     id = Column(DataType.Int64, name="id", primary_key=True) 41 | ///     age = Column(DataType.Int16, name="age", nullable=True) 42 | ///     name = Column(DataType.String, name="name", nullable=False) 43 | /// 44 | /// async def main(): 45 | ///     db = TonboDB(DbOption(from_filesystem_path("db_path/user")), User()) 46 | ///     await db.insert(User(id=18, age=175, name="Alice")) 47 | ///     record = await db.get(18) 48 | ///     print(record) 49 | /// 50 | ///     # use transaction 51 | ///     txn = await db.transaction() 52 | ///     result = await txn.get(18) 53 | ///     scan = await txn.scan(Bound.Included(18), None, limit=10, projection=["id", "name"]) 54 | /// 55 | ///     async for record in scan: 56 | ///         print(record) 57 | /// 58 | /// asyncio.run(main()) 59 | /// ``` 60 | #[pymodule] 61 | fn _tonbo(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 62 | m.add_class::<Column>()?; 63 | m.add_class::<DataType>()?; 64 | m.add_class::<RecordBatch>()?; 65 | m.add_class::<Record>()?; 66 | m.add_class::<DbOption>()?; 67 | m.add_class::<TonboDB>()?; 68 | m.add_class::<Transaction>()?; 69 | m.add_class::<ScanStream>()?; 70 | m.add_class::<crate::range::Bound>()?; 71 | 72 | let fs_module = PyModule::new(py, "fs")?; 73 | fs_module.add_class::<FsOptions>()?; 74 | fs_module.add_class::<AwsCredential>()?; 75 | fs_module.add_function(wrap_pyfunction!(parse, &fs_module)?)?; 76 | 
fs_module.add_function(wrap_pyfunction!(from_filesystem_path, &fs_module)?)?; 77 | fs_module.add_function(wrap_pyfunction!(from_absolute_path, &fs_module)?)?; 78 | fs_module.add_function(wrap_pyfunction!(from_url_path, &fs_module)?)?; 79 | 80 | m.add_submodule(&fs_module)?; 81 | py.import("sys")? 82 | .getattr("modules")? 83 | .set_item("tonbo.fs", fs_module)?; 84 | 85 | let error_module = PyModule::new(py, "error")?; 86 | error_module.add_class::<DbError>()?; 87 | error_module.add_class::<CommitError>()?; 88 | 89 | error_module.add("DecodeError", py.get_type::<DecodeError>())?; 90 | error_module.add("RecoverError", py.get_type::<RecoverError>())?; 91 | error_module.add( 92 | "ExceedsMaxLevelError", 93 | py.get_type::<ExceedsMaxLevelError>(), 94 | )?; 95 | error_module.add("WriteConflictError", py.get_type::<WriteConflictError>())?; 96 | error_module.add("InnerError", py.get_type::<InnerError>())?; 97 | error_module.add("RepeatedCommitError", py.get_type::<RepeatedCommitError>())?; 98 | error_module.add("PathParseError", py.get_type::<PathParseError>())?; 99 | 100 | m.add_submodule(&error_module)?; 101 | py.import("sys")? 102 | .getattr("modules")? 103 | .set_item("tonbo.error", error_module)?; 104 | 105 | Ok(()) 106 | } 107 | -------------------------------------------------------------------------------- /bindings/python/src/options.rs: -------------------------------------------------------------------------------- 1 | use pyo3::{pyclass, pymethods, PyResult}; 2 | use tonbo::{option::Path, record::Schema}; 3 | 4 | use crate::{ExceedsMaxLevelError, FsOptions}; 5 | 6 | pub(crate) const MAX_LEVEL: usize = 7; 7 | 8 | /// configure the operating parameters of each component in the `DB` 9 | #[pyclass] 10 | #[derive(Debug, Clone)] 11 | pub struct DbOption { 12 | /// cached message size in parquet cleaner 13 | #[pyo3(get, set)] 14 | clean_channel_buffer: usize, 15 | /// len threshold of `immutables` when minor compaction is triggered 16 | #[pyo3(get, set)] 17 | immutable_chunk_num: usize, 18 | /// magnification that triggers major compaction between different levels 19 | #[pyo3(get, set)] 20 | level_sst_magnification: usize, 21 | #[pyo3(get, set)] 22 | major_default_oldest_table_num: usize, 23 | /// threshold for the number of `parquet` when major compaction is triggered 24 | #[pyo3(get, set)] 25 | major_threshold_with_sst_size: usize, 26 | /// Maximum size of each parquet 27 | #[pyo3(get, set)] 28 | max_sst_file_size: usize, 29 | #[pyo3(get, set)] 30 | version_log_snapshot_threshold: u32, 31 | #[pyo3(get, set)] 32 | use_wal: bool, 33 | /// Maximum size of the WAL buffer 34 | #[pyo3(get, set)] 35 | wal_buffer_size: usize, 36 | /// build the `DB` storage directory based on the passed path 37 | #[pyo3(get, set)] 38 | path: String, 39 | #[pyo3(get, set)] 40 | base_fs: FsOptions, 41 | level_paths: Vec<Option<(String, FsOptions)>>, 42 | } 43 | 44 | #[pymethods] 45 | impl DbOption { 46 | #[new] 47 | fn new(path: String) -> Self { 48 | Self { 49 | clean_channel_buffer: 10, 50 | immutable_chunk_num: 3, 51 | level_sst_magnification: 10, 52 | major_default_oldest_table_num: 3, 53 | major_threshold_with_sst_size: 4, 54 | max_sst_file_size: 256 * 1024 * 1024, 55 | version_log_snapshot_threshold: 200, 56 | use_wal: true, 57 | wal_buffer_size: 4 * 1024, 58 | path, 59 | base_fs: FsOptions::Local {}, 60 | level_paths: vec![None; MAX_LEVEL], 61 | } 62 | } 63 | 64 | fn level_path(&mut self, level: usize, path: String, fs_options: FsOptions) -> PyResult<()> { 65 | if level >= MAX_LEVEL { 66 | return Err(ExceedsMaxLevelError::new_err("Exceeds max level")); 67 | } 68 | self.level_paths[level] = Some((path, fs_options)); 69 | Ok(()) 70 | } 71 | } 72 | 73 | impl DbOption { 74 | pub(crate) fn 
into_option<S: Schema>(self, schema: &S) -> tonbo::DbOption { 75 | let mut opt = tonbo::DbOption::new(Path::from(self.path), schema) 76 | .clean_channel_buffer(self.clean_channel_buffer) 77 | .immutable_chunk_num(self.immutable_chunk_num) 78 | .level_sst_magnification(self.level_sst_magnification) 79 | .major_default_oldest_table_num(self.major_default_oldest_table_num) 80 | .major_threshold_with_sst_size(self.major_threshold_with_sst_size) 81 | .max_sst_file_size(self.max_sst_file_size) 82 | .version_log_snapshot_threshold(self.version_log_snapshot_threshold) 83 | .base_fs(tonbo::option::FsOptions::from(self.base_fs)); 84 | for (level, path) in self.level_paths.into_iter().enumerate() { 85 | if let Some((path, fs_options)) = path { 86 | opt = opt 87 | .level_path( 88 | level, 89 | Path::from(path), 90 | tonbo::option::FsOptions::from(fs_options), 91 | ) 92 | .unwrap(); 93 | } 94 | } 95 | if !self.use_wal { 96 | opt = opt.disable_wal() 97 | } 98 | opt 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /bindings/python/src/range.rs: -------------------------------------------------------------------------------- 1 | use std::ops; 2 | 3 | use pyo3::{pyclass, FromPyObject, Py, PyAny, Python}; 4 | use tonbo::record::Value; 5 | 6 | use crate::{utils::to_col, Column}; 7 | 8 | #[pyclass] 9 | #[derive(FromPyObject)] 10 | pub enum Bound { 11 | Included { key: Py<PyAny> }, 12 | Excluded { key: Py<PyAny> }, 13 | } 14 | 15 | impl Bound { 16 | pub(crate) fn to_bound(&self, py: Python, col: &Column) -> ops::Bound<Value> { 17 | match self { 18 | Bound::Included { key } => ops::Bound::Included(to_col(py, col, key.clone_ref(py))), 19 | Bound::Excluded { key } => ops::Bound::Excluded(to_col(py, col, key.clone_ref(py))), 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /bindings/python/src/record.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use pyo3::{ 4 | prelude::*, 5 | pyclass, pymethods, 6 | types::{PyDict, PyMapping, PyString}, 7 | Bound, 8 | }; 9 | use tonbo::record::F64; 10 | 11 | use crate::{column::Column, datatype::DataType}; 12 | 13 | #[pyclass(subclass)] 14 | pub struct Record { 15 | wraps: Py<PyAny>, 16 | } 17 | 18 | #[pymethods] 19 | impl Record { 20 | #[new] 21 | fn new(wraps: Py<PyAny>) -> Self { 22 | Self { wraps } 23 | } 24 | 25 | #[pyo3(signature = ( **kwargs))] 26 | fn __call__(&self, py: Python<'_>, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<Py<PyAny>> { 27 | let dict = self.wraps.getattr(py, "__dict__")?; 28 | let mapping_proxy = dict.downcast_bound::<PyMapping>(py).unwrap(); 29 | if let Some(kwargs) = kwargs { 30 | for (key, v) in kwargs.iter() { 31 | let attr = key.downcast::<PyString>().unwrap(); 32 | let col_bound = mapping_proxy.get_item(attr).expect("Unknown attr {attr}"); 33 | let mut col = col_bound.extract::<Column>().unwrap(); 34 | match col.datatype { 35 | DataType::UInt8 => { 36 | let value = v.extract::<u8>()?; 37 | match col.nullable { 38 | true => col.value = Arc::new(Some(value)), 39 | false => col.value = Arc::new(value), 40 | } 41 | } 42 | DataType::UInt16 => { 43 | let value = v.extract::<u16>()?; 44 | match col.nullable { 45 | true => col.value = Arc::new(Some(value)), 46 | false => col.value = Arc::new(value), 47 | } 48 | } 49 | DataType::UInt32 => { 50 | let value = v.extract::<u32>()?; 51 | match col.nullable { 52 | true => col.value = Arc::new(Some(value)), 53 | false => col.value = Arc::new(value), 54 | } 55 | } 56 | DataType::UInt64 => { 57 | let value = v.extract::<u64>()?; 58 | 
match col.nullable { 59 | true => col.value = Arc::new(Some(value)), 60 | false => col.value = Arc::new(value), 61 | } 62 | } 63 | DataType::Int8 => { 64 | let value = v.extract::<i8>()?; 65 | match col.nullable { 66 | true => col.value = Arc::new(Some(value)), 67 | false => col.value = Arc::new(value), 68 | } 69 | } 70 | DataType::Int16 => { 71 | let value = v.extract::<i16>()?; 72 | match col.nullable { 73 | true => col.value = Arc::new(Some(value)), 74 | false => col.value = Arc::new(value), 75 | } 76 | } 77 | DataType::Int32 => { 78 | let value = v.extract::<i32>()?; 79 | match col.nullable { 80 | true => col.value = Arc::new(Some(value)), 81 | false => col.value = Arc::new(value), 82 | } 83 | } 84 | DataType::Int64 => { 85 | let value = v.extract::<i64>()?; 86 | match col.nullable { 87 | true => col.value = Arc::new(Some(value)), 88 | false => col.value = Arc::new(value), 89 | } 90 | } 91 | DataType::String => { 92 | let value = v.extract::<String>()?; 93 | match col.nullable { 94 | true => col.value = Arc::new(Some(value)), 95 | false => col.value = Arc::new(value), 96 | } 97 | } 98 | DataType::Boolean => { 99 | let value = v.extract::<bool>()?; 100 | match col.nullable { 101 | true => col.value = Arc::new(Some(value)), 102 | false => col.value = Arc::new(value), 103 | } 104 | } 105 | DataType::Bytes => { 106 | let value = v.extract::<Vec<u8>>()?; 107 | match col.nullable { 108 | true => col.value = Arc::new(Some(value)), 109 | false => col.value = Arc::new(value), 110 | } 111 | } 112 | DataType::Float => { 113 | let value = v.extract::<f64>()?; 114 | match col.nullable { 115 | true => col.value = Arc::new(Some(F64::from(value))), 116 | false => col.value = Arc::new(F64::from(value)), 117 | } 118 | } 119 | }; 120 | self.wraps.setattr(py, attr, col).unwrap(); 121 | } 122 | } 123 | 124 | let ret = self.wraps.clone_ref(py).into_any(); 125 | 126 | Ok(ret) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /bindings/python/src/record_batch.rs: -------------------------------------------------------------------------------- 1 | use pyo3::{ 2 | pyclass, pymethods, 3 | types::{PyAnyMethods, PyMapping, PyMappingMethods}, 4 | Py, PyAny, PyResult, Python, 5 | }; 6 | use tonbo::record::{DynRecord, Value}; 7 | 8 | use crate::Column; 9 | 10 | #[derive(Clone)] 11 | struct Record { 12 | columns: Vec<Value>, 13 | primary_key_index: usize, 14 | } 15 | 16 | impl Record { 17 | fn new(columns: Vec<Value>, primary_key_index: usize) -> Self { 18 | Self { 19 | columns, 20 | primary_key_index, 21 | } 22 | } 23 | } 24 | 25 | impl From<Record> for DynRecord { 26 | fn from(value: Record) -> Self { 27 | tonbo::record::DynRecord::new(value.columns, value.primary_key_index) 28 | } 29 | } 30 | 31 | #[pyclass] 32 | #[derive(Clone)] 33 | // #[derive(FromPyObject)] 34 | pub struct RecordBatch { 35 | batch_data: Vec<Record>, 36 | } 37 | 38 | #[pymethods] 39 | impl RecordBatch { 40 | #[new] 41 | fn new() -> Self { 42 | Self { 43 | batch_data: Vec::new(), 44 | } 45 | } 46 | 47 | fn append(&mut self, py: Python, record: Py<PyAny>) -> PyResult<()> { 48 | let mut cols = vec![]; 49 | let dict = record.getattr(py, "__dict__")?; 50 | let values = dict.downcast_bound::<PyMapping>(py)?.values()?; 51 | let mut primary_key_index = 0; 52 | let mut col_idx = 0; 53 | 54 | for i in 0..values.len()? 
{ 55 | let value = values.get_item(i)?; 56 | if let Ok(bound_col) = value.downcast::<Column>() { 57 | let col = bound_col.extract::<Column>()?; 58 | if col.primary_key { 59 | primary_key_index = col_idx; 60 | } 61 | let col = Value::from(col); 62 | cols.push(col); 63 | col_idx += 1; 64 | } 65 | } 66 | 67 | self.batch_data 68 | .push(Record::new(cols.clone(), primary_key_index)); 69 | Ok(()) 70 | } 71 | } 72 | 73 | impl RecordBatch { 74 | pub(crate) fn into_record_batch(self) -> Vec<DynRecord> { 75 | let mut batch = vec![]; 76 | for record in self.batch_data.into_iter() { 77 | batch.push(tonbo::record::DynRecord::from(record)) 78 | } 79 | batch 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /bindings/python/src/stream.rs: -------------------------------------------------------------------------------- 1 | use std::{pin::Pin, sync::Arc}; 2 | 3 | use futures::{Stream, TryStreamExt}; 4 | use pyo3::{ 5 | exceptions::PyStopAsyncIteration, prelude::*, pyclass, pymethods, IntoPyObjectExt, PyRef, 6 | PyRefMut, PyResult, Python, 7 | }; 8 | use pyo3_async_runtimes::tokio::future_into_py; 9 | use tokio::sync::Mutex; 10 | use tonbo::{parquet::errors::ParquetError, record::DynRecord, stream}; 11 | 12 | use crate::utils::to_dict; 13 | 14 | type AsyncStream = 15 | Pin<Box<dyn Stream<Item = Result<stream::Entry<'static, DynRecord>, ParquetError>> + Send>>; 16 | 17 | #[pyclass] 18 | pub struct ScanStream(Arc<Mutex<AsyncStream>>); 19 | 20 | impl ScanStream { 21 | pub fn new( 22 | stream: impl Stream<Item = Result<stream::Entry<'static, DynRecord>, ParquetError>> 23 | + 'static 24 | + Sized 25 | + Send, 26 | ) -> Self { 27 | Self(Arc::new(Mutex::new(Box::pin(stream)))) 28 | } 29 | } 30 | 31 | #[pymethods] 32 | impl ScanStream { 33 | fn __aiter__(slf: PyRef<Self>) -> PyRef<Self> { 34 | slf 35 | } 36 | 37 | fn __anext__(slf: PyRefMut<Self>, py: Python<'_>) -> PyResult<Option<PyObject>> { 38 | let stream: Arc<Mutex<AsyncStream>> = Arc::clone(&slf.0); 39 | let fut = future_into_py(py, async move { 40 | let mut locked_stream = stream.lock().await; 41 | let entry = locked_stream.try_next().await.unwrap(); 42 | match entry { 43 | Some(entry) => Python::with_gil(|py| match entry.value() { 44 | Some(record) => { 45 | to_dict(py, record.primary_index, record.columns).into_py_any(py) 46 | } 47 | None => Ok(py.None()), 48 | }), 49 | None => Err(PyStopAsyncIteration::new_err("stream exhausted")), 50 | } 51 | })?; 52 | Ok(Some(fut.into())) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /bindings/python/tests/bench/test_write_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import duckdb 4 | import sqlite3 5 | import pytest 6 | 7 | from conftest import gen_string, gen_int, gen_bytes 8 | from tonbo import Record, Column, DataType, TonboDB, DbOption 9 | from tonbo.fs import from_filesystem_path 10 | 11 | WRITE_TIME = 500000 12 | 13 | 14 | @Record 15 | class User: 16 | id = Column(DataType.Int64, name="id", primary_key=True) 17 | name = Column(DataType.String, name="name") 18 | email = Column(DataType.String, name="email", nullable=True) 19 | age = Column(DataType.UInt16, name="age") 20 | data = Column(DataType.Bytes, name="data") 21 | 22 | 23 | def duckdb_write(auto_commit: bool): 24 | con = duckdb.connect() 25 | con.sql( 26 | "CREATE TABLE user (id INTEGER, name VARCHAR(20), email VARCHAR(20), age INTEGER, data VARCHAR(200))" 27 | ) 28 | if not auto_commit: 29 | con.begin() 30 | for i in range(0, WRITE_TIME): 31 | con.execute( 32 | "INSERT INTO user VALUES (?, ?, ?, ?, ?)", 33 | [i, gen_string(20), gen_string(20), gen_int(0, 0xffff), 
gen_bytes(200)], 34 | ) 35 | if not auto_commit: 36 | con.commit() 37 | con.close() 38 | 39 | 40 | async def tonbo_write(auto_commit: bool): 41 | temp_dir = tempfile.TemporaryDirectory() 42 | 43 | option = DbOption(from_filesystem_path(temp_dir.name)) 44 | 45 | db = TonboDB(option, User()) 46 | if auto_commit: 47 | for i in range(0, WRITE_TIME): 48 | await db.insert(User( 49 | id=i, 50 | age=gen_int(0, 0xffff), 51 | name=gen_string(20), 52 | email=gen_string(20), 53 | data=gen_bytes(200), 54 | )) 55 | else: 56 | txn = await db.transaction() 57 | for i in range(0, WRITE_TIME): 58 | txn.insert(User( 59 | id=i, 60 | age=gen_int(0, 0xffff), 61 | name=gen_string(20), 62 | email=gen_string(20), 63 | data=gen_bytes(200), 64 | )) 65 | await txn.commit() 66 | 67 | await db.flush_wal() 68 | 69 | 70 | def sqlite_write(auto_commit: bool): 71 | file = tempfile.NamedTemporaryFile() 72 | con = sqlite3.connect(file.name, autocommit=auto_commit) 73 | con.execute( 74 | "CREATE TABLE user (id INTEGER, name VARCHAR(20), email VARCHAR(20), age INTEGER, data VARCHAR(200))" 75 | ) 76 | for i in range(0, WRITE_TIME): 77 | con.execute( 78 | "INSERT INTO user VALUES (?, ?, ?, ?, ?)", 79 | [i, gen_string(20), gen_string(20), gen_int(0, 0xffff), gen_bytes(200)], 80 | ) 81 | con.commit() 82 | con.close() 83 | 84 | 85 | @pytest.mark.parametrize("auto_commit", [True]) 86 | @pytest.mark.benchmark(group="autocommit") 87 | @pytest.mark.skipif("BENCH" not in os.environ, reason="benchmark") 88 | def test_duckdb_autocommit(benchmark, auto_commit): 89 | benchmark(duckdb_write, auto_commit) 90 | 91 | 92 | @pytest.mark.parametrize("auto_commit", [False]) 93 | @pytest.mark.benchmark(group="txn") 94 | @pytest.mark.skipif("BENCH" not in os.environ, reason="benchmark") 95 | def test_duckdb(benchmark, auto_commit): 96 | benchmark(duckdb_write, auto_commit) 97 | 98 | 99 | @pytest.mark.parametrize("auto_commit", [False]) 100 | @pytest.mark.skipif("BENCH" not in os.environ, reason="benchmark") 101 | @pytest.mark.benchmark(group="txn") 102 | def test_tonbo(aio_benchmark, auto_commit): 103 | aio_benchmark(tonbo_write, auto_commit) 104 | 105 | 106 | @pytest.mark.parametrize("auto_commit", [True]) 107 | @pytest.mark.skipif("BENCH" not in os.environ, reason="benchmark") 108 | @pytest.mark.benchmark(group="autocommit") 109 | def test_tonbo_no_txn(aio_benchmark, auto_commit): 110 | aio_benchmark(tonbo_write, auto_commit) 111 | 112 | 113 | @pytest.mark.parametrize("auto_commit", [True]) 114 | @pytest.mark.skipif("BENCH" not in os.environ, reason="benchmark") 115 | @pytest.mark.benchmark(group="autocommit") 116 | def test_sqlite_autocommit(benchmark, auto_commit): 117 | benchmark(sqlite_write, auto_commit) 118 | 119 | 120 | @pytest.mark.parametrize("auto_commit", [False]) 121 | @pytest.mark.skipif("BENCH" not in os.environ, reason="benchmark") 122 | @pytest.mark.benchmark(group="txn") 123 | def test_sqlite(benchmark, auto_commit): 124 | benchmark(sqlite_write, auto_commit) 125 | -------------------------------------------------------------------------------- /bindings/python/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import random 3 | 4 | import pytest_asyncio 5 | 6 | 7 | def gen_string(max_size): 8 | size = gen_int(0, max_size) 9 | charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" 10 | return ''.join(random.choices(charset, k=size)) 11 | 12 | 13 | def gen_bytes(max_size): 14 | return gen_string(max_size).encode("utf-8") 15 | 16 | 17 | def 
gen_int(lower, high): 18 | return random.randint(lower, high) 19 | 20 | 21 | # async support for pytest-benchmark 22 | # https://github.com/ionelmc/pytest-benchmark/issues/66#issuecomment-1137005280 23 | @pytest_asyncio.fixture 24 | def aio_benchmark(benchmark, event_loop): 25 | def _wrapper(func, *args, **kwargs): 26 | if asyncio.iscoroutinefunction(func): 27 | 28 | @benchmark 29 | def _(): 30 | return event_loop.run_until_complete(func(*args, **kwargs)) 31 | else: 32 | benchmark(func, *args, **kwargs) 33 | 34 | return _wrapper 35 | -------------------------------------------------------------------------------- /bindings/python/tests/test_db.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tempfile 3 | from tonbo import DbOption, Column, DataType, Record, RecordBatch, TonboDB 4 | 5 | 6 | @Record 7 | class User: 8 | age = Column(DataType.Int8, name="age", primary_key=True) 9 | height = Column(DataType.Int16, name="height", nullable=True) 10 | weight = Column(DataType.Int32, name="weight", nullable=False) 11 | 12 | 13 | def build_db(): 14 | temp_dir = tempfile.TemporaryDirectory() 15 | return TonboDB(DbOption(temp_dir.name), User()) 16 | 17 | 18 | @pytest.mark.asyncio 19 | async def test_db_write(): 20 | db = build_db() 21 | for i in range(0, 100): 22 | await db.insert(User(age=i, height=i * 10, weight=i * 20)) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_db_read(): 27 | db = build_db() 28 | for i in range(0, 100): 29 | await db.insert(User(age=i, height=i * 10, weight=i * 20)) 30 | 31 | for i in range(0, 100): 32 | user = await db.get(i) 33 | assert user == {"age": i, "height": i * 10, "weight": i * 20} 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_db_write_batch(): 38 | db = build_db() 39 | batch = RecordBatch() 40 | for i in range(0, 100): 41 | batch.append(User(age=i, height=i * 10, weight=i * 20)) 42 | await db.insert_batch(batch) 43 | 44 | for i in range(0, 100): 45 | user = await db.get(i) 46 | assert user == {"age": i, "height": i * 10, "weight": i * 20} 47 | 48 | 49 | @pytest.mark.asyncio 50 | async def test_db_remove(): 51 | db = build_db() 52 | for i in range(0, 100): 53 | if i % 2 == 0: 54 | await db.insert(User(age=i, height=i * 10, weight=i * 20)) 55 | else: 56 | await db.remove(i) 57 | 58 | for i in range(0, 100): 59 | user = await db.get(i) 60 | if i % 2 == 0: 61 | assert user == {"age": i, "height": i * 10, "weight": i * 20} 62 | else: 63 | assert user is None 64 | 65 | 66 | @pytest.mark.asyncio 67 | async def test_db_recover(): 68 | temp_dir = tempfile.TemporaryDirectory() 69 | db = TonboDB(DbOption(temp_dir.name), User()) 70 | for i in range(0, 100): 71 | await db.insert(User(age=i, height=i * 10, weight=i * 20)) 72 | 73 | for i in range(0, 100): 74 | user = await db.get(i) 75 | assert user == {"age": i, "height": i * 10, "weight": i * 20} 76 | 77 | await db.flush_wal() 78 | 79 | db = TonboDB(DbOption(temp_dir.name), User()) 80 | for i in range(0, 100): 81 | user = await db.get(i) 82 | assert user == {"age": i, "height": i * 10, "weight": i * 20} 83 | -------------------------------------------------------------------------------- /bindings/python/tests/test_flush.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tempfile 3 | from tonbo import DbOption, Column, DataType, Record, TonboDB 4 | 5 | 6 | @Record 7 | class User: 8 | id = Column(DataType.Int64, name="id", primary_key=True) 9 | name = Column(DataType.String, 
name="name") 10 | email = Column(DataType.String, name="email", nullable=True) 11 | age = Column(DataType.Int8, name="age") 12 | data = Column(DataType.Bytes, name="data") 13 | 14 | 15 | @pytest.mark.asyncio 16 | async def test_flush(): 17 | temp_dir = tempfile.TemporaryDirectory() 18 | option = DbOption(temp_dir.name) 19 | option.immutable_chunk_num = 1 20 | option.major_threshold_with_sst_size = 3 21 | option.level_sst_magnification = 1 22 | option.max_sst_file_size = 1 * 1024 23 | db = TonboDB(option, User()) 24 | # db = build_db() 25 | for i in range(0, 1000): 26 | if i % 100 == 0: 27 | await db.flush() 28 | await db.insert( 29 | User( 30 | id=i, 31 | age=i % 128, 32 | name=str(i * 10), 33 | email=str(i * 20), 34 | data=b"Hello Tonbo!", 35 | ) 36 | ) 37 | 38 | for i in range(0, 1000): 39 | user = await db.get(i) 40 | assert user == { 41 | "id": i, 42 | "name": str(i * 10), 43 | "email": str(i * 20), 44 | "age": i % 128, 45 | "data": b"Hello Tonbo!", 46 | } 47 | -------------------------------------------------------------------------------- /bindings/python/tests/test_s3.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | import tempfile 5 | from tonbo import DbOption, Column, DataType, Record, TonboDB 6 | from tonbo.fs import FsOptions, AwsCredential, from_url_path 7 | 8 | 9 | @Record 10 | class User: 11 | id = Column(DataType.Int64, name="id", primary_key=True) 12 | name = Column(DataType.String, name="name") 13 | email = Column(DataType.String, name="email", nullable=True) 14 | age = Column(DataType.UInt8, name="age") 15 | data = Column(DataType.Bytes, name="data") 16 | 17 | 18 | @pytest.mark.asyncio 19 | @pytest.mark.skipif("S3" not in os.environ, reason="s3") 20 | async def test_s3_read_write(): 21 | 22 | temp_dir = tempfile.TemporaryDirectory() 23 | 24 | key_id = os.environ['AWS_ACCESS_KEY_ID'] 25 | secret_key = os.environ['AWS_SECRET_ACCESS_KEY'] 26 | credential = AwsCredential(key_id, secret_key) 27 | fs_option = FsOptions.S3("wasm-data", credential,"ap-southeast-2",None, None, None) 28 | 29 | option = DbOption(temp_dir.name) 30 | option.level_path(0, from_url_path("l0"), fs_option) 31 | option.level_path(1, from_url_path("l1"), fs_option) 32 | option.level_path(2, from_url_path("l2"), fs_option) 33 | 34 | option.immutable_chunk_num = 1 35 | option.major_threshold_with_sst_size = 3 36 | option.level_sst_magnification = 1 37 | option.max_sst_file_size = 1 * 1024 38 | 39 | db = TonboDB(option, User()) 40 | for i in range(0, 500): 41 | if i % 100 == 0: 42 | await db.flush() 43 | await db.insert( 44 | User( 45 | id=i, 46 | age=i % 128, 47 | name=str(i * 10), 48 | email=str(i * 20), 49 | data=b"Hello Tonbo!", 50 | ) 51 | ) 52 | user = await db.get(10) 53 | assert user == { 54 | "id": 10, 55 | "name": str(10 * 10), 56 | "email": str(10 * 20), 57 | "age": 10, 58 | "data": b"Hello Tonbo!", 59 | } 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /bindings/python/tests/test_table_level.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import pytest 3 | from tonbo import TonboDB, DbOption, Column, DataType, Record 4 | from tonbo.fs import from_filesystem_path, parse, from_url_path, FsOptions 5 | from tonbo.error import PathParseError 6 | 7 | 8 | @Record 9 | class User: 10 | id = Column(DataType.Int64, name="id", primary_key=True) 11 | name = Column(DataType.String, name="name") 12 | email = 
Column(DataType.String, name="email", nullable=True) 13 | age = Column(DataType.Int8, name="age") 14 | data = Column(DataType.Bytes, name="data") 15 | 16 | 17 | def test_parse(): 18 | assert parse("/foo/bar") == "foo/bar" 19 | with pytest.raises(PathParseError): 20 | parse("//foo/bar") 21 | 22 | 23 | def test_from_url_path(): 24 | assert from_url_path("foo%20bar") == "foo bar" 25 | assert from_url_path("foo%2F%252E%252E%2Fbar") == "foo/%2E%2E/bar" 26 | assert from_url_path("foo/%252E%252E/bar") == "foo/%2E%2E/bar" 27 | assert from_url_path("%48%45%4C%4C%4F") == "HELLO" 28 | with pytest.raises(PathParseError): 29 | from_url_path("foo/%2E%2E/bar") 30 | 31 | 32 | @pytest.mark.asyncio 33 | async def test_table_level_local(): 34 | temp_dir = tempfile.TemporaryDirectory() 35 | temp_dir0 = tempfile.TemporaryDirectory() 36 | temp_dir1 = tempfile.TemporaryDirectory() 37 | 38 | option = DbOption(from_filesystem_path(temp_dir.name)) 39 | option.level_path(0, from_filesystem_path(temp_dir0.name), FsOptions.Local()) 40 | option.level_path(1, from_filesystem_path(temp_dir1.name), FsOptions.Local()) 41 | 42 | option.immutable_chunk_num = 1 43 | option.major_threshold_with_sst_size = 3 44 | option.level_sst_magnification = 1 45 | option.max_sst_file_size = 1 * 1024 46 | 47 | db = TonboDB(option, User()) 48 | for i in range(0, 1000): 49 | if i % 50 == 0: 50 | await db.flush() 51 | await db.insert( 52 | User( 53 | id=i, 54 | age=i % 128, 55 | name=str(i * 10), 56 | email=str(i * 20), 57 | data=b"Hello Tonbo!", 58 | ) 59 | ) 60 | 61 | for i in range(0, 1000): 62 | user = await db.get(i) 63 | assert user == { 64 | "id": i, 65 | "name": str(i * 10), 66 | "email": str(i * 20), 67 | "age": i % 128, 68 | "data": b"Hello Tonbo!", 69 | } 70 | -------------------------------------------------------------------------------- /bindings/python/tests/test_txn.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tempfile 3 | from tonbo import DbOption, Column, DataType, Record, TonboDB, Bound 4 | from tonbo.error import RepeatedCommitError, WriteConflictError 5 | 6 | 7 | @Record 8 | class User: 9 | age = Column(DataType.Int8, name="age", primary_key=True) 10 | height = Column(DataType.Int16, name="height", nullable=True) 11 | weight = Column(DataType.Int32, name="weight", nullable=False) 12 | 13 | 14 | def build_db(): 15 | temp_dir = tempfile.TemporaryDirectory() 16 | return TonboDB(DbOption(temp_dir.name), User()) 17 | 18 | 19 | @pytest.mark.asyncio 20 | async def test_txn_write(): 21 | db = build_db() 22 | txn = await db.transaction() 23 | for i in range(0, 100): 24 | txn.insert(User(age=i, height=i * 10, weight=i * 20)) 25 | await txn.commit() 26 | 27 | 28 | @pytest.mark.asyncio 29 | async def test_txn_read_write(): 30 | db = build_db() 31 | txn = await db.transaction() 32 | for i in range(0, 100): 33 | txn.insert(User(age=i, height=i * 10, weight=i * 20)) 34 | for i in range(0, 100): 35 | txn.insert(User(age=i, height=i * 11, weight=i * 22)) 36 | 37 | for i in range(0, 100): 38 | user = await txn.get(i) 39 | assert user == {"age": i, "height": i * 11, "weight": i * 22} 40 | 41 | 42 | @pytest.mark.asyncio 43 | async def test_txn_remove(): 44 | db = build_db() 45 | txn = await db.transaction() 46 | for i in range(0, 100): 47 | if i % 2 == 0: 48 | txn.insert(User(age=i, height=i * 10, weight=i * 20)) 49 | else: 50 | txn.remove(i) 51 | 52 | for i in range(0, 100): 53 | user = await txn.get(i) 54 | if i % 2 == 0: 55 | assert user == {"age": i, "height": i * 10, 
"weight": i * 20} 56 | else: 57 | assert user is None 58 | 59 | 60 | @pytest.mark.asyncio 61 | async def test_txn_scan(): 62 | db = build_db() 63 | txn = await db.transaction() 64 | for i in range(0, 100): 65 | txn.insert(User(age=i, height=i * 10, weight=i * 20)) 66 | 67 | scan = await txn.scan(Bound.Included(10), Bound.Excluded(75)) 68 | i = 10 69 | async for user in scan: 70 | assert user == {"age": i, "height": i * 10, "weight": i * 20} 71 | i += 1 72 | assert i == 75 73 | 74 | 75 | @pytest.mark.asyncio 76 | async def test_txn_projection(): 77 | db = build_db() 78 | txn = await db.transaction() 79 | for i in range(0, 100): 80 | txn.insert(User(age=i, height=i * 10, weight=i * 20)) 81 | 82 | scan = await txn.scan(None, Bound.Excluded(75), projection=["age", "height"]) 83 | i = 0 84 | async for user in scan: 85 | assert user == {"age": i, "height": i * 10, "weight": None} 86 | i += 1 87 | 88 | 89 | @pytest.mark.asyncio 90 | async def test_txn_limit(): 91 | db = build_db() 92 | txn = await db.transaction() 93 | for i in range(0, 100): 94 | txn.insert(User(age=i, height=i * 10, weight=i * 20)) 95 | 96 | scan = await txn.scan(None, Bound.Excluded(75), limit=10) 97 | i = 0 98 | async for user in scan: 99 | assert user == {"age": i, "height": i * 10, "weight": i * 20} 100 | i += 1 101 | assert i == 10 102 | 103 | 104 | @pytest.mark.asyncio 105 | async def test_repeated_commit(): 106 | db = build_db() 107 | txn = await db.transaction() 108 | await txn.commit() 109 | 110 | with pytest.raises(RepeatedCommitError): 111 | await txn.commit() 112 | 113 | 114 | @pytest.mark.asyncio 115 | async def test_txn_write_conflict(): 116 | db = build_db() 117 | txn = await db.transaction() 118 | txn2 = await db.transaction() 119 | for i in range(0, 10): 120 | txn.insert(User(age=i, height=i * 10, weight=i * 20)) 121 | 122 | for i in range(0, 10): 123 | txn2.insert(User(age=i, height=i * 10, weight=i * 20)) 124 | 125 | await txn2.commit() 126 | with pytest.raises(WriteConflictError): 127 | await txn.commit() 128 | 129 | 130 | @pytest.mark.asyncio 131 | async def test_txn_cannot_read_future(): 132 | db = build_db() 133 | txn = await db.transaction() 134 | txn2 = await db.transaction() 135 | for i in range(0, 10): 136 | txn.insert(User(age=i, height=i * 10, weight=i * 20)) 137 | 138 | for i in range(0, 10): 139 | txn2.insert(User(age=i, height=i * 20, weight=i * 40)) 140 | 141 | await txn2.commit() 142 | for i in range(0, 10): 143 | user = await txn.get(i) 144 | # can not read data in the future 145 | assert user == { "age": i, "height": i * 10, "weight": i * 20 } 146 | 147 | txn3 = await db.transaction() 148 | for i in range(0, 10): 149 | user = await txn3.get(i) 150 | assert user == { "age": i, "height": i * 20, "weight": i * 40 } 151 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | type-complexity-threshold = 900 2 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | ## Declare 3 | 4 | The [declare.rs](declare.rs) file contains some basic examples to 5 | help you understand how to use `Tonbo` for simple data operations in your project. 
6 | 7 | ## DataFusion 8 | The [datafusion.rs](datafusion.rs) file demonstrates the in-depth integration of the `Tonbo` project 9 | with `DataFusion` and shows how to use the powerful features of DataFusion to perform complex data processing tasks. 10 | -------------------------------------------------------------------------------- /examples/declare.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Bound; 2 | 3 | use bytes::Bytes; 4 | use fusio::path::Path; 5 | use futures_util::stream::StreamExt; 6 | use tokio::fs; 7 | use tonbo::{executor::tokio::TokioExecutor, record::F32, DbOption, Projection, Record, DB}; 8 | 9 | /// Use a macro to define the schema of a column family, just like an ORM 10 | /// It provides a type-safe read & write API 11 | #[derive(Record, Debug)] 12 | pub struct User { 13 | #[record(primary_key)] 14 | name: String, 15 | email: Option<String>, 16 | age: u8, 17 | bytes: Bytes, 18 | grade: F32, 19 | } 20 | 21 | #[tokio::main] 22 | async fn main() { 23 | // make sure the path exists 24 | let _ = fs::create_dir_all("./db_path/users").await; 25 | 26 | let options = DbOption::new( 27 | Path::from_filesystem_path("./db_path/users").unwrap(), 28 | &UserSchema, 29 | ); 30 | // pluggable async runtime and I/O 31 | let db = DB::new(options, TokioExecutor::current(), UserSchema) 32 | .await 33 | .unwrap(); 34 | 35 | // insert with owned value 36 | db.insert(User { 37 | name: "Alice".into(), 38 | email: Some("alice@gmail.com".into()), 39 | age: 22, 40 | bytes: Bytes::from(vec![0, 1, 2]), 41 | grade: 96.5.into(), 42 | }) 43 | .await 44 | .unwrap(); 45 | 46 | { 47 | // tonbo supports transactions 48 | let txn = db.transaction().await; 49 | 50 | // get from primary key 51 | let name = "Alice".into(); 52 | 53 | // get the zero-copy reference of record without any allocations. 
54 | let user = txn 55 | .get( 56 | &name, 57 | // tonbo supports pushing down projection 58 | Projection::All, 59 | ) 60 | .await 61 | .unwrap(); 62 | assert!(user.is_some()); 63 | assert_eq!(user.unwrap().get().age, Some(22)); 64 | 65 | { 66 | let upper = "Blob".into(); 67 | // range scan of user 68 | let mut scan = txn 69 | .scan((Bound::Included(&name), Bound::Excluded(&upper))) 70 | // tonbo supports pushing down projection 71 | .projection(&["email", "bytes", "grade"]) 72 | // push down limit 73 | .limit(1) 74 | .take() 75 | .await 76 | .unwrap(); 77 | while let Some(entry) = scan.next().await.transpose().unwrap() { 78 | assert_eq!( 79 | entry.value(), 80 | Some(UserRef { 81 | name: "Alice", 82 | email: Some("alice@gmail.com"), 83 | age: None, 84 | bytes: Some(&[0, 1, 2]), 85 | grade: Some(96.5.into()), 86 | }) 87 | ); 88 | } 89 | } 90 | 91 | // commit transaction 92 | txn.commit().await.unwrap(); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /examples/dynamic.rs: -------------------------------------------------------------------------------- 1 | use std::{fs, sync::Arc}; 2 | 3 | use fusio::path::Path; 4 | use tonbo::{ 5 | dyn_record, dyn_schema, 6 | executor::tokio::TokioExecutor, 7 | record::{DataType, Value}, 8 | DbOption, DB, 9 | }; 10 | 11 | #[tokio::main] 12 | async fn main() { 13 | fs::create_dir_all("./db_path/users").unwrap(); 14 | 15 | let schema = dyn_schema!(("foo", String, false), ("bar", Int32, true), 0); 16 | 17 | let options = DbOption::new( 18 | Path::from_filesystem_path("./db_path/users").unwrap(), 19 | &schema, 20 | ); 21 | let db = DB::new(options, TokioExecutor::current(), schema) 22 | .await 23 | .unwrap(); 24 | 25 | { 26 | let mut txn = db.transaction().await; 27 | txn.insert(dyn_record!( 28 | ("foo", String, false, "hello".to_owned()), 29 | ("bar", Int32, true, 1), 30 | 0 31 | )); 32 | 33 | txn.commit().await.unwrap(); 34 | } 35 | 36 | db.get( 37 | &Value::new( 38 | DataType::String, 39 | "foo".into(), 40 | Arc::new("hello".to_owned()), 41 | false, 42 | ), 43 | |v| { 44 | let v = v.get(); 45 | println!("{:?}", v.columns[0].value.downcast_ref::<String>()); 46 | Some(()) 47 | }, 48 | ) 49 | .await 50 | .unwrap(); 51 | } 52 | -------------------------------------------------------------------------------- /guide/book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | authors = ["crwen"] 3 | language = "en" 4 | multilingual = false 5 | src = "src" 6 | title = "The Tonbo Guide" 7 | 8 | [output.html] 9 | git-repository-url = "https://github.com/tonbo-io/tonbo" 10 | mathjax-support = true 11 | [output.html.playground] 12 | runnable = false 13 | 14 | [preprocessor.toc] 15 | command = "mdbook-toc" 16 | renderer = ["html"] 17 | -------------------------------------------------------------------------------- /guide/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | [What is Tonbo?](./introduction.md) 4 | 5 | - [Getting Started](./start.md) 6 | - [Usage]() 7 | - [Tonbo](./usage/tonbo.md) 8 | - [Python Binding](./usage/python.md) 9 | - [Configuration](./usage/conf.md) 10 | - [Advance](./usage/advance.md) 11 | - [FAQ](./usage/faq.md) 12 | - [Examples](./examples/index.md) 13 | - [Using Tonbo](./examples/declare.md) 14 | - [Using under Wasm](./examples/wasm.md) 15 | - [Contribution]() 16 | - [Building](./contribution/build.md) 17 | - [Submitting PR](./contribution/pr.md) 18 | - 
[TonboLite](./tonbolite/index.md) 19 | - [Getting Started](./tonbolite/start.md) 20 | - [Building and Testing](./tonbolite/build.md) 21 | - [Usage](./tonbolite/usage.md) 22 | -------------------------------------------------------------------------------- /guide/src/contribution/build.md: -------------------------------------------------------------------------------- 1 | # Building and Testing 2 | 3 | 4 | To get started using tonbo, make sure you have [Rust](https://www.rust-lang.org/tools/install) installed on your system. If you haven't already done so, follow the instructions [here](https://www.rust-lang.org/tools/install). 5 | 6 | ## Building and Testing for Rust 7 | 8 | ### Building and Testing with Non-WASM 9 | 10 | To use the local disk as the storage backend, import the [tokio](https://github.com/tokio-rs/tokio) crate and enable the "tokio" feature (enabled by default): 11 | 12 | ```bash 13 | cargo build 14 | ``` 15 | 16 | If Tonbo builds successfully, you can run the tests with: 17 | 18 | ```bash 19 | cargo test 20 | ``` 21 | 22 | ### Building and Testing with WASM 23 | 24 | If you want to build tonbo for wasm, you should add the wasm32-unknown-unknown target first. 25 | 26 | ```bash 27 | # add the wasm32-unknown-unknown target 28 | rustup target add wasm32-unknown-unknown 29 | # build for wasm 30 | cargo build --target wasm32-unknown-unknown --no-default-features --features wasm 31 | ``` 32 | 33 | Before running the tests, make sure you have installed [wasm-pack](https://github.com/rustwasm/wasm-pack) and have run `wasm-pack build` to build the wasm module. If the build succeeds, you can run the tests with: 34 | 35 | ```bash 36 | wasm-pack test --chrome --headless --test wasm --no-default-features --features aws,bytes,opfs 37 | ``` 38 | 39 | 40 | ## Building and Testing for Python 41 | 42 | ### Building 43 | We use [pyo3](https://github.com/PyO3/pyo3) to generate a native Python module and [maturin](https://github.com/PyO3/maturin) to build Rust-based Python packages. 44 | 45 | First, follow the commands below to create a new Python virtualenv, and install maturin into the virtualenv using Python's package manager, pip: 46 | 47 | ```bash 48 | # set up the virtualenv 49 | python -m venv .env 50 | # activate the venv 51 | source .env/bin/activate 52 | 53 | # install maturin 54 | pip install maturin 55 | # build the bindings 56 | maturin develop 57 | 58 | ``` 59 | 60 | Whenever the Rust code changes, run: 61 | 62 | ```bash 63 | maturin develop 64 | ``` 65 | 66 | ### Testing 67 | 68 | If you want to run the tests, you need to build with the "test" extra: 69 | 70 | ```bash 71 | maturin develop -E test 72 | ``` 73 | 74 | After building successfully, you can run the tests with: 75 | 76 | ```bash 77 | # run all tests except benchmarks (these need duckdb to be installed) 78 | pytest --ignore=tests/bench -v . 79 | 80 | # run all tests 81 | pip install duckdb 82 | python -m pytest 83 | ``` 84 | 85 | ## Building and Testing for JavaScript 86 | To build tonbo for JavaScript, you should install [wasm-pack](https://github.com/rustwasm/wasm-pack). If you haven't already done so, follow the instructions [here](https://rustwasm.github.io/wasm-pack/installer/).
87 | 88 | ```bash 89 | # add the wasm32-unknown-unknown target 90 | rustup target add wasm32-unknown-unknown 91 | # build for wasm 92 | wasm-pack build --target web 93 | ``` 94 | -------------------------------------------------------------------------------- /guide/src/contribution/index.md: https://raw.githubusercontent.com/tonbo-io/tonbo/94627473ea50c2d72725a8bf575f0071009b9cfa/guide/src/contribution/index.md -------------------------------------------------------------------------------- /guide/src/contribution/pr.md: -------------------------------------------------------------------------------- 1 | # Submitting a Pull Request 2 | 3 | Thanks for your contribution! The Tonbo project welcomes contributions of various types -- new features, bug fixes and reports, typo fixes, etc. If you want to contribute to the Tonbo project, you will need to pass the necessary checks. If you have any questions, feel free to start a new discussion or issue, or ask in the Tonbo [Discord](https://discord.gg/j27XVFVmJM). 4 | 5 | ## Running Tests and Checks 6 | This is a Rust project, so [rustup](https://rustup.rs/) and [cargo](https://doc.rust-lang.org/cargo/) are the best places to start. 7 | 8 | - `cargo check` to analyze the current package and report errors. 9 | - `cargo +nightly fmt` to format the current code. 10 | - `cargo build` to compile the current package. 11 | - `cargo clippy` to catch common mistakes and improve code. 12 | - `cargo test` to run unit tests. 13 | - `cargo bench` to run benchmark tests. 14 | 15 | 16 | > **Note**: If you have any changes to *bindings/python*, please make sure to run the checks and tests before submitting your PR. If you do not know how to build and run the tests, please refer to the [Building and Testing for Python](./build.md#building-and-testing-for-python) section. 17 | 18 | ## Pull Request title 19 | As described [here](https://gist.github.com/joshbuchea/6f47e86d2510bce28f8e7f42ae84c716), a valid PR title should begin with one of the following prefixes: 20 | - feat: a new feature for the user, not a new feature for a build script 21 | - fix: a bug fix for the user, not a fix to a build script 22 | - doc: changes to the documentation 23 | - style: formatting, missing semicolons, etc.; no production code change 24 | - refactor: refactoring production code, e.g. renaming a variable 25 | - test: adding missing tests, refactoring tests; no production code change 26 | - chore: updating grunt tasks etc.; no production code change 27 | 28 | Here is an example of a valid PR title: 29 | ``` 30 | feat: add float type 31 | ^--^ ^------------^ 32 | | | 33 | | +-> Summary in present tense. 34 | | 35 | +-------> Type: chore, docs, feat, fix, refactor, style, or test.
36 | ``` 37 | -------------------------------------------------------------------------------- /guide/src/contribution/testing.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | 4 | ## Testing Tonbo in Rust 5 | 6 | ## Testing Tonbo in WASM 7 | -------------------------------------------------------------------------------- /guide/src/examples/declare.md: -------------------------------------------------------------------------------- 1 | # Using Tonbo 2 | 3 | Define your schema: 4 | 5 | ```rust 6 | use tonbo::Record; 7 | 8 | /// Use the macro to define the schema of a column family, just like an ORM. 9 | /// It provides a type-safe read & write API. 10 | #[derive(Record, Debug)] 11 | pub struct User { 12 | #[record(primary_key)] 13 | name: String, 14 | email: Option<String>, 15 | age: u8, 16 | bytes: Bytes, 17 | } 18 | ``` 19 | 20 | ```rust 21 | use std::ops::Bound; 22 | 23 | use bytes::Bytes; 24 | use fusio::path::Path; 25 | use futures_util::stream::StreamExt; 26 | use tokio::fs; 27 | use tonbo::{executor::tokio::TokioExecutor, DbOption, Projection, Record, DB}; 28 | 29 | 30 | #[tokio::main] 31 | async fn main() { 32 | // make sure the path exists 33 | let _ = fs::create_dir_all("./db_path/users").await; 34 | 35 | let options = DbOption::new( 36 | Path::from_filesystem_path("./db_path/users").unwrap(), 37 | &UserSchema, 38 | ); 39 | // pluggable async runtime and I/O 40 | let db = DB::new(options, TokioExecutor::current(), UserSchema) 41 | .await 42 | .unwrap(); 43 | 44 | // insert with an owned value 45 | db.insert(User { 46 | name: "Alice".into(), 47 | email: Some("alice@gmail.com".into()), 48 | age: 22, 49 | bytes: Bytes::from(vec![0, 1, 2]), 50 | }) 51 | .await 52 | .unwrap(); 53 | 54 | { 55 | // tonbo supports transactions 56 | let txn = db.transaction().await; 57 | 58 | // get from the primary key 59 | let name = "Alice".into(); 60 | 61 | // get a zero-copy reference to the record without any allocations. 62 | let user = txn 63 | .get( 64 | &name, 65 | // tonbo supports pushing down projection 66 | Projection::All, 67 | ) 68 | .await 69 | .unwrap(); 70 | assert!(user.is_some()); 71 | assert_eq!(user.unwrap().get().age, Some(22)); 72 | 73 | { 74 | let upper = "Blob".into(); 75 | // range scan of user 76 | let mut scan = txn 77 | .scan((Bound::Included(&name), Bound::Excluded(&upper))) 78 | // tonbo supports pushing down projection 79 | .projection(vec![1, 3]) 80 | // push down limitation 81 | .limit(1) 82 | .take() 83 | .await 84 | .unwrap(); 85 | while let Some(entry) = scan.next().await.transpose().unwrap() { 86 | assert_eq!( 87 | entry.value(), 88 | Some(UserRef { 89 | name: "Alice", 90 | email: Some("alice@gmail.com"), 91 | age: None, 92 | bytes: Some(&[0, 1, 2]), 93 | }) 94 | ); 95 | } 96 | } 97 | 98 | // commit the transaction 99 | txn.commit().await.unwrap(); 100 | } 101 | } 102 | ``` 103 | -------------------------------------------------------------------------------- /guide/src/examples/index.md: https://raw.githubusercontent.com/tonbo-io/tonbo/94627473ea50c2d72725a8bf575f0071009b9cfa/guide/src/examples/index.md -------------------------------------------------------------------------------- /guide/src/examples/wasm.md: -------------------------------------------------------------------------------- 1 | 2 | # Using under Wasm 3 | 4 | This example shows how to use tonbo under Wasm.
5 | 6 | ## `Cargo.toml` 7 | 8 | Since only limited features of tokio can be used in wasm, we need to disable tokio and enable the `wasm` feature of tonbo. 9 | 10 | ```toml 11 | fusio = { git = "https://github.com/tonbo-io/fusio.git", rev = "216eb446fb0a0c6e5e85bfac51a6f6ed8e5ed606", package = "fusio", version = "0.3.3", features = [ 12 | "dyn", 13 | "fs", 14 | ] } 15 | tonbo = { git = "https://github.com/tonbo-io/tonbo", default-features = false, features = ["wasm"] } 16 | ``` 17 | 18 | ## Create DB 19 | 20 | Tonbo provides [OPFS (origin private file system)](https://developer.mozilla.org/en-US/docs/Web/API/File_System_API/Origin_private_file_system) as a storage backend, but the path handling is a little different. You should use `Path::from_opfs_path` or `Path::parse` rather than `Path::from_filesystem_path`, and it is not permitted to use paths that temporarily step outside the sandbox, such as `../foo` or `./bar`. 21 | 22 | ```rust 23 | use fusio::path::Path; 24 | use tonbo::{executor::opfs::OpfsExecutor, DbOption, DB}; 25 | 26 | async fn main() { 27 | 28 | let options = DbOption::new( 29 | Path::from_opfs_path("db_path/users").unwrap(), 30 | &UserSchema, 31 | ); 32 | let db = DB::<User, OpfsExecutor>::new(options, OpfsExecutor::new(), UserSchema) 33 | .await 34 | .unwrap(); 35 | } 36 | ``` 37 | 38 | ## Operations on DB 39 | 40 | After creating the `DB` instance, you can operate on it as usual: 41 | 42 | ```rust 43 | let txn = db.transaction().await; 44 | 45 | // get from the primary key 46 | let name = "Alice".into(); 47 | 48 | let user = txn.get(&name, Projection::All).await.unwrap(); 49 | 50 | let upper = "Blob".into(); 51 | // range scan of user 52 | let mut scan = txn 53 | .scan((Bound::Included(&name), Bound::Excluded(&upper))) 54 | // tonbo supports pushing down projection 55 | .projection(vec![1]) 56 | // push down limitation 57 | .limit(1) 58 | .take() 59 | .await 60 | .unwrap(); 61 | 62 | while let Some(entry) = scan.next().await.transpose().unwrap() { 63 | assert_eq!( 64 | entry.value(), 65 | Some(UserRef { 66 | name: "Alice", 67 | email: Some("alice@gmail.com"), 68 | age: None, 69 | }) 70 | ); 71 | } 72 | ``` 73 | -------------------------------------------------------------------------------- /guide/src/introduction.md: -------------------------------------------------------------------------------- 1 | # What is Tonbo? 2 | 3 | [Tonbo](https://github.com/tonbo-io/tonbo) is an in-process KV database that can be embedded in data-intensive applications written in Rust, Python, or JavaScript (WebAssembly / Deno). It is designed for analytical processing. Tonbo can efficiently write data in real time in edge environments such as browsers and AWS Lambda, with the data stored in memory, on local disks, or in S3 using the Apache Parquet format. 4 | 5 | ## Build with schema 6 | Building data-intensive applications in Rust using Tonbo is convenient. You just need to declare the dependency in your `Cargo.toml` file and then create the embedded database. For example: 7 | ```rust 8 | #[derive(tonbo::Record)] 9 | pub struct User { 10 | #[record(primary_key)] 11 | name: String, 12 | email: Option<String>, 13 | age: u8, 14 | } 15 | 16 | async fn main() { 17 | let db = tonbo::DB::new("./db_path/users".into(), TokioExecutor::current()) 18 | .await 19 | .unwrap(); 20 | } 21 | ``` 22 | 23 | ## All in Parquet 24 | 25 | Tonbo organizes all stored data as Apache Parquet files. At each level, these files can reside in memory, on disk, or in S3. This design lets users process their data without any vendor lock-in, including with Tonbo.
26 | 27 | ``` 28 | ╔═tonbo═════════════════════════════════════════════════════╗ 29 | ║ ║ 30 | ║ ┌──────╂─client storage─┐ ┌──────╂─client storage─┐ ║ 31 | ║ │ ┏━━━━▼━━━━┓ │ │ ┏━━━━▼━━━━┓ │ ║ 32 | ║ │ ┃ parquet ┃ │ │ ┃ parquet ┃ │ ║ 33 | ║ │ ┗━━━━┳━━━━┛ │ │ ┗━━━━┳━━━━┛ │ ║ 34 | ║ └──────╂────────────────┘ └──────╂────────────────┘ ║ 35 | ║ ┣━━━━━━━━━━━━━━━━━━━━━━━━━━┛ ║ 36 | ║ ┌──────╂────────────────────────────────server ssd─┐ ║ 37 | ║ │ ┣━━━━━━━━━━━┓ │ ║ 38 | ║ │ ┏━━━━▼━━━━┓ ┏━━━━▼━━━━┓ │ ║ 39 | ║ │ ┃ parquet ┃ ┃ parquet ┃ │ ║ 40 | ║ │ ┗━━━━┳━━━━┛ ┗━━━━┳━━━━┛ │ ║ 41 | ║ └──────╂───────────╂───────────────────────────────┘ ║ 42 | ║ ┌──────╂───────────╂────────object storage service─┐ ║ 43 | ║ │ ┣━━━━━━━━━━━╋━━━━━━━━━━━┳━━━━━━━━━━━┓ │ ║ 44 | ║ │ ┏━━━━▼━━━━┓ ┏━━━━▼━━━━┓ ┏━━━━▼━━━━┓ ┏━━━━▼━━━━┓ │ ║ 45 | ║ │ ┃ parquet ┃ ┃ parquet ┃ ┃ parquet ┃ ┃ parquet ┃ │ ║ 46 | ║ │ ┗━━━━━━━━━┛ ┗━━━━━━━━━┛ ┗━━━━━━━━━┛ ┗━━━━━━━━━┛ │ ║ 47 | ║ └──────────────────────────────────────────────────┘ ║ 48 | ║ ║ 49 | ╚═══════════════════════════════════════════════════════════╝ 50 | ``` 51 | 52 | ## Easy to integrate 53 | Compared to other analytical databases, Tonbo is extremely lightweight: only 1.3 MB when compressed. In addition to being embedded directly as a KV database within applications, Tonbo can also serve as an analytical enhancement for existing OLTP databases. 54 | 55 | For example, [Tonbolite](https://github.com/tonbo-io/tonbolite) is a SQLite plugin built on Tonbo that provides SQLite with highly compressed, analytics-ready tables using Arrow/Parquet to boost query efficiency. Moreover, it can run alongside SQLite in various environments such as browsers and Linux: 56 | ``` 57 | sqlite> .load target/release/libsqlite_tonbo 58 | 59 | sqlite> CREATE VIRTUAL TABLE temp.tonbo USING tonbo( 60 | create_sql = 'create table tonbo(id bigint primary key, name varchar, like int)', 61 | path = 'db_path/tonbo' 62 | ); 63 | 64 | sqlite> insert into tonbo (id, name, like) values (0, 'tonbo', 100); 65 | 66 | sqlite> select * from tonbo; 67 | 0|tonbo|100 68 | ``` 69 | 70 | We are committed to providing the most convenient and efficient real-time analytical database for edge-first scenarios. In addition to Tonbolite, we will offer the following based on Tonbo: 71 | 1. Time-series data writing and querying for observability and other scenarios. 72 | 2. Real-time index building and search based on BM25 or vectors. 73 | 74 | We are passionate about establishing Tonbo as an open-source, community-contributed project and are dedicated to building a community around it to develop features for all use cases. 75 | -------------------------------------------------------------------------------- /guide/src/tonbolite/build.md: -------------------------------------------------------------------------------- 1 | # Building TonboLite 2 | 3 | ### Build as Extension 4 | To build TonboLite as an extension, you should enable the `loadable_extension` feature: 5 | ```sh 6 | cargo build --release --features loadable_extension 7 | ``` 8 | Once the build succeeds, you will get a file named `libsqlite_tonbo.dylib` (`.dll` on Windows, `.so` on most other Unixes) in *target/release/*. 9 | ### Build on Rust 10 | 11 | ```sh 12 | cargo build 13 | ``` 14 | 15 | ### Build on Wasm 16 | 17 | Building TonboLite for wasm takes a few steps: 18 | 1. Add the wasm32-unknown-unknown target 19 | ```sh 20 | rustup target add wasm32-unknown-unknown 21 | ``` 22 | 2. Override the toolchain with nightly 23 | ```sh 24 | rustup override set nightly 25 | ``` 26 | 3.
Build with [wasm-pack](https://github.com/rustwasm/wasm-pack) 27 | ```sh 28 | wasm-pack build --target web --no-default-features --features wasm 29 | ``` 30 | 31 | Once the build succeeds, you will get a *pkg* folder containing the compiled JS and wasm files. Copy it into your project, and you can start using it: 32 | ```js 33 | const tonbo = await import("./pkg/sqlite_tonbo.js"); 34 | await tonbo.default(); 35 | 36 | // start to use TonboLite ... 37 | ``` 38 | 39 | 40 |
41 | 42 | > **Note**: TonboLite should be used in a [secure context](https://developer.mozilla.org/en-US/docs/Web/Security/Secure_Contexts) and a [cross-origin isolated](https://developer.mozilla.org/en-US/docs/Web/API/Window/crossOriginIsolated) environment, since it uses [`SharedArrayBuffer`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SharedArrayBuffer) to share memory. Please refer to [this article](https://web.dev/articles/coop-coep) for a detailed explanation. 43 | 44 |
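In practice this means the page must be served with the two cross-origin-isolation response headers. Below is a minimal sketch of a local dev server using only Node's built-in modules; the file layout, port, and MIME table are illustrative assumptions, not part of TonboLite:

```js
// serve.mjs -- a minimal static server that enables cross-origin isolation.
// The two `Cross-Origin-*` headers are what make `SharedArrayBuffer` available.
import { createServer } from "node:http";
import { readFile } from "node:fs/promises";
import { extname } from "node:path";

const types = {
  ".html": "text/html",
  ".js": "text/javascript",
  ".wasm": "application/wasm",
};

createServer(async (req, res) => {
  const path = req.url === "/" ? "/index.html" : req.url;
  try {
    const body = await readFile(`.${path}`);
    res.writeHead(200, {
      "Content-Type": types[extname(path)] ?? "application/octet-stream",
      "Cross-Origin-Opener-Policy": "same-origin",
      "Cross-Origin-Embedder-Policy": "require-corp",
    });
    res.end(body);
  } catch {
    res.writeHead(404);
    res.end();
  }
}).listen(8080);
```

Most bundlers expose the same switch; for example, webpack's `devServer.headers` option can set these two headers during development.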
45 | -------------------------------------------------------------------------------- /guide/src/tonbolite/index.md: -------------------------------------------------------------------------------- 1 | # TonboLite 2 | 3 | TonboLite is a WASM-compatible SQLite extension that allows users to create tables which support analytical processing directly in SQLite. Its storage engine is powered by our open-source embedded key-value database, [Tonbo](https://github.com/tonbo-io/tonbo). 4 | -------------------------------------------------------------------------------- /guide/src/tonbolite/start.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | 4 | ## Installation 5 | 6 | ### Prerequisite 7 | To get started using tonbo, make sure you have [Rust](https://www.rust-lang.org/tools/install) installed on your system. If you haven't already done so, follow the instructions [here](https://www.rust-lang.org/tools/install). 8 | 9 | ### Building 10 | 11 | To build TonboLite as an extension, you should enable the `loadable_extension` feature: 12 | 13 | ```sh 14 | cargo build --release --features loadable_extension 15 | ``` 16 | 17 | Once the build succeeds, you will get a file named `libsqlite_tonbo.dylib` (`.dll` on Windows, `.so` on most other Unixes) in *target/release/*: 18 | 19 | ```bash 20 | target/release/ 21 | ├── build 22 | ├── deps 23 | ├── incremental 24 | ├── libsqlite_tonbo.d 25 | ├── libsqlite_tonbo.dylib 26 | └── libsqlite_tonbo.rlib 27 | ``` 28 | 29 | ## Loading TonboLite 30 | 31 | SQLite provides the [`.load`](https://www.sqlite.org/cli.html#loading_extensions) command to load a SQLite extension, so you can load the TonboLite extension by running the following command: 32 | 33 | ```bash 34 | .load target/release/libsqlite_tonbo 35 | ``` 36 | 37 | ## Creating Table 38 | 39 | After loading the TonboLite extension successfully, you can use the [SQLite Virtual Table](https://www.sqlite.org/vtab.html) syntax to create a table: 40 | 41 | ```sql 42 | CREATE VIRTUAL TABLE temp.tonbo USING tonbo( 43 | create_sql = 'create table tonbo(id bigint primary key, name varchar, like int)', 44 | path = 'db_path/tonbo' 45 | ); 46 | ``` 47 | - `create_sql` is a SQL statement that will be executed to create the table. 48 | - `path` is the path to the database file. 49 | 50 | ## Inserting Data 51 | 52 | After creating a table, you can start to insert data into it using the normal `INSERT INTO` statement: 53 | 54 | ```sql 55 | INSERT INTO tonbo(id, name, like) VALUES(1, 'tonbo', 100); 56 | ``` 57 | 58 | ## Querying Data 59 | 60 | After inserting data, you can query it using the `SELECT` statement: 61 | 62 | ```sql 63 | SELECT * FROM tonbo; 64 | 65 | 1|tonbo|100 66 | ``` 67 | 68 | ## Updating Data 69 | 70 | You can update data in the table using the `UPDATE` statement: 71 | 72 | ```sql 73 | UPDATE tonbo SET like = 123 WHERE id = 1; 74 | 75 | SELECT * FROM tonbo; 76 | 1|tonbo|123 77 | ``` 78 | 79 | ## Deleting Data 80 | 81 | You can also delete data using the `DELETE` statement: 82 | 83 | ```sql 84 | DELETE FROM tonbo WHERE id = 1; 85 | ``` 86 | 87 | ## Coding with extension 88 | 89 | The TonboLite extension can also be used anywhere SQLite extensions can be loaded.
Here is an example of using the TonboLite extension in Python: 90 | 91 | ```py 92 | import sqlite3 93 | 94 | conn = sqlite3.connect(":memory:") 95 | conn.enable_load_extension(True) 96 | # Load the tonbolite extension 97 | conn.load_extension("target/release/libsqlite_tonbo.dylib") 98 | conn.enable_load_extension(False) 99 | 100 | conn.execute("CREATE VIRTUAL TABLE temp.tonbo USING tonbo(" 101 | "create_sql = 'create table tonbo(id bigint primary key, name varchar, like int)', " 102 | "path = 'db_path/tonbo'" 103 | ")") 104 | conn.execute("INSERT INTO tonbo (id, name, like) VALUES (0, 'lol', 1)") 105 | conn.execute("INSERT INTO tonbo (id, name, like) VALUES (1, 'lol', 100)") 106 | rows = conn.execute("SELECT * FROM tonbo;") 107 | for row in rows: 108 | print(row) 109 | # ...... 110 | ``` 111 | -------------------------------------------------------------------------------- /guide/src/tonbolite/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | ## Using as Extension 4 | 5 | If you do not know how to build TonboLite, please refer to the [Building](./build.md) section. 6 | 7 | ### Loading TonboLite Extension 8 | 9 | Once the build succeeds, you will get a file named `libsqlite_tonbo.dylib` (`.dll` on Windows, `.so` on most other Unixes) in *target/release/* (or *target/debug/*). 10 | 11 | SQLite provides the [`.load`](https://www.sqlite.org/cli.html#loading_extensions) command to load a SQLite extension, so you can load the TonboLite extension by running the following command: 12 | 13 | ```bash 14 | .load target/release/libsqlite_tonbo 15 | ``` 16 | 17 | Or you can load the TonboLite extension from Python or other languages: 18 | ```py 19 | import sqlite3 20 | 21 | conn = sqlite3.connect(":memory:") 22 | conn.enable_load_extension(True) 23 | # Load the tonbolite extension 24 | conn.load_extension("target/release/libsqlite_tonbo.dylib") 25 | conn.enable_load_extension(False) 26 | 27 | # ...... 28 | ``` 29 | 30 | 31 | After loading TonboLite successfully, you can start to use it. 32 | 33 | ### Create Table 34 | 35 | Unlike a normal `CREATE TABLE` statement, TonboLite uses the [SQLite Virtual Table](https://www.sqlite.org/vtab.html) syntax to create a table: 36 | 37 | ```sql 38 | CREATE VIRTUAL TABLE temp.tonbo USING tonbo( 39 | create_sql = 'create table tonbo(id bigint primary key, name varchar, like int)', 40 | path = 'db_path/tonbo' 41 | ); 42 | ``` 43 | 44 | ### Select/Insert/Update/Delete 45 | 46 | You can execute SQL statements just like normal SQL in SQLite. Here is an example: 47 | 48 | ```sql 49 | sqlite> .load target/release/libsqlite_tonbo 50 | 51 | sqlite> CREATE VIRTUAL TABLE temp.tonbo USING tonbo( 52 | create_sql = 'create table tonbo(id bigint primary key, name varchar, like int)', 53 | path = 'db_path/tonbo' 54 | ); 55 | sqlite> insert into tonbo (id, name, like) values (0, 'tonbo', 100); 56 | sqlite> insert into tonbo (id, name, like) values (1, 'sqlite', 200); 57 | 58 | sqlite> select * from tonbo; 59 | 0|tonbo|100 60 | 1|sqlite|200 61 | 62 | sqlite> update tonbo set like = 123 where id = 0; 63 | 64 | sqlite> select * from tonbo; 65 | 0|tonbo|123 66 | 1|sqlite|200 67 | 68 | sqlite> delete from tonbo where id = 0; 69 | 70 | sqlite> select * from tonbo; 71 | 1|sqlite|200 72 | ``` 73 | 74 | ### Flush 75 | 76 | TonboLite uses an LSM tree to store data and buffers WAL writes to improve performance, so you may need to flush data to disk manually.
But SQLite doesn't provide a flush interface, so we chose to implement it in [`pragma quick_check`](https://www.sqlite.org/pragma.html#pragma_quick_check). 77 | 78 | ```sql 79 | PRAGMA tonbo.quick_check; 80 | ``` 81 | 82 | ## Using in Rust 83 | 84 | To use TonboLite in your application, add TonboLite to the *Cargo.toml* file: 85 | 86 | ```toml 87 | tonbolite = { git = "https://github.com/tonbo-io/tonbolite" } 88 | ``` 89 | 90 | You can use TonboLite just like [Rusqlite](https://github.com/rusqlite/rusqlite), but you should create tables using the [SQLite Virtual Table](https://www.sqlite.org/vtab.html) syntax: 91 | 92 | ```rust 93 | let _ = std::fs::create_dir_all("./db_path/test"); 94 | 95 | let db = rusqlite::Connection::open_in_memory()?; 96 | crate::load_module(&db)?; 97 | 98 | db.execute_batch( 99 | "CREATE VIRTUAL TABLE temp.tonbo USING tonbo( 100 | create_sql = 'create table tonbo(id bigint primary key, name varchar, like int)', 101 | path = 'db_path/test' 102 | );" 103 | ).unwrap(); 104 | 105 | db.execute( 106 | "INSERT INTO tonbo (id, name, like) VALUES (1, 'lol', 12)", 107 | [], 108 | ).unwrap(); 109 | 110 | let mut stmt = db.prepare("SELECT * FROM tonbo;")?; 111 | let _rows = stmt.query([])?; 112 | ``` 113 | For more usage, refer to [Rusqlite](https://docs.rs/rusqlite). 114 | 115 | One difference is that TonboLite extends [`pragma quick_check`](https://www.sqlite.org/pragma.html#pragma_quick_check) to flush the WAL to disk. You can use it like this: 116 | 117 | ```rust 118 | db.pragma(None, "quick_check", "tonbo", |_r| -> rusqlite::Result<()> { 119 | Ok(()) 120 | }).unwrap(); 121 | ``` 122 | 123 | ## Using in JavaScript 124 | 125 | To use TonboLite in wasm, you should enable the *wasm* feature. 126 | ```toml 127 | tonbolite = { git = "https://github.com/tonbo-io/tonbolite", default-features = false, features = ["wasm"] } 128 | ``` 129 | After building successfully, you will get a *pkg* folder containing the compiled JS and wasm files. Copy it into your project, and you can start using it. If you don't know how to build TonboLite for wasm, refer to [TonboLite](build.md#build-on-wasm). 130 | 131 | Here is an example of how to use TonboLite in JavaScript: 132 | 133 | ```javascript 134 | const tonbo = await import("./pkg/sqlite_tonbo.js"); 135 | await tonbo.default(); 136 | 137 | const db = new TonboLite('db_path/test'); 138 | await db.create(`CREATE VIRTUAL TABLE temp.tonbo USING tonbo( 139 | create_sql = 'create table tonbo(id bigint primary key, name varchar, like int)', 140 | path = 'db_path/tonbo' 141 | );`); 142 | 143 | await db.insert('INSERT INTO tonbo (id, name, like) VALUES (1, \'lol\', 12)'); 144 | await db.delete("DELETE FROM tonbo WHERE id = 4"); 145 | await db.update("UPDATE tonbo SET name = 'tonbo' WHERE id = 6"); 146 | 147 | const rows = await db.select('SELECT * FROM tonbo limit 10;'); 148 | console.log(rows); 149 | 150 | await db.flush(); 151 | ``` 152 | 153 |
154 | 155 | > **Note**: TonboLite should be used in a [secure context](https://developer.mozilla.org/en-US/docs/Web/Security/Secure_Contexts) and a [cross-origin isolated](https://developer.mozilla.org/en-US/docs/Web/API/Window/crossOriginIsolated) environment, since it uses [`SharedArrayBuffer`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SharedArrayBuffer) to share memory. Please refer to [this article](https://web.dev/articles/coop-coep) for a detailed explanation. 156 | 157 |
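Before loading the module, it can be worth verifying isolation from JavaScript; `crossOriginIsolated` reports whether `SharedArrayBuffer` is available. A small sketch (the error message is illustrative):

```js
// `SharedArrayBuffer` is only defined when the page is cross-origin isolated.
if (!globalThis.crossOriginIsolated) {
  throw new Error(
    "Page is not cross-origin isolated: serve it with " +
      "'Cross-Origin-Opener-Policy: same-origin' and " +
      "'Cross-Origin-Embedder-Policy: require-corp'."
  );
}

const tonbo = await import("./pkg/sqlite_tonbo.js");
await tonbo.default();
```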
158 | -------------------------------------------------------------------------------- /guide/src/usage/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## Failed to run custom build command for `ring` on macOS 4 | Apple Clang is a fork of Clang that is specialized to Apple's wishes, and it doesn't support wasm32-unknown-unknown. You need to download and use llvm.org Clang instead. You can refer to this [issue](https://github.com/briansmith/ring/issues/1824) for more information. 5 | 6 | ```bash 7 | brew install llvm 8 | echo 'export PATH="/opt/homebrew/opt/llvm/bin:$PATH"' >> ~/.zshrc 9 | ``` 10 | 11 | ## Why is my data not recovered, and why is the size of the log file and WAL file 0? 12 | 13 | Because Tonbo buffers WAL writes, the WAL may not be persisted before exiting. You can use `DB::flush_wal` to ensure the WAL is persisted, or use `DB::flush` to trigger compaction manually. 14 | 15 | If you don't want to use the WAL buffer, you can set `DbOption::wal_buffer_size` to 0. See more details in [Configuration](./conf.md#wal-configuration). 16 | 17 | ## How to persist metadata files to S3? / Why are metadata files not persisted in serverless environments like AWS Lambda? 18 | 19 | If you want to persist metadata files to S3, you can configure `DbOption::base_fs` with `FsOptions::S3{...}`. This enables Tonbo to upload metadata files and WAL files to the specified S3 bucket. 20 | 21 | > **Note**: This does not guarantee that the latest metadata will be uploaded to S3. If you want to ensure the latest WAL is uploaded, you can use `DB::flush_wal`. If you want to ensure the latest metadata is uploaded, you can use `DB::flush` to trigger the upload manually. If you want Tonbo to trigger uploads more frequently, you can adjust `DbOption::version_log_snapshot_threshold` to a smaller value. The default value is 200. 22 | 23 | See more details in [Configuration](./conf.md#manifest-configuration). 24 | -------------------------------------------------------------------------------- /guide/src/usage/index.md: https://raw.githubusercontent.com/tonbo-io/tonbo/94627473ea50c2d72725a8bf575f0071009b9cfa/guide/src/usage/index.md -------------------------------------------------------------------------------- /guide/src/usage/python.md: -------------------------------------------------------------------------------- 1 | # Tonbo Python Binding 2 | 3 | 4 | ## `@Record` 5 | 6 | Tonbo provides an ORM-like decorator for ease of use: you can use `@Record` to define the schema of a column family. 7 | ```py 8 | @Record 9 | class User: 10 | id = Column(DataType.Int64, name="id", primary_key=True) 11 | age = Column(DataType.Int16, name="age", nullable=True) 12 | name = Column(DataType.String, name="name", nullable=False) 13 | ``` 14 | 15 |
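Once declared, records are constructed with keyword arguments matching the columns, as in the full example below. A small sketch (assuming that nullable columns accept `None`):

```py
# `age` is declared nullable, so it may be None; `id` and `name` are required.
u1 = User(id=1, age=30, name="Alice")
u2 = User(id=2, age=None, name="Bob")
```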
24 | 25 | 26 | ## Configuration 27 | 28 | ## Example 29 | 30 | ```python 31 | from tonbo import DbOption, Column, DataType, Record, TonboDB, Bound 32 | from tonbo.fs import from_filesystem_path 33 | import asyncio 34 | 35 | @Record 36 | class User: 37 | id = Column(DataType.Int64, name="id", primary_key=True) 38 | age = Column(DataType.Int16, name="age", nullable=True) 39 | name = Column(DataType.String, name="name", nullable=False) 40 | 41 | async def main(): 42 | db = TonboDB(DbOption(from_filesystem_path("db_path/user")), User()) 43 | await db.insert(User(id=18, age=175, name="Alice")) 44 | record = await db.get(18) 45 | print(record) 46 | 47 | # use a transaction 48 | txn = await db.transaction() 49 | result = await txn.get(18) 50 | scan = await txn.scan(Bound.Included(18), None, limit=10, projection=["id", "name"]) 51 | 52 | async for record in scan: 53 | print(record) 54 | 55 | asyncio.run(main()) 56 | ``` 57 | -------------------------------------------------------------------------------- /parquet-lru/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | description = "Implement LRU cache reader for parquet::arrow::async_reader::AsyncFileReader." 3 | documentation = "https://docs.rs/parquet-lru" 4 | edition = "2021" 5 | license = "Apache-2.0" 6 | name = "parquet-lru" 7 | version = "0.3.2" 8 | 9 | [package.metadata.docs.rs] 10 | all-features = true 11 | 12 | [features] 13 | default = [] 14 | foyer = ["dep:foyer", "dep:serde"] 15 | 16 | [dependencies] 17 | bytes = { version = "1.8.0", features = ["serde"] } 18 | foyer = { version = "0.14.1", optional = true } 19 | futures-core = "0.3.31" 20 | futures-util = "0.3.31" 21 | parquet = { version = "55", default-features = false, features = [ 22 | "arrow", 23 | "async", 24 | ] } 25 | serde = { version = "1.0.214", optional = true } 26 | -------------------------------------------------------------------------------- /parquet-lru/src/dyn.rs: -------------------------------------------------------------------------------- 1 | use std::{ops::Range, sync::Arc}; 2 | 3 | use bytes::Bytes; 4 | use futures_core::future::BoxFuture; 5 | use parquet::{ 6 | arrow::{arrow_reader::ArrowReaderOptions, async_reader::AsyncFileReader}, 7 | errors::Result, 8 | file::metadata::ParquetMetaData, 9 | }; 10 | 11 | use crate::LruCache; 12 | 13 | pub struct BoxedFileReader { 14 | inner: Box<dyn AsyncFileReader>, 15 | } 16 | 17 | impl BoxedFileReader { 18 | pub fn new<T: AsyncFileReader + 'static>(inner: T) -> Self { 19 | Self { 20 | inner: Box::new(inner), 21 | } 22 | } 23 | } 24 | 25 | impl AsyncFileReader for BoxedFileReader { 26 | fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> { 27 | self.inner.get_bytes(range) 28 | } 29 | 30 | fn get_metadata<'s>( 31 | &'s mut self, 32 | options: Option<&'s ArrowReaderOptions>, 33 | ) -> BoxFuture<'s, Result<Arc<ParquetMetaData>>> { 34 | self.inner.get_metadata(options) 35 | } 36 | 37 | fn get_byte_ranges(&mut self, ranges: Vec<Range<u64>>) -> BoxFuture<'_, Result<Vec<Bytes>>> { 38 | self.inner.get_byte_ranges(ranges) 39 | } 40 | } 41 | 42 | pub trait DynLruCache<K> { 43 | fn get_reader(&self, key: K, reader: BoxedFileReader) -> BoxFuture<'_, BoxedFileReader>; 44 | } 45 | 46 | impl<K, C> DynLruCache<K> for C 47 | where 48 | K: 'static + Send, 49 | C: LruCache<K> + Sized + Send + Sync, 50 | { 51 | fn get_reader(&self, key: K, reader: BoxedFileReader) -> BoxFuture<'_, BoxedFileReader> { 52 | Box::pin(async move { BoxedFileReader::new(self.get_reader(key, reader).await) }) 53 | } 54 | } 55 |
-------------------------------------------------------------------------------- /parquet-lru/src/foyer.rs: -------------------------------------------------------------------------------- 1 | use std::{hash::Hash, ops::Range, sync::Arc}; 2 | 3 | use bytes::Bytes; 4 | use futures_core::future::BoxFuture; 5 | use futures_util::FutureExt; 6 | use parquet::{ 7 | arrow::{arrow_reader::ArrowReaderOptions, async_reader::AsyncFileReader}, 8 | errors::{ParquetError, Result}, 9 | file::metadata::ParquetMetaData, 10 | }; 11 | use serde::{Deserialize, Serialize}; 12 | 13 | use crate::LruCache; 14 | 15 | #[derive(Clone)] 16 | pub struct FoyerCache<K> 17 | where 18 | for<'a> K: Send + Sync + Hash + Eq + Serialize + Deserialize<'a> + 'static, 19 | { 20 | inner: Arc<FoyerCacheInner<K>>, 21 | } 22 | 23 | pub struct FoyerCacheInner<K> 24 | where 25 | for<'a> K: Send + Sync + Hash + Eq + Serialize + Deserialize<'a> + 'static, 26 | { 27 | meta: foyer::Cache<K, Arc<ParquetMetaData>>, 28 | data: foyer::HybridCache<(K, Range<u64>), Bytes>, 29 | } 30 | 31 | impl<K> LruCache<K> for FoyerCache<K> 32 | where 33 | for<'a> K: Send + Sync + Hash + Eq + Serialize + Deserialize<'a> + Clone + 'static, 34 | { 35 | type LruReader<R> 36 | = FoyerReader<K, R> 37 | where 38 | R: AsyncFileReader + 'static; 39 | 40 | async fn get_reader<R>(&self, key: K, reader: R) -> FoyerReader<K, R> 41 | where 42 | R: AsyncFileReader, 43 | { 44 | FoyerReader::new(self.clone(), key, reader) 45 | } 46 | } 47 | 48 | pub struct FoyerReader<K, R> 49 | where 50 | for<'a> K: Send + Sync + Hash + Eq + Serialize + Deserialize<'a> + 'static, 51 | { 52 | cache: FoyerCache<K>, 53 | key: K, 54 | reader: R, 55 | } 56 | 57 | impl<K, R> FoyerReader<K, R> 58 | where 59 | for<'a> K: Send + Sync + Hash + Eq + Serialize + Deserialize<'a> + 'static, 60 | R: AsyncFileReader, 61 | { 62 | fn new(cache: FoyerCache<K>, key: K, reader: R) -> Self { 63 | Self { cache, key, reader } 64 | } 65 | } 66 | 67 | impl<K, R> AsyncFileReader for FoyerReader<K, R> 68 | where 69 | for<'a> K: Send + Sync + Hash + Eq + Serialize + Deserialize<'a> + Clone + 'static, 70 | R: AsyncFileReader, 71 | { 72 | fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> { 73 | async move { 74 | if let Some(data) = self 75 | .cache 76 | .inner 77 | .data 78 | .get(&(self.key.clone(), range.clone())) 79 | .await 80 | .map_err(|e| ParquetError::External(e.into()))? 81 | { 82 | Ok(data.value().clone()) 83 | } else { 84 | let data = self.reader.get_bytes(range.clone()).await?; 85 | self.cache 86 | .inner 87 | .data 88 | .insert((self.key.clone(), range), data.clone()); 89 | Ok(data) 90 | } 91 | } 92 | .boxed() 93 | } 94 | 95 | fn get_metadata<'s>( 96 | &'s mut self, 97 | options: Option<&'s ArrowReaderOptions>, 98 | ) -> BoxFuture<'s, Result<Arc<ParquetMetaData>>> { 99 | async move { 100 | if let Some(meta) = self.cache.inner.meta.get(&self.key) { 101 | Ok(meta.value().clone()) 102 | } else { 103 | let meta = self.reader.get_metadata(options).await?; 104 | self.cache.inner.meta.insert(self.key.clone(), meta.clone()); 105 | Ok(meta) 106 | } 107 | } 108 | .boxed() 109 | } 110 | 111 | fn get_byte_ranges(&mut self, ranges: Vec<Range<u64>>) -> BoxFuture<'_, Result<Vec<Bytes>>> { 112 | async move { 113 | let mut missed = Vec::with_capacity(ranges.len()); 114 | let mut results = Vec::with_capacity(ranges.len()); 115 | for (id, range) in ranges.iter().enumerate() { 116 | if let Some(data) = self 117 | .cache 118 | .inner 119 | .data 120 | .get(&(self.key.clone(), range.clone())) 121 | .await 122 | .map_err(|e| ParquetError::External(e.into()))?
123 | { 124 | results.push((id, data.value().clone())); 125 | } else { 126 | missed.push((id, range)); 127 | } 128 | } 129 | if !missed.is_empty() { 130 | let data = self 131 | .reader 132 | .get_byte_ranges(missed.iter().map(|&(_, r)| r.clone()).collect()) 133 | .await?; 134 | // index the fetched bytes by their position in `missed`, not by the 135 | // original range id, which is only valid for `results` ordering 136 | for (i, (id, range)) in missed.into_iter().enumerate() { 137 | let data = data[i].clone(); 138 | self.cache 139 | .inner 140 | .data 141 | .insert((self.key.clone(), range.clone()), data.clone()); 142 | results.push((id, data)); 143 | } 144 | } 145 | results.sort_by_key(|(id, _)| *id); 146 | Ok(results.into_iter().map(|(_, data)| data).collect()) 147 | } 148 | .boxed() 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /parquet-lru/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod r#dyn; 2 | #[cfg(feature = "foyer")] 3 | pub mod foyer; 4 | 5 | use std::{future::Future, marker::PhantomData}; 6 | 7 | use parquet::arrow::async_reader::AsyncFileReader; 8 | 9 | pub use crate::r#dyn::*; 10 | 11 | pub trait LruCache<K> 12 | where 13 | K: 'static, 14 | { 15 | type LruReader<R>: AsyncFileReader + 'static 16 | where 17 | R: AsyncFileReader + 'static; 18 | 19 | fn get_reader<R>(&self, key: K, reader: R) -> impl Future<Output = Self::LruReader<R>> + Send 20 | where 21 | R: AsyncFileReader + 'static; 22 | } 23 | 24 | #[derive(Default)] 25 | pub struct NoCache<K> { 26 | _phantom: PhantomData<K>, 27 | } 28 | 29 | impl<K> Clone for NoCache<K> { 30 | fn clone(&self) -> Self { 31 | Self { 32 | _phantom: PhantomData, 33 | } 34 | } 35 | } 36 | 37 | unsafe impl<K> Send for NoCache<K> {} 38 | 39 | unsafe impl<K> Sync for NoCache<K> {} 40 | 41 | impl<K> LruCache<K> for NoCache<K> 42 | where 43 | K: 'static, 44 | { 45 | type LruReader<R> 46 | = R 47 | where 48 | R: AsyncFileReader + 'static; 49 | 50 | #[allow(clippy::manual_async_fn)] 51 | fn get_reader<R>(&self, _key: K, reader: R) -> impl Future<Output = R> + Send 52 | where 53 | R: AsyncFileReader, 54 | { 55 | async move { reader } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.85" 3 | components = ["clippy", "rust-analyzer", "rustfmt"] 4 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | comment_width = 100 2 | edition = "2021" 3 | format_code_in_doc_comments = true 4 | format_strings = true 5 | group_imports = "StdExternalCrate" 6 | imports_granularity = "Crate" 7 | max_width = 100 8 | normalize_comments = true 9 | normalize_doc_attributes = true 10 | wrap_comments = true 11 | -------------------------------------------------------------------------------- /src/context.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow::datatypes::Schema; 4 | 5 | use crate::{ 6 | fs::manager::StoreManager, 7 | record::Record, 8 | timestamp::Timestamp, 9 | version::{set::VersionSet, TransactionTs}, 10 | ParquetLru, 11 | }; 12 | 13 | pub(crate) struct Context<R: Record> { 14 | pub(crate) manager: Arc<StoreManager>, 15 | pub(crate) parquet_lru: ParquetLru, 16 | pub(crate) version_set: VersionSet<R>, 17 | pub(crate) arrow_schema: Arc<Schema>, 18 | } 19 | 20 | impl<R> Context<R> 21 | where 22 | R: Record, 23 | { 24 | pub(crate) fn new( 25 | manager: Arc<StoreManager>, 26 | parquet_lru: ParquetLru, 27 | version_set: VersionSet<R>, 28 | arrow_schema: Arc<Schema>, 29 | ) -> Self { 30 | Self { 31 |
manager, 32 | parquet_lru, 33 | version_set, 34 | arrow_schema, 35 | } 36 | } 37 | 38 | pub(crate) fn version_set(&self) -> &VersionSet<R> { 39 | &self.version_set 40 | } 41 | 42 | pub(crate) fn storage_manager(&self) -> &StoreManager { 43 | &self.manager 44 | } 45 | 46 | pub(crate) fn cache(&self) -> &ParquetLru { 47 | &self.parquet_lru 48 | } 49 | 50 | pub(crate) fn arrow_schema(&self) -> &Arc<Schema> { 51 | &self.arrow_schema 52 | } 53 | 54 | pub(crate) fn load_ts(&self) -> Timestamp { 55 | self.version_set.load_ts() 56 | } 57 | 58 | pub(crate) fn increase_ts(&self) -> Timestamp { 59 | self.version_set.increase_ts() 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/executor.rs: -------------------------------------------------------------------------------- 1 | use std::future::Future; 2 | 3 | use fusio::MaybeSend; 4 | 5 | pub trait Executor { 6 | fn spawn<F>(&self, future: F) 7 | where 8 | F: Future<Output = ()> + MaybeSend + 'static; 9 | } 10 | 11 | #[cfg(feature = "tokio")] 12 | pub mod tokio { 13 | use std::future::Future; 14 | 15 | use fusio::MaybeSend; 16 | use tokio::runtime::Handle; 17 | 18 | use super::Executor; 19 | 20 | #[derive(Debug, Clone)] 21 | pub struct TokioExecutor { 22 | handle: Handle, 23 | } 24 | 25 | impl TokioExecutor { 26 | pub fn current() -> Self { 27 | Self { 28 | handle: Handle::current(), 29 | } 30 | } 31 | } 32 | 33 | impl Executor for TokioExecutor { 34 | fn spawn<F>(&self, future: F) 35 | where 36 | F: Future<Output = ()> + MaybeSend + 'static, 37 | { 38 | self.handle.spawn(future); 39 | } 40 | } 41 | } 42 | 43 | #[cfg(all(feature = "opfs", target_arch = "wasm32"))] 44 | pub mod opfs { 45 | use std::future::Future; 46 | 47 | use fusio::MaybeSend; 48 | use wasm_bindgen::prelude::*; 49 | 50 | use super::Executor; 51 | 52 | #[wasm_bindgen] 53 | pub struct OpfsExecutor(); 54 | 55 | impl Default for OpfsExecutor { 56 | fn default() -> Self { 57 | Self {} 58 | } 59 | } 60 | 61 | impl OpfsExecutor { 62 | pub fn new() -> Self { 63 | Self {} 64 | } 65 | } 66 | 67 | impl Executor for OpfsExecutor { 68 | fn spawn<F>(&self, future: F) 69 | where 70 | F: Future<Output = ()> + MaybeSend + 'static, 71 | { 72 | wasm_bindgen_futures::spawn_local(future); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/fs/manager.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, sync::Arc}; 2 | 3 | use fusio::{disk::LocalFs, dynamic::DynFs, path::Path, Error}; 4 | use fusio_dispatch::FsOptions; 5 | 6 | pub struct StoreManager { 7 | base_fs: Arc<dyn DynFs>, 8 | local_fs: Arc<dyn DynFs>, 9 | fs_map: HashMap<Path, Arc<dyn DynFs>>, 10 | } 11 | 12 | impl StoreManager { 13 | pub fn new( 14 | base_options: FsOptions, 15 | levels_fs: Vec<Option<(Path, FsOptions)>>, 16 | ) -> Result<Self, Error> { 17 | let mut fs_map = HashMap::with_capacity(levels_fs.len()); 18 | 19 | for (path, fs_options) in levels_fs.into_iter().flatten() { 20 | fs_map.entry(path).or_insert(fs_options.parse()?); 21 | } 22 | let base_fs = base_options.parse()?; 23 | 24 | Ok(StoreManager { 25 | base_fs, 26 | fs_map, 27 | local_fs: Arc::new(LocalFs {}), 28 | }) 29 | } 30 | 31 | pub fn base_fs(&self) -> &Arc<dyn DynFs> { 32 | &self.base_fs 33 | } 34 | 35 | pub fn local_fs(&self) -> &Arc<dyn DynFs> { 36 | &self.local_fs 37 | } 38 | 39 | pub fn get_fs(&self, path: &Path) -> &Arc<dyn DynFs> { 40 | self.fs_map.get(path).unwrap_or(&self.base_fs) 41 | } 42 | } 43 | 44 | // TODO: TestCases 45 | -------------------------------------------------------------------------------- /src/fs/mod.rs:
-------------------------------------------------------------------------------- 1 | pub mod manager; 2 | 3 | use std::{ 4 | fmt::{Display, Formatter}, 5 | str::FromStr, 6 | }; 7 | 8 | use fusio::{fs::OpenOptions, path::Path}; 9 | use once_cell::sync::OnceCell; 10 | use ulid::{DecodeError, Ulid}; 11 | 12 | pub type FileId = Ulid; 13 | 14 | static GENERATOR: OnceCell<std::sync::Mutex<ulid::Generator>> = OnceCell::new(); 15 | 16 | #[inline] 17 | pub fn generate_file_id() -> FileId { 18 | // init 19 | let m = GENERATOR.get_or_init(|| std::sync::Mutex::new(ulid::Generator::new())); 20 | let mut guard = m 21 | .lock() 22 | .expect("global file id generator lock should not fail"); 23 | 24 | guard.generate().expect("generator should not fail") 25 | } 26 | 27 | pub enum FileType { 28 | Wal, 29 | Parquet, 30 | Log, 31 | } 32 | 33 | impl Display for FileType { 34 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 35 | match self { 36 | FileType::Wal => write!(f, "wal"), 37 | FileType::Parquet => write!(f, "parquet"), 38 | FileType::Log => write!(f, "log"), 39 | } 40 | } 41 | } 42 | 43 | impl FileType { 44 | pub(crate) fn open_options(&self, only_read: bool) -> OpenOptions { 45 | match self { 46 | FileType::Wal | FileType::Log => OpenOptions::default().create(true).read(true), 47 | FileType::Parquet => { 48 | if only_read { 49 | OpenOptions::default().read(true) 50 | } else { 51 | OpenOptions::default() 52 | .create(true) 53 | .write(true) 54 | .truncate(true) 55 | } 56 | } 57 | } 58 | } 59 | } 60 | 61 | pub(crate) fn parse_file_id(path: &Path, suffix: FileType) -> Result<Option<FileId>, DecodeError> { 62 | path.filename() 63 | .map(|file_name| { 64 | let file_id = file_name 65 | .strip_suffix(&format!(".{}", suffix)) 66 | .unwrap_or(file_name); 67 | FileId::from_str(file_id) 68 | }) 69 | .transpose() 70 | } 71 | -------------------------------------------------------------------------------- /src/inmem/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod immutable; 2 | pub(crate) mod mutable; 3 | -------------------------------------------------------------------------------- /src/magic.rs: -------------------------------------------------------------------------------- 1 | pub const TS: &str = "_ts"; 2 | pub(crate) const USER_COLUMN_OFFSET: usize = 2; 3 | -------------------------------------------------------------------------------- /src/ondisk/arrows.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Bound; 2 | 3 | use arrow::{ 4 | array::{BooleanArray, Datum}, 5 | buffer::BooleanBuffer, 6 | compute::kernels::cmp::{gt, gt_eq, lt_eq}, 7 | error::ArrowError, 8 | }; 9 | use parquet::{ 10 | arrow::{ 11 | arrow_reader::{ArrowPredicate, ArrowPredicateFn, RowFilter}, 12 | ProjectionMask, 13 | }, 14 | schema::types::SchemaDescriptor, 15 | }; 16 | 17 | use crate::{ 18 | record::{Key, Record, Schema}, 19 | timestamp::Timestamp, 20 | }; 21 | 22 | unsafe fn get_range_bound_fn<R>( 23 | range: Bound<&<R::Schema as Schema>::Key>, 24 | ) -> ( 25 | Option<&'static <R::Schema as Schema>::Key>, 26 | &'static (dyn Fn(&dyn Datum, &dyn Datum) -> Result<BooleanArray, ArrowError> + Sync), 27 | ) 28 | where 29 | R: Record, 30 | { 31 | let cmp: &'static (dyn Fn(&dyn Datum, &dyn Datum) -> Result<BooleanArray, ArrowError> + Sync); 32 | let key = match range { 33 | Bound::Included(key) => { 34 | cmp = &gt_eq; 35 | Some(&*(key as *const _)) 36 | } 37 | Bound::Excluded(key) => { 38 | cmp = &gt; 39 | Some(&*(key as *const _)) 40 | } 41 | Bound::Unbounded => { 42 | cmp = &|this, _| { 43 | let len = this.get().0.len(); 44 | Ok(BooleanArray::new( 45 |
BooleanBuffer::collect_bool(len, |_| true), 46 | None, 47 | )) 48 | }; 49 | None 50 | } 51 | }; 52 | (key, cmp) 53 | } 54 | 55 | pub(crate) unsafe fn get_range_filter<R>( 56 | schema_descriptor: &SchemaDescriptor, 57 | range: ( 58 | Bound<&<R::Schema as Schema>::Key>, 59 | Bound<&<R::Schema as Schema>::Key>, 60 | ), 61 | ts: Timestamp, 62 | ) -> RowFilter 63 | where 64 | R: Record, 65 | { 66 | let (lower_key, lower_cmp) = get_range_bound_fn::<R>(range.0); 67 | let (upper_key, upper_cmp) = get_range_bound_fn::<R>(range.1); 68 | 69 | let mut predictions: Vec<Box<dyn ArrowPredicate>> = vec![Box::new(ArrowPredicateFn::new( 70 | ProjectionMask::roots(schema_descriptor, [1]), 71 | move |record_batch| lt_eq(record_batch.column(0), &ts.to_arrow_scalar() as &dyn Datum), 72 | ))]; 73 | if let Some(lower_key) = lower_key { 74 | predictions.push(Box::new(ArrowPredicateFn::new( 75 | ProjectionMask::roots(schema_descriptor, [2]), 76 | move |record_batch| { 77 | lower_cmp(record_batch.column(0), lower_key.to_arrow_datum().as_ref()) 78 | }, 79 | ))); 80 | } 81 | if let Some(upper_key) = upper_key { 82 | predictions.push(Box::new(ArrowPredicateFn::new( 83 | ProjectionMask::roots(schema_descriptor, [2]), 84 | move |record_batch| { 85 | upper_cmp(upper_key.to_arrow_datum().as_ref(), record_batch.column(0)) 86 | }, 87 | ))); 88 | } 89 | 90 | RowFilter::new(predictions) 91 | } 92 | -------------------------------------------------------------------------------- /src/ondisk/mod.rs: -------------------------------------------------------------------------------- 1 | mod arrows; 2 | pub(crate) mod scan; 3 | pub(crate) mod sstable; 4 | -------------------------------------------------------------------------------- /src/ondisk/scan.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | marker::PhantomData, 3 | pin::Pin, 4 | sync::Arc, 5 | task::{Context, Poll}, 6 | }; 7 | 8 | use arrow::datatypes::Schema; 9 | use futures_core::{ready, Stream}; 10 | use parquet::arrow::{ 11 | async_reader::{AsyncFileReader, ParquetRecordBatchStream}, 12 | ProjectionMask, 13 | }; 14 | use pin_project_lite::pin_project; 15 | 16 | use crate::{ 17 | record::Record, 18 | stream::record_batch::{RecordBatchEntry, RecordBatchIterator}, 19 | }; 20 | 21 | pin_project!
{ 22 | #[derive(Debug)] 23 | pub struct SsTableScan<'scan, R> { 24 | #[pin] 25 | stream: ParquetRecordBatchStream<Box<dyn AsyncFileReader>>, 26 | iter: Option<RecordBatchIterator<R>>, 27 | projection_mask: ProjectionMask, 28 | full_schema: Arc<Schema>, 29 | _marker: PhantomData<&'scan ()> 30 | } 31 | } 32 | 33 | impl<R> SsTableScan<'_, R> { 34 | pub fn new( 35 | stream: ParquetRecordBatchStream<Box<dyn AsyncFileReader>>, 36 | projection_mask: ProjectionMask, 37 | full_schema: Arc<Schema>, 38 | ) -> Self { 39 | SsTableScan { 40 | stream, 41 | iter: None, 42 | projection_mask, 43 | full_schema, 44 | _marker: PhantomData, 45 | } 46 | } 47 | } 48 | 49 | impl<'scan, R> Stream for SsTableScan<'scan, R> 50 | where 51 | R: Record, 52 | { 53 | type Item = Result<RecordBatchEntry<R>, parquet::errors::ParquetError>; 54 | 55 | fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { 56 | let mut this = self.project(); 57 | loop { 58 | match this.iter { 59 | Some(iter) => { 60 | if let Some(entry) = iter.next() { 61 | return Poll::Ready(Some(Ok(entry))); 62 | } 63 | *this.iter = None; 64 | } 65 | None => { 66 | let record_batch = ready!(this.stream.as_mut().poll_next(cx)).transpose()?; 67 | let record_batch = match record_batch { 68 | Some(record_batch) => record_batch, 69 | None => return Poll::Ready(None), 70 | }; 71 | *this.iter = Some(RecordBatchIterator::new( 72 | record_batch, 73 | this.projection_mask.clone(), 74 | this.full_schema.clone(), 75 | )); 76 | } 77 | } 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/record/key/mod.rs: -------------------------------------------------------------------------------- 1 | mod num; 2 | mod str; 3 | 4 | use std::{hash::Hash, sync::Arc}; 5 | 6 | use arrow::array::Datum; 7 | use fusio_log::{Decode, Encode}; 8 | pub use num::*; 9 | 10 | pub trait Key: 11 | 'static + Encode + Decode + Ord + Clone + Send + Sync + Hash + std::fmt::Debug 12 | { 13 | type Ref<'r>: KeyRef<'r, Key = Self> 14 | where 15 | Self: 'r; 16 | 17 | fn as_key_ref(&self) -> Self::Ref<'_>; 18 | 19 | fn to_arrow_datum(&self) -> Arc<dyn Datum>; 20 | } 21 | 22 | pub trait KeyRef<'r>: Clone + Encode + Send + Sync + Ord + std::fmt::Debug { 23 | type Key: Key<Ref<'r> = Self>; 24 | 25 | fn to_key(self) -> Self::Key; 26 | } 27 | -------------------------------------------------------------------------------- /src/record/key/str.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow::array::{Datum, StringArray}; 4 | 5 | use super::{Key, KeyRef}; 6 | 7 | impl Key for String { 8 | type Ref<'r> = &'r str; 9 | 10 | fn as_key_ref(&self) -> Self::Ref<'_> { 11 | self 12 | } 13 | 14 | fn to_arrow_datum(&self) -> Arc<dyn Datum> { 15 | Arc::new(StringArray::new_scalar(self)) 16 | } 17 | } 18 | 19 | impl<'r> KeyRef<'r> for &'r str { 20 | type Key = String; 21 | 22 | fn to_key(self) -> Self::Key { 23 | self.to_string() 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/record/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod key; 2 | pub mod option; 3 | pub mod runtime; 4 | #[cfg(test)] 5 | pub(crate) mod test; 6 | 7 | use std::{error::Error, fmt::Debug, io, sync::Arc}; 8 | 9 | use arrow::{array::RecordBatch, datatypes::Schema as ArrowSchema}; 10 | use fusio_log::{Decode, Encode}; 11 | pub use key::*; 12 | use option::OptionRecordRef; 13 | use parquet::{arrow::ProjectionMask, format::SortingColumn, schema::types::ColumnPath}; 14 | pub use runtime::*; 15 | use thiserror::Error; 16 | 17 | use
crate::inmem::immutable::ArrowArrays; 18 | 19 | pub trait Schema: Debug + Send + Sync { 20 | type Record: Record<Schema = Self>; 21 | 22 | type Columns: ArrowArrays<Record = Self::Record>; 23 | 24 | type Key: Key; 25 | 26 | /// Returns the [`arrow::datatypes::Schema`] of the record. 27 | /// 28 | /// **Note**: The first column should be `_null`, and the second column should be `_ts`. 29 | fn arrow_schema(&self) -> &Arc<ArrowSchema>; 30 | 31 | /// Returns the index of the primary key column. 32 | fn primary_key_index(&self) -> usize; 33 | 34 | /// Returns the ([`ColumnPath`], [`Vec<SortingColumn>`]) of the primary key column, representing 35 | /// the location of the primary key column in the parquet schema and its sort order within a 36 | /// RowGroup. 37 | fn primary_key_path(&self) -> (ColumnPath, Vec<SortingColumn>); 38 | } 39 | 40 | pub trait Record: 'static + Sized + Decode + Debug + Send + Sync { 41 | type Schema: Schema<Record = Self>; 42 | 43 | type Ref<'r>: RecordRef<'r, Record = Self> 44 | where 45 | Self: 'r; 46 | 47 | /// Returns the primary key of the record. This should be the type defined in the 48 | /// [`Schema`]. 49 | fn key(&self) -> <<<Self as Record>::Schema as Schema>::Key as Key>::Ref<'_> { 50 | self.as_record_ref().key() 51 | } 52 | 53 | /// Returns a reference to the record. 54 | fn as_record_ref(&self) -> Self::Ref<'_>; 55 | 56 | /// Returns the size of the record in bytes. 57 | fn size(&self) -> usize; 58 | } 59 | 60 | pub trait RecordRef<'r>: Clone + Sized + Encode + Send + Sync { 61 | type Record: Record; 62 | 63 | /// Returns the primary key of the record. This should be the type defined in the 64 | /// [`Schema`]. 65 | fn key(self) -> <<<Self::Record as Record>::Schema as Schema>::Key as Key>::Ref<'r>; 66 | 67 | /// Apply projection to the record, keeping only the columns specified in the projection mask. 68 | /// 69 | /// **Note**: The primary key column is always kept. 70 | fn projection(&mut self, projection_mask: &ProjectionMask); 71 | 72 | /// Get the [`RecordRef`] from the [`RecordBatch`] at the given offset. 73 | /// 74 | /// `full_schema` is the combination of `_null`, `_ts` and all fields defined in the [`Schema`].
75 | fn from_record_batch( 76 | record_batch: &'r RecordBatch, 77 | offset: usize, 78 | projection_mask: &'r ProjectionMask, 79 | full_schema: &'r Arc<ArrowSchema>, 80 | ) -> OptionRecordRef<'r, Self>; 81 | } 82 | 83 | #[derive(Debug, Error)] 84 | pub enum RecordEncodeError { 85 | #[error("record's field: {field_name} encode error: {error}")] 86 | Encode { 87 | field_name: String, 88 | error: Box<dyn Error + Send + Sync + 'static>, 89 | }, 90 | #[error("record io error: {0}")] 91 | Io(#[from] io::Error), 92 | #[error("record fusio error: {0}")] 93 | Fusio(#[from] fusio::Error), 94 | } 95 | 96 | #[derive(Debug, Error)] 97 | pub enum RecordDecodeError { 98 | #[error("record's field: {field_name} decode error: {error}")] 99 | Decode { 100 | field_name: String, 101 | error: Box<dyn Error + Send + Sync + 'static>, 102 | }, 103 | #[error("record io error: {0}")] 104 | Io(#[from] io::Error), 105 | #[error("record fusio error: {0}")] 106 | Fusio(#[from] fusio::Error), 107 | } 108 | -------------------------------------------------------------------------------- /src/record/option.rs: -------------------------------------------------------------------------------- 1 | use std::{marker::PhantomData, mem::transmute}; 2 | 3 | use super::{Key, Record, RecordRef, Schema}; 4 | use crate::timestamp::{Timestamp, Ts}; 5 | 6 | #[derive(Debug)] 7 | pub struct OptionRecordRef<'r, R> 8 | where 9 | R: RecordRef<'r>, 10 | { 11 | record: Ts<R>, 12 | null: bool, 13 | _marker: PhantomData<&'r ()>, 14 | } 15 | 16 | impl<'r, R> OptionRecordRef<'r, R> 17 | where 18 | R: RecordRef<'r>, 19 | { 20 | pub fn new(ts: Timestamp, record: R, null: bool) -> Self { 21 | Self { 22 | record: Ts::new(record, ts), 23 | null, 24 | _marker: PhantomData, 25 | } 26 | } 27 | } 28 | 29 | impl<'r, R> OptionRecordRef<'r, R> 30 | where 31 | R: RecordRef<'r>, 32 | { 33 | pub fn key(&self) -> Ts<<<<R::Record as Record>::Schema as Schema>::Key as Key>::Ref<'_>> { 34 | // Safety: shorter lifetime of the value must be safe 35 | unsafe { transmute(Ts::new(self.record.value().clone().key(), self.record.ts())) } 36 | } 37 | 38 | pub fn get(&self) -> Option<R> { 39 | if self.null { 40 | return None; 41 | } 42 | 43 | Some(self.record.value().clone()) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/record/runtime/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod array; 2 | mod record; 3 | mod record_ref; 4 | mod schema; 5 | mod value; 6 | 7 | pub use array::*; 8 | use arrow::datatypes::DataType as ArrowDataType; 9 | pub use record::*; 10 | pub use record_ref::*; 11 | pub use schema::*; 12 | pub use value::*; 13 | 14 | #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] 15 | pub enum DataType { 16 | UInt8, 17 | UInt16, 18 | UInt32, 19 | UInt64, 20 | Int8, 21 | Int16, 22 | Int32, 23 | Int64, 24 | String, 25 | Boolean, 26 | Bytes, 27 | Float32, 28 | Float64, 29 | } 30 | 31 | impl From<&ArrowDataType> for DataType { 32 | fn from(datatype: &ArrowDataType) -> Self { 33 | match datatype { 34 | ArrowDataType::UInt8 => DataType::UInt8, 35 | ArrowDataType::UInt16 => DataType::UInt16, 36 | ArrowDataType::UInt32 => DataType::UInt32, 37 | ArrowDataType::UInt64 => DataType::UInt64, 38 | ArrowDataType::Int8 => DataType::Int8, 39 | ArrowDataType::Int16 => DataType::Int16, 40 | ArrowDataType::Int32 => DataType::Int32, 41 | ArrowDataType::Int64 => DataType::Int64, 42 | ArrowDataType::Float32 => DataType::Float32, 43 | ArrowDataType::Float64 => DataType::Float64, 44 | ArrowDataType::Utf8 => DataType::String, 45 | ArrowDataType::Boolean =>
--------------------------------------------------------------------------------
/src/record/runtime/schema.rs:
--------------------------------------------------------------------------------
use std::{collections::HashMap, sync::Arc};

use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use parquet::{format::SortingColumn, schema::types::ColumnPath};

use super::{array::DynRecordImmutableArrays, DynRecord, Value, ValueDesc};
use crate::{magic, record::Schema};

#[derive(Debug)]
pub struct DynSchema {
    schema: Vec<ValueDesc>,
    primary_index: usize,
    arrow_schema: Arc<ArrowSchema>,
}

impl DynSchema {
    pub fn new(schema: Vec<ValueDesc>, primary_index: usize) -> Self {
        let mut metadata = HashMap::new();
        metadata.insert("primary_key_index".to_string(), primary_index.to_string());
        let arrow_schema = Arc::new(ArrowSchema::new_with_metadata(
            [
                Field::new("_null", DataType::Boolean, false),
                Field::new(magic::TS, DataType::UInt32, false),
            ]
            .into_iter()
            .chain(schema.iter().map(|desc| desc.arrow_field()))
            .collect::<Vec<_>>(),
            metadata,
        ));
        Self {
            schema,
            primary_index,
            arrow_schema,
        }
    }
}

impl Schema for DynSchema {
    type Record = DynRecord;

    type Columns = DynRecordImmutableArrays;

    type Key = Value;

    fn arrow_schema(&self) -> &Arc<ArrowSchema> {
        &self.arrow_schema
    }

    fn primary_key_index(&self) -> usize {
        self.primary_index + 2
    }

    fn primary_key_path(&self) -> (ColumnPath, Vec<SortingColumn>) {
        (
            ColumnPath::new(vec![
                magic::TS.to_string(),
                self.schema[self.primary_index].name.clone(),
            ]),
            vec![
                SortingColumn::new(1_i32, true, true),
                SortingColumn::new(self.primary_key_index() as i32, false, true),
            ],
        )
    }
}

/// Creates a [`DynSchema`] from a literal slice of values and a primary key index, suitable for
/// rapid testing and development.
///
/// ## Example:
///
/// ```no_run
/// // dyn_schema!(
/// //     (name, type, nullable),
/// //     ......
/// //     (name, type, nullable),
/// //     primary_key_index
/// // );
/// use tonbo::dyn_schema;
///
/// let schema = dyn_schema!(
///     ("foo", String, false),
///     ("bar", Int32, true),
///     ("baz", UInt64, true),
///     0
/// );
/// ```
#[macro_export]
macro_rules! dyn_schema {
    ($(($name: expr, $type: ident, $nullable: expr )),*, $primary: literal) => {
        {
            $crate::record::DynSchema::new(
                vec![
                    $(
                        $crate::record::ValueDesc::new($name.into(), $crate::record::DataType::$type, $nullable),
                    )*
                ],
                $primary,
            )
        }
    }
}
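The `+ 2` offset in `primary_key_index` is easy to trip over, so a small sketch building on the macro's own doc example:

use tonbo::{dyn_schema, record::Schema as _};

fn main() {
    let schema = dyn_schema!(
        ("id", UInt64, false),
        ("name", String, true),
        0
    );
    // Index 0 of the declared fields lands at Arrow column 2, after the
    // prepended `_null` and `_ts` bookkeeping columns.
    assert_eq!(schema.primary_key_index(), 2);
}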
dyn_schema { 90 | ($(($name: expr, $type: ident, $nullable: expr )),*, $primary: literal) => { 91 | { 92 | $crate::record::DynSchema::new( 93 | vec![ 94 | $( 95 | $crate::record::ValueDesc::new($name.into(), $crate::record::DataType::$type, $nullable), 96 | )* 97 | ], 98 | $primary, 99 | ) 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/stream/mem_projection.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | pin::Pin, 3 | sync::Arc, 4 | task::{Context, Poll}, 5 | }; 6 | 7 | use futures_core::Stream; 8 | use parquet::{arrow::ProjectionMask, errors::ParquetError}; 9 | use pin_project_lite::pin_project; 10 | 11 | use crate::{ 12 | record::Record, 13 | stream::{Entry, ScanStream}, 14 | }; 15 | 16 | pin_project! { 17 | pub struct MemProjectionStream<'projection, R> 18 | where 19 | R: Record, 20 | { 21 | stream: Box>, 22 | projection_mask: Arc, 23 | } 24 | } 25 | 26 | impl<'projection, R> MemProjectionStream<'projection, R> 27 | where 28 | R: Record, 29 | { 30 | pub(crate) fn new(stream: ScanStream<'projection, R>, projection_mask: ProjectionMask) -> Self { 31 | Self { 32 | stream: Box::new(stream), 33 | projection_mask: Arc::new(projection_mask), 34 | } 35 | } 36 | } 37 | 38 | impl<'projection, R> Stream for MemProjectionStream<'projection, R> 39 | where 40 | R: Record, 41 | { 42 | type Item = Result, ParquetError>; 43 | 44 | fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { 45 | let mut project = self.project(); 46 | 47 | return match Pin::new(&mut project.stream).poll_next(cx) { 48 | Poll::Ready(Some(Ok(entry))) => Poll::Ready(Some(Ok(Entry::Projection(( 49 | Box::new(entry), 50 | project.projection_mask.clone(), 51 | ))))), 52 | poll => poll, 53 | }; 54 | } 55 | } 56 | 57 | #[cfg(all(test, feature = "tokio"))] 58 | mod tests { 59 | use std::{ops::Bound, sync::Arc}; 60 | 61 | use fusio::{disk::TokioFs, path::Path, DynFs}; 62 | use futures_util::StreamExt; 63 | use parquet::arrow::{ArrowSchemaConverter, ProjectionMask}; 64 | 65 | use crate::{ 66 | inmem::{immutable::tests::TestSchema, mutable::MutableMemTable}, 67 | record::Schema, 68 | stream::mem_projection::MemProjectionStream, 69 | tests::Test, 70 | trigger::TriggerFactory, 71 | wal::log::LogType, 72 | DbOption, 73 | }; 74 | 75 | #[tokio::test] 76 | async fn merge_mutable() { 77 | let temp_dir = tempfile::tempdir().unwrap(); 78 | let fs = Arc::new(TokioFs) as Arc; 79 | let option = DbOption::new( 80 | Path::from_filesystem_path(temp_dir.path()).unwrap(), 81 | &TestSchema, 82 | ); 83 | 84 | fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); 85 | 86 | let trigger = TriggerFactory::create(option.trigger_type); 87 | 88 | let mutable = 89 | MutableMemTable::::new(&option, trigger, fs.clone(), Arc::new(TestSchema {})) 90 | .await 91 | .unwrap(); 92 | 93 | mutable 94 | .insert( 95 | LogType::Full, 96 | Test { 97 | vstring: "0".to_string(), 98 | vu32: 0, 99 | vbool: Some(true), 100 | }, 101 | 0.into(), 102 | ) 103 | .await 104 | .unwrap(); 105 | mutable 106 | .insert( 107 | LogType::Full, 108 | Test { 109 | vstring: "1".to_string(), 110 | vu32: 1, 111 | vbool: Some(true), 112 | }, 113 | 0.into(), 114 | ) 115 | .await 116 | .unwrap(); 117 | mutable 118 | .insert( 119 | LogType::Full, 120 | Test { 121 | vstring: "2".to_string(), 122 | vu32: 2, 123 | vbool: Some(true), 124 | }, 125 | 0.into(), 126 | ) 127 | .await 128 | .unwrap(); 129 | 130 | let mask = ProjectionMask::roots( 131 | 
--------------------------------------------------------------------------------
/src/stream/record_batch.rs:
--------------------------------------------------------------------------------
use std::{
    fmt::{self, Debug, Formatter},
    marker::PhantomData,
    mem::transmute,
    sync::Arc,
};

use arrow::{array::RecordBatch, datatypes::Schema};
use parquet::arrow::ProjectionMask;

use crate::{
    record::{option::OptionRecordRef, Key, Record, RecordRef, Schema as RecordSchema},
    timestamp::Ts,
};

pub struct RecordBatchEntry<R>
where
    R: Record,
{
    _record_batch: RecordBatch,
    record_ref: OptionRecordRef<'static, R::Ref<'static>>,
}

impl<R> RecordBatchEntry<R>
where
    R: Record,
{
    pub(crate) fn new(
        _record_batch: RecordBatch,
        record_ref: OptionRecordRef<'static, R::Ref<'static>>,
    ) -> Self {
        Self {
            _record_batch,
            record_ref,
        }
    }

    pub(crate) fn internal_key(&self) -> Ts<<<R::Schema as RecordSchema>::Key as Key>::Ref<'_>> {
        self.record_ref.key()
    }

    pub fn key(&self) -> <<R::Schema as RecordSchema>::Key as Key>::Ref<'_> {
        self.internal_key().value().clone()
    }

    pub fn get(&self) -> Option<R::Ref<'_>> {
        // Safety: shorter lifetime of the key must be safe
        unsafe { transmute(self.record_ref.get()) }
    }
}

impl<R> Debug for RecordBatchEntry<R>
where
    R: Record + Debug,
{
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.debug_struct("RecordBatchEntry").finish()
    }
}

#[derive(Debug)]
pub struct RecordBatchIterator<R> {
    record_batch: RecordBatch,
    offset: usize,
    projection_mask: ProjectionMask,
    full_schema: Arc<Schema>,
    _marker: PhantomData<R>,
}

impl<R> RecordBatchIterator<R>
where
    R: Record,
{
    pub(crate) fn new(
        record_batch: RecordBatch,
        projection_mask: ProjectionMask,
        full_schema: Arc<Schema>,
    ) -> Self {
        Self {
            record_batch,
            offset: 0,
            projection_mask,
            full_schema,
            _marker: PhantomData,
        }
    }
}

impl<R> Iterator for RecordBatchIterator<R>
where
    R: Record,
{
    type Item = RecordBatchEntry<R>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.offset >= self.record_batch.num_rows() {
            return None;
        }

        let record_batch = self.record_batch.clone();
        let record = R::Ref::from_record_batch(
            &self.record_batch,
            self.offset,
            &self.projection_mask,
            &self.full_schema,
        );
        let entry = RecordBatchEntry::new(record_batch, unsafe {
            // Safety: self-referring lifetime is safe
            transmute::<OptionRecordRef<'_, R::Ref<'_>>, OptionRecordRef<'static, R::Ref<'static>>>(
                record,
            )
        });
        self.offset += 1;
        Some(entry)
    }
}

#[cfg(test)]
mod tests {}
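An in-crate sketch of driving `RecordBatchIterator` (its constructor is `pub(crate)`, so this is how the crate's own scan streams would use it; the helper itself is illustrative, with types as declared above):

fn count_live<R: Record>(
    batch: RecordBatch,
    mask: ProjectionMask,
    full_schema: Arc<Schema>,
) -> usize {
    RecordBatchIterator::<R>::new(batch, mask, full_schema)
        // `get()` returns `None` for tombstone rows (the `_null` column is set);
        // `key()` would strip the `_ts` component from the internal key.
        .filter(|entry| entry.get().is_some())
        .count()
}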
--------------------------------------------------------------------------------
/src/timestamp/mod.rs:
--------------------------------------------------------------------------------
pub mod timestamped;

use arrow::{
    array::{PrimitiveArray, Scalar},
    datatypes::UInt32Type,
};
use fusio::{SeqRead, Write};
use fusio_log::{Decode, Encode};

pub use self::timestamped::*;

#[repr(transparent)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct Timestamp(u32);

pub(crate) const EPOCH: Timestamp = Timestamp(0);

impl From<u32> for Timestamp {
    fn from(ts: u32) -> Self {
        Self(ts)
    }
}

impl From<Timestamp> for u32 {
    fn from(value: Timestamp) -> Self {
        value.0
    }
}

impl Timestamp {
    pub(crate) fn to_arrow_scalar(self) -> Scalar<PrimitiveArray<UInt32Type>> {
        PrimitiveArray::<UInt32Type>::new_scalar(self.0)
    }
}

impl Encode for Timestamp {
    type Error = fusio::Error;

    async fn encode<W>(&self, writer: &mut W) -> Result<(), Self::Error>
    where
        W: Write,
    {
        self.0.encode(writer).await
    }

    fn size(&self) -> usize {
        self.0.size()
    }
}

impl Decode for Timestamp {
    type Error = fusio::Error;

    async fn decode<R>(reader: &mut R) -> Result<Self, Self::Error>
    where
        R: SeqRead,
    {
        u32::decode(reader).await.map(Timestamp)
    }
}
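A tiny in-crate sketch of the conversions above: `Timestamp` is a transparent `u32`, so the round-trip is lossless, and the encoded form is simply whatever the delegated `u32` Encode impl writes (4 bytes):

fn roundtrip() {
    let ts = Timestamp::from(7u32);
    let raw: u32 = ts.into();
    assert_eq!(raw, 7);
}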
--------------------------------------------------------------------------------
/src/trigger.rs:
--------------------------------------------------------------------------------
use std::{
    marker::PhantomData,
    sync::{
        atomic::{AtomicUsize, Ordering},
        Arc,
    },
};

use fusio_log::Encode;

use crate::record::Record;

pub trait FreezeTrigger<R: Record>: Send + Sync {
    fn check_if_exceed(&self, item: &R) -> bool;

    fn reset(&self);
}

#[derive(Debug)]
pub struct SizeOfMemTrigger<R> {
    threshold: usize,
    current_size: AtomicUsize,
    _p: PhantomData<R>,
}

impl<R> SizeOfMemTrigger<R> {
    pub fn new(max_size: usize) -> Self {
        Self {
            threshold: max_size,
            current_size: AtomicUsize::new(0),
            _p: Default::default(),
        }
    }
}

impl<R: Record> FreezeTrigger<R> for SizeOfMemTrigger<R> {
    fn check_if_exceed(&self, item: &R) -> bool {
        let size = item.size() + item.key().size();
        self.current_size.fetch_add(size, Ordering::SeqCst) + size >= self.threshold
    }

    fn reset(&self) {
        self.current_size.store(0, Ordering::SeqCst);
    }
}

#[derive(Debug)]
pub struct LengthTrigger<R> {
    threshold: usize,
    count: AtomicUsize,
    _p: PhantomData<R>,
}

impl<R> LengthTrigger<R> {
    pub fn new(threshold: usize) -> Self {
        Self {
            threshold,
            count: AtomicUsize::new(0),
            _p: Default::default(),
        }
    }
}

impl<R: Record> FreezeTrigger<R> for LengthTrigger<R> {
    fn check_if_exceed(&self, _: &R) -> bool {
        self.count.fetch_add(1, Ordering::SeqCst) + 1 >= self.threshold
    }

    fn reset(&self) {
        self.count.store(0, Ordering::SeqCst);
    }
}

#[derive(Copy, Clone, Debug)]
pub enum TriggerType {
    SizeOfMem(usize),
    #[allow(unused)]
    Length(usize),
}

pub(crate) struct TriggerFactory<R> {
    _p: PhantomData<R>,
}

impl<R: Record> TriggerFactory<R> {
    pub fn create(trigger_type: TriggerType) -> Arc<dyn FreezeTrigger<R>> {
        match trigger_type {
            TriggerType::SizeOfMem(threshold) => Arc::new(SizeOfMemTrigger::new(threshold)),
            TriggerType::Length(threshold) => Arc::new(LengthTrigger::new(threshold)),
        }
    }
}

#[cfg(all(test, feature = "tokio"))]
mod tests {
    use super::*;
    use crate::tests::Test;

    #[tokio::test]
    async fn test_size_of_mem_trigger() {
        let threshold = 16;
        let trigger = SizeOfMemTrigger::new(threshold);

        let record = Test {
            vstring: "test".to_string(),
            vu32: 0,
            vbool: None,
        };

        let record_size = record.size();
        assert_eq!(record_size, 8);
        let record_size = record.key().size();
        assert_eq!(record_size, 6);

        assert!(
            !trigger.check_if_exceed(&record),
            "Trigger should not be exceeded after 1 record"
        );

        trigger.check_if_exceed(&record);
        assert!(
            trigger.check_if_exceed(&record),
            "Trigger should be exceeded after 2 records"
        );

        trigger.reset();
        assert!(
            !trigger.check_if_exceed(&record),
            "Trigger should not be exceeded after reset"
        );
    }

    #[tokio::test]
    async fn test_length_trigger() {
        let threshold = 2;
        let trigger = LengthTrigger::new(threshold);

        let record = Test {
            vstring: "test".to_string(),
            vu32: 0,
            vbool: None,
        };

        assert!(
            !trigger.check_if_exceed(&record),
            "Trigger should not be exceeded after 1 record"
        );

        trigger.check_if_exceed(&record);
        assert!(
            trigger.check_if_exceed(&record),
            "Trigger should be exceeded after 2 records"
        );

        trigger.reset();
        assert!(
            !trigger.check_if_exceed(&record),
            "Trigger should not be exceeded after reset"
        );
    }

    #[tokio::test]
    async fn test_trigger_factory() {
        let size_of_mem_trigger = TriggerFactory::<Test>::create(TriggerType::SizeOfMem(16));
        let length_trigger = TriggerFactory::<Test>::create(TriggerType::Length(2));

        assert!(!size_of_mem_trigger.check_if_exceed(&Test {
            vstring: "test".to_string(),
            vu32: 0,
            vbool: None
        }));
        assert!(size_of_mem_trigger.check_if_exceed(&Test {
            vstring: "test".to_string(),
            vu32: 0,
            vbool: None
        }));

        assert!(!length_trigger.check_if_exceed(&Test {
            vstring: "test".to_string(),
            vu32: 1,
            vbool: Some(true)
        }));
        assert!(length_trigger.check_if_exceed(&Test {
            vstring: "test".to_string(),
            vu32: 1,
            vbool: Some(true)
        }));
    }
}
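Because `FreezeTrigger` is object-safe, policies compose. An illustrative combinator (not crate API) that freezes on whichever budget is exhausted first; evaluating both arms keeps each inner counter advancing in lockstep:

struct SizeOrLengthTrigger<R> {
    size: SizeOfMemTrigger<R>,
    length: LengthTrigger<R>,
}

impl<R: Record> FreezeTrigger<R> for SizeOrLengthTrigger<R> {
    fn check_if_exceed(&self, item: &R) -> bool {
        // Deliberately not short-circuited: both counters must observe the item.
        let size_hit = self.size.check_if_exceed(item);
        let len_hit = self.length.check_if_exceed(item);
        size_hit || len_hit
    }

    fn reset(&self) {
        self.size.reset();
        self.length.reset();
    }
}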
--------------------------------------------------------------------------------
/src/version/edit.rs:
--------------------------------------------------------------------------------
use std::mem::size_of;

use fusio::{SeqRead, Write};
use fusio_log::{Decode, Encode, FsOptions, Options, Path};
use futures_util::TryStreamExt;

use crate::{fs::FileId, scope::Scope, timestamp::Timestamp};

#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum VersionEdit<K> {
    Add { level: u8, scope: Scope<K> },
    Remove { level: u8, gen: FileId },
    LatestTimeStamp { ts: Timestamp },
    NewLogLength { len: u32 },
}

impl<K> VersionEdit<K>
where
    K: Decode + Send,
{
    pub(crate) async fn recover(path: Path, fs_option: FsOptions) -> Vec<VersionEdit<K>> {
        let mut edits = vec![];

        let mut edits_stream = Options::new(path)
            .disable_buf()
            .fs(fs_option)
            .recover::<VersionEdit<K>>()
            .await
            .unwrap();
        while let Ok(batch) = edits_stream.try_next().await {
            match batch {
                Some(mut batch) => edits.append(&mut batch),
                None => break,
            }
        }
        edits
    }
}

impl<K> Encode for VersionEdit<K>
where
    K: Encode + Sync,
{
    type Error = <K as Encode>::Error;

    async fn encode<W>(&self, writer: &mut W) -> Result<(), Self::Error>
    where
        W: Write,
    {
        match self {
            VersionEdit::Add { scope, level } => {
                0u8.encode(writer).await?;
                level.encode(writer).await?;
                scope.encode(writer).await?;
            }
            VersionEdit::Remove { gen, level } => {
                1u8.encode(writer).await?;
                level.encode(writer).await?;
                let (result, _) = writer.write_all(&gen.to_bytes()[..]).await;
                result?;
            }
            VersionEdit::LatestTimeStamp { ts } => {
                2u8.encode(writer).await?;
                ts.encode(writer).await?;
            }
            VersionEdit::NewLogLength { len } => {
                3u8.encode(writer).await?;
                len.encode(writer).await?;
            }
        }

        Ok(())
    }

    fn size(&self) -> usize {
        size_of::<u8>()
            + size_of::<u8>()
            + match self {
                VersionEdit::Add { scope, .. } => scope.size(),
                VersionEdit::Remove { .. } => 16,
                VersionEdit::LatestTimeStamp { ts } => ts.size(),
                VersionEdit::NewLogLength { .. } => size_of::<u32>(),
            }
    }
}

impl<K> Decode for VersionEdit<K>
where
    K: Decode + Send,
{
    type Error = <K as Decode>::Error;

    async fn decode<R: SeqRead>(reader: &mut R) -> Result<Self, Self::Error> {
        let edit_type = u8::decode(reader).await?;

        Ok(match edit_type {
            0 => {
                let level = u8::decode(reader).await?;
                let scope = Scope::<K>::decode(reader).await?;

                VersionEdit::Add { level, scope }
            }
            1 => {
                let level = u8::decode(reader).await?;
                let gen = {
                    let mut buf = [0u8; 16];
                    let (result, _) = reader.read_exact(&mut buf[..]).await;
                    result?;
                    FileId::from_bytes(buf)
                };
                VersionEdit::Remove { level, gen }
            }
            2 => {
                let ts = Timestamp::decode(reader).await?;
                VersionEdit::LatestTimeStamp { ts }
            }
            3 => {
                let len = u32::decode(reader).await?;
                VersionEdit::NewLogLength { len }
            }
            _ => unreachable!(),
        })
    }
}

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use fusio_log::{Decode, Encode};
    use tokio::io::AsyncSeekExt;

    use crate::{fs::generate_file_id, scope::Scope, version::edit::VersionEdit};

    #[tokio::test]
    async fn encode_and_decode() {
        let edits = vec![
            VersionEdit::Add {
                level: 0,
                scope: Scope {
                    min: "Min".to_string(),
                    max: "Max".to_string(),
                    gen: Default::default(),
                    wal_ids: Some(vec![generate_file_id(), generate_file_id()]),
                },
            },
            VersionEdit::Remove {
                level: 1,
                gen: Default::default(),
            },
            VersionEdit::LatestTimeStamp { ts: 10.into() },
            VersionEdit::NewLogLength { len: 233 },
        ];

        let mut buf = Vec::new();
        let mut cursor = Cursor::new(&mut buf);

        for edit in edits.clone() {
            edit.encode(&mut cursor).await.unwrap();
        }

        cursor.seek(std::io::SeekFrom::Start(0)).await.unwrap();

        let mut decode_edits = Vec::new();

        while let Ok(edit) = VersionEdit::decode(&mut cursor).await {
            decode_edits.push(edit);
        }

        assert_eq!(edits, decode_edits);
    }
}
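An in-crate sketch of what replaying this log looks like (`recover` is `pub(crate)`; the `String` key and the match-arm comments are illustrative of how a version set would consume each edit):

async fn replay(path: Path, fs_option: FsOptions) {
    for edit in VersionEdit::<String>::recover(path, fs_option).await {
        match edit {
            // Each encoded record starts with a one-byte tag (0..=3), then the payload.
            VersionEdit::Add { level, scope } => { let _ = (level, scope); /* register the new SST */ }
            VersionEdit::Remove { level, gen } => { let _ = (level, gen); /* drop the SST file */ }
            VersionEdit::LatestTimeStamp { ts } => { let _ = ts; /* restore the commit-ts watermark */ }
            VersionEdit::NewLogLength { len } => { let _ = len; /* bound how much log to replay */ }
        }
    }
}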
--------------------------------------------------------------------------------
/src/wal/log.rs:
--------------------------------------------------------------------------------
use fusio::{SeqRead, Write};
use fusio_log::{Decode, Encode};

use crate::{
    record::{Record, Schema},
    timestamp::Ts,
};

#[derive(Debug, Clone, Copy)]
#[repr(u8)]
pub enum LogType {
    Full,
    First,
    Middle,
    Last,
}

impl From<u8> for LogType {
    fn from(value: u8) -> Self {
        match value {
            0 => Self::Full,
            1 => Self::First,
            2 => Self::Middle,
            3 => Self::Last,
            _ => unreachable!(),
        }
    }
}

pub(crate) struct Log<R>
where
    R: Record,
{
    pub(crate) key: Ts<<R::Schema as Schema>::Key>,
    pub(crate) value: Option<R>,
    pub(crate) log_type: Option<LogType>,
}

impl<R> Log<R>
where
    R: Record,
{
    pub(crate) fn new(
        ts: Ts<<R::Schema as Schema>::Key>,
        value: Option<R>,
        log_type: Option<LogType>,
    ) -> Self {
        Self {
            key: ts,
            value,
            log_type,
        }
    }
}

impl<R> Encode for Log<R>
where
    R: Record,
{
    type Error = fusio::Error;

    async fn encode<W>(&self, writer: &mut W) -> Result<(), Self::Error>
    where
        W: Write,
    {
        if let Some(log_type) = self.log_type {
            (log_type as u8).encode(writer).await?;
        } else {
            unreachable!()
        }
        self.key.encode(writer).await.unwrap();
        self.value
            .as_ref()
            .map(R::as_record_ref)
            .encode(writer)
            .await
            .unwrap();
        Ok(())
    }

    fn size(&self) -> usize {
        self.key.size() + self.value.as_ref().map(R::as_record_ref).size() + size_of::<u8>()
    }
}

impl<Re> Decode for Log<Re>
where
    Re: Record,
{
    type Error = fusio::Error;

    async fn decode<R>(reader: &mut R) -> Result<Self, Self::Error>
    where
        R: SeqRead,
    {
        let log_type = LogType::from(u8::decode(reader).await?);
        let key = Ts::<<Re::Schema as Schema>::Key>::decode(reader)
            .await
            .unwrap();
        let record = Option::<Re>::decode(reader).await.unwrap();

        Ok(Log::new(key, record, Some(log_type)))
    }
}

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use fusio_log::{Decode, Encode};
    use tokio::io::AsyncSeekExt;

    use crate::{
        timestamp::Ts,
        wal::log::{Log, LogType},
    };

    #[tokio::test]
    async fn encode_and_decode() {
        let entry: Log<String> = Log::new(
            Ts::new("hello".into(), 1.into()),
            Some("hello".into()),
            Some(LogType::Middle),
        );
        let mut bytes = Vec::new();
        let mut cursor = Cursor::new(&mut bytes);
        entry.encode(&mut cursor).await.unwrap();

        let decode_entry = {
            cursor.seek(std::io::SeekFrom::Start(0)).await.unwrap();
            Log::<String>::decode(&mut cursor).await.unwrap()
        };

        assert_eq!(entry.value, decode_entry.value);
        assert_eq!(entry.key, decode_entry.key);
    }
}
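The four `LogType` tags frame multi-record writes: a standalone insert is written as `Full`, while a write batch is framed `First`, `Middle`..., `Last`, so recovery can tell whether it saw the whole batch. A sketch of picking the tag by position (an illustrative helper, not crate API):

fn frame(i: usize, n: usize) -> LogType {
    match (i, n) {
        (_, 1) => LogType::Full,           // a single record stands alone
        (0, _) => LogType::First,          // opens a batch
        (i, n) if i + 1 == n => LogType::Last, // closes the batch
        _ => LogType::Middle,
    }
}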
--------------------------------------------------------------------------------
/tests/data_integrity.rs:
--------------------------------------------------------------------------------
#[cfg(all(test, feature = "tokio"))]
mod tests {
    use std::{hash::Hasher, ops::Bound};

    use fusio::path::Path;
    use futures_util::StreamExt;
    use tempfile::TempDir;
    use tonbo::{executor::tokio::TokioExecutor, DbOption, Record, DB};

    const WRITE_TIMES: usize = 500_000;
    const STRING_SIZE: usize = 50;

    #[derive(Record, Debug)]
    pub struct Customer {
        #[record(primary_key)]
        pub c_custkey: i32,
        pub c_name: String,
        pub c_address: String,
        pub c_nationkey: i32,
        pub c_phone: String,
        pub c_acctbal: String,
        pub c_mktsegment: String,
        pub c_comment: String,
    }

    impl Customer {
        pub fn crc_hash(&self, hasher: &mut crc32fast::Hasher) {
            hasher.write_i32(self.c_custkey);
            hasher.write(self.c_name.as_bytes());
            hasher.write(self.c_address.as_bytes());
            hasher.write_i32(self.c_nationkey);
            hasher.write(self.c_phone.as_bytes());
            hasher.write(self.c_acctbal.as_bytes());
            hasher.write(self.c_mktsegment.as_bytes());
            hasher.write(self.c_comment.as_bytes());
        }
    }

    fn gen_string(rng: &mut fastrand::Rng, len: usize) -> String {
        let charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

        let random_string: String = (0..len)
            .map(|_| {
                let idx = rng.usize(0..charset.len());
                charset.chars().nth(idx).unwrap()
            })
            .collect();
        random_string
    }

    fn gen_record(rng: &mut fastrand::Rng, primary_key_count: &mut i32) -> Customer {
        *primary_key_count += 1;
        Customer {
            c_custkey: *primary_key_count,
            c_name: gen_string(rng, STRING_SIZE),
            c_address: gen_string(rng, STRING_SIZE),
            c_nationkey: rng.i32(..),
            c_phone: gen_string(rng, STRING_SIZE),
            c_acctbal: gen_string(rng, STRING_SIZE),
            c_mktsegment: gen_string(rng, STRING_SIZE),
            c_comment: gen_string(rng, STRING_SIZE),
        }
    }

    #[ignore]
    #[tokio::test(flavor = "multi_thread")]
    async fn test_data_integrity() {
        let mut rng = fastrand::Rng::with_seed(42);
        let mut primary_key_count = 0;
        let mut write_hasher = crc32fast::Hasher::new();

        let temp_dir = TempDir::new().unwrap();
        let option = DbOption::new(
            Path::from_filesystem_path(temp_dir.path()).unwrap(),
            &CustomerSchema,
        );

        let db: DB<Customer, TokioExecutor> =
            DB::new(option, TokioExecutor::current(), CustomerSchema)
                .await
                .unwrap();

        for _ in 0..WRITE_TIMES {
            let customer = gen_record(&mut rng, &mut primary_key_count);

            customer.crc_hash(&mut write_hasher);
            db.insert(customer).await.unwrap();
        }
        println!("{} items written", WRITE_TIMES);

        let mut read_hasher = crc32fast::Hasher::new();
        let mut read_count = 0;
        let txn = db.transaction().await;

        let mut stream = txn
            .scan((Bound::Unbounded, Bound::Unbounded))
            .take()
            .await
            .unwrap();
        while let Some(result) = stream.next().await {
            let entry = result.unwrap();
            let customer_ref = entry.value().unwrap();

            Customer {
                c_custkey: customer_ref.c_custkey,
                c_name: customer_ref.c_name.unwrap().to_string(),
                c_address: customer_ref.c_address.unwrap().to_string(),
                c_nationkey: customer_ref.c_nationkey.unwrap(),
                c_phone: customer_ref.c_phone.unwrap().to_string(),
                c_acctbal: customer_ref.c_acctbal.unwrap().to_string(),
                c_mktsegment: customer_ref.c_mktsegment.unwrap().to_string(),
                c_comment: customer_ref.c_comment.unwrap().to_string(),
            }
            .crc_hash(&mut read_hasher);
            read_count += 1;
        }
        println!("{} items read", read_count);
        assert_eq!(write_hasher.finish(), read_hasher.finish());
    }
}

--------------------------------------------------------------------------------
/tests/fail/01-missing-primary-key.rs:
--------------------------------------------------------------------------------
use tonbo_macros::Record;

#[derive(Record, Debug)]
pub struct User {
    name: String,
    email: Option<String>,
    age: u8,
}

fn main() {}

--------------------------------------------------------------------------------
/tests/fail/01-missing-primary-key.stderr:
--------------------------------------------------------------------------------
error: missing primary key field, use #[record(primary_key)] to define one
 --> tests/fail/01-missing-primary-key.rs:4:12
  |
4 | pub struct User {
  |            ^^^^

--------------------------------------------------------------------------------
/tests/success/01-simple-record.rs:
--------------------------------------------------------------------------------
use tonbo_macros::Record;

#[derive(Record, Debug)]
pub struct User {
    #[record(primary_key)]
    name: String,
    email: Option<String>,
    age: u8,
}

fn main() {}

--------------------------------------------------------------------------------
/tonbo_macros/Cargo.toml:
--------------------------------------------------------------------------------
[package]
description = "TonboRecord macro"
documentation = "https://docs.rs/tonbo_macros"
edition = "2021"
license = "Apache-2.0"
name = "tonbo_macros"
version = "0.3.2"

[dependencies]
darling = "0.20"
proc-macro2 = "1"
quote = "1"
syn = "2"

[lib]
path = "src/lib.rs"
proc-macro = true

--------------------------------------------------------------------------------
/tonbo_macros/src/keys.rs:
--------------------------------------------------------------------------------
use proc_macro2::{Ident, TokenStream};
use syn::Type;

#[derive(Clone)]
pub(crate) struct PrimaryKey {
    pub(crate) name: Ident,
    pub(crate) base_ty: Type,
    pub(crate) fn_key: TokenStream,
    pub(crate) builder_append_value: TokenStream,
    pub(crate) index: usize,
}

--------------------------------------------------------------------------------
/tonbo_macros/src/lib.rs:
--------------------------------------------------------------------------------
mod keys;
mod schema_model;

mod record;

pub(crate) mod data_type;

pub(crate) mod utils;

use proc_macro::TokenStream;
use syn::{parse_macro_input, DeriveInput};

use crate::data_type::DataType;

/// Defines the structure of a record and generates the implementations Tonbo requires,
/// allowing derive expansion.
///
/// # Example
///
/// ```no_rust
/// use tonbo::Record;
///
/// #[derive(Record)]
/// pub struct Music {
///     #[record(primary_key)]
///     pub id: u32,
///     pub name: String,
///     pub url: Option<String>,
///     pub is_favorite: bool,
/// }
/// ```
#[proc_macro_derive(Record, attributes(record))]
pub fn tonbo_record(input: TokenStream) -> TokenStream {
    let ast = parse_macro_input!(input as DeriveInput);

    let result = record::handle(ast);
    match result {
        Ok(codegen) => codegen.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

#[proc_macro_derive(KeyAttributes, attributes(primary_key))]
pub fn key_attributes(_input: TokenStream) -> TokenStream {
    let gen = quote::quote! {};
    gen.into()
}

--------------------------------------------------------------------------------
/tonbo_macros/src/schema_model.rs:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/tonbo_macros/src/utils/ident_generator.rs:
--------------------------------------------------------------------------------
use syn::Ident;

pub(crate) trait IdentGenerator {
    fn to_ref_ident(&self) -> Ident;

    fn to_schema_ident(&self) -> Ident;

    fn to_builder_ident(&self) -> Ident;

    fn to_array_ident(&self) -> Ident;

    fn to_immutable_array_ident(&self) -> Ident;
}

impl IdentGenerator for proc_macro2::Ident {
    fn to_ref_ident(&self) -> Ident {
        Ident::new(&format!("{}Ref", self), self.span())
    }

    fn to_schema_ident(&self) -> Ident {
        Ident::new(&format!("{}Schema", self), self.span())
    }

    fn to_builder_ident(&self) -> Ident {
        Ident::new(&format!("{}Builder", self), self.span())
    }

    fn to_array_ident(&self) -> Ident {
        Ident::new(&format!("{}_array", self), self.span())
    }

    fn to_immutable_array_ident(&self) -> Ident {
        Ident::new(&format!("{}ImmutableArrays", self), self.span())
    }
}
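An in-crate sketch of the naming scheme `IdentGenerator` gives the derive macro (the trait is `pub(crate)`; reusing the input identifier's span keeps compile errors pointed at the user's type):

use proc_macro2::Span;
use syn::Ident;

fn generated_names() {
    // With `IdentGenerator` in scope, `Music` fans out into the companion types.
    let music = Ident::new("Music", Span::call_site());
    assert_eq!(music.to_ref_ident().to_string(), "MusicRef");
    assert_eq!(music.to_schema_ident().to_string(), "MusicSchema");
    assert_eq!(music.to_builder_ident().to_string(), "MusicBuilder");
    assert_eq!(music.to_immutable_array_ident().to_string(), "MusicImmutableArrays");
}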
--------------------------------------------------------------------------------
/tonbo_macros/src/utils/mod.rs:
--------------------------------------------------------------------------------
pub(crate) mod ident_generator;
--------------------------------------------------------------------------------