├── rust-toolchain.toml ├── docs ├── images │ ├── 0001-query-lifecycle.png │ ├── 0002-predicate-pushdown.png │ ├── 0001-query-lifecycle.puml │ └── 0002-predicate-pushdown.puml └── rfcs │ ├── 0000-rfc-style-guide.md │ └── 0006-mvcc-sidecar.md ├── src ├── tests_internal │ ├── common │ │ └── mod.rs │ ├── mod.rs │ ├── compaction_loop_spawn.rs │ ├── wasm_compat_e2e.rs │ ├── conflict_e2e.rs │ ├── backend.rs │ ├── time_travel_e2e.rs │ ├── wal_rotation_e2e.rs │ └── wal_policy_e2e.rs ├── db │ ├── tests │ │ ├── mod.rs │ │ ├── core │ │ │ ├── mod.rs │ │ │ ├── common.rs │ │ │ ├── scan.rs │ │ │ └── flush.rs │ │ └── wasm_web.rs │ ├── error.rs │ └── compaction.rs ├── inmem │ ├── mod.rs │ ├── mutable │ │ ├── mod.rs │ │ └── metrics.rs │ └── immutable │ │ └── mod.rs ├── key │ ├── mod.rs │ ├── heap_size.rs │ └── ts.rs ├── ondisk │ └── mod.rs ├── mutation.rs ├── prelude.rs ├── compaction │ ├── handle.rs │ ├── mod.rs │ ├── scheduler.rs │ └── minor.rs ├── id.rs ├── manifest │ ├── version.rs │ ├── mod.rs │ └── codec.rs ├── extractor │ ├── mod.rs │ └── errors.rs ├── wal │ ├── metrics.rs │ └── manifest_ext.rs ├── test.rs ├── query │ ├── mod.rs │ └── scan.rs ├── mvcc │ └── mod.rs ├── mode │ ├── dyn_config.rs │ └── mod.rs └── schema.rs ├── .gitignore ├── examples ├── cloudflare-worker │ ├── .gitignore │ ├── wrangler.toml │ └── Cargo.toml ├── 01_basic.rs ├── 02b_snapshot.rs ├── 02_transaction.rs ├── 10_dynamic │ ├── 10b_dynamic_metadata.rs │ ├── 10a_dynamic_basic.rs │ ├── 10c_dynamic_composite.rs │ └── 10d_dynamic_transaction.rs ├── 04_s3.rs ├── 07_streaming.rs ├── 06_composite_key.rs ├── 08_nested_types.rs ├── 09_time_travel.rs └── 03_filter.rs ├── rustfmt.toml ├── predicate ├── Cargo.toml └── src │ ├── lib.rs │ └── core │ ├── operand.rs │ ├── row_set.rs │ ├── builder.rs │ └── visitor.rs ├── tests ├── common │ └── mod.rs ├── web_executor.rs ├── README.md ├── s3_smoke.sh ├── s3_localstack_env.sh └── s3_smoke.rs ├── .githooks └── pre-commit └── Cargo.toml /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.90" 3 | components = ["clippy", "rust-analyzer", "rustfmt"] 4 | -------------------------------------------------------------------------------- /docs/images/0001-query-lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonbo-io/tonbo/HEAD/docs/images/0001-query-lifecycle.png -------------------------------------------------------------------------------- /docs/images/0002-predicate-pushdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonbo-io/tonbo/HEAD/docs/images/0002-predicate-pushdown.png -------------------------------------------------------------------------------- /src/tests_internal/common/mod.rs: -------------------------------------------------------------------------------- 1 | //! Common test utilities for integration tests. 
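//!
//! A minimal usage sketch (hedged illustration only; it assumes `arrow_schema::{DataType, Field}`
//! are in scope and follows the `config_with_pk(fields, primary_key)` signature of the helper
//! re-exported below):
//!
//! ```rust,ignore
//! use arrow_schema::{DataType, Field};
//!
//! // Build a DynMode config keyed on `id`; `score` stays a regular nullable column.
//! let cfg = config_with_pk(
//!     vec![
//!         Field::new("id", DataType::Utf8, false),
//!         Field::new("score", DataType::Int64, true),
//!     ],
//!     &["id"],
//! );
//! ```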
2 | 3 | pub use crate::test_support::config_with_pk; 4 | -------------------------------------------------------------------------------- /src/db/tests/mod.rs: -------------------------------------------------------------------------------- 1 | mod core; 2 | mod wal_gc; 3 | mod wal_recovery; 4 | 5 | #[cfg(all(target_arch = "wasm32", feature = "web"))] 6 | mod wasm_web; 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | .serena 4 | 5 | /.idea 6 | 7 | # python 8 | __pycache__ 9 | *.so 10 | 11 | # node 12 | node_modules 13 | dist 14 | 15 | .DS_Store 16 | -------------------------------------------------------------------------------- /src/db/tests/core/mod.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | mod compaction; 3 | mod flush; 4 | mod ingest; 5 | mod metadata; 6 | mod recovery; 7 | mod scan; 8 | mod wal; 9 | mod wal_pruning; 10 | -------------------------------------------------------------------------------- /examples/cloudflare-worker/.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | /target/ 3 | /build/ 4 | 5 | # Wrangler 6 | .wrangler/ 7 | 8 | # Local development secrets (never commit real credentials!) 9 | .dev.vars 10 | 11 | # Rust lock file (optional, can be committed for reproducibility) 12 | # Cargo.lock 13 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | comment_width = 100 2 | edition = "2021" 3 | format_code_in_doc_comments = true 4 | format_strings = true 5 | group_imports = "StdExternalCrate" 6 | imports_granularity = "Crate" 7 | max_width = 100 8 | normalize_comments = true 9 | normalize_doc_attributes = true 10 | wrap_comments = true 11 | -------------------------------------------------------------------------------- /src/inmem/mod.rs: -------------------------------------------------------------------------------- 1 | //! In-memory structures (mutable and immutable memtables). 2 | //! 3 | //! - `mutable/` contains the columnar mutable memtable with a last-writer key index. 4 | //! - `immutable/` contains typed arrays and the zero-copy immutable memtable. 5 | 6 | pub(crate) mod immutable; 7 | pub(crate) mod mutable; 8 | pub(crate) mod policy; 9 | -------------------------------------------------------------------------------- /src/tests_internal/mod.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | pub mod backend; 4 | pub mod compaction_gc_e2e; 5 | pub mod compaction_loop_spawn; 6 | pub mod conflict_e2e; 7 | pub mod durability_public; 8 | pub mod public_api_e2e; 9 | pub mod read_smoke; 10 | pub mod scan_plan_e2e; 11 | pub mod time_travel_e2e; 12 | pub mod wal_policy_e2e; 13 | pub mod wal_rotation_e2e; 14 | pub mod wasm_compat_e2e; 15 | -------------------------------------------------------------------------------- /src/inmem/mutable/mod.rs: -------------------------------------------------------------------------------- 1 | //! Mutable memtable(s). 2 | //! 3 | //! This module implements the columnar-style mutable memtable used by Tonbo's 4 | //! dynamic layout. The earlier trait-based abstraction has been flattened: the 5 | //! engine now only builds the dynamic `DynMem` implementation. 
6 | 7 | pub(crate) mod memtable; 8 | mod metrics; 9 | 10 | pub(crate) use memtable::DynMem; 11 | pub(crate) use metrics::MutableMemTableMetrics; 12 | -------------------------------------------------------------------------------- /predicate/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2024" 3 | name = "tonbo-predicate" 4 | version = "0.1.0" 5 | description = "Predicate evaluation for Tonbo embedded database" 6 | license = "Apache-2.0" 7 | repository = "https://github.com/tonbo-io/tonbo" 8 | readme = "../README.md" 9 | 10 | [lib] 11 | path = "src/lib.rs" 12 | 13 | [features] 14 | default = [] 15 | 16 | [dependencies] 17 | roaring = "0.11" 18 | typed-arrow-dyn = { version = "0.0.6", features = ["serde"] } 19 | -------------------------------------------------------------------------------- /src/inmem/immutable/mod.rs: -------------------------------------------------------------------------------- 1 | //! Read-only immutable memtables. 2 | //! 3 | //! Currently only the dynamic runtime-schema layout is wired through. 4 | 5 | pub(crate) mod memtable; 6 | 7 | /// Immutable segment emitted by sealing the dynamic mutable memtable. 8 | pub(crate) type ImmutableSegment = memtable::ImmutableMemTable; 9 | 10 | /// Lightweight pruning helper; currently returns all segment indexes. 11 | pub(crate) fn prune_segments(segments: &[&ImmutableSegment]) -> Vec { 12 | (0..segments.len()).collect() 13 | } 14 | -------------------------------------------------------------------------------- /src/key/mod.rs: -------------------------------------------------------------------------------- 1 | //! Zero-copy key views and owned key wrappers over Arrow buffers. 2 | //! 3 | //! This module introduces the skeleton for the zero-copy key design. 4 | //! The goal is to keep hot-path key handling on borrow-based views 5 | //! that reference Arrow buffers directly while providing an owned form only 6 | //! where durability requires it. 7 | mod heap_size; 8 | mod owned; 9 | mod row; 10 | mod ts; 11 | 12 | pub use owned::{KeyOwned, KeyOwnedError}; 13 | pub use row::{KeyRow, KeyRowError}; 14 | pub use ts::{KeyTsOwned, KeyTsViewRaw}; 15 | -------------------------------------------------------------------------------- /src/ondisk/mod.rs: -------------------------------------------------------------------------------- 1 | //! On-disk SST scaffolding (writers/readers, merge, scan skeletons). 2 | //! 3 | //! This wip module captures the entry points for durable table structures such 4 | //! as SSTables. The concrete encoding and IO plumbing will land in follow-up 5 | //! changes; for now we outline the core types so downstream components can 6 | //! start wiring mode-agnostic APIs. 7 | 8 | /// Sorted-string-table primitives and planning helpers. 9 | pub mod sstable; 10 | 11 | /// Merge pipeline scaffolding used by major compaction. 12 | pub mod merge; 13 | 14 | /// Scan stream for a sstable 15 | pub mod scan; 16 | -------------------------------------------------------------------------------- /predicate/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(missing_docs)] 2 | //! Tonbo predicate facade crate. 3 | //! 4 | //! This crate is Arrow-first: predicate operands and literals are expressed 5 | //! directly in terms of `typed-arrow-dyn` cells, and evaluation assumes Arrow 6 | //! semantics (including NULL handling and mixed-width numeric coercions). There 7 | //! 
is no alternate storage or layout backend — keep the surface tight and Arrow 8 | //! native. 9 | 10 | mod core; 11 | 12 | pub use core::{ 13 | BitmapRowSet, ColumnRef, ComparisonOp, Operand, Predicate, PredicateNode, PredicateVisitor, 14 | RowId, RowIdIter, RowSet, ScalarValue, ScalarValueRef, VisitOutcome, 15 | }; 16 | -------------------------------------------------------------------------------- /src/mutation.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | /// Generic mutation container used across dynamic ingest paths. 4 | #[derive(Clone, PartialEq, Eq)] 5 | pub(crate) enum DynMutation { 6 | /// Insert or update payload materialised at commit. 7 | Upsert(U), 8 | /// Logical delete recorded at commit. 9 | Delete(D), 10 | } 11 | 12 | impl fmt::Debug for DynMutation { 13 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 14 | match self { 15 | DynMutation::Upsert(_) => f.write_str("DynMutation::Upsert"), 16 | DynMutation::Delete(_) => f.write_str("DynMutation::Delete"), 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | //! Convenience re-exports for common Tonbo usage. 2 | //! 3 | //! # Usage 4 | //! 5 | //! ```rust,ignore 6 | //! use tonbo::prelude::*; 7 | //! 8 | //! #[derive(Record)] 9 | //! struct User { 10 | //! #[metadata(k = "tonbo.key", v = "true")] 11 | //! id: String, 12 | //! name: String, 13 | //! } 14 | //! 15 | //! let db = DbBuilder::from_schema(User::schema())? 16 | //! .on_disk("/tmp/users")?.open().await?; 17 | //! ``` 18 | 19 | #[cfg(feature = "typed-arrow")] 20 | pub use typed_arrow::{Record, prelude::*, schema::SchemaMeta}; 21 | 22 | pub use crate::db::{ 23 | BatchesThreshold, ColumnRef, CommitAckMode, DB, DbBuilder, Predicate, ScalarValue, 24 | }; 25 | -------------------------------------------------------------------------------- /src/compaction/handle.rs: -------------------------------------------------------------------------------- 1 | //! Unified handle for background compaction workers. 2 | 3 | use std::marker::PhantomData; 4 | 5 | use fusio::executor::Executor; 6 | use futures::future::AbortHandle; 7 | 8 | /// Handle to a background compaction worker. 9 | /// 10 | /// Provides control over the worker lifecycle. The worker is automatically 11 | /// aborted when the handle is dropped. 12 | pub(crate) struct CompactionHandle { 13 | abort: AbortHandle, 14 | _marker: PhantomData, 15 | } 16 | 17 | impl CompactionHandle { 18 | /// Create a new compaction handle. 
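    ///
    /// Only the abort handle is retained; the optional join handle is currently unused, and
    /// dropping the returned value aborts the worker (see the `Drop` impl below).
    ///
    /// A construction sketch (hedged; `MyExecutor` is a placeholder for any executor type that
    /// satisfies the `Executor` bound):
    ///
    /// ```rust,ignore
    /// use futures::future::AbortHandle;
    ///
    /// let (abort, registration) = AbortHandle::new_pair();
    /// // Spawn `Abortable::new(worker_future, registration)` on the executor, then keep:
    /// let handle = CompactionHandle::<MyExecutor>::new(abort, None);
    /// drop(handle); // aborts the registered worker future
    /// ```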
19 | pub(crate) fn new(abort: AbortHandle, _join: Option<E::JoinHandle<()>>) -> Self { 20 | Self { 21 | abort, 22 | _marker: PhantomData, 23 | } 24 | } 25 | } 26 | 27 | impl<E: Executor> Drop for CompactionHandle<E> { 28 | fn drop(&mut self) { 29 | self.abort.abort(); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /docs/images/0001-query-lifecycle.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | ' Automatically number the steps 3 | autonumber 4 | 5 | ' Define participants (poles) 6 | participant User 7 | participant SQL 8 | participant QueryEngine 9 | participant Predicate 10 | participant Tonbo 11 | participant Parquet 12 | 13 | ' Define the sequence of events 14 | SQL -> QueryEngine: SQL 15 | QueryEngine -> Predicate : scan() (translation) 16 | Predicate -> Tonbo : [predicates], projection 17 | Tonbo -> Parquet : read metadata & prune 18 | Parquet -> Tonbo : RowSet 19 | Tonbo -> Predicate: scan plan with statistics 20 | Predicate -> QueryEngine : ExecutionPlan (translation) 21 | QueryEngine -> QueryEngine : plan optimization 22 | QueryEngine -> Predicate : optimized ExecutionPlan 23 | Predicate -> Tonbo : scan plan (translation) 24 | Tonbo -> Tonbo: execute() 25 | Tonbo -> Parquet : push down filter 26 | Parquet -> Tonbo: materialized record batch 27 | Tonbo -> Tonbo: aggregate 28 | Tonbo -> User : Data 29 | 30 | @enduml -------------------------------------------------------------------------------- /tests/common/mod.rs: -------------------------------------------------------------------------------- 1 | //! Common test utilities for integration tests. 2 | 3 | use std::sync::Arc; 4 | 5 | use arrow_schema::{Field, Schema}; 6 | use tonbo::{db::DynModeConfig, schema::SchemaBuilder}; 7 | 8 | /// Convenience helper that builds a DynMode configuration with embedded PK metadata. 9 | pub fn config_with_pk(fields: Vec<Field>, primary_key: &[&str]) -> DynModeConfig { 10 | assert!( 11 | !primary_key.is_empty(), 12 | "schema builder requires at least one primary-key column" 13 | ); 14 | 15 | let schema = Arc::new(Schema::new(fields)); 16 | let builder = SchemaBuilder::from_schema(schema); 17 | let builder = if primary_key.len() == 1 { 18 | builder.primary_key(primary_key[0].to_string()) 19 | } else { 20 | builder.composite_key(primary_key.iter().copied().collect::<Vec<_>>()) 21 | } 22 | .with_metadata(); 23 | 24 | builder 25 | .build() 26 | .expect("schema builder configuration should succeed") 27 | } 28 | -------------------------------------------------------------------------------- /src/db/error.rs: -------------------------------------------------------------------------------- 1 | use typed_arrow_dyn::DynViewError; 2 | 3 | use crate::{ 4 | db::KeyExtractError, manifest::ManifestError, ondisk::sstable::SsTableError, 5 | query::stream::StreamError, transaction::SnapshotError, 6 | }; 7 | 8 | /// Error returned for DB 9 | #[derive(Debug, thiserror::Error)] 10 | pub enum DBError { 11 | /// Key extract error 12 | #[error("key extract error: {0}")] 13 | Key(#[from] KeyExtractError), 14 | /// Manifest error 15 | #[error("manifest error: {0}")] 16 | Manifest(#[from] ManifestError), 17 | /// Read stream composition failed. 18 | #[error("stream error: {0}")] 19 | Stream(#[from] StreamError), 20 | /// SSTable read/write error. 21 | #[error("sstable error: {0}")] 22 | SsTable(#[from] SsTableError), 23 | /// Snapshot creation error. 24 | #[error("snapshot error: {0}")] 25 | Snapshot(#[from] SnapshotError), 26 | /// Dynamic view error.
27 | #[error("dynamic view error: {0}")] 28 | DynView(#[from] DynViewError), 29 | } 30 | -------------------------------------------------------------------------------- /src/compaction/mod.rs: -------------------------------------------------------------------------------- 1 | //! Compaction coordinators and planners for merging SSTables across all backends. 2 | //! 3 | //! These helpers sit on top of the in-memory staging surfaces and decide when 4 | //! to drain immutable runs into SSTables, whether those SSTables live on local 5 | //! disk or object storage (S3-compatible) via Fusio-backed Parquet writers. 6 | 7 | /// Compaction driver for orchestrating compaction operations. 8 | mod driver; 9 | /// Compaction executor interfaces. 10 | pub(crate) mod executor; 11 | /// Unified handle for background compaction workers. 12 | mod handle; 13 | /// Naïve minor-compaction driver for flushing immutable memtables. 14 | mod minor; 15 | /// Pure orchestration functions for version/outcome manipulation. 16 | pub(crate) mod orchestrator; 17 | /// Leveled compaction planning helpers. 18 | pub mod planner; 19 | /// Scheduler scaffolding for background/remote compaction (native builds only for now). 20 | mod scheduler; 21 | 22 | pub(crate) use driver::CompactionDriver; 23 | pub(crate) use handle::CompactionHandle; 24 | pub(crate) use minor::MinorCompactor; 25 | -------------------------------------------------------------------------------- /examples/cloudflare-worker/wrangler.toml: -------------------------------------------------------------------------------- 1 | name = "tonbo-example-worker" 2 | main = "build/worker/shim.mjs" 3 | compatibility_date = "2024-01-01" 4 | 5 | # Required: Tell wrangler this is a Rust Workers project 6 | [build] 7 | command = "cargo install -q worker-build && worker-build --release" 8 | 9 | # For local development with LocalStack: 10 | [vars] 11 | TONBO_S3_ENDPOINT = "http://localhost:4566" 12 | TONBO_S3_BUCKET = "tonbo-test" 13 | TONBO_S3_REGION = "us-east-1" 14 | 15 | # For production with Cloudflare R2, change to: 16 | # TONBO_S3_ENDPOINT = "https://YOUR_ACCOUNT_ID.r2.cloudflarestorage.com" 17 | # TONBO_S3_BUCKET = "your-bucket-name" 18 | # TONBO_S3_REGION = "auto" 19 | 20 | # Secrets (set via `npx wrangler secret put`): 21 | # - TONBO_S3_ACCESS_KEY 22 | # - TONBO_S3_SECRET_KEY 23 | 24 | # For local development with LocalStack, create .dev.vars: 25 | # TONBO_S3_ACCESS_KEY=test 26 | # TONBO_S3_SECRET_KEY=test 27 | # And override vars in wrangler.toml or use --var flag: 28 | # npx wrangler dev --var TONBO_S3_ENDPOINT:http://localhost:4566 --var TONBO_S3_BUCKET:tonbo-test --var TONBO_S3_REGION:us-east-1 29 | -------------------------------------------------------------------------------- /src/db/tests/core/common.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fs, 3 | path::{Path, PathBuf}, 4 | time::{SystemTime, UNIX_EPOCH}, 5 | }; 6 | 7 | pub(super) fn workspace_temp_dir(prefix: &str) -> PathBuf { 8 | let base = std::env::current_dir().expect("cwd"); 9 | let unique = SystemTime::now() 10 | .duration_since(UNIX_EPOCH) 11 | .expect("time") 12 | .as_nanos(); 13 | let dir = base 14 | .join("target") 15 | .join("tmp") 16 | .join(format!("{prefix}-{unique}")); 17 | fs::create_dir_all(&dir).expect("create temp dir"); 18 | dir 19 | } 20 | 21 | pub(super) fn wal_segment_paths(dir: &Path) -> Vec { 22 | if !dir.exists() { 23 | return Vec::new(); 24 | } 25 | let mut files = Vec::new(); 26 | if let Ok(entries) = 
fs::read_dir(dir) { 27 | for entry in entries.flatten() { 28 | let path = entry.path(); 29 | if path.extension().and_then(|ext| ext.to_str()) == Some("tonwal") { 30 | files.push(path); 31 | } 32 | } 33 | } 34 | files.sort(); 35 | files 36 | } 37 | -------------------------------------------------------------------------------- /src/id.rs: -------------------------------------------------------------------------------- 1 | //! Stable identifiers for files and objects across storage backends. 2 | 3 | use std::sync::Mutex; 4 | 5 | use ulid::{Generator, Ulid}; 6 | 7 | /// Identifier used for files and other persisted artifacts. 8 | pub(crate) type FileId = Ulid; 9 | 10 | /// Thread-safe ULID generator scoped to a single database instance. 11 | pub(crate) struct FileIdGenerator { 12 | inner: Mutex, 13 | } 14 | 15 | impl FileIdGenerator { 16 | /// Create a new generator seeded with the current time. 17 | pub(crate) fn new() -> Self { 18 | Self { 19 | inner: Mutex::new(Generator::new()), 20 | } 21 | } 22 | 23 | /// Produce the next [`FileId`] in a monotonic, time-ordered sequence. 24 | pub(crate) fn generate(&self) -> FileId { 25 | let mut guard = self 26 | .inner 27 | .lock() 28 | .expect("file id generator mutex should not be poisoned"); 29 | guard 30 | .generate() 31 | .expect("file id generator should advance without error") 32 | } 33 | } 34 | 35 | impl Default for FileIdGenerator { 36 | fn default() -> Self { 37 | Self::new() 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/manifest/version.rs: -------------------------------------------------------------------------------- 1 | use super::domain::{SstEntry, WalSegmentRef}; 2 | use crate::ondisk::sstable::SsTableId; 3 | 4 | /// Manifest edits applied sequentially to produce a new version. 5 | 6 | #[derive(Debug, Clone)] 7 | pub(crate) enum VersionEdit { 8 | /// Add SST entries to the specified level. 9 | AddSsts { 10 | /// Compaction level receiving the new SSTs. 11 | level: u32, 12 | /// SST descriptors to attach to the level. 13 | entries: Vec, 14 | }, 15 | /// Remove SST entries (by id) from the specified level. 16 | RemoveSsts { 17 | /// Level whose SST set should be pruned. 18 | level: u32, 19 | /// Identifiers of SSTs that must be removed. 20 | sst_ids: Vec, 21 | }, 22 | /// Replace the WAL segments referenced by the version with the provided set. 23 | SetWalSegments { 24 | /// Complete set of WAL fragments backing the version. 25 | segments: Vec, 26 | }, 27 | /// Update the tombstone watermark. 28 | SetTombstoneWatermark { 29 | /// Upper MVCC bound for tombstones visible in the version. 
30 | watermark: u64, 31 | }, 32 | } 33 | -------------------------------------------------------------------------------- /examples/cloudflare-worker/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tonbo-cloudflare-worker" 3 | version = "0.1.0" 4 | edition = "2024" 5 | publish = false 6 | 7 | # Standalone workspace (not part of parent tonbo workspace) 8 | [workspace] 9 | 10 | [lib] 11 | crate-type = ["cdylib"] 12 | 13 | [dependencies] 14 | # Tonbo with web (WASM) features 15 | tonbo = { path = "../..", default-features = false, features = ["web"] } 16 | 17 | # Fusio for WebExecutor and AmazonS3 types 18 | fusio = { version = "0.5.0", default-features = false, features = [ 19 | "aws", 20 | "executor-web", 21 | ] } 22 | 23 | # Arrow for RecordBatch creation 24 | arrow-array = "56.2.0" 25 | arrow-schema = "56.2.0" 26 | 27 | # Cloudflare Workers runtime 28 | worker = "0.7" 29 | console_error_panic_hook = "0.1" 30 | 31 | # getrandom needs wasm_js feature for WASM 32 | getrandom = { version = "0.3", features = ["wasm_js"] } 33 | 34 | # For async streams 35 | futures = "0.3" 36 | 37 | # Size optimizations - Cloudflare Workers have a 10MB limit 38 | [profile.release] 39 | opt-level = "z" # Optimize for size 40 | lto = "fat" # Full link-time optimization 41 | strip = "symbols" # Strip debug symbols 42 | codegen-units = 1 # Better optimization (slower compile) 43 | panic = "abort" # Smaller panic handling 44 | -------------------------------------------------------------------------------- /predicate/src/core/operand.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use super::ScalarValue; 4 | 5 | /// Reference identifying a column used inside predicates. 6 | /// 7 | /// This is a logical column reference using only the column name. 8 | /// Physical binding (resolving to schema indices) happens during 9 | /// query planning, not at predicate construction time. 10 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 11 | pub struct ColumnRef { 12 | /// Canonical column name. 13 | pub name: Arc, 14 | } 15 | 16 | impl ColumnRef { 17 | /// Creates a new column reference from a name. 18 | #[must_use] 19 | pub fn new(name: N) -> Self 20 | where 21 | N: Into>, 22 | { 23 | Self { name: name.into() } 24 | } 25 | } 26 | 27 | /// Operand used by predicate comparisons and function calls. 28 | #[derive(Clone, Debug, PartialEq)] 29 | pub enum Operand { 30 | /// Reference to a column. 31 | Column(ColumnRef), 32 | /// Literal value. 
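    ///
    /// Operands are usually built through the `From` conversions below; a hedged sketch (the
    /// `i64` literal mirrors the `ScalarValue::from(80_i64)` form used in the repository
    /// examples):
    ///
    /// ```rust,ignore
    /// let column: Operand = ColumnRef::new("score").into();
    /// let literal: Operand = ScalarValue::from(80_i64).into();
    /// ```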
33 | Literal(ScalarValue), 34 | } 35 | 36 | impl From for Operand { 37 | fn from(value: ColumnRef) -> Self { 38 | Self::Column(value) 39 | } 40 | } 41 | 42 | impl From for Operand { 43 | fn from(value: ScalarValue) -> Self { 44 | Self::Literal(value) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /docs/images/0002-predicate-pushdown.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | top to bottom direction 3 | 4 | title "Predicate Pushdown" 5 | 6 | rectangle "Step 1: Plan" { 7 | collections "SST" as SSTs 8 | collections "Transaction" as Transactions 9 | collections "Memtable" as Memtables 10 | 11 | [Transactions] --> [PrunedRowSet\n(Transaction)] : Predicate 12 | [Memtables] --> [PrunedRowSet\n(Memtable)] : Predicate 13 | [SSTs] --> [PrunedRowSet\n(SST)] : Predicate 14 | } 15 | 16 | 17 | rectangle "Step 2: Execute" { 18 | [PrunedRowSet\n(Transaction)] --> [FilteredRowSet\n(Transaction)] : Predicate 19 | [PrunedRowSet\n(Memtable)] --> [FilteredRowSet\n(Memtable)] : Predicate 20 | [PrunedRowSet\n(SST)] --> (Pushdown Filter) : Predicate 21 | (Pushdown Filter) --> [FilteredRowSet\n(SST)] : pushdown predicate 22 | } 23 | 24 | rectangle "Step 3: Merge" { 25 | rectangle "Merged Stream" as MS { 26 | rectangle "DedupedRowSetMaps" as Inner 27 | } 28 | 29 | [FilteredRowSet\n(Transaction)] --> Inner : deduplicate & MVCC 30 | [FilteredRowSet\n(Memtable)] --> Inner 31 | [FilteredRowSet\n(SST)] --> Inner 32 | } 33 | 34 | rectangle "Step 4: Materialize" { 35 | MS--> [Materialized Row Stream] : materialize row from sources&\napply residual predicates 36 | } 37 | 38 | rectangle "Step 5: Package/Aggregation" { 39 | [Materialized Row Stream] --> [RecordBatch] : aggregate & package 40 | } 41 | 42 | @enduml 43 | -------------------------------------------------------------------------------- /src/extractor/mod.rs: -------------------------------------------------------------------------------- 1 | //! Arrow `RecordBatch` key extraction into zero-copy key views. 2 | //! 3 | //! These APIs are Tonbo-specific shims that turn Arrow batches into the 4 | //! zero-copy key views defined under [`crate::key`]. The compile-time typed 5 | //! record support that previously lived here has been removed; keeping the module 6 | //! focused on extraction keeps the ingest path clear. 7 | 8 | mod errors; 9 | mod extractors; 10 | 11 | use arrow_array::RecordBatch; 12 | use arrow_schema::SchemaRef; 13 | pub use errors::KeyExtractError; 14 | pub(crate) use extractors::{ 15 | map_view_err, projection_for_columns, projection_for_field, row_from_batch, 16 | }; 17 | 18 | use crate::key::KeyRow; 19 | 20 | /// Schema-validated projection that can materialise logical keys from record batches. 21 | pub trait KeyProjection: Send + Sync { 22 | /// Ensure the projection is compatible with `schema`. 23 | fn validate_schema(&self, schema: &SchemaRef) -> Result<(), KeyExtractError>; 24 | 25 | /// Schema describing just the key columns (no MVCC sidecars). 26 | fn key_schema(&self) -> SchemaRef; 27 | 28 | /// Column indices (in schema order) that form the key projection. 29 | fn key_indices(&self) -> &[usize]; 30 | 31 | /// Project borrowed key views for the requested `rows` (in order) from `batch`. 
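    ///
    /// A call-site sketch (hedged; it assumes `projection` implements [`KeyProjection`] and that
    /// `batch` contains the projection's key columns):
    ///
    /// ```rust,ignore
    /// projection.validate_schema(&batch.schema())?;
    /// let keys = projection.project_view(&batch, &[0, 2, 5])?;
    /// assert_eq!(keys.len(), 3); // one borrowed key view per requested row, in order
    /// ```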
32 | fn project_view( 33 | &self, 34 | batch: &RecordBatch, 35 | rows: &[usize], 36 | ) -> Result, KeyExtractError>; 37 | } 38 | -------------------------------------------------------------------------------- /src/manifest/mod.rs: -------------------------------------------------------------------------------- 1 | //! Manifest coordination atop `fusio-manifest` for versioned metadata. 2 | //! 3 | //! Internals are generic, but we expose concrete helpers for the supported 4 | //! filesystem backends instead of a new abstraction layer. 5 | 6 | use fusio::{ 7 | dynamic::{MaybeSend, MaybeSync}, 8 | executor::{Executor, Timer}, 9 | fs::{Fs, FsCas}, 10 | }; 11 | 12 | pub(crate) mod bootstrap; 13 | pub(crate) mod codec; 14 | mod domain; 15 | mod driver; 16 | mod version; 17 | 18 | /// Filesystem bound required by manifest store implementations. 19 | pub trait ManifestFs: Fs + FsCas + Clone + MaybeSend + MaybeSync + 'static 20 | where 21 | E: Executor + Timer + Clone + 'static, 22 | Self: fusio_manifest::ObjectHead, 23 | ::File: fusio::durability::FileCommit, 24 | { 25 | } 26 | 27 | impl ManifestFs for FS 28 | where 29 | FS: Fs + FsCas + Clone + MaybeSend + MaybeSync + 'static, 30 | E: Executor + Timer + Clone + 'static, 31 | FS: fusio_manifest::ObjectHead, 32 | ::File: fusio::durability::FileCommit, 33 | { 34 | } 35 | 36 | #[cfg(all(test, feature = "tokio"))] 37 | pub(crate) use bootstrap::init_fs_manifest_in_memory; 38 | #[cfg(test)] 39 | pub(crate) use bootstrap::init_in_memory_manifest; 40 | pub(crate) use bootstrap::{TableSnapshot, TonboManifest}; 41 | #[cfg(all(test, feature = "tokio"))] 42 | pub(crate) use domain::TableHead; 43 | pub use domain::VersionState; 44 | pub(crate) use domain::{ 45 | GcPlanState, GcSstRef, SstEntry, TableDefinition, TableId, TableMeta, WalSegmentRef, 46 | }; 47 | pub use driver::ManifestError; 48 | pub(crate) use driver::ManifestResult; 49 | pub(crate) use version::VersionEdit; 50 | -------------------------------------------------------------------------------- /tests/web_executor.rs: -------------------------------------------------------------------------------- 1 | #![cfg(all(target_arch = "wasm32", feature = "web"))] 2 | 3 | use std::time::Duration; 4 | 5 | use fusio::{ 6 | Read, Write, 7 | executor::{Executor, JoinHandle, Timer, web::WebExecutor}, 8 | fs::{Fs, OpenOptions}, 9 | impls::mem::fs::InMemoryFs, 10 | }; 11 | use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure}; 12 | 13 | wasm_bindgen_test_configure!(run_in_browser); 14 | 15 | #[wasm_bindgen_test] 16 | async fn spawn_and_sleep_progresses() { 17 | let exec = WebExecutor::new(); 18 | let value = WebExecutor::rw_lock(0); 19 | 20 | let handle = exec.spawn({ 21 | let value = value.clone(); 22 | async move { 23 | let mut guard = value.write().await; 24 | *guard = 7; 25 | } 26 | }); 27 | 28 | // Join is unjoinable on web; it should error but the task should still run. 
29 | assert!(handle.join().await.is_err()); 30 | 31 | exec.sleep(Duration::from_millis(5)).await; 32 | assert_eq!(*value.read().await, 7); 33 | } 34 | 35 | #[wasm_bindgen_test] 36 | async fn in_memory_fs_roundtrip() { 37 | let fs = InMemoryFs::new(); 38 | let mut file = fs 39 | .open_options( 40 | &"web/roundtrip.txt".into(), 41 | OpenOptions::default() 42 | .write(true) 43 | .create(true) 44 | .truncate(true), 45 | ) 46 | .await 47 | .expect("open file"); 48 | 49 | let (res, _) = file.write_all(&b"tonbo-web"[..]).await; 50 | res.expect("write"); 51 | let (res, buf) = file.read_to_end_at(Vec::new(), 0).await; 52 | res.expect("read"); 53 | assert_eq!(buf, b"tonbo-web"); 54 | file.close().await.expect("close"); 55 | } 56 | -------------------------------------------------------------------------------- /examples/01_basic.rs: -------------------------------------------------------------------------------- 1 | //! Basic Tonbo example: define schema, insert, query 2 | //! 3 | //! Run: cargo run --example 01_basic 4 | 5 | use tonbo::prelude::*; 6 | 7 | #[derive(Record)] 8 | struct User { 9 | #[metadata(k = "tonbo.key", v = "true")] 10 | id: String, 11 | name: String, 12 | score: Option, 13 | } 14 | 15 | #[tokio::main] 16 | async fn main() -> Result<(), Box> { 17 | // 1. Open database (key detected from schema metadata) 18 | let db = DbBuilder::from_schema(User::schema())? 19 | .on_disk("/tmp/tonbo_example")? 20 | .open() 21 | .await?; 22 | 23 | // 2. Insert data 24 | let users = vec![ 25 | User { 26 | id: "u1".into(), 27 | name: "Alice".into(), 28 | score: Some(100), 29 | }, 30 | User { 31 | id: "u2".into(), 32 | name: "Bob".into(), 33 | score: Some(85), 34 | }, 35 | User { 36 | id: "u3".into(), 37 | name: "Carol".into(), 38 | score: None, 39 | }, 40 | ]; 41 | 42 | let mut builders = User::new_builders(users.len()); 43 | builders.append_rows(users); 44 | db.ingest(builders.finish().into_record_batch()).await?; 45 | 46 | // 3. Query: score > 80 47 | let filter = Predicate::gt(ColumnRef::new("score"), ScalarValue::from(80_i64)); 48 | let batches = db.scan().filter(filter).collect().await?; 49 | 50 | println!("Users with score > 80:"); 51 | for batch in &batches { 52 | for user in batch.iter_views::()?.try_flatten()? 
{ 53 | println!(" {} - {} ({:?})", user.id, user.name, user.score); 54 | } 55 | } 56 | 57 | Ok(()) 58 | } 59 | -------------------------------------------------------------------------------- /src/tests_internal/compaction_loop_spawn.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "tokio")] 2 | 3 | use std::{sync::Arc, time::Duration}; 4 | 5 | use arrow_schema::{DataType, Field, Schema}; 6 | use fusio::{executor::tokio::TokioExecutor, mem::fs::InMemoryFs, path::Path}; 7 | use tokio::{task::LocalSet, time::sleep}; 8 | 9 | use crate::{ 10 | compaction::planner::CompactionStrategy, db::DB, ondisk::sstable::SsTableConfig, 11 | schema::SchemaBuilder, 12 | }; 13 | 14 | #[tokio::test(flavor = "current_thread")] 15 | async fn compaction_loop_is_spawned_when_configured() { 16 | let local = LocalSet::new(); 17 | local 18 | .run_until(async { 19 | let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Utf8, false)])); 20 | let cfg = SchemaBuilder::from_schema(Arc::clone(&schema)) 21 | .primary_key("id") 22 | .build() 23 | .expect("schema"); 24 | 25 | let fs = Arc::new(InMemoryFs::default()); 26 | let root = Path::parse("compaction").expect("path"); 27 | let sst_cfg = Arc::new(SsTableConfig::new(Arc::clone(&schema), fs, root)); 28 | 29 | let db = DB::::builder(cfg) 30 | .in_memory("compaction-loop") 31 | .expect("in_memory config") 32 | .with_compaction_strategy(CompactionStrategy::default()) 33 | .with_compaction_loop(Duration::from_millis(5), Arc::clone(&sst_cfg), 1) 34 | .build() 35 | .await 36 | .expect("build"); 37 | 38 | assert!( 39 | db.has_compaction_worker(), 40 | "compaction loop should be spawned when requested" 41 | ); 42 | 43 | // Let the loop tick at least once before dropping. 44 | sleep(Duration::from_millis(10)).await; 45 | }) 46 | .await; 47 | } 48 | -------------------------------------------------------------------------------- /.githooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | ROOT_DIR="$(git rev-parse --show-toplevel)" 6 | cd "${ROOT_DIR}" 7 | 8 | HOOK_NAME="tonbo pre-commit" 9 | S3_ENV_SCRIPT="${ROOT_DIR}/tests/s3_localstack_env.sh" 10 | 11 | log() { 12 | echo "[${HOOK_NAME}] $1" 13 | } 14 | 15 | if ! command -v cargo >/dev/null 2>&1; then 16 | echo "[${HOOK_NAME}] skipping: cargo not found" >&2 17 | exit 0 18 | fi 19 | 20 | if ! command -v rustup >/dev/null 2>&1; then 21 | echo "[${HOOK_NAME}] rustup not detected; install rustup and the nightly toolchain" >&2 22 | exit 1 23 | fi 24 | 25 | if ! rustup toolchain list | grep -q "nightly"; then 26 | echo "[${HOOK_NAME}] nightly toolchain required (rustup toolchain install nightly)" >&2 27 | exit 1 28 | fi 29 | 30 | FMT_CHECK_CMD="cargo +nightly fmt --all -- --check" 31 | CLIPPY_CMD="cargo clippy --workspace -- -D warnings" 32 | BUILD_CMD="cargo build --verbose" 33 | TEST_CMD="cargo test --verbose" 34 | 35 | run_step() { 36 | local cmd="$1" 37 | log "$cmd" 38 | if ! eval "$cmd"; then 39 | echo "[${HOOK_NAME}] command failed: $cmd" >&2 40 | if [[ "$cmd" == "$FMT_CHECK_CMD" ]]; then 41 | echo "[${HOOK_NAME}] run 'cargo +nightly fmt --all' to apply formatting and re-stage changes before committing." 
>&2 42 | fi 43 | exit 1 44 | fi 45 | } 46 | 47 | run_step "$FMT_CHECK_CMD" 48 | run_step "$CLIPPY_CMD" 49 | run_step "$BUILD_CMD" 50 | run_step "$TEST_CMD" 51 | 52 | log "bootstrap LocalStack (if available) for S3-backed public_api_e2e" 53 | if source "${S3_ENV_SCRIPT}"; then 54 | run_step "cargo test public_api_e2e:: -- --nocapture" 55 | if [ "${TONBO_LOCALSTACK_STARTED_BY_SCRIPT:-0}" = "1" ] && command -v docker >/dev/null 2>&1; then 56 | docker rm -f "${TONBO_LOCALSTACK_CONTAINER}" >/dev/null 2>&1 || true 57 | fi 58 | else 59 | log "LocalStack not available; running public_api_e2e (local only)" 60 | run_step "cargo test public_api_e2e:: -- --nocapture" 61 | fi 62 | 63 | exit 0 64 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Integration Test Harnesses 2 | 3 | ## `s3_smoke` 4 | 5 | - **What**: Exercises Tonbo's S3 object-store path end-to-end using LocalStack. 6 | - **How to run**: 7 | 1. Ensure Docker is available. 8 | 2. From the repo root, run `./tests/s3_smoke.sh`. 9 | - The script starts LocalStack, provisions the bucket, seeds the required 10 | `TONBO_S3_*` environment variables, and runs 11 | `cargo test --features s3-smoke --test s3_smoke`. 12 | 13 | | Variable | Description | Default when using script | 14 | |-------------------------|------------------------------------------|---------------------------| 15 | | `TONBO_S3_ENDPOINT` | HTTP endpoint for the S3-compatible API | `http://localhost:4566` | 16 | | `TONBO_S3_BUCKET` | Bucket name used for the smoke run | `tonbo-smoke` | 17 | | `TONBO_S3_REGION` | Region reported to fusio | `us-east-1` | 18 | | `TONBO_S3_ACCESS_KEY` | Access key ID | `test` | 19 | | `TONBO_S3_SECRET_KEY` | Secret access key | `test` | 20 | | `TONBO_S3_SESSION_TOKEN`| Optional session token (only if needed) | unset | 21 | 22 | The script derives the defaults from its own `AWS_*` variables. Override any of 23 | them before running the script to point at an existing deployment. 24 | 25 | The smoke test writes a single batch, so expect a fresh prefix such as 26 | `smoke-/` containing `wal/wal-000…0.tonwal` and `wal/state.json` in 27 | your bucket when it finishes. 28 | 3. On success it prints "Smoke test complete." and tails recent LocalStack 29 | logs; on failure it exits non-zero and shows a longer log tail for 30 | debugging. 31 | - **Alternative**: If you already have an S3-compatible endpoint up, export the 32 | `TONBO_S3_*` variables yourself and run 33 | `cargo test --features s3-smoke --test s3_smoke`. 34 | 35 | The `s3_smoke` test is gated behind the `s3-smoke` feature, so the regular test 36 | suite will skip it unless explicitly enabled. 37 | -------------------------------------------------------------------------------- /examples/02b_snapshot.rs: -------------------------------------------------------------------------------- 1 | //! Snapshots: read-only consistent view of the database 2 | //! 3 | //! Run: cargo run --example 02b_snapshot 4 | 5 | use tonbo::prelude::*; 6 | 7 | #[derive(Record)] 8 | struct User { 9 | #[metadata(k = "tonbo.key", v = "true")] 10 | id: String, 11 | name: String, 12 | score: Option, 13 | } 14 | 15 | #[tokio::main] 16 | async fn main() -> Result<(), Box> { 17 | let db = DbBuilder::from_schema(User::schema())? 18 | .on_disk("/tmp/tonbo_snapshot_example")? 
19 | .open() 20 | .await?; 21 | 22 | // Insert initial data 23 | let users = vec![User { 24 | id: "u1".into(), 25 | name: "Alice".into(), 26 | score: Some(100), 27 | }]; 28 | let mut builders = User::new_builders(users.len()); 29 | builders.append_rows(users); 30 | db.ingest(builders.finish().into_record_batch()).await?; 31 | 32 | // Take a snapshot (read-only, consistent view) 33 | let snapshot = db.begin_snapshot().await?; 34 | 35 | // Insert more data after snapshot 36 | let more = vec![User { 37 | id: "u2".into(), 38 | name: "Bob".into(), 39 | score: Some(85), 40 | }]; 41 | let mut builders = User::new_builders(more.len()); 42 | builders.append_rows(more); 43 | db.ingest(builders.finish().into_record_batch()).await?; 44 | 45 | // Snapshot sees only data at snapshot time 46 | let filter = Predicate::is_not_null(ColumnRef::new("id")); 47 | let snapshot_data = snapshot.scan(&db).filter(filter.clone()).collect().await?; 48 | 49 | println!("Snapshot (frozen in time):"); 50 | for batch in &snapshot_data { 51 | for user in batch.iter_views::()?.try_flatten()? { 52 | println!(" {} - {}", user.id, user.name); 53 | } 54 | } 55 | 56 | // Current DB sees all data 57 | let current_data = db.scan().filter(filter).collect().await?; 58 | 59 | println!("\nCurrent DB:"); 60 | for batch in ¤t_data { 61 | for user in batch.iter_views::()?.try_flatten()? { 62 | println!(" {} - {}", user.id, user.name); 63 | } 64 | } 65 | 66 | Ok(()) 67 | } 68 | -------------------------------------------------------------------------------- /src/wal/metrics.rs: -------------------------------------------------------------------------------- 1 | //! Metrics and observability glue for the WAL. 2 | 3 | /// Collection of WAL metrics exposed to monitoring systems. 4 | #[derive(Default, Debug)] 5 | pub struct WalMetrics { 6 | /// Current depth of the writer queue. 7 | pub queue_depth: usize, 8 | /// Bytes written since process start. 9 | pub bytes_written: u64, 10 | /// Number of durability operations performed. 11 | pub sync_operations: u64, 12 | /// Number of times the manifest advanced the WAL floor. 13 | pub wal_floor_advancements: u64, 14 | /// Total WAL segments physically pruned. 15 | pub wal_segments_pruned: u64, 16 | /// Total WAL segments flagged for deletion during dry-runs. 17 | pub wal_prune_dry_runs: u64, 18 | /// Number of failed prune attempts. 19 | pub wal_prune_failures: u64, 20 | } 21 | 22 | impl WalMetrics { 23 | /// Record a queue depth update. 24 | pub fn record_queue_depth(&mut self, depth: usize) { 25 | self.queue_depth = depth; 26 | } 27 | 28 | /// Record additional written bytes. 29 | pub fn record_bytes_written(&mut self, bytes: u64) { 30 | self.bytes_written = self.bytes_written.saturating_add(bytes); 31 | } 32 | 33 | /// Record a durability operation. 34 | pub fn record_sync(&mut self) { 35 | self.sync_operations = self.sync_operations.saturating_add(1); 36 | } 37 | 38 | /// Record an advancement of the WAL retention floor. 39 | pub fn record_wal_floor_advance(&mut self) { 40 | self.wal_floor_advancements = self.wal_floor_advancements.saturating_add(1); 41 | } 42 | 43 | /// Record physical WAL segment deletions. 44 | pub fn record_wal_pruned(&mut self, segments: u64) { 45 | self.wal_segments_pruned = self.wal_segments_pruned.saturating_add(segments); 46 | } 47 | 48 | /// Record the number of segments that would be deleted in dry-run mode. 
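    ///
    /// A small usage sketch (all counters start at zero via `Default`):
    ///
    /// ```rust,ignore
    /// let mut metrics = WalMetrics::default();
    /// metrics.record_wal_prune_dry_run(3);
    /// assert_eq!(metrics.wal_prune_dry_runs, 3);
    /// ```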
49 | pub fn record_wal_prune_dry_run(&mut self, segments: u64) { 50 | self.wal_prune_dry_runs = self.wal_prune_dry_runs.saturating_add(segments); 51 | } 52 | 53 | /// Record a prune failure. 54 | pub fn record_wal_prune_failure(&mut self) { 55 | self.wal_prune_failures = self.wal_prune_failures.saturating_add(1); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/inmem/mutable/metrics.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; 2 | 3 | #[derive(Debug)] 4 | pub(crate) struct MutableMemTableMetrics { 5 | entries: AtomicUsize, 6 | inserts: AtomicU64, 7 | replaces: AtomicU64, 8 | approx_key_bytes: AtomicUsize, 9 | entry_overhead: AtomicUsize, 10 | } 11 | 12 | #[derive(Debug, Default, Clone, Copy)] 13 | pub(crate) struct MutableMemTableMetricsSnapshot { 14 | pub entries: usize, 15 | pub inserts: u64, 16 | pub replaces: u64, 17 | pub approx_key_bytes: usize, 18 | pub entry_overhead: usize, 19 | } 20 | 21 | impl MutableMemTableMetrics { 22 | pub(crate) fn new(entry_overhead: usize) -> Self { 23 | Self { 24 | entries: AtomicUsize::new(0), 25 | inserts: AtomicU64::new(0), 26 | replaces: AtomicU64::new(0), 27 | approx_key_bytes: AtomicUsize::new(0), 28 | entry_overhead: AtomicUsize::new(entry_overhead), 29 | } 30 | } 31 | 32 | pub(crate) fn record_write(&self, has_existing: bool, key_bytes: usize) { 33 | self.inserts.fetch_add(1, Ordering::Relaxed); 34 | if has_existing { 35 | self.replaces.fetch_add(1, Ordering::Relaxed); 36 | } else { 37 | self.entries.fetch_add(1, Ordering::Relaxed); 38 | self.approx_key_bytes 39 | .fetch_add(key_bytes, Ordering::Relaxed); 40 | } 41 | } 42 | 43 | pub(crate) fn snapshot(&self) -> MutableMemTableMetricsSnapshot { 44 | MutableMemTableMetricsSnapshot { 45 | entries: self.entries.load(Ordering::Relaxed), 46 | inserts: self.inserts.load(Ordering::Relaxed), 47 | replaces: self.replaces.load(Ordering::Relaxed), 48 | approx_key_bytes: self.approx_key_bytes.load(Ordering::Relaxed), 49 | entry_overhead: self.entry_overhead.load(Ordering::Relaxed), 50 | } 51 | } 52 | 53 | pub(crate) fn reset_counters(&self) { 54 | self.entries.store(0, Ordering::Relaxed); 55 | self.inserts.store(0, Ordering::Relaxed); 56 | self.replaces.store(0, Ordering::Relaxed); 57 | self.approx_key_bytes.store(0, Ordering::Relaxed); 58 | } 59 | } 60 | 61 | impl Default for MutableMemTableMetrics { 62 | fn default() -> Self { 63 | Self::new(0) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/test.rs: -------------------------------------------------------------------------------- 1 | //! Test utilities and helpers for tonbo. 2 | //! 3 | //! This module contains internal test utilities available under `#[cfg(test)]`. 4 | 5 | use arrow_array::RecordBatch; 6 | use arrow_schema::SchemaRef; 7 | #[cfg(feature = "tokio")] 8 | use arrow_schema::{Field, Schema}; 9 | use typed_arrow_dyn::{DynBuilders, DynCell, DynError, DynRow}; 10 | 11 | #[cfg(feature = "tokio")] 12 | use crate::{mode::DynModeConfig, schema::SchemaBuilder}; 13 | 14 | /// Trait for types that can be converted into a `DynRow`. 15 | pub(crate) trait IntoDynRow { 16 | /// Convert into a `DynRow`. 
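    ///
    /// A conversion sketch (hedged; `cells_for_row` is a hypothetical helper standing in for
    /// whatever produces one `Option<DynCell>` per schema column):
    ///
    /// ```rust,ignore
    /// let cells: Vec<Option<DynCell>> = cells_for_row(); // hypothetical helper
    /// let row: DynRow = cells.into_dyn_row(); // Vec<Option<DynCell>> -> DynRow
    /// let row: DynRow = row.into_dyn_row();   // DynRow passes through unchanged
    /// ```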
17 | fn into_dyn_row(self) -> DynRow; 18 | } 19 | 20 | impl IntoDynRow for DynRow { 21 | fn into_dyn_row(self) -> DynRow { 22 | self 23 | } 24 | } 25 | 26 | impl IntoDynRow for Vec> { 27 | fn into_dyn_row(self) -> DynRow { 28 | DynRow(self) 29 | } 30 | } 31 | 32 | /// Build a `RecordBatch` from dynamic rows, validating nullability. 33 | /// 34 | /// Accepts either `Vec` or `Vec>>` for convenience. 35 | /// 36 | /// # Errors 37 | /// Returns [`DynError`] if any row violates the schema or array construction fails. 38 | pub(crate) fn build_batch( 39 | schema: SchemaRef, 40 | rows: Vec, 41 | ) -> Result { 42 | let mut builders = DynBuilders::new(schema.clone(), rows.len()); 43 | for row in rows { 44 | builders.append_option_row(Some(row.into_dyn_row()))?; 45 | } 46 | builders.try_finish_into_batch() 47 | } 48 | 49 | /// Convenience helper that builds a DynMode configuration with embedded PK metadata. 50 | #[cfg(feature = "tokio")] 51 | pub(crate) fn config_with_pk(fields: Vec, primary_key: &[&str]) -> DynModeConfig { 52 | assert!( 53 | !primary_key.is_empty(), 54 | "schema builder requires at least one primary-key column" 55 | ); 56 | 57 | let schema = SchemaRef::new(Schema::new(fields)); 58 | let builder = SchemaBuilder::from_schema(schema); 59 | let builder = if primary_key.len() == 1 { 60 | builder.primary_key(primary_key[0].to_string()) 61 | } else { 62 | builder.composite_key(primary_key.iter().copied().collect::>()) 63 | } 64 | .with_metadata(); 65 | 66 | builder 67 | .build() 68 | .expect("schema builder configuration should succeed") 69 | } 70 | -------------------------------------------------------------------------------- /docs/rfcs/0000-rfc-style-guide.md: -------------------------------------------------------------------------------- 1 | # RFC: RFC Style Guide 2 | 3 | - Status: Accepted 4 | - Authors: Tonbo team 5 | - Created: 2025-12-03 6 | - Area: Process 7 | 8 | ## Summary 9 | 10 | Define the structure and style for Tonbo RFCs. RFCs document design intent and semantics, not implementation details. 11 | 12 | ## Motivation 13 | 14 | - Ensure consistent RFC structure across the project 15 | - Make RFCs easy to navigate and review 16 | - Keep design documents focused on *what* and *why*, not *how* 17 | 18 | ## Goals 19 | 20 | - Establish a standard RFC template 21 | - Define required and optional sections 22 | - Set expectations for content style 23 | 24 | ## Non-Goals 25 | 26 | - Prescribing implementation approaches 27 | - Defining code review or approval processes 28 | 29 | ## Design 30 | 31 | ### Header Metadata 32 | 33 | Every RFC starts with: 34 | 35 | ``` 36 | # RFC: 37 | 38 | - Status: Draft | Accepted | Implementing | Superseded 39 | - Authors: <team or individuals> 40 | - Created: <YYYY-MM-DD> 41 | - Updated: <YYYY-MM-DD> (if revised) 42 | - Area: <affected components> 43 | ``` 44 | 45 | ### Required Sections 46 | 47 | | Section | Purpose | 48 | |---------|---------| 49 | | Summary | One paragraph describing the design | 50 | | Motivation | Why this design is needed | 51 | | Goals | What the design achieves | 52 | | Non-Goals | Explicit scope boundaries | 53 | | Design | The core design with subsections as needed | 54 | 55 | ### Optional Sections 56 | 57 | | Section | Purpose | 58 | |---------|---------| 59 | | Alternatives Considered | Other approaches and why they were rejected | 60 | | Comparison with Other Systems | How similar systems (Iceberg, RocksDB, etc.) 
solve the same problem and trade-offs of our approach | 61 | | Future Work | Known limitations and planned extensions | 62 | 63 | ### Style Principles 64 | 65 | - **Precursory**: Write or update the RFC before starting implementation; use it to gather feedback and align direction 66 | - **Skimmable**: Structure with clear headings; readers should locate relevant information quickly 67 | - **Current**: Incorrect documentation is worse than missing documentation; update RFCs when implementation diverges 68 | - **Consistent**: Use consistent terminology across RFCs; align with `docs/overview.md` vocabulary 69 | - **Semantic**: Describe *what* and *why*, not *how*; focus on contracts, not code 70 | - Use tables and diagrams to clarify complex relationships 71 | -------------------------------------------------------------------------------- /src/manifest/codec.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hash; 2 | 3 | use serde::{Serialize, de::DeserializeOwned}; 4 | 5 | use crate::manifest::{ 6 | ManifestError, 7 | domain::{CatalogKey, CatalogValue, GcPlanKey, GcPlanValue, VersionKey, VersionValue}, 8 | }; 9 | 10 | /// Trait describing the key/value serialization used for a manifest instance. 11 | pub(crate) trait ManifestCodec { 12 | /// Key type stored inside `fusio-manifest`. 13 | type Key: Clone + Ord + Eq + Hash + Serialize + DeserializeOwned + Send + Sync + 'static; 14 | /// Value payload paired with each key. 15 | type Value: Clone + Serialize + DeserializeOwned + Send + Sync + 'static; 16 | 17 | /// Ensure the provided key/value pair is well-formed for the codec. 18 | fn validate_key_value(key: &Self::Key, value: &Self::Value) -> Result<(), ManifestError>; 19 | } 20 | 21 | /// Marker codec binding the catalog key/value types together. 22 | 23 | #[derive(Debug, Clone, Copy, Default)] 24 | pub(crate) struct CatalogCodec; 25 | 26 | impl ManifestCodec for CatalogCodec { 27 | type Key = CatalogKey; 28 | type Value = CatalogValue; 29 | 30 | fn validate_key_value(_key: &Self::Key, _value: &Self::Value) -> Result<(), ManifestError> { 31 | Ok(()) 32 | } 33 | } 34 | 35 | /// Marker codec binding the version key/value types together. 36 | #[derive(Debug, Clone, Copy, Default)] 37 | pub(crate) struct VersionCodec; 38 | 39 | impl ManifestCodec for VersionCodec { 40 | type Key = VersionKey; 41 | type Value = VersionValue; 42 | 43 | fn validate_key_value(key: &Self::Key, value: &Self::Value) -> Result<(), ManifestError> { 44 | match (key, value) { 45 | (VersionKey::TableHead { .. }, VersionValue::TableHead(_)) => Ok(()), 46 | (VersionKey::TableVersion { .. }, VersionValue::TableVersion(_)) => Ok(()), 47 | (VersionKey::WalFloor { .. }, VersionValue::WalFloor(_)) => Ok(()), 48 | _ => Err(ManifestError::Invariant("manifest key/value type mismatch")), 49 | } 50 | } 51 | } 52 | 53 | /// Marker codec binding the GC-plan key/value types together. 54 | 55 | #[derive(Debug, Clone, Copy, Default)] 56 | pub(crate) struct GcPlanCodec; 57 | 58 | impl ManifestCodec for GcPlanCodec { 59 | type Key = GcPlanKey; 60 | type Value = GcPlanValue; 61 | 62 | fn validate_key_value(_key: &Self::Key, _value: &Self::Value) -> Result<(), ManifestError> { 63 | Ok(()) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /tests/s3_smoke.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # S3 integration smoke harness. 
Run from the repo root via `./tests/s3_smoke.sh`. 3 | set -euo pipefail 4 | 5 | command -v docker >/dev/null 2>&1 || { echo "docker is required" >&2; exit 1; } 6 | command -v cargo >/dev/null 2>&1 || { echo "cargo is required" >&2; exit 1; } 7 | 8 | LOCALSTACK_CONTAINER=${LOCALSTACK_CONTAINER:-tonbo-localstack-smoke} 9 | LOCALSTACK_PORT=${LOCALSTACK_PORT:-4566} 10 | LOCALSTACK_IMAGE=${LOCALSTACK_IMAGE:-localstack/localstack:latest} 11 | AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-test} 12 | AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-test} 13 | AWS_REGION=${AWS_REGION:-us-east-1} 14 | BUCKET_NAME=${BUCKET_NAME:-tonbo-smoke} 15 | 16 | cleanup() { 17 | if docker ps --format '{{.Names}}' | grep -q "^${LOCALSTACK_CONTAINER}$"; then 18 | docker rm -f "${LOCALSTACK_CONTAINER}" >/dev/null 2>&1 || true 19 | fi 20 | } 21 | trap cleanup EXIT 22 | 23 | cleanup 24 | 25 | echo "Starting LocalStack (${LOCALSTACK_CONTAINER})..." 26 | docker run -d --name "${LOCALSTACK_CONTAINER}" \ 27 | -e SERVICES="s3" \ 28 | -e AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID}" \ 29 | -e AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY}" \ 30 | -e AWS_DEFAULT_REGION="${AWS_REGION}" \ 31 | -p "${LOCALSTACK_PORT}:4566" \ 32 | "${LOCALSTACK_IMAGE}" >/dev/null 33 | 34 | echo -n "Waiting for LocalStack to become ready" 35 | until docker exec "${LOCALSTACK_CONTAINER}" awslocal s3api list-buckets >/dev/null 2>&1; do 36 | sleep 1 37 | printf '.' 38 | done 39 | echo 40 | 41 | echo "Provisioning S3 bucket ${BUCKET_NAME}..." 42 | docker exec "${LOCALSTACK_CONTAINER}" awslocal s3api create-bucket --bucket "${BUCKET_NAME}" >/dev/null 2>&1 || true 43 | 44 | env_vars=( 45 | "TONBO_S3_ENDPOINT=http://localhost:${LOCALSTACK_PORT}" 46 | "TONBO_S3_BUCKET=${BUCKET_NAME}" 47 | "TONBO_S3_REGION=${AWS_REGION}" 48 | "TONBO_S3_ACCESS_KEY=${AWS_ACCESS_KEY_ID}" 49 | "TONBO_S3_SECRET_KEY=${AWS_SECRET_ACCESS_KEY}" 50 | ) 51 | 52 | export "${env_vars[@]}" 53 | echo "Running cargo test --features s3-smoke --test s3_smoke" 54 | if cargo test --features s3-smoke --test s3_smoke; then 55 | echo "Smoke test complete." 56 | echo "-- LocalStack logs (tail after success) --" 57 | docker logs --tail 20 "${LOCALSTACK_CONTAINER}" || true 58 | exit 0 59 | else 60 | status=$? 61 | echo "Smoke test failed (status $status)." 62 | echo "-- LocalStack logs (tail) --" 63 | docker logs --tail 40 "${LOCALSTACK_CONTAINER}" || true 64 | exit $status 65 | fi 66 | -------------------------------------------------------------------------------- /src/query/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | //! Predicate and scan-planning helpers for Tonbo’s read path. 3 | //! 4 | //! This module bridges user-facing predicates into the internal scan planner 5 | //! and stream executor. It re-exports the `predicate` crate’s surface and adds 6 | //! conversions for key types used in scan planning. 7 | 8 | pub(crate) mod scan; 9 | pub(crate) mod stream; 10 | 11 | use std::convert::TryFrom; 12 | 13 | pub use tonbo_predicate::{ 14 | ColumnRef, ComparisonOp, Operand, Predicate, PredicateNode, ScalarValue, 15 | }; 16 | 17 | use crate::key::KeyOwned; 18 | 19 | /// Trait describing key types that can be derived from predicate scalar literals. 20 | pub trait KeyPredicateValue: Ord + Clone { 21 | /// Convert a predicate scalar literal into the key type. 
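    ///
    /// Returns `None` when the literal cannot be represented by the key type (for example an
    /// out-of-range integer, a NULL, or a mismatched scalar kind). A sketch against the `i64`
    /// implementation below (hedged; assumes `ScalarValue::from(42_i64)` builds an integer
    /// literal as in the repository examples):
    ///
    /// ```rust,ignore
    /// let literal = ScalarValue::from(42_i64);
    /// assert_eq!(i64::from_scalar(&literal), Some(42));
    /// ```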
22 | fn from_scalar(value: &ScalarValue) -> Option<Self>; 23 | } 24 | 25 | impl KeyPredicateValue for i32 { 26 | fn from_scalar(value: &ScalarValue) -> Option<Self> { 27 | let view = value.as_ref(); 28 | if let Some(v) = view.as_int_i128() { 29 | return i32::try_from(v).ok(); 30 | } 31 | if let Some(v) = view.as_uint_u128() { 32 | return i32::try_from(v).ok(); 33 | } 34 | None 35 | } 36 | } 37 | 38 | impl KeyPredicateValue for i64 { 39 | fn from_scalar(value: &ScalarValue) -> Option<Self> { 40 | let view = value.as_ref(); 41 | if let Some(v) = view.as_int_i128() { 42 | return i64::try_from(v).ok(); 43 | } 44 | if let Some(v) = view.as_uint_u128() { 45 | return i64::try_from(v).ok(); 46 | } 47 | None 48 | } 49 | } 50 | 51 | impl KeyPredicateValue for KeyOwned { 52 | fn from_scalar(value: &ScalarValue) -> Option<Self> { 53 | let view = value.as_ref(); 54 | if view.is_null() { 55 | return None; 56 | } 57 | if let Some(v) = view.as_bool() { 58 | return Some(v.into()); 59 | } 60 | if let Some(v) = view.as_int_i128() 61 | && let Ok(val) = i64::try_from(v) 62 | { 63 | return Some(KeyOwned::from(val)); 64 | } 65 | if let Some(v) = view.as_uint_u128() 66 | && let Ok(val) = u64::try_from(v) 67 | { 68 | return Some(KeyOwned::from(val)); 69 | } 70 | if let Some(v) = view.as_f64() { 71 | return Some(KeyOwned::from(v)); 72 | } 73 | if let Some(v) = view.as_utf8() { 74 | return Some(v.into()); 75 | } 76 | if let Some(v) = view.as_binary() { 77 | return Some(v.to_vec().into()); 78 | } 79 | None 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /examples/02_transaction.rs: -------------------------------------------------------------------------------- 1 | //! Transactions: upsert, delete, read-your-writes, commit 2 | //! 3 | //! Run: cargo run --example 02_transaction 4 | 5 | use tonbo::prelude::*; 6 | 7 | #[derive(Record)] 8 | struct User { 9 | #[metadata(k = "tonbo.key", v = "true")] 10 | id: String, 11 | name: String, 12 | score: Option<i64>, 13 | } 14 | 15 | #[tokio::main] 16 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 17 | let db = DbBuilder::from_schema(User::schema())? 18 | .on_disk("/tmp/tonbo_tx_example")? 19 | .open() 20 | .await?; 21 | 22 | // Insert initial data 23 | let users = vec![ 24 | User { 25 | id: "u1".into(), 26 | name: "Alice".into(), 27 | score: Some(100), 28 | }, 29 | User { 30 | id: "u2".into(), 31 | name: "Bob".into(), 32 | score: Some(85), 33 | }, 34 | ]; 35 | let mut builders = User::new_builders(users.len()); 36 | builders.append_rows(users); 37 | db.ingest(builders.finish().into_record_batch()).await?; 38 | 39 | // Begin transaction 40 | let mut tx = db.begin_transaction().await?; 41 | 42 | // Upsert: update Alice's score, add Carol 43 | let updates = vec![ 44 | User { 45 | id: "u1".into(), 46 | name: "Alice".into(), 47 | score: Some(150), 48 | }, 49 | User { 50 | id: "u3".into(), 51 | name: "Carol".into(), 52 | score: Some(90), 53 | }, 54 | ]; 55 | let mut builders = User::new_builders(updates.len()); 56 | builders.append_rows(updates); 57 | tx.upsert_batch(&builders.finish().into_record_batch())?; 58 | 59 | // Delete Bob 60 | tx.delete("u2")?; 61 | 62 | // Read-your-writes: see uncommitted changes within the transaction 63 | let filter = Predicate::is_not_null(ColumnRef::new("id")); 64 | let preview = tx.scan().filter(filter).collect().await?; 65 | 66 | println!("Before commit (read-your-writes):"); 67 | for batch in &preview { 68 | for user in batch.iter_views::<User>()?.try_flatten()? 
{ 69 | println!(" {} - {} ({:?})", user.id, user.name, user.score); 70 | } 71 | } 72 | 73 | // Commit 74 | tx.commit().await?; 75 | 76 | // Verify after commit 77 | let filter = Predicate::is_not_null(ColumnRef::new("id")); 78 | let committed = db.scan().filter(filter).collect().await?; 79 | 80 | println!("\nAfter commit:"); 81 | for batch in &committed { 82 | for user in batch.iter_views::<User>()?.try_flatten()? { 83 | println!(" {} - {} ({:?})", user.id, user.name, user.score); 84 | } 85 | } 86 | 87 | Ok(()) 88 | } 89 | -------------------------------------------------------------------------------- /src/key/heap_size.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use typed_arrow_dyn::DynCell; 4 | 5 | use super::{KeyOwned, KeyRow}; 6 | 7 | /// Estimate heap usage of key types used across memtables and indexes. 8 | /// Primitives and booleans report zero; buffer-backed components return their 9 | /// byte length; tuples sum their parts. 10 | pub trait KeyHeapSize { 11 | /// Approximate heap bytes consumed by the key representation. 12 | fn key_heap_size(&self) -> usize; 13 | } 14 | 15 | macro_rules! impl_key_size_prim { 16 | ($($t:ty),* $(,)?) => { 17 | $( 18 | impl KeyHeapSize for $t { 19 | fn key_heap_size(&self) -> usize { 20 | 0 21 | } 22 | } 23 | )* 24 | }; 25 | } 26 | 27 | impl_key_size_prim!(u8, u16, u32, u64, usize, i8, i16, i32, i64, isize, bool); 28 | 29 | impl<A: KeyHeapSize, B: KeyHeapSize> KeyHeapSize for (A, B) { 30 | fn key_heap_size(&self) -> usize { 31 | self.0.key_heap_size() + self.1.key_heap_size() 32 | } 33 | } 34 | 35 | impl<A: KeyHeapSize, B: KeyHeapSize, C: KeyHeapSize> KeyHeapSize for (A, B, C) { 36 | fn key_heap_size(&self) -> usize { 37 | self.0.key_heap_size() + self.1.key_heap_size() + self.2.key_heap_size() 38 | } 39 | } 40 | 41 | impl KeyHeapSize for KeyOwned { 42 | fn key_heap_size(&self) -> usize { 43 | self.as_row() 44 | .cells() 45 | .iter() 46 | .filter_map(|cell| cell.as_ref()) 47 | .map(dyn_cell_owned_heap_size) 48 | .sum() 49 | } 50 | } 51 | 52 | fn dyn_cell_owned_heap_size(cell: &DynCell) -> usize { 53 | match cell { 54 | DynCell::Str(value) => value.len(), 55 | DynCell::Bin(bytes) => bytes.len(), 56 | DynCell::Struct(values) => values 57 | .iter() 58 | .filter_map(|cell| cell.as_ref()) 59 | .map(dyn_cell_owned_heap_size) 60 | .sum(), 61 | DynCell::List(values) | DynCell::FixedSizeList(values) => values 62 | .iter() 63 | .filter_map(|cell| cell.as_ref()) 64 | .map(dyn_cell_owned_heap_size) 65 | .sum(), 66 | DynCell::Map(entries) => entries 67 | .iter() 68 | .map(|(key, value)| { 69 | dyn_cell_owned_heap_size(key) 70 | + value 71 | .as_ref() 72 | .map(dyn_cell_owned_heap_size) 73 | .unwrap_or_default() 74 | }) 75 | .sum(), 76 | DynCell::Union { value, .. } => value 77 | .as_deref() 78 | .map(dyn_cell_owned_heap_size) 79 | .unwrap_or_default(), 80 | _ => 0, 81 | } 82 | } 83 | 84 | impl KeyHeapSize for KeyRow { 85 | fn key_heap_size(&self) -> usize { 86 | self.heap_size() 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/db/compaction.rs: -------------------------------------------------------------------------------- 1 | //! DB compaction integration. 2 | //! 3 | //! This module provides the bridge between the DB type and the compaction subsystem. 
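//!
//! A minimal sketch of the flow (simplified; `planner` and `executor` stand in
//! for whatever the caller supplies):
//!
//! ```ignore
//! // Build a driver on demand from the DB's manifest and WAL state...
//! let driver = db.compaction_driver();
//! // ...run one plan -> resolve -> execute -> apply-manifest cycle...
//! let _outcome = driver.run_compaction(&planner, &executor).await?;
//! // ...and drop WAL segments below the recorded floor.
//! driver.prune_wal_below_floor().await;
//! ```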
4 | 5 | use fusio::executor::{Executor, Timer}; 6 | 7 | use crate::{compaction::CompactionDriver, db::DbInner, manifest::ManifestFs}; 8 | #[cfg(all(test, feature = "tokio"))] 9 | use crate::{ 10 | compaction::{ 11 | executor::{CompactionError, CompactionExecutor, CompactionOutcome}, 12 | planner::CompactionPlanner, 13 | }, 14 | manifest::ManifestResult, 15 | }; 16 | 17 | impl<FS, E> DbInner<FS, E> 18 | where 19 | FS: ManifestFs<E>, 20 | E: Executor + Timer + Clone + 'static, 21 | <FS as fusio::fs::Fs>::File: fusio::durability::FileCommit, 22 | { 23 | /// Whether a background compaction worker was spawned for this DB. 24 | #[cfg(test)] 25 | pub fn has_compaction_worker(&self) -> bool { 26 | self.compaction_worker.is_some() 27 | } 28 | 29 | /// Create a compaction driver from this DB's manifest and configuration. 30 | /// 31 | /// The driver is created on demand; callers can Arc-wrap it for background workers. 32 | pub(crate) fn compaction_driver(&self) -> CompactionDriver<FS, E> { 33 | CompactionDriver::new( 34 | self.manifest.clone(), 35 | self.manifest_table, 36 | self.wal_config.clone(), 37 | self.wal_handle().cloned(), 38 | ) 39 | } 40 | 41 | /// Remove WAL segments whose sequence is older than the manifest floor. 42 | pub(crate) async fn prune_wal_segments_below_floor(&self) { 43 | self.compaction_driver().prune_wal_below_floor().await 44 | } 45 | 46 | /// Build a compaction plan based on the latest manifest snapshot. 47 | #[cfg(all(test, feature = "tokio"))] 48 | pub(crate) async fn plan_compaction_task<P>( 49 | &self, 50 | planner: &P, 51 | ) -> ManifestResult<Option<crate::compaction::planner::CompactionTask>> 52 | where 53 | P: CompactionPlanner, 54 | { 55 | self.compaction_driver().plan_compaction_task(planner).await 56 | } 57 | 58 | /// Sequence number of the WAL floor currently recorded in the manifest. 59 | #[cfg(all(test, feature = "tokio"))] 60 | pub(crate) async fn wal_floor_seq(&self) -> Option<u64> { 61 | self.compaction_driver().wal_floor_seq().await 62 | } 63 | 64 | /// End-to-end compaction orchestrator (plan -> resolve -> execute -> apply manifest). 65 | #[cfg(all(test, feature = "tokio"))] 66 | pub(crate) async fn run_compaction_task<CE, P>( 67 | &self, 68 | planner: &P, 69 | executor: &CE, 70 | ) -> Result<Option<CompactionOutcome>, CompactionError> 71 | where 72 | CE: CompactionExecutor, 73 | P: CompactionPlanner, 74 | { 75 | self.compaction_driver() 76 | .run_compaction(planner, executor) 77 | .await 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /predicate/src/core/row_set.rs: -------------------------------------------------------------------------------- 1 | //! Shared row-set abstractions built on top of roaring bitmaps. 2 | 3 | use std::convert::TryFrom; 4 | 5 | use roaring::RoaringBitmap; 6 | 7 | /// Unique identifier for a row referenced by the planner. 8 | pub type RowId = u32; 9 | 10 | /// Borrowed iterator that yields [`RowId`] values. 11 | pub type RowIdIter<'a> = Box<dyn Iterator<Item = RowId> + Send + 'a>; 12 | 13 | /// Abstract set of row identifiers that supports basic set algebra. 14 | pub trait RowSet: Send + Sync { 15 | /// Returns the number of rows tracked by the set. 16 | fn len(&self) -> usize; 17 | 18 | /// Returns true when the set is empty. 19 | fn is_empty(&self) -> bool { 20 | self.len() == 0 21 | } 22 | 23 | /// Returns true when the set represents the whole universe of rows. 24 | fn is_full(&self) -> bool; 25 | 26 | /// Returns an iterator over row identifiers. 
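    ///
    /// The iterator is boxed (see [`RowIdIter`]) so different implementations can
    /// hand it back as a trait object.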
27 | fn iter(&self) -> RowIdIter<'_>; 28 | 29 | /// Returns the intersection between this set and `other`. 30 | fn intersect(&self, other: &Self) -> Self 31 | where 32 | Self: Sized; 33 | 34 | /// Returns the union between this set and `other`. 35 | fn union(&self, other: &Self) -> Self 36 | where 37 | Self: Sized; 38 | 39 | /// Returns the relative complement (`self \ other`). 40 | fn difference(&self, other: &Self) -> Self 41 | where 42 | Self: Sized; 43 | } 44 | 45 | /// [`RowSet`] implementation backed by a roaring bitmap. 46 | #[derive(Clone, Debug, Default)] 47 | pub struct BitmapRowSet { 48 | bitmap: RoaringBitmap, 49 | } 50 | 51 | impl BitmapRowSet { 52 | /// Creates an empty bitmap-backed row set. 53 | #[must_use] 54 | pub fn new() -> Self { 55 | Self::default() 56 | } 57 | 58 | /// Inserts a row identifier into the set. 59 | pub fn insert(&mut self, row: RowId) { 60 | self.bitmap.insert(row); 61 | } 62 | 63 | /// Returns true when the set contains the provided row identifier. 64 | #[must_use] 65 | pub fn contains(&self, row: RowId) -> bool { 66 | self.bitmap.contains(row) 67 | } 68 | } 69 | 70 | impl RowSet for BitmapRowSet { 71 | fn len(&self) -> usize { 72 | usize::try_from(self.bitmap.len()).unwrap_or(usize::MAX) 73 | } 74 | 75 | fn is_full(&self) -> bool { 76 | self.bitmap.is_full() 77 | } 78 | 79 | fn iter(&self) -> RowIdIter<'_> { 80 | Box::new(self.bitmap.iter()) 81 | } 82 | 83 | fn intersect(&self, other: &Self) -> Self { 84 | let bitmap = &self.bitmap & &other.bitmap; 85 | Self { bitmap } 86 | } 87 | 88 | fn union(&self, other: &Self) -> Self { 89 | let bitmap = &self.bitmap | &other.bitmap; 90 | Self { bitmap } 91 | } 92 | 93 | fn difference(&self, other: &Self) -> Self { 94 | let bitmap = &self.bitmap - &other.bitmap; 95 | Self { bitmap } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /examples/10_dynamic/10b_dynamic_metadata.rs: -------------------------------------------------------------------------------- 1 | // 03: Dynamic (runtime) schema: infer key from Arrow metadata 2 | 3 | use std::sync::Arc; 4 | 5 | use fusio::{executor::NoopExecutor, mem::fs::InMemoryFs}; 6 | use tonbo::prelude::*; 7 | use typed_arrow::{ 8 | arrow_array::RecordBatch, 9 | arrow_schema::{DataType, Field, Schema}, 10 | }; 11 | use typed_arrow_dyn::{DynBuilders, DynCell, DynRow}; 12 | 13 | fn build_batch(schema: Arc<Schema>, rows: Vec<DynRow>) -> RecordBatch { 14 | let mut builders = DynBuilders::new(schema.clone(), rows.len()); 15 | for row in rows { 16 | builders.append_option_row(Some(row)).expect("append row"); 17 | } 18 | builders.try_finish_into_batch().expect("record batch") 19 | } 20 | 21 | #[tokio::main] 22 | async fn main() { 23 | // Schema-level metadata: tonbo.keys = "id" 24 | let f_id = Field::new("id", DataType::Utf8, false); 25 | let f_score = Field::new("score", DataType::Int32, false); 26 | let mut md = std::collections::HashMap::new(); 27 | md.insert("tonbo.keys".to_string(), "id".to_string()); 28 | let schema = Arc::new(Schema::new(vec![f_id, f_score]).with_metadata(md)); 29 | 30 | // Build a batch 31 | let rows = vec![ 32 | DynRow(vec![Some(DynCell::Str("a".into())), Some(DynCell::I32(1))]), 33 | DynRow(vec![Some(DynCell::Str("b".into())), Some(DynCell::I32(2))]), 34 | ]; 35 | let batch: RecordBatch = build_batch(schema.clone(), rows); 36 | 37 | // Create DB from metadata 38 | let executor = Arc::new(NoopExecutor); 39 | let db: DB<InMemoryFs, NoopExecutor> = DbBuilder::from_schema_metadata(schema.clone()) 40 | 
.expect("metadata config") 41 | .in_memory("dynamic-metadata") 42 | .expect("in_memory config") 43 | .open_with_executor(Arc::clone(&executor)) 44 | .await 45 | .expect("metadata ok"); 46 | db.ingest(batch).await.expect("insert"); 47 | 48 | // Scan all rows using a trivial predicate 49 | let pred = Predicate::is_not_null(ColumnRef::new("id")); 50 | let rows: Vec<(String, i32)> = db 51 | .scan() 52 | .filter(pred) 53 | .collect() 54 | .await 55 | .expect("collect") 56 | .into_iter() 57 | .flat_map(|batch| { 58 | let ids = batch 59 | .column(0) 60 | .as_any() 61 | .downcast_ref::<typed_arrow::arrow_array::StringArray>() 62 | .expect("id col"); 63 | let vals = batch 64 | .column(1) 65 | .as_any() 66 | .downcast_ref::<typed_arrow::arrow_array::Int32Array>() 67 | .expect("v col"); 68 | ids.iter() 69 | .zip(vals.iter()) 70 | .filter_map(|(id, v)| Some((id?.to_string(), v?))) 71 | .collect::<Vec<_>>() 72 | }) 73 | .collect(); 74 | println!("dynamic (metadata) rows: {:?}", rows); 75 | } 76 | -------------------------------------------------------------------------------- /src/extractor/errors.rs: -------------------------------------------------------------------------------- 1 | use arrow_schema::{ArrowError, DataType, SchemaRef}; 2 | use typed_arrow_dyn::DynViewError; 3 | 4 | use crate::wal::WalError; 5 | 6 | /// Error returned when key extraction fails due to type/schema mismatches or out-of-bounds. 7 | #[derive(Debug, thiserror::Error)] 8 | pub enum KeyExtractError { 9 | /// Column index is outside the schema's field range. 10 | #[error("column index {0} out of bounds (num_columns={1})")] 11 | ColumnOutOfBounds(usize, usize), 12 | /// The field's Arrow data type does not match the extractor's expectation. 13 | #[error("unexpected data type for column {col}: expected {expected:?}, got {actual:?}")] 14 | WrongType { 15 | /// Column index with the mismatch. 16 | col: usize, 17 | /// Expected Arrow data type. 18 | expected: DataType, 19 | /// Actual Arrow data type. 20 | actual: DataType, 21 | }, 22 | /// Row index is outside the batch's row range. 23 | #[error("invalid row index {0} (num_rows={1})")] 24 | RowOutOfBounds(usize, usize), 25 | /// Encountered an unsupported Arrow type when extracting from a batch. 26 | #[error("unsupported data type for column {col}: {data_type:?}")] 27 | UnsupportedType { 28 | /// Column index of the unsupported field. 29 | col: usize, 30 | /// The Arrow data type that is not supported. 31 | data_type: DataType, 32 | }, 33 | /// Referenced field by name was not found in the schema. 34 | #[error("no such field in schema: {name}")] 35 | NoSuchField { 36 | /// The missing field name. 37 | name: String, 38 | }, 39 | /// Batch schema does not match the DB's configured schema (dynamic mode). 40 | #[error("schema mismatch: expected {expected:?}, got {actual:?}")] 41 | SchemaMismatch { 42 | /// The DB's configured schema. 43 | expected: SchemaRef, 44 | /// The incoming batch schema. 45 | actual: SchemaRef, 46 | }, 47 | /// Tombstone bitmap length does not match the batch row count. 48 | #[error("tombstone bitmap length mismatch: expected {expected}, got {actual}")] 49 | TombstoneLengthMismatch { 50 | /// Expected number of rows. 51 | expected: usize, 52 | /// Provided tombstone entries. 53 | actual: usize, 54 | }, 55 | /// WAL submission or durability hook failed while ingesting. 56 | #[error("wal error: {0}")] 57 | Wal(#[from] WalError), 58 | /// Generic Arrow failure while materializing dynamic rows. 
59 | #[error("arrow error: {0}")] 60 | Arrow(#[from] ArrowError), 61 | /// Error when viewing the data. 62 | #[error("dyn view error: {0}")] 63 | DynView(#[from] DynViewError), 64 | /// Mutable memtable has reached capacity and must be sealed. 65 | #[error("memtable full: capacity {capacity} exhausted")] 66 | MemtableFull { 67 | /// Maximum number of batches the memtable can hold. 68 | capacity: usize, 69 | }, 70 | } 71 | -------------------------------------------------------------------------------- /src/tests_internal/wasm_compat_e2e.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "tokio")] 2 | 3 | use std::sync::Arc; 4 | 5 | use arrow_array::{Int32Array, RecordBatch, StringArray}; 6 | use arrow_schema::{DataType, Field}; 7 | use fusio::{executor::NoopExecutor, impls::mem::fs::InMemoryFs}; 8 | 9 | use crate::db::{BatchesThreshold, ColumnRef, DB, Predicate, WalSyncPolicy}; 10 | 11 | #[path = "common/mod.rs"] 12 | mod common; 13 | use common::config_with_pk; 14 | 15 | /// Ensure a wasm-like config (in-memory FS + no-op executor) can ingest and scan end-to-end. 16 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 17 | async fn wasm_like_in_memory_roundtrip() -> Result<(), Box<dyn std::error::Error>> { 18 | let config = config_with_pk( 19 | vec![ 20 | Field::new("id", DataType::Utf8, false), 21 | Field::new("v", DataType::Int32, false), 22 | ], 23 | &["id"], 24 | ); 25 | let schema = config.schema(); 26 | 27 | let exec = Arc::new(NoopExecutor); 28 | let mut inner: crate::db::DbInner<InMemoryFs, NoopExecutor> = 29 | DB::<InMemoryFs, NoopExecutor>::builder(config) 30 | .in_memory("wasm-compat-e2e")? 31 | .wal_sync_policy(WalSyncPolicy::Always) 32 | .open_with_executor(exec) 33 | .await? 34 | .into_inner(); 35 | 36 | inner.set_seal_policy(Arc::new(BatchesThreshold { batches: 1 })); 37 | 38 | let first = RecordBatch::try_new( 39 | schema.clone(), 40 | vec![ 41 | Arc::new(StringArray::from(vec!["a", "b"])) as _, 42 | Arc::new(Int32Array::from(vec![1, 2])) as _, 43 | ], 44 | )?; 45 | inner.ingest(first).await?; 46 | 47 | let second = RecordBatch::try_new( 48 | schema.clone(), 49 | vec![ 50 | Arc::new(StringArray::from(vec!["c"])) as _, 51 | Arc::new(Int32Array::from(vec![3])) as _, 52 | ], 53 | )?; 54 | inner.ingest(second).await?; 55 | 56 | let db = DB::from_inner(Arc::new(inner)); 57 | 58 | let predicate = Predicate::is_not_null(ColumnRef::new("id")); 59 | let mut rows: Vec<(String, i32)> = db 60 | .begin_snapshot() 61 | .await? 62 | .scan(&db) 63 | .filter(predicate) 64 | .collect() 65 | .await? 66 | .into_iter() 67 | .flat_map(|batch| { 68 | let ids = batch 69 | .column(0) 70 | .as_any() 71 | .downcast_ref::<arrow_array::StringArray>() 72 | .expect("id col"); 73 | let vals = batch 74 | .column(1) 75 | .as_any() 76 | .downcast_ref::<arrow_array::Int32Array>() 77 | .expect("v col"); 78 | ids.iter() 79 | .zip(vals.iter()) 80 | .filter_map(|(id, v)| Some((id?.to_string(), v?))) 81 | .collect::<Vec<_>>() 82 | }) 83 | .collect(); 84 | rows.sort(); 85 | assert_eq!( 86 | rows, 87 | vec![("a".into(), 1), ("b".into(), 2), ("c".into(), 3)] 88 | ); 89 | 90 | Ok(()) 91 | } 92 | -------------------------------------------------------------------------------- /src/compaction/scheduler.rs: -------------------------------------------------------------------------------- 1 | //! Send-friendly compaction scheduler primitives with lease issuance. 
2 | 3 | use std::future::Future; 4 | 5 | use futures::{FutureExt, SinkExt, StreamExt, channel::mpsc}; 6 | use thiserror::Error; 7 | use ulid::Ulid; 8 | 9 | use crate::{ 10 | compaction::{executor::CompactionLease, planner::CompactionTask}, 11 | mvcc::Timestamp, 12 | }; 13 | 14 | /// Single scheduled compaction task bundled with CAS context and a lease token. 15 | #[derive(Debug, Clone)] 16 | pub(super) struct ScheduledCompaction { 17 | pub(super) task: CompactionTask, 18 | pub(super) manifest_head: Option<Timestamp>, 19 | pub(super) lease: CompactionLease, 20 | } 21 | 22 | /// Errors that can surface while scheduling or draining compaction jobs. 23 | #[derive(Debug, Error)] 24 | pub(super) enum CompactionScheduleError { 25 | /// Scheduler channel closed. 26 | #[error("compaction scheduler closed")] 27 | Closed, 28 | } 29 | 30 | /// In-process scheduler that hands out leases and enqueues compaction tasks. 31 | #[derive(Debug)] 32 | pub(super) struct CompactionScheduler { 33 | tx: mpsc::Sender<ScheduledCompaction>, 34 | budget: usize, 35 | } 36 | 37 | impl CompactionScheduler { 38 | /// Create a scheduler with bounded capacity and a per-cycle drain budget. 39 | #[must_use] 40 | pub(super) fn new( 41 | capacity: usize, 42 | budget: usize, 43 | ) -> (Self, mpsc::Receiver<ScheduledCompaction>) { 44 | let (tx, rx) = mpsc::channel(capacity.max(1)); 45 | ( 46 | Self { 47 | tx, 48 | budget: budget.max(1), 49 | }, 50 | rx, 51 | ) 52 | } 53 | 54 | /// Enqueue a planned compaction task with an issued lease. 55 | pub(super) async fn enqueue( 56 | &self, 57 | task: CompactionTask, 58 | manifest_head: Option<Timestamp>, 59 | owner: impl Into<String>, 60 | ttl_ms: u64, 61 | ) -> Result<(), CompactionScheduleError> { 62 | let lease = CompactionLease { 63 | id: Ulid::new(), 64 | owner: owner.into(), 65 | ttl_ms, 66 | }; 67 | let mut tx = self.tx.clone(); 68 | tx.send(ScheduledCompaction { 69 | task, 70 | manifest_head, 71 | lease, 72 | }) 73 | .await 74 | .map_err(|_| CompactionScheduleError::Closed) 75 | } 76 | 77 | /// Drain up to the configured budget of scheduled jobs, invoking `f` per job. 
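    ///
    /// Jobs beyond the budget stay queued for the next drain cycle; a closed
    /// channel surfaces as [`CompactionScheduleError::Closed`].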
78 | pub(super) async fn drain_with_budget<F, Fut>( 79 | &self, 80 | rx: &mut mpsc::Receiver<ScheduledCompaction>, 81 | mut f: F, 82 | ) -> Result<(), CompactionScheduleError> 83 | where 84 | F: FnMut(ScheduledCompaction) -> Fut, 85 | Fut: Future<Output = ()>, 86 | { 87 | for _ in 0..self.budget { 88 | match rx.next().now_or_never() { 89 | Some(Some(job)) => f(job).await, 90 | Some(None) => return Err(CompactionScheduleError::Closed), 91 | None => break, 92 | } 93 | } 94 | Ok(()) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/tests_internal/conflict_e2e.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "tokio")] 2 | 3 | use std::sync::Arc; 4 | 5 | use arrow_schema::{DataType, Field}; 6 | use fusio::{executor::tokio::TokioExecutor, mem::fs::InMemoryFs}; 7 | use typed_arrow_dyn::{DynCell, DynRow}; 8 | 9 | use crate::db::{ColumnRef, DB, Predicate}; 10 | 11 | #[path = "common/mod.rs"] 12 | mod common; 13 | use common::config_with_pk; 14 | 15 | async fn make_db() -> Result<DB<InMemoryFs, TokioExecutor>, Box<dyn std::error::Error>> { 16 | let cfg = config_with_pk( 17 | vec![ 18 | Field::new("id", DataType::Utf8, false), 19 | Field::new("v", DataType::Int32, false), 20 | ], 21 | &["id"], 22 | ); 23 | let exec = Arc::new(TokioExecutor::default()); 24 | let db = DB::<InMemoryFs, TokioExecutor>::builder(cfg) 25 | .in_memory("conflict-e2e")? 26 | .open_with_executor(exec) 27 | .await?; 28 | Ok(db) 29 | } 30 | 31 | /// Conflicting writes on the same key should surface a conflict error and not apply the second 32 | /// write. 33 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 34 | async fn transactional_conflict_detection_blocks_second_writer() 35 | -> Result<(), Box<dyn std::error::Error>> { 36 | let db = make_db().await?; 37 | 38 | // First transaction stages an update but does not commit yet. 39 | let mut tx1 = db.begin_transaction().await?; 40 | tx1.upsert(DynRow(vec![ 41 | Some(DynCell::Str("user".into())), 42 | Some(DynCell::I32(1)), 43 | ]))?; 44 | 45 | // Second transaction based on same snapshot attempts to write the same key. 46 | let mut tx2 = db.begin_transaction().await?; 47 | tx2.upsert(DynRow(vec![ 48 | Some(DynCell::Str("user".into())), 49 | Some(DynCell::I32(2)), 50 | ]))?; 51 | 52 | // Commit tx1, then tx2 should see a conflict. 53 | tx1.commit().await?; 54 | let commit2 = tx2.commit().await; 55 | 56 | // Confirm final visibility matches either conflict (only first) or overwrite if conflict not 57 | // detected. 
58 | let predicate = Predicate::is_not_null(ColumnRef::new("id")); 59 | let batches = db.scan().filter(predicate).collect().await?; 60 | let mut rows: Vec<(String, i32)> = batches 61 | .into_iter() 62 | .flat_map(|batch| { 63 | let ids = batch 64 | .column(0) 65 | .as_any() 66 | .downcast_ref::<arrow_array::StringArray>() 67 | .expect("id col"); 68 | let vals = batch 69 | .column(1) 70 | .as_any() 71 | .downcast_ref::<arrow_array::Int32Array>() 72 | .expect("v col"); 73 | ids.iter() 74 | .zip(vals.iter()) 75 | .filter_map(|(id, v)| Some((id?.to_string(), v?))) 76 | .collect::<Vec<_>>() 77 | }) 78 | .collect(); 79 | rows.sort(); 80 | if let Err(err) = commit2 { 81 | let msg = format!("{err}"); 82 | assert!( 83 | msg.contains("conflict") || msg.contains("Conflict"), 84 | "expected conflict error, got: {msg}" 85 | ); 86 | assert_eq!(rows, vec![("user".into(), 1)]); 87 | } else { 88 | assert_eq!(rows, vec![("user".into(), 2)]); 89 | } 90 | 91 | Ok(()) 92 | } 93 | -------------------------------------------------------------------------------- /src/db/tests/core/scan.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow_schema::{DataType, Field, Schema}; 4 | use fusio::{executor::NoopExecutor, mem::fs::InMemoryFs}; 5 | use tonbo_predicate::{ColumnRef, Predicate, ScalarValue}; 6 | use typed_arrow_dyn::{DynCell, DynRow}; 7 | 8 | use crate::{ 9 | db::{DB, DbInner}, 10 | extractor, 11 | inmem::policy::BatchesThreshold, 12 | mode::DynModeConfig, 13 | test::build_batch, 14 | }; 15 | 16 | #[tokio::test(flavor = "multi_thread", worker_threads = 1)] 17 | async fn plan_scan_filters_immutable_segments() { 18 | let db = db_with_immutable_keys(&["k1", "z1"]).await; 19 | let predicate = Predicate::eq(ColumnRef::new("id"), ScalarValue::from("k1")); 20 | let snapshot = db.begin_snapshot().await.expect("snapshot"); 21 | let plan = snapshot 22 | .plan_scan(&db, &predicate, None, None) 23 | .await 24 | .expect("plan"); 25 | // Pruning is currently disabled; expect to scan all immutables and retain the predicate 26 | // for residual evaluation. 27 | assert_eq!(plan.immutable_indexes, vec![0, 1]); 28 | assert!(plan.residual_predicate.is_some()); 29 | } 30 | 31 | #[tokio::test(flavor = "multi_thread", worker_threads = 1)] 32 | async fn plan_scan_preserves_residual_predicate() { 33 | let db = db_with_immutable_keys(&["k1"]).await; 34 | let key_pred = Predicate::eq(ColumnRef::new("id"), ScalarValue::from("k1")); 35 | let value_pred = Predicate::gt(ColumnRef::new("v"), ScalarValue::from(5i64)); 36 | let predicate = Predicate::and(vec![key_pred, value_pred]); 37 | let snapshot = db.begin_snapshot().await.expect("snapshot"); 38 | let plan = snapshot 39 | .plan_scan(&db, &predicate, None, None) 40 | .await 41 | .expect("plan"); 42 | assert!(plan.residual_predicate.is_some()); 43 | } 44 | 45 | #[tokio::test(flavor = "multi_thread", worker_threads = 1)] 46 | async fn plan_scan_marks_empty_range() { 47 | let db = db_with_immutable_keys(&["k1"]).await; 48 | let pred_a = Predicate::eq(ColumnRef::new("id"), ScalarValue::from("k1")); 49 | let pred_b = Predicate::eq(ColumnRef::new("id"), ScalarValue::from("k2")); 50 | let predicate = Predicate::and(vec![pred_a, pred_b]); 51 | let snapshot = db.begin_snapshot().await.expect("snapshot"); 52 | let plan = snapshot 53 | .plan_scan(&db, &predicate, None, None) 54 | .await 55 | .expect("plan"); 56 | // Pruning is currently disabled; even contradictory predicates scan all immutables. 
57 | assert_eq!(plan.immutable_indexes, vec![0]); 58 | } 59 | 60 | async fn db_with_immutable_keys(keys: &[&str]) -> DbInner<InMemoryFs, NoopExecutor> { 61 | let schema = Arc::new(Schema::new(vec![ 62 | Field::new("id", DataType::Utf8, false), 63 | Field::new("v", DataType::Int32, false), 64 | ])); 65 | let extractor = extractor::projection_for_field(schema.clone(), 0).expect("extractor"); 66 | let executor = Arc::new(NoopExecutor); 67 | let config = DynModeConfig::new(schema.clone(), extractor).expect("config"); 68 | let policy = Arc::new(BatchesThreshold { batches: 1 }); 69 | let db = DB::new_with_policy(config, Arc::clone(&executor), policy) 70 | .await 71 | .expect("db") 72 | .into_inner(); 73 | for (idx, key) in keys.iter().enumerate() { 74 | let rows = vec![DynRow(vec![ 75 | Some(DynCell::Str((*key).into())), 76 | Some(DynCell::I32(idx as i32)), 77 | ])]; 78 | let batch = build_batch(schema.clone(), rows).expect("batch"); 79 | db.ingest_with_tombstones(batch, vec![false]) 80 | .await 81 | .expect("ingest"); 82 | } 83 | db 84 | } 85 | -------------------------------------------------------------------------------- /examples/10_dynamic/10a_dynamic_basic.rs: -------------------------------------------------------------------------------- 1 | // 02: Dynamic (runtime) schema: key-by-name, insert a batch, and scan 2 | 3 | use std::sync::Arc; 4 | 5 | use fusio::{executor::tokio::TokioExecutor, mem::fs::InMemoryFs}; 6 | use tonbo::prelude::*; 7 | use typed_arrow::{ 8 | arrow_array::RecordBatch, 9 | arrow_schema::{DataType, Field, Schema}, 10 | }; 11 | use typed_arrow_dyn::{DynBuilders, DynCell, DynRow}; 12 | 13 | fn build_batch(schema: Arc<Schema>, rows: Vec<DynRow>) -> RecordBatch { 14 | let mut builders = DynBuilders::new(schema.clone(), rows.len()); 15 | for row in rows { 16 | builders.append_option_row(Some(row)).expect("append row"); 17 | } 18 | builders.try_finish_into_batch().expect("record batch") 19 | } 20 | 21 | #[tokio::main] 22 | async fn main() { 23 | // Define an Arrow schema at runtime (string key) 24 | let schema = Arc::new(Schema::new(vec![ 25 | Field::new("id", DataType::Utf8, false), 26 | Field::new("score", DataType::Int32, false), 27 | ])); 28 | 29 | // Build a RecordBatch from dynamic rows 30 | let rows = vec![ 31 | DynRow(vec![ 32 | Some(DynCell::Str("carol".into())), 33 | Some(DynCell::I32(30)), 34 | ]), 35 | DynRow(vec![ 36 | Some(DynCell::Str("dave".into())), 37 | Some(DynCell::I32(40)), 38 | ]), 39 | DynRow(vec![ 40 | Some(DynCell::Str("erin".into())), 41 | Some(DynCell::I32(50)), 42 | ]), 43 | ]; 44 | let batch: RecordBatch = build_batch(schema.clone(), rows); 45 | 46 | // Create a dynamic DB by specifying the key field name 47 | let db: DB<InMemoryFs, TokioExecutor> = DbBuilder::from_schema_key_name(schema.clone(), "id") 48 | .expect("key col") 49 | .in_memory("dynamic-basic") 50 | .expect("in_memory config") 51 | .open() 52 | .await 53 | .expect("schema ok"); 54 | db.ingest(batch).await.expect("insert dynamic batch"); 55 | 56 | let key_col = ColumnRef::new("id"); 57 | 58 | // Scan for a specific key (id == "carol") using predicate 59 | let carol_pred = Predicate::eq(key_col.clone(), ScalarValue::from("carol")); 60 | let out = scan_pairs(&db, carol_pred).await; 61 | println!("dynamic scan rows (carol): {:?}", out); 62 | 63 | // Query expression: id == "dave" 64 | let expr = Predicate::eq(key_col.clone(), ScalarValue::from("dave")); 65 | let out_q = scan_pairs(&db, expr).await; 66 | println!("dynamic query rows (id == dave): {:?}", out_q); 67 | 68 | // Scan all 
dynamic rows (id is not null) 69 | let all_pred = Predicate::is_not_null(key_col.clone()); 70 | let all_rows = scan_pairs(&db, all_pred).await; 71 | println!("dynamic rows (all): {:?}", all_rows); 72 | } 73 | 74 | async fn scan_pairs( 75 | db: &DB<InMemoryFs, TokioExecutor>, 76 | predicate: Predicate, 77 | ) -> Vec<(String, i32)> { 78 | let batches = db.scan().filter(predicate).collect().await.expect("scan"); 79 | batches 80 | .into_iter() 81 | .flat_map(|batch| { 82 | let ids = batch 83 | .column(0) 84 | .as_any() 85 | .downcast_ref::<typed_arrow::arrow_array::StringArray>() 86 | .expect("id col"); 87 | let vals = batch 88 | .column(1) 89 | .as_any() 90 | .downcast_ref::<typed_arrow::arrow_array::Int32Array>() 91 | .expect("v col"); 92 | ids.iter() 93 | .zip(vals.iter()) 94 | .filter_map(|(id, v)| Some((id?.to_string(), v?))) 95 | .collect::<Vec<_>>() 96 | }) 97 | .collect() 98 | } 99 | -------------------------------------------------------------------------------- /tests/s3_localstack_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Bootstrap LocalStack for S3-backed tests and export TONBO_S3_* env vars. 3 | # Source this script to set env in the current shell: 4 | # source tests/s3_localstack_env.sh 5 | # 6 | # Variables (override as needed): 7 | # LOCALSTACK_CONTAINER (default: tonbo-localstack-e2e) 8 | # LOCALSTACK_PORT (default: 4566) 9 | # AWS_ACCESS_KEY_ID (default: test) 10 | # AWS_SECRET_ACCESS_KEY(default: test) 11 | # AWS_REGION (default: us-east-1) 12 | # BUCKET_NAME (default: tonbo-e2e) 13 | 14 | set -euo pipefail 15 | 16 | LOCALSTACK_CONTAINER=${LOCALSTACK_CONTAINER:-tonbo-localstack-e2e} 17 | LOCALSTACK_PORT=${LOCALSTACK_PORT:-4566} 18 | AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-test} 19 | AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-test} 20 | AWS_REGION=${AWS_REGION:-us-east-1} 21 | BUCKET_NAME=${BUCKET_NAME:-tonbo-e2e} 22 | 23 | tonbo_localstack_started_by_script=0 24 | tonbo_localstack_available=0 25 | 26 | ensure_localstack() { 27 | if command -v docker >/dev/null 2>&1; then 28 | if docker ps --format '{{.Names}}' | grep -q "^${LOCALSTACK_CONTAINER}\$"; then 29 | tonbo_localstack_available=1 30 | return 31 | fi 32 | 33 | if docker ps -a --format '{{.Names}}' | grep -q "^${LOCALSTACK_CONTAINER}\$"; then 34 | docker rm -f "${LOCALSTACK_CONTAINER}" >/dev/null 2>&1 || true 35 | fi 36 | 37 | echo "Starting LocalStack (${LOCALSTACK_CONTAINER}) on port ${LOCALSTACK_PORT}..." 38 | docker run -d --name "${LOCALSTACK_CONTAINER}" \ 39 | -e SERVICES="s3" \ 40 | -e AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID}" \ 41 | -e AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY}" \ 42 | -e AWS_DEFAULT_REGION="${AWS_REGION}" \ 43 | -p "${LOCALSTACK_PORT}:4566" \ 44 | localstack/localstack:latest >/dev/null 45 | tonbo_localstack_started_by_script=1 46 | tonbo_localstack_available=1 47 | 48 | echo -n "Waiting for LocalStack to become ready" 49 | until docker exec "${LOCALSTACK_CONTAINER}" awslocal s3api list-buckets >/dev/null 2>&1; do 50 | sleep 1 51 | printf '.' 52 | done 53 | echo 54 | 55 | echo "Provisioning S3 bucket ${BUCKET_NAME}..." 
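        # create-bucket can fail when the bucket already exists; `|| true` keeps this idempotent.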
56 | docker exec "${LOCALSTACK_CONTAINER}" awslocal s3api create-bucket --bucket "${BUCKET_NAME}" >/dev/null 2>&1 || true 57 | else 58 | echo "docker is required to start LocalStack; skipping LocalStack startup" >&2 59 | fi 60 | } 61 | 62 | export_s3_env() { 63 | if [ "${tonbo_localstack_available}" -ne 1 ]; then 64 | return 1 65 | fi 66 | export TONBO_S3_ENDPOINT="http://localhost:${LOCALSTACK_PORT}" 67 | export TONBO_S3_BUCKET="${BUCKET_NAME}" 68 | export TONBO_S3_REGION="${AWS_REGION}" 69 | export TONBO_S3_ACCESS_KEY="${AWS_ACCESS_KEY_ID}" 70 | export TONBO_S3_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}" 71 | export TONBO_LOCALSTACK_CONTAINER="${LOCALSTACK_CONTAINER}" 72 | export TONBO_LOCALSTACK_STARTED_BY_SCRIPT="${tonbo_localstack_started_by_script}" 73 | } 74 | 75 | # If executed directly, start LocalStack and print exports for convenience. 76 | if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then 77 | ensure_localstack 78 | if export_s3_env; then 79 | cat <<EOF 80 | export TONBO_S3_ENDPOINT=${TONBO_S3_ENDPOINT} 81 | export TONBO_S3_BUCKET=${TONBO_S3_BUCKET} 82 | export TONBO_S3_REGION=${TONBO_S3_REGION} 83 | export TONBO_S3_ACCESS_KEY=${TONBO_S3_ACCESS_KEY} 84 | export TONBO_S3_SECRET_KEY=${TONBO_S3_SECRET_KEY} 85 | export TONBO_LOCALSTACK_CONTAINER=${TONBO_LOCALSTACK_CONTAINER} 86 | export TONBO_LOCALSTACK_STARTED_BY_SCRIPT=${TONBO_LOCALSTACK_STARTED_BY_SCRIPT} 87 | EOF 88 | else 89 | echo "LocalStack not available; TONBO_S3_* not exported." >&2 90 | exit 1 91 | fi 92 | else 93 | ensure_localstack 94 | export_s3_env || true 95 | fi 96 | -------------------------------------------------------------------------------- /examples/10_dynamic/10c_dynamic_composite.rs: -------------------------------------------------------------------------------- 1 | // 04: Dynamic (runtime) schema: composite keys via metadata ordinals 2 | 3 | use std::{collections::HashMap, sync::Arc}; 4 | 5 | use fusio::{executor::NoopExecutor, mem::fs::InMemoryFs}; 6 | use tonbo::prelude::*; 7 | use typed_arrow::{ 8 | arrow_array::RecordBatch, 9 | arrow_schema::{DataType, Field, Schema}, 10 | }; 11 | use typed_arrow_dyn::{DynBuilders, DynCell, DynRow}; 12 | 13 | fn build_batch(schema: Arc<Schema>, rows: Vec<DynRow>) -> RecordBatch { 14 | let mut builders = DynBuilders::new(schema.clone(), rows.len()); 15 | for row in rows { 16 | builders.append_option_row(Some(row)).expect("append row"); 17 | } 18 | builders.try_finish_into_batch().expect("record batch") 19 | } 20 | 21 | #[tokio::main] 22 | async fn main() { 23 | // Field-level metadata: tonbo.key ordinals define lexicographic order 24 | let mut m1 = HashMap::new(); 25 | m1.insert("tonbo.key".to_string(), "1".to_string()); 26 | let mut m2 = HashMap::new(); 27 | m2.insert("tonbo.key".to_string(), "2".to_string()); 28 | let f_id = Field::new("id", DataType::Utf8, false).with_metadata(m1); 29 | let f_ts = Field::new("ts", DataType::Int64, false).with_metadata(m2); 30 | let f_v = Field::new("v", DataType::Int32, false); 31 | let schema = Arc::new(Schema::new(vec![f_id, f_ts, f_v])); 32 | 33 | // Create DB from metadata 34 | let executor = Arc::new(NoopExecutor); 35 | let db: DB<InMemoryFs, NoopExecutor> = DbBuilder::from_schema_metadata(schema.clone()) 36 | .expect("metadata config") 37 | .in_memory("dynamic-composite") 38 | .expect("in_memory config") 39 | .open_with_executor(Arc::clone(&executor)) 40 | .await 41 | .expect("composite ok"); 42 | 43 | // Build a batch with three rows 44 | let rows = vec![ 45 | DynRow(vec![ 46 | Some(DynCell::Str("a".into())), 47 | Some(DynCell::I64(10)), 
48 | Some(DynCell::I32(1)), 49 | ]), 50 | DynRow(vec![ 51 | Some(DynCell::Str("a".into())), 52 | Some(DynCell::I64(5)), 53 | Some(DynCell::I32(2)), 54 | ]), 55 | DynRow(vec![ 56 | Some(DynCell::Str("b".into())), 57 | Some(DynCell::I64(1)), 58 | Some(DynCell::I32(3)), 59 | ]), 60 | ]; 61 | let batch: RecordBatch = build_batch(schema.clone(), rows); 62 | db.ingest(batch).await.expect("insert"); 63 | 64 | // Predicate over composite key: id = 'a' AND ts BETWEEN 5 AND 10 65 | let pred = Predicate::and(vec![ 66 | Predicate::eq(ColumnRef::new("id"), ScalarValue::from("a")), 67 | Predicate::and(vec![ 68 | Predicate::gte(ColumnRef::new("ts"), ScalarValue::from(5i64)), 69 | Predicate::lte(ColumnRef::new("ts"), ScalarValue::from(10i64)), 70 | ]), 71 | ]); 72 | 73 | let got: Vec<(String, i64)> = db 74 | .scan() 75 | .filter(pred) 76 | .collect() 77 | .await 78 | .expect("collect") 79 | .into_iter() 80 | .flat_map(|batch| { 81 | let ids = batch 82 | .column(0) 83 | .as_any() 84 | .downcast_ref::<typed_arrow::arrow_array::StringArray>() 85 | .expect("id col"); 86 | let ts = batch 87 | .column(1) 88 | .as_any() 89 | .downcast_ref::<typed_arrow::arrow_array::Int64Array>() 90 | .expect("ts col"); 91 | ids.iter() 92 | .zip(ts.iter()) 93 | .filter_map(|(id, t)| Some((id?.to_string(), t?))) 94 | .collect::<Vec<_>>() 95 | }) 96 | .collect(); 97 | println!("dynamic composite range rows: {:?}", got); 98 | } 99 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2024" 3 | name = "tonbo" 4 | resolver = "2" 5 | version = "0.4.0-a0" 6 | description = "Embedded database for serverless and edge runtimes, storing data as Parquet on S3" 7 | license = "Apache-2.0" 8 | repository = "https://github.com/tonbo-io/tonbo" 9 | readme = "README.md" 10 | 11 | [workspace] 12 | members = [".", "predicate"] 13 | 14 | 15 | [features] 16 | default = ["tokio", "typed-arrow"] 17 | tokio = [ 18 | "dep:tokio", 19 | "fusio-manifest/tokio", 20 | "fusio/executor-tokio", 21 | "fusio/fs", 22 | "fusio/tokio", 23 | "fusio/tokio-http", 24 | ] 25 | typed-arrow = ["dep:typed-arrow"] 26 | # Web/wasm build using Fusio's WebExecutor and wasm-http client stack. 27 | web = [ 28 | "fusio-manifest/web", 29 | "fusio-parquet/executor-web", 30 | "fusio/aws", 31 | "fusio/executor-web", 32 | "fusio/web-http", 33 | ] 34 | # Optional OPFS support layered on the web stack. 35 | web-opfs = ["fusio/opfs", "web"] 36 | # Enable this to compile the optional S3 smoke test in `tests/s3_smoke.rs`. 
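# Typically exercised via `./tests/s3_smoke.sh`, which runs
# `cargo test --features s3-smoke --test s3_smoke` against LocalStack.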
37 | s3-smoke = [] 38 | 39 | [dependencies] 40 | anyhow = "1" 41 | arrow-array = "56.2.0" 42 | arrow-buffer = "56.2.0" 43 | arrow-ipc = "56.1.0" 44 | arrow-schema = { version = "56.2.0", features = ["serde"] } 45 | arrow-select = "56.2.0" 46 | crc32c = "0.6" 47 | crossbeam-skiplist = "0.1" 48 | fusio = { version = "0.5.0", default-features = false, features = [ 49 | "aws", 50 | "dyn", 51 | "executor", 52 | "fs", 53 | ] } 54 | fusio-manifest = { version = "0.5.0", package = "fusio-manifest", default-features = false, features = [ 55 | "std", 56 | ] } 57 | fusio-parquet = { version = "0.5.0", package = "fusio-parquet" } 58 | futures = "0.3" 59 | lockable = "0.2" 60 | once_cell = "1" 61 | parking_lot = "0.12" 62 | parquet = { version = "56.2.0", default-features = false, features = [ 63 | "async", 64 | "zstd", 65 | ] } 66 | pin-project-lite = "0.2" 67 | serde = { version = "1", features = ["derive", "rc"] } 68 | serde_json = "1" 69 | sha2 = "0.10" 70 | thiserror = "2.0.17" 71 | tokio = { version = "1", default-features = false, features = [ 72 | "macros", 73 | "net", 74 | "rt", 75 | "rt-multi-thread", 76 | "sync", 77 | "time", 78 | ], optional = true } 79 | tonbo-predicate = { version = "0.1.0", path = "predicate" } 80 | typed-arrow = { version = "0.5.1", features = ["ext-hooks"], optional = true } 81 | typed-arrow-dyn = { version = "0.0.6", features = ["serde"] } 82 | ulid = { version = "1", features = ["serde"] } 83 | 84 | [target.'cfg(target_arch = "wasm32")'.dependencies] 85 | # Ensure getrandom picks the JS backend via cfg flag when building wasm32-unknown-unknown. 86 | getrandom = { version = "0.3", features = ["wasm_js"] } 87 | js-sys = "0.3" 88 | 89 | 90 | [dev-dependencies] 91 | clap = { version = "4.5.4", features = ["derive"] } 92 | futures = "0.3" 93 | tempfile = "3" 94 | typed-arrow = { version = "0.5.1", features = ["ext-hooks"] } 95 | 96 | [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] 97 | tokio = { version = "1", default-features = false, features = [ 98 | "macros", 99 | "net", 100 | "rt", 101 | "rt-multi-thread", 102 | "sync", 103 | "time", 104 | ] } 105 | 106 | [target.'cfg(target_arch = "wasm32")'.dev-dependencies] 107 | wasm-bindgen = "0.2" 108 | wasm-bindgen-futures = "0.4" 109 | wasm-bindgen-test = "0.3" 110 | 111 | [[test]] 112 | name = "read_smoke" 113 | path = "tests/read_smoke.rs" 114 | required-features = ["tokio"] 115 | 116 | [[test]] 117 | name = "s3_smoke" 118 | path = "tests/s3_smoke.rs" 119 | required-features = ["tokio"] 120 | 121 | [[example]] 122 | name = "10a_dynamic_basic" 123 | path = "examples/10_dynamic/10a_dynamic_basic.rs" 124 | 125 | [[example]] 126 | name = "10b_dynamic_metadata" 127 | path = "examples/10_dynamic/10b_dynamic_metadata.rs" 128 | 129 | [[example]] 130 | name = "10c_dynamic_composite" 131 | path = "examples/10_dynamic/10c_dynamic_composite.rs" 132 | 133 | [[example]] 134 | name = "10d_dynamic_transaction" 135 | path = "examples/10_dynamic/10d_dynamic_transaction.rs" 136 | -------------------------------------------------------------------------------- /examples/10_dynamic/10d_dynamic_transaction.rs: -------------------------------------------------------------------------------- 1 | // 05: Transactional writes (strict WAL) with optimistic staging and commit 2 | 3 | use fusio::{disk::LocalFs, executor::tokio::TokioExecutor}; 4 | use futures::StreamExt; 5 | use tonbo::prelude::*; 6 | use typed_arrow::{ 7 | Record, 8 | arrow_array::{Int32Array, StringArray}, 9 | schema::SchemaMeta, 10 | }; 11 | 12 | #[derive(Record)] 13 | 
struct UserRow { 14 | id: String, 15 | v: Option<i32>, 16 | } 17 | 18 | #[tokio::main] 19 | async fn main() { 20 | // Compile-time schema via typed-arrow derive; v is nullable, id is not. 21 | let schema = <UserRow as SchemaMeta>::schema(); 22 | 23 | // Configure dynamic mode with strict (durable) commit acknowledgements. 24 | // Use a temporary on-disk layout to enable WAL-backed transactions. 25 | let db = DbBuilder::from_schema_key_name(schema.clone(), "id") 26 | .expect("config") 27 | .with_commit_ack_mode(CommitAckMode::Strict) 28 | .on_disk("/tmp/tonbo") 29 | .expect("on_disk") 30 | .open() 31 | .await 32 | .expect("open db"); 33 | 34 | // // Build a RecordBatch using typed-arrow row builders. 35 | // let rows = vec![ 36 | // UserRow { 37 | // id: "user-1".into(), 38 | // v: Some(10), 39 | // }, 40 | // UserRow { 41 | // id: "user-2".into(), 42 | // v: None, // demonstrate nullable value 43 | // }, 44 | // ]; 45 | // let mut builders = <UserRow as BuildRows>::new_builders(rows.len()); 46 | // builders.append_rows(rows); 47 | // let batch = builders.finish().into_record_batch(); 48 | 49 | // // Begin a transaction and stage mutations. 50 | let tx = db.begin_transaction().await.expect("begin tx"); 51 | // tx.upsert_batch(&batch).expect("stage batch"); 52 | // tx.delete("ghost").expect("stage delete"); 53 | 54 | // Read-your-writes inside the transaction. 55 | let pred = Predicate::eq(ColumnRef::new("id"), ScalarValue::from("user-1")); 56 | let preview_batches = tx.scan().filter(pred).collect().await.expect("preview"); 57 | let mut preview_rows = Vec::new(); 58 | for batch in &preview_batches { 59 | let ids = batch 60 | .column(0) 61 | .as_any() 62 | .downcast_ref::<StringArray>() 63 | .expect("id col"); 64 | let vals = batch 65 | .column(1) 66 | .as_any() 67 | .downcast_ref::<Int32Array>() 68 | .expect("v col"); 69 | for (id, v) in ids.iter().zip(vals.iter()) { 70 | let key = id.unwrap_or("<null>").to_string(); 71 | let val = v.unwrap_or(0); 72 | preview_rows.push(format!("id={key}, v={val}")); 73 | } 74 | } 75 | println!("preview rows: {:?}", preview_rows); 76 | 77 | // Commit with strict WAL durability. 78 | tx.commit().await.expect("commit"); 79 | 80 | // Post-commit read via the public scan path. 81 | let all_pred = Predicate::is_not_null(ColumnRef::new("id")); 82 | let committed = scan_pairs(&db, &all_pred).await; 83 | println!("committed rows: {:?}", committed); 84 | } 85 | 86 | async fn scan_pairs(db: &DB<LocalFs, TokioExecutor>, predicate: &Predicate) -> Vec<(String, i32)> { 87 | let mut stream = db 88 | .scan() 89 | .filter(predicate.clone()) 90 | .stream() 91 | .await 92 | .expect("scan"); 93 | let mut out = Vec::new(); 94 | while let Some(batch) = stream.next().await { 95 | let batch = batch.expect("batch"); 96 | let ids = batch 97 | .column(0) 98 | .as_any() 99 | .downcast_ref::<StringArray>() 100 | .expect("id col"); 101 | let vals = batch 102 | .column(1) 103 | .as_any() 104 | .downcast_ref::<Int32Array>() 105 | .expect("v col"); 106 | for (id, v) in ids.iter().zip(vals.iter()) { 107 | if let Some(id) = id { 108 | out.push((id.to_string(), v.unwrap_or_default())); 109 | } 110 | } 111 | } 112 | out 113 | } 114 | -------------------------------------------------------------------------------- /predicate/src/core/builder.rs: -------------------------------------------------------------------------------- 1 | //! Small helpers for building predicates. 
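//!
//! A small illustrative sketch (the column names and literals are arbitrary):
//!
//! ```ignore
//! use tonbo_predicate::{ColumnRef, Predicate, ScalarValue};
//!
//! // id == "a" AND ts >= 5
//! let pred = Predicate::and(vec![
//!     Predicate::eq(ColumnRef::new("id"), ScalarValue::from("a")),
//!     Predicate::gte(ColumnRef::new("ts"), ScalarValue::from(5i64)),
//! ]);
//! ```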
2 | use super::{ComparisonOp, Operand, Predicate, PredicateNode, ScalarValue}; 3 | 4 | /// Convenience constructors mirroring DataFusion-style expression helpers. 5 | impl Predicate { 6 | /// Returns a predicate that always evaluates to true (matches all rows). 7 | #[must_use] 8 | pub fn always() -> Self { 9 | Predicate::from_kind(PredicateNode::True) 10 | } 11 | 12 | /// Create a comparison predicate. 13 | #[must_use] 14 | pub fn compare<L, R>(left: L, op: ComparisonOp, right: R) -> Self 15 | where 16 | L: Into<Operand>, 17 | R: Into<Operand>, 18 | { 19 | Predicate::from_kind(PredicateNode::Compare { 20 | left: left.into(), 21 | op, 22 | right: right.into(), 23 | }) 24 | } 25 | 26 | /// Equality predicate. 27 | #[must_use] 28 | pub fn eq<L, R>(left: L, right: R) -> Self 29 | where 30 | L: Into<Operand>, 31 | R: Into<Operand>, 32 | { 33 | Self::compare(left, ComparisonOp::Equal, right) 34 | } 35 | 36 | /// Inequality predicate. 37 | #[must_use] 38 | pub fn neq<L, R>(left: L, right: R) -> Self 39 | where 40 | L: Into<Operand>, 41 | R: Into<Operand>, 42 | { 43 | Self::compare(left, ComparisonOp::NotEqual, right) 44 | } 45 | 46 | /// Less-than predicate. 47 | #[must_use] 48 | pub fn lt<L, R>(left: L, right: R) -> Self 49 | where 50 | L: Into<Operand>, 51 | R: Into<Operand>, 52 | { 53 | Self::compare(left, ComparisonOp::LessThan, right) 54 | } 55 | 56 | /// Less-than-or-equal predicate. 57 | #[must_use] 58 | pub fn lte<L, R>(left: L, right: R) -> Self 59 | where 60 | L: Into<Operand>, 61 | R: Into<Operand>, 62 | { 63 | Self::compare(left, ComparisonOp::LessThanOrEqual, right) 64 | } 65 | 66 | /// Greater-than predicate. 67 | #[must_use] 68 | pub fn gt<L, R>(left: L, right: R) -> Self 69 | where 70 | L: Into<Operand>, 71 | R: Into<Operand>, 72 | { 73 | Self::compare(left, ComparisonOp::GreaterThan, right) 74 | } 75 | 76 | /// Greater-than-or-equal predicate. 77 | #[must_use] 78 | pub fn gte<L, R>(left: L, right: R) -> Self 79 | where 80 | L: Into<Operand>, 81 | R: Into<Operand>, 82 | { 83 | Self::compare(left, ComparisonOp::GreaterThanOrEqual, right) 84 | } 85 | 86 | /// `IN` list predicate. 87 | #[must_use] 88 | pub fn in_list<O, I>(expr: O, list: I) -> Self 89 | where 90 | O: Into<Operand>, 91 | I: IntoIterator<Item = ScalarValue>, 92 | { 93 | Predicate::from_kind(PredicateNode::InList { 94 | expr: expr.into(), 95 | list: list.into_iter().collect(), 96 | negated: false, 97 | }) 98 | } 99 | 100 | /// `NOT IN` list predicate. 101 | #[must_use] 102 | pub fn not_in_list<O, I>(expr: O, list: I) -> Self 103 | where 104 | O: Into<Operand>, 105 | I: IntoIterator<Item = ScalarValue>, 106 | { 107 | Predicate::from_kind(PredicateNode::InList { 108 | expr: expr.into(), 109 | list: list.into_iter().collect(), 110 | negated: true, 111 | }) 112 | } 113 | 114 | /// `IS NULL` predicate. 115 | #[must_use] 116 | pub fn is_null<O>(expr: O) -> Self 117 | where 118 | O: Into<Operand>, 119 | { 120 | Predicate::from_kind(PredicateNode::IsNull { 121 | expr: expr.into(), 122 | negated: false, 123 | }) 124 | } 125 | 126 | /// `IS NOT NULL` predicate. 127 | #[must_use] 128 | pub fn is_not_null<O>(expr: O) -> Self 129 | where 130 | O: Into<Operand>, 131 | { 132 | Predicate::from_kind(PredicateNode::IsNull { 133 | expr: expr.into(), 134 | negated: true, 135 | }) 136 | } 137 | 138 | /// Logical negation. 
139 | #[must_use] 140 | #[allow(clippy::should_implement_trait)] 141 | pub fn not(self) -> Self { 142 | Predicate::from_kind(PredicateNode::Not(Box::new(self))) 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /src/tests_internal/backend.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | use std::{ 4 | env, 5 | path::PathBuf, 6 | sync::Arc, 7 | time::{Duration, SystemTime, UNIX_EPOCH}, 8 | }; 9 | 10 | use fusio::{DynFs, disk::LocalFs, path::Path as FusioPath}; 11 | 12 | use crate::{ 13 | db::{AwsCreds, ObjectSpec, S3Spec, WalConfig}, 14 | wal::{WalSyncPolicy, state::FsWalStateStore}, 15 | }; 16 | 17 | /// Local filesystem backend harness metadata. 18 | pub struct LocalHarness { 19 | pub root: PathBuf, 20 | pub wal_dir: PathBuf, 21 | pub wal_config: WalConfig, 22 | pub cleanup: Option<Box<dyn FnOnce() + Send>>, 23 | } 24 | 25 | /// S3/object-store backend harness metadata. 26 | pub struct S3Harness { 27 | pub object: ObjectSpec, 28 | pub wal_config: WalConfig, 29 | } 30 | 31 | /// Common WAL tuning for e2e forcing small segments and fast flush. 32 | pub fn wal_tuning(policy: WalSyncPolicy) -> WalConfig { 33 | WalConfig::default() 34 | .segment_max_bytes(256) 35 | .flush_interval(Duration::from_millis(1)) 36 | .sync_policy(policy) 37 | } 38 | 39 | fn workspace_temp_dir(prefix: &str) -> PathBuf { 40 | let base = std::env::current_dir().expect("cwd"); 41 | let dir = base.join("target").join("tmp").join(format!( 42 | "{prefix}-{}", 43 | SystemTime::now() 44 | .duration_since(UNIX_EPOCH) 45 | .expect("time") 46 | .as_nanos() 47 | )); 48 | std::fs::create_dir_all(&dir).expect("create workspace temp dir"); 49 | dir 50 | } 51 | 52 | /// Local filesystem backend harness. 53 | pub fn local_harness( 54 | label: &str, 55 | wal_cfg: WalConfig, 56 | ) -> Result<LocalHarness, Box<dyn std::error::Error>> { 57 | let root = workspace_temp_dir(label); 58 | let wal_dir = root.join("wal"); 59 | std::fs::create_dir_all(&wal_dir)?; 60 | 61 | let wal_path = FusioPath::from_filesystem_path(&wal_dir)?; 62 | let wal_fs = Arc::new(LocalFs {}); 63 | let wal_backend: Arc<dyn DynFs> = wal_fs.clone(); 64 | let wal_state = Arc::new(FsWalStateStore::new(wal_fs)); 65 | let wal_config = wal_cfg 66 | .clone() 67 | .wal_dir(wal_path) 68 | .segment_backend(wal_backend) 69 | .state_store(Some(wal_state)); 70 | 71 | Ok(LocalHarness { 72 | root: root.clone(), 73 | wal_dir, 74 | wal_config, 75 | cleanup: Some(Box::new(move || { 76 | if let Err(err) = std::fs::remove_dir_all(&root) { 77 | eprintln!("cleanup failed for {:?}: {err}", &root); 78 | } 79 | })), 80 | }) 81 | } 82 | 83 | fn s3_env() -> Option<(String, String, String, String, String, Option<String>)> { 84 | let endpoint = env::var("TONBO_S3_ENDPOINT").ok()?; 85 | let bucket = env::var("TONBO_S3_BUCKET").ok()?; 86 | let region = env::var("TONBO_S3_REGION").ok()?; 87 | let access = env::var("TONBO_S3_ACCESS_KEY").ok()?; 88 | let secret = env::var("TONBO_S3_SECRET_KEY").ok()?; 89 | let session = env::var("TONBO_S3_SESSION_TOKEN").ok(); 90 | Some((endpoint, bucket, region, access, secret, session)) 91 | } 92 | 93 | fn unique_label(base: &str) -> String { 94 | let nanos = SystemTime::now() 95 | .duration_since(UNIX_EPOCH) 96 | .unwrap_or_else(|_| Duration::from_secs(0)) 97 | .as_nanos(); 98 | format!("{base}-{nanos}") 99 | } 100 | 101 | /// S3/object-store backend harness. Returns None when env is not present. 
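///
/// "Env" here means the `TONBO_S3_ENDPOINT`, `TONBO_S3_BUCKET`, `TONBO_S3_REGION`,
/// `TONBO_S3_ACCESS_KEY`, and `TONBO_S3_SECRET_KEY` variables (plus an optional
/// `TONBO_S3_SESSION_TOKEN`) read by `s3_env`.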
102 | pub fn maybe_s3_harness( 103 | label: &str, 104 | wal_cfg: WalConfig, 105 | ) -> Result<Option<S3Harness>, Box<dyn std::error::Error>> { 106 | let Some((endpoint, bucket, region, access, secret, session)) = s3_env() else { 107 | return Ok(None); 108 | }; 109 | 110 | let credentials = match session { 111 | Some(token) => AwsCreds::with_session_token(access, secret, token), 112 | None => AwsCreds::new(access, secret), 113 | }; 114 | 115 | let mut s3 = S3Spec::new(bucket.clone(), unique_label(label), credentials); 116 | s3.endpoint = Some(endpoint); 117 | s3.region = Some(region); 118 | s3.sign_payload = Some(true); 119 | 120 | let object = ObjectSpec::s3(s3); 121 | 122 | Ok(Some(S3Harness { 123 | object, 124 | wal_config: wal_cfg, 125 | })) 126 | } 127 | -------------------------------------------------------------------------------- /examples/04_s3.rs: -------------------------------------------------------------------------------- 1 | //! S3 Object Storage: serverless database on S3-compatible storage 2 | //! 3 | //! This example shows how to use Tonbo with S3 (or MinIO, R2, etc.) 4 | //! The database is just a manifest and Parquet files - no server process needed. 5 | //! 6 | //! Required environment variables: 7 | //! TONBO_S3_BUCKET - S3 bucket name 8 | //! TONBO_S3_ENDPOINT - S3 endpoint URL (for MinIO/R2/LocalStack) 9 | //! TONBO_S3_REGION - AWS region (e.g., "us-east-1") 10 | //! AWS_ACCESS_KEY_ID - Access key 11 | //! AWS_SECRET_ACCESS_KEY - Secret key 12 | //! 13 | //! For local testing with MinIO: 14 | //! docker run -p 9000:9000 -p 9001:9001 minio/minio server /data --console-address ":9001" 15 | //! # Create bucket "tonbo-test" in MinIO console at http://localhost:9001 16 | //! 17 | //! Then run: 18 | //! TONBO_S3_BUCKET=tonbo-test \ 19 | //! TONBO_S3_ENDPOINT=http://localhost:9000 \ 20 | //! TONBO_S3_REGION=us-east-1 \ 21 | //! AWS_ACCESS_KEY_ID=minioadmin \ 22 | //! AWS_SECRET_ACCESS_KEY=minioadmin \ 23 | //! cargo run --example 04_s3 24 | //! 25 | //! Run: cargo run --example 04_s3 26 | 27 | use std::env; 28 | 29 | use tonbo::{ 30 | db::{AwsCreds, ObjectSpec, S3Spec}, 31 | prelude::*, 32 | }; 33 | 34 | #[derive(Record)] 35 | struct Event { 36 | #[metadata(k = "tonbo.key", v = "true")] 37 | id: String, 38 | event_type: String, 39 | payload: Option<String>, 40 | } 41 | 42 | #[tokio::main] 43 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 44 | // Read S3 configuration from environment 45 | let bucket = env::var("TONBO_S3_BUCKET").map_err(|_| "TONBO_S3_BUCKET not set")?; 46 | let endpoint = env::var("TONBO_S3_ENDPOINT").ok(); 47 | let region = env::var("TONBO_S3_REGION").unwrap_or_else(|_| "us-east-1".into()); 48 | let credentials = AwsCreds::from_env()?; 49 | 50 | // Create S3 specification 51 | // The "prefix" is a folder path within the bucket for this table 52 | let prefix = format!("tonbo-example-{}", std::process::id()); 53 | let mut s3_spec = S3Spec::new(&bucket, &prefix, credentials); 54 | s3_spec.endpoint = endpoint.clone(); 55 | s3_spec.region = Some(region.clone()); 56 | s3_spec.sign_payload = Some(true); // Required for MinIO 57 | 58 | println!("Connecting to S3..."); 59 | println!(" Bucket: {}", bucket); 60 | println!(" Prefix: {}", prefix); 61 | if let Some(ep) = &endpoint { 62 | println!(" Endpoint: {}", ep); 63 | } 64 | 65 | // Open database on S3 66 | let db = DbBuilder::from_schema(Event::schema())? 67 | .object_store(ObjectSpec::s3(s3_spec))? 
68 | .open() 69 | .await?; 70 | 71 | println!("\nDatabase opened on S3!"); 72 | 73 | // Insert data - writes go to S3 74 | let events = vec![ 75 | Event { 76 | id: "evt-001".into(), 77 | event_type: "user.created".into(), 78 | payload: Some(r#"{"user_id": 42}"#.into()), 79 | }, 80 | Event { 81 | id: "evt-002".into(), 82 | event_type: "order.placed".into(), 83 | payload: Some(r#"{"order_id": 123}"#.into()), 84 | }, 85 | Event { 86 | id: "evt-003".into(), 87 | event_type: "user.created".into(), 88 | payload: None, 89 | }, 90 | ]; 91 | 92 | let mut builders = Event::new_builders(events.len()); 93 | builders.append_rows(events); 94 | db.ingest(builders.finish().into_record_batch()).await?; 95 | 96 | println!("Inserted 3 events to S3"); 97 | 98 | // Query from S3 99 | let filter = Predicate::eq( 100 | ColumnRef::new("event_type"), 101 | ScalarValue::from("user.created"), 102 | ); 103 | let batches = db.scan().filter(filter).collect().await?; 104 | 105 | println!("\nEvents where event_type = 'user.created':"); 106 | for batch in &batches { 107 | for event in batch.iter_views::<Event>()?.try_flatten()? { 108 | let payload = event.payload.unwrap_or("(none)"); 109 | println!(" {} - {} | {}", event.id, event.event_type, payload); 110 | } 111 | } 112 | 113 | println!("\nData is stored as Parquet files on S3:"); 114 | println!(" s3://{}/{}/", bucket, prefix); 115 | 116 | Ok(()) 117 | } 118 | -------------------------------------------------------------------------------- /examples/07_streaming.rs: -------------------------------------------------------------------------------- 1 | //! Streaming: process large datasets without loading everything into memory 2 | //! 3 | //! Run: cargo run --example 07_streaming 4 | 5 | use futures::StreamExt; 6 | use tonbo::prelude::*; 7 | 8 | #[derive(Record)] 9 | struct LogEntry { 10 | #[metadata(k = "tonbo.key", v = "true")] 11 | id: i64, 12 | level: String, 13 | message: String, 14 | } 15 | 16 | #[tokio::main] 17 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 18 | let db = DbBuilder::from_schema(LogEntry::schema())? 19 | .on_disk("/tmp/tonbo_streaming")? 
20 | .open() 21 | .await?; 22 | 23 | // Insert a batch of log entries 24 | let entries: Vec<LogEntry> = (0..1000) 25 | .map(|i| LogEntry { 26 | id: i, 27 | level: match i % 3 { 28 | 0 => "INFO".into(), 29 | 1 => "WARN".into(), 30 | _ => "ERROR".into(), 31 | }, 32 | message: format!("Log message #{}", i), 33 | }) 34 | .collect(); 35 | 36 | let mut builders = LogEntry::new_builders(entries.len()); 37 | builders.append_rows(entries); 38 | db.ingest(builders.finish().into_record_batch()).await?; 39 | 40 | println!("Inserted 1000 log entries\n"); 41 | 42 | // Method 1: collect() - loads all matching rows into memory 43 | // Good for small result sets 44 | println!("=== Method 1: collect() ==="); 45 | let filter = Predicate::eq(ColumnRef::new("level"), ScalarValue::from("ERROR")); 46 | let batches = db.scan().filter(filter).collect().await?; 47 | let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); 48 | println!( 49 | "Collected {} ERROR entries in {} batches\n", 50 | total_rows, 51 | batches.len() 52 | ); 53 | 54 | // Method 2: stream() - process batches one at a time 55 | // Good for large result sets or when you want to stop early 56 | println!("=== Method 2: stream() ==="); 57 | let filter = Predicate::eq(ColumnRef::new("level"), ScalarValue::from("WARN")); 58 | let mut stream = db.scan().filter(filter).stream().await?; 59 | 60 | let mut batch_count = 0; 61 | let mut row_count = 0; 62 | while let Some(result) = stream.next().await { 63 | let batch = result?; 64 | batch_count += 1; 65 | row_count += batch.num_rows(); 66 | println!(" Batch {}: {} rows", batch_count, batch.num_rows()); 67 | } 68 | println!( 69 | "Streamed {} WARN entries in {} batches\n", 70 | row_count, batch_count 71 | ); 72 | 73 | // Method 3: stream() with early termination 74 | // Process until you find what you need 75 | println!("=== Method 3: stream() with early exit ==="); 76 | let filter = Predicate::eq(ColumnRef::new("level"), ScalarValue::from("INFO")); 77 | let mut stream = db.scan().filter(filter).stream().await?; 78 | 79 | let mut found_count = 0; 80 | let target = 5; 81 | 'outer: while let Some(result) = stream.next().await { 82 | let batch = result?; 83 | for entry in batch.iter_views::<LogEntry>()?.try_flatten()? { 84 | println!(" Found: id={}, message={}", entry.id, entry.message); 85 | found_count += 1; 86 | if found_count >= target { 87 | println!(" (stopping after {} entries)", target); 88 | break 'outer; 89 | } 90 | } 91 | } 92 | 93 | // Method 4: stream() for aggregation without storing all data 94 | println!("\n=== Method 4: stream() for aggregation ==="); 95 | let mut stream = db.scan().stream().await?; 96 | 97 | let mut info_count = 0; 98 | let mut warn_count = 0; 99 | let mut error_count = 0; 100 | 101 | while let Some(result) = stream.next().await { 102 | let batch = result?; 103 | for entry in batch.iter_views::<LogEntry>()?.try_flatten()? { 104 | match entry.level.as_ref() { 105 | "INFO" => info_count += 1, 106 | "WARN" => warn_count += 1, 107 | "ERROR" => error_count += 1, 108 | _ => {} 109 | } 110 | } 111 | } 112 | println!( 113 | "Log level counts: INFO={}, WARN={}, ERROR={}", 114 | info_count, warn_count, error_count 115 | ); 116 | 117 | Ok(()) 118 | } 119 | -------------------------------------------------------------------------------- /predicate/src/core/visitor.rs: -------------------------------------------------------------------------------- 1 | use super::{Predicate, PredicateNode}; 2 | 3 | /// Result produced while evaluating parts of a predicate tree. 
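///
/// A sketch (not compiled) of the three shapes an outcome can take, assuming a boolean
/// visitor value and a `some_predicate: Predicate` built elsewhere:
///
/// ```ignore
/// // Fully evaluated: the visited sub-tree produced a value.
/// let done: VisitOutcome<bool> = VisitOutcome::value(true);
/// // Could not be evaluated here: hand the predicate back as a residual.
/// let push_up: VisitOutcome<bool> = VisitOutcome::residual(some_predicate);
/// // Nothing to report for this sub-tree.
/// let nothing: VisitOutcome<bool> = VisitOutcome::empty();
/// ```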
4 | #[derive(Clone, Debug, Default)] 5 | pub struct VisitOutcome<T> { 6 | /// Computed value for the evaluated portion, when available. 7 | pub value: Option<T>, 8 | /// Residual predicate that still needs evaluation elsewhere. 9 | pub residual: Option<Predicate>, 10 | } 11 | 12 | impl<T> VisitOutcome<T> { 13 | /// Outcome containing only a computed value. 14 | pub fn value(value: T) -> Self { 15 | Self { 16 | value: Some(value), 17 | residual: None, 18 | } 19 | } 20 | 21 | /// Outcome containing only a residual predicate. 22 | pub fn residual(residual: Predicate) -> Self { 23 | Self { 24 | value: None, 25 | residual: Some(residual), 26 | } 27 | } 28 | 29 | /// Outcome without value or residual. 30 | pub fn empty() -> Self { 31 | Self { 32 | value: None, 33 | residual: None, 34 | } 35 | } 36 | } 37 | 38 | /// Visitor that walks predicate trees and emits custom results plus residual predicates. 39 | pub trait PredicateVisitor { 40 | /// Error type used when evaluation fails. 41 | type Error; 42 | /// Concrete value type produced while walking the predicate. 43 | type Value; 44 | 45 | /// Evaluates a leaf predicate and returns its result. 46 | fn visit_leaf( 47 | &mut self, 48 | leaf: &PredicateNode, 49 | ) -> Result<VisitOutcome<Self::Value>, Self::Error>; 50 | 51 | /// Combines the result of a negated child predicate. 52 | fn combine_not( 53 | &mut self, 54 | original: &Predicate, 55 | child: VisitOutcome<Self::Value>, 56 | ) -> Result<VisitOutcome<Self::Value>, Self::Error>; 57 | 58 | /// Combines an `AND` clause from the supplied child results. 59 | fn combine_and( 60 | &mut self, 61 | original: &Predicate, 62 | children: Vec<VisitOutcome<Self::Value>>, 63 | ) -> Result<VisitOutcome<Self::Value>, Self::Error>; 64 | 65 | /// Combines an `OR` clause from the supplied child results. 66 | fn combine_or( 67 | &mut self, 68 | original: &Predicate, 69 | children: Vec<VisitOutcome<Self::Value>>, 70 | ) -> Result<VisitOutcome<Self::Value>, Self::Error>; 71 | 72 | /// Visits the supplied predicate by walking the expression tree. 73 | fn visit_predicate( 74 | &mut self, 75 | predicate: &Predicate, 76 | ) -> Result<VisitOutcome<Self::Value>, Self::Error> { 77 | self.visit_node(predicate.kind(), predicate) 78 | } 79 | 80 | /// Internal helper that evaluates a predicate node recursively. 
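    ///
    /// A sketch (not compiled) of a visitor this dispatch can drive: it counts leaf
    /// nodes and never emits a residual. The `LeafCounter` type is hypothetical.
    ///
    /// ```ignore
    /// struct LeafCounter;
    ///
    /// impl PredicateVisitor for LeafCounter {
    ///     type Error = std::convert::Infallible;
    ///     type Value = usize;
    ///
    ///     fn visit_leaf(
    ///         &mut self,
    ///         _leaf: &PredicateNode,
    ///     ) -> Result<VisitOutcome<usize>, Self::Error> {
    ///         // Every leaf contributes one to the count.
    ///         Ok(VisitOutcome::value(1))
    ///     }
    ///
    ///     fn combine_not(
    ///         &mut self,
    ///         _original: &Predicate,
    ///         child: VisitOutcome<usize>,
    ///     ) -> Result<VisitOutcome<usize>, Self::Error> {
    ///         // Negation does not change the number of leaves underneath.
    ///         Ok(child)
    ///     }
    ///
    ///     fn combine_and(
    ///         &mut self,
    ///         _original: &Predicate,
    ///         children: Vec<VisitOutcome<usize>>,
    ///     ) -> Result<VisitOutcome<usize>, Self::Error> {
    ///         Ok(VisitOutcome::value(children.iter().filter_map(|c| c.value).sum()))
    ///     }
    ///
    ///     fn combine_or(
    ///         &mut self,
    ///         _original: &Predicate,
    ///         children: Vec<VisitOutcome<usize>>,
    ///     ) -> Result<VisitOutcome<usize>, Self::Error> {
    ///         Ok(VisitOutcome::value(children.iter().filter_map(|c| c.value).sum()))
    ///     }
    /// }
    /// ```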
81 | fn visit_node( 82 | &mut self, 83 | node: &PredicateNode, 84 | original: &Predicate, 85 | ) -> Result<VisitOutcome<Self::Value>, Self::Error> { 86 | match node { 87 | PredicateNode::Not(inner) => { 88 | let child = self.visit_predicate(inner)?; 89 | self.combine_not(original, child) 90 | } 91 | PredicateNode::And(clauses) => { 92 | debug_assert!( 93 | !clauses.is_empty(), 94 | "Predicate::make_and enforces at least one clause" 95 | ); 96 | let mut children = Vec::with_capacity(clauses.len()); 97 | for clause in clauses { 98 | children.push(self.visit_predicate(clause)?); 99 | } 100 | self.combine_and(original, children) 101 | } 102 | PredicateNode::Or(clauses) => { 103 | debug_assert!( 104 | !clauses.is_empty(), 105 | "Predicate::make_or enforces at least one clause" 106 | ); 107 | let mut children = Vec::with_capacity(clauses.len()); 108 | for clause in clauses { 109 | children.push(self.visit_predicate(clause)?); 110 | } 111 | self.combine_or(original, children) 112 | } 113 | leaf => { 114 | debug_assert!(leaf.is_leaf(), "non-leaf nodes handled earlier"); 115 | self.visit_leaf(leaf) 116 | } 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/key/ts.rs: -------------------------------------------------------------------------------- 1 | //! Composite `(key, timestamp)` helpers used by MVCC-aware structures. 2 | 3 | use std::{ 4 | cmp::Ordering, 5 | hash::{Hash, Hasher}, 6 | }; 7 | 8 | use super::{KeyOwned, KeyRow}; 9 | use crate::mvcc::Timestamp; 10 | 11 | /// Borrowed composite key pairing a raw key view with its commit timestamp. 12 | #[derive(Clone, Debug)] 13 | pub struct KeyTsViewRaw { 14 | key: KeyRow, 15 | ts: Timestamp, 16 | } 17 | 18 | impl KeyTsViewRaw { 19 | /// Build a new `(key, timestamp)` view from a key row. 20 | pub(crate) fn new(key: KeyRow, ts: Timestamp) -> Self { 21 | Self { key, ts } 22 | } 23 | 24 | /// Build a raw view derived from an owned key. Safe because the owned key 25 | /// retains the backing buffers. 26 | pub(crate) fn from_owned(key: &KeyOwned, ts: Timestamp) -> Self { 27 | let key_row = 28 | KeyRow::from_owned(key).expect("KeyOwned should only contain supported key components"); 29 | Self::new(key_row, ts) 30 | } 31 | 32 | /// Borrow the key component. 33 | pub(crate) fn key(&self) -> &KeyRow { 34 | &self.key 35 | } 36 | 37 | /// Commit timestamp carried by the entry. 38 | pub(crate) fn timestamp(&self) -> Timestamp { 39 | self.ts 40 | } 41 | 42 | /// Decompose the view. 43 | pub(crate) fn into_parts(self) -> (KeyRow, Timestamp) { 44 | (self.key, self.ts) 45 | } 46 | } 47 | 48 | impl PartialEq for KeyTsViewRaw { 49 | fn eq(&self, other: &Self) -> bool { 50 | self.ts == other.ts && self.key == other.key 51 | } 52 | } 53 | 54 | impl Eq for KeyTsViewRaw {} 55 | 56 | impl PartialOrd for KeyTsViewRaw { 57 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 58 | Some(self.cmp(other)) 59 | } 60 | } 61 | 62 | impl Ord for KeyTsViewRaw { 63 | fn cmp(&self, other: &Self) -> Ordering { 64 | match self.key.cmp(&other.key) { 65 | Ordering::Equal => other.ts.cmp(&self.ts), 66 | ordering => ordering, 67 | } 68 | } 69 | } 70 | 71 | impl Hash for KeyTsViewRaw { 72 | fn hash<H: Hasher>(&self, state: &mut H) { 73 | self.key.hash(state); 74 | self.ts.hash(state); 75 | } 76 | } 77 | 78 | /// Owned `(key, timestamp)` pair. 
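///
/// Ordering mirrors [`KeyTsViewRaw`]: keys sort ascending and, for equal keys, newer
/// timestamps sort first. A sketch (crate-internal constructors, not compiled):
///
/// ```ignore
/// let newer = KeyTsOwned::new(KeyOwned::from("a"), Timestamp::new(2));
/// let older = KeyTsOwned::new(KeyOwned::from("a"), Timestamp::new(1));
/// let other = KeyTsOwned::new(KeyOwned::from("b"), Timestamp::new(1));
/// assert!(newer < older); // same key: higher commit timestamp first
/// assert!(older < other); // different keys: plain ascending key order
/// ```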
79 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 80 | pub struct KeyTsOwned { 81 | key: KeyOwned, 82 | ts: Timestamp, 83 | } 84 | 85 | impl KeyTsOwned { 86 | /// Construct an owned composite key. 87 | pub(crate) fn new(key: KeyOwned, ts: Timestamp) -> Self { 88 | Self { key, ts } 89 | } 90 | 91 | /// Borrow the owned key. 92 | pub(crate) fn key(&self) -> &KeyOwned { 93 | &self.key 94 | } 95 | 96 | /// Commit timestamp carried by the entry. 97 | pub(crate) fn timestamp(&self) -> Timestamp { 98 | self.ts 99 | } 100 | 101 | /// Visit as a borrowed raw view. 102 | pub(crate) fn as_raw_view(&self) -> KeyTsViewRaw { 103 | KeyTsViewRaw::from_owned(&self.key, self.ts) 104 | } 105 | } 106 | 107 | impl PartialOrd for KeyTsOwned { 108 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 109 | Some(self.cmp(other)) 110 | } 111 | } 112 | 113 | impl Ord for KeyTsOwned { 114 | fn cmp(&self, other: &Self) -> Ordering { 115 | match self.key.cmp(&other.key) { 116 | Ordering::Equal => other.ts.cmp(&self.ts), 117 | ordering => ordering, 118 | } 119 | } 120 | } 121 | 122 | impl From<(KeyOwned, Timestamp)> for KeyTsOwned { 123 | fn from((key, ts): (KeyOwned, Timestamp)) -> Self { 124 | KeyTsOwned::new(key, ts) 125 | } 126 | } 127 | 128 | #[cfg(test)] 129 | mod tests { 130 | use super::*; 131 | 132 | #[test] 133 | fn view_orders_descending_timestamps_per_key() { 134 | let key = KeyOwned::from("a"); 135 | let v1 = KeyTsViewRaw::from_owned(&key, Timestamp::new(1)); 136 | let v0 = KeyTsViewRaw::from_owned(&key, Timestamp::new(0)); 137 | 138 | assert!(v1 < v0); 139 | } 140 | 141 | #[test] 142 | fn owned_orders_key_then_timestamp() { 143 | let a1 = KeyTsOwned::new(KeyOwned::from("a"), Timestamp::new(1)); 144 | let a0 = KeyTsOwned::new(KeyOwned::from("a"), Timestamp::new(0)); 145 | let b1 = KeyTsOwned::new(KeyOwned::from("b"), Timestamp::new(1)); 146 | 147 | assert!(a1 < a0); 148 | assert!(a0 < b1); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/db/tests/core/flush.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow_array::RecordBatch; 4 | use arrow_schema::{DataType, Field, Schema}; 5 | use fusio::{DynFs, disk::LocalFs, executor::NoopExecutor, mem::fs::InMemoryFs, path::Path}; 6 | use typed_arrow_dyn::{DynCell, DynRow}; 7 | 8 | use crate::{ 9 | db::{DB, DbInner}, 10 | inmem::policy::BatchesThreshold, 11 | mode::DynModeConfig, 12 | mvcc::Timestamp, 13 | ondisk::sstable::{SsTableConfig, SsTableDescriptor, SsTableError, SsTableId}, 14 | test::build_batch, 15 | }; 16 | 17 | #[tokio::test(flavor = "current_thread")] 18 | async fn flush_without_immutables_errors() { 19 | let schema = std::sync::Arc::new(Schema::new(vec![ 20 | Field::new("id", DataType::Utf8, false), 21 | Field::new("v", DataType::Int32, false), 22 | ])); 23 | let config = DynModeConfig::from_key_name(schema.clone(), "id").expect("key name config"); 24 | let executor = Arc::new(NoopExecutor); 25 | let db: DbInner<InMemoryFs, NoopExecutor> = DB::new(config, executor) 26 | .await 27 | .expect("db init") 28 | .into_inner(); 29 | 30 | let fs: Arc<dyn DynFs> = Arc::new(LocalFs {}); 31 | let sstable_cfg = Arc::new(SsTableConfig::new( 32 | schema.clone(), 33 | fs, 34 | Path::from("/tmp/tonbo-flush-test"), 35 | )); 36 | let descriptor = SsTableDescriptor::new(SsTableId::new(1), 0); 37 | 38 | let result = db 39 | .flush_immutables_with_descriptor(sstable_cfg, descriptor.clone()) 40 | .await; 41 | assert!(matches!(result, 
Err(SsTableError::NoImmutableSegments))); 42 | assert_eq!(db.num_immutable_segments(), 0); 43 | } 44 | 45 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 46 | async fn flush_publishes_manifest_version() { 47 | let schema = Arc::new(Schema::new(vec![ 48 | Field::new("id", DataType::Utf8, false), 49 | Field::new("v", DataType::Int32, false), 50 | ])); 51 | let extractor = crate::extractor::projection_for_field(schema.clone(), 0).expect("extractor"); 52 | let executor = Arc::new(NoopExecutor); 53 | let config = DynModeConfig::new(schema.clone(), extractor).expect("config"); 54 | let mut db: DbInner<InMemoryFs, NoopExecutor> = DB::new(config, Arc::clone(&executor)) 55 | .await 56 | .expect("db") 57 | .into_inner(); 58 | db.set_seal_policy(Arc::new(BatchesThreshold { batches: 1 })); 59 | 60 | let rows = vec![ 61 | DynRow(vec![Some(DynCell::Str("a".into())), Some(DynCell::I32(1))]), 62 | DynRow(vec![Some(DynCell::Str("b".into())), Some(DynCell::I32(2))]), 63 | ]; 64 | let batch: RecordBatch = build_batch(schema.clone(), rows).expect("valid dyn rows"); 65 | db.ingest(batch).await.expect("ingest triggers seal"); 66 | assert_eq!(db.num_immutable_segments(), 1); 67 | 68 | let fs: Arc<dyn DynFs> = Arc::new(LocalFs {}); 69 | let sstable_cfg = Arc::new(SsTableConfig::new( 70 | schema.clone(), 71 | fs, 72 | Path::from("/tmp/tonbo-flush-ok"), 73 | )); 74 | let descriptor = SsTableDescriptor::new(SsTableId::new(7), 0); 75 | 76 | let table = db 77 | .flush_immutables_with_descriptor(sstable_cfg, descriptor.clone()) 78 | .await 79 | .expect("flush succeeds"); 80 | assert_eq!(db.num_immutable_segments(), 0); 81 | 82 | let snapshot = db 83 | .manifest 84 | .snapshot_latest(db.manifest_table) 85 | .await 86 | .expect("manifest snapshot"); 87 | assert_eq!( 88 | snapshot.head.last_manifest_txn, 89 | Some(Timestamp::new(1)), 90 | "first flush should publish manifest txn 1" 91 | ); 92 | let latest = snapshot 93 | .latest_version 94 | .expect("latest version must exist after flush"); 95 | assert_eq!( 96 | latest.commit_timestamp(), 97 | Timestamp::new(1), 98 | "latest version should reflect manifest txn 1" 99 | ); 100 | assert_eq!(latest.ssts().len(), 1); 101 | assert_eq!(latest.ssts()[0].len(), 1); 102 | let recorded = &latest.ssts()[0][0]; 103 | assert_eq!(recorded.sst_id(), descriptor.id()); 104 | assert!( 105 | recorded.stats().is_some() || table.descriptor().stats().is_none(), 106 | "stats should propagate when available" 107 | ); 108 | assert!( 109 | recorded.wal_segments().is_none(), 110 | "no WAL segments recorded since none were attached" 111 | ); 112 | } 113 | -------------------------------------------------------------------------------- /src/tests_internal/time_travel_e2e.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "tokio")] 2 | 3 | use std::{fs, path::PathBuf, sync::Arc}; 4 | 5 | use arrow_array::{Int32Array, RecordBatch, StringArray}; 6 | use arrow_schema::{DataType, Field}; 7 | use fusio::{disk::LocalFs, executor::tokio::TokioExecutor}; 8 | 9 | use crate::db::{BatchesThreshold, ColumnRef, DB, Predicate}; 10 | 11 | #[path = "common/mod.rs"] 12 | mod common; 13 | use common::config_with_pk; 14 | 15 | fn workspace_temp_dir(prefix: &str) -> PathBuf { 16 | let base = std::env::current_dir().expect("cwd"); 17 | let dir = base.join("target").join("tmp").join(format!( 18 | "{prefix}-{}", 19 | std::time::SystemTime::now() 20 | .duration_since(std::time::UNIX_EPOCH) 21 | .expect("time") 22 | .as_nanos() 23 | )); 24 | 
fs::create_dir_all(&dir).expect("create workspace temp dir"); 25 | dir 26 | } 27 | 28 | fn extract_rows(batches: Vec<RecordBatch>) -> Vec<(String, i32)> { 29 | let mut rows = Vec::new(); 30 | for batch in batches { 31 | let ids = batch 32 | .column(0) 33 | .as_any() 34 | .downcast_ref::<StringArray>() 35 | .expect("id col"); 36 | let vals = batch 37 | .column(1) 38 | .as_any() 39 | .downcast_ref::<Int32Array>() 40 | .expect("v col"); 41 | for (id, v) in ids.iter().zip(vals.iter()) { 42 | if let (Some(id), Some(v)) = (id, v) { 43 | rows.push((id.to_string(), v)); 44 | } 45 | } 46 | } 47 | rows.sort(); 48 | rows 49 | } 50 | 51 | /// Verify snapshot_at can time-travel between manifest versions. 52 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 53 | async fn snapshot_at_reads_older_manifest_version() -> Result<(), Box<dyn std::error::Error>> { 54 | let temp_root = workspace_temp_dir("time-travel"); 55 | let root_str = temp_root.to_string_lossy().into_owned(); 56 | 57 | let config = config_with_pk( 58 | vec![ 59 | Field::new("id", DataType::Utf8, false), 60 | Field::new("v", DataType::Int32, false), 61 | ], 62 | &["id"], 63 | ); 64 | let schema = config.schema(); 65 | 66 | let executor = Arc::new(TokioExecutor::default()); 67 | let mut inner = DB::<LocalFs, TokioExecutor>::builder(config) 68 | .on_disk(root_str.clone())? 69 | .with_minor_compaction(1, 0, 1) 70 | .open_with_executor(Arc::clone(&executor)) 71 | .await? 72 | .into_inner(); 73 | inner.set_seal_policy(Arc::new(BatchesThreshold { batches: 1 })); 74 | let db = DB::from_inner(Arc::new(inner)); 75 | 76 | let batch_v1 = RecordBatch::try_new( 77 | schema.clone(), 78 | vec![ 79 | Arc::new(StringArray::from(vec!["v1-a", "v1-b"])) as _, 80 | Arc::new(Int32Array::from(vec![1, 2])) as _, 81 | ], 82 | )?; 83 | db.ingest(batch_v1).await?; 84 | 85 | let batch_v2 = RecordBatch::try_new( 86 | schema.clone(), 87 | vec![ 88 | Arc::new(StringArray::from(vec!["v2-a"])) as _, 89 | Arc::new(Int32Array::from(vec![99])) as _, 90 | ], 91 | )?; 92 | db.ingest(batch_v2).await?; 93 | 94 | let versions = db.list_versions(10).await?; 95 | if versions.len() < 2 { 96 | eprintln!("insufficient manifest versions recorded; skipping time-travel assertion"); 97 | return Ok(()); 98 | } 99 | 100 | let earliest = versions.last().expect("earliest version"); 101 | let snapshot_old = db.snapshot_at(earliest.timestamp).await?; 102 | let predicate = Predicate::is_not_null(ColumnRef::new("id")); 103 | 104 | let old_rows = extract_rows( 105 | snapshot_old 106 | .scan(&db) 107 | .filter(predicate.clone()) 108 | .collect() 109 | .await?, 110 | ); 111 | assert_eq!( 112 | old_rows, 113 | vec![("v1-a".into(), 1), ("v1-b".into(), 2)], 114 | "older snapshot should not see later writes" 115 | ); 116 | 117 | let latest_rows = extract_rows(db.scan().filter(predicate).collect().await?); 118 | assert!( 119 | latest_rows.contains(&("v2-a".into(), 99)), 120 | "latest view should include second batch" 121 | ); 122 | 123 | if let Err(err) = fs::remove_dir_all(&temp_root) { 124 | eprintln!("failed to clean temp dir {:?}: {err}", &temp_root); 125 | } 126 | Ok(()) 127 | } 128 | -------------------------------------------------------------------------------- /src/query/scan.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::BTreeSet, sync::Arc}; 2 | 3 | use arrow_schema::{Schema, SchemaRef}; 4 | use tonbo_predicate::{Operand, Predicate, PredicateNode}; 5 | 6 | use crate::{ 7 | extractor::KeyExtractError, 8 | 
manifest::{SstEntry, TableSnapshot}, 9 | mvcc::Timestamp, 10 | }; 11 | 12 | /// Internal representation of a scan plan. Things included in the plan: 13 | /// * predicate: the caller-supplied predicate used for pruning and residual evaluation 14 | /// * range_set: cached primary-key ranges derived from the predicate for pruning 15 | /// * immutable_memtable_idxes: which immutable memtables need to be scanned in execution phase 16 | /// * ssts: level-ed sstable where entry contains the identifier and its corresponding pruning row 17 | /// set result 18 | /// * limit: the raw limit 19 | /// * read_ts: snapshot/read timestamp 20 | pub(crate) struct ScanPlan { 21 | pub(crate) _predicate: Predicate, 22 | pub(crate) immutable_indexes: Vec<usize>, 23 | pub(crate) residual_predicate: Option<Predicate>, 24 | pub(crate) projected_schema: Option<SchemaRef>, 25 | pub(crate) scan_schema: SchemaRef, 26 | pub(crate) limit: Option<usize>, 27 | pub(crate) read_ts: Timestamp, 28 | 29 | pub(crate) _snapshot: TableSnapshot, 30 | } 31 | 32 | pub(crate) fn projection_with_predicate( 33 | base_schema: &SchemaRef, 34 | projection: &SchemaRef, 35 | predicate: Option<&Predicate>, 36 | ) -> Result<SchemaRef, KeyExtractError> { 37 | let mut required = BTreeSet::new(); 38 | if let Some(pred) = predicate { 39 | collect_predicate_columns(pred, &mut required); 40 | } 41 | extend_projection_schema(base_schema, projection, &required) 42 | } 43 | 44 | fn extend_projection_schema( 45 | base_schema: &SchemaRef, 46 | projection: &SchemaRef, 47 | required: &BTreeSet<Arc<str>>, 48 | ) -> Result<SchemaRef, KeyExtractError> { 49 | if required.is_empty() 50 | || required.iter().all(|name| { 51 | projection 52 | .fields() 53 | .iter() 54 | .any(|field| field.name() == name.as_ref()) 55 | }) 56 | { 57 | return Ok(Arc::clone(projection)); 58 | } 59 | 60 | let mut needed: BTreeSet<Arc<str>> = projection 61 | .fields() 62 | .iter() 63 | .map(|field| Arc::<str>::from(field.name().as_str())) 64 | .collect(); 65 | needed.extend(required.iter().cloned()); 66 | 67 | let mut fields = Vec::new(); 68 | for field in base_schema.fields() { 69 | if needed.remove(field.name().as_str()) { 70 | fields.push(field.clone()); 71 | } 72 | } 73 | 74 | if !needed.is_empty() { 75 | // TODO: add nested-column support once predicates can address nested fields. 76 | let missing = needed.iter().next().expect("missing column present"); 77 | return Err(KeyExtractError::NoSuchField { 78 | name: missing.to_string(), 79 | }); 80 | } 81 | 82 | Ok(Arc::new(Schema::new(fields))) 83 | } 84 | 85 | fn collect_predicate_columns(predicate: &Predicate, out: &mut BTreeSet<Arc<str>>) { 86 | match predicate.kind() { 87 | PredicateNode::True => {} 88 | PredicateNode::Compare { left, right, .. } => { 89 | collect_operand_column(left, out); 90 | collect_operand_column(right, out); 91 | } 92 | PredicateNode::InList { expr, .. } | PredicateNode::IsNull { expr, .. } => { 93 | collect_operand_column(expr, out); 94 | } 95 | PredicateNode::Not(child) => collect_predicate_columns(child, out), 96 | PredicateNode::And(children) | PredicateNode::Or(children) => { 97 | for child in children { 98 | collect_predicate_columns(child, out); 99 | } 100 | } 101 | } 102 | } 103 | 104 | fn collect_operand_column(operand: &Operand, out: &mut BTreeSet<Arc<str>>) { 105 | if let Operand::Column(column) = operand { 106 | out.insert(Arc::clone(&column.name)); 107 | } 108 | } 109 | 110 | impl ScanPlan { 111 | /// Access SST entries from the snapshot, grouped by compaction level. 
112 | /// 113 | /// Returns all SST entries across all levels that should be scanned. 114 | /// Pruning based on key ranges or statistics will be added in future iterations. 115 | pub(crate) fn sst_entries(&self) -> impl Iterator<Item = &SstEntry> { 116 | self._snapshot 117 | .latest_version 118 | .as_ref() 119 | .map(|v| v.ssts()) 120 | .unwrap_or(&[]) 121 | .iter() 122 | .flatten() 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/tests_internal/wal_rotation_e2e.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "tokio")] 2 | 3 | use std::{fs, path::PathBuf, sync::Arc}; 4 | 5 | use arrow_array::{Int32Array, RecordBatch, StringArray}; 6 | use arrow_schema::{DataType, Field}; 7 | use fusio::{DynFs, disk::LocalFs, executor::tokio::TokioExecutor, path::Path as FusioPath}; 8 | use serde::Deserialize; 9 | 10 | use crate::{ 11 | db::{BatchesThreshold, WalSyncPolicy}, 12 | test_support::{TestFsWalStateStore, TestWalExt as WalExt}, 13 | }; 14 | 15 | #[path = "common/mod.rs"] 16 | mod common; 17 | use common::config_with_pk; 18 | 19 | fn workspace_temp_dir(prefix: &str) -> PathBuf { 20 | let base = std::env::current_dir().expect("cwd"); 21 | let dir = base.join("target").join("tmp").join(format!( 22 | "{prefix}-{}", 23 | std::time::SystemTime::now() 24 | .duration_since(std::time::UNIX_EPOCH) 25 | .expect("time") 26 | .as_nanos() 27 | )); 28 | fs::create_dir_all(&dir).expect("create workspace temp dir"); 29 | dir 30 | } 31 | 32 | fn wal_cfg_with_backend(wal_dir: &PathBuf) -> crate::db::WalConfig { 33 | use std::time::Duration; 34 | fs::create_dir_all(wal_dir).expect("wal dir"); 35 | let wal_path = FusioPath::from_filesystem_path(wal_dir).expect("wal path"); 36 | let wal_fs = Arc::new(LocalFs {}); 37 | let wal_backend: Arc<dyn DynFs> = wal_fs.clone(); 38 | let wal_state = Arc::new(TestFsWalStateStore::new(wal_fs)); 39 | crate::db::WalConfig::default() 40 | .wal_dir(wal_path) 41 | .segment_backend(wal_backend) 42 | .state_store(Some(wal_state)) 43 | .segment_max_bytes(256) 44 | .flush_interval(Duration::from_millis(1)) 45 | .sync_policy(WalSyncPolicy::Always) 46 | } 47 | 48 | #[derive(Debug, Deserialize)] 49 | struct WalStateDisk { 50 | last_segment_seq: Option<u64>, 51 | } 52 | 53 | /// WAL with sync policy `Always` should rotate segments and persist state before shutdown. 54 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 55 | async fn wal_rotation_and_state_persisted() -> Result<(), Box<dyn std::error::Error>> { 56 | let temp_root = workspace_temp_dir("wal-rotation-e2e"); 57 | let root_str = temp_root.to_string_lossy().into_owned(); 58 | 59 | let config = config_with_pk( 60 | vec![ 61 | Field::new("id", DataType::Utf8, false), 62 | Field::new("v", DataType::Int32, false), 63 | ], 64 | &["id"], 65 | ); 66 | let schema = config.schema(); 67 | 68 | let wal_dir = temp_root.join("wal"); 69 | let wal_cfg = wal_cfg_with_backend(&wal_dir); 70 | let executor = Arc::new(TokioExecutor::default()); 71 | 72 | let mut db = crate::db::DB::<LocalFs, TokioExecutor>::builder(config) 73 | .on_disk(root_str.clone())? 74 | .wal_config(wal_cfg.clone()) 75 | .with_minor_compaction(1, 0, 1) 76 | .open_with_executor(Arc::clone(&executor)) 77 | .await? 78 | .into_inner(); 79 | db.set_seal_policy(Arc::new(BatchesThreshold { batches: 1 })); 80 | 81 | // Two batches large enough to force multiple WAL frames and at least one rotation. 
82 | for idx in 0..3 { 83 | let ids: Vec<String> = (0..64).map(|n| format!("user-{idx}-{n:02}")).collect(); 84 | let vals: Vec<i32> = (0..64).map(|n| idx as i32 * 100 + n as i32).collect(); 85 | let batch = RecordBatch::try_new( 86 | schema.clone(), 87 | vec![ 88 | Arc::new(StringArray::from(ids)) as _, 89 | Arc::new(Int32Array::from(vals)) as _, 90 | ], 91 | )?; 92 | db.ingest(batch).await?; 93 | } 94 | 95 | // Ensure writer drains and state is flushed. 96 | db.disable_wal().await?; 97 | 98 | // Expect multiple WAL segment files on disk. 99 | let wal_files: Vec<_> = fs::read_dir(&wal_dir)? 100 | .flatten() 101 | .filter(|entry| { 102 | entry.file_name().to_string_lossy().starts_with("wal-") 103 | && entry.file_name().to_string_lossy().ends_with(".tonwal") 104 | }) 105 | .collect(); 106 | assert!( 107 | wal_files.len() >= 2, 108 | "expected wal rotation to produce multiple segments" 109 | ); 110 | 111 | // State store should record the latest segment sequence to allow recovery. 112 | let state_path = wal_dir.join("state.json"); 113 | let state: WalStateDisk = serde_json::from_slice(&fs::read(&state_path)?)?; 114 | assert!( 115 | state.last_segment_seq.unwrap_or(0) >= 1, 116 | "state should capture last wal segment" 117 | ); 118 | 119 | if let Err(err) = fs::remove_dir_all(&temp_root) { 120 | eprintln!("failed to clean temp dir {:?}: {err}", &temp_root); 121 | } 122 | 123 | Ok(()) 124 | } 125 | -------------------------------------------------------------------------------- /src/wal/manifest_ext.rs: -------------------------------------------------------------------------------- 1 | //! Helpers that bridge WAL metadata into manifest structures. 2 | 3 | use std::sync::Arc; 4 | 5 | use crate::{ 6 | manifest::WalSegmentRef, 7 | wal::{ 8 | WalConfig, WalError, 9 | storage::{SegmentDescriptor, SegmentFrameBounds, WalStorage}, 10 | wal_segment_file_id, 11 | }, 12 | }; 13 | 14 | /// Collect WAL segment references using the configuration supplied to the writer. 15 | pub(crate) async fn collect_wal_segment_refs( 16 | cfg: &WalConfig, 17 | manifest_floor: Option<&WalSegmentRef>, 18 | live_frame_floor: Option<u64>, 19 | ) -> Result<Vec<WalSegmentRef>, WalError> { 20 | let storage = WalStorage::new(Arc::clone(&cfg.segment_backend), cfg.dir.clone()); 21 | let wal_state_hint = storage 22 | .load_state_handle(cfg.state_store.as_ref()) 23 | .await? 24 | .and_then(|handle| handle.state().last_segment_seq); 25 | 26 | let Some(tail) = storage.tail_metadata_with_hint(wal_state_hint).await? else { 27 | return Ok(Vec::new()); 28 | }; 29 | 30 | let mut refs = Vec::with_capacity(2); 31 | let manifest_cutoff = manifest_floor.map(|f| f.seq()); 32 | for descriptor in tail 33 | .completed 34 | .iter() 35 | .filter(|descriptor| descriptor.bytes > 0) 36 | { 37 | let bounds = storage 38 | .segment_frame_bounds(&descriptor.path) 39 | .await? 40 | .ok_or({ 41 | WalError::Corrupt("wal segment contained no frames despite non-zero length") 42 | })?; 43 | if segment_required(descriptor.seq, &bounds, manifest_cutoff, live_frame_floor) { 44 | refs.push(wal_segment_ref_from_descriptor(descriptor, bounds)); 45 | } 46 | } 47 | 48 | if tail.active.bytes > 0 { 49 | let bounds = storage 50 | .segment_frame_bounds(&tail.active.path) 51 | .await? 
52 | .ok_or({ 53 | WalError::Corrupt("active wal segment contained no frames despite non-zero length") 54 | })?; 55 | if segment_required(tail.active.seq, &bounds, manifest_cutoff, live_frame_floor) { 56 | refs.push(wal_segment_ref_from_descriptor(&tail.active, bounds)); 57 | } 58 | } 59 | 60 | // Note: We intentionally don't re-add the old floor when refs is empty. 61 | // If no segments are required (all data persisted), the floor should be cleared. 62 | // This ensures WAL replay doesn't replay already-persisted data. 63 | 64 | refs.sort_by_key(|segment| segment.seq()); 65 | refs.dedup_by_key(|segment| segment.seq()); 66 | 67 | Ok(refs) 68 | } 69 | 70 | fn wal_segment_ref_from_descriptor( 71 | descriptor: &SegmentDescriptor, 72 | bounds: SegmentFrameBounds, 73 | ) -> WalSegmentRef { 74 | let file_id = wal_segment_file_id(descriptor.seq); 75 | WalSegmentRef::new(descriptor.seq, file_id, bounds.first_seq, bounds.last_seq) 76 | } 77 | 78 | fn segment_required( 79 | seq: u64, 80 | bounds: &SegmentFrameBounds, 81 | manifest_cutoff: Option<u64>, 82 | live_frame_floor: Option<u64>, 83 | ) -> bool { 84 | if let Some(live_floor) = live_frame_floor 85 | && bounds.last_seq >= live_floor 86 | { 87 | return true; 88 | } 89 | 90 | match manifest_cutoff { 91 | Some(cutoff) => seq > cutoff, 92 | None => true, 93 | } 94 | } 95 | 96 | #[cfg(test)] 97 | mod tests { 98 | use super::*; 99 | 100 | #[test] 101 | fn manifest_floor_filters_when_unpinned() { 102 | let bounds = SegmentFrameBounds { 103 | first_seq: 10, 104 | last_seq: 20, 105 | }; 106 | assert!(!segment_required(5, &bounds, Some(5), None)); 107 | assert!(segment_required(6, &bounds, Some(5), None)); 108 | } 109 | 110 | #[test] 111 | fn live_floor_keeps_older_segments() { 112 | let bounds = SegmentFrameBounds { 113 | first_seq: 30, 114 | last_seq: 40, 115 | }; 116 | assert!(segment_required(3, &bounds, Some(5), Some(35))); 117 | assert!(!segment_required(3, &bounds, Some(5), Some(45))); 118 | } 119 | } 120 | 121 | /// Prune WAL segments whose sequence is strictly below the manifest floor. 122 | pub(crate) async fn prune_wal_segments( 123 | cfg: &WalConfig, 124 | floor: &WalSegmentRef, 125 | ) -> Result<usize, WalError> { 126 | let storage = WalStorage::new(Arc::clone(&cfg.segment_backend), cfg.dir.clone()); 127 | if cfg.prune_dry_run { 128 | let segments = storage.list_segments_with_hint(None).await?; 129 | let removable = segments 130 | .into_iter() 131 | .filter(|descriptor| descriptor.seq < floor.seq()) 132 | .count(); 133 | Ok(removable) 134 | } else { 135 | storage.prune_below(floor.seq()).await 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/mvcc/mod.rs: -------------------------------------------------------------------------------- 1 | //! MVCC timestamps, read visibility, and commit clock helpers. 2 | 3 | use std::fmt; 4 | 5 | use serde::{Deserialize, Serialize}; 6 | 7 | /// Canonical column name storing MVCC commit timestamps alongside Arrow payloads. 8 | pub(crate) const MVCC_COMMIT_COL: &str = "_commit_ts"; 9 | 10 | /// Logical commit timestamp assigned to mutations and read views. 11 | /// 12 | /// Timestamps are monotonically increasing `u64` values that identify 13 | /// each committed version of the database. 
They can be used for: 14 | /// - Point-in-time queries via [`DB::snapshot_at`](crate::db::DB::snapshot_at) 15 | /// - Listing historical versions via [`DB::list_versions`](crate::db::DB::list_versions) 16 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] 17 | pub struct Timestamp(u64); 18 | 19 | impl Timestamp { 20 | /// Least possible timestamp (used for uninitialised clocks). 21 | pub const MIN: Self = Self(0); 22 | /// Greatest possible timestamp (used for open-ended visibility). 23 | pub const MAX: Self = Self(u64::MAX); 24 | 25 | /// Construct a timestamp from a raw `u64`. 26 | #[inline] 27 | pub const fn new(raw: u64) -> Self { 28 | Self(raw) 29 | } 30 | 31 | /// Returns the raw `u64` value backing this timestamp. 32 | #[inline] 33 | pub const fn get(self) -> u64 { 34 | self.0 35 | } 36 | 37 | /// Returns the next timestamp after `self`, saturating on overflow. 38 | #[inline] 39 | pub(crate) const fn next(self) -> Self { 40 | Self(self.0.saturating_add(1)) 41 | } 42 | 43 | /// Add `delta` while saturating on overflow. 44 | #[inline] 45 | pub(crate) const fn saturating_add(self, delta: u64) -> Self { 46 | Self(self.0.saturating_add(delta)) 47 | } 48 | 49 | /// Subtract `delta` while saturating at [`Timestamp::MIN`]. 50 | #[inline] 51 | pub(crate) const fn saturating_sub(self, delta: u64) -> Self { 52 | Self(self.0.saturating_sub(delta)) 53 | } 54 | } 55 | 56 | impl From<u64> for Timestamp { 57 | fn from(value: u64) -> Self { 58 | Self(value) 59 | } 60 | } 61 | 62 | impl From<Timestamp> for u64 { 63 | fn from(ts: Timestamp) -> Self { 64 | ts.0 65 | } 66 | } 67 | 68 | impl fmt::Debug for Timestamp { 69 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 70 | f.debug_tuple("Timestamp").field(&self.0).finish() 71 | } 72 | } 73 | 74 | /// Tracks the next commit timestamp to allocate. 75 | use std::sync::atomic::{AtomicU64, Ordering}; 76 | 77 | /// Monotonic allocator for MVCC commit timestamps backed by atomics. 78 | #[derive(Debug)] 79 | pub struct CommitClock { 80 | next: AtomicU64, 81 | } 82 | 83 | impl CommitClock { 84 | /// Create a new clock that will hand out timestamps starting from `start`. 85 | #[inline] 86 | pub(crate) const fn new(start: Timestamp) -> Self { 87 | Self { 88 | next: AtomicU64::new(start.get()), 89 | } 90 | } 91 | 92 | /// Allocate and return the next commit timestamp. 93 | #[inline] 94 | pub(crate) fn alloc(&self) -> Timestamp { 95 | let current = self.next.fetch_add(1, Ordering::Relaxed); 96 | Timestamp::new(current) 97 | } 98 | 99 | /// Return the timestamp that will be handed out next. 100 | #[inline] 101 | pub(crate) fn peek(&self) -> Timestamp { 102 | Timestamp::new(self.next.load(Ordering::Relaxed)) 103 | } 104 | 105 | /// Advance the clock so that it will hand out at least `candidate`. 106 | /// 107 | /// Useful after recovery where the highest observed commit is already known. 108 | #[inline] 109 | pub(crate) fn advance_to_at_least(&self, candidate: Timestamp) { 110 | // Use a loop with compare_exchange to avoid losing larger candidates. 
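        // If another thread advanced the clock concurrently, `compare_exchange` fails
        // and returns the value it observed; the loop re-checks the candidate against
        // that value and stops once the clock is already at or beyond it, so the
        // counter never moves backwards.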
111 | let mut current = self.next.load(Ordering::Relaxed); 112 | while candidate.get() > current { 113 | match self.next.compare_exchange( 114 | current, 115 | candidate.get(), 116 | Ordering::Relaxed, 117 | Ordering::Relaxed, 118 | ) { 119 | Ok(_) => break, 120 | Err(actual) => current = actual, 121 | } 122 | } 123 | } 124 | } 125 | 126 | impl Default for CommitClock { 127 | fn default() -> Self { 128 | Self::new(Timestamp::MIN) 129 | } 130 | } 131 | 132 | /// Immutable view acquired by readers to evaluate MVCC visibility. 133 | #[derive(Debug, Clone, Copy)] 134 | pub struct ReadView { 135 | read_ts: Timestamp, 136 | } 137 | 138 | impl ReadView { 139 | /// Build a read view pinned at `read_ts`. 140 | #[inline] 141 | pub(crate) const fn new(read_ts: Timestamp) -> Self { 142 | Self { read_ts } 143 | } 144 | 145 | /// Commit timestamp visible to the view (inclusive). 146 | #[inline] 147 | pub(crate) const fn read_ts(&self) -> Timestamp { 148 | self.read_ts 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/db/tests/wasm_web.rs: -------------------------------------------------------------------------------- 1 | //! Wasm/web integration tests for DB with S3 backend. 2 | 3 | use std::sync::Arc; 4 | 5 | use arrow_array::{Array, Int32Array, RecordBatch, StringArray}; 6 | use arrow_schema::{DataType, Field, Schema}; 7 | use fusio::{executor::web::WebExecutor, impls::remotes::aws::fs::AmazonS3, path::Path}; 8 | use futures::StreamExt; 9 | use js_sys::Date; 10 | use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure}; 11 | 12 | use super::{AwsCreds, DB, ObjectSpec, S3Spec}; 13 | use crate::{ 14 | inmem::policy::BatchesThreshold, 15 | mvcc::Timestamp, 16 | ondisk::sstable::{SsTableConfig, SsTableDescriptor, SsTableId, SsTableReader}, 17 | schema::SchemaBuilder, 18 | wal::{WalSyncPolicy, frame::WalEvent, replay::Replayer}, 19 | }; 20 | 21 | wasm_bindgen_test_configure!(run_in_browser); 22 | 23 | fn memory_s3_spec(prefix: String) -> S3Spec { 24 | let mut spec = S3Spec::new( 25 | "wasm-mock-bucket", 26 | prefix, 27 | AwsCreds::new("access", "secret"), 28 | ); 29 | spec.endpoint = Some("memory://wasm-web".to_string()); 30 | spec.region = Some("us-east-1".to_string()); 31 | spec 32 | } 33 | 34 | #[wasm_bindgen_test] 35 | async fn web_s3_roundtrip_wal_and_sstable() { 36 | let schema = Arc::new(Schema::new(vec![ 37 | Field::new("id", DataType::Utf8, false), 38 | Field::new("value", DataType::Int32, false), 39 | ])); 40 | let schema_cfg = SchemaBuilder::from_schema(Arc::clone(&schema)) 41 | .primary_key("id") 42 | .build() 43 | .expect("schema config"); 44 | 45 | let now_ms = Date::now() as u128; 46 | let prefix = format!("wasm-web-smoke-{now_ms}"); 47 | let s3_spec = memory_s3_spec(prefix.clone()); 48 | 49 | let exec = Arc::new(WebExecutor::new()); 50 | let mut db: DB<AmazonS3, WebExecutor> = DB::<AmazonS3, WebExecutor>::builder(schema_cfg) 51 | .object_store(ObjectSpec::s3(s3_spec)) 52 | .expect("object_store config") 53 | .wal_sync_policy(WalSyncPolicy::Always) 54 | .open_with_executor(Arc::clone(&exec)) 55 | .await 56 | .expect("build web db"); 57 | 58 | // Seal after every batch so immutables are flushed deterministically in tests. 
59 | db.set_seal_policy(Arc::new(BatchesThreshold { batches: 1 })); 60 | 61 | let batch = RecordBatch::try_new( 62 | Arc::clone(&schema), 63 | vec![ 64 | Arc::new(StringArray::from(vec!["alpha", "beta"])) as _, 65 | Arc::new(Int32Array::from(vec![1, 2])) as _, 66 | ], 67 | ) 68 | .expect("batch"); 69 | 70 | db.ingest(batch.clone()).await.expect("ingest"); 71 | 72 | let wal_cfg = db.wal_config().cloned().expect("wal config present"); 73 | let mut wal_events = Replayer::new(wal_cfg.clone()) 74 | .scan() 75 | .await 76 | .expect("wal replay"); 77 | assert!( 78 | wal_events 79 | .iter() 80 | .any(|event| matches!(event, WalEvent::DynAppend { .. })), 81 | "wal append should be visible" 82 | ); 83 | 84 | // Flush immutables into an SST and read it back through the Parquet reader. 85 | let sst_root = Path::parse(format!("{}/sst", prefix)).expect("sst root path"); 86 | let sst_cfg = Arc::new(SsTableConfig::new( 87 | Arc::clone(&schema), 88 | Arc::clone(&wal_cfg.segment_backend), 89 | sst_root, 90 | )); 91 | let descriptor = SsTableDescriptor::new(SsTableId::new(1), 0); 92 | let sstable = db 93 | .flush_immutables_with_descriptor(Arc::clone(&sst_cfg), descriptor) 94 | .await 95 | .expect("flush to sst"); 96 | 97 | let reader = SsTableReader::open(Arc::clone(&sst_cfg), sstable.descriptor().clone()) 98 | .await 99 | .expect("open sstable reader"); 100 | 101 | let mut stream = reader 102 | .into_stream(Timestamp::MAX, None, Arc::clone(&exec)) 103 | .await 104 | .expect("open stream"); 105 | 106 | let mut rows = Vec::new(); 107 | while let Some(batch) = stream.next().await { 108 | let batch = batch.expect("stream batch"); 109 | assert!(batch.delete.is_none(), "no deletes expected"); 110 | if batch.data.num_rows() == 0 { 111 | continue; 112 | } 113 | let ids = batch 114 | .data 115 | .column(0) 116 | .as_any() 117 | .downcast_ref::<StringArray>() 118 | .expect("string ids"); 119 | let values = batch 120 | .data 121 | .column(1) 122 | .as_any() 123 | .downcast_ref::<Int32Array>() 124 | .expect("int values"); 125 | for idx in 0..ids.len() { 126 | rows.push((ids.value(idx).to_string(), values.value(idx))); 127 | } 128 | } 129 | 130 | rows.sort_by(|a, b| a.0.cmp(&b.0)); 131 | assert_eq!( 132 | rows, 133 | vec![("alpha".to_string(), 1), ("beta".to_string(), 2)] 134 | ); 135 | assert!( 136 | wal_events 137 | .drain(..) 138 | .any(|event| matches!(event, WalEvent::TxnCommit { .. })), 139 | "wal should contain commit" 140 | ); 141 | } 142 | -------------------------------------------------------------------------------- /src/mode/dyn_config.rs: -------------------------------------------------------------------------------- 1 | use arrow_schema::SchemaRef; 2 | 3 | use super::DynModeConfig; 4 | use crate::extractor::{self, KeyExtractError}; 5 | 6 | impl DynModeConfig { 7 | /// Build a config from a key column index within `schema`. 8 | pub fn from_key_col(schema: SchemaRef, key_col: usize) -> Result<Self, KeyExtractError> { 9 | let fields = schema.fields(); 10 | if key_col >= fields.len() { 11 | return Err(KeyExtractError::ColumnOutOfBounds(key_col, fields.len())); 12 | } 13 | let extractor = extractor::projection_for_field(schema.clone(), key_col)?; 14 | Self::new(schema, extractor) 15 | } 16 | 17 | /// Build a config from a key field name within `schema`. 
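    ///
    /// A sketch (not compiled, assuming the usual `arrow_schema` imports) that keys a
    /// two-column table on its `id` column, mirroring the internal flush tests:
    ///
    /// ```ignore
    /// let schema = Arc::new(Schema::new(vec![
    ///     Field::new("id", DataType::Utf8, false),
    ///     Field::new("v", DataType::Int32, false),
    /// ]));
    /// let config = DynModeConfig::from_key_name(schema, "id")?;
    /// ```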
18 | pub fn from_key_name(schema: SchemaRef, key_field: &str) -> Result<Self, KeyExtractError> { 19 | let fields = schema.fields(); 20 | let Some((idx, _)) = fields 21 | .iter() 22 | .enumerate() 23 | .find(|(_, f)| f.name() == key_field) 24 | else { 25 | return Err(KeyExtractError::NoSuchField { 26 | name: key_field.to_string(), 27 | }); 28 | }; 29 | Self::from_key_col(schema, idx) 30 | } 31 | 32 | /// Build a config from schema metadata (`tonbo.key` markers). 33 | pub fn from_metadata(schema: SchemaRef) -> Result<Self, KeyExtractError> { 34 | use std::collections::HashMap; 35 | 36 | fn is_truthy(s: &str) -> bool { 37 | matches!(s, "true" | "TRUE" | "True" | "yes" | "YES" | "Yes") 38 | } 39 | fn parse_names_list(s: &str) -> Vec<String> { 40 | let t = s.trim(); 41 | if t.starts_with('[') && t.ends_with(']') { 42 | let inner = &t[1..t.len() - 1]; 43 | inner 44 | .split(',') 45 | .map(|p| p.trim().trim_matches('"').to_string()) 46 | .filter(|p| !p.is_empty()) 47 | .collect() 48 | } else { 49 | vec![t.trim_matches('"').to_string()] 50 | } 51 | } 52 | 53 | let fields = schema.fields(); 54 | 55 | // 1) Field-level markers: collect (ord, idx) for any field with tonbo.key 56 | let mut marks: Vec<(Option<u32>, usize)> = Vec::new(); 57 | for (i, f) in fields.iter().enumerate() { 58 | let md: &HashMap<String, String> = f.metadata(); 59 | if let Some(v) = md.get("tonbo.key") { 60 | let v = v.trim(); 61 | if let Ok(ord) = v.parse::<u32>() { 62 | marks.push((Some(ord), i)); 63 | } else if is_truthy(v) { 64 | marks.push((None, i)); 65 | } 66 | } 67 | } 68 | if !marks.is_empty() { 69 | if marks.len() == 1 { 70 | let idx = marks[0].1; 71 | return Self::from_key_col(schema, idx); 72 | } 73 | if marks.iter().any(|(o, _)| o.is_none()) { 74 | return Err(KeyExtractError::NoSuchField { 75 | name: "multiple tonbo.key markers require numeric ordinals".to_string(), 76 | }); 77 | } 78 | let mut ordered: Vec<(u32, usize)> = marks 79 | .into_iter() 80 | .filter_map(|(ord, idx)| ord.map(|o| (o, idx))) 81 | .collect(); 82 | ordered.sort_by_key(|(ord, _)| *ord); 83 | let indices: Vec<usize> = ordered.into_iter().map(|(_, idx)| idx).collect(); 84 | let extractor = extractor::projection_for_columns(schema.clone(), indices)?; 85 | return Self::new(schema, extractor); 86 | } 87 | 88 | // 2) Schema-level fallback: tonbo.keys = "name" | "[\"a\",\"b\"]" 89 | let smd: &HashMap<String, String> = schema.metadata(); 90 | if let Some(namev) = smd.get("tonbo.keys") { 91 | let names = parse_names_list(namev); 92 | if names.is_empty() { 93 | return Err(KeyExtractError::NoSuchField { 94 | name: "tonbo.keys[]".to_string(), 95 | }); 96 | } 97 | if names.len() == 1 { 98 | return Self::from_key_name(schema, &names[0]); 99 | } 100 | let mut indices: Vec<usize> = Vec::with_capacity(names.len()); 101 | for n in names.iter() { 102 | let Some((idx, _)) = fields.iter().enumerate().find(|(_, f)| f.name() == n) else { 103 | return Err(KeyExtractError::NoSuchField { name: n.clone() }); 104 | }; 105 | indices.push(idx); 106 | } 107 | let extractor = extractor::projection_for_columns(schema.clone(), indices)?; 108 | return Self::new(schema, extractor); 109 | } 110 | 111 | Err(KeyExtractError::NoSuchField { 112 | name: "<tonbo.key|tonbo.keys>".to_string(), 113 | }) 114 | } 115 | 116 | /// Build a config by first inspecting metadata and bubbling a descriptive error when absent. 
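    ///
    /// A sketch (not compiled, assuming the usual `arrow_schema` imports) of the
    /// single-key, field-level marker form; composite keys use numeric ordinals such
    /// as "0" and "1" in place of "true":
    ///
    /// ```ignore
    /// let id = Field::new("id", DataType::Utf8, false).with_metadata(
    ///     std::collections::HashMap::from([("tonbo.key".to_string(), "true".to_string())]),
    /// );
    /// let schema = Arc::new(Schema::new(vec![id, Field::new("v", DataType::Int32, false)]));
    /// let config = DynModeConfig::from_schema(schema)?;
    /// ```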
117 | pub fn from_schema(schema: SchemaRef) -> Result<Self, KeyExtractError> { 118 | Self::from_metadata(schema) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /examples/06_composite_key.rs: -------------------------------------------------------------------------------- 1 | //! Composite Keys: multi-column primary keys for time-series and partitioned data 2 | //! 3 | //! Run: cargo run --example 06_composite_key 4 | 5 | use tonbo::prelude::*; 6 | 7 | // Define schema with composite key: (device_id, timestamp) 8 | // Use ordinal values in metadata for composite key ordering 9 | #[derive(Record)] 10 | struct SensorReading { 11 | #[metadata(k = "tonbo.key", v = "0")] 12 | device_id: String, 13 | #[metadata(k = "tonbo.key", v = "1")] 14 | timestamp: i64, 15 | temperature: Option<f64>, 16 | humidity: Option<f64>, 17 | } 18 | 19 | #[tokio::main] 20 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 21 | // Create DB with composite key detected from schema metadata 22 | let db = DbBuilder::from_schema(SensorReading::schema())? 23 | .on_disk("/tmp/tonbo_composite_key")? 24 | .open() 25 | .await?; 26 | 27 | // Insert time-series data 28 | let readings = vec![ 29 | SensorReading { 30 | device_id: "sensor-1".into(), 31 | timestamp: 1000, 32 | temperature: Some(22.5), 33 | humidity: Some(45.0), 34 | }, 35 | SensorReading { 36 | device_id: "sensor-1".into(), 37 | timestamp: 2000, 38 | temperature: Some(23.0), 39 | humidity: Some(46.0), 40 | }, 41 | SensorReading { 42 | device_id: "sensor-1".into(), 43 | timestamp: 3000, 44 | temperature: Some(22.8), 45 | humidity: Some(44.5), 46 | }, 47 | SensorReading { 48 | device_id: "sensor-2".into(), 49 | timestamp: 1500, 50 | temperature: Some(25.0), 51 | humidity: Some(50.0), 52 | }, 53 | SensorReading { 54 | device_id: "sensor-2".into(), 55 | timestamp: 2500, 56 | temperature: Some(25.5), 57 | humidity: Some(51.0), 58 | }, 59 | ]; 60 | 61 | let mut builders = SensorReading::new_builders(readings.len()); 62 | builders.append_rows(readings); 63 | db.ingest(builders.finish().into_record_batch()).await?; 64 | println!("Inserted 5 sensor readings with composite key (device_id, timestamp)"); 65 | 66 | // Query all data - results are ordered by composite key 67 | println!("\nAll readings (ordered by device_id, then timestamp):"); 68 | let batches = db.scan().collect().await?; 69 | print_readings(&batches)?; 70 | 71 | // Filter by first key component: device_id = 'sensor-1' 72 | println!("\nReadings for sensor-1:"); 73 | let filter = Predicate::eq(ColumnRef::new("device_id"), ScalarValue::from("sensor-1")); 74 | let batches = db.scan().filter(filter).collect().await?; 75 | print_readings(&batches)?; 76 | 77 | // Filter by second key component: timestamp > 2000 78 | println!("\nReadings after timestamp 2000:"); 79 | let filter = Predicate::gt(ColumnRef::new("timestamp"), ScalarValue::from(2000_i64)); 80 | let batches = db.scan().filter(filter).collect().await?; 81 | print_readings(&batches)?; 82 | 83 | // Combined filter on both key components 84 | println!("\nSensor-1 readings after timestamp 1500:"); 85 | let filter = Predicate::and(vec![ 86 | Predicate::eq(ColumnRef::new("device_id"), ScalarValue::from("sensor-1")), 87 | Predicate::gt(ColumnRef::new("timestamp"), ScalarValue::from(1500_i64)), 88 | ]); 89 | let batches = db.scan().filter(filter).collect().await?; 90 | print_readings(&batches)?; 91 | 92 | // Upsert: update existing key, insert new key 93 | println!("\nUpserting (sensor-1, 2000) and new 
(sensor-3, 1000):"); 94 | let mut tx = db.begin_transaction().await?; 95 | 96 | let updates = vec![ 97 | SensorReading { 98 | device_id: "sensor-1".into(), 99 | timestamp: 2000, 100 | temperature: Some(99.9), 101 | humidity: Some(99.9), 102 | }, 103 | SensorReading { 104 | device_id: "sensor-3".into(), 105 | timestamp: 1000, 106 | temperature: Some(20.0), 107 | humidity: Some(40.0), 108 | }, 109 | ]; 110 | let mut builders = SensorReading::new_builders(updates.len()); 111 | builders.append_rows(updates); 112 | tx.upsert_batch(&builders.finish().into_record_batch())?; 113 | tx.commit().await?; 114 | 115 | let batches = db.scan().collect().await?; 116 | print_readings(&batches)?; 117 | 118 | Ok(()) 119 | } 120 | 121 | fn print_readings(batches: &[arrow_array::RecordBatch]) -> Result<(), Box<dyn std::error::Error>> { 122 | for batch in batches { 123 | for r in batch.iter_views::<SensorReading>()?.try_flatten()? { 124 | let temp = r 125 | .temperature 126 | .map(|t| format!("{:.1}", t)) 127 | .unwrap_or("N/A".into()); 128 | let hum = r 129 | .humidity 130 | .map(|h| format!("{:.1}", h)) 131 | .unwrap_or("N/A".into()); 132 | println!( 133 | " ({}, {}) -> temp={}, humidity={}", 134 | r.device_id, r.timestamp, temp, hum 135 | ); 136 | } 137 | } 138 | Ok(()) 139 | } 140 | -------------------------------------------------------------------------------- /src/compaction/minor.rs: -------------------------------------------------------------------------------- 1 | //! Naïve minor-compaction driver for flushing immutable memtables. 2 | 3 | use std::sync::{ 4 | Arc, 5 | atomic::{AtomicU64, Ordering}, 6 | }; 7 | 8 | use fusio::executor::{Executor, Timer}; 9 | 10 | use crate::{ 11 | db::DbInner, 12 | manifest::ManifestFs, 13 | ondisk::sstable::{SsTable, SsTableConfig, SsTableDescriptor, SsTableError, SsTableId}, 14 | }; 15 | 16 | /// Naïve minor-compaction driver that flushes once a segment threshold is hit. 17 | pub(crate) struct MinorCompactor { 18 | segment_threshold: usize, 19 | target_level: usize, 20 | 21 | next_id: AtomicU64, 22 | } 23 | 24 | impl MinorCompactor { 25 | /// Build a compactor that flushes after `segment_threshold` immutable runs. 26 | pub(crate) fn new(segment_threshold: usize, target_level: usize, start_id: u64) -> Self { 27 | Self { 28 | segment_threshold: segment_threshold.max(1), 29 | target_level, 30 | next_id: AtomicU64::new(start_id), 31 | } 32 | } 33 | 34 | fn next_descriptor(&self) -> SsTableDescriptor { 35 | let id = self.next_id.fetch_add(1, Ordering::Relaxed); 36 | SsTableDescriptor::new(SsTableId::new(id), self.target_level) 37 | } 38 | 39 | /// Flush immutables when the threshold is met, returning the new SST on success. 
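    ///
    /// A sketch (not compiled) mirroring the unit tests below: with a threshold of one
    /// segment the compactor flushes as soon as any immutable memtable exists, writing
    /// level-0 SSTs whose ids start at 9. The `db` and `sst_cfg` bindings are assumed
    /// to come from the surrounding setup.
    ///
    /// ```ignore
    /// let compactor = MinorCompactor::new(1, 0, 9);
    /// if let Some(sstable) = compactor.maybe_compact(&db, Arc::clone(&sst_cfg)).await? {
    ///     // One or more immutable memtables were flushed into `sstable`.
    /// }
    /// ```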
40 | pub(crate) async fn maybe_compact<FS, E>( 41 | &self, 42 | db: &DbInner<FS, E>, 43 | config: Arc<SsTableConfig>, 44 | ) -> Result<Option<SsTable>, SsTableError> 45 | where 46 | FS: ManifestFs<E>, 47 | E: Executor + Timer + Clone, 48 | <FS as fusio::fs::Fs>::File: fusio::durability::FileCommit, 49 | { 50 | if db.num_immutable_segments() < self.segment_threshold { 51 | return Ok(None); 52 | } 53 | let descriptor = self.next_descriptor(); 54 | db.flush_immutables_with_descriptor(config, descriptor) 55 | .await 56 | .map(Some) 57 | } 58 | } 59 | 60 | #[cfg(all(test, feature = "tokio"))] 61 | mod tests { 62 | use std::sync::Arc; 63 | 64 | use arrow_schema::{DataType, Field, Schema}; 65 | use fusio::{ 66 | disk::LocalFs, dynamic::DynFs, executor::NoopExecutor, mem::fs::InMemoryFs, path::Path, 67 | }; 68 | use typed_arrow_dyn::{DynCell, DynRow}; 69 | 70 | use super::MinorCompactor; 71 | use crate::{ 72 | db::{DB, DbInner}, 73 | ondisk::sstable::SsTableConfig, 74 | test::build_batch, 75 | }; 76 | 77 | async fn build_db() -> (Arc<SsTableConfig>, DbInner<InMemoryFs, NoopExecutor>) { 78 | let schema = std::sync::Arc::new(Schema::new(vec![ 79 | Field::new("id", DataType::Utf8, false), 80 | Field::new("v", DataType::Int32, false), 81 | ])); 82 | let config = crate::schema::SchemaBuilder::from_schema(schema) 83 | .primary_key("id") 84 | .with_metadata() 85 | .build() 86 | .expect("key field"); 87 | let schema = Arc::clone(&config.schema); 88 | let executor = Arc::new(NoopExecutor); 89 | let db = DB::<InMemoryFs, NoopExecutor>::builder(config) 90 | .in_memory("compaction-test") 91 | .expect("in_memory config") 92 | .open_with_executor(Arc::clone(&executor)) 93 | .await 94 | .expect("db init") 95 | .into_inner(); 96 | 97 | let fs: Arc<dyn DynFs> = Arc::new(LocalFs {}); 98 | let cfg = Arc::new(SsTableConfig::new( 99 | schema.clone(), 100 | fs, 101 | Path::from("/tmp/tonbo-compaction-test"), 102 | )); 103 | (cfg, db) 104 | } 105 | 106 | #[tokio::test(flavor = "multi_thread")] 107 | async fn below_threshold_noop() { 108 | let (cfg, db) = build_db().await; 109 | let compactor = MinorCompactor::new(2, 0, 7); 110 | let result = compactor.maybe_compact(&db, cfg).await; 111 | assert!(matches!(result, Ok(None))); 112 | assert_eq!(db.num_immutable_segments(), 0); 113 | } 114 | 115 | #[tokio::test(flavor = "multi_thread")] 116 | async fn threshold_met_invokes_flush() { 117 | let (cfg, mut db) = build_db().await; 118 | db.set_seal_policy(Arc::new(crate::inmem::policy::BatchesThreshold { 119 | batches: 1, 120 | })); 121 | let rows = vec![DynRow(vec![ 122 | Some(DynCell::Str("k".into())), 123 | Some(DynCell::I32(1)), 124 | ])]; 125 | let batch = build_batch(cfg.schema().clone(), rows).expect("batch"); 126 | db.ingest(batch).await.expect("ingest"); 127 | assert_eq!(db.num_immutable_segments(), 1); 128 | 129 | let compactor = MinorCompactor::new(1, 0, 9); 130 | let table = compactor 131 | .maybe_compact(&db, cfg) 132 | .await 133 | .expect("flush result") 134 | .expect("sstable"); 135 | assert_eq!(db.num_immutable_segments(), 0); 136 | let descriptor = table.descriptor(); 137 | assert_eq!(descriptor.id().raw(), 9); 138 | assert_eq!(descriptor.level(), 0); 139 | assert_eq!(descriptor.stats().map(|s| s.rows), Some(1)); 140 | let stats = descriptor.stats().expect("descriptor stats"); 141 | assert_eq!(stats.rows, 1); 142 | assert!(stats.bytes > 0); 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /src/mode/mod.rs: 
-------------------------------------------------------------------------------- 1 | //! Dynamic mode configuration and tuning for the canonical layout. 2 | //! 3 | //! Tonbo previously abstracted storage layouts behind a `Mode` trait so that 4 | //! multiple implementations could plug into the same `DB` surface. We are 5 | //! committing to the dynamic, Arrow `RecordBatch` layout as the sole runtime 6 | //! representation, so this module now only contains the configuration and 7 | //! helper utilities needed to build that layout. 8 | 9 | use std::sync::Arc; 10 | 11 | use arrow_schema::{DataType, Field, Schema, SchemaRef}; 12 | use serde_json::Value; 13 | use sha2::{Digest, Sha256}; 14 | 15 | use crate::{ 16 | extractor::{KeyExtractError, KeyProjection, projection_for_columns}, 17 | inmem::{immutable::memtable::MVCC_COMMIT_COL, mutable::DynMem}, 18 | manifest::TableDefinition, 19 | transaction::CommitAckMode, 20 | }; 21 | 22 | mod dyn_config; 23 | 24 | /// Configuration bundle for constructing a `DynMode`. 25 | pub struct DynModeConfig { 26 | /// Arrow schema describing the dynamic table. 27 | pub(crate) schema: SchemaRef, 28 | /// Extractor used to derive logical keys from dynamic batches. 29 | pub(crate) extractor: Arc<dyn KeyProjection>, 30 | /// WAL acknowledgement mode for transactional commits. 31 | pub(crate) commit_ack_mode: CommitAckMode, 32 | } 33 | 34 | impl DynModeConfig { 35 | /// Validate the extractor against `schema` and construct the config bundle. 36 | pub(crate) fn new( 37 | schema: SchemaRef, 38 | extractor: Box<dyn KeyProjection>, 39 | ) -> Result<Self, KeyExtractError> { 40 | extractor.validate_schema(&schema)?; 41 | let extractor: Arc<dyn KeyProjection> = extractor.into(); 42 | Ok(Self { 43 | schema, 44 | extractor, 45 | commit_ack_mode: CommitAckMode::default(), 46 | }) 47 | } 48 | 49 | /// Override the commit acknowledgement mode for transactional writes. 50 | pub fn with_commit_ack_mode(mut self, mode: CommitAckMode) -> Self { 51 | self.commit_ack_mode = mode; 52 | self 53 | } 54 | 55 | /// Clone the schema associated with this configuration. 56 | pub fn schema(&self) -> SchemaRef { 57 | Arc::clone(&self.schema) 58 | } 59 | 60 | /// Build the dynamic storage parameters and mutable memtable backing the DB. 61 | /// 62 | /// Returns `(schema, delete_schema, commit_ack_mode, mutable)`. 63 | pub(crate) fn build( 64 | self, 65 | ) -> Result<(SchemaRef, SchemaRef, CommitAckMode, DynMem), KeyExtractError> { 66 | let DynModeConfig { 67 | schema, 68 | extractor, 69 | commit_ack_mode, 70 | } = self; 71 | extractor.validate_schema(&schema)?; 72 | let key_schema = extractor.key_schema(); 73 | let delete_schema = build_delete_schema(&key_schema); 74 | let key_columns = key_schema.fields().len(); 75 | let delete_projection = 76 | projection_for_columns(delete_schema.clone(), (0..key_columns).collect())?; 77 | let delete_projection: Arc<dyn KeyProjection> = delete_projection.into(); 78 | 79 | let mutable = DynMem::new(schema.clone(), extractor, delete_projection); 80 | Ok((schema, delete_schema, commit_ack_mode, mutable)) 81 | } 82 | } 83 | 84 | fn build_delete_schema(key_schema: &SchemaRef) -> SchemaRef { 85 | let mut fields = key_schema 86 | .fields() 87 | .iter() 88 | .map(|field| field.as_ref().clone()) 89 | .collect::<Vec<Field>>(); 90 | fields.push(Field::new(MVCC_COMMIT_COL, DataType::UInt64, false)); 91 | std::sync::Arc::new(Schema::new(fields)) 92 | } 93 | 94 | /// Derive the table definition used when registering a table in the manifest. 
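/// 
/// The resulting definition carries the table name, a SHA-256 fingerprint of the canonicalised
/// Arrow schema JSON, and the primary-key column names; `retention` defaults to `None` and
/// `schema_version` starts at 0 (see the construction below).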
95 | pub(crate) fn table_definition(config: &DynModeConfig, table_name: &str) -> TableDefinition { 96 | let key_columns = config 97 | .extractor 98 | .key_indices() 99 | .iter() 100 | .map(|idx| config.schema.field(*idx).name().clone()) 101 | .collect(); 102 | TableDefinition { 103 | name: table_name.to_string(), 104 | schema_fingerprint: fingerprint_schema(&config.schema), 105 | primary_key_columns: key_columns, 106 | retention: None, 107 | schema_version: 0, 108 | } 109 | } 110 | 111 | fn fingerprint_schema(schema: &SchemaRef) -> String { 112 | let mut hasher = Sha256::new(); 113 | let value = 114 | serde_json::to_value(schema.as_ref()).expect("arrow schema serialization should not fail"); 115 | let canonical = canonicalize_json(value); 116 | let bytes = 117 | serde_json::to_vec(&canonical).expect("canonical schema serialization should not fail"); 118 | hasher.update(bytes); 119 | format!("{:x}", hasher.finalize()) 120 | } 121 | 122 | fn canonicalize_json(value: Value) -> Value { 123 | match value { 124 | Value::Object(map) => { 125 | let mut entries: Vec<_> = map.into_iter().collect(); 126 | entries.sort_by(|a, b| a.0.cmp(&b.0)); 127 | let sorted = entries 128 | .into_iter() 129 | .map(|(key, value)| (key, canonicalize_json(value))) 130 | .collect(); 131 | Value::Object(sorted) 132 | } 133 | Value::Array(items) => Value::Array(items.into_iter().map(canonicalize_json).collect()), 134 | other => other, 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /tests/s3_smoke.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "s3-smoke")] 2 | //! Integration smoke test that exercises the S3 object-store plumbing against a 3 | //! live endpoint. Enable via `cargo test --features s3-smoke --test s3_smoke` 4 | //! (requires the TONBO_S3_* environment variables). 
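//! 
//! The test reads its configuration from these variables and skips itself when a required one is
//! missing:
//! 
//! - `TONBO_S3_ENDPOINT`, `TONBO_S3_BUCKET`, `TONBO_S3_REGION`: target endpoint and bucket
//! - `TONBO_S3_ACCESS_KEY`, `TONBO_S3_SECRET_KEY`: credentials
//! - `TONBO_S3_SESSION_TOKEN`: optional session token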
5 | 6 | use std::{ 7 | sync::Arc, 8 | time::{SystemTime, UNIX_EPOCH}, 9 | }; 10 | 11 | use arrow_array::{Array, Int32Array, RecordBatch, StringArray}; 12 | use arrow_schema::{DataType, Field}; 13 | use fusio::{executor::tokio::TokioExecutor, impls::remotes::aws::fs::AmazonS3}; 14 | use tonbo::db::{AwsCreds, DB, ObjectSpec, S3Spec, WalSyncPolicy}; 15 | 16 | #[path = "common/mod.rs"] 17 | mod common; 18 | 19 | use common::config_with_pk; 20 | 21 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 22 | async fn s3_smoke() -> Result<(), Box<dyn std::error::Error>> { 23 | let endpoint = match std::env::var("TONBO_S3_ENDPOINT") { 24 | Ok(v) => v, 25 | Err(_) => { 26 | eprintln!("skipping s3_smoke – TONBO_S3_ENDPOINT missing"); 27 | return Ok(()); 28 | } 29 | }; 30 | let bucket = match std::env::var("TONBO_S3_BUCKET") { 31 | Ok(v) => v, 32 | Err(_) => { 33 | eprintln!("skipping s3_smoke – TONBO_S3_BUCKET missing"); 34 | return Ok(()); 35 | } 36 | }; 37 | let region = match std::env::var("TONBO_S3_REGION") { 38 | Ok(v) => v, 39 | Err(_) => { 40 | eprintln!("skipping s3_smoke – TONBO_S3_REGION missing"); 41 | return Ok(()); 42 | } 43 | }; 44 | let access = match std::env::var("TONBO_S3_ACCESS_KEY") { 45 | Ok(v) => v, 46 | Err(_) => { 47 | eprintln!("skipping s3_smoke – TONBO_S3_ACCESS_KEY missing"); 48 | return Ok(()); 49 | } 50 | }; 51 | let secret = match std::env::var("TONBO_S3_SECRET_KEY") { 52 | Ok(v) => v, 53 | Err(_) => { 54 | eprintln!("skipping s3_smoke – TONBO_S3_SECRET_KEY missing"); 55 | return Ok(()); 56 | } 57 | }; 58 | let session_token = std::env::var("TONBO_S3_SESSION_TOKEN").ok(); 59 | 60 | let config = config_with_pk( 61 | vec![ 62 | Field::new("id", DataType::Utf8, false), 63 | Field::new("value", DataType::Int32, false), 64 | ], 65 | &["id"], 66 | ); 67 | let schema = config.schema(); 68 | 69 | let label_millis = SystemTime::now() 70 | .duration_since(UNIX_EPOCH) 71 | .map_err(|err| format!("system clock before unix epoch: {err}"))? 72 | .as_millis(); 73 | let label = format!("smoke-{label_millis}"); 74 | 75 | let credentials = match session_token { 76 | Some(token) => AwsCreds::with_session_token(access, secret, token), 77 | None => AwsCreds::new(access, secret), 78 | }; 79 | 80 | let mut s3 = S3Spec::new(bucket.clone(), label.clone(), credentials); 81 | s3.endpoint = Some(endpoint); 82 | s3.region = Some(region); 83 | s3.sign_payload = Some(true); 84 | 85 | let db: DB<AmazonS3, TokioExecutor> = DB::<AmazonS3, TokioExecutor>::builder(config) 86 | .object_store(ObjectSpec::s3(s3)) 87 | .map_err(|err| format!("object_store config: {err}"))? 
88 | .wal_sync_policy(WalSyncPolicy::Always) 89 | .wal_retention_bytes(Some(1 << 20)) 90 | .build() 91 | .await 92 | .map_err(|err| format!("failed to build S3-backed DB: {err}"))?; 93 | 94 | let batch = RecordBatch::try_new( 95 | schema.clone(), 96 | vec![ 97 | Arc::new(StringArray::from(vec!["alice", "bob"])) as _, 98 | Arc::new(Int32Array::from(vec![10, 20])) as _, 99 | ], 100 | )?; 101 | 102 | db.ingest(batch).await?; 103 | 104 | // Verify we can read the data back via scan 105 | let results: Vec<RecordBatch> = db.scan().collect().await?; 106 | 107 | let total_rows: usize = results.iter().map(|b| b.num_rows()).sum(); 108 | if total_rows < 2 { 109 | return Err(format!("expected at least 2 rows, got {total_rows}").into()); 110 | } 111 | 112 | // Verify the data content 113 | let mut found_alice = false; 114 | let mut found_bob = false; 115 | 116 | for batch in &results { 117 | let ids = batch 118 | .column(0) 119 | .as_any() 120 | .downcast_ref::<StringArray>() 121 | .expect("id column should be StringArray"); 122 | let values = batch 123 | .column(1) 124 | .as_any() 125 | .downcast_ref::<Int32Array>() 126 | .expect("value column should be Int32Array"); 127 | 128 | for i in 0..batch.num_rows() { 129 | match ids.value(i) { 130 | "alice" => { 131 | assert_eq!(values.value(i), 10, "alice should have value 10"); 132 | found_alice = true; 133 | } 134 | "bob" => { 135 | assert_eq!(values.value(i), 20, "bob should have value 20"); 136 | found_bob = true; 137 | } 138 | other => { 139 | return Err(format!("unexpected id: {other}").into()); 140 | } 141 | } 142 | } 143 | } 144 | 145 | if !found_alice { 146 | return Err("expected to find alice in results".into()); 147 | } 148 | if !found_bob { 149 | return Err("expected to find bob in results".into()); 150 | } 151 | 152 | Ok(()) 153 | } 154 | -------------------------------------------------------------------------------- /examples/08_nested_types.rs: -------------------------------------------------------------------------------- 1 | //! Nested Types: Deep struct nesting, List, and complex compositions 2 | //! 3 | //! Run: cargo run --example 08_nested_types 4 | 5 | use tonbo::prelude::*; 6 | use typed_arrow::bridge::List; 7 | 8 | // Level 1: Geo coordinates (innermost) 9 | #[derive(Record, Clone)] 10 | struct Geo { 11 | lat: f64, 12 | lon: f64, 13 | } 14 | 15 | // Level 2: Address contains optional Geo 16 | #[derive(Record, Clone)] 17 | struct Address { 18 | city: String, 19 | zip: Option<i32>, 20 | geo: Option<Geo>, 21 | } 22 | 23 | // Level 3: Company contains optional Address (headquarters) 24 | #[derive(Record, Clone)] 25 | struct Company { 26 | name: String, 27 | hq: Option<Address>, 28 | } 29 | 30 | // Level 4: Person with deep nesting + List 31 | #[derive(Record)] 32 | struct Person { 33 | #[metadata(k = "tonbo.key", v = "true")] 34 | id: i64, 35 | name: String, 36 | company: Option<Company>, 37 | home: Option<Address>, 38 | tags: Option<List<String>>, 39 | } 40 | 41 | #[tokio::main] 42 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 43 | let db = DbBuilder::from_schema(Person::schema())? 44 | .on_disk("/tmp/tonbo_nested_types")? 
45 | .open() 46 | .await?; 47 | 48 | // Create people with varying levels of nested data 49 | let people = vec![ 50 | // Full deep nesting: Person -> Company -> Address -> Geo 51 | Person { 52 | id: 1, 53 | name: "Alice".into(), 54 | company: Some(Company { 55 | name: "TechCorp".into(), 56 | hq: Some(Address { 57 | city: "Seattle".into(), 58 | zip: Some(98101), 59 | geo: Some(Geo { 60 | lat: 47.6062, 61 | lon: -122.3321, 62 | }), 63 | }), 64 | }), 65 | home: Some(Address { 66 | city: "Bellevue".into(), 67 | zip: Some(98004), 68 | geo: None, 69 | }), 70 | tags: Some(List::new(vec!["engineer".into(), "rust".into()])), 71 | }, 72 | // Partial nesting: company without HQ address 73 | Person { 74 | id: 2, 75 | name: "Bob".into(), 76 | company: Some(Company { 77 | name: "StartupInc".into(), 78 | hq: None, // Remote-first, no HQ 79 | }), 80 | home: Some(Address { 81 | city: "Portland".into(), 82 | zip: None, 83 | geo: Some(Geo { 84 | lat: 45.5152, 85 | lon: -122.6784, 86 | }), 87 | }), 88 | tags: Some(List::new(vec!["founder".into()])), 89 | }, 90 | // Minimal data: no company, no home 91 | Person { 92 | id: 3, 93 | name: "Carol".into(), 94 | company: None, 95 | home: None, 96 | tags: None, 97 | }, 98 | ]; 99 | 100 | let mut builders = Person::new_builders(people.len()); 101 | builders.append_rows(people); 102 | db.ingest(builders.finish().into_record_batch()).await?; 103 | 104 | println!("Inserted 3 people with deep nested data\n"); 105 | 106 | // Query and traverse the nested structure 107 | let batches = db.scan().collect().await?; 108 | 109 | println!("=== Deep Nested Data ===\n"); 110 | for batch in &batches { 111 | for person in batch.iter_views::<Person>()?.try_flatten()? { 112 | println!("Person {} - {}", person.id, person.name); 113 | 114 | // Traverse: company -> hq -> geo (3 levels deep) 115 | match person.company { 116 | Some(company) => { 117 | println!(" Company: {}", company.name); 118 | match company.hq { 119 | Some(hq) => { 120 | print!(" HQ: {}", hq.city); 121 | if let Some(zip) = hq.zip { 122 | print!(", {}", zip); 123 | } 124 | if let Some(geo) = hq.geo { 125 | print!(" ({:.4}, {:.4})", geo.lat, geo.lon); 126 | } 127 | println!(); 128 | } 129 | None => println!(" HQ: (remote)"), 130 | } 131 | } 132 | None => println!(" Company: (none)"), 133 | } 134 | 135 | // Home address with optional geo 136 | match person.home { 137 | Some(home) => { 138 | print!(" Home: {}", home.city); 139 | if let Some(geo) = home.geo { 140 | print!(" ({:.4}, {:.4})", geo.lat, geo.lon); 141 | } 142 | println!(); 143 | } 144 | None => println!(" Home: (none)"), 145 | } 146 | 147 | // Tags list 148 | match person.tags { 149 | Some(tags) => { 150 | let vals: Vec<String> = tags 151 | .map(|r| r.map(|s| s.to_string())) 152 | .collect::<Result<_, _>>()?; 153 | println!(" Tags: {:?}", vals); 154 | } 155 | None => println!(" Tags: (none)"), 156 | } 157 | 158 | println!(); 159 | } 160 | } 161 | 162 | Ok(()) 163 | } 164 | -------------------------------------------------------------------------------- /examples/09_time_travel.rs: -------------------------------------------------------------------------------- 1 | //! Time Travel: list versions and create snapshots at specific timestamps 2 | //! 3 | //! This example demonstrates the time travel API: 4 | //! - `db.list_versions(limit)` - enumerate committed versions 5 | //! - `db.snapshot_at(timestamp)` - create a snapshot at a specific timestamp 6 | //! 7 | //! Tonbo supports two levels of time travel: 8 | //! 1. 
**MVCC timestamps** - every commit gets a logical timestamp for visibility control 9 | //! 2. **Manifest versions** - when data is flushed to SST files, a version snapshot is recorded in 10 | //! the manifest, enabling queries against historical file sets 11 | //! 12 | //! Run: cargo run --example 09_time_travel 13 | 14 | use tonbo::prelude::*; 15 | 16 | #[derive(Record)] 17 | struct Product { 18 | #[metadata(k = "tonbo.key", v = "true")] 19 | id: i64, 20 | name: String, 21 | price: i64, 22 | } 23 | 24 | #[tokio::main] 25 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 26 | let db = DbBuilder::from_schema(Product::schema())? 27 | .on_disk("/tmp/tonbo_time_travel")? 28 | .open() 29 | .await?; 30 | 31 | // === Insert data in multiple transactions === 32 | // Each transaction gets a unique MVCC timestamp 33 | 34 | // Transaction 1: Initial products 35 | let products = vec![ 36 | Product { 37 | id: 1, 38 | name: "Laptop".into(), 39 | price: 999, 40 | }, 41 | Product { 42 | id: 2, 43 | name: "Mouse".into(), 44 | price: 29, 45 | }, 46 | ]; 47 | let mut builders = Product::new_builders(products.len()); 48 | builders.append_rows(products); 49 | db.ingest(builders.finish().into_record_batch()).await?; 50 | println!("Tx1: Inserted Laptop ($999), Mouse ($29)"); 51 | 52 | // Transaction 2: Price update 53 | let mut tx = db.begin_transaction().await?; 54 | let update = vec![Product { 55 | id: 1, 56 | name: "Laptop".into(), 57 | price: 899, 58 | }]; 59 | let mut builders = Product::new_builders(update.len()); 60 | builders.append_rows(update); 61 | tx.upsert_batch(&builders.finish().into_record_batch())?; 62 | tx.commit().await?; 63 | println!("Tx2: Laptop price reduced to $899"); 64 | 65 | // Transaction 3: New product 66 | let mut tx = db.begin_transaction().await?; 67 | let update = vec![Product { 68 | id: 3, 69 | name: "Keyboard".into(), 70 | price: 79, 71 | }]; 72 | let mut builders = Product::new_builders(update.len()); 73 | builders.append_rows(update); 74 | tx.upsert_batch(&builders.finish().into_record_batch())?; 75 | tx.commit().await?; 76 | println!("Tx3: Added Keyboard ($79)"); 77 | 78 | // === List persisted versions === 79 | // Versions are created when data is flushed to SST files 80 | println!("\n=== Persisted Versions (from manifest) ==="); 81 | let versions = db.list_versions(10).await?; 82 | if versions.is_empty() { 83 | println!(" (no SST versions yet - data is in memory)"); 84 | } else { 85 | for (i, v) in versions.iter().enumerate() { 86 | println!( 87 | " Version {}: timestamp={}, ssts={}, levels={}", 88 | versions.len() - i, 89 | v.timestamp.get(), 90 | v.sst_count, 91 | v.level_count 92 | ); 93 | } 94 | } 95 | 96 | // === Query current state === 97 | println!("\n=== Current State ==="); 98 | let batches = db.scan().collect().await?; 99 | for batch in &batches { 100 | for product in batch.iter_views::<Product>()?.try_flatten()? { 101 | println!(" {} - {} (${})", product.id, product.name, product.price); 102 | } 103 | } 104 | 105 | // === Snapshot at specific MVCC timestamp === 106 | // Query using timestamps from list_versions for reliable time travel 107 | if let Some(first_version) = versions.last() { 108 | println!( 109 | "\n=== Snapshot at timestamp={} (first version) ===", 110 | first_version.timestamp.get() 111 | ); 112 | let snapshot = db.snapshot_at(first_version.timestamp).await?; 113 | let batches = snapshot.scan(&db).collect().await?; 114 | for batch in &batches { 115 | for product in batch.iter_views::<Product>()?.try_flatten()? 
{ 116 | println!(" {} - {} (${})", product.id, product.name, product.price); 117 | } 118 | } 119 | } 120 | 121 | if let Some(latest_version) = versions.first() { 122 | println!( 123 | "\n=== Snapshot at timestamp={} (latest version) ===", 124 | latest_version.timestamp.get() 125 | ); 126 | let snapshot = db.snapshot_at(latest_version.timestamp).await?; 127 | let batches = snapshot.scan(&db).collect().await?; 128 | for batch in &batches { 129 | for product in batch.iter_views::<Product>()?.try_flatten()? { 130 | println!(" {} - {} (${})", product.id, product.name, product.price); 131 | } 132 | } 133 | } 134 | 135 | // Current snapshot (includes in-memory data) 136 | println!("\n=== Current State (begin_snapshot) ==="); 137 | let snapshot = db.begin_snapshot().await?; 138 | let batches = snapshot.scan(&db).collect().await?; 139 | for batch in &batches { 140 | for product in batch.iter_views::<Product>()?.try_flatten()? { 141 | println!(" {} - {} (${})", product.id, product.name, product.price); 142 | } 143 | } 144 | 145 | Ok(()) 146 | } 147 | -------------------------------------------------------------------------------- /examples/03_filter.rs: -------------------------------------------------------------------------------- 1 | //! Query filtering: predicates for eq, gt, lt, and, or, in, is_null 2 | //! 3 | //! Run: cargo run --example 03_filter 4 | 5 | use fusio::{disk::LocalFs, executor::tokio::TokioExecutor}; 6 | use tonbo::prelude::*; 7 | 8 | #[derive(Record)] 9 | struct Product { 10 | #[metadata(k = "tonbo.key", v = "true")] 11 | id: String, 12 | name: String, 13 | price: i64, 14 | category: Option<String>, 15 | } 16 | 17 | #[tokio::main] 18 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 19 | let db = DbBuilder::from_schema(Product::schema())? 20 | .on_disk("/tmp/tonbo_filter_example")? 21 | .open() 22 | .await?; 23 | 24 | // Insert sample data 25 | let products = vec![ 26 | Product { 27 | id: "p1".into(), 28 | name: "Laptop".into(), 29 | price: 999, 30 | category: Some("Electronics".into()), 31 | }, 32 | Product { 33 | id: "p2".into(), 34 | name: "Mouse".into(), 35 | price: 29, 36 | category: Some("Electronics".into()), 37 | }, 38 | Product { 39 | id: "p3".into(), 40 | name: "Desk".into(), 41 | price: 299, 42 | category: Some("Furniture".into()), 43 | }, 44 | Product { 45 | id: "p4".into(), 46 | name: "Chair".into(), 47 | price: 199, 48 | category: Some("Furniture".into()), 49 | }, 50 | Product { 51 | id: "p5".into(), 52 | name: "Notebook".into(), 53 | price: 5, 54 | category: Some("Office".into()), 55 | }, 56 | Product { 57 | id: "p6".into(), 58 | name: "Mystery Box".into(), 59 | price: 50, 60 | category: None, 61 | }, 62 | ]; 63 | let mut builders = Product::new_builders(products.len()); 64 | builders.append_rows(products); 65 | db.ingest(builders.finish().into_record_batch()).await?; 66 | 67 | // 1. Equality: price == 29 68 | println!("1. price == 29:"); 69 | let filter = Predicate::eq(ColumnRef::new("price"), ScalarValue::from(29_i64)); 70 | print_products(&db, filter).await?; 71 | 72 | // 2. Comparison: price > 100 73 | println!("\n2. price > 100:"); 74 | let filter = Predicate::gt(ColumnRef::new("price"), ScalarValue::from(100_i64)); 75 | print_products(&db, filter).await?; 76 | 77 | // 3. Range: 50 <= price <= 300 78 | println!("\n3. 
50 <= price <= 300:"); 79 | let filter = Predicate::and(vec![ 80 | Predicate::gte(ColumnRef::new("price"), ScalarValue::from(50_i64)), 81 | Predicate::lte(ColumnRef::new("price"), ScalarValue::from(300_i64)), 82 | ]); 83 | print_products(&db, filter).await?; 84 | 85 | // 4. IN list: category in ["Electronics", "Office"] 86 | println!("\n4. category IN ['Electronics', 'Office']:"); 87 | let filter = Predicate::in_list( 88 | ColumnRef::new("category"), 89 | vec![ 90 | ScalarValue::from("Electronics"), 91 | ScalarValue::from("Office"), 92 | ], 93 | ); 94 | print_products(&db, filter).await?; 95 | 96 | // 5. IS NULL: category is null 97 | println!("\n5. category IS NULL:"); 98 | let filter = Predicate::is_null(ColumnRef::new("category")); 99 | print_products(&db, filter).await?; 100 | 101 | // 6. IS NOT NULL: category is not null 102 | println!("\n6. category IS NOT NULL:"); 103 | let filter = Predicate::is_not_null(ColumnRef::new("category")); 104 | print_products(&db, filter).await?; 105 | 106 | // 7. AND: Electronics AND price < 100 107 | println!("\n7. category == 'Electronics' AND price < 100:"); 108 | let filter = Predicate::and(vec![ 109 | Predicate::eq(ColumnRef::new("category"), ScalarValue::from("Electronics")), 110 | Predicate::lt(ColumnRef::new("price"), ScalarValue::from(100_i64)), 111 | ]); 112 | print_products(&db, filter).await?; 113 | 114 | // 8. OR: Furniture OR price < 10 115 | println!("\n8. category == 'Furniture' OR price < 10:"); 116 | let filter = Predicate::or(vec![ 117 | Predicate::eq(ColumnRef::new("category"), ScalarValue::from("Furniture")), 118 | Predicate::lt(ColumnRef::new("price"), ScalarValue::from(10_i64)), 119 | ]); 120 | print_products(&db, filter).await?; 121 | 122 | // 9. NOT: NOT category == 'Electronics' 123 | println!("\n9. NOT category == 'Electronics':"); 124 | let filter = Predicate::eq(ColumnRef::new("category"), ScalarValue::from("Electronics")).not(); 125 | print_products(&db, filter).await?; 126 | 127 | // 10. Complex: (Electronics OR Furniture) AND price > 100 128 | println!("\n10. (Electronics OR Furniture) AND price > 100:"); 129 | let filter = Predicate::and(vec![ 130 | Predicate::or(vec![ 131 | Predicate::eq(ColumnRef::new("category"), ScalarValue::from("Electronics")), 132 | Predicate::eq(ColumnRef::new("category"), ScalarValue::from("Furniture")), 133 | ]), 134 | Predicate::gt(ColumnRef::new("price"), ScalarValue::from(100_i64)), 135 | ]); 136 | print_products(&db, filter).await?; 137 | 138 | Ok(()) 139 | } 140 | 141 | async fn print_products( 142 | db: &DB<LocalFs, TokioExecutor>, 143 | filter: Predicate, 144 | ) -> Result<(), Box<dyn std::error::Error>> { 145 | let batches = db.scan().filter(filter).collect().await?; 146 | let mut found = false; 147 | for batch in &batches { 148 | for p in batch.iter_views::<Product>()?.try_flatten()? { 149 | let cat = p.category.unwrap_or("NULL"); 150 | println!(" {} - {} (${}) [{}]", p.id, p.name, p.price, cat); 151 | found = true; 152 | } 153 | } 154 | if !found { 155 | println!(" (no results)"); 156 | } 157 | Ok(()) 158 | } 159 | -------------------------------------------------------------------------------- /src/schema.rs: -------------------------------------------------------------------------------- 1 | //! Declarative schema utilities for defining primary keys and runtime layouts. 2 | //! 3 | //! Use `SchemaBuilder` to wrap an Arrow `Schema`, declare single or composite primary keys, 4 | //! and optionally backfill `tonbo.keys` metadata so downstream tools see the same intent. 5 | //! 
The builder validates key columns and produces the `DynModeConfig` used by `DbBuilder`, 6 | //! preferring Arrow metadata over ad hoc extractors for the primary key path. 7 | 8 | use std::sync::Arc; 9 | 10 | use arrow_schema::{Schema, SchemaRef}; 11 | use serde_json::json; 12 | 13 | use crate::{ 14 | extractor::{self, KeyExtractError}, 15 | mode::DynModeConfig, 16 | }; 17 | 18 | /// Builder for declaring primary keys against an Arrow schema. 19 | /// 20 | /// The builder lets callers specify key columns programmatically while reusing 21 | /// the same validation and extractor logic exercised by metadata-driven flows. 22 | /// Optionally it can back-fill schema metadata (`tonbo.keys`) so downstream 23 | /// tooling observes the same declaration. 24 | #[derive(Clone)] 25 | pub struct SchemaBuilder { 26 | schema: SchemaRef, 27 | key_parts: Vec<String>, 28 | write_metadata: bool, 29 | } 30 | 31 | impl SchemaBuilder { 32 | /// Start a builder from an Arrow schema reference. 33 | pub fn from_schema(schema: SchemaRef) -> Self { 34 | Self { 35 | schema, 36 | key_parts: Vec::new(), 37 | write_metadata: false, 38 | } 39 | } 40 | 41 | /// Declare a single-column primary key, replacing any prior selection. 42 | pub fn primary_key(mut self, field: impl Into<String>) -> Self { 43 | self.key_parts = vec![field.into()]; 44 | self 45 | } 46 | 47 | /// Declare a composite key with fields in the provided order, replacing any prior selection. 48 | pub fn composite_key<I, S>(mut self, fields: I) -> Self 49 | where 50 | I: IntoIterator<Item = S>, 51 | S: Into<String>, 52 | { 53 | self.key_parts = fields.into_iter().map(Into::into).collect(); 54 | self 55 | } 56 | 57 | /// Append a field to the key definition (useful for incremental configuration). 58 | pub fn add_key_part(mut self, field: impl Into<String>) -> Self { 59 | self.key_parts.push(field.into()); 60 | self 61 | } 62 | 63 | /// Request that the builder writes the resulting key declaration back into schema metadata. 64 | pub fn with_metadata(mut self) -> Self { 65 | self.write_metadata = true; 66 | self 67 | } 68 | 69 | /// Finalise the builder, producing a `DynModeConfig` and optionally updated schema metadata. 
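    ///
    /// Usage sketch (mirrors the unit tests below; assumes the schema contains `pk` and `ts`
    /// columns):
    ///
    /// ```ignore
    /// let config = SchemaBuilder::from_schema(schema)
    ///     .composite_key(["pk", "ts"])
    ///     .with_metadata()
    ///     .build()?;
    /// let keys = config.schema().metadata().get("tonbo.keys").cloned();
    /// assert_eq!(keys.as_deref(), Some("[\"pk\",\"ts\"]"));
    /// ```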
70 | pub fn build(self) -> Result<DynModeConfig, KeyExtractError> { 71 | if self.key_parts.is_empty() { 72 | return Err(KeyExtractError::NoSuchField { 73 | name: "schema builder requires at least one key field".to_string(), 74 | }); 75 | } 76 | 77 | let fields = self.schema.fields(); 78 | let mut indices = Vec::with_capacity(self.key_parts.len()); 79 | for name in &self.key_parts { 80 | let Some((idx, _)) = fields.iter().enumerate().find(|(_, f)| f.name() == name) else { 81 | return Err(KeyExtractError::NoSuchField { name: name.clone() }); 82 | }; 83 | indices.push(idx); 84 | } 85 | 86 | let schema = if self.write_metadata { 87 | let mut metadata = self.schema.metadata().clone(); 88 | metadata.insert("tonbo.keys".to_string(), json!(self.key_parts).to_string()); 89 | let field_refs = fields.iter().cloned().collect::<Vec<_>>(); 90 | Arc::new(Schema::new_with_metadata(field_refs, metadata)) 91 | } else { 92 | Arc::clone(&self.schema) 93 | }; 94 | 95 | let extractor = extractor::projection_for_columns(Arc::clone(&schema), indices)?; 96 | 97 | DynModeConfig::new(schema, extractor) 98 | } 99 | } 100 | 101 | #[cfg(test)] 102 | mod tests { 103 | use std::sync::Arc; 104 | 105 | use arrow_schema::{DataType, Field, Schema}; 106 | 107 | use super::SchemaBuilder; 108 | 109 | #[test] 110 | fn primary_key_builder() { 111 | let schema = Arc::new(Schema::new(vec![ 112 | Field::new("id", DataType::Utf8, false), 113 | Field::new("value", DataType::Int32, false), 114 | ])); 115 | 116 | let config = SchemaBuilder::from_schema(Arc::clone(&schema)) 117 | .primary_key("id") 118 | .build() 119 | .expect("builder should succeed"); 120 | 121 | assert_eq!(config.schema.fields()[0].name(), "id"); 122 | } 123 | 124 | #[test] 125 | fn composite_key_builder_sets_metadata() { 126 | let schema = Arc::new(Schema::new(vec![ 127 | Field::new("pk", DataType::Utf8, false), 128 | Field::new("ts", DataType::Int64, false), 129 | ])); 130 | 131 | let config = SchemaBuilder::from_schema(Arc::clone(&schema)) 132 | .composite_key(["pk", "ts"]) 133 | .with_metadata() 134 | .build() 135 | .expect("builder should succeed"); 136 | 137 | let md = config.schema.metadata(); 138 | assert_eq!(md.get("tonbo.keys"), Some(&String::from("[\"pk\",\"ts\"]"))); 139 | } 140 | 141 | #[test] 142 | fn missing_field_errors() { 143 | let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Utf8, false)])); 144 | 145 | let err = match SchemaBuilder::from_schema(schema) 146 | .primary_key("missing") 147 | .build() 148 | { 149 | Ok(_) => panic!("builder should fail"), 150 | Err(err) => err, 151 | }; 152 | assert!(matches!( 153 | err, 154 | crate::extractor::KeyExtractError::NoSuchField { .. } 155 | )); 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /docs/rfcs/0006-mvcc-sidecar.md: -------------------------------------------------------------------------------- 1 | # RFC: MVCC Storage Layout for Immutable Segments 2 | 3 | - Status: Accepted 4 | - Authors: Tonbo storage team 5 | - Created: 2025-10-28 6 | - Updated: 2025-11 7 | - Area: Storage engine, MVCC, SSTable 8 | 9 | ## Summary 10 | 11 | Define the on-disk layout for MVCC metadata in immutable segments and SSTables. Data files embed `_commit_ts` as an appended column, while tombstones are stored in a separate key-only delete sidecar. 
12 | 13 | ## Motivation 14 | 15 | - Keep user schema columns cleanly separated from MVCC system columns 16 | - Avoid materializing tombstoned rows as null-filled payloads in the data file 17 | - Enable efficient column pruning—scans that don't need visibility checks can skip `_commit_ts` 18 | - Support delete-heavy workloads without bloating data files 19 | 20 | ## Goals 21 | 22 | - Immutable segments and SST files store `_commit_ts` alongside user data for upserts 23 | - Tombstones live in a dedicated key-only sidecar, not in the data file 24 | - WAL payloads remain Arrow-native and replayable 25 | - Compaction, recovery, and GC operate over append-only objects via Fusio 26 | - Minimal read amplification for scans and range lookups 27 | 28 | ## Non-Goals 29 | 30 | - Changing timestamp assignment strategy (remains monotonic per commit) 31 | - Introducing new transaction semantics or cross-table coordination 32 | - Typed/compile-time ingestion pathways (future work per RFC 0001) 33 | 34 | ## Design 35 | 36 | ### Storage Layout 37 | 38 | Each SSTable consists of up to two files: 39 | 40 | ``` 41 | /sst/L{level}/{id}.parquet # data + _commit_ts column 42 | /sst/L{level}/{id}.delete.parquet # key-only delete sidecar (when tombstones exist) 43 | ``` 44 | 45 | ### Data File 46 | 47 | The data Parquet contains user schema columns with `_commit_ts: UInt64` appended as the last column. This enables: 48 | 49 | - Column pruning to skip `_commit_ts` when visibility filtering is not needed 50 | - Single-file I/O for upsert-only segments 51 | - Alignment with WAL frame layout 52 | 53 | ### Delete Sidecar 54 | 55 | Schema: `<primary-key columns>, _commit_ts: UInt64 (non-null)` 56 | 57 | - Only emitted when the segment contains tombstones 58 | - Key-only format avoids storing null value columns for deleted rows 59 | - Enables efficient tombstone lookup during scans without polluting the data file 60 | 61 | ### WAL Integration 62 | 63 | - Upsert frames carry batches with `_commit_ts` column 64 | - Delete frames carry key-only batches for tombstones 65 | - Replay reconstructs both upsert data and tombstone metadata 66 | 67 | ### Read Path 68 | 69 | 1. Load data Parquet, extracting `_commit_ts` for visibility checks 70 | 2. Load delete sidecar if present 71 | 3. Range scans consult both for MVCC filtering 72 | 4. Callers never see `_commit_ts` in query results—projection excludes it 73 | 74 | ### Compaction 75 | 76 | - Merge data files with latest-wins semantics based on `_commit_ts` 77 | - Merge delete sidecars alongside data 78 | - Tombstones may be pruned when `commit_ts <= tombstone_watermark` and no live versions exist 79 | 80 | ### GC 81 | 82 | Manifest-driven GC treats data and delete files atomically—both are retained or removed together based on version visibility. 83 | 84 | ## Alternatives Considered 85 | 86 | 1. **Three-file layout** (data + mvcc sidecar + delete sidecar): More files increase I/O; Parquet column pruning already handles skipping `_commit_ts`. 87 | 88 | 2. **Embed tombstones in data file as null rows**: Wastes space, forces nullable schemas, complicates scans. 89 | 90 | 3. **Store MVCC in Parquet metadata**: Lacks per-row fidelity, complicates streaming reads. 91 | 92 | 4. **Tombstones in WAL only**: Makes SST reconstruction expensive and breaks crash recovery. 
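To make the chosen layout concrete, the sketch below assembles the key-only delete sidecar schema for a hypothetical table whose primary key is a single `id: Utf8` column (the function name and key column are illustrative; the engine derives the key fields from the table's key schema and appends the MVCC commit column to them):

```rust
// Sketch only: key-only delete sidecar schema for a table keyed by `id: Utf8`.
// `_commit_ts` is appended as a non-null UInt64, matching the MVCC column that
// the data file carries as its last column.
use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema, SchemaRef};

fn delete_sidecar_schema() -> SchemaRef {
    Arc::new(Schema::new(vec![
        Field::new("id", DataType::Utf8, false),
        Field::new("_commit_ts", DataType::UInt64, false),
    ]))
}
```

A segment without tombstones never writes this file, so upsert-only workloads pay no extra I/O.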
93 | 94 | ## Comparison with Other Systems 95 | 96 | ### Apache Iceberg 97 | 98 | Iceberg has three delete mechanisms: 99 | 100 | | Type | How it works | Pros | Cons | 101 | |------|--------------|------|------| 102 | | Position deletes | file_path + row position | Fast read (exact location) | Requires knowing physical position; deprecated in v3 | 103 | | Equality deletes | Column values that identify rows | Write-friendly (no position needed) | Query penalty—must scan and filter | 104 | | Deletion vectors (v3) | Bitmap per data file | Compact, fast read | Requires file-level tracking | 105 | 106 | Tonbo's key-only delete sidecar is similar to Iceberg's **equality deletes**—identifying rows by key values rather than physical position. Iceberg's experience shows equality deletes accumulate and hurt read performance until merged, which informed our decision to track tombstone watermarks for pruning. 107 | 108 | ### RocksDB 109 | 110 | RocksDB takes a different approach: 111 | 112 | - **Point tombstones**: Stored inline with data in SST files (key + sequence number) 113 | - **Range tombstones**: Dedicated meta-block within each SST file 114 | - **Compaction**: Tombstones drop only at bottom level when no snapshot references them 115 | - **Trigger heuristics**: Compaction triggered when tombstone ratio exceeds 50% 116 | 117 | RocksDB's inline storage avoids extra files but complicates the data format. Their range tombstone support is something Tonbo currently lacks. 118 | 119 | ### Design Trade-offs 120 | 121 | Tonbo's current design optimizes for: 122 | - **Simplicity**: Separate file is easier to reason about than inline storage 123 | - **Write path**: No need to know physical row positions (unlike position deletes) 124 | - **Schema purity**: User data files remain uncontaminated by tombstone markers 125 | 126 | Known limitations for future consideration: 127 | - **File proliferation**: Every segment with tombstones creates an extra file; deletion vectors (bitmaps) would be more compact 128 | - **No range deletes**: Point-delete-only; range tombstone support may be needed for bulk delete workloads 129 | - **Watermark semantics**: Tombstone retention must coordinate with snapshot/reader registry to avoid premature pruning 130 | -------------------------------------------------------------------------------- /src/tests_internal/wal_policy_e2e.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "tokio")] 2 | 3 | use std::{fs, path::PathBuf, sync::Arc, time::Duration}; 4 | 5 | use arrow_array::{Int32Array, RecordBatch, StringArray}; 6 | use arrow_schema::{DataType, Field}; 7 | use fusio::{DynFs, disk::LocalFs, executor::tokio::TokioExecutor, path::Path as FusioPath}; 8 | use tokio::time::sleep; 9 | 10 | use crate::{ 11 | db::{BatchesThreshold, WalSyncPolicy}, 12 | test_support::{TestFsWalStateStore, TestWalExt as WalExt}, 13 | }; 14 | 15 | #[path = "common/mod.rs"] 16 | mod common; 17 | use common::config_with_pk; 18 | 19 | fn workspace_temp_dir(prefix: &str) -> PathBuf { 20 | let base = std::env::current_dir().expect("cwd"); 21 | let dir = base.join("target").join("tmp").join(format!( 22 | "{prefix}-{}", 23 | std::time::SystemTime::now() 24 | .duration_since(std::time::UNIX_EPOCH) 25 | .expect("time") 26 | .as_nanos() 27 | )); 28 | fs::create_dir_all(&dir).expect("create workspace temp dir"); 29 | dir 30 | } 31 | 32 | fn wal_cfg_with_backend(wal_dir: &PathBuf, policy: WalSyncPolicy) -> crate::db::WalConfig { 33 | 
fs::create_dir_all(wal_dir).expect("wal dir"); 34 | let wal_path = FusioPath::from_filesystem_path(wal_dir).expect("wal path"); 35 | let wal_fs = Arc::new(LocalFs {}); 36 | let wal_backend: Arc<dyn DynFs> = wal_fs.clone(); 37 | let wal_state = Arc::new(TestFsWalStateStore::new(wal_fs)); 38 | crate::db::WalConfig::default() 39 | .wal_dir(wal_path) 40 | .segment_backend(wal_backend) 41 | .state_store(Some(wal_state)) 42 | .segment_max_bytes(256) 43 | .flush_interval(Duration::from_millis(1)) 44 | .sync_policy(policy) 45 | } 46 | 47 | async fn write_rows( 48 | db: &mut crate::db::DbInner<LocalFs, TokioExecutor>, 49 | schema: &arrow_schema::SchemaRef, 50 | offset: i32, 51 | ) -> Result<(), Box<dyn std::error::Error>> { 52 | let ids: Vec<String> = (0..32).map(|n| format!("row-{offset}-{n:02}")).collect(); 53 | let vals: Vec<i32> = (0..32).map(|n| offset + n as i32).collect(); 54 | let batch = RecordBatch::try_new( 55 | schema.clone(), 56 | vec![ 57 | Arc::new(StringArray::from(ids)) as _, 58 | Arc::new(Int32Array::from(vals)) as _, 59 | ], 60 | )?; 61 | db.ingest(batch).await?; 62 | Ok(()) 63 | } 64 | 65 | /// IntervalBytes policy should trigger syncs after crossing byte threshold. 66 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 67 | async fn wal_interval_bytes_syncs() -> Result<(), Box<dyn std::error::Error>> { 68 | let temp_root = workspace_temp_dir("wal-policy-bytes"); 69 | let wal_dir = temp_root.join("wal"); 70 | let config = config_with_pk( 71 | vec![ 72 | Field::new("id", DataType::Utf8, false), 73 | Field::new("v", DataType::Int32, false), 74 | ], 75 | &["id"], 76 | ); 77 | let schema = config.schema(); 78 | let wal_cfg = wal_cfg_with_backend(&wal_dir, WalSyncPolicy::IntervalBytes(1)); 79 | let executor = Arc::new(TokioExecutor::default()); 80 | 81 | let mut db = crate::db::DB::<LocalFs, TokioExecutor>::builder(config) 82 | .on_disk(temp_root.to_string_lossy().into_owned())? 83 | .wal_config(wal_cfg) 84 | .with_minor_compaction(1, 0, 1) 85 | .open_with_executor(Arc::clone(&executor)) 86 | .await? 87 | .into_inner(); 88 | db.set_seal_policy(Arc::new(BatchesThreshold { batches: 1 })); 89 | 90 | write_rows(&mut db, &schema, 0).await?; 91 | write_rows(&mut db, &schema, 100).await?; 92 | 93 | let wal_handle = db 94 | .wal() 95 | .cloned() 96 | .expect("wal handle available before shutdown"); 97 | 98 | db.disable_wal().await?; 99 | let metrics = wal_handle.metrics(); 100 | let guard = metrics.read().await; 101 | assert!( 102 | guard.sync_operations > 0, 103 | "expected sync operations to be recorded" 104 | ); 105 | 106 | if let Err(err) = fs::remove_dir_all(&temp_root) { 107 | eprintln!("cleanup failed: {err}"); 108 | } 109 | Ok(()) 110 | } 111 | 112 | /// IntervalTime policy should also emit syncs even with small batches. 
113 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 114 | async fn wal_interval_time_syncs() -> Result<(), Box<dyn std::error::Error>> { 115 | let temp_root = workspace_temp_dir("wal-policy-time"); 116 | let wal_dir = temp_root.join("wal"); 117 | let config = config_with_pk( 118 | vec![ 119 | Field::new("id", DataType::Utf8, false), 120 | Field::new("v", DataType::Int32, false), 121 | ], 122 | &["id"], 123 | ); 124 | let schema = config.schema(); 125 | let wal_cfg = wal_cfg_with_backend( 126 | &wal_dir, 127 | WalSyncPolicy::IntervalTime(Duration::from_millis(0)), 128 | ); 129 | let executor = Arc::new(TokioExecutor::default()); 130 | 131 | let mut db = crate::db::DB::<LocalFs, TokioExecutor>::builder(config) 132 | .on_disk(temp_root.to_string_lossy().into_owned())? 133 | .wal_config(wal_cfg) 134 | .with_minor_compaction(1, 0, 1) 135 | .open_with_executor(Arc::clone(&executor)) 136 | .await? 137 | .into_inner(); 138 | db.set_seal_policy(Arc::new(BatchesThreshold { batches: 1 })); 139 | 140 | write_rows(&mut db, &schema, 0).await?; 141 | 142 | let wal_handle = db 143 | .wal() 144 | .cloned() 145 | .expect("wal handle available before shutdown"); 146 | 147 | // allow timer to tick 148 | sleep(Duration::from_millis(5)).await; 149 | 150 | db.disable_wal().await?; 151 | let metrics = wal_handle.metrics(); 152 | let guard = metrics.read().await; 153 | assert!( 154 | guard.sync_operations > 0, 155 | "expected sync operations to be recorded" 156 | ); 157 | 158 | if let Err(err) = fs::remove_dir_all(&temp_root) { 159 | eprintln!("cleanup failed: {err}"); 160 | } 161 | Ok(()) 162 | } 163 | --------------------------------------------------------------------------------