├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── HighPriorityIssues.yml │ ├── release.yaml │ └── rust.yaml ├── .gitignore ├── .gitmodules ├── .rustfmt.toml ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── add_rustfmt_hook.sh ├── crates ├── duckdb-loadable-macros │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ └── src │ │ └── lib.rs ├── duckdb │ ├── Cargo.toml │ ├── examples │ │ ├── Movies_Social_metadata.xlsx │ │ ├── appender.rs │ │ ├── basic.rs │ │ ├── date.xlsx │ │ ├── hello-ext-capi │ │ │ └── main.rs │ │ ├── hello-ext │ │ │ └── main.rs │ │ ├── int32_decimal.parquet │ │ └── parquet.rs │ └── src │ │ ├── appender │ │ ├── arrow.rs │ │ └── mod.rs │ │ ├── appender_params.rs │ │ ├── arrow_batch.rs │ │ ├── cache.rs │ │ ├── column.rs │ │ ├── config.rs │ │ ├── core │ │ ├── data_chunk.rs │ │ ├── logical_type.rs │ │ ├── mod.rs │ │ └── vector.rs │ │ ├── error.rs │ │ ├── extension.rs │ │ ├── inner_connection.rs │ │ ├── lib.rs │ │ ├── params.rs │ │ ├── polars_dataframe.rs │ │ ├── pragma.rs │ │ ├── r2d2.rs │ │ ├── raw_statement.rs │ │ ├── row.rs │ │ ├── statement.rs │ │ ├── test_all_types.rs │ │ ├── transaction.rs │ │ ├── types │ │ ├── chrono.rs │ │ ├── from_sql.rs │ │ ├── mod.rs │ │ ├── ordered_map.rs │ │ ├── serde_json.rs │ │ ├── string.rs │ │ ├── to_sql.rs │ │ ├── url.rs │ │ ├── value.rs │ │ └── value_ref.rs │ │ ├── util │ │ ├── mod.rs │ │ └── small_cstr.rs │ │ ├── vscalar │ │ ├── arrow.rs │ │ ├── function.rs │ │ └── mod.rs │ │ └── vtab │ │ ├── arrow.rs │ │ ├── excel.rs │ │ ├── function.rs │ │ ├── mod.rs │ │ └── value.rs └── libduckdb-sys │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── build.rs │ ├── duckdb.tar.gz │ ├── openssl │ ├── cfgs.rs │ ├── expando.c │ ├── find_normal.rs │ ├── find_vendored.rs │ ├── mod.rs │ └── run_bindgen.rs │ ├── src │ ├── bindgen_bundled_version.rs │ ├── bindgen_bundled_version_loadable.rs │ ├── error.rs │ ├── lib.rs │ ├── raw_statement.rs │ └── string.rs │ ├── update_sources.py │ ├── upgrade.sh │ ├── wrapper.h │ └── wrapper_ext.h └── upgrade.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | *.rs linguist-detectable=true 2 | *.cpp linguist-detectable=false 3 | *.hpp linguist-detectable=false 4 | *.h linguist-detectable=false 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | target-branch: main 9 | labels: [auto-dependencies] 10 | -------------------------------------------------------------------------------- /.github/workflows/HighPriorityIssues.yml: -------------------------------------------------------------------------------- 1 | name: Create Internal issue when the "High Priority" label is applied 2 | on: 3 | issues: 4 | types: 5 | - labeled 6 | 7 | env: 8 | GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }} 9 | # an event triggering this workflow is either an issue or a pull request, 10 | # hence only one of the numbers will be filled in the TITLE_PREFIX 11 | TITLE_PREFIX: "[duckdb-rs/#${{ github.event.issue.number }}]" 12 | PUBLIC_ISSUE_TITLE: ${{ github.event.issue.title }} 13 | 14 | jobs: 15 | create_or_label_issue: 16 | if: github.event.label.name == 'High Priority' 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Get mirror issue number 20 | run: | 21 | gh issue list 
--repo duckdblabs/duckdb-internal --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt 22 | echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> $GITHUB_ENV 23 | 24 | - name: Print whether mirror issue exists 25 | run: | 26 | if [ "$MIRROR_ISSUE_NUMBER" == "" ]; then 27 | echo "Mirror issue with title prefix '$TITLE_PREFIX' does not exist yet" 28 | else 29 | echo "Mirror issue with title prefix '$TITLE_PREFIX' exists with number $MIRROR_ISSUE_NUMBER" 30 | fi 31 | 32 | - name: Create or label issue 33 | run: | 34 | if [ "$MIRROR_ISSUE_NUMBER" == "" ]; then 35 | gh issue create --repo duckdblabs/duckdb-internal --label "Rust" --label "High Priority" --title "$TITLE_PREFIX - $PUBLIC_ISSUE_TITLE" --body "See https://github.com/duckdb/duckdb-rs/issues/${{ github.event.issue.number }}" 36 | fi 37 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: CD 2 | 3 | on: 4 | push: 5 | tags: [ '*' ] 6 | env: 7 | RUST_BACKTRACE: 1 8 | jobs: 9 | Release: 10 | name: Cargo Publish 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - uses: actions-rust-lang/setup-rust-toolchain@v1 15 | with: 16 | toolchain: stable 17 | target: ${{ matrix.target }} 18 | components: 'rustfmt, clippy' 19 | 20 | # cargo publish 21 | - name: publish crates 22 | uses: katyo/publish-crates@v2 23 | with: 24 | path: './' 25 | args: --no-verify --allow-dirty --all-features 26 | registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} 27 | ignore-unpublished-changes: true 28 | 29 | # create release 30 | - name: "Build Changelog" 31 | id: build_changelog 32 | uses: mikepenz/release-changelog-builder-action@v1 33 | env: 34 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 35 | - name: Create Release 36 | uses: actions/create-release@v1 37 | with: 38 | tag_name: ${{ github.ref }} 39 | release_name: ${{ github.ref }} 40 | body: ${{steps.build_changelog.outputs.changelog}} 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 43 | -------------------------------------------------------------------------------- /.github/workflows/rust.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | env: 9 | RUST_BACKTRACE: 1 10 | jobs: 11 | test: 12 | name: Test ${{ matrix.target }} 13 | strategy: 14 | fail-fast: true 15 | matrix: 16 | include: 17 | - { target: x86_64-pc-windows-msvc, os: windows-latest, duckdb: libduckdb-windows-amd64.zip } 18 | - { target: x86_64-unknown-linux-gnu, os: ubuntu-latest, duckdb: libduckdb-linux-amd64.zip } 19 | #- { target: x86_64-apple-darwin, os: macos-latest } 20 | #- { 21 | #target: x86_64-pc-windows-gnu, 22 | #os: windows-latest, 23 | #host: -x86_64-pc-windows-gnu, 24 | #} 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - uses: actions-rust-lang/setup-rust-toolchain@v1 30 | with: 31 | toolchain: stable 32 | target: ${{ matrix.target }} 33 | components: 'rustfmt, clippy' 34 | 35 | # download libduckdb 36 | - uses: robinraju/release-downloader@v1.4 37 | name: Download duckdb 38 | with: 39 | repository: "duckdb/duckdb" 40 | tag: "v1.3.0" 41 | fileName: ${{ matrix.duckdb }} 42 | out-file-path: . 
43 | 44 | # For Linux 45 | - name: Linux extract duckdb 46 | if: matrix.os == 'ubuntu-latest' 47 | uses: ihiroky/extract-action@v1 48 | with: 49 | file_path: ${{ github.workspace }}/${{ matrix.duckdb }} 50 | extract_dir: libduckdb 51 | 52 | - run: cargo fmt --all -- --check 53 | if: matrix.os == 'ubuntu-latest' 54 | 55 | - name: run cargo clippy 56 | if: matrix.os == 'ubuntu-latest' 57 | env: 58 | DUCKDB_LIB_DIR: ${{ github.workspace }}/libduckdb 59 | DUCKDB_INCLUDE_DIR: ${{ github.workspace }}/libduckdb 60 | LD_LIBRARY_PATH: ${{ github.workspace }}/libduckdb 61 | run: | 62 | cargo clippy --all-targets --workspace --all-features -- -D warnings -A clippy::redundant-closure 63 | 64 | 65 | # For windows 66 | - name: Windows extract duckdb 67 | if: matrix.os == 'windows-latest' 68 | uses: DuckSoft/extract-7z-action@v1.0 69 | with: 70 | pathSource: D:\a\duckdb-rs\duckdb-rs\${{ matrix.duckdb }} 71 | pathTarget: ${{ github.workspace }}/libduckdb 72 | 73 | - name: Add path to PATH environment variable 74 | if: matrix.os == 'windows-latest' 75 | uses: myci-actions/export-env-var-powershell@1 76 | with: 77 | name: PATH 78 | value: $env:PATH;${{ github.workspace }}/libduckdb 79 | 80 | - name: Run cargo-test 81 | if: matrix.os == 'windows-latest' 82 | run: cargo test --features "modern-full vtab-full vtab-loadable" 83 | env: 84 | DUCKDB_LIB_DIR: ${{ github.workspace }}/libduckdb 85 | DUCKDB_INCLUDE_DIR: ${{ github.workspace }}/libduckdb 86 | 87 | - name: Build loadable extension 88 | run: cargo build --example hello-ext --features="vtab-loadable" 89 | env: 90 | DUCKDB_LIB_DIR: ${{ github.workspace }}/libduckdb 91 | DUCKDB_INCLUDE_DIR: ${{ github.workspace }}/libduckdb 92 | LD_LIBRARY_PATH: ${{ github.workspace }}/libduckdb 93 | 94 | - name: Build loadable extension 95 | run: cargo build --example hello-ext-capi --features="vtab-loadable loadable-extension" 96 | env: 97 | DUCKDB_LIB_DIR: ${{ github.workspace }}/libduckdb 98 | DUCKDB_INCLUDE_DIR: ${{ github.workspace }}/libduckdb 99 | LD_LIBRARY_PATH: ${{ github.workspace }}/libduckdb 100 | 101 | Windows: 102 | name: Windows build from source 103 | needs: test 104 | runs-on: windows-latest 105 | steps: 106 | - uses: actions/checkout@v2 107 | - uses: actions/cache@v3 108 | with: 109 | path: ~/.cargo/registry/index 110 | key: index-${{ runner.os }}-${{ github.run_number }} 111 | restore-keys: | 112 | index-${{ runner.os }}- 113 | - uses: actions-rust-lang/setup-rust-toolchain@v1 114 | with: 115 | toolchain: stable 116 | target: x86_64-pc-windows-msvc 117 | 118 | - run: cargo install cargo-examples 119 | 120 | Sanitizer: 121 | name: Address Sanitizer 122 | needs: test 123 | runs-on: ubuntu-latest 124 | steps: 125 | - uses: actions/checkout@v2 126 | # Need nightly rust. 127 | - uses: actions-rust-lang/setup-rust-toolchain@v1 128 | with: 129 | toolchain: nightly 130 | components: 'rust-src' 131 | # Install LLVM tools 132 | - name: Install LLVM 133 | run: | 134 | sudo apt-get install -y llvm 135 | - name: Tests with asan 136 | env: 137 | RUSTFLAGS: -Zsanitizer=address -C debuginfo=0 138 | RUSTDOCFLAGS: -Zsanitizer=address 139 | ASAN_OPTIONS: "detect_stack_use_after_return=1:detect_leaks=1:symbolize=1" 140 | # Work around https://github.com/rust-lang/rust/issues/59125 by 141 | # disabling backtraces. In an ideal world we'd probably suppress the 142 | # leak sanitization, but we don't care about backtraces here, so long 143 | # as the other tests have them. 
144 | RUST_BACKTRACE: "0" 145 | # We cannot run "modern-full" with asan as the chrono feature will auto-load relase binaries of 146 | # the ICU-extension, which are not built with ASAN and will cause a crash. 147 | run: | 148 | export ASAN_SYMBOLIZER_PATH=$(which llvm-symbolizer) 149 | echo $ASAN_SYMBOLIZER_PATH 150 | cargo -Z build-std test --features "serde_json url r2d2 uuid polars extensions-full" --target x86_64-unknown-linux-gnu --package duckdb 151 | - name: publish crates --dry-run 152 | uses: katyo/publish-crates@v2 153 | with: 154 | path: './' 155 | args: --allow-dirty --all-features 156 | dry-run: true 157 | ignore-unpublished-changes: true -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | 13 | # Added by cargo 14 | 15 | /target 16 | **/target 17 | 18 | # Ignore vscode and intellij 19 | .vscode 20 | .idea 21 | 22 | # Ignore zip file 23 | *.zip 24 | 25 | # ccls 26 | .ccls-cache/ 27 | 28 | *.db 29 | 30 | crates/libduckdb-sys/duckdb-sources/* 31 | crates/libduckdb-sys/duckdb/ 32 | crates/libduckdb-sys/._duckdb 33 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "crates/libduckdb-sys/duckdb-sources"] 2 | path = crates/libduckdb-sys/duckdb-sources 3 | url = https://github.com/duckdb/duckdb 4 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 2 | imports_granularity = "Crate" 3 | reorder_imports = true 4 | fn_call_width = 72 5 | # indent_style = "Block" 6 | # tab_spaces = 2 7 | # group_imports="StdExternalCrate" 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | Glad your are interested in this project. Here I'll share how I develop this library, hopefully it will be helpfull for you to get start. 3 | 4 | ## Background 5 | This repo has two crates: `duckdb` and `libduckdb-sys`, which `libduckdb-sys` is the original binding for [duckdb-c-api](https://github.com/duckdb/duckdb/blob/master/src/include/duckdb.h) and `duckdb` is an ergonomic wrapper on `libduckdb-sys`. 6 | 7 | Most user should use `duckdb`, but our development may happen in both of these components. 8 | 9 | ## Development 10 | 11 | ### duckdb-c-api 12 | Some features are still not implemented in the c api, so we may need pull request in the [duckdb repo](https://github.com/duckdb/duckdb). 
13 | 14 | ```shell 15 | # build duckdb 16 | cd ~/github/ 17 | git clone git@github.com:duckdb/duckdb.git 18 | cd duckdb 19 | GEN=ninja make debug 20 | ``` 21 | 22 | Related logics: 23 | * header file: https://github.com/duckdb/duckdb/blob/master/src/include/duckdb.h 24 | * impl file: https://github.com/duckdb/duckdb/blob/master/src/main/duckdb-c.cpp 25 | * test file: https://github.com/duckdb/duckdb/blob/master/test/api/capi/test_capi.cpp 26 | * You can refer to one of my previous PR: https://github.com/duckdb/duckdb/pull/1923 27 | 28 | After make the change, we can build the repo and use it in `duckdb-rs` by: 29 | ```shell 30 | # assume in macOS, you may need to change the file in other OS 31 | # export library and header file 32 | cd ~/github/duckdb 33 | mkdir ~/duckdb-lib 34 | cp src/include/duckdb.h build/debug/src/libduckdb.dylib ~/duckdb-lib/ 35 | # set lib dir 36 | export DUCKDB_LIB_DIR=~/duckdb-lib 37 | # set header dir 38 | export DUCKDB_INCLUDE_DIR=~/duckdb-lib 39 | ``` 40 | 41 | ### libduckdb-sys 42 | 43 | Use the exported library and header: 44 | 45 | ```shell 46 | cd ~/github/duckdb-rs/crates/libduckdb-sys 47 | cargo test --features buildtime_bindgen 48 | ``` 49 | 50 | Use the bundled header file: 51 | ```shell 52 | cd ~/github/duckdb-rs/crates/libduckdb-sys 53 | cargo test --features bundled 54 | ``` 55 | 56 | Currently in [github actions](https://github.com/duckdb/duckdb-rs/actions), we always use the bundled file for testing. So if you change the header in duckdb-cpp repo, you need to make the PR merged and updated the [bundled-file](https://github.com/duckdb/duckdb-rs/tree/main/crates/libduckdb-sys/duckdb). 57 | You can generated the amalgamated file by: 58 | 59 | ```shell 60 | cd ~/github/duckdb 61 | mkdir -p build/amaldebug 62 | python scripts/amalgamation.py 63 | cp src/amalgamation/duckdb.cpp src/include/duckdb.h src/amalgamation/duckdb.hpp ../duckdb-rs/crates/libduckdb-sys/duckdb/ 64 | ``` 65 | 66 | ### duckdb-rs 67 | 68 | Use the exported library and header: 69 | 70 | ```shell 71 | cd ~/github/duckdb-rs/ 72 | cargo test --features buildtime_bindgen -- --nocapture 73 | ``` 74 | 75 | Use the bundled header file: 76 | 77 | ```shell 78 | cd ~/github/duckdb-rs 79 | cargo test --features bundled -- --nocapture 80 | ``` 81 | 82 | Detect memory leaks: 83 | ```shell 84 | cd ~/github/duckdb-rs 85 | ASAN_OPTIONS=detect_leaks=1 ASAN_SYMBOLIZER_PATH=/usr/local/opt/llvm/bin/llvm-symbolizer cargo test --features bundled -- --nocapture 86 | ``` 87 | 88 | ### Update to new version 89 | 90 | Everytime duckdb release to a new version, we also need to release a new version. 91 | 92 | We can use the scripts to do the upgrades: 93 | ```shell 94 | ./upgrade.sh 95 | ``` 96 | Which use sed to update the version number and then call `./libduckdb-sys/upgrade.sh` to generated new bindings. 97 | 98 | We may need to fix any error as duckdb's c-api may have breaking changes occasionally. 
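To surface such breakages quickly after running `./upgrade.sh`, one option is to re-run the bundled test and clippy passes (the same commands the Makefile and CI use); this is a suggested sanity check, not a required release step:

```shell
cd ~/github/duckdb-rs
# build against the freshly bundled sources and run the full test suite
cargo test --features bundled --features modern-full -- --nocapture
# lint with the same flags as CI
cargo clippy --all-targets --workspace --all-features -- -D warnings -A clippy::redundant-closure
```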
99 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = [ 4 | "crates/duckdb", 5 | "crates/libduckdb-sys", 6 | "crates/duckdb-loadable-macros", 7 | ] 8 | 9 | [workspace.package] 10 | version = "1.3.0" 11 | authors = ["wangfenjin "] 12 | edition = "2021" 13 | repository = "https://github.com/duckdb/duckdb-rs" 14 | homepage = "https://github.com/duckdb/duckdb-rs" 15 | documentation = "http://docs.rs/duckdb/" 16 | readme = "README.md" 17 | keywords = ["duckdb", "database", "ffi"] 18 | license = "MIT" 19 | categories = ["database"] 20 | 21 | [workspace.dependencies] 22 | duckdb = { version = "=1.3.0", path = "crates/duckdb" } 23 | libduckdb-sys = { version = "=1.3.0", path = "crates/libduckdb-sys" } 24 | duckdb-loadable-macros = { version = "=0.1.7", path = "crates/duckdb-loadable-macros" } 25 | autocfg = "1.0" 26 | bindgen = { version = "0.71.1", default-features = false } 27 | byteorder = "1.3" 28 | calamine = "0.22.0" 29 | cast = "0.3" 30 | cc = "1.0" 31 | chrono = "0.4.22" 32 | csv = "1.1" 33 | doc-comment = "0.3" 34 | fallible-iterator = "0.3" 35 | fallible-streaming-iterator = "0.1" 36 | flate2 = "1.0" 37 | hashlink = "0.9" 38 | lazy_static = "1.4" 39 | num = { version = "0.4", default-features = false } 40 | num-integer = "0.1.46" 41 | pkg-config = "0.3.24" 42 | polars = "0.46" 43 | polars-core = "0.46" 44 | polars-arrow = "0.46" 45 | pretty_assertions = "1.4.0" 46 | prettyplease = "0.2.20" 47 | proc-macro2 = "1.0.56" 48 | quote = "1.0.21" 49 | r2d2 = "0.8.9" 50 | rand = "0.8.3" 51 | regex = "1.6" 52 | rust_decimal = "1.14" 53 | serde = "1.0" 54 | serde_json = "1.0" 55 | smallvec = "1.6.1" 56 | strum = "0.25" 57 | syn = "2.0.15" 58 | tar = "0.4.38" 59 | tempdir = "0.3.7" 60 | tempfile = "3.1.0" 61 | unicase = "2.6.0" 62 | url = "2.1" 63 | uuid = "1.0" 64 | vcpkg = "0.2" 65 | arrow = { version = "55", default-features = false } 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021-2025 Stichting DuckDB Foundation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .EXPORT_ALL_VARIABLES: 2 | 3 | # In order to use buildtime_bindgen 4 | # you need to build duckdb locally and export the envs 5 | LD_LIBRARY_PATH = /Users/wangfenjin/duckdb:$LD_LIBRARY_PATH 6 | DUCKDB_LIB_DIR = /Users/wangfenjin/duckdb 7 | DUCKDB_INCLUDE_DIR = /Users/wangfenjin/duckdb 8 | 9 | all: 10 | cargo test --features buildtime_bindgen --features modern-full -- --nocapture 11 | cargo clippy --all-targets --workspace --features buildtime_bindgen --features modern-full -- -D warnings -A clippy::redundant-closure 12 | 13 | test: 14 | cargo test --features bundled --features modern-full -- --nocapture -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # duckdb-rs 2 | 3 | [![Downloads](https://img.shields.io/crates/d/duckdb)](https://img.shields.io/crates/d/duckdb) 4 | [![Build Status](https://github.com/duckdb/duckdb-rs/workflows/CI/badge.svg)](https://github.com/duckdb/duckdb-rs/actions) 5 | [![dependency status](https://deps.rs/repo/github/wangfenjin/duckdb-rs/status.svg)](https://deps.rs/repo/github/wangfenjin/duckdb-rs) 6 | [![codecov](https://codecov.io/gh/wangfenjin/duckdb-rs/branch/main/graph/badge.svg?token=0xV88q8KU0)](https://codecov.io/gh/wangfenjin/duckdb-rs) 7 | [![Latest Version](https://img.shields.io/crates/v/duckdb.svg)](https://crates.io/crates/duckdb) 8 | [![Docs](https://img.shields.io/badge/docs.rs-duckdb-green)](https://docs.rs/duckdb) 9 | 10 | duckdb-rs is an ergonomic wrapper for using [duckdb](https://github.com/duckdb/duckdb) from Rust. It attempts to expose 11 | an interface similar to [rusqlite](https://github.com/rusqlite/rusqlite). Actually the initial code and even this README is 12 | forked from rusqlite as duckdb also tries to expose a sqlite3 compatible API. 13 | 14 | ```rust 15 | use duckdb::{params, Connection, Result}; 16 | 17 | // In your project, we need to keep the arrow version same as the version used in duckdb. 
18 | // Refer to https://github.com/duckdb/duckdb-rs/issues/92 19 | // You can either: 20 | use duckdb::arrow::record_batch::RecordBatch; 21 | // Or in your Cargo.toml, use * as the version; features can be toggled according to your needs 22 | // arrow = { version = "*", default-features = false, features = ["prettyprint"] } 23 | // Then you can: 24 | // use arrow::record_batch::RecordBatch; 25 | 26 | use duckdb::arrow::util::pretty::print_batches; 27 | 28 | #[derive(Debug)] 29 | struct Person { 30 | id: i32, 31 | name: String, 32 | data: Option>, 33 | } 34 | 35 | fn main() -> Result<()> { 36 | let conn = Connection::open_in_memory()?; 37 | 38 | conn.execute_batch( 39 | r"CREATE SEQUENCE seq; 40 | CREATE TABLE person ( 41 | id INTEGER PRIMARY KEY DEFAULT NEXTVAL('seq'), 42 | name TEXT NOT NULL, 43 | data BLOB 44 | ); 45 | ")?; 46 | 47 | let me = Person { 48 | id: 0, 49 | name: "Steven".to_string(), 50 | data: None, 51 | }; 52 | conn.execute( 53 | "INSERT INTO person (name, data) VALUES (?, ?)", 54 | params![me.name, me.data], 55 | )?; 56 | 57 | // query table by rows 58 | let mut stmt = conn.prepare("SELECT id, name, data FROM person")?; 59 | let person_iter = stmt.query_map([], |row| { 60 | Ok(Person { 61 | id: row.get(0)?, 62 | name: row.get(1)?, 63 | data: row.get(2)?, 64 | }) 65 | })?; 66 | 67 | for person in person_iter { 68 | let p = person.unwrap(); 69 | println!("ID: {}", p.id); 70 | println!("Found person {:?}", p); 71 | } 72 | 73 | // query table by arrow 74 | let rbs: Vec = stmt.query_arrow([])?.collect(); 75 | print_batches(&rbs).unwrap(); 76 | Ok(()) 77 | } 78 | ``` 79 | 80 | ## Notes on building duckdb and libduckdb-sys 81 | 82 | `libduckdb-sys` is a separate crate from `duckdb-rs` that provides the Rust 83 | declarations for DuckDB's C API. By default, `libduckdb-sys` attempts to find a DuckDB library that already exists on your system using pkg-config, or a 84 | [Vcpkg](https://github.com/Microsoft/vcpkg) installation for MSVC ABI builds. 85 | 86 | You can adjust this behavior in a number of ways: 87 | 88 | - If you use the `bundled` feature, `libduckdb-sys` will use the 89 | [cc](https://crates.io/crates/cc) crate to compile DuckDB from source and 90 | link against that. This source is embedded in the `libduckdb-sys` crate and 91 | as we are still in development, we will update it regularly. After we are more stable, 92 | we will use the stable released version from [duckdb](https://github.com/duckdb/duckdb/releases). 93 | This is probably the simplest solution to any build problems. You can enable this by adding the following in your `Cargo.toml` file: 94 | 95 | ```bash 96 | cargo add duckdb --features bundled 97 | ``` 98 | 99 | `Cargo.toml` will be updated. 100 | 101 | ```toml 102 | [dependencies] 103 | # Assume that version DuckDB version 0.9.2 is used. 104 | duckdb = { version = "0.9.2", features = ["bundled"] } 105 | ``` 106 | * When linking against a DuckDB library already on the system (so *not* using any of the `bundled` features), you can set the `DUCKDB_LIB_DIR` environment variable to point to a directory containing the library. You can also set the `DUCKDB_INCLUDE_DIR` variable to point to the directory containing `duckdb.h`. 107 | * Installing the duckdb development packages will usually be all that is required, but 108 | the build helpers for [pkg-config](https://github.com/alexcrichton/pkg-config-rs) 109 | and [vcpkg](https://github.com/mcgoo/vcpkg-rs) have some additional configuration 110 | options. 
The default when using vcpkg is to dynamically link, 111 | which must be enabled by setting `VCPKGRS_DYNAMIC=1` environment variable before build. 112 | 113 | ### Binding generation 114 | 115 | We use [bindgen](https://crates.io/crates/bindgen) to generate the Rust 116 | declarations from DuckDB's C header file. `bindgen` 117 | [recommends](https://github.com/servo/rust-bindgen#library-usage-with-buildrs) 118 | running this as part of the build process of libraries that used this. We tried 119 | this briefly (`duckdb` 0.10.0, specifically), but it had some annoyances: 120 | 121 | * The build time for `libduckdb-sys` (and therefore `duckdb`) increased 122 | dramatically. 123 | * Running `bindgen` requires a relatively-recent version of Clang, which many 124 | systems do not have installed by default. 125 | * Running `bindgen` also requires the DuckDB header file to be present. 126 | 127 | So we try to avoid running `bindgen` at build-time by shipping 128 | pregenerated bindings for DuckDB. 129 | 130 | If you use the `bundled` features, you will get pregenerated bindings for the 131 | bundled version of DuckDB. If you want to run `bindgen` at buildtime to 132 | produce your own bindings, use the `buildtime_bindgen` Cargo feature. 133 | 134 | ## Contributing 135 | 136 | See to [Contributing.md](CONTRIBUTING.md) 137 | 138 | ### Checklist 139 | 140 | - Run `cargo +nightly fmt` to ensure your Rust code is correctly formatted. 141 | - Run `cargo clippy --fix --allow-dirty --all-targets --workspace --all-features -- -D warnings` to fix all clippy issues. 142 | - Ensure `cargo test --all-targets --workspace --features "modern-full extensions-full"` reports no failures. 143 | 144 | ### TODOs 145 | 146 | - [x] Refactor the ErrorCode part, it's borrowed from rusqlite, we should have our own 147 | - [ ] Support more type 148 | - [x] Update duckdb.h 149 | - [x] Adjust the code examples and documentation 150 | - [x] Delete unused code / functions 151 | - [x] Add CI 152 | - [x] Publish to crate 153 | 154 | ## License 155 | 156 | DuckDB and libduckdb-sys are available under the MIT license. See the LICENSE file for more info. 157 | -------------------------------------------------------------------------------- /add_rustfmt_hook.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # https://gist.github.com/folex/9496c457bcbbef36255a533389da740e 4 | 5 | # check that rustfmt installed, or else this hook doesn't make much sense 6 | command -v rustfmt >/dev/null 2>&1 || { echo >&2 "Rustfmt is required but it's not installed. Aborting."; exit 1; } 7 | 8 | # write a whole script to pre-commit hook 9 | # NOTE: it will overwrite pre-commit file! 10 | cat > .git/hooks/pre-commit <<'EOF' 11 | #!/bin/bash 12 | declare -a rust_files=() 13 | files=$(git diff-index --name-only --cached HEAD) 14 | echo 'Formatting source files' 15 | for file in $files; do 16 | if [ ! -f "${file}" ]; then 17 | continue 18 | fi 19 | if [ "${file}" = 'libduckdb-sys/src/bindgen_bundled_version.rs' ]; then 20 | continue 21 | fi 22 | if [[ "${file}" == *.rs ]]; then 23 | rust_files+=("${file}") 24 | fi 25 | done 26 | if [ ${#rust_files[@]} -ne 0 ]; then 27 | command -v rustfmt >/dev/null 2>&1 || { echo >&2 "Rustfmt is required but it's not installed. 
Aborting."; exit 1; } 28 | $(command -v rustfmt) +nightly ${rust_files[@]} & 29 | fi 30 | wait 31 | if [ ${#rust_files[@]} -ne 0 ]; then 32 | git add ${rust_files[@]} 33 | echo "Formatting done, changed files: ${rust_files[@]}" 34 | else 35 | echo "No changes, formatting skipped" 36 | fi 37 | EOF 38 | 39 | chmod +x .git/hooks/pre-commit 40 | 41 | echo "Hooks updated" 42 | -------------------------------------------------------------------------------- /crates/duckdb-loadable-macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "duckdb-loadable-macros" 3 | version = "0.1.7" 4 | authors.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | homepage.workspace = true 9 | keywords.workspace = true 10 | readme.workspace = true 11 | categories = ["external-ffi-bindings", "database"] 12 | description = "Native bindings to the libduckdb library, C API; build loadable extensions" 13 | 14 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 15 | 16 | [dependencies] 17 | darling = "0.20.10" 18 | proc-macro2 = { workspace = true } 19 | quote = { workspace = true } 20 | syn = { workspace = true, features = ["extra-traits", "full", "fold", "parsing"] } 21 | 22 | [lib] 23 | proc-macro = true 24 | -------------------------------------------------------------------------------- /crates/duckdb-loadable-macros/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /crates/duckdb-loadable-macros/README.md: -------------------------------------------------------------------------------- 1 | ../../README.md -------------------------------------------------------------------------------- /crates/duckdb-loadable-macros/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::redundant_clone)] 2 | use proc_macro2::{Ident, Span}; 3 | 4 | use syn::{parse_macro_input, spanned::Spanned, Item}; 5 | 6 | use proc_macro::TokenStream; 7 | use quote::quote_spanned; 8 | 9 | use darling::{ast::NestedMeta, Error, FromMeta}; 10 | 11 | use std::env; 12 | 13 | const DEFAULT_DUCKDB_VERSION: &str = "v1.2.0"; 14 | 15 | /// For parsing the arguments to the duckdb_entrypoint_c_api macro 16 | #[derive(Debug, FromMeta)] 17 | struct CEntryPointMacroArgs { 18 | #[darling(default)] 19 | /// The name to be given to this extension. This name is used in the entrypoint function called by duckdb 20 | ext_name: Option, 21 | /// The minimum C API version this extension requires. It is recommended to set this to the lowest possible version 22 | /// at which your extension still compiles 23 | min_duckdb_version: Option, 24 | } 25 | 26 | /// Wraps an entrypoint function to expose an unsafe extern "C" function of the same name. 27 | /// Warning: experimental! 
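///
/// A minimal usage sketch, adapted from `crates/duckdb/examples/hello-ext-capi` in this
/// repository (`HelloVTab` is that example's table function, so the snippet is not
/// compiled here):
///
/// ```rust,ignore
/// use duckdb::{Connection, Result};
/// use duckdb_loadable_macros::duckdb_entrypoint_c_api;
/// use libduckdb_sys as ffi; // the generated glue refers to `ffi::duckdb_extension_info` etc.
/// use std::error::Error;
///
/// #[duckdb_entrypoint_c_api(ext_name = "rusty_quack", min_duckdb_version = "v0.0.1")]
/// pub fn extension_entrypoint(con: Connection) -> Result<(), Box<dyn Error>> {
///     con.register_table_function::<HelloVTab>("hello")?;
///     Ok(())
/// }
/// ```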
28 | #[proc_macro_attribute] 29 | pub fn duckdb_entrypoint_c_api(attr: TokenStream, item: TokenStream) -> TokenStream { 30 | let attr_args = match NestedMeta::parse_meta_list(attr.into()) { 31 | Ok(v) => v, 32 | Err(e) => { 33 | return TokenStream::from(Error::from(e).write_errors()); 34 | } 35 | }; 36 | 37 | let args = match CEntryPointMacroArgs::from_list(&attr_args) { 38 | Ok(v) => v, 39 | Err(e) => { 40 | return TokenStream::from(e.write_errors()); 41 | } 42 | }; 43 | 44 | // Set the minimum duckdb version (dev by default) 45 | let minimum_duckdb_version = match (args.min_duckdb_version, env::var("DUCKDB_EXTENSION_MIN_DUCKDB_VERSION")) { 46 | (Some(i), _) => i, 47 | (None, Ok(i)) => i.to_string(), 48 | _ => DEFAULT_DUCKDB_VERSION.to_string(), 49 | }; 50 | 51 | let extension_name = match (args.ext_name, env::var("DUCKDB_EXTENSION_NAME")) { 52 | (Some(i), _) => i, 53 | (None, Ok(i)) => i.to_string(), 54 | _ => env::var("CARGO_PKG_NAME").unwrap().to_string(), 55 | }; 56 | 57 | let ast = parse_macro_input!(item as syn::Item); 58 | 59 | match ast { 60 | Item::Fn(func) => { 61 | let c_entrypoint = Ident::new(format!("{}_init_c_api", extension_name).as_str(), Span::call_site()); 62 | let prefixed_original_function = func.sig.ident.clone(); 63 | let c_entrypoint_internal = Ident::new( 64 | format!("{}_init_c_api_internal", extension_name).as_str(), 65 | Span::call_site(), 66 | ); 67 | 68 | quote_spanned! {func.span()=> 69 | /// # Safety 70 | /// 71 | /// Internal Entrypoint for error handling 72 | pub unsafe fn #c_entrypoint_internal(info: ffi::duckdb_extension_info, access: *const ffi::duckdb_extension_access) -> Result> { 73 | let have_api_struct = ffi::duckdb_rs_extension_api_init(info, access, #minimum_duckdb_version).unwrap(); 74 | 75 | if !have_api_struct { 76 | // initialization failed to return an api struct, likely due to an API version mismatch, we can simply return here 77 | return Ok(false); 78 | } 79 | 80 | // TODO: handle error here? 81 | let db : ffi::duckdb_database = *(*access).get_database.unwrap()(info); 82 | let connection = Connection::open_from_raw(db.cast())?; 83 | 84 | #prefixed_original_function(connection)?; 85 | 86 | Ok(true) 87 | } 88 | 89 | /// # Safety 90 | /// 91 | /// Entrypoint that will be called by DuckDB 92 | #[no_mangle] 93 | pub unsafe extern "C" fn #c_entrypoint(info: ffi::duckdb_extension_info, access: *const ffi::duckdb_extension_access) -> bool { 94 | let init_result = #c_entrypoint_internal(info, access); 95 | 96 | if let Err(x) = init_result { 97 | let error_c_string = std::ffi::CString::new(x.to_string()); 98 | 99 | match error_c_string { 100 | Ok(e) => { 101 | (*access).set_error.unwrap()(info, e.as_ptr()); 102 | }, 103 | Err(_e) => { 104 | let error_alloc_failure = c"An error occured but the extension failed to allocate memory for an error string"; 105 | (*access).set_error.unwrap()(info, error_alloc_failure.as_ptr()); 106 | } 107 | } 108 | return false; 109 | } 110 | 111 | init_result.unwrap() 112 | } 113 | 114 | #func 115 | } 116 | .into() 117 | } 118 | _ => panic!("Only function items are allowed on duckdb_entrypoint"), 119 | } 120 | } 121 | 122 | /// Wraps an entrypoint function to expose an unsafe extern "C" function of the same name. 
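///
/// The macro renames the annotated function (prefixing it with `_`) and emits an
/// `extern "C"` entrypoint plus a `*_version` symbol derived by replacing `_init` with
/// `_version` in the function name. A minimal usage sketch, adapted from
/// `crates/duckdb/examples/hello-ext` in this repository (`HelloVTab` is that example's
/// table function, so the snippet is not compiled here):
///
/// ```rust,ignore
/// use duckdb::{Connection, Result};
/// use duckdb_loadable_macros::duckdb_entrypoint;
/// use libduckdb_sys as ffi; // the generated `*_version` function calls `ffi::duckdb_library_version`
/// use std::error::Error;
///
/// // Exposes `libhello_ext_init` / `libhello_ext_version` in the compiled cdylib.
/// #[duckdb_entrypoint]
/// pub fn libhello_ext_init(conn: Connection) -> Result<(), Box<dyn Error>> {
///     conn.register_table_function::<HelloVTab>("hello")?;
///     Ok(())
/// }
/// ```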
123 | #[proc_macro_attribute] 124 | pub fn duckdb_entrypoint(_attr: TokenStream, item: TokenStream) -> TokenStream { 125 | let ast = parse_macro_input!(item as syn::Item); 126 | match ast { 127 | Item::Fn(mut func) => { 128 | let c_entrypoint = func.sig.ident.clone(); 129 | let c_entrypoint_version = Ident::new( 130 | c_entrypoint.to_string().replace("_init", "_version").as_str(), 131 | Span::call_site(), 132 | ); 133 | 134 | let original_funcname = func.sig.ident.to_string(); 135 | func.sig.ident = Ident::new(format!("_{}", original_funcname).as_str(), func.sig.ident.span()); 136 | 137 | let prefixed_original_function = func.sig.ident.clone(); 138 | 139 | quote_spanned! {func.span()=> 140 | #func 141 | 142 | /// # Safety 143 | /// 144 | /// Will be called by duckdb 145 | #[unsafe(no_mangle)] 146 | pub unsafe extern "C" fn #c_entrypoint(db: *mut std::ffi::c_void) { 147 | unsafe { 148 | let connection = Connection::open_from_raw(db.cast()).expect("can't open db connection"); 149 | #prefixed_original_function(connection).expect("init failed"); 150 | } 151 | } 152 | 153 | /// # Safety 154 | /// 155 | /// Predefined function, don't need to change unless you are sure 156 | #[unsafe(no_mangle)] 157 | pub unsafe extern "C" fn #c_entrypoint_version() -> *const std::ffi::c_char { 158 | unsafe { 159 | ffi::duckdb_library_version() 160 | } 161 | } 162 | 163 | 164 | } 165 | .into() 166 | } 167 | _ => panic!("Only function items are allowed on duckdb_entrypoint"), 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /crates/duckdb/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "duckdb" 3 | version = "1.3.0" 4 | authors.workspace = true 5 | edition.workspace = true 6 | repository.workspace = true 7 | homepage.workspace = true 8 | documentation.workspace = true 9 | readme.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | categories.workspace = true 13 | description = "Ergonomic wrapper for DuckDB" 14 | 15 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 16 | 17 | [lib] 18 | name = "duckdb" 19 | 20 | [features] 21 | default = [ ] 22 | bundled = ["libduckdb-sys/bundled"] 23 | json = ["libduckdb-sys/json", "bundled"] 24 | parquet = ["libduckdb-sys/parquet", "bundled"] 25 | vscalar = [] 26 | vscalar-arrow = [] 27 | vtab = [] 28 | vtab-loadable = ["vtab", "duckdb-loadable-macros"] 29 | vtab-excel = ["vtab", "calamine"] 30 | vtab-arrow = ["vtab", "num"] 31 | appender-arrow = ["vtab-arrow"] 32 | vtab-full = ["vtab-excel", "vtab-arrow", "appender-arrow"] 33 | extensions-full = ["json", "parquet", "vtab-full"] 34 | buildtime_bindgen = ["libduckdb-sys/buildtime_bindgen"] 35 | modern-full = ["chrono", "serde_json", "url", "r2d2", "uuid", "polars"] 36 | polars = ["dep:polars", "dep:polars-arrow"] 37 | # FIXME: These were added to make clippy happy: these features appear unused and should perhaps be removed 38 | column_decltype = [] 39 | extra_check = [] 40 | # Warning: experimental feature 41 | loadable-extension = ["libduckdb-sys/loadable-extension"] 42 | 43 | [dependencies] 44 | libduckdb-sys = { workspace = true } 45 | hashlink = { workspace = true } 46 | chrono = { workspace = true, optional = true } 47 | serde_json = { workspace = true, optional = true } 48 | csv = { workspace = true, optional = true } 49 | url = { workspace = true, optional = true } 50 | lazy_static = { workspace = true, optional = true } 51 | 
byteorder = { workspace = true, features = ["i128"], optional = true } 52 | fallible-iterator = { workspace = true } 53 | fallible-streaming-iterator = { workspace = true } 54 | uuid = { workspace = true, optional = true } 55 | smallvec = { workspace = true } 56 | cast = { workspace = true, features = ["std"] } 57 | arrow = { workspace = true, features = ["prettyprint", "ffi"] } 58 | rust_decimal = { workspace = true } 59 | strum = { workspace = true, features = ["derive"] } 60 | r2d2 = { workspace = true, optional = true } 61 | calamine = { workspace = true, optional = true } 62 | num = { workspace = true, features = ["std"], optional = true } 63 | duckdb-loadable-macros = { workspace = true, optional = true } 64 | polars = { workspace = true, features = ["dtype-full"], optional = true } 65 | polars-arrow = { workspace = true, optional = true } 66 | num-integer = { workspace = true } 67 | 68 | [dev-dependencies] 69 | doc-comment = { workspace = true } 70 | tempfile = { workspace = true } 71 | lazy_static = { workspace = true } 72 | regex = { workspace = true } 73 | uuid = { workspace = true, features = ["v4"] } 74 | unicase = { workspace = true } 75 | rand = { workspace = true } 76 | tempdir = { workspace = true } 77 | polars-core = { workspace = true } 78 | pretty_assertions = { workspace = true } 79 | # criterion = "0.3" 80 | 81 | # [[bench]] 82 | # name = "data_types" 83 | # harness = false 84 | 85 | [package.metadata.docs.rs] 86 | features = ["vtab", "vtab-arrow"] 87 | all-features = false 88 | no-default-features = true 89 | default-target = "x86_64-unknown-linux-gnu" 90 | 91 | [package.metadata.playground] 92 | features = [] 93 | all-features = false 94 | 95 | [[example]] 96 | name = "hello-ext" 97 | crate-type = ["cdylib"] 98 | required-features = ["vtab-loadable"] 99 | 100 | [[example]] 101 | name = "hello-ext-capi" 102 | crate-type = ["cdylib"] 103 | required-features = ["vtab-loadable", "loadable-extension"] 104 | -------------------------------------------------------------------------------- /crates/duckdb/examples/Movies_Social_metadata.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-rs/f631fc6f004d478229cf43e58756b12a6d6899cb/crates/duckdb/examples/Movies_Social_metadata.xlsx -------------------------------------------------------------------------------- /crates/duckdb/examples/appender.rs: -------------------------------------------------------------------------------- 1 | extern crate duckdb; 2 | 3 | use duckdb::{params, Connection, DropBehavior, Result}; 4 | 5 | fn main() -> Result<()> { 6 | //let mut db = Connection::open("10m.db")?; 7 | let mut db = Connection::open_in_memory()?; 8 | 9 | let create_table_sql = " 10 | create table IF NOT EXISTS test 11 | ( 12 | id INTEGER not null, -- primary key, 13 | area CHAR(6), 14 | age TINYINT not null, 15 | active TINYINT not null 16 | );"; 17 | db.execute_batch(create_table_sql)?; 18 | 19 | let row_count = 10_000_000; 20 | { 21 | let mut tx = db.transaction()?; 22 | tx.set_drop_behavior(DropBehavior::Commit); 23 | let mut app = tx.appender("test")?; 24 | // use generator 25 | // for u in firstn(1_000_000) { 26 | // app.append_row(params![u.id, u.area, u.age, u.active])?; 27 | // } 28 | 29 | for i in 0..row_count { 30 | app.append_row(params![ 31 | i, 32 | get_random_area_code(), 33 | get_random_age(), 34 | get_random_active(), 35 | ])?; 36 | } 37 | } 38 | 39 | let val = db.query_row("SELECT count(1) FROM test", [], |row| 
<(u32,)>::try_from(row))?; 40 | assert_eq!(val, (row_count,)); 41 | Ok(()) 42 | } 43 | 44 | #[allow(dead_code)] 45 | struct User { 46 | id: i32, 47 | area: Option, 48 | age: i8, 49 | active: i8, 50 | } 51 | 52 | #[allow(dead_code)] 53 | fn firstn(n: i32) -> impl std::iter::Iterator { 54 | let mut id = 0; 55 | std::iter::from_fn(move || { 56 | if id >= n { 57 | return None; 58 | } 59 | id += 1; 60 | Some(User { 61 | id, 62 | area: get_random_area_code(), 63 | age: get_random_age(), 64 | active: get_random_active(), 65 | }) 66 | }) 67 | } 68 | 69 | // Modified from https://github.com/avinassh/fast-sqlite3-inserts/blob/master/src/bin/common.rs 70 | use rand::{prelude::SliceRandom, Rng}; 71 | 72 | #[inline] 73 | fn get_random_age() -> i8 { 74 | let vs: Vec = vec![5, 10, 15]; 75 | *vs.choose(&mut rand::thread_rng()).unwrap() 76 | } 77 | 78 | #[inline] 79 | fn get_random_active() -> i8 { 80 | if rand::random() { 81 | return 1; 82 | } 83 | 0 84 | } 85 | 86 | #[inline] 87 | fn get_random_bool() -> bool { 88 | rand::random() 89 | } 90 | 91 | #[inline] 92 | fn get_random_area_code() -> Option { 93 | if !get_random_bool() { 94 | return None; 95 | } 96 | let mut rng = rand::thread_rng(); 97 | Some(format!("{:06}", rng.gen_range(0..999999))) 98 | } 99 | -------------------------------------------------------------------------------- /crates/duckdb/examples/basic.rs: -------------------------------------------------------------------------------- 1 | // Basic example copy from README 2 | 3 | extern crate duckdb; 4 | 5 | use duckdb::{ 6 | arrow::{record_batch::RecordBatch, util::pretty::print_batches}, 7 | params, Connection, Result, 8 | }; 9 | 10 | #[derive(Debug)] 11 | struct Person { 12 | _id: i32, 13 | name: String, 14 | data: Option>, 15 | } 16 | 17 | fn main() -> Result<()> { 18 | let conn = Connection::open_in_memory()?; 19 | 20 | conn.execute_batch( 21 | r"CREATE SEQUENCE seq; 22 | CREATE TABLE person ( 23 | id INTEGER PRIMARY KEY DEFAULT NEXTVAL('seq'), 24 | name TEXT NOT NULL, 25 | data BLOB 26 | ); 27 | ", 28 | )?; 29 | 30 | let me = Person { 31 | _id: 0, 32 | name: "Steven".to_string(), 33 | data: None, 34 | }; 35 | conn.execute( 36 | "INSERT INTO person (name, data) VALUES (?, ?)", 37 | params![me.name, me.data], 38 | )?; 39 | 40 | // query table by rows 41 | let mut stmt = conn.prepare("SELECT id, name, data FROM person")?; 42 | let person_iter = stmt.query_map([], |row| { 43 | Ok(Person { 44 | _id: row.get(0)?, 45 | name: row.get(1)?, 46 | data: row.get(2)?, 47 | }) 48 | })?; 49 | 50 | for person in person_iter { 51 | println!("Found person {:?}", person.unwrap()); 52 | } 53 | 54 | // query table by arrow 55 | let rbs: Vec = stmt.query_arrow([])?.collect(); 56 | print_batches(&rbs).unwrap(); 57 | Ok(()) 58 | } 59 | -------------------------------------------------------------------------------- /crates/duckdb/examples/date.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-rs/f631fc6f004d478229cf43e58756b12a6d6899cb/crates/duckdb/examples/date.xlsx -------------------------------------------------------------------------------- /crates/duckdb/examples/hello-ext-capi/main.rs: -------------------------------------------------------------------------------- 1 | extern crate duckdb; 2 | extern crate duckdb_loadable_macros; 3 | extern crate libduckdb_sys; 4 | 5 | use duckdb::{ 6 | core::{DataChunkHandle, Inserter, LogicalTypeHandle, LogicalTypeId}, 7 | vtab::{BindInfo, InitInfo, TableFunctionInfo, VTab}, 8 | 
Connection, Result, 9 | }; 10 | use duckdb_loadable_macros::duckdb_entrypoint_c_api; 11 | use libduckdb_sys as ffi; 12 | use std::{ 13 | error::Error, 14 | ffi::CString, 15 | sync::atomic::{AtomicBool, Ordering}, 16 | }; 17 | 18 | #[repr(C)] 19 | struct HelloBindData { 20 | name: String, 21 | } 22 | 23 | #[repr(C)] 24 | struct HelloInitData { 25 | done: AtomicBool, 26 | } 27 | 28 | struct HelloVTab; 29 | 30 | impl VTab for HelloVTab { 31 | type InitData = HelloInitData; 32 | type BindData = HelloBindData; 33 | 34 | fn bind(bind: &BindInfo) -> Result> { 35 | bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar)); 36 | let name = bind.get_parameter(0).to_string(); 37 | Ok(HelloBindData { name }) 38 | } 39 | 40 | fn init(_: &InitInfo) -> Result> { 41 | Ok(HelloInitData { 42 | done: AtomicBool::new(false), 43 | }) 44 | } 45 | 46 | fn func(func: &TableFunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { 47 | let init_data = func.get_init_data(); 48 | let bind_data = func.get_bind_data(); 49 | if init_data.done.swap(true, Ordering::Relaxed) { 50 | output.set_len(0); 51 | } else { 52 | let vector = output.flat_vector(0); 53 | let result = CString::new(format!("Hello {}", bind_data.name))?; 54 | vector.insert(0, result); 55 | output.set_len(1); 56 | } 57 | Ok(()) 58 | } 59 | 60 | fn parameters() -> Option> { 61 | Some(vec![LogicalTypeHandle::from(LogicalTypeId::Varchar)]) 62 | } 63 | } 64 | 65 | #[duckdb_entrypoint_c_api(ext_name = "rusty_quack", min_duckdb_version = "v0.0.1")] 66 | pub fn extension_entrypoint(con: Connection) -> Result<(), Box> { 67 | con.register_table_function::("hello") 68 | .expect("Failed to register hello table function"); 69 | Ok(()) 70 | } 71 | -------------------------------------------------------------------------------- /crates/duckdb/examples/hello-ext/main.rs: -------------------------------------------------------------------------------- 1 | #![warn(unsafe_op_in_unsafe_fn)] 2 | 3 | extern crate duckdb; 4 | extern crate duckdb_loadable_macros; 5 | extern crate libduckdb_sys; 6 | 7 | use duckdb::{ 8 | core::{DataChunkHandle, Inserter, LogicalTypeHandle, LogicalTypeId}, 9 | vtab::{BindInfo, InitInfo, TableFunctionInfo, VTab}, 10 | Connection, Result, 11 | }; 12 | use duckdb_loadable_macros::duckdb_entrypoint; 13 | use libduckdb_sys as ffi; 14 | use std::{ 15 | error::Error, 16 | ffi::CString, 17 | sync::atomic::{AtomicBool, Ordering}, 18 | }; 19 | 20 | struct HelloBindData { 21 | name: String, 22 | } 23 | 24 | struct HelloInitData { 25 | done: AtomicBool, 26 | } 27 | 28 | struct HelloVTab; 29 | 30 | impl VTab for HelloVTab { 31 | type InitData = HelloInitData; 32 | type BindData = HelloBindData; 33 | 34 | fn bind(bind: &BindInfo) -> Result> { 35 | bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar)); 36 | let name = bind.get_parameter(0).to_string(); 37 | Ok(HelloBindData { name }) 38 | } 39 | 40 | fn init(_: &InitInfo) -> Result> { 41 | Ok(HelloInitData { 42 | done: AtomicBool::new(false), 43 | }) 44 | } 45 | 46 | fn func(func: &TableFunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { 47 | let init_data = func.get_init_data(); 48 | let bind_data = func.get_bind_data(); 49 | if init_data.done.swap(true, Ordering::Relaxed) { 50 | output.set_len(0); 51 | } else { 52 | let vector = output.flat_vector(0); 53 | let result = CString::new(format!("Hello {}", bind_data.name))?; 54 | vector.insert(0, result); 55 | output.set_len(1); 56 | } 57 | Ok(()) 58 | } 59 | 60 | fn parameters() -> Option> 
{ 61 | Some(vec![LogicalTypeHandle::from(LogicalTypeId::Varchar)]) 62 | } 63 | } 64 | 65 | // Exposes a extern C function named "libhello_ext_init" in the compiled dynamic library, 66 | // the "entrypoint" that duckdb will use to load the extension. 67 | #[duckdb_entrypoint] 68 | pub fn libhello_ext_init(conn: Connection) -> Result<(), Box> { 69 | conn.register_table_function::("hello")?; 70 | Ok(()) 71 | } 72 | -------------------------------------------------------------------------------- /crates/duckdb/examples/int32_decimal.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-rs/f631fc6f004d478229cf43e58756b12a6d6899cb/crates/duckdb/examples/int32_decimal.parquet -------------------------------------------------------------------------------- /crates/duckdb/examples/parquet.rs: -------------------------------------------------------------------------------- 1 | extern crate duckdb; 2 | use duckdb::{ 3 | arrow::{record_batch::RecordBatch, util::pretty::print_batches}, 4 | Connection, Result, 5 | }; 6 | 7 | fn main() -> Result<()> { 8 | let db = Connection::open_in_memory()?; 9 | db.execute_batch("INSTALL parquet; LOAD parquet;")?; 10 | let rbs: Vec = db 11 | .prepare("SELECT * FROM read_parquet('./examples/int32_decimal.parquet');")? 12 | .query_arrow([])? 13 | .collect(); 14 | assert!(print_batches(&rbs).is_ok()); 15 | Ok(()) 16 | } 17 | -------------------------------------------------------------------------------- /crates/duckdb/src/appender/arrow.rs: -------------------------------------------------------------------------------- 1 | use super::{ffi, Appender, Result}; 2 | use crate::{ 3 | core::{DataChunkHandle, LogicalTypeHandle}, 4 | error::result_from_duckdb_appender, 5 | vtab::{record_batch_to_duckdb_data_chunk, to_duckdb_logical_type}, 6 | Error, 7 | }; 8 | use arrow::record_batch::RecordBatch; 9 | use ffi::duckdb_append_data_chunk; 10 | 11 | impl Appender<'_> { 12 | /// Append one record_batch 13 | /// 14 | /// ## Example 15 | /// 16 | /// ```rust,no_run 17 | /// # use duckdb::{Connection, Result, params}; 18 | /// use arrow::record_batch::RecordBatch; 19 | /// fn insert_record_batch(conn: &Connection,record_batch:RecordBatch) -> Result<()> { 20 | /// let mut app = conn.appender("foo")?; 21 | /// app.append_record_batch(record_batch)?; 22 | /// Ok(()) 23 | /// } 24 | /// ``` 25 | /// 26 | /// # Failure 27 | /// 28 | /// Will return `Err` if append column count not the same with the table schema 29 | #[inline] 30 | pub fn append_record_batch(&mut self, record_batch: RecordBatch) -> Result<()> { 31 | let schema = record_batch.schema(); 32 | let mut logical_type: Vec = vec![]; 33 | for field in schema.fields() { 34 | let logical_t = to_duckdb_logical_type(field.data_type()) 35 | .map_err(|_op| Error::ArrowTypeToDuckdbType(field.to_string(), field.data_type().clone()))?; 36 | logical_type.push(logical_t); 37 | } 38 | 39 | let mut data_chunk = DataChunkHandle::new(&logical_type); 40 | record_batch_to_duckdb_data_chunk(&record_batch, &mut data_chunk).map_err(|_op| Error::AppendError)?; 41 | 42 | let rc = unsafe { duckdb_append_data_chunk(self.app, data_chunk.get_ptr()) }; 43 | result_from_duckdb_appender(rc, &mut self.app) 44 | } 45 | } 46 | 47 | #[cfg(test)] 48 | mod test { 49 | use crate::{Connection, Result}; 50 | use arrow::{ 51 | array::{Int8Array, StringArray}, 52 | datatypes::{DataType, Field, Schema}, 53 | record_batch::RecordBatch, 54 | }; 55 | use std::sync::Arc; 56 | 57 | #[test] 58 | 
fn test_append_record_batch() -> Result<()> { 59 | let db = Connection::open_in_memory()?; 60 | db.execute_batch("CREATE TABLE foo(id TINYINT not null,area TINYINT not null,name Varchar)")?; 61 | { 62 | let id_array = Int8Array::from(vec![1, 2, 3, 4, 5]); 63 | let area_array = Int8Array::from(vec![11, 22, 33, 44, 55]); 64 | let name_array = StringArray::from(vec![Some("11"), None, None, Some("44"), None]); 65 | let schema = Schema::new(vec![ 66 | Field::new("id", DataType::Int8, true), 67 | Field::new("area", DataType::Int8, true), 68 | Field::new("area", DataType::Utf8, true), 69 | ]); 70 | let record_batch = RecordBatch::try_new( 71 | Arc::new(schema), 72 | vec![Arc::new(id_array), Arc::new(area_array), Arc::new(name_array)], 73 | ) 74 | .unwrap(); 75 | let mut app = db.appender("foo")?; 76 | app.append_record_batch(record_batch)?; 77 | } 78 | let mut stmt = db.prepare("SELECT id, area,name FROM foo")?; 79 | let rbs: Vec = stmt.query_arrow([])?.collect(); 80 | assert_eq!(rbs.iter().map(|op| op.num_rows()).sum::(), 5); 81 | Ok(()) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /crates/duckdb/src/appender/mod.rs: -------------------------------------------------------------------------------- 1 | use super::{ffi, AppenderParams, Connection, Result, ValueRef}; 2 | use std::{ffi::c_void, fmt, os::raw::c_char}; 3 | 4 | use crate::{ 5 | error::result_from_duckdb_appender, 6 | types::{ToSql, ToSqlOutput}, 7 | Error, 8 | }; 9 | 10 | /// Appender for fast import data 11 | pub struct Appender<'conn> { 12 | conn: &'conn Connection, 13 | app: ffi::duckdb_appender, 14 | } 15 | 16 | #[cfg(feature = "appender-arrow")] 17 | mod arrow; 18 | 19 | impl Appender<'_> { 20 | /// Append multiple rows from Iterator 21 | /// 22 | /// ## Example 23 | /// 24 | /// ```rust,no_run 25 | /// # use duckdb::{Connection, Result, params}; 26 | /// fn insert_rows(conn: &Connection) -> Result<()> { 27 | /// let mut app = conn.appender("foo")?; 28 | /// app.append_rows([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])?; 29 | /// Ok(()) 30 | /// } 31 | /// ``` 32 | /// 33 | /// # Failure 34 | /// 35 | /// Will return `Err` if append column count not the same with the table schema 36 | #[inline] 37 | pub fn append_rows(&mut self, rows: I) -> Result<()> 38 | where 39 | I: IntoIterator, 40 | P: AppenderParams, 41 | { 42 | for row in rows { 43 | self.append_row(row)?; 44 | } 45 | Ok(()) 46 | } 47 | 48 | /// Append one row 49 | /// 50 | /// ## Example 51 | /// 52 | /// ```rust,no_run 53 | /// # use duckdb::{Connection, Result, params}; 54 | /// fn insert_row(conn: &Connection) -> Result<()> { 55 | /// let mut app = conn.appender("foo")?; 56 | /// app.append_row([1, 2])?; 57 | /// Ok(()) 58 | /// } 59 | /// ``` 60 | /// 61 | /// # Failure 62 | /// 63 | /// Will return `Err` if append column count not the same with the table schema 64 | #[inline] 65 | pub fn append_row(&mut self, params: P) -> Result<()> { 66 | let _ = unsafe { ffi::duckdb_appender_begin_row(self.app) }; 67 | params.__bind_in(self)?; 68 | // NOTE: we only check end_row return value 69 | let rc = unsafe { ffi::duckdb_appender_end_row(self.app) }; 70 | result_from_duckdb_appender(rc, &mut self.app) 71 | } 72 | 73 | #[inline] 74 | pub(crate) fn bind_parameters
<P>
(&mut self, params: P) -> Result<()> 75 | where 76 | P: IntoIterator, 77 | P::Item: ToSql, 78 | { 79 | for p in params.into_iter() { 80 | self.bind_parameter(&p)?; 81 | } 82 | Ok(()) 83 | } 84 | 85 | fn bind_parameter(&self, param: &P) -> Result<()> { 86 | let value = param.to_sql()?; 87 | 88 | let ptr = self.app; 89 | let value = match value { 90 | ToSqlOutput::Borrowed(v) => v, 91 | ToSqlOutput::Owned(ref v) => ValueRef::from(v), 92 | }; 93 | // NOTE: we ignore the return value here 94 | // because if anything failed, end_row will fail 95 | // TODO: append more 96 | let rc = match value { 97 | ValueRef::Null => unsafe { ffi::duckdb_append_null(ptr) }, 98 | ValueRef::Boolean(i) => unsafe { ffi::duckdb_append_bool(ptr, i) }, 99 | ValueRef::TinyInt(i) => unsafe { ffi::duckdb_append_int8(ptr, i) }, 100 | ValueRef::SmallInt(i) => unsafe { ffi::duckdb_append_int16(ptr, i) }, 101 | ValueRef::Int(i) => unsafe { ffi::duckdb_append_int32(ptr, i) }, 102 | ValueRef::BigInt(i) => unsafe { ffi::duckdb_append_int64(ptr, i) }, 103 | ValueRef::UTinyInt(i) => unsafe { ffi::duckdb_append_uint8(ptr, i) }, 104 | ValueRef::USmallInt(i) => unsafe { ffi::duckdb_append_uint16(ptr, i) }, 105 | ValueRef::UInt(i) => unsafe { ffi::duckdb_append_uint32(ptr, i) }, 106 | ValueRef::UBigInt(i) => unsafe { ffi::duckdb_append_uint64(ptr, i) }, 107 | ValueRef::HugeInt(i) => unsafe { 108 | let hi = ffi::duckdb_hugeint { 109 | lower: i as u64, 110 | upper: (i >> 64) as i64, 111 | }; 112 | ffi::duckdb_append_hugeint(ptr, hi) 113 | }, 114 | 115 | ValueRef::Float(r) => unsafe { ffi::duckdb_append_float(ptr, r) }, 116 | ValueRef::Double(r) => unsafe { ffi::duckdb_append_double(ptr, r) }, 117 | ValueRef::Text(s) => unsafe { 118 | ffi::duckdb_append_varchar_length(ptr, s.as_ptr() as *const c_char, s.len() as u64) 119 | }, 120 | ValueRef::Timestamp(u, i) => unsafe { 121 | ffi::duckdb_append_timestamp(ptr, ffi::duckdb_timestamp { micros: u.to_micros(i) }) 122 | }, 123 | ValueRef::Blob(b) => unsafe { ffi::duckdb_append_blob(ptr, b.as_ptr() as *const c_void, b.len() as u64) }, 124 | ValueRef::Date32(d) => unsafe { ffi::duckdb_append_date(ptr, ffi::duckdb_date { days: d }) }, 125 | ValueRef::Time64(u, v) => unsafe { 126 | ffi::duckdb_append_time(ptr, ffi::duckdb_time { micros: u.to_micros(v) }) 127 | }, 128 | ValueRef::Interval { months, days, nanos } => unsafe { 129 | ffi::duckdb_append_interval( 130 | ptr, 131 | ffi::duckdb_interval { 132 | months, 133 | days, 134 | micros: nanos / 1000, 135 | }, 136 | ) 137 | }, 138 | _ => unreachable!("not supported"), 139 | }; 140 | if rc != 0 { 141 | return Err(Error::AppendError); 142 | } 143 | Ok(()) 144 | } 145 | 146 | #[inline] 147 | pub(super) fn new(conn: &Connection, app: ffi::duckdb_appender) -> Appender<'_> { 148 | Appender { conn, app } 149 | } 150 | 151 | /// Flush data into DB 152 | #[inline] 153 | pub fn flush(&mut self) -> Result<()> { 154 | unsafe { 155 | let res = ffi::duckdb_appender_flush(self.app); 156 | result_from_duckdb_appender(res, &mut self.app) 157 | } 158 | } 159 | } 160 | 161 | impl Drop for Appender<'_> { 162 | fn drop(&mut self) { 163 | if !self.app.is_null() { 164 | let _ = self.flush(); // can't safely handle failures here 165 | unsafe { 166 | ffi::duckdb_appender_close(self.app); 167 | ffi::duckdb_appender_destroy(&mut self.app); 168 | } 169 | } 170 | } 171 | } 172 | 173 | impl fmt::Debug for Appender<'_> { 174 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 175 | f.debug_struct("Appender").field("conn", self.conn).finish() 176 | } 177 | } 178 | 179 | 
#[cfg(test)] 180 | mod test { 181 | use crate::{Connection, Result}; 182 | 183 | #[test] 184 | fn test_append_one_row() -> Result<()> { 185 | let db = Connection::open_in_memory()?; 186 | db.execute_batch("CREATE TABLE foo(x INTEGER)")?; 187 | 188 | { 189 | let mut app = db.appender("foo")?; 190 | app.append_row([42])?; 191 | } 192 | 193 | let val = db.query_row("SELECT x FROM foo", [], |row| <(i32,)>::try_from(row))?; 194 | assert_eq!(val, (42,)); 195 | Ok(()) 196 | } 197 | 198 | #[test] 199 | fn test_append_rows() -> Result<()> { 200 | let db = Connection::open_in_memory()?; 201 | db.execute_batch("CREATE TABLE foo(x INTEGER, y INTEGER)")?; 202 | 203 | { 204 | let mut app = db.appender("foo")?; 205 | app.append_rows([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])?; 206 | } 207 | 208 | let val = db.query_row("SELECT sum(x), sum(y) FROM foo", [], |row| <(i32, i32)>::try_from(row))?; 209 | assert_eq!(val, (25, 30)); 210 | Ok(()) 211 | } 212 | 213 | // Waiting https://github.com/duckdb/duckdb/pull/3405 214 | #[cfg(feature = "uuid")] 215 | #[test] 216 | #[ignore = "not supported for now"] 217 | fn test_append_uuid() -> Result<()> { 218 | use uuid::Uuid; 219 | 220 | let db = Connection::open_in_memory()?; 221 | db.execute_batch("CREATE TABLE foo(x UUID)")?; 222 | 223 | let id = Uuid::new_v4(); 224 | { 225 | let mut app = db.appender("foo")?; 226 | app.append_row([id])?; 227 | } 228 | 229 | let val = db.query_row("SELECT x FROM foo", [], |row| <(Uuid,)>::try_from(row))?; 230 | assert_eq!(val, (id,)); 231 | Ok(()) 232 | } 233 | 234 | #[test] 235 | fn test_append_string_as_ts_row() -> Result<()> { 236 | let db = Connection::open_in_memory()?; 237 | db.execute_batch("CREATE TABLE foo(x TIMESTAMP)")?; 238 | 239 | { 240 | let mut app = db.appender("foo")?; 241 | app.append_row(["2022-04-09 15:56:37.544"])?; 242 | } 243 | 244 | let val = db.query_row("SELECT x FROM foo", [], |row| <(i64,)>::try_from(row))?; 245 | assert_eq!(val, (1649519797544000,)); 246 | Ok(()) 247 | } 248 | 249 | #[test] 250 | fn test_append_timestamp() -> Result<()> { 251 | use std::time::Duration; 252 | let db = Connection::open_in_memory()?; 253 | db.execute_batch("CREATE TABLE foo(x TIMESTAMP)")?; 254 | 255 | let d = Duration::from_secs(1); 256 | { 257 | let mut app = db.appender("foo")?; 258 | app.append_row([d])?; 259 | } 260 | 261 | let val = db.query_row("SELECT x FROM foo where x=?", [d], |row| <(i32,)>::try_from(row))?; 262 | assert_eq!(val, (d.as_micros() as i32,)); 263 | Ok(()) 264 | } 265 | 266 | #[test] 267 | #[cfg(feature = "chrono")] 268 | fn test_append_datetime() -> Result<()> { 269 | use crate::params; 270 | use chrono::{NaiveDate, NaiveDateTime}; 271 | 272 | let db = Connection::open_in_memory()?; 273 | db.execute_batch("CREATE TABLE foo(x DATE, y TIMESTAMP)")?; 274 | 275 | let date = NaiveDate::from_ymd_opt(2024, 6, 5).unwrap(); 276 | let timestamp = date.and_hms_opt(18, 26, 53).unwrap(); 277 | { 278 | let mut app = db.appender("foo")?; 279 | app.append_row(params![date, timestamp])?; 280 | } 281 | let (date2, timestamp2) = db.query_row("SELECT x, y FROM foo", [], |row| { 282 | Ok((row.get::<_, NaiveDate>(0)?, row.get::<_, NaiveDateTime>(1)?)) 283 | })?; 284 | assert_eq!(date, date2); 285 | assert_eq!(timestamp, timestamp2); 286 | Ok(()) 287 | } 288 | 289 | #[test] 290 | fn test_appender_error() -> Result<(), crate::Error> { 291 | use crate::params; 292 | let conn = Connection::open_in_memory()?; 293 | conn.execute( 294 | r"CREATE TABLE foo ( 295 | foobar TEXT, 296 | foobar_int INT, 297 | foobar_split TEXT[] AS 
(split(trim(foobar), ',')) 298 | );", 299 | [], 300 | )?; 301 | let mut appender = conn.appender("foo")?; 302 | match appender.append_row(params!["foo"]) { 303 | Err(crate::Error::DuckDBFailure(.., Some(msg))) => { 304 | assert_eq!(msg, "Call to EndRow before all columns have been appended to!") 305 | } 306 | Err(err) => panic!("unexpected error: {:?}", err), 307 | Ok(_) => panic!("expected an error but got Ok"), 308 | } 309 | Ok(()) 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /crates/duckdb/src/arrow_batch.rs: -------------------------------------------------------------------------------- 1 | use super::{ 2 | arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, 3 | Statement, 4 | }; 5 | 6 | /// A handle for the resulting RecordBatch of a query. 7 | #[must_use = "Arrow is lazy and will do nothing unless consumed"] 8 | pub struct Arrow<'stmt> { 9 | pub(crate) stmt: Option<&'stmt Statement<'stmt>>, 10 | } 11 | 12 | #[allow(clippy::needless_lifetimes)] 13 | impl<'stmt> Arrow<'stmt> { 14 | #[inline] 15 | pub(crate) fn new(stmt: &'stmt Statement<'stmt>) -> Arrow<'stmt> { 16 | Arrow { stmt: Some(stmt) } 17 | } 18 | 19 | /// return arrow schema 20 | #[inline] 21 | pub fn get_schema(&self) -> SchemaRef { 22 | self.stmt.unwrap().stmt.schema() 23 | } 24 | } 25 | 26 | #[allow(clippy::needless_lifetimes)] 27 | impl<'stmt> Iterator for Arrow<'stmt> { 28 | type Item = RecordBatch; 29 | 30 | fn next(&mut self) -> Option { 31 | Some(RecordBatch::from(&self.stmt?.step()?)) 32 | } 33 | } 34 | 35 | /// A handle for the resulting RecordBatch of a query in streaming 36 | #[must_use = "Arrow stream is lazy and will not fetch data unless consumed"] 37 | #[allow(clippy::needless_lifetimes)] 38 | pub struct ArrowStream<'stmt> { 39 | pub(crate) stmt: Option<&'stmt Statement<'stmt>>, 40 | pub(crate) schema: SchemaRef, 41 | } 42 | 43 | #[allow(clippy::needless_lifetimes)] 44 | impl<'stmt> ArrowStream<'stmt> { 45 | #[inline] 46 | pub(crate) fn new(stmt: &'stmt Statement<'stmt>, schema: SchemaRef) -> ArrowStream<'stmt> { 47 | ArrowStream { 48 | stmt: Some(stmt), 49 | schema, 50 | } 51 | } 52 | 53 | /// return arrow schema 54 | #[inline] 55 | pub fn get_schema(&self) -> SchemaRef { 56 | self.schema.clone() 57 | } 58 | } 59 | 60 | #[allow(clippy::needless_lifetimes)] 61 | impl<'stmt> Iterator for ArrowStream<'stmt> { 62 | type Item = RecordBatch; 63 | 64 | fn next(&mut self) -> Option { 65 | Some(RecordBatch::from(&self.stmt?.stream_step(self.get_schema())?)) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /crates/duckdb/src/column.rs: -------------------------------------------------------------------------------- 1 | use std::str; 2 | 3 | use arrow::datatypes::DataType; 4 | 5 | use crate::{Error, Result, Statement}; 6 | 7 | /// Information about a column of a DuckDB query. 8 | #[derive(Debug)] 9 | pub struct Column<'stmt> { 10 | name: &'stmt str, 11 | decl_type: Option<&'stmt str>, 12 | } 13 | 14 | impl Column<'_> { 15 | /// Returns the name of the column. 16 | #[inline] 17 | pub fn name(&self) -> &str { 18 | self.name 19 | } 20 | 21 | /// Returns the type of the column (`None` for expression). 22 | #[inline] 23 | pub fn decl_type(&self) -> Option<&str> { 24 | self.decl_type 25 | } 26 | } 27 | 28 | impl Statement<'_> { 29 | /// Get all the column names in the result set of the prepared statement. 
30 | /// 31 | /// If associated DB schema can be altered concurrently, you should make 32 | /// sure that current statement has already been stepped once before 33 | /// calling this method. 34 | /// 35 | /// # Caveats 36 | /// Panics if the query has not been [`execute`](Statement::execute)d yet. 37 | pub fn column_names(&self) -> Vec { 38 | self.stmt 39 | .schema() 40 | .fields() 41 | .iter() 42 | .map(|f| f.name().to_owned()) 43 | .collect() 44 | } 45 | 46 | /// Return the number of columns in the result set returned by the prepared 47 | /// statement. 48 | /// 49 | /// If associated DB schema can be altered concurrently, you should make 50 | /// sure that current statement has already been stepped once before 51 | /// calling this method. 52 | #[inline] 53 | pub fn column_count(&self) -> usize { 54 | self.stmt.column_count() 55 | } 56 | 57 | /// Check that column name reference lifetime is limited: 58 | /// https://www.sqlite.org/c3ref/column_name.html 59 | /// > The returned string pointer is valid... 60 | /// 61 | /// `column_name` reference can become invalid if `stmt` is reprepared 62 | /// (because of schema change) when `query_row` is called. So we assert 63 | /// that a compilation error happens if this reference is kept alive: 64 | /// ```compile_fail 65 | /// use duckdb::{Connection, Result}; 66 | /// fn main() -> Result<()> { 67 | /// let db = Connection::open_in_memory()?; 68 | /// let mut stmt = db.prepare("SELECT 1 as x")?; 69 | /// let column_name = stmt.column_name(0)?; 70 | /// let x = stmt.query_row([], |r| r.get::<_, i64>(0))?; // E0502 71 | /// assert_eq!(1, x); 72 | /// assert_eq!("x", column_name); 73 | /// Ok(()) 74 | /// } 75 | /// ``` 76 | #[inline] 77 | pub(super) fn column_name_unwrap(&self, col: usize) -> &String { 78 | // Just panic if the bounds are wrong for now, we never call this 79 | // without checking first. 80 | self.column_name(col).expect("Column out of bounds") 81 | } 82 | 83 | /// Returns the name assigned to a particular column in the result set 84 | /// returned by the prepared statement. 85 | /// 86 | /// If associated DB schema can be altered concurrently, you should make 87 | /// sure that current statement has already been stepped once before 88 | /// calling this method. 89 | /// 90 | /// ## Failure 91 | /// 92 | /// Returns an `Error::InvalidColumnIndex` if `idx` is outside the valid 93 | /// column range for this row. 94 | /// 95 | /// # Caveats 96 | /// Panics if the query has not been [`execute`](Statement::execute)d yet 97 | /// or when column name is not valid UTF-8. 98 | #[inline] 99 | pub fn column_name(&self, col: usize) -> Result<&String> { 100 | self.stmt.column_name(col).ok_or(Error::InvalidColumnIndex(col)) 101 | } 102 | 103 | /// Returns the column index in the result set for a given column name. 104 | /// 105 | /// If there is no AS clause then the name of the column is unspecified and 106 | /// may change from one release of DuckDB to the next. 107 | /// 108 | /// If associated DB schema can be altered concurrently, you should make 109 | /// sure that current statement has already been stepped once before 110 | /// calling this method. 111 | /// 112 | /// # Failure 113 | /// 114 | /// Will return an `Error::InvalidColumnName` when there is no column with 115 | /// the specified `name`. 116 | /// 117 | /// # Caveats 118 | /// Panics if the query has not been [`execute`](Statement::execute)d yet. 
119 | #[inline] 120 | pub fn column_index(&self, name: &str) -> Result { 121 | let n = self.column_count(); 122 | for i in 0..n { 123 | // Note: `column_name` is only fallible if `i` is out of bounds, 124 | // which we've already checked. 125 | if name.eq_ignore_ascii_case(self.stmt.column_name(i).unwrap()) { 126 | return Ok(i); 127 | } 128 | } 129 | Err(Error::InvalidColumnName(String::from(name))) 130 | } 131 | 132 | /// Returns the declared data type of the column. 133 | /// 134 | /// # Caveats 135 | /// Panics if the query has not been [`execute`](Statement::execute)d yet. 136 | #[inline] 137 | pub fn column_type(&self, idx: usize) -> DataType { 138 | self.stmt.column_type(idx) 139 | } 140 | 141 | /// Returns a slice describing the columns of the result of the query. 142 | /// 143 | /// If associated DB schema can be altered concurrently, you should make 144 | /// sure that current statement has already been stepped once before 145 | /// calling this method. 146 | #[cfg(feature = "column_decltype")] 147 | pub fn columns(&self) -> Vec { 148 | let n = self.column_count(); 149 | let mut cols = Vec::with_capacity(n); 150 | for i in 0..n { 151 | let name = self.column_name_unwrap(i); 152 | let slice = self.stmt.column_decltype(i); 153 | let decl_type = 154 | slice.map(|s| str::from_utf8(s.to_bytes()).expect("Invalid UTF-8 sequence in column declaration")); 155 | cols.push(Column { name, decl_type }); 156 | } 157 | cols 158 | } 159 | } 160 | 161 | #[cfg(test)] 162 | mod test { 163 | use crate::{Connection, Result}; 164 | 165 | #[test] 166 | #[cfg(feature = "column_decltype")] 167 | fn test_columns() -> Result<()> { 168 | use super::Column; 169 | 170 | let db = Connection::open_in_memory()?; 171 | let query = db.prepare("SELECT * FROM sqlite_master")?; 172 | let columns = query.columns(); 173 | let column_names: Vec<&str> = columns.iter().map(Column::name).collect(); 174 | assert_eq!( 175 | column_names.as_slice(), 176 | &["type", "name", "tbl_name", "rootpage", "sql"] 177 | ); 178 | let column_types: Vec> = columns.iter().map(Column::decl_type).collect(); 179 | assert_eq!( 180 | &column_types[..3], 181 | &[Some("VARCHAR"), Some("VARCHAR"), Some("VARCHAR"),] 182 | ); 183 | Ok(()) 184 | } 185 | 186 | #[test] 187 | fn test_column_name_in_error() -> Result<()> { 188 | use crate::{types::Type, Error}; 189 | let db = Connection::open_in_memory()?; 190 | db.execute_batch( 191 | "BEGIN; 192 | CREATE TABLE foo(x INTEGER, y TEXT); 193 | INSERT INTO foo VALUES(4, NULL); 194 | END;", 195 | )?; 196 | let mut stmt = db.prepare("SELECT x as renamed, y FROM foo")?; 197 | let mut rows = stmt.query([])?; 198 | let row = rows.next()?.unwrap(); 199 | match row.get::<_, String>(0).unwrap_err() { 200 | Error::InvalidColumnType(idx, name, ty) => { 201 | assert_eq!(idx, 0); 202 | assert_eq!(name, "renamed"); 203 | assert_eq!(ty, Type::Int); 204 | } 205 | e => { 206 | panic!("Unexpected error type: {e:?}"); 207 | } 208 | } 209 | match row.get::<_, String>("y").unwrap_err() { 210 | Error::InvalidColumnType(idx, name, ty) => { 211 | assert_eq!(idx, 1); 212 | assert_eq!(name, "y"); 213 | assert_eq!(ty, Type::Null); 214 | } 215 | e => { 216 | panic!("Unexpected error type: {e:?}"); 217 | } 218 | } 219 | Ok(()) 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /crates/duckdb/src/config.rs: -------------------------------------------------------------------------------- 1 | use super::{ffi, Result}; 2 | use crate::error::Error; 3 | use std::{default::Default, 
ffi::CString, os::raw::c_char, ptr}; 4 | 5 | use strum::{Display, EnumString}; 6 | 7 | /// DuckDB access mode; the default is `Automatic`. 8 | #[derive(Debug, Eq, PartialEq, EnumString, Display)] 9 | pub enum AccessMode { 10 | /// Access mode of the database AUTOMATIC 11 | #[strum(to_string = "AUTOMATIC")] 12 | Automatic, 13 | /// Access mode of the database READ_ONLY 14 | #[strum(to_string = "READ_ONLY")] 15 | ReadOnly, 16 | /// Access mode of the database READ_WRITE 17 | #[strum(to_string = "READ_WRITE")] 18 | ReadWrite, 19 | } 20 | 21 | /// DuckDB default sort order; the default is `Asc`. 22 | #[derive(Debug, Eq, PartialEq, EnumString, Display)] 23 | pub enum DefaultOrder { 24 | /// The order type, ASC 25 | #[strum(to_string = "ASC")] 26 | Asc, 27 | /// The order type, DESC 28 | #[strum(to_string = "DESC")] 29 | Desc, 30 | } 31 | 32 | /// DuckDB default null ordering; the default is `NullsFirst`. 33 | #[derive(Debug, Eq, PartialEq, EnumString, Display)] 34 | pub enum DefaultNullOrder { 35 | /// Null ordering, NullsFirst 36 | #[strum(to_string = "NULLS_FIRST")] 37 | NullsFirst, 38 | /// Null ordering, NullsLast 39 | #[strum(to_string = "NULLS_LAST")] 40 | NullsLast, 41 | } 42 | 43 | /// DuckDB configuration. 44 | /// Refer to the DuckDB configuration documentation for the full list of options. 45 | #[derive(Default)] 46 | pub struct Config { 47 | config: Option<ffi::duckdb_config>, 48 | } 49 | 50 | impl Config { 51 | pub(crate) fn duckdb_config(&self) -> ffi::duckdb_config { 52 | self.config.unwrap_or(std::ptr::null_mut() as ffi::duckdb_config) 53 | } 54 | 55 | /// Enable automatic installation and loading of known extensions. 56 | pub fn enable_autoload_extension(mut self, enabled: bool) -> Result<Config> { 57 | self.set("autoinstall_known_extensions", &(enabled as i32).to_string())?; 58 | self.set("autoload_known_extensions", &(enabled as i32).to_string())?; 59 | Ok(self) 60 | } 61 | 62 | /// Access mode of the database ([AUTOMATIC], READ_ONLY or READ_WRITE) 63 | pub fn access_mode(mut self, mode: AccessMode) -> Result<Config> { 64 | self.set("access_mode", &mode.to_string())?; 65 | Ok(self) 66 | } 67 | 68 | /// Custom user-agent metadata reported by DuckDB callers 69 | pub fn custom_user_agent(mut self, custom_user_agent: &str) -> Result<Config> { 70 | self.set("custom_user_agent", custom_user_agent)?; 71 | Ok(self) 72 | } 73 | 74 | /// The order type used when none is specified ([ASC] or DESC) 75 | pub fn default_order(mut self, order: DefaultOrder) -> Result<Config> { 76 | self.set("default_order", &order.to_string())?; 77 | Ok(self) 78 | } 79 | 80 | /// Null ordering used when none is specified ([NULLS_FIRST] or NULLS_LAST) 81 | pub fn default_null_order(mut self, null_order: DefaultNullOrder) -> Result<Config> { 82 | self.set("default_null_order", &null_order.to_string())?; 83 | Ok(self) 84 | } 85 | 86 | /// Allow the database to access external state (through e.g. COPY TO/FROM, CSV readers, pandas replacement scans, etc.) 87 | pub fn enable_external_access(mut self, enabled: bool) -> Result<Config> { 88 | self.set("enable_external_access", &enabled.to_string())?; 89 | Ok(self) 90 | } 91 | 92 | /// Whether the object cache is used to cache e.g. Parquet metadata 93 | pub fn enable_object_cache(mut self, enabled: bool) -> Result<Config> { 94 | self.set("enable_object_cache", &enabled.to_string())?; 95 | Ok(self) 96 | } 97 | 98 | /// Allow loading unsigned third-party DuckDB extensions. 99 | pub fn allow_unsigned_extensions(mut self) -> Result<Config> { 100 | self.set("allow_unsigned_extensions", "true")?; 101 | Ok(self) 102 | } 103 | 104 | /// The maximum amount of memory DuckDB may use (e.g.
1GB) 105 | pub fn max_memory(mut self, memory: &str) -> Result<Config> { 106 | self.set("max_memory", memory)?; 107 | Ok(self) 108 | } 109 | 110 | /// The total number of threads used by the system 111 | pub fn threads(mut self, thread_num: i64) -> Result<Config> { 112 | self.set("threads", &thread_num.to_string())?; 113 | Ok(self) 114 | } 115 | 116 | /// Add any setting to the config. DuckDB will return an error if the setting is unknown or 117 | /// otherwise invalid. 118 | pub fn with(mut self, key: impl AsRef<str>, value: impl AsRef<str>) -> Result<Config> { 119 | self.set(key.as_ref(), value.as_ref())?; 120 | Ok(self) 121 | } 122 | 123 | fn set(&mut self, key: &str, value: &str) -> Result<()> { 124 | if self.config.is_none() { 125 | let mut config: ffi::duckdb_config = ptr::null_mut(); 126 | let state = unsafe { ffi::duckdb_create_config(&mut config) }; 127 | assert_eq!(state, ffi::DuckDBSuccess); 128 | self.config = Some(config); 129 | } 130 | let c_key = CString::new(key).unwrap(); 131 | let c_value = CString::new(value).unwrap(); 132 | let state = unsafe { 133 | ffi::duckdb_set_config( 134 | self.config.unwrap(), 135 | c_key.as_ptr() as *const c_char, 136 | c_value.as_ptr() as *const c_char, 137 | ) 138 | }; 139 | if state != ffi::DuckDBSuccess { 140 | return Err(Error::DuckDBFailure( 141 | ffi::Error::new(state), 142 | Some(format!("set {key}:{value} error")), 143 | )); 144 | } 145 | Ok(()) 146 | } 147 | } 148 | 149 | impl Drop for Config { 150 | fn drop(&mut self) { 151 | if self.config.is_some() { 152 | unsafe { ffi::duckdb_destroy_config(&mut self.config.unwrap()) }; 153 | } 154 | } 155 | } 156 | 157 | #[cfg(test)] 158 | mod test { 159 | use crate::{types::Value, Config, Connection, Result}; 160 | 161 | #[test] 162 | fn test_default_config() -> Result<()> { 163 | let config = Config::default(); 164 | let db = Connection::open_in_memory_with_flags(config)?; 165 | db.execute_batch("CREATE TABLE foo(x Text)")?; 166 | 167 | let mut stmt = db.prepare("INSERT INTO foo(x) VALUES (?)")?; 168 | stmt.execute([&"a"])?; 169 | stmt.execute([&"b"])?; 170 | stmt.execute([&"c"])?; 171 | stmt.execute([Value::Null])?; 172 | 173 | let val: Result<Vec<Option<String>>> = db 174 | .prepare("SELECT x FROM foo ORDER BY x")? 175 | .query_and_then([], |row| row.get(0))? 176 | .collect(); 177 | let val = val?; 178 | let mut iter = val.iter(); 179 | assert_eq!(val.len(), 4); 180 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "a"); 181 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "b"); 182 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "c"); 183 | assert!(iter.next().unwrap().is_none()); 184 | assert_eq!(iter.next(), None); 185 | 186 | Ok(()) 187 | } 188 | 189 | #[test] 190 | fn test_all_config() -> Result<()> { 191 | let config = Config::default() 192 | .access_mode(crate::AccessMode::ReadWrite)? 193 | .default_null_order(crate::DefaultNullOrder::NullsLast)? 194 | .default_order(crate::DefaultOrder::Desc)? 195 | .enable_external_access(true)? 196 | .enable_object_cache(false)? 197 | .enable_autoload_extension(true)? 198 | .allow_unsigned_extensions()? 199 | .custom_user_agent("test_user_agent")? 200 | .max_memory("2GB")? 201 | .threads(4)?
202 | .with("preserve_insertion_order", "true")?; 203 | 204 | let db = Connection::open_in_memory_with_flags(config)?; 205 | db.execute_batch("CREATE TABLE foo(x Text)")?; 206 | 207 | let mut stmt = db.prepare("INSERT INTO foo(x) VALUES (?)")?; 208 | stmt.execute([&"a"])?; 209 | stmt.execute([&"b"])?; 210 | stmt.execute([&"c"])?; 211 | stmt.execute([Value::Null])?; 212 | 213 | let val: Result>> = db 214 | .prepare("SELECT x FROM foo ORDER BY x")? 215 | .query_and_then([], |row| row.get(0))? 216 | .collect(); 217 | let val = val?; 218 | let mut iter = val.iter(); 219 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "c"); 220 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "b"); 221 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "a"); 222 | assert!(iter.next().unwrap().is_none()); 223 | assert_eq!(iter.next(), None); 224 | 225 | let user_agent: Result = db.query_row("PRAGMA USER_AGENT", [], |row| row.get(0)); 226 | let user_agent = user_agent.unwrap(); 227 | assert!(&user_agent.ends_with("rust test_user_agent")); 228 | 229 | Ok(()) 230 | } 231 | 232 | #[test] 233 | fn test_invalid_setting() -> Result<()> { 234 | let config = Config::default().with("some-invalid-setting", "true")?; 235 | let res = Connection::open_in_memory_with_flags(config); 236 | assert_eq!( 237 | res.unwrap_err().to_string(), 238 | "Invalid Input Error: The following options were not recognized: some-invalid-setting" 239 | ); 240 | Ok(()) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /crates/duckdb/src/core/data_chunk.rs: -------------------------------------------------------------------------------- 1 | use super::{ 2 | logical_type::LogicalTypeHandle, 3 | vector::{ArrayVector, FlatVector, ListVector, StructVector}, 4 | }; 5 | use crate::ffi::{ 6 | duckdb_create_data_chunk, duckdb_data_chunk, duckdb_data_chunk_get_column_count, duckdb_data_chunk_get_size, 7 | duckdb_data_chunk_get_vector, duckdb_data_chunk_set_size, duckdb_destroy_data_chunk, 8 | }; 9 | 10 | /// Handle to the DataChunk in DuckDB. 11 | pub struct DataChunkHandle { 12 | /// Pointer to the DataChunk in duckdb C API. 13 | ptr: duckdb_data_chunk, 14 | 15 | /// Whether this [DataChunkHandle] own the [DataChunk::ptr]. 16 | owned: bool, 17 | } 18 | 19 | impl Drop for DataChunkHandle { 20 | fn drop(&mut self) { 21 | if self.owned && !self.ptr.is_null() { 22 | unsafe { duckdb_destroy_data_chunk(&mut self.ptr) } 23 | self.ptr = std::ptr::null_mut(); 24 | } 25 | } 26 | } 27 | 28 | impl DataChunkHandle { 29 | #[allow(dead_code)] 30 | pub(crate) unsafe fn new_unowned(ptr: duckdb_data_chunk) -> Self { 31 | Self { ptr, owned: false } 32 | } 33 | 34 | /// Create a new [DataChunkHandle] with the given [LogicalTypeHandle]s. 35 | pub fn new(logical_types: &[LogicalTypeHandle]) -> Self { 36 | let num_columns = logical_types.len(); 37 | let mut c_types = logical_types.iter().map(|t| t.ptr).collect::>(); 38 | let ptr = unsafe { duckdb_create_data_chunk(c_types.as_mut_ptr(), num_columns as u64) }; 39 | DataChunkHandle { ptr, owned: true } 40 | } 41 | 42 | /// Get the vector at the specific column index: `idx`. 43 | pub fn flat_vector(&self, idx: usize) -> FlatVector { 44 | FlatVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) 45 | } 46 | 47 | /// Get a list vector from the column index. 
48 | pub fn list_vector(&self, idx: usize) -> ListVector { 49 | ListVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) 50 | } 51 | 52 | /// Get a array vector from the column index. 53 | pub fn array_vector(&self, idx: usize) -> ArrayVector { 54 | ArrayVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) 55 | } 56 | 57 | /// Get struct vector at the column index: `idx`. 58 | pub fn struct_vector(&self, idx: usize) -> StructVector { 59 | StructVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) 60 | } 61 | 62 | /// Set the size of the data chunk 63 | pub fn set_len(&self, new_len: usize) { 64 | unsafe { duckdb_data_chunk_set_size(self.ptr, new_len as u64) }; 65 | } 66 | 67 | /// Get the length / the number of rows in this [DataChunkHandle]. 68 | pub fn len(&self) -> usize { 69 | unsafe { duckdb_data_chunk_get_size(self.ptr) as usize } 70 | } 71 | 72 | /// Check whether this [DataChunkHandle] is empty. 73 | pub fn is_empty(&self) -> bool { 74 | self.len() == 0 75 | } 76 | 77 | /// Get the number of columns in this [DataChunkHandle]. 78 | pub fn num_columns(&self) -> usize { 79 | unsafe { duckdb_data_chunk_get_column_count(self.ptr) as usize } 80 | } 81 | 82 | /// Get the ptr of duckdb_data_chunk in this [DataChunkHandle]. 83 | pub fn get_ptr(&self) -> duckdb_data_chunk { 84 | self.ptr 85 | } 86 | } 87 | 88 | #[cfg(test)] 89 | mod test { 90 | use super::{super::logical_type::LogicalTypeId, *}; 91 | 92 | #[test] 93 | fn test_data_chunk_construction() { 94 | let dc = DataChunkHandle::new(&[LogicalTypeHandle::from(LogicalTypeId::Integer)]); 95 | 96 | assert_eq!(dc.num_columns(), 1); 97 | 98 | drop(dc); 99 | } 100 | 101 | #[test] 102 | fn test_vector() { 103 | let datachunk = DataChunkHandle::new(&[LogicalTypeHandle::from(LogicalTypeId::Bigint)]); 104 | let mut vector = datachunk.flat_vector(0); 105 | let data = vector.as_mut_slice::(); 106 | 107 | data[0] = 42; 108 | } 109 | 110 | #[test] 111 | fn test_logi() { 112 | let key = LogicalTypeHandle::from(LogicalTypeId::Varchar); 113 | 114 | let value = LogicalTypeHandle::from(LogicalTypeId::UTinyint); 115 | 116 | let map = LogicalTypeHandle::map(&key, &value); 117 | 118 | assert_eq!(map.id(), LogicalTypeId::Map); 119 | 120 | // let union_ = LogicalType::new_union_type(HashMap::from([ 121 | // ("number", LogicalType::new(LogicalTypeId::Bigint)), 122 | // ("string", LogicalType::new(LogicalTypeId::Varchar)), 123 | // ])); 124 | // assert_eq!(union_.type_id(), LogicalTypeId::Union); 125 | 126 | // let struct_ = LogicalType::new_struct_type(HashMap::from([ 127 | // ("number", LogicalType::new(LogicalTypeId::Bigint)), 128 | // ("string", LogicalType::new(LogicalTypeId::Varchar)), 129 | // ])); 130 | // assert_eq!(struct_.type_id(), LogicalTypeId::Struct); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /crates/duckdb/src/core/mod.rs: -------------------------------------------------------------------------------- 1 | mod data_chunk; 2 | mod logical_type; 3 | mod vector; 4 | 5 | pub use data_chunk::DataChunkHandle; 6 | pub use logical_type::{LogicalTypeHandle, LogicalTypeId}; 7 | pub use vector::*; 8 | -------------------------------------------------------------------------------- /crates/duckdb/src/extension.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod test { 3 | use crate::{Connection, Result}; 4 | 5 | // https://duckdb.org/docs/extensions/json 6 | #[test] 7 | fn 
test_extension_json() -> Result<()> { 8 | let db = Connection::open_in_memory()?; 9 | assert_eq!( 10 | 4, 11 | db.query_row::( 12 | r#"SELECT json_array_length('["duck","goose","swan",null]');"#, 13 | [], 14 | |r| r.get(0) 15 | )? 16 | ); 17 | Ok(()) 18 | } 19 | 20 | // https://duckdb.org/docs/data/parquet/overview.html 21 | #[test] 22 | fn test_extension_parquet() -> Result<()> { 23 | let db = Connection::open_in_memory()?; 24 | assert_eq!( 25 | 300f32, 26 | db.query_row::( 27 | r#"SELECT SUM(value) FROM read_parquet('./examples/int32_decimal.parquet');"#, 28 | [], 29 | |r| r.get(0) 30 | )? 31 | ); 32 | Ok(()) 33 | } 34 | 35 | #[test] 36 | fn test_extension_remote_parquet() -> Result<()> { 37 | let db = Connection::open_in_memory()?; 38 | assert_eq!( 39 | 9i64, 40 | db.query_row::( 41 | r#"SELECT count(*) FROM read_parquet('https://duckdb.org/data/prices.parquet');"#, 42 | [], 43 | |r| r.get(0) 44 | )? 45 | ); 46 | Ok(()) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /crates/duckdb/src/inner_connection.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | ffi::{c_void, CStr, CString}, 3 | mem, 4 | os::raw::c_char, 5 | ptr, str, 6 | }; 7 | 8 | use super::{ffi, Appender, Config, Connection, Result}; 9 | use crate::{ 10 | error::{result_from_duckdb_appender, result_from_duckdb_arrow, result_from_duckdb_prepare, Error}, 11 | raw_statement::RawStatement, 12 | statement::Statement, 13 | }; 14 | 15 | pub struct InnerConnection { 16 | pub db: ffi::duckdb_database, 17 | pub con: ffi::duckdb_connection, 18 | owned: bool, 19 | } 20 | 21 | impl InnerConnection { 22 | #[inline] 23 | pub unsafe fn new(db: ffi::duckdb_database, owned: bool) -> Result { 24 | let mut con: ffi::duckdb_connection = ptr::null_mut(); 25 | let r = ffi::duckdb_connect(db, &mut con); 26 | if r != ffi::DuckDBSuccess { 27 | ffi::duckdb_disconnect(&mut con); 28 | return Err(Error::DuckDBFailure( 29 | ffi::Error::new(r), 30 | Some("connect error".to_owned()), 31 | )); 32 | } 33 | Ok(InnerConnection { db, con, owned }) 34 | } 35 | 36 | pub fn open_with_flags(c_path: &CStr, config: Config) -> Result { 37 | unsafe { 38 | let mut db: ffi::duckdb_database = ptr::null_mut(); 39 | let mut c_err = std::ptr::null_mut(); 40 | let r = ffi::duckdb_open_ext(c_path.as_ptr(), &mut db, config.duckdb_config(), &mut c_err); 41 | if r != ffi::DuckDBSuccess { 42 | let msg = Some(CStr::from_ptr(c_err).to_string_lossy().to_string()); 43 | ffi::duckdb_free(c_err as *mut c_void); 44 | return Err(Error::DuckDBFailure(ffi::Error::new(r), msg)); 45 | } 46 | InnerConnection::new(db, true) 47 | } 48 | } 49 | 50 | pub fn close(&mut self) -> Result<()> { 51 | if self.db.is_null() { 52 | return Ok(()); 53 | } 54 | if self.con.is_null() { 55 | return Ok(()); 56 | } 57 | unsafe { 58 | ffi::duckdb_disconnect(&mut self.con); 59 | self.con = ptr::null_mut(); 60 | 61 | if self.owned { 62 | ffi::duckdb_close(&mut self.db); 63 | self.db = ptr::null_mut(); 64 | } 65 | } 66 | Ok(()) 67 | } 68 | 69 | /// Creates a new connection to the already-opened database. 
70 | pub fn try_clone(&self) -> Result { 71 | unsafe { InnerConnection::new(self.db, false) } 72 | } 73 | 74 | pub fn execute(&mut self, sql: &str) -> Result<()> { 75 | let c_str = CString::new(sql).unwrap(); 76 | unsafe { 77 | let mut out = mem::zeroed(); 78 | let r = ffi::duckdb_query_arrow(self.con, c_str.as_ptr() as *const c_char, &mut out); 79 | result_from_duckdb_arrow(r, out)?; 80 | ffi::duckdb_destroy_arrow(&mut out); 81 | Ok(()) 82 | } 83 | } 84 | 85 | pub fn prepare<'a>(&mut self, conn: &'a Connection, sql: &str) -> Result> { 86 | let mut c_stmt: ffi::duckdb_prepared_statement = ptr::null_mut(); 87 | let c_str = CString::new(sql).unwrap(); 88 | let r = unsafe { ffi::duckdb_prepare(self.con, c_str.as_ptr() as *const c_char, &mut c_stmt) }; 89 | result_from_duckdb_prepare(r, c_stmt)?; 90 | Ok(Statement::new(conn, unsafe { RawStatement::new(c_stmt) })) 91 | } 92 | 93 | pub fn appender<'a>(&mut self, conn: &'a Connection, table: &str, schema: &str) -> Result> { 94 | let mut c_app: ffi::duckdb_appender = ptr::null_mut(); 95 | let c_table = CString::new(table).unwrap(); 96 | let c_schema = CString::new(schema).unwrap(); 97 | let r = unsafe { 98 | ffi::duckdb_appender_create( 99 | self.con, 100 | c_schema.as_ptr() as *const c_char, 101 | c_table.as_ptr() as *const c_char, 102 | &mut c_app, 103 | ) 104 | }; 105 | result_from_duckdb_appender(r, &mut c_app)?; 106 | Ok(Appender::new(conn, c_app)) 107 | } 108 | 109 | #[inline] 110 | pub fn is_autocommit(&self) -> bool { 111 | true 112 | } 113 | } 114 | 115 | impl Drop for InnerConnection { 116 | #[allow(unused_must_use)] 117 | #[inline] 118 | fn drop(&mut self) { 119 | use std::thread::panicking; 120 | if let Err(e) = self.close() { 121 | if panicking() { 122 | eprintln!("Error while closing DuckDB connection: {e:?}"); 123 | } else { 124 | panic!("Error while closing DuckDB connection: {e:?}"); 125 | } 126 | } 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /crates/duckdb/src/polars_dataframe.rs: -------------------------------------------------------------------------------- 1 | use polars::prelude::DataFrame; 2 | 3 | use super::{arrow::datatypes::SchemaRef, Statement}; 4 | 5 | /// An handle for the resulting Polars DataFrame of a query. 
6 | #[must_use = "Polars is lazy and will do nothing unless consumed"] 7 | pub struct Polars<'stmt> { 8 | pub(crate) stmt: Option<&'stmt Statement<'stmt>>, 9 | } 10 | 11 | impl<'stmt> Polars<'stmt> { 12 | #[inline] 13 | pub(crate) fn new(stmt: &'stmt Statement<'stmt>) -> Polars<'stmt> { 14 | Polars { stmt: Some(stmt) } 15 | } 16 | 17 | /// return arrow schema 18 | #[inline] 19 | pub fn get_schema(&self) -> SchemaRef { 20 | self.stmt.unwrap().stmt.schema() 21 | } 22 | } 23 | 24 | #[allow(clippy::needless_lifetimes)] 25 | impl<'stmt> Iterator for Polars<'stmt> { 26 | type Item = DataFrame; 27 | 28 | fn next(&mut self) -> Option { 29 | let struct_array = self.stmt?.step2()?; 30 | let df = DataFrame::try_from(struct_array).expect("Failed to construct DataFrame from StructArray"); 31 | 32 | Some(df) 33 | } 34 | } 35 | 36 | #[cfg(test)] 37 | mod tests { 38 | use polars::prelude::*; 39 | use polars_core::utils::accumulate_dataframes_vertical_unchecked; 40 | 41 | use crate::{test::checked_memory_handle, Result}; 42 | 43 | #[test] 44 | fn test_query_polars_small() -> Result<()> { 45 | let db = checked_memory_handle(); 46 | let sql = "BEGIN TRANSACTION; 47 | CREATE TABLE test(t INTEGER); 48 | INSERT INTO test VALUES (1); INSERT INTO test VALUES (2); INSERT INTO test VALUES (3); INSERT INTO test VALUES (4); INSERT INTO test VALUES (5); 49 | END TRANSACTION;"; 50 | db.execute_batch(sql)?; 51 | let mut stmt = db.prepare("select t from test order by t desc")?; 52 | let mut polars = stmt.query_polars([])?; 53 | 54 | let df = polars.next().expect("Failed to get DataFrame"); 55 | assert_eq!( 56 | df, 57 | df! ( 58 | "t" => [5i32, 4, 3, 2, 1], 59 | ) 60 | .expect("Failed to construct DataFrame") 61 | ); 62 | assert!(polars.next().is_none()); 63 | 64 | Ok(()) 65 | } 66 | 67 | #[test] 68 | fn test_query_polars_large() -> Result<()> { 69 | let db = checked_memory_handle(); 70 | db.execute_batch("BEGIN TRANSACTION")?; 71 | db.execute_batch("CREATE TABLE test(t INTEGER);")?; 72 | 73 | for _ in 0..600 { 74 | db.execute_batch("INSERT INTO test VALUES (1); INSERT INTO test VALUES (2); INSERT INTO test VALUES (3); INSERT INTO test VALUES (4); INSERT INTO test VALUES (5);")?; 75 | } 76 | 77 | db.execute_batch("END TRANSACTION")?; 78 | let mut stmt = db.prepare("select t from test order by t")?; 79 | let pl = stmt.query_polars([])?; 80 | 81 | let df = accumulate_dataframes_vertical_unchecked(pl); 82 | assert_eq!(df.height(), 3000); 83 | assert_eq!(df.column("t").unwrap().i32().unwrap().sum().unwrap(), 9000); 84 | 85 | Ok(()) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /crates/duckdb/src/r2d2.rs: -------------------------------------------------------------------------------- 1 | #![deny(warnings)] 2 | //! # Duckdb-rs support for the `r2d2` connection pool. 3 | //! 4 | //! 5 | //! Integrated with: [r2d2](https://crates.io/crates/r2d2) 6 | //! 7 | //! 8 | //! ## Example 9 | //! 10 | //! ```rust,no_run 11 | //! extern crate r2d2; 12 | //! extern crate duckdb; 13 | //! 14 | //! 15 | //! use std::thread; 16 | //! use duckdb::{DuckdbConnectionManager, params}; 17 | //! 18 | //! 19 | //! fn main() { 20 | //! let manager = DuckdbConnectionManager::file("file.db").unwrap(); 21 | //! let pool = r2d2::Pool::new(manager).unwrap(); 22 | //! pool.get() 23 | //! .unwrap() 24 | //! .execute("CREATE TABLE IF NOT EXISTS foo (bar INTEGER)", params![]) 25 | //! .unwrap(); 26 | //! 27 | //! (0..10) 28 | //! .map(|i| { 29 | //! let pool = pool.clone(); 30 | //! 
thread::spawn(move || { 31 | //! let conn = pool.get().unwrap(); 32 | //! conn.execute("INSERT INTO foo (bar) VALUES (?)", &[&i]) 33 | //! .unwrap(); 34 | //! }) 35 | //! }) 36 | //! .collect::>() 37 | //! .into_iter() 38 | //! .map(thread::JoinHandle::join) 39 | //! .collect::>() 40 | //! .unwrap() 41 | //! } 42 | //! ``` 43 | use crate::{Config, Connection, Error, Result}; 44 | use std::{ 45 | path::Path, 46 | sync::{Arc, Mutex}, 47 | }; 48 | 49 | #[cfg(feature = "vscalar")] 50 | use crate::vscalar::VScalar; 51 | #[cfg(feature = "vscalar")] 52 | use std::fmt::Debug; 53 | 54 | #[cfg(feature = "vtab")] 55 | use crate::vtab::VTab; 56 | 57 | /// An `r2d2::ManageConnection` for `duckdb::Connection`s. 58 | pub struct DuckdbConnectionManager { 59 | connection: Arc>, 60 | } 61 | 62 | impl DuckdbConnectionManager { 63 | /// Creates a new `DuckdbConnectionManager` from file. 64 | pub fn file>(path: P) -> Result { 65 | Ok(Self { 66 | connection: Arc::new(Mutex::new(Connection::open(path)?)), 67 | }) 68 | } 69 | /// Creates a new `DuckdbConnectionManager` from file with flags. 70 | pub fn file_with_flags>(path: P, config: Config) -> Result { 71 | Ok(Self { 72 | connection: Arc::new(Mutex::new(Connection::open_with_flags(path, config)?)), 73 | }) 74 | } 75 | 76 | /// Creates a new `DuckdbConnectionManager` from memory. 77 | pub fn memory() -> Result { 78 | Ok(Self { 79 | connection: Arc::new(Mutex::new(Connection::open_in_memory()?)), 80 | }) 81 | } 82 | 83 | /// Creates a new `DuckdbConnectionManager` from memory with flags. 84 | pub fn memory_with_flags(config: Config) -> Result { 85 | Ok(Self { 86 | connection: Arc::new(Mutex::new(Connection::open_in_memory_with_flags(config)?)), 87 | }) 88 | } 89 | 90 | /// Register a table function. 91 | #[cfg(feature = "vtab")] 92 | pub fn register_table_function(&self, name: &str) -> Result<()> { 93 | let conn = self.connection.lock().unwrap(); 94 | conn.register_table_function::(name) 95 | } 96 | 97 | /// Register a scalar function. 
98 | #[cfg(feature = "vscalar")] 99 | pub fn register_scalar_function(&self, name: &str) -> Result<()> 100 | where 101 | S::State: Debug, 102 | { 103 | let conn = self.connection.lock().unwrap(); 104 | conn.register_scalar_function::(name) 105 | } 106 | } 107 | 108 | impl r2d2::ManageConnection for DuckdbConnectionManager { 109 | type Connection = Connection; 110 | type Error = Error; 111 | 112 | fn connect(&self) -> Result { 113 | let conn = self.connection.lock().unwrap(); 114 | conn.try_clone() 115 | } 116 | 117 | fn is_valid(&self, conn: &mut Self::Connection) -> Result<(), Self::Error> { 118 | conn.execute_batch("") 119 | } 120 | 121 | fn has_broken(&self, _: &mut Self::Connection) -> bool { 122 | false 123 | } 124 | } 125 | 126 | #[cfg(test)] 127 | mod test { 128 | extern crate r2d2; 129 | use super::*; 130 | use crate::types::Value; 131 | use std::{sync::mpsc, thread}; 132 | 133 | use tempdir::TempDir; 134 | 135 | #[test] 136 | fn test_basic() -> Result<()> { 137 | let manager = DuckdbConnectionManager::file(":memory:")?; 138 | let pool = r2d2::Pool::builder().max_size(2).build(manager).unwrap(); 139 | 140 | let (s1, r1) = mpsc::channel(); 141 | let (s2, r2) = mpsc::channel(); 142 | 143 | let pool1 = pool.clone(); 144 | let t1 = thread::spawn(move || { 145 | let conn = pool1.get().unwrap(); 146 | s1.send(()).unwrap(); 147 | r2.recv().unwrap(); 148 | drop(conn); 149 | }); 150 | 151 | let pool2 = pool.clone(); 152 | let t2 = thread::spawn(move || { 153 | let conn = pool2.get().unwrap(); 154 | s2.send(()).unwrap(); 155 | r1.recv().unwrap(); 156 | drop(conn); 157 | }); 158 | 159 | t1.join().unwrap(); 160 | t2.join().unwrap(); 161 | 162 | pool.get().unwrap(); 163 | Ok(()) 164 | } 165 | 166 | #[test] 167 | fn test_file() -> Result<()> { 168 | let manager = DuckdbConnectionManager::file(":memory:")?; 169 | let pool = r2d2::Pool::builder().max_size(2).build(manager).unwrap(); 170 | 171 | let (s1, r1) = mpsc::channel(); 172 | let (s2, r2) = mpsc::channel(); 173 | 174 | let pool1 = pool.clone(); 175 | let t1 = thread::spawn(move || { 176 | let conn = pool1.get().unwrap(); 177 | s1.send(()).unwrap(); 178 | r2.recv().unwrap(); 179 | drop(conn); 180 | }); 181 | 182 | let pool2 = pool.clone(); 183 | let t2 = thread::spawn(move || { 184 | let conn = pool2.get().unwrap(); 185 | s2.send(()).unwrap(); 186 | r1.recv().unwrap(); 187 | drop(conn); 188 | }); 189 | 190 | t1.join().unwrap(); 191 | t2.join().unwrap(); 192 | 193 | pool.get().unwrap(); 194 | Ok(()) 195 | } 196 | 197 | #[test] 198 | fn test_is_valid() -> Result<()> { 199 | let manager = DuckdbConnectionManager::file(":memory:")?; 200 | let pool = r2d2::Pool::builder() 201 | .max_size(1) 202 | .test_on_check_out(true) 203 | .build(manager) 204 | .unwrap(); 205 | 206 | pool.get().unwrap(); 207 | Ok(()) 208 | } 209 | 210 | #[test] 211 | fn test_error_handling() -> Result<()> { 212 | //! We specify a directory as a database. This is bound to fail. 213 | let dir = TempDir::new("r2d2-duckdb").expect("Could not create temporary directory"); 214 | let dirpath = dir.path().to_str().unwrap(); 215 | assert!(DuckdbConnectionManager::file(dirpath).is_err()); 216 | Ok(()) 217 | } 218 | 219 | #[test] 220 | fn test_with_flags() -> Result<()> { 221 | let config = Config::default() 222 | .access_mode(crate::AccessMode::ReadWrite)? 223 | .default_null_order(crate::DefaultNullOrder::NullsLast)? 224 | .default_order(crate::DefaultOrder::Desc)? 225 | .enable_external_access(true)? 226 | .enable_object_cache(false)? 227 | .max_memory("2GB")? 
228 | .threads(4)?; 229 | let manager = DuckdbConnectionManager::file_with_flags(":memory:", config)?; 230 | let pool = r2d2::Pool::builder().max_size(2).build(manager).unwrap(); 231 | let conn = pool.get().unwrap(); 232 | conn.execute_batch("CREATE TABLE foo(x Text)")?; 233 | 234 | let mut stmt = conn.prepare("INSERT INTO foo(x) VALUES (?)")?; 235 | stmt.execute([&"a"])?; 236 | stmt.execute([&"b"])?; 237 | stmt.execute([&"c"])?; 238 | stmt.execute([Value::Null])?; 239 | 240 | let val: Result>> = conn 241 | .prepare("SELECT x FROM foo ORDER BY x")? 242 | .query_and_then([], |row| row.get(0))? 243 | .collect(); 244 | let val = val?; 245 | let mut iter = val.iter(); 246 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "c"); 247 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "b"); 248 | assert_eq!(iter.next().unwrap().as_ref().unwrap(), "a"); 249 | assert!(iter.next().unwrap().is_none()); 250 | assert_eq!(iter.next(), None); 251 | 252 | Ok(()) 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /crates/duckdb/src/types/ordered_map.rs: -------------------------------------------------------------------------------- 1 | /// An ordered map of key-value pairs. 2 | #[derive(Clone, Debug, Eq, PartialEq)] 3 | pub struct OrderedMap(Vec<(K, V)>); 4 | 5 | impl From> for OrderedMap { 6 | fn from(value: Vec<(K, V)>) -> Self { 7 | OrderedMap(value) 8 | } 9 | } 10 | 11 | impl OrderedMap { 12 | /// Returns the value corresponding to the key. 13 | pub fn get(&self, key: &K) -> Option<&V> { 14 | self.0.iter().find(|(k, _)| k == key).map(|(_, v)| v) 15 | } 16 | /// Returns an iterator over the keys in the map. 17 | pub fn keys(&self) -> impl Iterator { 18 | self.0.iter().map(|(k, _)| k) 19 | } 20 | /// Returns an iterator over the values in the map. 21 | pub fn values(&self) -> impl Iterator { 22 | self.0.iter().map(|(_, v)| v) 23 | } 24 | /// Returns an iterator over the key-value pairs in the map. 25 | pub fn iter(&self) -> impl Iterator { 26 | self.0.iter() 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /crates/duckdb/src/types/serde_json.rs: -------------------------------------------------------------------------------- 1 | //! [`ToSql`] and [`FromSql`] implementation for JSON `Value`. 2 | 3 | use serde_json::Value; 4 | 5 | use crate::{ 6 | types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef}, 7 | Result, 8 | }; 9 | 10 | /// Serialize JSON `Value` to text. 11 | impl ToSql for Value { 12 | #[inline] 13 | fn to_sql(&self) -> Result> { 14 | Ok(ToSqlOutput::from(serde_json::to_string(self).unwrap())) 15 | } 16 | } 17 | 18 | /// Deserialize text/blob to JSON `Value`. 
19 | impl FromSql for Value { 20 | #[inline] 21 | fn column_result(value: ValueRef<'_>) -> FromSqlResult { 22 | match value { 23 | ValueRef::Text(s) => serde_json::from_slice(s), 24 | ValueRef::Blob(b) => serde_json::from_slice(b), 25 | _ => return Err(FromSqlError::InvalidType), 26 | } 27 | .map_err(|err| FromSqlError::Other(Box::new(err))) 28 | } 29 | } 30 | 31 | #[cfg(test)] 32 | mod test { 33 | use crate::{types::ToSql, Connection, Result}; 34 | 35 | fn checked_memory_handle() -> Result { 36 | let db = Connection::open_in_memory()?; 37 | db.execute_batch("CREATE TABLE foo (t TEXT, b BLOB)")?; 38 | Ok(db) 39 | } 40 | 41 | #[test] 42 | fn test_json_value() -> Result<()> { 43 | let db = checked_memory_handle()?; 44 | 45 | let json = r#"{"foo": 13, "bar": "baz"}"#; 46 | let data: serde_json::Value = serde_json::from_str(json).unwrap(); 47 | db.execute( 48 | "INSERT INTO foo (t, b) VALUES (?, ?)", 49 | [&data as &dyn ToSql, &json.as_bytes()], 50 | )?; 51 | 52 | let t: serde_json::Value = db.query_row("SELECT t FROM foo", [], |r| r.get(0))?; 53 | assert_eq!(data, t); 54 | let b: serde_json::Value = db.query_row("SELECT b FROM foo", [], |r| r.get(0))?; 55 | assert_eq!(data, b); 56 | Ok(()) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /crates/duckdb/src/types/string.rs: -------------------------------------------------------------------------------- 1 | use libduckdb_sys::{duckdb_string_t, duckdb_string_t_data, duckdb_string_t_length}; 2 | 3 | /// Wrapper for underlying duck string type with a lifetime bound to a &mut duckdb_string_t 4 | pub struct DuckString<'a> { 5 | ptr: &'a mut duckdb_string_t, 6 | } 7 | 8 | impl<'a> DuckString<'a> { 9 | /// Creates a DuckString from the underlying duck string type 10 | #[allow(dead_code)] 11 | pub fn new(ptr: &'a mut duckdb_string_t) -> Self { 12 | DuckString { ptr } 13 | } 14 | } 15 | 16 | impl<'a> DuckString<'a> { 17 | /// convert duckdb_string_t to a copy on write string 18 | pub fn as_str(&mut self) -> std::borrow::Cow<'a, str> { 19 | String::from_utf8_lossy(self.as_bytes()) 20 | } 21 | 22 | /// convert duckdb_string_t to a byte slice 23 | pub fn as_bytes(&mut self) -> &'a [u8] { 24 | unsafe { 25 | let len = duckdb_string_t_length(*self.ptr); 26 | let c_ptr = duckdb_string_t_data(self.ptr); 27 | std::slice::from_raw_parts(c_ptr as *const u8, len as usize) 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /crates/duckdb/src/types/to_sql.rs: -------------------------------------------------------------------------------- 1 | use super::{Null, TimeUnit, Value, ValueRef}; 2 | use crate::Result; 3 | use std::borrow::Cow; 4 | 5 | /// `ToSqlOutput` represents the possible output types for implementers of the 6 | /// [`ToSql`] trait. 7 | #[derive(Clone, Debug, PartialEq)] 8 | #[non_exhaustive] 9 | pub enum ToSqlOutput<'a> { 10 | /// A borrowed SQLite-representable value. 11 | Borrowed(ValueRef<'a>), 12 | 13 | /// An owned SQLite-representable value. 14 | Owned(Value), 15 | } 16 | 17 | // Generically allow any type that can be converted into a ValueRef 18 | // to be converted into a ToSqlOutput as well. 
19 | impl<'a, T: ?Sized> From<&'a T> for ToSqlOutput<'a> 20 | where 21 | &'a T: Into>, 22 | { 23 | #[inline] 24 | fn from(t: &'a T) -> Self { 25 | ToSqlOutput::Borrowed(t.into()) 26 | } 27 | } 28 | 29 | // We cannot also generically allow any type that can be converted 30 | // into a Value to be converted into a ToSqlOutput because of 31 | // coherence rules (https://github.com/rust-lang/rust/pull/46192), 32 | // so we'll manually implement it for all the types we know can 33 | // be converted into Values. 34 | macro_rules! from_value( 35 | ($t:ty) => ( 36 | impl From<$t> for ToSqlOutput<'_> { 37 | #[inline] 38 | fn from(t: $t) -> Self { ToSqlOutput::Owned(t.into())} 39 | } 40 | ) 41 | ); 42 | from_value!(String); 43 | from_value!(Null); 44 | from_value!(bool); 45 | from_value!(i8); 46 | from_value!(i16); 47 | from_value!(i32); 48 | from_value!(i64); 49 | from_value!(i128); 50 | from_value!(isize); 51 | from_value!(u8); 52 | from_value!(u16); 53 | from_value!(u32); 54 | from_value!(u64); 55 | from_value!(usize); 56 | from_value!(f32); 57 | from_value!(f64); 58 | from_value!(Vec); 59 | 60 | #[cfg(feature = "uuid")] 61 | from_value!(uuid::Uuid); 62 | 63 | impl ToSql for ToSqlOutput<'_> { 64 | #[inline] 65 | fn to_sql(&self) -> Result> { 66 | Ok(match *self { 67 | ToSqlOutput::Borrowed(v) => ToSqlOutput::Borrowed(v), 68 | ToSqlOutput::Owned(ref v) => ToSqlOutput::Borrowed(ValueRef::from(v)), 69 | }) 70 | } 71 | } 72 | 73 | /// A trait for types that can be converted into DuckDB values. Returns 74 | /// [`Error::ToSqlConversionFailure`] if the conversion fails. 75 | pub trait ToSql { 76 | /// Converts Rust value to DuckDB value 77 | fn to_sql(&self) -> Result>; 78 | } 79 | 80 | impl ToSql for Cow<'_, T> { 81 | #[inline] 82 | fn to_sql(&self) -> Result> { 83 | self.as_ref().to_sql() 84 | } 85 | } 86 | 87 | impl ToSql for Box { 88 | #[inline] 89 | fn to_sql(&self) -> Result> { 90 | self.as_ref().to_sql() 91 | } 92 | } 93 | 94 | impl ToSql for std::rc::Rc { 95 | #[inline] 96 | fn to_sql(&self) -> Result> { 97 | self.as_ref().to_sql() 98 | } 99 | } 100 | 101 | impl ToSql for std::sync::Arc { 102 | #[inline] 103 | fn to_sql(&self) -> Result> { 104 | self.as_ref().to_sql() 105 | } 106 | } 107 | 108 | // We should be able to use a generic impl like this: 109 | // 110 | // impl ToSql for T where T: Into { 111 | // fn to_sql(&self) -> Result { 112 | // Ok(ToSqlOutput::from((*self).into())) 113 | // } 114 | // } 115 | // 116 | // instead of the following macro, but this runs afoul of 117 | // https://github.com/rust-lang/rust/issues/30191 and reports conflicting 118 | // implementations even when there aren't any. 119 | 120 | macro_rules! 
to_sql_self( 121 | ($t:ty) => ( 122 | impl ToSql for $t { 123 | #[inline] 124 | fn to_sql(&self) -> Result> { 125 | Ok(ToSqlOutput::from(*self)) 126 | } 127 | } 128 | ) 129 | ); 130 | 131 | to_sql_self!(Null); 132 | to_sql_self!(bool); 133 | to_sql_self!(i8); 134 | to_sql_self!(i16); 135 | to_sql_self!(i32); 136 | to_sql_self!(i64); 137 | to_sql_self!(i128); 138 | to_sql_self!(isize); 139 | to_sql_self!(u8); 140 | to_sql_self!(u16); 141 | to_sql_self!(u32); 142 | to_sql_self!(f32); 143 | to_sql_self!(f64); 144 | to_sql_self!(u64); 145 | to_sql_self!(usize); 146 | 147 | #[cfg(feature = "uuid")] 148 | to_sql_self!(uuid::Uuid); 149 | 150 | impl ToSql for &'_ T 151 | where 152 | T: ToSql, 153 | { 154 | #[inline] 155 | fn to_sql(&self) -> Result> { 156 | (*self).to_sql() 157 | } 158 | } 159 | 160 | impl ToSql for String { 161 | #[inline] 162 | fn to_sql(&self) -> Result> { 163 | Ok(ToSqlOutput::from(self.as_str())) 164 | } 165 | } 166 | 167 | impl ToSql for str { 168 | #[inline] 169 | fn to_sql(&self) -> Result> { 170 | Ok(ToSqlOutput::from(self)) 171 | } 172 | } 173 | 174 | impl ToSql for Vec { 175 | #[inline] 176 | fn to_sql(&self) -> Result> { 177 | Ok(ToSqlOutput::from(self.as_slice())) 178 | } 179 | } 180 | 181 | impl ToSql for [u8] { 182 | #[inline] 183 | fn to_sql(&self) -> Result> { 184 | Ok(ToSqlOutput::from(self)) 185 | } 186 | } 187 | 188 | impl ToSql for Value { 189 | #[inline] 190 | fn to_sql(&self) -> Result> { 191 | Ok(ToSqlOutput::from(self)) 192 | } 193 | } 194 | 195 | impl ToSql for Option { 196 | #[inline] 197 | fn to_sql(&self) -> Result> { 198 | match *self { 199 | None => Ok(ToSqlOutput::from(Null)), 200 | Some(ref t) => t.to_sql(), 201 | } 202 | } 203 | } 204 | 205 | impl ToSql for std::time::Duration { 206 | fn to_sql(&self) -> crate::Result> { 207 | Ok(ToSqlOutput::Owned(Value::Timestamp( 208 | TimeUnit::Microsecond, 209 | self.as_micros() as i64, 210 | ))) 211 | } 212 | } 213 | 214 | #[cfg(test)] 215 | mod test { 216 | use super::ToSql; 217 | 218 | fn is_to_sql() {} 219 | 220 | #[test] 221 | fn test_integral_types() { 222 | is_to_sql::(); 223 | is_to_sql::(); 224 | is_to_sql::(); 225 | is_to_sql::(); 226 | is_to_sql::(); 227 | is_to_sql::(); 228 | is_to_sql::(); 229 | } 230 | 231 | #[test] 232 | fn test_cow_str() { 233 | use std::borrow::Cow; 234 | let s = "str"; 235 | let cow: Cow = Cow::Borrowed(s); 236 | let r = cow.to_sql(); 237 | assert!(r.is_ok()); 238 | let cow: Cow = Cow::Owned::(String::from(s)); 239 | let r = cow.to_sql(); 240 | assert!(r.is_ok()); 241 | // Ensure this compiles. 
242 | let _p: &[&dyn ToSql] = crate::params![cow]; 243 | } 244 | 245 | #[test] 246 | fn test_box_dyn() { 247 | let s: Box = Box::new("Hello world!"); 248 | let _s: &[&dyn ToSql] = crate::params![s]; 249 | let r = ToSql::to_sql(&s); 250 | 251 | assert!(r.is_ok()); 252 | } 253 | 254 | #[test] 255 | fn test_box_deref() { 256 | let s: Box = "Hello world!".into(); 257 | let _s: &[&dyn ToSql] = crate::params![s]; 258 | let r = s.to_sql(); 259 | 260 | assert!(r.is_ok()); 261 | } 262 | 263 | #[test] 264 | fn test_box_direct() { 265 | let s: Box = "Hello world!".into(); 266 | let _s: &[&dyn ToSql] = crate::params![s]; 267 | let r = ToSql::to_sql(&s); 268 | 269 | assert!(r.is_ok()); 270 | } 271 | 272 | #[test] 273 | fn test_cells() { 274 | use std::{rc::Rc, sync::Arc}; 275 | 276 | let source_str: Box = "Hello world!".into(); 277 | 278 | let s: Rc> = Rc::new(source_str.clone()); 279 | let _s: &[&dyn ToSql] = crate::params![s]; 280 | let r = s.to_sql(); 281 | assert!(r.is_ok()); 282 | 283 | let s: Arc> = Arc::new(source_str.clone()); 284 | let _s: &[&dyn ToSql] = crate::params![s]; 285 | let r = s.to_sql(); 286 | assert!(r.is_ok()); 287 | 288 | let s: Arc = Arc::from(&*source_str); 289 | let _s: &[&dyn ToSql] = crate::params![s]; 290 | let r = s.to_sql(); 291 | assert!(r.is_ok()); 292 | 293 | let s: Arc = Arc::new(source_str.clone()); 294 | let _s: &[&dyn ToSql] = crate::params![s]; 295 | let r = s.to_sql(); 296 | assert!(r.is_ok()); 297 | 298 | let s: Rc = Rc::from(&*source_str); 299 | let _s: &[&dyn ToSql] = crate::params![s]; 300 | let r = s.to_sql(); 301 | assert!(r.is_ok()); 302 | 303 | let s: Rc = Rc::new(source_str); 304 | let _s: &[&dyn ToSql] = crate::params![s]; 305 | let r = s.to_sql(); 306 | assert!(r.is_ok()); 307 | } 308 | 309 | // Use gen_random_uuid() to generate uuid 310 | #[test] 311 | fn test_uuid_gen() -> crate::Result<()> { 312 | use crate::Connection; 313 | 314 | let db = Connection::open_in_memory()?; 315 | db.execute_batch("CREATE TABLE foo (id uuid NOT NULL);")?; 316 | 317 | db.execute("INSERT INTO foo (id) VALUES (gen_random_uuid())", [])?; 318 | 319 | let mut stmt = db.prepare("SELECT id FROM foo")?; 320 | let mut rows = stmt.query([])?; 321 | let row = rows.next()?.unwrap(); 322 | let found_id: String = row.get_unwrap(0); 323 | assert_eq!(found_id.len(), 36); 324 | Ok(()) 325 | } 326 | 327 | #[cfg(feature = "uuid")] 328 | #[test] 329 | fn test_uuid_blob_type() -> crate::Result<()> { 330 | use crate::{params, Connection}; 331 | use uuid::Uuid; 332 | 333 | let db = Connection::open_in_memory()?; 334 | db.execute_batch("CREATE TABLE foo (id BLOB CONSTRAINT uuidchk CHECK (octet_length(id) = 16), label TEXT);")?; 335 | 336 | let id = Uuid::new_v4(); 337 | let id_vec = id.as_bytes().to_vec(); 338 | db.execute("INSERT INTO foo (id, label) VALUES (?, ?)", params![id_vec, "target"])?; 339 | 340 | let mut stmt = db.prepare("SELECT id, label FROM foo WHERE id = ?")?; 341 | let mut rows = stmt.query(params![id_vec])?; 342 | let row = rows.next()?.unwrap(); 343 | let found_id: Uuid = row.get_unwrap(0); 344 | let found_label: String = row.get_unwrap(1); 345 | assert_eq!(found_id, id); 346 | assert_eq!(found_label, "target"); 347 | Ok(()) 348 | } 349 | 350 | #[cfg(feature = "uuid")] 351 | #[test] 352 | fn test_uuid_type() -> crate::Result<()> { 353 | use crate::{params, Connection}; 354 | use uuid::Uuid; 355 | 356 | let db = Connection::open_in_memory()?; 357 | db.execute_batch("CREATE TABLE foo (id uuid, label TEXT);")?; 358 | 359 | let id = Uuid::new_v4(); 360 | db.execute("INSERT INTO 
foo (id, label) VALUES (?, ?)", params![id, "target"])?; 361 | 362 | let mut stmt = db.prepare("SELECT id, label FROM foo WHERE id = ?")?; 363 | let mut rows = stmt.query(params![id])?; 364 | let row = rows.next()?.unwrap(); 365 | let found_id: Uuid = row.get_unwrap(0); 366 | let found_label: String = row.get_unwrap(1); 367 | assert_eq!(found_id, id); 368 | assert_eq!(found_label, "target"); 369 | Ok(()) 370 | } 371 | } 372 | -------------------------------------------------------------------------------- /crates/duckdb/src/types/url.rs: -------------------------------------------------------------------------------- 1 | //! [`ToSql`] and [`FromSql`] implementation for [`url::Url`]. 2 | use crate::{ 3 | types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef}, 4 | Result, 5 | }; 6 | use url::Url; 7 | 8 | /// Serialize `Url` to text. 9 | impl ToSql for Url { 10 | #[inline] 11 | fn to_sql(&self) -> Result> { 12 | Ok(ToSqlOutput::from(self.as_str())) 13 | } 14 | } 15 | 16 | /// Deserialize text to `Url`. 17 | impl FromSql for Url { 18 | #[inline] 19 | fn column_result(value: ValueRef<'_>) -> FromSqlResult { 20 | match value { 21 | ValueRef::Text(s) => { 22 | let s = std::str::from_utf8(s).map_err(|e| FromSqlError::Other(Box::new(e)))?; 23 | Url::parse(s).map_err(|e| FromSqlError::Other(Box::new(e))) 24 | } 25 | _ => Err(FromSqlError::InvalidType), 26 | } 27 | } 28 | } 29 | 30 | #[cfg(test)] 31 | mod test { 32 | use crate::{params, Connection, Error, Result}; 33 | use url::{ParseError, Url}; 34 | 35 | fn checked_memory_handle() -> Result { 36 | let db = Connection::open_in_memory()?; 37 | db.execute_batch("CREATE TABLE urls (i INTEGER, v TEXT)")?; 38 | Ok(db) 39 | } 40 | 41 | fn get_url(db: &Connection, id: i64) -> Result { 42 | db.query_row("SELECT v FROM urls WHERE i = ?", [id], |r| r.get(0)) 43 | } 44 | 45 | #[test] 46 | fn test_sql_url() -> Result<()> { 47 | let db = &checked_memory_handle()?; 48 | 49 | let url0 = Url::parse("http://www.example1.com").unwrap(); 50 | let url1 = Url::parse("http://www.example1.com/👌").unwrap(); 51 | let url2 = "http://www.example2.com/👌"; 52 | 53 | db.execute( 54 | "INSERT INTO urls (i, v) VALUES (0, ?), (1, ?), (2, ?), (3, ?)", 55 | // also insert a non-hex encoded url (which might be present if it was 56 | // inserted separately) 57 | params![url0, url1, url2, "illegal"], 58 | )?; 59 | 60 | assert_eq!(get_url(db, 0)?, url0); 61 | 62 | assert_eq!(get_url(db, 1)?, url1); 63 | 64 | // Should successfully read it, even though it wasn't inserted as an 65 | // escaped url. 66 | let out_url2: Url = get_url(db, 2)?; 67 | assert_eq!(out_url2, Url::parse(url2).unwrap()); 68 | 69 | // Make sure the conversion error comes through correctly. 70 | let err = get_url(db, 3).unwrap_err(); 71 | match err { 72 | Error::FromSqlConversionFailure(_, _, e) => { 73 | assert_eq!(*e.downcast::().unwrap(), ParseError::RelativeUrlWithoutBase,); 74 | } 75 | e => { 76 | panic!("Expected conversion failure, got {e}"); 77 | } 78 | } 79 | Ok(()) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /crates/duckdb/src/types/value.rs: -------------------------------------------------------------------------------- 1 | use super::{Null, OrderedMap, TimeUnit, Type}; 2 | use rust_decimal::prelude::*; 3 | 4 | /// Owning [dynamic type value](http://sqlite.org/datatype3.html). Value's type is typically 5 | /// dictated by DuckDB (not by the caller). 
6 | /// 7 | /// See [`ValueRef`](crate::types::ValueRef) for a non-owning dynamic type 8 | /// value. 9 | #[derive(Clone, Debug, PartialEq)] 10 | pub enum Value { 11 | /// The value is a `NULL` value. 12 | Null, 13 | /// The value is a boolean. 14 | Boolean(bool), 15 | /// The value is a signed tiny integer. 16 | TinyInt(i8), 17 | /// The value is a signed small integer. 18 | SmallInt(i16), 19 | /// The value is a signed integer. 20 | Int(i32), 21 | /// The value is a signed big integer. 22 | BigInt(i64), 23 | /// The value is a signed huge integer. 24 | HugeInt(i128), 25 | /// The value is a unsigned tiny integer. 26 | UTinyInt(u8), 27 | /// The value is a unsigned small integer. 28 | USmallInt(u16), 29 | /// The value is a unsigned integer. 30 | UInt(u32), 31 | /// The value is a unsigned big integer. 32 | UBigInt(u64), 33 | /// The value is a f32. 34 | Float(f32), 35 | /// The value is a f64. 36 | Double(f64), 37 | /// The value is a Decimal. 38 | Decimal(Decimal), 39 | /// The value is a timestamp. 40 | Timestamp(TimeUnit, i64), 41 | /// The value is a text string. 42 | Text(String), 43 | /// The value is a blob of data 44 | Blob(Vec), 45 | /// The value is a date32 46 | Date32(i32), 47 | /// The value is a time64 48 | Time64(TimeUnit, i64), 49 | /// The value is an interval (month, day, nano) 50 | Interval { 51 | /// months 52 | months: i32, 53 | /// days 54 | days: i32, 55 | /// nanos 56 | nanos: i64, 57 | }, 58 | /// The value is a list 59 | List(Vec), 60 | /// The value is an enum 61 | Enum(String), 62 | /// The value is a struct 63 | Struct(OrderedMap), 64 | /// The value is an array 65 | Array(Vec), 66 | /// The value is a map 67 | Map(OrderedMap), 68 | /// The value is a union 69 | Union(Box), 70 | } 71 | 72 | impl From for Value { 73 | #[inline] 74 | fn from(_: Null) -> Value { 75 | Value::Null 76 | } 77 | } 78 | 79 | impl From for Value { 80 | #[inline] 81 | fn from(i: bool) -> Value { 82 | Value::Boolean(i) 83 | } 84 | } 85 | 86 | impl From for Value { 87 | #[inline] 88 | fn from(i: usize) -> Value { 89 | Value::UBigInt(i as u64) 90 | } 91 | } 92 | 93 | impl From for Value { 94 | #[inline] 95 | fn from(i: isize) -> Value { 96 | Value::BigInt(i as i64) 97 | } 98 | } 99 | 100 | #[cfg(feature = "uuid")] 101 | impl From for Value { 102 | #[inline] 103 | fn from(id: uuid::Uuid) -> Value { 104 | Value::Text(id.to_string()) 105 | } 106 | } 107 | 108 | impl From for Value { 109 | #[inline] 110 | fn from(i: i8) -> Value { 111 | Value::TinyInt(i) 112 | } 113 | } 114 | 115 | impl From for Value { 116 | #[inline] 117 | fn from(i: i16) -> Value { 118 | Value::SmallInt(i) 119 | } 120 | } 121 | 122 | impl From for Value { 123 | #[inline] 124 | fn from(i: i32) -> Value { 125 | Value::Int(i) 126 | } 127 | } 128 | 129 | impl From for Value { 130 | #[inline] 131 | fn from(i: i64) -> Value { 132 | Value::BigInt(i) 133 | } 134 | } 135 | 136 | impl From for Value { 137 | #[inline] 138 | fn from(i: u8) -> Value { 139 | Value::UTinyInt(i) 140 | } 141 | } 142 | 143 | impl From for Value { 144 | #[inline] 145 | fn from(i: u16) -> Value { 146 | Value::USmallInt(i) 147 | } 148 | } 149 | 150 | impl From for Value { 151 | #[inline] 152 | fn from(i: u32) -> Value { 153 | Value::UInt(i) 154 | } 155 | } 156 | 157 | impl From for Value { 158 | #[inline] 159 | fn from(i: u64) -> Value { 160 | Value::UBigInt(i) 161 | } 162 | } 163 | 164 | impl From for Value { 165 | #[inline] 166 | fn from(i: i128) -> Value { 167 | Value::HugeInt(i) 168 | } 169 | } 170 | 171 | impl From for Value { 172 | #[inline] 173 | fn 
from(f: f32) -> Value { 174 | Value::Float(f) 175 | } 176 | } 177 | 178 | impl From for Value { 179 | #[inline] 180 | fn from(f: f64) -> Value { 181 | Value::Double(f) 182 | } 183 | } 184 | 185 | impl From for Value { 186 | #[inline] 187 | fn from(s: String) -> Value { 188 | Value::Text(s) 189 | } 190 | } 191 | 192 | impl From> for Value { 193 | #[inline] 194 | fn from(v: Vec) -> Value { 195 | Value::Blob(v) 196 | } 197 | } 198 | 199 | impl From> for Value 200 | where 201 | T: Into, 202 | { 203 | #[inline] 204 | fn from(v: Option) -> Value { 205 | match v { 206 | Some(x) => x.into(), 207 | None => Value::Null, 208 | } 209 | } 210 | } 211 | 212 | impl Value { 213 | /// Returns DuckDB fundamental datatype. 214 | #[inline] 215 | pub fn data_type(&self) -> Type { 216 | match *self { 217 | Value::Null => Type::Null, 218 | Value::Boolean(_) => Type::Boolean, 219 | Value::TinyInt(_) => Type::TinyInt, 220 | Value::SmallInt(_) => Type::SmallInt, 221 | Value::Int(_) => Type::Int, 222 | Value::BigInt(_) => Type::BigInt, 223 | Value::HugeInt(_) => Type::HugeInt, 224 | Value::UTinyInt(_) => Type::UTinyInt, 225 | Value::USmallInt(_) => Type::USmallInt, 226 | Value::UInt(_) => Type::UInt, 227 | Value::UBigInt(_) => Type::UBigInt, 228 | Value::Float(_) => Type::Float, 229 | Value::Double(_) => Type::Double, 230 | Value::Decimal(_) => Type::Decimal, 231 | Value::Timestamp(_, _) => Type::Timestamp, 232 | Value::Text(_) => Type::Text, 233 | Value::Blob(_) => Type::Blob, 234 | Value::Date32(_) => Type::Date32, 235 | Value::Time64(..) => Type::Time64, 236 | Value::Interval { .. } => Type::Interval, 237 | Value::Union(..) | Value::Struct(..) | Value::List(..) | Value::Array(..) | Value::Map(..) => todo!(), 238 | Value::Enum(..) => Type::Enum, 239 | } 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /crates/duckdb/src/util/mod.rs: -------------------------------------------------------------------------------- 1 | // Internal utilities 2 | mod small_cstr; 3 | // pub(crate) use small_cstr::SmallCString; 4 | -------------------------------------------------------------------------------- /crates/duckdb/src/util/small_cstr.rs: -------------------------------------------------------------------------------- 1 | use smallvec::{smallvec, SmallVec}; 2 | use std::ffi::{CStr, CString, NulError}; 3 | 4 | /// Similar to std::ffi::CString, but avoids heap allocating if the string is 5 | /// small enough. Also guarantees it's input is UTF-8 -- used for cases where we 6 | /// need to pass a NUL-terminated string to SQLite, and we have a `&str`. 7 | #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] 8 | pub(crate) struct SmallCString(smallvec::SmallVec<[u8; 16]>); 9 | 10 | impl SmallCString { 11 | #[inline] 12 | #[allow(dead_code)] 13 | pub fn new(s: &str) -> Result { 14 | if s.as_bytes().contains(&0u8) { 15 | return Err(Self::fabricate_nul_error(s)); 16 | } 17 | let mut buf = SmallVec::with_capacity(s.len() + 1); 18 | buf.extend_from_slice(s.as_bytes()); 19 | buf.push(0); 20 | let res = Self(buf); 21 | res.debug_checks(); 22 | Ok(res) 23 | } 24 | 25 | #[inline] 26 | pub fn as_str(&self) -> &str { 27 | self.debug_checks(); 28 | // Constructor takes a &str so this is safe. 29 | unsafe { std::str::from_utf8_unchecked(self.as_bytes_without_nul()) } 30 | } 31 | 32 | /// Get the bytes not including the NUL terminator. E.g. 
the bytes which 33 | /// make up our `str`: 34 | /// - `SmallCString::new("foo").as_bytes_without_nul() == b"foo"` 35 | /// - `SmallCString::new("foo").as_bytes_with_nul() == b"foo\0" 36 | #[inline] 37 | pub fn as_bytes_without_nul(&self) -> &[u8] { 38 | self.debug_checks(); 39 | &self.0[..self.len()] 40 | } 41 | 42 | /// Get the bytes behind this str *including* the NUL terminator. This 43 | /// should never return an empty slice. 44 | #[inline] 45 | pub fn as_bytes_with_nul(&self) -> &[u8] { 46 | self.debug_checks(); 47 | &self.0 48 | } 49 | 50 | #[inline] 51 | #[cfg(debug_assertions)] 52 | fn debug_checks(&self) { 53 | debug_assert_ne!(self.0.len(), 0); 54 | debug_assert_eq!(self.0[self.0.len() - 1], 0); 55 | let strbytes = &self.0[..(self.0.len() - 1)]; 56 | debug_assert!(!strbytes.contains(&0)); 57 | debug_assert!(std::str::from_utf8(strbytes).is_ok()); 58 | } 59 | 60 | #[inline] 61 | #[cfg(not(debug_assertions))] 62 | fn debug_checks(&self) {} 63 | 64 | #[inline] 65 | pub fn len(&self) -> usize { 66 | debug_assert_ne!(self.0.len(), 0); 67 | self.0.len() - 1 68 | } 69 | 70 | #[inline] 71 | #[allow(unused)] // clippy wants this function. 72 | pub fn is_empty(&self) -> bool { 73 | self.len() == 0 74 | } 75 | 76 | #[inline] 77 | pub fn as_cstr(&self) -> &CStr { 78 | let bytes = self.as_bytes_with_nul(); 79 | debug_assert!(CStr::from_bytes_with_nul(bytes).is_ok()); 80 | unsafe { CStr::from_bytes_with_nul_unchecked(bytes) } 81 | } 82 | 83 | #[cold] 84 | fn fabricate_nul_error(b: &str) -> NulError { 85 | CString::new(b).unwrap_err() 86 | } 87 | } 88 | 89 | impl Default for SmallCString { 90 | #[inline] 91 | fn default() -> Self { 92 | Self(smallvec![0]) 93 | } 94 | } 95 | 96 | impl std::fmt::Debug for SmallCString { 97 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 98 | f.debug_tuple("SmallCString").field(&self.as_str()).finish() 99 | } 100 | } 101 | 102 | impl std::ops::Deref for SmallCString { 103 | type Target = CStr; 104 | 105 | #[inline] 106 | fn deref(&self) -> &CStr { 107 | self.as_cstr() 108 | } 109 | } 110 | 111 | impl PartialEq for str { 112 | #[inline] 113 | fn eq(&self, s: &SmallCString) -> bool { 114 | s.as_bytes_without_nul() == self.as_bytes() 115 | } 116 | } 117 | 118 | impl PartialEq for SmallCString { 119 | #[inline] 120 | fn eq(&self, s: &str) -> bool { 121 | self.as_bytes_without_nul() == s.as_bytes() 122 | } 123 | } 124 | 125 | impl std::borrow::Borrow for SmallCString { 126 | #[inline] 127 | fn borrow(&self) -> &str { 128 | self.as_str() 129 | } 130 | } 131 | 132 | #[cfg(test)] 133 | mod test { 134 | use super::*; 135 | 136 | #[test] 137 | fn test_small_cstring() { 138 | // We don't go through the normal machinery for default, so make sure 139 | // things work. 
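        // Editor's hedged addition (not part of the original test): the checks below
        // all follow from one invariant -- the inline buffer stores the UTF-8 bytes
        // plus a single trailing NUL, so `len()` is the buffer length minus one and
        // `as_cstr()` can hand out a `&CStr` without copying or reallocating.
        let sketch = SmallCString::new("hi").unwrap();
        assert_eq!(sketch.len(), 2);
        assert_eq!(sketch.as_bytes_with_nul(), b"hi\0");
        assert_eq!(sketch.as_cstr().to_bytes(), b"hi");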
140 | assert_eq!(SmallCString::default().0, SmallCString::new("").unwrap().0); 141 | assert_eq!(SmallCString::new("foo").unwrap().len(), 3); 142 | assert_eq!(SmallCString::new("foo").unwrap().as_bytes_with_nul(), b"foo\0"); 143 | assert_eq!(SmallCString::new("foo").unwrap().as_bytes_without_nul(), b"foo",); 144 | 145 | assert_eq!(SmallCString::new("😀").unwrap().len(), 4); 146 | assert_eq!(SmallCString::new("😀").unwrap().0.as_slice(), b"\xf0\x9f\x98\x80\0",); 147 | assert_eq!( 148 | SmallCString::new("😀").unwrap().as_bytes_without_nul(), 149 | b"\xf0\x9f\x98\x80", 150 | ); 151 | 152 | assert_eq!(SmallCString::new("").unwrap().len(), 0); 153 | assert!(SmallCString::new("").unwrap().is_empty()); 154 | 155 | assert_eq!(SmallCString::new("").unwrap().0.as_slice(), b"\0"); 156 | assert_eq!(SmallCString::new("").unwrap().as_bytes_without_nul(), b""); 157 | 158 | assert!(SmallCString::new("\0").is_err()); 159 | assert!(SmallCString::new("\0abc").is_err()); 160 | assert!(SmallCString::new("abc\0").is_err()); 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /crates/duckdb/src/vscalar/function.rs: -------------------------------------------------------------------------------- 1 | pub struct ScalarFunctionSet { 2 | ptr: duckdb_scalar_function_set, 3 | } 4 | 5 | impl ScalarFunctionSet { 6 | pub fn new(name: &str) -> Self { 7 | let c_name = CString::new(name).expect("name should contain valid utf-8"); 8 | Self { 9 | ptr: unsafe { duckdb_create_scalar_function_set(c_name.as_ptr()) }, 10 | } 11 | } 12 | 13 | pub fn add_function(&self, func: ScalarFunction) -> crate::Result<()> { 14 | unsafe { 15 | let rc = duckdb_add_scalar_function_to_set(self.ptr, func.ptr); 16 | if rc != DuckDBSuccess { 17 | return Err(Error::DuckDBFailure(ffi::Error::new(rc), None)); 18 | } 19 | } 20 | 21 | Ok(()) 22 | } 23 | 24 | pub(crate) fn register_with_connection(&self, con: duckdb_connection) -> crate::Result<()> { 25 | unsafe { 26 | let rc = ffi::duckdb_register_scalar_function_set(con, self.ptr); 27 | if rc != ffi::DuckDBSuccess { 28 | return Err(Error::DuckDBFailure(ffi::Error::new(rc), None)); 29 | } 30 | } 31 | Ok(()) 32 | } 33 | } 34 | 35 | /// A function that returns a queryable scalar function 36 | #[derive(Debug)] 37 | pub struct ScalarFunction { 38 | ptr: duckdb_scalar_function, 39 | } 40 | 41 | impl Drop for ScalarFunction { 42 | fn drop(&mut self) { 43 | unsafe { 44 | duckdb_destroy_scalar_function(&mut self.ptr); 45 | } 46 | } 47 | } 48 | 49 | use std::ffi::{c_void, CString}; 50 | 51 | use libduckdb_sys::{ 52 | self as ffi, duckdb_add_scalar_function_to_set, duckdb_connection, duckdb_create_scalar_function, 53 | duckdb_create_scalar_function_set, duckdb_data_chunk, duckdb_delete_callback_t, duckdb_destroy_scalar_function, 54 | duckdb_function_info, duckdb_scalar_function, duckdb_scalar_function_add_parameter, duckdb_scalar_function_set, 55 | duckdb_scalar_function_set_extra_info, duckdb_scalar_function_set_function, duckdb_scalar_function_set_name, 56 | duckdb_scalar_function_set_return_type, duckdb_scalar_function_set_varargs, duckdb_vector, DuckDBSuccess, 57 | }; 58 | 59 | use crate::{core::LogicalTypeHandle, Error}; 60 | 61 | impl ScalarFunction { 62 | /// Creates a new empty scalar function. 
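    // Editor's hedged sketch (assumption: this mirrors how `register_scalar_function`
    // in vscalar/mod.rs drives these builders): one `ScalarFunction` is created per
    // overload, configured, added to a `ScalarFunctionSet`, and the set is then
    // registered on a connection.
    //
    //     let set = ScalarFunctionSet::new("my_udf");
    //     let f = ScalarFunction::new("my_udf")?;
    //     f.set_return_type(&LogicalTypeHandle::from(LogicalTypeId::Varchar));
    //     f.add_parameter(&LogicalTypeHandle::from(LogicalTypeId::Varchar));
    //     set.add_function(f)?;
    //     // crate-internal step: set.register_with_connection(raw_con)?;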
63 | pub fn new(name: impl Into) -> Result { 64 | let name: String = name.into(); 65 | let f_ptr = unsafe { duckdb_create_scalar_function() }; 66 | let c_name = CString::new(name).expect("name should contain valid utf-8"); 67 | unsafe { duckdb_scalar_function_set_name(f_ptr, c_name.as_ptr()) }; 68 | 69 | Ok(Self { ptr: f_ptr }) 70 | } 71 | 72 | /// Adds a parameter to the scalar function. 73 | /// 74 | /// # Arguments 75 | /// * `logical_type`: The type of the parameter to add. 76 | pub fn add_parameter(&self, logical_type: &LogicalTypeHandle) -> &Self { 77 | unsafe { 78 | duckdb_scalar_function_add_parameter(self.ptr, logical_type.ptr); 79 | } 80 | self 81 | } 82 | 83 | pub fn add_variadic_parameter(&self, logical_type: &LogicalTypeHandle) -> &Self { 84 | unsafe { 85 | duckdb_scalar_function_set_varargs(self.ptr, logical_type.ptr); 86 | } 87 | self 88 | } 89 | 90 | /// Sets the return type of the scalar function. 91 | /// 92 | /// # Arguments 93 | /// * `logical_type`: The return type of the scalar function. 94 | pub fn set_return_type(&self, logical_type: &LogicalTypeHandle) -> &Self { 95 | unsafe { 96 | duckdb_scalar_function_set_return_type(self.ptr, logical_type.ptr); 97 | } 98 | self 99 | } 100 | 101 | /// Sets the main function of the scalar function 102 | /// 103 | /// # Arguments 104 | /// * `function`: The function 105 | pub fn set_function( 106 | &self, 107 | func: Option, 108 | ) -> &Self { 109 | unsafe { 110 | duckdb_scalar_function_set_function(self.ptr, func); 111 | } 112 | self 113 | } 114 | 115 | /// Assigns extra information to the scalar function that can be fetched during binding, etc. 116 | /// 117 | /// # Arguments 118 | /// * `extra_info`: The extra information 119 | /// * `destroy`: The callback that will be called to destroy the bind data (if any) 120 | /// 121 | /// # Safety 122 | unsafe fn set_extra_info_impl(&self, extra_info: *mut c_void, destroy: duckdb_delete_callback_t) { 123 | duckdb_scalar_function_set_extra_info(self.ptr, extra_info, destroy); 124 | } 125 | 126 | pub fn set_extra_info(&self) -> &ScalarFunction { 127 | unsafe { 128 | let t = Box::new(T::default()); 129 | let c_void = Box::into_raw(t) as *mut c_void; 130 | self.set_extra_info_impl(c_void, Some(drop_ptr::)); 131 | } 132 | self 133 | } 134 | } 135 | 136 | unsafe extern "C" fn drop_ptr(ptr: *mut c_void) { 137 | let _ = Box::from_raw(ptr as *mut T); 138 | } 139 | -------------------------------------------------------------------------------- /crates/duckdb/src/vscalar/mod.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | 3 | use function::{ScalarFunction, ScalarFunctionSet}; 4 | use libduckdb_sys::{ 5 | duckdb_data_chunk, duckdb_function_info, duckdb_scalar_function_get_extra_info, duckdb_scalar_function_set_error, 6 | duckdb_vector, 7 | }; 8 | 9 | use crate::{ 10 | core::{DataChunkHandle, LogicalTypeHandle}, 11 | inner_connection::InnerConnection, 12 | vtab::arrow::WritableVector, 13 | Connection, 14 | }; 15 | mod function; 16 | 17 | /// The duckdb Arrow scalar function interface 18 | #[cfg(feature = "vscalar-arrow")] 19 | pub mod arrow; 20 | 21 | #[cfg(feature = "vscalar-arrow")] 22 | pub use arrow::{ArrowFunctionSignature, ArrowScalarParams, VArrowScalar}; 23 | 24 | /// Duckdb scalar function trait 25 | pub trait VScalar: Sized { 26 | /// State that persists across invocations of the scalar function (the lifetime of the connection) 27 | /// The state can be accessed by multiple threads, so it must be `Send + Sync`. 
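    // Editor's hedged aside (illustrative only, not part of the trait definition
    // below): the associated `State` is usually a small `Default` value shared by
    // every invocation on a connection -- for example a counter or a precompiled
    // pattern:
    //
    //     #[derive(Default)]
    //     struct CallCount(std::sync::atomic::AtomicU64);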
28 | type State: Default + Sized + Send + Sync; 29 | /// The actual function 30 | /// 31 | /// # Safety 32 | /// 33 | /// This function is unsafe because it: 34 | /// 35 | /// - Dereferences multiple raw pointers (`func`). 36 | /// 37 | unsafe fn invoke( 38 | state: &Self::State, 39 | input: &mut DataChunkHandle, 40 | output: &mut dyn WritableVector, 41 | ) -> Result<(), Box>; 42 | 43 | /// The possible signatures of the scalar function. 44 | /// These will result in DuckDB scalar function overloads. 45 | /// The invoke method should be able to handle all of these signatures. 46 | fn signatures() -> Vec; 47 | } 48 | 49 | /// Duckdb scalar function parameters 50 | pub enum ScalarParams { 51 | /// Exact parameters 52 | Exact(Vec), 53 | /// Variadic parameters 54 | Variadic(LogicalTypeHandle), 55 | } 56 | 57 | /// Duckdb scalar function signature 58 | pub struct ScalarFunctionSignature { 59 | parameters: Option, 60 | return_type: LogicalTypeHandle, 61 | } 62 | 63 | impl ScalarFunctionSignature { 64 | /// Create an exact function signature 65 | pub fn exact(params: Vec, return_type: LogicalTypeHandle) -> Self { 66 | ScalarFunctionSignature { 67 | parameters: Some(ScalarParams::Exact(params)), 68 | return_type, 69 | } 70 | } 71 | 72 | /// Create a variadic function signature 73 | pub fn variadic(param: LogicalTypeHandle, return_type: LogicalTypeHandle) -> Self { 74 | ScalarFunctionSignature { 75 | parameters: Some(ScalarParams::Variadic(param)), 76 | return_type, 77 | } 78 | } 79 | } 80 | 81 | impl ScalarFunctionSignature { 82 | pub(crate) fn register_with_scalar(&self, f: &ScalarFunction) { 83 | f.set_return_type(&self.return_type); 84 | 85 | match &self.parameters { 86 | Some(ScalarParams::Exact(params)) => { 87 | for param in params.iter() { 88 | f.add_parameter(param); 89 | } 90 | } 91 | Some(ScalarParams::Variadic(param)) => { 92 | f.add_variadic_parameter(param); 93 | } 94 | None => { 95 | // do nothing 96 | } 97 | } 98 | } 99 | } 100 | 101 | /// An interface to store and retrieve data during the function execution stage 102 | #[derive(Debug)] 103 | struct ScalarFunctionInfo(duckdb_function_info); 104 | 105 | impl From for ScalarFunctionInfo { 106 | fn from(ptr: duckdb_function_info) -> Self { 107 | Self(ptr) 108 | } 109 | } 110 | 111 | impl ScalarFunctionInfo { 112 | pub unsafe fn get_scalar_extra_info(&self) -> &T { 113 | &*(duckdb_scalar_function_get_extra_info(self.0).cast()) 114 | } 115 | 116 | pub unsafe fn set_error(&self, error: &str) { 117 | let c_str = CString::new(error).unwrap(); 118 | duckdb_scalar_function_set_error(self.0, c_str.as_ptr()); 119 | } 120 | } 121 | 122 | unsafe extern "C" fn scalar_func(info: duckdb_function_info, input: duckdb_data_chunk, mut output: duckdb_vector) 123 | where 124 | T: VScalar, 125 | { 126 | let info = ScalarFunctionInfo::from(info); 127 | let mut input = DataChunkHandle::new_unowned(input); 128 | let result = T::invoke(info.get_scalar_extra_info(), &mut input, &mut output); 129 | if let Err(e) = result { 130 | info.set_error(&e.to_string()); 131 | } 132 | } 133 | 134 | impl Connection { 135 | /// Register the given ScalarFunction with the current db 136 | #[inline] 137 | pub fn register_scalar_function(&self, name: &str) -> crate::Result<()> { 138 | let set = ScalarFunctionSet::new(name); 139 | for signature in S::signatures() { 140 | let scalar_function = ScalarFunction::new(name)?; 141 | signature.register_with_scalar(&scalar_function); 142 | scalar_function.set_function(Some(scalar_func::)); 143 | scalar_function.set_extra_info::(); 144 | 
set.add_function(scalar_function)?; 145 | } 146 | self.db.borrow_mut().register_scalar_function_set(set) 147 | } 148 | } 149 | 150 | impl InnerConnection { 151 | /// Register the given ScalarFunction with the current db 152 | pub fn register_scalar_function_set(&mut self, f: ScalarFunctionSet) -> crate::Result<()> { 153 | f.register_with_connection(self.con) 154 | } 155 | } 156 | 157 | #[cfg(test)] 158 | mod test { 159 | use std::error::Error; 160 | 161 | use arrow::array::Array; 162 | use libduckdb_sys::duckdb_string_t; 163 | 164 | use crate::{ 165 | core::{DataChunkHandle, Inserter, LogicalTypeHandle, LogicalTypeId}, 166 | types::DuckString, 167 | vtab::arrow::WritableVector, 168 | Connection, 169 | }; 170 | 171 | use super::{ScalarFunctionSignature, VScalar}; 172 | 173 | struct ErrorScalar {} 174 | 175 | impl VScalar for ErrorScalar { 176 | type State = (); 177 | 178 | unsafe fn invoke( 179 | _: &Self::State, 180 | input: &mut DataChunkHandle, 181 | _: &mut dyn WritableVector, 182 | ) -> Result<(), Box> { 183 | let mut msg = input.flat_vector(0).as_slice_with_len::(input.len())[0]; 184 | let string = DuckString::new(&mut msg).as_str(); 185 | Err(format!("Error: {}", string).into()) 186 | } 187 | 188 | fn signatures() -> Vec { 189 | vec![ScalarFunctionSignature::exact( 190 | vec![LogicalTypeId::Varchar.into()], 191 | LogicalTypeId::Varchar.into(), 192 | )] 193 | } 194 | } 195 | 196 | #[derive(Debug)] 197 | struct TestState { 198 | #[allow(dead_code)] 199 | inner: i32, 200 | } 201 | 202 | impl Default for TestState { 203 | fn default() -> Self { 204 | TestState { inner: 42 } 205 | } 206 | } 207 | 208 | struct EchoScalar {} 209 | 210 | impl VScalar for EchoScalar { 211 | type State = TestState; 212 | 213 | unsafe fn invoke( 214 | s: &Self::State, 215 | input: &mut DataChunkHandle, 216 | output: &mut dyn WritableVector, 217 | ) -> Result<(), Box> { 218 | assert_eq!(s.inner, 42); 219 | let values = input.flat_vector(0); 220 | let values = values.as_slice_with_len::(input.len()); 221 | let strings = values 222 | .iter() 223 | .map(|ptr| DuckString::new(&mut { *ptr }).as_str().to_string()) 224 | .take(input.len()); 225 | let output = output.flat_vector(); 226 | for s in strings { 227 | output.insert(0, s.to_string().as_str()); 228 | } 229 | Ok(()) 230 | } 231 | 232 | fn signatures() -> Vec { 233 | vec![ScalarFunctionSignature::exact( 234 | vec![LogicalTypeId::Varchar.into()], 235 | LogicalTypeId::Varchar.into(), 236 | )] 237 | } 238 | } 239 | 240 | struct Repeat {} 241 | 242 | impl VScalar for Repeat { 243 | type State = (); 244 | 245 | unsafe fn invoke( 246 | _: &Self::State, 247 | input: &mut DataChunkHandle, 248 | output: &mut dyn WritableVector, 249 | ) -> Result<(), Box> { 250 | let output = output.flat_vector(); 251 | let counts = input.flat_vector(1); 252 | let values = input.flat_vector(0); 253 | let values = values.as_slice_with_len::(input.len()); 254 | let strings = values 255 | .iter() 256 | .map(|ptr| DuckString::new(&mut { *ptr }).as_str().to_string()); 257 | let counts = counts.as_slice_with_len::(input.len()); 258 | for (count, value) in counts.iter().zip(strings).take(input.len()) { 259 | output.insert(0, value.repeat((*count) as usize).as_str()); 260 | } 261 | 262 | Ok(()) 263 | } 264 | 265 | fn signatures() -> Vec { 266 | vec![ScalarFunctionSignature::exact( 267 | vec![ 268 | LogicalTypeHandle::from(LogicalTypeId::Varchar), 269 | LogicalTypeHandle::from(LogicalTypeId::Integer), 270 | ], 271 | LogicalTypeHandle::from(LogicalTypeId::Varchar), 272 | )] 273 | } 274 | } 275 | 276 
| #[test] 277 | fn test_scalar() -> Result<(), Box> { 278 | let conn = Connection::open_in_memory()?; 279 | conn.register_scalar_function::("echo")?; 280 | 281 | let mut stmt = conn.prepare("select echo('hi') as hello")?; 282 | let mut rows = stmt.query([])?; 283 | 284 | while let Some(row) = rows.next()? { 285 | let hello: String = row.get(0)?; 286 | assert_eq!(hello, "hi"); 287 | } 288 | 289 | Ok(()) 290 | } 291 | 292 | #[test] 293 | fn test_scalar_error() -> Result<(), Box> { 294 | let conn = Connection::open_in_memory()?; 295 | conn.register_scalar_function::("error_udf")?; 296 | 297 | let mut stmt = conn.prepare("select error_udf('blurg') as hello")?; 298 | if let Err(err) = stmt.query([]) { 299 | assert!(err.to_string().contains("Error: blurg")); 300 | } else { 301 | panic!("Expected an error"); 302 | } 303 | 304 | Ok(()) 305 | } 306 | 307 | #[test] 308 | fn test_repeat_scalar() -> Result<(), Box> { 309 | let conn = Connection::open_in_memory()?; 310 | conn.register_scalar_function::("nobie_repeat")?; 311 | 312 | let batches = conn 313 | .prepare("select nobie_repeat('Ho ho ho 🎅🎄', 3) as message from range(5)")? 314 | .query_arrow([])? 315 | .collect::>(); 316 | 317 | for batch in batches.iter() { 318 | let array = batch.column(0); 319 | let array = array.as_any().downcast_ref::<::arrow::array::StringArray>().unwrap(); 320 | for i in 0..array.len() { 321 | assert_eq!(array.value(i), "Ho ho ho 🎅🎄Ho ho ho 🎅🎄Ho ho ho 🎅🎄"); 322 | } 323 | } 324 | 325 | Ok(()) 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /crates/duckdb/src/vtab/excel.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{self, AtomicUsize}; 2 | 3 | use super::{BindInfo, DataChunkHandle, InitInfo, LogicalTypeHandle, TableFunctionInfo, VTab}; 4 | use crate::core::{Inserter, LogicalTypeId}; 5 | use calamine::{open_workbook_auto, DataType, Range, Reader}; 6 | 7 | #[repr(C)] 8 | struct ExcelBindData { 9 | range: Range, 10 | width: usize, 11 | height: usize, 12 | } 13 | 14 | #[repr(C)] 15 | struct ExcelInitData { 16 | start: AtomicUsize, 17 | } 18 | 19 | struct ExcelVTab; 20 | 21 | impl VTab for ExcelVTab { 22 | type BindData = ExcelBindData; 23 | type InitData = ExcelInitData; 24 | 25 | fn bind(bind: &BindInfo) -> Result> { 26 | let param_count = bind.get_parameter_count(); 27 | assert!(param_count == 2); 28 | let path = bind.get_parameter(0).to_string(); 29 | let sheet = bind.get_parameter(1).to_string(); 30 | // let sheet = if param_count > 1 { 31 | // bind.get_parameter(1).to_string() 32 | // } else { 33 | // workbook.sheet_names()[0].to_owned() 34 | // }; 35 | let mut workbook = open_workbook_auto(path)?; 36 | let range = workbook 37 | .worksheet_range(&sheet) 38 | .unwrap_or_else(|| panic!("Can't find sheet: {} ?", sheet))?; 39 | let _column_count = range.get_size().1; 40 | let mut rows = range.rows(); 41 | let header = rows.next().unwrap(); 42 | for data in rows.by_ref() { 43 | // find the first row with no empty cell 44 | let mut found = true; 45 | for cell in data.iter() { 46 | match cell { 47 | DataType::Error(_) | DataType::Empty => { 48 | found = false; 49 | break; 50 | } 51 | _ => {} 52 | } 53 | } 54 | if !found { 55 | continue; 56 | } 57 | 58 | // use the first row as data type 59 | for (idx, cell) in data.iter().enumerate() { 60 | match cell { 61 | DataType::String(_) => { 62 | bind.add_result_column( 63 | header[idx] 64 | .get_string() 65 | .unwrap_or_else(|| panic!("idx {} header empty?", idx)), 66 
| LogicalTypeHandle::from(LogicalTypeId::Varchar), 67 | ); 68 | } 69 | DataType::Float(_) => { 70 | bind.add_result_column( 71 | header[idx] 72 | .get_string() 73 | .unwrap_or_else(|| panic!("idx {} header empty?", idx)), 74 | LogicalTypeHandle::from(LogicalTypeId::Double), 75 | ); 76 | } 77 | DataType::Int(_) => { 78 | bind.add_result_column( 79 | header[idx] 80 | .get_string() 81 | .unwrap_or_else(|| panic!("idx {} header empty?", idx)), 82 | LogicalTypeHandle::from(LogicalTypeId::Bigint), 83 | ); 84 | } 85 | DataType::Bool(_) => { 86 | bind.add_result_column( 87 | header[idx] 88 | .get_string() 89 | .unwrap_or_else(|| panic!("idx {} header empty?", idx)), 90 | LogicalTypeHandle::from(LogicalTypeId::Boolean), 91 | ); 92 | } 93 | DataType::DateTime(_) => { 94 | bind.add_result_column( 95 | header[idx] 96 | .get_string() 97 | .unwrap_or_else(|| panic!("idx {} header empty?", idx)), 98 | LogicalTypeHandle::from(LogicalTypeId::Date), 99 | ); 100 | } 101 | _ => { 102 | panic!("Shouldn't happen"); 103 | } 104 | } 105 | } 106 | break; 107 | } 108 | 109 | let width = range.get_size().1; 110 | let height = range.get_size().0; 111 | Ok(ExcelBindData { range, width, height }) 112 | } 113 | 114 | fn init(_: &InitInfo) -> Result> { 115 | Ok(ExcelInitData { start: 1.into() }) 116 | } 117 | 118 | fn func(func: &TableFunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { 119 | let init_info = func.get_init_data(); 120 | let bind_info = func.get_bind_data(); 121 | 122 | let start = init_info.start.load(atomic::Ordering::Relaxed); 123 | if start >= bind_info.height { 124 | output.set_len(0); 125 | } else { 126 | let range = &bind_info.range; 127 | let height = std::cmp::min(output.flat_vector(0).capacity(), bind_info.height - start); 128 | for i in 0..bind_info.width { 129 | let mut vector = output.flat_vector(i); 130 | for j in 0..height { 131 | let cell = range.get((start + j, i)); 132 | if cell.is_none() { 133 | continue; 134 | } 135 | match cell.unwrap() { 136 | DataType::String(s) => { 137 | vector.insert(j, s.as_str()); 138 | } 139 | DataType::Float(f) => { 140 | vector.as_mut_slice::()[j] = *f; 141 | } 142 | DataType::Int(ii) => { 143 | vector.as_mut_slice::()[j] = *ii; 144 | } 145 | DataType::Bool(b) => { 146 | vector.as_mut_slice::()[j] = *b; 147 | } 148 | DataType::DateTime(d) => { 149 | vector.as_mut_slice::()[j] = d.round() as i32 - 25569; 150 | } 151 | _ => { 152 | vector.set_null(j); 153 | } 154 | } 155 | } 156 | } 157 | 158 | init_info.start.fetch_add(height, atomic::Ordering::Relaxed); 159 | output.set_len(height); 160 | } 161 | Ok(()) 162 | } 163 | 164 | fn parameters() -> Option> { 165 | Some(vec![ 166 | LogicalTypeHandle::from(LogicalTypeId::Varchar), // file path 167 | LogicalTypeHandle::from(LogicalTypeId::Varchar), // sheet name 168 | ]) 169 | } 170 | } 171 | 172 | #[cfg(test)] 173 | mod test { 174 | use crate::{vtab::excel::ExcelVTab, Connection, Result}; 175 | use arrow::array::{Array, Date32Array, Float64Array, StringArray}; 176 | use std::error::Error; 177 | 178 | #[test] 179 | fn test_excel() -> Result<(), Box> { 180 | let db = Connection::open_in_memory()?; 181 | db.register_table_function::("excel")?; 182 | 183 | let val = db 184 | .prepare("select count(*) from excel('./examples/Movies_Social_metadata.xlsx', 'Data')")? 
185 | .query_row::([], |row| row.get(0))?; 186 | assert_eq!(3039, val); 187 | let mut stmt = db.prepare("select genres, sum(movie_facebook_likes) from excel('./examples/Movies_Social_metadata.xlsx', 'Data') group by genres order by genres limit 4")?; 188 | // +-------------+---------------------------+ 189 | // | genres | sum(movie_facebook_likes) | 190 | // +-------------+---------------------------+ 191 | // | Action | 9773520.0 | 192 | // | Adventure | 4355937.0 | 193 | // | Animation | 202219.0 | 194 | // | Biography | 1724632.0 | 195 | // +-------------+---------------------------+ 196 | let mut arr = stmt.query_arrow([])?; 197 | let rb = arr.next().expect("no record batch"); 198 | assert_eq!(rb.num_rows(), 4); 199 | let column = rb.column(0).as_any().downcast_ref::().unwrap(); 200 | assert_eq!(column.len(), 4); 201 | assert_eq!(column.value(0), "Action"); 202 | assert_eq!(column.value(1), "Adventure"); 203 | assert_eq!(column.value(2), "Animation"); 204 | assert_eq!(column.value(3), "Biography"); 205 | let column = rb.column(1).as_any().downcast_ref::().unwrap(); 206 | assert_eq!(column.len(), 4); 207 | assert_eq!(column.value(0), 9773520.0); 208 | assert_eq!(column.value(1), 4355937.0); 209 | assert_eq!(column.value(2), 202219.0); 210 | assert_eq!(column.value(3), 1724632.0); 211 | assert!(arr.next().is_none()); 212 | 213 | Ok(()) 214 | } 215 | 216 | #[test] 217 | fn test_excel_date() -> Result<(), Box> { 218 | let db = Connection::open_in_memory()?; 219 | db.register_table_function::("excel")?; 220 | let mut stmt = db.prepare("select * from excel('./examples/date.xlsx', 'Sheet1')")?; 221 | let mut arr = stmt.query_arrow([])?; 222 | let rb = arr.next().expect("no record batch"); 223 | let column = rb.column(0).as_any().downcast_ref::().unwrap(); 224 | assert_eq!(column.len(), 2); 225 | assert_eq!(column.value_as_date(0).unwrap().to_string(), "2021-01-01"); 226 | assert_eq!(column.value_as_date(1).unwrap().to_string(), "2021-01-02"); 227 | let column = rb.column(1).as_any().downcast_ref::().unwrap(); 228 | assert_eq!(column.len(), 2); 229 | assert_eq!(column.value(0), 15.0); 230 | assert_eq!(column.value(1), 16.0); 231 | assert!(arr.next().is_none()); 232 | 233 | Ok(()) 234 | } 235 | 236 | #[test] 237 | fn test_excel_with_empty_row() -> Result<(), Box> { 238 | let db = Connection::open_in_memory()?; 239 | db.register_table_function::("excel")?; 240 | 241 | // use arrow::record_batch::RecordBatch; 242 | // use arrow::util::pretty::print_batches; 243 | // let val: Vec = db.prepare("select * from excel('./examples/date.xlsx', 'Sheet2')")?.query_arrow([])?.collect(); 244 | // print_batches(&val)?; 245 | 246 | let mut stmt = db.prepare("select * from excel('./examples/date.xlsx', 'Sheet2')")?; 247 | let mut arr = stmt.query_arrow([])?; 248 | let rb = arr.next().expect("no record batch"); 249 | let column = rb.column(0).as_any().downcast_ref::().unwrap(); 250 | assert_eq!(column.len(), 3); 251 | assert!(column.is_null(0)); 252 | assert_eq!(column.value_as_date(1).unwrap().to_string(), "2021-01-01"); 253 | assert_eq!(column.value_as_date(2).unwrap().to_string(), "2021-01-02"); 254 | let column = rb.column(1).as_any().downcast_ref::().unwrap(); 255 | assert_eq!(column.len(), 3); 256 | assert!(column.is_null(0)); 257 | assert_eq!(column.value(1), 15.0); 258 | assert_eq!(column.value(2), 16.0); 259 | assert!(arr.next().is_none()); 260 | Ok(()) 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /crates/duckdb/src/vtab/mod.rs: 
-------------------------------------------------------------------------------- 1 | // #![warn(unsafe_op_in_unsafe_fn)] 2 | 3 | use std::ffi::c_void; 4 | 5 | use crate::{error::Error, inner_connection::InnerConnection, Connection, Result}; 6 | 7 | use super::ffi; 8 | 9 | mod function; 10 | mod value; 11 | 12 | /// The duckdb Arrow table function interface 13 | #[cfg(feature = "vtab-arrow")] 14 | pub mod arrow; 15 | #[cfg(feature = "vtab-arrow")] 16 | pub use self::arrow::{ 17 | arrow_arraydata_to_query_params, arrow_ffi_to_query_params, arrow_recordbatch_to_query_params, 18 | record_batch_to_duckdb_data_chunk, to_duckdb_logical_type, to_duckdb_type_id, 19 | }; 20 | #[cfg(feature = "vtab-excel")] 21 | mod excel; 22 | 23 | pub use function::{BindInfo, InitInfo, TableFunction, TableFunctionInfo}; 24 | pub use value::Value; 25 | 26 | use crate::core::{DataChunkHandle, LogicalTypeHandle}; 27 | use ffi::{duckdb_bind_info, duckdb_data_chunk, duckdb_function_info, duckdb_init_info}; 28 | 29 | /// Given a raw pointer to a box, free the box and the data contained within it. 30 | /// 31 | /// # Safety 32 | /// The pointer must be a valid pointer to a `Box` created by `Box::into_raw`. 33 | unsafe extern "C" fn drop_boxed(v: *mut c_void) { 34 | drop(unsafe { Box::from_raw(v.cast::()) }); 35 | } 36 | 37 | /// Duckdb table function trait 38 | /// 39 | /// See to the HelloVTab example for more details 40 | /// 41 | pub trait VTab: Sized { 42 | /// The data type of the init data. 43 | /// 44 | /// The init data tracks the state of the table function and is global across threads. 45 | /// 46 | /// The init data is shared across threads so must be `Send + Sync`. 47 | type InitData: Sized + Send + Sync; 48 | 49 | /// The data type of the bind data. 50 | /// 51 | /// The bind data is shared across threads so must be `Send + Sync`. 52 | type BindData: Sized + Send + Sync; 53 | 54 | /// Bind data to the table function 55 | /// 56 | /// This function is used for determining the return type of a table producing function and returning bind data 57 | fn bind(bind: &BindInfo) -> Result>; 58 | 59 | /// Initialize the table function 60 | fn init(init: &InitInfo) -> Result>; 61 | 62 | /// Generate rows from the table function. 63 | /// 64 | /// The implementation should populate the `output` parameter with the rows to be returned. 65 | /// 66 | /// When the table function is done, the implementation should set the length of the output to 0. 
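    // Editor's hedged summary of the call flow this trait models, drawn from the
    // doc comments above and the HelloVTab test at the bottom of this file:
    //
    //     bind()  -- once per query: declare result columns, build the BindData
    //     init()  -- once per scan: build the InitData (shared across threads)
    //     func()  -- called repeatedly: fill `output` and set its length; setting
    //                the length to 0 tells DuckDB the scan is finished, e.g.
    //
    //     if init_data.done.swap(true, Ordering::Relaxed) {
    //         output.set_len(0);                       // no more rows
    //     } else {
    //         output.flat_vector(0).insert(0, "row");  // write one value
    //         output.set_len(1);
    //     }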
67 | fn func(func: &TableFunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box>; 68 | 69 | /// Does the table function support pushdown 70 | /// default is false 71 | fn supports_pushdown() -> bool { 72 | false 73 | } 74 | /// The parameters of the table function 75 | /// default is None 76 | fn parameters() -> Option> { 77 | None 78 | } 79 | /// The named parameters of the table function 80 | /// default is None 81 | fn named_parameters() -> Option> { 82 | None 83 | } 84 | } 85 | 86 | unsafe extern "C" fn func(info: duckdb_function_info, output: duckdb_data_chunk) 87 | where 88 | T: VTab, 89 | { 90 | let info = TableFunctionInfo::::from(info); 91 | let mut data_chunk_handle = DataChunkHandle::new_unowned(output); 92 | let result = T::func(&info, &mut data_chunk_handle); 93 | if result.is_err() { 94 | info.set_error(&result.err().unwrap().to_string()); 95 | } 96 | } 97 | 98 | unsafe extern "C" fn init(info: duckdb_init_info) 99 | where 100 | T: VTab, 101 | { 102 | let info = InitInfo::from(info); 103 | match T::init(&info) { 104 | Ok(init_data) => { 105 | info.set_init_data( 106 | Box::into_raw(Box::new(init_data)) as *mut c_void, 107 | Some(drop_boxed::), 108 | ); 109 | } 110 | Err(e) => { 111 | info.set_error(&e.to_string()); 112 | } 113 | } 114 | } 115 | 116 | unsafe extern "C" fn bind(info: duckdb_bind_info) 117 | where 118 | T: VTab, 119 | { 120 | let info = BindInfo::from(info); 121 | match T::bind(&info) { 122 | Ok(bind_data) => { 123 | info.set_bind_data( 124 | Box::into_raw(Box::new(bind_data)) as *mut c_void, 125 | Some(drop_boxed::), 126 | ); 127 | } 128 | Err(e) => { 129 | info.set_error(&e.to_string()); 130 | } 131 | } 132 | } 133 | 134 | impl Connection { 135 | /// Register the given TableFunction with the current db 136 | #[inline] 137 | pub fn register_table_function(&self, name: &str) -> Result<()> { 138 | let table_function = TableFunction::default(); 139 | table_function 140 | .set_name(name) 141 | .supports_pushdown(T::supports_pushdown()) 142 | .set_bind(Some(bind::)) 143 | .set_init(Some(init::)) 144 | .set_function(Some(func::)); 145 | for ty in T::parameters().unwrap_or_default() { 146 | table_function.add_parameter(&ty); 147 | } 148 | for (name, ty) in T::named_parameters().unwrap_or_default() { 149 | table_function.add_named_parameter(&name, &ty); 150 | } 151 | self.db.borrow_mut().register_table_function(table_function) 152 | } 153 | } 154 | 155 | impl InnerConnection { 156 | /// Register the given TableFunction with the current db 157 | pub fn register_table_function(&mut self, table_function: TableFunction) -> Result<()> { 158 | unsafe { 159 | let rc = ffi::duckdb_register_table_function(self.con, table_function.ptr); 160 | if rc != ffi::DuckDBSuccess { 161 | return Err(Error::DuckDBFailure(ffi::Error::new(rc), None)); 162 | } 163 | } 164 | Ok(()) 165 | } 166 | } 167 | 168 | #[cfg(test)] 169 | mod test { 170 | use super::*; 171 | use crate::core::{Inserter, LogicalTypeId}; 172 | use std::{ 173 | error::Error, 174 | ffi::CString, 175 | sync::atomic::{AtomicBool, Ordering}, 176 | }; 177 | 178 | struct HelloBindData { 179 | name: String, 180 | } 181 | 182 | struct HelloInitData { 183 | done: AtomicBool, 184 | } 185 | 186 | struct HelloVTab; 187 | 188 | impl VTab for HelloVTab { 189 | type InitData = HelloInitData; 190 | type BindData = HelloBindData; 191 | 192 | fn bind(bind: &BindInfo) -> Result> { 193 | bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar)); 194 | let name = bind.get_parameter(0).to_string(); 195 | 
Ok(HelloBindData { name }) 196 | } 197 | 198 | fn init(_: &InitInfo) -> Result> { 199 | Ok(HelloInitData { 200 | done: AtomicBool::new(false), 201 | }) 202 | } 203 | 204 | fn func( 205 | func: &TableFunctionInfo, 206 | output: &mut DataChunkHandle, 207 | ) -> Result<(), Box> { 208 | let init_data = func.get_init_data(); 209 | let bind_data = func.get_bind_data(); 210 | 211 | if init_data.done.swap(true, Ordering::Relaxed) { 212 | output.set_len(0); 213 | } else { 214 | let vector = output.flat_vector(0); 215 | let result = CString::new(format!("Hello {}", bind_data.name))?; 216 | vector.insert(0, result); 217 | output.set_len(1); 218 | } 219 | Ok(()) 220 | } 221 | 222 | fn parameters() -> Option> { 223 | Some(vec![LogicalTypeHandle::from(LogicalTypeId::Varchar)]) 224 | } 225 | } 226 | 227 | struct HelloWithNamedVTab {} 228 | impl VTab for HelloWithNamedVTab { 229 | type InitData = HelloInitData; 230 | type BindData = HelloBindData; 231 | 232 | fn bind(bind: &BindInfo) -> Result> { 233 | bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar)); 234 | let name = bind.get_named_parameter("name").unwrap().to_string(); 235 | assert!(bind.get_named_parameter("unknown_name").is_none()); 236 | Ok(HelloBindData { name }) 237 | } 238 | 239 | fn init(init_info: &InitInfo) -> Result> { 240 | HelloVTab::init(init_info) 241 | } 242 | 243 | fn func(func: &TableFunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { 244 | let init_data = func.get_init_data(); 245 | let bind_data = func.get_bind_data(); 246 | 247 | if init_data.done.swap(true, Ordering::Relaxed) { 248 | output.set_len(0); 249 | } else { 250 | let vector = output.flat_vector(0); 251 | let result = CString::new(format!("Hello {}", bind_data.name))?; 252 | vector.insert(0, result); 253 | output.set_len(1); 254 | } 255 | Ok(()) 256 | } 257 | 258 | fn named_parameters() -> Option> { 259 | Some(vec![( 260 | "name".to_string(), 261 | LogicalTypeHandle::from(LogicalTypeId::Varchar), 262 | )]) 263 | } 264 | } 265 | 266 | #[test] 267 | fn test_table_function() -> Result<(), Box> { 268 | let conn = Connection::open_in_memory()?; 269 | conn.register_table_function::("hello")?; 270 | 271 | let val = conn.query_row("select * from hello('duckdb')", [], |row| <(String,)>::try_from(row))?; 272 | assert_eq!(val, ("Hello duckdb".to_string(),)); 273 | 274 | Ok(()) 275 | } 276 | 277 | #[test] 278 | fn test_named_table_function() -> Result<(), Box> { 279 | let conn = Connection::open_in_memory()?; 280 | conn.register_table_function::("hello_named")?; 281 | 282 | let val = conn.query_row("select * from hello_named(name = 'duckdb')", [], |row| { 283 | <(String,)>::try_from(row) 284 | })?; 285 | assert_eq!(val, ("Hello duckdb".to_string(),)); 286 | 287 | Ok(()) 288 | } 289 | 290 | #[cfg(feature = "vtab-loadable")] 291 | use duckdb_loadable_macros::duckdb_entrypoint; 292 | 293 | // this function is never called, but is still type checked 294 | // Exposes a extern C function named "libhello_ext_init" in the compiled dynamic library, 295 | // the "entrypoint" that duckdb will use to load the extension. 
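    // Editor's hedged sketch of how the built extension might be exercised from a
    // plain connection. The shared-library path, file name, and any configuration
    // needed to load unsigned extensions are assumptions, not something these
    // tests assert:
    //
    //     let conn = Connection::open_in_memory()?;
    //     conn.execute_batch("LOAD 'target/release/libhello_ext.duckdb_extension';")?;
    //     let greeting: String =
    //         conn.query_row("SELECT * FROM hello('duckdb')", [], |row| row.get(0))?;
    //     assert_eq!(greeting, "Hello duckdb");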
296 | #[cfg(feature = "vtab-loadable")] 297 | #[duckdb_entrypoint] 298 | fn libhello_ext_init(conn: Connection) -> Result<(), Box> { 299 | conn.register_table_function::("hello")?; 300 | Ok(()) 301 | } 302 | } 303 | -------------------------------------------------------------------------------- /crates/duckdb/src/vtab/value.rs: -------------------------------------------------------------------------------- 1 | use crate::ffi::{duckdb_destroy_value, duckdb_get_int64, duckdb_get_varchar, duckdb_value}; 2 | use std::{ffi::CString, fmt}; 3 | 4 | /// The Value object holds a single arbitrary value of any type that can be 5 | /// stored in the database. 6 | #[derive(Debug)] 7 | pub struct Value { 8 | pub(crate) ptr: duckdb_value, 9 | } 10 | 11 | impl From for Value { 12 | fn from(ptr: duckdb_value) -> Self { 13 | Self { ptr } 14 | } 15 | } 16 | 17 | impl Drop for Value { 18 | fn drop(&mut self) { 19 | if !self.ptr.is_null() { 20 | unsafe { 21 | duckdb_destroy_value(&mut self.ptr); 22 | } 23 | } 24 | self.ptr = std::ptr::null_mut(); 25 | } 26 | } 27 | 28 | impl Value { 29 | /// Returns the value as a int64 30 | pub fn to_int64(&self) -> i64 { 31 | unsafe { duckdb_get_int64(self.ptr) } 32 | } 33 | } 34 | 35 | impl fmt::Display for Value { 36 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 37 | let c_string = unsafe { CString::from_raw(duckdb_get_varchar(self.ptr)) }; 38 | write!(f, "{}", c_string.to_str().unwrap()) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "libduckdb-sys" 3 | version = "1.3.0" 4 | authors.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | homepage.workspace = true 9 | keywords.workspace = true 10 | readme.workspace = true 11 | build = "build.rs" 12 | categories = ["external-ffi-bindings", "database"] 13 | description = "Native bindings to the libduckdb library, C API" 14 | exclude = ["duckdb-sources"] 15 | 16 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 17 | 18 | [features] 19 | default = ["vcpkg", "pkg-config"] 20 | bundled = ["cc"] 21 | buildtime_bindgen = ["bindgen", "pkg-config", "vcpkg"] 22 | json = ["bundled"] 23 | parquet = ["bundled"] 24 | extensions-full = ["json", "parquet"] 25 | winduckdb = [] 26 | # Warning: experimental feature 27 | loadable-extension = ["prettyplease", "quote", "syn"] 28 | 29 | [dependencies] 30 | 31 | [build-dependencies] 32 | autocfg = { workspace = true } 33 | bindgen = { workspace = true, features = ["runtime"], optional = true } 34 | flate2 = { workspace = true } 35 | pkg-config = { workspace = true, optional = true } 36 | cc = { workspace = true, features = ["parallel"], optional = true } 37 | vcpkg = { workspace = true, optional = true } 38 | serde = { workspace = true, features = ["derive"] } 39 | serde_json = { workspace = true } 40 | tar = { workspace = true } 41 | syn = { workspace = true, optional = true } 42 | quote = { workspace = true, optional = true } 43 | prettyplease = { workspace = true, optional = true } 44 | 45 | [dev-dependencies] 46 | arrow = { workspace = true, features = ["ffi"] } 47 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE 
-------------------------------------------------------------------------------- /crates/libduckdb-sys/README.md: -------------------------------------------------------------------------------- 1 | ../../README.md -------------------------------------------------------------------------------- /crates/libduckdb-sys/duckdb.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-rs/f631fc6f004d478229cf43e58756b12a6d6899cb/crates/libduckdb-sys/duckdb.tar.gz -------------------------------------------------------------------------------- /crates/libduckdb-sys/openssl/cfgs.rs: -------------------------------------------------------------------------------- 1 | pub fn get(openssl_version: Option, libressl_version: Option) -> Vec<&'static str> { 2 | let mut cfgs = vec![]; 3 | 4 | if let Some(libressl_version) = libressl_version { 5 | cfgs.push("libressl"); 6 | 7 | if libressl_version >= 0x2_05_01_00_0 { 8 | cfgs.push("libressl251"); 9 | } 10 | if libressl_version >= 0x2_05_02_00_0 { 11 | cfgs.push("libressl252"); 12 | } 13 | if libressl_version >= 0x2_06_01_00_0 { 14 | cfgs.push("libressl261"); 15 | } 16 | if libressl_version >= 0x2_07_00_00_0 { 17 | cfgs.push("libressl270"); 18 | } 19 | if libressl_version >= 0x2_07_01_00_0 { 20 | cfgs.push("libressl271"); 21 | } 22 | if libressl_version >= 0x2_07_03_00_0 { 23 | cfgs.push("libressl273"); 24 | } 25 | if libressl_version >= 0x2_08_00_00_0 { 26 | cfgs.push("libressl280"); 27 | } 28 | if libressl_version >= 0x2_08_01_00_0 { 29 | cfgs.push("libressl281"); 30 | } 31 | if libressl_version >= 0x2_09_01_00_0 { 32 | cfgs.push("libressl291"); 33 | } 34 | if libressl_version >= 0x3_02_01_00_0 { 35 | cfgs.push("libressl321"); 36 | } 37 | if libressl_version >= 0x3_03_02_00_0 { 38 | cfgs.push("libressl332"); 39 | } 40 | if libressl_version >= 0x3_04_00_00_0 { 41 | cfgs.push("libressl340"); 42 | } 43 | if libressl_version >= 0x3_05_00_00_0 { 44 | cfgs.push("libressl350"); 45 | } 46 | if libressl_version >= 0x3_06_00_00_0 { 47 | cfgs.push("libressl360"); 48 | } 49 | if libressl_version >= 0x3_07_00_00_0 { 50 | cfgs.push("libressl370"); 51 | } 52 | } else { 53 | let openssl_version = openssl_version.unwrap(); 54 | 55 | if openssl_version >= 0x3_00_00_00_0 { 56 | cfgs.push("ossl300"); 57 | } 58 | if openssl_version >= 0x1_00_01_00_0 { 59 | cfgs.push("ossl101"); 60 | } 61 | if openssl_version >= 0x1_00_02_00_0 { 62 | cfgs.push("ossl102"); 63 | } 64 | if openssl_version >= 0x1_00_02_06_0 { 65 | cfgs.push("ossl102f"); 66 | } 67 | if openssl_version >= 0x1_00_02_08_0 { 68 | cfgs.push("ossl102h"); 69 | } 70 | if openssl_version >= 0x1_01_00_00_0 { 71 | cfgs.push("ossl110"); 72 | } 73 | if openssl_version >= 0x1_01_00_06_0 { 74 | cfgs.push("ossl110f"); 75 | } 76 | if openssl_version >= 0x1_01_00_07_0 { 77 | cfgs.push("ossl110g"); 78 | } 79 | if openssl_version >= 0x1_01_00_08_0 { 80 | cfgs.push("ossl110h"); 81 | } 82 | if openssl_version >= 0x1_01_01_00_0 { 83 | cfgs.push("ossl111"); 84 | } 85 | if openssl_version >= 0x1_01_01_02_0 { 86 | cfgs.push("ossl111b"); 87 | } 88 | if openssl_version >= 0x1_01_01_03_0 { 89 | cfgs.push("ossl111c"); 90 | } 91 | } 92 | 93 | cfgs 94 | } 95 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/openssl/expando.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define VERSION2(n, v) RUST_VERSION_##n##_##v 5 | #define VERSION(n, v) 
VERSION2(n, v) 6 | 7 | #define NEW_VERSION2(a, b, c) RUST_VERSION_NEW_OPENSSL_##a##_##b##_##c 8 | #define NEW_VERSION(a, b, c) NEW_VERSION2(a, b, c) 9 | 10 | #ifdef LIBRESSL_VERSION_NUMBER 11 | VERSION(LIBRESSL, LIBRESSL_VERSION_NUMBER) 12 | #elif defined OPENSSL_VERSION_MAJOR 13 | NEW_VERSION(OPENSSL_VERSION_MAJOR, OPENSSL_VERSION_MINOR, OPENSSL_VERSION_PATCH) 14 | #else 15 | VERSION(OPENSSL, OPENSSL_VERSION_NUMBER) 16 | #endif 17 | 18 | #ifdef OPENSSL_IS_BORINGSSL 19 | RUST_OPENSSL_IS_BORINGSSL 20 | #endif 21 | 22 | #ifdef OPENSSL_NO_BF 23 | RUST_CONF_OPENSSL_NO_BF 24 | #endif 25 | 26 | #ifdef OPENSSL_NO_BUF_FREELISTS 27 | RUST_CONF_OPENSSL_NO_BUF_FREELISTS 28 | #endif 29 | 30 | #ifdef OPENSSL_NO_CHACHA 31 | RUST_CONF_OPENSSL_NO_CHACHA 32 | #endif 33 | 34 | #ifdef OPENSSL_NO_IDEA 35 | RUST_CONF_OPENSSL_NO_IDEA 36 | #endif 37 | 38 | #ifdef OPENSSL_NO_CAMELLIA 39 | RUST_CONF_OPENSSL_NO_CAMELLIA 40 | #endif 41 | 42 | #ifdef OPENSSL_NO_CAST 43 | RUST_CONF_OPENSSL_NO_CAST 44 | #endif 45 | 46 | #ifdef OPENSSL_NO_CMS 47 | RUST_CONF_OPENSSL_NO_CMS 48 | #endif 49 | 50 | #ifdef OPENSSL_NO_COMP 51 | RUST_CONF_OPENSSL_NO_COMP 52 | #endif 53 | 54 | #ifdef OPENSSL_NO_EC 55 | RUST_CONF_OPENSSL_NO_EC 56 | #endif 57 | 58 | #ifdef OPENSSL_NO_EC2M 59 | RUST_CONF_OPENSSL_NO_EC2M 60 | #endif 61 | 62 | #ifdef OPENSSL_NO_ENGINE 63 | RUST_CONF_OPENSSL_NO_ENGINE 64 | #endif 65 | 66 | #ifdef OPENSSL_NO_KRB5 67 | RUST_CONF_OPENSSL_NO_KRB5 68 | #endif 69 | 70 | #ifdef OPENSSL_NO_NEXTPROTONEG 71 | RUST_CONF_OPENSSL_NO_NEXTPROTONEG 72 | #endif 73 | 74 | #ifdef OPENSSL_NO_OCSP 75 | RUST_CONF_OPENSSL_NO_OCSP 76 | #endif 77 | 78 | #ifdef OPENSSL_NO_PSK 79 | RUST_CONF_OPENSSL_NO_PSK 80 | #endif 81 | 82 | #ifdef OPENSSL_NO_RFC3779 83 | RUST_CONF_OPENSSL_NO_RFC3779 84 | #endif 85 | 86 | #ifdef OPENSSL_NO_RMD160 87 | RUST_CONF_OPENSSL_NO_RMD160 88 | #endif 89 | 90 | #ifdef OPENSSL_NO_SHA 91 | RUST_CONF_OPENSSL_NO_SHA 92 | #endif 93 | 94 | #ifdef OPENSSL_NO_SRP 95 | RUST_CONF_OPENSSL_NO_SRP 96 | #endif 97 | 98 | #ifdef OPENSSL_NO_SSL3_METHOD 99 | RUST_CONF_OPENSSL_NO_SSL3_METHOD 100 | #endif 101 | 102 | #ifdef OPENSSL_NO_TLSEXT 103 | RUST_CONF_OPENSSL_NO_TLSEXT 104 | #endif 105 | 106 | #ifdef OPENSSL_NO_STDIO 107 | RUST_CONF_OPENSSL_NO_STDIO 108 | #endif 109 | 110 | #ifdef OPENSSL_NO_SM3 111 | RUST_CONF_OPENSSL_NO_SM3 112 | #endif 113 | 114 | #ifdef OPENSSL_NO_SM4 115 | RUST_CONF_OPENSSL_NO_SM4 116 | #endif 117 | 118 | #ifdef OPENSSL_NO_DEPRECATED_3_0 119 | RUST_CONF_OPENSSL_NO_DEPRECATED_3_0 120 | #endif 121 | 122 | #ifdef OPENSSL_NO_SEED 123 | RUST_CONF_OPENSSL_NO_SEED 124 | #endif 125 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/openssl/find_normal.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | ffi::OsString, 3 | path::{Path, PathBuf}, 4 | process::Command, 5 | }; 6 | 7 | use super::env; 8 | 9 | pub fn get_openssl(target: &str) -> Result<(Vec, PathBuf), ()> { 10 | let lib_dir = env("OPENSSL_LIB_DIR").map(PathBuf::from); 11 | let include_dir = env("OPENSSL_INCLUDE_DIR").map(PathBuf::from); 12 | 13 | Ok(match (lib_dir, include_dir) { 14 | (Some(lib_dir), Some(include_dir)) => (vec![lib_dir], include_dir), 15 | (lib_dir, include_dir) => { 16 | let openssl_dir = match env("OPENSSL_DIR") { 17 | Some(s) => s, 18 | None => find_openssl_dir(target)?, 19 | }; 20 | let openssl_dir = Path::new(&openssl_dir); 21 | let lib_dir = lib_dir.map(|d| vec![d]).unwrap_or_else(|| { 22 | let mut lib_dirs = vec![]; 23 | // OpenSSL 
3.0 now puts it's libraries in lib64/ by default, 24 | // check for both it and lib/. 25 | if openssl_dir.join("lib64").exists() { 26 | lib_dirs.push(openssl_dir.join("lib64")); 27 | } 28 | if openssl_dir.join("lib").exists() { 29 | lib_dirs.push(openssl_dir.join("lib")); 30 | } 31 | lib_dirs 32 | }); 33 | let include_dir = include_dir.unwrap_or_else(|| openssl_dir.join("include")); 34 | (lib_dir, include_dir) 35 | } 36 | }) 37 | } 38 | 39 | fn resolve_with_wellknown_homebrew_location(dir: &str) -> Option { 40 | let versions = ["openssl@3", "openssl@1.1"]; 41 | 42 | // Check up default aarch 64 Homebrew installation location first 43 | // for quick resolution if possible. 44 | // `pkg-config` on brew doesn't necessarily contain settings for openssl apparently. 45 | for version in &versions { 46 | let homebrew = Path::new(dir).join(format!("opt/{}", version)); 47 | if homebrew.exists() { 48 | return Some(homebrew); 49 | } 50 | } 51 | 52 | for version in &versions { 53 | // Calling `brew --prefix ` command usually slow and 54 | // takes seconds, and will be used only as a last resort. 55 | let output = execute_command_and_get_output("brew", &["--prefix", version]); 56 | if let Some(ref output) = output { 57 | let homebrew = Path::new(&output); 58 | if homebrew.exists() { 59 | return Some(homebrew.to_path_buf()); 60 | } 61 | } 62 | } 63 | 64 | None 65 | } 66 | 67 | fn resolve_with_wellknown_location(dir: &str) -> Option { 68 | let root_dir = Path::new(dir); 69 | let include_openssl = root_dir.join("include/openssl"); 70 | if include_openssl.exists() { 71 | Some(root_dir.to_path_buf()) 72 | } else { 73 | None 74 | } 75 | } 76 | 77 | fn find_openssl_dir(target: &str) -> Result { 78 | let host = env::var("HOST").unwrap(); 79 | 80 | if host == target && target.ends_with("-apple-darwin") { 81 | let homebrew_dir = match target { 82 | "aarch64-apple-darwin" => "/opt/homebrew", 83 | _ => "/usr/local", 84 | }; 85 | 86 | if let Some(dir) = resolve_with_wellknown_homebrew_location(homebrew_dir) { 87 | return Ok(dir.into()); 88 | } else if let Some(dir) = resolve_with_wellknown_location("/opt/pkg") { 89 | // pkgsrc 90 | return Ok(dir.into()); 91 | } else if let Some(dir) = resolve_with_wellknown_location("/opt/local") { 92 | // MacPorts 93 | return Ok(dir.into()); 94 | } 95 | } 96 | 97 | try_pkg_config()?; 98 | try_vcpkg()?; 99 | 100 | // FreeBSD ships with OpenSSL but doesn't include a pkg-config file :( 101 | if host == target && target.contains("freebsd") { 102 | return Ok(OsString::from("/usr")); 103 | } 104 | 105 | // DragonFly has libressl (or openssl) in ports, but this doesn't include a pkg-config file 106 | if host == target && target.contains("dragonfly") { 107 | return Ok(OsString::from("/usr/local")); 108 | } 109 | 110 | let mut msg = format!( 111 | " 112 | 113 | Could not find directory of OpenSSL installation, and this `-sys` crate cannot 114 | proceed without this knowledge. If OpenSSL is installed and this crate had 115 | trouble finding it, you can set the `OPENSSL_DIR` environment variable for the 116 | compilation process. 117 | 118 | Make sure you also have the development packages of openssl installed. 119 | For example, `libssl-dev` on Ubuntu or `openssl-devel` on Fedora. 120 | 121 | If you're in a situation where you think the directory *should* be found 122 | automatically, please open a bug at https://github.com/sfackler/rust-openssl 123 | and include information about your system as well as this message. 
124 | 125 | $HOST = {} 126 | $TARGET = {} 127 | openssl-sys = {} 128 | 129 | ", 130 | host, 131 | target, 132 | env!("CARGO_PKG_VERSION") 133 | ); 134 | 135 | if host.contains("apple-darwin") && target.contains("apple-darwin") { 136 | let system = Path::new("/usr/lib/libssl.0.9.8.dylib"); 137 | if system.exists() { 138 | msg.push_str( 139 | " 140 | 141 | openssl-sys crate build failed: no supported version of OpenSSL found. 142 | 143 | Ways to fix it: 144 | - Use the `openssl_vendored` feature of libduckdb-sys crate to build OpenSSL from source. 145 | - Use Homebrew to install the `openssl` package. 146 | 147 | ", 148 | ); 149 | } 150 | } 151 | 152 | if host.contains("unknown-linux") 153 | && target.contains("unknown-linux-gnu") 154 | && Command::new("pkg-config").output().is_err() 155 | { 156 | msg.push_str( 157 | " 158 | It looks like you're compiling on Linux and also targeting Linux. Currently this 159 | requires the `pkg-config` utility to find OpenSSL but unfortunately `pkg-config` 160 | could not be found. If you have OpenSSL installed you can likely fix this by 161 | installing `pkg-config`. 162 | 163 | ", 164 | ); 165 | } 166 | 167 | if host.contains("windows") && target.contains("windows-gnu") { 168 | msg.push_str( 169 | " 170 | It looks like you're compiling for MinGW but you may not have either OpenSSL or 171 | pkg-config installed. You can install these two dependencies with: 172 | 173 | pacman -S openssl-devel pkg-config 174 | 175 | and try building this crate again. 176 | 177 | ", 178 | ); 179 | } 180 | 181 | if host.contains("windows") && target.contains("windows-msvc") { 182 | msg.push_str( 183 | " 184 | It looks like you're compiling for MSVC but we couldn't detect an OpenSSL 185 | installation. If there isn't one installed then you can try the rust-openssl 186 | README for more information about how to download precompiled binaries of 187 | OpenSSL: 188 | 189 | https://github.com/sfackler/rust-openssl#windows 190 | 191 | ", 192 | ); 193 | } 194 | 195 | panic!("{}", msg); 196 | } 197 | 198 | /// Attempt to find OpenSSL through pkg-config. 199 | /// 200 | /// Note that if this succeeds then the function does not return as pkg-config 201 | /// typically tells us all the information that we need. 202 | fn try_pkg_config() -> Result<(), ()> { 203 | let target = env::var("TARGET").unwrap(); 204 | let host = env::var("HOST").unwrap(); 205 | 206 | // If we're going to windows-gnu we can use pkg-config, but only so long as 207 | // we're coming from a windows host. 208 | // 209 | // Otherwise if we're going to windows we probably can't use pkg-config. 210 | if target.contains("windows-gnu") && host.contains("windows") { 211 | env::set_var("PKG_CONFIG_ALLOW_CROSS", "1"); 212 | } else if target.contains("windows") { 213 | return Ok(()); 214 | } 215 | 216 | let lib = match pkg_config::Config::new().print_system_libs(false).probe("openssl") { 217 | Ok(lib) => lib, 218 | Err(e) => { 219 | println!("run pkg_config fail: {:?}", e); 220 | return Ok(()); 221 | } 222 | }; 223 | 224 | super::postprocess(&lib.include_paths); 225 | 226 | for include in lib.include_paths.iter() { 227 | println!("cargo:include={}", include.display()); 228 | } 229 | 230 | Err(()) 231 | } 232 | 233 | /// Attempt to find OpenSSL through vcpkg. 234 | /// 235 | /// Note that if this succeeds then the function does not return as vcpkg 236 | /// should emit all of the cargo metadata that we need. 
237 | #[cfg(target_env = "msvc")] 238 | fn try_vcpkg() -> Result<(), ()> { 239 | // vcpkg will not emit any metadata if it can not find libraries 240 | // appropriate for the target triple with the desired linkage. 241 | 242 | let lib = match vcpkg::Config::new().emit_includes(true).find_package("openssl") { 243 | Ok(lib) => lib, 244 | Err(e) => { 245 | println!("note: vcpkg did not find openssl: {}", e); 246 | return Ok(()); 247 | } 248 | }; 249 | 250 | super::postprocess(&lib.include_paths); 251 | 252 | println!("cargo:rustc-link-lib=user32"); 253 | println!("cargo:rustc-link-lib=gdi32"); 254 | println!("cargo:rustc-link-lib=crypt32"); 255 | 256 | Err(()) 257 | } 258 | 259 | #[cfg(not(target_env = "msvc"))] 260 | fn try_vcpkg() -> Result<(), ()> { 261 | Ok(()) 262 | } 263 | 264 | fn execute_command_and_get_output(cmd: &str, args: &[&str]) -> Option { 265 | let out = Command::new(cmd).args(args).output(); 266 | if let Ok(ref r1) = out { 267 | if r1.status.success() { 268 | let r2 = String::from_utf8(r1.stdout.clone()); 269 | if let Ok(r3) = r2 { 270 | return Some(r3.trim().to_string()); 271 | } 272 | } 273 | } 274 | 275 | None 276 | } 277 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/openssl/find_vendored.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | pub fn get_openssl(_target: &str) -> (Vec, PathBuf) { 4 | let artifacts = openssl_src::Build::new().build(); 5 | println!("cargo:openssl_vendored=1"); 6 | println!("cargo:root={}", artifacts.lib_dir().parent().unwrap().display()); 7 | 8 | ( 9 | vec![artifacts.lib_dir().to_path_buf()], 10 | artifacts.include_dir().to_path_buf(), 11 | ) 12 | } 13 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/openssl/run_bindgen.rs: -------------------------------------------------------------------------------- 1 | use bindgen::{ 2 | callbacks::{MacroParsingBehavior, ParseCallbacks}, 3 | RustTarget, 4 | }; 5 | use std::{env, path::PathBuf}; 6 | 7 | const INCLUDES: &str = " 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | // this must be included after ssl.h for libressl! 
37 | #include 38 | 39 | #if !defined(LIBRESSL_VERSION_NUMBER) 40 | #include 41 | #endif 42 | 43 | #if !defined(LIBRESSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x10100000 44 | #include 45 | #endif 46 | 47 | #if OPENSSL_VERSION_NUMBER >= 0x30000000 48 | #include 49 | #endif 50 | "; 51 | 52 | pub fn run(include_dirs: &[PathBuf]) { 53 | let out_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap()); 54 | 55 | let mut builder = bindgen::builder() 56 | .parse_callbacks(Box::new(OpensslCallbacks)) 57 | .rust_target(RustTarget::Stable_1_47) 58 | .ctypes_prefix("::libc") 59 | .raw_line("use libc::*;") 60 | .raw_line("type evp_pkey_st = EVP_PKEY;") 61 | .allowlist_file(".*/openssl/[^/]+\\.h") 62 | .allowlist_recursively(false) 63 | // libc is missing pthread_once_t on macOS 64 | .blocklist_type("CRYPTO_ONCE") 65 | .blocklist_function("CRYPTO_THREAD_run_once") 66 | // we don't want to mess with va_list 67 | .blocklist_function("BIO_vprintf") 68 | .blocklist_function("BIO_vsnprintf") 69 | .blocklist_function("ERR_vset_error") 70 | .blocklist_function("ERR_add_error_vdata") 71 | .blocklist_function("EVP_KDF_vctrl") 72 | .blocklist_type("OSSL_FUNC_core_vset_error_fn") 73 | .blocklist_type("OSSL_FUNC_BIO_vprintf_fn") 74 | .blocklist_type("OSSL_FUNC_BIO_vsnprintf_fn") 75 | // Maintain compatibility for existing enum definitions 76 | .rustified_enum("point_conversion_form_t") 77 | // Maintain compatibility for pre-union definitions 78 | .blocklist_type("GENERAL_NAME") 79 | .blocklist_type("GENERAL_NAME_st") 80 | .blocklist_type("EVP_PKEY") 81 | .blocklist_type("evp_pkey_st") 82 | .layout_tests(false) 83 | .header_contents("includes.h", INCLUDES); 84 | 85 | for include_dir in include_dirs { 86 | builder = builder.clang_arg("-I").clang_arg(include_dir.display().to_string()); 87 | } 88 | 89 | builder 90 | .generate() 91 | .unwrap() 92 | .write_to_file(out_dir.join("bindgen.rs")) 93 | .unwrap(); 94 | } 95 | 96 | #[derive(Debug)] 97 | struct OpensslCallbacks; 98 | 99 | impl ParseCallbacks for OpensslCallbacks { 100 | // for now we'll continue hand-writing constants 101 | fn will_parse_macro(&self, _name: &str) -> MacroParsingBehavior { 102 | MacroParsingBehavior::Ignore 103 | } 104 | 105 | fn item_name(&self, original_item_name: &str) -> Option { 106 | match original_item_name { 107 | // Our original definitions of these are wrong, so rename to avoid breakage 108 | "CRYPTO_EX_new" 109 | | "CRYPTO_EX_dup" 110 | | "CRYPTO_EX_free" 111 | | "BIO_meth_set_write" 112 | | "BIO_meth_set_read" 113 | | "BIO_meth_set_puts" 114 | | "BIO_meth_set_ctrl" 115 | | "BIO_meth_set_create" 116 | | "BIO_meth_set_destroy" 117 | | "CRYPTO_set_locking_callback" 118 | | "CRYPTO_set_id_callback" 119 | | "SSL_CTX_set_tmp_dh_callback" 120 | | "SSL_set_tmp_dh_callback" 121 | | "SSL_CTX_set_tmp_ecdh_callback" 122 | | "SSL_set_tmp_ecdh_callback" 123 | | "SSL_CTX_callback_ctrl" 124 | | "SSL_CTX_set_alpn_select_cb" => Some(format!("{}__fixed_rust", original_item_name)), 125 | _ => None, 126 | } 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/src/error.rs: -------------------------------------------------------------------------------- 1 | use crate::duckdb_state; 2 | use std::{error, fmt}; 3 | 4 | /// Error Codes 5 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 6 | #[non_exhaustive] 7 | pub enum ErrorCode { 8 | /// Internal logic error in SQLite 9 | InternalMalfunction, 10 | /// Access permission denied 11 | PermissionDenied, 12 | /// Callback routine requested an 
abort 13 | OperationAborted, 14 | /// The database file is locked 15 | DatabaseBusy, 16 | /// A table in the database is locked 17 | DatabaseLocked, 18 | /// A malloc() failed 19 | OutOfMemory, 20 | /// Attempt to write a readonly database 21 | ReadOnly, 22 | /// Operation terminated by sqlite3_interrupt() 23 | OperationInterrupted, 24 | /// Some kind of disk I/O error occurred 25 | SystemIoFailure, 26 | /// The database disk image is malformed 27 | DatabaseCorrupt, 28 | /// Unknown opcode in sqlite3_file_control() 29 | NotFound, 30 | /// Insertion failed because database is full 31 | DiskFull, 32 | /// Unable to open the database file 33 | CannotOpen, 34 | /// Database lock protocol error 35 | FileLockingProtocolFailed, 36 | /// The database schema changed 37 | SchemaChanged, 38 | /// String or BLOB exceeds size limit 39 | TooBig, 40 | /// Abort due to constraint violation 41 | ConstraintViolation, 42 | /// Data type mismatch 43 | TypeMismatch, 44 | /// Library used incorrectly 45 | ApiMisuse, 46 | /// Uses OS features not supported on host 47 | NoLargeFileSupport, 48 | /// Authorization denied 49 | AuthorizationForStatementDenied, 50 | /// 2nd parameter to sqlite3_bind out of range 51 | ParameterOutOfRange, 52 | /// File opened that is not a database file 53 | NotADatabase, 54 | /// SQL error or missing database 55 | Unknown, 56 | } 57 | 58 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 59 | pub struct Error { 60 | pub code: ErrorCode, 61 | pub extended_code: duckdb_state, 62 | } 63 | 64 | impl Error { 65 | pub fn new(result_code: duckdb_state) -> Error { 66 | Error { 67 | code: ErrorCode::Unknown, 68 | extended_code: result_code, 69 | } 70 | } 71 | } 72 | 73 | impl fmt::Display for Error { 74 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 75 | write!( 76 | f, 77 | "Error code {}: {}", 78 | self.extended_code, 79 | code_to_str(self.extended_code) 80 | ) 81 | } 82 | } 83 | 84 | impl error::Error for Error { 85 | fn description(&self) -> &str { 86 | code_to_str(self.extended_code) 87 | } 88 | } 89 | 90 | pub fn code_to_str(_: duckdb_state) -> &'static str { 91 | "Unknown error code" 92 | } 93 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | #![allow(deref_nullptr)] 5 | #![allow(improper_ctypes)] 6 | 7 | #[allow(clippy::all)] 8 | mod bindings { 9 | include!(concat!(env!("OUT_DIR"), "/bindgen.rs")); 10 | } 11 | #[allow(clippy::all)] 12 | pub use bindings::*; 13 | 14 | mod string; 15 | pub use string::*; 16 | 17 | pub const DuckDBError: duckdb_state = duckdb_state_DuckDBError; 18 | pub const DuckDBSuccess: duckdb_state = duckdb_state_DuckDBSuccess; 19 | 20 | pub use self::error::*; 21 | mod error; 22 | 23 | #[cfg(test)] 24 | mod tests { 25 | use super::*; 26 | use std::{ 27 | ffi::{CStr, CString}, 28 | mem, 29 | os::raw::c_char, 30 | ptr, 31 | }; 32 | 33 | use arrow::{ 34 | array::{Array, Int32Array, StructArray}, 35 | datatypes::DataType, 36 | ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema}, 37 | }; 38 | 39 | unsafe fn print_int_result(result: &mut duckdb_result) { 40 | for i in 0..duckdb_column_count(result) { 41 | print!("{} ", CStr::from_ptr(duckdb_column_name(result, i)).to_string_lossy()); 42 | } 43 | println!(); 44 | // print the data of the result 45 | for row_idx in 0..duckdb_row_count(result) { 46 | for col_idx in 
0..duckdb_column_count(result) { 47 | let val = duckdb_value_int32(result, col_idx, row_idx); 48 | print!("{val} "); 49 | } 50 | println!(); 51 | } 52 | } 53 | 54 | #[test] 55 | fn test_query_arrow() { 56 | unsafe { 57 | // open db 58 | let mut db: duckdb_database = ptr::null_mut(); 59 | let mut con: duckdb_connection = ptr::null_mut(); 60 | if duckdb_open(ptr::null_mut(), &mut db) != duckdb_state_DuckDBSuccess { 61 | panic!("duckdb_open error") 62 | } 63 | if duckdb_connect(db, &mut con) != duckdb_state_DuckDBSuccess { 64 | panic!("duckdb_connect error") 65 | } 66 | // create a table 67 | let sql = CString::new("CREATE TABLE integers(i INTEGER, j INTEGER);").unwrap(); 68 | if duckdb_query(con, sql.as_ptr() as *const c_char, ptr::null_mut()) != duckdb_state_DuckDBSuccess { 69 | panic!("CREATE TABLE error") 70 | } 71 | 72 | // insert three rows into the table 73 | let sql = CString::new("INSERT INTO integers VALUES (3, 4), (5, 6), (7, NULL);").unwrap(); 74 | let mut result: duckdb_arrow = ptr::null_mut(); 75 | if duckdb_query_arrow(con, sql.as_ptr() as *const c_char, &mut result) != duckdb_state_DuckDBSuccess { 76 | panic!("INSERT error") 77 | } 78 | assert_eq!(duckdb_arrow_rows_changed(result), 3); 79 | duckdb_destroy_arrow(&mut result); 80 | 81 | // query rows again 82 | let mut result: duckdb_arrow = ptr::null_mut(); 83 | let sql = CString::new("select i, j from integers order by i desc").unwrap(); 84 | if duckdb_query_arrow(con, sql.as_ptr() as *const c_char, &mut result) != duckdb_state_DuckDBSuccess { 85 | panic!("SELECT error") 86 | } 87 | assert_eq!(duckdb_arrow_row_count(result), 3); 88 | assert_eq!(duckdb_arrow_column_count(result), 2); 89 | 90 | let mut arrays = FFI_ArrowArray::empty(); 91 | let mut schema = FFI_ArrowSchema::empty(); 92 | if duckdb_query_arrow_schema( 93 | result, 94 | &mut std::ptr::addr_of_mut!(schema) as *mut _ as *mut duckdb_arrow_schema, 95 | ) != duckdb_state_DuckDBSuccess 96 | { 97 | panic!("SELECT error") 98 | } 99 | if duckdb_query_arrow_array( 100 | result, 101 | &mut std::ptr::addr_of_mut!(arrays) as *mut _ as *mut duckdb_arrow_array, 102 | ) != duckdb_state_DuckDBSuccess 103 | { 104 | panic!("SELECT error") 105 | } 106 | let array_data = from_ffi(arrays, &schema).expect("ok"); 107 | let struct_array = StructArray::from(array_data); 108 | assert_eq!(struct_array.len(), 3); 109 | assert_eq!(struct_array.columns().len(), 2); 110 | assert_eq!(struct_array.column(0).data_type(), &DataType::Int32); 111 | assert_eq!(struct_array.column(1).data_type(), &DataType::Int32); 112 | let arr_i = struct_array.column(0).as_any().downcast_ref::().unwrap(); 113 | assert_eq!(arr_i.value(0), 7); 114 | assert_eq!(arr_i.value(1), 5); 115 | assert_eq!(arr_i.value(2), 3); 116 | let arr_j = struct_array.column(1).as_any().downcast_ref::().unwrap(); 117 | assert!(arr_j.is_null(0)); 118 | assert_eq!(arr_j.value(1), 6); 119 | assert_eq!(arr_j.value(2), 4); 120 | 121 | let mut arrays: duckdb_arrow_array = ptr::null_mut(); 122 | if duckdb_query_arrow_array(result, &mut arrays) != duckdb_state_DuckDBSuccess { 123 | panic!("SELECT error") 124 | } 125 | assert!(arrays.is_null()); 126 | duckdb_destroy_arrow(&mut result); 127 | duckdb_disconnect(&mut con); 128 | duckdb_close(&mut db); 129 | } 130 | } 131 | 132 | #[test] 133 | fn basic_api_usage() { 134 | unsafe { 135 | // open db 136 | let mut db: duckdb_database = ptr::null_mut(); 137 | let mut con: duckdb_connection = ptr::null_mut(); 138 | if duckdb_open(ptr::null_mut(), &mut db) != duckdb_state_DuckDBSuccess { 139 | 
panic!("duckdb_open error") 140 | } 141 | if duckdb_connect(db, &mut con) != duckdb_state_DuckDBSuccess { 142 | panic!("duckdb_connect error") 143 | } 144 | // create a table 145 | let sql = CString::new("CREATE TABLE integers(i INTEGER, j INTEGER);").unwrap(); 146 | if duckdb_query(con, sql.as_ptr() as *const c_char, ptr::null_mut()) != duckdb_state_DuckDBSuccess { 147 | panic!("CREATE TABLE error") 148 | } 149 | // insert three rows into the table 150 | let sql = CString::new("INSERT INTO integers VALUES (3, 4), (5, 6), (7, NULL);").unwrap(); 151 | if duckdb_query(con, sql.as_ptr() as *const c_char, ptr::null_mut()) != duckdb_state_DuckDBSuccess { 152 | panic!("INSERT error") 153 | } 154 | // query rows again 155 | let mut result: duckdb_result = mem::zeroed(); 156 | let sql = CString::new("select * from integers").unwrap(); 157 | if duckdb_query(con, sql.as_ptr() as *const c_char, &mut result) != duckdb_state_DuckDBSuccess { 158 | panic!( 159 | "SELECT error: {}", 160 | CStr::from_ptr(duckdb_result_error(&mut result)).to_string_lossy() 161 | ) 162 | } 163 | assert_eq!(duckdb_row_count(&mut result), 3); 164 | assert_eq!(duckdb_column_count(&mut result), 2); 165 | print_int_result(&mut result); 166 | duckdb_destroy_result(&mut result); 167 | 168 | // test prepare 169 | let mut stmt: duckdb_prepared_statement = ptr::null_mut(); 170 | let sql = CString::new("select * from integers where i>?").unwrap(); 171 | if duckdb_prepare(con, sql.as_ptr() as *const c_char, &mut stmt) != duckdb_state_DuckDBSuccess { 172 | panic!("Prepare error"); 173 | } 174 | if duckdb_bind_int32(stmt, 1, 4) != duckdb_state_DuckDBSuccess { 175 | panic!("Bind params error"); 176 | } 177 | if duckdb_execute_prepared(stmt, &mut result) != duckdb_state_DuckDBSuccess { 178 | panic!("Execute prepared error"); 179 | } 180 | assert_eq!(duckdb_row_count(&mut result), 2); 181 | assert_eq!(duckdb_column_count(&mut result), 2); 182 | print_int_result(&mut result); 183 | duckdb_destroy_result(&mut result); 184 | 185 | // test bind params again 186 | if duckdb_bind_int32(stmt, 1, 5) != duckdb_state_DuckDBSuccess { 187 | panic!("Bind params error"); 188 | } 189 | if duckdb_execute_prepared(stmt, &mut result) != duckdb_state_DuckDBSuccess { 190 | panic!("Execute prepared error"); 191 | } 192 | assert_eq!(duckdb_row_count(&mut result), 1); 193 | assert_eq!(duckdb_column_count(&mut result), 2); 194 | print_int_result(&mut result); 195 | duckdb_destroy_result(&mut result); 196 | duckdb_destroy_prepare(&mut stmt); 197 | 198 | // clean up 199 | duckdb_disconnect(&mut con); 200 | duckdb_close(&mut db); 201 | } 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/src/raw_statement.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/src/string.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | ffi::{c_char, CStr}, 3 | ops::Deref, 4 | }; 5 | 6 | use crate::duckdb_free; 7 | 8 | pub struct DuckDbString { 9 | // Invariant: ptr[0..len+1] is valid C string, i.e. ptr[len] is NUL byte. 10 | ptr: core::ptr::NonNull, 11 | len: usize, 12 | } 13 | 14 | impl DuckDbString { 15 | /// Creates a `DuckDbString` from a raw pointer to a C string. 16 | /// 17 | /// # Safety 18 | /// 19 | /// The caller must ensure that the pointer is valid and points to a null-terminated C string. 
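/// The pointer must also have been allocated by DuckDB, because the returned `DuckDbString` releases it with `duckdb_free` when dropped.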
20 | /// The memory must remain valid for the lifetime of the returned `DuckDbString`. 21 | pub unsafe fn from_ptr(ptr: *const c_char) -> Self { 22 | let len = unsafe { CStr::from_ptr(ptr) }.to_bytes().len(); 23 | unsafe { Self::from_raw_parts(ptr, len) } 24 | } 25 | 26 | /// Creates a `DuckDbString` from raw parts. 27 | /// 28 | /// # Safety 29 | /// 30 | /// The caller must ensure that: 31 | /// - `ptr` is a valid pointer to a null-terminated C string. 32 | /// - `len` accurately represents the length of the string (excluding the null terminator). 33 | /// - The memory referenced by `ptr` remains valid for the lifetime of the returned `DuckDbString`. 34 | /// - The string data is not mutated for the lifetime of the returned `DuckDbString`. 35 | pub unsafe fn from_raw_parts(ptr: *const c_char, len: usize) -> Self { 36 | let ptr = unsafe { core::ptr::NonNull::new_unchecked(ptr as *mut c_char) }; 37 | Self { ptr, len } 38 | } 39 | 40 | fn to_bytes_with_nul(&self) -> &[u8] { 41 | let ptr = self.ptr.as_ptr() as *const u8; 42 | unsafe { core::slice::from_raw_parts(ptr, self.len + 1) } 43 | } 44 | } 45 | 46 | impl Deref for DuckDbString { 47 | type Target = std::ffi::CStr; 48 | 49 | fn deref(&self) -> &Self::Target { 50 | let bytes = self.to_bytes_with_nul(); 51 | unsafe { CStr::from_bytes_with_nul_unchecked(bytes) } 52 | } 53 | } 54 | 55 | impl Drop for DuckDbString { 56 | fn drop(&mut self) { 57 | let ptr = self.ptr.as_ptr() as *mut core::ffi::c_void; 58 | unsafe { duckdb_free(ptr) }; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/update_sources.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import os 5 | import shutil 6 | import subprocess 7 | 8 | SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) 9 | 10 | # Path to package_build.py 11 | DUCKDB_SCRIPTS_DIR = os.path.join(SCRIPT_DIR, "duckdb-sources", "scripts") 12 | # Path to target 13 | TARGET_DIR = os.path.join(SCRIPT_DIR, "duckdb") 14 | # Path to src 15 | SRC_DIR = os.path.join(SCRIPT_DIR, "src") 16 | 17 | # List of extensions' sources to grab. Technically, these sources will be compiled 18 | # but not included in the final build unless they're explicitly enabled. 
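# (How an extension actually gets enabled is up to the consuming build: typically via a
# matching Cargo feature on libduckdb-sys such as `--features json`; the exact feature
# names are an assumption here, not something this script controls.)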
19 | EXTENSIONS = ["core_functions", "parquet", "json"] 20 | 21 | # Clear the duckdb directory 22 | try: 23 | shutil.rmtree(os.path.join(TARGET_DIR)) 24 | os.remove(os.path.join(SCRIPT_DIR, "duckdb.tar.gz")) 25 | except FileNotFoundError: 26 | pass 27 | 28 | os.mkdir(TARGET_DIR) 29 | 30 | import sys 31 | 32 | sys.path.append(DUCKDB_SCRIPTS_DIR) 33 | import package_build 34 | 35 | 36 | def get_sources(extensions): 37 | (source_list, include_list, _) = package_build.build_package( 38 | TARGET_DIR, extensions, False 39 | ) 40 | 41 | # Remove the absolute prefix on the files (some get generated with it) 42 | source_list = [ 43 | x[len(SCRIPT_DIR) + 1 :] if x.startswith(SCRIPT_DIR) else x for x in source_list 44 | ] 45 | 46 | return set(source_list), set(include_list) 47 | 48 | 49 | base_source_list, base_include_list = get_sources([]) 50 | 51 | extension_sources = {} 52 | for e in EXTENSIONS: 53 | source_list, include_list = get_sources([e]) 54 | extension_sources[e] = { 55 | "cpp_files": list(source_list - base_source_list), 56 | "include_dirs": list(include_list - base_include_list), 57 | } 58 | 59 | manifest = { 60 | "base": { 61 | "cpp_files": list(base_source_list), 62 | "include_dirs": list(base_include_list), 63 | }, 64 | "extensions": extension_sources, 65 | } 66 | 67 | with open(os.path.join(TARGET_DIR, "manifest.json"), "w") as f: 68 | json.dump(manifest, f, indent=2) 69 | 70 | 71 | subprocess.check_call( 72 | "tar -czf duckdb.tar.gz duckdb", 73 | shell=True, 74 | cwd=SCRIPT_DIR, 75 | ) 76 | -------------------------------------------------------------------------------- /crates/libduckdb-sys/upgrade.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT=$(realpath "$0") 6 | SCRIPT_DIR=$(dirname "$SCRIPT") 7 | 8 | echo "$SCRIPT_DIR" 9 | cd "$SCRIPT_DIR" 10 | cargo clean 11 | mkdir -p "$SCRIPT_DIR/../../target" "$SCRIPT_DIR/duckdb" 12 | export DUCKDB_LIB_DIR="$SCRIPT_DIR/duckdb" 13 | 14 | # Download and extract amalgamation 15 | DUCKDB_VERSION=v1.3.0 16 | git submodule update --init --checkout 17 | cd "$SCRIPT_DIR/duckdb-sources" 18 | git fetch 19 | git checkout "$DUCKDB_VERSION" 20 | cd "$SCRIPT_DIR" 21 | python3 "$SCRIPT_DIR/update_sources.py" 22 | 23 | # Regenerate bindgen file for DUCKDB 24 | cd "$SCRIPT_DIR" 25 | rm -f "$SCRIPT_DIR/src/bindgen_bundled_version_loadable.rs" 26 | find "$SCRIPT_DIR/../../target" -type f -name bindgen.rs -exec rm {} \; 27 | cargo build --features "extensions-full buildtime_bindgen loadable-extension" 28 | find "$SCRIPT_DIR/../../target" -type f -name bindgen.rs -exec cp {} "$SCRIPT_DIR/src/bindgen_bundled_version_loadable.rs" \; 29 | 30 | # Sanity checks 31 | # FIXME: how to test this here? 32 | 33 | # Regenerate bindgen file for DUCKDB 34 | rm -f "$SCRIPT_DIR/src/bindgen_bundled_version.rs" 35 | # Just to make sure there is only one bindgen.rs file in target dir 36 | find "$SCRIPT_DIR/../../target" -type f -name bindgen.rs -exec rm {} \; 37 | cargo build --features "extensions-full buildtime_bindgen" 38 | find "$SCRIPT_DIR/../../target" -type f -name bindgen.rs -exec cp {} "$SCRIPT_DIR/src/bindgen_bundled_version.rs" \; 39 | 40 | # Sanity checks 41 | cd "$SCRIPT_DIR/.." 
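# Run the test suite against the freshly regenerated bindings.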
42 | cargo test --features "extensions-full buildtime_bindgen"
43 |
44 | printf ' \e[35;1mFinished\e[0m bundled DUCKDB tests\n'
45 |
--------------------------------------------------------------------------------
/crates/libduckdb-sys/wrapper.h:
--------------------------------------------------------------------------------
1 | #include "duckdb/duckdb.h"
--------------------------------------------------------------------------------
/crates/libduckdb-sys/wrapper_ext.h:
--------------------------------------------------------------------------------
1 | // FIXME: remove this once C EXTENSION API is stable (expected for DuckDB v1.2.0 release)
2 | #define DUCKDB_EXTENSION_API_VERSION_DEV 1
3 |
4 | // We need to allow unstable API for now to get the deprecated arrow functions
5 | #define DUCKDB_EXTENSION_API_VERSION_UNSTABLE
6 | #include "duckdb/duckdb_extension.h"
--------------------------------------------------------------------------------
/upgrade.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | ## How to run
4 | ## `./upgrade.sh`
5 |
6 | # https://gist.github.com/lukechilds/a83e1d7127b78fef38c2914c4ececc3c
7 | # Usage
8 | # $ get_latest_release "duckdb/duckdb"
9 | get_latest_release() {
10 | curl --silent "https://api.github.com/repos/$1/releases/latest" | # Get the latest release from the GitHub API
11 | grep '"tag_name":' | # Get tag line
12 | sed -E 's/.*"v([^"]+)".*/\1/' # Pluck JSON value
13 | }
14 |
15 | duckdb_version=$(get_latest_release "duckdb/duckdb")
16 | duckdb_rs_version=$(get_latest_release "duckdb/duckdb-rs")
17 |
18 | if [ "$duckdb_version" = "$duckdb_rs_version" ]; then
19 | echo "Already up to date; the latest version is $duckdb_version"
20 | exit 0
21 | fi
22 |
23 | echo "Starting upgrade from $duckdb_rs_version to $duckdb_version"
24 |
25 | sed -i '' "s/$duckdb_rs_version/$duckdb_version/g" Cargo.toml crates/libduckdb-sys/upgrade.sh crates/libduckdb-sys/Cargo.toml .github/workflows/rust.yaml
26 | ./crates/libduckdb-sys/upgrade.sh
27 |
--------------------------------------------------------------------------------
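A short usage sketch for the `DuckDbString` type from `crates/libduckdb-sys/src/string.rs` above. The helper below is illustrative only and is not part of the crate; it assumes the `duckdb_value_varchar` binding is present in the generated bindings. That C API call hands back a buffer the caller must release with `duckdb_free`, which is exactly what `DuckDbString`'s `Drop` impl does, so wrapping the pointer avoids freeing it by hand.

    use std::os::raw::c_char;

    use libduckdb_sys::{duckdb_result, duckdb_value_varchar, DuckDbString};

    /// Reads the value at (`col`, `row`) of `result` as an owned Rust `String`,
    /// returning `None` when DuckDB hands back a null pointer (e.g. for NULL values).
    ///
    /// # Safety
    /// `result` must point to a valid result that has not been destroyed yet.
    unsafe fn value_as_string(result: &mut duckdb_result, col: u64, row: u64) -> Option<String> {
        // duckdb_value_varchar allocates the returned buffer with DuckDB's allocator,
        // and ownership passes to the caller.
        let ptr: *mut c_char = unsafe { duckdb_value_varchar(result, col, row) };
        if ptr.is_null() {
            return None;
        }
        // DuckDbString takes ownership and calls duckdb_free when it is dropped.
        let owned = unsafe { DuckDbString::from_ptr(ptr) };
        Some(owned.to_string_lossy().into_owned())
    }

Used together with the raw-API flow from the `basic_api_usage` test in `lib.rs`, this keeps string handling leak-free without an explicit `duckdb_free` at every call site.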