├── .editorconfig
├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── MainDistributionPipeline.yml
    │   └── schedule-1.2.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── Makefile
├── README.md
├── docs
    └── space-filling-curve-ducks.jpg
├── duckdb_lindel_rust
    ├── Cargo.lock
    ├── Cargo.toml
    ├── cbindgen.toml
    └── src
    │   └── lib.rs
├── extension_config.cmake
├── scripts
    ├── bootstrap-template.py
    └── extension-upload.sh
├── src
    ├── include
    │   ├── lindel_extension.hpp
    │   └── rust.h
    └── lindel_extension.cpp
├── test
    ├── README.md
    └── sql
    │   └── lindel.test
└── vcpkg.json


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # Unix-style newlines with a newline ending every file
 2 | [*.{c,cpp,h,hpp}]
 3 | end_of_line = lf
 4 | insert_final_newline = true
 5 | indent_style = tab
 6 | tab_width = 4
 7 | indent_size = tab
 8 | trim_trailing_whitespace = true
 9 | charset = utf-8
10 | max_line_length = 120
11 | x-soft-wrap-text = true
12 | x-soft-wrap-mode = CharacterWidth
13 | x-soft-wrap-limit = 120
14 | x-show-invisibles = false
15 | x-show-spaces = false
16 | 
17 | [*.{java}]
18 | end_of_line = lf
19 | insert_final_newline = true
20 | indent_style = tab
21 | tab_width = 4
22 | indent_size = tab
23 | trim_trailing_whitespace = false
24 | charset = utf-8
25 | max_line_length = 120
26 | x-soft-wrap-text = true
27 | x-soft-wrap-mode = CharacterWidth
28 | x-soft-wrap-limit = 120
29 | x-show-invisibles = false
30 | x-show-spaces = false
31 | 
32 | [*.{test,test_slow,test_coverage,benchmark}]
33 | end_of_line = lf
34 | insert_final_newline = true
35 | indent_style = tab
36 | tab_width = 4
37 | indent_size = tab
38 | trim_trailing_whitespace = false
39 | charset = utf-8
40 | x-soft-wrap-text = false
41 | 
42 | [Makefile]
43 | end_of_line = lf
44 | insert_final_newline = true
45 | indent_style = tab
46 | tab_width = 4
47 | indent_size = tab
48 | trim_trailing_whitespace = true
49 | charset = utf-8
50 | x-soft-wrap-text = false
51 | 
52 | [*keywords.list]
53 | insert_final_newline = false
54 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "cargo" # See documentation for possible values
 9 |     directory: "/duckdb_lindel_rust" # Location of package manifests
10 |     schedule:
11 |       interval: "daily"
12 | 


--------------------------------------------------------------------------------
/.github/workflows/MainDistributionPipeline.yml:
--------------------------------------------------------------------------------
 1 | #
 2 | # This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension
 3 | #
 4 | name: Main Extension Distribution Pipeline
 5 | on:
 6 |   push:
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 |   schedule:
10 |     - cron: '0 2 * * *'  # Runs every night at 02:00 UTC
11 | 
12 | concurrency:
13 |   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
14 |   cancel-in-progress: true
15 | 
16 | jobs:
17 |   # duckdb-next-build:
18 |   #   name: Build extension binaries
19 |   #   uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
20 |   #   with:
21 |   #     duckdb_version: main
22 |   #     ci_tools_version: main
23 |   #     enable_rust: true
24 |   #     extension_name: lindel
25 |   #     exclude_archs: "windows_amd64_rtools"
26 | 
27 |   duckdb-stable-build:
28 |     name: Build extension binaries
29 |     uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
30 |     with:
31 |       duckdb_version: main
32 |       ci_tools_version: main
33 |       extension_name: lindel
34 |       enable_rust: true
35 |       exclude_archs: "windows_amd64_rtools;wasm_mvp;wasm_eh;wasm_threads"
36 | 


--------------------------------------------------------------------------------
/.github/workflows/schedule-1.2.yml:
--------------------------------------------------------------------------------
 1 | name: Scheduled Trigger for 1.2
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: '0 12 * * *'  # Runs at 12:00 UTC every day
 6 |   workflow_dispatch:  # Allows manual trigger
 7 | 
 8 | jobs:
 9 |   trigger:
10 |     runs-on: ubuntu-latest
11 |     permissions:
12 |       actions: write  # Allow triggering workflows
13 |     steps:
14 |       - name: Checkout repository  # Lets gh infer the target repository
15 |         uses: actions/checkout@v4
16 | 
17 |       # The GitHub CLI is preinstalled on GitHub-hosted runners and reads its
18 |       # credentials from GH_TOKEN, so no install or login step is needed.
19 |       - name: Trigger Workflow on v1.2
20 |         env:
21 |           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22 |         run: |
23 |           gh workflow run MainDistributionPipeline.yml --ref v1.2
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | .idea
3 | cmake-build-debug
4 | duckdb_unittest_tempdir/
5 | .DS_Store
6 | testext
7 | test/python/__pycache__/
8 | .Rhistory
9 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "duckdb"]
2 | 	path = duckdb
3 | 	url = https://github.com/duckdb/duckdb
4 | 	branch = main
5 | [submodule "extension-ci-tools"]
6 | 	path = extension-ci-tools
7 | 	url = https://github.com/duckdb/extension-ci-tools
8 | 	branch = main


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.5)
  2 | 
  3 | set(CORROSION_VERBOSE_OUTPUT ON)
  4 | set(CMAKE_CXX_STANDARD 11)
  5 | set(CMAKE_CXX_STANDARD_REQUIRED 1)
  6 | 
  7 | 
  8 | set(prefix_to_check "wasm")
  9 | # Get the length of the prefix
 10 | string(LENGTH "${prefix_to_check}" prefix_length)
 11 | # Extract the prefix from the example_string
 12 | string(SUBSTRING "${DUCKDB_PLATFORM}" 0 ${prefix_length} extracted_platform_prefix)
 13 | 
 14 | 
 15 | execute_process(
 16 |     COMMAND rustup target list --installed
 17 |     OUTPUT_VARIABLE RUST_TARGETS
 18 | )
 19 | 
 20 | # Propagate arch to rust build for CI: derive the Cargo target triple from OS_NAME / OS_ARCH (left empty if nothing matches)
 21 | set(Rust_CARGO_TARGET "")
 22 | if("${OS_NAME}" STREQUAL "linux")
 23 |     if ("${OS_ARCH}" STREQUAL "arm64")
 24 |         set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
 25 |     elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64")  # compiler name mentions aarch64 (presumably a cross toolchain)
 26 |         set(RUST_ENV_VARS ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc)  # NOTE(review): was assigned to Rust_CARGO_TARGET and immediately overwritten; RUST_ENV_VARS must still be forwarded to the cargo build to take effect
 27 |         set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
 28 |     else()
 29 |         string(FIND "${RUST_TARGETS}" "musl" MUSL_TARGET_FOUND)  # prefer musl when an installed rustup target mentions it
 30 |         if(NOT MUSL_TARGET_FOUND EQUAL -1)
 31 |             set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl")
 32 |         else()
 33 |             set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
 34 |         endif()
 35 |     endif()
 36 | elseif("${OS_NAME}" STREQUAL "osx")
 37 |     if ("${OSX_BUILD_ARCH}" STREQUAL "arm64")  # OSX_BUILD_ARCH is checked before OS_ARCH
 38 |         set(Rust_CARGO_TARGET "aarch64-apple-darwin")
 39 |     elseif ("${OSX_BUILD_ARCH}" STREQUAL "x86_64")
 40 |         set(Rust_CARGO_TARGET "x86_64-apple-darwin")
 41 |     elseif ("${OS_ARCH}" STREQUAL "arm64")
 42 |         set(Rust_CARGO_TARGET "aarch64-apple-darwin")
 43 |     endif()
 44 | elseif(WIN32)
 45 |    if (MINGW AND "${OS_ARCH}" STREQUAL "arm64")
 46 |        set(Rust_CARGO_TARGET "aarch64-pc-windows-gnu")
 47 |    elseif (MINGW AND "${OS_ARCH}" STREQUAL "amd64")
 48 |        set(Rust_CARGO_TARGET "x86_64-pc-windows-gnu")
 49 |    elseif (MSVC AND "${OS_ARCH}" STREQUAL "arm64")
 50 |        set(Rust_CARGO_TARGET "aarch64-pc-windows-msvc")
 51 |    elseif (MSVC AND "${OS_ARCH}" STREQUAL "amd64")
 52 |        set(Rust_CARGO_TARGET "x86_64-pc-windows-msvc")
 53 |    endif()
 54 | endif()
 55 | 
 56 | string(FIND "${RUST_TARGETS}" "wasm32-unknown-emscripten" WASM_TARGET_FOUND)
 57 | # NOTE(review): a wasm32-unknown-emscripten entry in RUST_TARGETS overrides the target chosen above, whatever DUCKDB_PLATFORM is - confirm this is intended for native builds.
 58 | if (NOT WASM_TARGET_FOUND EQUAL -1)
 59 |   set(Rust_CARGO_TARGET "wasm32-unknown-emscripten")
 60 | endif()
 61 | 
 62 | message(STATUS "RUST_TARGETS: ${RUST_TARGETS}")
 63 | message(STATUS "WASM_TARGET_FOUND: ${WASM_TARGET_FOUND}")
 64 | message(STATUS "TARGET: ${TARGET}")
 65 | message(STATUS "DUCKDB_BUILD_TYPE: ${DUCKDB_BUILD_TYPE}")
 66 | message(STATUS "TARGET NAME: ${TARGET_NAME}")
 67 | message(STATUS "DUCKDB_PLATFORM: ${DUCKDB_PLATFORM}")
 68 | message(STATUS "OS_ARCH: ${OS_ARCH}")
 69 | message(STATUS "OS_NAME: ${OS_NAME}")
 70 | message(STATUS "Rust_CARGO_TARGET: ${Rust_CARGO_TARGET}")
 71 | # We currently only support the predefined targets.
 72 | #if ("${Rust_CARGO_TARGET}" STREQUAL "")
 73 | #    message(FATAL_ERROR "Failed to detect the correct platform")
 74 | #endif()
 75 | 
 76 | 
 77 | include(FetchContent)
 78 | 
 79 | FetchContent_Declare(
 80 |     Corrosion
 81 |     GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git
 82 |     GIT_TAG v0.5
 83 | )
 84 | # Set any global configuration variables such as `Rust_TOOLCHAIN` before this line!
 85 | FetchContent_MakeAvailable(Corrosion)
 86 | 
 87 | # Import targets defined in a package or workspace manifest `Cargo.toml` file
 88 | corrosion_import_crate(MANIFEST_PATH "${CMAKE_SOURCE_DIR}/../duckdb_lindel_rust/Cargo.toml"
 89 | CRATES "duckdb_lindel_rust"
 90 | )
 91 | 
 92 | # Set extension name here
 93 | set(TARGET_NAME lindel)
 94 | 
 95 | set(EXTENSION_NAME ${TARGET_NAME}_extension)
 96 | set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
 97 | 
 98 | project(${TARGET_NAME})
 99 | 
100 | include_directories(src/include)
101 | 
102 | set(EXTENSION_SOURCES src/lindel_extension.cpp)
103 | 
104 | build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
105 | build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
106 | 
107 | get_target_property(fake_includes duckdb_lindel_rust INCLUDE_DIRECTORIES)
108 | 
109 | target_link_libraries(${EXTENSION_NAME} duckdb_lindel_rust-static)
110 | target_link_libraries(${LOADABLE_EXTENSION_NAME} duckdb_lindel_rust)
111 | 
112 | install(
113 |   TARGETS ${EXTENSION_NAME}
114 |   EXPORT "${DUCKDB_EXPORT_SET}"
115 |   LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
116 |   ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
117 | 
118 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2024 Rusty Conover <rusty@conover.me>
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 2 | 
 3 | # Configuration of extension
 4 | EXT_NAME=lindel
 5 | EXT_CONFIG=${PROJ_DIR}extension_config.cmake
 6 | 
 7 | # Include the Makefile from extension-ci-tools
 8 | include extension-ci-tools/makefiles/duckdb_extension.Makefile
 9 | 
10 | rust_binding_headers:
11 | 	cd duckdb_lindel_rust && cbindgen --config ./cbindgen.toml --crate duckdb_lindel_rust --output ../src/include/rust.h
12 | 
13 | clean_all: clean
14 | 	cd duckdb_lindel_rust && cargo clean


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Lindel (linearizer-delinearizer) Extension for DuckDB
  2 | 
  3 | ![Ducks filling Space-Filling Curves](./docs/space-filling-curve-ducks.jpg)
  4 | 
  5 | This `lindel` extension adds functions for the [linearization](https://en.wikipedia.org/wiki/Linearization) and delinearization of numeric arrays in [DuckDB](https://www.duckdb.org).  It allows you to order multi-dimensional data using space-filling curves.
  6 | 
  7 | ## Installation
  8 | 
  9 | **`lindel` is a [DuckDB Community Extension](https://github.com/duckdb/community-extensions).**
 10 | 
 11 | You can install and load it with the following SQL:
 12 | 
 13 | ```sql
 14 | install lindel from community;
 15 | load lindel;
 16 | ```
 17 | 
 18 | ## What is linearization?
 19 | 
 20 | <img align="right" src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/7c/Hilbert-curve_rounded-gradient-animated.gif/440px-Hilbert-curve_rounded-gradient-animated.gif" alt="An animation of the Hilbert Curve from Wikipedia" width="200px"/>
 21 | 
 22 | [Linearization](https://en.wikipedia.org/wiki/Linearization) maps multi-dimensional data into a one-dimensional sequence while [preserving locality](https://en.wikipedia.org/wiki/Locality_of_reference), enhancing the efficiency of data structures and algorithms for spatial data, such as in databases, GIS, and memory caches.
 23 | 
 24 | > "The principle of locality states that programs tend to reuse data and instructions they have used recently."
 25 | 
 26 | In SQL, sorting by a single column (e.g., time or identifier) is often sufficient, but sometimes queries involve multiple fields, such as:
 27 | 
 28 | - Time and identifier (historical trading data)
 29 | - Latitude and Longitude (GIS applications)
 30 | - Latitude, Longitude, and Altitude (flight tracking)
 31 | - Latitude, Longitude, Altitude, and Time (flight history)
 32 | 
 33 | Sorting by a single field isn't optimal for multi-field queries. Linearization maps multiple fields into a single value, while preserving locality—meaning values close in the original representation remain close in the mapped representation.
 34 | 
 35 | #### Where has this been used before?
 36 | 
 37 | DataBricks has long supported Z-Ordering (they also now default to using the Hilbert curve for the ordering).  This [video explains how Delta Lake queries are faster when the data is Z-Ordered.](https://www.youtube.com/watch?v=A1aR1A8OwOU) This extension also allows DuckDB to write files with the same ordering optimization.
 38 | 
 39 | Numerous articles describe the benefits of applying a Z-Ordering/Hilbert ordering to data for query performance.
 40 | 
 41 | - [https://delta.io/blog/2023-06-03-delta-lake-z-order/](https://delta.io/blog/2023-06-03-delta-lake-z-order/)
 42 | - [https://blog.cloudera.com/speeding-up-queries-with-z-order/](https://blog.cloudera.com/speeding-up-queries-with-z-order/)
 43 | - [https://www.linkedin.com/pulse/z-order-visualization-implementation-nick-karpov/](https://www.linkedin.com/pulse/z-order-visualization-implementation-nick-karpov/)
 44 | 
 45 | From one of the articles:
 46 | 
 47 | ![Delta Lake Query Speed Improvement from using Z-Ordering](https://delta.io/static/c1801cd120999d77de0ee51b227acccb/a13c9/image1.png)
 48 | 
 49 | Your particular performance improvements will vary, but for some query patterns Z-Ordering and Hilbert ordering will make quite a big difference.
 50 | 
 51 | ## When would I use this?
 52 | 
 53 | For query patterns across multiple numeric or short text columns, consider sorting rows using Hilbert encoding when storing data in Parquet:
 54 | 
 55 | ```sql
 56 | COPY (
 57 |   select * from 'source.csv'
 58 |   order by
 59 |   hilbert_encode([source_data.time, source_data.symbol_id]::integer[2])
 60 | )
 61 | TO 'example.parquet' (FORMAT PARQUET)
 62 | 
 63 | -- or if dealing with latitude and longitude
 64 | 
 65 | COPY (
 66 |   select * from 'source.csv'
 67 |   order by
 68 |   hilbert_encode([source_data.lat, source_data.lon]::double[2])
 69 | ) TO 'example.parquet' (FORMAT PARQUET)
 70 | ```
 71 | 
 72 | The Parquet file format stores statistics for each row group. Since rows are sorted with locality into these row groups the query execution may be able to skip row groups that contain no relevant rows, leading to faster query execution times.
 73 | 
 74 | ## Encoding Types
 75 | 
 76 | This extension offers two different encoding types, [Hilbert](https://en.wikipedia.org/wiki/Hilbert_curve) and [Morton](https://en.wikipedia.org/wiki/Z-order_curve) encoding.
 77 | 
 78 | ### Hilbert Encoding
 79 | 
 80 | Hilbert encoding uses the Hilbert curve, a continuous fractal space-filling curve named after [David Hilbert](https://en.wikipedia.org/wiki/David_Hilbert). It rearranges coordinates based on the Hilbert curve's path, preserving spatial locality better than Morton encoding.
 81 | 
 82 | This is a great explanation of the [Hilbert curve](https://www.youtube.com/watch?v=3s7h2MHQtxc).
 83 | 
 84 | 
 85 | 
 86 | ### Morton Encoding (Z-order Curve)
 87 | 
 88 | Morton encoding, also known as the Z-order curve, interleaves the binary representations of coordinates into a single integer. It is named after Glenn K. Morton.
 89 | 
 90 | **Locality:** Hilbert encoding generally preserves locality better than Morton encoding, making it preferable for applications where spatial proximity matters.
 91 | 
 92 | ## API
 93 | 
 94 | ### Encoding
 95 | 
 96 | **Supported types:** Any signed or unsigned integer, float, or double (`INPUT_TYPE`).
 97 | **Output:** The smallest unsigned integer type that can represent the input array.
 98 | 
 99 | ### Encoding Functions
100 | 
101 | * `hilbert_encode(ARRAY[INPUT_TYPE, 1-16])`
102 | * `morton_encode(ARRAY[INPUT_TYPE, 1-16])`
103 | 
104 | Output is limited to a 128-bit `UHUGEINT`. The input array size is validated to ensure it fits within this limit.
105 | 
106 | | Input Type | Maximum Number of Elements | Output Type (depends on number of elements) |
107 | |---|--|-------------|
108 | | `UTINYINT`   | 16 | 1: `UTINYINT`<br/>2: `USMALLINT`<br/>3-4: `UINTEGER`<br/>5-8: `UBIGINT`<br/>9-16: `UHUGEINT` |
109 | | `USMALLINT`  | 8 | 1: `USMALLINT`<br/>2: `UINTEGER`<br/>3-4: `UBIGINT`<br/>5-8: `UHUGEINT` |
110 | | `UINTEGER`   | 4 | 1: `UINTEGER`<br/>2: `UBIGINT`<br/>3-4: `UHUGEINT` |
111 | | `UBIGINT`    | 2 | 1: `UBIGINT`<br/>2: `UHUGEINT` |
112 | | `FLOAT`      | 4 | 1: `UINTEGER`<br/>2: `UBIGINT`<br/>3-4: `UHUGEINT` |
113 | | `DOUBLE`     | 2 | 1: `UBIGINT`<br/>2: `UHUGEINT` |
114 | 
115 | ### Encoding examples
116 | 
117 | ```sql
118 | install lindel from community;
119 | load lindel;
120 | 
121 | with elements as (
122 |   select * as id from range(3)
123 | )
124 | select
125 |   a.id as a,
126 |   b.id as b,
127 |   hilbert_encode([a.id, b.id]::tinyint[2]) as hilbert,
128 |   morton_encode([a.id, b.id]::tinyint[2]) as morton
129 |   from
130 | elements as a cross join elements as b;
131 | ┌───────┬───────┬─────────┬────────┐
132 | │   a   │   b   │ hilbert │ morton │
133 | │ int64 │ int64 │ uint16  │ uint16 │
134 | ├───────┼───────┼─────────┼────────┤
135 | │     0 │     0 │       0 │      0 │
136 | │     0 │     1 │       3 │      1 │
137 | │     0 │     2 │       4 │      4 │
138 | │     1 │     0 │       1 │      2 │
139 | │     1 │     1 │       2 │      3 │
140 | │     1 │     2 │       7 │      6 │
141 | │     2 │     0 │      14 │      8 │
142 | │     2 │     1 │      13 │      9 │
143 | │     2 │     2 │       8 │     12 │
144 | └───────┴───────┴─────────┴────────┘
145 | 
146 | -- Now sort that same table using Hilbert encoding
147 | 
148 | ┌───────┬───────┬─────────┬────────┐
149 | │   a   │   b   │ hilbert │ morton │
150 | │ int64 │ int64 │ uint16  │ uint16 │
151 | ├───────┼───────┼─────────┼────────┤
152 | │     0 │     0 │       0 │      0 │
153 | │     1 │     0 │       1 │      2 │
154 | │     1 │     1 │       2 │      3 │
155 | │     0 │     1 │       3 │      1 │
156 | │     0 │     2 │       4 │      4 │
157 | │     1 │     2 │       7 │      6 │
158 | │     2 │     2 │       8 │     12 │
159 | │     2 │     1 │      13 │      9 │
160 | │     2 │     0 │      14 │      8 │
161 | └───────┴───────┴─────────┴────────┘
162 | 
163 | -- Do you notice how when A and B are closer to 2 the rows are "closer"?
164 | ```
165 | 
166 | Encoding doesn't only work with integers; it can also be used with floats.
167 | 
168 | ```sql
169 | install lindel from community;
170 | load lindel;
171 | 
172 | -- Encode two 32-bit floats into one uint64
173 | select hilbert_encode([37.8, .2]::float[2]) as hilbert;
174 | ┌─────────────────────┐
175 | │       hilbert       │
176 | │       uint64        │
177 | ├─────────────────────┤
178 | │ 2303654869236839926 │
179 | └─────────────────────┘
180 | 
181 | -- Since doubles use 64 bits of precision the encoding
182 | -- must result in a uint128
183 | 
184 | select hilbert_encode([37.8, .2]::double[2]) as hilbert;
185 | ┌────────────────────────────────────────┐
186 | │                hilbert                 │
187 | │                uint128                 │
188 | ├────────────────────────────────────────┤
189 | │ 42534209309512799991913666633619307890 │
190 | └────────────────────────────────────────┘
191 | 
192 | -- 3 dimensional encoding.
193 | select hilbert_encode([1.0, 5.0, 6.0]::float[3]) as hilbert;
194 | ┌──────────────────────────────┐
195 | │           hilbert            │
196 | │           uint128            │
197 | ├──────────────────────────────┤
198 | │ 8002395622101954260073409974 │
199 | └──────────────────────────────┘
200 | ```
201 | 
202 | Not to be left out, you can also encode strings.
203 | 
204 | ```sql
205 | 
206 | select hilbert_encode([ord(x) for x in split('abcd', '')]::tinyint[4]) as hilbert;
207 | ┌───────────┐
208 | │  hilbert  │
209 | │  uint32   │
210 | ├───────────┤
211 | │ 178258816 │
212 | └───────────┘
213 | 
214 | --- This splits the string 'abcd' by character, then converts each character into
215 | --- its ordinal representation, finally converts them all to 8 bit integers and then
216 | --- performs encoding.
217 | 
218 | ```
219 | 
220 | Currently, the input for `hilbert_encode()` and `morton_encode()` functions in DuckDB requires that all elements in the input array be of the same size. If you need to encode different-sized types, you must break up larger data types into units of the smallest data type. Results may vary.
221 | 
222 | ### Decoding Functions
223 | 
224 | * `hilbert_decode(ANY_UNSIGNED_INTEGER_TYPE, TINYINT, BOOLEAN, BOOLEAN)`
225 | * `morton_decode(ANY_UNSIGNED_INTEGER_TYPE, TINYINT, BOOLEAN, BOOLEAN)`
226 | 
227 | The decoding functions take four parameters:
228 | 
229 | 1. **Value to be decoded:** This is always an unsigned integer type.
230 | 2. **Number of elements to decode:** This is a `TINYINT` specifying how many elements should be decoded.
231 | 3. **Float return type:** This `BOOLEAN` indicates whether the values should be returned as floats (REAL or DOUBLE). Set to true to enable this.
232 | 4. **Unsigned return type:** This `BOOLEAN` indicates whether the values should be unsigned if not using floats.
233 | 
234 | The return type of these functions is always an array, with the element type determined by the number of elements requested and whether "float" handling is enabled by the third parameter.
235 | 
236 | ### Examples
237 | 
238 | ```sql
239 | -- Start out just by encoding two values.
240 | select hilbert_encode([1, 2]::tinyint[2]) as hilbert;
241 | ┌─────────┐
242 | │ hilbert │
243 | │ uint16  │
244 | ├─────────┤
245 | │       7 │
246 | └─────────┘
247 | D select hilbert_decode(7::uint16, 2, false, true) as values;
248 | ┌─────────────┐
249 | │   values    │
250 | │ utinyint[2] │
251 | ├─────────────┤
252 | │ [1, 2]      │
253 | └─────────────┘
254 | 
255 | -- Show that the decoder works with the encoder.
256 | select hilbert_decode(hilbert_encode([1, 2]::tinyint[2]), 2, false, false) as values;
257 | ┌─────────────┐
258 | │   values    │
259 | │ utinyint[2] │
260 | ├─────────────┤
261 | │ [1, 2]      │
262 | └─────────────┘
263 | 
264 | -- FIXME: need to implement a signed or unsigned flag on the decoder function.
265 | select hilbert_decode(hilbert_encode([1, -2]::bigint[2]), 2, false, false) as values;
266 | ┌───────────┐
267 | │  values   │
268 | │ bigint[2] │
269 | ├───────────┤
270 | │ [1, -2]   │
271 | └───────────┘
272 | 
273 | select hilbert_encode([1.0, 5.0, 6.0]::float[3]) as hilbert;
274 | ┌──────────────────────────────┐
275 | │           hilbert            │
276 | │           uint128            │
277 | ├──────────────────────────────┤
278 | │ 8002395622101954260073409974 │
279 | └──────────────────────────────┘
280 | 
281 | select hilbert_decode(8002395622101954260073409974::UHUGEINT, 3, True, False) as values;
282 | ┌─────────────────┐
283 | │     values      │
284 | │    float[3]     │
285 | ├─────────────────┤
286 | │ [1.0, 5.0, 6.0] │
287 | └─────────────────┘
288 | ```
289 | ## Credits
290 | 
291 | 1. This DuckDB extension utilizes and is named after the [`lindel`](https://crates.io/crates/lindel) Rust crate created by [DoubleHyphen](https://github.com/DoubleHyphen).
292 | 
293 | 2. It also uses the [DuckDB Extension Template](https://github.com/duckdb/extension-template).
294 | 
295 | 3. This extension uses [Corrosion](https://github.com/corrosion-rs/corrosion) to combine CMake with a Rust/Cargo build process.
296 | 
297 | 4. I've gotten a lot of help from the generous DuckDB developer community.
298 | 
299 | ### Build Architecture
300 | 
301 | For the DuckDB extension to call the Rust code a tool called `cbindgen` is used to write the C++ headers for the exposed Rust interface.
302 | 
303 | The headers can be updated by running `make rust_binding_headers`.
304 | 
305 | #### Building on macOS
306 | 
307 | Example setup + build steps for macOS users:
308 | 
309 | ```sh
310 | # Remove rust if previously installed via brew
311 | brew uninstall rust
312 | 
313 | # Install rustup + cbindgen
314 | # (use rustup to switch versions of Rust without extra fuss)
315 | brew install cbindgen rustup
316 | 
317 | rustup toolchain install stable
318 | 
319 | # Initialize rustup
320 | # Zsh users: customize installation, answer n to "Modify PATH variable?",
321 | # and continue with defaults for everything else
322 | rustup-init
323 | 
324 | # OPTIONAL step for zsh users: add rust + cargo env setup to zshrc:
325 | echo '. "$HOME/.cargo/env"' >> ~/.zshrc
326 | 
327 | # Use rustc stable version by default
328 | rustup default stable
329 | 
330 | # Build headers
331 | make rust_binding_headers
332 | 
333 | GEN=ninja make
334 | ```
335 | 
336 | ### Build steps
337 | Now to build the extension, run:
338 | ```sh
339 | make
340 | ```
341 | The main binaries that will be built are:
342 | ```sh
343 | ./build/release/duckdb
344 | ./build/release/test/unittest
345 | ./build/release/extension/lindel/lindel.duckdb_extension
346 | ```
347 | - `duckdb` is the binary for the duckdb shell with the extension code automatically loaded.
348 | - `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary.
349 | - `lindel.duckdb_extension` is the loadable binary as it would be distributed.
350 | 
351 | ## Running the extension
352 | To run the extension code, simply start the shell with `./build/release/duckdb`.
353 | 
354 | Now we can use the features from the extension directly in DuckDB.
355 | 
356 | ```
357 | D select hilbert_encode([1.0, 5.0, 6.0]::float[3]) as hilbert;
358 | ┌──────────────────────────────┐
359 | │           hilbert            │
360 | │           uint128            │
361 | ├──────────────────────────────┤
362 | │ 8002395622101954260073409974 │
363 | └──────────────────────────────┘
364 | ```
365 | 
366 | ## Running the tests
367 | Different tests can be created for DuckDB extensions. The primary way of testing DuckDB extensions should be the SQL tests in `./test/sql`. These SQL tests can be run using:
368 | ```sh
369 | make test
370 | ```
371 | 
372 | ### Installing the deployed binaries
373 | To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the
374 | `allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using. Some examples:
375 | 
376 | CLI:
377 | ```shell
378 | duckdb -unsigned
379 | ```
380 | 
381 | Python:
382 | ```python
383 | con = duckdb.connect(':memory:', config={'allow_unsigned_extensions' : 'true'})
384 | ```
385 | 
386 | NodeJS:
387 | ```js
388 | db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"});
389 | ```
390 | 
391 | Secondly, you will need to set the repository endpoint in DuckDB to the HTTP url of your bucket + version of the extension
392 | you want to install. To do this run the following SQL query in DuckDB:
393 | ```sql
394 | SET custom_extension_repository='bucket.s3.us-east-1.amazonaws.com/lindel/latest';
395 | ```
396 | Note that the `/latest` path will allow you to install the latest extension version available for your current version of
397 | DuckDB. To specify a specific version, you can pass the version instead.
398 | 
399 | After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB:
400 | ```sql
401 | INSTALL lindel;
402 | LOAD lindel;
403 | ```
404 | 


--------------------------------------------------------------------------------
/docs/space-filling-curve-ducks.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Query-farm/lindel/ebf029f3039986fb01822dfefad31732a072c440/docs/space-filling-curve-ducks.jpg


--------------------------------------------------------------------------------
/duckdb_lindel_rust/Cargo.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Cargo.
  2 | # It is not intended for manual editing.
  3 | version = 3
  4 | 
  5 | [[package]]
  6 | name = "autocfg"
  7 | version = "1.4.0"
  8 | source = "registry+https://github.com/rust-lang/crates.io-index"
  9 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
 10 | 
 11 | [[package]]
 12 | name = "duckdb_lindel_rust"
 13 | version = "0.1.0"
 14 | dependencies = [
 15 |  "lindel",
 16 | ]
 17 | 
 18 | [[package]]
 19 | name = "lindel"
 20 | version = "0.1.1"
 21 | source = "registry+https://github.com/rust-lang/crates.io-index"
 22 | checksum = "e049ba2901c1380bbc3d9a10646d1eea9a478ec72e8de8cabb6d67e589aca99c"
 23 | dependencies = [
 24 |  "morton-encoding",
 25 |  "num",
 26 |  "num-traits",
 27 | ]
 28 | 
 29 | [[package]]
 30 | name = "morton-encoding"
 31 | version = "2.0.1"
 32 | source = "registry+https://github.com/rust-lang/crates.io-index"
 33 | checksum = "f66c953d92a578cd98a4598021e3b473520d214665917eb51dba49dc227936c8"
 34 | dependencies = [
 35 |  "num",
 36 |  "num-traits",
 37 | ]
 38 | 
 39 | [[package]]
 40 | name = "num"
 41 | version = "0.2.1"
 42 | source = "registry+https://github.com/rust-lang/crates.io-index"
 43 | checksum = "b8536030f9fea7127f841b45bb6243b27255787fb4eb83958aa1ef9d2fdc0c36"
 44 | dependencies = [
 45 |  "num-bigint",
 46 |  "num-complex",
 47 |  "num-integer",
 48 |  "num-iter",
 49 |  "num-rational",
 50 |  "num-traits",
 51 | ]
 52 | 
 53 | [[package]]
 54 | name = "num-bigint"
 55 | version = "0.2.6"
 56 | source = "registry+https://github.com/rust-lang/crates.io-index"
 57 | checksum = "090c7f9998ee0ff65aa5b723e4009f7b217707f1fb5ea551329cc4d6231fb304"
 58 | dependencies = [
 59 |  "autocfg",
 60 |  "num-integer",
 61 |  "num-traits",
 62 | ]
 63 | 
 64 | [[package]]
 65 | name = "num-complex"
 66 | version = "0.2.4"
 67 | source = "registry+https://github.com/rust-lang/crates.io-index"
 68 | checksum = "b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95"
 69 | dependencies = [
 70 |  "autocfg",
 71 |  "num-traits",
 72 | ]
 73 | 
 74 | [[package]]
 75 | name = "num-integer"
 76 | version = "0.1.46"
 77 | source = "registry+https://github.com/rust-lang/crates.io-index"
 78 | checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
 79 | dependencies = [
 80 |  "num-traits",
 81 | ]
 82 | 
 83 | [[package]]
 84 | name = "num-iter"
 85 | version = "0.1.45"
 86 | source = "registry+https://github.com/rust-lang/crates.io-index"
 87 | checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
 88 | dependencies = [
 89 |  "autocfg",
 90 |  "num-integer",
 91 |  "num-traits",
 92 | ]
 93 | 
 94 | [[package]]
 95 | name = "num-rational"
 96 | version = "0.2.4"
 97 | source = "registry+https://github.com/rust-lang/crates.io-index"
 98 | checksum = "5c000134b5dbf44adc5cb772486d335293351644b801551abe8f75c84cfa4aef"
 99 | dependencies = [
100 |  "autocfg",
101 |  "num-bigint",
102 |  "num-integer",
103 |  "num-traits",
104 | ]
105 | 
106 | [[package]]
107 | name = "num-traits"
108 | version = "0.2.19"
109 | source = "registry+https://github.com/rust-lang/crates.io-index"
110 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
111 | dependencies = [
112 |  "autocfg",
113 | ]
114 | 


--------------------------------------------------------------------------------
/duckdb_lindel_rust/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "duckdb_lindel_rust"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | publish = false
 6 | 
 7 | [lib]
 8 | name = "duckdb_lindel_rust"
 9 | crate-type = ["staticlib"]
10 | 
11 | [dependencies]
12 | lindel = "0.1.1"
13 | 


--------------------------------------------------------------------------------
/duckdb_lindel_rust/cbindgen.toml:
--------------------------------------------------------------------------------
  1 | # This is a template cbindgen.toml file with all of the default values.
  2 | # Some values are commented out because their absence is the real default.
  3 | #
  4 | # See https://github.com/mozilla/cbindgen/blob/master/docs.md#cbindgentoml
  5 | # for detailed documentation of every option here.
  6 | 
  7 | 
  8 | 
  9 | language = "C++"
 10 | 
 11 | 
 12 | 
 13 | ############## Options for Wrapping the Contents of the Header #################
 14 | 
 15 | # header = "/* Text to put at the beginning of the generated file. Probably a license. */"
 16 | # trailer = "/* Text to put at the end of the generated file */"
 17 | # include_guard = "my_bindings_h"
 18 | # pragma_once = true
 19 | # autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */"
 20 | include_version = false
 21 | # namespace = "my_namespace"
 22 | namespaces = []
 23 | using_namespaces = []
 24 | sys_includes = []
 25 | includes = []
 26 | no_includes = false
 27 | # cpp_compat = true
 28 | after_includes = ""
 29 | 
 30 | 
 31 | 
 32 | 
 33 | ############################ Code Style Options ################################
 34 | 
 35 | braces = "SameLine"
 36 | line_length = 100
 37 | tab_width = 2
 38 | documentation = true
 39 | documentation_style = "auto"
 40 | documentation_length = "full"
 41 | line_endings = "LF" # also "CR", "CRLF", "Native"
 42 | 
 43 | 
 44 | 
 45 | 
 46 | ############################# Codegen Options ##################################
 47 | 
 48 | style = "both"
 49 | sort_by = "Name" # default for `fn.sort_by` and `const.sort_by`
 50 | usize_is_size_t = true
 51 | 
 52 | 
 53 | 
 54 | [defines]
 55 | # "target_os = freebsd" = "DEFINE_FREEBSD"
 56 | # "feature = serde" = "DEFINE_SERDE"
 57 | 
 58 | 
 59 | 
 60 | [export]
 61 | include = []
 62 | exclude = []
 63 | # prefix = "CAPI_"
 64 | item_types = []
 65 | renaming_overrides_prefixing = false
 66 | 
 67 | 
 68 | 
 69 | [export.rename]
 70 | 
 71 | 
 72 | 
 73 | [export.body]
 74 | 
 75 | 
 76 | [export.mangle]
 77 | 
 78 | 
 79 | [fn]
 80 | rename_args = "None"
 81 | # must_use = "MUST_USE_FUNC"
 82 | # deprecated = "DEPRECATED_FUNC"
 83 | # deprecated_with_note = "DEPRECATED_FUNC_WITH_NOTE"
 84 | # no_return = "NO_RETURN"
 85 | # prefix = "START_FUNC"
 86 | # postfix = "END_FUNC"
 87 | args = "auto"
 88 | sort_by = "Name"
 89 | 
 90 | 
 91 | 
 92 | 
 93 | [struct]
 94 | rename_fields = "None"
 95 | # must_use = "MUST_USE_STRUCT"
 96 | # deprecated = "DEPRECATED_STRUCT"
 97 | # deprecated_with_note = "DEPRECATED_STRUCT_WITH_NOTE"
 98 | derive_constructor = false
 99 | derive_eq = false
100 | derive_neq = false
101 | derive_lt = false
102 | derive_lte = false
103 | derive_gt = false
104 | derive_gte = false
105 | 
106 | 
107 | 
108 | 
109 | [enum]
110 | rename_variants = "None"
111 | # must_use = "MUST_USE_ENUM"
112 | # deprecated = "DEPRECATED_ENUM"
113 | # deprecated_with_note = "DEPRECATED_ENUM_WITH_NOTE"
114 | add_sentinel = false
115 | prefix_with_name = false
116 | derive_helper_methods = false
117 | derive_const_casts = false
118 | derive_mut_casts = false
119 | # cast_assert_name = "ASSERT"
120 | derive_tagged_enum_destructor = false
121 | derive_tagged_enum_copy_constructor = false
122 | enum_class = true
123 | private_default_tagged_enum_constructor = false
124 | 
125 | 
126 | 
127 | 
128 | [const]
129 | allow_static_const = true
130 | allow_constexpr = false
131 | sort_by = "Name"
132 | 
133 | 
134 | 
135 | 
136 | [macro_expansion]
137 | bitflags = false
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | ############## Options for How Your Rust library Should Be Parsed ##############
145 | 
146 | [parse]
147 | parse_deps = false
148 | # include = []
149 | exclude = []
150 | clean = false
151 | extra_bindings = []
152 | 
153 | 
154 | 
155 | [parse.expand]
156 | crates = ["duckdb_lindel_rust"]
157 | all_features = false
158 | default_features = true
159 | features = []


--------------------------------------------------------------------------------
/duckdb_lindel_rust/src/lib.rs:
--------------------------------------------------------------------------------
  1 | // duckdb_lindel_rust
  2 | // Copyright 2024 Rusty Conover <rusty@conover.me>
  3 | // Licensed under the MIT License
  4 | 
  5 | use std::ffi::c_void;
  6 | 
  7 | // Decode the integer at `src` into `dest_len` elements of `element_bit_width` bits, written to `dest`.
  8 | #[no_mangle]
  9 | pub extern "C" fn perform_decode(
 10 |     encoding_type: u8, // 0 = Hilbert, 1 = Morton; any other value panics
 11 |     element_bit_width: u8, // 8, 16, 32 or 64; any other value panics
 12 |     src: *const c_void, // read as the unsigned integer type selected in the match below
 13 |     dest: *mut c_void, // receives `dest_len` decoded elements
 14 |     dest_len: usize, // number of elements; unsupported counts for the width panic
 15 | ) {
 16 |     macro_rules! decode_and_copy {
 17 |         ($dest_type: ty, $src_type:ty, $len:expr) => {{
 18 |             // `src`/`dest` are untyped, so alignment is not guaranteed: use unaligned reads/writes.
 19 |             unsafe {
 20 |                 let function = match encoding_type {
 21 |                     0 => lindel::hilbert_decode,
 22 |                     1 => lindel::morton_decode,
 23 |                     _ => panic!("Invalid encoding type"),
 24 |                 };
 25 |                 let values: [$dest_type; $len] = function((src as *const $src_type).read_unaligned());
 26 |                 for i in 0..$len {
 27 |                     (dest as *mut $dest_type).add(i).write_unaligned(values[i]);
 28 |                 }
 29 |             };
 30 |         }};
 31 |     }
 32 | 
 33 |     match element_bit_width {
 34 |         8 => match dest_len {
 35 |             1 => decode_and_copy!(u8, u8, 1),
 36 |             2 => decode_and_copy!(u8, u16, 2),
 37 |             3 => decode_and_copy!(u8, u32, 3),
 38 |             4 => decode_and_copy!(u8, u32, 4),
 39 |             5 => decode_and_copy!(u8, u64, 5),
 40 |             6 => decode_and_copy!(u8, u64, 6),
 41 |             7 => decode_and_copy!(u8, u64, 7),
 42 |             8 => decode_and_copy!(u8, u64, 8),
 43 |             9 => decode_and_copy!(u8, u128, 9),
 44 |             10 => decode_and_copy!(u8, u128, 10),
 45 |             11 => decode_and_copy!(u8, u128, 11),
 46 |             12 => decode_and_copy!(u8, u128, 12),
 47 |             13 => decode_and_copy!(u8, u128, 13),
 48 |             14 => decode_and_copy!(u8, u128, 14),
 49 |             15 => decode_and_copy!(u8, u128, 15),
 50 |             16 => decode_and_copy!(u8, u128, 16),
 51 |             _ => panic!("Invalid length"),
 52 |         },
 53 |         16 => match dest_len {
 54 |             1 => decode_and_copy!(u16, u16, 1),
 55 |             2 => decode_and_copy!(u16, u32, 2),
 56 |             3 => decode_and_copy!(u16, u64, 3),
 57 |             4 => decode_and_copy!(u16, u64, 4),
 58 |             5 => decode_and_copy!(u16, u128, 5),
 59 |             6 => decode_and_copy!(u16, u128, 6),
 60 |             7 => decode_and_copy!(u16, u128, 7),
 61 |             8 => decode_and_copy!(u16, u128, 8),
 62 |             _ => panic!("Invalid length"),
 63 |         },
 64 |         32 => match dest_len {
 65 |             1 => decode_and_copy!(u32, u32, 1),
 66 |             2 => decode_and_copy!(u32, u64, 2),
 67 |             3 => decode_and_copy!(u32, u128, 3),
 68 |             4 => decode_and_copy!(u32, u128, 4),
 69 |             _ => panic!("Invalid length"),
 70 |         },
 71 |         64 => match dest_len {
 72 |             1 => decode_and_copy!(u64, u64, 1),
 73 |             2 => decode_and_copy!(u64, u128, 2),
 74 |             _ => panic!("Invalid length"),
 75 |         },
 76 |         _ => panic!("Invalid element bit width"),
 77 |     }
 78 | }
 79 | 
 80 | // Call `$function` on `$array` and store the result through `$result` viewed as `*mut $type`.
 81 | macro_rules! encode_and_store {
 82 |     ($function:expr, $array:expr, $type:ty, $result:expr) => {{
 83 |         let calculated_result = $function($array);
 84 |         let result_ptr = $result as *mut $type;
 85 |         // `$result` arrives as an untyped pointer, so nothing here guarantees it is aligned
 86 |         // for `$type`; an unaligned write avoids relying on that.
 87 |         unsafe { result_ptr.write_unaligned(calculated_result) };
 88 |     }};
 89 | }
 90 | 
 91 | macro_rules! generic_encode_u8_var {
 92 |     ($func_name:ident, $encoding_expr: expr) => {
 93 |         /// # Safety
 94 |         /// `ptr` must point to `len` readable `u8` values (1 to 16 of them; other lengths panic) and
 95 |         /// `result` to writable storage for the encoded integer (`u8` up to `u128`, chosen by `len`).
 96 |         #[no_mangle]
 97 |         pub unsafe extern "C" fn $func_name(ptr: *const u8, len: usize, result: *mut c_void) -> () {
 98 |             let args = unsafe {
 99 |                 assert!(!ptr.is_null());
100 |                 std::slice::from_raw_parts(ptr, len)
101 |             };
102 | 
103 |             match args.len() {
104 |                 1 => encode_and_store!($encoding_expr, [args[0]], u8, result),
105 |                 2 => encode_and_store!($encoding_expr, [args[0], args[1]], u16, result),
106 |                 3 => encode_and_store!($encoding_expr, [args[0], args[1], args[2]], u32, result),
107 |                 4 => encode_and_store!(
108 |                     $encoding_expr,
109 |                     [args[0], args[1], args[2], args[3]],
110 |                     u32,
111 |                     result
112 |                 ),
113 |                 5 => encode_and_store!(
114 |                     $encoding_expr,
115 |                     [args[0], args[1], args[2], args[3], args[4]],
116 |                     u64,
117 |                     result
118 |                 ),
119 |                 6 => encode_and_store!(
120 |                     $encoding_expr,
121 |                     [args[0], args[1], args[2], args[3], args[4], args[5]],
122 |                     u64,
123 |                     result
124 |                 ),
125 |                 7 => encode_and_store!(
126 |                     $encoding_expr,
127 |                     [args[0], args[1], args[2], args[3], args[4], args[5], args[6]],
128 |                     u64,
129 |                     result
130 |                 ),
131 |                 8 => encode_and_store!(
132 |                     $encoding_expr,
133 |                     [args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7]],
134 |                     u64,
135 |                     result
136 |                 ),
137 |                 9 => encode_and_store!(
138 |                     $encoding_expr,
139 |                     [
140 |                         args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7],
141 |                         args[8]
142 |                     ],
143 |                     u128,
144 |                     result
145 |                 ),
146 |                 10 => encode_and_store!(
147 |                     $encoding_expr,
148 |                     [
149 |                         args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7],
150 |                         args[8], args[9]
151 |                     ],
152 |                     u128,
153 |                     result
154 |                 ),
155 |                 11 => encode_and_store!(
156 |                     $encoding_expr,
157 |                     [
158 |                         args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7],
159 |                         args[8], args[9], args[10]
160 |                     ],
161 |                     u128,
162 |                     result
163 |                 ),
164 |                 12 => encode_and_store!(
165 |                     $encoding_expr,
166 |                     [
167 |                         args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7],
168 |                         args[8], args[9], args[10], args[11]
169 |                     ],
170 |                     u128,
171 |                     result
172 |                 ),
173 |                 13 => encode_and_store!(
174 |                     $encoding_expr,
175 |                     [
176 |                         args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7],
177 |                         args[8], args[9], args[10], args[11], args[12]
178 |                     ],
179 |                     u128,
180 |                     result
181 |                 ),
182 |                 14 => encode_and_store!(
183 |                     $encoding_expr,
184 |                     [
185 |                         args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7],
186 |                         args[8], args[9], args[10], args[11], args[12], args[13]
187 |                     ],
188 |                     u128,
189 |                     result
190 |                 ),
191 |                 15 => encode_and_store!(
192 |                     $encoding_expr,
193 |                     [
194 |                         args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7],
195 |                         args[8], args[9], args[10], args[11], args[12], args[13], args[14]
196 |                     ],
197 |                     u128,
198 |                     result
199 |                 ),
200 |                 16 => encode_and_store!(
201 |                     $encoding_expr,
202 |                     [
203 |                         args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7],
204 |                         args[8], args[9], args[10], args[11], args[12], args[13], args[14],
205 |                         args[15]
206 |                     ],
207 |                     u128,
208 |                     result
209 |                 ),
210 |                 _ => panic!("Invalid length"),
211 |             }
212 |         }
213 |     };
214 | }
215 | 
216 | generic_encode_u8_var!(hilbert_encode_u8_var, lindel::hilbert_encode);
217 | generic_encode_u8_var!(morton_encode_u8_var, lindel::morton_encode);
218 | 
219 | macro_rules! generic_encode_u16_var {
220 |     ($func_name:ident, $encoding_expr: expr) => {
221 |         /// # Safety
222 |         ///
223 |         /// This function is unsafe because it dereferences raw pointers.
224 |         #[no_mangle]
225 |         pub unsafe extern "C" fn $func_name(
226 |             ptr: *const u16,
227 |             len: usize,
228 |             result: *mut c_void,
229 |         ) -> () {
230 |             let args = unsafe {
231 |                 assert!(!ptr.is_null());
232 |                 std::slice::from_raw_parts(ptr, len)
233 |             };
234 | 
235 |             match args.len() {
236 |                 1 => encode_and_store!($encoding_expr, [args[0]], u16, result), // 16
237 |                 2 => encode_and_store!($encoding_expr, [args[0], args[1]], u32, result), //32
238 |                 3 => encode_and_store!($encoding_expr, [args[0], args[1], args[2]], u64, result), // 48 - 64
239 |                 4 => encode_and_store!(
240 |                     $encoding_expr,
241 |                     [args[0], args[1], args[2], args[3]],
242 |                     u64,
243 |                     result
244 |                 ), // 64 - 64
245 |                 5 => encode_and_store!(
246 |                     $encoding_expr,
247 |                     [args[0], args[1], args[2], args[3], args[4]],
248 |                     u128,
249 |                     result
250 |                 ),
251 |                 6 => encode_and_store!(
252 |                     $encoding_expr,
253 |                     [args[0], args[1], args[2], args[3], args[4], args[5]],
254 |                     u128,
255 |                     result
256 |                 ),
257 |                 7 => encode_and_store!(
258 |                     $encoding_expr,
259 |                     [args[0], args[1], args[2], args[3], args[4], args[5], args[6]],
260 |                     u128,
261 |                     result
262 |                 ),
263 |                 8 => encode_and_store!(
264 |                     $encoding_expr,
265 |                     [args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7]],
266 |                     u128,
267 |                     result
268 |                 ),
269 |                 _ => panic!("Invalid length"),
270 |             }
271 |         }
272 |     };
273 | }
274 | 
275 | generic_encode_u16_var!(hilbert_encode_u16_var, lindel::hilbert_encode);
276 | generic_encode_u16_var!(morton_encode_u16_var, lindel::morton_encode);
277 | 
278 | macro_rules! generic_encode_u32_var {
279 |     ($func_name:ident, $encoding_expr: expr) => {
280 |         /// # Safety
281 |         ///
282 |         /// This function is unsafe because it dereferences raw pointers.
283 |         #[no_mangle]
284 |         pub unsafe extern "C" fn $func_name(
285 |             ptr: *const u32,
286 |             len: usize,
287 |             result: *mut c_void,
288 |         ) -> () {
289 |             let args = unsafe {
290 |                 assert!(!ptr.is_null());
291 |                 std::slice::from_raw_parts(ptr, len)
292 |             };
293 | 
294 |             match args.len() {
295 |                 1 => encode_and_store!($encoding_expr, [args[0]], u32, result),
296 |                 2 => encode_and_store!($encoding_expr, [args[0], args[1]], u64, result),
297 |                 3 => encode_and_store!($encoding_expr, [args[0], args[1], args[2]], u128, result),
298 |                 4 => encode_and_store!(
299 |                     $encoding_expr,
300 |                     [args[0], args[1], args[2], args[3]],
301 |                     u128,
302 |                     result
303 |                 ),
304 |                 _ => panic!("Invalid length"),
305 |             }
306 |         }
307 |     };
308 | }
309 | 
310 | generic_encode_u32_var!(hilbert_encode_u32_var, lindel::hilbert_encode);
311 | generic_encode_u32_var!(morton_encode_u32_var, lindel::morton_encode);
312 | 
313 | macro_rules! generic_encode_u64_var {
314 |     ($func_name:ident, $encoding_expr: expr) => {
315 |         /// # Safety
316 |         ///
317 |         /// This function is unsafe because it dereferences raw pointers.
318 |         #[no_mangle]
319 |         pub unsafe extern "C" fn $func_name(
320 |             ptr: *const u64,
321 |             len: usize,
322 |             result: *mut c_void,
323 |         ) -> () {
324 |             let args = unsafe {
325 |                 assert!(!ptr.is_null());
326 |                 std::slice::from_raw_parts(ptr, len)
327 |             };
328 | 
329 |             match args.len() {
330 |                 1 => encode_and_store!($encoding_expr, [args[0]], u64, result),
331 |                 2 => encode_and_store!($encoding_expr, [args[0], args[1]], u128, result),
332 |                 _ => panic!("Invalid length"),
333 |             }
334 |         }
335 |     };
336 | }
337 | 
338 | generic_encode_u64_var!(hilbert_encode_u64_var, lindel::hilbert_encode);
339 | generic_encode_u64_var!(morton_encode_u64_var, lindel::morton_encode);
340 | 
341 | #[cfg(test)]
342 | mod tests {}
343 | 


--------------------------------------------------------------------------------
/extension_config.cmake:
--------------------------------------------------------------------------------
 1 | # This file is included by DuckDB's build system. It specifies which extension to load
 2 | 
 3 | # Extension from this repo
 4 | duckdb_extension_load(lindel
 5 |     SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
 6 |     LOAD_TESTS
 7 |     LINKED_LIBS "../../cargo/build/wasm32-unknown-emscripten/release/libduckdb_lindel_rust.a"
 8 | )
 9 | 
10 | # Any extra extensions that should be built
11 | # e.g.: duckdb_extension_load(json)


--------------------------------------------------------------------------------
/scripts/bootstrap-template.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import sys, os, shutil, re
 4 | from pathlib import Path
 5 | 
 6 | shutil.copyfile(f'docs/NEXT_README.md', f'README.md')
 7 | os.remove(f'docs/NEXT_README.md')
 8 | os.remove(f'docs/README.md')
 9 | 
10 | if (len(sys.argv) != 2):
11 |     raise Exception('usage: python3 bootstrap-template.py <name_for_extension_in_snake_case>')
12 | 
13 | name_extension = sys.argv[1]
14 | 
15 | def is_snake_case(s):
16 |     # Define the regex pattern for snake case with numbers
17 |     pattern = r'^[a-z0-9]+(_[a-z0-9]+)*$'
18 | 
19 |     # Use re.match to check if the string matches the pattern
20 |     if re.match(pattern, s):
21 |         return True
22 |     else:
23 |         return False
24 | 
25 | if name_extension[0].isdigit():
26 |     raise Exception('Please dont start your extension name with a number.')
27 | 
28 | if not is_snake_case(name_extension):
29 |     raise Exception('Please enter the name of your extension in valid snake_case containing only lower case letters and numbers')
30 | 
31 | def to_camel_case(snake_str):
32 |     return "".join(x.capitalize() for x in snake_str.lower().split("_"))
33 | 
34 | def replace(file_name, to_find, to_replace):
35 |     with open(file_name, 'r', encoding="utf8") as file :
36 |         filedata = file.read()
37 |     filedata = filedata.replace(to_find, to_replace)
38 |     with open(file_name, 'w', encoding="utf8") as file:
39 |         file.write(filedata)
40 | 
41 | files_to_search = []
42 | files_to_search.extend(Path('./.github').rglob('./**/*.yml'))
43 | files_to_search.extend(Path('./test').rglob('./**/*.test'))
44 | files_to_search.extend(Path('./src').rglob('./**/*.hpp'))
45 | files_to_search.extend(Path('./src').rglob('./**/*.cpp'))
46 | files_to_search.extend(Path('./src').rglob('./**/*.txt'))
47 | files_to_search.extend(Path('./src').rglob('./*.md'))
48 | 
49 | def replace_everywhere(to_find, to_replace):
50 |     for path in files_to_search:
51 |         replace(path, to_find, to_replace)
52 |         replace(path, to_find.capitalize(), to_camel_case(to_replace))
53 |         replace(path, to_find.upper(), to_replace.upper())
54 |     
55 |     replace("./CMakeLists.txt", to_find, to_replace)
56 |     replace("./Makefile", to_find, to_replace)
57 |     replace("./Makefile", to_find.capitalize(), to_camel_case(to_replace))
58 |     replace("./Makefile", to_find.upper(), to_replace.upper())
59 |     replace("./README.md", to_find, to_replace)
60 |     replace("./extension_config.cmake", to_find, to_replace)
61 | 
62 | replace_everywhere("quack", name_extension)
63 | replace_everywhere("Quack", name_extension.capitalize())
64 | replace_everywhere("<extension_name>", name_extension)
65 | 
66 | string_to_replace = name_extension
67 | string_to_find = "quack"
68 | 
69 | # rename files
70 | os.rename(f'test/sql/{string_to_find}.test', f'test/sql/{string_to_replace}.test')
71 | os.rename(f'src/{string_to_find}_extension.cpp', f'src/{string_to_replace}_extension.cpp')
72 | os.rename(f'src/include/{string_to_find}_extension.hpp', f'src/include/{string_to_replace}_extension.hpp')
73 | 
74 | # remove template-specific files
75 | os.remove('.github/workflows/ExtensionTemplate.yml')
76 | 
77 | # finally, remove this bootstrap file
78 | os.remove(__file__)


--------------------------------------------------------------------------------
/scripts/extension-upload.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Extension upload script
 4 | 
 5 | # Usage: ./extension-upload.sh <name> <extension_version> <duckdb_version> <architecture> <s3_bucket> <copy_to_latest> <copy_to_versioned>
 6 | # <name>                : Name of the extension
 7 | # <extension_version>   : Version (commit / version tag) of the extension
 8 | # <duckdb_version>      : Version (commit / version tag) of DuckDB
 9 | # <architecture>        : Architecture target of the extension binary
10 | # <s3_bucket>           : S3 bucket to upload to
11 | # <copy_to_latest>      : Set this as the latest version ("true" / "false", default: "false")
12 | # <copy_to_versioned>   : Set this as a versioned version that will prevent its deletion
13 | 
14 | set -e
15 | 
16 | if [[ $4 == wasm* ]]; then
17 |   ext="/tmp/extension/$1.duckdb_extension.wasm"
18 | else
19 |   ext="/tmp/extension/$1.duckdb_extension"
20 | fi
21 | 
22 | echo $ext
23 | 
24 | script_dir="$(dirname "$(readlink -f "$0")")"
25 | 
26 | # work on a copy of the extension binary so metadata and the signature can be appended to it
27 | cat $ext > $ext.append
28 | 
29 | if [[ $4 == wasm* ]]; then
30 |   # 0 for custom section
31 |   # 113 in hex = 275 in decimal, total length of what follows (1 + 16 + 2 + 256)
32 |   # [1(continuation) + 0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02]
33 |   echo -n -e '\x00' >> $ext.append
34 |   echo -n -e '\x93\x02' >> $ext.append
35 |   # 10 in hex = 16 in decimal, length of name, 1 byte
36 |   echo -n -e '\x10' >> $ext.append
37 |   echo -n -e 'duckdb_signature' >> $ext.append
38 |   # the name of the WebAssembly custom section, 16 bytes
39 |   # 100 in hex = 256 in decimal, length of the signature that is appended below
40 |   # [1(continuation) + 0000000(payload) = \x80, 0(continuation) + 10(payload) = \x02],
41 |   # for a grand total of 2 bytes
42 |   echo -n -e '\x80\x02' >> $ext.append
43 | fi
44 | 
45 | # (Optionally) Sign binary
46 | if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then
47 |   echo "$DUCKDB_EXTENSION_SIGNING_PK" > private.pem
48 |   $script_dir/../duckdb/scripts/compute-extension-hash.sh $ext.append > $ext.hash
49 |   openssl pkeyutl -sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign
50 |   rm -f private.pem
51 | fi
52 | 
53 | # Signature is always there, potentially defaulting to 256 zeros
54 | truncate -s 256 $ext.sign
55 | 
56 | # append signature to extension binary
57 | cat $ext.sign >> $ext.append
58 | 
59 | # compress extension binary
60 | if [[ $4 == wasm_* ]]; then
61 |   brotli < $ext.append > "$ext.compressed"
62 | else
63 |   gzip < $ext.append > "$ext.compressed"
64 | fi
65 | 
66 | set -e
67 | 
68 | # Abort if AWS key is not set
69 | if [ -z "$AWS_ACCESS_KEY_ID" ]; then
70 |     echo "No AWS key found, skipping.."
71 |     exit 0
72 | fi
73 | 
74 | # upload versioned version
75 | if [[ $7 = 'true' ]]; then
76 |   if [[ $4 == wasm* ]]; then
77 |     aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm"
78 |   else
79 |     aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read
80 |   fi
81 | fi
82 | 
83 | # upload to latest version
84 | if [[ $6 = 'true' ]]; then
85 |   if [[ $4 == wasm* ]]; then
86 |     aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm"
87 |   else
88 |     aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz --acl public-read
89 |   fi
90 | fi
91 | 


--------------------------------------------------------------------------------
/src/include/lindel_extension.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "duckdb.hpp"
 4 | 
 5 | namespace duckdb {
 6 | 
 7 | class LindelExtension : public Extension { // DuckDB Extension subclass for lindel
 8 | public:
 9 | 	void Load(DuckDB &db) override;
10 | 	std::string Name() override;
11 | 	std::string Version() const override;
12 | };
13 | 
14 | } // namespace duckdb
15 | 


--------------------------------------------------------------------------------
/src/include/rust.h:
--------------------------------------------------------------------------------
 1 | #include <cstdarg>
 2 | #include <cstddef>
 3 | #include <cstdint>
 4 | #include <cstdlib>
 5 | #include <ostream>
 6 | #include <new>
 7 | 
 8 | 
 9 | 
10 | extern "C" {
11 | 
12 | ///Free a value returned from `duckdb_malloc`, `duckdb_value_varchar`, `duckdb_value_blob`, or `duckdb_value_string`.
13 | ///
14 | /// ptr: The memory region to de-allocate.
15 | extern void duckdb_free(void *ptr);
16 | 
17 | ///Allocate `size` bytes of memory using the duckdb internal malloc function. Any memory allocated in this manner should be freed using `duckdb_free`.
18 | ///
19 | /// size: The number of bytes to allocate.  returns: A pointer to the allocated memory region.
20 | extern void *duckdb_malloc(size_t size);
21 | // Hilbert encoders, one per element width: `ptr` points at `len` elements; lindel_extension.cpp passes a pointer into its result vector as `result`.
22 | void hilbert_encode_u16_var(const uint16_t *ptr, size_t len, void *result);
23 | 
24 | void hilbert_encode_u32_var(const uint32_t *ptr, size_t len, void *result);
25 | 
26 | void hilbert_encode_u64_var(const uint64_t *ptr, size_t len, void *result);
27 | 
28 | void hilbert_encode_u8_var(const uint8_t *ptr, size_t len, void *result);
29 | // Morton encoders: same parameter layout as the Hilbert encoders above.
30 | void morton_encode_u16_var(const uint16_t *ptr, size_t len, void *result);
31 | 
32 | void morton_encode_u32_var(const uint32_t *ptr, size_t len, void *result);
33 | 
34 | void morton_encode_u64_var(const uint64_t *ptr, size_t len, void *result);
35 | 
36 | void morton_encode_u8_var(const uint8_t *ptr, size_t len, void *result);
37 | // Decoder: the C++ caller passes encoding_type 0 (Hilbert) or 1 (Morton), the output element width in bits, the encoded value, the output slot and its element count.
38 | void perform_decode(uint8_t encoding_type,
39 |                     uint8_t element_bit_width,
40 |                     const void *src,
41 |                     void *dest,
42 |                     size_t dest_len);
43 | 
44 | } // extern "C"
45 | 


--------------------------------------------------------------------------------
/src/lindel_extension.cpp:
--------------------------------------------------------------------------------
  1 | #define DUCKDB_EXTENSION_MAIN
  2 | 
  3 | #include "lindel_extension.hpp"
  4 | #include "duckdb.hpp"
  5 | #include "duckdb/common/exception.hpp"
  6 | #include "duckdb/common/optional_idx.hpp"
  7 | #include "duckdb/common/string_util.hpp"
  8 | #include "duckdb/function/scalar_function.hpp"
  9 | #include "duckdb/planner/expression/bound_function_expression.hpp"
 10 | #include "duckdb/main/extension_util.hpp"
 11 | #include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
 12 | 
 13 | // Include the declarations of things from Rust.
 14 | #include "rust.h"
 15 | 
 16 | namespace duckdb
 17 | {
 18 | 
 19 |     // Bind data shared by the encode and decode scalar functions.
 20 |     //
 21 |     // The extension supports two encodings, Hilbert and Morton.  Both are driven the same way
 22 |     // and differ only in the encoding routine that is called, so rather than writing separate
 23 |     // functions per encoding a single function handles both and the encoding to use is stored
 24 |     // in this bind data object.
 25 |     //
 26 |     // encoding_type is 0 for Hilbert and 1 for Morton.
 27 |     //
 28 |     // The bind data is created by the bind functions, before the scalar functions are called.
 29 |     struct lindelEncodingBindData : public FunctionData
 30 |     {
 31 |         // 0 = Hilbert, 1 = Morton.
 32 |         uint8_t encoding_type;
 33 | 
 34 |         // explicit: a bare integer must never convert silently into bind data.
 35 |         explicit lindelEncodingBindData(uint8_t encoding_type_p) : FunctionData(), encoding_type(encoding_type_p)
 36 |         {
 37 |         }
 38 | 
 39 |         // Returns a new bind data object carrying the same encoding type.
 40 |         duckdb::unique_ptr<FunctionData> Copy() const override
 41 |         {
 42 |             return make_uniq<lindelEncodingBindData>(encoding_type);
 43 |         }
 44 | 
 45 |         // Two bind data objects are equal when they select the same encoding.
 46 |         // other_p is assumed to be a lindelEncodingBindData (it is cast unconditionally).
 47 |         bool Equals(const FunctionData &other_p) const override
 48 |         {
 49 |             auto &other = other_p.Cast<lindelEncodingBindData>();
 50 |             return encoding_type == other.encoding_type;
 51 |         }
 52 |     };
 53 | 
 54 |     // Bind function for hilbert_decode()/morton_decode().
 55 |     //
 56 |     // In SQL the functions take four arguments:
 57 |     //
 58 |     // hilbert_decode(UTINYINT|USMALLINT|UINTEGER|UBIGINT|UHUGEINT, TINYINT, BOOLEAN, BOOLEAN)
 59 |     // morton_decode(UTINYINT|USMALLINT|UINTEGER|UBIGINT|UHUGEINT, TINYINT, BOOLEAN, BOOLEAN)
 60 |     //
 61 |     // 1. The value to decode.
 62 |     // 2. The number of parts to return (constant, non-NULL, greater than 0).
 63 |     // 3. Whether to return the parts as floats (true) or integers (false) (constant, non-NULL).
 64 |     // 4. Whether to return unsigned integers (true if unsigned) (constant, non-NULL).
 65 |     //
 66 |     // The encoding type is derived from the bound function name and returned in the bind data.
 67 |     //
 68 |     // The function also sets bound_function.return_type: always an ARRAY, whose element type and
 69 |     // length depend on the input type and on what the caller requests.
 70 |     //
 71 |     // Throws NotImplementedException for an unknown function name or a non-constant/NULL option,
 72 |     // and InvalidInputException for an unsupported input type / number of parts combination.
 73 |     static unique_ptr<FunctionData> lindelDecodeToArrayBind(ClientContext &context, ScalarFunction &bound_function,
 74 |                                                             vector<unique_ptr<Expression>> &arguments)
 75 |     {
 76 |         unique_ptr<lindelEncodingBindData> bind_data = make_uniq<lindelEncodingBindData>(0);
 77 |         if (bound_function.name == "hilbert_decode")
 78 |         {
 79 |             bind_data->encoding_type = 0;
 80 |         }
 81 |         else if (bound_function.name == "morton_decode")
 82 |         {
 83 |             bind_data->encoding_type = 1;
 84 |         }
 85 |         else
 86 |         {
 87 |             throw NotImplementedException("Unknown function name in lindelDecodeToArrayBind, expected either hilbert_decode() or morton_decode()");
 88 |         }
 89 | 
 90 |         auto &left_type = arguments[0]->return_type;
 91 |         // Evaluates argument `index` at bind time; it must be a foldable (constant) expression with a non-NULL value.
 92 |         auto get_foldable_value = [&](size_t index, LogicalType expected_type, const string &error_msg) -> Value
 93 |         {
 94 |             if (!arguments[index]->IsFoldable())
 95 |             {
 96 |                 throw NotImplementedException(error_msg);
 97 |             }
 98 |             Value val = ExpressionExecutor::EvaluateScalar(context, *arguments[index]).CastAs(context, expected_type);
 99 |             if (val.IsNull())
100 |             {
101 |                 throw NotImplementedException(error_msg + " expected a not-null value");
102 |             }
103 |             return val;
104 |         };
105 | 
106 |         auto return_number_of_parts = UTinyIntValue::Get(get_foldable_value(1, LogicalType::UTINYINT, "hilbert_decode(ANY, TINYINT, BOOLEAN, BOOLEAN)"));
107 |         auto return_float = BooleanValue::Get(get_foldable_value(2, LogicalType::BOOLEAN, "hilbert_decode(ANY, TINYINT, BOOLEAN, BOOLEAN)"));
108 |         auto return_unsigned = BooleanValue::Get(get_foldable_value(3, LogicalType::BOOLEAN, "hilbert_decode(ANY, TINYINT, BOOLEAN, BOOLEAN)"));
109 | 
110 |         if (return_number_of_parts == 0)
111 |         {
112 |             throw InvalidInputException("Number of parts to return must be greater than 0.");
113 |         }
114 |         // Checks that the input type is one of type_options, then sets the return type to ARRAY(base_type, parts).
115 |         auto set_return_type = [&](LogicalType base_type, size_t parts, string_t allowed_types, const vector<LogicalType> &type_options)
116 |         {
117 |             if (find(type_options.begin(), type_options.end(), left_type.id()) == type_options.end())
118 |             {
119 |                 throw InvalidInputException("Expected one of the following types:" + allowed_types.GetString());
120 |             }
121 |             bound_function.return_type = LogicalType::ARRAY(base_type, parts);
122 |         };
123 |         // Float output.  NOTE(review): for UINTEGER the requested number of parts is ignored and FLOAT[1] is always returned - confirm intent.
124 |         if (return_float)
125 |         {
126 |             switch (left_type.id())
127 |             {
128 |             case LogicalType::UINTEGER:
129 |                 set_return_type(LogicalType::FLOAT, 1, "UINTEGER", {LogicalType::UINTEGER});
130 |                 break;
131 |             case LogicalType::UBIGINT:
132 |                 if (return_number_of_parts == 1)
133 |                 {
134 |                     set_return_type(LogicalType::DOUBLE, 1, "UBIGINT", {LogicalType::UBIGINT});
135 |                 }
136 |                 else if (return_number_of_parts == 2)
137 |                 {
138 |                     set_return_type(LogicalType::FLOAT, 2, "UBIGINT", {LogicalType::UBIGINT});
139 |                 }
140 |                 else
141 |                 {
142 |                     throw InvalidInputException("Expected 1 or 2 parts for UBIGINT");
143 |                 }
144 |                 break;
145 |             case LogicalType::UHUGEINT:
146 |                 if (return_number_of_parts == 2)
147 |                 {
148 |                     set_return_type(LogicalType::DOUBLE, 2, "UHUGEINT", {LogicalType::UHUGEINT});
149 |                 }
150 |                 else if (return_number_of_parts >= 3 && return_number_of_parts <= 4)
151 |                 {
152 |                     set_return_type(LogicalType::FLOAT, return_number_of_parts, "UHUGEINT", {LogicalType::UHUGEINT});
153 |                 }
154 |                 else
155 |                 {
156 |                     throw InvalidInputException("Expected 2-4 parts for UHUGEINT");
157 |                 }
158 |                 break;
159 |             default:
160 |                 throw InvalidInputException("Expected UINTEGER, UBIGINT, or UHUGEINT");
161 |             }
162 |             return bind_data;
163 |         }
164 |         // Single integer part.  NOTE(review): type_options holds the signed types when return_unsigned is false, so an unsigned input is then rejected - confirm intent.
165 |         if (return_number_of_parts == 1)
166 |         {
167 |             set_return_type(left_type.id(), 1, "UINTEGER, USMALLINT, UTINYINT, UBIGINT, UHUGEINT", {
168 |                                                                                                        (return_unsigned ? LogicalType::UINTEGER : LogicalType::INTEGER),
169 |                                                                                                        (return_unsigned ? LogicalType::USMALLINT : LogicalType::SMALLINT),
170 |                                                                                                        (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT),
171 |                                                                                                        (return_unsigned ? LogicalType::UBIGINT : LogicalType::BIGINT),
172 |                                                                                                    });
173 |             return bind_data;
174 |         }
175 |         // Looks up the element type for the requested number of parts in type_map and sets the return type; throws when there is no entry.
176 |         auto set_integer_return_type = [&](LogicalType base_type, size_t parts, string_t allowed_types, string_t bounds, const map<size_t, LogicalType> &type_map)
177 |         {
178 |             if (type_map.find(return_number_of_parts) != type_map.end())
179 |             {
180 |                 set_return_type(type_map.at(return_number_of_parts), return_number_of_parts, allowed_types, {base_type});
181 |             }
182 |             else
183 |             {
184 |                 throw InvalidInputException("Expected " + bounds.GetString() + " parts for " + base_type.ToString());
185 |             }
186 |         };
187 | 
188 |         // Integer output with more than one part: the element type is the widest integer for which the
189 |         // requested number of parts still fits in the input type, e.g. a UINTEGER splits into 2 x 16 bit
190 |         // or 3-4 x 8 bit, matching the array lengths accepted by hilbert_encode()/morton_encode().
191 |         switch (left_type.id())
192 |         {
193 |         case LogicalType::UTINYINT:
194 |             throw InvalidInputException("Expected 1 parts for UTINYINT");
195 |         case LogicalType::USMALLINT:
196 |             set_integer_return_type(LogicalType::USMALLINT, return_number_of_parts, "UTINYINT", "2", {{2, return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT}});
197 |             break;
198 |         case LogicalType::UINTEGER:
199 |             set_integer_return_type(LogicalType::UINTEGER, return_number_of_parts, "UTINYINT, USMALLINT", "2-4", {{2, (return_unsigned ? LogicalType::USMALLINT : LogicalType::SMALLINT)}, {3, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {4, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}});
200 |             break;
201 |         case LogicalType::UBIGINT:
202 |             set_integer_return_type(LogicalType::UBIGINT, return_number_of_parts, "UTINYINT, USMALLINT, UINTEGER", "2-8", {{2, (return_unsigned ? LogicalType::UINTEGER : LogicalType::INTEGER)}, {3, (return_unsigned ? LogicalType::USMALLINT : LogicalType::SMALLINT)}, {4, (return_unsigned ? LogicalType::USMALLINT : LogicalType::SMALLINT)}, {5, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {6, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {7, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {8, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}});
203 |             break;
204 |         case LogicalType::UHUGEINT:
205 |             set_integer_return_type(LogicalType::UHUGEINT, return_number_of_parts, "UTINYINT, USMALLINT, UINTEGER, UBIGINT", "2-16", {{2, (return_unsigned ? LogicalType::UBIGINT : LogicalType::BIGINT)}, {3, (return_unsigned ? LogicalType::UINTEGER : LogicalType::INTEGER)}, {4, (return_unsigned ? LogicalType::UINTEGER : LogicalType::INTEGER)}, {5, (return_unsigned ? LogicalType::USMALLINT : LogicalType::SMALLINT)}, {6, (return_unsigned ? LogicalType::USMALLINT : LogicalType::SMALLINT)}, {7, (return_unsigned ? LogicalType::USMALLINT : LogicalType::SMALLINT)}, {8, (return_unsigned ? LogicalType::USMALLINT : LogicalType::SMALLINT)}, {9, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {10, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {11, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {12, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {13, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {14, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {15, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}, {16, (return_unsigned ? LogicalType::UTINYINT : LogicalType::TINYINT)}});
206 |             break;
207 |         default:
208 |             throw InvalidInputException("Expected UINTEGER, USMALLINT, UTINYINT, UBIGINT, or UHUGEINT");
209 |         }
210 | 
211 |         return bind_data;
212 |     }
213 | 
214 |     // Scalar function body for hilbert_decode()/morton_decode(): decodes every non-NULL input row into
215 |     // a fixed-size array via perform_decode(), using the encoding type stored in the bind data.
216 |     inline void lindelDecodeArrayFun(DataChunk &args, ExpressionState &state, Vector &result)
217 |     {
218 |         // This is the number of elements in the output array, not the number of rows being processed.
219 |         auto output_number_of_elements = ArrayType::GetSize(result.GetType());
220 | 
221 |         // The type of the elements in the output array this will either be an integer type or a float type.
222 |         auto output_child_type = ArrayType::GetChildType(result.GetType());
223 | 
224 |         // Get a reference to the bind data that was already created that will determine the type
225 |         // of encoding to use.
226 |         auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
227 |         auto &bind_info = func_expr.bind_info->Cast<lindelEncodingBindData>();
228 | 
229 |         // Reference the source data.
230 |         auto left = args.data[0];
231 | 
232 |         // Standardize the vectors to a unified format, so it can be iterated.
233 |         UnifiedVectorFormat left_format;
234 |         left.ToUnifiedFormat(args.size(), left_format);
235 | 
236 |         // Read the input as raw bytes through the unified format: its data pointer is the one that the
237 |         // selection indices (left_format.sel) refer to, whatever the vector type of the input is.
238 |         const uint8_t *left_data_8 = left_format.data;
239 | 
240 |         // So the output type changes based on the number of inputs and the type of inputs.
241 | 
242 |         // Get the reference to the children of the result.
243 |         auto &result_data_children = ArrayVector::GetEntry(result);
244 | 
245 |         // The output is written as raw bytes as well; the element width computed below is used to
246 |         // turn element offsets into byte offsets.
247 |         auto result_data_u8 = FlatVector::GetData<uint8_t>(result_data_children);
248 | 
249 |         uint8_t output_element_bit_width;
250 | 
251 |         switch (output_child_type.id())
252 |         {
253 |         case LogicalTypeId::UTINYINT:
254 |         case LogicalTypeId::TINYINT:
255 |         {
256 |             output_element_bit_width = 8;
257 |         }
258 |         break;
259 |         case LogicalTypeId::USMALLINT:
260 |         case LogicalTypeId::SMALLINT:
261 |         {
262 |             output_element_bit_width = 16;
263 |         }
264 |         break;
265 |         case LogicalTypeId::UINTEGER:
266 |         case LogicalTypeId::INTEGER:
267 |         case LogicalTypeId::FLOAT:
268 |         {
269 |             output_element_bit_width = 32;
270 |         }
271 |         break;
272 |         case LogicalTypeId::UBIGINT:
273 |         case LogicalTypeId::BIGINT:
274 |         case LogicalTypeId::DOUBLE:
275 |         {
276 |             output_element_bit_width = 64;
277 |         }
278 |         break;
279 |         case LogicalTypeId::UHUGEINT:
280 |         case LogicalTypeId::HUGEINT:
281 |         {
282 |             output_element_bit_width = 128;
283 |         }
284 |         break;
285 |         default:
286 |             throw NotImplementedException("hilbert_decode()/morton_decode() only supports destination types of UTINYINT, USMALLINT, UINTEGER, UBIGINT, UHUGEINT types");
287 |         }
288 |         size_t input_pointer_increment;
289 |         switch (left.GetType().id())
290 |         {
291 |         case LogicalTypeId::UTINYINT:
292 |         case LogicalTypeId::TINYINT:
293 |         {
294 |             input_pointer_increment = 1;
295 |         }
296 |         break;
297 |         case LogicalTypeId::USMALLINT:
298 |         case LogicalTypeId::SMALLINT:
299 |         {
300 |             input_pointer_increment = 2;
301 |         }
302 |         break;
303 |         case LogicalTypeId::UINTEGER:
304 |         case LogicalTypeId::INTEGER:
305 |         {
306 |             input_pointer_increment = 4;
307 |         }
308 |         break;
309 |         case LogicalTypeId::UBIGINT:
310 |         case LogicalTypeId::BIGINT:
311 |         {
312 |             input_pointer_increment = 8;
313 |         }
314 |         break;
315 |         case LogicalTypeId::UHUGEINT:
316 |         case LogicalTypeId::HUGEINT:
317 |         {
318 |             input_pointer_increment = 16;
319 |         }
320 |         break;
321 |         default:
322 |             throw NotImplementedException("hilbert_decode()/morton_decode() only supports incoming sources of UTINYINT, USMALLINT, UINTEGER, UBIGINT, UHUGEINT types");
323 |         }
324 | 
325 |         const size_t output_pointer_increment = output_element_bit_width / 8;
326 | 
327 |         for (idx_t i = 0; i < args.size(); i++)
328 |         {
329 |             auto left_idx = left_format.sel->get_index(i);
330 | 
331 |             // If the input value is NULL then the output value should be NULL.
332 |             if (!left_format.validity.RowIsValid(left_idx))
333 |             {
334 |                 FlatVector::SetNull(result, i, true);
335 |                 continue;
336 |             }
337 | 
338 |             // Get the offset of where the result for this row should begin, since
339 |             // there is always a fixed number of result elements, its pretty simple.
340 |             auto result_offset = i * output_number_of_elements;
341 | 
342 |             // Both locations are byte addresses: element offsets are scaled by the element width in
343 |             // bytes.  The source is only read, so it is kept const (perform_decode takes const void *).
344 | 
345 |             void *output_location = result_data_u8 + result_offset * output_pointer_increment;
346 |             const void *source_location = left_data_8 + (left_idx * input_pointer_increment);
347 | 
348 |             perform_decode(bind_info.encoding_type, output_element_bit_width, source_location, output_location, output_number_of_elements);
349 |         }
350 | 
351 |         if (args.size() == 1)
352 |         {
353 |             result.SetVectorType(VectorType::CONSTANT_VECTOR);
354 |         }
355 |     }
356 | 
357 |     // Bind function for hilbert_encode()/morton_encode(): records the encoding type (0 = Hilbert,
358 |     // 1 = Morton) from the bound function name and sets the return type to the unsigned integer
359 |     // type selected by the input array's element type and length; unsupported combinations throw.
360 |     static unique_ptr<FunctionData> lindelEncodeArrayBind(ClientContext &context, ScalarFunction &bound_function,
361 |                                                           vector<unique_ptr<Expression>> &arguments)
362 |     {
363 |         unique_ptr<lindelEncodingBindData> bind_data = make_uniq<lindelEncodingBindData>(0);
364 |         if (bound_function.name == "hilbert_encode")
365 |         {
366 |             bind_data->encoding_type = 0;
367 |         }
368 |         else if (bound_function.name == "morton_encode")
369 |         {
370 |             bind_data->encoding_type = 1;
371 |         }
372 |         else
373 |         {
374 |             throw NotImplementedException("Unknown function name in lindelEncodeBind");
375 |         }
376 | 
377 |         // Validate the input type.  arguments[0] is assumed to be an ARRAY: the ArrayType accessors below are used without a check.
378 |         auto &left_type = arguments[0]->return_type;
379 | 
380 |         // This is the number of elements in each input array, not the number of rows being processed.
381 |         auto input_number_of_elements = ArrayType::GetSize(left_type);
382 | 
383 |         // The type of the elements in the input array; the supported ones are the integer types up to 64 bits, FLOAT and DOUBLE.
384 |         auto input_child_type = ArrayType::GetChildType(left_type);
385 |         // Pick the return type from the element type and the array length.
386 |         switch (input_child_type.id())
387 |         {
388 |         case LogicalTypeId::DOUBLE:
389 |         {
390 |             switch (input_number_of_elements)
391 |             {
392 |             case 1:
393 |                 bound_function.return_type = LogicalType::UBIGINT;
394 |                 break;
395 |             case 2:
396 |                 bound_function.return_type = LogicalType::UHUGEINT;
397 |                 break;
398 |             default:
399 |                 throw InvalidInputException("hilbert_encode()/morton_encode() only supports arrays of lengths of 1 or 2 for DOUBLE.");
400 |             }
401 |         }
402 |         break;
403 |         case LogicalTypeId::FLOAT:
404 |         {
405 |             switch (input_number_of_elements)
406 |             {
407 |             case 1:
408 |                 bound_function.return_type = LogicalType::UINTEGER;
409 |                 break;
410 |             case 2:
411 |                 bound_function.return_type = LogicalType::UBIGINT;
412 |                 break;
413 |             case 3:
414 |             case 4:
415 |                 bound_function.return_type = LogicalType::UHUGEINT;
416 |                 break;
417 |             default:
418 |                 throw InvalidInputException("hilbert_encode()/morton_encode() only supports arrays of lengths 1-4 for FLOAT.");
419 |             }
420 |         }
421 |         break;
422 |         case LogicalTypeId::UBIGINT:
423 |         case LogicalTypeId::BIGINT:
424 |         {
425 |             switch (input_number_of_elements)
426 |             {
427 |             case 1:
428 |                 bound_function.return_type = LogicalType::UBIGINT;
429 |                 break;
430 |             case 2:
431 |                 bound_function.return_type = LogicalType::UHUGEINT;
432 |                 break;
433 |             default:
434 |                 throw InvalidInputException("hilbert_encode()/morton_encode() only supports arrays of lengths of 1 or 2 for BIGINT/UBIGINT.");
435 |             }
436 |         }
437 |         break;
438 |         case LogicalTypeId::UINTEGER:
439 |         case LogicalTypeId::INTEGER:
440 |         {
441 |             switch (input_number_of_elements)
442 |             {
443 |             case 1:
444 |                 bound_function.return_type = LogicalType::UINTEGER;
445 |                 break;
446 |             case 2:
447 |                 bound_function.return_type = LogicalType::UBIGINT;
448 |                 break;
449 |             case 3:
450 |             case 4:
451 |                 bound_function.return_type = LogicalType::UHUGEINT;
452 |                 break;
453 |             default:
454 |                 throw InvalidInputException("hilbert_encode()/morton_encode() only supports arrays of lengths 1-4 for UINTEGER/INTEGER.");
455 |             }
456 |         }
457 |         break;
458 |         case LogicalTypeId::USMALLINT:
459 |         case LogicalTypeId::SMALLINT:
460 |         {
461 |             switch (input_number_of_elements)
462 |             {
463 |             case 1: // 16
464 |                 bound_function.return_type = LogicalType::USMALLINT;
465 |                 break;
466 |             case 2: // 32
467 |                 bound_function.return_type = LogicalType::UINTEGER;
468 |                 break;
469 |             case 3:
470 |             case 4:
471 |                 bound_function.return_type = LogicalType::UBIGINT;
472 |                 break;
473 |             case 5:
474 |             case 6:
475 |             case 7:
476 |             case 8:
477 |                 bound_function.return_type = LogicalType::UHUGEINT;
478 |                 break;
479 |             default:
480 |                 throw InvalidInputException("hilbert_encode()/morton_encode() only supports arrays of lengths 1-8 for USMALLINT/SMALLINT.");
481 |             }
482 |         }
483 |         break;
484 |         case LogicalTypeId::UTINYINT:
485 |         case LogicalTypeId::TINYINT:
486 |         {
487 |             switch (input_number_of_elements)
488 |             {
489 |             case 1:
490 |                 bound_function.return_type = LogicalType::UTINYINT;
491 |                 break;
492 |             case 2:
493 |                 bound_function.return_type = LogicalType::USMALLINT;
494 |                 break;
495 |             case 3:
496 |             case 4:
497 |                 bound_function.return_type = LogicalType::UINTEGER;
498 |                 break;
499 |             case 5:
500 |             case 6:
501 |             case 7:
502 |             case 8:
503 |                 bound_function.return_type = LogicalType::UBIGINT;
504 |                 break;
505 |             case 9:
506 |             case 10:
507 |             case 11:
508 |             case 12:
509 |             case 13:
510 |             case 14:
511 |             case 15:
512 |             case 16:
513 |                 bound_function.return_type = LogicalType::UHUGEINT;
514 |                 break;
515 |             default:
516 |                 throw InvalidInputException("hilbert_encode()/morton_encode() only supports arrays of lengths 1-16 for UTINYINT/TINYINT.");
517 |             }
518 |         }
519 |         break;
520 |         default:
521 |             throw InvalidInputException("hilbert_encode()/morton_encode() only supports arrays of types DOUBLE, FLOAT, UBIGINT, BIGINT, UINTEGER, INTEGER, USMALLINT, SMALLINT, UTINYINT, TINYINT");
522 |         }
523 | 
524 |         return bind_data;
525 |     }
526 | 
527 |     // Perform encoding for an array of values.
528 |     inline void lindelEncodeArrayFunc(DataChunk &args, ExpressionState &state, Vector &result)
529 |     {
530 |         // Get a reference to the bind data.
531 |         auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
532 |         auto &bind_info = func_expr.bind_info->Cast<lindelEncodingBindData>();
533 | 
534 |         // This is the size of the array
535 |         auto array_number_of_elements = ArrayType::GetSize(args.data[0].GetType());
536 |         auto child_type = ArrayType::GetChildType(args.data[0].GetType());
537 | 
538 |         // Get a pointer to the input data.
539 |         auto left = args.data[0];
540 |         auto &left_child = ArrayVector::GetEntry(left);
541 |         auto &left_child_validity = FlatVector::Validity(left_child);
542 |         UnifiedVectorFormat left_format;
543 | 
544 |         left.ToUnifiedFormat(args.size(), left_format);
545 | 
546 |         // Need the different input types since we're doing pointer math below.
547 |         auto left_data_8 = FlatVector::GetData<int8_t>(left_child);
548 |         auto left_data_16 = FlatVector::GetData<int16_t>(left_child);
549 |         auto left_data_32 = FlatVector::GetData<int32_t>(left_child);
550 |         auto left_data_64 = FlatVector::GetData<int64_t>(left_child);
551 |         auto left_data_float = FlatVector::GetData<float_t>(left_child);
552 |         auto left_data_double = FlatVector::GetData<double_t>(left_child);
553 | 
554 |         // So the output type changes based on the number of inputs and the type of inputs.
555 |         auto result_data_u8 = FlatVector::GetData<uint8_t>(result);
556 |         auto result_data_u16 = FlatVector::GetData<uint16_t>(result);
557 |         auto result_data_u32 = FlatVector::GetData<uint32_t>(result);
558 |         auto result_data_u64 = FlatVector::GetData<uint64_t>(result);
559 |         auto result_data_u128 = FlatVector::GetData<uhugeint_t>(result);
560 | 
561 |         for (idx_t i = 0; i < args.size(); i++)
562 |         {
563 |             auto left_idx = left_format.sel->get_index(i);
564 |             if (!left_format.validity.RowIsValid(left_idx))
565 |             {
566 |                 FlatVector::SetNull(result, i, true);
567 |                 continue;
568 |             }
569 | 
570 |             auto left_offset = left_idx * array_number_of_elements;
571 |             if (!left_child_validity.CheckAllValid(left_offset + array_number_of_elements, left_offset))
572 |             {
573 |                 throw InvalidInputException(StringUtil::Format("%s: array can not contain NULL values", "hilbert_encode"));
574 |             }
575 | 
576 |             switch (child_type.id())
577 |             {
578 |             case LogicalTypeId::DOUBLE:
579 |             {
580 |                 auto encoder = bind_info.encoding_type == 0 ? hilbert_encode_u64_var : morton_encode_u64_var;
581 |                 switch (array_number_of_elements)
582 |                 {
583 |                 case 1:
584 |                 {
585 |                     encoder((uint64_t *)(left_data_double + left_offset), array_number_of_elements, result_data_u64 + i);
586 |                     break;
587 |                 }
588 |                 case 2:
589 |                 {
590 |                     encoder((uint64_t *)(left_data_double + left_offset), array_number_of_elements, result_data_u128 + i);
591 |                     break;
592 |                 }
593 |                 default:
594 |                     throw NotImplementedException("hilbert_encode()/morton_encode() only supports arrays of lengths of 1 or 2 for DOUBLE.");
595 |                 }
596 |             }
597 |             break;
598 |             case LogicalTypeId::FLOAT:
599 |             {
600 |                 // The number of elements in the array dictates the output type.
601 |                 auto encoder = bind_info.encoding_type == 0 ? hilbert_encode_u32_var : morton_encode_u32_var;
602 |                 switch (array_number_of_elements)
603 |                 {
604 |                 case 1:
605 |                 {
606 |                     encoder((uint32_t *)(left_data_float + left_offset), array_number_of_elements, result_data_u32 + i);
607 |                     break;
608 |                 }
609 |                 case 2:
610 |                 case 3:
611 |                 {
612 |                     encoder((uint32_t *)(left_data_float + left_offset), array_number_of_elements, result_data_u64 + i);
613 |                     break;
614 |                 }
615 |                 case 4:
616 |                 {
617 |                     hilbert_encode_u32_var((uint32_t *)(left_data_float + left_offset), array_number_of_elements, result_data_u128 + i);
618 |                     break;
619 |                 }
620 |                 default:
621 |                     throw NotImplementedException("hilbert_encode()/morton_encode() only supports arrays of lengths 1-4 for FLOAT.");
622 |                 }
623 |             }
624 |             break;
625 |             case LogicalTypeId::UBIGINT:
626 |             case LogicalTypeId::BIGINT:
627 |             {
628 |                 auto encoder = bind_info.encoding_type == 0 ? hilbert_encode_u64_var : morton_encode_u64_var;
629 |                 switch (array_number_of_elements)
630 |                 {
631 |                 case 1:
632 |                 {
633 |                     encoder((uint64_t *)(left_data_64 + left_offset), array_number_of_elements, result_data_u64 + i);
634 |                     break;
635 |                 }
636 |                 case 2:
637 |                 {
638 |                     encoder((uint64_t *)(left_data_64 + left_offset), array_number_of_elements, result_data_u128 + i);
639 |                     break;
640 |                 }
641 |                 default:
642 |                     throw NotImplementedException("hilbert_encode()/morton_encode() only supports arrays of lengths of 1 or 2 for BIGINT/UBIGINT.");
643 |                 }
644 |             }
645 |             break;
646 | 
647 |             case LogicalTypeId::UINTEGER:
648 |             case LogicalTypeId::INTEGER:
649 |             {
650 |                 auto encoder = bind_info.encoding_type == 0 ? hilbert_encode_u32_var : morton_encode_u32_var;
651 |                 // The number of elements in the array dictates the output type.
652 |                 switch (array_number_of_elements)
653 |                 {
654 |                 case 1:
655 |                 {
656 |                     encoder((uint32_t *)(left_data_32 + left_offset), array_number_of_elements, result_data_u32 + i);
657 |                     break;
658 |                 }
659 |                 case 2:
660 |                 case 3:
661 |                 {
662 |                     encoder((uint32_t *)(left_data_32 + left_offset), array_number_of_elements, result_data_u64 + i);
663 |                     break;
664 |                 }
665 |                 case 4:
666 |                 {
667 |                     encoder((uint32_t *)(left_data_32 + left_offset), array_number_of_elements, result_data_u128 + i);
668 |                     break;
669 |                 }
670 |                 default:
671 |                     throw NotImplementedException("hilbert_encode()/morton_encode() only supports arrays of lengths 1-4 for UINTEGER/INTEGER.");
672 |                 }
673 |             }
674 |             break;
675 |             case LogicalTypeId::SMALLINT:
676 |             case LogicalTypeId::USMALLINT:
677 |             {
678 |                 // The number of elements in the array dictates the output type.
679 |                 auto encoder = bind_info.encoding_type == 0 ? hilbert_encode_u16_var : morton_encode_u16_var;
680 |                 switch (array_number_of_elements)
681 |                 {
682 |                 case 1:
683 |                 {
684 |                     encoder((uint16_t *)(left_data_16 + left_offset), array_number_of_elements, result_data_u16 + i);
685 |                     break;
686 |                 }
687 |                 case 2:
688 |                 {
689 |                     encoder((uint16_t *)(left_data_16 + left_offset), array_number_of_elements, result_data_u32 + i);
690 |                     break;
691 |                 }
692 |                 case 3:
693 |                 case 4:
694 |                 {
695 |                     encoder((uint16_t *)(left_data_16 + left_offset), array_number_of_elements, result_data_u64 + i);
696 |                     break;
697 |                 }
698 |                 case 5:
699 |                 case 6:
700 |                 case 7:
701 |                 case 8:
702 |                 {
703 |                     encoder((uint16_t *)(left_data_16 + left_offset), array_number_of_elements, result_data_u128 + i);
704 |                     break;
705 |                 }
706 |                 default:
707 |                     throw NotImplementedException("hilbert_encode()/morton_encode() only supports arrays of length 1-8 for SMALLINT/USMALLINT.");
708 |                 }
709 |             }
710 |             break;
711 |             case LogicalTypeId::TINYINT:
712 |             case LogicalTypeId::UTINYINT:
713 |             {
714 |                 // The number of elements in the array dictates the output type.
715 |                 auto encoder = bind_info.encoding_type == 0 ? hilbert_encode_u8_var : morton_encode_u8_var;
716 |                 switch (array_number_of_elements)
717 |                 {
718 |                 case 1:
719 |                 {
720 |                     encoder((uint8_t *)(left_data_8 + left_offset), array_number_of_elements, result_data_u8 + i);
721 |                     break;
722 |                 }
723 |                 case 2:
724 |                 {
725 |                     encoder((uint8_t *)(left_data_8 + left_offset), array_number_of_elements, result_data_u16 + i);
726 |                     break;
727 |                 }
728 |                 case 3:
729 |                 case 4:
730 |                 {
731 |                     encoder((uint8_t *)(left_data_8 + left_offset), array_number_of_elements, result_data_u32 + i);
732 |                     break;
733 |                 }
734 |                 case 5:
735 |                 case 6:
736 |                 case 7:
737 |                 case 8:
738 |                 {
739 |                     encoder((uint8_t *)(left_data_8 + left_offset), array_number_of_elements, result_data_u64 + i);
740 |                     break;
741 |                 }
742 |                 case 9:
743 |                 case 10:
744 |                 case 11:
745 |                 case 12:
746 |                 case 13:
747 |                 case 14:
748 |                 case 15:
749 |                 case 16:
750 |                 {
751 |                     encoder((uint8_t *)(left_data_8 + left_offset), array_number_of_elements, result_data_u128 + i);
752 |                     break;
753 |                 }
754 |                 default:
755 |                     throw NotImplementedException("hilbert_encode()/morton_encode() only supports arrays of length 1-16 for UTINYINT/TINYINT.");
756 |                 }
757 |             }
758 |             break;
759 |             default:
760 |                 throw NotImplementedException("hilbert_encode()/morton_encode() only supports arrays of FLOAT, DOUBLE, BIGINT, UBIGINT, INTEGER, UINTEGER, SMALLINT, USMALLINT, TINYINT, UTINYINT types");
761 |             }
762 |         }
763 | 
764 |         if (args.size() == 1)
765 |         {
766 |             result.SetVectorType(VectorType::CONSTANT_VECTOR);
767 |         }
768 |     }
769 | 
770 |     // Extension initialization.
771 |     static void LoadInternal(DatabaseInstance &instance)
772 |     {
773 |         // Type shared by the registrations below: an ARRAY whose element type is ANY and
774 |         // whose size is left unset (optional_idx::Invalid()).
775 |         const LogicalType any_array = LogicalType::ARRAY(LogicalType::ANY, optional_idx::Invalid());
776 | 
777 |         // Encoders: each curve gets a single overload taking one such array and declaring
778 |         // ANY as its result, with lindelEncodeArrayBind installed as the bind callback.
779 |         const char *const encoder_names[] = {"hilbert_encode", "morton_encode"};
780 |         for (const char *encoder_name : encoder_names)
781 |         {
782 |             ScalarFunctionSet encoders(encoder_name);
783 |             encoders.AddFunction(ScalarFunction({any_array}, LogicalType::ANY, lindelEncodeArrayFunc, lindelEncodeArrayBind));
784 |             ExtensionUtil::RegisterFunction(instance, encoders);
785 |         }
786 | 
787 |         // Decoders: one overload per unsigned integer type accepted as the encoded value.
788 |         // Every overload also takes a UTINYINT and two BOOLEANs and returns an array.
789 |         const LogicalType decodable_types[] = {
790 |             LogicalType::UTINYINT,
791 |             LogicalType::USMALLINT,
792 |             LogicalType::UINTEGER,
793 |             LogicalType::UBIGINT,
794 |             LogicalType::UHUGEINT};
795 | 
796 |         const char *const decoder_names[] = {"hilbert_decode", "morton_decode"};
797 |         for (const char *decoder_name : decoder_names)
798 |         {
799 |             ScalarFunctionSet decoders(decoder_name);
800 |             for (const LogicalType &encoded_type : decodable_types)
801 |             {
802 |                 decoders.AddFunction(
803 |                     ScalarFunction({encoded_type, LogicalType::UTINYINT, LogicalType::BOOLEAN, LogicalType::BOOLEAN}, any_array,
804 |                                    lindelDecodeArrayFun,
805 |                                    lindelDecodeToArrayBind));
806 |             }
807 |             ExtensionUtil::RegisterFunction(instance, decoders);
808 |         }
809 |     }
810 | 
811 |     // Loads the extension into `db` by registering its scalar functions on the
812 |     // DatabaseInstance that `db.instance` refers to.
813 |     void LindelExtension::Load(DuckDB &db)
814 |     {
815 |         DatabaseInstance &target_instance = *db.instance;
816 |         LoadInternal(target_instance);
817 |     }
815 |     // Returns the name of this extension: "lindel".
816 |     std::string LindelExtension::Name()
817 |     {
818 |         std::string extension_name("lindel");
819 |         return extension_name;
820 |     }
819 | 
820 |     // Reports the extension version supplied by the build as a compile-time string macro,
821 |     // or an empty string when no such macro is defined.
822 |     //
823 |     // EXT_VERSION_LINDEL is checked first: the version macro is expected to be named after
824 |     // the extension (EXT_VERSION_<NAME>) — TODO confirm against the build configuration.
825 |     // EXT_VERSION_QUACK, the extension template's placeholder name, is kept as a fallback
826 |     // so that any build defining it explicitly keeps reporting the same value as before.
827 |     std::string LindelExtension::Version() const
828 |     {
829 | #if defined(EXT_VERSION_LINDEL)
830 |         return EXT_VERSION_LINDEL;
831 | #elif defined(EXT_VERSION_QUACK)
832 |         return EXT_VERSION_QUACK;
833 | #else
834 |         return "";
835 | #endif
836 |     }
828 | 
829 | } // namespace duckdb
830 | 
831 | extern "C"
832 | {
833 | 
834 |     // Exported C entry point: wraps the given database instance in a DuckDB handle and
835 |     // loads LindelExtension through it.
836 |     DUCKDB_EXTENSION_API void lindel_init(duckdb::DatabaseInstance &db)
837 |     {
838 |         duckdb::DuckDB(db).LoadExtension<duckdb::LindelExtension>();
839 |     }
839 | 
840 |     // Exported C accessor returning the hard-coded version string "1.0.1".
841 |     // NOTE(review): this value is independent of LindelExtension::Version() — confirm which
842 |     // version (the extension's or the DuckDB library's) callers expect here.
843 |     DUCKDB_EXTENSION_API const char *lindel_version()
844 |     {
845 |         static const char version_string[] = "1.0.1";
846 |         return version_string;
847 |     }
844 | }
845 | 
846 | #ifndef DUCKDB_EXTENSION_MAIN
847 | #error DUCKDB_EXTENSION_MAIN not defined
848 | #endif
849 | 


--------------------------------------------------------------------------------
/test/README.md:
--------------------------------------------------------------------------------
 1 | # Testing this extension
 2 | This directory contains all the tests for this extension. The `sql` directory holds tests that are written as [SQLLogicTests](https://duckdb.org/dev/sqllogictest/intro.html). DuckDB aims to have most of its tests in this format as SQL statements, so for the lindel extension, this should probably be the goal too.
 3 | 
 4 | The root makefile contains targets to build and run all of these tests. To run the SQLLogicTests:
 5 | ```bash
 6 | make test
 7 | ```
 8 | or
 9 | ```bash
10 | make test_debug
11 | ```


--------------------------------------------------------------------------------
/test/sql/lindel.test:
--------------------------------------------------------------------------------
  1 | # name: test/sql/lindel.test
  2 | # description: test lindel extension
  3 | # group: [lindel]
  4 | 
  5 | # Before we load the extension, this will fail
  6 | statement error
  7 | SELECT hilbert_encode([1, 2, 3]::tinyint[3]);
  8 | ----
  9 | Catalog Error: Scalar Function with name hilbert_encode does not exist!
 10 | 
 11 | # Require statement will ensure this test is run with this extension loaded
 12 | require lindel
 13 | 
 14 | # Confirm the extension works
 15 | query I
 16 | SELECT hilbert_encode([1, 2, 3]::tinyint[3]);
 17 | ----
 18 | 22
 19 | 
 20 | query I
 21 | SELECT morton_encode([1, 2, 3]::tinyint[3]);
 22 | ----
 23 | 29
 24 | 
 25 | query I
 26 | select hilbert_decode(22::uinteger, 3, false, false)
 27 | ----
 28 | [1, 2, 3]
 29 | 
 30 | query IIIIII
 31 | with elements as (
 32 |   select * as id from range(5)
 33 | )
 34 | select
 35 |   a.id as a,
 36 |   b.id as b,
 37 |   hilbert_encode([a.id, b.id]::tinyint[2]) as hilbert,
 38 |   morton_encode([a.id, b.id]::tinyint[2]) as morton,
 39 |   hilbert_decode(hilbert_encode([a.id, b.id]::tinyint[2]), 2, false, false) as hilbert_decoded,
 40 |   morton_decode(morton_encode([a.id, b.id]::tinyint[2]), 2, false, false) as morton_decoded,
 41 |   from
 42 | elements as a cross join elements as b order by a, b;
 43 | ----
 44 | 0	0	0	0	[0, 0]	[0, 0]
 45 | 0	1	3	1	[0, 1]	[0, 1]
 46 | 0	2	4	4	[0, 2]	[0, 2]
 47 | 0	3	5	5	[0, 3]	[0, 3]
 48 | 0	4	58	16	[0, 4]	[0, 4]
 49 | 1	0	1	2	[1, 0]	[1, 0]
 50 | 1	1	2	3	[1, 1]	[1, 1]
 51 | 1	2	7	6	[1, 2]	[1, 2]
 52 | 1	3	6	7	[1, 3]	[1, 3]
 53 | 1	4	57	18	[1, 4]	[1, 4]
 54 | 2	0	14	8	[2, 0]	[2, 0]
 55 | 2	1	13	9	[2, 1]	[2, 1]
 56 | 2	2	8	12	[2, 2]	[2, 2]
 57 | 2	3	9	13	[2, 3]	[2, 3]
 58 | 2	4	54	24	[2, 4]	[2, 4]
 59 | 3	0	15	10	[3, 0]	[3, 0]
 60 | 3	1	12	11	[3, 1]	[3, 1]
 61 | 3	2	11	14	[3, 2]	[3, 2]
 62 | 3	3	10	15	[3, 3]	[3, 3]
 63 | 3	4	53	26	[3, 4]	[3, 4]
 64 | 4	0	16	32	[4, 0]	[4, 0]
 65 | 4	1	17	33	[4, 1]	[4, 1]
 66 | 4	2	30	36	[4, 2]	[4, 2]
 67 | 4	3	31	37	[4, 3]	[4, 3]
 68 | 4	4	32	48	[4, 4]	[4, 4]
 69 | 
 70 | 
 71 | # Try to encode as integers.
 72 | 
 73 | query IIIIII
 74 | with elements as (
 75 |   select * as id from range(5)
 76 | )
 77 | select
 78 |   a.id as a,
 79 |   b.id as b,
 80 |   hilbert_encode([a.id, b.id]::int[2]) as hilbert,
 81 |   morton_encode([a.id, b.id]::int[2]) as morton,
 82 |   hilbert_decode(hilbert_encode([a.id, b.id]::int[2]), 2, false, false) as hilbert_decoded,
 83 |   morton_decode(morton_encode([a.id, b.id]::int[2]), 2, false, false) as morton_decoded,
 84 |   from
 85 | elements as a cross join elements as b order by a, b;
 86 | ----
 87 | 0	0	0	0	[0, 0]	[0, 0]
 88 | 0	1	3	1	[0, 1]	[0, 1]
 89 | 0	2	4	4	[0, 2]	[0, 2]
 90 | 0	3	5	5	[0, 3]	[0, 3]
 91 | 0	4	58	16	[0, 4]	[0, 4]
 92 | 1	0	1	2	[1, 0]	[1, 0]
 93 | 1	1	2	3	[1, 1]	[1, 1]
 94 | 1	2	7	6	[1, 2]	[1, 2]
 95 | 1	3	6	7	[1, 3]	[1, 3]
 96 | 1	4	57	18	[1, 4]	[1, 4]
 97 | 2	0	14	8	[2, 0]	[2, 0]
 98 | 2	1	13	9	[2, 1]	[2, 1]
 99 | 2	2	8	12	[2, 2]	[2, 2]
100 | 2	3	9	13	[2, 3]	[2, 3]
101 | 2	4	54	24	[2, 4]	[2, 4]
102 | 3	0	15	10	[3, 0]	[3, 0]
103 | 3	1	12	11	[3, 1]	[3, 1]
104 | 3	2	11	14	[3, 2]	[3, 2]
105 | 3	3	10	15	[3, 3]	[3, 3]
106 | 3	4	53	26	[3, 4]	[3, 4]
107 | 4	0	16	32	[4, 0]	[4, 0]
108 | 4	1	17	33	[4, 1]	[4, 1]
109 | 4	2	30	36	[4, 2]	[4, 2]
110 | 4	3	31	37	[4, 3]	[4, 3]
111 | 4	4	32	48	[4, 4]	[4, 4]


--------------------------------------------------------------------------------
/vcpkg.json:
--------------------------------------------------------------------------------
1 | {
2 |   "dependencies": []
3 | }


--------------------------------------------------------------------------------