├── .editorconfig ├── .github └── workflows │ ├── MainDistributionPipeline.yml │ └── schedule-1.2.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── docs └── duckdb-fuzzycompletion.jpeg ├── duckdb_fuzzycomplete_rust ├── Cargo.lock ├── Cargo.toml ├── cbindgen.toml └── src │ └── lib.rs ├── extension_config.cmake ├── scripts ├── bootstrap-template.py └── extension-upload.sh ├── src ├── fuzzycomplete_extension.cpp └── include │ ├── fuzzycomplete_extension.hpp │ └── rust.h ├── test ├── README.md └── sql │ └── fuzzycomplete.test └── vcpkg.json /.editorconfig: -------------------------------------------------------------------------------- 1 | # Unix-style newlines with a newline ending every file 2 | [*.{c,cpp,h,hpp}] 3 | end_of_line = lf 4 | insert_final_newline = true 5 | indent_style = tab 6 | tab_width = 4 7 | indent_size = tab 8 | trim_trailing_whitespace = true 9 | charset = utf-8 10 | max_line_length = 120 11 | x-soft-wrap-text = true 12 | x-soft-wrap-mode = CharacterWidth 13 | x-soft-wrap-limit = 120 14 | x-show-invisibles = false 15 | x-show-spaces = false 16 | 17 | [*.{java}] 18 | end_of_line = lf 19 | insert_final_newline = true 20 | indent_style = tab 21 | tab_width = 4 22 | indent_size = tab 23 | trim_trailing_whitespace = false 24 | charset = utf-8 25 | max_line_length = 120 26 | x-soft-wrap-text = true 27 | x-soft-wrap-mode = CharacterWidth 28 | x-soft-wrap-limit = 120 29 | x-show-invisibles = false 30 | x-show-spaces = false 31 | 32 | [*.{test,test_slow,test_coverage,benchmark}] 33 | end_of_line = lf 34 | insert_final_newline = true 35 | indent_style = tab 36 | tab_width = 4 37 | indent_size = tab 38 | trim_trailing_whitespace = false 39 | charset = utf-8 40 | x-soft-wrap-text = false 41 | 42 | [Makefile] 43 | end_of_line = lf 44 | insert_final_newline = true 45 | indent_style = tab 46 | tab_width = 4 47 | indent_size = tab 48 | trim_trailing_whitespace = true 49 | charset = utf-8 50 | x-soft-wrap-text = false 
51 | 52 | [*keywords.list] 53 | insert_final_newline = false 54 | -------------------------------------------------------------------------------- /.github/workflows/MainDistributionPipeline.yml: -------------------------------------------------------------------------------- 1 | # 2 | # This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension 3 | # 4 | name: Main Extension Distribution Pipeline 5 | on: 6 | push: 7 | pull_request: 8 | workflow_dispatch: 9 | schedule: 10 | - cron: '0 2 * * *' # Runs every night at 02:00 UTC 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | duckdb-stable-build: 18 | name: Build extension binaries 19 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main 20 | with: 21 | duckdb_version: main 22 | ci_tools_version: main 23 | extension_name: fuzzycomplete 24 | enable_rust: true 25 | exclude_archs: "linux_amd64_musl" 26 | -------------------------------------------------------------------------------- /.github/workflows/schedule-1.2.yml: -------------------------------------------------------------------------------- 1 | name: Scheduled Trigger for 1.2 2 | 3 | on: 4 | schedule: 5 | - cron: '0 12 * * *' # Runs at 12:00 UTC every day 6 | workflow_dispatch: # Allows manual trigger 7 | 8 | jobs: 9 | trigger: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | actions: write # Allow triggering workflows 13 | steps: 14 | - name: Checkout repository # Required for gh to work 15 | uses: actions/checkout@v4 16 | 17 | - name: Install GitHub CLI 18 | run: | 19 | sudo apt update && sudo apt install gh -y 20 | 21 | - name: Authenticate GH CLI 22 | run: | 23 | echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token 24 | 25 | - name: Trigger Workflow on my-branch 26 | run: 
| 27 | gh workflow run MainDistributionPipeline.yml --ref v1.2 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .idea 3 | cmake-build-debug 4 | duckdb_unittest_tempdir/ 5 | .DS_Store 6 | testext 7 | test/python/__pycache__/ 8 | .Rhistory 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "duckdb"] 2 | path = duckdb 3 | url = https://github.com/duckdb/duckdb 4 | branch = main 5 | [submodule "extension-ci-tools"] 6 | path = extension-ci-tools 7 | url = https://github.com/duckdb/extension-ci-tools 8 | branch = main -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | set(CORROSION_VERBOSE_OUTPUT ON) 4 | set(CMAKE_CXX_STANDARD 11) 5 | set(CMAKE_CXX_STANDARD_REQUIRED 1) 6 | 7 | 8 | set(prefix_to_check "wasm") 9 | # Get the length of the prefix 10 | string(LENGTH "${prefix_to_check}" prefix_length) 11 | # Extract the prefix from the example_string 12 | string(SUBSTRING "${DUCKDB_PLATFORM}" 0 ${prefix_length} extracted_platform_prefix) 13 | 14 | # Propagate arch to rust build for CI 15 | set(Rust_CARGO_TARGET "") 16 | if("${OS_NAME}" STREQUAL "linux") 17 | if ("${OS_ARCH}" STREQUAL "arm64") 18 | set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") 19 | elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64") 20 | set(Rust_CARGO_TARGET ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc) 21 | set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") 22 | else() 23 | set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") 24 | endif() 25 | elseif("${OS_NAME}" STREQUAL "osx") 26 | if ("${OSX_BUILD_ARCH}" 
STREQUAL "arm64") 27 | set(Rust_CARGO_TARGET "aarch64-apple-darwin") 28 | elseif ("${OSX_BUILD_ARCH}" STREQUAL "x86_64") 29 | set(Rust_CARGO_TARGET "x86_64-apple-darwin") 30 | elseif ("${OS_ARCH}" STREQUAL "arm64") 31 | set(Rust_CARGO_TARGET "aarch64-apple-darwin") 32 | endif() 33 | elseif(WIN32) 34 | if (MINGW AND "${OS_ARCH}" STREQUAL "arm64") 35 | set(Rust_CARGO_TARGET "aarch64-pc-windows-gnu") 36 | elseif (MINGW AND "${OS_ARCH}" STREQUAL "amd64") 37 | set(Rust_CARGO_TARGET "x86_64-pc-windows-gnu") 38 | elseif (MSVC AND "${OS_ARCH}" STREQUAL "arm64") 39 | set(Rust_CARGO_TARGET "aarch64-pc-windows-msvc") 40 | elseif (MSVC AND "${OS_ARCH}" STREQUAL "amd64") 41 | set(Rust_CARGO_TARGET "x86_64-pc-windows-msvc") 42 | endif() 43 | endif() 44 | 45 | execute_process( 46 | COMMAND rustup target list --installed 47 | OUTPUT_VARIABLE RUST_TARGETS 48 | ) 49 | string(FIND "${RUST_TARGETS}" "wasm32-unknown-emscripten" WASM_TARGET_FOUND) 50 | 51 | if (NOT WASM_TARGET_FOUND EQUAL -1) 52 | set(Rust_CARGO_TARGET "wasm32-unknown-emscripten") 53 | endif() 54 | 55 | message(STATUS "RUST_TARGETS: ${RUST_TARGETS}") 56 | message(STATUS "WASM_TARGET_FOUND: ${WASM_TARGET_FOUND}") 57 | message(STATUS "TARGET: ${TARGET}") 58 | message(STATUS "DUCKDB_BUILD_TYPE: ${DUCKDB_BUILD_TYPE}") 59 | message(STATUS "TARGET NAME: ${TARGET_NAME}") 60 | message(STATUS "DUCKDB_PLATFORM: ${DUCKDB_PLATFORM}") 61 | message(STATUS "OS_ARCH: ${OS_ARCH}") 62 | message(STATUS "OS_NAME: ${OS_NAME}") 63 | message(STATUS "Rust_CARGO_TARGET: ${Rust_CARGO_TARGET}") 64 | # We currently only support the predefined targets. 65 | #if ("${Rust_CARGO_TARGET}" STREQUAL "") 66 | # message(FATAL_ERROR "Failed to detect the correct platform") 67 | #endif() 68 | 69 | 70 | include(FetchContent) 71 | 72 | FetchContent_Declare( 73 | Corrosion 74 | GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git 75 | GIT_TAG v0.5 76 | ) 77 | # Set any global configuration variables such as `Rust_TOOLCHAIN` before this line! 
78 | FetchContent_MakeAvailable(Corrosion) 79 | 80 | # Import targets defined in a package or workspace manifest `Cargo.toml` file 81 | corrosion_import_crate(MANIFEST_PATH "${CMAKE_SOURCE_DIR}/../duckdb_fuzzycomplete_rust/Cargo.toml" 82 | CRATES "duckdb_fuzzycomplete_rust" 83 | ) 84 | 85 | # Set extension name here 86 | set(TARGET_NAME fuzzycomplete) 87 | 88 | set(EXTENSION_NAME ${TARGET_NAME}_extension) 89 | set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) 90 | 91 | project(${TARGET_NAME}) 92 | 93 | include_directories(src/include) 94 | 95 | set(EXTENSION_SOURCES src/fuzzycomplete_extension.cpp) 96 | 97 | build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) 98 | build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) 99 | 100 | get_target_property(fake_includes duckdb_fuzzycomplete_rust INCLUDE_DIRECTORIES) 101 | 102 | target_link_libraries(${EXTENSION_NAME} duckdb_fuzzycomplete_rust-static) 103 | target_link_libraries(${LOADABLE_EXTENSION_NAME} duckdb_fuzzycomplete_rust) 104 | 105 | install( 106 | TARGETS ${EXTENSION_NAME} 107 | EXPORT "${DUCKDB_EXPORT_SET}" 108 | LIBRARY DESTINATION "${INSTALL_LIB_DIR}" 109 | ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") 110 | 111 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2024 Rusty Conover 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) 2 | 3 | # Configuration of extension 4 | EXT_NAME=fuzzycomplete 5 | EXT_CONFIG=${PROJ_DIR}extension_config.cmake 6 | 7 | # Include the Makefile from extension-ci-tools 8 | include extension-ci-tools/makefiles/duckdb_extension.Makefile 9 | 10 | rust_binding_headers: 11 | cd duckdb_fuzzycomplete_rust && cbindgen --config ./cbindgen.toml --crate duckdb_fuzzycomplete_rust --output ../src/include/rust.h 12 | 13 | clean_all: clean 14 | cd duckdb_fuzzycomplete_rust && cargo clean -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fuzzycomplete Extension for DuckDB 2 | 3 | ![A duck trying to complete a crossword puzzle](./docs/duckdb-fuzzycompletion.jpeg) 4 | 5 | This `fuzzycomplete` extension serves as an alternative to DuckDB's [autocomplete](https://duckdb.org/docs/api/cli/autocomplete.html) extension, with several key differences: 6 | 7 | **Algorithm:** Unlike the [autocomplete extension](https://duckdb.org/docs/extensions/autocomplete.html), which uses edit distance as its metric, the fuzzycomplete extension employs a fuzzy string matching algorithm derived from Visual Studio Code. This provides more intuitive and flexible completion suggestions. 
8 | 9 | **Scope:** The `fuzzycomplete` extension can complete table names across different databases and schemas. It respects the current search path and offers suggestions accordingly, even when multiple databases are attached. 10 | 11 | It may not yet be the best solution for SQL completion, but it has proven to be useful to the author. 12 | 13 | ## Installation 14 | 15 | **`fuzzycomplete` is a [DuckDB Community Extension](https://github.com/duckdb/community-extensions).** 16 | 17 | You can install and load it with the following SQL: 18 | 19 | ```sql 20 | install fuzzycomplete from community; 21 | load fuzzycomplete; 22 | ``` 23 | 24 | ## Details of the fuzzy matching algorithm 25 | 26 | This extension uses the Rust crate [`code-fuzzy-match`](https://crates.io/crates/code-fuzzy-match) 27 | 28 | The algorithm ensures that characters in the query string appear in the same order in the target string. It handles substring queries efficiently, allowing searches within the middle of the target string without significantly impacting the match score. The algorithm prioritizes matches that occur at the beginning of words, where words are defined as they commonly appear in code (e.g., letters following a separator or in camel case). Sequential matches are also given preference. 29 | 30 | In addition to the basic matching algorithm, matches are then scored using the following criteria if they have an equal score from `code-fuzzy-match`: 31 | 32 | 1. In the event of a tie in the match score, completion results are first ordered by the number of pseudo-words in the candidate strings, favoring shorter completions. 33 | 2. A standard lexical sorting is then applied. 34 | 35 | ## When would I use this? 36 | 37 | If you're looking to try a different completion algorithm or need to complete table names from various databases and schemas, you might find this extension beneficial. 
38 | 39 | ### Build Architecture 40 | 41 | For the DuckDB extension to call the Rust code a tool called `cbindgen` is used to write the C++ headers for the exposed Rust interface. 42 | 43 | The headers can be updated by running `make rust_binding_headers`. 44 | 45 | ### Build steps 46 | Now to build the extension, run: 47 | ```sh 48 | make 49 | ``` 50 | The main binaries that will be built are: 51 | ```sh 52 | ./build/release/duckdb 53 | ./build/release/test/unittest 54 | ./build/release/extension/fuzzycomplete/fuzzycomplete.duckdb_extension 55 | ``` 56 | - `duckdb` is the binary for the duckdb shell with the extension code automatically loaded. 57 | - `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary. 58 | - `fuzzycomplete.duckdb_extension` is the loadable binary as it would be distributed. 59 | 60 | ## Running the extension 61 | To run the extension code, simply start the shell with `./build/release/duckdb`. 62 | 63 | Now we can use the features from the extension directly in DuckDB. 64 | 65 | ### Installing the deployed binaries 66 | To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the 67 | `allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using. Some examples: 68 | 69 | CLI: 70 | ```shell 71 | duckdb -unsigned 72 | ``` 73 | 74 | Python: 75 | ```python 76 | con = duckdb.connect(':memory:', config={'allow_unsigned_extensions' : 'true'}) 77 | ``` 78 | 79 | NodeJS: 80 | ```js 81 | db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"}); 82 | ``` 83 | 84 | Secondly, you will need to set the repository endpoint in DuckDB to the HTTP url of your bucket + version of the extension 85 | you want to install. 
To do this run the following SQL query in DuckDB: 86 | ```sql 87 | SET custom_extension_repository='bucket.s3.us-east-1.amazonaws.com/fuzzycomplete/latest'; 88 | ``` 89 | Note that the `/latest` path will allow you to install the latest extension version available for your current version of 90 | DuckDB. To specify a specific version, you can pass the version instead. 91 | 92 | After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB: 93 | ```sql 94 | INSTALL fuzzycomplete 95 | LOAD fuzzycomplete 96 | ``` 97 | -------------------------------------------------------------------------------- /docs/duckdb-fuzzycompletion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Query-farm/fuzzycomplete/a833e2910612602a49a80aee0ccea9945ea740fc/docs/duckdb-fuzzycompletion.jpeg -------------------------------------------------------------------------------- /duckdb_fuzzycomplete_rust/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "code-fuzzy-match" 7 | version = "0.2.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ac44797f65a7f21689e1b71746ae51461d60965fd20b273fbc223156a32fd3e7" 10 | 11 | [[package]] 12 | name = "duckdb_fuzzycomplete_rust" 13 | version = "0.1.0" 14 | dependencies = [ 15 | "code-fuzzy-match", 16 | ] 17 | -------------------------------------------------------------------------------- /duckdb_fuzzycomplete_rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "duckdb_fuzzycomplete_rust" 3 | version = "0.1.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | name = "duckdb_fuzzycomplete_rust" 9 | crate-type = ["staticlib"] 10 | 11 | [dependencies] 12 | code-fuzzy-match = "0.2.2" 13 | -------------------------------------------------------------------------------- /duckdb_fuzzycomplete_rust/cbindgen.toml: -------------------------------------------------------------------------------- 1 | # This is a template cbindgen.toml file with all of the default values. 2 | # Some values are commented out because their absence is the real default. 3 | # 4 | # See https://github.com/mozilla/cbindgen/blob/master/docs.md#cbindgentoml 5 | # for detailed documentation of every option here. 6 | 7 | 8 | 9 | language = "C++" 10 | 11 | 12 | 13 | ############## Options for Wrapping the Contents of the Header ################# 14 | 15 | # header = "/* Text to put at the beginning of the generated file. Probably a license. */" 16 | # trailer = "/* Text to put at the end of the generated file */" 17 | # include_guard = "my_bindings_h" 18 | # pragma_once = true 19 | # autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. 
*/" 20 | include_version = false 21 | # namespace = "my_namespace" 22 | namespaces = [] 23 | using_namespaces = [] 24 | sys_includes = [] 25 | includes = [] 26 | no_includes = false 27 | # cpp_compat = true 28 | after_includes = "" 29 | 30 | 31 | 32 | 33 | ############################ Code Style Options ################################ 34 | 35 | braces = "SameLine" 36 | line_length = 100 37 | tab_width = 2 38 | documentation = true 39 | documentation_style = "auto" 40 | documentation_length = "full" 41 | line_endings = "LF" # also "CR", "CRLF", "Native" 42 | 43 | 44 | 45 | 46 | ############################# Codegen Options ################################## 47 | 48 | style = "both" 49 | sort_by = "Name" # default for `fn.sort_by` and `const.sort_by` 50 | usize_is_size_t = true 51 | 52 | 53 | 54 | [defines] 55 | # "target_os = freebsd" = "DEFINE_FREEBSD" 56 | # "feature = serde" = "DEFINE_SERDE" 57 | 58 | 59 | 60 | [export] 61 | include = [] 62 | exclude = [] 63 | # prefix = "CAPI_" 64 | item_types = [] 65 | renaming_overrides_prefixing = false 66 | 67 | 68 | 69 | [export.rename] 70 | 71 | 72 | 73 | [export.body] 74 | 75 | 76 | [export.mangle] 77 | 78 | 79 | [fn] 80 | rename_args = "None" 81 | # must_use = "MUST_USE_FUNC" 82 | # deprecated = "DEPRECATED_FUNC" 83 | # deprecated_with_note = "DEPRECATED_FUNC_WITH_NOTE" 84 | # no_return = "NO_RETURN" 85 | # prefix = "START_FUNC" 86 | # postfix = "END_FUNC" 87 | args = "auto" 88 | sort_by = "Name" 89 | 90 | 91 | 92 | 93 | [struct] 94 | rename_fields = "None" 95 | # must_use = "MUST_USE_STRUCT" 96 | # deprecated = "DEPRECATED_STRUCT" 97 | # deprecated_with_note = "DEPRECATED_STRUCT_WITH_NOTE" 98 | derive_constructor = false 99 | derive_eq = false 100 | derive_neq = false 101 | derive_lt = false 102 | derive_lte = false 103 | derive_gt = false 104 | derive_gte = false 105 | 106 | 107 | 108 | 109 | [enum] 110 | rename_variants = "None" 111 | # must_use = "MUST_USE_ENUM" 112 | # deprecated = "DEPRECATED_ENUM" 113 | # 
deprecated_with_note = "DEPRECATED_ENUM_WITH_NOTE" 114 | add_sentinel = false 115 | prefix_with_name = false 116 | derive_helper_methods = false 117 | derive_const_casts = false 118 | derive_mut_casts = false 119 | # cast_assert_name = "ASSERT" 120 | derive_tagged_enum_destructor = false 121 | derive_tagged_enum_copy_constructor = false 122 | enum_class = true 123 | private_default_tagged_enum_constructor = false 124 | 125 | 126 | 127 | 128 | [const] 129 | allow_static_const = true 130 | allow_constexpr = false 131 | sort_by = "Name" 132 | 133 | 134 | 135 | 136 | [macro_expansion] 137 | bitflags = false 138 | 139 | 140 | 141 | 142 | 143 | 144 | ############## Options for How Your Rust library Should Be Parsed ############## 145 | 146 | [parse] 147 | parse_deps = false 148 | # include = [] 149 | exclude = [] 150 | clean = false 151 | extra_bindings = [] 152 | 153 | 154 | 155 | [parse.expand] 156 | crates = ["duckdb_fuzzycomplete_rust"] 157 | all_features = false 158 | default_features = true 159 | features = [] -------------------------------------------------------------------------------- /duckdb_fuzzycomplete_rust/src/lib.rs: -------------------------------------------------------------------------------- 1 | // duckdb_fuzzycomplete_rust 2 | // Copyright 2024 Rusty Conover 3 | // Licensed under the MIT License 4 | 5 | use code_fuzzy_match; 6 | 7 | use core::str; 8 | use std::ffi::CStr; 9 | use std::{ffi::c_char, slice}; 10 | 11 | macro_rules! make_str { 12 | ( $s : expr , $len : expr ) => { 13 | unsafe { str::from_utf8_unchecked(slice::from_raw_parts($s as *const u8, $len)) } 14 | }; 15 | } 16 | 17 | #[no_mangle] 18 | pub extern "C" fn perform_matches( 19 | // These are the array of strings that we are going to match against. 
20 | candidate_pool: *const *const c_char, 21 | candidate_pool_size: usize, 22 | 23 | // This is the query sting to match 24 | query: *const c_char, 25 | query_len: usize, 26 | 27 | // The maximum number of results to return 28 | max_results: usize, 29 | 30 | // The output ranking of candidates (pointers from the candidate pool) 31 | ranked_candidates: *mut *const c_char, 32 | 33 | // The actual number of produced results. 34 | actual_results: *mut usize, 35 | ) { 36 | let mut matcher = code_fuzzy_match::FuzzyMatcher::new(); 37 | 38 | let candidates: Vec<_> = (0..candidate_pool_size).map(|i| unsafe { 39 | let c_str_ptr = *candidate_pool.add(i); 40 | (CStr::from_ptr(c_str_ptr).to_str().unwrap(), c_str_ptr) 41 | }).collect(); 42 | 43 | let query = make_str!(query, query_len); 44 | 45 | let mut match_results: Vec<_> = if query.trim().is_empty() { 46 | candidates.iter().map(|s| (s, 0)).collect() 47 | } else { 48 | candidates.iter() 49 | .filter_map(|s| matcher.fuzzy_match(s.0, query).map(|score| (s, score))) 50 | .collect() 51 | }; 52 | 53 | fn count_word_occurrances(s: &str) -> usize { 54 | s.chars().filter(|&c| c == '_' || c == '.').count() 55 | } 56 | match_results.sort_by(|a, b| { 57 | // Sort by the store first, then the number of components, then lexically 58 | b.1.cmp(&a.1) 59 | // Ordering by words splitting by _ or . then by length then by the string itself 60 | .then_with(|| count_word_occurrances(a.0 .0).cmp(&count_word_occurrances(b.0 .0))) 61 | // Order by length. 
62 | .then_with(|| a.0 .0.cmp(&b.0 .0)) 63 | }); 64 | 65 | unsafe { 66 | let result_count = std::cmp::min(match_results.len(), max_results); 67 | *actual_results = result_count; 68 | for (index, result) in match_results.iter().enumerate().take(result_count) { 69 | *ranked_candidates.add(index) = result.0 .1; 70 | } 71 | } 72 | } 73 | 74 | #[cfg(test)] 75 | mod tests {} 76 | -------------------------------------------------------------------------------- /extension_config.cmake: -------------------------------------------------------------------------------- 1 | # This file is included by DuckDB's build system. It specifies which extension to load 2 | 3 | # Extension from this repo 4 | duckdb_extension_load(fuzzycomplete 5 | SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} 6 | LOAD_TESTS 7 | ) 8 | 9 | # Any extra extensions that should be built 10 | # e.g.: duckdb_extension_load(json) -------------------------------------------------------------------------------- /scripts/bootstrap-template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import sys, os, shutil, re 4 | from pathlib import Path 5 | 6 | shutil.copyfile(f'docs/NEXT_README.md', f'README.md') 7 | os.remove(f'docs/NEXT_README.md') 8 | os.remove(f'docs/README.md') 9 | 10 | if (len(sys.argv) != 2): 11 | raise Exception('usage: python3 bootstrap-template.py ') 12 | 13 | name_extension = sys.argv[1] 14 | 15 | def is_snake_case(s): 16 | # Define the regex pattern for snake case with numbers 17 | pattern = r'^[a-z0-9]+(_[a-z0-9]+)*$' 18 | 19 | # Use re.match to check if the string matches the pattern 20 | if re.match(pattern, s): 21 | return True 22 | else: 23 | return False 24 | 25 | if name_extension[0].isdigit(): 26 | raise Exception('Please dont start your extension name with a number.') 27 | 28 | if not is_snake_case(name_extension): 29 | raise Exception('Please enter the name of your extension in valid snake_case containing only lower case letters 
and numbers') 30 | 31 | def to_camel_case(snake_str): 32 | return "".join(x.capitalize() for x in snake_str.lower().split("_")) 33 | 34 | def replace(file_name, to_find, to_replace): 35 | with open(file_name, 'r', encoding="utf8") as file : 36 | filedata = file.read() 37 | filedata = filedata.replace(to_find, to_replace) 38 | with open(file_name, 'w', encoding="utf8") as file: 39 | file.write(filedata) 40 | 41 | files_to_search = [] 42 | files_to_search.extend(Path('./.github').rglob('./**/*.yml')) 43 | files_to_search.extend(Path('./test').rglob('./**/*.test')) 44 | files_to_search.extend(Path('./src').rglob('./**/*.hpp')) 45 | files_to_search.extend(Path('./src').rglob('./**/*.cpp')) 46 | files_to_search.extend(Path('./src').rglob('./**/*.txt')) 47 | files_to_search.extend(Path('./src').rglob('./*.md')) 48 | 49 | def replace_everywhere(to_find, to_replace): 50 | for path in files_to_search: 51 | replace(path, to_find, to_replace) 52 | replace(path, to_find.capitalize(), to_camel_case(to_replace)) 53 | replace(path, to_find.upper(), to_replace.upper()) 54 | 55 | replace("./CMakeLists.txt", to_find, to_replace) 56 | replace("./Makefile", to_find, to_replace) 57 | replace("./Makefile", to_find.capitalize(), to_camel_case(to_replace)) 58 | replace("./Makefile", to_find.upper(), to_replace.upper()) 59 | replace("./README.md", to_find, to_replace) 60 | replace("./extension_config.cmake", to_find, to_replace) 61 | 62 | replace_everywhere("quack", name_extension) 63 | replace_everywhere("Quack", name_extension.capitalize()) 64 | replace_everywhere("", name_extension) 65 | 66 | string_to_replace = name_extension 67 | string_to_find = "quack" 68 | 69 | # rename files 70 | os.rename(f'test/sql/{string_to_find}.test', f'test/sql/{string_to_replace}.test') 71 | os.rename(f'src/{string_to_find}_extension.cpp', f'src/{string_to_replace}_extension.cpp') 72 | os.rename(f'src/include/{string_to_find}_extension.hpp', f'src/include/{string_to_replace}_extension.hpp') 73 | 74 | # 
remove template-specific files 75 | os.remove('.github/workflows/ExtensionTemplate.yml') 76 | 77 | # finally, remove this bootstrap file 78 | os.remove(__file__) -------------------------------------------------------------------------------- /scripts/extension-upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Extension upload script 4 | 5 | # Usage: ./extension-upload.sh 6 | # : Name of the extension 7 | # : Version (commit / version tag) of the extension 8 | # : Version (commit / version tag) of DuckDB 9 | # : Architecture target of the extension binary 10 | # : S3 bucket to upload to 11 | # : Set this as the latest version ("true" / "false", default: "false") 12 | # : Set this as a versioned version that will prevent its deletion 13 | 14 | set -e 15 | 16 | if [[ $4 == wasm* ]]; then 17 | ext="/tmp/extension/$1.duckdb_extension.wasm" 18 | else 19 | ext="/tmp/extension/$1.duckdb_extension" 20 | fi 21 | 22 | echo $ext 23 | 24 | script_dir="$(dirname "$(readlink -f "$0")")" 25 | 26 | # calculate SHA256 hash of extension binary 27 | cat $ext > $ext.append 28 | 29 | if [[ $4 == wasm* ]]; then 30 | # 0 for custom section 31 | # 113 in hex = 275 in decimal, total lenght of what follows (1 + 16 + 2 + 256) 32 | # [1(continuation) + 0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02] 33 | echo -n -e '\x00' >> $ext.append 34 | echo -n -e '\x93\x02' >> $ext.append 35 | # 10 in hex = 16 in decimal, lenght of name, 1 byte 36 | echo -n -e '\x10' >> $ext.append 37 | echo -n -e 'duckdb_signature' >> $ext.append 38 | # the name of the WebAssembly custom section, 16 bytes 39 | # 100 in hex, 256 in decimal 40 | # [1(continuation) + 0000000(payload) = ff, 0(continuation) + 10(payload)], 41 | # for a grand total of 2 bytes 42 | echo -n -e '\x80\x02' >> $ext.append 43 | fi 44 | 45 | # (Optionally) Sign binary 46 | if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then 47 | echo "$DUCKDB_EXTENSION_SIGNING_PK" > 
private.pem 48 | $script_dir/../duckdb/scripts/compute-extension-hash.sh $ext.append > $ext.hash 49 | openssl pkeyutl -sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign 50 | rm -f private.pem 51 | fi 52 | 53 | # Signature is always there, potentially defaulting to 256 zeros 54 | truncate -s 256 $ext.sign 55 | 56 | # append signature to extension binary 57 | cat $ext.sign >> $ext.append 58 | 59 | # compress extension binary 60 | if [[ $4 == wasm_* ]]; then 61 | brotli < $ext.append > "$ext.compressed" 62 | else 63 | gzip < $ext.append > "$ext.compressed" 64 | fi 65 | 66 | set -e 67 | 68 | # Abort if AWS key is not set 69 | if [ -z "$AWS_ACCESS_KEY_ID" ]; then 70 | echo "No AWS key found, skipping.." 71 | exit 0 72 | fi 73 | 74 | # upload versioned version 75 | if [[ $7 = 'true' ]]; then 76 | if [[ $4 == wasm* ]]; then 77 | aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" 78 | else 79 | aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read 80 | fi 81 | fi 82 | 83 | # upload to latest version 84 | if [[ $6 = 'true' ]]; then 85 | if [[ $4 == wasm* ]]; then 86 | aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" 87 | else 88 | aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz --acl public-read 89 | fi 90 | fi 91 | -------------------------------------------------------------------------------- /src/fuzzycomplete_extension.cpp: -------------------------------------------------------------------------------- 1 | #define DUCKDB_EXTENSION_MAIN 2 | 3 | #include "fuzzycomplete_extension.hpp" 4 | 5 | #include "duckdb/catalog/catalog.hpp" 6 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" 7 | #include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp" 8 | #include "duckdb/catalog/catalog_search_path.hpp" 9 
| #include "duckdb/common/case_insensitive_map.hpp" 10 | #include "duckdb/common/exception.hpp" 11 | #include "duckdb/common/file_opener.hpp" 12 | #include "duckdb/function/table_function.hpp" 13 | #include "duckdb/main/client_context.hpp" 14 | #include "duckdb/main/client_data.hpp" 15 | #include "duckdb/main/database_manager.hpp" 16 | #include "duckdb/main/extension_util.hpp" 17 | #include "duckdb/parser/keyword_helper.hpp" 18 | #include "duckdb/parser/parser.hpp" 19 | 20 | // Include the declarations of things from Rust. 21 | #include "rust.h" 22 | 23 | namespace duckdb 24 | { 25 | 26 | struct SQLFuzzyCompleteFunctionData : public TableFunctionData 27 | { 28 | explicit SQLFuzzyCompleteFunctionData(vector suggestions_p, idx_t start_pos) 29 | : suggestions(std::move(suggestions_p)), start_pos(start_pos) 30 | { 31 | } 32 | 33 | vector suggestions; 34 | idx_t start_pos; 35 | }; 36 | 37 | struct SQLFuzzyCompleteData : public GlobalTableFunctionState 38 | { 39 | SQLFuzzyCompleteData() : offset(0) 40 | { 41 | } 42 | 43 | idx_t offset; 44 | }; 45 | 46 | struct FuzzyCompleteCandidate 47 | { 48 | explicit FuzzyCompleteCandidate(string candidate_p, int32_t score_bonus = 0) 49 | : candidate(std::move(candidate_p)), score_bonus(score_bonus) 50 | { 51 | } 52 | 53 | string candidate; 54 | //! The higher the score bonus, the more likely this candidate will be chosen 55 | int32_t score_bonus; 56 | }; 57 | 58 | static vector ComputeSuggestions(vector available_suggestions, const string &prefix, 59 | const unordered_set &extra_keywords, bool add_quotes = false) 60 | { 61 | for (auto &kw : extra_keywords) 62 | { 63 | available_suggestions.emplace_back(std::move(kw)); 64 | } 65 | 66 | const size_t max_results = 20; 67 | 68 | // Create a vector of const char* pointers 69 | std::vector candidate_pool_pointers; 70 | std::vector suggestion_results(max_results); 71 | 72 | // Make sure all of the suggestions are unique. 
73 | 74 | std::set> unique_suggestions; 75 | for (const auto &str : available_suggestions) 76 | { 77 | unique_suggestions.insert(make_shared_ptr(str.candidate)); 78 | } 79 | 80 | candidate_pool_pointers.reserve(unique_suggestions.size()); 81 | 82 | for (const auto &str : unique_suggestions) 83 | { 84 | candidate_pool_pointers.push_back(str->c_str()); 85 | } 86 | 87 | // Get the suggestions from rust 88 | size_t actual_matches; 89 | 90 | perform_matches( 91 | candidate_pool_pointers.data(), 92 | candidate_pool_pointers.size(), 93 | prefix.c_str(), 94 | prefix.size(), 95 | max_results, 96 | suggestion_results.data(), 97 | &actual_matches); 98 | 99 | vector results; 100 | results.reserve(actual_matches); 101 | for (size_t i = 0; i < actual_matches; i++) 102 | { 103 | results.emplace_back(string(suggestion_results[i])); 104 | } 105 | 106 | if (add_quotes) 107 | { 108 | for (auto &result : results) 109 | { 110 | if (extra_keywords.find(result) == extra_keywords.end()) 111 | { 112 | result = KeywordHelper::WriteOptionallyQuoted(result, '"', true); 113 | } 114 | else 115 | { 116 | result = result + " "; 117 | } 118 | } 119 | } 120 | return results; 121 | } 122 | 123 | static vector InitialKeywords() 124 | { 125 | return vector{"SELECT", "INSERT", "DELETE", "UPDATE", "CREATE", "DROP", "COPY", 126 | "ALTER", "WITH", "EXPORT", "BEGIN", "VACUUM", "PREPARE", "EXECUTE", 127 | "DEALLOCATE", "CALL", "ANALYZE", "EXPLAIN", "DESCRIBE", "SUMMARIZE", "LOAD", 128 | "CHECKPOINT", "ROLLBACK", "COMMIT", "CALL", "FROM", "PIVOT", "UNPIVOT"}; 129 | } 130 | 131 | static vector SuggestKeyword(ClientContext &context) 132 | { 133 | auto keywords = InitialKeywords(); 134 | vector result; 135 | for (auto &kw : keywords) 136 | { 137 | auto score = 0; 138 | if (kw == "SELECT") 139 | { 140 | score = 2; 141 | } 142 | if (kw == "FROM" || kw == "DELETE" || kw == "INSERT" || kw == "UPDATE") 143 | { 144 | score = 1; 145 | } 146 | result.emplace_back(kw + " ", score); 147 | } 148 | return result; 149 | } 
150 | 151 | static vector> GetAllTables(ClientContext &context, bool for_table_names) 152 | { 153 | vector> result; 154 | // scan all the schemas for tables and collect them and collect them 155 | // for column names we avoid adding internal entries, because it pollutes the auto-complete too much 156 | // for table names this is generally fine, however 157 | auto schemas = Catalog::GetAllSchemas(context); 158 | for (auto &schema_ref : schemas) 159 | { 160 | auto &schema = schema_ref.get(); 161 | schema.Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry &entry) 162 | { 163 | if (!entry.internal || for_table_names) { 164 | result.push_back(entry); 165 | } }); 166 | }; 167 | if (for_table_names) 168 | { 169 | for (auto &schema_ref : schemas) 170 | { 171 | auto &schema = schema_ref.get(); 172 | schema.Scan(context, CatalogType::TABLE_FUNCTION_ENTRY, 173 | [&](CatalogEntry &entry) 174 | { result.push_back(entry); }); 175 | }; 176 | } 177 | else 178 | { 179 | for (auto &schema_ref : schemas) 180 | { 181 | auto &schema = schema_ref.get(); 182 | schema.Scan(context, CatalogType::SCALAR_FUNCTION_ENTRY, 183 | [&](CatalogEntry &entry) 184 | { result.push_back(entry); }); 185 | }; 186 | } 187 | return result; 188 | } 189 | 190 | static vector SuggestTableName(ClientContext &context) 191 | { 192 | vector suggestions; 193 | auto all_entries = GetAllTables(context, true); 194 | 195 | auto default_database = DatabaseManager::GetDefaultDatabase(context); 196 | 197 | auto default_schema = ClientData::Get(context).catalog_search_path->GetDefault().schema; 198 | 199 | // So really we should add the other catalog names. 200 | 201 | // Then the other schema names. 202 | std::set seen_databases = {}; 203 | std::set seen_schemas = {}; 204 | 205 | // To get the list of available entries 206 | 207 | // There are a few possiblities. 208 | 209 | // same database same schema. 
210 | for (auto &entry_ref : all_entries) 211 | { 212 | auto &entry = entry_ref.get(); 213 | // prioritize user-defined entries (views & tables) 214 | int32_t bonus = (entry.internal || entry.type == CatalogType::TABLE_FUNCTION_ENTRY) ? 0 : 1; 215 | 216 | // We should use the full path based on the calling context of the user. 217 | auto catalog_name = entry_ref.get().ParentCatalog().GetName(); 218 | auto schema_name = entry_ref.get().ParentSchema().name; 219 | 220 | auto quoted_schema_name = KeywordHelper::WriteOptionallyQuoted(schema_name, '"', true); 221 | auto quoted_catalog_name = KeywordHelper::WriteOptionallyQuoted(catalog_name, '"', true); 222 | auto quoted_entry_name = KeywordHelper::WriteOptionallyQuoted(entry.name, '"', true); 223 | 224 | string prefix = quoted_catalog_name + "." + quoted_schema_name; 225 | 226 | if (catalog_name != default_database) 227 | { 228 | seen_databases.insert(quoted_catalog_name); 229 | seen_schemas.insert(prefix); 230 | suggestions.emplace_back(prefix + "." + quoted_entry_name, bonus); 231 | } 232 | else 233 | { 234 | if (schema_name == default_schema) 235 | { 236 | suggestions.emplace_back(quoted_entry_name, bonus); 237 | } 238 | else 239 | { 240 | suggestions.emplace_back(quoted_schema_name + "." 
+ quoted_entry_name, bonus); 241 | seen_schemas.insert(prefix); 242 | } 243 | } 244 | } 245 | 246 | for (auto &database : seen_databases) 247 | { 248 | suggestions.emplace_back(database, 1); 249 | } 250 | 251 | for (auto &schema : seen_schemas) 252 | { 253 | suggestions.emplace_back(schema, 1); 254 | } 255 | 256 | vector suggestions_str; 257 | for (auto &suggestion : suggestions) 258 | { 259 | suggestions_str.emplace_back(suggestion.candidate); 260 | } 261 | 262 | return suggestions; 263 | } 264 | 265 | static vector SuggestColumnName(ClientContext &context) 266 | { 267 | vector suggestions; 268 | auto all_entries = GetAllTables(context, false); 269 | for (auto &entry_ref : all_entries) 270 | { 271 | auto &entry = entry_ref.get(); 272 | if (entry.type == CatalogType::TABLE_ENTRY) 273 | { 274 | auto &table = entry.Cast(); 275 | for (auto &col : table.GetColumns().Logical()) 276 | { 277 | suggestions.emplace_back(col.GetName(), 1); 278 | } 279 | } 280 | else if (entry.type == CatalogType::VIEW_ENTRY) 281 | { 282 | auto &view = entry.Cast(); 283 | for (auto &col : view.aliases) 284 | { 285 | suggestions.emplace_back(col, 1); 286 | } 287 | } 288 | else 289 | { 290 | if (StringUtil::CharacterIsOperator(entry.name[0])) 291 | { 292 | continue; 293 | } 294 | suggestions.emplace_back(entry.name); 295 | }; 296 | } 297 | return suggestions; 298 | } 299 | 300 | static bool KnownExtension(const string &fname) 301 | { 302 | vector known_extensions{".parquet", ".csv", ".tsv", ".csv.gz", ".tsv.gz", ".tbl"}; 303 | for (auto &ext : known_extensions) 304 | { 305 | if (StringUtil::EndsWith(fname, ext)) 306 | { 307 | return true; 308 | } 309 | } 310 | return false; 311 | } 312 | 313 | static vector SuggestFileName(ClientContext &context, string &prefix, idx_t &last_pos) 314 | { 315 | auto &fs = FileSystem::GetFileSystem(context); 316 | string search_dir; 317 | D_ASSERT(last_pos >= prefix.size()); 318 | auto is_path_absolute = fs.IsPathAbsolute(prefix); 319 | for (idx_t i = 
prefix.size(); i > 0; i--, last_pos--) 320 | { 321 | if (prefix[i - 1] == '/' || prefix[i - 1] == '\\') 322 | { 323 | search_dir = prefix.substr(0, i - 1); 324 | prefix = prefix.substr(i); 325 | break; 326 | } 327 | } 328 | if (search_dir.empty()) 329 | { 330 | search_dir = is_path_absolute ? "/" : "."; 331 | } 332 | else 333 | { 334 | search_dir = fs.ExpandPath(search_dir); 335 | } 336 | vector result; 337 | fs.ListFiles(search_dir, [&](const string &fname, bool is_dir) 338 | { 339 | string suggestion; 340 | if (is_dir) { 341 | suggestion = fname + fs.PathSeparator(fname); 342 | } else { 343 | suggestion = fname + "'"; 344 | } 345 | int score = 0; 346 | if (is_dir && fname[0] != '.') { 347 | score = 2; 348 | } 349 | if (KnownExtension(fname)) { 350 | score = 1; 351 | } 352 | result.emplace_back(std::move(suggestion), score); }); 353 | return result; 354 | } 355 | 356 | enum class SuggestionState : uint8_t 357 | { 358 | SUGGEST_KEYWORD, 359 | SUGGEST_TABLE_NAME, 360 | SUGGEST_COLUMN_NAME, 361 | SUGGEST_FILE_NAME 362 | }; 363 | 364 | static bool is_word_break(char c) 365 | { 366 | return StringUtil::CharacterIsOperator(c) && c != '.'; 367 | } 368 | 369 | static duckdb::unique_ptr GenerateSuggestions(ClientContext &context, const string &sql) 370 | { 371 | // for auto-completion, we consider 4 scenarios 372 | // * there is nothing in the buffer, or only one word -> suggest a keyword 373 | // * the previous keyword is SELECT, WHERE, BY, HAVING, ... -> suggest a column name 374 | // * the previous keyword is FROM, INSERT, UPDATE ,... 
-> select a table name 375 | // * we are in a string constant -> suggest a filename 376 | // figure out which state we are in by doing a run through the query 377 | idx_t pos = 0; 378 | idx_t last_pos = 0; 379 | idx_t pos_offset = 0; 380 | bool seen_word = false; 381 | unordered_set suggested_keywords; 382 | SuggestionState suggest_state = SuggestionState::SUGGEST_KEYWORD; 383 | case_insensitive_set_t column_name_keywords = {"SELECT", "WHERE", "BY", "HAVING", "QUALIFY", 384 | "LIMIT", "SET", "USING", "ON"}; 385 | case_insensitive_set_t table_name_keywords = {"FROM", "JOIN", "INSERT", "UPDATE", "DELETE", 386 | "ALTER", "DROP", "CALL", "DESCRIBE"}; 387 | case_insensitive_map_t> next_keyword_map; 388 | next_keyword_map["SELECT"] = {"FROM", "WHERE", "GROUP", "HAVING", "WINDOW", "ORDER", "LIMIT", 389 | "QUALIFY", "SAMPLE", "VALUES", "UNION", "EXCEPT", "INTERSECT", "DISTINCT"}; 390 | next_keyword_map["WITH"] = {"RECURSIVE", "SELECT", "AS"}; 391 | next_keyword_map["INSERT"] = {"INTO", "VALUES", "SELECT", "DEFAULT"}; 392 | next_keyword_map["DELETE"] = {"FROM", "WHERE", "USING"}; 393 | next_keyword_map["UPDATE"] = {"SET", "WHERE"}; 394 | next_keyword_map["CREATE"] = {"TABLE", "SCHEMA", "VIEW", "SEQUENCE", "MACRO", "FUNCTION", "SECRET", "TYPE"}; 395 | next_keyword_map["DROP"] = next_keyword_map["CREATE"]; 396 | next_keyword_map["ALTER"] = {"TABLE", "VIEW", "ADD", "DROP", "COLUMN", "SET", "TYPE", "DEFAULT", "DATA", "RENAME"}; 397 | 398 | regular_scan: 399 | for (; pos < sql.size(); pos++) 400 | { 401 | if (sql[pos] == '\'') 402 | { 403 | pos++; 404 | last_pos = pos; 405 | goto in_string_constant; 406 | } 407 | if (sql[pos] == '"') 408 | { 409 | pos++; 410 | last_pos = pos; 411 | goto in_quotes; 412 | } 413 | if (sql[pos] == '-' && pos + 1 < sql.size() && sql[pos + 1] == '-') 414 | { 415 | goto in_comment; 416 | } 417 | if (sql[pos] == ';') 418 | { 419 | // semicolon: restart suggestion flow 420 | suggest_state = SuggestionState::SUGGEST_KEYWORD; 421 | 
suggested_keywords.clear(); 422 | last_pos = pos + 1; 423 | continue; 424 | } 425 | if (StringUtil::CharacterIsSpace(sql[pos]) || is_word_break(sql[pos])) 426 | { 427 | if (seen_word) 428 | { 429 | goto process_word; 430 | } 431 | } 432 | else 433 | { 434 | seen_word = true; 435 | } 436 | } 437 | goto standard_suggestion; 438 | in_comment: 439 | for (; pos < sql.size(); pos++) 440 | { 441 | if (sql[pos] == '\n' || sql[pos] == '\r') 442 | { 443 | pos++; 444 | goto regular_scan; 445 | } 446 | } 447 | // no suggestions inside comments 448 | return make_uniq(vector(), 0); 449 | in_quotes: 450 | for (; pos < sql.size(); pos++) 451 | { 452 | if (sql[pos] == '"') 453 | { 454 | pos++; 455 | last_pos = pos; 456 | seen_word = true; 457 | goto regular_scan; 458 | } 459 | } 460 | pos_offset = 1; 461 | goto standard_suggestion; 462 | in_string_constant: 463 | for (; pos < sql.size(); pos++) 464 | { 465 | if (sql[pos] == '\'') 466 | { 467 | pos++; 468 | last_pos = pos; 469 | seen_word = true; 470 | goto regular_scan; 471 | } 472 | } 473 | suggest_state = SuggestionState::SUGGEST_FILE_NAME; 474 | goto standard_suggestion; 475 | process_word: 476 | { 477 | while ((last_pos < sql.size()) && 478 | (StringUtil::CharacterIsSpace(sql[last_pos]) || StringUtil::CharacterIsOperator(sql[last_pos]))) 479 | { 480 | last_pos++; 481 | } 482 | auto next_word = sql.substr(last_pos, pos - last_pos); 483 | if (table_name_keywords.find(next_word) != table_name_keywords.end()) 484 | { 485 | suggest_state = SuggestionState::SUGGEST_TABLE_NAME; 486 | } 487 | else if (column_name_keywords.find(next_word) != column_name_keywords.end()) 488 | { 489 | suggest_state = SuggestionState::SUGGEST_COLUMN_NAME; 490 | } 491 | auto entry = next_keyword_map.find(next_word); 492 | if (entry != next_keyword_map.end()) 493 | { 494 | suggested_keywords = entry->second; 495 | } 496 | else 497 | { 498 | suggested_keywords.erase(next_word); 499 | } 500 | if (std::all_of(next_word.begin(), next_word.end(), ::isdigit)) 501 
| { 502 | // Numbers are OK 503 | suggested_keywords.clear(); 504 | } 505 | seen_word = false; 506 | last_pos = pos; 507 | goto regular_scan; 508 | } 509 | standard_suggestion: 510 | if (suggest_state != SuggestionState::SUGGEST_FILE_NAME) 511 | { 512 | while ((last_pos < sql.size()) && 513 | (StringUtil::CharacterIsSpace(sql[last_pos]) || is_word_break(sql[last_pos]))) 514 | { 515 | last_pos++; 516 | } 517 | } 518 | auto last_word = sql.substr(last_pos, pos - last_pos); 519 | last_pos -= pos_offset; 520 | vector suggestions; 521 | 522 | switch (suggest_state) 523 | { 524 | case SuggestionState::SUGGEST_KEYWORD: 525 | suggestions = ComputeSuggestions(SuggestKeyword(context), last_word, suggested_keywords); 526 | break; 527 | case SuggestionState::SUGGEST_TABLE_NAME: 528 | suggestions = ComputeSuggestions(SuggestTableName(context), last_word, suggested_keywords, false); 529 | break; 530 | case SuggestionState::SUGGEST_COLUMN_NAME: 531 | suggestions = ComputeSuggestions(SuggestColumnName(context), last_word, suggested_keywords, true); 532 | break; 533 | case SuggestionState::SUGGEST_FILE_NAME: 534 | last_pos = pos; 535 | suggestions = 536 | ComputeSuggestions(SuggestFileName(context, last_word, last_pos), last_word, unordered_set()); 537 | break; 538 | default: 539 | throw InternalException("Unrecognized suggestion state"); 540 | } 541 | if (last_pos > sql.size()) 542 | { 543 | D_ASSERT(false); 544 | throw NotImplementedException("last_pos out of range"); 545 | } 546 | if (!last_word.empty() && std::all_of(last_word.begin(), last_word.end(), ::isdigit)) 547 | { 548 | // avoid giving auto-complete suggestion for digits 549 | suggestions.clear(); 550 | } 551 | return make_uniq(std::move(suggestions), last_pos); 552 | } 553 | 554 | static duckdb::unique_ptr SQLFuzzyCompleteBind(ClientContext &context, TableFunctionBindInput &input, 555 | vector &return_types, vector &names) 556 | { 557 | if (input.inputs[0].IsNull()) 558 | { 559 | throw 
BinderException("sql_auto_complete first parameter cannot be NULL"); 560 | } 561 | names.emplace_back("suggestion"); 562 | return_types.emplace_back(LogicalType::VARCHAR); 563 | 564 | names.emplace_back("suggestion_start"); 565 | return_types.emplace_back(LogicalType::INTEGER); 566 | 567 | return GenerateSuggestions(context, StringValue::Get(input.inputs[0])); 568 | } 569 | 570 | unique_ptr SQLFuzzyCompleteInit(ClientContext &context, TableFunctionInitInput &input) 571 | { 572 | return make_uniq(); 573 | } 574 | 575 | void SQLFuzzyCompleteFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) 576 | { 577 | auto &bind_data = data_p.bind_data->Cast(); 578 | auto &data = data_p.global_state->Cast(); 579 | if (data.offset >= bind_data.suggestions.size()) 580 | { 581 | // finished returning values 582 | return; 583 | } 584 | // start returning values 585 | // either fill up the chunk or return all the remaining columns 586 | idx_t count = 0; 587 | while (data.offset < bind_data.suggestions.size() && count < STANDARD_VECTOR_SIZE) 588 | { 589 | auto &entry = bind_data.suggestions[data.offset++]; 590 | 591 | // suggestion, VARCHAR 592 | output.SetValue(0, count, Value(entry)); 593 | 594 | // suggestion_start, INTEGER 595 | output.SetValue(1, count, Value::INTEGER(bind_data.start_pos)); 596 | 597 | count++; 598 | } 599 | output.SetCardinality(count); 600 | } 601 | 602 | static void LoadInternal(DatabaseInstance &db) 603 | { 604 | // For now just only load if the autocomplete extension isn't loaded. 605 | // there are plans to improve this in the future. 
606 | if (!db.ExtensionIsLoaded("autocomplete")) { 607 | TableFunction auto_complete_fun("sql_auto_complete", {LogicalType::VARCHAR}, SQLFuzzyCompleteFunction, 608 | SQLFuzzyCompleteBind, SQLFuzzyCompleteInit); 609 | ExtensionUtil::RegisterFunction(db, auto_complete_fun); 610 | } 611 | } 612 | 613 | void FuzzycompleteExtension::Load(DuckDB &db) 614 | { 615 | LoadInternal(*db.instance); 616 | } 617 | 618 | std::string FuzzycompleteExtension::Name() 619 | { 620 | return "fuzzycomplete"; 621 | } 622 | 623 | std::string FuzzycompleteExtension::Version() const 624 | { 625 | #ifdef EXT_VERSION_FuzzyComplete 626 | return EXT_VERSION_FuzzyComplete; 627 | #else 628 | return ""; 629 | #endif 630 | } 631 | 632 | } // namespace duckdb 633 | extern "C" 634 | { 635 | 636 | DUCKDB_EXTENSION_API void fuzzycomplete_init(duckdb::DatabaseInstance &db) 637 | { 638 | LoadInternal(db); 639 | } 640 | 641 | DUCKDB_EXTENSION_API const char *fuzzycomplete_version() 642 | { 643 | return duckdb::DuckDB::LibraryVersion(); 644 | } 645 | } 646 | 647 | #ifndef DUCKDB_EXTENSION_MAIN 648 | #error DUCKDB_EXTENSION_MAIN not defined 649 | #endif 650 | -------------------------------------------------------------------------------- /src/include/fuzzycomplete_extension.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb 6 | { 7 | 8 | class FuzzycompleteExtension : public Extension 9 | { 10 | public: 11 | void Load(DuckDB &db) override; 12 | std::string Name() override; 13 | std::string Version() const override; 14 | }; 15 | 16 | } // namespace duckdb 17 | -------------------------------------------------------------------------------- /src/include/rust.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | 10 | extern "C" { 11 | 12 | void perform_matches(const char *const 
*candidate_pool, 13 | size_t candidate_pool_size, 14 | const char *query, 15 | size_t query_len, 16 | size_t max_results, 17 | const char **ranked_candidates, 18 | size_t *actual_results); 19 | 20 | } // extern "C" 21 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Testing this extension 2 | This directory contains all the tests for this extension. The `sql` directory holds tests that are written as [SQLLogicTests](https://duckdb.org/dev/sqllogictest/intro.html). DuckDB aims to have most of its tests in this format as SQL statements, so for the fuzzycomplete extension, this should probably be the goal too. 3 | 4 | The root makefile contains targets to build and run all of these tests. To run the SQLLogicTests: 5 | ```bash 6 | make test 7 | ``` 8 | or 9 | ```bash 10 | make test_debug 11 | ``` -------------------------------------------------------------------------------- /test/sql/fuzzycomplete.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/fuzzycomplete.test 2 | # description: test fuzzycomplete extension 3 | # group: [fuzzycomplete] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT * FROM sql_auto_complete('SEL'); 8 | ---- 9 | Catalog Error: Table Function with name "sql_auto_complete" is not in the catalog, but it exists in the autocomplete extension. 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require fuzzycomplete 13 | 14 | # Confirm the extension works 15 | query I 16 | SELECT trim(suggestion) from sql_auto_complete('SEL') 17 | ---- 18 | SELECT 19 | 20 | query I 21 | SELECT suggestion from sql_auto_complete('SELECT * from tables') limit 1 22 | ---- 23 | "system".main.duckdb_tables 24 | -------------------------------------------------------------------------------- /vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [] 3 | } --------------------------------------------------------------------------------