├── .editorconfig ├── .github └── workflows │ ├── MainDistributionPipeline.yml │ └── schedule-1.2.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── docs └── duckdb-fuzzycompletion.jpeg ├── duckdb_fuzzycomplete_rust ├── Cargo.lock ├── Cargo.toml ├── cbindgen.toml └── src │ └── lib.rs ├── extension_config.cmake ├── scripts ├── bootstrap-template.py └── extension-upload.sh ├── src ├── fuzzycomplete_extension.cpp └── include │ ├── fuzzycomplete_extension.hpp │ └── rust.h ├── test ├── README.md └── sql │ └── fuzzycomplete.test └── vcpkg.json /.editorconfig: -------------------------------------------------------------------------------- 1 | # Unix-style newlines with a newline ending every file 2 | [*.{c,cpp,h,hpp}] 3 | end_of_line = lf 4 | insert_final_newline = true 5 | indent_style = tab 6 | tab_width = 4 7 | indent_size = tab 8 | trim_trailing_whitespace = true 9 | charset = utf-8 10 | max_line_length = 120 11 | x-soft-wrap-text = true 12 | x-soft-wrap-mode = CharacterWidth 13 | x-soft-wrap-limit = 120 14 | x-show-invisibles = false 15 | x-show-spaces = false 16 | 17 | [*.{java}] 18 | end_of_line = lf 19 | insert_final_newline = true 20 | indent_style = tab 21 | tab_width = 4 22 | indent_size = tab 23 | trim_trailing_whitespace = false 24 | charset = utf-8 25 | max_line_length = 120 26 | x-soft-wrap-text = true 27 | x-soft-wrap-mode = CharacterWidth 28 | x-soft-wrap-limit = 120 29 | x-show-invisibles = false 30 | x-show-spaces = false 31 | 32 | [*.{test,test_slow,test_coverage,benchmark}] 33 | end_of_line = lf 34 | insert_final_newline = true 35 | indent_style = tab 36 | tab_width = 4 37 | indent_size = tab 38 | trim_trailing_whitespace = false 39 | charset = utf-8 40 | x-soft-wrap-text = false 41 | 42 | [Makefile] 43 | end_of_line = lf 44 | insert_final_newline = true 45 | indent_style = tab 46 | tab_width = 4 47 | indent_size = tab 48 | trim_trailing_whitespace = true 49 | charset = utf-8 50 | x-soft-wrap-text = false 
51 | 52 | [*keywords.list] 53 | insert_final_newline = false 54 | -------------------------------------------------------------------------------- /.github/workflows/MainDistributionPipeline.yml: -------------------------------------------------------------------------------- 1 | # 2 | # This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension 3 | # 4 | name: Main Extension Distribution Pipeline 5 | on: 6 | push: 7 | pull_request: 8 | workflow_dispatch: 9 | schedule: 10 | - cron: '0 2 * * *' # Runs every night at 02:00 UTC 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | duckdb-stable-build: 18 | name: Build extension binaries 19 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main 20 | with: 21 | duckdb_version: main 22 | ci_tools_version: main 23 | extension_name: fuzzycomplete 24 | enable_rust: true 25 | exclude_archs: "linux_amd64_musl" 26 | -------------------------------------------------------------------------------- /.github/workflows/schedule-1.2.yml: -------------------------------------------------------------------------------- 1 | name: Scheduled Trigger for 1.2 2 | 3 | on: 4 | schedule: 5 | - cron: '0 12 * * *' # Runs at 12:00 UTC every day 6 | workflow_dispatch: # Allows manual trigger 7 | 8 | jobs: 9 | trigger: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | actions: write # Allow triggering workflows 13 | steps: 14 | - name: Checkout repository # Required for gh to work 15 | uses: actions/checkout@v4 16 | 17 | - name: Install GitHub CLI 18 | run: | 19 | sudo apt update && sudo apt install gh -y 20 | 21 | - name: Authenticate GH CLI 22 | run: | 23 | echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token 24 | 25 | - name: Trigger Workflow on my-branch 26 | run: 
| 27 | gh workflow run MainDistributionPipeline.yml --ref v1.2 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .idea 3 | cmake-build-debug 4 | duckdb_unittest_tempdir/ 5 | .DS_Store 6 | testext 7 | test/python/__pycache__/ 8 | .Rhistory 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "duckdb"] 2 | path = duckdb 3 | url = https://github.com/duckdb/duckdb 4 | branch = main 5 | [submodule "extension-ci-tools"] 6 | path = extension-ci-tools 7 | url = https://github.com/duckdb/extension-ci-tools 8 | branch = main -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | set(CORROSION_VERBOSE_OUTPUT ON) 4 | set(CMAKE_CXX_STANDARD 11) 5 | set(CMAKE_CXX_STANDARD_REQUIRED 1) 6 | 7 | 8 | set(prefix_to_check "wasm") 9 | # Get the length of the prefix 10 | string(LENGTH "${prefix_to_check}" prefix_length) 11 | # Extract the prefix from the example_string 12 | string(SUBSTRING "${DUCKDB_PLATFORM}" 0 ${prefix_length} extracted_platform_prefix) 13 | 14 | # Propagate arch to rust build for CI 15 | set(Rust_CARGO_TARGET "") 16 | if("${OS_NAME}" STREQUAL "linux") 17 | if ("${OS_ARCH}" STREQUAL "arm64") 18 | set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") 19 | elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64") 20 | set(Rust_CARGO_TARGET ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc) 21 | set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") 22 | else() 23 | set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") 24 | endif() 25 | elseif("${OS_NAME}" STREQUAL "osx") 26 | if ("${OSX_BUILD_ARCH}" 
STREQUAL "arm64") 27 | set(Rust_CARGO_TARGET "aarch64-apple-darwin") 28 | elseif ("${OSX_BUILD_ARCH}" STREQUAL "x86_64") 29 | set(Rust_CARGO_TARGET "x86_64-apple-darwin") 30 | elseif ("${OS_ARCH}" STREQUAL "arm64") 31 | set(Rust_CARGO_TARGET "aarch64-apple-darwin") 32 | endif() 33 | elseif(WIN32) 34 | if (MINGW AND "${OS_ARCH}" STREQUAL "arm64") 35 | set(Rust_CARGO_TARGET "aarch64-pc-windows-gnu") 36 | elseif (MINGW AND "${OS_ARCH}" STREQUAL "amd64") 37 | set(Rust_CARGO_TARGET "x86_64-pc-windows-gnu") 38 | elseif (MSVC AND "${OS_ARCH}" STREQUAL "arm64") 39 | set(Rust_CARGO_TARGET "aarch64-pc-windows-msvc") 40 | elseif (MSVC AND "${OS_ARCH}" STREQUAL "amd64") 41 | set(Rust_CARGO_TARGET "x86_64-pc-windows-msvc") 42 | endif() 43 | endif() 44 | 45 | execute_process( 46 | COMMAND rustup target list --installed 47 | OUTPUT_VARIABLE RUST_TARGETS 48 | ) 49 | string(FIND "${RUST_TARGETS}" "wasm32-unknown-emscripten" WASM_TARGET_FOUND) 50 | 51 | if (NOT WASM_TARGET_FOUND EQUAL -1) 52 | set(Rust_CARGO_TARGET "wasm32-unknown-emscripten") 53 | endif() 54 | 55 | message(STATUS "RUST_TARGETS: ${RUST_TARGETS}") 56 | message(STATUS "WASM_TARGET_FOUND: ${WASM_TARGET_FOUND}") 57 | message(STATUS "TARGET: ${TARGET}") 58 | message(STATUS "DUCKDB_BUILD_TYPE: ${DUCKDB_BUILD_TYPE}") 59 | message(STATUS "TARGET NAME: ${TARGET_NAME}") 60 | message(STATUS "DUCKDB_PLATFORM: ${DUCKDB_PLATFORM}") 61 | message(STATUS "OS_ARCH: ${OS_ARCH}") 62 | message(STATUS "OS_NAME: ${OS_NAME}") 63 | message(STATUS "Rust_CARGO_TARGET: ${Rust_CARGO_TARGET}") 64 | # We currently only support the predefined targets. 65 | #if ("${Rust_CARGO_TARGET}" STREQUAL "") 66 | # message(FATAL_ERROR "Failed to detect the correct platform") 67 | #endif() 68 | 69 | 70 | include(FetchContent) 71 | 72 | FetchContent_Declare( 73 | Corrosion 74 | GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git 75 | GIT_TAG v0.5 76 | ) 77 | # Set any global configuration variables such as `Rust_TOOLCHAIN` before this line! 
78 | FetchContent_MakeAvailable(Corrosion) 79 | 80 | # Import targets defined in a package or workspace manifest `Cargo.toml` file 81 | corrosion_import_crate(MANIFEST_PATH "${CMAKE_SOURCE_DIR}/../duckdb_fuzzycomplete_rust/Cargo.toml" 82 | CRATES "duckdb_fuzzycomplete_rust" 83 | ) 84 | 85 | # Set extension name here 86 | set(TARGET_NAME fuzzycomplete) 87 | 88 | set(EXTENSION_NAME ${TARGET_NAME}_extension) 89 | set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) 90 | 91 | project(${TARGET_NAME}) 92 | 93 | include_directories(src/include) 94 | 95 | set(EXTENSION_SOURCES src/fuzzycomplete_extension.cpp) 96 | 97 | build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) 98 | build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) 99 | 100 | get_target_property(fake_includes duckdb_fuzzycomplete_rust INCLUDE_DIRECTORIES) 101 | 102 | target_link_libraries(${EXTENSION_NAME} duckdb_fuzzycomplete_rust-static) 103 | target_link_libraries(${LOADABLE_EXTENSION_NAME} duckdb_fuzzycomplete_rust) 104 | 105 | install( 106 | TARGETS ${EXTENSION_NAME} 107 | EXPORT "${DUCKDB_EXPORT_SET}" 108 | LIBRARY DESTINATION "${INSTALL_LIB_DIR}" 109 | ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") 110 | 111 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2024 Rusty Conover 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) 2 | 3 | # Configuration of extension 4 | EXT_NAME=fuzzycomplete 5 | EXT_CONFIG=${PROJ_DIR}extension_config.cmake 6 | 7 | # Include the Makefile from extension-ci-tools 8 | include extension-ci-tools/makefiles/duckdb_extension.Makefile 9 | 10 | rust_binding_headers: 11 | cd duckdb_fuzzycomplete_rust && cbindgen --config ./cbindgen.toml --crate duckdb_fuzzycomplete_rust --output ../src/include/rust.h 12 | 13 | clean_all: clean 14 | cd duckdb_fuzzycomplete_rust && cargo clean -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fuzzycomplete Extension for DuckDB 2 | 3 | ![A duck trying to complete a crossword puzzle](./docs/duckdb-fuzzycompletion.jpeg) 4 | 5 | This `fuzzycomplete` extension serves as an alternative to DuckDB's [autocomplete](https://duckdb.org/docs/api/cli/autocomplete.html) extension, with several key differences: 6 | 7 | **Algorithm:** Unlike the [autocomplete extension](https://duckdb.org/docs/extensions/autocomplete.html), which uses edit distance as its metric, the fuzzycomplete extension employs a fuzzy string matching algorithm derived from Visual Studio Code. This provides more intuitive and flexible completion suggestions. 
8 | 9 | **Scope:** The `fuzzycomplete` extension can complete table names across different databases and schemas. It respects the current search path and offers suggestions accordingly, even when multiple databases are attached. 10 | 11 | It may not yet be the best solution for SQL completion, but it has proven to be useful to the author. 12 | 13 | ## Installation 14 | 15 | **`fuzzycomplete` is a [DuckDB Community Extension](https://github.com/duckdb/community-extensions).** 16 | 17 | You can install and load it with the following SQL: 18 | 19 | ```sql 20 | install fuzzycomplete from community; 21 | load fuzzycomplete; 22 | ``` 23 | 24 | ## Details of the fuzzy matching algorithm 25 | 26 | This extension uses the Rust crate [`code-fuzzy-match`](https://crates.io/crates/code-fuzzy-match) 27 | 28 | The algorithm ensures that characters in the query string appear in the same order in the target string. It handles substring queries efficiently, allowing searches within the middle of the target string without significantly impacting the match score. The algorithm prioritizes matches that occur at the beginning of words, where words are defined as they commonly appear in code (e.g., letters following a separator or in camel case). Sequential matches are also given preference. 29 | 30 | In addition to the basic matching algorithm, matches are then scored using the following criteria if they have an equal score from `code-fuzzy-match`: 31 | 32 | 1. In the event of a tie in the match score, completion results are first ordered by the number of pseudo-words in the candidate strings, favoring shorter completions. 33 | 2. A standard lexical sorting is then applied. 34 | 35 | ## When would I use this? 36 | 37 | If you're looking to try a different completion algorithm or need to complete table names from various databases and schemas, you might find this extension beneficial. 
38 | 39 | ### Build Architecture 40 | 41 | For the DuckDB extension to call the Rust code a tool called `cbindgen` is used to write the C++ headers for the exposed Rust interface. 42 | 43 | The headers can be updated by running `make rust_binding_headers`. 44 | 45 | ### Build steps 46 | Now to build the extension, run: 47 | ```sh 48 | make 49 | ``` 50 | The main binaries that will be built are: 51 | ```sh 52 | ./build/release/duckdb 53 | ./build/release/test/unittest 54 | ./build/release/extension/fuzzycomplete/fuzzycomplete.duckdb_extension 55 | ``` 56 | - `duckdb` is the binary for the duckdb shell with the extension code automatically loaded. 57 | - `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary. 58 | - `fuzzycomplete.duckdb_extension` is the loadable binary as it would be distributed. 59 | 60 | ## Running the extension 61 | To run the extension code, simply start the shell with `./build/release/duckdb`. 62 | 63 | Now we can use the features from the extension directly in DuckDB. 64 | 65 | ### Installing the deployed binaries 66 | To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the 67 | `allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using. Some examples: 68 | 69 | CLI: 70 | ```shell 71 | duckdb -unsigned 72 | ``` 73 | 74 | Python: 75 | ```python 76 | con = duckdb.connect(':memory:', config={'allow_unsigned_extensions' : 'true'}) 77 | ``` 78 | 79 | NodeJS: 80 | ```js 81 | db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"}); 82 | ``` 83 | 84 | Secondly, you will need to set the repository endpoint in DuckDB to the HTTP url of your bucket + version of the extension 85 | you want to install. 
To do this run the following SQL query in DuckDB: 86 | ```sql 87 | SET custom_extension_repository='bucket.s3.us-east-1.amazonaws.com/fuzzycomplete/latest'; 88 | ``` 89 | Note that the `/latest` path will allow you to install the latest extension version available for your current version of 90 | DuckDB. To specify a specific version, you can pass the version instead. 91 | 92 | After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB: 93 | ```sql 94 | INSTALL fuzzycomplete 95 | LOAD fuzzycomplete 96 | ``` 97 | -------------------------------------------------------------------------------- /docs/duckdb-fuzzycompletion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Query-farm/fuzzycomplete/a833e2910612602a49a80aee0ccea9945ea740fc/docs/duckdb-fuzzycompletion.jpeg -------------------------------------------------------------------------------- /duckdb_fuzzycomplete_rust/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "code-fuzzy-match" 7 | version = "0.2.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ac44797f65a7f21689e1b71746ae51461d60965fd20b273fbc223156a32fd3e7" 10 | 11 | [[package]] 12 | name = "duckdb_fuzzycomplete_rust" 13 | version = "0.1.0" 14 | dependencies = [ 15 | "code-fuzzy-match", 16 | ] 17 | -------------------------------------------------------------------------------- /duckdb_fuzzycomplete_rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "duckdb_fuzzycomplete_rust" 3 | version = "0.1.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | name = "duckdb_fuzzycomplete_rust" 9 | crate-type = ["staticlib"] 10 | 11 | [dependencies] 12 | code-fuzzy-match = "0.2.2" 13 | -------------------------------------------------------------------------------- /duckdb_fuzzycomplete_rust/cbindgen.toml: -------------------------------------------------------------------------------- 1 | # This is a template cbindgen.toml file with all of the default values. 2 | # Some values are commented out because their absence is the real default. 3 | # 4 | # See https://github.com/mozilla/cbindgen/blob/master/docs.md#cbindgentoml 5 | # for detailed documentation of every option here. 6 | 7 | 8 | 9 | language = "C++" 10 | 11 | 12 | 13 | ############## Options for Wrapping the Contents of the Header ################# 14 | 15 | # header = "/* Text to put at the beginning of the generated file. Probably a license. */" 16 | # trailer = "/* Text to put at the end of the generated file */" 17 | # include_guard = "my_bindings_h" 18 | # pragma_once = true 19 | # autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. 
*/" 20 | include_version = false 21 | # namespace = "my_namespace" 22 | namespaces = [] 23 | using_namespaces = [] 24 | sys_includes = [] 25 | includes = [] 26 | no_includes = false 27 | # cpp_compat = true 28 | after_includes = "" 29 | 30 | 31 | 32 | 33 | ############################ Code Style Options ################################ 34 | 35 | braces = "SameLine" 36 | line_length = 100 37 | tab_width = 2 38 | documentation = true 39 | documentation_style = "auto" 40 | documentation_length = "full" 41 | line_endings = "LF" # also "CR", "CRLF", "Native" 42 | 43 | 44 | 45 | 46 | ############################# Codegen Options ################################## 47 | 48 | style = "both" 49 | sort_by = "Name" # default for `fn.sort_by` and `const.sort_by` 50 | usize_is_size_t = true 51 | 52 | 53 | 54 | [defines] 55 | # "target_os = freebsd" = "DEFINE_FREEBSD" 56 | # "feature = serde" = "DEFINE_SERDE" 57 | 58 | 59 | 60 | [export] 61 | include = [] 62 | exclude = [] 63 | # prefix = "CAPI_" 64 | item_types = [] 65 | renaming_overrides_prefixing = false 66 | 67 | 68 | 69 | [export.rename] 70 | 71 | 72 | 73 | [export.body] 74 | 75 | 76 | [export.mangle] 77 | 78 | 79 | [fn] 80 | rename_args = "None" 81 | # must_use = "MUST_USE_FUNC" 82 | # deprecated = "DEPRECATED_FUNC" 83 | # deprecated_with_note = "DEPRECATED_FUNC_WITH_NOTE" 84 | # no_return = "NO_RETURN" 85 | # prefix = "START_FUNC" 86 | # postfix = "END_FUNC" 87 | args = "auto" 88 | sort_by = "Name" 89 | 90 | 91 | 92 | 93 | [struct] 94 | rename_fields = "None" 95 | # must_use = "MUST_USE_STRUCT" 96 | # deprecated = "DEPRECATED_STRUCT" 97 | # deprecated_with_note = "DEPRECATED_STRUCT_WITH_NOTE" 98 | derive_constructor = false 99 | derive_eq = false 100 | derive_neq = false 101 | derive_lt = false 102 | derive_lte = false 103 | derive_gt = false 104 | derive_gte = false 105 | 106 | 107 | 108 | 109 | [enum] 110 | rename_variants = "None" 111 | # must_use = "MUST_USE_ENUM" 112 | # deprecated = "DEPRECATED_ENUM" 113 | # 
deprecated_with_note = "DEPRECATED_ENUM_WITH_NOTE" 114 | add_sentinel = false 115 | prefix_with_name = false 116 | derive_helper_methods = false 117 | derive_const_casts = false 118 | derive_mut_casts = false 119 | # cast_assert_name = "ASSERT" 120 | derive_tagged_enum_destructor = false 121 | derive_tagged_enum_copy_constructor = false 122 | enum_class = true 123 | private_default_tagged_enum_constructor = false 124 | 125 | 126 | 127 | 128 | [const] 129 | allow_static_const = true 130 | allow_constexpr = false 131 | sort_by = "Name" 132 | 133 | 134 | 135 | 136 | [macro_expansion] 137 | bitflags = false 138 | 139 | 140 | 141 | 142 | 143 | 144 | ############## Options for How Your Rust library Should Be Parsed ############## 145 | 146 | [parse] 147 | parse_deps = false 148 | # include = [] 149 | exclude = [] 150 | clean = false 151 | extra_bindings = [] 152 | 153 | 154 | 155 | [parse.expand] 156 | crates = ["duckdb_fuzzycomplete_rust"] 157 | all_features = false 158 | default_features = true 159 | features = [] -------------------------------------------------------------------------------- /duckdb_fuzzycomplete_rust/src/lib.rs: -------------------------------------------------------------------------------- 1 | // duckdb_fuzzycomplete_rust 2 | // Copyright 2024 Rusty Conover 3 | // Licensed under the MIT License 4 | 5 | use code_fuzzy_match; 6 | 7 | use core::str; 8 | use std::ffi::CStr; 9 | use std::{ffi::c_char, slice}; 10 | 11 | macro_rules! make_str { 12 | ( $s : expr , $len : expr ) => { 13 | unsafe { str::from_utf8_unchecked(slice::from_raw_parts($s as *const u8, $len)) } 14 | }; 15 | } 16 | 17 | #[no_mangle] 18 | pub extern "C" fn perform_matches( 19 | // These are the array of strings that we are going to match against. 
20 | candidate_pool: *const *const c_char, 21 | candidate_pool_size: usize, 22 | 23 | // This is the query sting to match 24 | query: *const c_char, 25 | query_len: usize, 26 | 27 | // The maximum number of results to return 28 | max_results: usize, 29 | 30 | // The output ranking of candidates (pointers from the candidate pool) 31 | ranked_candidates: *mut *const c_char, 32 | 33 | // The actual number of produced results. 34 | actual_results: *mut usize, 35 | ) { 36 | let mut matcher = code_fuzzy_match::FuzzyMatcher::new(); 37 | 38 | let candidates: Vec<_> = (0..candidate_pool_size).map(|i| unsafe { 39 | let c_str_ptr = *candidate_pool.add(i); 40 | (CStr::from_ptr(c_str_ptr).to_str().unwrap(), c_str_ptr) 41 | }).collect(); 42 | 43 | let query = make_str!(query, query_len); 44 | 45 | let mut match_results: Vec<_> = if query.trim().is_empty() { 46 | candidates.iter().map(|s| (s, 0)).collect() 47 | } else { 48 | candidates.iter() 49 | .filter_map(|s| matcher.fuzzy_match(s.0, query).map(|score| (s, score))) 50 | .collect() 51 | }; 52 | 53 | fn count_word_occurrances(s: &str) -> usize { 54 | s.chars().filter(|&c| c == '_' || c == '.').count() 55 | } 56 | match_results.sort_by(|a, b| { 57 | // Sort by the store first, then the number of components, then lexically 58 | b.1.cmp(&a.1) 59 | // Ordering by words splitting by _ or . then by length then by the string itself 60 | .then_with(|| count_word_occurrances(a.0 .0).cmp(&count_word_occurrances(b.0 .0))) 61 | // Order by length. 
62 | .then_with(|| a.0 .0.cmp(&b.0 .0)) 63 | }); 64 | 65 | unsafe { 66 | let result_count = std::cmp::min(match_results.len(), max_results); 67 | *actual_results = result_count; 68 | for (index, result) in match_results.iter().enumerate().take(result_count) { 69 | *ranked_candidates.add(index) = result.0 .1; 70 | } 71 | } 72 | } 73 | 74 | #[cfg(test)] 75 | mod tests {} 76 | -------------------------------------------------------------------------------- /extension_config.cmake: -------------------------------------------------------------------------------- 1 | # This file is included by DuckDB's build system. It specifies which extension to load 2 | 3 | # Extension from this repo 4 | duckdb_extension_load(fuzzycomplete 5 | SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} 6 | LOAD_TESTS 7 | ) 8 | 9 | # Any extra extensions that should be built 10 | # e.g.: duckdb_extension_load(json) -------------------------------------------------------------------------------- /scripts/bootstrap-template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import sys, os, shutil, re 4 | from pathlib import Path 5 | 6 | shutil.copyfile(f'docs/NEXT_README.md', f'README.md') 7 | os.remove(f'docs/NEXT_README.md') 8 | os.remove(f'docs/README.md') 9 | 10 | if (len(sys.argv) != 2): 11 | raise Exception('usage: python3 bootstrap-template.py ') 12 | 13 | name_extension = sys.argv[1] 14 | 15 | def is_snake_case(s): 16 | # Define the regex pattern for snake case with numbers 17 | pattern = r'^[a-z0-9]+(_[a-z0-9]+)*$' 18 | 19 | # Use re.match to check if the string matches the pattern 20 | if re.match(pattern, s): 21 | return True 22 | else: 23 | return False 24 | 25 | if name_extension[0].isdigit(): 26 | raise Exception('Please dont start your extension name with a number.') 27 | 28 | if not is_snake_case(name_extension): 29 | raise Exception('Please enter the name of your extension in valid snake_case containing only lower case letters 
and numbers') 30 | 31 | def to_camel_case(snake_str): 32 | return "".join(x.capitalize() for x in snake_str.lower().split("_")) 33 | 34 | def replace(file_name, to_find, to_replace): 35 | with open(file_name, 'r', encoding="utf8") as file : 36 | filedata = file.read() 37 | filedata = filedata.replace(to_find, to_replace) 38 | with open(file_name, 'w', encoding="utf8") as file: 39 | file.write(filedata) 40 | 41 | files_to_search = [] 42 | files_to_search.extend(Path('./.github').rglob('./**/*.yml')) 43 | files_to_search.extend(Path('./test').rglob('./**/*.test')) 44 | files_to_search.extend(Path('./src').rglob('./**/*.hpp')) 45 | files_to_search.extend(Path('./src').rglob('./**/*.cpp')) 46 | files_to_search.extend(Path('./src').rglob('./**/*.txt')) 47 | files_to_search.extend(Path('./src').rglob('./*.md')) 48 | 49 | def replace_everywhere(to_find, to_replace): 50 | for path in files_to_search: 51 | replace(path, to_find, to_replace) 52 | replace(path, to_find.capitalize(), to_camel_case(to_replace)) 53 | replace(path, to_find.upper(), to_replace.upper()) 54 | 55 | replace("./CMakeLists.txt", to_find, to_replace) 56 | replace("./Makefile", to_find, to_replace) 57 | replace("./Makefile", to_find.capitalize(), to_camel_case(to_replace)) 58 | replace("./Makefile", to_find.upper(), to_replace.upper()) 59 | replace("./README.md", to_find, to_replace) 60 | replace("./extension_config.cmake", to_find, to_replace) 61 | 62 | replace_everywhere("quack", name_extension) 63 | replace_everywhere("Quack", name_extension.capitalize()) 64 | replace_everywhere("", name_extension) 65 | 66 | string_to_replace = name_extension 67 | string_to_find = "quack" 68 | 69 | # rename files 70 | os.rename(f'test/sql/{string_to_find}.test', f'test/sql/{string_to_replace}.test') 71 | os.rename(f'src/{string_to_find}_extension.cpp', f'src/{string_to_replace}_extension.cpp') 72 | os.rename(f'src/include/{string_to_find}_extension.hpp', f'src/include/{string_to_replace}_extension.hpp') 73 | 74 | # 
remove template-specific files 75 | os.remove('.github/workflows/ExtensionTemplate.yml') 76 | 77 | # finally, remove this bootstrap file 78 | os.remove(__file__) -------------------------------------------------------------------------------- /scripts/extension-upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Extension upload script 4 | 5 | # Usage: ./extension-upload.sh 6 | # : Name of the extension 7 | # : Version (commit / version tag) of the extension 8 | # : Version (commit / version tag) of DuckDB 9 | # : Architecture target of the extension binary 10 | # : S3 bucket to upload to 11 | # : Set this as the latest version ("true" / "false", default: "false") 12 | # : Set this as a versioned version that will prevent its deletion 13 | 14 | set -e 15 | 16 | if [[ $4 == wasm* ]]; then 17 | ext="/tmp/extension/$1.duckdb_extension.wasm" 18 | else 19 | ext="/tmp/extension/$1.duckdb_extension" 20 | fi 21 | 22 | echo $ext 23 | 24 | script_dir="$(dirname "$(readlink -f "$0")")" 25 | 26 | # calculate SHA256 hash of extension binary 27 | cat $ext > $ext.append 28 | 29 | if [[ $4 == wasm* ]]; then 30 | # 0 for custom section 31 | # 113 in hex = 275 in decimal, total lenght of what follows (1 + 16 + 2 + 256) 32 | # [1(continuation) + 0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02] 33 | echo -n -e '\x00' >> $ext.append 34 | echo -n -e '\x93\x02' >> $ext.append 35 | # 10 in hex = 16 in decimal, lenght of name, 1 byte 36 | echo -n -e '\x10' >> $ext.append 37 | echo -n -e 'duckdb_signature' >> $ext.append 38 | # the name of the WebAssembly custom section, 16 bytes 39 | # 100 in hex, 256 in decimal 40 | # [1(continuation) + 0000000(payload) = ff, 0(continuation) + 10(payload)], 41 | # for a grand total of 2 bytes 42 | echo -n -e '\x80\x02' >> $ext.append 43 | fi 44 | 45 | # (Optionally) Sign binary 46 | if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then 47 | echo "$DUCKDB_EXTENSION_SIGNING_PK" > 
private.pem 48 | $script_dir/../duckdb/scripts/compute-extension-hash.sh $ext.append > $ext.hash 49 | openssl pkeyutl -sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign 50 | rm -f private.pem 51 | fi 52 | 53 | # Signature is always there, potentially defaulting to 256 zeros 54 | truncate -s 256 $ext.sign 55 | 56 | # append signature to extension binary 57 | cat $ext.sign >> $ext.append 58 | 59 | # compress extension binary 60 | if [[ $4 == wasm_* ]]; then 61 | brotli < $ext.append > "$ext.compressed" 62 | else 63 | gzip < $ext.append > "$ext.compressed" 64 | fi 65 | 66 | set -e 67 | 68 | # Abort if AWS key is not set 69 | if [ -z "$AWS_ACCESS_KEY_ID" ]; then 70 | echo "No AWS key found, skipping.." 71 | exit 0 72 | fi 73 | 74 | # upload versioned version 75 | if [[ $7 = 'true' ]]; then 76 | if [[ $4 == wasm* ]]; then 77 | aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" 78 | else 79 | aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read 80 | fi 81 | fi 82 | 83 | # upload to latest version 84 | if [[ $6 = 'true' ]]; then 85 | if [[ $4 == wasm* ]]; then 86 | aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" 87 | else 88 | aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz --acl public-read 89 | fi 90 | fi 91 | -------------------------------------------------------------------------------- /src/fuzzycomplete_extension.cpp: -------------------------------------------------------------------------------- 1 | #define DUCKDB_EXTENSION_MAIN 2 | 3 | #include "fuzzycomplete_extension.hpp" 4 | 5 | #include "duckdb/catalog/catalog.hpp" 6 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" 7 | #include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp" 8 | #include "duckdb/catalog/catalog_search_path.hpp" 9 
| #include "duckdb/common/case_insensitive_map.hpp" 10 | #include "duckdb/common/exception.hpp" 11 | #include "duckdb/common/file_opener.hpp" 12 | #include "duckdb/function/table_function.hpp" 13 | #include "duckdb/main/client_context.hpp" 14 | #include "duckdb/main/client_data.hpp" 15 | #include "duckdb/main/database_manager.hpp" 16 | #include "duckdb/main/extension_util.hpp" 17 | #include "duckdb/parser/keyword_helper.hpp" 18 | #include "duckdb/parser/parser.hpp" 19 | 20 | // Include the declarations of things from Rust. 21 | #include "rust.h" 22 | 23 | namespace duckdb 24 | { 25 | 26 | struct SQLFuzzyCompleteFunctionData : public TableFunctionData 27 | { 28 | explicit SQLFuzzyCompleteFunctionData(vector suggestions_p, idx_t start_pos) 29 | : suggestions(std::move(suggestions_p)), start_pos(start_pos) 30 | { 31 | } 32 | 33 | vector suggestions; 34 | idx_t start_pos; 35 | }; 36 | 37 | struct SQLFuzzyCompleteData : public GlobalTableFunctionState 38 | { 39 | SQLFuzzyCompleteData() : offset(0) 40 | { 41 | } 42 | 43 | idx_t offset; 44 | }; 45 | 46 | struct FuzzyCompleteCandidate 47 | { 48 | explicit FuzzyCompleteCandidate(string candidate_p, int32_t score_bonus = 0) 49 | : candidate(std::move(candidate_p)), score_bonus(score_bonus) 50 | { 51 | } 52 | 53 | string candidate; 54 | //! The higher the score bonus, the more likely this candidate will be chosen 55 | int32_t score_bonus; 56 | }; 57 | 58 | static vector ComputeSuggestions(vector available_suggestions, const string &prefix, 59 | const unordered_set &extra_keywords, bool add_quotes = false) 60 | { 61 | for (auto &kw : extra_keywords) 62 | { 63 | available_suggestions.emplace_back(std::move(kw)); 64 | } 65 | 66 | const size_t max_results = 20; 67 | 68 | // Create a vector of const char* pointers 69 | std::vector candidate_pool_pointers; 70 | std::vector suggestion_results(max_results); 71 | 72 | // Make sure all of the suggestions are unique. 
73 | 74 | std::set> unique_suggestions; 75 | for (const auto &str : available_suggestions) 76 | { 77 | unique_suggestions.insert(make_shared_ptr(str.candidate)); 78 | } 79 | 80 | candidate_pool_pointers.reserve(unique_suggestions.size()); 81 | 82 | for (const auto &str : unique_suggestions) 83 | { 84 | candidate_pool_pointers.push_back(str->c_str()); 85 | } 86 | 87 | // Get the suggestions from rust 88 | size_t actual_matches; 89 | 90 | perform_matches( 91 | candidate_pool_pointers.data(), 92 | candidate_pool_pointers.size(), 93 | prefix.c_str(), 94 | prefix.size(), 95 | max_results, 96 | suggestion_results.data(), 97 | &actual_matches); 98 | 99 | vector results; 100 | results.reserve(actual_matches); 101 | for (size_t i = 0; i < actual_matches; i++) 102 | { 103 | results.emplace_back(string(suggestion_results[i])); 104 | } 105 | 106 | if (add_quotes) 107 | { 108 | for (auto &result : results) 109 | { 110 | if (extra_keywords.find(result) == extra_keywords.end()) 111 | { 112 | result = KeywordHelper::WriteOptionallyQuoted(result, '"', true); 113 | } 114 | else 115 | { 116 | result = result + " "; 117 | } 118 | } 119 | } 120 | return results; 121 | } 122 | 123 | static vector InitialKeywords() 124 | { 125 | return vector{"SELECT", "INSERT", "DELETE", "UPDATE", "CREATE", "DROP", "COPY", 126 | "ALTER", "WITH", "EXPORT", "BEGIN", "VACUUM", "PREPARE", "EXECUTE", 127 | "DEALLOCATE", "CALL", "ANALYZE", "EXPLAIN", "DESCRIBE", "SUMMARIZE", "LOAD", 128 | "CHECKPOINT", "ROLLBACK", "COMMIT", "CALL", "FROM", "PIVOT", "UNPIVOT"}; 129 | } 130 | 131 | static vector SuggestKeyword(ClientContext &context) 132 | { 133 | auto keywords = InitialKeywords(); 134 | vector result; 135 | for (auto &kw : keywords) 136 | { 137 | auto score = 0; 138 | if (kw == "SELECT") 139 | { 140 | score = 2; 141 | } 142 | if (kw == "FROM" || kw == "DELETE" || kw == "INSERT" || kw == "UPDATE") 143 | { 144 | score = 1; 145 | } 146 | result.emplace_back(kw + " ", score); 147 | } 148 | return result; 149 | } 
150 | 151 | static vector> GetAllTables(ClientContext &context, bool for_table_names) 152 | { 153 | vector> result; 154 | // scan all the schemas for tables and collect them and collect them 155 | // for column names we avoid adding internal entries, because it pollutes the auto-complete too much 156 | // for table names this is generally fine, however 157 | auto schemas = Catalog::GetAllSchemas(context); 158 | for (auto &schema_ref : schemas) 159 | { 160 | auto &schema = schema_ref.get(); 161 | schema.Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry &entry) 162 | { 163 | if (!entry.internal || for_table_names) { 164 | result.push_back(entry); 165 | } }); 166 | }; 167 | if (for_table_names) 168 | { 169 | for (auto &schema_ref : schemas) 170 | { 171 | auto &schema = schema_ref.get(); 172 | schema.Scan(context, CatalogType::TABLE_FUNCTION_ENTRY, 173 | [&](CatalogEntry &entry) 174 | { result.push_back(entry); }); 175 | }; 176 | } 177 | else 178 | { 179 | for (auto &schema_ref : schemas) 180 | { 181 | auto &schema = schema_ref.get(); 182 | schema.Scan(context, CatalogType::SCALAR_FUNCTION_ENTRY, 183 | [&](CatalogEntry &entry) 184 | { result.push_back(entry); }); 185 | }; 186 | } 187 | return result; 188 | } 189 | 190 | static vector SuggestTableName(ClientContext &context) 191 | { 192 | vector suggestions; 193 | auto all_entries = GetAllTables(context, true); 194 | 195 | auto default_database = DatabaseManager::GetDefaultDatabase(context); 196 | 197 | auto default_schema = ClientData::Get(context).catalog_search_path->GetDefault().schema; 198 | 199 | // So really we should add the other catalog names. 200 | 201 | // Then the other schema names. 202 | std::set seen_databases = {}; 203 | std::set seen_schemas = {}; 204 | 205 | // To get the list of available entries 206 | 207 | // There are a few possiblities. 208 | 209 | // same database same schema. 
210 | for (auto &entry_ref : all_entries) 211 | { 212 | auto &entry = entry_ref.get(); 213 | // prioritize user-defined entries (views & tables) 214 | int32_t bonus = (entry.internal || entry.type == CatalogType::TABLE_FUNCTION_ENTRY) ? 0 : 1; 215 | 216 | // We should use the full path based on the calling context of the user. 217 | auto catalog_name = entry_ref.get().ParentCatalog().GetName(); 218 | auto schema_name = entry_ref.get().ParentSchema().name; 219 | 220 | auto quoted_schema_name = KeywordHelper::WriteOptionallyQuoted(schema_name, '"', true); 221 | auto quoted_catalog_name = KeywordHelper::WriteOptionallyQuoted(catalog_name, '"', true); 222 | auto quoted_entry_name = KeywordHelper::WriteOptionallyQuoted(entry.name, '"', true); 223 | 224 | string prefix = quoted_catalog_name + "." + quoted_schema_name; 225 | 226 | if (catalog_name != default_database) 227 | { 228 | seen_databases.insert(quoted_catalog_name); 229 | seen_schemas.insert(prefix); 230 | suggestions.emplace_back(prefix + "." + quoted_entry_name, bonus); 231 | } 232 | else 233 | { 234 | if (schema_name == default_schema) 235 | { 236 | suggestions.emplace_back(quoted_entry_name, bonus); 237 | } 238 | else 239 | { 240 | suggestions.emplace_back(quoted_schema_name + "." 
+ quoted_entry_name, bonus); 241 | seen_schemas.insert(prefix); 242 | } 243 | } 244 | } 245 | 246 | for (auto &database : seen_databases) 247 | { 248 | suggestions.emplace_back(database, 1); 249 | } 250 | 251 | for (auto &schema : seen_schemas) 252 | { 253 | suggestions.emplace_back(schema, 1); 254 | } 255 | 256 | vector suggestions_str; 257 | for (auto &suggestion : suggestions) 258 | { 259 | suggestions_str.emplace_back(suggestion.candidate); 260 | } 261 | 262 | return suggestions; 263 | } 264 | 265 | static vector SuggestColumnName(ClientContext &context) 266 | { 267 | vector suggestions; 268 | auto all_entries = GetAllTables(context, false); 269 | for (auto &entry_ref : all_entries) 270 | { 271 | auto &entry = entry_ref.get(); 272 | if (entry.type == CatalogType::TABLE_ENTRY) 273 | { 274 | auto &table = entry.Cast(); 275 | for (auto &col : table.GetColumns().Logical()) 276 | { 277 | suggestions.emplace_back(col.GetName(), 1); 278 | } 279 | } 280 | else if (entry.type == CatalogType::VIEW_ENTRY) 281 | { 282 | auto &view = entry.Cast(); 283 | for (auto &col : view.aliases) 284 | { 285 | suggestions.emplace_back(col, 1); 286 | } 287 | } 288 | else 289 | { 290 | if (StringUtil::CharacterIsOperator(entry.name[0])) 291 | { 292 | continue; 293 | } 294 | suggestions.emplace_back(entry.name); 295 | }; 296 | } 297 | return suggestions; 298 | } 299 | 300 | static bool KnownExtension(const string &fname) 301 | { 302 | vector known_extensions{".parquet", ".csv", ".tsv", ".csv.gz", ".tsv.gz", ".tbl"}; 303 | for (auto &ext : known_extensions) 304 | { 305 | if (StringUtil::EndsWith(fname, ext)) 306 | { 307 | return true; 308 | } 309 | } 310 | return false; 311 | } 312 | 313 | static vector SuggestFileName(ClientContext &context, string &prefix, idx_t &last_pos) 314 | { 315 | auto &fs = FileSystem::GetFileSystem(context); 316 | string search_dir; 317 | D_ASSERT(last_pos >= prefix.size()); 318 | auto is_path_absolute = fs.IsPathAbsolute(prefix); 319 | for (idx_t i = 
prefix.size(); i > 0; i--, last_pos--) 320 | { 321 | if (prefix[i - 1] == '/' || prefix[i - 1] == '\\') 322 | { 323 | search_dir = prefix.substr(0, i - 1); 324 | prefix = prefix.substr(i); 325 | break; 326 | } 327 | } 328 | if (search_dir.empty()) 329 | { 330 | search_dir = is_path_absolute ? "/" : "."; 331 | } 332 | else 333 | { 334 | search_dir = fs.ExpandPath(search_dir); 335 | } 336 | vector result; 337 | fs.ListFiles(search_dir, [&](const string &fname, bool is_dir) 338 | { 339 | string suggestion; 340 | if (is_dir) { 341 | suggestion = fname + fs.PathSeparator(fname); 342 | } else { 343 | suggestion = fname + "'"; 344 | } 345 | int score = 0; 346 | if (is_dir && fname[0] != '.') { 347 | score = 2; 348 | } 349 | if (KnownExtension(fname)) { 350 | score = 1; 351 | } 352 | result.emplace_back(std::move(suggestion), score); }); 353 | return result; 354 | } 355 | 356 | enum class SuggestionState : uint8_t 357 | { 358 | SUGGEST_KEYWORD, 359 | SUGGEST_TABLE_NAME, 360 | SUGGEST_COLUMN_NAME, 361 | SUGGEST_FILE_NAME 362 | }; 363 | 364 | static bool is_word_break(char c) 365 | { 366 | return StringUtil::CharacterIsOperator(c) && c != '.'; 367 | } 368 | 369 | static duckdb::unique_ptr GenerateSuggestions(ClientContext &context, const string &sql) 370 | { 371 | // for auto-completion, we consider 4 scenarios 372 | // * there is nothing in the buffer, or only one word -> suggest a keyword 373 | // * the previous keyword is SELECT, WHERE, BY, HAVING, ... -> suggest a column name 374 | // * the previous keyword is FROM, INSERT, UPDATE ,... 
-> select a table name 375 | // * we are in a string constant -> suggest a filename 376 | // figure out which state we are in by doing a run through the query 377 | idx_t pos = 0; 378 | idx_t last_pos = 0; 379 | idx_t pos_offset = 0; 380 | bool seen_word = false; 381 | unordered_set suggested_keywords; 382 | SuggestionState suggest_state = SuggestionState::SUGGEST_KEYWORD; 383 | case_insensitive_set_t column_name_keywords = {"SELECT", "WHERE", "BY", "HAVING", "QUALIFY", 384 | "LIMIT", "SET", "USING", "ON"}; 385 | case_insensitive_set_t table_name_keywords = {"FROM", "JOIN", "INSERT", "UPDATE", "DELETE", 386 | "ALTER", "DROP", "CALL", "DESCRIBE"}; 387 | case_insensitive_map_t> next_keyword_map; 388 | next_keyword_map["SELECT"] = {"FROM", "WHERE", "GROUP", "HAVING", "WINDOW", "ORDER", "LIMIT", 389 | "QUALIFY", "SAMPLE", "VALUES", "UNION", "EXCEPT", "INTERSECT", "DISTINCT"}; 390 | next_keyword_map["WITH"] = {"RECURSIVE", "SELECT", "AS"}; 391 | next_keyword_map["INSERT"] = {"INTO", "VALUES", "SELECT", "DEFAULT"}; 392 | next_keyword_map["DELETE"] = {"FROM", "WHERE", "USING"}; 393 | next_keyword_map["UPDATE"] = {"SET", "WHERE"}; 394 | next_keyword_map["CREATE"] = {"TABLE", "SCHEMA", "VIEW", "SEQUENCE", "MACRO", "FUNCTION", "SECRET", "TYPE"}; 395 | next_keyword_map["DROP"] = next_keyword_map["CREATE"]; 396 | next_keyword_map["ALTER"] = {"TABLE", "VIEW", "ADD", "DROP", "COLUMN", "SET", "TYPE", "DEFAULT", "DATA", "RENAME"}; 397 | 398 | regular_scan: 399 | for (; pos < sql.size(); pos++) 400 | { 401 | if (sql[pos] == '\'') 402 | { 403 | pos++; 404 | last_pos = pos; 405 | goto in_string_constant; 406 | } 407 | if (sql[pos] == '"') 408 | { 409 | pos++; 410 | last_pos = pos; 411 | goto in_quotes; 412 | } 413 | if (sql[pos] == '-' && pos + 1 < sql.size() && sql[pos + 1] == '-') 414 | { 415 | goto in_comment; 416 | } 417 | if (sql[pos] == ';') 418 | { 419 | // semicolon: restart suggestion flow 420 | suggest_state = SuggestionState::SUGGEST_KEYWORD; 421 | 
suggested_keywords.clear(); 422 | last_pos = pos + 1; 423 | continue; 424 | } 425 | if (StringUtil::CharacterIsSpace(sql[pos]) || is_word_break(sql[pos])) 426 | { 427 | if (seen_word) 428 | { 429 | goto process_word; 430 | } 431 | } 432 | else 433 | { 434 | seen_word = true; 435 | } 436 | } 437 | goto standard_suggestion; 438 | in_comment: 439 | for (; pos < sql.size(); pos++) 440 | { 441 | if (sql[pos] == '\n' || sql[pos] == '\r') 442 | { 443 | pos++; 444 | goto regular_scan; 445 | } 446 | } 447 | // no suggestions inside comments 448 | return make_uniq(vector(), 0); 449 | in_quotes: 450 | for (; pos < sql.size(); pos++) 451 | { 452 | if (sql[pos] == '"') 453 | { 454 | pos++; 455 | last_pos = pos; 456 | seen_word = true; 457 | goto regular_scan; 458 | } 459 | } 460 | pos_offset = 1; 461 | goto standard_suggestion; 462 | in_string_constant: 463 | for (; pos < sql.size(); pos++) 464 | { 465 | if (sql[pos] == '\'') 466 | { 467 | pos++; 468 | last_pos = pos; 469 | seen_word = true; 470 | goto regular_scan; 471 | } 472 | } 473 | suggest_state = SuggestionState::SUGGEST_FILE_NAME; 474 | goto standard_suggestion; 475 | process_word: 476 | { 477 | while ((last_pos < sql.size()) && 478 | (StringUtil::CharacterIsSpace(sql[last_pos]) || StringUtil::CharacterIsOperator(sql[last_pos]))) 479 | { 480 | last_pos++; 481 | } 482 | auto next_word = sql.substr(last_pos, pos - last_pos); 483 | if (table_name_keywords.find(next_word) != table_name_keywords.end()) 484 | { 485 | suggest_state = SuggestionState::SUGGEST_TABLE_NAME; 486 | } 487 | else if (column_name_keywords.find(next_word) != column_name_keywords.end()) 488 | { 489 | suggest_state = SuggestionState::SUGGEST_COLUMN_NAME; 490 | } 491 | auto entry = next_keyword_map.find(next_word); 492 | if (entry != next_keyword_map.end()) 493 | { 494 | suggested_keywords = entry->second; 495 | } 496 | else 497 | { 498 | suggested_keywords.erase(next_word); 499 | } 500 | if (std::all_of(next_word.begin(), next_word.end(), ::isdigit)) 501 
| { 502 | // Numbers are OK 503 | suggested_keywords.clear(); 504 | } 505 | seen_word = false; 506 | last_pos = pos; 507 | goto regular_scan; 508 | } 509 | standard_suggestion: 510 | if (suggest_state != SuggestionState::SUGGEST_FILE_NAME) 511 | { 512 | while ((last_pos < sql.size()) && 513 | (StringUtil::CharacterIsSpace(sql[last_pos]) || is_word_break(sql[last_pos]))) 514 | { 515 | last_pos++; 516 | } 517 | } 518 | auto last_word = sql.substr(last_pos, pos - last_pos); 519 | last_pos -= pos_offset; 520 | vector suggestions; 521 | 522 | switch (suggest_state) 523 | { 524 | case SuggestionState::SUGGEST_KEYWORD: 525 | suggestions = ComputeSuggestions(SuggestKeyword(context), last_word, suggested_keywords); 526 | break; 527 | case SuggestionState::SUGGEST_TABLE_NAME: 528 | suggestions = ComputeSuggestions(SuggestTableName(context), last_word, suggested_keywords, false); 529 | break; 530 | case SuggestionState::SUGGEST_COLUMN_NAME: 531 | suggestions = ComputeSuggestions(SuggestColumnName(context), last_word, suggested_keywords, true); 532 | break; 533 | case SuggestionState::SUGGEST_FILE_NAME: 534 | last_pos = pos; 535 | suggestions = 536 | ComputeSuggestions(SuggestFileName(context, last_word, last_pos), last_word, unordered_set()); 537 | break; 538 | default: 539 | throw InternalException("Unrecognized suggestion state"); 540 | } 541 | if (last_pos > sql.size()) 542 | { 543 | D_ASSERT(false); 544 | throw NotImplementedException("last_pos out of range"); 545 | } 546 | if (!last_word.empty() && std::all_of(last_word.begin(), last_word.end(), ::isdigit)) 547 | { 548 | // avoid giving auto-complete suggestion for digits 549 | suggestions.clear(); 550 | } 551 | return make_uniq(std::move(suggestions), last_pos); 552 | } 553 | 554 | static duckdb::unique_ptr SQLFuzzyCompleteBind(ClientContext &context, TableFunctionBindInput &input, 555 | vector &return_types, vector &names) 556 | { 557 | if (input.inputs[0].IsNull()) 558 | { 559 | throw 
BinderException("sql_auto_complete first parameter cannot be NULL"); 560 | } 561 | names.emplace_back("suggestion"); 562 | return_types.emplace_back(LogicalType::VARCHAR); 563 | 564 | names.emplace_back("suggestion_start"); 565 | return_types.emplace_back(LogicalType::INTEGER); 566 | 567 | return GenerateSuggestions(context, StringValue::Get(input.inputs[0])); 568 | } 569 | 570 | unique_ptr SQLFuzzyCompleteInit(ClientContext &context, TableFunctionInitInput &input) 571 | { 572 | return make_uniq(); 573 | } 574 | 575 | void SQLFuzzyCompleteFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) 576 | { 577 | auto &bind_data = data_p.bind_data->Cast(); 578 | auto &data = data_p.global_state->Cast(); 579 | if (data.offset >= bind_data.suggestions.size()) 580 | { 581 | // finished returning values 582 | return; 583 | } 584 | // start returning values 585 | // either fill up the chunk or return all the remaining columns 586 | idx_t count = 0; 587 | while (data.offset < bind_data.suggestions.size() && count < STANDARD_VECTOR_SIZE) 588 | { 589 | auto &entry = bind_data.suggestions[data.offset++]; 590 | 591 | // suggestion, VARCHAR 592 | output.SetValue(0, count, Value(entry)); 593 | 594 | // suggestion_start, INTEGER 595 | output.SetValue(1, count, Value::INTEGER(bind_data.start_pos)); 596 | 597 | count++; 598 | } 599 | output.SetCardinality(count); 600 | } 601 | 602 | static void LoadInternal(DatabaseInstance &db) 603 | { 604 | // For now just only load if the autocomplete extension isn't loaded. 605 | // there are plans to improve this in the future. 
606 | if (!db.ExtensionIsLoaded("autocomplete")) { 607 | TableFunction auto_complete_fun("sql_auto_complete", {LogicalType::VARCHAR}, SQLFuzzyCompleteFunction, 608 | SQLFuzzyCompleteBind, SQLFuzzyCompleteInit); 609 | ExtensionUtil::RegisterFunction(db, auto_complete_fun); 610 | } 611 | } 612 | 613 | void FuzzycompleteExtension::Load(DuckDB &db) 614 | { 615 | LoadInternal(*db.instance); 616 | } 617 | 618 | std::string FuzzycompleteExtension::Name() 619 | { 620 | return "fuzzycomplete"; 621 | } 622 | 623 | std::string FuzzycompleteExtension::Version() const 624 | { 625 | #ifdef EXT_VERSION_FuzzyComplete 626 | return EXT_VERSION_FuzzyComplete; 627 | #else 628 | return ""; 629 | #endif 630 | } 631 | 632 | } // namespace duckdb 633 | extern "C" 634 | { 635 | 636 | DUCKDB_EXTENSION_API void fuzzycomplete_init(duckdb::DatabaseInstance &db) 637 | { 638 | LoadInternal(db); 639 | } 640 | 641 | DUCKDB_EXTENSION_API const char *fuzzycomplete_version() 642 | { 643 | return duckdb::DuckDB::LibraryVersion(); 644 | } 645 | } 646 | 647 | #ifndef DUCKDB_EXTENSION_MAIN 648 | #error DUCKDB_EXTENSION_MAIN not defined 649 | #endif 650 | -------------------------------------------------------------------------------- /src/include/fuzzycomplete_extension.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb 6 | { 7 | 8 | class FuzzycompleteExtension : public Extension 9 | { 10 | public: 11 | void Load(DuckDB &db) override; 12 | std::string Name() override; 13 | std::string Version() const override; 14 | }; 15 | 16 | } // namespace duckdb 17 | -------------------------------------------------------------------------------- /src/include/rust.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | 10 | extern "C" { 11 | 12 | void perform_matches(const char *const 
*candidate_pool, 13 | size_t candidate_pool_size, 14 | const char *query, 15 | size_t query_len, 16 | size_t max_results, 17 | const char **ranked_candidates, 18 | size_t *actual_results); 19 | 20 | } // extern "C" 21 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Testing this extension 2 | This directory contains all the tests for this extension. The `sql` directory holds tests that are written as [SQLLogicTests](https://duckdb.org/dev/sqllogictest/intro.html). DuckDB aims to have most of its tests in this format as SQL statements, so for the fuzzycomplete extension, this should probably be the goal too. 3 | 4 | The root makefile contains targets to build and run all of these tests. To run the SQLLogicTests: 5 | ```bash 6 | make test 7 | ``` 8 | or 9 | ```bash 10 | make test_debug 11 | ``` -------------------------------------------------------------------------------- /test/sql/fuzzycomplete.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/fuzzycomplete.test 2 | # description: test fuzzycomplete extension 3 | # group: [fuzzycomplete] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT * FROM sql_auto_complete('SEL'); 8 | ---- 9 | Catalog Error: Table Function with name "sql_auto_complete" is not in the catalog, but it exists in the autocomplete extension. 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require fuzzycomplete 13 | 14 | # Confirm the extension works 15 | query I 16 | SELECT trim(suggestion) from sql_auto_complete('SEL') 17 | ---- 18 | SELECT 19 | 20 | query I 21 | SELECT suggestion from sql_auto_complete('SELECT * from tables') limit 1 22 | ---- 23 | "system".main.duckdb_tables 24 | -------------------------------------------------------------------------------- /vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [] 3 | } --------------------------------------------------------------------------------