├── .gitmodules ├── .gitignore ├── test ├── python │ └── quack_test.py ├── sql │ └── quack.test ├── nodejs │ └── quack_test.js └── README.md ├── src ├── include │ ├── duckprompt_extension.hpp │ ├── https.hpp │ ├── chat.hpp │ └── quacking_duck.hpp ├── https.cpp ├── quacking_duck.cpp ├── chat.cpp └── duckprompt_extension.cpp ├── LICENSE ├── scripts ├── extension-upload.sh ├── set_extension_name.py └── build_sample_db.sql ├── .github └── workflows │ ├── NodeJS.yml │ ├── Python.yml │ ├── Windows.yml │ ├── MacOS.yml │ ├── Linux.yml │ └── ExtensionTemplate.yml ├── CMakeLists.txt ├── docs ├── NEXT_README.md └── README.md ├── Makefile └── README.md /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "duckdb"] 2 | path = duckdb 3 | url = https://github.com/duckdb/duckdb 4 | branch = master 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .idea 3 | cmake-build-debug 4 | duckdb_unittest_tempdir/ 5 | .DS_Store 6 | testext 7 | test/python/__pycache__/ 8 | .Rhistory 9 | CMakeCache.txt 10 | CMakeFiles/ 11 | .vscode/ 12 | -------------------------------------------------------------------------------- /test/python/quack_test.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | def test_quack(): 4 | conn = duckdb.connect(''); 5 | conn.execute("SELECT quack('Sam') as value;"); 6 | res = conn.fetchall() 7 | assert(res[0][0] == "Quack Sam 🐥"); -------------------------------------------------------------------------------- /src/include/duckprompt_extension.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb { 6 | 7 | class DuckpromptExtension : public Extension { 8 | public: 9 | void Load(DuckDB &db) override; 10 | 
std::string Name() override; 11 | 12 | }; 13 | 14 | } // namespace duckdb 15 | -------------------------------------------------------------------------------- /test/sql/quack.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/quack.test 2 | # description: test quack extension 3 | # group: [quack] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT quack('Sam'); 8 | ---- 9 | Catalog Error: Scalar Function with name quack does not exist! 10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require quack 13 | 14 | # Confirm the extension works 15 | query I 16 | SELECT quack('Sam'); 17 | ---- 18 | Quack Sam 🐥 -------------------------------------------------------------------------------- /test/nodejs/quack_test.js: -------------------------------------------------------------------------------- 1 | var duckdb = require('../../duckdb/tools/nodejs'); 2 | var assert = require('assert'); 3 | 4 | describe(`quack extension`, () => { 5 | let db; 6 | let conn; 7 | before((done) => { 8 | db = new duckdb.Database(':memory:'); 9 | conn = new duckdb.Connection(db); 10 | done(); 11 | }); 12 | 13 | it('function should return expected constant', function (done) { 14 | db.all("SELECT quack('Sam') as value;", function (err, res) { 15 | if (err) throw err; 16 | assert.deepEqual(res, [{value: "Quack Sam 🐥"}]); 17 | done(); 18 | }); 19 | }); 20 | }); -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Testing the quack extension 2 | This directory contains all the tests for the quack extension. The `sql` directory holds tests that are written as [SQLLogicTests](https://duckdb.org/dev/sqllogictest/intro.html). 
namespace duckdb_httplib_openssl {
class SSLClient;
}

// Result of an HTTPS request: the HTTP status code plus the raw response body.
// A code of -1 with an empty body denotes "no valid response".
struct HTTPSResponse {
    HTTPSResponse() : code(-1), response("") {}
    HTTPSResponse(int c, std::string r) : code(c), response(r) {}

    // Convenience factory for the "no valid response" value (code -1, empty body).
    static HTTPSResponse InvalidResponse() { return HTTPSResponse(-1, ""); }

    int code;
    std::string response;
};

// Thin HTTPS client bound to a single host.
//
// The object owns the underlying SSLClient through a raw pointer that is
// released in the destructor, so it must never be copied: a copy would share
// the pointer and delete it twice.
class HTTPS {
public:
    HTTPS(std::string host);
    ~HTTPS();

    // Non-copyable because of the owning raw pointer (see class comment).
    HTTPS(const HTTPS &) = delete;
    HTTPS &operator=(const HTTPS &) = delete;

    // Sends a POST request to `path` on the bound host.
    //   path        - request path, appended to the host by the implementation
    //   all_headers - (name, value) pairs added to the request
    //   body        - request payload
    // Returns the status code and body of the server's response.
    HTTPSResponse Post(
        std::string path,
        const std::vector<std::pair<std::string, std::string>> &all_headers,
        std::string body);

private:
    duckdb_httplib_openssl::SSLClient *client_; // owned; deleted in ~HTTPS()
    std::string host_;
};
#!/bin/bash

# Compresses a built extension binary and uploads it to S3.
#
# Usage: ./extension-upload.sh <name> <extension_version> <duckdb_version> <architecture> <s3_bucket> <copy_to_latest>
# <name>              : Name of the extension
# <extension_version> : Version (commit / version tag) of the extension
# <duckdb_version>    : Version (commit / version tag) of DuckDB
# <architecture>      : Architecture target of the extension binary
# <s3_bucket>         : S3 bucket to upload to
# <copy_to_latest>    : Set this as the latest version ("true" / "false", default: "false")

set -e

ext="build/release/extension/$1/$1.duckdb_extension"

# compress extension binary
gzip < "${ext}" > "$1.duckdb_extension.gz"

# upload compressed extension binary to S3
# (source and destination must be two separate arguments to `aws s3 cp`)
aws s3 cp "$1.duckdb_extension.gz" "s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz" --acl public-read

# upload to latest if copy_to_latest is set to true
if [[ $6 = 'true' ]]; then
  aws s3 cp "$1.duckdb_extension.gz" "s3://$5/$1/latest/$3/$4/$1.duckdb_extension.gz" --acl public-read
fi
speed up CI 3 | # 4 | 5 | name: NodeJS 6 | on: [push, pull_request,repository_dispatch] 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 9 | cancel-in-progress: true 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | nodejs: 16 | name: NodeJS 17 | runs-on: ubuntu-latest 18 | env: 19 | GEN: ninja 20 | 21 | steps: 22 | - name: Install Ninja 23 | run: | 24 | sudo apt-get update -y -qq 25 | sudo apt-get install -y -qq ninja-build 26 | 27 | - uses: actions/checkout@v2 28 | with: 29 | fetch-depth: 0 30 | submodules: 'true' 31 | 32 | - uses: actions/setup-python@v2 33 | with: 34 | python-version: '3.9' 35 | 36 | - name: Build DuckDB NodeJS client 37 | run: make debug_js 38 | 39 | - name: Run NodeJS client tests 40 | run: make test_debug_js -------------------------------------------------------------------------------- /.github/workflows/Python.yml: -------------------------------------------------------------------------------- 1 | # 2 | # NOTE: if python tests are unused, deleting this file or disabling the workflow on GitHub will speed up CI 3 | # 4 | 5 | name: Python 6 | on: [push, pull_request,repository_dispatch] 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 9 | cancel-in-progress: true 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | python: 16 | name: Python 17 | runs-on: ubuntu-latest 18 | env: 19 | GEN: ninja 20 | 21 | steps: 22 | - name: Install Ninja 23 | run: | 24 | sudo apt-get update -y -qq 25 | sudo apt-get install -y -qq ninja-build 26 | 27 | - uses: actions/checkout@v2 28 | with: 29 | fetch-depth: 0 30 | submodules: 'true' 31 | 32 | - uses: actions/setup-python@v2 33 | with: 34 | python-version: '3.9' 35 | 36 | - name: Build DuckDB Python client 37 | run: make 
// One message of a chat exchange: the speaker's role and the message text.
//
// Follows the Rule of Zero: both members are std::string, so the implicitly
// generated copy AND move operations are correct. Declaring the copy
// operations by hand (as before) suppressed the implicit moves, forcing a
// deep copy whenever a std::vector<ChatContext> reallocates.
class ChatContext {
public:
    ChatContext() = default;
    ChatContext(std::string r, std::string c) : role(r), content(c) {}

    // Serialises this message; implemented in chat.cpp.
    std::string GenerateMessage();

    std::string role;
    std::string content;
};

// A chat session against a remote model. Keeps the accumulated message
// context and the name of the model in use.
class Chat {
public:
    // Uses `model` when it is non-empty, otherwise falls back to c_model.
    Chat(std::string model) : model_(!model.empty() ? model : c_model) {}

    void Reset(std::string context);

    void SetSystemContext(std::string context);

    std::string SendPrompt(std::string prompt);

private:
    std::string GenerateMessages();
    std::string GenerateRequest();

private:
    // Defined in chat.cpp.
    static const char *c_open_ai_host;
    static const char *c_chat_uri;
    static const char *c_model;

    std::string model_;
    std::vector<ChatContext> context_;
};
-------------------------------------------------------------------------------- /src/include/quacking_duck.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "chat.hpp" 7 | 8 | struct ExtractedSchema { 9 | std::vector table_ddl; 10 | std::string SchemaToString() const; 11 | }; 12 | 13 | class DatabaseInterface { 14 | public: 15 | virtual void ExtractSchema(ExtractedSchema& ex) = 0; 16 | virtual std::string ValidateParse(std::string query) = 0; 17 | virtual std::string ValidateSemantics(std::string query) = 0; 18 | }; 19 | 20 | // Runs natual language to SQL prompting. 21 | class QuackingDuck { 22 | public: 23 | QuackingDuck(DatabaseInterface& db, std::string model = "") : db_(db), chat_(model) { } 24 | 25 | // Returns a one sentance summary of a schema. 26 | std::string ExplainSchema(); 27 | 28 | // Asks a Natural Language prompt, returns a SQL query. 29 | std::string Ask(std::string prompt); 30 | 31 | // Given a query that may or may not be syntactically correct, 32 | // returns a fixed version of the query. 33 | std::string FixupQuery(std::string query); 34 | 35 | private: 36 | // In order to get QuackingDuck to take into account schema, call 37 | // this before asking other questions. 38 | std::string ExplainSchemaPrompt(const ExtractedSchema& extracted_schema); 39 | 40 | // Ask a question, get a SQL query in response. 41 | std::string AskPrompt(std::string question); 42 | 43 | // Analyze a query. This is useful for adding context before Fixup. 44 | std::string AnalyzeQueryPrompt(std::string query); 45 | 46 | // You should call Ask or AnalyzeQuery before calling FixupQuery. 
47 | std::string FixupQueryPrompt(std::string error_message); 48 | 49 | private: 50 | Chat chat_; 51 | DatabaseInterface& db_; 52 | std::string query_; 53 | std::string schema_summary_; 54 | }; 55 | -------------------------------------------------------------------------------- /scripts/set_extension_name.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import sys, os, shutil 4 | from pathlib import Path 5 | 6 | shutil.copyfile(f'docs/NEXT_README.md', f'README.md') 7 | 8 | if (len(sys.argv) != 3): 9 | raise Exception('usage: python3 set_extension_name.py ') 10 | 11 | name_extension = sys.argv[1] 12 | name_function = sys.argv[2] 13 | 14 | def replace(file_name, to_find, to_replace): 15 | with open(file_name, 'r', encoding="utf8") as file : 16 | filedata = file.read() 17 | filedata = filedata.replace(to_find, to_replace) 18 | with open(file_name, 'w', encoding="utf8") as file: 19 | file.write(filedata) 20 | 21 | files_to_search = [] 22 | files_to_search.extend(Path('./.github').rglob('./**/*.yml')) 23 | files_to_search.extend(Path('./test').rglob('./**/*.py')) 24 | files_to_search.extend(Path('./test').rglob('./**/*.test')) 25 | files_to_search.extend(Path('./test').rglob('./**/*.js')) 26 | files_to_search.extend(Path('./src').rglob('./**/*.hpp')) 27 | files_to_search.extend(Path('./src').rglob('./**/*.cpp')) 28 | files_to_search.extend(Path('./src').rglob('./**/*.txt')) 29 | files_to_search.extend(Path('./src').rglob('./*.md')) 30 | 31 | def replace_everywhere(to_find, to_replace): 32 | for path in files_to_search: 33 | replace(path, to_find, to_replace) 34 | replace(path, to_find.capitalize(), to_replace.capitalize()) 35 | 36 | replace("./CMakeLists.txt", to_find, to_replace) 37 | replace("./Makefile", to_find, to_replace) 38 | replace("./Makefile", to_find.capitalize(), to_replace.capitalize()) 39 | replace("./Makefile", to_find.upper(), to_replace.upper()) 40 | replace("./README.md", to_find, 
# Builds, tests and (when deploy secrets are present) uploads the duckprompt
# extension for Windows.
name: Windows
on: [push, pull_request,repository_dispatch]
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
  cancel-in-progress: true
defaults:
  run:
    shell: bash

jobs:
  windows:
    name: Release
    runs-on: windows-latest
    strategy:
      matrix:
        # Add commits/tags to build against other DuckDB versions
        duckdb_version: [ '' ]

    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          submodules: 'true'

      - uses: actions/setup-python@v2
        with:
          python-version: '3.7'

      - name: Checkout DuckDB to version
        # Add commits/tags to build against other DuckDB versions
        if: ${{ matrix.duckdb_version != ''}}
        run: |
          cd duckdb
          git checkout ${{ matrix.duckdb_version }}

      - name: Build extension
        run: |
          make release
          build/release/test/Release/unittest.exe

      # The extension is built under its CMake target name (duckprompt), and
      # the artifact is named after the platform this job actually targets.
      - uses: actions/upload-artifact@v2
        with:
          name: windows_amd64-extensions
          path: |
            build/release/extension/duckprompt/duckprompt.duckdb_extension

      - name: Deploy
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.S3_DEPLOY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DEPLOY_KEY }}
          AWS_DEFAULT_REGION: ${{ secrets.S3_REGION }}
          BUCKET_NAME: ${{ secrets.S3_BUCKET }}
        run: |
          cd duckdb
          git fetch --tags
          export DUCKDB_VERSION=`git tag --points-at HEAD`
          export DUCKDB_VERSION=${DUCKDB_VERSION:=`git log -1 --format=%h`}
          cd ..
          if [[ "$AWS_ACCESS_KEY_ID" == "" ]] ; then
            echo 'No key set, skipping'
          elif [[ "$GITHUB_REF" =~ ^(refs/tags/v.+)$ ]] ; then
            python -m pip install awscli
            ./scripts/extension-upload.sh duckprompt ${{ github.ref_name }} $DUCKDB_VERSION windows_amd64 $BUCKET_NAME true
          elif [[ "$GITHUB_REF" =~ ^(refs/heads/main)$ ]] ; then
            python -m pip install awscli
            ./scripts/extension-upload.sh duckprompt `git log -1 --format=%h` $DUCKDB_VERSION windows_amd64 $BUCKET_NAME false
          fi
actions/setup-python@v2 33 | with: 34 | python-version: '3.7' 35 | 36 | - name: Checkout DuckDB to version 37 | if: ${{ matrix.duckdb_version != ''}} 38 | run: | 39 | cd duckdb 40 | git checkout ${{ matrix.duckdb_version }} 41 | 42 | # Build extension 43 | - name: Build extension 44 | shell: bash 45 | run: | 46 | make release 47 | make test 48 | 49 | - name: Deploy 50 | env: 51 | AWS_ACCESS_KEY_ID: ${{ secrets.S3_DEPLOY_ID }} 52 | AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DEPLOY_KEY }} 53 | AWS_DEFAULT_REGION: ${{ secrets.S3_REGION }} 54 | BUCKET_NAME: ${{ secrets.S3_BUCKET }} 55 | run: | 56 | cd duckdb 57 | git fetch --tags 58 | export DUCKDB_VERSION=`git tag --points-at HEAD` 59 | echo $DUCKDB_VERSION 60 | export DUCKDB_VERSION=${DUCKDB_VERSION:=`git log -1 --format=%h`} 61 | echo $DUCKDB_VERSION 62 | cd .. 63 | if [[ "$AWS_ACCESS_KEY_ID" == "" ]] ; then 64 | echo 'No key set, skipping' 65 | elif [[ "$GITHUB_REF" =~ ^(refs/tags/v.+)$ ]] ; then 66 | python -m pip install awscli 67 | ./scripts/extension-upload.sh quack ${{ github.ref_name }} $DUCKDB_VERSION osx_amd64 $BUCKET_NAME true 68 | ./scripts/extension-upload.sh quack ${{ github.ref_name }} $DUCKDB_VERSION osx_arm64 $BUCKET_NAME true 69 | elif [[ "$GITHUB_REF" =~ ^(refs/heads/main)$ ]] ; then 70 | python -m pip install awscli 71 | ./scripts/extension-upload.sh quack `git log -1 --format=%h` $DUCKDB_VERSION osx_amd64 $BUCKET_NAME false 72 | ./scripts/extension-upload.sh quack `git log -1 --format=%h` $DUCKDB_VERSION osx_arm64 $BUCKET_NAME false 73 | fi -------------------------------------------------------------------------------- /docs/NEXT_README.md: -------------------------------------------------------------------------------- 1 | # 2 | 3 | This repository is based on https://github.com/duckdb/extension-template, check it out if you want to build and ship your own DuckDB extension. 4 | 5 | --- 6 | 7 | This extension, , allow you to ... . 


## Building
To build the extension:
```sh
make
```
The main binaries that will be built are:
```sh
./build/release/duckdb
./build/release/test/unittest
./build/release/extension/<extension_name>/<extension_name>.duckdb_extension
```
- `duckdb` is the binary for the duckdb shell with the extension code automatically loaded.
- `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary.
- `<extension_name>.duckdb_extension` is the loadable binary as it would be distributed.

## Running the extension
To run the extension code, simply start the shell with `./build/release/duckdb`.

Now we can use the features from the extension directly in DuckDB. The template contains a single scalar function `quack()` that takes a string argument and returns a string:
```
D select quack('Jane') as result;
┌───────────────┐
│    result     │
│    varchar    │
├───────────────┤
│ Quack Jane 🐥 │
└───────────────┘
```

## Running the tests
Different tests can be created for DuckDB extensions. The primary way of testing DuckDB extensions should be the SQL tests in `./test/sql`. These SQL tests can be run using:
```sh
make test
```

### Installing the deployed binaries
To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the
`allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using.
Some examples: 48 | 49 | CLI: 50 | ```shell 51 | duckdb -unsigned 52 | ``` 53 | 54 | Python: 55 | ```python 56 | con = duckdb.connect(':memory:', config={'allow_unsigned_extensions' : 'true'}) 57 | ``` 58 | 59 | NodeJS: 60 | ```js 61 | db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"}); 62 | ``` 63 | 64 | Secondly, you will need to set the repository endpoint in DuckDB to the HTTP url of your bucket + version of the extension 65 | you want to install. To do this run the following SQL query in DuckDB: 66 | ```sql 67 | SET custom_extension_repository='bucket.s3.eu-west-1.amazonaws.com//latest'; 68 | ``` 69 | Note that the `/latest` path will allow you to install the latest extension version available for your current version of 70 | DuckDB. To specify a specific version, you can pass the version instead. 71 | 72 | After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB: 73 | ```sql 74 | INSTALL 75 | LOAD 76 | ``` 77 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean format debug release duckdb_debug duckdb_release pull update 2 | 3 | all: release 4 | 5 | MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) 6 | PROJ_DIR := $(dir $(MKFILE_PATH)) 7 | 8 | OSX_BUILD_UNIVERSAL_FLAG= 9 | ifeq (${OSX_BUILD_UNIVERSAL}, 1) 10 | OSX_BUILD_UNIVERSAL_FLAG=-DOSX_BUILD_UNIVERSAL=1 11 | endif 12 | ifeq (${STATIC_LIBCPP}, 1) 13 | STATIC_LIBCPP=-DSTATIC_LIBCPP=TRUE 14 | endif 15 | 16 | ifeq ($(GEN),ninja) 17 | GENERATOR=-G "Ninja" 18 | FORCE_COLOR=-DFORCE_COLORED_OUTPUT=1 19 | endif 20 | 21 | BUILD_FLAGS=-DEXTENSION_STATIC_BUILD=1 -DBUILD_TPCH_EXTENSION=1 -DBUILD_PARQUET_EXTENSION=1 ${OSX_BUILD_UNIVERSAL_FLAG} ${STATIC_LIBCPP} 22 | 23 | CLIENT_FLAGS := 24 | 25 | # These flags will make DuckDB build the extension 26 | 
EXTENSION_FLAGS=-DDUCKDB_OOT_EXTENSION_NAMES="duckprompt" -DDUCKDB_OOT_EXTENSION_DUCKPROMPT_PATH="$(PROJ_DIR)" -DDUCKDB_OOT_EXTENSION_DUCKPROMPT_SHOULD_LINK="TRUE" -DDUCKDB_OOT_EXTENSION_DUCKPROMPT_INCLUDE_PATH="$(PROJ_DIR)src/include" 27 | 28 | pull: 29 | git submodule init 30 | git submodule update --recursive --remote 31 | 32 | clean: 33 | rm -rf build 34 | rm -rf testext 35 | cd duckdb && make clean 36 | 37 | # Main build 38 | debug: 39 | mkdir -p build/debug && \ 40 | cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S ./duckdb/ -B build/debug && \ 41 | cmake --build build/debug --config Debug 42 | 43 | release: 44 | mkdir -p build/release && \ 45 | cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S ./duckdb/ -B build/release && \ 46 | cmake --build build/release --config Release 47 | 48 | # Client build 49 | debug_js: CLIENT_FLAGS=-DBUILD_NODE=1 -DBUILD_JSON_EXTENSION=1 50 | debug_js: debug 51 | 52 | debug_r: CLIENT_FLAGS=-DBUILD_R=1 53 | debug_r: debug 54 | 55 | debug_python: CLIENT_FLAGS=-DBUILD_PYTHON=1 -DBUILD_JSON_EXTENSION=1 -DBUILD_FTS_EXTENSION=1 -DBUILD_TPCH_EXTENSION=1 -DBUILD_VISUALIZER_EXTENSION=1 -DBUILD_TPCDS_EXTENSION=1 56 | debug_python: debug 57 | 58 | release_js: CLIENT_FLAGS=-DBUILD_NODE=1 -DBUILD_JSON_EXTENSION=1 59 | release_js: release 60 | 61 | release_r: CLIENT_FLAGS=-DBUILD_R=1 62 | release_r: release 63 | 64 | release_python: CLIENT_FLAGS=-DBUILD_PYTHON=1 -DBUILD_JSON_EXTENSION=1 -DBUILD_FTS_EXTENSION=1 -DBUILD_TPCH_EXTENSION=1 -DBUILD_VISUALIZER_EXTENSION=1 -DBUILD_TPCDS_EXTENSION=1 65 | release_python: release 66 | 67 | # Main tests 68 | test: test_release 69 | 70 | test_release: release 71 | ./build/release/test/unittest --test-dir . "[sql]" 72 | 73 | test_debug: debug 74 | ./build/debug/test/unittest --test-dir . 
"[sql]" 75 | 76 | # Client tests 77 | test_js: test_debug_js 78 | test_debug_js: debug_js 79 | cd duckdb/tools/nodejs && npm run test-path -- "../../../test/nodejs/**/*.js" 80 | 81 | test_release_js: release_js 82 | cd duckdb/tools/nodejs && npm run test-path -- "../../../test/nodejs/**/*.js" 83 | 84 | test_python: test_debug_python 85 | test_debug_python: debug_python 86 | cd test/python && python3 -m pytest 87 | 88 | test_release_python: release_python 89 | cd test/python && python3 -m pytest 90 | 91 | format: 92 | find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i 93 | cmake-format -i CMakeLists.txt 94 | 95 | update: 96 | git submodule update --remote --merge 97 | -------------------------------------------------------------------------------- /src/https.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "https.hpp" 6 | 7 | #define CPPHTTPLIB_OPENSSL_SUPPORT 8 | #include "httplib.hpp" 9 | 10 | #include "duckdb/common/exception.hpp" 11 | 12 | // Set PROMPT_DEBUG = 1 to see request bodies / responses 13 | // Set PROMPT_DEBUG = 2 to see headers as well. 
14 | int GetDebugLevel() { 15 | char * debug_env = std::getenv("PROMPT_DEBUG"); 16 | if (debug_env == nullptr || strlen(debug_env) == 0) {return 0;} 17 | int level = debug_env[0] - '0'; 18 | if (level < 0 || level > 9) {return 0;} 19 | return level; 20 | } 21 | 22 | static void logger(const duckdb_httplib_openssl::Request& request, const duckdb_httplib_openssl::Response& response) { 23 | int debug_level = GetDebugLevel(); 24 | if (debug_level <= 0) { 25 | return; 26 | } 27 | std::cerr << "Remote Addr: " << request.remote_addr << " Port:" << request.remote_port << "\n"; 28 | std::cerr << "Request:" << request.method << " " << request.path << "\n"; 29 | 30 | if (debug_level > 1) { 31 | for (auto header : request.headers) { 32 | if (header.first == "Authorization") { 33 | std::cerr << " " << header.first << ": " << "[Redacted]\n"; 34 | } else { 35 | std::cerr << " " << header.first << ": " << header.second << "\n"; 36 | } 37 | } 38 | } 39 | 40 | std::cerr << " Body:\n " << request.body; 41 | std::cerr << "\n\nResponse Code: " << response.status << "\n"; 42 | if (debug_level > 1) { 43 | for (auto header : response.headers) { 44 | std::cerr << " " << header.first << ": " << header.second << "\n"; 45 | } 46 | } 47 | std::cerr << " Body:\n " << response.body; 48 | } 49 | 50 | 51 | duckdb_httplib_openssl::SSLClient* GetClient(std::string host_port) { 52 | duckdb_httplib_openssl::SSLClient* client = new duckdb_httplib_openssl::SSLClient(host_port.c_str(), 443); 53 | client->set_follow_location(true); 54 | client->set_keep_alive(true); 55 | client->enable_server_certificate_verification(false); 56 | client->set_decompress(false); 57 | client->set_logger(logger); 58 | client->set_read_timeout(300); // seconds 59 | client->set_write_timeout(300); // seconds 60 | client->set_connection_timeout(300); // seconds 61 | return client; 62 | } 63 | 64 | HTTPS::HTTPS(std::string host) : client_(GetClient(host)) , host_(host) { } 65 | 66 | HTTPS::~HTTPS() { 67 | if (client_ != nullptr) 
{ 68 | delete client_; 69 | client_ = nullptr; 70 | } 71 | } 72 | 73 | HTTPSResponse HTTPS::Post( 74 | std::string path, const std::vector> & all_headers, 75 | std::string body) { 76 | 77 | if (!client_->is_valid()) { 78 | std::cerr << "SSL Client invalid\n"; 79 | throw duckdb::IOException("Unable to open SSL path to %s", host_); 80 | } 81 | duckdb_httplib_openssl::Headers headers; 82 | for(auto h : all_headers) { 83 | headers.emplace(h.first, h.second); 84 | } 85 | 86 | auto uri = "https://" + host_ + "/" + path; 87 | auto res = client_->Post(uri.c_str(), headers, body.c_str(), body.size(), "application/json"); 88 | 89 | if (res == nullptr) { 90 | std::cerr << "Post returned null\n"; 91 | throw duckdb::IOException("No response for HTTP %s to '%s'", "POST", uri); 92 | } 93 | return HTTPSResponse(res->status, res->body); 94 | } 95 | -------------------------------------------------------------------------------- /.github/workflows/Linux.yml: -------------------------------------------------------------------------------- 1 | name: Linux 2 | on: [push, pull_request,repository_dispatch] 3 | concurrency: 4 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 5 | cancel-in-progress: true 6 | defaults: 7 | run: 8 | shell: bash 9 | 10 | jobs: 11 | linux: 12 | name: Linux Release 13 | runs-on: ubuntu-latest 14 | container: ${{ matrix.container }} 15 | strategy: 16 | matrix: 17 | # Add commits/tags to build against other DuckDB versions 18 | duckdb_version: [ '' ] 19 | arch: ['linux_amd64', 'linux_arm64', 'linux_amd64_gcc4'] 20 | include: 21 | - arch: 'linux_amd64' 22 | container: 'ubuntu:16.04' 23 | - arch: 'linux_arm64' 24 | container: 'ubuntu:18.04' 25 | - arch: 'linux_amd64_gcc4' 26 | container: 'quay.io/pypa/manylinux2014_x86_64' 27 | env: 28 | GEN: ninja 29 | 30 | steps: 31 | - name: Install required ubuntu packages 32 | if: ${{ matrix.arch == 'linux_amd64' || 
matrix.arch == 'linux_arm64' }} 33 | run: | 34 | apt-get update -y -qq 35 | apt-get install -y -qq software-properties-common 36 | add-apt-repository ppa:git-core/ppa 37 | apt-get update -y -qq 38 | apt-get install -y -qq ninja-build make gcc-multilib g++-multilib libssl-dev wget openjdk-8-jdk zip maven unixodbc-dev libc6-dev-i386 lib32readline6-dev libssl-dev libcurl4-gnutls-dev libexpat1-dev gettext unzip build-essential checkinstall libffi-dev curl libz-dev openssh-client 39 | 40 | - name: Install Git 2.18.5 41 | if: ${{ matrix.arch == 'linux_amd64' || matrix.arch == 'linux_arm64' }} 42 | run: | 43 | wget https://github.com/git/git/archive/refs/tags/v2.18.5.tar.gz 44 | tar xvf v2.18.5.tar.gz 45 | cd git-2.18.5 46 | make 47 | make prefix=/usr install 48 | git --version 49 | 50 | - uses: actions/checkout@v3 51 | with: 52 | fetch-depth: 0 53 | submodules: 'true' 54 | 55 | - name: Checkout DuckDB to version 56 | if: ${{ matrix.duckdb_version != ''}} 57 | run: | 58 | cd duckdb 59 | git checkout ${{ matrix.duckdb_version }} 60 | 61 | - if: ${{ matrix.arch == 'linux_amd64_gcc4' }} 62 | uses: ./duckdb/.github/actions/centos_7_setup 63 | with: 64 | openssl: 0 65 | 66 | - if: ${{ matrix.arch == 'linux_amd64' || matrix.arch == 'linux_arm64' }} 67 | uses: ./duckdb/.github/actions/ubuntu_16_setup 68 | with: 69 | aarch64_cross_compile: ${{ matrix.arch == 'linux_arm64' && 1 }} 70 | 71 | # Build extension 72 | - name: Build extension 73 | env: 74 | GEN: ninja 75 | STATIC_LIBCPP: 1 76 | CC: ${{ matrix.arch == 'linux_arm64' && 'aarch64-linux-gnu-gcc' || '' }} 77 | CXX: ${{ matrix.arch == 'linux_arm64' && 'aarch64-linux-gnu-g++' || '' }} 78 | run: | 79 | make release 80 | 81 | - name: Build extension 82 | if: ${{ matrix.arch != 'linux_arm64'}} 83 | run: | 84 | make test 85 | 86 | - uses: actions/upload-artifact@v2 87 | with: 88 | name: ${{matrix.arch}}-extensions 89 | path: | 90 | build/release/extension/quack/quack.duckdb_extension 91 | 92 | - name: Deploy 93 | env: 94 | 
AWS_ACCESS_KEY_ID: ${{ secrets.S3_DEPLOY_ID }} 95 | AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DEPLOY_KEY }} 96 | AWS_DEFAULT_REGION: ${{ secrets.S3_REGION }} 97 | BUCKET_NAME: ${{ secrets.S3_BUCKET }} 98 | run: | 99 | git config --global --add safe.directory '*' 100 | cd duckdb 101 | git fetch --tags 102 | export DUCKDB_VERSION=`git tag --points-at HEAD` 103 | export DUCKDB_VERSION=${DUCKDB_VERSION:=`git log -1 --format=%h`} 104 | cd .. 105 | if [[ "$AWS_ACCESS_KEY_ID" == "" ]] ; then 106 | echo 'No key set, skipping' 107 | elif [[ "$GITHUB_REF" =~ ^(refs/tags/v.+)$ ]] ; then 108 | python3 -m pip install pip awscli 109 | ./scripts/extension-upload.sh quack ${{ github.ref_name }} $DUCKDB_VERSION ${{matrix.arch}} $BUCKET_NAME true 110 | elif [[ "$GITHUB_REF" =~ ^(refs/heads/main)$ ]] ; then 111 | python3 -m pip install pip awscli 112 | ./scripts/extension-upload.sh quack `git log -1 --format=%h` $DUCKDB_VERSION ${{matrix.arch}} $BUCKET_NAME false 113 | fi -------------------------------------------------------------------------------- /.github/workflows/ExtensionTemplate.yml: -------------------------------------------------------------------------------- 1 | # 2 | # NOTE: this workflow is for testing the extension template itself, feel free to delete this file in your own repo. 
3 | # 4 | 5 | name: Extension Template 6 | on: [push, pull_request,repository_dispatch] 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 9 | cancel-in-progress: true 10 | 11 | jobs: 12 | linux: 13 | name: Linux Extensions 14 | if: ${{ vars.RUN_RENAME_TEST == 'true' }} 15 | runs-on: ubuntu-latest 16 | container: ubuntu:16.04 17 | strategy: 18 | matrix: 19 | # Add commits/tags to build against other DuckDB versions 20 | duckdb_version: [ '' ] 21 | env: 22 | GEN: ninja 23 | defaults: 24 | run: 25 | shell: bash 26 | 27 | steps: 28 | - name: Install required ubuntu packages 29 | run: | 30 | apt-get update -y -qq 31 | apt-get install -y -qq software-properties-common 32 | add-apt-repository ppa:git-core/ppa 33 | apt-get update -y -qq 34 | apt-get install -y -qq ninja-build make gcc-multilib g++-multilib libssl-dev wget openjdk-8-jdk zip maven unixodbc-dev libc6-dev-i386 lib32readline6-dev libssl-dev libcurl4-gnutls-dev libexpat1-dev gettext unzip build-essential checkinstall libffi-dev curl libz-dev openssh-client 35 | 36 | - name: Install Git 2.18.5 37 | run: | 38 | wget https://github.com/git/git/archive/refs/tags/v2.18.5.tar.gz 39 | tar xvf v2.18.5.tar.gz 40 | cd git-2.18.5 41 | make 42 | make prefix=/usr install 43 | git --version 44 | 45 | - uses: actions/checkout@v3 46 | with: 47 | fetch-depth: 0 48 | submodules: 'true' 49 | 50 | - name: Checkout DuckDB to version 51 | if: ${{ matrix.duckdb_version != ''}} 52 | run: | 53 | cd duckdb 54 | git checkout ${{ matrix.duckdb_version }} 55 | 56 | - uses: ./duckdb/.github/actions/ubuntu_16_setup 57 | 58 | - name: Rename extension 59 | run: | 60 | python3 scripts/set_extension_name.py testext 61 | 62 | - name: Build 63 | run: | 64 | make 65 | 66 | - name: Test 67 | run: | 68 | make test 69 | 70 | macos: 71 | name: MacOS 72 | if: ${{ vars.RUN_RENAME_TEST == 'true' }} 73 | runs-on: macos-latest 74 
| strategy: 75 | matrix: 76 | # Add commits/tags to build against other DuckDB versions 77 | duckdb_version: [ ''] 78 | env: 79 | OSX_BUILD_UNIVERSAL: 1 80 | GEN: ninja 81 | defaults: 82 | run: 83 | shell: bash 84 | 85 | steps: 86 | - uses: actions/checkout@v3 87 | with: 88 | fetch-depth: 0 89 | submodules: 'true' 90 | 91 | - name: Install Ninja 92 | run: brew install ninja 93 | 94 | - uses: actions/setup-python@v2 95 | with: 96 | python-version: '3.7' 97 | 98 | - name: Checkout DuckDB to version 99 | if: ${{ matrix.duckdb_version != ''}} 100 | run: | 101 | cd duckdb 102 | git checkout ${{ matrix.duckdb_version }} 103 | 104 | - name: Rename extension 105 | run: | 106 | python scripts/set_extension_name.py testext 107 | 108 | - name: Build 109 | run: | 110 | make 111 | 112 | - name: Test 113 | run: | 114 | make test 115 | 116 | windows: 117 | name: Windows Extensions (x64) 118 | if: ${{ vars.RUN_RENAME_TEST == 'true' }} 119 | runs-on: windows-latest 120 | strategy: 121 | matrix: 122 | # Add commits/tags to build against other DuckDB versions 123 | duckdb_version: [ '' ] 124 | defaults: 125 | run: 126 | shell: bash 127 | 128 | steps: 129 | - uses: actions/checkout@v3 130 | with: 131 | fetch-depth: 0 132 | submodules: 'true' 133 | 134 | - uses: actions/setup-python@v2 135 | with: 136 | python-version: '3.7' 137 | 138 | - name: Checkout DuckDB to version 139 | # Add commits/tags to build against other DuckDB versions 140 | if: ${{ matrix.duckdb_version != ''}} 141 | run: | 142 | cd duckdb 143 | git checkout ${{ matrix.duckdb_version }} 144 | 145 | - name: Rename extension 146 | run: | 147 | python scripts/set_extension_name.py testext 148 | 149 | - name: Build 150 | run: | 151 | make 152 | 153 | - name: Build extension 154 | run: | 155 | build/release/test/Release/unittest.exe -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # duckprompt 2 | 
3 | This is a simple DuckDB extension that calls OpenAI's ChatGPT to do natural language queries. It is based on work from 4 | Till Döhmen that he described in his blog post here: https://tdoehmen.github.io/blog/2023/03/07/quackingduck.html. 5 | 6 | To summarize, we first call ChatGPT to provide the schema, then we call again to have it generate a SQL query. We then check whether 7 | the query is valid, and if not, we ask ChatGPT to fix it. 8 | 9 | The context only includes the schema, not data, which makes it harder for ChatGPT to answer some questions. But it works surprisingly 10 | well with the toy schema I've been working with, even coming up with JOINs and window functions. 11 | 12 | There are three basic functions: 13 | * Ask a natural language query and get SQL back (`prompt_sql` table function) 14 | * Ask a natural language query and run it (`prompt_query` pragma) 15 | * Provide SQL that may or may not be valid and fix it (`prompt_fixup` table function) 16 | 17 | --- 18 | 19 | ## Installing the extension 20 | The binaries for OSX amd64 are built and staged for duckdb 0.8.0 under the s3 location `s3://motherduck-duckdb-extensions/jordan/duckprompt`. 21 | They're not signed so you need to run duckdb with the `-unsigned` flag. 22 | ``` 23 | % duckdb -unsigned 24 | v0.8.0 e8e4cea5ec 25 | Enter ".help" for usage hints. 26 | D SET custom_extension_repository='motherduck-duckdb-extensions.s3.amazonaws.com/jordan/duckprompt/duckprompt/0.0.1'; 27 | D install duckprompt; 28 | D load duckprompt; 29 | ``` 30 | 31 | ## Running the extension 32 | 33 | To run, you'll need an OpenAI API key, exported in the `OPENAI_API_KEY` environment variable before starting duckdb. You can get one here: 34 | https://platform.openai.com/account/api-keys 35 | 36 | To run, first install and load the extension. If you build it yourself you can run the duckdb from `build/release/duckdb` and you can skip this step. 37 | ``` 38 | force install '/duckprompt' 39 | load duckprompt 40 | 41 | ``` 42 | 43 | To run a natural language query, use `pragma prompt_query`. 
For example: 44 | ``` 45 | D pragma prompt_query('Return the minimum amount paid by customers who used a visa card (debit or credit) to purchase a product.') ; 46 | ┌───────────┐ 47 | │ min(paid) │ 48 | │ double │ 49 | ├───────────┤ 50 | │ 360.0 │ 51 | └───────────┘ 52 | ``` 53 | 54 | If you want to see what queries actually get run, try the `prompt_sql` table function: 55 | ``` 56 | D select * from prompt_sql('Return the minimum amount paid by customers who used a visa card (debit or credit) to purchase a product.') ; 57 | ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ 58 | │ prompt('Return the minimum amount paid by customers who used a visa card (debit or credit) to purchase a product.') │ 59 | │ varchar │ 60 | ├─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤ 61 | │ SELECT MIN(paid) FROM sales WHERE type_of_payment LIKE '%visa%'; │ 62 | └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ 63 | ``` 64 | 65 | You can also "fix" a query. To fix a query, run the `prompt_fixup` table function. This can detect errors with syntax and 66 | usage, and can even fix problems relating to the schema. 
67 | ``` 68 | D select * from prompt_fixup("SEELECT * from customers"); 69 | ┌─────────────────────────┐ 70 | │ query │ 71 | │ varchar │ 72 | ├─────────────────────────┤ 73 | │ SELECT * FROM customers │ 74 | └─────────────────────────┘ 75 | ``` 76 | 77 | ## Examples 78 | Also note that if you want to set up sample data with a sales star schema you 79 | can run the following script 80 | ``` 81 | duckdb -init ./scripts/build_sample_db.sql ./build/release/sales.db 82 | ``` 83 | 84 | Here are some example questions to ask: 85 | ``` 86 | pragma prompt_query("Who bought the most PCs, print also the users name?"); 87 | pragma prompt_query("List only the model number of all products made by maker B."); 88 | pragma prompt_query("Return the minimum amount paid by customers who used a visa card (debit or credit) to purchase a product."); 89 | pragma prompt_query("Find the customer_id of customers who have the letter 'e' either in their first name or in their last name."); 90 | pragma prompt_query(" 91 | Assume all prices in the Laptops table are in Euro. List the model numbers of all laptops with ram at least 1024. For each model, 92 | list also its price in USD. Assume that 1 USD = 0.85 EURO (you need to divide the price by 0.85). 
Name the price column 'price (USD)'."); 93 | pragma prompt_query("Return a list of makers that make more than four different models."); 94 | pragma prompt_query("List all first names of customers in an ascending order based on the number of purchases made by customers with that first name."); 95 | pragma prompt_query("Show a list of sales per customer, with a running sum of their total spending across all of their sales"); 96 | 97 | select * from prompt_fixup("SELEECT * from customers"); 98 | select * from prompt_fixup("SELECT * from customer"); 99 | -------------------------------------------------------------------------------- /scripts/build_sample_db.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE TABLE customers ( 2 | customer_id char(10) NOT NULL, 3 | firstname varchar(32) default NULL, 4 | lastname varchar(32) default NULL, 5 | city varchar(32) default NULL, 6 | address varchar(128) default NULL, 7 | email varchar(128) default NULL, 8 | 9 | PRIMARY KEY (customer_id) ); 10 | 11 | CREATE OR REPLACE TABLE laptops ( 12 | model char(4) NOT NULL default '', 13 | speed double default NULL, 14 | ram int default NULL, 15 | hd int default NULL, 16 | screen double default NULL, 17 | price double default NULL, 18 | 19 | PRIMARY KEY (model) ); 20 | 21 | CREATE OR REPLACE TABLE pcs ( 22 | model char(4) NOT NULL, 23 | speed double NOT NULL, 24 | ram int NOT NULL, 25 | hd int NOT NULL, 26 | price double NOT NULL, 27 | 28 | PRIMARY KEY (model) ); 29 | 30 | CREATE OR REPLACE TABLE printers ( 31 | model char(4) NOT NULL default '', 32 | color varchar(5) default NULL, 33 | type varchar(10) default NULL, 34 | price double default NULL, 35 | 36 | PRIMARY KEY (model) ); 37 | 38 | CREATE OR REPLACE TABLE products ( 39 | maker char(1) default NULL, 40 | model char(4) NOT NULL default '', 41 | type varchar(10) default NULL, 42 | 43 | PRIMARY KEY (model) ); 44 | 45 | CREATE OR REPLACE TABLE sales ( 46 | customer_id char(10) 
NOT NULL default '', 47 | model char(4) NOT NULL default '', 48 | quantity int default NULL, 49 | day date NOT NULL default '0000-00-00', 50 | paid double default NULL, 51 | type_of_payment varchar(32) default NULL, 52 | 53 | PRIMARY KEY (customer_id,model,day) ); 54 | 55 | INSERT INTO customers (customer_id, firstname, lastname, city, address, email) 56 | VALUES 57 | ('1122334455', 'Ann', 'O''Brien', 'Rotterdam', '1 Jervis St.', 'ann@uva.nl'), 58 | ('1231231231', 'John', 'Doe', 'Amsterdam', NULL, NULL), 59 | ('1234567890', 'Maya', 'Ramanath', 'Diemen', 'Dalsteindreef 3002', NULL), 60 | ('9876543210', 'Ji', 'Zhang', 'Utrecht', 'Jaarbeursplien 24', 'jack@ucc.ie'), 61 | ('9999999999', 'Norah', 'Jones', 'Amsterdam', 'Passheuvelweg 34', 'nj@yahoo.com'); 62 | 63 | INSERT INTO laptops (model, speed, ram, hd, screen, price) 64 | VALUES 65 | ('2001', 2, 2048, 240, 20.1, 3673), 66 | ('2002', 1.73, 1024, 80, 17, 949), 67 | ('2003', 1.8, 512, 60, 15.4, 549), 68 | ('2004', 2, 512, 60, 13.3, 1150), 69 | ('2005', 2.16, 1024, 120, 17, 2500), 70 | ('2006', 2, 2048, 80, 15.4, 1700), 71 | ('2007', 1.83, 1024, 120, 13.3, 1429), 72 | ('2008', 1.6, 1024, 100, 15.4, 900), 73 | ('2009', 1.6, 512, 80, 14.1, 680), 74 | ('2010', 2, 2048, 160, 15.4, 2300); 75 | 76 | INSERT INTO pcs (model, speed, ram, hd, price) 77 | VALUES 78 | ('1001', 2.66, 1024, 250, 2114), 79 | ('1002', 2.1, 512, 250, 995), 80 | ('1003', 1.42, 512, 80, 478), 81 | ('1004', 2.8, 1024, 250, 649), 82 | ('1005', 3.2, 512, 250, 630), 83 | ('1006', 3.2, 1024, 320, 1049), 84 | ('1007', 2.2, 1024, 200, 510), 85 | ('1008', 2.2, 2048, 250, 770), 86 | ('1009', 2, 1024, 250, 650), 87 | ('1010', 2.8, 2048, 300, 770), 88 | ('1011', 1.86, 2048, 160, 959), 89 | ('1012', 2.8, 1024, 160, 649), 90 | ('1013', 3.06, 512, 80, 529); 91 | 92 | INSERT INTO printers (model, color, type, price) 93 | VALUES 94 | ('3001', 'TRUE', 'ink-jet', 99), 95 | ('3002', 'FALSE', 'laser', 239), 96 | ('3003', 'TRUE', 'laser', 899), 97 | ('3004', 'TRUE', 
'ink-jet', 120), 98 | ('3005', 'FALSE', 'laser', 120), 99 | ('3006', 'TRUE', 'ink-jet', 100), 100 | ('3007', 'TRUE', 'laser', 200); 101 | 102 | INSERT INTO products (maker, model, type) 103 | VALUES 104 | ('A', '1001', 'pc'), 105 | ('A', '1002', 'pc'), 106 | ('A', '1003', 'pc'), 107 | ('B', '1004', 'pc'), 108 | ('B', '1005', 'pc'), 109 | ('B', '1006', 'pc'), 110 | ('C', '1007', 'pc'), 111 | ('D', '1008', 'pc'), 112 | ('D', '1009', 'pc'), 113 | ('D', '1010', 'pc'), 114 | ('E', '1011', 'pc'), 115 | ('E', '1012', 'pc'), 116 | ('E', '1013', 'pc'), 117 | ('E', '2001', 'laptop'), 118 | ('E', '2002', 'laptop'), 119 | ('E', '2003', 'laptop'), 120 | ('A', '2004', 'laptop'), 121 | ('A', '2005', 'laptop'), 122 | ('A', '2006', 'laptop'), 123 | ('B', '2007', 'laptop'), 124 | ('F', '2008', 'laptop'), 125 | ('F', '2009', 'laptop'), 126 | ('G', '2010', 'laptop'), 127 | ('E', '3001', 'printer'), 128 | ('E', '3002', 'printer'), 129 | ('E', '3003', 'printer'), 130 | ('D', '3004', 'printer'), 131 | ('D', '3005', 'printer'), 132 | ('H', '3006', 'printer'), 133 | ('H', '3007', 'printer'); 134 | 135 | INSERT INTO sales (customer_id, model, quantity, day, paid, type_of_payment) 136 | VALUES 137 | ('1122334455', '2010', 1, '2020-12-19', 2300, 'mastercard credit'), 138 | ('1122334455', '3001', 1, '2020-12-18', 99, 'cash'), 139 | ('1231231231', '2002', 2, '2020-12-19', 1898, 'visa credit'), 140 | ('1231231231', '3002', 1, '2020-12-18', 239, 'cash'), 141 | ('1234567890', '1001', 1, '2020-12-20', 1902.6, 'mastercard credit'), 142 | ('9876543210', '1007', 1, '2020-12-17', 510, 'visa debit'), 143 | ('9876543210', '1007', 3, '2020-12-19', 1530, 'visa debit'), 144 | ('9876543210', '2002', 1, '2020-12-17', 949, 'visa debit'), 145 | ('9999999999', '1007', 1, '2020-12-20', 459, 'visa credit'), 146 | ('9999999999', '3007', 2, '2020-12-20', 360, 'visa credit'); 147 | 148 | -------------------------------------------------------------------------------- /src/quacking_duck.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "quacking_duck.hpp" 7 | #include "chat.hpp" 8 | 9 | 10 | #include "yyjson.hpp" 11 | 12 | 13 | std::string QuackingDuck::ExplainSchema() { 14 | ExtractedSchema extracted_schema; 15 | db_.ExtractSchema(extracted_schema); 16 | return ExplainSchemaPrompt(extracted_schema); 17 | } 18 | 19 | std::string QuackingDuck::Ask(std::string prompt) { 20 | ExplainSchema(); 21 | std::string query = AskPrompt(prompt); 22 | std::string fixed = FixupQuery(query); 23 | return (query == fixed)? query : AskPrompt(prompt); 24 | } 25 | 26 | std::string QuackingDuck::FixupQuery(std::string query) { 27 | std::string error = db_.ValidateParse(query); 28 | if (error.size() > 0) { 29 | AnalyzeQueryPrompt(query); 30 | return FixupQueryPrompt(error); 31 | } 32 | ExplainSchema(); 33 | error = db_.ValidateSemantics(query); 34 | if (error.size() > 0) { 35 | AnalyzeQueryPrompt(query); 36 | return FixupQueryPrompt(error); 37 | } 38 | return query; 39 | } 40 | 41 | static const char * c_ask_template = "Output a single SQL query without any explanation and do " 42 | "not add anything to the query that was not part of the question. 
" 43 | "Make sure to only use tables and columns from the schema above and write a query " 44 | "to answer the following question:\n" 45 | "{question}" 46 | "\n"; 47 | 48 | std::string TemplateReplace(std::string prompt_template, std::initializer_list args) { 49 | int ii = 0; 50 | std::string name; 51 | int idx = 0; 52 | for (auto cur = args.begin(); cur != args.end(); ++cur, ++idx) { 53 | if (idx % 2 == 0) { 54 | name = *cur; 55 | continue; 56 | } 57 | std::string value = *cur; 58 | int pos = prompt_template.find("{" + name + "}"); 59 | if (pos == std::string::npos) { 60 | throw duckdb::IOException("Key %s not found in prompt template %s", name, prompt_template); 61 | } 62 | auto len = name.length() + 2; 63 | prompt_template = prompt_template.replace(pos, len, value); 64 | } 65 | return prompt_template; 66 | } 67 | 68 | std::string QuackingDuck::AskPrompt(std::string question) { 69 | chat_.SetSystemContext("You are a helpful assistant that can generate Postgresql code based on " 70 | "the user input. 
You do not respond with any human readable text, only SQL code."); 71 | std::string whole_prompt = TemplateReplace(c_ask_template, {"question", question}); 72 | query_ = chat_.SendPrompt(whole_prompt); 73 | return query_; 74 | } 75 | 76 | static const char* c_schema_template = "SQL schema of my database:\n" 77 | "```{schema}```" 78 | "\nExplain in one sentence what the data is about"; 79 | std::string QuackingDuck::ExplainSchemaPrompt(const ExtractedSchema& extracted_schema) { 80 | if (schema_summary_.length() > 0) { 81 | return schema_summary_; 82 | } 83 | chat_.Reset("You are a helpful assistant that can generate an human redable summary " 84 | " of database content based on the schema."); 85 | 86 | std::string whole_prompt = TemplateReplace(c_schema_template, {"schema", extracted_schema.SchemaToString()}); 87 | 88 | schema_summary_ = chat_.SendPrompt(whole_prompt); 89 | return schema_summary_; 90 | } 91 | 92 | std::string ExtractedSchema::SchemaToString() const { 93 | std::string schema = ""; 94 | for (std::string current_table_ddl : table_ddl) { 95 | schema.append(current_table_ddl); 96 | schema.append("\n"); 97 | } 98 | 99 | return schema; 100 | } 101 | 102 | static const char* c_analyze_template = "Here is my SQL query:\n" 103 | "```{query}```" 104 | "\nPlease respond with the SQL query and an explaiation about what the query will do."; 105 | 106 | std::string QuackingDuck::AnalyzeQueryPrompt(std::string query) { 107 | if (query_.length() > 0) { 108 | return query_; 109 | } 110 | chat_.SetSystemContext("You are a helpful assistant that is an expert in SQL code who can output " 111 | "a human readable summary of a SQL query."); 112 | std::string whole_prompt = TemplateReplace(c_analyze_template, {"query", query}); 113 | std::string result = chat_.SendPrompt(whole_prompt); 114 | query_ = query; 115 | return result; 116 | } 117 | 118 | // Watch out, the . character doesn't match newlines, so we need to handle them specially. 
// Returns the text enclosed between the first and the last Markdown code
// fence (```) of `message`, with any newlines directly after the opening
// fence removed. When `message` does not contain two non-overlapping fences
// it is returned unchanged.
//
// This is done with plain string searches rather than std::regex: a pattern
// such as "(.|\n)*" recurses once per matched character in common standard
// library implementations, which can exhaust the stack on long model
// responses, and '.' does not match '\r', so responses with CRLF line endings
// were never un-fenced.
std::string ExtractMarkdownSelect(const std::string& message) {
  const std::string fence = "```";

  const std::size_t open = message.find(fence);
  if (open == std::string::npos) {
    return message;
  }
  const std::size_t close = message.rfind(fence);
  // The closing fence must start after the opening fence ends; otherwise
  // there is only one fence (or two overlapping runs of backticks).
  if (close < open + fence.size()) {
    return message;
  }

  std::size_t begin = open + fence.size();
  while (begin < close && message[begin] == '\n') {
    ++begin;
  }
  return message.substr(begin, close - begin);
}

// Best-effort extraction of a SELECT statement from a chat response.
//
// Steps:
//   1. Strip an enclosing Markdown code fence, if any.
//   2. If " SELECT" (or, failing that, "\nSELECT") occurs, drop everything up
//      to and including the character preceding "SELECT".
//   3. If ";\n" occurs in what remains, cut the text just before it (the
//      semicolon itself is dropped as well).
// When none of the markers are found the text is returned as-is.
std::string ExtractSelect(std::string message) {
  message = ExtractMarkdownSelect(message);

  std::size_t pos = message.find(" SELECT");
  if (pos == std::string::npos) {
    pos = message.find("\nSELECT");
  }
  // pos + 1 skips the ' ' or '\n' that precedes the keyword.
  std::string result = (pos == std::string::npos) ? message : message.substr(pos + 1);

  pos = result.find(";\n");
  if (pos != std::string::npos) {
    result.erase(pos);
  }
  return result;
}
// Escapes `unencoded` so it can be placed between double quotes as a JSON
// string value.
//
// The quotation mark and backslash are backslash-escaped; backspace, form
// feed, newline, carriage return and tab use their short escapes; every other
// byte below 0x20 is written as \u00XX. All remaining bytes, including
// multi-byte UTF-8 sequences, are copied through unchanged.
//
// The surrounding quotes are NOT added; the caller supplies them.
std::string JsonEncode(std::string unencoded) {
  static const char kHexDigits[] = "0123456789abcdef";

  std::string encoded;
  encoded.reserve(unencoded.size());
  for (const char raw : unencoded) {
    // Work on the unsigned value so bytes >= 0x80 are not mistaken for
    // control characters on platforms where char is signed.
    const unsigned char ch = static_cast<unsigned char>(raw);
    switch (ch) {
    case '"':
      encoded += "\\\"";
      break;
    case '\\':
      encoded += "\\\\";
      break;
    case '\b':
      encoded += "\\b";
      break;
    case '\f':
      encoded += "\\f";
      break;
    case '\n':
      encoded += "\\n";
      break;
    case '\r':
      encoded += "\\r";
      break;
    case '\t':
      encoded += "\\t";
      break;
    default:
      if (ch < 0x20) {
        encoded += "\\u00";
        encoded += kHexDigits[ch >> 4];
        encoded += kHexDigits[ch & 0x0F];
      } else {
        encoded += raw;
      }
      break;
    }
  }
  return encoded;
}
"model":"gpt-3.5-turbo-0301","usage":{"prompt_tokens":103,"completion_tokens":94,"total_tokens":197}, 68 | "choices":[{"message":{"role":"assistant", 69 | "content":"SELECT \n sum(l_extendedprice * (1 - l_discount)) as revenue \nFROM \n lineitem \nWHERE \n 70 | l_shipdate >= date '1994-01-01'\n AND l_shipdate < date '1994-01-01' + interval '1' year \n 71 | AND l_discount between 0.06 - 0.01 AND 0.06 + 0.01 \n AND l_quantity < 24;"}, 72 | "finish_reason":"stop","index":0}]} 73 | */ 74 | 75 | std::string ParseResponse(std::string response) { 76 | yyjson_doc *doc = yyjson_read(response.c_str(), response.length(), 0); 77 | std::string content; 78 | do { 79 | yyjson_val *root_json = yyjson_doc_get_root(doc); 80 | if (root_json == nullptr) { 81 | std::cerr << "Unable to read the root of the response"; 82 | throw duckdb::IOException("Invalid Chat Response: missing root"); 83 | } 84 | yyjson_val *choices_json = yyjson_obj_get(root_json, "choices"); 85 | if (choices_json == nullptr) { 86 | std::cerr << "Unable to choices object from the root"; 87 | throw duckdb::IOException("Invalid Chat Response: missing 'choices''"); 88 | break; 89 | } 90 | size_t idx, max; 91 | yyjson_val *choice_json; 92 | 93 | yyjson_arr_foreach(choices_json, idx, max, choice_json) { 94 | yyjson_val* message_json = yyjson_obj_get(choice_json, "message"); 95 | if (message_json == nullptr) { 96 | std::cerr << "Unable to read the message object from the choice"; 97 | throw duckdb::IOException("Invalid Chat Response: missing 'message''"); 98 | } 99 | 100 | yyjson_val* content_json = yyjson_obj_get(message_json, "content"); 101 | if (message_json == nullptr) { 102 | std::cerr << "Unable to read the content object from the message"; 103 | throw duckdb::IOException("Invalid Chat Response: missing 'content''"); 104 | } 105 | content = yyjson_get_str(content_json); 106 | break; 107 | } 108 | } while(false); 109 | if (content.size() == 0) { 110 | throw duckdb::IOException("Invalid Chat Response: empty content"); 
111 | } 112 | yyjson_doc_free(doc); 113 | return content; 114 | } 115 | 116 | std::string Chat::GenerateMessages() { 117 | std::string inner; 118 | 119 | std::vector messages; 120 | for (auto single_context : context_) { 121 | messages.push_back(GenerateMessage(single_context)); 122 | } 123 | return "[" + join(messages, ',') + "]"; 124 | } 125 | 126 | /* 127 | Example: 128 | {"model": "gpt-3.5-turbo", 129 | "messages":[{"role": "system", "content":"Prompt goes here"}]} 130 | */ 131 | std::string Chat::GenerateRequest() { 132 | // Create a mutable doc 133 | yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); 134 | yyjson_mut_val *root = yyjson_mut_obj(doc); 135 | yyjson_mut_doc_set_root(doc, root); 136 | yyjson_mut_obj_add_str(doc, root, "model", model_.c_str()); 137 | 138 | yyjson_mut_val* message_arr = yyjson_mut_arr(doc); 139 | 140 | 141 | // Create objects and add them to the array 142 | 143 | for (auto single_context : context_) { 144 | yyjson_mut_val *obj = yyjson_mut_obj(doc); 145 | yyjson_mut_obj_add_strcpy(doc, obj, "role", single_context.role.c_str()); 146 | yyjson_mut_obj_add_strcpy(doc, obj, "content", single_context.content.c_str()); 147 | yyjson_mut_arr_append(message_arr, obj); 148 | } 149 | 150 | // Add the array to the root object 151 | yyjson_mut_obj_add(root, yyjson_mut_str(doc, "messages"), message_arr); 152 | 153 | // To string, minified 154 | const char *json = yyjson_mut_write(doc, 0, NULL); 155 | std::string result; 156 | if (json) { 157 | result = std::string(json); 158 | free((void *)json); 159 | } else { 160 | std::cerr << "Invalid json generated"; 161 | throw duckdb::InternalException("Error generating json message"); 162 | } 163 | 164 | // Free the doc 165 | yyjson_mut_doc_free(doc); 166 | return result; 167 | } 168 | 169 | std::string Chat::SendPrompt(std::string prompt) { 170 | HTTPS https(c_open_ai_host); 171 | std::string question = prompt; 172 | std::vector > headers; 173 | std::string auth_header = GetAuthorizationHeader(); 174 | if 
(auth_header.length() > 0) { 175 | headers.push_back(std::make_pair(std::string("Authorization"), auth_header)); 176 | } else { 177 | std::cerr << "Missing authorization key for OpenAI"; 178 | } 179 | 180 | context_.push_back(ChatContext("user", prompt)); 181 | std::string body = GenerateRequest(); 182 | 183 | HTTPSResponse response = https.Post(c_chat_uri, headers, body); 184 | if (response.code != 200) { 185 | throw duckdb::IOException("HTTP Request returned HTTP %d for %s to '%s'", response.code, "POST", c_chat_uri); 186 | } 187 | 188 | std::string result = ParseResponse(response.response); 189 | if (result.size() > 0) { 190 | context_.push_back(ChatContext("assistant", result)); 191 | } 192 | return result; 193 | 194 | } -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # WIP Disclaimer 2 | This template is currently a work-in-progress. Feel free to play around with it and give us feedback. Note also that this template depends on a development version of DuckDB. 3 | 4 | Get in contact with fellow extension developers on https://discord.duckdb.org and follow https://duckdb.org/news for more information on official launch. 5 | 6 | # DuckDB Extension Template 7 | The main goal of this template is to allow users to easily develop, test and distribute their own DuckDB extension. 8 | 9 | ## Getting started 10 | First step to getting started is to create your own repo from this template by clicking `Use this template`. Then clone your new repository using 11 | ```sh 12 | git clone --recurse-submodules https://github.com//.git 13 | ``` 14 | Note that `--recurse-submodules` will ensure the correct version of duckdb is pulled allowing you to get started right away. 
15 | 16 | ## Building 17 | To build the extension: 18 | ```sh 19 | make 20 | ``` 21 | The main binaries that will be built are: 22 | ```sh 23 | ./build/release/duckdb 24 | ./build/release/test/unittest 25 | ./build/release/extension/<extension_name>/<extension_name>.duckdb_extension 26 | ``` 27 | - `duckdb` is the binary for the duckdb shell with the extension code automatically loaded. 28 | - `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary. 29 | - `<extension_name>.duckdb_extension` is the loadable binary as it would be distributed. 30 | 31 | ## Running the extension 32 | To run the extension code, simply start the shell with `./build/release/duckdb`. 33 | 34 | Now we can use the features from the extension directly in DuckDB. The template contains a single scalar function `quack()` that takes a string argument and returns a string: 35 | ``` 36 | D select quack('Jane') as result; 37 | ┌───────────────┐ 38 | │ result │ 39 | │ varchar │ 40 | ├───────────────┤ 41 | │ Quack Jane 🐥 │ 42 | └───────────────┘ 43 | ``` 44 | 45 | ## Running the tests 46 | Different tests can be created for DuckDB extensions. The primary way of testing DuckDB extensions should be the SQL tests in `./test/sql`. These SQL tests can be run using: 47 | ```sh 48 | make test 49 | ``` 50 | 51 | ## Getting started with your own extension 52 | After creating a repository from this template, the first step is to name your extension. To rename the extension, run: 53 | ``` 54 | python3 ./scripts/set_extension_name.py <extension_name_you_want> 55 | ``` 56 | Feel free to delete the script after this step. 57 | 58 | Now you're good to go! 
After a (re)build, you should now be able to use your duckdb extension: 59 | ``` 60 | ./build/release/duckdb 61 | D select <extension_name_you_chose>('Jane') as result; 62 | ┌─────────────────────────────────────┐ 63 | │ result │ 64 | │ varchar │ 65 | ├─────────────────────────────────────┤ 66 | │ <extension_name_you_chose> Jane 🐥 │ 67 | └─────────────────────────────────────┘ 68 | ``` 69 | 70 | For inspiration/examples on how to extend DuckDB in a more meaningful way, check out the [test extensions](https://github.com/duckdb/duckdb/blob/master/test/extension), 71 | the [in-tree extensions](https://github.com/duckdb/duckdb/tree/master/extension), and the [out-of-tree extensions](https://github.com/duckdblabs). 72 | 73 | ## Distributing your extension 74 | Easy distribution of extensions built with this template is facilitated using a process similar to the one used by DuckDB itself. 75 | Binaries are generated for various versions/platforms allowing duckdb to automatically install the correct binary. 76 | 77 | This step requires that you pass the following 4 parameters to your GitHub repo as action secrets: 78 | 79 | | secret name | description | 80 | | ------------- | ----------------------------------- | 81 | | S3_REGION | s3 region holding your bucket | 82 | | S3_BUCKET | the name of the bucket to deploy to | 83 | | S3_DEPLOY_ID | the S3 key id | 84 | | S3_DEPLOY_KEY | the S3 key secret | 85 | 86 | After setting these variables, all pushes to master will trigger a new (dev) release. Note that your AWS token should 87 | have full permissions to the bucket, and you will need to have ACLs enabled. 88 | 89 | ### Installing the deployed binaries 90 | To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the 91 | `allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using. 
Some examples: 92 | 93 | CLI: 94 | ```shell 95 | duckdb -unsigned 96 | ``` 97 | 98 | Python: 99 | ```python 100 | con = duckdb.connect(':memory:', config={'allow_unsigned_extensions' : 'true'}) 101 | ``` 102 | 103 | NodeJS: 104 | ```js 105 | db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"}); 106 | ``` 107 | 108 | Secondly, you will need to set the repository endpoint in DuckDB to the HTTP URL of your bucket + version of the extension 109 | you want to install. To do this, run the following SQL query in DuckDB: 110 | ```sql 111 | SET custom_extension_repository='bucket.s3.eu-west-1.amazonaws.com/<your_extension_name>/latest'; 112 | ``` 113 | Note that the `/latest` path will allow you to install the latest extension version available for your current version of 114 | DuckDB. To specify a specific version, you can pass the version instead. 115 | 116 | After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB: 117 | ```sql 118 | INSTALL <your_extension_name> 119 | LOAD <your_extension_name> 120 | ``` 121 | 122 | ### Versioning of your extension 123 | Extension binaries will only work for the specific DuckDB version they were built for. Since you may want to support multiple 124 | versions of DuckDB for a release of your extension, you can specify which versions to build for in the CI of this template. 125 | By default, the CI will build your extension against the version of the DuckDB submodule, which should generally be the most 126 | recent version of DuckDB. To build for multiple versions of DuckDB, simply add the version to the matrix variable, e.g.: 127 | ``` 128 | strategy: 129 | matrix: 130 | duckdb_version: [ '<submodule_version>', 'v0.7.0'] 131 | ``` 132 | 133 | ## Setting up CLion 134 | 135 | ### Opening project 136 | Configuring CLion with the extension template requires a little work. Firstly, make sure that the DuckDB submodule is available. 
137 | Then make sure to open `./duckdb/CMakeLists.txt` (so not the top level `CMakeLists.txt` file from this repo) as a project in CLion. 138 | Now, to fix your project path, go to `tools->CMake->Change Project Root` ([docs](https://www.jetbrains.com/help/clion/change-project-root-directory.html)) to set the project root to the root dir of this repo. 139 | 140 | ### Debugging 141 | To set up debugging in CLion, there are two simple steps required. Firstly, in `CLion -> Settings / Preferences -> Build, Execution, Deployment -> CMake` you will need to add the desired builds (e.g. Debug, Release, RelDebug, etc). There are different ways to configure this, but the easiest is to leave all empty, except the `build path`, which needs to be set to `../build/{build type}`. Now, on a clean repository, you will first need to run `make {build type}` to initialize the CMake build directory. After running make, you will be able to (re)build from CLion by using the build target we just created. 142 | 143 | The second step is to configure the unittest runner as a run/debug configuration. To do this, go to `Run -> Edit Configurations` and click `+ -> CMake Application`. The target and executable should be `unittest`. This will run all the DuckDB tests. To run only the extension-specific tests, add `--test-dir ../../.. [sql]` to the `Program Arguments`. Note that it is recommended to use the `unittest` executable for testing/development within CLion. The actual DuckDB CLI currently does not reliably work as a run target in CLion. 
144 | -------------------------------------------------------------------------------- /src/duckprompt_extension.cpp: -------------------------------------------------------------------------------- 1 | #define DUCKDB_EXTENSION_MAIN 2 | 3 | #include 4 | #include 5 | 6 | #include "duckprompt_extension.hpp" 7 | #include "chat.hpp" 8 | #include "quacking_duck.hpp" 9 | #include "duckdb.hpp" 10 | #include "duckdb/catalog/catalog.hpp" 11 | #include "duckdb/catalog/catalog_entry.hpp" 12 | #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp" 13 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" 14 | #include "duckdb/common/exception.hpp" 15 | #include "duckdb/common/string_util.hpp" 16 | #include "duckdb/function/function_binder.hpp" 17 | #include "duckdb/function/scalar_function.hpp" 18 | #include "duckdb/main/extension_util.hpp" 19 | #include "duckdb/function/table_function.hpp" 20 | #include "duckdb/parser/parsed_data/create_pragma_function_info.hpp" 21 | #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" 22 | #include "duckdb/parser/parsed_data/create_table_function_info.hpp" 23 | #include "duckdb/parser/parsed_data/create_view_info.hpp" 24 | #include "duckdb/parser/parser.hpp" 25 | #include "duckdb/parser/statement/select_statement.hpp" 26 | #include "duckdb/planner/binder.hpp" 27 | #include "duckdb/planner/expression/bound_function_expression.hpp" 28 | 29 | namespace duckdb { 30 | 31 | // Encapsulates database operations that are needed by the 32 | // QuackingDuck prompt engine. 
33 | class DuckDatabaseInterface : public DatabaseInterface { 34 | public: 35 | DuckDatabaseInterface(ClientContext& context) : context_(context) { } 36 | virtual void ExtractSchema(ExtractedSchema& extracted_schema) { 37 | auto &catalog = duckdb::Catalog::GetCatalog(context_, INVALID_CATALOG); 38 | auto callback = [&]( 39 | duckdb::SchemaCatalogEntry& schema_entry) { 40 | ExtractSchema(schema_entry, extracted_schema); 41 | }; 42 | catalog.ScanSchemas(context_, callback); 43 | } 44 | 45 | virtual std::string ValidateParse(std::string query) { 46 | return ValidateQuery(query, false); 47 | } 48 | virtual std::string ValidateSemantics(std::string query) { 49 | return ValidateQuery(query, true); 50 | } 51 | 52 | private: 53 | ClientContext& context_; 54 | 55 | private: 56 | void ExtractSchema( 57 | duckdb::SchemaCatalogEntry& schema_entry, 58 | ExtractedSchema& extracted_schema) { 59 | 60 | auto callback = [&](duckdb::CatalogEntry& entry) { 61 | auto &table = (duckdb::TableCatalogEntry &)entry; 62 | std::string name = table.name; 63 | std::string sql = table.ToSQL(); 64 | if (sql.substr(0, 6) == "SELECT") { 65 | // this is a system view that for some reason shows up 66 | // as a table (system views). 67 | return; 68 | } 69 | extracted_schema.table_ddl.emplace_back(sql); 70 | }; 71 | schema_entry.Scan(duckdb::CatalogType::TABLE_ENTRY, callback); 72 | } 73 | 74 | // Validates a query and returns an error message if the query fails. 75 | std::string ValidateQuery(std::string query, bool bind) { 76 | std::string error_message; 77 | duckdb::Parser parser; 78 | try { 79 | parser.ParseQuery(query); 80 | if (parser.statements.size() == 0) { 81 | // This is not a good query. (how do we find out the error?) 82 | return "Unable to parse query"; 83 | } 84 | if (bind) { 85 | shared_ptr binder = Binder::CreateBinder(context_); 86 | binder->Bind(*parser.statements[0]); 87 | } 88 | 89 | // Passed the parse and the bind test! 
90 | } catch (duckdb::ParserException parser_exception) { 91 | error_message = parser_exception.RawMessage(); 92 | } catch (duckdb::BinderException binder_exception) { 93 | error_message = binder_exception.RawMessage(); 94 | } catch (duckdb::Exception other_exception) { 95 | // This isn't an error we can likely correct. 96 | error_message = "Unexpected Error Validating Query."; 97 | } 98 | return error_message; 99 | } 100 | }; 101 | 102 | struct PromptFunctionData : public TableFunctionData { 103 | PromptFunctionData() : finished(false) { } 104 | std::string prompt; 105 | std::string model; 106 | std::string schema_template; 107 | std::string prompt_template; 108 | bool finished; 109 | }; 110 | 111 | static void SummarizeSchemaFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { 112 | auto &data = (PromptFunctionData &)*data_p.bind_data; 113 | if (data.finished) { 114 | return; 115 | } 116 | data.finished = true; 117 | 118 | DuckDatabaseInterface dbInterface(context); 119 | QuackingDuck quacking_duck(dbInterface, data.model); 120 | std::string response = quacking_duck.ExplainSchema(); 121 | output.SetCardinality(1); 122 | output.SetValue(0, 0, Value(response)); 123 | } 124 | 125 | static void PromptSqlFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { 126 | auto &data = (PromptFunctionData &)*data_p.bind_data; 127 | if (data.finished) { 128 | return; 129 | } 130 | data.finished = true; 131 | 132 | DuckDatabaseInterface dbInterface(context); 133 | QuackingDuck quacking_duck(dbInterface, data.model); 134 | std::string response = quacking_duck.Ask(data.prompt); 135 | output.SetCardinality(1); 136 | output.SetValue(0, 0, Value(response)); 137 | } 138 | 139 | 140 | static unique_ptr SummarizeBind(ClientContext &context, TableFunctionBindInput &input, 141 | vector &return_types, vector &names) { 142 | auto result = make_uniq(); 143 | return_types.emplace_back(LogicalType::VARCHAR); 144 | 
names.emplace_back("summary"); 145 | return std::move(result); 146 | } 147 | 148 | static unique_ptr PromptBind(ClientContext &context, TableFunctionBindInput &input, 149 | vector &return_types, vector &names) { 150 | auto result = make_uniq(); 151 | if (input.inputs.size() > 0) { 152 | result->prompt = input.inputs[0].template GetValue(); 153 | } 154 | for (auto &kv : input.named_parameters) { 155 | if (kv.first == "model") { 156 | result->model = StringValue::Get(kv.second); 157 | } 158 | } 159 | return_types.emplace_back(LogicalType::VARCHAR); 160 | names.emplace_back("query"); 161 | return std::move(result); 162 | } 163 | 164 | static void FixupFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { 165 | auto &data = (PromptFunctionData &)*data_p.bind_data; 166 | if (data.finished) { 167 | return; 168 | } 169 | data.finished = true; 170 | 171 | DuckDatabaseInterface dbInterface(context); 172 | QuackingDuck quacking_duck(dbInterface, data.model); 173 | std::string response = quacking_duck.FixupQuery(data.prompt); 174 | output.SetCardinality(1); 175 | output.SetValue(0, 0, Value(response)); 176 | } 177 | 178 | static string PragmaPromptQuery(ClientContext &context, const FunctionParameters ¶meters) { 179 | auto prompt = StringValue::Get(parameters.values[0]); 180 | DuckDatabaseInterface dbInterface(context); 181 | QuackingDuck quacking_duck(dbInterface); 182 | std::string query_result = quacking_duck.Ask(prompt); 183 | return query_result; 184 | } 185 | 186 | void LoadInternal(DatabaseInstance &db_instance) { 187 | // create the TPCH pragma that allows us to run the query 188 | auto prompt_query_func = PragmaFunction::PragmaCall("prompt_query", PragmaPromptQuery, 189 | {LogicalType::VARCHAR}); 190 | ExtensionUtil::RegisterFunction(db_instance, prompt_query_func); 191 | 192 | TableFunction summarize_func("prompt_schema", {}, SummarizeSchemaFunction, SummarizeBind); 193 | ExtensionUtil::RegisterFunction(db_instance, summarize_func); 194 | 
195 | TableFunction prompt_func("prompt_sql", {LogicalType::VARCHAR}, PromptSqlFunction, PromptBind); 196 | prompt_func.named_parameters["model"] = LogicalType::VARCHAR; 197 | ExtensionUtil::RegisterFunction(db_instance, prompt_func); 198 | 199 | TableFunction fixup_func("prompt_fixup", {LogicalType::VARCHAR}, FixupFunction, PromptBind); 200 | fixup_func.named_parameters["model"] = LogicalType::VARCHAR; 201 | ExtensionUtil::RegisterFunction(db_instance, fixup_func); 202 | } 203 | 204 | void DuckpromptExtension::Load(DuckDB &db) { 205 | LoadInternal(*db.instance); 206 | } 207 | std::string DuckpromptExtension::Name() { 208 | return "duckprompt"; 209 | } 210 | 211 | } // namespace duckdb 212 | 213 | extern "C" { 214 | 215 | DUCKDB_EXTENSION_API void duckprompt_init(duckdb::DatabaseInstance &db) { 216 | LoadInternal(db); 217 | } 218 | 219 | DUCKDB_EXTENSION_API const char *duckprompt_version() { 220 | return duckdb::DuckDB::LibraryVersion(); 221 | } 222 | } 223 | 224 | #ifndef DUCKDB_EXTENSION_MAIN 225 | #error DUCKDB_EXTENSION_MAIN not defined 226 | #endif 227 | --------------------------------------------------------------------------------