├── .gitignore ├── extensions ├── jsonata │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── adbc_scanner │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── file_dialog │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── title_mapper │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── st_read_multi │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── evalexpr_rhai │ ├── function_descriptions.csv │ └── description.yml ├── quackformers │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── duck_delta_share │ └── description.yml ├── fivetran │ └── description.yml ├── snowflake │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── substrait │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── anofox_forecast │ └── description.yml ├── cwiqduck │ └── description.yml ├── anofox_tabular │ └── description.yml ├── anofox_statistics │ └── description.yml ├── blockduck │ └── description.yml ├── hashfuncs │ └── description.yml ├── json_schema │ └── description.yml ├── lua │ └── description.yml ├── zipfs │ └── description.yml ├── lindel │ └── description.yml ├── cloudfront │ └── description.yml ├── gsheets │ ├── docs │ │ └── function_description.csv │ └── description.yml ├── rapidfuzz │ └── description.yml ├── a5 │ └── description.yml ├── encoding │ └── docs │ │ └── function_descriptions.csv ├── tributary │ └── description.yml ├── h3 │ └── description.yml ├── shellfs │ └── description.yml ├── capi_quack │ └── description.yml ├── inflector │ └── description.yml ├── crypto │ └── description.yml ├── erpl_web │ └── description.yml ├── rusty_sheet │ ├── docs │ │ └── function_description.csv │ └── description.yml ├── tera │ └── description.yml ├── minijinja │ └── description.yml ├── radio │ └── description.yml ├── textplot │ └── description.yml ├── fuzzycomplete │ └── description.yml ├── hdf5 │ └── description.yml ├── stochastic │ └── description.yml ├── datasketches │ └── description.yml ├── marisa │ └── description.yml ├── bitfilters │ └── description.yml ├── highs │ ├── description.yml │ └── docs │ │ └── function_descriptions.csv ├── dash │ └── description.yml ├── tarfs │ └── description.yml ├── pivot_table │ └── description.yml ├── quackstore │ └── description.yml ├── lsh │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── ulid │ └── description.yml ├── airport │ └── description.yml ├── ofquack │ └── description.yml ├── mooncake │ └── description.yml ├── splink_udfs │ └── description.yml ├── pdal │ └── docs │ │ └── function_descriptions.csv ├── rusty_quack │ └── description.yml ├── geography │ └── description.yml ├── bigquery │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── chaos │ └── description.yml ├── observefs │ └── description.yml ├── curl_httpfs │ └── description.yml ├── quack │ └── description.yml ├── netquack │ ├── description.yml │ └── docs │ │ └── function_descriptions.csv ├── psyduck │ └── description.yml ├── system_stats │ └── description.yml ├── eeagrid │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── geotiff │ └── description.yml ├── mlpack │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── dns │ └── docs │ │ └── function_descriptions.csv ├── fit │ └── description.yml ├── duckherder │ └── description.yml ├── gcs │ └── description.yml ├── psql │ └── description.yml ├── prql │ └── description.yml ├── scrooge │ └── description.yml ├── cache_httpfs │ └── 
description.yml ├── onelake │ └── docs │ │ └── functions_description.csv ├── magic │ └── description.yml ├── nanoarrow │ └── description.yml ├── jwt │ └── description.yml ├── arrow │ └── description.yml ├── sshfs │ └── description.yml ├── pyroscope │ └── description.yml ├── nats_js │ └── description.yml ├── miniplot │ └── description.yml ├── bvh2sql │ └── description.yml ├── gaggle │ ├── description.yml │ └── docs │ │ └── function_descriptions.csv ├── infera │ ├── description.yml │ └── docs │ │ └── function_descriptions.csv ├── quackfix │ └── description.yml ├── http_request │ └── description.yml ├── wireduck │ └── description.yml ├── web_archive │ └── description.yml ├── pbix │ └── description.yml ├── nanodbc │ └── description.yml ├── msolap │ └── description.yml ├── anndata │ └── description.yml ├── duckpgq │ └── description.yml ├── read_stat │ └── description.yml ├── webdavfs │ └── description.yml ├── yardstick │ └── description.yml ├── hostfs │ └── description.yml ├── faiss │ └── description.yml ├── flock │ ├── docs │ │ └── function_descriptions.csv │ └── description.yml ├── acp │ └── description.yml ├── sazgar │ └── description.yml ├── pcap_reader │ └── description.yml ├── quickjs │ └── description.yml ├── duck_tails │ └── description.yml ├── cassandra │ └── description.yml ├── warc │ └── description.yml ├── poached │ └── description.yml ├── http_client │ └── description.yml ├── yaml │ └── description.yml ├── sheetreader │ └── description.yml └── duck_hunt │ └── description.yml ├── layout ├── screenshot.md └── default.md ├── scripts ├── get_extension_list.sh ├── get_stars.py ├── pretty_print.py ├── fetch_extensions.sh ├── create_build_all_invocation.py ├── clean_caches.sh └── build.py ├── .github ├── config │ └── vcpkg_caching │ │ ├── manual_excludes.json │ │ └── README.md └── workflows │ ├── build_next.yml │ ├── test_all.yml │ ├── clean_caches.yml │ ├── _extension_archive.yml │ ├── build_all.yml │ ├── generate_docs.yml │ ├── cache_warming.yml │ └── deploy_docs.yml └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | downloads-last-week.json 3 | *.db 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /extensions/jsonata/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | -------------------------------------------------------------------------------- /extensions/adbc_scanner/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | -------------------------------------------------------------------------------- /layout/screenshot.md: -------------------------------------------------------------------------------- 1 | ```sql 2 | INSTALL {{ page.extension.name }} 3 | FROM community; 4 | 5 | LOAD {{ page.extension.name }}; 6 | ``` 7 | -------------------------------------------------------------------------------- /extensions/file_dialog/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | "choose_file","Choose a file via native file dialog","","FROM read_csv(choose_file());" 3 | -------------------------------------------------------------------------------- /extensions/title_mapper/docs/function_descriptions.csv: 
-------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | "standardize_title","Returns the BLS standard title using TF-IDF","","SELECT standardize_title(scraped_title_column) FROM your_table;" -------------------------------------------------------------------------------- /extensions/st_read_multi/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | "st_read_multi","Read multiple geospatial files. Currently, only GeoJSON and GeoPackages are supported.","","FROM ST_Read_Multi('path/to/*.geojson');" 3 | -------------------------------------------------------------------------------- /extensions/evalexpr_rhai/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | evalexpr_rhai,Evaluate a Rhai expression,The function evaluates a Rhai expression and returns the result. Optionally the caller can provide a context to pass to the expression.,"select evalexpr_rhai('1 + 2');" -------------------------------------------------------------------------------- /scripts/get_extension_list.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eo pipefail 4 | 5 | echo -n "EXTENSION_LIST=[" > extension_list 6 | for extension_folder in extensions/*; 7 | do 8 | extension_name=$(basename -- $extension_folder) 9 | echo -n "'$extension_name'," >> extension_list 10 | done 11 | echo "]" >> extension_list 12 | -------------------------------------------------------------------------------- /extensions/quackformers/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | "embed","Embed text using vanilla BERT implementation, 384 output dim float32","","SELECT embed('Quack Quack Quack!');" 3 | "embed_jina","Embed text using Jina BERT implementation, 768 output dim float32","","SELECT embed_jina('Quack Quack Quack!');" 4 | -------------------------------------------------------------------------------- /scripts/get_stars.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import yaml 4 | import subprocess 5 | 6 | desc_file = sys.argv[1] 7 | duckdb = sys.argv[2] 8 | 9 | with open(desc_file, 'r') as stream: 10 | desc = yaml.safe_load(stream) 11 | 12 | subprocess.run(["gh", "api", "https://api.github.com/repos/" + desc['repo']['github'], "--jq=.stargazers_count"]) 13 | -------------------------------------------------------------------------------- /scripts/pretty_print.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | line = sys.stdin.readline() 4 | 5 | if line is None or line == '': 6 | print('n/a') 7 | else: 8 | try: 9 | x = int(line) 10 | if x < 1000: 11 | print(x) 12 | else: 13 | print(f'{x / 1000:.1f}k') 14 | except ValueError: 15 | print('n/a') 16 | -------------------------------------------------------------------------------- /.github/config/vcpkg_caching/manual_excludes.json: -------------------------------------------------------------------------------- 1 | { 2 | "linux_amd64": [ 3 | ], 4 | "linux_amd64_musl": [ 5 | ], 6 | "linux_arm64": [ 7 | ], 8 | "osx_amd64": [ 9 | ], 10 | "osx_arm64": [ 11 | ], 12 | "wasm_eh": [ 13 | ], 14 | "wasm_mvp": [ 15 | ], 16 
| "wasm_threads": [ 17 | ], 18 | "windows_amd64": [ 19 | ], 20 | "windows_amd64_mingw": [ 21 | ] 22 | } -------------------------------------------------------------------------------- /extensions/duck_delta_share/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: duck_delta_share 3 | description: DuckDB Extension for Delta Sharing 4 | version: 0.0.2 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_mingw" 9 | maintainers: 10 | - changmct 11 | 12 | repo: 13 | github: cwiq-os/duck_delta_share 14 | ref: cb54b8a94e9803c4d7c29813c1f20db86402c497 15 | -------------------------------------------------------------------------------- /layout/default.md: -------------------------------------------------------------------------------- 1 | 2 | ### Installing and Loading 3 | ```sql 4 | INSTALL {{ page.extension.name }} FROM community; 5 | LOAD {{ page.extension.name }}; 6 | ``` 7 | 8 | {% if page.docs.hello_world %} 9 | ### Example 10 | ```sql 11 | {{ page.docs.hello_world }}``` 12 | {% endif %} 13 | 14 | {% if page.docs.extended_description %} 15 | ### About {{ page.extension.name }} 16 | {{ page.docs.extended_description }} 17 | {% endif %} 18 | 19 | -------------------------------------------------------------------------------- /extensions/fivetran/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: | 3 | See the [README](https://github.com/lnkuiper/fivetran). 4 | extension: 5 | build: cmake 6 | description: Fivetran community extension. 7 | language: C++ 8 | license: Apache-2.0 9 | maintainers: 10 | - lnkuiper 11 | name: fivetran 12 | version: 2025121000 13 | repo: 14 | github: lnkuiper/fivetran 15 | ref: 32a906b45e1ea5677949f7222ffe6382c0985317 16 | -------------------------------------------------------------------------------- /extensions/snowflake/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | snowflake_query,"Execute a SQL query directly against a Snowflake database and return the results as a table. Uses the specified secret for authentication credentials.","","SELECT * FROM snowflake_query('SELECT * FROM customers WHERE state = ''CA''', 'my_snowflake_secret');" 3 | snowflake_version,"Returns the version of the Snowflake extension.","","SELECT snowflake_version();" 4 | 5 | -------------------------------------------------------------------------------- /extensions/substrait/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | get_substrait,Converts the provided query into a binary Substrait plan,"","" 3 | get_substrait_json,Converts the provided query into a Substrait plan in JSON,"","" 4 | from_substrait,Executes a binary Substrait plan (provided as bytes) against DuckDB and returns the results,"","" 5 | from_substrait_json,Executes a Substrait plan written in JSON against DuckDB and returns the results,"","" 6 | -------------------------------------------------------------------------------- /extensions/anofox_forecast/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: anofox_forecast 3 | description: Statistical timeseries forecasting in DuckDB. Support ARIMA, SARIMA, ETS, TBATS, MFLES, MSTL, and other models. 
4 | language: C++ 5 | build: cmake 6 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;" 7 | license: BSL 1.1 8 | maintainers: 9 | - sipemu 10 | repo: 11 | github: DataZooDE/anofox-forecast 12 | ref: d96383f7f8b2fc98c8b0cb7c90ec3868801ca5f8 13 | -------------------------------------------------------------------------------- /extensions/cwiqduck/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: cwiqduck 3 | description: DuckDB filesystem extension for CWIQ FS 4 | version: 0.0.2 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;windows_amd64;windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads;osx_amd64;osx_arm64" 9 | maintainers: 10 | - ph-maxinechang 11 | 12 | repo: 13 | github: cwiq-os/cwiqduck 14 | ref: 81bb6a0c509ef91919560cbcddaf8e489e0ac8eb 15 | -------------------------------------------------------------------------------- /extensions/anofox_tabular/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: anofox_tabular 3 | description: A duckdb extension which combines data quality and data preparation tools for tabular data. 4 | language: C++ 5 | build: cmake 6 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads;" 7 | license: BSL 1.1 8 | maintainers: 9 | - jrosskopf 10 | repo: 11 | github: DataZooDE/anofox-tabular 12 | ref: 058ad4ea3ecb38ed00e8a4c135b0f427d119c8b4 13 | -------------------------------------------------------------------------------- /extensions/anofox_statistics/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: anofox_statistics 3 | description: A DuckDB extension for statistical regression analysis, providing OLS, Ridge, WLS, and time-series regression capabilities with complete diagnostics and inference directly in SQL. 4 | language: C++ 5 | build: cmake 6 | license: BSL 1.1 7 | maintainers: 8 | - sipemu 9 | requires_toolchains: rust 10 | repo: 11 | github: DataZooDE/anofox-statistics 12 | ref: d3d677e4f4a09b87e5c82b841f4092784cbb347c 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DuckDB Community Extensions Repository 2 | 3 | This repository collects third-party extensions created for [DuckDB](https://www.duckdb.org). 4 | 5 | View the [list of Community Extensions](https://duckdb.org/community_extensions/list_of_extensions). 6 | 7 | Visit the [DuckDB Community Extensions website](https://duckdb.org/community_extensions) to learn more about the available extensions. 
8 | 9 | [Learn about the official extensions and community extensions.](https://duckdb.org/2024/07/05/community-extensions) 10 | -------------------------------------------------------------------------------- /extensions/blockduck/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: blockduck 3 | description: Live SQL Queries on Blockchain 4 | version: 0.8.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;windows_amd64;windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads" 9 | maintainers: 10 | - luohaha 11 | 12 | repo: 13 | github: luohaha/BlockDuck 14 | ref: 60e9c73da0376c2be2e5f82dfb1c756516373344 15 | 16 | docs: 17 | https://yixins-organization.gitbook.io/blockduck-docs 18 | -------------------------------------------------------------------------------- /extensions/hashfuncs/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_hashfuncs.html). 3 | extension: 4 | build: cmake 5 | description: Non-cryptographic hash functions, xxHash, rapidhash and Murmurhash3 6 | language: C++ 7 | license: Apache-2.0 8 | maintainers: 9 | - rustyconover 10 | name: hashfuncs 11 | version: '2025120401' 12 | repo: 13 | github: query-farm/hashfuncs 14 | ref: a5898a4aae082444894238cdeaafb12f54e09401 15 | -------------------------------------------------------------------------------- /extensions/json_schema/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_json_schema.html). 3 | extension: 4 | build: cmake 5 | description: Adds support for validating JSON data with JSON schemas. 6 | language: C++ 7 | license: Apache-2.0 8 | maintainers: 9 | - rustyconover 10 | name: json_schema 11 | version: '2025120401' 12 | repo: 13 | github: query-farm/json_schema 14 | ref: d506aff832d137a4eb3fa826a860e5808bf657f1 15 | -------------------------------------------------------------------------------- /extensions/lua/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: lua 3 | description: Evaluate Lua scripts within queries 4 | version: 1.4.3 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - isaacbrodsky 10 | 11 | repo: 12 | github: isaacbrodsky/duckdb-lua 13 | ref: 517a68f90e650e13fb69fa44465dd2bc97a96a3e 14 | 15 | docs: 16 | hello_world: | 17 | SELECT lua('return "Hello " .. context', 'World'); 18 | extended_description: | 19 | Adds support for the Lua embedded scripting language to DuckDB. 
20 | -------------------------------------------------------------------------------- /extensions/zipfs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: zipfs 3 | description: Read files within zip archives 4 | version: 1.4.3 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - isaacbrodsky 10 | 11 | repo: 12 | github: isaacbrodsky/duckdb-zipfs 13 | ref: e2c7c9a08202416494fcfc9ba43a9444a32e0bdd 14 | 15 | docs: 16 | hello_world: | 17 | SELECT * FROM 'zip://my_zip.zip/my_file.csv'; 18 | extended_description: | 19 | The zipfs extension adds support for reading files from within zip archives. 20 | -------------------------------------------------------------------------------- /extensions/lindel/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_lindel.html). 3 | extension: 4 | build: cmake 5 | description: Linearization/Delinearization, Z-Order, Hilbert and Morton Curves 6 | language: C++ 7 | license: Apache-2.0 8 | maintainers: 9 | - rustyconover 10 | name: lindel 11 | requires_toolchains: rust 12 | version: '2025120401' 13 | repo: 14 | github: query-farm/lindel 15 | ref: 3f9906c71e7883e0f2ee8696d9e8648750bdb76b 16 | -------------------------------------------------------------------------------- /extensions/cloudfront/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | build: cmake 3 | description: AWS CloudFront signed cookie authentication for DuckDB httpfs 4 | excluded_platforms: linux_amd64_musl;windows_amd64_mingw 5 | language: C++ 6 | license: MIT 7 | maintainers: 8 | - onnimonni 9 | name: cloudfront 10 | vcpkg_url: https://github.com/microsoft/vcpkg.git 11 | vcpkg_commit: ce613c41372b23b1f51333815feb3edd87ef8a8b 12 | version: 0.1.0 13 | repo: 14 | github: midwork-finds-jobs/duckdb-cloudfront 15 | ref: e6b2ff5e933c296ffbe1cb7403cac688f8e41e97 16 | -------------------------------------------------------------------------------- /extensions/gsheets/docs/function_description.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | read_gsheet,"Read a single sheet directly from a Google Sheet via the sheet URL, or spreadsheet ID.",,"SELECT * FROM read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=644613997#gid=644613997');" 3 | "COPY TO","Write data from a table to a Google Sheet via the sheet URL, or spreadsheet ID.",,"COPY TO 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=1295634987#gid=1295634987' (FORMAT gsheet);" -------------------------------------------------------------------------------- /extensions/rapidfuzz/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_rapidfuzz.html). 3 | extension: 4 | build: cmake 5 | description: Provides high-performance fuzzy string matching functions, powered 6 | by the RapidFuzz C++ library. 
7 | language: C++ 8 | license: MIT 9 | maintainers: 10 | - rustyconover 11 | name: rapidfuzz 12 | version: '2025120401' 13 | repo: 14 | github: query-farm/rapidfuzz 15 | ref: eb2fe18d3f60205722521ff2ddd8eb3c932c7888 16 | -------------------------------------------------------------------------------- /extensions/a5/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_a5.html). 3 | extension: 4 | build: cmake 5 | description: Hierarchical pentagonal indexing for geospatial data that is equal-area 6 | and millimeter accurate. 7 | language: C++ 8 | license: Apache-2.0 9 | maintainers: 10 | - rustyconover 11 | name: a5 12 | requires_toolchains: rust 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/a5 16 | ref: b24e35cb1ab875f7cb48d698293d0652f3c135d1 17 | -------------------------------------------------------------------------------- /extensions/encoding/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,fun_type,description,comment,example 2 | "force_utf8_encoding","scalar","Forces text with potentially mixed or unknown encoding to UTF-8. Handles hex-encoded byte sequences and attempts automatic encoding detection.","","SELECT force_utf8_encoding('\x5B\x7B\x22Langue\x22\x3A\x5B\x22Fran\xE7ais\x22,\x22English\x22\x5D\x7D\x5D');" 3 | "detect_encoding","scalar","Detects the likely encoding of input text and returns the encoding name.","","SELECT detect_encoding('\x5B\x7B\x22Langue\x22\x3A\x5B\x22Fran\xE7ais\x22,\x22English\x22\x5D\x7D\x5D');" -------------------------------------------------------------------------------- /extensions/tributary/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_tributary.html). 3 | extension: 4 | build: cmake 5 | description: Enable DuckDB to interact with Apache Kafka 6 | excluded_platforms: wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw; 7 | language: C++ 8 | license: MIT 9 | maintainers: 10 | - rustyconover 11 | name: tributary 12 | version: '2025120401' 13 | repo: 14 | github: query-farm/tributary 15 | ref: 36faa17973a9b45180871e4db6929a382b3214ab 16 | -------------------------------------------------------------------------------- /.github/config/vcpkg_caching/README.md: -------------------------------------------------------------------------------- 1 | # VCPKG caching 2 | To speed up the performance of community extension builds, we autogenerate a list of vcpkg dependencies whose binaries we'd like to cache. 3 | For this, the generated_list.json file is used. This file can be automatically updated by running: 4 | 5 | ```sh 6 | python3 -m venv venv 7 | . ./venv/bin/activate 8 | python3 -m pip install requests pyyaml GitPython 9 | python3 scripts/parse_vcpkg_deps.py 10 | ``` 11 | 12 | To add additional excludes, edit the `.github/config/vcpkg_caching/manual_excludes.json` file manually and rerun the script. 
-------------------------------------------------------------------------------- /extensions/h3/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: h3 3 | description: Hierarchical hexagonal indexing for geospatial data 4 | version: 1.4.3 5 | language: C++ 6 | build: cmake 7 | license: Apache-2.0 8 | maintainers: 9 | - isaacbrodsky 10 | 11 | repo: 12 | github: isaacbrodsky/h3-duckdb 13 | ref: 1e701fdc5194c85bbbfd0c085fb185d327c0bf65 14 | 15 | docs: 16 | hello_world: | 17 | SELECT h3_latlng_to_cell(37.7887987, -122.3931578, 9); 18 | extended_description: | 19 | The H3 extension adds support for the [H3 hierarchical hexagonal grid system](https://h3geo.org/). 20 | -------------------------------------------------------------------------------- /extensions/shellfs/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_shellfs.html). 3 | extension: 4 | build: cmake 5 | description: Allow shell commands to be used for input and output 6 | excluded_platforms: wasm_mvp;wasm_eh;wasm_threads 7 | language: C++ 8 | license: MIT 9 | maintainers: 10 | - rustyconover 11 | name: shellfs 12 | requires_toolchains: python3 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/shellfs 16 | ref: 4fef6d3d58068ce8ef6ed002441e0eca002cf372 17 | -------------------------------------------------------------------------------- /extensions/capi_quack/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: capi_quack 3 | description: Provides a hello world example demo from the C/C++ C API template 4 | version: 0.0.1 5 | language: C/C++ 6 | build: CMake 7 | license: MIT 8 | requires_toolchains: "python3" 9 | maintainers: 10 | - samansmink 11 | 12 | repo: 13 | github: duckdb/extension-template-c 14 | ref: 7f71365c5ce61b2b346717af07c9d448cfc9d3c3 15 | 16 | docs: 17 | extended_description: | 18 | The capi_quack extension is based on DuckDB's [C/C++ C API template](https://github.com/duckdb/extension-template-c/). 19 | 20 | -------------------------------------------------------------------------------- /extensions/inflector/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_inflector.html). 3 | extension: 4 | build: cmake 5 | description: Powerful string case transformation and inflection capabilities directly 6 | to your SQL queries. 7 | language: C++ 8 | license: Apache-2.0 9 | maintainers: 10 | - rustyconover 11 | name: inflector 12 | requires_toolchains: rust 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/inflector 16 | ref: 1c2cd4eb439853e34cebe4a58ea01b15515e1c66 17 | -------------------------------------------------------------------------------- /extensions/crypto/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_crypto.html). 
3 | extension: 4 | build: cmake 5 | description: Cryptographic hash functions and HMAC 6 | excluded_platforms: wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw; 7 | language: C++ 8 | license: MIT 9 | maintainers: 10 | - rustyconover 11 | name: crypto 12 | requires_toolchains: rust 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/crypto 16 | ref: 9308fe3e74a56b3dd5b046533bac582532bdc7bd 17 | -------------------------------------------------------------------------------- /extensions/erpl_web/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: erpl_web 3 | description: ERPL is a DuckDB extension to connect to API based ecosystems via standard interfaces like OData, GraphQL and REST. This works e.g. for SAP ERP, SAP Ariba, SAP C4C, Microsoft Dynamics and many more. 4 | language: C++ 5 | build: cmake 6 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads;" 7 | license: BSL 1.1 8 | requires_toolchains: "cmake, openssl" 9 | maintainers: 10 | - jrosskopf 11 | repo: 12 | github: DataZooDE/erpl-web 13 | ref: 8d0b2eb30f9ffc097e2dae8cda558cb43b9ef180 14 | -------------------------------------------------------------------------------- /extensions/rusty_sheet/docs/function_description.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | analyze_sheet,"Analyzes a single spreadsheet sheet to detect column names and data types.","","FROM analyze_sheet('data.xlsx', analyze_rows=20);" 3 | analyze_sheets,"Analyzes multiple spreadsheet sheets across files to detect column structures.","","FROM analyze_sheets(['*.xls']);" 4 | read_sheet,"Reads data from a single spreadsheet sheet into a DuckDB table.","","FROM read_sheet('data.ods');" 5 | read_sheets,"Reads data from multiple spreadsheet sheets across files into DuckDB tables.","","FROM read_sheets(['*.et'], union_by_name=true);" 6 | -------------------------------------------------------------------------------- /.github/workflows/build_next.yml: -------------------------------------------------------------------------------- 1 | name: Community Extension with latest DuckDB 2 | on: 3 | pull_request: 4 | paths-ignore: 5 | - '**' 6 | - '!scripts/build.py' 7 | - '!.github/workflows/build_next.yml' 8 | - '!extensions/*/description.yml' 9 | push: 10 | paths-ignore: 11 | - '**' 12 | - '!scripts/build.py' 13 | - '!.github/workflows/build_next.yml' 14 | - '!extensions/*/description.yml' 15 | 16 | jobs: 17 | test_against_latest: 18 | if: false 19 | uses: ./.github/workflows/build.yml 20 | with: 21 | duckdb_version: 'main' 22 | deploy: 'false' 23 | -------------------------------------------------------------------------------- /extensions/adbc_scanner/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_adbc_scanner.html). 3 | extension: 4 | build: cmake 5 | description: An ADBC client extension for DuckDB that can access ADBC provided data 6 | sources. 
7 | excluded_platforms: wasm_mvp;wasm_eh;wasm_threads 8 | language: C++ 9 | license: Apache-2.0 10 | maintainers: 11 | - rustyconover 12 | name: adbc_scanner 13 | version: '2025120801' 14 | repo: 15 | github: query-farm/adbc_scanner 16 | ref: 7a7d206cbff1ad61436ba8bd1a4c438028b8c815 17 | -------------------------------------------------------------------------------- /extensions/tera/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_tera.html). 3 | extension: 4 | build: cmake 5 | description: Adds templating to DuckDB, enabling dynamic generation of text, HTML, 6 | and reports directly within SQL queries using the Tera engine. 7 | language: C++ 8 | license: Apache-2.0 9 | maintainers: 10 | - rustyconover 11 | name: tera 12 | requires_toolchains: rust 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/tera 16 | ref: 724e371cc674061e0b8524b77d3fcf18339c8875 17 | -------------------------------------------------------------------------------- /extensions/jsonata/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_jsonata.html). 3 | extension: 4 | build: cmake 5 | description: The JSONata extension for DuckDB enables expressive, JSON-focused querying 6 | and transformation directly within SQL using the powerful JSONata expression language. 7 | language: C++ 8 | license: Apache-2.0 9 | maintainers: 10 | - rustyconover 11 | name: jsonata 12 | version: 2025110901 13 | repo: 14 | github: query-farm/jsonata 15 | ref: 4f087e852214aea931d3301982f64effec3f3b22 16 | -------------------------------------------------------------------------------- /extensions/evalexpr_rhai/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_evalexpr_rhai.html). 3 | extension: 4 | build: cmake 5 | description: Evaluate the Rhai scripting language in DuckDB 6 | excluded_platforms: windows_amd64_rtools;windows_amd64_mingw; 7 | language: C++ 8 | license: Apache-2.0 9 | maintainers: 10 | - rustyconover 11 | name: evalexpr_rhai 12 | requires_toolchains: rust 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/evalexpr_rhai 16 | ref: f4493ade6aa592402b1b80a3eec002d94254b5e9 17 | -------------------------------------------------------------------------------- /extensions/minijinja/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_minijinja.html). 3 | extension: 4 | build: cmake 5 | description: Adds templating to DuckDB, enabling dynamic generation of text, HTML, 6 | and reports directly within SQL queries using the MiniJinja engine. 
7 | language: C++ 8 | license: Apache-2.0 9 | maintainers: 10 | - rustyconover 11 | name: minijinja 12 | requires_toolchains: rust 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/minijinja 16 | ref: f27e100457e2f51a71d343442d0fbb956afb10dd 17 | -------------------------------------------------------------------------------- /extensions/radio/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_radio.html). 3 | extension: 4 | build: cmake 5 | description: Allow interaction with event buses like Websocket and Redis publish/subscribe 6 | servers. 7 | excluded_platforms: wasm_mvp;wasm_eh;wasm_threads;windows_amd64;windows_amd64_mingw;windows_amd64_rtools 8 | language: C++ 9 | license: MIT 10 | maintainers: 11 | - rustyconover 12 | name: radio 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/radio 16 | ref: 0ace6af84ec2289c23944857fd9aded06e65ec9c 17 | -------------------------------------------------------------------------------- /extensions/textplot/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_textplot.html). 3 | extension: 4 | build: cmake 5 | description: Enables text-based data visualization directly in SQL queries, including 6 | ASCII/Unicode bar charts, density plots, and sparklines for lightweight analytics 7 | and dashboards. 8 | language: C++ 9 | license: Apache-2.0 10 | maintainers: 11 | - rustyconover 12 | name: textplot 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/textplot 16 | ref: d3b90faa3372d4204915343546fa0556601bf243 17 | -------------------------------------------------------------------------------- /extensions/fuzzycomplete/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_fuzzycomplete.html). 3 | extension: 4 | build: cmake 5 | description: Fuzzy matching based autocompletion 6 | excluded_platforms: linux_amd64_musl;wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw; 7 | language: C++ 8 | license: Apache-2.0 9 | maintainers: 10 | - rustyconover 11 | name: fuzzycomplete 12 | requires_toolchains: rust 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/fuzzycomplete 16 | ref: 74630cc8548a8ee6b32129abdc8bd70e1cf2bf87 17 | -------------------------------------------------------------------------------- /extensions/hdf5/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: hdf5 3 | description: Read HDF5 files from DuckDB 4 | version: 0.1.5 5 | language: Rust 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl;wasm_mvp;wasm_eh;wasm_threads" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - Berrysoft 12 | 13 | repo: 14 | github: Berrysoft/duckdb-hdf5 15 | ref: 5bb76c775d132f56869bd65b2ec267b951d2ff69 16 | 17 | docs: 18 | hello_world: | 19 | FROM read_hdf5('some_file.h5', 'dataset'); 20 | extended_description: | 21 | This extension provides a read function for HDF5 files. 
22 | -------------------------------------------------------------------------------- /extensions/stochastic/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_stochastic.html). 3 | extension: 4 | build: cmake 5 | description: Add comprehensive statistical distribution functions to DuckDB, enabling 6 | advanced statistical analysis, probability calculations, and random sampling directly 7 | within SQL queries. 8 | language: C++ 9 | license: Apache-2.0 10 | maintainers: 11 | - rustyconover 12 | name: stochastic 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/stochastic 16 | ref: 020205b1862e9ab919ad58b1a4a496058a478519 17 | -------------------------------------------------------------------------------- /extensions/datasketches/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_datasketches.html). 3 | extension: 4 | build: cmake 5 | description: By utilizing the Apache DataSketches library this extension can efficiently 6 | compute approximate distinct item counts and estimations of quantiles, while allowing 7 | the sketches to be serialized. 8 | language: C++ 9 | license: MIT 10 | maintainers: 11 | - rustyconover 12 | name: datasketches 13 | version: '2025101201' 14 | repo: 15 | github: query-farm/datasketches 16 | ref: d7ff45ac116b81f5958d40783470557f27a97911 17 | -------------------------------------------------------------------------------- /scripts/fetch_extensions.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | trap cleanup SIGINT SIGTERM 6 | 7 | cleanup() { 8 | trap - SIGINT SIGTERM 9 | echo "Script interrupted" 10 | exit 1 11 | } 12 | 13 | 14 | if [ $# -lt 1 ]; then 15 | echo "Usage: ./scripts/fetch_extensions.sh path_to_duckdb_binary" 16 | exit 1 17 | fi 18 | 19 | rm -rf build 20 | for extension_folder in extensions/*; 21 | do 22 | extension_name=$(basename -- $extension_folder) 23 | echo "Installing $extension_name" 24 | $1 -c "SET extension_directory = 'build/extension_dir'; FORCE INSTALL '$extension_name' FROM community;" || echo "Missing $extension_name" 25 | done 26 | -------------------------------------------------------------------------------- /extensions/marisa/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_marisa.html). 3 | extension: 4 | build: cmake 5 | description: Adds MARISA (Matching Algorithm with Recursively Implemented StorAge) 6 | trie functionality for DuckDB. MARISA is a static and space-efficient trie data 7 | structure that enables fast string lookups, prefix searches, and predictive text 8 | operations. 
9 | language: C++ 10 | license: MIT 11 | maintainers: 12 | - rustyconover 13 | name: marisa 14 | version: '2025120401' 15 | repo: 16 | github: query-farm/marisa 17 | ref: 6e81aadaa0a41901218fcf02ff39d8ff2594ffa7 18 | -------------------------------------------------------------------------------- /extensions/bitfilters/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_bitfilters.html). 3 | extension: 4 | build: cmake 5 | description: "Provides high-performance, space-efficient probabilistic data structures\u2014\ 6 | including quotient, XOR, and binary fuse filters\u2014for fast approximate set\ 7 | \ membership testing with no false negatives and configurable false positive rates." 8 | language: C++ 9 | license: MIT 10 | maintainers: 11 | - rustyconover 12 | name: bitfilters 13 | version: '2025120401' 14 | repo: 15 | github: query-farm/bitfilters 16 | ref: 1acc412f932a9b88f1504bf76fc0cbdd891b97f3 17 | -------------------------------------------------------------------------------- /extensions/highs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: highs 3 | description: HiGHS - High Performance Optimization Software 4 | version: 1.7.2 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - fhk 10 | 11 | repo: 12 | github: fhk/highs-duckdb 13 | ref: 0c343b5d3b54fb63231b445617eaff847dbdc719 14 | 15 | docs: 16 | hello_world: | 17 | SELECT highs_version('Test'); 18 | extended_description: | 19 | HiGHS is software for the definition, modification and solution of large scale sparse linear optimization models. 20 | 21 | HiGHS is freely available from [GitHub](https://github.com/ERGO-Code/HiGHS) under the MIT licence and has no third-party dependencies. 
22 | 23 | 24 | -------------------------------------------------------------------------------- /extensions/substrait/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: substrait 3 | description: Allows conversion and execution of Substrait query plans 4 | version: 1.2.1 5 | language: C++ 6 | build: cmake 7 | license: Apache-2.0 8 | maintainers: 9 | - anshuldata 10 | - cgkiran 11 | - EpsilonPrime 12 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;windows_amd64" 13 | 14 | repo: 15 | github: substrait-io/duckdb-substrait-extension 16 | ref: ec9f8725df7aa22bae7217ece2f221ac37563da4 17 | 18 | docs: 19 | hello_world: | 20 | .mode line 21 | CALL get_substrait('SELECT count(exercise) AS exercise FROM crossfit WHERE difficulty_level <= 5'); 22 | 23 | redirect_from: 24 | - /docs/extensions/substrait 25 | -------------------------------------------------------------------------------- /extensions/dash/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: dash 3 | description: GUI to create interactive plots and dashboards *within* DuckDB 4 | version: 0.0.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - Gropaul 10 | 11 | repo: 12 | github: gropaul/dash 13 | ref: 546d6cc9c412837eb585c9e4d6ce23f2b55e4f19 14 | 15 | docs: 16 | hello_world: | 17 | -- Start dash GUI server 18 | PRAGMA dash; 19 | extended_description: > 20 | The Dash extension allows you to create interactive plots and dashboards locally *within* DuckDB. 21 | For more information, please see the [Dash documentation](https://www.dash.builders/) or check out the 22 | repository [here](https://github.com/gropaul/dash). -------------------------------------------------------------------------------- /extensions/tarfs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: tarfs 3 | description: glob, open and read files within `.tar` archives 4 | version: 1.0.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - Maxxen 10 | 11 | repo: 12 | github: Maxxen/duckdb_tarfs 13 | ref: 6d468b45f38f16d58e49032edd76aa76c2a2b078 14 | 15 | docs: 16 | hello_world: | 17 | SELECT filename FROM read_blob('tar://data/csv/tar/ab.tar/*'); 18 | extended_description: | 19 | This extension provides a duckdb file-system abstraction to read and glob files within __uncompressed__ tar archives. 20 | For more information and information regarding usage, limitations and performance, see the [tarfs README](https://github.com/Maxxen/duckdb_tarfs). 21 | -------------------------------------------------------------------------------- /extensions/pivot_table/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: pivot_table 3 | description: Provides a spreadsheet-style pivot_table function 4 | version: 0.0.2 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - Alex-Monahan 10 | 11 | repo: 12 | github: Alex-Monahan/pivot_table 13 | ref: 3e87d43b4c15a3640b26eb8942ce5e75335c879a 14 | 15 | docs: 16 | hello_world: | 17 | FROM pivot_table(['duckdb_databases'], [], ['database_name'], [], []); 18 | extended_description: | 19 | This extension, pivot_table, allows you to pivot your data using a spreadsheet-like pivot API. It is also similar to the Pandas pivot_table function. 
It does this solely through SQL macros - there are no C++ functions as a part of this extension. 20 | -------------------------------------------------------------------------------- /extensions/quackstore/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: quackstore 3 | description: QuackStore - Smart Block-Based Caching for Remote Files. Speed up repeated queries on remote data with intelligent block-level caching. 4 | version: 1.0.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;windows_amd64;windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads" 9 | maintainers: 10 | - dkosmakovcog 11 | - mullinsms 12 | - rnestertsov 13 | 14 | repo: 15 | github: coginiti-dev/QuackStore 16 | ref: 4646572622c226a41011d74ebab5254afa3fbb3d 17 | ref_next: b03909034d49a77540772d592c7efa6849b3521b 18 | 19 | docs: 20 | extended_description: | 21 | See [README.md](https://github.com/coginiti-dev/QuackStore) -------------------------------------------------------------------------------- /extensions/lsh/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | lsh_min,"Computes band hashes for each input string (or list of existing shingles) based on its MinHash signature","Produces list of 64-bit band hashes","" 3 | lsh_min32,"Computes band hashes for each input string (or list of existing shingles) based on its MinHash signature","Reduces each band hash to 32 bits","" 4 | lsh_euclidean,"Computes band hashes for each input point based on its Euclidean LSH signature","Produces list of 64-bit band hashes","" 5 | lsh_euclidean32,"Computes band hashes for each input point based on its Euclidean LSH signature","Reduces each band hash to 32 bits","" 6 | lsh_jaccard,"Computes Jaccard similarity for each input string pair","Accepts ngram argument, unlike core Jaccard function","" 7 | -------------------------------------------------------------------------------- /extensions/st_read_multi/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: st_read_multi 3 | description: Read multiple geospatial files 4 | version: 0.0.3 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;linux_amd64_musl;windows_amd64_mingw" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - yutannihilation 12 | 13 | repo: 14 | github: yutannihilation/duckdb-ext-st-read-multi 15 | ref: da24cc8bd5cd140a3d98a87206b494bd41011ebe 16 | 17 | docs: 18 | hello_world: | 19 | LOAD spatial; 20 | 21 | SELECT * REPLACE (ST_GeomFromWkb(geometry) as geometry) FROM ST_Read_Multi('path/to/*.geojson'); 22 | extended_description: | 23 | Read multiple geospatial files. Currently, only GeoJSON and GeoPackages are supported. 
24 | -------------------------------------------------------------------------------- /extensions/ulid/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: ulid 3 | description: ULID data type for DuckDB 4 | version: 1.0.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - Maxxen 10 | 11 | repo: 12 | github: Maxxen/duckdb_ulid 13 | ref: b8368f646d57aa1bc73a8fee37621fcb87e4ccd2 14 | 15 | docs: 16 | hello_world: | 17 | SELECT ulid() AS result; 18 | extended_description: | 19 | This extension adds a new `ULID` data type to DuckDB. 20 | A [ULID](https://github.com/ulid/spec) is similar to a UUID except that it also contains a timestamp component, which makes it more suitable for use cases where the order of creation is important. 21 | Additionally, the string representation is lexicographically sortable while preserving the sort order of the timestamps. 22 | -------------------------------------------------------------------------------- /extensions/airport/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: For more information regarding usage, see the [documentation](https://query.farm/duckdb_extension_airport.html). 3 | extension: 4 | build: cmake 5 | description: The Airport extension brings Arrow Flight support to DuckDB, enabling 6 | DuckDB to query, modify, and store data from Arrow Flight servers. 7 | excluded_platforms: wasm_mvp;wasm_eh;wasm_threads 8 | language: C++ 9 | license: MIT 10 | maintainers: 11 | - rustyconover 12 | name: airport 13 | requires_toolchains: parser_tools 14 | test_config: "{\"test_env_variables\":\n {\n \"AIRPORT_TEST_SERVER\": \"grpc+tls://airport-ci.query.farm\"\ 15 | \n }\n}\n" 16 | version: '2025101201' 17 | repo: 18 | github: query-farm/airport 19 | ref: 31c249a2e300207159256d3f42bb6064647ea73a 20 | -------------------------------------------------------------------------------- /extensions/file_dialog/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: file_dialog 3 | description: Choose a file via native file dialog 4 | version: 0.0.3 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;linux_amd64_musl" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - yutannihilation 12 | 13 | repo: 14 | github: yutannihilation/duckdb-ext-file-dialog 15 | ref: 981ba64e4bdfe43e5ff7d98573e3f6c4602bd2cd 16 | 17 | docs: 18 | hello_world: | 19 | FROM read_csv(choose_file()); 20 | 21 | -- Optionally, you can filter files by the extension. For example, this 22 | -- makes the dialog list CSV files only 23 | FROM read_csv(choose_file('csv')); 24 | extended_description: | 25 | This extension is a tiny utility to choose a file interactively. 26 | -------------------------------------------------------------------------------- /extensions/ofquack/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: ofquack 3 | description: The Ofquack extension provides seamless integration between DuckDB and Oracle Fusion via WSDL-based SOAP calls. 
4 | version: 0.0.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - krokozyab 10 | 11 | repo: 12 | github: krokozyab/ofquack 13 | ref: 4f71175a5c91526795a25e9ad6876608714eae6d 14 | 15 | docs: 16 | extended_description: | 17 | The Ofquack extension provides seamless integration between DuckDB and Oracle Fusion via WSDL-based SOAP calls. 18 | It allows you to run arbitrary SQL queries against Oracle Fusion database directly from DuckDB, inferring column names at runtime and returning all data as VARCHAR columns—as native DuckDB tables and as resultsets that can be directly consumed by downstream applications. -------------------------------------------------------------------------------- /extensions/mooncake/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: mooncake 3 | description: Read Iceberg tables written by moonlink in real time 4 | version: 0.0.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - dpxcc 10 | excluded_platforms: windows_amd64;windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads 11 | requires_toolchains: rust 12 | 13 | repo: 14 | github: Mooncake-Labs/duckdb_mooncake 15 | ref: bd9e5f3ade4ae41d5d13d9cfd4b37b09b39996ab 16 | 17 | docs: 18 | hello_world: | 19 | ATTACH DATABASE 'mooncake' (TYPE mooncake, URI '/var/lib/postgresql/data/pg_mooncake/moonlink.sock', DATABASE 'postgres'); 20 | SELECT * FROM mooncake.public.c; 21 | extended_description: | 22 | For more information regarding usage, see [README.md](https://github.com/Mooncake-Labs/duckdb_mooncake/blob/v0.0.1/README.md). 23 | -------------------------------------------------------------------------------- /extensions/splink_udfs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: splink_udfs 3 | description: Phonetic, text normalization and address matching functions for record linkage. 4 | version: 0.0.11 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - RobinL 10 | 11 | repo: 12 | github: moj-analytical-services/splink_udfs 13 | ref: cf00056f887486d0aee0a853a764f9775aa40438 14 | 15 | docs: 16 | hello_world: | 17 | LOAD splink_udfs; 18 | SELECT soundex(unaccent('Jürgen')); -- returns 'J625' 19 | extended_description: | 20 | The splink_udfs extension provides functions for data cleaning and phonetic matching. 21 | 22 | Includes `soundex(str)`, `strip_diacritics(str)`, `unaccent(str)`, 23 | `ngrams(list,n)`, `double_metaphone(str)` 24 | and faster versions of `levenshtein` and `damerau_levenshtein`. 
25 | -------------------------------------------------------------------------------- /extensions/pdal/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | PDAL_Drivers,"Returns the list of supported stage types of a PDAL Pipeline.","","SELECT * FROM PDAL_Drivers();" 3 | PDAL_Read,"Read and import a variety of point cloud data file formats using the PDAL library.","","SELECT * FROM PDAL_Read('./test/data/autzen_trim.laz');" 4 | PDAL_Info,"Read the metadata from point cloud file[s].","","SELECT * FROM PDAL_Info('./test/data/autzen_trim.la*');" 5 | PDAL_Pipeline,"Read and import a point cloud data file, applying also a custom processing pipeline to the data.","","SELECT * FROM PDAL_Pipeline('path/to/your/filename.las', 'path/to/your/pipeline.json');" 6 | COPY_TO_PDAL,"Write collection of data points to a point cloud data file.","","COPY ( ... ) TO './test/data/autzen_new.laz' WITH (FORMAT PDAL, DRIVER 'LAS', CREATION_OPTIONS ('COMPRESSION=true', ...));" 7 | -------------------------------------------------------------------------------- /extensions/rusty_quack/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: rusty_quack 3 | description: Provides a hello world example demo from the Rust-based extension template 4 | version: 0.0.1 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - samansmink 12 | - mlafeldt 13 | 14 | repo: 15 | github: duckdb/extension-template-rs 16 | ref: 2381c7e1c5026f21c92b7ffe3ae7a6b1d8d177c0 17 | 18 | docs: 19 | hello_world: | 20 | FROM rusty_quack('world'); 21 | extended_description: | 22 | The quack extension is based on DuckDB's [Rust Extension Template](https://github.com/duckdb/extension-template-rs/), and it's a great starting point to get started building DuckDB extensions in Rust. 23 | 24 | -------------------------------------------------------------------------------- /extensions/geography/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: geography 3 | description: Global spatial data processing on the sphere 4 | version: 0.1.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads" 9 | maintainers: 10 | - paleolimbot 11 | 12 | repo: 13 | github: paleolimbot/duckdb-geography 14 | ref: 1f66ba5a3a0e72f53c7237987ef743605189fa69 15 | 16 | docs: 17 | hello_world: | 18 | SELECT * FROM s2_data_countries(); 19 | extended_description: | 20 | The geography extension provides global spatial indexing and analysis on the sphere 21 | using Google's s2geometry library. For full documentation, see the 22 | [README](https://github.com/paleolimbot/duckdb-geography/blob/main/README.md) 23 | and [function documentation](https://github.com/paleolimbot/duckdb-geography/blob/main/docs/function-reference.md). 
24 | -------------------------------------------------------------------------------- /extensions/bigquery/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | bigquery_attach,"Attach to a BigQuery project.","","ATTACH 'project=my_gcp_project' as bq (TYPE bigquery);" 3 | bigquery_scan,"Scan a single table directly from BigQuery.",,"SELECT * FROM bigquery_scan('my_gcp_project.quacking_dataset.duck_tbl');" 4 | bigquery_query,"Run a custom GoogleSQL query in BigQuery and read the results.",,"SELECT * FROM bigquery_query('bq', 'SELECT * FROM quacking_dataset.duck_tbl WHERE duck_id = 123');" 5 | bigquery_execute,"Execute an arbitrary GoogleSQL query in BigQuery.",,"CALL bigquery_execute('bq', 'CREATE SCHEMA deluxe_dataset OPTIONS(location=""us"", default_table_expiration_days=3.75);')" 6 | bigquery_jobs,"List jobs in a BigQuery project.","","SELECT * FROM bigquery_jobs('bq');" 7 | bigquery_clear_cache,"Clear the internal caches to refetch the most current project information from BigQuery.","","CALL bigquery_clear_cache();" 8 | -------------------------------------------------------------------------------- /extensions/chaos/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: chaos 3 | description: Creates chaos! ⋆✴︎˚。⋆ Chaos allows you to throw any type of DuckDB exception, or to raise a SIGSEGV, SIGABRT, or SIGBUS signal. 4 | version: 0.0.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - taniabogatsch 10 | repo: 11 | github: taniabogatsch/duckdb-chaos 12 | ref: e112242b794466d2a4a577af0a92fce836708a71 13 | 14 | docs: 15 | hello_world: | 16 | -- Throw exceptions! 17 | SELECT chaos_exception('hello', 'CATALOG'); 18 | SELECT chaos_exception('hello', 'INTERNAL'); 19 | SELECT chaos_exception('hello', 'FATAL'); 20 | 21 | -- Raise signals! 22 | SELECT chaos_signal('SIGSEGV'); 23 | SELECT chaos_signal('SIGABRT'); 24 | SELECT chaos_signal('SIGBUS'); 25 | 26 | extended_description: | 27 | Creates chaos! ⋆✴︎˚。⋆ Chaos allows you to throw any type of DuckDB exception, or to raise a SIGSEGV, SIGABRT, or SIGBUS signal. 28 | Signals do not work on Windows. 29 | 30 | 31 | -------------------------------------------------------------------------------- /extensions/lsh/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: lsh 3 | description: Extension for locality-sensitive hashing (LSH) 4 | version: 0.2.1 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;linux_amd64_musl" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - yoonspark 12 | - ericmanning 13 | 14 | repo: 15 | github: princeton-ddss/lsh 16 | ref: 1df52d0ada664e9282d6c7c5d572f743163e2874 17 | 18 | docs: 19 | hello_world: | 20 | -- Create toy data 21 | CREATE TEMPORARY TABLE temp_names AS 22 | SELECT * FROM ( 23 | VALUES 24 | ('Alice Johnson'), 25 | ('Robert Smith'), 26 | (NULL), 27 | ('Charlotte Brown'), 28 | ) AS t(name); 29 | 30 | -- Apply MinHash 31 | SELECT lsh_min(name, 2, 3, 2, 123) AS hash FROM temp_names; 32 | 33 | extended_description: | 34 | For more information regarding usage, see the [documentation](https://github.com/princeton-ddss/lsh). 
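As a usage sketch building on the example above (same `lsh_min` arguments; the grouping idea is an assumption about how the hash is typically used, not documented behavior), identical MinHash signatures can flag candidate near-duplicates:

```sql
-- Rows whose MinHash signatures collide are candidate near-duplicates
SELECT lsh_min(name, 2, 3, 2, 123) AS hash,
       list(name)                  AS candidates
FROM temp_names
WHERE name IS NOT NULL
GROUP BY hash
HAVING count(*) > 1;
```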
35 | -------------------------------------------------------------------------------- /extensions/quackformers/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: quackformers 3 | description: Bert-based embedding extension. 4 | version: 0.1.4.2 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw;linux_amd64_musl" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - martin-conur 12 | 13 | repo: 14 | github: martin-conur/quackformers 15 | ref: 2781108818462c964239f12ddd7fc232fdf17b1c 16 | 17 | docs: 18 | hello_world: | 19 | SELECT embed('this is an embeddable sentence'); -- This is vanilla BERT (https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) 20 | SELECT embed_jina('this is an embeddable sentence'); -- This is Jina BERT 21 | extended_description: | 22 | Quackformers, a DuckDB extension embeddings. Intended to be used alongside VSS vector search for RAG-type functionalities. 23 | Quackformers is based on DuckDB's [Rust Extension Template](https://github.com/duckdb/extension-template-rs/) 24 | -------------------------------------------------------------------------------- /extensions/observefs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: observefs 3 | description: Provides IO observability to filesystem 4 | version: 0.4.3 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;windows_amd64_mingw" 9 | maintainers: 10 | - dentiny 11 | - peterxcli 12 | 13 | repo: 14 | github: dentiny/duckdb-filesystem-observability 15 | ref: 4f505aadb7835475300ede1e49eced1e65ca8a2a 16 | 17 | docs: 18 | hello_world: | 19 | COPY (SELECT observefs_get_profile()) TO '/tmp/output.txt'; 20 | extended_description: | 21 | This extension provides observability to duckdb filesystems. 22 | It supports a few key features: 23 | - 100% compatible with duckdb httpfs 24 | - Provides both process-wise and bucket-wise latency stats (including histogram and quantile estimation) to all read operations 25 | - Provides cache access insight to duckdb external file cache 26 | - Allows registering ANY duckdb compatible filesystems (i.e., azure filesystem) 27 | -------------------------------------------------------------------------------- /extensions/curl_httpfs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: curl_httpfs 3 | description: httpfs with connection pool, HTTP/2 and async IO. 4 | version: 0.2.3 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;windows_amd64_mingw" 9 | maintainers: 10 | - dentiny 11 | 12 | repo: 13 | github: dentiny/duckdb-curl-filesystem 14 | ref: 5c9f0c085fb0733e00d575a1f76186638cd0c0a8 15 | 16 | docs: 17 | hello_world: | 18 | SELECT length(content) AS char_count FROM read_text('https://raw.githubusercontent.com/dentiny/duck-read-cache-fs/main/test/data/stock-exchanges.csv'); 19 | extended_description: | 20 | This extension rewrites HTTP layer for httpfs based on libcurl and epoll, it provides a few features: 21 | - 100% compatible with httpfs; use curl-based implementation by default, but also allows users to fallback to httplib. 22 | - Enable HTTP/2 by default. 23 | - Implements TCP connection pool. 
24 | - All network IO operations are performed in asynchronously. 25 | -------------------------------------------------------------------------------- /extensions/quack/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: quack 3 | description: Provides a hello world example demo 4 | version: 0.0.2 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - hannes 10 | 11 | # (Optional) param that specifies required extra toolchains 12 | requires_toolchains: "rust" 13 | # (Optional) param that specifies a precise vcpkg commit to use 14 | vcpkg_commit: "ce613c41372b23b1f51333815feb3edd87ef8a8b" 15 | # (Optional) this extension requires additional custom toolchain setup 16 | custom_toolchain_script: true 17 | # (Optional) ';' separated list of additional platforms 18 | opt_in_platforms: "windows_arm64;" 19 | 20 | repo: 21 | github: duckdb/extension-template 22 | ref: e52f46eeca9157124cbc910f52ea8637c95084a1 23 | 24 | docs: 25 | hello_world: | 26 | SELECT quack('world'); 27 | extended_description: | 28 | The quack extension is based on DuckDB's [Extension Template](https://github.com/duckdb/extension-template/), and it's a great starting point to get started building more advanced extensions. 29 | -------------------------------------------------------------------------------- /extensions/netquack/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: netquack 3 | description: DuckDB extension for parsing, extracting, and analyzing domains, URIs, and paths with ease. 4 | version: 1.7.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - hatamiarash7 10 | 11 | repo: 12 | github: hatamiarash7/duckdb-netquack 13 | ref: 76f7263bf02cc698ffaee0bc30ea7d3ddb1b3a3b 14 | 15 | docs: 16 | extended_description: | 17 | This extension designed to simplify working with domains, URIs, IPs, and web paths directly within your database queries. Whether you're extracting top-level domains (TLDs), parsing URI components, or analyzing web paths, Netquack provides a suite of intuitive functions to handle all your network tasks efficiently. Built for data engineers, analysts, and developers. 18 | 19 | With Netquack, you can unlock deeper insights from your web-related datasets without the need for external tools or complex workflows. 20 | 21 | Check the [documentation](https://github.com/hatamiarash7/duckdb-netquack) for more details and examples on each function. 22 | -------------------------------------------------------------------------------- /extensions/psyduck/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: psyduck 3 | description: Pokemon data native in DuckDB 4 | version: 1.0.2 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - Ian-Fogelman 10 | 11 | repo: 12 | github: Ian-Fogelman/psyduck 13 | ref: c4f8faa4d25e4ca8b3cb0bac8cb958b030f1b2b5 14 | 15 | docs: 16 | hello_world: | 17 | -- Create a table of pokémon and return... 
A duck 18 | CREATE TABLE pokemon AS SELECT * FROM list_pokemon(); 19 | SELECT name FROM pokemon WHERE is_duck = 1 AND name LIKE '%Psyduck%'; 20 | 21 | -- Select the top 10 pokémon 22 | SELECT * FROM list_pokemon() WHERE number <= 10; 23 | 24 | -- Select legendary pokémon 25 | SELECT * FROM list_pokemon() WHERE is_legendary = 1; 26 | 27 | -- Select the best pokémon 28 | SELECT number, name, type1, "height(m)", "weight(kg)", base_total FROM list_pokemon() WHERE name = 'Charizard'; 29 | 30 | -- Select pokemon moves 31 | SELECT * FROM list_pokemon_moves(); 32 | 33 | -- Select gen1 items 34 | SELECT * FROM list_pokemon_items(); 35 | -------------------------------------------------------------------------------- /extensions/system_stats/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: system_stats 3 | description: Provides table functions to access system-level statistics for monitoring purpose 4 | version: 0.3.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;windows_amd64_mingw" 9 | maintainers: 10 | - dentiny 11 | 12 | repo: 13 | github: dentiny/system_stats 14 | ref: d69b30ec406f0a9f06a9f73f7d781633a3645ac9 15 | 16 | docs: 17 | hello_world: | 18 | -- Get memory information 19 | SELECT * FROM sys_memory_info(); 20 | 21 | -- Get CPU information 22 | SELECT * FROM sys_cpu_info(); 23 | 24 | -- Get disk information 25 | SELECT * FROM sys_disk_info(); 26 | 27 | -- Get network information 28 | SELECT * FROM sys_network_info(); 29 | 30 | -- Get OS information 31 | SELECT * FROM sys_os_info(); 32 | extended_description: | 33 | The system_stats extension provides table functions to access system-level statistics (including memory, CPU, and disk) that can be used for monitoring. 
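A small monitoring sketch that uses only the table functions listed above (the log-table name is made up for illustration):

```sql
-- Create an empty log table with the same schema plus a timestamp column
CREATE TABLE IF NOT EXISTS sys_memory_log AS
    SELECT now() AS captured_at, * FROM sys_memory_info() LIMIT 0;

-- Run periodically to capture a snapshot
INSERT INTO sys_memory_log
    SELECT now(), * FROM sys_memory_info();

-- Inspect the most recent snapshots
SELECT * FROM sys_memory_log ORDER BY captured_at DESC LIMIT 10;
```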
34 | -------------------------------------------------------------------------------- /extensions/eeagrid/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | EEA_CoordXY2GridNum,"Returns the EEA Reference Grid code to a given XY coordinate (EPSG:3035).","","SELECT EEA_CoordXY2GridNum(5078600, 2871400);" 3 | EEA_GridNum2CoordX,"Returns the X-coordinate (EPSG:3035) of the grid cell corresponding to a given EEA Reference Grid code, optionally truncating the value to a specified resolution.","","SELECT EEA_GridNum2CoordX(23090257455218688);" 4 | EEA_GridNum2CoordY,"Returns the Y-coordinate (EPSG:3035) of the grid cell corresponding to a given EEA Reference Grid code, optionally truncating the value to a specified resolution.","","SELECT EEA_GridNum2CoordY(23090257455218688);" 5 | EEA_GridNumAt100m,"Returns the Grid code at 100 m resolution given an EEA reference Grid code.","","SELECT EEA_GridNumAt100m(23090257455218688);" 6 | EEA_GridNumAt10km,"Returns the Grid code at 10 km resolution given an EEA reference Grid code.","","SELECT EEA_GridNumAt10km(23090257455218688);" 7 | EEA_GridNumAt1km,"Returns the Grid code at 1 km resolution given an EEA reference Grid code.","","SELECT EEA_GridNumAt1km(23090257455218688);" 8 | -------------------------------------------------------------------------------- /.github/workflows/test_all.yml: -------------------------------------------------------------------------------- 1 | name: Test all extensions 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | duckdb_version: 6 | type: string 7 | deploy: 8 | type: string 9 | 10 | jobs: 11 | collect_extensions: 12 | outputs: 13 | COMMUNITY_EXTENSION_LIST: ${{ steps.generate_list.outputs.EXTENSION_LIST }} 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Generate extension list 21 | id: generate_list 22 | run: | 23 | ./scripts/get_extension_list.sh 24 | cat extension_list 25 | cat extension_list >> $GITHUB_OUTPUT 26 | 27 | test_all: 28 | needs: 29 | - collect_extensions 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | extension_name: ${{ fromJson(needs.collect_extensions.outputs.COMMUNITY_EXTENSION_LIST) }} 34 | uses: ./.github/workflows/build.yml 35 | with: 36 | extension_name: ${{ matrix.extension_name }} 37 | duckdb_version: ${{ inputs.duckdb_version }} 38 | deploy: ${{ inputs.deploy }} 39 | -------------------------------------------------------------------------------- /extensions/highs/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | highs_version,Get HiGHS version with greeting message,"Returns a greeting with HiGHS and OpenSSL version info","SELECT highs_version('Sam');" 3 | highs_openssl_version,Get HiGHS and OpenSSL version with greeting,"Returns greeting with both HiGHS and OpenSSL version details","SELECT highs_openssl_version('Michael');" 4 | highs_create_variables,Create optimization variables for a model,"Creates decision variables with bounds and objective coefficients","SELECT * FROM highs_create_variables('model1', 5 | 'x', 0.0, 1e30, 1.0, 'continuous');" 6 | highs_create_constraints,Create constraints for a model,"Defines constraint bounds for the optimization problem","SELECT * FROM highs_create_constraints('model1', 'c1', -1e30, 7.0);" 7 | highs_set_coefficients,Set constraint matrix coefficients,"Associates 
variables with constraints using coefficient values","SELECT * FROM highs_set_coefficients('model1', 'c1', 'x', 8 | 1.0);" 9 | highs_solve,Solve the optimization model,"Executes HiGHS solver and returns optimal solution values","SELECT * FROM highs_solve('model1');" 10 | -------------------------------------------------------------------------------- /extensions/geotiff/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: geotiff 3 | description: Read GeoTIFF rasters as (cell_id, value) via GDAL 4 | version: 0.1.2 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - babaknaimi 10 | excluded_platforms: "windows_amd64_mingw" 11 | 12 | repo: 13 | github: babaknaimi/duckdb-geotiff 14 | ref: 3fa51dbe85cb890dea8416c07c90f30147b9d6a4 15 | 16 | 17 | docs: 18 | hello_world: | 19 | -- one band -> (cell_id, value) 20 | INSTALL geotiff FROM community; 21 | LOAD geotiff; 22 | SELECT * FROM read_geotiff('cea.tif', band := 1) LIMIT 5; 23 | 24 | -- multiple bands -> (cell_id, band1, band2, ...) 25 | SELECT * FROM read_geotiff('cea.tif', band := [1,2,3]) LIMIT 5; 26 | extended_description: | 27 | The geotiff extension streams GeoTIFF rasters using GDAL and exposes them as 28 | DuckDB table functions. For a single band it returns (cell_id BIGINT, value DOUBLE). 29 | If you pass multiple bands (e.g., band := [1,2,3]) it returns a wide schema: 30 | (cell_id, band1, band2, …). This is optimized for fast CTAS/UPDATE patterns 31 | when building “wide” raster tables inside DuckDB. -------------------------------------------------------------------------------- /.github/workflows/clean_caches.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Clean caches for DuckDB's S3 buckets 3 | # 4 | 5 | name: Caches cleanup 6 | on: 7 | workflow_call: 8 | inputs: 9 | # The regex pattern of the extension 10 | pattern: 11 | required: true 12 | type: string 13 | workflow_dispatch: 14 | inputs: 15 | # The regex pattern of the extension 16 | pattern: 17 | required: true 18 | type: string 19 | 20 | jobs: 21 | clean_caches: 22 | name: Clean Caches 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v3 26 | 27 | - name: Deploy 28 | shell: bash 29 | env: 30 | AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_COMMUNITY_EXTENSION_S3_ID }} 31 | AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_COMMUNITY_EXTENSION_S3_SECRET }} 32 | AWS_DEFAULT_REGION: ${{ secrets.S3_DUCKDB_ORG_REGION }} 33 | CLOUDFLARE_CACHE_PURGE_TOKEN: ${{ secrets.CLOUDFLARE_CACHE_PURGE_TOKEN }} 34 | PATTERN: "${{ inputs.pattern }}" 35 | DUCKDB_CLEAN_CACHES_SCRIPT_MODE: for_real 36 | run: | 37 | bash ./scripts/clean_caches.sh duckdb-community-extensions community-extensions "$PATTERN" 38 | 39 | -------------------------------------------------------------------------------- /extensions/mlpack/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | mlpack_adaboost_train,use adaboost to train and store a model,"parameters 'iterations', 'tolerance', 'perceptronIter' and 'silent'","" 3 | mlpack_adaboost_pred,predict classification using stored adaboost model,"","" 4 | mlpack_linear_regression_fit,fit and store linear regression model,"parameters 'lambda', 'intercept' and 'silent'","" 5 | mlpack_linear_regression_pred,predict using stored linear regression model,"","" 6 | mlpack_logistic_regression_fit,fit and store logistic
regression model,"parameters 'lambda', 'intercept' and 'silent'","" 7 | mlpack_logistic_regression_pred,predict classification using stored logistic regression model,"","" 8 | mlpack_random_forest_train,use random forest to train and store a model,"parameters 'nclasses', 'ntrees', 'seed', 'threads' and 'silent'","" 9 | mlpack_random_forest_pred,predict classification using stored random forest model,"","" 10 | mlpack_kmeans,use kmeans unsupervised clustering,"parameters 'clusters', and 'iterations'","" 11 | mlpack_mlpack_version,returns the version string for the mlpack version used,"","" 12 | mlpack_armadillo_version,returns the version string for the armadillo version used,"","" 13 | -------------------------------------------------------------------------------- /extensions/dns/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,fun_type,description,comment,example 2 | "dns_lookup","scalar","Performs a forward DNS lookup to resolve a hostname to its first IPv4 address, or to the first record of a specified DNS record type as second parameter.","","SELECT dns_lookup('google.com');" 3 | "dns_lookup_all","scalar","Performs a forward DNS lookup to resolve a hostname to all its IPv4 addresses, or to all records of a specified DNS record type second parameter.","","SELECT dns_lookup_all('cloudflare.com');" 4 | "reverse_dns_lookup","scalar","Performs a reverse DNS lookup to resolve an IPv4 address given as a parameter to a hostname.","","SELECT reverse_dns_lookup('8.8.8.8');" 5 | "set_dns_config","scalar","Updates the DNS resolver configuration for all subsequent DNS queries.","","SELECT set_dns_config('google');" 6 | "set_dns_concurrency_limit","scalar","Updates the concurrency limit for DNS lookup operations to prevent TCP connection exhaustion.","","SELECT set_dns_concurrency_limit(100);" 7 | "set_dns_cache_size","scalar","Updates the DNS cache size for the resolver.","","SELECT set_dns_cache_size(8192);" 8 | "corey","table","Queries all TXT records for a hostname and returns them as a table with one row per TXT record.","","SELECT * FROM corey('lastweekinaws.com');" -------------------------------------------------------------------------------- /extensions/fit/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: fit 3 | description: Read Garmin .fit files using DuckDB - GPS tracks, heart rate, power metrics, and fitness device data 4 | version: 1.0.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - antoriche 10 | 11 | repo: 12 | github: antoriche/duckdb-fit-extension 13 | ref: 157a5762bba68e84e9affa4af17af45ef15f4be3 14 | 15 | docs: 16 | hello_world: | 17 | -- Read GPS records from a FIT file 18 | SELECT * FROM fit_records('activity.fit') LIMIT 5; 19 | 20 | -- Get activity metadata 21 | SELECT * FROM fit_activities('activity.fit'); 22 | extended_description: | 23 | This extension allows you to read Garmin .fit files directly in DuckDB. 
24 | 25 | **Available table functions:** 26 | - `fit_records(filename)` - Main records with GPS tracks and sensor data 27 | - `fit_activities(filename)` - Activity metadata and summaries 28 | - `fit_sessions(filename)` - Training session information 29 | - `fit_laps(filename)` - Individual lap data and splits 30 | - `fit_devices(filename)` - Device information and sensor details 31 | - `fit_events(filename)` - Activity events and markers 32 | - `fit_users(filename)` - User profile information 33 | -------------------------------------------------------------------------------- /extensions/duckherder/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: duckherder 3 | description: Run DuckDB queries on a remote server 4 | version: 0.0.5 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;windows_amd64_mingw" 9 | requires_toolchains: parser_tools 10 | maintainers: 11 | - dentiny 12 | 13 | repo: 14 | github: dentiny/duckdb-distributed-execution 15 | ref: 02f60de8d45c9d3547b1795d4bc7d0b5c1bcf14f 16 | 17 | docs: 18 | hello_world: | 19 | SELECT duckherder_start_local_server(8815); ATTACH DATABASE 'dh' (TYPE duckherder, server_host 'localhost', server_port 8815); 20 | extended_description: | 21 | This extension is built as a storage extension: it executes queries on a remote server and transfers the results back via Arrow Flight. 22 | For remote and distributed execution, there are two key components: the driver and the worker(s). 23 | As their names suggest, the driver node is the control plane, and the worker nodes perform the actual execution tasks. 24 | Users may implement their own driver and worker(s) and register them with the DuckDB client side, as long as they speak the duckherder dialect (i.e., gRPC stubs and Arrow Flight). 25 | From the user's perspective, all DML and DDL SQL statements work exactly the same as with local DuckDB.
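A hypothetical sketch of that claim: the catalog name `dh` comes from the hello-world snippet above, while the schema and table names are assumptions rather than documented API:

```sql
-- After the ATTACH above, DDL/DML is written as if the catalog were local
CREATE TABLE dh.main.events (id INTEGER, payload VARCHAR);
INSERT INTO dh.main.events VALUES (1, 'hello from the driver');
SELECT count(*) FROM dh.main.events;
```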
26 | -------------------------------------------------------------------------------- /extensions/gcs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: gcs 3 | description: DuckDB GCS Extension 4 | extended_description: A native GCS extension with support for standard Google auth methods 5 | version: 0.0.3 6 | language: C++ 7 | build: cmake 8 | license: MIT 9 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw;windows_amd64" 10 | maintainers: 11 | - northpolesec 12 | 13 | repo: 14 | github: northpolesec/duckdb-gcs 15 | ref: 5465cad377f83637f8233b9756062b98352e3cdd 16 | 17 | docs: 18 | hello_world: | 19 | -- Add auth credentials using Application Default Creds 20 | D CREATE SECRET secret (TYPE gcp, PROVIDER credential_chain); 21 | 22 | -- Read a file from GCS 23 | D SELECT * FROM read_text('gcss://rah-public-gcs-testing/quack.txt'); 24 | ┌─────────────────────────────────────────┬─────────┬───────┬──────────────────────────┐ 25 | │ filename │ content │ size │ last_modified │ 26 | │ varchar │ varchar │ int64 │ timestamp with time zone │ 27 | ├─────────────────────────────────────────┼─────────┼───────┼──────────────────────────┤ 28 | │ gcss://rah-public-gcs-testing/quack.txt │ 🦆 │ 4 │ 2025-09-23 16:20:03-04 │ 29 | └─────────────────────────────────────────┴─────────┴───────┴──────────────────────────┘ 30 | 31 | -------------------------------------------------------------------------------- /extensions/netquack/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | "extract_domain","Extracting the main domain from a URL","","SELECT extract_domain('a.example.com') as domain;" 3 | "extract_host","Extracting the hostname from a URL","","SELECT extract_host('https://b.a.example.com/path/path') as host;" 4 | "extract_path","Extracting the path from a URL","","SELECT extract_path('example.com/path/path/image.png') as path;" 5 | "extract_query_string","Extracting the query string from a URL","","SELECT extract_query_string('example.com?key=value') as query;" 6 | "extract_query_parameters","Extracting the query parameters from a URL","","SELECT * FROM extract_query_parameters('example.com?key=value&key2=value2');" 7 | "extract_schema","Extracting the schema from a URL","","SELECT extract_schema('mailto:someone@example.com') as schema;" 8 | "extract_subdomain","Extracting the subdomain from a URL","","SELECT extract_subdomain('test.example.com.ac') as dns_record;" 9 | "extract_tld","Extracting the top-level domain from a URL","","SELECT extract_tld('a.example.com') as tld;" 10 | "ipcalc","Calculating IP information from a CIDR notation","","SELECT * FROM ipcalc('192.168.1.0/24');" 11 | "get_tranco_rank","Getting the Tranco rank of a domain","","SELECT get_tranco_rank('cloudflare.com') as rank;" 12 | "update_suffixes","Update public suffixes","","SELECT update_suffixes();" 13 | "update_tranco","Update tranco data","","SELECT update_tranco(true);" 14 | -------------------------------------------------------------------------------- /extensions/psql/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: psql 3 | description: Support for PSQL, a piped SQL dialect for DuckDB 4 | version: 1.0.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - ywelsch 10 | 11 | repo: 12 | github: ywelsch/duckdb-psql 13 | ref: 
f0ee86e08020f4b7d7300da5be9c7a0f01c63dce 14 | 15 | docs: 16 | hello_world: | 17 | from 'https://raw.githubusercontent.com/ywelsch/duckdb-psql/main/example/invoices.csv' |> 18 | where invoice_date >= date '1970-01-16' |> 19 | select 20 | *, 21 | 0.8 as transaction_fees, 22 | total - transaction_fees as income |> 23 | where income > 1 |> 24 | select 25 | customer_id, 26 | avg(total), 27 | sum(income) as sum_income, 28 | count() as ct 29 | group by customer_id |> 30 | order by sum_income desc |> 31 | limit 10 |> 32 | as invoices 33 | join 'https://raw.githubusercontent.com/ywelsch/duckdb-psql/main/example/customers.csv' 34 | as customers 35 | on invoices.customer_id = customers.customer_id |> 36 | select 37 | customer_id, 38 | last_name || ', ' || first_name as name, 39 | sum_income, 40 | version() as db_version; 41 | extended_description: | 42 | PSQL extends DuckDB's SQL with a pipe syntax to provide simple composable queries. It's a lightweight variant of piped languages such as PRQL and Kusto, yet leveraging the full power of DuckDB's SQL. 43 | -------------------------------------------------------------------------------- /extensions/prql/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: prql 3 | description: Support for PRQL, the Pipelined Relational Query Language 4 | version: 1.0.0 5 | language: C++ 6 | build: cmake 7 | requires_toolchains: "rust" 8 | license: MIT 9 | excluded_platforms: "linux_amd64_musl" 10 | maintainers: 11 | - ywelsch 12 | 13 | repo: 14 | github: ywelsch/duckdb-prql 15 | ref: 0b411575bb454e96cd5bd8aa97ba1d73ed689a34 16 | 17 | docs: 18 | hello_world: | 19 | let invoices = s"select * from 'https://raw.githubusercontent.com/PRQL/prql/0.8.0/prql-compiler/tests/integration/data/chinook/invoices.csv'" 20 | let customers = s"select * from 'https://raw.githubusercontent.com/PRQL/prql/0.8.0/prql-compiler/tests/integration/data/chinook/customers.csv'" 21 | from invoices 22 | filter invoice_date >= @1970-01-16 23 | derive { 24 | transaction_fees = 0.8, 25 | income = total - transaction_fees 26 | } 27 | filter income > 1 28 | group customer_id ( 29 | aggregate { 30 | average total, 31 | sum_income = sum income, 32 | ct = count total, 33 | } 34 | ) 35 | sort {-sum_income} 36 | take 10 37 | join c=customers (==customer_id) 38 | derive name = f"{c.last_name}, {c.first_name}" 39 | select { 40 | c.customer_id, name, sum_income 41 | } 42 | derive db_version = s"version()" 43 | extended_description: | 44 | The PRQL extension adds support for the [Pipelined Relational Query Language](https://prql-lang.org). 
45 | -------------------------------------------------------------------------------- /extensions/scrooge/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: scrooge 3 | description: Provides functionality for financial data analysis, including data scanners for the Ethereum blockchain and Yahoo Finance 4 | version: 0.0.2 5 | language: C++ 6 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw" 7 | build: cmake 8 | license: MIT 9 | maintainers: 10 | - pdet 11 | 12 | repo: 13 | github: pdet/Scrooge-McDuck 14 | ref: 222e094570f307208258b8faf90dd401fa152acd 15 | 16 | docs: 17 | hello_world: | 18 | -- Set the RPC provider 19 | SET eth_node_url = 'https://mempool.merkle.io/rpc/eth/pk_mbs_0b647b195065b3294a5254838a33d062'; 20 | -- Query Transfer events of USDT from blocks 20034078 - 20034100 while parallelizing on one block per thread 21 | FROM read_eth( 22 | 'USDT', 23 | 'Transfer', 24 | 20034078, 25 | 20034100, 26 | blocks_per_thread = 1 27 | ); 28 | extended_description: | 29 | Scrooge McDuck is a third-party financial extension for DuckDB. 30 | This extension's main goal is to support a set of aggregation functions and data scanners for financial data. 31 | It currently supports access to the logs of Ethereum nodes and stock information from Yahoo Finance. 32 | More information on the supported scanners and functions can be found on Scrooge's [wiki page](https://github.com/pdet/Scrooge-McDuck/wiki). 33 | You can also find an ROI example for Ether in the [following blog post](https://pdet-blog.github.io/2024/06/30/ethereum.html). 34 | -------------------------------------------------------------------------------- /extensions/cache_httpfs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: cache_httpfs 3 | description: Read-cache filesystem for httpfs 4 | version: 0.11.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;windows_amd64_mingw" 9 | maintainers: 10 | - dentiny 11 | - DouEnergy 12 | 13 | repo: 14 | github: dentiny/duck-read-cache-fs 15 | ref: 9c7a70963341fcc57091e5cdcead0b5f9a2d6aea 16 | 17 | docs: 18 | hello_world: | 19 | SELECT cache_httpfs_get_ondisk_data_cache_size(); 20 | extended_description: | 21 | This extension adds a read cache filesystem to DuckDB, which acts as a wrapper of the httpfs extension.
22 | It supports a few key features: 23 | - Supports both file metadata, glob, file handle and data block cache 24 | - Supports both on-disk cache and in-memory cache for data blocks, with cache mode, block size, cache directories tunable 25 | - Supports disk cache file eviction based on access timestamp or LRU, allows tunable disk space reservation 26 | - Supports parallel IO request, with request size and parallelism tunable 27 | - Supports optional cache entries validation, by default off 28 | - Supports profiling for IO latency and cache hit / miss ratio for a few operations (i.e open, read, glob), which provides an insight on workload characterization 29 | - Exposes function to get cache size and cleanup cache 30 | - Provides an option to disable / enable cache, which could act as a drop-in replacement for httpfs 31 | -------------------------------------------------------------------------------- /extensions/onelake/docs/functions_description.csv: -------------------------------------------------------------------------------- 1 | "function" , "description" , "comment", "example" 2 | "onelake_attach" , "Attach to a OneLake workspace and lakehouse." , "" , "ATTACH 'workspace/lakehouse.Lakehouse' AS my_lakehouse (TYPE ONELAKE);" 3 | "onelake_scan" , "Scan a Delta table from the attached lakehouse." , "" , "SELECT * FROM my_lakehouse.my_schema.my_table;" 4 | "onelake_iceberg_scan", "Scan an Iceberg table from the attached lakehouse using the ICEBERG syntax.", "" , "SELECT * FROM my_lakehouse.my_schema.my_table USING ICEBERG;" 5 | "onelake_secret" , "Create a secret for OneLake authentication." , "" , "CREATE SECRET my_secret (TYPE ONELAKE, TENANT_ID '...', CLIENT_ID '...', CLIENT_SECRET '...');" 6 | "onelake_secret_chain", "Create a secret using the credential chain provider." , "" , "CREATE SECRET my_chain (TYPE ONELAKE, PROVIDER credential_chain, CHAIN 'env');" 7 | "onelake_write" , "Insert data into a OneLake table." , "" , "INSERT INTO my_lakehouse.my_schema.my_table SELECT * FROM source_table;" 8 | "onelake_create_table", "Create a new Delta table in OneLake." , "" , "CREATE TABLE my_lakehouse.my_schema.new_table (id INT) PARTITION BY (id);" 9 | "onelake_drop_table" , "Drop a table from OneLake." 
, "" , "DROP TABLE my_lakehouse.my_schema.my_table;" -------------------------------------------------------------------------------- /extensions/magic/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: magic 3 | description: libmagic/file utilities ported to DuckDB 4 | version: 0.0.1 5 | language: C++ 6 | build: cmake 7 | excluded_platforms: "linux_amd64_musl;windows_amd64_rtools;windows_amd64_mingw;windows_amd64" 8 | license: MIT 9 | maintainers: 10 | - carlopi 11 | 12 | repo: 13 | github: carlopi/duckdb_magic 14 | ref: 6a214b48b6dc760e398c73131e00ee62f2c5f1bc 15 | 16 | docs: 17 | hello_world: | 18 | --- Discover autodetected types for files in a local folder 19 | SELECT magic_mime(file), magic_type(file), file 20 | FROM glob('path/to/folder/**'); 21 | 22 | --- Discover autodetected types for a remote file 23 | LOAD httpfs; --- this currently needs to be explicit once per session 24 | SELECT magic_mime(file), magic_type(file), file 25 | FROM glob('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/adam_genotypes.parquet'); 26 | 27 | --- Read a file without specifying its type 28 | FROM read_any('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/adam_genotypes.parquet'); 29 | extended_description: | 30 | A very experimental port of libmagic (the library that powers the UNIX file utility); it allows classifying files based on the content of the header, according to the libmagic library. 31 | Packaged with version 5.45 of the magic library. The magic.mgc database is at the moment statically compiled into the library, so it's the same across platforms but immutable. 32 | Currently not available on Windows and Wasm, due to a different but likely solvable vcpkg packaging issue, to be sorted out independently. 33 | -------------------------------------------------------------------------------- /extensions/nanoarrow/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: nanoarrow 3 | description: Allows the consumption and production of the Apache Arrow interprocess communication (IPC) format, both from files and directly from stream buffers. 4 | version: 1.4.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - paleolimbot 10 | - pdet 11 | - evertlammerts 12 | repo: 13 | github: paleolimbot/duckdb-nanoarrow 14 | ref: 81ec79bc35090c65f6a7c9066bd373b5dd3b7e73 15 | 16 | docs: 17 | hello_world: | 18 | -- Read from a file in Arrow IPC format 19 | FROM 'arrow_file.arrow'; 20 | FROM 'arrow_file.arrows'; 21 | FROM read_arrow('arrow_file.arrow'); 22 | 23 | -- Write a file in Arrow IPC stream format 24 | CREATE TABLE arrow_libraries AS SELECT 'nanoarrow' as name, '0.6' as version; 25 | COPY arrow_libraries TO 'test.arrows' (FORMAT ARROWS, BATCH_SIZE 100); 26 | 27 | -- Write to buffers: This returns IPC message BLOBs and indicates which one is the header. 28 | FROM to_arrow_ipc((FROM arrow_libraries)); 29 | 30 | 31 | extended_description: | 32 | The Arrow IPC library allows users to read and write data in the Arrow IPC stream format. 33 | This can be done by either reading and producing `.arrow` files or by directly reading buffers using their pointers and sizes. 34 | It is important to note that reading buffers is dangerous, as an incorrect pointer can crash the database system.
35 | This process is temporary and will be deprecated in the future, as clients (e.g., the Python DuckDB client) will have a function that internally extracts these buffers from an Arrow stream. 36 | 37 | 38 | -------------------------------------------------------------------------------- /.github/workflows/_extension_archive.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Reusable workflow that deploys the extensions source code 3 | 4 | name: Extension Archieve Sources 5 | on: 6 | workflow_call: 7 | inputs: 8 | # The name of the extension 9 | extension_name: 10 | required: true 11 | type: string 12 | repository: 13 | required: false 14 | type: string 15 | default: "" 16 | ref: 17 | required: false 18 | type: string 19 | default: "" 20 | jobs: 21 | fetch_repo: 22 | name: Fetch repo and save 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v3 26 | with: 27 | repository: ${{ inputs.repository }} 28 | ref: ${{ inputs.ref }} 29 | fetch-depth: 0 30 | submodules: 'true' 31 | path: ./repo 32 | 33 | - name: Compress the repository sources 34 | run: | 35 | rm -rf repo/duckdb 36 | zip -9 -r compressed.zip repo 37 | 38 | - name: Deploy the submodule 39 | shell: bash 40 | env: 41 | AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_COMMUNITY_EXTENSION_S3_ID }} 42 | AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_COMMUNITY_EXTENSION_S3_SECRET }} 43 | AWS_ENDPOINT_URL: ${{ secrets.DUCKDB_COMMUNITY_EXTENSION_S3_ENDPOINT }} 44 | BUCKET_NAME: duckdb-community-extensions 45 | PIP_BREAK_SYSTEM_PACKAGES: 1 46 | run: | 47 | python3 -m pip install awscli 48 | if [ "${AWS_SECRET_ACCESS_KEY}" ]; then 49 | aws s3 cp compressed.zip s3://${BUCKET_NAME}/archived_extension_sources/${{ inputs.extension_name }}/repo_sources_${{ inputs.ref }}.zip 50 | fi 51 | -------------------------------------------------------------------------------- /extensions/jwt/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: jwt 3 | description: Decode and work with JWT (JSON Web Token) in SQL queries 4 | version: 0.0.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - GalvinGao 10 | 11 | repo: 12 | github: GalvinGao/duckdb_jwt 13 | ref: 9511967148dd532b45e4088182501187559f1829 14 | 15 | docs: 16 | decode_jwt: | 17 | -- Decode a JWT token payload 18 | SELECT jwt_decode_payload('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c') AS payload; 19 | 20 | extract_claim: | 21 | -- Extract the 'sub' claim from a JWT token 22 | SELECT 23 | json_extract( 24 | jwt_decode_payload('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c'), 25 | '$.sub' 26 | ) AS subject; 27 | 28 | extended_description: | 29 | The JWT extension provides functionality to decode and work with JWT (JSON Web Token) directly in DuckDB SQL queries. 
30 | 31 | ## Features 32 | 33 | - `jwt_decode_payload(token)`: Decodes the payload part of a JWT token and returns it as a JSON string 34 | - Base64 URL-safe decoding for JWT token components 35 | 36 | ## Use Cases 37 | 38 | - Analyzing JWT tokens in your data 39 | - Extracting claims from authentication tokens 40 | - Debugging JWT-based authentication systems 41 | - Working with JWT tokens in data pipelines 42 | 43 | You can combine the JWT functions with DuckDB's built-in JSON functionality to extract specific claims from tokens. -------------------------------------------------------------------------------- /extensions/arrow/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: arrow 3 | description: This extension is an alias to the nanoarrow extension. Allows the consumption and production of the Apache Arrow interprocess communication (IPC) format, both from files and directly from stream buffers. 4 | version: 1.2.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | requires_toolchains: "python3" 9 | maintainers: 10 | - pdet 11 | repo: 12 | github: duckdb/duckdb-extension-alias 13 | ref: 7e7a9c0dd16ad46b3c2da4e96dedbfbce4aa67ae 14 | canonical_name: nanoarrow 15 | 16 | docs: 17 | hello_world: | 18 | -- Read from a file in Arrow IPC format 19 | FROM 'arrow_file.arrow'; 20 | FROM 'arrow_file.arrows'; 21 | FROM read_arrow('arrow_file.arrow'); 22 | 23 | -- Write a file in Arrow IPC stream format 24 | CREATE TABLE arrow_libraries AS SELECT 'nanoarrow' as name, '0.6' as version; 25 | COPY arrow_libraries TO 'test.arrows' (FORMAT ARROWS, BATCH_SIZE 100); 26 | 27 | -- Write to buffers: This returns IPC message BLOBs and indicates which one is the header. 28 | FROM to_arrow_ipc((FROM arrow_libraries)); 29 | 30 | 31 | extended_description: | 32 | The Arrow IPC library allows users to read and write data in the Arrow IPC stream format. 33 | This can be done by either reading and producing `.arrow` files or by directly reading buffers using their pointers and sizes. 34 | It is important to note that reading buffers is dangerous, as an incorrect pointer can crash the database system. 35 | This process is temporary and will be deprecated in the future, as clients (e.g., the Python DuckDB client) will have a function that internally extracts these buffers from an Arrow stream. 
36 | 37 | 38 | -------------------------------------------------------------------------------- /.github/workflows/build_all.yml: -------------------------------------------------------------------------------- 1 | name: Build all extensions 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | duckdb_version: 6 | type: string 7 | duckdb_tag: 8 | type: string 9 | deploy: 10 | type: string 11 | build_extensions: 12 | description: "List of extensions to build, formatted as: `['ext1', 'ext2']`" 13 | type: string 14 | required: false 15 | default: '' 16 | 17 | 18 | # # 19 | # WARNING: BUILDING ALL EXTENSIONS AT ONCE IS NOT RECOMMENDED, USE `scripts/create_build_all_invocation.py` TO SPLIT THE BUILD INTO PARTS 20 | # # 21 | 22 | jobs: 23 | collect_extensions: 24 | outputs: 25 | COMMUNITY_EXTENSION_LIST: ${{ steps.generate_list.outputs.EXTENSION_LIST }} 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v4 29 | with: 30 | fetch-depth: 0 31 | 32 | - name: Generate extension list 33 | id: generate_list 34 | run: | 35 | if [[ "${{inputs.build_extensions}}" == "" ]]; then 36 | ./scripts/get_extension_list.sh 37 | cat extension_list 38 | cat extension_list >> $GITHUB_OUTPUT 39 | else 40 | echo "EXTENSION_LIST=${{inputs.build_extensions}}" >> $GITHUB_OUTPUT 41 | fi 42 | 43 | build_all: 44 | needs: 45 | - collect_extensions 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | extension_name: ${{ fromJson(needs.collect_extensions.outputs.COMMUNITY_EXTENSION_LIST) }} 50 | uses: ./.github/workflows/build.yml 51 | secrets: inherit 52 | with: 53 | extension_name: ${{ matrix.extension_name }} 54 | duckdb_version: ${{ inputs.duckdb_version }} 55 | duckdb_tag: ${{ inputs.duckdb_tag }} 56 | deploy: ${{ inputs.deploy }} 57 | -------------------------------------------------------------------------------- /extensions/sshfs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: sshfs 3 | description: Allows reading and writing files over SSH 4 | version: 1.0.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;windows_amd64;wasm_threads;wasm_eh;wasm_mvp;linux_amd64_musl;" 9 | requires_toolchains: "vcpkg" 10 | maintainers: 11 | - onnimonni 12 | vcpkg_commit: 'dd3097e305afa53f7b4312371f62058d2e665320' 13 | repo: 14 | github: midwork-finds-jobs/duckdb-sshfs 15 | ref: d1884ceb1e3912d8d636998156a5016c3abe87be 16 | 17 | docs: 18 | hello_world: | 19 | -- Install & load the extension 20 | INSTALL sshfs FROM community; 21 | LOAD sshfs; 22 | 23 | -- Authenticate with SSH key file 24 | CREATE SECRET my_hetzner_storagebox ( 25 | TYPE SSH, 26 | USERNAME 'user', 27 | KEY_PATH '/Users/' || getenv('USER') || '/.ssh/storagebox_key', 28 | PORT 23 29 | SCOPE 'sshfs://u123456.your-storagebox.de' 30 | ); 31 | 32 | -- or with password 33 | CREATE SECRET my_server ( 34 | TYPE SSH, 35 | USERNAME 'user', 36 | PASSWORD 'password', 37 | SCOPE 'sshfs://your-server.example.com' 38 | ); 39 | 40 | -- Write data to remote server 41 | COPY (SELECT * FROM large_table) 42 | TO 'sshfs://your-server.example.com/data.parquet'; 43 | 44 | -- Read The uploaded parquet file using WebDAV 45 | SELECT * FROM 'sshfs://your-server.example.com/data.parquet'; 46 | 47 | extended_description: | 48 | DuckDB sshfs extension enables seamless integration with servers through ssh, allowing users to read from and write to remote file systems directly within DuckDB. 
49 | 50 | See: https://github.com/midwork-finds-jobs/duckdb-sshfs/blob/main/README.md for more examples and details. 51 | -------------------------------------------------------------------------------- /extensions/pyroscope/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: pyroscope 3 | description: DuckDB Pyroscope Extension for Continuous Profiling 4 | version: 0.1.3 5 | language: Rust 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;windows_amd64;wasm_threads;wasm_eh;wasm_mvp;linux_amd64_musl;" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - lmangani 12 | - akvlad 13 | 14 | repo: 15 | github: quackscience/duckdb-extension-pyroscope 16 | ref: e72cf4cdbfbe79476f55adb2a98aec2792d72f68 17 | 18 | docs: 19 | hello_world: | 20 | ---- Start the tracer, requires backend URL 21 | D SELECT * FROM trace_start('https://pyroscope:4000'); 22 | 23 | ---- Stop the tracer 24 | D SELECT * FROM trace_stop(); 25 | 26 | extended_description: | 27 | ### Pyroscope Continuous Profiling 28 | This experimental community extension adds pyroscope continuous profiling features to DuckDB 29 | 30 | #### Grafana 31 | Create a `Free` account on [Grafana Cloud](https://grafana.com/auth/sign-up/create-user?pg=prod-cloud&plcmt=hero-btn-1) create a Token for Pyroscope profile sending and use the extension: 32 | ```sql 33 | ---- Start the tracer to Grafana Cloud Pyroscope 34 | D SELECT * FROM trace_start('https://user:token@profiles-prod-xxx.grafana.net'); 35 | ``` 36 | 37 | #### Gigapipe 38 | Create a `Free` account on [Gigapipe](https://gigapipe.com) create a Token for Pyroscope profile sending and use the extension: 39 | ```sql 40 | ---- Start the tracer to Grafana Cloud Pyroscope 41 | D SELECT * FROM trace_start('https://user:token@your-account.gigapipe.com'); 42 | ``` 43 | 44 | ![pyroscope_duckdb_large](https://github.com/user-attachments/assets/74fad3ec-3bc3-4880-be4b-8149c5431115) 45 | 46 | -------------------------------------------------------------------------------- /.github/workflows/generate_docs.yml: -------------------------------------------------------------------------------- 1 | name: Community Extension Generate Docs 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | extension_name: 6 | type: string 7 | duckdb_version: 8 | type: string 9 | workflow_call: 10 | inputs: 11 | extension_name: 12 | type: string 13 | duckdb_version: 14 | type: string 15 | 16 | env: 17 | GH_TOKEN: ${{ github.token }} 18 | 19 | jobs: 20 | docs: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | with: 25 | repository: 'duckdb/community-extensions' 26 | 27 | - name: Set up DuckDB 28 | run: | 29 | wget https://github.com/duckdb/duckdb/releases/download/v1.4.3/duckdb_cli-linux-amd64.zip 30 | unzip duckdb_cli-linux-amd64.zip 31 | chmod +x duckdb 32 | 33 | - name: Fetch extensions 34 | if: ${{ inputs.extension_name == '' }} 35 | run: | 36 | ./scripts/fetch_extensions.sh ./duckdb 37 | 38 | - uses: actions/download-artifact@v4 39 | if: ${{ inputs.extension_name != '' }} 40 | with: 41 | name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version}}-extension-linux_amd64 42 | path: build/downloaded 43 | 44 | - name: Install downloaded extension 45 | if: ${{ inputs.extension_name != '' }} 46 | env: 47 | NAME: ${{ inputs.extension_name }} 48 | run: | 49 | ./duckdb -c "SET extension_directory = 'build/extension_dir'; FORCE INSTALL 'build/downloaded/$NAME.duckdb_extension';" 50 | 51 
| - name: Generate docs 52 | run: | 53 | ./scripts/generate_md.sh ./duckdb 54 | zip -r build/generated_md.zip build/docs 55 | 56 | - uses: actions/upload-artifact@v4 57 | with: 58 | name: generated_markdowns 59 | path: | 60 | build/generated_md.zip 61 | -------------------------------------------------------------------------------- /extensions/nats_js/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: nats_js 3 | description: Query NATS JetStream message streams directly with SQL 4 | version: 0.1.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "linux_amd64_musl;windows_amd64_mingw" 9 | maintainers: 10 | - brannn 11 | 12 | repo: 13 | github: brannn/duckdb-nats-jetstream 14 | ref: v0.1.1 15 | 16 | docs: 17 | hello_world: | 18 | -- Query messages from a NATS JetStream stream 19 | SELECT subject, seq, ts_nats, payload 20 | FROM nats_scan('telemetry') 21 | WHERE seq BETWEEN 1 AND 100; 22 | 23 | -- Extract JSON fields with dot notation 24 | SELECT device_id, zone, kw 25 | FROM nats_scan('telemetry', 26 | json_extract := ['device_id', 'zone', 'kw'] 27 | ); 28 | 29 | -- Extract Protocol Buffers fields with nested navigation 30 | SELECT device_id, location_zone, metrics_kw 31 | FROM nats_scan('telemetry', 32 | proto_file := 'telemetry.proto', 33 | proto_message := 'Telemetry', 34 | proto_extract := ['device_id', 'location.zone', 'metrics.kw'] 35 | ); 36 | 37 | extended_description: | 38 | The NATS JetStream extension enables direct SQL querying of NATS JetStream message streams without establishing durable consumers. 39 | 40 | Features: 41 | - Sequence-based range queries (start_seq, end_seq) 42 | - Timestamp-based range queries with binary search (start_time, end_time) 43 | - Subject filtering for targeted message retrieval 44 | - JSON payload extraction with dot notation for nested fields 45 | - Protocol Buffers support with runtime schema parsing and nested message navigation 46 | 47 | Perfect for ETL workflows, analytics, and ad-hoc querying of message streams. 48 | 49 | GitHub: https://github.com/brannn/duckdb-nats-jetstream 50 | 51 | -------------------------------------------------------------------------------- /scripts/create_build_all_invocation.py: -------------------------------------------------------------------------------- 1 | # WHAT'S THIS? 
2 | # 3 | # > A script to split up the workflow invocation in more manageable chunks to avoid overloading the GitHub UIs 4 | # 5 | # HOW TO USE 6 | # 7 | # > python3 scripts/create_build_all_invocation.py --batch_size=5 --duckdb_tag=v1.4.1 --duckdb_version=b390a7c3760bd95926fe8aefde20d04b349b472e 8 | # 9 | 10 | import argparse 11 | import json 12 | import subprocess 13 | from math import ceil 14 | import glob 15 | import yaml 16 | 17 | 18 | def read_extension_list(): 19 | extensions = [] 20 | for path in glob.glob('./extensions/*/description.yml'): 21 | with open(path) as f: 22 | desc = yaml.safe_load(f) 23 | extensions.append(desc['extension']['name']) 24 | return extensions 25 | 26 | 27 | def split_into_batches(lst, batch_size): 28 | return [lst[i:i + batch_size] for i in range(0, len(lst), batch_size)] 29 | 30 | 31 | def main(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('--duckdb_version', type=str, help='DuckDB version hash', required=True) 34 | parser.add_argument('--duckdb_tag', type=str, help='DuckDB version tag', required=True) 35 | 36 | # Number of extensions to build per workflow invocation 37 | parser.add_argument('--batch_size', type=int, help='Number of extensions per invocation', required=False, default=10) 38 | 39 | args = parser.parse_args() 40 | 41 | extensions = read_extension_list() 42 | batches = split_into_batches(extensions, args.batch_size) 43 | 44 | for batch in batches: 45 | json_str = json.dumps(batch).replace('"', "'") 46 | cmd = f'gh workflow run build_all.yml -f build_extensions=\"{json_str}\" -f duckdb_version={args.duckdb_version} -f duckdb_tag={args.duckdb_tag} -f deploy=true' 47 | print(cmd) 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /extensions/miniplot/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: miniplot 3 | description: Interactive chart visualization for DuckDB - faster than pandas with Plotly-like charts 4 | version: 0.0.3 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - nkwork9999 10 | 11 | repo: 12 | github: nkwork9999/miniplot 13 | ref: 32fc16fb4beec1fe5c3b1a705a00426e8dc3863a 14 | 15 | docs: 16 | hello_world: | 17 | -- Bar chart (opens in browser) 18 | SELECT bar_chart( 19 | ['Q1', 'Q2', 'Q3', 'Q4'], 20 | [100.0, 150.0, 200.0, 180.0], 21 | 'Quarterly Sales' 22 | ); 23 | 24 | -- Line chart 25 | SELECT line_chart( 26 | ['Mon', 'Tue', 'Wed', 'Thu', 'Fri'], 27 | [5000.0, 6500.0, 4800.0, 7200.0, 8500.0], 28 | 'Weekly Revenue' 29 | ); 30 | 31 | -- Scatter chart 32 | SELECT scatter_chart( 33 | [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], 34 | [10.0, 25.0, 30.0, 45.0, 60.0, 75.0, 85.0, 95.0], 35 | 'Performance vs Time' 36 | ); 37 | 38 | -- Area chart 39 | SELECT area_chart( 40 | ['Jan', 'Feb', 'Mar', 'Apr', 'May'], 41 | [1000.0, 1500.0, 1200.0, 1800.0, 2100.0], 42 | 'Monthly Growth' 43 | ); 44 | 45 | -- Save to file without opening browser 46 | SELECT bar_chart( 47 | ['A', 'B', 'C'], 48 | [10.0, 20.0, 15.0], 49 | 'Sample Chart', 50 | 'output.html' 51 | ); 52 | 53 | -- 3D Scatter chart 54 | SELECT scatter_3d_chart( 55 | [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 56 | [2.0, 4.0, 1.0, 5.0, 3.0, 6.0], 57 | [3.0, 1.0, 4.0, 2.0, 5.0, 3.5], 58 | '3D Data Visualization' 59 | ); 60 | 61 | -- Visualize CSV data 62 | SELECT line_chart( 63 | list(month), 64 | list(sales), 65 | 'Monthly Sales from CSV' 66 | ) FROM read_csv('sales.csv'); 67 | 
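One more hedged sketch in the same vein as the CSV example above: aggregate first, then hand the label and value lists to `bar_chart` (signature as shown above; the `orders` table and its columns are hypothetical):

```sql
-- Aggregate a hypothetical orders table, then plot the result
SELECT bar_chart(
    list(region ORDER BY region),
    list(total_sales ORDER BY region),
    'Sales by Region'
)
FROM (
    SELECT region, sum(amount)::DOUBLE AS total_sales
    FROM orders
    GROUP BY region
) AS t;
```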
-------------------------------------------------------------------------------- /extensions/bvh2sql/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: bvh2sql 3 | description: BVH (BioVision Hierarchy) motion capture file parser for DuckDB 4 | version: 1.0.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - nkwork9999 10 | 11 | repo: 12 | github: nkwork9999/bvh2sql 13 | ref: 62e96ba71a1bc57175a18827a2ad807bff9bfe5f 14 | 15 | docs: 16 | hello_world: | 17 | -- Load the extension 18 | LOAD bvh2sql; 19 | 20 | -- Read BVH file and get absolute joint positions 21 | SELECT frame_id, time, joint_name, world_x, world_y, world_z 22 | FROM bvh_absolute_positions('motion.bvh') 23 | WHERE joint_name = 'Hips' 24 | LIMIT 10; 25 | 26 | -- Analyze joint movement over time 27 | SELECT 28 | joint_name, 29 | AVG(world_y) as avg_height, 30 | MAX(world_y) - MIN(world_y) as height_range 31 | FROM bvh_absolute_positions('motion.bvh') 32 | GROUP BY joint_name 33 | ORDER BY avg_height DESC; 34 | 35 | extended_description: | 36 | The BVH2SQL extension allows you to read BioVision Hierarchy (BVH) motion capture files 37 | directly into DuckDB for analysis and processing. 38 | 39 | **Features:** 40 | - Parse BVH hierarchy structure with joint offsets 41 | - Calculate absolute world positions for all joints using matrix transformations 42 | - Support for position channels (Xposition, Yposition, Zposition) 43 | - Support for rotation channels (Xrotation, Yrotation, Zrotation) 44 | - Efficient frame-by-frame processing 45 | - Returns data as a standard SQL table with columns: frame_id, time, joint_name, world_x, world_y, world_z, rot_x, rot_y, rot_z 46 | 47 | **Use Cases:** 48 | - Motion capture data analysis 49 | - Animation retargeting preparation 50 | - Character movement statistics 51 | - Biomechanics research 52 | - Game development data pipeline 53 | -------------------------------------------------------------------------------- /extensions/gaggle/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: gaggle 3 | description: A DuckDB extension for working with Kaggle datasets 4 | version: 0.1.0-alpha.4 5 | language: Rust & C++ 6 | build: cmake 7 | license: MIT OR Apache-2.0 8 | maintainers: 9 | - habedi 10 | excluded_platforms: "windows_amd64_mingw;osx_amd64;wasm_mvp;wasm_eh;wasm_threads" 11 | requires_toolchains: rust 12 | 13 | repo: 14 | github: CogitatorTech/gaggle 15 | ref: 0ed1f71b40d9cc245b8ff6aceeb5da5df33396e7 16 | 17 | docs: 18 | hello_world: | 19 | -- 0. Assuming the extension is already installed and loaded 20 | 21 | -- 1. Get extension version 22 | SELECT gaggle_version(); 23 | 24 | -- 2. List files in the dataset 25 | SELECT * FROM gaggle_ls('habedi/flickr-8k-dataset-clean', true) LIMIT 5; 26 | 27 | -- 3. Read a Parquet file FROM local cache using a prepared statement 28 | PREPARE rp as SELECT * FROM read_parquet(?) LIMIT 10; 29 | EXECUTE rp(gaggle_file_path('habedi/flickr-8k-dataset-clean', 'flickr8k.parquet')); 30 | 31 | -- 4. Alternatively, we can use a replacement scan to read directly via `kaggle:` prefix 32 | SELECT COUNT(*) FROM 'kaggle:habedi/flickr-8k-dataset-clean/flickr8k.parquet'; 33 | 34 | -- 5. Check cache info 35 | SELECT gaggle_cache_info(); 36 | 37 | -- 6. Check if cached dataset is current (is the newest version?) 
38 | SELECT gaggle_is_current('habedi/flickr-8k-dataset-clean'); 39 | 40 | extended_description: | 41 | Gaggle is a DuckDB extension that uses the Kaggle API to let you query Kaggle datasets directly with SQL. 42 | It aims to simplify the data science workflows by hiding the complexity of manually downloading, extracting, and managing dataset files from Kaggle. 43 | 44 | For more information, like API references and usage examples, visit the project's [GitHub repository](https://github.com/CogitatorTech/gaggle). 45 | -------------------------------------------------------------------------------- /extensions/infera/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: infera 3 | description: A DuckDB extension for in-database inference 4 | version: 0.2.0 5 | language: Rust & C++ 6 | build: cmake 7 | license: MIT OR Apache-2.0 8 | maintainers: 9 | - habedi 10 | excluded_platforms: "windows_amd64_mingw;osx_amd64;wasm_mvp;wasm_eh;wasm_threads" 11 | requires_toolchains: rust 12 | 13 | repo: 14 | github: CogitatorTech/infera 15 | ref: c17833bed6e5d7af9ce75fa4fc414431600a5514 16 | 17 | docs: 18 | hello_world: | 19 | -- 0. Assuming the extension is already installed and loaded 20 | 21 | -- 1. Load a simple linear model from a remote URL 22 | SELECT infera_load_model('linear_model', 23 | 'https://github.com/CogitatorTech/infera/raw/refs/heads/main/test/models/linear.onnx'); 24 | 25 | -- 2. Run a prediction using a very simple linear model 26 | -- Model: y = 2*x1 - 1*x2 + 0.5*x3 + 0.25 27 | SELECT infera_predict('linear_model', 1.0, 2.0, 3.0); 28 | -- Expected output: 1.75 29 | 30 | -- 3. Unload the model when we're done with it 31 | SELECT infera_unload_model('linear_model'); 32 | 33 | -- 4. Check the Infera version 34 | SELECT infera_get_version(); 35 | 36 | extended_description: | 37 | Infera extension allows users to use machine learning models directly in SQL queries to perform inference on data stored in DuckDB tables. 38 | It is developed in Rust and uses [Tract](https://github.com/snipsco/tract) as the backend inference engine. 39 | Infera supports loading and running models in [ONNX](https://onnx.ai/) format. 40 | Check out the [ONNX Model Zoo](https://huggingface.co/onnxmodelzoo) repository on Hugging Face for a large collection of ready-to-use models that can be used with Infera. 41 | 42 | For more information, like API references and usage examples, visit the project's [GitHub repository](https://github.com/CogitatorTech/infera). 43 | -------------------------------------------------------------------------------- /extensions/quackfix/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: quackfix 3 | description: Allows the consumption FIX protocol format logs, from all supported duckdb filesystems. 
4 | version: 0.0.2 5 | language: c++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - hyehudai 10 | 11 | repo: 12 | github: hyehudai/quackfix 13 | ref: cddee855946b0eb70087792982b9c978f491d5e3 14 | 15 | docs: 16 | hello_world: | 17 | -- Basic FIX parsing - from http location 18 | D SELECT OrderID, ExecID, LastQty FROM read_fix('https://raw.githubusercontent.com/hyehudai/QuackFIX/refs/heads/main/testdata/sample.fix') WHERE MsgType = '8' ORDER BY OrderID; 19 | ─────────┬──────────┬─────────┐ 20 | │ OrderID │ ExecID │ LastQty │ 21 | │ varchar │ varchar │ double │ 22 | ├─────────┼──────────┼─────────┤ 23 | │ EXEC001 │ TRADE001 │ 100.0 │ 24 | │ EXEC002 │ TRADE002 │ 50.0 │ 25 | │ EXEC003 │ TRADE003 │ 200.0 │ 26 | └─────────┴──────────┴─────────┘ 27 | 28 | extended_description: | 29 | 30 | QuackFIX is a DuckDB extension that lets you query FIX logs directly with SQL. It parses raw FIX messages into a structured, queryable format, making FIX log analysis faster and more intuitive for trading, compliance, and financial operations. 31 | 32 | 33 | **Native DuckDB Integration** 34 | Query FIX logs directly in DuckDB—no pre-parsing, no pandas round-trips. 35 | No Glue code! 36 | 37 | **Dialect-Aware** 38 | Supports custom FIX dialects via XML dictionaries, so venue-specific tags just work. 39 | 40 | **Fast and Scalable** 41 | Built on DuckDB’s in-memory, columnar engine to efficiently handle large log volumes. 42 | 43 | **Support multiple FIX dialects** 44 | 45 | > For examples and instructions check out [User Guide](https://github.com/hyehudai/QuackFIX/blob/main/USERGUIDE.md) 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /extensions/http_request/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: http_request 3 | description: HTTP client extension for DuckDB with GET/POST/PUT/PATCH/DELETE and byte-range requests 4 | version: 0.3.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_mingw" 9 | maintainers: 10 | - onnimonni 11 | 12 | repo: 13 | github: midwork-finds-jobs/duckdb_http_request 14 | ref: 5122074317dfcfaebd38987e1db64c78005c94b2 15 | 16 | docs: 17 | hello_world: | 18 | -- Simple GET request 19 | SELECT http_get('https://example.com/'); 20 | 21 | -- Access response fields 22 | SELECT 23 | r.status, 24 | r.content_type, 25 | r.content_length, 26 | r.cookies[1].name as first_cookie 27 | FROM (SELECT http_get('https://example.com/') as r); 28 | 29 | -- GET with custom headers 30 | SELECT http_get('https://httpbin.org/get', headers := {'Accept': 'application/json'}).body; 31 | 32 | -- Byte-range request for partial content 33 | SELECT http_get( 34 | 'https://example.com/largefile.gz', 35 | {'Range': byte_range(0, 1024)} 36 | ); 37 | 38 | extended_description: | 39 | HTTP client extension providing scalar and table functions for making HTTP requests. 
40 | 41 | Features: 42 | - All HTTP methods: GET, HEAD, POST, PUT, PATCH, DELETE 43 | - Parallel execution: requests within a chunk run concurrently 44 | - Custom headers via STRUCT parameter 45 | - Query parameters via STRUCT 46 | - Byte-range requests with helper function 47 | - Auto-decompression of gzip/zstd responses 48 | - Form-encoded POST with http_post_form() 49 | - Parsed Set-Cookie headers into structured cookies array 50 | - Convenience fields: content_type, content_length 51 | - Respects DuckDB HTTP and proxy settings 52 | - Configurable concurrency via http_max_concurrency setting (default: 32) 53 | 54 | Uses DuckDB's built-in httplib for HTTP connections. 55 | -------------------------------------------------------------------------------- /extensions/wireduck/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: wireduck 3 | description: Read and dissect PCAP files from DuckDB 4 | version: 0.0.2 5 | language: c++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "osx_amd64;osx_arm64;windows_amd64;wasm" 9 | maintainers: 10 | - hyehudai 11 | 12 | repo: 13 | github: hyehudai/wireduck 14 | ref: 19c4018cc8ebad08547d621bcdc25df86294ca2b 15 | 16 | docs: 17 | hello_world: | 18 | -- Basic PCAP reader for local files. 19 | D select count(*), sum("tcp.len"), "tcp.srcport", "tcp.dstport" from read_pcap('~/wireduck/fix.pcap', protocols:=['ip','tcp'], climit:=100) group by "tcp.srcport", "tcp.dstport"; 20 | ┌──────────────┬────────────────┬─────────────┬─────────────┐ 21 | │ count_star() │ sum("tcp.len") │ tcp.srcport │ tcp.dstport │ 22 | │ int64 │ int128 │ int64 │ int64 │ 23 | ├──────────────┼────────────────┼─────────────┼─────────────┤ 24 | │ 429 │ 259678 │ 11001 │ 53867 │ 25 | │ 56 │ 19702 │ 53867 │ 11001 │ 26 | └──────────────┴────────────────┴─────────────┴─────────────┘ 27 | 28 | extended_description: | 29 | ### PCAP dissector extension 30 | Wireduck runs tshark behind the scenes, utilizing Wireshark's protocol glossary to parse any packet from any supported protocol into its fields, 31 | enabling network data analysis and analytics. 32 | 33 | ### Features 34 | - read_pcap table function. 35 | - supports any protocol supported by Wireshark. 36 | - allows pushing filters down to Wireshark via the cfilter and climit parameters. 37 | 38 | ### Prerequisites 39 | tshark (installed as part of Wireshark) must be installed. 40 | Validate that it exists via 41 | ``` 42 | tshark --version 43 | ``` 44 | > For examples and instructions check out the [Readme](https://github.com/hyehudai/wireduck) 45 | 46 | 47 | > Note: Wireduck is still experimental.
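    For example, a capture filter and a packet limit can be pushed down to tshark together. This is a minimal sketch: `read_pcap`, `protocols`, and `climit` follow the example above, while the `cfilter` value is illustrative, standard capture-filter syntax:
    ```sql
    SELECT "tcp.srcport", count(*) AS packets
    FROM read_pcap('~/wireduck/fix.pcap',
                    protocols := ['ip', 'tcp'],
                    cfilter   := 'tcp port 11001',
                    climit    := 1000)
    GROUP BY "tcp.srcport";
    ```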
48 | -------------------------------------------------------------------------------- /extensions/web_archive/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: web_archive 3 | description: Query Common Crawl and Wayback Machine web archive CDX APIs directly from SQL 4 | version: 0.2.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - onnimonni 10 | 11 | repo: 12 | github: midwork-finds-jobs/duckdb-web-archive 13 | ref: v0.2.1 14 | 15 | docs: 16 | hello_world: | 17 | -- Find archived pages from Wayback Machine 18 | SELECT url, timestamp, statuscode 19 | FROM wayback_machine() 20 | WHERE url LIKE 'example.com/%' 21 | AND statuscode = 200 22 | LIMIT 10; 23 | 24 | -- One snapshot per year using DISTINCT ON pushdown 25 | SELECT DISTINCT ON(year) url, timestamp 26 | FROM wayback_machine() 27 | WHERE url = 'github.com/duckdb/duckdb' 28 | AND statuscode = 200 29 | LIMIT 5; 30 | 31 | -- Query Common Crawl index 32 | SELECT url, timestamp, mimetype 33 | FROM common_crawl_index() 34 | WHERE url LIKE '%.example.com/%' 35 | AND statuscode = 200 36 | LIMIT 10; 37 | 38 | extended_description: | 39 | This extension provides two table functions to query web archive CDX APIs: 40 | 41 | - **wayback_machine()**: Query Internet Archive Wayback Machine (1996-present) 42 | - **common_crawl_index()**: Query Common Crawl archive (2008-present, monthly snapshots) 43 | 44 | Features: 45 | - **DISTINCT ON pushdown**: `DISTINCT ON(year)` becomes `&collapse=timestamp:4` 46 | - **Filter pushdown**: WHERE clauses for statuscode/mimetype pushed to CDX API 47 | - **LIMIT pushdown**: Only fetches requested number of records 48 | - **SELECT pushdown**: Only fetches columns you need via `&fl=` parameter 49 | - **Response fetching**: Download archived page content via `response.body` 50 | - **Virtual columns**: `year` and `month` extracted from timestamp 51 | 52 | For full documentation, visit the [extension repository](https://github.com/midwork-finds-jobs/duckdb-web-archive). 53 | -------------------------------------------------------------------------------- /extensions/pbix/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: pbix 3 | description: Extension that allows parsing the data model embedded in PowerBI (pbix) files 4 | version: 0.5.3 5 | language: C++ 6 | build: cmake 7 | excluded_platforms: "linux_amd64_musl" 8 | license: MIT 9 | maintainers: 10 | - Hugoberry 11 | repo: 12 | github: Hugoberry/duckdb-pbix-extension 13 | ref: 3d863fc006d22e9a6dcfae59fbf4a46edca8fd13 14 | docs: 15 | hello_world: | 16 | -- Get metadata tables from a PowerBI file 17 | SELECT Name FROM pbix_meta('Adventure Works DW 2020.pbix','table') WHERE isHidden=0; 18 | 19 | -- Read data from a specific table in the PowerBI file 20 | SELECT 21 | ResellerKey, 22 | "Business Type", 23 | Reseller, 24 | "Reseller ID" 25 | FROM pbix_read('Adventure Works DW 2020.pbix','Reseller') 26 | LIMIT 10; 27 | 28 | -- Read metadata about models in a folder of pbix files 29 | SELECT 30 | file, 31 | list_transform(pbix2vpax(file).Tables, t->t.TableName) as tab 32 | FROM glob('data/**/*.pbix'); 33 | extended_description: > 34 | The PBIX extension allows you to parse the data model embedded in PowerBI (pbix) files directly in DuckDB. 
35 | 36 | 37 | It provides three functions: 38 | - `pbix_meta()`: Returns metadata tables for a data model (consult [MS-SSAS-T](https://learn.microsoft.com/en-us/openspecs/sql_server_protocols/ms-ssas-t/f85cd3b9-690c-4bc7-a1f0-a854d7daecd8) for metadata structures) 39 | - `pbix_read()`: Returns the contents of a specific table from a pbix file 40 | - `pbix2vpax()`: Generate comprehensive VPAX serialisation of the entire data model (scalar function) 41 | 42 | 43 | For a pure Python implementation of the pbix parser, check out the [PBIXray](https://github.com/Hugoberry/pbixray) library. 44 | 45 | 46 | *Note:* Current limitations include the inability of the WASM version to parse `https` hosted files, and that pbix_read() will decompress the entire model in memory. 47 | -------------------------------------------------------------------------------- /extensions/nanodbc/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: nanodbc 3 | description: Connect to any ODBC-compatible database and query data directly from DuckDB 4 | version: 0.4.0 5 | language: C++ 6 | build: cmake 7 | excluded_platforms: "linux_amd64_musl;osx_amd64;wasm_mvp;wasm_eh;wasm_threads" 8 | license: MIT 9 | maintainers: 10 | - Hugoberry 11 | repo: 12 | github: Hugoberry/duckdb-nanodbc-extension 13 | ref: 0740a99bc20a9c5d1c07c39b0779104e5b5b88e5 14 | docs: 15 | hello_world: | 16 | -- Query a table using DSN 17 | SELECT * FROM odbc_scan(table_name='customers', connection='MyODBCDSN'); 18 | 19 | -- Execute custom SQL with connection string 20 | SELECT * FROM odbc_query( 21 | connection='Driver={SQL Server};Server=localhost;Database=mydb;', 22 | query='SELECT id, name, amount FROM sales WHERE amount > 1000' 23 | ); 24 | 25 | -- Attach all tables from an ODBC source 26 | CALL odbc_attach(connection='MyODBCDSN'); 27 | extended_description: > 28 | The ODBC extension allows DuckDB to seamlessly connect to any database that provides an ODBC driver, 29 | enabling you to query and analyze data from a wide variety of data sources without leaving the DuckDB ecosystem. 30 | 31 | 32 | Key features: 33 | - `odbc_scan()`: Query tables from any ODBC data source 34 | - `odbc_query()`: Execute custom SQL queries against external databases 35 | - `odbc_exec()`: Execute DDL/DML statements without returning results 36 | - `odbc_attach()`: Attach all tables from an ODBC source as views in DuckDB 37 | - Cross-platform character encoding support 38 | - Automatic type conversion between ODBC and DuckDB types 39 | - Support for DSNs and direct connection strings 40 | 41 | 42 | The extension works on Windows, macOS, and Linux platforms and has been tested with SQL Server, MySQL, 43 | PostgreSQL, Snowflake, SQLite, and many other databases. All functions use named parameters for better 44 | readability and flexibility. 45 | 46 | 47 | -------------------------------------------------------------------------------- /.github/workflows/cache_warming.yml: -------------------------------------------------------------------------------- 1 | name: Cache warming 2 | on: 3 | repository_dispatch: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '0 0 * * *' # Every midnight 7 | 8 | jobs: 9 | duckdb_ccache: 10 | name: Warm CCache using DuckDB C++ extension template 11 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main 12 | with: 13 | duckdb_version: 'v1.4.3' 14 | ci_tools_version: 'main' # TODO: this should probably be same as duckdb_version? 
15 | extra_toolchains: '' 16 | extension_name: 'quack' 17 | override_repository: 'duckdb/extension-template' 18 | override_ref: 'main' 19 | exclude_archs: 'windows_amd64_mingw' # TODO: fixme: extension template fails? 20 | skip_tests: true 21 | save_cache: true 22 | secrets: inherit 23 | 24 | parse_dependency_list: 25 | name: Fetch VCPKG dependency list 26 | runs-on: ubuntu-latest 27 | outputs: 28 | dependencies: ${{ steps.read-deps.outputs.dependencies }} 29 | steps: 30 | - uses: actions/checkout@v4 31 | 32 | - id: read-deps 33 | run: | 34 | deps="`cat .github/config/vcpkg_caching/generated_dep_list.json | tr -d '\n'`" 35 | echo "dependencies=$deps" >> $GITHUB_OUTPUT 36 | echo `cat $GITHUB_OUTPUT` 37 | 38 | duckdb_vcpkg_cache: 39 | name: Warm VCPKG Binary Cache 40 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main 41 | needs: parse_dependency_list 42 | with: 43 | duckdb_version: 'v1.4.3' 44 | ci_tools_version: 'main' # TODO: this should probably be same as duckdb_version? 45 | extra_toolchains: ';rust;go;fortran;parser_tools;' 46 | extension_name: 'capi_quack' 47 | override_repository: 'duckdb/extension-template-c' 48 | override_ref: 'main' 49 | skip_tests: true 50 | save_cache: true 51 | vcpkg_extra_dependencies: ${{needs.parse_dependency_list.outputs.dependencies}} 52 | vcpkg_binary_sources: ${{ vars.VCPKG_BINARY_SOURCES }} 53 | secrets: inherit 54 | -------------------------------------------------------------------------------- /.github/workflows/deploy_docs.yml: -------------------------------------------------------------------------------- 1 | name: Generate and Deploy Website 2 | on: 3 | workflow_dispatch: 4 | workflow_call: 5 | schedule: 6 | - cron: "0 */6 * * *" 7 | 8 | jobs: 9 | generate_docs: 10 | uses: ./.github/workflows/generate_docs.yml 11 | 12 | deploy_docs: 13 | needs: 14 | - generate_docs 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/download-artifact@v4 18 | with: 19 | name: generated_markdowns 20 | 21 | - uses: actions/checkout@v4 22 | with: 23 | repository: 'duckdb/duckdb-web' 24 | path: 'web' 25 | token: ${{ secrets.GITHUB_TOKEN }} 26 | 27 | - name: Generate docs 28 | run: | 29 | unzip generated_md.zip 30 | cp build/docs/*.md web/community_extensions/extensions/. 
31 | cp build/docs/extensions_list.md.tmp web/_includes/list_of_community_extensions.md 32 | 33 | - name: Upload to duckdb/duckdb-web 34 | if: false 35 | env: 36 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 37 | run: | 38 | cd web 39 | git config user.email "some-user@some-domain.com" 40 | git config user.name "Quack McQuacker" 41 | git checkout -B update_community_extensions 42 | git add community_extensions 43 | git commit -m "[update docs bot] Update community_extensions" 44 | git push -f --set-upstream origin update_community_extensions 45 | 46 | - name: Deploy website 47 | if: github.repository == 'duckdb/duckdb-web' 48 | working-directory: 'web/_site' 49 | env: 50 | AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_COMMUNITY_EXTENSION_S3_ID }} 51 | AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_COMMUNITY_EXTENSION_S3_SECRET }} 52 | AWS_DEFAULT_REGION: ${{ secrets.S3_DUCKDB_ORG_REGION }} 53 | run: | 54 | aws s3 cp build/docs/community_extensions.csv s3://duckdb-community-extensions/extensions/community_extensions.csv --acl public-read 55 | aws s3 cp build/docs/community_extensions.csv s3://duckdb-community-extensions/extensions/list.csv --acl public-read 56 | -------------------------------------------------------------------------------- /extensions/title_mapper/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: title_mapper 3 | description: Efficiently standardizes scraped job titles to Bureau of Labor Statistics (BLS) titles using a high-performance TF-IDF algorithm. 4 | version: 1.4.1.0 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw;linux_amd64_musl" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - martin-conur 12 | 13 | repo: 14 | github: martin-conur/duckdb-title-mapper 15 | ref: 48519d2d38eda1103d8cc3261c84fbbd4dc90b18 16 | 17 | docs: 18 | hello_world: | 19 | -- Standardize a column 20 | SELECT standardize_title(scraped_title_column) FROM your_table; 21 | 22 | -- Standardize tech job titles 23 | SELECT standardize_title('Sr. Software Eng') AS standardized_title; 24 | -- Result: 'Software Engineer - Software Developers' 25 | 26 | -- Standardize healthcare titles 27 | SELECT standardize_title('RN - Emergency Room') AS standardized_title; 28 | -- Result: 'Registered Nurse - Registered Nurses' 29 | extended_description: | 30 | # DuckDB Title Mapper 31 | `duckdb-title-mapper` is a highly optimized DuckDB extension written in Rust. It standardizes scraped job titles to BLS (Bureau of Labor Statistics) standard titles using a fast TF-IDF implementation. 32 | 33 | ## What It Does 34 | 35 | This extension transforms messy, inconsistent job titles from various sources into standardized BLS titles: 36 | 37 | | Scraped Title (Input) | Standardized Title (Output) | 38 | |------------------------|------------------------------| 39 | | Sr. Software Eng | Software Engineer | 40 | | Registered Nurse - ICU | Registered Nurse | 41 | | Accountant III | Accountant | 42 | | Sales Rep (B2B) | Sales Representative | 43 | | Elementary School Teacher - 3rd Grade | Elementary School Teacher | 44 | | Exec. 
Chef | Executive Chef | 45 | | Marketing Coordinator/Specialist | Marketing Specialist | 46 | | Licensed Practical Nurse (LPN) | Licensed Practical Nurse | -------------------------------------------------------------------------------- /extensions/msolap/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: msolap 3 | description: Extension that allows DuckDB to connect to Microsoft SQL Server Analysis Services (SSAS) and other OLAP data sources using the MSOLAP provider 4 | version: 0.1.2 5 | language: C++ 6 | build: cmake 7 | excluded_platforms: "linux_arm64;linux_amd64_musl;osx_amd64;osx_arm64;wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw" 8 | license: MIT 9 | maintainers: 10 | - Hugoberry 11 | repo: 12 | github: Hugoberry/duckdb-msolap-extension 13 | ref: 84e1b00e2e42a905c0efd5eb55f8bef1cefa6173 14 | docs: 15 | hello_world: | 16 | -- Execute a simple DAX query against a local SSAS instance 17 | SELECT * FROM msolap('Data Source=localhost;Catalog=AdventureWorks', 'EVALUATE DimProduct'); 18 | 19 | -- Execute a more complex DAX query against PowerBI Desktop instance 20 | SELECT * FROM msolap('Data Source=localhost:61324;Catalog=0ec50266-bdf5-4582-bc8c-82584866bcb7', 21 | 'EVALUATE 22 | SUMMARIZECOLUMNS( 23 | DimProduct[Color], 24 | "Total Sales", SUM(FactInternetSales[SalesAmount]) 25 | )'); 26 | extended_description: > 27 | The MSOLAP extension allows DuckDB to connect to Microsoft SQL Server Analysis Services (SSAS) and other OLAP data sources using the MSOLAP provider. It enables multidimensional and tabular models with DAX queries to be queried directly from DuckDB. 28 | 29 | 30 | The extension provides one primary function: 31 | - `msolap(connection_string, dax_query)`: Execute a custom DAX query against an OLAP source 32 | 33 | 34 | This extension is handy for data analysts who work with the Microsoft Business Intelligence stack (SSAS, Power BI) and want to incorporate this data into their DuckDB workflows. 35 | 36 | 37 | *Note:* Current limitations include Windows-only support due to COM dependencies, limited data type conversion for complex OLAP types, and limited support for calculated measures and hierarchies. The extension requires the installation of the Microsoft OLEDB provider for Analysis Services (MSOLAP.8). 38 | -------------------------------------------------------------------------------- /scripts/clean_caches.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then 4 | echo "Usage ./clean-caches.sh " 5 | exit 1 6 | fi 7 | 8 | BUCKET="$1" 9 | ENDPOINT="$2" 10 | PATTERN="$3" 11 | 12 | CLOUDFRONT_DISTRIBUTION_ID=E2Z28NDMI4PVXP 13 | 14 | ### INVALIDATE THE CLOUDFRONT CACHE AND CLOUDFLARE 15 | # For double checking we are invalidating the correct domain 16 | # CLOUDFRONT_ORIGINS=`aws cloudfront get-distribution --id $CLOUDFRONT_DISTRIBUTION_ID --query 'Distribution.DistributionConfig.Origins.Items[*].DomainName' --output text` 17 | 18 | ouput=$(aws s3 sync s3://$BUCKET . 
--exclude "*" --include "$PATTERN" --dryrun | awk ' { print $5 } ') 19 | 20 | #if [ "$DUCKDB_CLEAN_CACHES_SCRIPT_MODE" == "for_real" ]; then 21 | # echo "CLOUDFRONT INVALIDATION" 22 | # while IFS= read -r path; do 23 | # aws cloudfront create-invalidation --distribution-id "$CLOUDFRONT_DISTRIBUTION_ID" --paths "$path" 24 | # done <<< $ouput 25 | #else 26 | echo "INVALIDATION (DRY RUN)" 27 | echo "> Domain: $CLOUDFRONT_ORIGINS" 28 | echo "> Paths:" 29 | while IFS= read -r path; do 30 | echo " $path" 31 | done <<< $ouput 32 | #fi 33 | 34 | echo "" 35 | 36 | if [ ! -z "$CLOUDFLARE_CACHE_PURGE_TOKEN" ]; then 37 | if [ "$DUCKDB_CLEAN_CACHES_SCRIPT_MODE" == "for_real" ]; then 38 | echo "CLOUDFLARE INVALIDATION" 39 | while IFS= read -r path; do 40 | curl --request POST --url https://api.cloudflare.com/client/v4/zones/84f631c38b77d4631b561207f2477332/purge_cache --header 'Content-Type: application/json' --header "Authorization: Bearer $CLOUDFLARE_CACHE_PURGE_TOKEN" --data "{\"files\": [\"http://$ENDPOINT.duckdb.org/$path\"]}" 41 | echo "" 42 | done 43 | else 44 | echo "CLOUDFLARE INVALIDATION (DRY RUN)" 45 | echo "> Paths:" 46 | while IFS= read -r path; do 47 | echo " http://$ENDPOINT.duckdb.org/$path" 48 | done <<< $ouput 49 | fi 50 | else 51 | echo "##########################################" 52 | echo "WARNING! CLOUDFLARE INVALIDATION DISABLED!" 53 | echo "##########################################" 54 | fi 55 | -------------------------------------------------------------------------------- /extensions/anndata/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: anndata 3 | description: Read AnnData (.h5ad) files for single-cell genomics data analysis 4 | version: 0.9.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - honicky 10 | 11 | repo: 12 | github: honicky/anndata-duckdb-extension 13 | ref: b3206219e67fa3621c0db29793b333aeb2f72e80 14 | 15 | docs: 16 | hello_world: | 17 | -- Attach an AnnData file 18 | ATTACH 'data.h5ad' AS scdata (TYPE ANNDATA); 19 | 20 | -- Query cell metadata 21 | SELECT * FROM scdata.obs LIMIT 10; 22 | 23 | -- Query gene metadata 24 | SELECT * FROM scdata.var LIMIT 10; 25 | 26 | -- Query expression matrix 27 | SELECT * FROM scdata.X LIMIT 10; 28 | 29 | -- Detach when done 30 | DETACH scdata; 31 | 32 | extended_description: | 33 | The AnnData extension provides read-only access to AnnData (.h5ad) files, 34 | the standard format for single-cell genomics data. 35 | 36 | ## ATTACH Syntax 37 | 38 | ```sql 39 | ATTACH 'file.h5ad' AS name (TYPE ANNDATA); 40 | ``` 41 | 42 | ## Available Tables 43 | 44 | - `obs` - Observation (cell) metadata 45 | - `var` - Variable (gene) metadata 46 | - `X` - Expression matrix (genes as columns) 47 | - `obsm_*` - Dimensional reductions (PCA, UMAP, etc.) 
48 | - `varm_*` - Variable embeddings 49 | - `layers_*` - Alternative expression matrices 50 | - `obsp_*` - Cell-cell pairwise matrices 51 | - `varp_*` - Gene-gene pairwise matrices 52 | - `uns` - Unstructured metadata 53 | 54 | ## Table Functions 55 | 56 | ```sql 57 | -- Core data 58 | SELECT * FROM anndata_scan_obs('file.h5ad'); 59 | SELECT * FROM anndata_scan_var('file.h5ad'); 60 | SELECT * FROM anndata_scan_x('file.h5ad'); 61 | 62 | -- Dimensional reductions 63 | SELECT * FROM anndata_scan_obsm('file.h5ad', 'X_pca'); 64 | SELECT * FROM anndata_scan_obsm('file.h5ad', 'X_umap'); 65 | 66 | -- Layers 67 | SELECT * FROM anndata_scan_layers('file.h5ad', 'raw'); 68 | 69 | -- File info 70 | SELECT * FROM anndata_info('file.h5ad'); 71 | ``` 72 | -------------------------------------------------------------------------------- /extensions/duckpgq/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: duckpgq 3 | description: Extension that adds support for SQL/PGQ and graph algorithms 4 | version: 0.2.7 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - Dtenwolde 10 | 11 | repo: 12 | github: cwida/duckpgq-extension 13 | ref: ffeee447afd82247fd5ffd1f3f314392d6f7dab1 14 | 15 | docs: 16 | hello_world: | 17 | CREATE TABLE Person AS SELECT * FROM 'https://gist.githubusercontent.com/Dtenwolde/2b02aebbed3c9638a06fda8ee0088a36/raw/8c4dc551f7344b12eaff2d1438c9da08649d00ec/person-sf0.003.csv'; 18 | CREATE TABLE Person_knows_person AS SELECT * FROM 'https://gist.githubusercontent.com/Dtenwolde/81c32c9002d4059c2c3073dbca155275/raw/8b440e810a48dcaa08c07086e493ec0e2ec6b3cb/person_knows_person-sf0.003.csv'; 19 | 20 | CREATE PROPERTY GRAPH snb 21 | VERTEX TABLES ( 22 | Person 23 | ) 24 | EDGE TABLES ( 25 | Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) 26 | DESTINATION KEY (Person2Id) REFERENCES Person (id) 27 | LABEL knows 28 | ); 29 | 30 | FROM GRAPH_TABLE (snb 31 | MATCH (a:Person)-[k:knows]->(b:Person) 32 | COLUMNS (a.id, b.id) 33 | ) 34 | LIMIT 1; 35 | 36 | FROM GRAPH_TABLE (snb 37 | MATCH p = ANY SHORTEST (a:person)-[k:knows]->{1,3}(b:Person) 38 | COLUMNS (a.id, b.id, path_length(p)) 39 | ) 40 | LIMIT 1; 41 | 42 | FROM local_clustering_coefficient(snb, person, knows); 43 | 44 | DROP PROPERTY GRAPH snb; 45 | 46 | extended_description: > 47 | The DuckPGQ extension supports the SQL/PGQ syntax as part of the official SQL:2023 standard developed by ISO. 48 | 49 | 50 | It introduces visual graph pattern matching and a more concise syntax for path-finding. 51 | For more information, please see the [DuckPGQ documentation](https://duckpgq.org). 52 | 53 | 54 | *Disclaimer:* As this extension is part of an ongoing research project by the Database Architectures group at CWI, some features may still be under development. We appreciate your understanding and patience as we continue to improve it. 
55 | -------------------------------------------------------------------------------- /extensions/read_stat/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: read_stat 3 | description: Read data sets from SAS, Stata, and SPSS with ReadStat 4 | version: 0.2.3 5 | language: C 6 | build: cmake 7 | license: MIT 8 | requires_toolchains: "python3" 9 | maintainers: 10 | - mettekou 11 | 12 | repo: 13 | github: mettekou/duckdb-read-stat 14 | ref: d66821b3626caddbc8da7794617747e079f2ff64 15 | 16 | docs: 17 | hello_world: | 18 | -- Read a SAS `.sas7bdat` or `.xpt` file 19 | FROM read_stat('sas_data.sas7bdat'); 20 | FROM read_stat('sas_data.xpt'); 21 | -- Read an SPSS `.sav`, `.zsav`, or `.por` file 22 | FROM read_stat('spss_data.sav'); 23 | FROM read_stat('compressed_spss_data.zsav'); 24 | FROM read_stat('portable_spss_data.por'); 25 | -- Read a Stata .dta file 26 | FROM read_stat('stata_data.dta'); 27 | 28 | -- If the file extension is not `.sas7bdat`, `.xpt`, `.sav`, `.zsav`, `.por`, or `.dta`, 29 | -- use the `read_stat` function for the right file type with the `format` parameter: 30 | FROM read_stat('sas_data.other_extension', format = 'sas7bdat'); 31 | FROM read_stat('sas_data.other_extension', format = 'xpt'); 32 | -- SPSS `.sav` and `.zsav` can both be read through the format `'sav'` 33 | FROM read_stat( 34 | 'spss_data_possibly_compressed.other_extension', 35 | format = 'sav' 36 | ); 37 | FROM read_stat('portable_spss_data.other_extension', format = 'por'); 38 | FROM read_stat('stata_data.other_extension', format = 'dta'); 39 | 40 | -- Override the character encoding with an `iconv`` encoding name, 41 | -- see https://www.gnu.org/software/libiconv/ 42 | CREATE TABLE other_data AS FROM read_stat('latin1_encoded.sas7bdat', encoding = 'iso-8859-1'); 43 | extended_description: | 44 | ## Usage 45 | 46 | ### Parameters 47 | 48 | | Name | Description | Type | Default | 49 | |:----|:-----------|:----:|:-------| 50 | | `format` | The format of the input file, when its extension does not indicate it, either `'sas7bdat'`, `'xpt'`, `'sav'`, `'por'`, or `'dta'` | `VARCHAR` | `NULL` | 51 | | `encoding` | The character encoding of the input file, as defined by `iconv`, see https://www.gnu.org/software/libiconv/ | `VARCHAR` | `NULL` | 52 | -------------------------------------------------------------------------------- /extensions/webdavfs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: webdavfs 3 | description: Allows reading and writing files over WebDAV protocol 4 | version: 1.0.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;windows_amd64;wasm_threads;wasm_eh;wasm_mvp;linux_amd64_musl;" 9 | requires_toolchains: "vcpkg" 10 | maintainers: 11 | - onnimonni 12 | vcpkg_commit: 'dd3097e305afa53f7b4312371f62058d2e665320' 13 | repo: 14 | github: midwork-finds-jobs/duckdb-webdavfs 15 | ref: 00894c37e652a256a26299b7023fc000d47a85c5 16 | 17 | docs: 18 | hello_world: | 19 | -- Load the extension 20 | INSTALL webdavfs FROM community; 21 | LOAD webdavfs; 22 | 23 | -- Authenticate with any WebDAV server eg nextcloud or owncloud 24 | CREATE SECRET my_storagebox ( 25 | TYPE WEBDAV, 26 | USERNAME 'u123456', 27 | PASSWORD 'password', 28 | SCOPE 'webdav://webdav-server.example.com/' 29 | ); 30 | 31 | -- or with Hetzner specific Storage Box 32 | CREATE SECRET my_storagebox ( 33 | TYPE WEBDAV, 34 | USERNAME 
'u123456', 35 | PASSWORD 'password', 36 | SCOPE 'storagebox://u123456' 37 | ); 38 | 39 | -- Convert local csv into parquet and upload it using WebDAV 40 | COPY ( 41 | FROM 'local.csv' 42 | ) TO 'storagebox://u123456/remote.parquet'; 43 | 44 | -- Read The uploaded parquet file using WebDAV 45 | SELECT * FROM 'storagebox://u123456/remote.parquet'; 46 | 47 | extended_description: | 48 | DuckDB WebDAVfs extension enables seamless integration with WebDAV servers, allowing users to read from and write to remote file systems directly within DuckDB. This extension supports authentication mechanisms compatible with various WebDAV services, including Hetzner Storage Boxes. 49 | 50 | WebDAV is nice because it builds on top of HTTP/HTTPS protocols. This enables DuckDB to leverage http 1.1 range feature to only download parts of files when needed, making it efficient for working with large datasets stored remotely. 51 | 52 | Hetzner Storage Boxes are a popular choice for WebDAV storage being one of the cheapest storage options available. 53 | 54 | See: https://github.com/midwork-finds-jobs/webdavfs/blob/main/README.md for more examples and details. 55 | -------------------------------------------------------------------------------- /extensions/bigquery/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: bigquery 3 | description: Integrates DuckDB with Google BigQuery, allowing direct querying and management of BigQuery datasets 4 | version: 0.6.3 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw" 9 | vcpkg_commit: "ef7dbf94b9198bc58f45951adcf1f041fcbc5ea0" 10 | requires_toolchains: "parser_tools" 11 | maintainers: 12 | - hafenkran 13 | 14 | repo: 15 | github: hafenkran/duckdb-bigquery 16 | ref: a59a495035e6bc48e739539c93f59f5a656c0358 17 | 18 | docs: 19 | hello_world: | 20 | -- Attach to your BigQuery Project 21 | D ATTACH 'project=my_gcp_project' AS bq (TYPE bigquery, READ_ONLY); 22 | 23 | -- Show all tables in all datasets in the attached BigQuery project 24 | D SHOW ALL TABLES; 25 | ┌──────────┬──────────────────┬──────────┬──────────────┬───────────────────┬───────────┐ 26 | │ database │ schema │ name │ column_names │ column_types │ temporary │ 27 | │ varchar │ varchar │ varchar │ varchar[] │ varchar[] │ boolean │ 28 | ├──────────┼──────────────────┼──────────┼──────────────┼───────────────────┼───────────┤ 29 | │ bq │ quacking_dataset │ duck_tbl │ [i, s] │ [BIGINT, VARCHAR] │ false │ 30 | | bq | barking_dataset | dog_tbl | [i, s] | [BIGINT, VARCHAR] │ false | 31 | └──────────┴──────────────────┴──────────┴──────────────┴───────────────────┴───────────┘ 32 | 33 | -- Select data from a specific table in BigQuery 34 | D SELECT * FROM bq.quacking_dataset.duck_tbl; 35 | ┌───────┬────────────────┐ 36 | │ i │ s │ 37 | │ int32 │ varchar │ 38 | ├───────┼────────────────┤ 39 | │ 12 │ quack 🦆 │ 40 | │ 13 │ quack quack 🦆 │ 41 | └───────┴────────────────┘ 42 | 43 | extended_description: | 44 | This community-maintained extension allows DuckDB to connect to Google BigQuery using the BigQuery Storage (read/write) and REST APIs. 45 | It enables users to read, write, and manage their BigQuery datasets/tables directly from DuckDB using standard SQL queries. 46 | For detailed setup and usage instructions, visit the [extension repository](https://github.com/hafenkran/duckdb-bigquery). 
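    Because the attached catalog also accepts writes, BigQuery tables can be created and populated with ordinary SQL. The following is a minimal sketch: the dataset and table names are illustrative, and the repository documents exactly which DDL/DML statements are supported.
    ```sql
    ATTACH 'project=my_gcp_project' AS bq (TYPE bigquery);
    CREATE TABLE bq.quacking_dataset.duck_tbl_copy AS
        SELECT * FROM bq.quacking_dataset.duck_tbl;
    INSERT INTO bq.quacking_dataset.duck_tbl_copy VALUES (14, 'quack quack quack 🦆');
    ```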
47 | -------------------------------------------------------------------------------- /extensions/yardstick/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: yardstick 3 | description: Measure-aware SQL implementing Julian Hyde's 'Measures in SQL' paper 4 | version: 0.4.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | requires_toolchains: "rust" 9 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64;windows_arm64;windows_amd64_mingw" 10 | maintainers: 11 | - sidequery 12 | 13 | repo: 14 | github: sidequery/yardstick 15 | ref: 7fbe8abd40a926300e58dc6a365c43399710ef42 16 | 17 | docs: 18 | hello_world: | 19 | -- Create a view with measures 20 | CREATE VIEW sales_v AS 21 | SELECT 22 | year, 23 | region, 24 | SUM(amount) AS MEASURE revenue, 25 | COUNT(*) AS MEASURE order_count 26 | FROM sales; 27 | 28 | -- Query with AGGREGATE() and AT modifiers 29 | SEMANTIC SELECT 30 | year, 31 | region, 32 | AGGREGATE(revenue) AS revenue, 33 | AGGREGATE(revenue) AT (ALL region) AS year_total, 34 | AGGREGATE(revenue) / AGGREGATE(revenue) AT (ALL region) AS pct_of_year 35 | FROM sales_v; 36 | 37 | extended_description: | 38 | `yardstick` implements Julian Hyde's "Measures in SQL" paper ([arXiv:2406.00251](https://arxiv.org/abs/2406.00251)), adding measure-aware SQL to DuckDB. 39 | 40 | Measures are aggregations that know how to re-aggregate themselves when the query context changes. This enables: 41 | 42 | **Percent of total** calculations without CTEs or window functions: 43 | ```sql 44 | SEMANTIC SELECT region, AGGREGATE(revenue) / AGGREGATE(revenue) AT (ALL) AS pct 45 | FROM sales_v; 46 | ``` 47 | 48 | **Year-over-year comparisons** with simple syntax: 49 | ```sql 50 | SEMANTIC SELECT year, AGGREGATE(revenue) AT (SET year = year - 1) AS prior_year 51 | FROM sales_v; 52 | ``` 53 | 54 | **AT Modifiers**: 55 | - `AT (ALL)` - Grand total across all dimensions 56 | - `AT (ALL dim)` - Total excluding specific dimension 57 | - `AT (SET dim = val)` - Fix dimension to specific value 58 | - `AT (SET dim = expr)` - Fix dimension to expression 59 | - `AT (WHERE cond)` - Pre-aggregation filter 60 | - `AT (VISIBLE)` - Use query's WHERE clause 61 | 62 | For more details, visit the [extension repository](https://github.com/sidequery/yardstick). 
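    As a further sketch of the modifiers listed above (reusing the `sales_v` view from the example; the year value is illustrative), `AT (WHERE ...)` applies a pre-aggregation filter to a single measure:
    ```sql
    SEMANTIC SELECT
      region,
      AGGREGATE(revenue) AS revenue,
      AGGREGATE(revenue) AT (WHERE year = 2024) AS revenue_2024
    FROM sales_v;
    ```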
63 | -------------------------------------------------------------------------------- /extensions/hostfs/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: hostfs 3 | description: Navigate and explore the filesystem using SQL 4 | version: 0.0.3 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - Gropaul 10 | 11 | repo: 12 | github: gropaul/hostFS 13 | ref: 29017b23edd9bed0cbd9847c9ba76b84427df1f7 14 | 15 | docs: 16 | hello_world: | 17 | -- Navigate to the workspace and list the files 18 | D PRAGMA cd('/Users/paul/workspace'); 19 | D PRAGMA ls; 20 | ┌───────────────────────────────┐ 21 | │ path │ 22 | │ varchar │ 23 | ├───────────────────────────────┤ 24 | │ ./duckdb │ 25 | │ ./playground │ 26 | │ ./hostfs │ 27 | -- Find the files you were working on last 28 | D SELECT path, file_last_modified(path) AS date FROM ls() WHERE 'csv' IN file_extension(path) ORDER BY date LIMIT 1 ; 29 | ┌───────────────────────────┬─────────────────────┐ 30 | │ path │ date │ 31 | │ varchar │ timestamp │ 32 | ├───────────────────────────┼─────────────────────┤ 33 | │ ./sketch_results_join.csv │ 2024-07-13 23:25:48 │ 34 | └───────────────────────────┴─────────────────────┘ 35 | -- List the top 3 file types by total size, with file count, ordered by size. 36 | D SELECT size, count, file_extension AS "type" 37 | FROM ( 38 | SELECT SUM(file_size(path)) AS size_raw, format_bytes(size_raw) AS size, COUNT(*) AS count, file_extension(path) AS file_extension 39 | FROM lsr('/Users/paul/workspace', 10) 40 | GROUP BY file_extension(path) 41 | ) AS subquery 42 | ORDER BY size_raw DESC LIMIT 3; 43 | ┌───────────┬───────┬─────────┐ 44 | │ size │ count │ type │ 45 | │ varchar │ int64 │ varchar │ 46 | ├───────────┼───────┼─────────┤ 47 | │ 246.95 GB │ 29 │ .duckdb │ 48 | │ 90.33 GB │ 3776 │ .tmp │ 49 | │ 26.17 GB │ 28175 │ .csv │ 50 | └───────────┴───────┴─────────┘ 51 | extended_description: > 52 | The HostFS extension allows you to navigate and explore the filesystem using SQL. It provides a set of functions to list files, get file metadata, and more. 53 | For more information, please see the [HostFS documentation](https://github.com/gropaul/hostFS). -------------------------------------------------------------------------------- /extensions/faiss/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: faiss 3 | description: Provides access to faiss indices from DuckDB. 
4 | version: 0.12.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - JAicewizard 10 | - arjenpdevries 11 | excluded_platforms: "osx_amd64;wasm_mvp;wasm_eh;wasm_threads;linux_amd64_musl" 12 | requires_toolchains: "fortran;omp" 13 | vcpkg_url: "https://github.com/microsoft/vcpkg.git" 14 | vcpkg_commit: "54760c3439fa2fdf2f42ccd730fcf2639c3fe101" 15 | 16 | repo: 17 | github: "duckdb-faiss-ext/duckdb-faiss-ext" 18 | ref: "6b824231e6291b689328d67a22440301337bbc3f" 19 | 20 | docs: 21 | hello_world: | 22 | -- Generate semi-random input data and queries 23 | -- Note that the dimensionality of our data will be 5 24 | CREATE TABLE input AS SELECT i AS id, apply(generate_series(1, 5), j-> CAST(hash(i*1000+j) AS FLOAT)/18446744073709551615) AS data FROM generate_series(1, 1000) s(i); 25 | CREATE TABLE queries AS SELECT i AS id, apply(generate_series(1, 5), j-> CAST(hash(i*1000+j+8047329823) AS FLOAT)/18446744073709551615) AS data FROM generate_series(1, 10) s(i); 26 | -- Create the index and insert data into it 27 | CALL FAISS_CREATE('name', 5, 'IDMap,HNSW32'); 28 | CALL FAISS_ADD((SELECT id, data FROM input), 'name'); 29 | -- On Linux, with CUDA, we can move the index to the GPU 30 | -- CALL FAISS_TO_GPU('name', 0); 31 | -- Get 10 results with odd id 32 | SELECT id, UNNEST(FAISS_SEARCH_FILTER('name', 10, data, 'id%2==1', 'rowid', 'input')) FROM queries; 33 | -- Get 10 results with even id 34 | SELECT id, UNNEST(FAISS_SEARCH_FILTER('name', 10, data, 'id%2==0', 'rowid', 'input')) FROM queries; 35 | -- Get 10 results 36 | SELECT id, UNNEST(FAISS_SEARCH('name', 10, data)) FROM queries; 37 | extended_description: | 38 | The FAISS extension allows DuckDB users to store vector data in FAISS and query this data, making reliable vector search more accessible. On all Linux platforms, the extension also supports GPU indexes; you can move a supported index to the GPU using `CALL FAISS_MOVE_GPU({index_name}, {gpu number})`. Currently only CUDA is supported; note that GPU support may be split into a separate extension in the future. 39 | 40 | Some (most) indices are not supported on GPUs; however, this is very easily resolvable. Please open an issue over at our repository in order to get this resolved!
41 | -------------------------------------------------------------------------------- /extensions/rusty_sheet/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: rusty_sheet 3 | description: An Excel/WPS/OpenDocument Spreadsheets file reader for DuckDB 4 | version: 0.4.2 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - redraiment 12 | 13 | repo: 14 | github: redraiment/rusty-sheet 15 | ref: v0.4.2 16 | 17 | docs: 18 | hello_world: | 19 | -- Read first sheet of spreadsheet with headers 20 | FROM read_sheet('data.xlsx'); 21 | 22 | -- Read without headers 23 | FROM read_sheet('data.xlsx', header=false); 24 | 25 | -- Read specific worksheet 26 | FROM read_sheet('workbook.xlsx', sheet='Sheet2'); 27 | 28 | -- Analyze more rows (default 10) to detect column types 29 | FROM analyze_sheet('data.xlsx', analyze_rows=20); 30 | 31 | -- Override specific column types (others auto-detected) 32 | FROM read_sheet('data.xlsx', columns={'id': 'bigint'}); 33 | 34 | -- Read specific data range (Excel-style notation) 35 | FROM read_sheet('data.xlsx', range='A2:E100'); 36 | 37 | -- Read all worksheets in multiple file types with different extensions 38 | FROM read_sheets(['*.xlsx', '*.ods', '*.et']); 39 | 40 | -- Analyze with wildcard pattern 41 | FROM analyze_sheets(['*.xlsx'], sheets=['Sheet*']); 42 | 43 | -- Match specific worksheets only in specific file types 44 | FROM read_sheets(['*.xlsx'], sheets=['*.xlsx=Sheet*']); 45 | 46 | -- Track data sources with custom column names 47 | FROM read_sheets(['*.xlsx'], file_name_column='file', sheet_name_column='sheet'); 48 | 49 | -- Union data by column name instead of position 50 | FROM read_sheets(['*.xlsx'], union_by_name=true); 51 | 52 | -- With custom HTTP headers: must be a persistent SECRET 53 | CREATE PERSISTENT SECRET http_auth (TYPE HTTP, BEARER_TOKEN 'token'); 54 | FROM read_sheet('https://example.com/data.xlsx'); 55 | 56 | extended_description: | 57 | The DuckDB rusty-sheet extension enables reading Excel, WPS, and OpenDocument spreadsheet files directly within SQL queries. This extension provides seamless integration for analyzing spreadsheet data using DuckDB's powerful SQL engine. 58 | For detailed setup and usage instructions, visit the docs at [rusty-sheet](https://github.com/redraiment/rusty-sheet).
59 | -------------------------------------------------------------------------------- /extensions/flock/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | llm_complete,Generates text completions using a specified language model,Requires a defined prompt and model,"SELECT llm_complete({'model_name': 'default'}, {'prompt_name': 'hello-world'});" 3 | llm_filter,Filters data based on language model evaluations, returning boolean values,"SELECT * FROM data WHERE llm_filter({'model_name': 'default'}, {'prompt_name': 'is_relevant', 'context_columns': [{'data': content}]});" 4 | llm_embedding,Generates embeddings for input text,Useful for semantic similarity tasks,"SELECT llm_embedding({'model_name': 'default'}, {'context_columns': [{'data': 'Sample text'}]});" 5 | llm_reduce,Aggregates multiple inputs into a single output using a language model,Summarizes or combines multiple rows,"SELECT llm_reduce({'model_name': 'default'}, {'prompt_name': 'summarize', 'context_columns': [{'data': content}]}) FROM documents;" 6 | llm_rerank,Reorders query results based on relevance scores from a language model,Enhances result relevance in search applications,"SELECT llm_rerank({'model_name': 'default'}, {'prompt_name': 'rank_relevance', 'context_columns': [{'data': content}]}) FROM search_results;" 7 | llm_first,Selects the top-ranked result after reranking,Retrieves the most relevant item,"SELECT llm_first({'model_name': 'default'}, {'prompt_name': 'rank_relevance', 'context_columns': [{'data': content}]}) FROM search_results;" 8 | llm_last,Selects the bottom-ranked result after reranking,Retrieves the least relevant item,"SELECT llm_last({'model_name': 'default'}, {'prompt_name': 'rank_relevance', 'context_columns': [{'data': content}]}) FROM search_results;" 9 | fusion_rrf,Implements Reciprocal Rank Fusion (RRF) to combine rankings,Combines rankings from multiple scoring systems,"SELECT fusion_rrf(score1, score2) FROM combined_scores;" 10 | fusion_combsum,Sums normalized scores from different scoring systems,Useful for aggregating scores from various models,"SELECT fusion_combsum(score1, score2) FROM combined_scores;" 11 | fusion_combmnz,Sums normalized scores and multiplies by the hit count,Enhances the impact of frequently occurring items,"SELECT fusion_combmnz(score1, score2) FROM combined_scores;" 12 | fusion_combmed,Computes the median of normalized scores,Reduces the effect of outliers in combined scores,"SELECT fusion_combmed(score1, score2) FROM combined_scores;" 13 | fusion_combanz,Calculates the average of normalized scores,Provides a balanced aggregation of scores,"SELECT fusion_combanz(score1, score2) FROM combined_scores;" 14 | -------------------------------------------------------------------------------- /extensions/infera/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | infera_load_model,"Load an ONNX model from a local file path or a remote URL and assign it a unique name.","Supports local paths and remote URLs; caches remote models.","select infera_load_model('local_model','/path/to/model.onnx');" 3 | infera_unload_model,"Unload a model, freeing its associated resources.","Returns true on success.","select infera_unload_model('local_model');" 4 | infera_set_autoload_dir,"Scan a directory for .onnx files, load them automatically, and return a JSON report.","Returns JSON with 
loaded models and any errors.","select infera_set_autoload_dir('path/to/your/models');" 5 | infera_get_loaded_models,"Return a JSON array containing the names of all currently loaded models.","JSON array of model names.","select infera_get_loaded_models();" 6 | infera_get_model_info,"Return a JSON object with metadata for a specific loaded model (name, input/output shapes).","Throws an error if the model is not loaded.","select infera_get_model_info('local_model');" 7 | infera_predict,"Perform inference on a batch of data; returns a single float value per input row.","Features accept FLOAT, DOUBLE, INTEGER, BIGINT, DECIMAL (cast to float).","select infera_predict('my_model', 1.0, 2.5, 3.0) as prediction;" 8 | infera_predict_multi,"Perform inference and return all outputs as a JSON-encoded array.","Useful for models that produce multiple predictions per sample.","select infera_predict_multi('multi_output_model', 1.0, 2.0);" 9 | infera_predict_multi_list,"Perform inference and return outputs as a typed LIST\[FLOAT\].","Avoids JSON parsing for multi-output models.","select infera_predict_multi_list('multi_output_model', 1.0, 2.0);" 10 | infera_predict_from_blob,"Perform inference on raw BLOB data (e.g., image tensor); returns LIST\[FLOAT\].","Accepts raw tensor BLOB data from a column.","select infera_predict_from_blob('my_model', my_blob_column) from my_table;" 11 | infera_is_model_loaded,"Return true if the given model is currently loaded, otherwise false.","","select infera_is_model_loaded('squeezenet');" 12 | infera_get_version,"Return a JSON object with version and build information for the Infera extension.","","select infera_get_version();" 13 | infera_clear_cache,"Clear the entire model cache directory to free up disk space.","Returns true on success.","select infera_clear_cache();" 14 | infera_get_cache_info,"Return cache statistics: directory path, total size in bytes, file count, and configured size limit.","Returns JSON with cache fields.","select infera_get_cache_info();" 15 | -------------------------------------------------------------------------------- /scripts/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import yaml 4 | # TODO: check prefix, needs to be in installation dir 5 | 6 | if 'ALL_CHANGED_FILES' in os.environ: 7 | desc_files = os.environ['ALL_CHANGED_FILES'].split(' ') 8 | else: 9 | desc_files = [] 10 | 11 | print(f"Files changed: {desc_files}") 12 | 13 | if len(desc_files) > 1: 14 | raise ValueError('cannot have multiple descriptors changed or packages with spaces in their names') 15 | 16 | deploy = True 17 | 18 | if len(desc_files) == 0 or len(desc_files[0]) == 0: 19 | print("No changed files, only quack will be built as a test") 20 | desc_files = ['extensions/quack/description.yml'] 21 | deploy = False 22 | 23 | desc_file = desc_files[0] 24 | 25 | with open(desc_file, 'r') as stream: 26 | desc = yaml.safe_load(stream) 27 | 28 | print(desc) 29 | 30 | # todo check other stuff like build system etc. 
31 | 32 | with open('env.sh', 'w+') as hdl: 33 | hdl.write(f"COMMUNITY_EXTENSION_GITHUB={desc['repo']['github']}\n") 34 | if 'canonical_name' in desc.get('repo', {}): 35 | hdl.write(f"COMMUNITY_EXTENSION_CANONICAL_NAME={desc['repo']['canonical_name']}\n") 36 | extension_ref = desc['repo']['ref'] 37 | if os.environ['DUCKDB_VERSION'] != os.environ['DUCKDB_LATEST_STABLE']: 38 | if 'ref_next' in desc['repo']: 39 | extension_ref = desc['repo']['ref_next'] 40 | hdl.write(f"COMMUNITY_EXTENSION_REF={extension_ref}\n") 41 | hdl.write(f"COMMUNITY_EXTENSION_NAME={desc['extension']['name']}\n") 42 | excluded_platforms = desc['extension'].get('excluded_platforms') 43 | opt_in_platforms = desc['extension'].get('opt_in_platforms') 44 | requires_toolchains = desc['extension'].get('requires_toolchains') 45 | custom_toolchain_script = desc['extension'].get('custom_toolchain_script') 46 | vcpkg_url = desc['extension'].get('vcpkg_url') 47 | vcpkg_commit = desc['extension'].get('vcpkg_commit') 48 | test_config = desc['extension'].get('test_config') 49 | if excluded_platforms: 50 | hdl.write(f"COMMUNITY_EXTENSION_EXCLUDE_PLATFORMS={excluded_platforms}\n") 51 | if opt_in_platforms: 52 | hdl.write(f"COMMUNITY_EXTENSION_OPT_IN_PLATFORMS={opt_in_platforms}\n") 53 | if requires_toolchains: 54 | hdl.write(f"COMMUNITY_EXTENSION_REQUIRES_TOOLCHAINS={requires_toolchains}\n") 55 | if vcpkg_url: 56 | hdl.write(f"COMMUNITY_EXTENSION_VCPKG_URL={vcpkg_url}\n") 57 | if vcpkg_commit: 58 | hdl.write(f"COMMUNITY_EXTENSION_VCPKG_COMMIT={vcpkg_commit}\n") 59 | if deploy: 60 | hdl.write(f"COMMUNITY_EXTENSION_DEPLOY=1\n") 61 | if test_config: 62 | escaped_config =test_config.replace("\n", "") 63 | hdl.write(f"COMMUNITY_EXTENSION_TEST_CONFIG={escaped_config}\n") 64 | -------------------------------------------------------------------------------- /extensions/acp/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: acp 3 | description: Natural language to SQL via Claude Code using the Agent Client Protocol (ACP). Query your data with plain English via CLAUDE statements or the claude() table function. 4 | version: 0.2.1 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | requires_toolchains: "rust" 9 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64;windows_amd64_mingw" 10 | maintainers: 11 | - sidequery 12 | 13 | repo: 14 | github: sidequery/duckdb-acp 15 | ref: 4d047876179e9d5c09da98e837f77245a094541f 16 | 17 | docs: 18 | hello_world: | 19 | -- Load the extension 20 | LOAD 'acp'; 21 | 22 | -- Create sample data 23 | CREATE TABLE sales (id INT, product VARCHAR, amount DECIMAL, sale_date DATE); 24 | INSERT INTO sales VALUES 25 | (1, 'Widget', 99.99, '2024-01-15'), 26 | (2, 'Gadget', 149.99, '2024-01-16'), 27 | (3, 'Widget', 99.99, '2024-01-17'); 28 | 29 | -- Query with natural language (statement syntax) 30 | CLAUDE what is the total revenue by product? 31 | 32 | -- Or use the table function 33 | SELECT * FROM claude('which product has the highest average sale amount?'); 34 | 35 | extended_description: | 36 | The ACP extension enables natural language to SQL translation using the Agent Client Protocol (ACP). Write queries in plain English and let an AI agent explore your schema and generate accurate SQL. 37 | 38 | **Two Query Interfaces**: 39 | - Statement syntax: `CLAUDE show me the top 10 customers by revenue` 40 | - Table function: `SELECT * FROM claude('what products sold the most last month?')` 41 | 42 | **How It Works**: 43 | 1. 
The extension intercepts natural language queries 44 | 2. An embedded MCP server provides schema exploration tools 45 | 3. The agent (Claude Code) discovers tables, columns, and data patterns 46 | 4. SQL is generated, tested, and executed against your database 47 | 48 | **Configuration Settings**: 49 | - `acp_agent`: Agent command (default: `claude-code`) 50 | - `acp_safe_mode`: Block mutation queries (default: `true`) 51 | - `acp_debug`: Enable debug output (default: `false`) 52 | - `acp_timeout`: Timeout in seconds (default: `300`) 53 | 54 | **Safety Features**: 55 | - Safe mode blocks INSERT, UPDATE, DELETE by default 56 | - Agent has access only to SQL execution, no external tools 57 | - Configurable timeout prevents runaway queries 58 | 59 | **Requirements**: 60 | - DuckDB 1.1.0+ 61 | - bun or Node.js (for the agent runtime) 62 | - Claude Code with Anthropic API credentials or a Claude Pro/Max account 63 | -------------------------------------------------------------------------------- /extensions/gaggle/docs/function_descriptions.csv: -------------------------------------------------------------------------------- 1 | function,description,comment,example 2 | gaggle_set_credentials,"Sets Kaggle API credentials from SQL.","Alternatively use env vars or `~/.kaggle/kaggle.json`. Returns true on success.","select gaggle_set_credentials('your-username', 'your-api-key');" 3 | gaggle_download,"Downloads a Kaggle dataset to the local cache directory and returns the local dataset path.","This function is idempotent.","select gaggle_download('habedi/flickr-8k-dataset-clean') as local_path;" 4 | gaggle_search,"Searches Kaggle datasets and returns a JSON array.","Constraints: page >= 1, 1 <= page_size <= 100.","select gaggle_search('flickr', 1, 5);" 5 | gaggle_info,"Returns metadata for a dataset as JSON.","For example, title, url, last_updated.","select gaggle_info('habedi/flickr-8k-dataset-clean') as dataset_metadata;" 6 | gaggle_version,"Returns the extension version string.","For example, ""0.1.0"".","select gaggle_version();" 7 | gaggle_clear_cache,"Clears the dataset cache directory.","Returns true on success.","select gaggle_clear_cache();" 8 | gaggle_cache_info,"Returns cache info JSON.","Includes path, size_mb, limit_mb, usage_percent, is_soft_limit, and type fields.","select gaggle_cache_info();" 9 | gaggle_enforce_cache_limit,"Manually enforces cache size limit using LRU eviction.","Returns true on success. (Automatic with soft limit by default).","select gaggle_enforce_cache_limit();" 10 | gaggle_is_current,"Checks if cached dataset is the latest version from Kaggle.","Returns false if not cached or outdated.","select gaggle_is_current('owner/dataset') as is_current;" 11 | gaggle_update_dataset,"Forces update to latest version (ignores cache).","Returns local path to freshly downloaded dataset.","select gaggle_update_dataset('owner/dataset') as updated_path;" 12 | gaggle_version_info,"Returns version info.","Includes: cached_version, latest_version, is_current, is_cached.","select gaggle_version_info('owner/dataset') as version_info;" 13 | gaggle_json_each,"Expands a JSON object into newline-delimited JSON rows.","Fields: key, value, type, path. Users normally shouldn't need to use this function.","select gaggle_json_each('{""a"":1,""b"":[true,{""c"":""x""}]}') as rows;" 14 | gaggle_file_path,"Resolves a specific file's local path inside a downloaded dataset.","Will retrieve (and cache if not downloaded). 
Set GAGGLE_STRICT_ONDEMAND=1 to prevent fallback to full download.","select gaggle_file_path('owner/dataset', 'file.parquet');" 15 | gaggle_ls,"Lists files in the dataset's local directory; non-recursive by default.","Set recursive=true to walk subdirs. size is in MB. path is relative 'owner/dataset/'.","select * from gaggle_ls('habedi/flickr-8k-dataset-clean') limit 5;" -------------------------------------------------------------------------------- /extensions/sazgar/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: sazgar 3 | description: Comprehensive system monitoring - 20 table functions for CPU, memory, disk, network, processes, ports, services, Docker, GPU, and more 4 | version: 0.3.0 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - Angelerator 12 | 13 | repo: 14 | github: Angelerator/Sazgar 15 | ref: aa5d876f9dfb8218d5aed9ecdf8b7798aebd693c 16 | 17 | docs: 18 | hello_world: | 19 | -- Get comprehensive system overview 20 | SELECT * FROM sazgar_system(); 21 | 22 | -- Memory usage in GB 23 | SELECT * FROM sazgar_memory(unit := 'GB'); 24 | 25 | -- Swap memory 26 | SELECT * FROM sazgar_swap('GiB'); 27 | 28 | -- Per-core CPU usage 29 | SELECT * FROM sazgar_cpu_cores(); 30 | 31 | -- Open network ports 32 | SELECT * FROM sazgar_ports('') WHERE local_port < 1024; 33 | 34 | -- Docker containers 35 | SELECT * FROM sazgar_docker(); 36 | 37 | -- System services 38 | SELECT * FROM sazgar_services() WHERE status = 'running' LIMIT 10; 39 | 40 | -- System uptime 41 | SELECT * FROM sazgar_uptime(); 42 | extended_description: | 43 | Sazgar (Persian: سازگار, meaning "compatible/harmonious") is a comprehensive DuckDB extension for system resource monitoring with 20 table functions. 
44 | 45 | **Functions:** 46 | - `sazgar_system()` - Complete system overview 47 | - `sazgar_os()` - Operating system information 48 | - `sazgar_memory(unit)` - RAM usage with unit conversion 49 | - `sazgar_swap(unit)` - Swap/virtual memory info 50 | - `sazgar_cpu()` - CPU information 51 | - `sazgar_cpu_cores()` - Per-core CPU usage 52 | - `sazgar_disks(unit)` - Disk usage 53 | - `sazgar_network()` - Network interface statistics 54 | - `sazgar_ports(filter)` - Open network ports and connections 55 | - `sazgar_processes()` - Running processes 56 | - `sazgar_services()` - System services (launchctl/systemd) 57 | - `sazgar_docker()` - Docker containers 58 | - `sazgar_load()` - System load averages 59 | - `sazgar_uptime()` - Detailed uptime information 60 | - `sazgar_users()` - System users 61 | - `sazgar_environment(filter)` - Environment variables 62 | - `sazgar_components()` - Temperature sensors 63 | - `sazgar_gpu()` - NVIDIA GPU info (optional) 64 | - `sazgar_fds(pid)` - File descriptors (Linux) 65 | - `sazgar_version()` - Extension version 66 | 67 | **Unit conversion** supports: bytes, KB, KiB, MB, MiB, GB, GiB, TB, TiB 68 | 69 | Cross-platform: Linux, macOS, Windows (full support), Android, iOS (partial support) 70 | 71 | -------------------------------------------------------------------------------- /extensions/pcap_reader/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: pcap_reader 3 | description: Read PCAP files from DuckDB 4 | version: 0.1.3 5 | language: Rust 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl" 9 | requires_toolchains: "rust;python3" 10 | maintainers: 11 | - lmangani 12 | - glongo 13 | - kYroL01 14 | 15 | repo: 16 | github: quackscience/duckdb-extension-pcap 17 | ref: 0a11ddc058beb2d480ccbfa83e16a68400c5d076 18 | 19 | docs: 20 | hello_world: | 21 | -- Basic PCAP reader for local or remote files 22 | D SELECT * FROM pcap_reader('test.pcap') LIMIT 3; 23 | ┌─────────────────────┬────────────────┬────────────────┬──────────┬──────────┬──────────┬────────┬───────────────────────────────────────────┐ 24 | │ timestamp │ src_ip │ dst_ip │ src_port │ dst_port │ protocol │ length │ payload │ 25 | │ timestamp │ varchar │ varchar │ int32 │ int32 │ varchar │ int32 │ varchar │ 26 | ├─────────────────────┼────────────────┼────────────────┼──────────┼──────────┼──────────┼────────┼───────────────────────────────────────────┤ 27 | │ 2024-12-06 19:30:2… │ xx.xx.xx.xxx │ yyy.yyy.yy.yyy │ 64078 │ 5080 │ UDP │ 756 │ INVITE sip:810442837619024@yyy.yyy.yy.y… │ 28 | │ 2024-12-06 19:30:2… │ yyy.yyy.yy.yyy │ xx.xx.xx.xxx │ 5080 │ 64078 │ UDP │ 360 │ SIP/2.0 100 Trying\r\nVia: SIP/2.0/UDP … │ 29 | │ 2024-12-06 19:30:2… │ yyy.yyy.yy.yyy │ xx.xx.xx.xxx │ 5080 │ 64078 │ UDP │ 909 │ SIP/2.0 480 Temporarily Unavailable\r\n… │ 30 | ├─────────────────────┴────────────────┴────────────────┴──────────┴──────────┴──────────┴────────┴───────────────────────────────────────────┤ 31 | │ 3 rows 8 columns │ 32 | └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ 33 | 34 | extended_description: | 35 | ## DuckDB PCAP Reader 36 | `pcap_reader` is a DuckDB community extension that empowers network analysts to directly query and analyze IPv4 and IPv6 PCAP files using SQL. 
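As a quick illustration of that kind of analysis, the columns shown in the sample output above can be aggregated like any other DuckDB table ('test.pcap' is the same placeholder file used in the hello-world example):

```sql
-- Traffic summary per protocol and destination port,
-- using the columns shown in the pcap_reader() output above
SELECT protocol,
       dst_port,
       COUNT(*)    AS packets,
       SUM(length) AS total_bytes
FROM pcap_reader('test.pcap')
GROUP BY protocol, dst_port
ORDER BY total_bytes DESC
LIMIT 10;
```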
37 | 38 | Built with Rust for performance and safety, it leverages the `pcap-parser` crate to efficiently process packet capture data. 39 | 40 | #### Features 41 | - Direct PCAP Access: Load PCAP files directly into DuckDB without external tools. 42 | - SQL-PCAP Analysis: Use DuckDB to filter, aggregate, and analyze IPv4/IPv6 network traffic. 43 | 44 | > The PCAP Reader Extension is experimental, use at your own risk! 45 | -------------------------------------------------------------------------------- /extensions/gsheets/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: gsheets 3 | description: Read and write Google Sheets using SQL 4 | version: 0.0.7 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads" 9 | maintainers: 10 | - archiewood 11 | - mharrisb1 12 | 13 | repo: 14 | github: evidence-dev/duckdb_gsheets 15 | ref: f44cfdd97c83489a5ffea15712fc24d0e257ff44 16 | 17 | docs: 18 | hello_world: | 19 | -- Authenticate with Google Account in the browser (easiest) 20 | CREATE SECRET (TYPE gsheet); 21 | 22 | -- OR create a secret with your Google API access token (boring, see extension docs) 23 | CREATE SECRET ( 24 | TYPE gsheet, 25 | PROVIDER access_token, 26 | TOKEN '' 27 | ); 28 | 29 | -- Read a spreadsheet by full URL 30 | FROM read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit'); 31 | 32 | -- Read a spreadsheet by full URL, implicitly 33 | FROM 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit'; 34 | 35 | -- Read a spreadsheet by spreadsheet id 36 | FROM read_gsheet('11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8'); 37 | 38 | -- Read a spreadsheet with no header row 39 | SELECT * FROM read_gsheet('11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8', headers=false); 40 | 41 | -- Read all values in as varchar, skipping type inference 42 | SELECT * FROM read_gsheet('11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8', all_varchar=true); 43 | 44 | -- Read a sheet other than the first sheet using the sheet name 45 | SELECT * FROM read_gsheet('11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8', sheet='Sheet2'); 46 | 47 | -- Read a sheet other than the first sheet using the sheet id in the URL 48 | SELECT * FROM read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=644613997#gid=644613997'); 49 | 50 | -- Write a spreadsheet from a table by spreadsheet id 51 | COPY TO '11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8' (FORMAT gsheet); 52 | 53 | -- Write a spreadsheet from a table by full URL 54 | COPY TO 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?usp=sharing' (FORMAT gsheet); 55 | 56 | -- Write a spreadsheet to a specific sheet using the sheet id in the URL 57 | COPY TO 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=1295634987#gid=1295634987' (FORMAT gsheet); 58 | 59 | extended_description: | 60 | The DuckDB GSheets Extension allows reading and writing of data in Google Sheets from DuckDB. 61 | For detailed setup and usage instructions, visit the docs at [duckdb-gsheets.com](https://duckdb-gsheets.com). 
62 | -------------------------------------------------------------------------------- /extensions/eeagrid/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: eeagrid 3 | description: Extension that adds support for working with the EEA Reference Grid System. 4 | version: 0.1.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - ahuarte47 10 | 11 | repo: 12 | github: ahuarte47/duckdb-eeagrid 13 | ref: f5da6388c0b50ea8436040ce28c82c0b65e953d9 14 | 15 | docs: 16 | hello_world: | 17 | SELECT EEA_CoordXY2GridNum(5078600, 2871400); 18 | ---- 19 | 23090257455218688 20 | 21 | SELECT EEA_GridNum2CoordX(23090257455218688); 22 | ---- 23 | 5078600 24 | 25 | SELECT EEA_GridNum2CoordX(23090257455218688, 1_000_000); 26 | ---- 27 | 5000000 28 | 29 | SELECT EEA_GridNum2CoordY(23090257455218688); 30 | ---- 31 | 2871400 32 | 33 | SELECT EEA_GridNum2CoordY(23090257455218688, 1_000); 34 | ---- 35 | 2871000 36 | 37 | SELECT EEA_GridNumAt100m(23090257455218688); 38 | ---- 39 | 23090257455218688 40 | 41 | SELECT EEA_GridNumAt1km(23090257455218688); 42 | ---- 43 | 23090257448665088 44 | 45 | SELECT EEA_GridNumAt10km(23090257455218688); 46 | ---- 47 | 23090255284404224 48 | 49 | extended_description: | 50 | The EEA Reference Grid extension adds support for working with the [EEA Reference Grid System](https://sdi.eea.europa.eu/catalogue/srv/api/records/aac8379a-5c4e-445c-b2ef-23a6a2701ef0/attachments/eea_reference_grid_v1.pdf). 51 | 52 | The **EEA Reference Grid** is a standardized spatial grid system used across Europe for environmental data analysis and reporting. It is maintained by the [European Environment Agency](https://www.eea.europa.eu/en) (EEA) and forms the basis for aggregating and exchanging geospatial data in a consistent format: 53 | 54 | * `Coordinate Reference System (CRS)`: ETRS89 / LAEA Europe ([EPSG:3035](https://epsg.io/3035)), which minimizes area distortion across Europe. The Geodetic Datum is the European Terrestrial Reference System 1989 (EPSG:6258). The Lambert Azimuthal Equal Area (LAEA) projection is centred at 10°E, 52°N. Coordinates are based on a false Easting of 4321000 meters, and a false Northing of 3210000 meters. 55 | * `Supported resolutions`: Typically available at 10 km, 1 km, and 100 m resolutions. 56 | * `Structure`: Regular square grid with unique cell codes and identifiers assigned based on position and resolution. 57 | * `Purpose`: Enables harmonized spatial analysis, mapping, and cross-border environmental assessments. 58 | 59 | This grid system is widely used in European environmental datasets, including air quality, land use, biodiversity, and climate change indicators. 60 | 61 | The extension provides functions to calculate grid cell identifiers (`INT64`) from XY coordinates based on the `EPSG:3035` coordinate reference system, and vice versa. Please see the [function table](docs/functions.md) for the current implementation status. 62 | -------------------------------------------------------------------------------- /extensions/quickjs/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: "## QuickJS DuckDB Extension\nThis extension provides an embedded\ 3 | \ QuickJS-NG engine for DuckDB. It allows executing JavaScript code directly within\ 4 | \ your SQL queries. 
\n> QuickJS-NG is a small, fast, and embeddable JavaScript\ 5 | \ engine that supports modern JavaScript features including ES2020.\n\nThis extension\ 6 | \ is experimental and potentially unstable. Do not use it in production." 7 | hello_world: "-- Quack JS with QuickJS\n-- Scalar\nD SELECT quickjs('2+2');\n\u250C\ 8 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 9 | \u2500\u2500\u2500\u2510\n\u2502 quickjs('2+2') \u2502\n\u2502 varchar \ 10 | \ \u2502\n\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 11 | \u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 4 \u2502\n\u2514\u2500\ 12 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 13 | \u2500\u2500\u2518\n\n-- Scalar Eval\nD SELECT quickjs_eval('(a, b) => a + b',\ 14 | \ 5, 3);\n\u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 15 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 16 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 17 | \u2500\u2500\u2510\n\u2502 quickjs_eval('(a, b) => a + b', 5, 3) \u2502\n\u2502\ 18 | \ json \u2502\n\u251C\u2500\u2500\u2500\u2500\ 19 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 20 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 21 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 8 \ 22 | \ \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\ 23 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 24 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 25 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n\n-- Table Eval\nD SELECT *\ 26 | \ FROM quickjs('parsed_arg0.map(x => x * arg1)', '[1, 2, 3, 4, 5]', 3);\n\u250C\ 27 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 result \u2502\n\ 28 | \u2502 json \u2502\n\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\ 29 | \n\u2502 3 \u2502\n\u2502 6 \u2502\n\u2502 9 \u2502\n\u2502 12\ 30 | \ \u2502\n\u2502 15 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\ 31 | \u2500\u2500\u2518\n" 32 | extension: 33 | build: cmake 34 | description: DuckDB QuickJS Runtime Extension 35 | excluded_platforms: windows_amd64_mingw 36 | language: C++ 37 | license: MIT 38 | maintainers: 39 | - lmangani 40 | name: quickjs 41 | version: '2025120401' 42 | repo: 43 | github: quackscience/duckdb-quickjs 44 | ref: 2d31ccebd7f44babc901c84ba0fe8b560647e136 45 | -------------------------------------------------------------------------------- /extensions/duck_tails/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: duck_tails 3 | description: Smart Development Intelligence for DuckDB - Git-aware data analysis capabilities that allow querying git history, accessing files at any revision, and performing version-aware data analysis with SQL. 
4 | version: 1.2.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | requires_toolchains: "vcpkg" 9 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64;windows_arm64;windows_amd64_mingw;windows_arm64_mingw" 10 | maintainers: 11 | - teaguesterling 12 | repo: 13 | github: teaguesterling/duck_tails 14 | ref: 9b1b9fb4678430379412cf28b3dd164e11e127cb 15 | 16 | docs: 17 | hello_world: | 18 | -- Load the extension 19 | LOAD 'duck_tails'; 20 | 21 | -- Query git history (defaults to current directory) 22 | SELECT commit_hash, author_name, message, author_date 23 | FROM git_log() LIMIT 5; 24 | 25 | -- Access files from git repository at specific revisions 26 | SELECT * FROM read_csv('git://data/sales.csv@HEAD'); 27 | 28 | -- Compare data between commits 29 | SELECT (SELECT COUNT(*) FROM read_csv('git://data/sales.csv@HEAD')) AS current_count, 30 | (SELECT COUNT(*) FROM read_csv('git://data/sales.csv@HEAD~1')) AS previous_count; 31 | 32 | -- Analyze text differences 33 | SELECT * FROM read_git_diff('git://README.md@HEAD', 'git://README.md@HEAD~1'); 34 | 35 | extended_description: | 36 | Duck Tails brings git-aware data analysis capabilities to DuckDB, enabling sophisticated version-controlled data workflows. The extension provides three core capabilities: 37 | 38 | **Git Filesystem Access**: Use the `git://` protocol to access any file in your git repository at any commit, branch, or tag. This allows you to query historical data states, compare versions, and perform temporal analysis directly in SQL. 39 | 40 | **Repository Metadata Queries**: Query git repository information directly with table functions like `git_log()`, `git_branches()`, and `git_tags()`. Analyze commit histories, track development patterns, and integrate repository metadata into your analytical workflows. 41 | 42 | **Text Diff Analysis**: Comprehensive text diffing capabilities with functions like `diff_text()`, `read_git_diff()`, and `text_diff_stats()`. Analyze changes between file versions, track configuration drift, and perform code change analysis. 43 | 44 | Key functions include: 45 | - `git_log([path])` - Query commit history 46 | - `git_branches([path])` - List repository branches 47 | - `git_tags([path])` - List repository tags 48 | - `diff_text(old, new)` - Compute text differences 49 | - `read_git_diff(file1, [file2])` - Structured diff analysis 50 | - `text_diff_lines(diff)` - Parse diff into line-by-line changes 51 | - `text_diff_stats(old, new)` - Diff statistics and metrics 52 | 53 | The extension supports mixed file systems, allowing you to combine git://, local files, S3, and other DuckDB-supported protocols in a single query. Built with libgit2 for robust git operations and comprehensive error handling.
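A minimal sketch of such a mixed-filesystem query, assuming the repository tracks `data/sales.csv` (as in the hello-world examples) and that a local `new_sales.csv` with matching `id` and `amount` columns exists; the local file name and the column names are illustrative placeholders:

```sql
-- Join a historical version of a tracked file (via git://) against a local file
-- ('new_sales.csv', 'id', and 'amount' are hypothetical placeholders)
SELECT cur.id,
       cur.amount AS current_amount,
       old.amount AS previous_amount
FROM read_csv('new_sales.csv') AS cur
JOIN read_csv('git://data/sales.csv@HEAD~1') AS old USING (id);
```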
54 | -------------------------------------------------------------------------------- /extensions/flock/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: flock 3 | description: LLM & RAG extension to combine analytics and semantic analysis 4 | version: 0.5.0 5 | language: SQL & C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_rtools;wasm_mvp;wasm_eh;wasm_threads" 9 | 10 | maintainers: 11 | - anasdorbani 12 | - queryproc 13 | 14 | repo: 15 | github: dais-polymtl/flock 16 | ref: 7f1c36abe481b97c9e2c6e7303f36005c8d242fa 17 | 18 | docs: 19 | hello_world: | 20 | -- After loading, any function call will throw an error if the provider's secret doesn't exist 21 | 22 | -- Create your provider secret by following the [documentation](https://dais-polymtl.github.io/flock/docs/what-is-flock/). For example, you can create a default OpenAI API key as follows: 23 | D CREATE SECRET (TYPE OPENAI, API_KEY 'your-api-key'); 24 | 25 | -- Call an OpenAI model with a predefined prompt ('Tell me hello world') and default model ('gpt-4o-mini') 26 | D SELECT llm_complete({'model_name': 'default'}, {'prompt_name': 'hello-world'}); 27 | ┌──────────────────────────────────────────┐ 28 | │ llm_complete(hello_world, default_model) │ 29 | │ varchar │ 30 | ├──────────────────────────────────────────┤ 31 | │ Hello world │ 32 | └──────────────────────────────────────────┘ 33 | 34 | -- Check the prompts and supported models 35 | D GET PROMPTS; 36 | D GET MODELS; 37 | 38 | -- Create a new prompt for summarizing text 39 | D CREATE PROMPT('summarize', 'summarize the text into 1 word: {{text}}'); 40 | 41 | -- Create a variable name for the model to do the summarizing 42 | D CREATE MODEL('summarizer-model', 'gpt-4o', 'openai'); 43 | 44 | -- Summarize text and pass it as parameter 45 | D SELECT llm_complete({'model_name': 'summarizer-model'}, {'prompt_name': 'summarize','context_columns': [{'data': 'We support more functions and approaches to combine relational analytics and semantic analysis. Check our repo for documentation and examples.'}}]); 46 | 47 | extended_description: | 48 | **Flock** is an experimental DuckDB extension that enables seamless integration of large language models (LLMs) and retrieval-augmented generation (RAG) directly within SQL. 49 | 50 | It introduces `MODEL` and `PROMPT` objects as first-class SQL entities, making it easy to define, manage, and reuse LLM interactions. Core functions like `llm_complete`, `llm_filter`, and `llm_rerank` allow you to perform generation, semantic filtering, and ranking—all from SQL. 51 | 52 | Flock is designed for rapid prototyping of LLM-based analytics and is optimized with batching and caching features for better performance. 53 | 54 | 📄 For more details and examples, see the [Flock documentation](https://dais-polymtl.github.io/flock/docs/what-is-flock). 55 | 56 | > *Note:* Flock is part of ongoing research by the [Data & AI Systems (DAIS) Laboratory @ Polytechnique Montréal](https://dais-polymtl.github.io/). It is under active development, and some features may evolve. Feedback and contributions are welcome! 57 | -------------------------------------------------------------------------------- /extensions/cassandra/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: cassandra 3 | description: Connect to Apache Cassandra, ScyllaDB, and DataStax Astra databases directly from DuckDB. 
Query Cassandra tables using SQL with support for all major Cassandra data types, SSL/TLS connections, and cloud deployments. 4 | version: 1.0.0 5 | language: C++ 6 | build: cmake 7 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw;" 8 | license: MIT 9 | requires_toolchains: "cmake, openssl" 10 | maintainers: 11 | - dioptre 12 | 13 | repo: 14 | github: dioptre/duckdb-cassandra 15 | ref: a5d974c422c75f217e6f495c0dc159993a30c0f4 16 | canonical_name: cassandra 17 | 18 | docs: 19 | hello_world: | 20 | -- Load the extension 21 | LOAD 'cassandra'; 22 | 23 | -- Connect to local Cassandra 24 | SELECT * FROM cassandra_scan('my_keyspace.my_table', host='127.0.0.1', port=9042); 25 | 26 | -- Execute custom CQL queries 27 | SELECT * FROM cassandra_query('SELECT * FROM my_keyspace.users WHERE status = ''active''', host='127.0.0.1'); 28 | 29 | -- Attach a Cassandra database for persistent access 30 | ATTACH 'host=127.0.0.1 port=9042 keyspace=my_keyspace' AS cass_db (TYPE cassandra); 31 | SELECT * FROM cass_db.users LIMIT 10; 32 | 33 | extended_description: | 34 | The Cassandra extension enables DuckDB to connect to Apache Cassandra, ScyllaDB, and DataStax Astra databases. 35 | It supports three connection methods: 36 | 37 | **1. Direct Table Scanning** - Query specific tables with `cassandra_scan()` 38 | **2. Custom CQL Queries** - Execute any CQL query with `cassandra_query()` 39 | **3. Database Attachment** - Attach entire keyspaces with `ATTACH` for persistent access 40 | 41 | **Supported Databases:** 42 | - Apache Cassandra (local and remote) 43 | - DataStax Astra (cloud Cassandra) 44 | - ScyllaDB (Cassandra-compatible) 45 | - Any Cassandra-compatible database 46 | 47 | **Security Features:** 48 | - SSL/TLS encryption with custom certificates 49 | - Username/password authentication 50 | - DataStax Astra token-based authentication 51 | - Base64 and hex-encoded certificate support 52 | 53 | **Data Type Support:** 54 | - All primitive types (text, int, bigint, double, boolean, etc.) 55 | - UUID and TimeUUID with proper conversion 56 | - Timestamp with timezone support 57 | - Collections (list, set, map) as JSON strings 58 | - Blob data with binary support 59 | - NULL value handling 60 | 61 | **Connection Examples:** 62 | ```sql 63 | -- SSL connection with certificates 64 | SELECT * FROM cassandra_scan('keyspace.table', 65 | host='secure-cluster.com', 66 | ssl=true, 67 | certfile_b64='LS0tLS1CRU...', 68 | usercert_b64='LS0tLS1CRU...', 69 | userkey_b64='LS0tLS1CRU...' 70 | ); 71 | 72 | -- DataStax Astra connection 73 | SELECT * FROM cassandra_query('SELECT * FROM users', 74 | client_id='your-client-id', 75 | client_secret='your-client-secret', 76 | astra_host='your-db-id-region.apps.astra.datastax.com', 77 | astra_ca_cert_b64='LS0tLS1CRU...', 78 | astra_client_cert_b64='LS0tLS1CRU...', 79 | astra_client_key_b64='LS0tLS1CRU...' 
80 | ); 81 | ``` 82 | -------------------------------------------------------------------------------- /extensions/snowflake/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: snowflake 3 | description: Snowflake data source extension - query Snowflake databases directly from DuckDB 4 | version: 0.2.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - iqea-ai 10 | 11 | repo: 12 | github: iqea-ai/duckdb-snowflake 13 | ref: ee2388dc6c32b1231c493be4dacc9897690936de 14 | 15 | install_notes: | 16 | **Important:** This extension requires DuckDB 1.4.3 and the Apache Arrow ADBC Snowflake driver to function properly. 17 | 18 | **You must install the ADBC driver separately after installing this extension.** The extension will not work without the driver. 19 | 20 | For complete installation instructions, platform-specific setup, and troubleshooting, please refer to the official documentation: 21 | 22 | **[ADBC Driver Installation Guide](https://github.com/iqea-ai/duckdb-snowflake#adbc-driver-setup)** 23 | 24 | The documentation includes: 25 | - Step-by-step installation instructions for all platforms 26 | - Automated setup scripts 27 | - Manual installation procedures 28 | - Driver location requirements 29 | - Troubleshooting common issues 30 | 31 | Please visit the [extension repository](https://github.com/iqea-ai/duckdb-snowflake) for installation instructions and setup options. 32 | 33 | docs: 34 | hello_world: | 35 | -- Install and load the extension 36 | INSTALL snowflake FROM community; 37 | LOAD snowflake; 38 | 39 | -- Create a Snowflake secret with password authentication 40 | CREATE SECRET my_snowflake ( 41 | TYPE snowflake, 42 | ACCOUNT 'your-account', 43 | USER 'your-username', 44 | PASSWORD 'your-password', 45 | DATABASE 'your-database' 46 | ); 47 | 48 | -- Query Snowflake data using pass-through query function 49 | SELECT * FROM snowflake_query( 50 | 'SELECT * FROM customers WHERE state = ''CA''', 51 | 'my_snowflake' 52 | ); 53 | 54 | -- Attach Snowflake database for direct SQL access 55 | ATTACH '' AS sf (TYPE snowflake, SECRET my_snowflake, READ_ONLY); 56 | SELECT * FROM sf.schema.customers WHERE state = 'CA'; 57 | 58 | extended_description: | 59 | This community-maintained extension allows DuckDB to connect to Snowflake using Arrow ADBC drivers. 60 | It provides seamless connectivity between DuckDB and Snowflake, supporting multiple authentication methods 61 | (password, external browser/SSO, key pair), predicate pushdown optimization, and comprehensive SQL operations. 62 | 63 | **Features:** 64 | - Multiple authentication methods (Password, External Browser/SSO, Key Pair) 65 | - Direct SQL passthrough via `snowflake_query()` function 66 | - ATTACH support for mounting Snowflake databases as DuckDB catalogs 67 | - Predicate pushdown optimization (optional) 68 | - Hybrid queries: join Snowflake tables with local DuckDB tables 69 | - Full DML read operations: SELECT with WHERE, JOIN, aggregations, subqueries 70 | 71 | **Prerequisites:** The Apache Arrow ADBC Snowflake driver must be installed separately. 72 | **See the [ADBC Driver Installation Guide](https://github.com/iqea-ai/duckdb-snowflake#adbc-driver-setup) 73 | for complete setup instructions.** For comprehensive usage examples, authentication methods, and 74 | advanced features, visit the [extension repository](https://github.com/iqea-ai/duckdb-snowflake). 
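As an illustration of the hybrid-query feature listed above, the sketch below joins the attached Snowflake table from the hello-world example with a local DuckDB table; the local `orders` table and its `customer_id` column are hypothetical placeholders:

```sql
-- Hybrid query: attached Snowflake table joined with a local DuckDB table
-- (sf.schema.customers comes from the ATTACH example above;
--  the local 'orders' table and 'customer_id' column are assumed)
SELECT c.state,
       COUNT(*) AS order_count
FROM sf.schema.customers AS c
JOIN orders AS o ON o.customer_id = c.customer_id
GROUP BY c.state
ORDER BY order_count DESC;
```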
75 | -------------------------------------------------------------------------------- /extensions/warc/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: warc 3 | description: Parse WARC (Web ARChive) records for Common Crawl data processing 4 | version: 0.1.0 5 | language: Rust 6 | build: cargo 7 | license: MIT 8 | excluded_platforms: "wasm_mvp;wasm_eh;wasm_threads;linux_amd64_musl" 9 | requires_toolchains: "rust" 10 | maintainers: 11 | - onnimonni 12 | 13 | repo: 14 | github: midwork-finds-jobs/duckdb_warc 15 | ref: 7184683eec41abd8dbe34b8457c780cd80648cae 16 | 17 | docs: 18 | hello_world: | 19 | -- Parse a WARC record from a gzip-compressed file 20 | SELECT parse_warc(content) FROM read_blob('record.warc.gz'); 21 | ┌─────────────────────────────────────────────────────────────────────────────┐ 22 | │ parse_warc(content) │ 23 | │ struct(warc_version varchar, warc_headers varchar, http_version varchar, │ 24 | │ http_status integer, http_headers varchar, http_body blob) │ 25 | ├─────────────────────────────────────────────────────────────────────────────┤ 26 | │ {'warc_version': '1.0', 'warc_headers': '{"WARC-Type": "response", ...}', │ 27 | │ 'http_version': 'HTTP/1.1', 'http_status': 200, │ 28 | │ 'http_headers': '{"content-type": "text/html", ...}', │ 29 | │ 'http_body': ...} │ 30 | └─────────────────────────────────────────────────────────────────────────────┘ 31 | 32 | -- Extract specific fields 33 | SELECT 34 | (parse_warc(content)).http_status, 35 | (parse_warc(content)).http_body 36 | FROM read_blob('record.warc.gz'); 37 | 38 | extended_description: | 39 | The WARC extension parses WARC (Web ARChive) records, the standard format used by Common Crawl 40 | and web archiving tools. It enables efficient processing of web archive data directly in DuckDB. 41 | 42 | ## Function 43 | 44 | ### `parse_warc(data)` 45 | 46 | Parse a WARC record and return a struct with all components. 47 | 48 | **Parameters:** 49 | - `data` (BLOB or VARCHAR): WARC record data (auto-detects gzip compression) 50 | 51 | **Returns:** STRUCT with fields: 52 | - `warc_version` (VARCHAR): WARC format version (e.g., "1.0") 53 | - `warc_headers` (VARCHAR): JSON object of WARC headers 54 | - `http_version` (VARCHAR): HTTP version (e.g., "HTTP/1.1") 55 | - `http_status` (INTEGER): HTTP status code (e.g., 200) 56 | - `http_headers` (VARCHAR): JSON object of HTTP headers (lowercase keys) 57 | - `http_body` (BLOB): Response body content 58 | 59 | ## Common Crawl Workflow 60 | 61 | The recommended workflow for processing Common Crawl data: 62 | 63 | 1. **Query the columnar index** (Parquet) to find records of interest 64 | 2. **Fetch only the specific byte ranges** you need using HTTP Range requests 65 | 3. **Parse with this extension** 66 | 67 | ```sql 68 | -- Example: Parse a downloaded Common Crawl record 69 | -- First download: curl -r"46376769-46377713" "https://data.commoncrawl.org/crawl-data/..." 
> record.warc.gz 70 | SELECT 71 | (parse_warc(content)).http_status, 72 | decode((parse_warc(content)).http_body) as html 73 | FROM read_blob('record.warc.gz'); 74 | ``` 75 | 76 | ## Features 77 | 78 | - Auto-detects gzip compression 79 | - Handles binary content (skips body for non-text responses) 80 | - HTTP header keys are lowercased for consistent access 81 | - Works with both BLOB and VARCHAR input types 82 | -------------------------------------------------------------------------------- /extensions/mlpack/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: mlpack 3 | description: Connecting duckdb to the mlpack C++ machine learning library 4 | version: 0.0.5 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | excluded_platforms: "windows_amd64_mingw;windows_amd64;wasm_mvp;wasm_eh;wasm_threads" 9 | requires_toolchains: "fortran;omp" 10 | maintainers: 11 | - eddelbuettel 12 | 13 | repo: 14 | github: eddelbuettel/duckdb-mlpack 15 | ref: 1c1f71363b06afcf376e538a7b5e78ebfc7f8c0a 16 | 17 | docs: 18 | hello_world: | 19 | -- Perform adaBoost (using weak learner 'Perceptron' by default) 20 | -- Read 'features' into 'X', 'labels' into 'Y', use optional parameters 21 | -- from 'Z', and prepare model storage in 'M' 22 | CREATE TABLE X AS SELECT * FROM read_csv("https://eddelbuettel.github.io/duckdb-mlpack/data/iris.csv"); 23 | CREATE TABLE Y AS SELECT * FROM read_csv("https://eddelbuettel.github.io/duckdb-mlpack/data/iris_labels.csv"); 24 | CREATE TABLE Z (name VARCHAR, value VARCHAR); 25 | INSERT INTO Z VALUES ('iterations', '50'), ('tolerance', '1e-7'); 26 | CREATE TABLE M (key VARCHAR, json VARCHAR); 27 | 28 | -- Train model for 'Y' on 'X' using parameters 'Z', store in 'M' 29 | CREATE TEMP TABLE A AS SELECT * FROM mlpack_adaboost("X", "Y", "Z", "M"); 30 | 31 | -- Count by predicted group 32 | SELECT COUNT(*) as n, predicted FROM A GROUP BY predicted; 33 | 34 | -- Model 'M' can be used to predict 35 | CREATE TABLE N (x1 DOUBLE, x2 DOUBLE, x3 DOUBLE, x4 DOUBLE); 36 | -- inserting approximate column mean values 37 | INSERT INTO N VALUES (5.843, 3.054, 3.759, 1.199); 38 | -- inserting approximate column mean values, min values, max values 39 | INSERT INTO N VALUES (5.843, 3.054, 3.759, 1.199), (4.3, 2.0, 1.0, 0.1), (7.9, 4.4, 6.9, 2.5); 40 | -- and this predict one element each 41 | SELECT * FROM mlpack_adaboost_pred("N", "M"); 42 | 43 | extended_description: | 44 | ### Supervised Learning 45 | The mlpack extension allows to fit (or train) and predict (or classify) from the models implemented, currently adaBoost, random forests as well as (regularized) linear and logistic regression. 46 | The format is the same for these four methods: four tables, say, "X", "Y", "Z" and "M" provide input for, respectively, features "X", labels "Y", optional parameters varying by model in "Z" as well as an output table "M" for the JSON-serialized model. 47 | For all four methods, following a model fit (or training), a prediction (or classification) can be made using "M" and new predictor values "N" as shown in the example. 48 | All these "fit" (or "train") methods take four parameter tables, all "predict" methods take two. 49 | 50 | ### Unsupervised Learning 51 | A kmeans clustering method is also available. 52 | It uses three tables for data, parameters and results. 
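A rough sketch of what that three-table pattern could look like, mirroring the supervised examples above; note that the function name `mlpack_kmeans`, the parameter name `clusters`, and the layout of the results table are assumptions for illustration and are not taken from this description (see the repo for the actual interface):

```sql
-- Hypothetical kmeans call following the documented three-table pattern
-- (the function name, parameter name, and result-table schema are assumed)
CREATE TABLE X AS SELECT * FROM read_csv("https://eddelbuettel.github.io/duckdb-mlpack/data/iris.csv");
CREATE TABLE Z (name VARCHAR, value VARCHAR);
INSERT INTO Z VALUES ('clusters', '3');
CREATE TABLE R (key VARCHAR, json VARCHAR);
SELECT * FROM mlpack_kmeans("X", "Z", "R");
```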
53 | 54 | ### General Information 55 | A pair of paramaters "mlpack_verbose" (to show additional data) and "mlpack_silent" (to suppress display of minimal summaries) can also be set. 56 | 57 | The implementation still stresses the 'minimal' part of 'a (initial) MVP demo' (where MVP stands for 'minimally viable product'). 58 | It wraps five supervised and unsupervised machine learning methods, and provides Linux and macOS builds. 59 | More methods, options or parameters can be added quite easily. 60 | As interfaces may change while we may work out how to automate interface generation from mlpack itself, it should be considered experimental. 61 | 62 | For more, please see the [repo](https://github.com/eddelbuettel/duckdb-mlpack). 63 | -------------------------------------------------------------------------------- /extensions/poached/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: poached 3 | description: SQL parsing and introspection for IDEs, editors, and developer tools 4 | version: 0.2.3 5 | language: C/C++ 6 | build: cmake 7 | license: MIT 8 | requires_toolchains: "python3" 9 | excluded_platforms: "windows_amd64;windows_arm64;windows_amd64_mingw" 10 | maintainers: 11 | - sidequery 12 | 13 | repo: 14 | github: sidequery/poached 15 | ref: 3ea18b13eef73620a4707c70753331b235a87f37 16 | 17 | docs: 18 | hello_world: | 19 | -- Tokenize SQL for syntax highlighting (with byte positions) 20 | SELECT * FROM tokenize_sql('SELECT * FROM users WHERE id = 1'); 21 | ┌───────────────┬──────────────────┐ 22 | │ byte_position │ category │ 23 | ├───────────────┼──────────────────┤ 24 | │ 0 │ KEYWORD │ 25 | │ 7 │ OPERATOR │ 26 | │ 9 │ KEYWORD │ 27 | │ 14 │ IDENTIFIER │ 28 | │ 20 │ KEYWORD │ 29 | │ 26 │ IDENTIFIER │ 30 | │ 29 │ OPERATOR │ 31 | │ 31 │ NUMERIC_CONSTANT │ 32 | └───────────────┴──────────────────┘ 33 | 34 | -- Validate SQL and get error messages 35 | SELECT is_valid_sql('SELECT * FROM'), sql_error_message('SELECT * FROM'); 36 | ┌──────────────┬────────────────────────────────────────────┐ 37 | │ is_valid_sql │ sql_error_message │ 38 | ├──────────────┼────────────────────────────────────────────┤ 39 | │ false │ Parser Error: syntax error at end of input │ 40 | └──────────────┴────────────────────────────────────────────┘ 41 | 42 | -- Get result column types without executing 43 | SELECT * FROM parse_columns('SELECT 1 AS num, ''hello'' AS str', 0); 44 | ┌───────────┬──────────┬──────────┐ 45 | │ col_index │ col_name │ col_type │ 46 | ├───────────┼──────────┼──────────┤ 47 | │ 0 │ num │ INTEGER │ 48 | │ 1 │ str │ VARCHAR │ 49 | └───────────┴──────────┴──────────┘ 50 | 51 | -- Extract function calls with type info 52 | SELECT * FROM parse_functions('SELECT COUNT(*), UPPER(name) FROM t'); 53 | ┌───────────────┬───────────────┐ 54 | │ function_name │ function_type │ 55 | ├───────────────┼───────────────┤ 56 | │ count_star │ aggregate │ 57 | │ upper │ scalar │ 58 | └───────────────┴───────────────┘ 59 | 60 | -- Get full query plan as JSON 61 | SELECT sql_parse_json('SELECT 1 + 2 AS result'); 62 | 63 | extended_description: | 64 | `poached` is a DuckDB extension for SQL parsing and introspection, designed for building IDEs, SQL editors, query analyzers, and developer tools. 65 | 66 | **Tokenization**: `tokenize_sql()` returns tokens with byte positions and categories (KEYWORD, IDENTIFIER, OPERATOR, NUMERIC_CONSTANT, STRING_CONSTANT, COMMENT, ERROR) for accurate syntax highlighting. 
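To show how the byte positions support highlighting, the query below turns consecutive token start positions into `[start, end)` byte ranges; this is an ordinary window-function query over the documented output columns, not an extra function provided by the extension:

```sql
-- Derive byte ranges per token: each range ends where the next token begins
WITH toks AS (
    SELECT byte_position,
           category,
           lead(byte_position) OVER (ORDER BY byte_position) AS next_position
    FROM tokenize_sql('SELECT * FROM users WHERE id = 1')
)
SELECT byte_position AS start_byte,
       coalesce(next_position, length('SELECT * FROM users WHERE id = 1')) AS end_byte,
       category
FROM toks;
```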
67 | 68 | **Validation**: `is_valid_sql()` and `sql_error_message()` for parse error detection and reporting. 69 | 70 | **Schema introspection**: `parse_columns()`, `parse_column_types()`, `parse_type_info()` to get result column names and types without executing queries. 71 | 72 | **Query analysis**: `parse_tables()`, `parse_functions()`, `parse_where()` to extract structural information from queries. 73 | 74 | **Parameters**: `parse_parameters()` to extract prepared statement parameters. 75 | 76 | **JSON output**: `sql_parse_json()` for full query plan access as JSON. 77 | 78 | For more details, visit the [extension repository](https://github.com/sidequery/poached). 79 | -------------------------------------------------------------------------------- /extensions/http_client/description.yml: -------------------------------------------------------------------------------- 1 | docs: 2 | extended_description: The HTTP Client Extension is experimental, use at your own 3 | risk! 4 | hello_world: "-- GET Request Example w/ JSON Parsing\nWITH __input AS (\n SELECT\n\ 5 | \ http_get(\n 'https://httpbin.org/delay/0',\n headers => MAP\ 6 | \ {\n 'accept': 'application/json',\n },\n params => MAP\ 7 | \ {\n 'limit': 1\n }\n ) AS res\n),\n__response AS (\n SELECT\n\ 8 | \ (res->>'status')::INT AS status,\n (res->>'reason') AS reason,\n unnest(\ 9 | \ from_json(((res->>'body')::JSON)->'headers', '{\"Host\": \"VARCHAR\"}') ) AS\ 10 | \ features\n FROM\n __input\n)\nSELECT\n __response.status,\n __response.reason,\n\ 11 | \ __response.Host AS host\nFROM\n __response\n;\n\u250C\u2500\u2500\u2500\u2500\ 12 | \u2500\u2500\u2500\u2500\u252C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 13 | \u2500\u252C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 14 | \u2500\u2500\u2510\n\u2502 status \u2502 reason \u2502 host \u2502\n\u2502\ 15 | \ int32 \u2502 varchar \u2502 varchar \u2502\n\u251C\u2500\u2500\u2500\u2500\ 16 | \u2500\u2500\u2500\u2500\u253C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 17 | \u2500\u253C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 18 | \u2500\u2500\u2524\n\u2502 200 \u2502 OK \u2502 httpbin.org \u2502\n\u2514\ 19 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\ 20 | \u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 21 | \u2500\u2500\u2500\u2500\u2500\u2500\u2518\n\n-- POST Request Example w/ Headers\ 22 | \ and Parameters\nWITH __input AS (\nSELECT\n http_post(\n 'https://httpbin.org/delay/0',\n\ 23 | \ headers => MAP {\n 'accept': 'application/json',\n },\n \ 24 | \ params => MAP {\n 'limit': 1\n }\n ) AS res\n),\n__response AS\ 25 | \ (\n SELECT\n (res->>'status')::INT AS status,\n (res->>'reason') AS reason,\n\ 26 | \ unnest( from_json(((res->>'body')::JSON)->'headers', '{\"Host\": \"VARCHAR\"\ 27 | }') ) AS features\n FROM\n __input\n)\nSELECT\n __response.status,\n __response.reason,\n\ 28 | \ __response.Host AS host,\nFROM\n __response\n;\n\u250C\u2500\u2500\u2500\u2500\ 29 | \u2500\u2500\u2500\u2500\u252C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 30 | \u2500\u252C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 31 | \u2500\u2500\u2510\n\u2502 status \u2502 reason \u2502 host \u2502\n\u2502\ 32 | \ int32 \u2502 varchar \u2502 varchar \u2502\n\u251C\u2500\u2500\u2500\u2500\ 33 | \u2500\u2500\u2500\u2500\u253C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 34 | \u2500\u253C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 35 | 
\u2500\u2500\u2524\n\u2502 200 \u2502 OK \u2502 httpbin.org \u2502\n\u2514\ 36 | \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\ 37 | \u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ 38 | \u2500\u2500\u2500\u2500\u2500\u2500\u2518\n" 39 | extension: 40 | build: cmake 41 | description: DuckDB HTTP Client Extension 42 | excluded_platforms: windows_amd64_mingw 43 | language: C++ 44 | license: MIT 45 | maintainers: 46 | - lmangani 47 | - ahuarte47 48 | - Okabintaro 49 | - rustyconover 50 | name: http_client 51 | version: '2025120401' 52 | repo: 53 | github: query-farm/httpclient 54 | ref: 8cd2b68349581f733f325c32a16e9fc237ab3eba 55 | -------------------------------------------------------------------------------- /extensions/yaml/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: yaml 3 | description: Read YAML files into DuckDB with native YAML type support, comprehensive extraction functions, and seamless JSON interoperability 4 | version: 1.3.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - teaguesterling 10 | 11 | # yaml package issues for windows in previous vcpkg 12 | vcpkg_commit: "656be05781442d5c4cb14978f6c7bf47b6e12b32" 13 | 14 | repo: 15 | github: teaguesterling/duckdb_yaml 16 | ref: "6ae4f53abffceb4cbd5098179539acd39063c3a4" 17 | 18 | docs: 19 | hello_world: | 20 | -- Load the extension 21 | LOAD yaml; 22 | 23 | -- Query YAML files directly 24 | SELECT * FROM 'config.yaml'; 25 | SELECT * FROM 'data/*.yml' WHERE active = true; 26 | 27 | -- Create tables with YAML columns 28 | CREATE TABLE configs(id INTEGER, config YAML); 29 | INSERT INTO configs VALUES (1, E'server: production\nport: 8080\nfeatures: [logging, metrics]'); 30 | 31 | -- Extract data using YAML functions 32 | SELECT 33 | yaml_extract_string(config, '$.server') AS environment, 34 | yaml_extract(config, '$.port') AS port, 35 | yaml_extract(config, '$.features[0]') AS first_feature 36 | FROM configs; 37 | 38 | -- Convert between YAML and JSON 39 | SELECT yaml_to_json(config) AS json_config FROM configs; 40 | SELECT value_to_yaml({name: 'John', age: 30}) AS yaml_person; 41 | 42 | -- Write query results to YAML 43 | COPY (SELECT * FROM users) TO 'output.yaml' (FORMAT yaml, STYLE block); 44 | 45 | extended_description: | 46 | The YAML extension brings comprehensive YAML support to DuckDB, enabling seamless integration of YAML data within SQL queries. 
47 | 48 | **Key Features:** 49 | 50 | - **Native YAML Type**: Full YAML type support with automatic casting between YAML, JSON, and VARCHAR 51 | - **File Reading**: Read YAML files with `read_yaml()` and `read_yaml_objects()` functions supporting multi-document files, top-level sequences, and robust error handling 52 | - **Direct File Querying**: Query YAML files directly using `FROM 'file.yaml'` syntax 53 | - **Extraction Functions**: Query YAML data with `yaml_extract()`, `yaml_type()`, `yaml_exists()`, and path-based extraction 54 | - **Type Detection**: Comprehensive automatic type detection for temporal types (DATE, TIME, TIMESTAMP), optimal numeric types, and boolean values 55 | - **Column Type Specification**: Explicitly define column types when reading YAML files for schema consistency 56 | - **YAML Output**: Write query results to YAML files using `COPY TO` with configurable formatting styles 57 | - **Multi-Document Support**: Handle files with multiple YAML documents separated by `---` 58 | - **Error Recovery**: Continue processing valid documents even when some contain errors 59 | - **JSON Interoperability**: Seamless conversion between YAML and JSON formats 60 | - **Frontmatter Extraction**: Extract YAML frontmatter metadata from other files 61 | 62 | **Example Use Cases:** 63 | 64 | - Configuration file management and querying 65 | - Log file analysis and processing 66 | - Data migration between YAML and relational formats 67 | - Integration with YAML-based CI/CD pipelines 68 | - Processing Kubernetes manifests and Helm charts 69 | 70 | The extension is built using yaml-cpp and follows DuckDB's extension development best practices, ensuring reliable performance and cross-platform compatibility. 71 | 72 | **Note**: This extension was written primarily using Claude and Claude Code as an exercise in AI-driven development. 73 | -------------------------------------------------------------------------------- /extensions/sheetreader/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: sheetreader 3 | description: Fast XLSX file importer 4 | version: 0.1.0 5 | language: C++ 6 | build: cmake 7 | excluded_platforms: "windows_amd64_rtools;windows_amd64_mingw" 8 | license: MIT 9 | maintainers: 10 | - freddie-freeloader 11 | 12 | repo: 13 | github: polydbms/sheetreader-duckdb 14 | ref: 887013792794aaac8bffd040fcb2439dd797ad53 15 | 16 | docs: 17 | hello_world: | 18 | -- Create table from XLSX file & use default values for parameters 19 | CREATE TABLE data AS FROM sheetreader('data.xlsx'); 20 | 21 | -- Example usage of available named parameters 22 | CREATE TABLE data2 AS FROM sheetreader( 23 | 'data2.xlsx', 24 | sheet_index = 1, 25 | threads = 16, 26 | skip_rows = 0, 27 | has_header = true, 28 | types = [BOOLEAN, VARCHAR], 29 | coerce_to_string = true, 30 | force_types = true 31 | ); 32 | 33 | 34 | extended_description: | 35 | `sheetreader` is a DuckDB extension that allows reading XLSX files into DuckDB tables with SheetReader, our blazingly fast XLSX parser (https://github.com/polydbms/sheetreader-core). 36 | 37 | #### Parameters 38 | 39 | | Name | Description | Type | Default | 40 | |:----|:-----------|:----:|:-------| 41 | | `sheet_index` | Index of the sheet to read. Starts at 1. | `INTEGER` | `1` | 42 | | `sheet_name` | Name of the sheet to read.
Only either `sheet_index` or `sheet_name` can be set. | `VARCHAR` | `""` | 43 | | `threads` | Number of threads to use while parsing | `INTEGER` | Half of available cores; minimum 1 | 44 | | `skip_rows` | Number of rows to skip | `INTEGER` | `0` | 45 | | `has_header` | Force to treat the first row as a header row.
  • If successful, the cell contents are used for column names.
  • If set to `false` (which is the default), the extension will still try to treat the first row as header row.
    The difference is that it will not fail if the first row is not usable.
| `BOOLEAN` | `false` | 46 | | `types` | List of types for all columns
  • Types currently available:
    `VARCHAR`, `BOOLEAN`, `DOUBLE`, `DATE`.
  • Useful in combination with `coerce_to_string` and `force_types`.
| `LIST(VARCHAR)` | Uses types determined by first & second row (after skipped rows) | 47 | | `coerce_to_string` | Coerce all cells in columns of type `VARCHAR` to string (i.e. `VARCHAR`). | `BOOLEAN` | `false` | 48 | | `force_types` | Use `types` even if they are not compatible with types determined by first/second row.
Cells, that are not of the column type, are set to `NULL` or coerced to string, if option is set. | `BOOLEAN` | `false` | 49 | 50 | #### More Information 51 | 52 | SheetReader was published in the [Information Systems Journal](https://www.sciencedirect.com/science/article/abs/pii/S0306437923000194) 53 | 54 | ```bibtex 55 | @article{DBLP:journals/is/GavriilidisHZM23, 56 | author = {Haralampos Gavriilidis and 57 | Felix Henze and 58 | Eleni Tzirita Zacharatou and 59 | Volker Markl}, 60 | title = {SheetReader: Efficient Specialized Spreadsheet Parsing}, 61 | journal = {Inf. Syst.}, 62 | volume = {115}, 63 | pages = {102183}, 64 | year = {2023}, 65 | url = {https://doi.org/10.1016/j.is.2023.102183}, 66 | doi = {10.1016/J.IS.2023.102183}, 67 | timestamp = {Mon, 26 Jun 2023 20:54:32 +0200}, 68 | biburl = {https://dblp.org/rec/journals/is/GavriilidisHZM23.bib}, 69 | bibsource = {dblp computer science bibliography, https://dblp.org} 70 | } 71 | ``` 72 | -------------------------------------------------------------------------------- /extensions/duck_hunt/description.yml: -------------------------------------------------------------------------------- 1 | extension: 2 | name: duck_hunt 3 | description: Parse and analyze test results, build outputs, and CI/CD pipeline logs from 45+ development tools with dynamic regexp patterns 4 | version: 1.2.0 5 | language: C++ 6 | build: cmake 7 | license: MIT 8 | maintainers: 9 | - teaguesterling 10 | 11 | repo: 12 | github: teaguesterling/duck_hunt 13 | ref: f5e22c5dbd5fc428b7cb3e0f33ded8c51591e522 14 | 15 | docs: 16 | hello_world: | 17 | -- Parse build errors 18 | SELECT file_path, line_number, message 19 | FROM read_duck_hunt_log('build.log', 'auto') 20 | WHERE status = 'ERROR'; 21 | 22 | -- Parse test results 23 | SELECT test_name, status, execution_time 24 | FROM read_duck_hunt_log('pytest.json', 'pytest_json') 25 | WHERE status = 'FAIL'; 26 | 27 | -- Custom regex pattern 28 | SELECT severity, message 29 | FROM parse_duck_hunt_log( 30 | 'ERROR: Connection failed\nWARNING: Retrying...', 31 | 'regexp:(?PERROR|WARNING):\s+(?P.+)' 32 | ); 33 | 34 | -- Build health badge 35 | SELECT status_badge( 36 | COUNT(*) FILTER (WHERE status = 'ERROR'), 37 | COUNT(*) FILTER (WHERE status = 'WARNING') 38 | ) FROM read_duck_hunt_log('build.log', 'auto'); 39 | 40 | extended_description: | 41 | Duck Hunt is a comprehensive DuckDB extension for parsing and analyzing development tool outputs. 42 | It provides a unified SQL interface to query test results, build logs, linting output, and CI/CD 43 | pipeline data from 45+ tools and formats. 44 | 45 | See: 46 | 47 | **Core Table Functions:** 48 | - `read_duck_hunt_log(file, format)` - Parse tool outputs from files 49 | - `parse_duck_hunt_log(content, format)` - Parse tool outputs from strings 50 | - `read_duck_hunt_workflow_log(file, format)` - Parse CI/CD workflow logs from files 51 | - `parse_duck_hunt_workflow_log(content, format)` - Parse CI/CD workflow logs from strings 52 | 53 | **Scalar Functions:** 54 | - `status_badge(status)` - Convert status to badge: [ OK ], [FAIL], [WARN], [ .. ], [ ?? 
] 55 | - `status_badge(errors, warnings)` - Compute badge from counts 56 | - `status_badge(errors, warnings, is_running)` - Badge with running state 57 | 58 | **Supported Formats (45+):** 59 | See: 60 | - **Dynamic:** `regexp:` - Custom patterns with named capture groups 61 | - **Test Frameworks:** pytest, Go test, Cargo test, JUnit, RSpec, Mocha/Chai, Google Test, NUnit/xUnit 62 | - **Linting Tools:** ESLint, RuboCop, Pylint, Flake8, MyPy, Clippy, SwiftLint, PHPStan, and more 63 | - **Build Systems:** CMake, Make, Maven, Gradle, Cargo, MSBuild, Node.js, Python 64 | - **CI/CD Engines:** GitHub Actions, GitLab CI, Jenkins, Docker 65 | - **Debugging:** Valgrind, GDB/LLDB 66 | 67 | **Schema Fields (38):** 68 | See: 69 | - Core: event_id, tool_name, event_type, file_path, line_number, column_number, status, severity, message 70 | - Error Analysis: error_fingerprint, similarity_score, pattern_id, root_cause_category 71 | - Workflow: workflow_name, job_name, step_name, workflow_status, job_status, step_status, duration 72 | 73 | **Key Features:** 74 | - Automatic format detection 75 | - Error pattern clustering and fingerprinting 76 | - Root cause categorization (network, permission, config, syntax, build, resource) 77 | - Multi-file glob processing with Hive-style paths 78 | - Pipeline integration with stdin support 79 | - Hierarchical CI/CD workflow parsing 80 | 81 | Perfect for CI/CD analysis, automated debugging, test aggregation, quality gates, and agent-driven development workflows. 82 | --------------------------------------------------------------------------------
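As a final illustration of the glob-processing and quality-gate use cases described for duck_hunt above, the sketch below rolls up error and warning counts per referenced source file across a directory of logs and derives a badge from them; the `logs/*.log` glob is a placeholder path:

```sql
-- Per-file error/warning roll-up with a status badge ('logs/*.log' is a placeholder)
SELECT file_path,
       COUNT(*) FILTER (WHERE status = 'ERROR')   AS errors,
       COUNT(*) FILTER (WHERE status = 'WARNING') AS warnings,
       status_badge(
           COUNT(*) FILTER (WHERE status = 'ERROR'),
           COUNT(*) FILTER (WHERE status = 'WARNING')
       ) AS badge
FROM read_duck_hunt_log('logs/*.log', 'auto')
GROUP BY file_path
ORDER BY errors DESC;
```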