├── .clang-format ├── .clang-tidy ├── .editorconfig ├── vcpkg.json ├── .gitignore ├── .gitmodules ├── Makefile ├── extension_config.cmake ├── src ├── include │ ├── parser_tools_extension.hpp │ ├── parse_statements.hpp │ ├── parse_functions.hpp │ ├── parse_where.hpp │ └── parse_tables.hpp ├── parser_tools_extension.cpp ├── parse_statements.cpp ├── parse_functions.cpp ├── parse_tables.cpp └── parse_where.cpp ├── test ├── README.md └── sql │ └── parse_tools │ ├── scalar_functions │ ├── is_parsable.test │ ├── parse_statements.test │ ├── num_statements.test │ ├── parse_tables.test │ ├── parse_table_names.test │ ├── parse_function_names.test │ └── parse_functions.test │ └── table_functions │ ├── parse_statements.test │ ├── parse_tables.test │ ├── parse_where.test │ └── parse_functions.test ├── CMakeLists.txt ├── .vscode └── tasks.json ├── LICENSE ├── .github └── workflows │ └── MainDistributionPipeline.yml ├── docs └── UPDATING.md ├── scripts └── extension-upload.sh └── README.md /.clang-format: -------------------------------------------------------------------------------- 1 | duckdb/.clang-format -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | duckdb/.clang-tidy -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | duckdb/.editorconfig -------------------------------------------------------------------------------- /vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | ] 4 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .idea 3 | cmake-build-debug 4 | duckdb_unittest_tempdir/ 5 | .DS_Store 6 | testext 
7 | test/python/__pycache__/ 8 | .Rhistory 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "duckdb"] 2 | path = duckdb 3 | url = https://github.com/duckdb/duckdb 4 | branch = main 5 | [submodule "extension-ci-tools"] 6 | path = extension-ci-tools 7 | url = https://github.com/duckdb/extension-ci-tools 8 | branch = main 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) 2 | 3 | # Configuration of extension 4 | EXT_NAME=parser_tools 5 | EXT_CONFIG=${PROJ_DIR}extension_config.cmake 6 | 7 | # Include the Makefile from extension-ci-tools 8 | include extension-ci-tools/makefiles/duckdb_extension.Makefile -------------------------------------------------------------------------------- /extension_config.cmake: -------------------------------------------------------------------------------- 1 | # This file is included by DuckDB's build system. 
It specifies which extension to load 2 | 3 | # Extension from this repo 4 | duckdb_extension_load(parser_tools 5 | SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} 6 | LOAD_TESTS 7 | ) 8 | 9 | # Any extra extensions that should be built 10 | # e.g.: duckdb_extension_load(json) -------------------------------------------------------------------------------- /src/include/parser_tools_extension.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb { 6 | 7 | class ParserToolsExtension : public Extension { 8 | public: 9 | void Load(ExtensionLoader &loader) override; 10 | std::string Name() override; 11 | std::string Version() const override; 12 | }; 13 | 14 | } // namespace duckdb 15 | -------------------------------------------------------------------------------- /src/include/parse_statements.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | #include 5 | #include 6 | 7 | namespace duckdb { 8 | 9 | // Forward declarations 10 | class ExtensionLoader; 11 | 12 | struct StatementResult { 13 | std::string statement; 14 | }; 15 | 16 | void RegisterParseStatementsFunction(ExtensionLoader &loader); 17 | void RegisterParseStatementsScalarFunction(ExtensionLoader &loader); 18 | 19 | } // namespace duckdb -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Testing this extension 2 | This directory contains all the tests for this extension. The `sql` directory holds tests that are written as [SQLLogicTests](https://duckdb.org/dev/sqllogictest/intro.html). DuckDB aims to have most of its tests in this format as SQL statements, so for the parser_tools extension, this should probably be the goal too. 3 | 4 | The root makefile contains targets to build and run all of these tests. 
To run the SQLLogicTests: 5 | ```bash 6 | make test 7 | ``` 8 | or 9 | ```bash 10 | make test_debug 11 | ``` -------------------------------------------------------------------------------- /src/include/parse_functions.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | #include 5 | #include 6 | 7 | namespace duckdb { 8 | 9 | // Forward declarations 10 | class ExtensionLoader; 11 | 12 | struct FunctionResult { 13 | std::string function_name; 14 | std::string schema; 15 | std::string context; // The context where this function appears (SELECT, WHERE, etc.) 16 | }; 17 | 18 | void RegisterParseFunctionsFunction(ExtensionLoader &loader); 19 | void RegisterParseFunctionScalarFunction(ExtensionLoader &loader); 20 | 21 | } // namespace duckdb -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | # Set extension name here 4 | set(TARGET_NAME parser_tools) 5 | 6 | set(EXTENSION_NAME ${TARGET_NAME}_extension) 7 | set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) 8 | 9 | project(${TARGET_NAME}) 10 | include_directories(src/include) 11 | 12 | set(EXTENSION_SOURCES 13 | src/parser_tools_extension.cpp 14 | src/parse_tables.cpp 15 | src/parse_where.cpp 16 | src/parse_functions.cpp 17 | src/parse_statements.cpp 18 | ) 19 | 20 | build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) 21 | build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) 22 | 23 | install( 24 | TARGETS ${EXTENSION_NAME} 25 | EXPORT "${DUCKDB_EXPORT_SET}" 26 | LIBRARY DESTINATION "${INSTALL_LIB_DIR}" 27 | ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") 28 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 
1 | { 2 | "tasks": [ 3 | { 4 | "type": "cppbuild", 5 | "label": "C/C++: clang++ build active file", 6 | "command": "/usr/bin/clang++", 7 | "args": [ 8 | "-fcolor-diagnostics", 9 | "-fansi-escape-codes", 10 | "-g", 11 | "${file}", 12 | "-o", 13 | "${fileDirname}/${fileBasenameNoExtension}" 14 | ], 15 | "options": { 16 | "cwd": "${fileDirname}" 17 | }, 18 | "problemMatcher": [ 19 | "$gcc" 20 | ], 21 | "group": { 22 | "kind": "build", 23 | "isDefault": true 24 | }, 25 | "detail": "Task generated by Debugger." 26 | } 27 | ], 28 | "version": "2.0.0" 29 | } -------------------------------------------------------------------------------- /test/sql/parse_tools/scalar_functions/is_parsable.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/scalar_functions/is_parsable.test 2 | # description: test is_parsable scalar function 3 | # group: [parse_table_names] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT is_parsable('select * from MyTable'); 8 | ---- 9 | Catalog Error: Scalar Function with name is_parsable does not exist! 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | 15 | # base case 16 | query I 17 | SELECT is_parsable('select * from MyTable'); 18 | ---- 19 | true 20 | 21 | # false 22 | # ----- 23 | 24 | query I 25 | SELECT is_parsable('select * from'); 26 | ---- 27 | false 28 | 29 | query I 30 | SELECT is_parsable('select'); 31 | ---- 32 | false 33 | 34 | # true 35 | # ----- 36 | 37 | query I 38 | SELECT is_parsable('select 1'); 39 | ---- 40 | true -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018-2025 Stichting DuckDB Foundation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /.github/workflows/MainDistributionPipeline.yml: -------------------------------------------------------------------------------- 1 | # 2 | # This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension 3 | # 4 | name: Main Extension Distribution Pipeline 5 | on: 6 | push: 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | duckdb-next-build: 16 | name: Build extension binaries 17 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main 18 | with: 19 | duckdb_version: main 20 | ci_tools_version: main 21 | extension_name: parser_tools 22 | 23 | duckdb-stable-build: 24 | name: Build extension binaries 25 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0 26 | with: 27 | duckdb_version: v1.4.0 28 | ci_tools_version: v1.4.0 29 | extension_name: parser_tools 30 | -------------------------------------------------------------------------------- /src/include/parse_where.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | #include 5 | #include 6 | 7 | namespace duckdb { 8 | 9 | // Forward declarations 10 | class ExtensionLoader; 11 | 12 | struct WhereConditionResult { 13 | std::string condition; 14 | std::string table_name; // The table this condition applies to (if determinable) 15 | std::string context; // The context where this condition appears (WHERE, HAVING, etc.) 16 | }; 17 | 18 | struct DetailedWhereConditionResult { 19 | std::string column_name; // The column being compared 20 | std::string operator_type; // The comparison operator (>, <, =, etc.) 
21 | std::string value; // The value being compared against 22 | std::string table_name; // The table this condition applies to (if determinable) 23 | std::string context; // The context where this condition appears (WHERE, HAVING, etc.) 24 | }; 25 | 26 | void RegisterParseWhereFunction(ExtensionLoader &loader); 27 | void RegisterParseWhereScalarFunction(ExtensionLoader &loader); 28 | void RegisterParseWhereDetailedFunction(ExtensionLoader &loader); 29 | 30 | } // namespace duckdb -------------------------------------------------------------------------------- /src/include/parse_tables.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb { 6 | 7 | /** 8 | * Represents where a table is used in a query. 9 | */ 10 | enum class TableContext { 11 | From, // table in from clause 12 | JoinLeft, // table in left side of a join 13 | JoinRight, // table in right side of a join 14 | FromCTE, // table in from clause that references a CTE 15 | CTE, // table is defined as a CTE 16 | Subquery // table in a subquery 17 | }; 18 | 19 | const char *ToString(TableContext context); 20 | const TableContext FromString(const char *context); 21 | 22 | struct TableRefResult { 23 | std::string schema; 24 | std::string table; 25 | TableContext context; 26 | }; 27 | 28 | static void ExtractTablesFromSQL(const std::string &sql, std::vector &results); 29 | static void ExtractTablesFromQueryNode( 30 | const duckdb::QueryNode &node, 31 | std::vector &results, 32 | const TableContext context = TableContext::From, 33 | const duckdb::CommonTableExpressionMap *cte_map = nullptr 34 | ); 35 | 36 | void RegisterParseTablesFunction(duckdb::ExtensionLoader &loader); 37 | void RegisterParseTableScalarFunction(ExtensionLoader &loader); 38 | 39 | } // namespace duckdb 40 | -------------------------------------------------------------------------------- 
/test/sql/parse_tools/scalar_functions/parse_statements.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/scalar_functions/parse_statements.test 2 | # description: test parse_statements scalar function 3 | # group: [parse_statements] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT parse_statements('SELECT 42; SELECT 43;'); 8 | ---- 9 | Catalog Error: Scalar Function with name parse_statements does not exist! 10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | # Single statement 15 | query I 16 | SELECT parse_statements('SELECT 42;'); 17 | ---- 18 | [SELECT 42] 19 | 20 | # Multiple statements 21 | query I 22 | SELECT parse_statements('SELECT 42; SELECT 43;'); 23 | ---- 24 | [SELECT 42, SELECT 43] 25 | 26 | # Three statements 27 | query I 28 | SELECT parse_statements('SELECT 1; SELECT 2; SELECT 3;'); 29 | ---- 30 | [SELECT 1, SELECT 2, SELECT 3] 31 | 32 | # Multiple statements with different types 33 | query I 34 | SELECT parse_statements('SELECT 1; INSERT INTO test VALUES (2); SELECT 3;'); 35 | ---- 36 | [SELECT 1, 'INSERT INTO test (VALUES (2))', SELECT 3] 37 | 38 | # Complex multi-statement query 39 | query I 40 | SELECT parse_statements($$ 41 | WITH cte AS (SELECT 1 as a) SELECT * FROM cte; 42 | SELECT upper('hello'); 43 | SELECT count(*) FROM users WHERE id > 10; 44 | $$); 45 | ---- 46 | ['WITH cte AS (SELECT 1 AS a)SELECT * FROM cte', 'SELECT upper(\'hello\')', 'SELECT count_star() FROM users WHERE (id > 10)'] 47 | 48 | # Empty input 49 | query I 50 | SELECT parse_statements(''); 51 | ---- 52 | [] 53 | 54 | # Whitespace only 55 | query I 56 | SELECT parse_statements(' '); 57 | ---- 58 | [] 59 | 60 | # Invalid SQL should return empty list 61 | query I 62 | SELECT parse_statements('INVALID SQL SYNTAX HERE'); 63 | ---- 64 | [] 
-------------------------------------------------------------------------------- /test/sql/parse_tools/scalar_functions/num_statements.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/scalar_functions/num_statements.test 2 | # description: test num_statements scalar function 3 | # group: [num_statements] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT num_statements('SELECT 42; SELECT 43;'); 8 | ---- 9 | Catalog Error: Scalar Function with name num_statements does not exist! 10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | # Single statement 15 | query I 16 | SELECT num_statements('SELECT 42;'); 17 | ---- 18 | 1 19 | 20 | # Two statements 21 | query I 22 | SELECT num_statements('SELECT 42; SELECT 43;'); 23 | ---- 24 | 2 25 | 26 | # Three statements 27 | query I 28 | SELECT num_statements('SELECT 1; SELECT 2; SELECT 3;'); 29 | ---- 30 | 3 31 | 32 | # Multiple statements with different types 33 | query I 34 | SELECT num_statements('SELECT 1; INSERT INTO test VALUES (2); UPDATE test SET x = 1; SELECT 3;'); 35 | ---- 36 | 4 37 | 38 | # Complex multi-statement query 39 | query I 40 | SELECT num_statements($$ 41 | WITH cte AS (SELECT 1 as a) SELECT * FROM cte; 42 | SELECT upper('hello'); 43 | SELECT count(*) FROM users WHERE id > 10; 44 | INSERT INTO log VALUES ('done'); 45 | $$); 46 | ---- 47 | 4 48 | 49 | # Single complex statement 50 | query I 51 | SELECT num_statements($$ 52 | WITH cte1 AS (SELECT * FROM table1), 53 | cte2 AS (SELECT * FROM table2) 54 | SELECT cte1.id, cte2.name 55 | FROM cte1 56 | JOIN cte2 ON cte1.id = cte2.id 57 | WHERE cte1.active = true 58 | ORDER BY cte1.created_at DESC; 59 | $$); 60 | ---- 61 | 1 62 | 63 | # Empty input 64 | query I 65 | SELECT num_statements(''); 66 | ---- 67 | 0 68 | 69 | # Whitespace only 70 | query I 71 | SELECT num_statements(' '); 72 | ---- 
73 | 0 74 | 75 | # Invalid SQL should return 0 76 | query I 77 | SELECT num_statements('INVALID SQL SYNTAX HERE'); 78 | ---- 79 | 0 -------------------------------------------------------------------------------- /docs/UPDATING.md: -------------------------------------------------------------------------------- 1 | # Extension updating 2 | When cloning this template, the target version of DuckDB should be the latest stable release of DuckDB. However, there 3 | will inevitably come a time when a new DuckDB is released and the extension repository needs updating. This process goes 4 | as follows: 5 | 6 | - Bump submodules 7 | - `./duckdb` should be set to latest tagged release 8 | - `./extension-ci-tools` should be set to the updated branch corresponding to the latest DuckDB release. So if you're building for DuckDB `v1.1.0` there will be a branch in `extension-ci-tools` named `v1.1.0`, which you should check out. 9 | - Bump versions in `./.github/workflows` 10 | - `duckdb_version` input in `duckdb-stable-build` job in `MainDistributionPipeline.yml` should be set to latest tagged release 11 | - `duckdb_version` input in `duckdb-stable-deploy` job in `MainDistributionPipeline.yml` should be set to latest tagged release 12 | - the reusable workflow `duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml` for the `duckdb-stable-build` job should be set to latest tagged release 13 | 14 | # API changes 15 | DuckDB extensions built with this extension template are built against the internal C++ API of DuckDB. This API is not guaranteed to be stable. 16 | What this means for extension development is that when updating your extension's DuckDB target version using the above steps, you may find that your extension no longer builds properly. 17 | 18 | Currently, DuckDB does not (yet) provide a specific change log for these API changes, but it is generally not too hard to figure out what has changed. 
19 | 20 | For figuring out how and why the C++ API changed, we recommend using the following resources: 21 | - DuckDB's [Release Notes](https://github.com/duckdb/duckdb/releases) 22 | - DuckDB's history of [Core extension patches](https://github.com/duckdb/duckdb/commits/main/.github/patches/extensions) 23 | - The git history of the relevant C++ Header file of the API that has changed -------------------------------------------------------------------------------- /src/parser_tools_extension.cpp: -------------------------------------------------------------------------------- 1 | #define DUCKDB_EXTENSION_MAIN 2 | 3 | #include "parser_tools_extension.hpp" 4 | #include "parse_tables.hpp" 5 | #include "parse_where.hpp" 6 | #include "parse_functions.hpp" 7 | #include "parse_statements.hpp" 8 | #include "duckdb.hpp" 9 | #include "duckdb/common/exception.hpp" 10 | #include "duckdb/common/string_util.hpp" 11 | #include "duckdb/function/scalar_function.hpp" 12 | #include 13 | #include "duckdb/parser/parser.hpp" 14 | #include "duckdb/parser/statement/select_statement.hpp" 15 | #include "duckdb/parser/query_node/select_node.hpp" 16 | #include "duckdb/parser/tableref/basetableref.hpp" 17 | #include "duckdb/parser/tableref/joinref.hpp" 18 | #include "duckdb/parser/tableref/subqueryref.hpp" 19 | #include "duckdb/parser/statement/insert_statement.hpp" 20 | 21 | namespace duckdb { 22 | 23 | // --------------------------------------------------- 24 | // EXTENSION SCAFFOLDING 25 | 26 | static void LoadInternal(ExtensionLoader &loader) { 27 | RegisterParseTablesFunction(loader); 28 | RegisterParseTableScalarFunction(loader); 29 | RegisterParseWhereFunction(loader); 30 | RegisterParseWhereScalarFunction(loader); 31 | RegisterParseWhereDetailedFunction(loader); 32 | RegisterParseFunctionsFunction(loader); 33 | RegisterParseFunctionScalarFunction(loader); 34 | RegisterParseStatementsFunction(loader); 35 | RegisterParseStatementsScalarFunction(loader); 36 | } 37 | 38 | void 
ParserToolsExtension::Load(ExtensionLoader &loader) { 39 | LoadInternal(loader); 40 | } 41 | 42 | std::string ParserToolsExtension::Name() { 43 | return "parser"; 44 | } 45 | 46 | std::string ParserToolsExtension::Version() const { 47 | #ifdef EXT_VERSION_PARSER_TOOLS 48 | return EXT_VERSION_PARSER_TOOLS; 49 | #else 50 | return ""; 51 | #endif 52 | } 53 | 54 | } // namespace duckdb 55 | 56 | extern "C" { 57 | 58 | DUCKDB_CPP_EXTENSION_ENTRY(parser_tools, loader) { 59 | duckdb::LoadInternal(loader); 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /test/sql/parse_tools/table_functions/parse_statements.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/table_functions/parse_statements.test 2 | # description: test parse_statements table function 3 | # group: [parse_statements] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT * FROM parse_statements('SELECT 42; SELECT 43;'); 8 | ---- 9 | Catalog Error: Table Function with name parse_statements does not exist! 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | # Single statement 15 | query I 16 | SELECT * FROM parse_statements('SELECT 42;'); 17 | ---- 18 | SELECT 42 19 | 20 | # Multiple statements 21 | query I 22 | SELECT * FROM parse_statements('SELECT 42; SELECT 43;'); 23 | ---- 24 | SELECT 42 25 | SELECT 43 26 | 27 | # Multiple statements with different types 28 | query I 29 | SELECT * FROM parse_statements('SELECT 1; INSERT INTO test VALUES (2); SELECT 3;'); 30 | ---- 31 | SELECT 1 32 | INSERT INTO test (VALUES (2)) 33 | SELECT 3 34 | 35 | # Complex multi-statement query 36 | query I 37 | SELECT * FROM parse_statements($$ 38 | WITH cte AS (SELECT 1 as a) SELECT * FROM cte; 39 | SELECT upper('hello'); 40 | SELECT count(*) FROM users WHERE id > 10; 41 | $$); 42 | ---- 43 | WITH cte AS (SELECT 1 AS a)SELECT * FROM cte 44 | SELECT upper('hello') 45 | SELECT count_star() FROM users WHERE (id > 10) 46 | 47 | # Statements with CTEs and joins 48 | query I 49 | SELECT * FROM parse_statements($$ 50 | SELECT a.id FROM table_a a JOIN table_b b ON a.id = b.id; 51 | WITH data AS (SELECT * FROM source) SELECT count(*) FROM data; 52 | $$); 53 | ---- 54 | SELECT a.id FROM table_a AS a INNER JOIN table_b AS b ON ((a.id = b.id)) 55 | WITH "data" AS (SELECT * FROM "source")SELECT count_star() FROM "data" 56 | 57 | # Empty input 58 | query I 59 | SELECT * FROM parse_statements(''); 60 | ---- 61 | 62 | # Whitespace only 63 | query I 64 | SELECT * FROM parse_statements(' '); 65 | ---- 66 | 67 | # Invalid SQL should return no results 68 | query I 69 | SELECT * FROM parse_statements('INVALID SQL SYNTAX HERE'); 70 | ---- -------------------------------------------------------------------------------- /test/sql/parse_tools/scalar_functions/parse_tables.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/scalar_functions/parse_tables.test 2 | # description: 
test parse_tables scalar function 3 | # group: [parse_tables] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT parse_tables('select * from MyTable'); 8 | ---- 9 | Catalog Error: Scalar Function with name parse_tables does not exist! 10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | query I 15 | SELECT parse_tables('select * from MyTable'); 16 | ---- 17 | [{'schema': main, 'table': MyTable, 'context': from}] 18 | 19 | # join: both tables are reported, each with its own context 20 | query I 21 | SELECT parse_tables('select * from MyTable t inner join Other o on o.id = t.id'); 22 | ---- 23 | [{'schema': main, 'table': MyTable, 'context': from}, {'schema': main, 'table': Other, 'context': join_right}] 24 | 25 | 26 | # demonstrate list filter functionality 27 | query I 28 | SELECT list_filter(parse_tables('select * from MyTable t inner join Other o on o.id = t.id'),t -> t.context = 'from') AS filtered_tables; 29 | ---- 30 | [{'schema': main, 'table': MyTable, 'context': from}] 31 | 32 | 33 | # Unsupported 34 | # ----------- 35 | 36 | # INSERT INTO ... 
SELECT 37 | query I 38 | select parse_tables('INSERT INTO m SELECT * FROM n;'); 39 | ---- 40 | [] 41 | 42 | # UPDATE with FROM 43 | query I 44 | select parse_tables('UPDATE o SET x = p.x FROM p WHERE o.id = p.id;'); 45 | ---- 46 | [] 47 | 48 | # DELETE with USING 49 | query I 50 | select parse_tables('DELETE FROM q USING r WHERE q.id = r.id;'); 51 | ---- 52 | [] 53 | 54 | # CREATE VIEW AS SELECT 55 | query I 56 | select parse_tables('CREATE VIEW v AS SELECT * FROM s JOIN t ON s.id = t.id;'); 57 | ---- 58 | [] 59 | 60 | # CREATE TABLE AS SELECT 61 | query I 62 | select parse_tables('CREATE TABLE u AS SELECT * FROM v;'); 63 | ---- 64 | [] 65 | 66 | # no tables 67 | query I 68 | select parse_tables('SELECT 1;'); 69 | ---- 70 | [] 71 | 72 | # malformed SQL should not error 73 | query I 74 | SELECT parse_tables('SELECT * FROM WHERE'); 75 | ---- 76 | [] -------------------------------------------------------------------------------- /test/sql/parse_tools/scalar_functions/parse_table_names.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/scalar_functions/parse_table_names.test 2 | # description: test parse_table_names scalar function 3 | # group: [parse_table_names] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT parse_table_names('select * from MyTable'); 8 | ---- 9 | Catalog Error: Scalar Function with name parse_table_names does not exist! 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | query I 15 | SELECT parse_table_names('select * from MyTable'); 16 | ---- 17 | [MyTable] 18 | 19 | # default behavior: exclude CTE 20 | query I 21 | SELECT parse_table_names('with cte_test as(select 1) select * from MyTable, cte_test'); 22 | ---- 23 | [MyTable] 24 | 25 | # default behavior: exclude CTE (explicitly specify true) 26 | query I 27 | SELECT parse_table_names('with cte_test as(select 1) select * from MyTable, cte_test', true); 28 | ---- 29 | [MyTable] 30 | 31 | # test optional bool argument: don't exclude CTE tables 32 | query I 33 | SELECT parse_table_names('with cte_test as(select 1) select * from MyTable, cte_test', false); 34 | ---- 35 | [cte_test, MyTable, cte_test] 36 | 37 | # Unsupported 38 | # ----------- 39 | 40 | # INSERT INTO ... SELECT 41 | query I 42 | select parse_table_names('INSERT INTO m SELECT * FROM n;'); 43 | ---- 44 | [] 45 | 46 | # UPDATE with FROM 47 | query I 48 | select parse_table_names('UPDATE o SET x = p.x FROM p WHERE o.id = p.id;'); 49 | ---- 50 | [] 51 | 52 | # DELETE with USING 53 | query I 54 | select parse_table_names('DELETE FROM q USING r WHERE q.id = r.id;'); 55 | ---- 56 | [] 57 | 58 | # CREATE VIEW AS SELECT 59 | query I 60 | select parse_table_names('CREATE VIEW v AS SELECT * FROM s JOIN t ON s.id = t.id;'); 61 | ---- 62 | [] 63 | 64 | # CREATE TABLE AS SELECT 65 | query I 66 | select parse_table_names('CREATE TABLE u AS SELECT * FROM v;'); 67 | ---- 68 | [] 69 | 70 | # no tables 71 | query I 72 | select parse_table_names('SELECT 1;'); 73 | ---- 74 | [] 75 | 76 | # malformed SQL should not error 77 | query I 78 | SELECT parse_table_names('SELECT * FROM WHERE'); 79 | ---- 80 | [] 81 | -------------------------------------------------------------------------------- /test/sql/parse_tools/scalar_functions/parse_function_names.test: 
-------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/scalar_functions/parse_function_names.test 2 | # description: test parse_function_names scalar function 3 | # group: [parse_function_names] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT parse_function_names('SELECT upper(name) FROM my_table;'); 8 | ---- 9 | Catalog Error: Scalar Function with name parse_function_names does not exist! 10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | # simple function in SELECT 15 | query I 16 | SELECT parse_function_names('SELECT upper(name) FROM my_table;'); 17 | ---- 18 | [upper] 19 | 20 | # multiple functions in SELECT 21 | query I 22 | SELECT parse_function_names('SELECT upper(name), lower(email), length(address) FROM users;'); 23 | ---- 24 | [upper, lower, length] 25 | 26 | # function in WHERE clause 27 | query I 28 | SELECT parse_function_names('SELECT * FROM users WHERE length(name) > 5;'); 29 | ---- 30 | [length] 31 | 32 | # nested functions 33 | query I 34 | SELECT parse_function_names('SELECT upper(left(name, 3)) FROM users;'); 35 | ---- 36 | [upper, left] 37 | 38 | # complex query with multiple contexts 39 | query I 40 | SELECT parse_function_names($$ 41 | SELECT upper(name), count(*) 42 | FROM users 43 | WHERE length(email) > 0 44 | GROUP BY substr(department, 1, 3) 45 | HAVING sum(salary) > 100000 46 | ORDER BY lower(name) 47 | $$); 48 | ---- 49 | [upper, count_star, length, substr, sum, lower] 50 | 51 | # window functions 52 | query I 53 | SELECT parse_function_names('SELECT name, row_number() OVER (ORDER BY salary) FROM users;'); 54 | ---- 55 | [row_number] 56 | 57 | # aggregate functions 58 | query I 59 | SELECT parse_function_names('SELECT count(*), sum(salary), avg(age) FROM users;'); 60 | ---- 61 | [count_star, sum, avg] 62 | 63 | # CTE with functions 64 | query I 65 | SELECT 
parse_function_names('WITH ranked AS (SELECT name, rank() OVER (ORDER BY salary) as r FROM users) SELECT upper(name) FROM ranked;'); 66 | ---- 67 | [rank, upper] 68 | 69 | # no functions 70 | query I 71 | SELECT parse_function_names('SELECT name, age FROM users;'); 72 | ---- 73 | [] 74 | 75 | # malformed SQL should not error 76 | query I 77 | SELECT parse_function_names('SELECT upper( FROM users'); 78 | ---- 79 | [] 80 | 81 | # INSERT statement (unsupported) 82 | query I 83 | SELECT parse_function_names('INSERT INTO users VALUES (upper("test"));'); 84 | ---- 85 | [] 86 | 87 | # UPDATE statement (unsupported) 88 | query I 89 | SELECT parse_function_names('UPDATE users SET name = upper(name);'); 90 | ---- 91 | [] 92 | 93 | # CREATE VIEW statement (unsupported) 94 | query I 95 | SELECT parse_function_names('CREATE VIEW v AS SELECT upper(name) FROM users;'); 96 | ---- 97 | [] -------------------------------------------------------------------------------- /test/sql/parse_tools/table_functions/parse_tables.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/tables_functions/parse_tables.test 2 | # description: test parse_tables table function 3 | # group: [parse_tables] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT * FROM parse_tables('SELECT * FROM my_table;'); 8 | ---- 9 | Catalog Error: Table Function with name parse_tables does not exist! 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | # simple FROM 15 | query III 16 | SELECT * FROM parse_tables('SELECT * FROM my_table;'); 17 | ---- 18 | main my_table from 19 | 20 | # schema-qualified FROM 21 | query III 22 | SELECT * FROM parse_tables('SELECT * FROM my_schema.my_table;'); 23 | ---- 24 | my_schema my_table from 25 | 26 | # CTE usage 27 | query III 28 | SELECT * FROM parse_tables('WITH x AS (SELECT * FROM d JOIN e ON d.id = e.id) SELECT * FROM x;'); 29 | ---- 30 | (empty) x cte 31 | main d from 32 | main e join_right 33 | main x from_cte 34 | 35 | # JOIN between two tables 36 | query III 37 | SELECT * FROM parse_tables('SELECT * FROM a JOIN b ON a.id = b.id;'); 38 | ---- 39 | main a from 40 | main b join_right 41 | 42 | # chained JOINs 43 | query III 44 | SELECT * FROM parse_tables('SELECT * FROM a JOIN b ON a.id = b.id JOIN c ON b.id = c.id;'); 45 | ---- 46 | main a from 47 | main b join_right 48 | main c join_right 49 | 50 | # subquery and join 51 | query III 52 | SELECT * FROM parse_tables('SELECT * FROM (SELECT * FROM f) sub JOIN g ON sub.id = g.id;'); 53 | ---- 54 | main f from 55 | main g join_right 56 | 57 | # mixed qualified and unqualified 58 | query III 59 | SELECT * FROM parse_tables('SELECT * FROM h JOIN schema1.i ON h.id = i.id;'); 60 | ---- 61 | main h from 62 | schema1 i join_right 63 | 64 | # duplicate references 65 | query III 66 | SELECT * FROM parse_tables('SELECT * FROM j JOIN j ON j.id = j.id;'); 67 | ---- 68 | main j from 69 | main j join_right 70 | 71 | # deeply nested subqueries 72 | query III 73 | SELECT * FROM parse_tables($$ 74 | SELECT * FROM ( 75 | SELECT * FROM (SELECT * FROM k) inner1 76 | JOIN (SELECT * FROM l) inner2 ON inner1.id = inner2.id 77 | ) final; 78 | $$); 79 | ---- 80 | main k from 81 | main l from 82 | 83 | # INSERT INTO ... 
SELECT 84 | query III 85 | SELECT * FROM parse_tables('INSERT INTO m SELECT * FROM n;'); 86 | ---- 87 | 88 | # UPDATE with FROM 89 | query III 90 | SELECT * FROM parse_tables('UPDATE o SET x = p.x FROM p WHERE o.id = p.id;'); 91 | ---- 92 | 93 | # DELETE with USING 94 | query III 95 | SELECT * FROM parse_tables('DELETE FROM q USING r WHERE q.id = r.id;'); 96 | ---- 97 | 98 | # CREATE VIEW AS SELECT 99 | query III 100 | SELECT * FROM parse_tables('CREATE VIEW v AS SELECT * FROM s JOIN t ON s.id = t.id;'); 101 | ---- 102 | 103 | # CREATE TABLE AS SELECT 104 | query III 105 | SELECT * FROM parse_tables('CREATE TABLE u AS SELECT * FROM v;'); 106 | ---- 107 | 108 | # no tables 109 | query III 110 | SELECT * FROM parse_tables('SELECT 1;'); 111 | ---- 112 | 113 | # malformed SQL should not error 114 | query III 115 | SELECT * FROM parse_tables('SELECT * FROM WHERE'); 116 | ---- -------------------------------------------------------------------------------- /test/sql/parse_tools/table_functions/parse_where.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/table_functions/parse_where.test 2 | # description: test parse_where and parse_where_detailed table functions 3 | # group: [parse_where] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT * FROM parse_where('SELECT * FROM my_table WHERE x > 1;'); 8 | ---- 9 | Catalog Error: Table Function with name parse_where does not exist! 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | # Simple comparison 15 | query III 16 | SELECT * FROM parse_where('SELECT * FROM my_table WHERE x > 1;'); 17 | ---- 18 | (x > 1) my_table WHERE 19 | 20 | # Simple comparison with detailed parser 21 | query IIIII 22 | SELECT * FROM parse_where_detailed('SELECT * FROM my_table WHERE x > 1;'); 23 | ---- 24 | x > 1 my_table WHERE 25 | 26 | # Multiple conditions with AND 27 | query III 28 | SELECT * FROM parse_where('SELECT * FROM my_table WHERE x > 1 AND y < 100;'); 29 | ---- 30 | (x > 1) my_table WHERE 31 | (y < 100) my_table WHERE 32 | 33 | # Multiple conditions with AND (detailed) 34 | query IIIII 35 | SELECT * FROM parse_where_detailed('SELECT * FROM my_table WHERE x > 1 AND y < 100;'); 36 | ---- 37 | x > 1 my_table WHERE 38 | y < 100 my_table WHERE 39 | 40 | # BETWEEN condition 41 | query III 42 | SELECT * FROM parse_where('SELECT * FROM my_table WHERE x BETWEEN 1 AND 100;'); 43 | ---- 44 | (x BETWEEN 1 AND 100) my_table WHERE 45 | 46 | # BETWEEN condition (detailed) 47 | query IIIII 48 | SELECT * FROM parse_where_detailed('SELECT * FROM my_table WHERE x BETWEEN 1 AND 100;'); 49 | ---- 50 | x >= 1 my_table WHERE 51 | x <= 100 my_table WHERE 52 | 53 | # Complex conditions with AND/OR 54 | query III 55 | SELECT * FROM parse_where('SELECT * FROM my_table WHERE (x > 1 AND y < 100) OR z = 42;'); 56 | ---- 57 | (x > 1) my_table WHERE 58 | (y < 100) my_table WHERE 59 | (z = 42) my_table WHERE 60 | 61 | # Complex conditions with AND/OR (detailed) 62 | query IIIII 63 | SELECT * FROM parse_where_detailed('SELECT * FROM my_table WHERE (x > 1 AND y < 100) OR z = 42;'); 64 | ---- 65 | x > 1 my_table WHERE 66 | y < 100 my_table WHERE 67 | z = 42 my_table WHERE 68 | 69 | # Multiple operators 70 | query III 71 | SELECT * FROM parse_where('SELECT * FROM my_table WHERE x >= 1 AND x <= 100 AND y != 42;'); 72 | ---- 73 | (x >= 1) my_table WHERE 74 | (x <= 100) 
my_table WHERE 75 | (y != 42) my_table WHERE 76 | 77 | # Multiple operators (detailed) 78 | query IIIII 79 | SELECT * FROM parse_where_detailed('SELECT * FROM my_table WHERE x >= 1 AND x <= 100 AND y != 42;'); 80 | ---- 81 | x >= 1 my_table WHERE 82 | x <= 100 my_table WHERE 83 | y != 42 my_table WHERE 84 | 85 | # No WHERE clause 86 | query III 87 | SELECT * FROM parse_where('SELECT * FROM my_table;'); 88 | ---- 89 | 90 | # No WHERE clause (detailed) 91 | query IIIII 92 | SELECT * FROM parse_where_detailed('SELECT * FROM my_table;'); 93 | ---- 94 | 95 | # Malformed SQL should not error 96 | query III 97 | SELECT * FROM parse_where('SELECT * FROM my_table WHERE'); 98 | ---- 99 | 100 | # Malformed SQL should not error (detailed) 101 | query IIIII 102 | SELECT * FROM parse_where_detailed('SELECT * FROM my_table WHERE'); 103 | ---- 104 | -------------------------------------------------------------------------------- /scripts/extension-upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Extension upload script 4 | 5 | # Usage: ./extension-upload.sh 6 | # : Name of the extension 7 | # : Version (commit / version tag) of the extension 8 | # : Version (commit / version tag) of DuckDB 9 | # : Architecture target of the extension binary 10 | # : S3 bucket to upload to 11 | # : Set this as the latest version ("true" / "false", default: "false") 12 | # : Set this as a versioned version that will prevent its deletion 13 | 14 | set -e 15 | 16 | if [[ $4 == wasm* ]]; then 17 | ext="/tmp/extension/$1.duckdb_extension.wasm" 18 | else 19 | ext="/tmp/extension/$1.duckdb_extension" 20 | fi 21 | 22 | echo $ext 23 | 24 | script_dir="$(dirname "$(readlink -f "$0")")" 25 | 26 | # calculate SHA256 hash of extension binary 27 | cat $ext > $ext.append 28 | 29 | if [[ $4 == wasm* ]]; then 30 | # 0 for custom section 31 | # 113 in hex = 275 in decimal, total lenght of what follows (1 + 16 + 2 + 256) 32 | # [1(continuation) + 
0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02] 33 | echo -n -e '\x00' >> $ext.append 34 | echo -n -e '\x93\x02' >> $ext.append 35 | # 10 in hex = 16 in decimal, lenght of name, 1 byte 36 | echo -n -e '\x10' >> $ext.append 37 | echo -n -e 'duckdb_signature' >> $ext.append 38 | # the name of the WebAssembly custom section, 16 bytes 39 | # 100 in hex, 256 in decimal 40 | # [1(continuation) + 0000000(payload) = ff, 0(continuation) + 10(payload)], 41 | # for a grand total of 2 bytes 42 | echo -n -e '\x80\x02' >> $ext.append 43 | fi 44 | 45 | # (Optionally) Sign binary 46 | if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then 47 | echo "$DUCKDB_EXTENSION_SIGNING_PK" > private.pem 48 | $script_dir/../duckdb/scripts/compute-extension-hash.sh $ext.append > $ext.hash 49 | openssl pkeyutl -sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign 50 | rm -f private.pem 51 | fi 52 | 53 | # Signature is always there, potentially defaulting to 256 zeros 54 | truncate -s 256 $ext.sign 55 | 56 | # append signature to extension binary 57 | cat $ext.sign >> $ext.append 58 | 59 | # compress extension binary 60 | if [[ $4 == wasm_* ]]; then 61 | brotli < $ext.append > "$ext.compressed" 62 | else 63 | gzip < $ext.append > "$ext.compressed" 64 | fi 65 | 66 | set -e 67 | 68 | # Abort if AWS key is not set 69 | if [ -z "$AWS_ACCESS_KEY_ID" ]; then 70 | echo "No AWS key found, skipping.." 
71 | exit 0 72 | fi 73 | 74 | # upload versioned version 75 | if [[ $7 = 'true' ]]; then 76 | if [[ $4 == wasm* ]]; then 77 | aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" 78 | else 79 | aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read 80 | fi 81 | fi 82 | 83 | # upload to latest version 84 | if [[ $6 = 'true' ]]; then 85 | if [[ $4 == wasm* ]]; then 86 | aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" 87 | else 88 | aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz --acl public-read 89 | fi 90 | fi 91 | -------------------------------------------------------------------------------- /test/sql/parse_tools/table_functions/parse_functions.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/table_functions/parse_functions.test 2 | # description: test parse_functions table function 3 | # group: [parse_functions] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT * FROM parse_functions('SELECT upper(name) FROM my_table;'); 8 | ---- 9 | Catalog Error: Table Function with name parse_functions does not exist! 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | # simple function in SELECT 15 | query III 16 | SELECT * FROM parse_functions('SELECT upper(name) FROM my_table;'); 17 | ---- 18 | upper main select 19 | 20 | # multiple functions in SELECT 21 | query III 22 | SELECT * FROM parse_functions('SELECT upper(name), lower(email), length(address) FROM users;'); 23 | ---- 24 | upper main select 25 | lower main select 26 | length main select 27 | 28 | # function in WHERE clause 29 | query III 30 | SELECT * FROM parse_functions('SELECT * FROM users WHERE length(name) > 5;'); 31 | ---- 32 | length main where 33 | 34 | # function in HAVING clause 35 | query III 36 | SELECT * FROM parse_functions('SELECT count(*) FROM users GROUP BY department HAVING max(salary) > 50000;'); 37 | ---- 38 | count_star main select 39 | max main having 40 | 41 | # function in ORDER BY clause 42 | query III 43 | SELECT * FROM parse_functions('SELECT * FROM users ORDER BY upper(name);'); 44 | ---- 45 | upper main order_by 46 | 47 | # function in GROUP BY clause 48 | query III 49 | SELECT * FROM parse_functions('SELECT count(*) FROM users GROUP BY substr(name, 1, 1);'); 50 | ---- 51 | count_star main select 52 | substr main group_by 53 | 54 | # nested functions 55 | query III 56 | SELECT * FROM parse_functions('SELECT upper(left(name, 3)) FROM users;'); 57 | ---- 58 | upper main select 59 | left main nested 60 | 61 | # window functions 62 | query III 63 | SELECT * FROM parse_functions('SELECT name, row_number() OVER (ORDER BY salary) FROM users;'); 64 | ---- 65 | row_number main select 66 | 67 | # aggregate functions 68 | query III 69 | SELECT * FROM parse_functions('SELECT count(*), sum(salary), avg(age) FROM users;'); 70 | ---- 71 | count_star main select 72 | sum main select 73 | avg main select 74 | 75 | # functions with schema qualification 76 | query III 77 | SELECT * FROM parse_functions('SELECT my_schema.custom_func(name) 
FROM users;'); 78 | ---- 79 | custom_func my_schema select 80 | 81 | # complex query with multiple contexts 82 | query III 83 | SELECT * FROM parse_functions($$ 84 | SELECT upper(name), count(*) 85 | FROM users 86 | WHERE length(email) > 0 87 | GROUP BY substr(department, 1, 3) 88 | HAVING sum(salary) > 100000 89 | ORDER BY lower(name) 90 | $$); 91 | ---- 92 | upper main select 93 | count_star main select 94 | length main where 95 | substr main group_by 96 | sum main having 97 | lower main order_by 98 | 99 | # CTE with functions 100 | query III 101 | SELECT * FROM parse_functions('WITH ranked AS (SELECT name, rank() OVER (ORDER BY salary) as r FROM users) SELECT upper(name) FROM ranked;'); 102 | ---- 103 | rank main select 104 | upper main select 105 | 106 | # no functions 107 | query III 108 | SELECT * FROM parse_functions('SELECT name, age FROM users;'); 109 | ---- 110 | 111 | # malformed SQL should not error 112 | query III 113 | SELECT * FROM parse_functions('SELECT upper( FROM users'); 114 | ---- -------------------------------------------------------------------------------- /test/sql/parse_tools/scalar_functions/parse_functions.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/parser_tools/scalar_functions/parse_functions.test 2 | # description: test parse_functions scalar function (struct variant) 3 | # group: [parse_functions] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT parse_functions('SELECT upper(name) FROM my_table;'); 8 | ---- 9 | Catalog Error: Scalar Function with name parse_functions does not exist! 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require parser_tools 13 | 14 | # simple function in SELECT 15 | query I 16 | SELECT parse_functions('SELECT upper(name) FROM my_table;'); 17 | ---- 18 | [{'function_name': upper, 'schema': main, 'context': select}] 19 | 20 | # multiple functions in SELECT 21 | query I 22 | SELECT parse_functions('SELECT upper(name), lower(email), length(address) FROM users;'); 23 | ---- 24 | [{'function_name': upper, 'schema': main, 'context': select}, {'function_name': lower, 'schema': main, 'context': select}, {'function_name': length, 'schema': main, 'context': select}] 25 | 26 | # function in WHERE clause 27 | query I 28 | SELECT parse_functions('SELECT * FROM users WHERE length(name) > 5;'); 29 | ---- 30 | [{'function_name': length, 'schema': main, 'context': where}] 31 | 32 | # function in HAVING clause 33 | query I 34 | SELECT parse_functions('SELECT count_star(*) FROM users GROUP BY department HAVING max(salary) > 50000;'); 35 | ---- 36 | [{'function_name': count_star, 'schema': main, 'context': select}, {'function_name': max, 'schema': main, 'context': having}] 37 | 38 | # function in ORDER BY clause 39 | query I 40 | SELECT parse_functions('SELECT * FROM users ORDER BY upper(name);'); 41 | ---- 42 | [{'function_name': upper, 'schema': main, 'context': order_by}] 43 | 44 | # function in GROUP BY clause 45 | query I 46 | SELECT parse_functions('SELECT count_star(*) FROM users GROUP BY substr(name, 1, 1);'); 47 | ---- 48 | [{'function_name': count_star, 'schema': main, 'context': select}, {'function_name': substr, 'schema': main, 'context': group_by}] 49 | 50 | # nested functions 51 | query I 52 | SELECT parse_functions('SELECT upper(left(name, 3)) FROM users;'); 53 | ---- 54 | [{'function_name': upper, 'schema': main, 'context': select}, {'function_name': left, 'schema': main, 'context': nested}] 55 | 56 | # functions with schema qualification 57 | query I 58 | SELECT 
parse_functions('SELECT my_schema.custom_func(name) FROM users;'); 59 | ---- 60 | [{'function_name': custom_func, 'schema': my_schema, 'context': select}] 61 | 62 | # demonstrate list filter functionality on context 63 | query I 64 | SELECT list_filter(parse_functions('SELECT upper(name), count_star(*) FROM users WHERE length(email) > 0'), f -> f.context = 'select') AS select_functions; 65 | ---- 66 | [{'function_name': upper, 'schema': main, 'context': select}, {'function_name': count_star, 'schema': main, 'context': select}] 67 | 68 | # demonstrate list filter functionality on function name 69 | query I 70 | SELECT list_filter(parse_functions('SELECT upper(name), lower(name), count_star(*) FROM users'), f -> f.function_name LIKE '%er') AS er_functions; 71 | ---- 72 | [{'function_name': upper, 'schema': main, 'context': select}, {'function_name': lower, 'schema': main, 'context': select}] 73 | 74 | # CTE with functions 75 | query I 76 | SELECT parse_functions('WITH ranked AS (SELECT name, rank() OVER (ORDER BY salary) as r FROM users) SELECT upper(name) FROM ranked;'); 77 | ---- 78 | [{'function_name': rank, 'schema': main, 'context': select}, {'function_name': upper, 'schema': main, 'context': select}] 79 | 80 | # no functions 81 | query I 82 | SELECT parse_functions('SELECT name, age FROM users;'); 83 | ---- 84 | [] 85 | 86 | # malformed SQL should not error 87 | query I 88 | SELECT parse_functions('SELECT upper( FROM users'); 89 | ---- 90 | [] 91 | 92 | # Unsupported statements 93 | # --------------------- 94 | 95 | # INSERT statement (unsupported) 96 | query I 97 | SELECT parse_functions('INSERT INTO users VALUES (upper("test"));'); 98 | ---- 99 | [] 100 | 101 | # UPDATE statement (unsupported) 102 | query I 103 | SELECT parse_functions('UPDATE users SET name = upper(name);'); 104 | ---- 105 | [] 106 | 107 | # CREATE VIEW statement (unsupported) 108 | query I 109 | SELECT parse_functions('CREATE VIEW v AS SELECT upper(name) FROM users;'); 110 | ---- 111 | [] 
-------------------------------------------------------------------------------- /src/parse_statements.cpp: -------------------------------------------------------------------------------- 1 | #include "parse_statements.hpp" 2 | #include "duckdb.hpp" 3 | #include "duckdb/parser/parser.hpp" 4 | #include "duckdb/parser/statement/select_statement.hpp" 5 | #include "duckdb/function/scalar/nested_functions.hpp" 6 | 7 | namespace duckdb { 8 | 9 | struct ParseStatementsState : public GlobalTableFunctionState { 10 | idx_t row = 0; 11 | vector results; 12 | }; 13 | 14 | struct ParseStatementsBindData : public TableFunctionData { 15 | string sql; 16 | }; 17 | 18 | // BIND function: runs during query planning to decide output schema 19 | static unique_ptr ParseStatementsBind(ClientContext &context, 20 | TableFunctionBindInput &input, 21 | vector &return_types, 22 | vector &names) { 23 | 24 | string sql_input = StringValue::Get(input.inputs[0]); 25 | 26 | // Return single column with statement text 27 | return_types = {LogicalType::VARCHAR}; 28 | names = {"statement"}; 29 | 30 | // Create a bind data object to hold the SQL input 31 | auto result = make_uniq(); 32 | result->sql = sql_input; 33 | 34 | return std::move(result); 35 | } 36 | 37 | // INIT function: runs before table function execution 38 | static unique_ptr ParseStatementsInit(ClientContext &context, 39 | TableFunctionInitInput &input) { 40 | return make_uniq(); 41 | } 42 | 43 | static void ExtractStatementsFromSQL(const std::string &sql, std::vector &results) { 44 | Parser parser; 45 | 46 | try { 47 | parser.ParseQuery(sql); 48 | } catch (const ParserException &ex) { 49 | // Swallow parser exceptions to make this function more robust 50 | return; 51 | } 52 | 53 | for (auto &stmt : parser.statements) { 54 | if (stmt) { 55 | // Convert statement back to string 56 | auto statement_str = stmt->ToString(); 57 | results.push_back(StatementResult{statement_str}); 58 | } 59 | } 60 | } 61 | 62 | static void 
ParseStatementsFunction(ClientContext &context, 63 | TableFunctionInput &data, 64 | DataChunk &output) { 65 | auto &state = (ParseStatementsState &)*data.global_state; 66 | auto &bind_data = (ParseStatementsBindData &)*data.bind_data; 67 | 68 | if (state.results.empty() && state.row == 0) { 69 | ExtractStatementsFromSQL(bind_data.sql, state.results); 70 | } 71 | 72 | if (state.row >= state.results.size()) { 73 | return; 74 | } 75 | 76 | auto &stmt = state.results[state.row]; 77 | output.SetCardinality(1); 78 | output.SetValue(0, 0, Value(stmt.statement)); 79 | 80 | state.row++; 81 | } 82 | 83 | static void ParseStatementsScalarFunction(DataChunk &args, ExpressionState &state, Vector &result) { 84 | UnaryExecutor::Execute(args.data[0], result, args.size(), 85 | [&result](string_t query) -> list_entry_t { 86 | // Parse the SQL query and extract statements 87 | auto query_string = query.GetString(); 88 | std::vector parsed_statements; 89 | ExtractStatementsFromSQL(query_string, parsed_statements); 90 | 91 | auto current_size = ListVector::GetListSize(result); 92 | auto number_of_statements = parsed_statements.size(); 93 | auto new_size = current_size + number_of_statements; 94 | 95 | // Grow list if needed 96 | if (ListVector::GetListCapacity(result) < new_size) { 97 | ListVector::Reserve(result, new_size); 98 | } 99 | 100 | // Write the statements into the child vector 101 | auto statements = FlatVector::GetData(ListVector::GetEntry(result)); 102 | for (size_t i = 0; i < parsed_statements.size(); i++) { 103 | auto &stmt = parsed_statements[i]; 104 | statements[current_size + i] = StringVector::AddStringOrBlob(ListVector::GetEntry(result), stmt.statement); 105 | } 106 | 107 | // Update size 108 | ListVector::SetListSize(result, new_size); 109 | 110 | return list_entry_t(current_size, number_of_statements); 111 | }); 112 | } 113 | 114 | static void NumStatementsScalarFunction(DataChunk &args, ExpressionState &state, Vector &result) { 115 | 
UnaryExecutor::Execute(args.data[0], result, args.size(), 116 | [](string_t query) -> int64_t { 117 | // Parse the SQL query and count statements 118 | auto query_string = query.GetString(); 119 | std::vector parsed_statements; 120 | ExtractStatementsFromSQL(query_string, parsed_statements); 121 | 122 | return static_cast(parsed_statements.size()); 123 | }); 124 | } 125 | 126 | // Extension scaffolding 127 | // --------------------------------------------------- 128 | 129 | void RegisterParseStatementsFunction(ExtensionLoader &loader) { 130 | // Table function that returns one row per statement 131 | TableFunction tf("parse_statements", {LogicalType::VARCHAR}, ParseStatementsFunction, ParseStatementsBind, ParseStatementsInit); 132 | loader.RegisterFunction(tf); 133 | } 134 | 135 | void RegisterParseStatementsScalarFunction(ExtensionLoader &loader) { 136 | // parse_statements is a scalar function that returns a list of statement strings 137 | ScalarFunction sf("parse_statements", {LogicalType::VARCHAR}, LogicalType::LIST(LogicalType::VARCHAR), ParseStatementsScalarFunction); 138 | loader.RegisterFunction(sf); 139 | 140 | // num_statements is a scalar function that returns the count of statements 141 | ScalarFunction num_sf("num_statements", {LogicalType::VARCHAR}, LogicalType::BIGINT, NumStatementsScalarFunction); 142 | loader.RegisterFunction(num_sf); 143 | } 144 | 145 | } // namespace duckdb -------------------------------------------------------------------------------- /src/parse_functions.cpp: -------------------------------------------------------------------------------- 1 | #include "parse_functions.hpp" 2 | #include "duckdb.hpp" 3 | #include "duckdb/parser/parser.hpp" 4 | #include "duckdb/parser/statement/select_statement.hpp" 5 | #include "duckdb/parser/query_node/cte_node.hpp" 6 | #include "duckdb/parser/query_node/select_node.hpp" 7 | #include "duckdb/parser/expression/function_expression.hpp" 8 | #include "duckdb/parser/expression/window_expression.hpp" 
9 | #include "duckdb/parser/parsed_expression_iterator.hpp" 10 | #include "duckdb/parser/result_modifier.hpp" 11 | #include "duckdb/function/scalar/nested_functions.hpp" 12 | 13 | 14 | namespace duckdb { 15 | 16 | enum class FunctionContext { 17 | Select, 18 | Where, 19 | Having, 20 | OrderBy, 21 | GroupBy, 22 | Join, 23 | WindowFunction, 24 | Nested 25 | }; 26 | 27 | inline const char *ToString(FunctionContext context) { 28 | switch (context) { 29 | case FunctionContext::Select: return "select"; 30 | case FunctionContext::Where: return "where"; 31 | case FunctionContext::Having: return "having"; 32 | case FunctionContext::OrderBy: return "order_by"; 33 | case FunctionContext::GroupBy: return "group_by"; 34 | case FunctionContext::Join: return "join"; 35 | case FunctionContext::WindowFunction: return "window"; 36 | case FunctionContext::Nested: return "nested"; 37 | default: return "unknown"; 38 | } 39 | } 40 | 41 | struct ParseFunctionsState : public GlobalTableFunctionState { 42 | idx_t row = 0; 43 | vector results; 44 | }; 45 | 46 | struct ParseFunctionsBindData : public TableFunctionData { 47 | string sql; 48 | }; 49 | 50 | // BIND function: runs during query planning to decide output schema 51 | static unique_ptr ParseFunctionsBind(ClientContext &context, 52 | TableFunctionBindInput &input, 53 | vector &return_types, 54 | vector &names) { 55 | 56 | string sql_input = StringValue::Get(input.inputs[0]); 57 | 58 | // always return the same columns: 59 | return_types = {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}; 60 | // function name, schema name, usage context 61 | names = {"function_name", "schema", "context"}; 62 | 63 | // create a bind data object to hold the SQL input 64 | auto result = make_uniq(); 65 | result->sql = sql_input; 66 | 67 | return std::move(result); 68 | } 69 | 70 | // INIT function: runs before table function execution 71 | static unique_ptr ParseFunctionsInit(ClientContext &context, 72 | TableFunctionInitInput &input) 
{ 73 | return make_uniq(); 74 | } 75 | 76 | class FunctionExtractor { 77 | public: 78 | static void ExtractFromExpression(const ParsedExpression &expr, 79 | std::vector &results, 80 | FunctionContext context = FunctionContext::Select) { 81 | if (expr.expression_class == ExpressionClass::FUNCTION) { 82 | auto &func = (FunctionExpression &)expr; 83 | results.push_back(FunctionResult{ 84 | func.function_name, 85 | func.schema.empty() ? "main" : func.schema, 86 | ToString(context) 87 | }); 88 | 89 | // For nested function calls within this function, mark as nested 90 | ParsedExpressionIterator::EnumerateChildren(expr, [&](const ParsedExpression &child) { 91 | ExtractFromExpression(child, results, FunctionContext::Nested); 92 | }); 93 | } else if (expr.expression_class == ExpressionClass::WINDOW) { 94 | auto &window_expr = (WindowExpression &)expr; 95 | results.push_back(FunctionResult{ 96 | window_expr.function_name, 97 | window_expr.schema.empty() ? "main" : window_expr.schema, 98 | ToString(context) 99 | }); 100 | 101 | // Extract functions from window function arguments 102 | for (const auto &child : window_expr.children) { 103 | if (child) { 104 | ExtractFromExpression(*child, results, FunctionContext::Nested); 105 | } 106 | } 107 | 108 | // Extract functions from PARTITION BY expressions 109 | for (const auto &partition : window_expr.partitions) { 110 | if (partition) { 111 | ExtractFromExpression(*partition, results, FunctionContext::Nested); 112 | } 113 | } 114 | 115 | // Extract functions from ORDER BY expressions 116 | for (const auto &order : window_expr.orders) { 117 | if (order.expression) { 118 | ExtractFromExpression(*order.expression, results, FunctionContext::Nested); 119 | } 120 | } 121 | 122 | // Extract functions from argument ordering expressions 123 | for (const auto &arg_order : window_expr.arg_orders) { 124 | if (arg_order.expression) { 125 | ExtractFromExpression(*arg_order.expression, results, FunctionContext::Nested); 126 | } 127 | } 128 | 129 
| // Extract functions from frame expressions 130 | if (window_expr.start_expr) { 131 | ExtractFromExpression(*window_expr.start_expr, results, FunctionContext::Nested); 132 | } 133 | if (window_expr.end_expr) { 134 | ExtractFromExpression(*window_expr.end_expr, results, FunctionContext::Nested); 135 | } 136 | if (window_expr.offset_expr) { 137 | ExtractFromExpression(*window_expr.offset_expr, results, FunctionContext::Nested); 138 | } 139 | if (window_expr.default_expr) { 140 | ExtractFromExpression(*window_expr.default_expr, results, FunctionContext::Nested); 141 | } 142 | 143 | // Extract functions from filter expression 144 | if (window_expr.filter_expr) { 145 | ExtractFromExpression(*window_expr.filter_expr, results, FunctionContext::Nested); 146 | } 147 | } else { 148 | // For non-function expressions, preserve the current context 149 | ParsedExpressionIterator::EnumerateChildren(expr, [&](const ParsedExpression &child) { 150 | ExtractFromExpression(child, results, context); 151 | }); 152 | } 153 | } 154 | 155 | static void ExtractFromExpressionList(const vector> &expressions, 156 | std::vector &results, 157 | FunctionContext context) { 158 | for (const auto &expr : expressions) { 159 | if (expr) { 160 | ExtractFromExpression(*expr, results, context); 161 | } 162 | } 163 | } 164 | }; 165 | 166 | 167 | static void ExtractFunctionsFromQueryNode(const QueryNode &node, std::vector &results) { 168 | if (node.type == QueryNodeType::SELECT_NODE) { 169 | auto &select_node = (SelectNode &)node; 170 | 171 | // Extract from CTEs first (to match expected order in tests) 172 | for (const auto &cte : select_node.cte_map.map) { 173 | if (cte.second && cte.second->query && cte.second->query->node) { 174 | ExtractFunctionsFromQueryNode(*cte.second->query->node, results); 175 | } 176 | } 177 | 178 | // Extract from SELECT list 179 | FunctionExtractor::ExtractFromExpressionList(select_node.select_list, results, FunctionContext::Select); 180 | 181 | // Extract from WHERE clause 
182 | if (select_node.where_clause) { 183 | FunctionExtractor::ExtractFromExpression(*select_node.where_clause, results, FunctionContext::Where); 184 | } 185 | 186 | // Extract from GROUP BY clause 187 | FunctionExtractor::ExtractFromExpressionList(select_node.groups.group_expressions, results, FunctionContext::GroupBy); 188 | 189 | // Extract from HAVING clause 190 | if (select_node.having) { 191 | FunctionExtractor::ExtractFromExpression(*select_node.having, results, FunctionContext::Having); 192 | } 193 | 194 | // Extract from ORDER BY clause 195 | for (const auto &modifier : select_node.modifiers) { 196 | if (modifier->type == ResultModifierType::ORDER_MODIFIER) { 197 | auto &order_modifier = (OrderModifier &)*modifier; 198 | for (const auto &order : order_modifier.orders) { 199 | if (order.expression) { 200 | FunctionExtractor::ExtractFromExpression(*order.expression, results, FunctionContext::OrderBy); 201 | } 202 | } 203 | } 204 | } 205 | // additional step necessary for duckdb v1.4.0: unwrap CTE node 206 | } else if (node.type == QueryNodeType::CTE_NODE) { 207 | auto &cte_node = (CTENode &)node; 208 | 209 | if (cte_node.child) { 210 | ExtractFunctionsFromQueryNode(*cte_node.child, results); 211 | } 212 | } 213 | } 214 | 215 | static void ExtractFunctionsFromSQL(const std::string &sql, std::vector &results) { 216 | Parser parser; 217 | 218 | try { 219 | parser.ParseQuery(sql); 220 | } catch (const ParserException &ex) { 221 | // swallow parser exceptions to make this function more robust. 
is_parsable can be used if needed 222 | return; 223 | } 224 | 225 | for (auto &stmt : parser.statements) { 226 | if (stmt->type == StatementType::SELECT_STATEMENT) { 227 | auto &select_stmt = (SelectStatement &)*stmt; 228 | if (select_stmt.node) { 229 | ExtractFunctionsFromQueryNode(*select_stmt.node, results); 230 | } 231 | } 232 | } 233 | } 234 | 235 | static void ParseFunctionsFunction(ClientContext &context, 236 | TableFunctionInput &data, 237 | DataChunk &output) { 238 | auto &state = (ParseFunctionsState &)*data.global_state; 239 | auto &bind_data = (ParseFunctionsBindData &)*data.bind_data; 240 | 241 | if (state.results.empty() && state.row == 0) { 242 | ExtractFunctionsFromSQL(bind_data.sql, state.results); 243 | } 244 | 245 | if (state.row >= state.results.size()) { 246 | return; 247 | } 248 | 249 | auto &func = state.results[state.row]; 250 | output.SetCardinality(1); 251 | output.SetValue(0, 0, Value(func.function_name)); 252 | output.SetValue(1, 0, Value(func.schema)); 253 | output.SetValue(2, 0, Value(func.context)); 254 | 255 | state.row++; 256 | } 257 | 258 | static void ParseFunctionNamesScalarFunction(DataChunk &args, ExpressionState &state, Vector &result) { 259 | UnaryExecutor::Execute(args.data[0], result, args.size(), 260 | [&result](string_t query) -> list_entry_t { 261 | // Parse the SQL query and extract function names 262 | auto query_string = query.GetString(); 263 | std::vector parsed_functions; 264 | ExtractFunctionsFromSQL(query_string, parsed_functions); 265 | 266 | auto current_size = ListVector::GetListSize(result); 267 | auto number_of_functions = parsed_functions.size(); 268 | auto new_size = current_size + number_of_functions; 269 | 270 | // grow list if needed 271 | if (ListVector::GetListCapacity(result) < new_size) { 272 | ListVector::Reserve(result, new_size); 273 | } 274 | 275 | // Write the function names into the child vector 276 | auto functions = FlatVector::GetData(ListVector::GetEntry(result)); 277 | for (size_t i = 0; 
i < parsed_functions.size(); i++) { 278 | auto &func = parsed_functions[i]; 279 | functions[current_size + i] = StringVector::AddStringOrBlob(ListVector::GetEntry(result), func.function_name); 280 | } 281 | 282 | // Update size 283 | ListVector::SetListSize(result, new_size); 284 | 285 | return list_entry_t(current_size, number_of_functions); 286 | }); 287 | } 288 | 289 | static void ParseFunctionsScalarFunction_struct(DataChunk &args, ExpressionState &state, Vector &result) { 290 | UnaryExecutor::Execute(args.data[0], result, args.size(), 291 | [&result](string_t query) -> list_entry_t { 292 | // Parse the SQL query and extract function names 293 | auto query_string = query.GetString(); 294 | std::vector parsed_functions; 295 | ExtractFunctionsFromSQL(query_string, parsed_functions); 296 | 297 | auto current_size = ListVector::GetListSize(result); 298 | auto number_of_functions = parsed_functions.size(); 299 | auto new_size = current_size + number_of_functions; 300 | 301 | // Grow list vector if needed 302 | if (ListVector::GetListCapacity(result) < new_size) { 303 | ListVector::Reserve(result, new_size); 304 | } 305 | 306 | // Get the struct child vector of the list 307 | auto &struct_vector = ListVector::GetEntry(result); 308 | 309 | // Ensure list size is updated 310 | ListVector::SetListSize(result, new_size); 311 | 312 | // Get the fields in the STRUCT 313 | auto &entries = StructVector::GetEntries(struct_vector); 314 | auto &function_name_entry = *entries[0]; // "function_name" field 315 | auto &schema_entry = *entries[1]; // "schema" field 316 | auto &context_entry = *entries[2]; // "context" field 317 | 318 | auto function_name_data = FlatVector::GetData(function_name_entry); 319 | auto schema_data = FlatVector::GetData(schema_entry); 320 | auto context_data = FlatVector::GetData(context_entry); 321 | 322 | for (size_t i = 0; i < number_of_functions; i++) { 323 | const auto &func = parsed_functions[i]; 324 | auto idx = current_size + i; 325 | 326 | 
function_name_data[idx] = StringVector::AddStringOrBlob(function_name_entry, func.function_name); 327 | schema_data[idx] = StringVector::AddStringOrBlob(schema_entry, func.schema); 328 | context_data[idx] = StringVector::AddStringOrBlob(context_entry, func.context); 329 | } 330 | 331 | return list_entry_t(current_size, number_of_functions); 332 | }); 333 | } 334 | 335 | // Extension scaffolding 336 | // --------------------------------------------------- 337 | 338 | void RegisterParseFunctionsFunction(ExtensionLoader &loader) { 339 | TableFunction tf("parse_functions", {LogicalType::VARCHAR}, ParseFunctionsFunction, ParseFunctionsBind, ParseFunctionsInit); 340 | loader.RegisterFunction(tf); 341 | } 342 | 343 | void RegisterParseFunctionScalarFunction(ExtensionLoader &loader) { 344 | // parse_function_names is a scalar function that returns a list of function names 345 | ScalarFunction sf("parse_function_names", {LogicalType::VARCHAR}, LogicalType::LIST(LogicalType::VARCHAR), ParseFunctionNamesScalarFunction); 346 | loader.RegisterFunction(sf); 347 | 348 | // parse_functions_struct is a scalar function that returns a list of structs 349 | auto return_type = LogicalType::LIST(LogicalType::STRUCT({ 350 | {"function_name", LogicalType::VARCHAR}, 351 | {"schema", LogicalType::VARCHAR}, 352 | {"context", LogicalType::VARCHAR} 353 | })); 354 | ScalarFunction sf_struct("parse_functions", {LogicalType::VARCHAR}, return_type, ParseFunctionsScalarFunction_struct); 355 | loader.RegisterFunction(sf_struct); 356 | } 357 | 358 | 359 | 360 | } // namespace duckdb 361 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parser Tools 2 | 3 | An experimental DuckDB extension that exposes functionality from DuckDB's native SQL parser. 
4 | 5 | ## Overview 6 | 7 | `parser_tools` is a DuckDB extension designed to provide SQL parsing capabilities within the database. It allows you to analyze SQL queries and extract structural information directly in SQL. This extension provides parsing functions for tables, WHERE clauses, function calls, and statements. 8 | 9 | ## Features 10 | 11 | - **Extract table references** from a SQL query with context information (e.g. `FROM`, `JOIN`, etc.) 12 | - **Extract function calls** from a SQL query with context information (e.g. `SELECT`, `WHERE`, `HAVING`, etc.) 13 | - **Parse WHERE clauses** to extract conditions and operators 14 | - **Parse multi-statement SQL** to extract individual statements or count the number of statements 15 | - Support for **window functions**, **nested functions**, and **CTEs** 16 | - Includes **schema**, **name**, and **context** information for all extractions 17 | - Built on DuckDB's native SQL parser 18 | - Simple SQL interface — no external tooling required 19 | 20 | 21 | ## Known Limitations 22 | - Only `SELECT` statements are supported for table and function parsing 23 | - WHERE clause parsing supports additional statement types 24 | - Full parse tree is not exposed (only specific structural elements) 25 | 26 | ## Installation 27 | 28 | ```sql 29 | INSTALL parser_tools FROM community; 30 | LOAD parser_tools; 31 | ``` 32 | 33 | ## Usage 34 | 35 | ### Parse table references from a query 36 | #### Simple example 37 | 38 | ```sql 39 | SELECT * FROM parse_tables('SELECT * FROM MyTable'); 40 | ``` 41 | 42 | ##### Output 43 | 44 | ``` 45 | ┌─────────┬─────────┬─────────┐ 46 | │ schema │ table │ context │ 47 | │ varchar │ varchar │ varchar │ 48 | ├─────────┼─────────┼─────────┤ 49 | │ main │ MyTable │ from │ 50 | └─────────┴─────────┴─────────┘ 51 | ``` 52 | 53 | This tells you that `MyTable` in the `main` schema was used in the `FROM` clause of the query. 
54 | 55 | #### CTE Example 56 | ```sql 57 | select * from parse_tables('with EarlyAdopters as (select * from Users where id < 10) select * from EarlyAdopters;'); 58 | ``` 59 | 60 | ##### Output 61 | ``` 62 | ┌─────────┬───────────────┬──────────┐ 63 | │ schema │ table │ context │ 64 | │ varchar │ varchar │ varchar │ 65 | ├─────────┼───────────────┼──────────┤ 66 | │ │ EarlyAdopters │ cte │ 67 | │ main │ Users │ from │ 68 | │ main │ EarlyAdopters │ from_cte │ 69 | └─────────┴───────────────┴──────────┘ 70 | ``` 71 | This tells us a few things: 72 | * `EarlyAdopters` was defined as a CTE. 73 | * The `Users` table was referenced in a from clause. 74 | * `EarlyAdopters` was referenced in a from clause (but it's a cte, not a table). 75 | 76 | ## Context 77 | 78 | Context helps identify where elements are used in the query. 79 | 80 | ### Table Context 81 | - `from`: table in the main `FROM` clause 82 | - `join_left`: left side of a `JOIN` 83 | - `join_right`: right side of a `JOIN` 84 | - `cte`: a Common Table Expression being defined 85 | - `from_cte`: usage of a CTE as if it were a table 86 | - `subquery`: table reference inside a subquery 87 | 88 | ### Function Context 89 | - `select`: function in a `SELECT` clause 90 | - `where`: function in a `WHERE` clause 91 | - `having`: function in a `HAVING` clause 92 | - `order_by`: function in an `ORDER BY` clause 93 | - `group_by`: function in a `GROUP BY` clause 94 | - `nested`: function call nested within another function 95 | 96 | ## Functions 97 | 98 | This extension provides parsing functions for tables, functions, WHERE clauses, and statements. Each category includes both table functions (for detailed results) and scalar functions (for programmatic use). 99 | 100 | In general, errors (e.g. Parse Exception) will not be exposed to the user, but instead will result in an empty result. This simplifies batch processing. When validity is needed, [is_parsable](#is_parsablesql_query--scalar-function) can be used. 
101 | 102 | ### Function Parsing Functions 103 | 104 | These functions extract function calls from SQL queries, including window functions and nested function calls. 105 | 106 | #### `parse_functions(sql_query)` – Table Function 107 | 108 | Parses a SQL `SELECT` query and returns all function calls along with their context of use (e.g. `select`, `where`, `having`, `order_by`, etc.). 109 | 110 | ##### Usage 111 | ```sql 112 | SELECT * FROM parse_functions('SELECT upper(name), count(*) FROM users WHERE length(email) > 0;'); 113 | ``` 114 | 115 | ##### Returns 116 | A table with: 117 | - `function_name`: the name of the function 118 | - `schema`: schema name (default `"main"` if unspecified) 119 | - `context`: where the function appears in the query 120 | 121 | ##### Example 122 | ```sql 123 | SELECT * FROM parse_functions($$ 124 | SELECT upper(name), count(*) 125 | FROM users 126 | WHERE length(email) > 0 127 | GROUP BY substr(department, 1, 3) 128 | HAVING sum(salary) > 100000 129 | ORDER BY lower(name) 130 | $$); 131 | ``` 132 | 133 | | function_name | schema | context | 134 | |---------------|--------|------------| 135 | | upper | main | select | 136 | | count_star | main | select | 137 | | length | main | where | 138 | | substr | main | group_by | 139 | | sum | main | having | 140 | | lower | main | order_by | 141 | 142 | --- 143 | 144 | #### `parse_function_names(sql_query)` – Scalar Function 145 | 146 | Returns a list of function names (strings) referenced in the SQL query. 147 | 148 | ##### Usage 149 | ```sql 150 | SELECT parse_function_names('SELECT upper(name), lower(email) FROM users;'); 151 | ---- 152 | ['upper', 'lower'] 153 | ``` 154 | 155 | ##### Returns 156 | A list of strings, each being a function name. 
157 | 158 | ##### Example 159 | ```sql 160 | SELECT parse_function_names('SELECT rank() OVER (ORDER BY salary) FROM users;'); 161 | ---- 162 | ['rank'] 163 | ``` 164 | 165 | --- 166 | 167 | #### `parse_functions(sql_query)` – Scalar Function (Structured) 168 | 169 | Similar to the table function, but returns a **list of structs** instead of a result table. Each struct contains: 170 | 171 | - `function_name` (VARCHAR) 172 | - `schema` (VARCHAR) 173 | - `context` (VARCHAR) 174 | 175 | ##### Usage 176 | ```sql 177 | SELECT parse_functions('SELECT upper(name), count(*) FROM users;'); 178 | ---- 179 | [{'function_name': upper, 'schema': main, 'context': select}, {'function_name': count_star, 'schema': main, 'context': select}] 180 | ``` 181 | 182 | ##### Returns 183 | A list of STRUCTs with function name, schema, and context. 184 | 185 | ##### Example with filtering 186 | ```sql 187 | SELECT list_filter(parse_functions('SELECT upper(name) FROM users WHERE lower(email) LIKE "%@example.com"'), f -> f.context = 'where') AS where_functions; 188 | ---- 189 | [{'function_name': lower, 'schema': main, 'context': where}] 190 | ``` 191 | 192 | --- 193 | 194 | ### Table Parsing Functions 195 | 196 | #### `parse_tables(sql_query)` – Table Function 197 | 198 | Parses a SQL `SELECT` query and returns all referenced tables along with their context of use (e.g. `from`, `join_left`, `cte`, etc.). 
199 | 200 | #### Usage 201 | ```sql 202 | SELECT * FROM parse_tables('SELECT * FROM my_table JOIN other_table USING (id)'); 203 | ``` 204 | 205 | #### Returns 206 | A table with: 207 | - `schema`: schema name (default `"main"` if unspecified) 208 | - `table`: table name 209 | - `context`: where the table appears in the query 210 | One of: `from`, `join_left`, `join_right`, `from_cte`, `cte`, `subquery` 211 | 212 | #### Example 213 | ```sql 214 | SELECT * FROM parse_tables($$ 215 | WITH cte1 AS (SELECT * FROM x) 216 | SELECT * FROM cte1 JOIN y ON cte1.id = y.id 217 | $$); 218 | ``` 219 | 220 | | schema | table | context | 221 | |--------|--------|------------| 222 | | | cte1 | cte | 223 | | main | x | from | 224 | | main | y | join_right | 225 | | | cte1 | from_cte | 226 | 227 | --- 228 | 229 | ### `parse_table_names(sql_query [, exclude_cte=true])` – Scalar Function 230 | 231 | Returns a list of table names (strings) referenced in the SQL query. Can optionally exclude CTE-related references. 232 | 233 | #### Usage 234 | ```sql 235 | SELECT parse_table_names('SELECT * FROM my_table'); 236 | ---- 237 | ['my_table'] 238 | ``` 239 | 240 | #### Optional Parameter 241 | ```sql 242 | SELECT parse_table_names('with cte_test as(select 1) select * from MyTable, cte_test', false); -- include CTEs 243 | ---- 244 | [cte_test, MyTable, cte_test] 245 | ``` 246 | 247 | #### Returns 248 | A list of strings, each being a table name. 249 | 250 | #### Example 251 | ```sql 252 | SELECT parse_table_names('SELECT * FROM a JOIN b USING (id)'); 253 | ---- 254 | ['a', 'b'] 255 | ``` 256 | 257 | --- 258 | 259 | ### `parse_tables(sql_query)` – Scalar Function (Structured) 260 | 261 | Similar to the table function, but returns a **list of structs** instead of a result table. 
Each struct contains: 262 | 263 | - `schema` (VARCHAR) 264 | - `table` (VARCHAR) 265 | - `context` (VARCHAR) 266 | 267 | #### Usage 268 | ```sql 269 | SELECT parse_tables('select * from MyTable'); 270 | ---- 271 | [{'schema': main, 'table': MyTable, 'context': from}] 272 | ``` 273 | 274 | #### Returns 275 | A list of STRUCTs with schema, table name, and context. 276 | 277 | #### Example 278 | ```sql 279 | SELECT parse_tables('select * from MyTable t inner join Other o on o.id = t.id'); 280 | ---- 281 | [{'schema': main, 'table': MyTable, 'context': from}, {'schema': main, 'table': Other, 'context': join_right}] 282 | ``` 283 | 284 | 285 | ### `is_parsable(sql_query)` – Scalar Function 286 | 287 | Checks whether a given SQL string is syntactically valid (i.e. can be parsed by DuckDB). 288 | 289 | #### Usage 290 | ```sql 291 | SELECT is_parsable('SELECT * FROM users'); 292 | -- true 293 | 294 | SELECT is_parsable('SELEKT * FROM users'); 295 | -- false 296 | ``` 297 | 298 | #### Returns 299 | A boolean indicating whether the input SQL string is parsable (`true`) or not (`false`). 300 | 301 | #### Example 302 | ```sql 303 | SELECT query, is_parsable(query) AS valid 304 | FROM (VALUES 305 | ('SELECT * FROM good_table'), 306 | ('BAD SQL SELECT *'), 307 | ('WITH cte AS (SELECT 1) SELECT * FROM cte') 308 | ) AS t(query); 309 | ``` 310 | 311 | ##### Output 312 | ``` 313 | ┌───────────────────────────────────────────────┬────────┐ 314 | │ query │ valid │ 315 | │ varchar │ boolean│ 316 | ├───────────────────────────────────────────────┼────────┤ 317 | │ SELECT * FROM good_table │ true │ 318 | │ BAD SQL SELECT * │ false │ 319 | │ WITH cte AS (SELECT 1) SELECT * FROM cte │ true │ 320 | └───────────────────────────────────────────────┴────────┘ 321 | ``` 322 | 323 | --- 324 | 325 | ### Statement Parsing Functions 326 | 327 | These functions parse multi-statement SQL strings and extract individual statements or count them. 
328 | 329 | #### `parse_statements(sql_query)` – Table Function 330 | 331 | Parses a SQL string containing multiple statements and returns each statement as a separate row. 332 | 333 | ##### Usage 334 | ```sql 335 | SELECT * FROM parse_statements('SELECT 42; SELECT 43;'); 336 | ``` 337 | 338 | ##### Returns 339 | A table with: 340 | - `statement`: the SQL statement text 341 | 342 | ##### Example 343 | ```sql 344 | SELECT * FROM parse_statements($$ 345 | SELECT * FROM users WHERE active = true; 346 | INSERT INTO log VALUES ('query executed'); 347 | SELECT count(*) FROM transactions; 348 | $$); 349 | ``` 350 | 351 | | statement | 352 | |-----------| 353 | | SELECT * FROM users WHERE (active = true) | 354 | | INSERT INTO log (VALUES ('query executed')) | 355 | | SELECT count_star() FROM transactions | 356 | 357 | --- 358 | 359 | #### `parse_statements(sql_query)` – Scalar Function 360 | 361 | Returns a list of statement strings from a multi-statement SQL query. 362 | 363 | ##### Usage 364 | ```sql 365 | SELECT parse_statements('SELECT 42; SELECT 43;'); 366 | ---- 367 | [SELECT 42, SELECT 43] 368 | ``` 369 | 370 | ##### Returns 371 | A list of strings, each being a SQL statement. 372 | 373 | ##### Example 374 | ```sql 375 | SELECT parse_statements('SELECT 1; INSERT INTO test VALUES (2); SELECT 3;'); 376 | ---- 377 | [SELECT 1, 'INSERT INTO test (VALUES (2))', SELECT 3] 378 | ``` 379 | 380 | --- 381 | 382 | #### `num_statements(sql_query)` – Scalar Function 383 | 384 | Returns the number of statements in a multi-statement SQL query. 385 | 386 | ##### Usage 387 | ```sql 388 | SELECT num_statements('SELECT 42; SELECT 43;'); 389 | ---- 390 | 2 391 | ``` 392 | 393 | ##### Returns 394 | An integer count of the number of SQL statements. 
395 | 396 | ##### Example 397 | ```sql 398 | SELECT num_statements($$ 399 | WITH cte AS (SELECT 1) SELECT * FROM cte; 400 | UPDATE users SET last_seen = now(); 401 | SELECT count(*) FROM users; 402 | DELETE FROM temp_data; 403 | $$); 404 | ---- 405 | 4 406 | ``` 407 | 408 | --- 409 | 410 | ## Development 411 | 412 | ### Build steps 413 | To build the extension, run: 414 | ```sh 415 | GEN=ninja make 416 | ``` 417 | The main binaries that will be built are: 418 | ```sh 419 | ./build/release/duckdb 420 | ./build/release/test/unittest 421 | ./build/release/extension/parser_tools/parser_tools.duckdb_extension 422 | ``` 423 | - `duckdb` is the binary for the duckdb shell with the extension code automatically loaded. 424 | - `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary. 425 | - `parser_tools.duckdb_extension` is the loadable binary as it would be distributed. 426 | 427 | ## Running the extension 428 | To run the extension code, simply start the shell with `./build/release/duckdb` (which has the parser_tools extension built-in). 429 | 430 | Now we can use the features from the extension directly in DuckDB: 431 | ``` 432 | D select * from parse_tables('select * from MyTable'); 433 | ┌─────────┬─────────┬─────────┐ 434 | │ schema │ table │ context │ 435 | │ varchar │ varchar │ varchar │ 436 | ├─────────┼─────────┼─────────┤ 437 | │ main │ MyTable │ from │ 438 | └─────────┴─────────┴─────────┘ 439 | ``` 440 | 441 | ## Running the extension from a duckdb distribution 442 | To run the extension dev build from an existing distribution of duckdb (e.g. 
cli): 443 | ``` 444 | $ duckdb -unsigned 445 | 446 | D install parser_tools from './build/release/repository/v1.2.1/osx_amd64/parser_tools.duckdb_extension'; 447 | D load parser_tools; 448 | 449 | D select * from parse_tables('select * from MyTable'); 450 | ┌─────────┬─────────┬─────────┐ 451 | │ schema │ table │ context │ 452 | │ varchar │ varchar │ varchar │ 453 | ├─────────┼─────────┼─────────┤ 454 | │ main │ MyTable │ from │ 455 | └─────────┴─────────┴─────────┘ 456 | ``` 457 | 458 | ## Running the tests 459 | See [Writing Tests](https://duckdb.org/docs/stable/dev/sqllogictest/writing_tests.html) to learn more about duckdb's testing philosophy. To that end, we define tests in sql at: [test/sql](test/sql/). 460 | 461 | The tests can be run with: 462 | ```sh 463 | make test 464 | ``` 465 | 466 | and easily re-ran as changes are made with: 467 | ```sh 468 | GEN=ninja make && make test 469 | ``` 470 | -------------------------------------------------------------------------------- /src/parse_tables.cpp: -------------------------------------------------------------------------------- 1 | #include "parse_tables.hpp" 2 | #include "duckdb.hpp" 3 | #include "duckdb/parser/parser.hpp" 4 | #include "duckdb/parser/parser_options.hpp" 5 | #include 6 | #include 7 | #include "duckdb/parser/statement/select_statement.hpp" 8 | #include "duckdb/parser/query_node/select_node.hpp" 9 | #include "duckdb/parser/query_node/cte_node.hpp" 10 | #include "duckdb/parser/tableref/basetableref.hpp" 11 | #include "duckdb/parser/tableref/joinref.hpp" 12 | #include "duckdb/parser/tableref/subqueryref.hpp" 13 | #include "duckdb/function/scalar/nested_functions.hpp" 14 | 15 | namespace duckdb { 16 | 17 | inline const char *ToString(TableContext context) { 18 | switch (context) { 19 | case TableContext::From: return "from"; 20 | case TableContext::JoinLeft: return "join_left"; 21 | case TableContext::JoinRight: return "join_right"; 22 | case TableContext::FromCTE: return "from_cte"; 23 | case 
TableContext::CTE: return "cte"; 24 | case TableContext::Subquery: return "subquery"; 25 | default: return "unknown"; 26 | } 27 | } 28 | 29 | inline const TableContext FromString(const char *context) { 30 | if (strcmp(context, "from") == 0) return TableContext::From; 31 | if (strcmp(context, "join_left") == 0) return TableContext::JoinLeft; 32 | if (strcmp(context, "join_right") == 0) return TableContext::JoinRight; 33 | if (strcmp(context, "from_cte") == 0) return TableContext::FromCTE; 34 | if (strcmp(context, "cte") == 0) return TableContext::CTE; 35 | if (strcmp(context, "subquery") == 0) return TableContext::Subquery; 36 | throw InternalException("Unknown table context: %s", context); 37 | } 38 | 39 | struct ParseTablesState : public GlobalTableFunctionState { 40 | idx_t row = 0; 41 | vector results; 42 | }; 43 | 44 | struct ParseTablesBindData : public TableFunctionData { 45 | string sql; 46 | }; 47 | 48 | // BIND function: runs during query planning to decide output schema 49 | static unique_ptr ParseTablesBind(ClientContext &context, 50 | TableFunctionBindInput &input, 51 | vector &return_types, 52 | vector &names) { 53 | 54 | string sql_input = StringValue::Get(input.inputs[0]); 55 | 56 | // always return the same columns: 57 | 58 | return_types = {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}; 59 | // schema name, table name, usage context (from, join, cte, etc) 60 | names = {"schema", "table", "context"}; 61 | 62 | // create a bind data object to hold the SQL input 63 | 64 | auto result = make_uniq(); 65 | result->sql = sql_input; 66 | 67 | return std::move(result); 68 | } 69 | 70 | // INIT function: runs before table function execution 71 | static unique_ptr ParseTablesInit(ClientContext &context, 72 | TableFunctionInitInput &input) { 73 | return make_uniq(); 74 | } 75 | 76 | static void ExtractTablesFromRef( 77 | const duckdb::TableRef &ref, 78 | std::vector &results, 79 | const TableContext context = TableContext::From, 80 | bool 
is_top_level = false, 81 | const duckdb::CommonTableExpressionMap *cte_map = nullptr 82 | ) { 83 | using namespace duckdb; 84 | 85 | switch (ref.type) { 86 | case TableReferenceType::BASE_TABLE: { 87 | auto &base = (BaseTableRef &)ref; 88 | TableContext context_label = context; 89 | 90 | if (cte_map && cte_map->map.find(base.table_name) != cte_map->map.end()) { 91 | context_label = TableContext::FromCTE; 92 | } else if (is_top_level) { 93 | context_label = TableContext::From; 94 | } 95 | 96 | results.push_back(TableRefResult{ 97 | base.schema_name.empty() ? "main" : base.schema_name, 98 | base.table_name, 99 | context_label 100 | }); 101 | break; 102 | } 103 | case TableReferenceType::JOIN: { 104 | auto &join = (JoinRef &)ref; 105 | ExtractTablesFromRef(*join.left, results, TableContext::JoinLeft, is_top_level, cte_map); 106 | ExtractTablesFromRef(*join.right, results, TableContext::JoinRight, false, cte_map); 107 | break; 108 | } 109 | case TableReferenceType::SUBQUERY: { 110 | auto &subquery = (SubqueryRef &)ref; 111 | if (subquery.subquery && subquery.subquery->node) { 112 | ExtractTablesFromQueryNode(*subquery.subquery->node, results, TableContext::Subquery, cte_map); 113 | } 114 | break; 115 | } 116 | default: 117 | break; 118 | } 119 | } 120 | 121 | 122 | static void ExtractTablesFromQueryNode( 123 | const duckdb::QueryNode &node, 124 | std::vector &results, 125 | const TableContext context, 126 | const duckdb::CommonTableExpressionMap *cte_map 127 | ) { 128 | using namespace duckdb; 129 | 130 | if (node.type == QueryNodeType::SELECT_NODE) { 131 | auto &select_node = (SelectNode &)node; 132 | 133 | // Handle CTE definitions 134 | for (const auto &entry : select_node.cte_map.map) { 135 | results.push_back(TableRefResult{ 136 | "", entry.first, TableContext::CTE 137 | }); 138 | 139 | if (entry.second && entry.second->query && entry.second->query->node) { 140 | ExtractTablesFromQueryNode(*entry.second->query->node, results, TableContext::From, 
&select_node.cte_map); 141 | } 142 | } 143 | 144 | if (select_node.from_table) { 145 | ExtractTablesFromRef(*select_node.from_table, results, context, true, &select_node.cte_map); 146 | } 147 | } 148 | // additional step necessary for duckdb v1.4.0: unwrap CTE node 149 | else if (node.type == QueryNodeType::CTE_NODE) { 150 | auto &cte_node = (CTENode &)node; 151 | 152 | if (cte_node.child) { 153 | ExtractTablesFromQueryNode(*cte_node.child, results, context, cte_map); 154 | } 155 | } 156 | } 157 | 158 | static void ExtractTablesFromSQL(const std::string &sql, std::vector &results) { 159 | Parser parser; 160 | 161 | try { 162 | parser.ParseQuery(sql); 163 | } catch (const ParserException &ex) { 164 | // swallow parser exceptions to make this function more robust. is_parsable can be used if needed 165 | return; 166 | } 167 | 168 | for (auto &stmt : parser.statements) { 169 | if (stmt->type == StatementType::SELECT_STATEMENT) { 170 | auto &select_stmt = (SelectStatement &)*stmt; 171 | if (select_stmt.node) { 172 | ExtractTablesFromQueryNode(*select_stmt.node, results); 173 | } 174 | } 175 | } 176 | } 177 | 178 | static void ExtractTablesFromSQL(const std::string & sql, std::vector &result, std::unordered_set excluded_types) { 179 | std::vector temp_result; 180 | ExtractTablesFromSQL(sql, temp_result); 181 | std::unordered_set e_types; 182 | 183 | for (auto &type : excluded_types) { 184 | e_types.insert(FromString(type.c_str())); 185 | } 186 | 187 | for (auto &table : temp_result) { 188 | if (e_types.count(table.context) == 0) { 189 | result.push_back(table); 190 | } 191 | } 192 | } 193 | 194 | static void ParseTablesFunction(ClientContext &context, 195 | TableFunctionInput &data, 196 | DataChunk &output) { 197 | auto &state = (ParseTablesState &)*data.global_state; 198 | auto &bind_data = (ParseTablesBindData &)*data.bind_data; 199 | 200 | if (state.results.empty() && state.row == 0) { 201 | ExtractTablesFromSQL(bind_data.sql, state.results); 202 | } 203 | 204 | if 
(state.row >= state.results.size()) { 205 | return; 206 | } 207 | 208 | auto &ref = state.results[state.row]; 209 | output.SetCardinality(1); 210 | output.SetValue(0, 0, Value(ref.schema)); 211 | output.SetValue(1, 0, Value(ref.table)); 212 | output.SetValue(2, 0, Value(ToString(ref.context))); 213 | 214 | state.row++; 215 | } 216 | 217 | static void ParseTablesScalarFunction(DataChunk &args, ExpressionState &state, Vector &result) { 218 | Vector flag(LogicalType::BOOLEAN); 219 | 220 | // Allow for the optional boolean argument. if not provided, default to true 221 | if (args.ColumnCount() == 1) { 222 | // create a default argument to pass below. we'll use a constant vector since all values are the same 223 | Vector c(LogicalType::BOOLEAN); 224 | c.Reference(Value::BOOLEAN(true)); 225 | ConstantVector::Reference(flag, c, 0, args.size()); 226 | } else if (args.ColumnCount() == 2) { 227 | flag.Reference(args.data[1]); 228 | } else { 229 | throw InvalidInputException("parse_tables() expects 1 or 2 arguments"); 230 | } 231 | 232 | // Execute does the heavy lifting of iterating over the input data 233 | // and calling the provided lambda function for each input value. 234 | // The lambda function is responsible for parsing the SQL query and 235 | // extracting the table names. 
236 | BinaryExecutor::Execute(args.data[0], flag, result, args.size(), 237 | [&result](string_t query, bool exclude_cte) -> list_entry_t { 238 | // Parse the SQL query and extract table names 239 | auto query_string = query.GetString(); 240 | std::vector parsed_tables; 241 | if (exclude_cte) { 242 | std::unordered_set excluded_types = {"cte", "from_cte"}; 243 | ExtractTablesFromSQL(query_string, parsed_tables, excluded_types); 244 | } else { 245 | ExtractTablesFromSQL(query_string, parsed_tables); 246 | } 247 | 248 | 249 | auto current_size = ListVector::GetListSize(result); 250 | auto number_of_tables = parsed_tables.size(); 251 | auto new_size = current_size + number_of_tables; 252 | 253 | // grow list if needed 254 | if (ListVector::GetListCapacity(result) < new_size) { 255 | ListVector::Reserve(result, new_size); 256 | } 257 | 258 | // Write the string into the child vector 259 | auto tables = FlatVector::GetData(ListVector::GetEntry(result)); 260 | for (size_t i = 0; i < parsed_tables.size(); i++) { 261 | auto &table = parsed_tables[i]; 262 | tables[current_size + i] = StringVector::AddStringOrBlob(ListVector::GetEntry(result), table.table); 263 | } 264 | 265 | // Update size 266 | ListVector::SetListSize(result, new_size); 267 | 268 | return list_entry_t(current_size, number_of_tables); 269 | }); 270 | } 271 | 272 | static void ParseTablesScalarFunction_struct(DataChunk &args, ExpressionState &state, Vector &result) { 273 | UnaryExecutor::Execute(args.data[0], result, args.size(), 274 | [&result](string_t query) -> list_entry_t { 275 | // Parse the SQL query and extract table names 276 | auto query_string = query.GetString(); 277 | std::vector parsed_tables; 278 | ExtractTablesFromSQL(query_string, parsed_tables); 279 | 280 | auto current_size = ListVector::GetListSize(result); 281 | auto number_of_tables = parsed_tables.size(); 282 | auto new_size = current_size + number_of_tables; 283 | 284 | // Grow list vector if needed 285 | if 
(ListVector::GetListCapacity(result) < new_size) { 286 | ListVector::Reserve(result, new_size); 287 | } 288 | 289 | // Get the struct child vector of the list 290 | auto &struct_vector = ListVector::GetEntry(result); 291 | 292 | // Ensure list size is updated 293 | ListVector::SetListSize(result, new_size); 294 | 295 | // Get the fields in the STRUCT 296 | auto &entries = StructVector::GetEntries(struct_vector); 297 | auto &schema_entry = *entries[0]; // "schema" field 298 | auto &table_entry = *entries[1]; // "table" field 299 | auto &context_entry = *entries[2]; // "context" field 300 | 301 | auto schema_data = FlatVector::GetData(schema_entry); 302 | auto table_data = FlatVector::GetData(table_entry); 303 | auto context_data = FlatVector::GetData(context_entry); 304 | 305 | 306 | for (size_t i = 0; i < number_of_tables; i++) { 307 | const auto &table = parsed_tables[i]; 308 | auto idx = current_size + i; 309 | 310 | schema_data[idx] = StringVector::AddStringOrBlob(schema_entry, table.schema); 311 | table_data[idx] = StringVector::AddStringOrBlob(table_entry, table.table); 312 | context_data[idx] = StringVector::AddStringOrBlob(context_entry, ToString(table.context)); 313 | } 314 | 315 | return list_entry_t(current_size, number_of_tables); 316 | }); 317 | } 318 | 319 | static void IsParsableFunction(DataChunk &args, ExpressionState &state, Vector &result) { 320 | UnaryExecutor::Execute(args.data[0], result, args.size(), 321 | [](string_t query) -> bool { 322 | try { 323 | Parser parser; 324 | parser.ParseQuery(query.GetString()); 325 | return true; 326 | } catch (const std::exception &) { 327 | return false; 328 | } 329 | }); 330 | } 331 | 332 | // Extension scaffolding 333 | // --------------------------------------------------- 334 | 335 | void RegisterParseTablesFunction(ExtensionLoader &loader) { 336 | TableFunction tf("parse_tables", {LogicalType::VARCHAR}, ParseTablesFunction, ParseTablesBind, ParseTablesInit); 337 | loader.RegisterFunction(tf); 338 | } 339 
| 340 | void RegisterParseTableScalarFunction(ExtensionLoader &loader) { 341 | // parse_table_names is overloaded, allowing for an optional boolean argument 342 | // that indicates whether to exclude CTE-related references from the result (defaults to true) 343 | // usage: parse_table_names(sql_query [, exclude_cte]) 344 | ScalarFunctionSet set("parse_table_names"); 345 | set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::LIST(LogicalType::VARCHAR), ParseTablesScalarFunction)); 346 | set.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::BOOLEAN}, LogicalType::LIST(LogicalType::VARCHAR), ParseTablesScalarFunction)); 347 | loader.RegisterFunction(set); 348 | 349 | // parse_tables (scalar form) returns a list of structs with schema, table and context fields 350 | auto return_type = LogicalType::LIST(LogicalType::STRUCT({ 351 | {"schema", LogicalType::VARCHAR}, 352 | {"table", LogicalType::VARCHAR}, 353 | {"context", LogicalType::VARCHAR} 354 | })); 355 | ScalarFunction sf("parse_tables", {LogicalType::VARCHAR}, return_type, ParseTablesScalarFunction_struct); 356 | loader.RegisterFunction(sf); 357 | 358 | // is_parsable is a scalar function that returns a boolean indicating whether the SQL query is parsable (no parse errors) 359 | ScalarFunction is_parsable("is_parsable", {LogicalType::VARCHAR}, LogicalType::BOOLEAN, IsParsableFunction); 360 | loader.RegisterFunction(is_parsable); 361 | } 362 | 363 | } // namespace duckdb 364 | -------------------------------------------------------------------------------- /src/parse_where.cpp: -------------------------------------------------------------------------------- 1 | #include "parse_where.hpp" 2 | #include "duckdb.hpp" 3 | #include "duckdb/parser/parser.hpp" 4 | #include "duckdb/parser/statement/select_statement.hpp" 5 | #include "duckdb/parser/query_node/select_node.hpp" 6 | #include "duckdb/parser/expression/columnref_expression.hpp" 7 | #include "duckdb/parser/expression/comparison_expression.hpp" 8 | #include
"duckdb/parser/expression/conjunction_expression.hpp" 9 | #include "duckdb/parser/expression/constant_expression.hpp" 10 | #include "duckdb/parser/expression/function_expression.hpp" 11 | #include "duckdb/parser/expression/operator_expression.hpp" 12 | #include "duckdb/parser/expression/star_expression.hpp" 13 | #include "duckdb/parser/expression/subquery_expression.hpp" 14 | #include "duckdb/parser/expression/window_expression.hpp" 15 | #include "duckdb/parser/expression/case_expression.hpp" 16 | #include "duckdb/parser/expression/cast_expression.hpp" 17 | #include "duckdb/parser/expression/between_expression.hpp" 18 | #include "duckdb/parser/expression/lambda_expression.hpp" 19 | #include "duckdb/parser/expression/positional_reference_expression.hpp" 20 | #include "duckdb/parser/expression/parameter_expression.hpp" 21 | #include "duckdb/parser/tableref/basetableref.hpp" 22 | 23 | namespace duckdb { 24 | 25 | struct ParseWhereState : public GlobalTableFunctionState { 26 | idx_t row = 0; 27 | vector results; 28 | }; 29 | 30 | struct ParseWhereBindData : public TableFunctionData { 31 | string sql; 32 | }; 33 | 34 | static unique_ptr ParseWhereBind(ClientContext &context, 35 | TableFunctionBindInput &input, 36 | vector &return_types, 37 | vector &names) { 38 | string sql_input = StringValue::Get(input.inputs[0]); 39 | 40 | return_types = { 41 | LogicalType::VARCHAR, // condition 42 | LogicalType::VARCHAR, // table_name 43 | LogicalType::VARCHAR // context 44 | }; 45 | 46 | names = {"condition", "table_name", "context"}; 47 | 48 | auto result = make_uniq(); 49 | result->sql = sql_input; 50 | 51 | return std::move(result); 52 | } 53 | 54 | static unique_ptr ParseWhereInit(ClientContext &context, 55 | TableFunctionInitInput &input) { 56 | return make_uniq(); 57 | } 58 | 59 | static string ExpressionToString(const ParsedExpression &expr) { 60 | return expr.ToString(); 61 | } 62 | 63 | static void ExtractWhereConditionsFromExpression( 64 | const ParsedExpression &expr, 65 
| vector &results, 66 | const string &context = "WHERE", 67 | const string &table_name = "" 68 | ) { 69 | if (expr.type == ExpressionType::INVALID) return; 70 | 71 | switch (expr.GetExpressionClass()) { 72 | case ExpressionClass::CONJUNCTION: { 73 | auto &conj = (ConjunctionExpression &)expr; 74 | for (auto &child : conj.children) { 75 | ExtractWhereConditionsFromExpression(*child, results, context, table_name); 76 | } 77 | break; 78 | } 79 | case ExpressionClass::COMPARISON: { 80 | auto &comp = (ComparisonExpression &)expr; 81 | results.push_back(WhereConditionResult{ 82 | ExpressionToString(comp), 83 | table_name, 84 | context 85 | }); 86 | break; 87 | } 88 | case ExpressionClass::OPERATOR: { 89 | auto &op = (OperatorExpression &)expr; 90 | results.push_back(WhereConditionResult{ 91 | ExpressionToString(op), 92 | table_name, 93 | context 94 | }); 95 | break; 96 | } 97 | case ExpressionClass::FUNCTION: { 98 | auto &func = (FunctionExpression &)expr; 99 | results.push_back(WhereConditionResult{ 100 | ExpressionToString(func), 101 | table_name, 102 | context 103 | }); 104 | break; 105 | } 106 | case ExpressionClass::BETWEEN: { 107 | auto &between = (BetweenExpression &)expr; 108 | results.push_back(WhereConditionResult{ 109 | ExpressionToString(between), 110 | table_name, 111 | context 112 | }); 113 | break; 114 | } 115 | case ExpressionClass::CASE: { 116 | auto &case_expr = (CaseExpression &)expr; 117 | results.push_back(WhereConditionResult{ 118 | ExpressionToString(case_expr), 119 | table_name, 120 | context 121 | }); 122 | break; 123 | } 124 | default: 125 | break; 126 | } 127 | } 128 | 129 | static void ExtractWhereConditionsFromQueryNode( 130 | const QueryNode &node, 131 | vector &results 132 | ) { 133 | if (node.type == QueryNodeType::SELECT_NODE) { 134 | auto &select_node = (SelectNode &)node; 135 | string table_name = "(empty)"; // Default table name 136 | 137 | // Extract table name from FROM clause 138 | if (select_node.from_table) { 139 | if 
(select_node.from_table->type == TableReferenceType::BASE_TABLE) { 140 | auto &base = (BaseTableRef &)*select_node.from_table; 141 | table_name = base.table_name; 142 | } 143 | } 144 | 145 | // Extract WHERE conditions 146 | if (select_node.where_clause) { 147 | ExtractWhereConditionsFromExpression(*select_node.where_clause, results, "WHERE", table_name); 148 | } 149 | 150 | // Extract HAVING conditions 151 | if (select_node.having) { 152 | ExtractWhereConditionsFromExpression(*select_node.having, results, "HAVING", table_name); 153 | } 154 | } 155 | } 156 | 157 | static void ExtractWhereConditionsFromSQL(const string &sql, vector &results) { 158 | Parser parser; 159 | 160 | try { 161 | parser.ParseQuery(sql); 162 | } catch (const ParserException &ex) { 163 | return; 164 | } 165 | 166 | for (auto &stmt : parser.statements) { 167 | if (stmt->type == StatementType::SELECT_STATEMENT) { 168 | auto &select_stmt = (SelectStatement &)*stmt; 169 | if (select_stmt.node) { 170 | ExtractWhereConditionsFromQueryNode(*select_stmt.node, results); 171 | } 172 | } 173 | } 174 | } 175 | 176 | static void ParseWhereFunction(ClientContext &context, 177 | TableFunctionInput &data, 178 | DataChunk &output) { 179 | auto &state = (ParseWhereState &)*data.global_state; 180 | auto &bind_data = (ParseWhereBindData &)*data.bind_data; 181 | 182 | if (state.results.empty() && state.row == 0) { 183 | ExtractWhereConditionsFromSQL(bind_data.sql, state.results); 184 | } 185 | 186 | if (state.row >= state.results.size()) { 187 | return; 188 | } 189 | 190 | auto &result = state.results[state.row]; 191 | output.SetCardinality(1); 192 | output.SetValue(0, 0, Value(result.condition)); 193 | output.SetValue(1, 0, Value(result.table_name)); 194 | output.SetValue(2, 0, Value(result.context)); 195 | 196 | state.row++; 197 | } 198 | 199 | static void ParseWhereScalarFunction(DataChunk &args, ExpressionState &state, Vector &result) { 200 | UnaryExecutor::Execute(args.data[0], result, args.size(), 201 | 
[&result](string_t query) -> list_entry_t { 202 | auto query_string = query.GetString(); 203 | vector conditions; 204 | ExtractWhereConditionsFromSQL(query_string, conditions); 205 | 206 | auto current_size = ListVector::GetListSize(result); 207 | auto number_of_conditions = conditions.size(); 208 | auto new_size = current_size + number_of_conditions; 209 | 210 | if (ListVector::GetListCapacity(result) < new_size) { 211 | ListVector::Reserve(result, new_size); 212 | } 213 | 214 | auto &struct_vector = ListVector::GetEntry(result); 215 | auto &entries = StructVector::GetEntries(struct_vector); 216 | auto &condition_entry = *entries[0]; 217 | auto &table_entry = *entries[1]; 218 | auto &context_entry = *entries[2]; 219 | 220 | auto condition_data = FlatVector::GetData(condition_entry); 221 | auto table_data = FlatVector::GetData(table_entry); 222 | auto context_data = FlatVector::GetData(context_entry); 223 | 224 | for (size_t i = 0; i < number_of_conditions; i++) { 225 | const auto &condition = conditions[i]; 226 | auto idx = current_size + i; 227 | 228 | condition_data[idx] = StringVector::AddStringOrBlob(condition_entry, condition.condition); 229 | table_data[idx] = StringVector::AddStringOrBlob(table_entry, condition.table_name); 230 | context_data[idx] = StringVector::AddStringOrBlob(context_entry, condition.context); 231 | } 232 | 233 | ListVector::SetListSize(result, new_size); 234 | return list_entry_t(current_size, number_of_conditions); 235 | }); 236 | } 237 | 238 | void RegisterParseWhereFunction(ExtensionLoader &loader) { 239 | TableFunction tf("parse_where", {LogicalType::VARCHAR}, ParseWhereFunction, ParseWhereBind, ParseWhereInit); 240 | loader.RegisterFunction(tf); 241 | } 242 | 243 | void RegisterParseWhereScalarFunction(ExtensionLoader &loader) { 244 | auto return_type = LogicalType::LIST(LogicalType::STRUCT({ 245 | {"condition", LogicalType::VARCHAR}, 246 | {"table_name", LogicalType::VARCHAR}, 247 | {"context", LogicalType::VARCHAR} 248 | })); 249 
| ScalarFunction sf("parse_where", {LogicalType::VARCHAR}, return_type, ParseWhereScalarFunction); 250 | loader.RegisterFunction(sf); 251 | } 252 | 253 | static string DetailedExpressionTypeToOperator(ExpressionType type) { 254 | switch (type) { 255 | case ExpressionType::COMPARE_EQUAL: 256 | return "="; 257 | case ExpressionType::COMPARE_NOTEQUAL: 258 | return "!="; 259 | case ExpressionType::COMPARE_LESSTHAN: 260 | return "<"; 261 | case ExpressionType::COMPARE_GREATERTHAN: 262 | return ">"; 263 | case ExpressionType::COMPARE_LESSTHANOREQUALTO: 264 | return "<="; 265 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: 266 | return ">="; 267 | case ExpressionType::COMPARE_DISTINCT_FROM: 268 | return "IS DISTINCT FROM"; 269 | case ExpressionType::COMPARE_NOT_DISTINCT_FROM: 270 | return "IS NOT DISTINCT FROM"; 271 | default: 272 | return "UNKNOWN"; 273 | } 274 | } 275 | 276 | static void ExtractDetailedWhereConditionsFromExpression( 277 | const ParsedExpression &expr, 278 | vector &results, 279 | const string &context = "WHERE", 280 | const string &table_name = "" 281 | ) { 282 | if (expr.type == ExpressionType::INVALID) return; 283 | 284 | switch (expr.GetExpressionClass()) { 285 | case ExpressionClass::CONJUNCTION: { 286 | auto &conj = (ConjunctionExpression &)expr; 287 | for (auto &child : conj.children) { 288 | ExtractDetailedWhereConditionsFromExpression(*child, results, context, table_name); 289 | } 290 | break; 291 | } 292 | case ExpressionClass::COMPARISON: { 293 | auto &comp = (ComparisonExpression &)expr; 294 | DetailedWhereConditionResult result; 295 | result.context = context; 296 | result.table_name = table_name; 297 | 298 | // Extract column name 299 | if (comp.left->GetExpressionClass() == ExpressionClass::COLUMN_REF) { 300 | auto &col_ref = (ColumnRefExpression &)*comp.left; 301 | result.column_name = col_ref.GetColumnName(); 302 | } 303 | 304 | // Extract operator 305 | result.operator_type = DetailedExpressionTypeToOperator(comp.type); 306 | 307 | 
// Extract value 308 | if (comp.right->GetExpressionClass() == ExpressionClass::CONSTANT) { 309 | auto &const_expr = (ConstantExpression &)*comp.right; 310 | result.value = const_expr.value.ToString(); 311 | } else { 312 | result.value = comp.right->ToString(); 313 | } 314 | 315 | results.push_back(result); 316 | break; 317 | } 318 | case ExpressionClass::BETWEEN: { 319 | auto &between = (BetweenExpression &)expr; 320 | DetailedWhereConditionResult result; 321 | result.context = context; 322 | result.table_name = table_name; 323 | 324 | // Extract column name 325 | if (between.input->GetExpressionClass() == ExpressionClass::COLUMN_REF) { 326 | auto &col_ref = (ColumnRefExpression &)*between.input; 327 | result.column_name = col_ref.GetColumnName(); 328 | } 329 | 330 | // For BETWEEN, we'll create two conditions: >= lower AND <= upper 331 | result.operator_type = ">="; 332 | if (between.lower->GetExpressionClass() == ExpressionClass::CONSTANT) { 333 | auto &const_expr = (ConstantExpression &)*between.lower; 334 | result.value = const_expr.value.ToString(); 335 | } else { 336 | result.value = between.lower->ToString(); 337 | } 338 | results.push_back(result); 339 | 340 | // Add the upper bound condition 341 | DetailedWhereConditionResult upper_result = result; 342 | upper_result.operator_type = "<="; 343 | if (between.upper->GetExpressionClass() == ExpressionClass::CONSTANT) { 344 | auto &const_expr = (ConstantExpression &)*between.upper; 345 | upper_result.value = const_expr.value.ToString(); 346 | } else { 347 | upper_result.value = between.upper->ToString(); 348 | } 349 | results.push_back(upper_result); 350 | break; 351 | } 352 | case ExpressionClass::OPERATOR: { 353 | auto &op = (OperatorExpression &)expr; 354 | if (op.children.size() >= 2) { 355 | DetailedWhereConditionResult result; 356 | result.context = context; 357 | result.table_name = table_name; 358 | 359 | // Extract column name 360 | if (op.children[0]->GetExpressionClass() == 
ExpressionClass::COLUMN_REF) { 361 | auto &col_ref = (ColumnRefExpression &)*op.children[0]; 362 | result.column_name = col_ref.GetColumnName(); 363 | } 364 | 365 | // Extract operator 366 | result.operator_type = DetailedExpressionTypeToOperator(op.type); 367 | 368 | // Extract value 369 | if (op.children[1]->GetExpressionClass() == ExpressionClass::CONSTANT) { 370 | auto &const_expr = (ConstantExpression &)*op.children[1]; 371 | result.value = const_expr.value.ToString(); 372 | } else { 373 | result.value = op.children[1]->ToString(); 374 | } 375 | 376 | results.push_back(result); 377 | } 378 | break; 379 | } 380 | default: 381 | break; 382 | } 383 | } 384 | 385 | struct ParseWhereDetailedState : public GlobalTableFunctionState { 386 | idx_t row = 0; 387 | vector results; 388 | }; 389 | 390 | struct ParseWhereDetailedBindData : public TableFunctionData { 391 | string sql; 392 | }; 393 | 394 | static unique_ptr ParseWhereDetailedBind(ClientContext &context, 395 | TableFunctionBindInput &input, 396 | vector &return_types, 397 | vector &names) { 398 | string sql_input = StringValue::Get(input.inputs[0]); 399 | 400 | return_types = { 401 | LogicalType::VARCHAR, // column_name 402 | LogicalType::VARCHAR, // operator_type 403 | LogicalType::VARCHAR, // value 404 | LogicalType::VARCHAR, // table_name 405 | LogicalType::VARCHAR // context 406 | }; 407 | 408 | names = {"column_name", "operator_type", "value", "table_name", "context"}; 409 | 410 | auto result = make_uniq(); 411 | result->sql = sql_input; 412 | 413 | return std::move(result); 414 | } 415 | 416 | static unique_ptr ParseWhereDetailedInit(ClientContext &context, 417 | TableFunctionInitInput &input) { 418 | return make_uniq(); 419 | } 420 | 421 | static void ParseWhereDetailedFunction(ClientContext &context, 422 | TableFunctionInput &data, 423 | DataChunk &output) { 424 | auto &state = (ParseWhereDetailedState &)*data.global_state; 425 | auto &bind_data = (ParseWhereDetailedBindData &)*data.bind_data; 426 | 427 
| if (state.results.empty() && state.row == 0) { 428 | Parser parser; 429 | try { 430 | parser.ParseQuery(bind_data.sql); 431 | } catch (const ParserException &ex) { 432 | return; 433 | } 434 | 435 | for (auto &stmt : parser.statements) { 436 | if (stmt->type == StatementType::SELECT_STATEMENT) { 437 | auto &select_stmt = (SelectStatement &)*stmt; 438 | if (select_stmt.node) { 439 | if (select_stmt.node->type == QueryNodeType::SELECT_NODE) { 440 | auto &select_node = (SelectNode &)*select_stmt.node; 441 | string table_name = "(empty)"; // Default table name 442 | 443 | // Try to extract table name from FROM clause 444 | if (select_node.from_table) { 445 | if (select_node.from_table->type == TableReferenceType::BASE_TABLE) { 446 | auto &base_table = (BaseTableRef &)*select_node.from_table; 447 | table_name = base_table.table_name; 448 | } 449 | } 450 | 451 | if (select_node.where_clause) { 452 | ExtractDetailedWhereConditionsFromExpression(*select_node.where_clause, state.results, "WHERE", table_name); 453 | } 454 | if (select_node.having) { 455 | ExtractDetailedWhereConditionsFromExpression(*select_node.having, state.results, "HAVING", table_name); 456 | } 457 | } 458 | } 459 | } 460 | } 461 | } 462 | 463 | if (state.row >= state.results.size()) { 464 | return; 465 | } 466 | 467 | auto &result = state.results[state.row]; 468 | output.SetCardinality(1); 469 | output.SetValue(0, 0, Value(result.column_name)); 470 | output.SetValue(1, 0, Value(result.operator_type)); 471 | output.SetValue(2, 0, Value(result.value)); 472 | output.SetValue(3, 0, Value(result.table_name)); 473 | output.SetValue(4, 0, Value(result.context)); 474 | 475 | state.row++; 476 | } 477 | 478 | void RegisterParseWhereDetailedFunction(ExtensionLoader &loader) { 479 | TableFunction tf("parse_where_detailed", {LogicalType::VARCHAR}, ParseWhereDetailedFunction, ParseWhereDetailedBind, ParseWhereDetailedInit); 480 | loader.RegisterFunction(tf); 481 | } 482 | 483 | } // namespace duckdb 484 | 
--------------------------------------------------------------------------------