├── .clang-format ├── .devcontainer ├── .bashrc ├── .psqlrc ├── Dockerfile_14 ├── Dockerfile_15 ├── Dockerfile_16 ├── Dockerfile_17 ├── devcontainer.json └── postStartCommand.sh ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ ├── epic.yml │ └── feature_request.yml ├── .gitignore ├── .gitmodules ├── .vscode ├── c_cpp_properties.json ├── launch.json └── settings.json ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── Makefile ├── Makefile.build ├── README.md ├── pg_mooncake.control ├── rust_extensions └── delta │ ├── Cargo.lock │ ├── Cargo.toml │ ├── build.rs │ └── src │ └── lib.rs ├── sql ├── pg_mooncake--0.1.0--0.1.1.sql ├── pg_mooncake--0.1.0.sql └── pg_mooncake--0.1.1--0.1.2.sql ├── src ├── columnstore │ ├── columnstore.cpp │ ├── columnstore.hpp │ ├── columnstore_metadata.cpp │ ├── columnstore_metadata.hpp │ ├── columnstore_statistics.cpp │ ├── columnstore_statistics.hpp │ ├── columnstore_table.cpp │ ├── columnstore_table.hpp │ └── execution │ │ ├── columnstore_delete.cpp │ │ ├── columnstore_insert.cpp │ │ ├── columnstore_scan.cpp │ │ └── columnstore_update.cpp ├── columnstore_handler.cpp ├── columnstore_handler.hpp ├── lake │ ├── lake.cpp │ └── lake.hpp ├── pgduckdb │ ├── .clang-format │ ├── LICENSE │ ├── catalog │ │ ├── pgduckdb_catalog.cpp │ │ ├── pgduckdb_catalog.hpp │ │ ├── pgduckdb_schema.cpp │ │ ├── pgduckdb_schema.hpp │ │ ├── pgduckdb_storage.cpp │ │ ├── pgduckdb_storage.hpp │ │ ├── pgduckdb_table.cpp │ │ ├── pgduckdb_table.hpp │ │ ├── pgduckdb_transaction.cpp │ │ ├── pgduckdb_transaction.hpp │ │ ├── pgduckdb_transaction_manager.cpp │ │ └── pgduckdb_transaction_manager.hpp │ ├── logger.hpp │ ├── pg │ │ ├── declarations.hpp │ │ ├── error_data.cpp │ │ ├── error_data.hpp │ │ ├── relations.cpp │ │ ├── relations.hpp │ │ ├── snapshots.hpp │ │ ├── transactions.cpp │ │ └── transactions.hpp │ ├── pgduckdb.cpp │ ├── pgduckdb.h │ ├── pgduckdb_background_worker.cpp │ ├── pgduckdb_background_worker.hpp │ ├── pgduckdb_ddl.cpp │ 
├── pgduckdb_ddl.hpp │ ├── pgduckdb_detoast.cpp │ ├── pgduckdb_detoast.hpp │ ├── pgduckdb_duckdb.cpp │ ├── pgduckdb_duckdb.hpp │ ├── pgduckdb_filter.cpp │ ├── pgduckdb_filter.hpp │ ├── pgduckdb_guc.h │ ├── pgduckdb_hooks.cpp │ ├── pgduckdb_metadata_cache.cpp │ ├── pgduckdb_metadata_cache.hpp │ ├── pgduckdb_node.cpp │ ├── pgduckdb_node.hpp │ ├── pgduckdb_options.cpp │ ├── pgduckdb_options.hpp │ ├── pgduckdb_planner.cpp │ ├── pgduckdb_planner.hpp │ ├── pgduckdb_process_lock.hpp │ ├── pgduckdb_ruleutils.cpp │ ├── pgduckdb_ruleutils.h │ ├── pgduckdb_table_am.cpp │ ├── pgduckdb_table_am.hpp │ ├── pgduckdb_types.cpp │ ├── pgduckdb_types.hpp │ ├── pgduckdb_utils.cpp │ ├── pgduckdb_utils.hpp │ ├── pgduckdb_xact.cpp │ ├── pgduckdb_xact.hpp │ ├── scan │ │ ├── heap_reader.cpp │ │ ├── heap_reader.hpp │ │ ├── postgres_scan.cpp │ │ ├── postgres_scan.hpp │ │ ├── postgres_seq_scan.cpp │ │ └── postgres_seq_scan.hpp │ ├── utility │ │ ├── allocator.hpp │ │ ├── copy.cpp │ │ ├── copy.hpp │ │ ├── cpp_only_file.hpp │ │ ├── cpp_wrapper.hpp │ │ └── rename_ruleutils.h │ └── vendor │ │ ├── .clang-format │ │ ├── pg_explain.cpp │ │ ├── pg_explain.hpp │ │ ├── pg_list.hpp │ │ ├── pg_numeric_c.hpp │ │ ├── pg_ruleutils.h │ │ ├── pg_ruleutils_14.c │ │ ├── pg_ruleutils_15.c │ │ ├── pg_ruleutils_16.c │ │ └── pg_ruleutils_17.c ├── pgmooncake.cpp └── pgmooncake_guc.hpp ├── test ├── expected │ ├── approx_count_distinct.out │ ├── cardinality.out │ ├── columns.out │ ├── constraints.out │ ├── copy.out │ ├── create_table_as.out │ ├── cursor.out │ ├── non_superuser.out │ ├── returning.out │ ├── sanity.out │ ├── segment_elimination.out │ ├── transaction.out │ ├── trigger.out │ ├── unsupported │ │ ├── alter.out │ │ ├── columns.out │ │ ├── types.out │ │ └── update.out │ └── update_delete_with_join.out └── sql │ ├── approx_count_distinct.sql │ ├── cardinality.sql │ ├── columns.sql │ ├── constraints.sql │ ├── copy.sql │ ├── create_table_as.sql │ ├── cursor.sql │ ├── non_superuser.sql │ ├── returning.sql │ ├── 
sanity.sql │ ├── segment_elimination.sql │ ├── transaction.sql │ ├── trigger.sql │ ├── unsupported │ ├── alter.sql │ ├── columns.sql │ ├── types.sql │ └── update.sql │ └── update_delete_with_join.sql └── third_party └── pg_mooncake_extensions.cmake /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | TabWidth: 4 4 | IndentWidth: 4 5 | ColumnLimit: 120 6 | AllowShortFunctionsOnASingleLine: Empty 7 | --- 8 | Language: Cpp 9 | AccessModifierOffset: -4 10 | ... 11 | -------------------------------------------------------------------------------- /.devcontainer/.bashrc: -------------------------------------------------------------------------------- 1 | source /etc/bash_completion.d/git-prompt 2 | export GIT_PS1_SHOWDIRTYSTATE=1 3 | export PS1='\[\e[1;32m\]\u@dev\[\e[m\]:\[\e[1;34m\]\w\[\e[0;33m\]$(__git_ps1 " (%s)")\[\e[1;34m\]$\[\e[m\] ' 4 | -------------------------------------------------------------------------------- /.devcontainer/.psqlrc: -------------------------------------------------------------------------------- 1 | \set PROMPT1 '%/ (pid: %p) %R%# ' 2 | \set PROMPT2 ' ' 3 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile_14: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN yes | unminimize 4 | 5 | RUN apt update \ 6 | && apt install -y \ 7 | clang-format \ 8 | cmake \ 9 | curl \ 10 | g++ \ 11 | gdb \ 12 | git \ 13 | libicu-dev \ 14 | libreadline-dev \ 15 | libssl-dev \ 16 | locales \ 17 | man \ 18 | pkg-config \ 19 | sudo \ 20 | vim \ 21 | zlib1g-dev \ 22 | && rm -rf /var/lib/apt/lists/* 23 | 24 | RUN locale-gen en_US.UTF-8 25 | 26 | RUN useradd -ms /bin/bash postgres \ 27 | && echo 'postgres ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 28 | USER postgres 29 | WORKDIR /home/postgres 30 | RUN echo 'source /workspaces/pg_mooncake/.devcontainer/.bashrc' >> .bashrc 
31 | 32 | RUN curl https://sh.rustup.rs | sh -s -- -y 33 | 34 | ENV PATH="/usr/local/pgsql/bin:${PATH}" 35 | RUN curl https://ftp.postgresql.org/pub/source/v14.15/postgresql-14.15.tar.bz2 | bzip2 -d | tar x \ 36 | && cd postgresql-14.15 \ 37 | && ./configure --enable-cassert --enable-debug CFLAGS='-ggdb3' \ 38 | && make -j$(nproc) \ 39 | && sudo make install \ 40 | && sudo chown -R postgres:postgres /usr/local/pgsql 41 | RUN echo '\\i /workspaces/pg_mooncake/.devcontainer/.psqlrc' > .psqlrc 42 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile_15: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN yes | unminimize 4 | 5 | RUN apt update \ 6 | && apt install -y \ 7 | clang-format \ 8 | cmake \ 9 | curl \ 10 | g++ \ 11 | gdb \ 12 | git \ 13 | libicu-dev \ 14 | libreadline-dev \ 15 | libssl-dev \ 16 | locales \ 17 | man \ 18 | pkg-config \ 19 | sudo \ 20 | vim \ 21 | zlib1g-dev \ 22 | && rm -rf /var/lib/apt/lists/* 23 | 24 | RUN locale-gen en_US.UTF-8 25 | 26 | RUN useradd -ms /bin/bash postgres \ 27 | && echo 'postgres ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 28 | USER postgres 29 | WORKDIR /home/postgres 30 | RUN echo 'source /workspaces/pg_mooncake/.devcontainer/.bashrc' >> .bashrc 31 | 32 | RUN curl https://sh.rustup.rs | sh -s -- -y 33 | 34 | ENV PATH="/usr/local/pgsql/bin:${PATH}" 35 | RUN curl https://ftp.postgresql.org/pub/source/v15.10/postgresql-15.10.tar.bz2 | bzip2 -d | tar x \ 36 | && cd postgresql-15.10 \ 37 | && ./configure --enable-cassert --enable-debug CFLAGS='-ggdb3' \ 38 | && make -j$(nproc) \ 39 | && sudo make install \ 40 | && sudo chown -R postgres:postgres /usr/local/pgsql 41 | RUN echo '\\i /workspaces/pg_mooncake/.devcontainer/.psqlrc' > .psqlrc 42 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile_16: 
-------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN yes | unminimize 4 | 5 | RUN apt update \ 6 | && apt install -y \ 7 | clang-format \ 8 | cmake \ 9 | curl \ 10 | g++ \ 11 | gdb \ 12 | git \ 13 | libicu-dev \ 14 | libreadline-dev \ 15 | libssl-dev \ 16 | locales \ 17 | man \ 18 | pkg-config \ 19 | sudo \ 20 | vim \ 21 | zlib1g-dev \ 22 | && rm -rf /var/lib/apt/lists/* 23 | 24 | RUN locale-gen en_US.UTF-8 25 | 26 | RUN useradd -ms /bin/bash postgres \ 27 | && echo 'postgres ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 28 | USER postgres 29 | WORKDIR /home/postgres 30 | RUN echo 'source /workspaces/pg_mooncake/.devcontainer/.bashrc' >> .bashrc 31 | 32 | RUN curl https://sh.rustup.rs | sh -s -- -y 33 | 34 | ENV PATH="/usr/local/pgsql/bin:${PATH}" 35 | RUN curl https://ftp.postgresql.org/pub/source/v16.6/postgresql-16.6.tar.bz2 | bzip2 -d | tar x \ 36 | && cd postgresql-16.6 \ 37 | && ./configure --enable-cassert --enable-debug CFLAGS='-ggdb3' \ 38 | && make -j$(nproc) \ 39 | && sudo make install \ 40 | && sudo chown -R postgres:postgres /usr/local/pgsql 41 | RUN echo '\\i /workspaces/pg_mooncake/.devcontainer/.psqlrc' > .psqlrc 42 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile_17: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN yes | unminimize 4 | 5 | RUN apt update \ 6 | && apt install -y \ 7 | bison \ 8 | clang-format \ 9 | cmake \ 10 | curl \ 11 | flex \ 12 | g++ \ 13 | gdb \ 14 | git \ 15 | libicu-dev \ 16 | libreadline-dev \ 17 | libssl-dev \ 18 | locales \ 19 | man \ 20 | pkg-config \ 21 | sudo \ 22 | vim \ 23 | zlib1g-dev \ 24 | && rm -rf /var/lib/apt/lists/* 25 | 26 | RUN locale-gen en_US.UTF-8 27 | 28 | RUN useradd -ms /bin/bash postgres \ 29 | && echo 'postgres ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 30 | USER postgres 31 | WORKDIR /home/postgres 32 | RUN echo 'source 
/workspaces/pg_mooncake/.devcontainer/.bashrc' >> .bashrc 33 | 34 | RUN curl https://sh.rustup.rs | sh -s -- -y 35 | 36 | ENV PATH="/usr/local/pgsql/bin:${PATH}" 37 | RUN curl https://ftp.postgresql.org/pub/source/v17.2/postgresql-17.2.tar.bz2 | bzip2 -d | tar x \ 38 | && cd postgresql-17.2 \ 39 | && ./configure --enable-cassert --enable-debug CFLAGS='-ggdb3' \ 40 | && make -j$(nproc) \ 41 | && sudo make install \ 42 | && sudo chown -R postgres:postgres /usr/local/pgsql 43 | RUN echo '\\i /workspaces/pg_mooncake/.devcontainer/.psqlrc' > .psqlrc 44 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | "dockerfile": "Dockerfile_17" 4 | }, 5 | "postStartCommand": "bash .devcontainer/postStartCommand.sh", 6 | "customizations": { 7 | "vscode": { 8 | "extensions": [ 9 | "ms-vscode.cpptools", 10 | "eamodio.gitlens", 11 | "rust-lang.rust-analyzer" 12 | ] 13 | } 14 | }, 15 | "runArgs": [ 16 | "--cap-add=SYS_PTRACE" 17 | ], 18 | "features": { 19 | "ghcr.io/devcontainers/features/sshd:1": {} 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /.devcontainer/postStartCommand.sh: -------------------------------------------------------------------------------- 1 | git config --global --add safe.directory /workspaces/pg_mooncake 2 | git config --global --add safe.directory /workspaces/pg_mooncake/third_party/duckdb 3 | 4 | rm -rf /usr/local/pgsql/data 5 | initdb -D /usr/local/pgsql/data 6 | pg_ctl -D /usr/local/pgsql/data start 7 | createdb mooncake 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report to help us improve. 
3 | labels: 4 | - bug 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: What happens? 9 | description: A short, clear and concise description of the bug. 10 | validations: 11 | required: true 12 | 13 | - type: textarea 14 | attributes: 15 | label: To Reproduce 16 | description: | 17 | Please provide steps to reproduce the behavior, preferably a [minimal reproducible example](https://en.wikipedia.org/wiki/Minimal_reproducible_example). 18 | 19 | Format the code and the output as [code blocks](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks) using triple backticks: 20 | ```` 21 | ``` 22 | CODE HERE 23 | ``` 24 | ```` 25 | validations: 26 | required: true 27 | 28 | - type: markdown 29 | attributes: 30 | value: "# Environment" 31 | - type: input 32 | attributes: 33 | label: "OS:" 34 | placeholder: e.g., Linux, macOS, etc. 35 | description: Please include operating system version and architecture (e.g., aarch64, x86_64, etc.). 36 | validations: 37 | required: true 38 | - type: input 39 | attributes: 40 | label: "pg_mooncake Version:" 41 | placeholder: e.g., commit hash 42 | validations: 43 | required: true 44 | - type: input 45 | attributes: 46 | label: "Postgres Version:" 47 | placeholder: e.g., 17.0 48 | validations: 49 | required: true 50 | - type: dropdown 51 | attributes: 52 | label: Are you using pg_mooncake Docker, Neon, or the extension standalone? 
53 | options: 54 | - pg_mooncake Docker Image 55 | - pg_mooncake on Neon 56 | - pg_mooncake extension standalone 57 | validations: 58 | required: true 59 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Mooncake Devs Slack 4 | url: https://join.slack.com/t/mooncakelabs/shared_invite/zt-2sepjh5hv-rb9jUtfYZ9bvbxTCUrsEEA 5 | about: Our Slack community is the best place to get quick help and feedback. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/epic.yml: -------------------------------------------------------------------------------- 1 | name: Epic 2 | description: Create an epic for related issues. 3 | labels: 4 | - epic 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: Description 9 | validations: 10 | required: true 11 | 12 | - type: textarea 13 | attributes: 14 | label: Issues in this epic 15 | validations: 16 | required: true 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Suggest an idea for this project. 3 | labels: 4 | - feature 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: What feature are you requesting? 9 | description: A short, clear and concise description of the desired feature. 10 | validations: 11 | required: true 12 | 13 | - type: textarea 14 | attributes: 15 | label: Why are you requesting this feature? 16 | description: A short, clear and concise description of why this feature is important. 17 | validations: 18 | required: true 19 | 20 | - type: textarea 21 | attributes: 22 | label: What is your proposed implementation for this feature? 
23 | description: A short, clear and concise description of how you'd implement this feature. 24 | validations: 25 | required: false 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | /rust_extensions/*/target/ 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/duckdb"] 2 | path = third_party/duckdb 3 | url = https://github.com/duckdb/duckdb.git 4 | -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 4, 3 | "configurations": [ 4 | { 5 | "name": "Dev Container", 6 | "includePath": [ 7 | "/home/postgres/postgresql-17.2/src/**", 8 | "${workspaceFolder}/**" 9 | ] 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "type": "cppdbg", 6 | "request": "attach", 7 | "name": "Attach Postgres", 8 | "processId": "${command:pickProcess}", 9 | "program": "/usr/local/pgsql/bin/postgres", 10 | "additionalSOLibSearchPath": "/usr/local/pgsql/lib", 11 | "setupCommands": [ 12 | { 13 | "text": "-enable-pretty-printing", 14 | "description": "enable pretty printing", 15 | "ignoreFailures": true 16 | } 17 | ] 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.linkedProjects": [ 3 | "rust_extensions/delta/Cargo.toml" 4 | ] 5 | } 6 | 
-------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.1.2 (2025-02-11) 2 | ### Added 3 | - Support NOT NULL constraint (#116) 4 | ### Fixed 5 | - Manually load iceberg extension since it's not autoloadable in DuckDB v1.1.3 6 | - Fix use-after-free bug when reading statistics 7 | - Allow non-superusers to set maximum_memory and maximum_threads 8 | - Fix ALTER TABLE ... SET ACCESS METHOD DEFAULT (#115) 9 | 10 | ## 0.1.1 (2025-01-29) 11 | ### Added 12 | - Preload pg_mooncake in Docker image 13 | - Add a command to reset DuckDB 14 | - Expose GUCs to set maximum memory and threads DuckDB can use 15 | ### Fixed 16 | - Fix DuckDB extension autoloading (#99) 17 | - Fix query failure involving subplans (#100) 18 | - Suppress unnecessary default value error on Postgres heap tables 19 | 20 | ## 0.1.0 (2025-01-10) 21 | ### Added 22 | - Transactional INSERT, SELECT, UPDATE, DELETE, and COPY 23 | - JOIN with regular Postgres heap tables 24 | - Load Parquet, CSV, and JSON files into columnstore tables 25 | - Read existing Iceberg and Delta Lake tables 26 | - File statistics and skipping 27 | - Write Delta Lake tables 28 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Dev Container / GitHub Codespaces 4 | The easiest way to start contributing is via our Dev Container. This container works both locally in Visual Studio Code and in [GitHub Codespaces](https://github.com/features/codespaces). To open the project in vscode you will need the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers). For codespaces you will need to [create a new codespace](https://codespace.new/Mooncake-Labs/pg_mooncake). 
5 | 6 | With the extension installed you can run the following from the `Command Palette` to get started: 7 | ``` 8 | > Dev Containers: Clone Repository in Container Volume... 9 | ``` 10 | 11 | In the subsequent popup paste the URL to the repo and hit enter. 12 | ``` 13 | https://github.com/Mooncake-Labs/pg_mooncake 14 | ``` 15 | 16 | This will create an isolated Workspace in vscode, including all tools required to build, test and run the `pg_mooncake` extension. 17 | 18 | Now you can compile and install the extension: 19 | ```bash 20 | git submodule update --init --recursive 21 | make debug 22 | make install 23 | ``` 24 | Then, connect to Postgres using `psql`. 25 | 26 | Once connected, you can enable the extension and begin development: 27 | ```sql 28 | CREATE EXTENSION pg_mooncake; 29 | ``` 30 | 31 | ### Debugging 32 | 1. Identify the Process: Take note of the pid that appears in your psql prompt. For example: 33 | ``` 34 | mooncake (pid: 1219) =# 35 | ``` 36 | This pid (1219 in this case) indicates the process that you should attach the debugger to. 37 | 38 | 2. Start Debugging: Press F5 to start debugging. When prompted, you'll need to attach the debugger to the appropriate Postgres process. 39 | 40 | 3. Set Breakpoints and Debug: With the debugger attached, you can set breakpoints within the code. This allows you to step through the code execution, inspect variables, and fully debug the Postgres instance running in your container. 41 | 42 | ## Testing 43 | Tests use standard regression tests for Postgres extensions. To run tests, run `make installcheck`. 44 | 45 | ## Formatting 46 | Be sure to run `make format` to format the code. 
47 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:17 AS builder 2 | 3 | RUN apt update \ 4 | && apt install -y \ 5 | curl \ 6 | g++ \ 7 | liblz4-dev \ 8 | cmake \ 9 | postgresql-server-dev-17 \ 10 | && rm -rf /var/lib/apt/lists/* 11 | 12 | RUN curl https://sh.rustup.rs | sh -s -- -y 13 | 14 | ENV PATH="/root/.cargo/bin:$PATH" 15 | 16 | COPY . /tmp/pg_mooncake 17 | 18 | RUN cd /tmp/pg_mooncake \ 19 | && make clean-all \ 20 | && make release \ 21 | && DESTDIR=/out make install 22 | 23 | FROM postgres:17 24 | 25 | RUN apt update \ 26 | && apt install -y ca-certificates \ 27 | && rm -rf /var/lib/apt/lists/* 28 | 29 | RUN echo "shared_preload_libraries = 'pg_mooncake'" >> /usr/share/postgresql/postgresql.conf.sample 30 | 31 | COPY --from=builder /out / 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024-2025 Mooncake Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # ======================== 2 | # Shared Variables 3 | # ======================== 4 | BUILD_DIR := build/$(BUILD_TYPE) 5 | BUILD_RUST_DIR := build/src/rust_extensions 6 | CURRENT_BUILD_DIR := build/current 7 | DELTA_DIR := rust_extensions/delta 8 | DELTA_HEADER := $(DELTA_DIR)/target/cxxbridge/delta/src/lib.rs.h 9 | DELTA_LIB := $(DELTA_DIR)/target/$(BUILD_TYPE)/libdelta.a 10 | DUCKDB_DIR := third_party/duckdb 11 | DUCKDB_LIB := $(DUCKDB_DIR)/build/$(BUILD_TYPE)/src/libduckdb.so 12 | SRC_DIR := src 13 | 14 | # ======================== 15 | # Flags 16 | # ======================== 17 | export override DEBUG := $(filter debug,$(BUILD_TYPE)) 18 | CARGO_FLAGS := $(if $(DEBUG),,--release) 19 | MAKEFLAGS := --no-print-directory 20 | 21 | # ======================== 22 | # Phony Targets 23 | # ======================== 24 | .PHONY: .BUILD all clean clean-all clean-delta clean-duckdb debug delta \ 25 | duckdb duckdb-fast format format-delta help install installcheck \ 26 | release uninstall 27 | 28 | # ======================== 29 | # Default Target: help 30 | # ======================== 31 | help: 32 | @echo "Usage: make " 33 | @echo "" 34 | @echo "Available targets:" 35 | @echo " debug Build in debug mode" 36 | @echo " release Build in release mode" 37 | @echo " clean Remove build artifacts" 38 | @echo " clean-all Remove all build artifacts and clean everything" 39 | @echo " install Install build artifacts" 40 | @echo " installcheck Run regression tests" 41 | @echo " uninstall Uninstall build artifacts" 42 | @echo " format 
Format source files" 43 | 44 | # ======================== 45 | # Build Targets 46 | # ======================== 47 | debug: 48 | @$(MAKE) BUILD_TYPE=debug all 49 | 50 | release: 51 | @$(MAKE) BUILD_TYPE=release all 52 | 53 | all: duckdb-fast delta | .BUILD 54 | install -C Makefile.build $(BUILD_DIR)/Makefile 55 | @$(MAKE) -C $(BUILD_DIR) 56 | 57 | .BUILD: | $(BUILD_DIR) 58 | ifeq ($(findstring $(BUILD_TYPE),debug release),) 59 | @echo "Invalid BUILD_TYPE = $(BUILD_TYPE)"; exit 1 60 | endif 61 | @rm -f $(CURRENT_BUILD_DIR) 62 | @ln -s $(BUILD_TYPE) $(CURRENT_BUILD_DIR) 63 | 64 | $(BUILD_DIR): 65 | @mkdir -p $(BUILD_DIR) 66 | 67 | # ======================== 68 | # Clean Targets 69 | # ======================== 70 | clean: 71 | rm -rf build 72 | 73 | clean-all: clean clean-duckdb clean-delta 74 | 75 | clean-duckdb: 76 | $(MAKE) -C $(DUCKDB_DIR) clean 77 | 78 | clean-delta: 79 | cargo clean --manifest-path=$(DELTA_DIR)/Cargo.toml 80 | 81 | # ======================== 82 | # Install Targets 83 | # ======================== 84 | install: 85 | @$(MAKE) -C $(CURRENT_BUILD_DIR) install 86 | 87 | installcheck: 88 | @$(MAKE) -C $(CURRENT_BUILD_DIR) installcheck 89 | 90 | uninstall: 91 | @$(MAKE) -C $(CURRENT_BUILD_DIR) uninstall 92 | 93 | # ======================== 94 | # Format Targets 95 | # ======================== 96 | format: format-delta 97 | find $(SRC_DIR) -name '*.c' -o -name '*.cpp' -o -name '*.h' -o -name '*.hpp' | xargs clang-format -i 98 | 99 | format-delta: 100 | cargo fmt --manifest-path=$(DELTA_DIR)/Cargo.toml 101 | 102 | # ======================== 103 | # DuckDB Targets 104 | # ======================== 105 | duckdb-fast: $(DUCKDB_LIB) 106 | install -C $< $(BUILD_DIR)/libduckdb.so 107 | 108 | duckdb: | .BUILD 109 | CMAKE_BUILD_PARALLEL_LEVEL=$(or $(patsubst -j%,%,$(filter -j%,$(MAKEFLAGS))),1) \ 110 | CMAKE_VARS="-DBUILD_PYTHON=0 -DBUILD_SHELL=0 -DBUILD_UNITTESTS=0" \ 111 | DISABLE_SANITIZER=1 \ 112 | EXTENSION_CONFIGS="../pg_mooncake_extensions.cmake" \ 113 | 
OVERRIDE_GIT_DESCRIBE=v1.2.0 \ 114 | $(MAKE) -C $(DUCKDB_DIR) $(BUILD_TYPE) 115 | ifeq ($(BUILD_TYPE), debug) 116 | gdb-add-index $(DUCKDB_LIB) 117 | endif 118 | 119 | $(DUCKDB_LIB): | .BUILD 120 | @$(MAKE) duckdb 121 | 122 | # ======================== 123 | # Delta Targets 124 | # ======================== 125 | delta: | .BUILD $(BUILD_RUST_DIR) 126 | cargo build --manifest-path=$(DELTA_DIR)/Cargo.toml $(CARGO_FLAGS) 127 | install -C $$(readlink -f $(DELTA_HEADER)) $(BUILD_RUST_DIR)/delta.hpp 128 | install -C $(DELTA_LIB) $(BUILD_DIR)/libdelta.a 129 | 130 | $(BUILD_RUST_DIR): 131 | @mkdir -p $@ 132 | -------------------------------------------------------------------------------- /Makefile.build: -------------------------------------------------------------------------------- 1 | # ======================== 2 | # Shared Variables 3 | # ======================== 4 | DUCKDB_DIR := ../../third_party/duckdb 5 | EXTENSION_NAME := pg_mooncake 6 | SQL_DIR := ../../sql 7 | SRC_DIR := ../../src 8 | TEST_DIR := ../../test 9 | 10 | # ======================== 11 | # Postgres Setup 12 | # ======================== 13 | PG_CONFIG ?= pg_config 14 | PG_LIB_DIR := $(shell $(PG_CONFIG) --pkglibdir) 15 | 16 | # ======================== 17 | # Source Files 18 | # ======================== 19 | SRCS_C := $(shell cd $(SRC_DIR); find * -name '*.c') 20 | SRCS_CXX := $(shell cd $(SRC_DIR); find * -name '*.cpp') 21 | SRCS := $(SRCS_C) $(SRCS_CXX) thrift/transport/TBufferTransports.cpp 22 | OBJS := $(SRCS:%=%.o) libduckdb.so libdelta.a 23 | DEPS := $(SRCS:%=%.d) 24 | 25 | # ======================== 26 | # Regression Tests 27 | # ======================== 28 | REGRESS_SQL := $(shell cd $(TEST_DIR)/sql; find * -name '*.sql') 29 | REGRESS := $(REGRESS_SQL:%.sql=%) 30 | REGRESS_OPTS = --encoding=UTF8 --inputdir=$(TEST_DIR) --load-extension=$(EXTENSION_NAME) 31 | 32 | # ======================== 33 | # Compilation Flags 34 | # ======================== 35 | PG_CPPFLAGS := -I$(SRC_DIR) \ 36 | 
-I$(DUCKDB_DIR)/extension/parquet/include \ 37 | -I$(DUCKDB_DIR)/src/include \ 38 | -I$(DUCKDB_DIR)/third_party/fastpforlib \ 39 | -I$(DUCKDB_DIR)/third_party/parquet \ 40 | -I$(DUCKDB_DIR)/third_party/thrift \ 41 | -I../src \ 42 | -MMD -MP 43 | PG_CFLAGS := $(if $(DEBUG),-ggdb3 -O0,-O2) 44 | PG_CXXFLAGS := $(if $(DEBUG),-ggdb3 -O0,-O2) -Werror -Wno-register -Wno-sign-compare -std=c++17 45 | SHLIB_LINK := -L. -Wl,-rpath,$(PG_LIB_DIR) -lduckdb -lstdc++ 46 | 47 | # ======================== 48 | # PGXS Configuration 49 | # ======================== 50 | MODULES := libduckdb 51 | MODULE_big := $(EXTENSION_NAME) 52 | DATA := $(SQL_DIR)/pg_mooncake--0.1.0.sql \ 53 | $(SQL_DIR)/pg_mooncake--0.1.0--0.1.1.sql \ 54 | $(SQL_DIR)/pg_mooncake--0.1.1--0.1.2.sql 55 | EXTENSION := ../../$(EXTENSION_NAME) 56 | PGXS := $(shell $(PG_CONFIG) --pgxs) 57 | override with_llvm := no 58 | include $(PGXS) 59 | 60 | # ======================== 61 | # Phony Targets 62 | # ======================== 63 | .PHONY: installcheck-dirs 64 | 65 | # ======================== 66 | # Compilation Rules 67 | # ======================== 68 | $(SRCS_C:%=%.o): %.o: $(SRC_DIR)/% 69 | @mkdir -p $(dir $@) 70 | $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@ 71 | 72 | $(SRCS_CXX:%=%.o): %.o: $(SRC_DIR)/% 73 | @mkdir -p $(dir $@) 74 | $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@ 75 | 76 | thrift/transport/TBufferTransports.cpp.o: ../../third_party/duckdb/third_party/thrift/thrift/transport/TBufferTransports.cpp 77 | @mkdir -p $(dir $@) 78 | $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@ 79 | 80 | # ======================== 81 | # Regression Testing 82 | # ======================== 83 | installcheck: installcheck-dirs 84 | 85 | installcheck-dirs: $(addprefix results/, $(sort $(dir $(REGRESS)))) 86 | 87 | results/%: 88 | @mkdir -p $@ 89 | 90 | # ======================== 91 | # Include Dependency Files 92 | # ======================== 93 | -include $(DEPS) 94 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # pg_mooncake 🥮 4 | Postgres extension for 1000x faster analytics 5 | 6 | [![License](https://img.shields.io/badge/License-MIT-blue)](https://github.com/Mooncake-Labs/pg_mooncake/blob/main/LICENSE) 7 | [![Slack](https://img.shields.io/badge/Mooncake%20Slack-purple?logo=slack)](https://join.slack.com/t/mooncakelabs/shared_invite/zt-2sepjh5hv-rb9jUtfYZ9bvbxTCUrsEEA) 8 | [![Twitter](https://img.shields.io/twitter/url?url=https%3A%2F%2Fx.com%2Fmooncakelabs&label=%40mooncakelabs)](https://x.com/mooncakelabs) 9 | [![Docs](https://img.shields.io/badge/Documentation-pgmooncake.com-blue?style=flat&logo=readthedocs&logoColor=white)](https://pgmooncake.com/docs) 10 | 11 |
12 | 13 | ## Overview 14 | **pg_mooncake** is a Postgres extension that adds columnar storage and vectorized execution (DuckDB) for fast analytics within Postgres. Postgres + pg_mooncake ranks among the top 10 fastest in [ClickBench](https://www.mooncake.dev/blog/clickbench-v0.1). 15 | 16 | Columnstore tables are stored as [Iceberg](https://github.com/apache/iceberg) or [Delta Lake](https://github.com/delta-io/delta) tables in the local file system or cloud storage. 17 | 18 | The extension is maintained by [Mooncake Labs](https://mooncake.dev/) and is available on [Neon Postgres](https://neon.tech/home). 19 | 
20 | 21 | 22 | 23 |
24 | 25 | ## [Installation](https://pgmooncake.com/docs/installation) 26 | 27 | ### Option 1: Docker 28 | Get started quickly with our Docker image: 29 | ```bash 30 | docker pull mooncakelabs/pg_mooncake 31 | 32 | # server 33 | docker run --name mooncake-demo -e POSTGRES_HOST_AUTH_METHOD=trust -d mooncakelabs/pg_mooncake 34 | 35 | # client 36 | docker run -it --rm --link mooncake-demo:postgres mooncakelabs/pg_mooncake psql -h postgres -U postgres 37 | ``` 38 | 39 | ### Option 2: From Source 40 | Get source code from [releases](https://github.com/Mooncake-Labs/pg_mooncake/releases) or clone: 41 | ```bash 42 | git clone --recurse-submodules https://github.com/Mooncake-Labs/pg_mooncake.git 43 | ``` 44 | 45 | Build for Postgres versions 14–17: 46 | ```bash 47 | make release -j$(nproc) 48 | make install 49 | ``` 50 | 51 | ### Option 3: On Neon Postgres 52 | 1. [Create a Neon project](https://console.neon.tech/signup) 53 | 2. Enable beta extensions: 54 | ```sql 55 | SET neon.allow_unstable_extensions='true'; 56 | ``` 57 | 58 | ## [Quick Start](https://pgmooncake.com/docs/quick-start) 59 | 1. Enable the extension 60 | ```sql 61 | CREATE EXTENSION pg_mooncake; 62 | ``` 63 | 2. Create a columnstore table: 64 | ```sql 65 | CREATE TABLE user_activity( 66 | user_id BIGINT, 67 | activity_type TEXT, 68 | activity_timestamp TIMESTAMP, 69 | duration INT 70 | ) USING columnstore; 71 | ``` 72 | 3. Insert data: 73 | ```sql 74 | INSERT INTO user_activity VALUES 75 | (1, 'login', '2024-01-01 08:00:00', 120), 76 | (2, 'page_view', '2024-01-01 08:05:00', 30), 77 | (3, 'logout', '2024-01-01 08:30:00', 60), 78 | (4, 'error', '2024-01-01 08:13:00', 60); 79 | 80 | SELECT * from user_activity; 81 | ``` 82 | 83 | Columnstore tables behave just like regular Postgres heap tables, supporting transactions, updates, deletes, joins, and more. 84 | 85 | ## [Cloud Storage](https://pgmooncake.com/docs/cloud-storage) 86 | Columnstore tables are stored in the local file system by default. 
You can configure `mooncake.default_bucket` to store data in S3 or R2 buckets instead. 87 | 88 | > **Note**: On Neon, only cloud storage is supported. Neon users must bring their own S3 or R2 buckets or get a free S3 bucket by signing up at [s3.pgmooncake.com](https://s3.pgmooncake.com/). For cloud storage configuration instructions, see [Cloud Storage](https://pgmooncake.com/docs/cloud-storage). We are working to improve this experience. 89 | 90 | ## [Load Data](https://pgmooncake.com/docs/load-data) 91 | **pg_mooncake** supports loading data from: 92 | - Postgres heap tables 93 | - Parquet, CSV, JSON files 94 | - Iceberg, Delta Lake tables 95 | - Hugging Face datasets 96 | 97 | ## Columnstore Tables as Iceberg or Delta Lake Tables 98 | Find your columnstore table location: 99 | ```sql 100 | SELECT * FROM mooncake.columnstore_tables; 101 | ``` 102 | 103 | The directory contains a Delta Lake (and soon Iceberg) table that can be queried directly using Pandas, DuckDB, Polars, or Spark. 104 | 105 | ## Roadmap 106 | - [x] **Transactional INSERT, SELECT, UPDATE, DELETE, and COPY** 107 | - [x] **JOIN with regular Postgres heap tables** 108 | - [x] **Load Parquet, CSV, and JSON files into columnstore tables** 109 | - [x] **Read existing Iceberg and Delta Lake tables** 110 | - [x] **File statistics and skipping** 111 | - [x] **Write Delta Lake tables** 112 | - [ ] **Write Iceberg tables** 113 | - [ ] **Batched small writes and compaction** 114 | - [ ] **Secondary indexes and constraints** 115 | - [ ] **Partitioned tables ^** 116 | 117 | > [^](https://github.com/Mooncake-Labs/pg_mooncake/issues/17) File statistics and skipping should cover most use cases of partitioned tables in Postgres, including time series. 
118 | 119 | [v0.2.0 Roadmap](https://github.com/Mooncake-Labs/pg_mooncake/discussions/91) 120 | -------------------------------------------------------------------------------- /pg_mooncake.control: -------------------------------------------------------------------------------- 1 | comment = 'Columnstore Table in Postgres' 2 | default_version = '0.1.2' 3 | module_pathname = '$libdir/pg_mooncake' 4 | relocatable = true 5 | -------------------------------------------------------------------------------- /rust_extensions/delta/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "delta" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [lib] 7 | crate-type = ["staticlib"] 8 | 9 | [dependencies] 10 | cxx = "1.0" 11 | deltalake = {version = "0.24", features = ["s3"] } 12 | tokio = "1.41" 13 | serde_json = "1.0" 14 | 15 | [build-dependencies] 16 | cxx-build = "1.0" 17 | -------------------------------------------------------------------------------- /rust_extensions/delta/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | cxx_build::bridge("src/lib.rs").compile("cxxbridge"); 3 | 4 | println!("cargo:rerun-if-changed=src/lib.rs"); 5 | } 6 | -------------------------------------------------------------------------------- /rust_extensions/delta/src/lib.rs: -------------------------------------------------------------------------------- 1 | use cxx::{CxxString, CxxVector}; 2 | use deltalake::aws::register_handlers; 3 | use deltalake::kernel::{Action, Add, ArrayType, DataType, PrimitiveType, Remove, StructField}; 4 | use deltalake::operations::create::CreateBuilder; 5 | use deltalake::operations::transaction::CommitBuilder; 6 | use deltalake::protocol::{DeltaOperation, SaveMode}; 7 | use deltalake::{open_table_with_storage_options, TableProperty}; 8 | use std::collections::HashMap; 9 | 10 | #[cxx::bridge] 11 | mod ffi { 12 | extern "Rust" { 13 | fn 
DeltaInit(); 14 | 15 | fn DeltaCreateTable( 16 | table_name: &CxxString, 17 | path: &CxxString, 18 | options: &CxxString, 19 | column_names: &CxxVector, 20 | column_types: &CxxVector, 21 | ) -> Result<()>; 22 | 23 | fn DeltaModifyFiles( 24 | path: &CxxString, 25 | options: &CxxString, 26 | file_paths: &CxxVector, 27 | file_sizes: &CxxVector, 28 | is_add_files: &CxxVector, 29 | ) -> Result<()>; 30 | } 31 | } 32 | 33 | #[allow(non_snake_case)] 34 | pub fn DeltaInit() { 35 | // Register S3 handlers 36 | register_handlers(None); 37 | } 38 | 39 | #[allow(non_snake_case)] 40 | pub fn DeltaCreateTable( 41 | table_name: &CxxString, 42 | path: &CxxString, 43 | options: &CxxString, 44 | column_names: &CxxVector, 45 | column_types: &CxxVector, 46 | ) -> Result<(), Box> { 47 | let runtime = tokio::runtime::Runtime::new()?; 48 | runtime.block_on(async { 49 | let mut storage_options: HashMap = 50 | serde_json::from_str(options.to_str()?).expect("invalid options"); 51 | // Write directly to S3 without locking is safe since Mooncake is the only writer 52 | storage_options.insert("AWS_S3_ALLOW_UNSAFE_RENAME".to_string(), "true".to_string()); 53 | let metadata = vec![( 54 | "creator".to_string(), 55 | serde_json::json!("pg_mooncake_extension"), 56 | )]; 57 | let _table = CreateBuilder::new() 58 | .with_location(path.to_str()?) 59 | .with_storage_options(storage_options) 60 | .with_table_name(table_name.to_str()?) 
61 | .with_configuration_property(TableProperty::MinReaderVersion, Some("3")) 62 | .with_configuration_property(TableProperty::MinWriterVersion, Some("7")) 63 | .with_columns(map_postgres_columns(column_names, column_types)) 64 | .with_metadata(metadata) 65 | .with_save_mode(SaveMode::ErrorIfExists) 66 | .await?; 67 | Ok(()) 68 | }) 69 | } 70 | 71 | #[allow(non_snake_case)] 72 | pub fn DeltaModifyFiles( 73 | path: &CxxString, 74 | options: &CxxString, 75 | file_paths: &CxxVector, 76 | file_sizes: &CxxVector, 77 | is_add_files: &CxxVector, 78 | ) -> Result<(), Box> { 79 | let runtime: tokio::runtime::Runtime = tokio::runtime::Runtime::new()?; 80 | runtime.block_on(async { 81 | let mut actions = Vec::new(); 82 | for ((file_path, file_size), is_add) in file_paths 83 | .iter() 84 | .zip(file_sizes.iter()) 85 | .zip(is_add_files.iter()) 86 | { 87 | if *is_add == 1 { 88 | let add = Add { 89 | path: file_path.to_string(), 90 | size: *file_size, 91 | data_change: true, 92 | ..Default::default() 93 | }; 94 | actions.push(Action::Add(add)); 95 | } else { 96 | let rm = Remove { 97 | path: file_path.to_string(), 98 | data_change: true, 99 | ..Default::default() 100 | }; 101 | actions.push(Action::Remove(rm)); 102 | } 103 | } 104 | let mut storage_options: HashMap = 105 | serde_json::from_str(options.to_str()?).expect("invalid options"); 106 | // Write directly to S3 without locking is safe since Mooncake is the only writer 107 | storage_options.insert("AWS_S3_ALLOW_UNSAFE_RENAME".to_string(), "true".to_string()); 108 | let mut table: deltalake::DeltaTable = 109 | open_table_with_storage_options(path.to_string(), storage_options).await?; 110 | let op = DeltaOperation::Write { 111 | mode: SaveMode::Append, 112 | partition_by: None, 113 | predicate: None, 114 | }; 115 | CommitBuilder::default() 116 | .with_actions(actions) 117 | .build(Some(table.snapshot()?), table.log_store().clone(), op) 118 | .await?; 119 | table.update().await?; 120 | Ok(()) 121 | }) 122 | } 123 | 124 | fn 
map_postgres_columns( 125 | column_names: &CxxVector, 126 | column_types: &CxxVector, 127 | ) -> Vec { 128 | column_names 129 | .into_iter() 130 | .zip(column_types.into_iter()) 131 | .map(|(column_name, column_type)| { 132 | StructField::new( 133 | column_name.to_string(), 134 | convert_postgres_to_delta_type(&column_type.to_string()), 135 | true, // Assuming all columns are nullable for simplicity 136 | ) 137 | }) 138 | .collect() 139 | } 140 | 141 | fn convert_postgres_to_delta_type(column_type: &str) -> DataType { 142 | match column_type { 143 | "smallint" => DataType::Primitive(PrimitiveType::Short), 144 | "integer" => DataType::Primitive(PrimitiveType::Integer), 145 | "bigint" => DataType::Primitive(PrimitiveType::Long), 146 | "real" => DataType::Primitive(PrimitiveType::Float), 147 | "double precision" => DataType::Primitive(PrimitiveType::Double), 148 | "boolean" => DataType::Primitive(PrimitiveType::Boolean), 149 | "character varying" | "text" => DataType::Primitive(PrimitiveType::String), 150 | "date" => DataType::Primitive(PrimitiveType::Date), 151 | "timestamp without time zone" => DataType::Primitive(PrimitiveType::TimestampNtz), 152 | "timestamp with time zone" => DataType::Primitive(PrimitiveType::Timestamp), 153 | "time without time zone" | "time with time zone" => { 154 | DataType::Primitive(PrimitiveType::String) 155 | } 156 | "numeric" | "decimal" => DataType::Primitive(PrimitiveType::Decimal(38, 10)), // Default precision and scale 157 | "bytea" => DataType::Primitive(PrimitiveType::Binary), 158 | _ if column_type.ends_with("[]") => { 159 | let base_type = &column_type[..column_type.len() - 2]; 160 | DataType::from(ArrayType::new( 161 | convert_postgres_to_delta_type(base_type), 162 | true, 163 | )) 164 | } 165 | _ => DataType::Primitive(PrimitiveType::String), // Default to string for unsupported types 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /sql/pg_mooncake--0.1.0--0.1.1.sql: 
-------------------------------------------------------------------------------- 1 | CREATE PROCEDURE mooncake.reset_duckdb() 2 | SET search_path = pg_catalog, pg_temp 3 | LANGUAGE C AS 'MODULE_PATHNAME', 'mooncake_reset_duckdb'; 4 | -------------------------------------------------------------------------------- /sql/pg_mooncake--0.1.1--0.1.2.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mooncake-Labs/pg_mooncake/d68f1e5f023ba36e0810dbcf7d8deecc275eccc8/sql/pg_mooncake--0.1.1--0.1.2.sql -------------------------------------------------------------------------------- /src/columnstore/columnstore.cpp: -------------------------------------------------------------------------------- 1 | #include "columnstore/columnstore.hpp" 2 | #include "columnstore/columnstore_metadata.hpp" 3 | #include "duckdb/main/secret/secret_manager.hpp" 4 | #include "lake/lake.hpp" 5 | #include "pgduckdb/pgduckdb_utils.hpp" 6 | #include "pgduckdb/utility/cpp_wrapper.hpp" 7 | 8 | namespace duckdb { 9 | 10 | void Columnstore::CreateTable(Oid oid) { 11 | ColumnstoreMetadata metadata(NULL /*snapshot*/); 12 | string path = metadata.GetTablePath(oid); 13 | if (!path.empty() && !duckdb::FileSystem::IsRemoteFile(path)) { 14 | FileSystem::CreateLocal()->CreateDirectory(path); 15 | } 16 | metadata.TablesInsert(oid, path); 17 | InvokeCPPFunc(LakeCreateTable, oid, path); 18 | } 19 | 20 | void Columnstore::TruncateTable(Oid oid) { 21 | ColumnstoreMetadata metadata(NULL /*snapshot*/); 22 | vector file_names = metadata.DataFilesSearch(oid); 23 | metadata.DataFilesDelete(oid); 24 | for (auto file_name : file_names) { 25 | LakeDeleteFile(oid, file_name); 26 | } 27 | } 28 | 29 | void Columnstore::Abort() { 30 | LakeAbort(); 31 | } 32 | 33 | void Columnstore::Commit() { 34 | InvokeCPPFunc(LakeCommit); 35 | } 36 | 37 | void Columnstore::LoadSecrets(ClientContext &context) { 38 | ColumnstoreMetadata metadata(NULL /*snapshot*/); 39 | bool 
require_new_transaction = !context.transaction.HasActiveTransaction(); 40 | if (require_new_transaction) { 41 | context.transaction.BeginTransaction(); 42 | } 43 | auto transaction = CatalogTransaction::GetSystemCatalogTransaction(context); 44 | auto secrets = SecretManager::Get(context).AllSecrets(transaction); 45 | for (auto secret : secrets) { 46 | SecretManager::Get(context).DropSecretByName(context, secret.secret->GetName(), 47 | duckdb::OnEntryNotFound::RETURN_NULL); 48 | } 49 | if (require_new_transaction) { 50 | context.transaction.Commit(); 51 | } 52 | auto queries = metadata.SecretsGetDuckdbQueries(); 53 | for (const auto &query : queries) { 54 | pgduckdb::DuckDBQueryOrThrow(context, query); 55 | } 56 | } 57 | 58 | } // namespace duckdb 59 | -------------------------------------------------------------------------------- /src/columnstore/columnstore.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/unique_ptr.hpp" 4 | #include "pgduckdb/pg/declarations.hpp" 5 | 6 | namespace duckdb { 7 | 8 | class ClientContext; 9 | class LogicalDelete; 10 | class LogicalInsert; 11 | class LogicalUpdate; 12 | class PhysicalOperator; 13 | 14 | class Columnstore { 15 | public: 16 | static void CreateTable(Oid oid); 17 | 18 | static void TruncateTable(Oid oid); 19 | 20 | static void Abort(); 21 | 22 | static void Commit(); 23 | 24 | static void LoadSecrets(ClientContext &context); 25 | 26 | static unique_ptr PlanInsert(ClientContext &context, LogicalInsert &op, 27 | unique_ptr plan); 28 | 29 | static unique_ptr PlanDelete(ClientContext &context, LogicalDelete &op, 30 | unique_ptr plan); 31 | 32 | static unique_ptr PlanUpdate(ClientContext &context, LogicalUpdate &op, 33 | unique_ptr plan); 34 | }; 35 | 36 | } // namespace duckdb 37 | -------------------------------------------------------------------------------- /src/columnstore/columnstore_metadata.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/vector.hpp" 4 | #include "pgduckdb/pg/declarations.hpp" 5 | 6 | namespace duckdb { 7 | 8 | class ClientContext; 9 | class ColumnList; 10 | struct string_t; 11 | 12 | class ColumnstoreMetadata { 13 | public: 14 | explicit ColumnstoreMetadata(Snapshot snapshot) : snapshot(snapshot) {} 15 | 16 | public: 17 | void TablesInsert(Oid oid, const string &path); 18 | void TablesDelete(Oid oid); 19 | std::tuple TablesSearch(Oid oid); 20 | 21 | string GetTablePath(Oid oid); 22 | std::tuple /*column_names*/, vector /*column_types*/> 23 | GetTableMetadata(Oid oid); 24 | 25 | void DataFilesInsert(Oid oid, const string &file_name, const string_t &file_metadata); 26 | void DataFilesDelete(const string &file_name); 27 | void DataFilesDelete(Oid oid); 28 | vector DataFilesSearch(Oid oid, ClientContext *context = nullptr, const string *path = nullptr, 29 | const ColumnList *columns = nullptr); 30 | 31 | vector SecretsGetDuckdbQueries(); 32 | string SecretsSearchDeltaOptions(const string &path); 33 | 34 | private: 35 | Snapshot snapshot; 36 | }; 37 | 38 | } // namespace duckdb 39 | -------------------------------------------------------------------------------- /src/columnstore/columnstore_statistics.cpp: -------------------------------------------------------------------------------- 1 | #include "columnstore/columnstore_statistics.hpp" 2 | #include "parquet_reader.hpp" 3 | 4 | namespace duckdb { 5 | 6 | DataFileStatistics::DataFileStatistics(ParquetReader &reader, const ColumnList &columns) : num_rows(reader.NumRows()) { 7 | for (auto &col : columns.Physical()) { 8 | auto name = col.GetName(); 9 | column_stats[name] = reader.ReadStatistics(name); 10 | } 11 | } 12 | 13 | ObjectCache columnstore_stats; 14 | 15 | } // namespace duckdb 16 | -------------------------------------------------------------------------------- 
/src/columnstore/columnstore_statistics.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/unique_ptr.hpp" 4 | #include "duckdb/storage/object_cache.hpp" 5 | 6 | namespace duckdb { 7 | 8 | class BaseStatistics; 9 | class ColumnList; 10 | class ParquetReader; 11 | 12 | class DataFileStatistics : public ObjectCacheEntry { 13 | public: 14 | DataFileStatistics(ParquetReader &reader, const ColumnList &columns); 15 | 16 | public: 17 | static string ObjectType() { 18 | return "data_file_statistics"; 19 | } 20 | 21 | string GetObjectType() override { 22 | return ObjectType(); 23 | } 24 | 25 | idx_t NumRows() { 26 | return num_rows; 27 | } 28 | 29 | BaseStatistics *Get(const string &name) { 30 | return column_stats.at(name).get(); 31 | } 32 | 33 | private: 34 | idx_t num_rows; 35 | unordered_map> column_stats; 36 | }; 37 | 38 | extern ObjectCache columnstore_stats; 39 | 40 | } // namespace duckdb 41 | -------------------------------------------------------------------------------- /src/columnstore/columnstore_table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" 4 | #include "pgduckdb/pg/declarations.hpp" 5 | 6 | namespace duckdb { 7 | 8 | class ColumnDataCollection; 9 | class ColumnstoreMetadata; 10 | class ColumnstoreWriter; 11 | class DataChunk; 12 | 13 | class ColumnstoreTable : public TableCatalogEntry { 14 | public: 15 | ColumnstoreTable(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info, Oid oid, Snapshot snapshot); 16 | 17 | ~ColumnstoreTable() override; 18 | 19 | public: 20 | unique_ptr GetStatistics(ClientContext &context, column_t column_id) override { 21 | throw NotImplementedException("GetStatistics not supported yet"); 22 | } 23 | 24 | TableFunction GetScanFunction(ClientContext &context, unique_ptr &bind_data) override; 25 | 26 | 
TableStorageInfo GetStorageInfo(ClientContext &context) override; 27 | 28 | public: 29 | void VerifyConstraints(DataChunk &chunk, const vector> &bound_constraints) const; 30 | 31 | void Insert(ClientContext &context, DataChunk &chunk); 32 | 33 | void FinalizeInsert(); 34 | 35 | void Delete(ClientContext &context, unordered_set &row_ids_set, 36 | ColumnDataCollection *return_collection = nullptr); 37 | 38 | private: 39 | static vector GetFilePaths(const string &path, const vector &file_names); 40 | 41 | static idx_t Cardinality(const vector &file_names); 42 | 43 | private: 44 | Oid oid; 45 | unique_ptr metadata; 46 | string path; 47 | unique_ptr writer; 48 | }; 49 | 50 | } // namespace duckdb 51 | -------------------------------------------------------------------------------- /src/columnstore/execution/columnstore_delete.cpp: -------------------------------------------------------------------------------- 1 | #include "columnstore/columnstore.hpp" 2 | #include "columnstore/columnstore_table.hpp" 3 | #include "duckdb/common/types/column/column_data_collection.hpp" 4 | #include "duckdb/execution/physical_operator.hpp" 5 | #include "duckdb/planner/expression/bound_reference_expression.hpp" 6 | #include "duckdb/planner/operator/logical_delete.hpp" 7 | 8 | namespace duckdb { 9 | 10 | class ColumnstoreDeleteSourceState : public GlobalSourceState { 11 | public: 12 | ColumnDataScanState scan_state; 13 | }; 14 | 15 | class ColumnstoreDeleteGlobalState : public GlobalSinkState { 16 | public: 17 | ColumnstoreDeleteGlobalState(ClientContext &context, const vector &types) 18 | : return_collection(context, types) {} 19 | 20 | unordered_set row_ids; 21 | ColumnDataCollection return_collection; 22 | }; 23 | 24 | class ColumnstoreDelete : public PhysicalOperator { 25 | public: 26 | ColumnstoreDelete(vector types, idx_t estimated_cardinality, ColumnstoreTable &table, 27 | idx_t row_id_index, bool return_chunk) 28 | : PhysicalOperator(PhysicalOperatorType::EXTENSION, 
std::move(types), estimated_cardinality), table(table), 29 | row_id_index(row_id_index), return_chunk(return_chunk) {} 30 | 31 | ColumnstoreTable &table; 32 | idx_t row_id_index; 33 | bool return_chunk; 34 | 35 | public: 36 | string GetName() const override { 37 | return "COLUMNSTORE_DELETE"; 38 | } 39 | 40 | public: 41 | // Source interface 42 | unique_ptr GetGlobalSourceState(ClientContext &context) const override { 43 | auto state = make_uniq(); 44 | auto &gstate = sink_state->Cast(); 45 | if (return_chunk) { 46 | gstate.return_collection.InitializeScan(state->scan_state); 47 | } 48 | return std::move(state); 49 | } 50 | 51 | SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override { 52 | auto &state = input.global_state.Cast(); 53 | auto &gstate = sink_state->Cast(); 54 | if (!return_chunk) { 55 | chunk.SetCardinality(1); 56 | chunk.SetValue(0, 0, Value::BIGINT(NumericCast(gstate.row_ids.size()))); 57 | return SourceResultType::FINISHED; 58 | } 59 | gstate.return_collection.Scan(state.scan_state, chunk); 60 | return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT; 61 | } 62 | 63 | bool IsSource() const override { 64 | return true; 65 | } 66 | 67 | public: 68 | // Sink interface 69 | SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override { 70 | auto &gstate = input.global_state.Cast(); 71 | auto &row_ids = chunk.data[row_id_index]; 72 | row_ids.Flatten(chunk.size()); 73 | auto row_ids_data = FlatVector::GetData(row_ids); 74 | for (idx_t i = 0; i < chunk.size(); i++) { 75 | gstate.row_ids.insert(row_ids_data[i]); 76 | } 77 | return SinkResultType::NEED_MORE_INPUT; 78 | } 79 | 80 | SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context, 81 | OperatorSinkFinalizeInput &input) const override { 82 | auto &gstate = input.global_state.Cast(); 83 | table.Delete(context, gstate.row_ids, return_chunk ? 
&gstate.return_collection : nullptr); 84 | return SinkFinalizeType::READY; 85 | } 86 | 87 | unique_ptr GetGlobalSinkState(ClientContext &context) const override { 88 | return make_uniq(context, table.GetTypes()); 89 | } 90 | 91 | bool IsSink() const override { 92 | return true; 93 | } 94 | }; 95 | 96 | unique_ptr Columnstore::PlanDelete(ClientContext &context, LogicalDelete &op, 97 | unique_ptr plan) { 98 | auto &bound_ref = op.expressions[0]->Cast(); 99 | auto del = make_uniq(op.types, op.estimated_cardinality, op.table.Cast(), 100 | bound_ref.index, op.return_chunk); 101 | del->children.push_back(std::move(plan)); 102 | return std::move(del); 103 | } 104 | 105 | } // namespace duckdb 106 | -------------------------------------------------------------------------------- /src/columnstore/execution/columnstore_insert.cpp: -------------------------------------------------------------------------------- 1 | #include "columnstore/columnstore.hpp" 2 | #include "columnstore/columnstore_table.hpp" 3 | #include "duckdb/planner/operator/logical_insert.hpp" 4 | 5 | namespace duckdb { 6 | 7 | class ColumnstoreInsertSourceState : public GlobalSourceState { 8 | public: 9 | ColumnDataScanState scan_state; 10 | }; 11 | 12 | class ColumnstoreInsertGlobalState : public GlobalSinkState { 13 | public: 14 | ColumnstoreInsertGlobalState(ClientContext &context, const vector &types, 15 | const vector> &bound_defaults) 16 | : executor(context, bound_defaults), insert_count(0), return_collection(context, types) { 17 | chunk.Initialize(Allocator::Get(context), types); 18 | } 19 | 20 | DataChunk chunk; 21 | ExpressionExecutor executor; 22 | idx_t insert_count; 23 | ColumnDataCollection return_collection; 24 | }; 25 | 26 | class ColumnstoreInsert : public PhysicalOperator { 27 | public: 28 | ColumnstoreInsert(vector types, idx_t estimated_cardinality, ColumnstoreTable &table, 29 | physical_index_vector_t column_index_map, vector> bound_defaults, 30 | vector> bound_constraints, bool 
return_chunk) 31 | : PhysicalOperator(PhysicalOperatorType::EXTENSION, std::move(types), estimated_cardinality), table(table), 32 | column_index_map(std::move(column_index_map)), bound_defaults(std::move(bound_defaults)), 33 | bound_constraints(std::move(bound_constraints)), return_chunk(return_chunk) {} 34 | 35 | ColumnstoreTable &table; 36 | physical_index_vector_t column_index_map; 37 | vector> bound_defaults; 38 | vector> bound_constraints; 39 | bool return_chunk; 40 | 41 | public: 42 | string GetName() const override { 43 | return "COLUMNSTORE_INSERT"; 44 | } 45 | 46 | public: 47 | // Source interface 48 | unique_ptr GetGlobalSourceState(ClientContext &context) const override { 49 | auto state = make_uniq(); 50 | auto &gstate = sink_state->Cast(); 51 | if (return_chunk) { 52 | gstate.return_collection.InitializeScan(state->scan_state); 53 | } 54 | return std::move(state); 55 | } 56 | 57 | SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override { 58 | auto &state = input.global_state.Cast(); 59 | auto &gstate = sink_state->Cast(); 60 | if (!return_chunk) { 61 | chunk.SetCardinality(1); 62 | chunk.SetValue(0, 0, Value::BIGINT(NumericCast(gstate.insert_count))); 63 | return SourceResultType::FINISHED; 64 | } 65 | gstate.return_collection.Scan(state.scan_state, chunk); 66 | return chunk.size() == 0 ? 
SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT; 67 | } 68 | 69 | bool IsSource() const override { 70 | return true; 71 | } 72 | 73 | public: 74 | // Sink interface 75 | SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override { 76 | auto &gstate = input.global_state.Cast(); 77 | chunk.Flatten(); 78 | gstate.executor.SetChunk(chunk); 79 | gstate.chunk.Reset(); 80 | gstate.chunk.SetCardinality(chunk); 81 | if (!column_index_map.empty()) { 82 | // columns specified by the user, use column_index_map 83 | for (auto &col : table.GetColumns().Physical()) { 84 | auto storage_idx = col.StorageOid(); 85 | auto mapped_index = column_index_map[col.Physical()]; 86 | if (mapped_index == DConstants::INVALID_INDEX) { 87 | // insert default value 88 | gstate.executor.ExecuteExpression(storage_idx, gstate.chunk.data[storage_idx]); 89 | } else { 90 | // get value from child chunk 91 | D_ASSERT(mapped_index < chunk.ColumnCount()); 92 | D_ASSERT(gstate.chunk.data[storage_idx].GetType() == chunk.data[mapped_index].GetType()); 93 | gstate.chunk.data[storage_idx].Reference(chunk.data[mapped_index]); 94 | } 95 | } 96 | } else { 97 | // no columns specified, just append directly 98 | for (idx_t i = 0; i < gstate.chunk.ColumnCount(); i++) { 99 | D_ASSERT(gstate.chunk.data[i].GetType() == chunk.data[i].GetType()); 100 | gstate.chunk.data[i].Reference(chunk.data[i]); 101 | } 102 | } 103 | if (return_chunk) { 104 | gstate.return_collection.Append(gstate.chunk); 105 | } 106 | gstate.insert_count += gstate.chunk.size(); 107 | table.VerifyConstraints(gstate.chunk, bound_constraints); 108 | table.Insert(context.client, gstate.chunk); 109 | return SinkResultType::NEED_MORE_INPUT; 110 | } 111 | 112 | SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context, 113 | OperatorSinkFinalizeInput &input) const override { 114 | table.FinalizeInsert(); 115 | return SinkFinalizeType::READY; 116 | } 117 | 118 | unique_ptr 
GetGlobalSinkState(ClientContext &context) const override { 119 | return make_uniq(context, table.GetTypes(), bound_defaults); 120 | } 121 | 122 | bool IsSink() const override { 123 | return true; 124 | } 125 | }; 126 | 127 | unique_ptr Columnstore::PlanInsert(ClientContext &context, LogicalInsert &op, 128 | unique_ptr plan) { 129 | auto insert = make_uniq(op.types, op.estimated_cardinality, op.table.Cast(), 130 | op.column_index_map, std::move(op.bound_defaults), 131 | std::move(op.bound_constraints), op.return_chunk); 132 | insert->children.push_back(std::move(plan)); 133 | return std::move(insert); 134 | } 135 | 136 | } // namespace duckdb 137 | -------------------------------------------------------------------------------- /src/columnstore/execution/columnstore_update.cpp: -------------------------------------------------------------------------------- 1 | #include "columnstore/columnstore.hpp" 2 | #include "columnstore/columnstore_table.hpp" 3 | #include "duckdb/common/types/column/column_data_collection.hpp" 4 | #include "duckdb/execution/physical_operator.hpp" 5 | #include "duckdb/planner/operator/logical_update.hpp" 6 | 7 | namespace duckdb { 8 | 9 | class ColumnstoreUpdateSourceState : public GlobalSourceState { 10 | public: 11 | ColumnDataScanState scan_state; 12 | }; 13 | 14 | class ColumnstoreUpdateGlobalState : public GlobalSinkState { 15 | public: 16 | ColumnstoreUpdateGlobalState(ClientContext &context, const vector &types) 17 | : return_collection(context, types) { 18 | chunk.Initialize(Allocator::Get(context), types); 19 | } 20 | 21 | DataChunk chunk; 22 | unordered_set row_ids; 23 | ColumnDataCollection return_collection; 24 | }; 25 | 26 | class ColumnstoreUpdate : public PhysicalOperator { 27 | public: 28 | ColumnstoreUpdate(vector types, idx_t estimated_cardinality, ColumnstoreTable &table, 29 | vector columns, vector> bound_constraints, 30 | bool return_chunk) 31 | : PhysicalOperator(PhysicalOperatorType::EXTENSION, std::move(types), 
estimated_cardinality), table(table), 32 | columns(std::move(columns)), bound_constraints(std::move(bound_constraints)), return_chunk(return_chunk) {} 33 | 34 | ColumnstoreTable &table; 35 | vector columns; 36 | vector> bound_constraints; 37 | bool return_chunk; 38 | 39 | public: 40 | string GetName() const override { 41 | return "COLUMNSTORE_UPDATE"; 42 | } 43 | 44 | public: 45 | // Source interface 46 | unique_ptr GetGlobalSourceState(ClientContext &context) const override { 47 | auto state = make_uniq(); 48 | auto &gstate = sink_state->Cast(); 49 | if (return_chunk) { 50 | gstate.return_collection.InitializeScan(state->scan_state); 51 | } 52 | return std::move(state); 53 | } 54 | 55 | SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override { 56 | auto &state = input.global_state.Cast(); 57 | auto &gstate = sink_state->Cast(); 58 | if (!return_chunk) { 59 | chunk.SetCardinality(1); 60 | chunk.SetValue(0, 0, Value::BIGINT(NumericCast(gstate.row_ids.size()))); 61 | return SourceResultType::FINISHED; 62 | } 63 | gstate.return_collection.Scan(state.scan_state, chunk); 64 | return chunk.size() == 0 ? 
SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT; 65 | } 66 | 67 | bool IsSource() const override { 68 | return true; 69 | } 70 | 71 | public: 72 | // Sink interface 73 | SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override { 74 | auto &gstate = input.global_state.Cast(); 75 | auto &row_ids = chunk.data[chunk.ColumnCount() - 1]; 76 | row_ids.Flatten(chunk.size()); 77 | auto row_ids_data = FlatVector::GetData(row_ids); 78 | 79 | SelectionVector sel(STANDARD_VECTOR_SIZE); 80 | idx_t count = 0; 81 | for (idx_t i = 0; i < chunk.size(); i++) { 82 | row_t row_id = row_ids_data[i]; 83 | if (gstate.row_ids.find(row_id) == gstate.row_ids.end()) { 84 | gstate.row_ids.insert(row_id); 85 | sel.set_index(count++, i); 86 | } 87 | } 88 | if (count != chunk.size()) { 89 | chunk.Slice(sel, count); 90 | } 91 | 92 | gstate.chunk.SetCardinality(chunk); 93 | for (idx_t i = 0; i < columns.size(); i++) { 94 | gstate.chunk.data[columns[i].index].Reference(chunk.data[i]); 95 | } 96 | table.VerifyConstraints(gstate.chunk, bound_constraints); 97 | table.Insert(context.client, gstate.chunk); 98 | if (return_chunk) { 99 | gstate.return_collection.Append(gstate.chunk); 100 | } 101 | return SinkResultType::NEED_MORE_INPUT; 102 | } 103 | 104 | SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context, 105 | OperatorSinkFinalizeInput &input) const override { 106 | auto &gstate = input.global_state.Cast(); 107 | table.Delete(context, gstate.row_ids); 108 | return SinkFinalizeType::READY; 109 | } 110 | 111 | unique_ptr GetGlobalSinkState(ClientContext &context) const override { 112 | return make_uniq(context, table.GetTypes()); 113 | } 114 | 115 | bool IsSink() const override { 116 | return true; 117 | } 118 | }; 119 | 120 | unique_ptr Columnstore::PlanUpdate(ClientContext &context, LogicalUpdate &op, 121 | unique_ptr plan) { 122 | D_ASSERT(op.update_is_del_and_insert); 123 | auto update = 
make_uniq(op.types, op.estimated_cardinality, op.table.Cast(), 124 | std::move(op.columns), std::move(op.bound_constraints), op.return_chunk); 125 | update->children.push_back(std::move(plan)); 126 | return std::move(update); 127 | } 128 | 129 | } // namespace duckdb 130 | -------------------------------------------------------------------------------- /src/columnstore_handler.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pgduckdb/pg/declarations.hpp" 4 | 5 | bool IsColumnstoreTable(Relation rel); 6 | 7 | bool IsColumnstoreTable(Oid oid); 8 | -------------------------------------------------------------------------------- /src/lake/lake.cpp: -------------------------------------------------------------------------------- 1 | #include "columnstore/columnstore_metadata.hpp" 2 | #include "duckdb/common/unordered_set.hpp" 3 | #include "pgmooncake_guc.hpp" 4 | #include "rust_extensions/delta.hpp" 5 | 6 | #include 7 | 8 | namespace duckdb { 9 | 10 | namespace { 11 | 12 | class LakeWriter { 13 | public: 14 | LakeWriter() { 15 | DeltaInit(); 16 | } 17 | 18 | public: 19 | void CreateTable(Oid oid, const string &path) { 20 | ColumnstoreMetadata metadata(NULL /*snapshot*/); 21 | auto [table_name, column_names, column_types] = metadata.GetTableMetadata(oid); 22 | DeltaCreateTable(table_name, path, metadata.SecretsSearchDeltaOptions(path), column_names, column_types); 23 | } 24 | 25 | void ChangeFile(Oid oid, string file_name, int64_t file_size, bool is_add_file) { 26 | if (cached_table_infos.count(oid) == 0) { 27 | ColumnstoreMetadata metadata(NULL /*snapshot*/); 28 | auto [path, timeline_id] = metadata.TablesSearch(oid); 29 | cached_table_infos[oid] = {path, std::move(timeline_id), metadata.SecretsSearchDeltaOptions(path)}; 30 | } 31 | auto &files = xact_state[oid]; 32 | auto files_iter = files.find(file_name); 33 | if (files_iter == files.end()) { 34 | files.emplace(std::move(file_name), 
FileInfo{file_size, is_add_file}); 35 | } else { 36 | D_ASSERT(files_iter->second.is_add_file && !is_add_file); 37 | files.erase(files_iter); 38 | } 39 | } 40 | 41 | void Abort() { 42 | xact_state.clear(); 43 | } 44 | 45 | void Commit() { 46 | if (xact_state.empty()) { 47 | return; 48 | } 49 | for (auto &[oid, files] : xact_state) { 50 | vector file_names; 51 | file_names.reserve(files.size()); 52 | vector file_sizes; 53 | file_sizes.reserve(files.size()); 54 | vector is_add_files; 55 | is_add_files.reserve(files.size()); 56 | for (const auto &[file_name, file_info] : files) { 57 | file_names.emplace_back(file_name); 58 | file_sizes.emplace_back(file_info.file_size); 59 | is_add_files.emplace_back(file_info.is_add_file); 60 | } 61 | if (!file_names.empty()) { 62 | auto info = cached_table_infos[oid]; 63 | if (info.timeline_id == mooncake_timeline_id) { 64 | DeltaModifyFiles(info.path, info.delta_options, file_names, file_sizes, is_add_files); 65 | } 66 | } 67 | } 68 | xact_state.clear(); 69 | } 70 | 71 | private: 72 | struct CachedTableInfoEntry { 73 | string path; 74 | string timeline_id; 75 | string delta_options; 76 | }; 77 | unordered_map cached_table_infos; 78 | 79 | struct FileInfo { 80 | int64_t file_size; 81 | bool is_add_file; 82 | }; 83 | unordered_map> xact_state; 84 | }; 85 | 86 | LakeWriter lake_writer; 87 | 88 | } // namespace 89 | 90 | void LakeCreateTable(Oid oid, const string &path) { 91 | lake_writer.CreateTable(oid, path); 92 | } 93 | 94 | void LakeAddFile(Oid oid, string file_name, int64_t file_size) { 95 | lake_writer.ChangeFile(oid, std::move(file_name), file_size, true /*is_add_file*/); 96 | } 97 | 98 | void LakeDeleteFile(Oid oid, string file_name) { 99 | lake_writer.ChangeFile(oid, std::move(file_name), 0 /*file_size*/, false /*is_add_file*/); 100 | } 101 | 102 | void LakeAbort() { 103 | lake_writer.Abort(); 104 | } 105 | 106 | void LakeCommit() { 107 | lake_writer.Commit(); 108 | } 109 | 110 | } // namespace duckdb 111 | 
-------------------------------------------------------------------------------- /src/lake/lake.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/string.hpp" 4 | #include "pgduckdb/pg/declarations.hpp" 5 | 6 | namespace duckdb { 7 | 8 | void LakeCreateTable(Oid oid, const string &path); 9 | 10 | void LakeAddFile(Oid oid, string file_name, int64_t file_size); 11 | 12 | void LakeDeleteFile(Oid oid, string file_name); 13 | 14 | void LakeAbort(); 15 | 16 | void LakeCommit(); 17 | 18 | } // namespace duckdb 19 | -------------------------------------------------------------------------------- /src/pgduckdb/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | TabWidth: 4 4 | IndentWidth: 4 5 | ColumnLimit: 120 6 | AllowShortFunctionsOnASingleLine: false 7 | --- 8 | UseTab: ForIndentation 9 | DerivePointerAlignment: false 10 | PointerAlignment: Right 11 | AlignConsecutiveMacros: true 12 | AlignTrailingComments: true 13 | AllowAllArgumentsOnNextLine: true 14 | AllowAllConstructorInitializersOnNextLine: true 15 | AllowAllParametersOfDeclarationOnNextLine: true 16 | AlwaysBreakAfterReturnType: AllDefinitions 17 | AlignAfterOpenBracket: Align 18 | IncludeBlocks: Preserve 19 | IncludeCategories: # we want to ensure postgres.h appear first 20 | - Regex: '^"postgres\.h"' 21 | Priority: -2 22 | - Regex: '^"c\.h"' 23 | Priority: -1 24 | IncludeIsMainRegex: '' 25 | KeepEmptyLinesAtTheStartOfBlocks: true 26 | SortIncludes: false 27 | SpaceBeforeCpp11BracedList: true 28 | SpaceBeforeCtorInitializerColon: true 29 | SpaceBeforeInheritanceColon: true 30 | SpacesInAngles: false 31 | SpacesInCStyleCastParentheses: false 32 | SpacesInConditionalStatement: false 33 | AllowShortLambdasOnASingleLine: Inline 34 | AllowShortLoopsOnASingleLine: false 35 | AlwaysBreakTemplateDeclarations: Yes 36 | Language: Cpp 37 | AccessModifierOffset: 
-4 38 | -------------------------------------------------------------------------------- /src/pgduckdb/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2024 Stichting DuckDB Foundation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_catalog.cpp: -------------------------------------------------------------------------------- 1 | #include "columnstore/columnstore.hpp" 2 | #include "duckdb/parser/parsed_data/attach_info.hpp" 3 | #include "duckdb/parser/parsed_data/create_schema_info.hpp" 4 | #include "pgduckdb/catalog/pgduckdb_catalog.hpp" 5 | #include "pgduckdb/catalog/pgduckdb_schema.hpp" 6 | #include "pgduckdb/catalog/pgduckdb_storage.hpp" 7 | #include "pgduckdb/catalog/pgduckdb_transaction.hpp" 8 | 9 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 
10 | 11 | namespace pgduckdb { 12 | 13 | PostgresCatalog::PostgresCatalog(duckdb::AttachedDatabase &_db, const duckdb::string &connection_string, 14 | duckdb::AccessMode _access_mode) 15 | : Catalog(_db), path(connection_string), access_mode(_access_mode) { 16 | } 17 | 18 | duckdb::unique_ptr 19 | PostgresCatalog::Attach(duckdb::StorageExtensionInfo *, duckdb::ClientContext &, duckdb::AttachedDatabase &db, 20 | const duckdb::string &, duckdb::AttachInfo &info, duckdb::AccessMode access_mode) { 21 | return duckdb::make_uniq(db, info.path, access_mode); 22 | } 23 | 24 | // ------------------ Catalog API --------------------- 25 | 26 | void 27 | PostgresCatalog::Initialize(bool /*load_builtin*/) { 28 | } 29 | 30 | duckdb::string 31 | PostgresCatalog::GetCatalogType() { 32 | return "pgduckdb"; 33 | } 34 | 35 | duckdb::optional_ptr 36 | PostgresCatalog::CreateSchema(duckdb::CatalogTransaction, duckdb::CreateSchemaInfo &) { 37 | throw duckdb::NotImplementedException("CreateSchema not supported yet"); 38 | } 39 | 40 | duckdb::optional_ptr 41 | PostgresCatalog::GetSchema(duckdb::CatalogTransaction catalog_transaction, const duckdb::string &schema_name, 42 | duckdb::OnEntryNotFound, duckdb::QueryErrorContext) { 43 | auto &pg_transaction = catalog_transaction.transaction->Cast(); 44 | auto res = pg_transaction.GetCatalogEntry(duckdb::CatalogType::SCHEMA_ENTRY, schema_name, ""); 45 | D_ASSERT(res); 46 | D_ASSERT(res->type == duckdb::CatalogType::SCHEMA_ENTRY); 47 | return (duckdb::SchemaCatalogEntry *)res.get(); 48 | } 49 | 50 | void 51 | PostgresCatalog::ScanSchemas(duckdb::ClientContext &, std::function) { 52 | } 53 | 54 | duckdb::unique_ptr 55 | PostgresCatalog::PlanCreateTableAs(duckdb::ClientContext &, duckdb::LogicalCreateTable &, 56 | duckdb::unique_ptr) { 57 | throw duckdb::NotImplementedException("PlanCreateTableAs not supported yet"); 58 | } 59 | 60 | duckdb::unique_ptr 61 | PostgresCatalog::PlanInsert(duckdb::ClientContext &context, duckdb::LogicalInsert &op, 62 | 
duckdb::unique_ptr plan) { 63 | if (db.name == "pgmooncake") { 64 | return duckdb::Columnstore::PlanInsert(context, op, std::move(plan)); 65 | } 66 | throw duckdb::NotImplementedException("PlanInsert not supported yet"); 67 | } 68 | 69 | duckdb::unique_ptr 70 | PostgresCatalog::PlanDelete(duckdb::ClientContext &context, duckdb::LogicalDelete &op, 71 | duckdb::unique_ptr plan) { 72 | if (db.name == "pgmooncake") { 73 | return duckdb::Columnstore::PlanDelete(context, op, std::move(plan)); 74 | } 75 | throw duckdb::NotImplementedException("PlanDelete not supported yet"); 76 | } 77 | 78 | duckdb::unique_ptr 79 | PostgresCatalog::PlanUpdate(duckdb::ClientContext &context, duckdb::LogicalUpdate &op, 80 | duckdb::unique_ptr plan) { 81 | if (db.name == "pgmooncake") { 82 | return duckdb::Columnstore::PlanUpdate(context, op, std::move(plan)); 83 | } 84 | throw duckdb::NotImplementedException("PlanUpdate not supported yet"); 85 | } 86 | 87 | duckdb::unique_ptr 88 | PostgresCatalog::BindCreateIndex(duckdb::Binder &, duckdb::CreateStatement &, duckdb::TableCatalogEntry &, 89 | duckdb::unique_ptr) { 90 | throw duckdb::NotImplementedException("BindCreateIndex not supported yet"); 91 | } 92 | 93 | duckdb::DatabaseSize 94 | PostgresCatalog::GetDatabaseSize(duckdb::ClientContext &) { 95 | throw duckdb::NotImplementedException("GetDatabaseSize not supported yet"); 96 | } 97 | 98 | bool 99 | PostgresCatalog::InMemory() { 100 | return false; 101 | } 102 | 103 | duckdb::string 104 | PostgresCatalog::GetDBPath() { 105 | return path; 106 | } 107 | 108 | void 109 | PostgresCatalog::DropSchema(duckdb::ClientContext &, duckdb::DropInfo &) { 110 | throw duckdb::NotImplementedException("DropSchema not supported yet"); 111 | } 112 | 113 | } // namespace pgduckdb 114 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_catalog.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 
| #include "duckdb/storage/storage_extension.hpp" 4 | #include "duckdb/catalog/catalog.hpp" 5 | 6 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 7 | 8 | namespace pgduckdb { 9 | 10 | class PostgresSchema; 11 | 12 | class PostgresCatalog : public duckdb::Catalog { 13 | public: 14 | PostgresCatalog(duckdb::AttachedDatabase &db, const duckdb::string &connection_string, 15 | duckdb::AccessMode access_mode); 16 | 17 | public: 18 | static duckdb::unique_ptr Attach(duckdb::StorageExtensionInfo *storage_info, 19 | duckdb::ClientContext &context, duckdb::AttachedDatabase &db, 20 | const duckdb::string &name, duckdb::AttachInfo &info, 21 | duckdb::AccessMode access_mode); 22 | 23 | public: 24 | duckdb::string path; 25 | duckdb::AccessMode access_mode; 26 | 27 | public: 28 | // -- Catalog API -- 29 | void Initialize(bool load_builtin) override; 30 | duckdb::string GetCatalogType() override; 31 | duckdb::optional_ptr CreateSchema(duckdb::CatalogTransaction transaction, 32 | duckdb::CreateSchemaInfo &info) override; 33 | duckdb::optional_ptr 34 | GetSchema(duckdb::CatalogTransaction transaction, const duckdb::string &schema_name, 35 | duckdb::OnEntryNotFound if_not_found, 36 | duckdb::QueryErrorContext error_context = duckdb::QueryErrorContext()) override; 37 | void ScanSchemas(duckdb::ClientContext &context, 38 | std::function callback) override; 39 | duckdb::unique_ptr 40 | PlanCreateTableAs(duckdb::ClientContext &context, duckdb::LogicalCreateTable &op, 41 | duckdb::unique_ptr plan) override; 42 | duckdb::unique_ptr PlanInsert(duckdb::ClientContext &context, duckdb::LogicalInsert &op, 43 | duckdb::unique_ptr plan) override; 44 | duckdb::unique_ptr PlanDelete(duckdb::ClientContext &context, duckdb::LogicalDelete &op, 45 | duckdb::unique_ptr plan) override; 46 | duckdb::unique_ptr PlanUpdate(duckdb::ClientContext &context, duckdb::LogicalUpdate &op, 47 | duckdb::unique_ptr plan) override; 48 | duckdb::unique_ptr 49 | BindCreateIndex(duckdb::Binder 
&binder, duckdb::CreateStatement &stmt, duckdb::TableCatalogEntry &table, 50 | duckdb::unique_ptr plan) override; 51 | duckdb::DatabaseSize GetDatabaseSize(duckdb::ClientContext &context) override; 52 | bool InMemory() override; 53 | duckdb::string GetDBPath() override; 54 | void DropSchema(duckdb::ClientContext &context, duckdb::DropInfo &info) override; 55 | 56 | private: 57 | duckdb::case_insensitive_map_t> schemas; 58 | }; 59 | 60 | } // namespace pgduckdb 61 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_schema.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/catalog/pgduckdb_schema.hpp" 2 | #include "pgduckdb/catalog/pgduckdb_table.hpp" 3 | #include "pgduckdb/catalog/pgduckdb_transaction.hpp" 4 | #include "duckdb/parser/parsed_data/create_table_info.hpp" 5 | 6 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 7 | 8 | namespace pgduckdb { 9 | 10 | PostgresSchema::PostgresSchema(duckdb::Catalog &_catalog, duckdb::CreateSchemaInfo &_info, Snapshot _snapshot) 11 | : SchemaCatalogEntry(_catalog, _info), snapshot(_snapshot), catalog(_catalog) { 12 | } 13 | 14 | void 15 | PostgresSchema::Scan(duckdb::ClientContext &, duckdb::CatalogType, const std::function &) { 16 | } 17 | 18 | void 19 | PostgresSchema::Scan(duckdb::CatalogType, const std::function &) { 20 | throw duckdb::NotImplementedException("Scan(no context) not supported yet"); 21 | } 22 | 23 | duckdb::optional_ptr 24 | PostgresSchema::CreateIndex(duckdb::CatalogTransaction, duckdb::CreateIndexInfo &, duckdb::TableCatalogEntry &) { 25 | throw duckdb::NotImplementedException("CreateIndex not supported yet"); 26 | } 27 | 28 | duckdb::optional_ptr 29 | PostgresSchema::CreateFunction(duckdb::CatalogTransaction, duckdb::CreateFunctionInfo &) { 30 | throw duckdb::NotImplementedException("CreateFunction not supported yet"); 31 | } 32 | 33 | duckdb::optional_ptr 34 | 
PostgresSchema::CreateTable(duckdb::CatalogTransaction, duckdb::BoundCreateTableInfo &) { 35 | throw duckdb::NotImplementedException("CreateTable not supported yet"); 36 | } 37 | 38 | duckdb::optional_ptr 39 | PostgresSchema::CreateView(duckdb::CatalogTransaction, duckdb::CreateViewInfo &) { 40 | throw duckdb::NotImplementedException("CreateView not supported yet"); 41 | } 42 | 43 | duckdb::optional_ptr 44 | PostgresSchema::CreateSequence(duckdb::CatalogTransaction, duckdb::CreateSequenceInfo &) { 45 | throw duckdb::NotImplementedException("CreateSequence not supported yet"); 46 | } 47 | 48 | duckdb::optional_ptr 49 | PostgresSchema::CreateTableFunction(duckdb::CatalogTransaction, duckdb::CreateTableFunctionInfo &) { 50 | throw duckdb::NotImplementedException("CreateTableFunction not supported yet"); 51 | } 52 | 53 | duckdb::optional_ptr 54 | PostgresSchema::CreateCopyFunction(duckdb::CatalogTransaction, duckdb::CreateCopyFunctionInfo &) { 55 | throw duckdb::NotImplementedException("CreateCopyFunction not supported yet"); 56 | } 57 | 58 | duckdb::optional_ptr 59 | PostgresSchema::CreatePragmaFunction(duckdb::CatalogTransaction, duckdb::CreatePragmaFunctionInfo &) { 60 | throw duckdb::NotImplementedException("CreatePragmaFunction not supported yet"); 61 | } 62 | 63 | duckdb::optional_ptr 64 | PostgresSchema::CreateCollation(duckdb::CatalogTransaction, duckdb::CreateCollationInfo &) { 65 | throw duckdb::NotImplementedException("CreateCollation not supported yet"); 66 | } 67 | 68 | duckdb::optional_ptr 69 | PostgresSchema::CreateType(duckdb::CatalogTransaction, duckdb::CreateTypeInfo &) { 70 | throw duckdb::NotImplementedException("CreateType not supported yet"); 71 | } 72 | 73 | duckdb::optional_ptr 74 | PostgresSchema::GetEntry(duckdb::CatalogTransaction _catalog_transaction, duckdb::CatalogType _type, 75 | const duckdb::string &_entry_name) { 76 | auto &pg_transaction = _catalog_transaction.transaction->Cast(); 77 | return pg_transaction.GetCatalogEntry(_type, 
name, _entry_name); 78 | } 79 | 80 | void 81 | PostgresSchema::DropEntry(duckdb::ClientContext &, duckdb::DropInfo &) { 82 | throw duckdb::NotImplementedException("DropEntry not supported yet"); 83 | } 84 | 85 | void 86 | PostgresSchema::Alter(duckdb::CatalogTransaction, duckdb::AlterInfo &) { 87 | throw duckdb::NotImplementedException("Alter not supported yet"); 88 | } 89 | 90 | } // namespace pgduckdb 91 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_schema.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp" 4 | #include "pgduckdb/pg/declarations.hpp" 5 | 6 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 7 | 8 | namespace pgduckdb { 9 | 10 | class PostgresSchema : public duckdb::SchemaCatalogEntry { 11 | public: 12 | PostgresSchema(duckdb::Catalog &catalog, duckdb::CreateSchemaInfo &info, Snapshot snapshot); 13 | 14 | public: 15 | // -- Schema API -- 16 | void Scan(duckdb::ClientContext &context, duckdb::CatalogType type, 17 | const std::function &callback) override; 18 | void Scan(duckdb::CatalogType type, const std::function &callback) override; 19 | duckdb::optional_ptr CreateIndex(duckdb::CatalogTransaction transaction, 20 | duckdb::CreateIndexInfo &info, 21 | duckdb::TableCatalogEntry &table) override; 22 | duckdb::optional_ptr CreateFunction(duckdb::CatalogTransaction transaction, 23 | duckdb::CreateFunctionInfo &info) override; 24 | duckdb::optional_ptr CreateTable(duckdb::CatalogTransaction transaction, 25 | duckdb::BoundCreateTableInfo &info) override; 26 | duckdb::optional_ptr CreateView(duckdb::CatalogTransaction transaction, 27 | duckdb::CreateViewInfo &info) override; 28 | duckdb::optional_ptr CreateSequence(duckdb::CatalogTransaction transaction, 29 | duckdb::CreateSequenceInfo &info) override; 30 | duckdb::optional_ptr 
CreateTableFunction(duckdb::CatalogTransaction transaction, 31 | duckdb::CreateTableFunctionInfo &info) override; 32 | duckdb::optional_ptr CreateCopyFunction(duckdb::CatalogTransaction transaction, 33 | duckdb::CreateCopyFunctionInfo &info) override; 34 | duckdb::optional_ptr CreatePragmaFunction(duckdb::CatalogTransaction transaction, 35 | duckdb::CreatePragmaFunctionInfo &info) override; 36 | duckdb::optional_ptr CreateCollation(duckdb::CatalogTransaction transaction, 37 | duckdb::CreateCollationInfo &info) override; 38 | duckdb::optional_ptr CreateType(duckdb::CatalogTransaction transaction, 39 | duckdb::CreateTypeInfo &info) override; 40 | duckdb::optional_ptr GetEntry(duckdb::CatalogTransaction transaction, 41 | duckdb::CatalogType type, const duckdb::string &name) override; 42 | void DropEntry(duckdb::ClientContext &context, duckdb::DropInfo &info) override; 43 | void Alter(duckdb::CatalogTransaction transaction, duckdb::AlterInfo &info) override; 44 | 45 | public: 46 | Snapshot snapshot; 47 | duckdb::Catalog &catalog; 48 | }; 49 | 50 | } // namespace pgduckdb 51 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_storage.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/catalog/pgduckdb_storage.hpp" 2 | #include "pgduckdb/catalog/pgduckdb_catalog.hpp" 3 | #include "pgduckdb/catalog/pgduckdb_transaction_manager.hpp" 4 | 5 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 
6 | 7 | namespace pgduckdb { 8 | 9 | static duckdb::unique_ptr 10 | CreateTransactionManager(duckdb::StorageExtensionInfo *, duckdb::AttachedDatabase &db, duckdb::Catalog &catalog) { 11 | return duckdb::make_uniq(db, catalog.Cast()); 12 | } 13 | 14 | PostgresStorageExtension::PostgresStorageExtension() { 15 | attach = PostgresCatalog::Attach; 16 | create_transaction_manager = CreateTransactionManager; 17 | } 18 | 19 | } // namespace pgduckdb 20 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_storage.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/storage/storage_extension.hpp" 4 | 5 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 6 | 7 | namespace pgduckdb { 8 | 9 | class PostgresStorageExtension : public duckdb::StorageExtension { 10 | public: 11 | PostgresStorageExtension(); 12 | }; 13 | 14 | } // namespace pgduckdb 15 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_table.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/catalog/pgduckdb_table.hpp" 2 | 3 | #include "pgduckdb/catalog/pgduckdb_schema.hpp" 4 | #include "pgduckdb/logger.hpp" 5 | #include "pgduckdb/pg/relations.hpp" 6 | #include "pgduckdb/pgduckdb_process_lock.hpp" 7 | #include "pgduckdb/pgduckdb_types.hpp" // ConvertPostgresToDuckColumnType 8 | #include "pgduckdb/scan/postgres_seq_scan.hpp" 9 | 10 | #include "duckdb/parser/constraints/not_null_constraint.hpp" 11 | #include "duckdb/parser/expression/constant_expression.hpp" 12 | #include "duckdb/parser/expression/function_expression.hpp" 13 | #include "duckdb/parser/parsed_data/create_table_info.hpp" 14 | 15 | extern "C" { 16 | #include "postgres.h" 17 | 18 | #include "catalog/dependency.h" 19 | #include "utils/rel.h" 20 | } 21 | 22 | namespace pgduckdb { 23 | 
24 | PostgresTable::PostgresTable(duckdb::Catalog &_catalog, duckdb::SchemaCatalogEntry &_schema, 25 | duckdb::CreateTableInfo &_info, Relation _rel, Cardinality _cardinality, 26 | Snapshot _snapshot) 27 | : duckdb::TableCatalogEntry(_catalog, _schema, _info), rel(_rel), cardinality(_cardinality), snapshot(_snapshot) { 28 | } 29 | 30 | PostgresTable::~PostgresTable() { 31 | std::lock_guard lock(DuckdbProcessLock::GetLock()); 32 | CloseRelation(rel); 33 | } 34 | 35 | Relation 36 | PostgresTable::OpenRelation(Oid relid) { 37 | std::lock_guard lock(DuckdbProcessLock::GetLock()); 38 | return pgduckdb::OpenRelation(relid); 39 | } 40 | 41 | void 42 | PostgresTable::SetTableInfo(duckdb::CreateTableInfo &info, Relation rel, bool setDefaultValue) { 43 | using namespace duckdb; 44 | 45 | auto tupleDesc = RelationGetDescr(rel); 46 | int defval_index = 0; 47 | 48 | const auto n = GetTupleDescNatts(tupleDesc); 49 | for (int i = 0; i < n; ++i) { 50 | Form_pg_attribute attr = GetAttr(tupleDesc, i); 51 | auto col_name = duckdb::string(GetAttName(attr)); 52 | auto duck_type = ConvertPostgresToDuckColumnType(attr); 53 | ColumnDefinition column(col_name, duck_type); 54 | if (setDefaultValue) { 55 | if (attr->atthasdef) { 56 | Assert(tupleDesc->constr); 57 | Assert(tupleDesc->constr->defval); 58 | Assert(defval_index < tupleDesc->constr->num_defval); 59 | AttrDefault &defval = tupleDesc->constr->defval[defval_index++]; 60 | Assert(defval.adnum == i + 1); 61 | Node *node = static_cast(stringToNode(defval.adbin)); 62 | if (!IsA(node, Const)) { 63 | throw duckdb::NotImplementedException("column \"%s\" has unsupported default value ", 64 | NameStr(attr->attname)); 65 | } 66 | Const *val = castNode(Const, node); 67 | if (val->constisnull) { 68 | column.SetDefaultValue(make_uniq(duckdb::Value(duck_type))); 69 | } else { 70 | column.SetDefaultValue(make_uniq( 71 | pgduckdb::ConvertPostgresParameterToDuckValue(val->constvalue, val->consttype))); 72 | } 73 | } else if (attr->attidentity) { 74 
| #if PG_VERSION_NUM >= 170000 75 | Oid seqid = getIdentitySequence(rel, i + 1, false /*missing_ok*/); 76 | #else 77 | Oid seqid = getIdentitySequence(RelationGetRelid(rel), i + 1, false /*missing_ok*/); 78 | #endif 79 | vector> children; 80 | children.push_back(make_uniq(duckdb::Value::UINTEGER(seqid))); 81 | column.SetDefaultValue(make_uniq("pg_nextval", std::move(children))); 82 | } 83 | } 84 | info.columns.AddColumn(std::move(column)); 85 | if (attr->attnotnull) { 86 | info.constraints.push_back(make_uniq(LogicalIndex(i))); 87 | } 88 | /* Log column name and type */ 89 | pd_log(DEBUG2, "(DuckDB/SetTableInfo) Column name: %s, Type: %s --", col_name.c_str(), 90 | duck_type.ToString().c_str()); 91 | } 92 | } 93 | 94 | Cardinality 95 | PostgresTable::GetTableCardinality(Relation rel) { 96 | Cardinality cardinality; 97 | BlockNumber n_pages; 98 | double allvisfrac; 99 | EstimateRelSize(rel, NULL, &n_pages, &cardinality, &allvisfrac); 100 | return cardinality; 101 | } 102 | 103 | //===--------------------------------------------------------------------===// 104 | // PostgresHeapTable 105 | //===--------------------------------------------------------------------===// 106 | 107 | PostgresHeapTable::PostgresHeapTable(duckdb::Catalog &_catalog, duckdb::SchemaCatalogEntry &_schema, 108 | duckdb::CreateTableInfo &_info, Relation _rel, Cardinality _cardinality, 109 | Snapshot _snapshot) 110 | : PostgresTable(_catalog, _schema, _info, _rel, _cardinality, _snapshot) { 111 | } 112 | 113 | duckdb::unique_ptr 114 | PostgresHeapTable::GetStatistics(duckdb::ClientContext &, duckdb::column_t) { 115 | throw duckdb::NotImplementedException("GetStatistics not supported yet"); 116 | } 117 | 118 | duckdb::TableFunction 119 | PostgresHeapTable::GetScanFunction(duckdb::ClientContext &, duckdb::unique_ptr &bind_data) { 120 | bind_data = duckdb::make_uniq(rel, cardinality, snapshot); 121 | return PostgresSeqScanFunction(); 122 | } 123 | 124 | duckdb::TableStorageInfo 125 | 
PostgresHeapTable::GetStorageInfo(duckdb::ClientContext &) { 126 | throw duckdb::NotImplementedException("GetStorageInfo not supported yet"); 127 | } 128 | 129 | } // namespace pgduckdb 130 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" 4 | #include "duckdb/storage/table_storage_info.hpp" 5 | 6 | #include "pgduckdb/pg/declarations.hpp" 7 | 8 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 9 | 10 | namespace pgduckdb { 11 | 12 | class PostgresTable : public duckdb::TableCatalogEntry { 13 | public: 14 | virtual ~PostgresTable(); 15 | 16 | public: 17 | static Relation OpenRelation(Oid relid); 18 | static void SetTableInfo(duckdb::CreateTableInfo &info, Relation rel, bool setDefaultValue); 19 | static Cardinality GetTableCardinality(Relation rel); 20 | 21 | protected: 22 | PostgresTable(duckdb::Catalog &catalog, duckdb::SchemaCatalogEntry &schema, duckdb::CreateTableInfo &info, 23 | Relation rel, Cardinality cardinality, Snapshot snapshot); 24 | 25 | protected: 26 | Relation rel; 27 | Cardinality cardinality; 28 | Snapshot snapshot; 29 | }; 30 | 31 | class PostgresHeapTable : public PostgresTable { 32 | public: 33 | PostgresHeapTable(duckdb::Catalog &catalog, duckdb::SchemaCatalogEntry &schema, duckdb::CreateTableInfo &info, 34 | Relation rel, Cardinality cardinality, Snapshot snapshot); 35 | 36 | public: 37 | // -- Table API -- 38 | duckdb::unique_ptr GetStatistics(duckdb::ClientContext &context, 39 | duckdb::column_t column_id) override; 40 | duckdb::TableFunction GetScanFunction(duckdb::ClientContext &context, 41 | duckdb::unique_ptr &bind_data) override; 42 | duckdb::TableStorageInfo GetStorageInfo(duckdb::ClientContext &context) override; 43 | }; 44 | 45 | } // namespace pgduckdb 46 | 
-------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_transaction.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/catalog/pgduckdb_catalog.hpp" 2 | #include "pgduckdb/catalog/pgduckdb_schema.hpp" 3 | #include "pgduckdb/catalog/pgduckdb_transaction.hpp" 4 | #include "pgduckdb/catalog/pgduckdb_table.hpp" 5 | #include "pgduckdb/scan/postgres_scan.hpp" 6 | #include "pgduckdb/pg/relations.hpp" 7 | 8 | #include "columnstore/columnstore_table.hpp" 9 | #include "columnstore_handler.hpp" 10 | #include "duckdb/parser/parsed_data/create_table_info.hpp" 11 | #include "duckdb/parser/parsed_data/create_schema_info.hpp" 12 | #include "duckdb/catalog/catalog.hpp" 13 | 14 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 15 | 16 | namespace pgduckdb { 17 | 18 | void 19 | ClosePostgresRelations(duckdb::ClientContext &context) { 20 | auto context_state = context.registered_state->GetOrCreate("pgduckdb"); 21 | context_state->QueryEnd(); 22 | } 23 | 24 | PostgresTransaction::PostgresTransaction(duckdb::TransactionManager &_manager, duckdb::ClientContext &_context, 25 | PostgresCatalog &_catalog, Snapshot _snapshot) 26 | : duckdb::Transaction(_manager, _context), catalog(_catalog), snapshot(_snapshot) { 27 | } 28 | 29 | PostgresTransaction::~PostgresTransaction() { 30 | } 31 | 32 | SchemaItems::SchemaItems(duckdb::unique_ptr &&_schema, const duckdb::string &_name) 33 | : name(_name), schema(std::move(_schema)) { 34 | } 35 | 36 | duckdb::optional_ptr 37 | SchemaItems::GetTable(const duckdb::string &entry_name) { 38 | auto it = tables.find(entry_name); 39 | if (it != tables.end()) { 40 | return it->second.get(); 41 | } 42 | 43 | Oid rel_oid = GetRelidFromSchemaAndTable(name.c_str(), entry_name.c_str()); 44 | if (!IsValidOid(rel_oid)) { 45 | return nullptr; // Table could not be found 46 | } 47 | 48 | Relation rel = 
PostgresTable::OpenRelation(rel_oid); 49 | if (IsRelView(rel)) { 50 | // Let the replacement scan handle this, the ReplacementScan replaces the view with its view_definition, which 51 | // will get bound again and hit a PostgresIndexTable / PostgresHeapTable. 52 | return nullptr; 53 | } 54 | 55 | duckdb::CreateTableInfo info; 56 | info.table = entry_name; 57 | 58 | if (IsColumnstoreTable(rel)) { 59 | PostgresTable::SetTableInfo(info, rel, true /*setDefaultValue*/); 60 | CloseRelation(rel); 61 | tables.emplace(entry_name, duckdb::make_uniq(schema->catalog, *schema, info, rel_oid, 62 | schema->snapshot)); 63 | } else { 64 | PostgresTable::SetTableInfo(info, rel, false /*setDefaultValue*/); 65 | auto cardinality = PostgresTable::GetTableCardinality(rel); 66 | tables.emplace(entry_name, duckdb::make_uniq(schema->catalog, *schema, info, rel, 67 | cardinality, schema->snapshot)); 68 | } 69 | return tables[entry_name].get(); 70 | } 71 | 72 | duckdb::optional_ptr 73 | SchemaItems::GetSchema() const { 74 | return schema.get(); 75 | } 76 | 77 | duckdb::optional_ptr 78 | PostgresTransaction::GetSchema(const duckdb::string &name) { 79 | auto context_state = context.lock()->registered_state->GetOrCreate("pgduckdb"); 80 | auto schemas = &context_state->schemas; 81 | auto it = schemas->find(name); 82 | if (it != schemas->end()) { 83 | return it->second.GetSchema(); 84 | } 85 | 86 | duckdb::CreateSchemaInfo create_schema; 87 | create_schema.schema = name; 88 | auto pg_schema = duckdb::make_uniq(catalog, create_schema, snapshot); 89 | schemas->emplace(std::make_pair(name, SchemaItems(std::move(pg_schema), name))); 90 | return schemas->at(name).GetSchema(); 91 | } 92 | 93 | void 94 | PostgresContextState::QueryEnd() { 95 | schemas.clear(); 96 | } 97 | 98 | duckdb::optional_ptr 99 | PostgresTransaction::GetCatalogEntry(duckdb::CatalogType type, const duckdb::string &schema, 100 | const duckdb::string &name) { 101 | switch (type) { 102 | case duckdb::CatalogType::TABLE_ENTRY: { 103 | 
auto context_state = context.lock()->registered_state->GetOrCreate("pgduckdb"); 104 | auto schemas = &context_state->schemas; 105 | auto it = schemas->find(schema); 106 | if (it == schemas->end()) { 107 | return nullptr; 108 | } 109 | 110 | auto &schema_entry = it->second; 111 | return schema_entry.GetTable(name); 112 | } 113 | case duckdb::CatalogType::SCHEMA_ENTRY: { 114 | return GetSchema(schema); 115 | } 116 | default: 117 | return nullptr; 118 | } 119 | } 120 | 121 | } // namespace pgduckdb 122 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_transaction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/main/client_context.hpp" 4 | #include "duckdb/main/client_context_state.hpp" 5 | #include "duckdb/transaction/transaction.hpp" 6 | #include "pgduckdb/pg/declarations.hpp" 7 | 8 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 
9 | 10 | namespace pgduckdb { 11 | 12 | class PostgresCatalog; 13 | class PostgresSchema; 14 | class PostgresTable; 15 | 16 | void ClosePostgresRelations(duckdb::ClientContext &context); 17 | 18 | class SchemaItems { 19 | public: 20 | SchemaItems(duckdb::unique_ptr &&schema, const duckdb::string &name); 21 | 22 | duckdb::optional_ptr GetTable(const duckdb::string &name); 23 | 24 | duckdb::optional_ptr GetSchema() const; 25 | 26 | private: 27 | duckdb::string name; 28 | duckdb::unique_ptr schema; 29 | duckdb::case_insensitive_map_t> tables; 30 | }; 31 | 32 | class PostgresContextState : public duckdb::ClientContextState { 33 | public: 34 | duckdb::case_insensitive_map_t schemas; 35 | void QueryEnd() override; 36 | }; 37 | 38 | class PostgresTransaction : public duckdb::Transaction { 39 | public: 40 | PostgresTransaction(duckdb::TransactionManager &manager, duckdb::ClientContext &context, PostgresCatalog &catalog, 41 | Snapshot snapshot); 42 | ~PostgresTransaction() override; 43 | 44 | duckdb::optional_ptr GetCatalogEntry(duckdb::CatalogType type, const duckdb::string &schema, 45 | const duckdb::string &name); 46 | 47 | private: 48 | duckdb::optional_ptr GetSchema(const duckdb::string &name); 49 | 50 | PostgresCatalog &catalog; 51 | Snapshot snapshot; 52 | }; 53 | 54 | } // namespace pgduckdb 55 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_transaction_manager.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/catalog/pgduckdb_transaction_manager.hpp" 2 | #include "duckdb/main/client_context.hpp" 3 | #include "pgduckdb/catalog/pgduckdb_transaction.hpp" 4 | #include "pgduckdb/pg/snapshots.hpp" 5 | #include "pgduckdb/pgduckdb_process_lock.hpp" 6 | 7 | #include "duckdb/main/attached_database.hpp" 8 | 9 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 
10 | 11 | namespace pgduckdb { 12 | 13 | PostgresTransactionManager::PostgresTransactionManager(duckdb::AttachedDatabase &_db_p, PostgresCatalog &_catalog) 14 | : TransactionManager(_db_p), catalog(_catalog) { 15 | } 16 | 17 | duckdb::Transaction & 18 | PostgresTransactionManager::StartTransaction(duckdb::ClientContext &context) { 19 | auto transaction = duckdb::make_uniq(*this, context, catalog, GetActiveSnapshot()); 20 | auto &result = *transaction; 21 | duckdb::lock_guard l(transaction_lock); 22 | transactions[result] = std::move(transaction); 23 | return result; 24 | } 25 | 26 | duckdb::ErrorData 27 | PostgresTransactionManager::CommitTransaction(duckdb::ClientContext &context, duckdb::Transaction &transaction) { 28 | duckdb::lock_guard l(transaction_lock); 29 | ClosePostgresRelations(context); 30 | transactions.erase(transaction); 31 | return duckdb::ErrorData(); 32 | } 33 | 34 | void 35 | PostgresTransactionManager::RollbackTransaction(duckdb::Transaction &transaction) { 36 | duckdb::lock_guard l(transaction_lock); 37 | duckdb::shared_ptr context = transaction.context.lock(); 38 | if (context) { 39 | ClosePostgresRelations(*context); 40 | } 41 | transactions.erase(transaction); 42 | } 43 | 44 | void 45 | PostgresTransactionManager::Checkpoint(duckdb::ClientContext &, bool /*force*/) { 46 | } 47 | 48 | } // namespace pgduckdb 49 | -------------------------------------------------------------------------------- /src/pgduckdb/catalog/pgduckdb_transaction_manager.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/transaction/transaction_manager.hpp" 4 | #include "duckdb/common/reference_map.hpp" 5 | #include "pgduckdb/pg/declarations.hpp" 6 | 7 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 
8 | 9 | namespace pgduckdb { 10 | 11 | class PostgresCatalog; 12 | class PostgresTransaction; 13 | 14 | class PostgresTransactionManager : public duckdb::TransactionManager { 15 | public: 16 | PostgresTransactionManager(duckdb::AttachedDatabase &db_p, PostgresCatalog &catalog); 17 | 18 | duckdb::Transaction &StartTransaction(duckdb::ClientContext &context) override; 19 | duckdb::ErrorData CommitTransaction(duckdb::ClientContext &context, duckdb::Transaction &transaction) override; 20 | void RollbackTransaction(duckdb::Transaction &transaction) override; 21 | 22 | void Checkpoint(duckdb::ClientContext &context, bool force = false) override; 23 | 24 | private: 25 | PostgresCatalog &catalog; 26 | duckdb::mutex transaction_lock; 27 | duckdb::reference_map_t> transactions; 28 | }; 29 | 30 | } // namespace pgduckdb 31 | -------------------------------------------------------------------------------- /src/pgduckdb/logger.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pgduckdb/pgduckdb_process_lock.hpp" 4 | 5 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 
6 | 7 | extern "C" { 8 | bool errstart(int elevel, const char *domain); 9 | void errfinish(const char *filename, int lineno, const char *funcname); 10 | int errmsg_internal(const char *fmt, ...); 11 | bool message_level_is_interesting(int elevel); 12 | } 13 | 14 | namespace pgduckdb { 15 | 16 | /* PG Error level codes */ 17 | #define DEBUG5 10 18 | #define DEBUG4 11 19 | #define DEBUG3 12 20 | #define DEBUG2 13 21 | #define DEBUG1 14 22 | #define LOG 15 23 | #define INFO 17 24 | #define NOTICE 18 25 | #define WARNING 19 26 | #define PGWARNING 19 27 | #define WARNING_CLIENT_ONLY 20 28 | 29 | // From PG elog.h 30 | #ifdef __GNUC__ 31 | #define pg_attribute_unused() __attribute__((unused)) 32 | #else 33 | #define pg_attribute_unused() 34 | #endif 35 | 36 | #if defined(errno) && defined(__linux__) 37 | #define pd_prevent_errno_in_scope() int __errno_location pg_attribute_unused() 38 | #elif defined(errno) && (defined(__darwin__) || defined(__FreeBSD__)) 39 | #define pd_prevent_errno_in_scope() int __error pg_attribute_unused() 40 | #else 41 | #define pd_prevent_errno_in_scope() 42 | #endif 43 | 44 | #define pd_ereport_domain(elevel, domain, ...) \ 45 | do { \ 46 | pd_prevent_errno_in_scope(); \ 47 | static_assert(elevel >= DEBUG5 && elevel <= WARNING_CLIENT_ONLY, "Invalid error level"); \ 48 | if (message_level_is_interesting(elevel)) { \ 49 | std::lock_guard lock(DuckdbProcessLock::GetLock()); \ 50 | if (errstart(elevel, domain)) \ 51 | __VA_ARGS__, errfinish(__FILE__, __LINE__, __func__); \ 52 | } \ 53 | } while (0) 54 | 55 | #define TEXTDOMAIN NULL 56 | 57 | #define pd_ereport(elevel, ...) pd_ereport_domain(elevel, TEXTDOMAIN, __VA_ARGS__) 58 | 59 | #define pd_log(elevel, ...) 
pd_ereport(elevel, errmsg_internal(__VA_ARGS__)) 60 | 61 | } // namespace pgduckdb 62 | -------------------------------------------------------------------------------- /src/pgduckdb/pg/declarations.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | This file contains a few Postgres declarations. 7 | 8 | This is meant to be used in files that are mostly C++, but 9 | need to interact with Postgres C code (eg. catalog implementation). 10 | 11 | It should not include any C++ code, only Postgres C declarations. 12 | */ 13 | 14 | extern "C" { 15 | typedef int16_t AttrNumber; 16 | 17 | typedef uint32_t BlockNumber; 18 | 19 | typedef int Buffer; 20 | 21 | struct BufferAccessStrategyData; 22 | typedef struct BufferAccessStrategyData *BufferAccessStrategy; 23 | 24 | typedef double Cardinality; 25 | 26 | typedef uintptr_t Datum; 27 | 28 | struct FormData_pg_attribute; 29 | typedef FormData_pg_attribute *Form_pg_attribute; 30 | 31 | struct FormData_pg_class; 32 | typedef FormData_pg_class *Form_pg_class; 33 | 34 | struct HeapTupleData; 35 | typedef HeapTupleData *HeapTuple; 36 | 37 | struct Node; 38 | 39 | typedef uint16_t OffsetNumber; 40 | 41 | typedef unsigned int Oid; 42 | 43 | struct ParamListInfoData; 44 | typedef struct ParamListInfoData *ParamListInfo; 45 | 46 | struct PlannedStmt; 47 | 48 | typedef char *Pointer; 49 | typedef Pointer Page; 50 | 51 | struct Query; 52 | 53 | struct RelationData; 54 | typedef struct RelationData *Relation; 55 | 56 | struct SnapshotData; 57 | typedef struct SnapshotData *Snapshot; 58 | 59 | struct TupleDescData; 60 | typedef struct TupleDescData *TupleDesc; 61 | 62 | struct TupleTableSlot; 63 | 64 | struct TableAmRoutine; 65 | 66 | typedef uint32_t CommandId; 67 | 68 | typedef uint32_t SubTransactionId; 69 | } 70 | -------------------------------------------------------------------------------- /src/pgduckdb/pg/error_data.cpp: 
-------------------------------------------------------------------------------- 1 | #include "pgduckdb/pg/error_data.hpp" 2 | 3 | extern "C" { 4 | #include "postgres.h" 5 | } 6 | 7 | namespace pgduckdb::pg { 8 | const char * 9 | GetErrorDataMessage(ErrorData *error_data) { 10 | return error_data->message; 11 | } 12 | } // namespace pgduckdb::pg 13 | -------------------------------------------------------------------------------- /src/pgduckdb/pg/error_data.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" { 4 | struct ErrorData; 5 | } 6 | 7 | namespace pgduckdb::pg { 8 | const char *GetErrorDataMessage(ErrorData *error_data); 9 | } 10 | -------------------------------------------------------------------------------- /src/pgduckdb/pg/relations.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/pg/relations.hpp" 2 | 3 | #include "pgduckdb/pgduckdb_utils.hpp" 4 | 5 | extern "C" { 6 | #include "postgres.h" 7 | #include "access/htup_details.h" // GETSTRUCT 8 | #include "access/relation.h" // relation_open and relation_close 9 | #include "catalog/namespace.h" // makeRangeVarFromNameList, RangeVarGetRelid 10 | #include "optimizer/plancat.h" // estimate_rel_size 11 | #include "utils/rel.h" 12 | #include "utils/resowner.h" // CurrentResourceOwner and TopTransactionResourceOwner 13 | #include "utils/syscache.h" // RELOID 14 | } 15 | 16 | namespace pgduckdb { 17 | 18 | #undef RelationGetDescr 19 | 20 | TupleDesc 21 | RelationGetDescr(Relation rel) { 22 | return rel->rd_att; 23 | } 24 | 25 | int 26 | GetTupleDescNatts(const TupleDesc tupleDesc) { 27 | return tupleDesc->natts; 28 | } 29 | 30 | const char * 31 | GetAttName(const Form_pg_attribute att) { 32 | return NameStr(att->attname); 33 | } 34 | 35 | Form_pg_attribute 36 | GetAttr(const TupleDesc tupleDesc, int i) { 37 | return &tupleDesc->attrs[i]; 38 | } 39 | 40 | Relation 41 | OpenRelation(Oid 
relationId) { 42 | /* 43 | * We always open & close the relation using the 44 | * TopTransactionResourceOwner to avoid having to close the relation 45 | * whenever Postgres switches resource owners, because opening a relation 46 | * with one resource owner and closing it with another is not allowed. 47 | */ 48 | ResourceOwner saveResourceOwner = CurrentResourceOwner; 49 | CurrentResourceOwner = TopTransactionResourceOwner; 50 | auto rel = PostgresFunctionGuard(relation_open, relationId, AccessShareLock); 51 | CurrentResourceOwner = saveResourceOwner; 52 | return rel; 53 | } 54 | 55 | void 56 | CloseRelation(Relation rel) { 57 | /* 58 | * We always open & close the relation using the 59 | * TopTransactionResourceOwner to avoid having to close the relation 60 | * whenever Postgres switches resource owners, because opening a relation 61 | * with one resource owner and closing it with another is not allowed. 62 | */ 63 | ResourceOwner saveResourceOwner = CurrentResourceOwner; 64 | CurrentResourceOwner = TopTransactionResourceOwner; 65 | PostgresFunctionGuard(relation_close, rel, NoLock); 66 | 67 | CurrentResourceOwner = saveResourceOwner; 68 | } 69 | 70 | void 71 | EstimateRelSize(Relation rel, int32_t *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac) { 72 | PostgresFunctionGuard(estimate_rel_size, rel, attr_widths, pages, tuples, allvisfrac); 73 | } 74 | 75 | Oid 76 | PGGetRelidFromSchemaAndTable(const char *schema_name, const char *entry_name) { 77 | List *name_list = NIL; 78 | name_list = lappend(name_list, makeString(pstrdup(schema_name))); 79 | name_list = lappend(name_list, makeString(pstrdup(entry_name))); 80 | RangeVar *table_range_var = makeRangeVarFromNameList(name_list); 81 | return RangeVarGetRelid(table_range_var, AccessShareLock, true); 82 | } 83 | 84 | Oid 85 | GetRelidFromSchemaAndTable(const char *schema_name, const char *entry_name) { 86 | return PostgresFunctionGuard(PGGetRelidFromSchemaAndTable, schema_name, entry_name); 87 | } 
88 | 89 | bool 90 | IsValidOid(Oid oid) { 91 | return oid != InvalidOid; 92 | } 93 | 94 | bool 95 | IsRelView(Relation rel) { 96 | return rel->rd_rel->relkind == RELKIND_VIEW; 97 | } 98 | 99 | bool 100 | IsValidBlockNumber(BlockNumber block_number) { 101 | return block_number != InvalidBlockNumber; 102 | } 103 | 104 | } // namespace pgduckdb 105 | -------------------------------------------------------------------------------- /src/pgduckdb/pg/relations.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pgduckdb/pg/declarations.hpp" 4 | 5 | namespace pgduckdb { 6 | 7 | TupleDesc RelationGetDescr(Relation relation); 8 | 9 | // Not thread-safe. Must be called under a lock. 10 | Relation OpenRelation(Oid relationId); 11 | 12 | // Not thread-safe. Must be called under a lock. 13 | void CloseRelation(Relation relation); 14 | 15 | int GetTupleDescNatts(const TupleDesc tupleDesc); 16 | 17 | const char *GetAttName(const Form_pg_attribute); 18 | 19 | Form_pg_attribute GetAttr(const TupleDesc tupleDesc, int i); 20 | 21 | void EstimateRelSize(Relation rel, int32_t *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac); 22 | 23 | Oid GetRelidFromSchemaAndTable(const char *, const char *); 24 | 25 | bool IsValidOid(Oid); 26 | 27 | bool IsValidBlockNumber(BlockNumber); 28 | 29 | bool IsRelView(Relation); 30 | 31 | } // namespace pgduckdb 32 | -------------------------------------------------------------------------------- /src/pgduckdb/pg/snapshots.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" { 4 | extern Snapshot GetActiveSnapshot(void); 5 | } 6 | -------------------------------------------------------------------------------- /src/pgduckdb/pg/transactions.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/pgduckdb_utils.hpp" 2 | 3 | extern "C" { 4 | #include 
"postgres.h" 5 | #include "access/xact.h" // RegisterXactCallback, XactEvent, SubXactEvent, SubTransactionId 6 | #include "access/xlog.h" // XactLastRecEnd 7 | } 8 | 9 | namespace pgduckdb::pg { 10 | 11 | bool 12 | DidWalWrites() { 13 | return XactLastRecEnd != InvalidXLogRecPtr; 14 | } 15 | 16 | CommandId 17 | GetCurrentCommandId(bool used = false) { 18 | return PostgresFunctionGuard(::GetCurrentCommandId, used); 19 | } 20 | 21 | bool 22 | IsInTransactionBlock(bool is_top_level) { 23 | return PostgresFunctionGuard(::IsInTransactionBlock, is_top_level); 24 | } 25 | 26 | void 27 | PreventInTransactionBlock(bool is_top_level, const char *statement_type) { 28 | return PostgresFunctionGuard(::PreventInTransactionBlock, is_top_level, statement_type); 29 | } 30 | 31 | void 32 | RegisterXactCallback(XactCallback callback, void *arg) { 33 | return PostgresFunctionGuard(::RegisterXactCallback, callback, arg); 34 | } 35 | 36 | void 37 | UnregisterXactCallback(XactCallback callback, void *arg) { 38 | return PostgresFunctionGuard(::UnregisterXactCallback, callback, arg); 39 | } 40 | 41 | void 42 | RegisterSubXactCallback(SubXactCallback callback, void *arg) { 43 | return PostgresFunctionGuard(::RegisterSubXactCallback, callback, arg); 44 | } 45 | 46 | void 47 | UnregisterSubXactCallback(SubXactCallback callback, void *arg) { 48 | return PostgresFunctionGuard(::UnregisterSubXactCallback, callback, arg); 49 | } 50 | 51 | } // namespace pgduckdb::pg 52 | -------------------------------------------------------------------------------- /src/pgduckdb/pg/transactions.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pgduckdb/pg/declarations.hpp" 4 | 5 | extern "C" { 6 | extern bool IsSubTransaction(void); 7 | 8 | /* 9 | * These enum definitions are vendored in so we can implement a postgres 10 | * XactCallback in C++. It's not expected that these will ever change. 
11 | */ 12 | typedef enum { 13 | XACT_EVENT_COMMIT, 14 | XACT_EVENT_PARALLEL_COMMIT, 15 | XACT_EVENT_ABORT, 16 | XACT_EVENT_PARALLEL_ABORT, 17 | XACT_EVENT_PREPARE, 18 | XACT_EVENT_PRE_COMMIT, 19 | XACT_EVENT_PARALLEL_PRE_COMMIT, 20 | XACT_EVENT_PRE_PREPARE, 21 | } XactEvent; 22 | 23 | typedef void (*XactCallback)(XactEvent event, void *arg); 24 | 25 | typedef enum { 26 | SUBXACT_EVENT_START_SUB, 27 | SUBXACT_EVENT_COMMIT_SUB, 28 | SUBXACT_EVENT_ABORT_SUB, 29 | SUBXACT_EVENT_PRE_COMMIT_SUB, 30 | } SubXactEvent; 31 | 32 | typedef void (*SubXactCallback)(SubXactEvent event, SubTransactionId mySubid, SubTransactionId parentSubid, void *arg); 33 | } 34 | 35 | namespace pgduckdb::pg { 36 | bool DidWalWrites(); 37 | CommandId GetCurrentCommandId(bool used = false); 38 | bool IsInTransactionBlock(bool top_level); 39 | void PreventInTransactionBlock(bool is_top_level, const char *statement_type); 40 | void RegisterXactCallback(XactCallback callback, void *arg); 41 | void UnregisterXactCallback(XactCallback callback, void *arg); 42 | void RegisterSubXactCallback(SubXactCallback callback, void *arg); 43 | void UnregisterSubXactCallback(SubXactCallback callback, void *arg); 44 | } // namespace pgduckdb::pg 45 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* Values for the backslash_quote GUC */ 4 | typedef enum { 5 | MOTHERDUCK_OFF, 6 | MOTHERDUCK_ON, 7 | MOTHERDUCK_AUTO, 8 | } MotherDuckEnabled; 9 | 10 | // pgduckdb.c 11 | extern "C" void _PG_init(void); 12 | 13 | // pgduckdb_hooks.c 14 | void DuckdbInitHooks(void); 15 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_background_worker.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | void DuckdbInitBackgroundWorker(void); 4 | 5 | namespace 
pgduckdb { 6 | 7 | void SyncMotherDuckCatalogsWithPg(bool drop_with_cascade); 8 | extern bool doing_motherduck_sync; 9 | extern char *current_duckdb_database_name; 10 | extern char *current_motherduck_catalog_version; 11 | 12 | } // namespace pgduckdb 13 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_ddl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pgduckdb/pg/declarations.hpp" 4 | 5 | void DuckdbTruncateTable(Oid relation_oid); 6 | void DuckdbInitUtilityHook(); 7 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_detoast.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb.hpp" 2 | 3 | #include "pgduckdb/pgduckdb_types.hpp" 4 | #include "pgduckdb/pgduckdb_utils.hpp" 5 | 6 | extern "C" { 7 | #include "postgres.h" 8 | #include "pg_config.h" 9 | #if PG_VERSION_NUM >= 160000 10 | #include "varatt.h" 11 | #endif 12 | 13 | #ifdef USE_LZ4 14 | #include 15 | #endif 16 | 17 | #include "access/detoast.h" 18 | #include "access/table.h" 19 | #include "access/tableam.h" 20 | #include "access/toast_internals.h" 21 | #include "common/pg_lzcompress.h" 22 | #include "utils/expandeddatum.h" 23 | } 24 | 25 | #include "pgduckdb/pgduckdb_process_lock.hpp" 26 | #include "pgduckdb/pgduckdb_detoast.hpp" 27 | 28 | /* 29 | * Following functions are direct logic found in postgres code but for duckdb execution they are needed to be thread 30 | * safe. Functions as palloc/pfree are exchanged with duckdb_malloc/duckdb_free. Access to toast table is protected with 31 | * lock also for thread safe reasons. This is initial implementation but should be revisisted in future for better 32 | * performances. 
33 | */ 34 | 35 | namespace pgduckdb { 36 | 37 | struct varlena * 38 | PglzDecompressDatum(const struct varlena *value) { 39 | struct varlena *result = (struct varlena *)duckdb_malloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); 40 | 41 | int32 raw_size = pglz_decompress((char *)value + VARHDRSZ_COMPRESSED, VARSIZE(value) - VARHDRSZ_COMPRESSED, 42 | VARDATA(result), VARDATA_COMPRESSED_GET_EXTSIZE(value), true); 43 | if (raw_size < 0) { 44 | throw duckdb::InvalidInputException("(PGDuckDB/PglzDecompressDatum) Compressed pglz data is corrupt"); 45 | } 46 | 47 | SET_VARSIZE(result, raw_size + VARHDRSZ); 48 | 49 | return result; 50 | } 51 | 52 | struct varlena * 53 | Lz4DecompresDatum(const struct varlena *value) { 54 | #ifndef USE_LZ4 55 | return NULL; /* keep compiler quiet */ 56 | #else 57 | struct varlena *result = (struct varlena *)duckdb_malloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); 58 | 59 | int32 raw_size = LZ4_decompress_safe((char *)value + VARHDRSZ_COMPRESSED, VARDATA(result), 60 | VARSIZE(value) - VARHDRSZ_COMPRESSED, VARDATA_COMPRESSED_GET_EXTSIZE(value)); 61 | if (raw_size < 0) { 62 | throw duckdb::InvalidInputException("(PGDuckDB/Lz4DecompresDatum) Compressed lz4 data is corrupt"); 63 | } 64 | 65 | SET_VARSIZE(result, raw_size + VARHDRSZ); 66 | 67 | return result; 68 | #endif 69 | } 70 | 71 | static struct varlena * 72 | ToastDecompressDatum(struct varlena *attr) { 73 | ToastCompressionId cmid = (ToastCompressionId)TOAST_COMPRESS_METHOD(attr); 74 | switch (cmid) { 75 | case TOAST_PGLZ_COMPRESSION_ID: 76 | return PglzDecompressDatum(attr); 77 | case TOAST_LZ4_COMPRESSION_ID: 78 | return Lz4DecompresDatum(attr); 79 | default: 80 | throw duckdb::InvalidInputException("(PGDuckDB/ToastDecompressDatum) Invalid compression method id %d", 81 | TOAST_COMPRESS_METHOD(attr)); 82 | return NULL; /* keep compiler quiet */ 83 | } 84 | } 85 | 86 | bool 87 | table_relation_fetch_toast_slice(const struct varatt_external &toast_pointer, int32 attrsize, 
struct varlena *result) { 88 | Relation toast_rel = try_table_open(toast_pointer.va_toastrelid, AccessShareLock); 89 | 90 | if (toast_rel == NULL) { 91 | return false; 92 | } 93 | 94 | table_relation_fetch_toast_slice(toast_rel, toast_pointer.va_valueid, attrsize, 0, attrsize, result); 95 | 96 | table_close(toast_rel, AccessShareLock); 97 | return true; 98 | } 99 | 100 | static struct varlena * 101 | ToastFetchDatum(struct varlena *attr) { 102 | if (!VARATT_IS_EXTERNAL_ONDISK(attr)) { 103 | throw duckdb::InvalidInputException("(PGDuckDB/ToastFetchDatum) Shouldn't be called for non-ondisk datums"); 104 | } 105 | 106 | /* Must copy to access aligned fields */ 107 | struct varatt_external toast_pointer; 108 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); 109 | 110 | int32 attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); 111 | 112 | struct varlena *result = (struct varlena *)duckdb_malloc(attrsize + VARHDRSZ); 113 | 114 | #pragma GCC diagnostic push 115 | #pragma GCC diagnostic ignored \ 116 | "-Wsign-compare" // Ignore sign comparison warning that VARATT_EXTERNAL_IS_COMPRESSED generatess 117 | if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) { 118 | #pragma GCC diagnostic pop 119 | SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ); 120 | } else { 121 | SET_VARSIZE(result, attrsize + VARHDRSZ); 122 | } 123 | 124 | if (attrsize == 0) { 125 | return result; 126 | } 127 | 128 | std::lock_guard lock(DuckdbProcessLock::GetLock()); 129 | 130 | if (!PostgresFunctionGuard(table_relation_fetch_toast_slice, toast_pointer, attrsize, result)) { 131 | duckdb_free(result); 132 | throw duckdb::InternalException("(PGDuckDB/ToastFetchDatum) Error toast relation is NULL"); 133 | } 134 | 135 | return result; 136 | } 137 | 138 | Datum 139 | DetoastPostgresDatum(struct varlena *attr, bool *should_free) { 140 | struct varlena *toasted_value = nullptr; 141 | *should_free = true; 142 | if (VARATT_IS_EXTERNAL_ONDISK(attr)) { 143 | toasted_value = ToastFetchDatum(attr); 144 | if 
(VARATT_IS_COMPRESSED(toasted_value)) { 145 | struct varlena *tmp = toasted_value; 146 | toasted_value = ToastDecompressDatum(tmp); 147 | duckdb_free(tmp); 148 | } 149 | } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { 150 | struct varatt_indirect redirect; 151 | VARATT_EXTERNAL_GET_POINTER(redirect, attr); 152 | toasted_value = (struct varlena *)redirect.pointer; 153 | toasted_value = reinterpret_cast(DetoastPostgresDatum(attr, should_free)); 154 | if (attr == (struct varlena *)redirect.pointer) { 155 | struct varlena *result; 156 | result = (struct varlena *)(VARSIZE_ANY(attr)); 157 | memcpy(result, attr, VARSIZE_ANY(attr)); 158 | toasted_value = result; 159 | } 160 | } else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) { 161 | ExpandedObjectHeader *eoh; 162 | Size resultsize; 163 | eoh = DatumGetEOHP(PointerGetDatum(attr)); 164 | resultsize = EOH_get_flat_size(eoh); 165 | toasted_value = (struct varlena *)duckdb_malloc(resultsize); 166 | EOH_flatten_into(eoh, (void *)toasted_value, resultsize); 167 | } else if (VARATT_IS_COMPRESSED(attr)) { 168 | toasted_value = ToastDecompressDatum(attr); 169 | } else if (VARATT_IS_SHORT(attr)) { 170 | Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; 171 | Size new_size = data_size + VARHDRSZ; 172 | toasted_value = (struct varlena *)duckdb_malloc(new_size); 173 | SET_VARSIZE(toasted_value, new_size); 174 | memcpy(VARDATA(toasted_value), VARDATA_SHORT(attr), data_size); 175 | } else { 176 | toasted_value = attr; 177 | *should_free = false; 178 | } 179 | 180 | return reinterpret_cast(toasted_value); 181 | } 182 | 183 | } // namespace pgduckdb 184 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_detoast.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" { 4 | #include "postgres.h" 5 | } 6 | 7 | namespace pgduckdb { 8 | 9 | Datum DetoastPostgresDatum(struct varlena *value, bool *should_free); 10 | 11 | } // 
namespace pgduckdb 12 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_duckdb.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace pgduckdb { 6 | 7 | namespace ddb { 8 | bool DidWrites(); 9 | bool DidWrites(duckdb::ClientContext &context); 10 | } // namespace ddb 11 | 12 | class DuckDBManager { 13 | public: 14 | static inline bool 15 | IsInitialized() { 16 | return manager_instance.database != nullptr; 17 | } 18 | 19 | static inline DuckDBManager & 20 | Get() { 21 | if (!manager_instance.database) { 22 | manager_instance.Initialize(); 23 | } 24 | return manager_instance; 25 | } 26 | 27 | static duckdb::unique_ptr CreateConnection(); 28 | static duckdb::Connection *GetConnection(bool force_transaction = false); 29 | static duckdb::Connection *GetConnectionUnsafe(); 30 | 31 | inline const std::string & 32 | GetDefaultDBName() const { 33 | return default_dbname; 34 | } 35 | 36 | void 37 | Reset() { 38 | connection = nullptr; 39 | delete database; 40 | database = nullptr; 41 | } 42 | 43 | private: 44 | DuckDBManager(); 45 | static DuckDBManager manager_instance; 46 | 47 | void Initialize(); 48 | 49 | void InitializeDatabase(); 50 | 51 | void LoadSecrets(duckdb::ClientContext &); 52 | void DropSecrets(duckdb::ClientContext &); 53 | void LoadExtensions(duckdb::ClientContext &); 54 | void LoadFunctions(duckdb::ClientContext &); 55 | void RefreshConnectionState(duckdb::ClientContext &); 56 | 57 | inline bool 58 | IsSecretSeqLessThan(int64_t seq) const { 59 | return secret_table_current_seq < seq; 60 | } 61 | 62 | inline bool 63 | IsExtensionsSeqLessThan(int64_t seq) const { 64 | return extensions_table_current_seq < seq; 65 | } 66 | 67 | inline void 68 | UpdateSecretSeq(int64_t seq) { 69 | secret_table_current_seq = seq; 70 | } 71 | 72 | inline void 73 | UpdateExtensionsSeq(int64_t seq) { 74 | 
extensions_table_current_seq = seq; 75 | } 76 | 77 | int secret_table_num_rows; 78 | int64_t secret_table_current_seq; 79 | int64_t extensions_table_current_seq; 80 | /* 81 | * FIXME: Use a unique_ptr instead of a raw pointer. For now this is not 82 | * possible though, as the MotherDuck extension causes an ABORT when the 83 | * DuckDB database's destructor is run at the exit of the process. This 84 | * then in turn crashes Postgres, which we obviously don't want. Not 85 | * running the destructor also doesn't really have any downsides, as the 86 | * process is going to die anyway. It's probably even a tiny bit more 87 | * efficient not to run the destructor at all. But we should still fix 88 | * this, because running the destructor is a good way to find bugs (such 89 | * as the one reported in #279). 90 | */ 91 | duckdb::DuckDB *database; 92 | duckdb::unique_ptr connection; 93 | std::string default_dbname; 94 | }; 95 | 96 | std::string CreateOrGetDirectoryPath(const char *directory_name); 97 | 98 | } // namespace pgduckdb 99 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_filter.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb.hpp" 2 | #include "duckdb/planner/filter/constant_filter.hpp" 3 | #include "pgduckdb/pgduckdb_types.hpp" 4 | 5 | extern "C" { 6 | #include "postgres.h" 7 | #include "catalog/pg_type.h" 8 | #include "utils/builtins.h" 9 | #include "utils/date.h" 10 | #include "utils/timestamp.h" 11 | #if PG_VERSION_NUM >= 160000 12 | #include "varatt.h" 13 | #endif 14 | } 15 | 16 | #include "pgduckdb/pgduckdb_filter.hpp" 17 | #include "pgduckdb/pgduckdb_detoast.hpp" 18 | 19 | namespace pgduckdb { 20 | 21 | template 22 | bool 23 | TemplatedFilterOperation(const T &value, const duckdb::Value &constant) { 24 | return OP::Operation(value, constant.GetValueUnsafe()); 25 | } 26 | 27 | template 28 | bool 29 | StringFilterOperation(const Datum
&value, const duckdb::Value &constant, bool is_bpchar) { 30 | if (value == (Datum)0 || constant.IsNull()) { 31 | return false; // Comparison to NULL always returns false. 32 | } 33 | 34 | bool should_free = false; 35 | const auto detoasted_value = DetoastPostgresDatum(reinterpret_cast(value), &should_free); 36 | 37 | /* bpchar adds zero padding so we need to read true len of bpchar */ 38 | auto detoasted_val_len = is_bpchar ? bpchartruelen(VARDATA_ANY(detoasted_value), VARSIZE_ANY_EXHDR(detoasted_value)) 39 | : VARSIZE_ANY_EXHDR(detoasted_value); 40 | 41 | const auto datum_sv = std::string_view((const char *)VARDATA_ANY(detoasted_value), detoasted_val_len); 42 | const auto val = duckdb::StringValue::Get(constant); 43 | const auto val_sv = std::string_view(val); 44 | const bool res = OP::Operation(datum_sv, val_sv); 45 | 46 | if (should_free) { 47 | duckdb_free(reinterpret_cast(detoasted_value)); 48 | } 49 | return res; 50 | } 51 | 52 | template 53 | static bool 54 | FilterOperationSwitch(const Datum &value, const duckdb::Value &constant, Oid type_oid) { 55 | switch (type_oid) { 56 | case BOOLOID: 57 | return TemplatedFilterOperation(DatumGetBool(value), constant); 58 | case CHAROID: 59 | return TemplatedFilterOperation(DatumGetChar(value), constant); 60 | case INT2OID: 61 | return TemplatedFilterOperation(DatumGetInt16(value), constant); 62 | case INT4OID: 63 | return TemplatedFilterOperation(DatumGetInt32(value), constant); 64 | case INT8OID: 65 | return TemplatedFilterOperation(DatumGetInt64(value), constant); 66 | case FLOAT4OID: 67 | return TemplatedFilterOperation(DatumGetFloat4(value), constant); 68 | case FLOAT8OID: 69 | return TemplatedFilterOperation(DatumGetFloat8(value), constant); 70 | case DATEOID: { 71 | int32_t date = DatumGetDateADT(value) + pgduckdb::PGDUCKDB_DUCK_DATE_OFFSET; 72 | return TemplatedFilterOperation(date, constant); 73 | } 74 | case TIMESTAMPOID: { 75 | int64_t timestamp = DatumGetTimestamp(value) + 
pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET; 76 | return TemplatedFilterOperation(timestamp, constant); 77 | } 78 | case TIMESTAMPTZOID: { 79 | int64_t timestamptz = DatumGetTimestampTz(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET; 80 | return TemplatedFilterOperation(timestamptz, constant); 81 | } 82 | case BPCHAROID: 83 | case TEXTOID: 84 | case VARCHAROID: 85 | return StringFilterOperation(value, constant, type_oid == BPCHAROID); 86 | case BYTEAOID: 87 | return StringFilterOperation(value, constant, false); 88 | default: 89 | throw duckdb::InvalidTypeException( 90 | duckdb::string("(DuckDB/FilterOperationSwitch) Unsupported duckdb type: " + std::to_string(type_oid))); 91 | } 92 | } 93 | 94 | bool 95 | ApplyValueFilter(const duckdb::TableFilter &filter, const Datum &value, bool is_null, Oid type_oid) { 96 | switch (filter.filter_type) { 97 | case duckdb::TableFilterType::CONJUNCTION_AND: { 98 | const auto &conjunction = filter.Cast(); 99 | for (const auto &child_filter : conjunction.child_filters) { 100 | if (!ApplyValueFilter(*child_filter, value, is_null, type_oid)) { 101 | return false; 102 | } 103 | } 104 | return true; 105 | } 106 | case duckdb::TableFilterType::CONSTANT_COMPARISON: { 107 | auto &constant_filter = filter.Cast(); 108 | switch (constant_filter.comparison_type) { 109 | case duckdb::ExpressionType::COMPARE_EQUAL: 110 | return FilterOperationSwitch(value, constant_filter.constant, type_oid); 111 | case duckdb::ExpressionType::COMPARE_LESSTHAN: 112 | return FilterOperationSwitch(value, constant_filter.constant, type_oid); 113 | case duckdb::ExpressionType::COMPARE_LESSTHANOREQUALTO: 114 | return FilterOperationSwitch(value, constant_filter.constant, type_oid); 115 | case duckdb::ExpressionType::COMPARE_GREATERTHAN: 116 | return FilterOperationSwitch(value, constant_filter.constant, type_oid); 117 | case duckdb::ExpressionType::COMPARE_GREATERTHANOREQUALTO: 118 | return FilterOperationSwitch(value, constant_filter.constant, type_oid); 119 | 
default: 120 | D_ASSERT(0); 121 | } 122 | break; 123 | } 124 | case duckdb::TableFilterType::IS_NOT_NULL: 125 | return is_null == false; 126 | case duckdb::TableFilterType::IS_NULL: 127 | return is_null == true; 128 | default: 129 | D_ASSERT(0); 130 | break; 131 | } 132 | } 133 | 134 | } // namespace pgduckdb 135 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_filter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | extern "C" { 6 | #include "postgres.h" 7 | } 8 | 9 | namespace pgduckdb { 10 | 11 | bool ApplyValueFilter(const duckdb::TableFilter &filter, const Datum &value, bool is_null, Oid type_oid); 12 | 13 | } // namespace pgduckdb 14 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_guc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern bool duckdb_force_execution; 4 | extern int duckdb_maximum_threads; 5 | extern char *duckdb_maximum_memory; 6 | extern char *duckdb_disabled_filesystems; 7 | extern bool duckdb_enable_external_access; 8 | extern bool duckdb_allow_unsigned_extensions; 9 | extern bool duckdb_autoinstall_known_extensions; 10 | extern bool duckdb_autoload_known_extensions; 11 | extern int duckdb_max_threads_per_postgres_scan; 12 | extern char *duckdb_motherduck_postgres_database; 13 | extern int duckdb_motherduck_enabled; 14 | extern char *duckdb_motherduck_token; 15 | extern char *duckdb_postgres_role; 16 | extern char *duckdb_motherduck_default_database; 17 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_metadata_cache.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pgduckdb/pg/declarations.hpp" 4 | 5 | namespace pgduckdb { 6 | bool 
IsExtensionRegistered(); 7 | bool IsDuckdbOnlyFunction(Oid function_oid); 8 | uint64 CacheVersion(); 9 | Oid ExtensionOid(); 10 | Oid DuckdbTableAmOid(); 11 | bool IsMotherDuckEnabled(); 12 | bool IsMotherDuckEnabledAnywhere(); 13 | bool IsMotherDuckPostgresDatabase(); 14 | Oid MotherDuckPostgresUser(); 15 | Oid IsDuckdbTable(Form_pg_class relation); 16 | Oid IsDuckdbTable(Relation relation); 17 | Oid IsMotherDuckTable(Form_pg_class relation); 18 | Oid IsMotherDuckTable(Relation relation); 19 | Oid IsDuckdbExecutionAllowed(); 20 | } // namespace pgduckdb 21 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_node.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" { 4 | #include "postgres.h" 5 | #include "nodes/extensible.h" 6 | } 7 | 8 | extern CustomScanMethods duckdb_scan_scan_methods; 9 | extern "C" void DuckdbInitNode(void); 10 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_options.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace pgduckdb { 7 | 8 | /* constants for duckdb.secrets */ 9 | #define Natts_duckdb_secret 11 10 | #define Anum_duckdb_secret_name 1 11 | #define Anum_duckdb_secret_type 2 12 | #define Anum_duckdb_secret_key_id 3 13 | #define Anum_duckdb_secret_secret 4 14 | #define Anum_duckdb_secret_region 5 15 | #define Anum_duckdb_secret_session_token 6 16 | #define Anum_duckdb_secret_endpoint 7 17 | #define Anum_duckdb_secret_r2_account_id 8 18 | #define Anum_duckdb_secret_use_ssl 9 19 | #define Anum_duckdb_secret_scope 10 20 | #define Anum_duckdb_secret_connection_string 11 21 | 22 | enum SecretType { S3, R2, GCS, AZURE }; 23 | 24 | typedef struct DuckdbSecret { 25 | std::string name; 26 | SecretType type; 27 | std::string key_id; 28 | std::string secret; 29 | 
std::string region; 30 | std::string session_token; 31 | std::string endpoint; 32 | std::string r2_account_id; 33 | bool use_ssl; 34 | std::string scope; 35 | std::string connection_string; // Used for Azure 36 | } DuckdbSecret; 37 | 38 | std::string SecretTypeToString(SecretType type); 39 | 40 | extern std::vector ReadDuckdbSecrets(); 41 | 42 | /* constants for duckdb.extensions */ 43 | #define Natts_duckdb_extension 2 44 | #define Anum_duckdb_extension_name 1 45 | #define Anum_duckdb_extension_enable 2 46 | 47 | typedef struct DuckdbExension { 48 | std::string name; 49 | bool enabled; 50 | } DuckdbExtension; 51 | 52 | extern std::vector ReadDuckdbExtensions(); 53 | 54 | } // namespace pgduckdb 55 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_planner.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/pgduckdb_planner.hpp" 2 | 3 | #include "duckdb.hpp" 4 | 5 | #include "pgduckdb/catalog/pgduckdb_transaction.hpp" 6 | #include "pgduckdb/scan/postgres_scan.hpp" 7 | #include "pgduckdb/pgduckdb_types.hpp" 8 | 9 | extern "C" { 10 | #include "postgres.h" 11 | #include "access/xact.h" 12 | #include "catalog/pg_type.h" 13 | #include "nodes/makefuncs.h" 14 | #include "nodes/nodes.h" 15 | #include "nodes/params.h" 16 | #include "optimizer/optimizer.h" 17 | #include "optimizer/planner.h" 18 | #include "optimizer/planmain.h" 19 | #include "tcop/pquery.h" 20 | #include "utils/syscache.h" 21 | #include "utils/guc.h" 22 | 23 | #include "pgduckdb/pgduckdb_ruleutils.h" 24 | } 25 | 26 | #include "pgduckdb/pgduckdb_duckdb.hpp" 27 | #include "pgduckdb/pgduckdb_node.hpp" 28 | #include "pgduckdb/vendor/pg_list.hpp" 29 | #include "pgduckdb/utility/cpp_wrapper.hpp" 30 | #include "pgduckdb/pgduckdb_types.hpp" 31 | 32 | bool duckdb_explain_analyze = false; 33 | 34 | duckdb::unique_ptr 35 | DuckdbPrepare(const Query *query) { 36 | Query *copied_query = (Query 
*)copyObjectImpl(query); 37 | const char *query_string = pgduckdb_get_querydef(copied_query); 38 | 39 | if (ActivePortal && ActivePortal->commandTag == CMDTAG_EXPLAIN) { 40 | if (duckdb_explain_analyze) { 41 | query_string = psprintf("EXPLAIN ANALYZE %s", query_string); 42 | } else { 43 | query_string = psprintf("EXPLAIN %s", query_string); 44 | } 45 | } 46 | 47 | elog(DEBUG2, "(PGDuckDB/DuckdbPrepare) Preparing: %s", query_string); 48 | 49 | auto con = pgduckdb::DuckDBManager::GetConnection(); 50 | auto prepared_query = con->context->Prepare(query_string); 51 | return prepared_query; 52 | } 53 | 54 | static Plan * 55 | CreatePlan(Query *query, bool throw_error) { 56 | int elevel = throw_error ? ERROR : WARNING; 57 | /* 58 | * Prepare the query, so we can get the returned types and column names. 59 | */ 60 | 61 | duckdb::unique_ptr prepared_query = DuckdbPrepare(query); 62 | 63 | if (prepared_query->HasError()) { 64 | elog(elevel, "(PGDuckDB/CreatePlan) Prepared query returned an error: '%s", prepared_query->GetError().c_str()); 65 | return nullptr; 66 | } 67 | 68 | CustomScan *duckdb_node = makeNode(CustomScan); 69 | 70 | auto &prepared_result_types = prepared_query->GetTypes(); 71 | 72 | for (size_t i = 0; i < prepared_result_types.size(); i++) { 73 | auto &column = prepared_result_types[i]; 74 | Oid postgresColumnOid = pgduckdb::GetPostgresDuckDBType(column); 75 | 76 | if (!OidIsValid(postgresColumnOid)) { 77 | elog(elevel, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid); 78 | return nullptr; 79 | } 80 | 81 | HeapTuple tp; 82 | Form_pg_type typtup; 83 | 84 | tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(postgresColumnOid)); 85 | if (!HeapTupleIsValid(tp)) { 86 | elog(elevel, "(PGDuckDB/CreatePlan) Cache lookup failed for type %u", postgresColumnOid); 87 | return nullptr; 88 | } 89 | 90 | typtup = (Form_pg_type)GETSTRUCT(tp); 91 | 92 | /* We fill in the varno later, once we know the index of the custom RTE 93 | * that we create.
We'll know this at the end of DuckdbPlanNode. This 94 | * can probably be simplified when we don't call the standard_planner 95 | * anymore inside DuckdbPlanNode, because then we only need a single 96 | * RTE. */ 97 | Var *var = makeVar(0, i + 1, postgresColumnOid, typtup->typtypmod, typtup->typcollation, 0); 98 | 99 | TargetEntry *target_entry = 100 | makeTargetEntry((Expr *)var, i + 1, (char *)pstrdup(prepared_query->GetNames()[i].c_str()), false); 101 | 102 | /* Our custom scan node needs the custom_scan_tlist to be set */ 103 | duckdb_node->custom_scan_tlist = lappend(duckdb_node->custom_scan_tlist, copyObjectImpl(target_entry)); 104 | 105 | /* For the plan its targetlist we use INDEX_VAR as the varno, which 106 | * means it references our custom_scan_tlist. */ 107 | var->varno = INDEX_VAR; 108 | 109 | /* But we also need an actual target list, because Postgres expects it 110 | * for things like materialization */ 111 | duckdb_node->scan.plan.targetlist = lappend(duckdb_node->scan.plan.targetlist, target_entry); 112 | 113 | ReleaseSysCache(tp); 114 | } 115 | 116 | duckdb_node->custom_private = list_make1(query); 117 | duckdb_node->methods = &duckdb_scan_scan_methods; 118 | 119 | return (Plan *)duckdb_node; 120 | } 121 | 122 | /* Creates a matching RangeTblEntry for the given CustomScan node */ 123 | static RangeTblEntry * 124 | DuckdbRangeTableEntry(CustomScan *custom_scan) { 125 | List *column_names = NIL; 126 | foreach_node(TargetEntry, target_entry, custom_scan->scan.plan.targetlist) { 127 | column_names = lappend(column_names, makeString(target_entry->resname)); 128 | } 129 | RangeTblEntry *rte = makeNode(RangeTblEntry); 130 | 131 | /* We need to choose an RTE kind here. RTE_RELATION does not work due to 132 | * various asserts that fail due to us not setting some of the fields on 133 | * the entry. Instead of filling those fields in with dummy values we use 134 | * RTE_NAMEDTUPLESTORE, for which no special fields exist. 
*/ 135 | rte->rtekind = RTE_NAMEDTUPLESTORE; 136 | rte->eref = makeAlias("duckdb_scan", column_names); 137 | rte->inFromCl = true; 138 | 139 | return rte; 140 | } 141 | 142 | PlannedStmt * 143 | DuckdbPlanNode(Query *parse, const char *query_string, int cursor_options, ParamListInfo bound_params, 144 | bool throw_error) { 145 | /* We need to check whether DuckDB can create a plan */ 146 | 147 | Plan *duckdb_plan = InvokeCPPFunc(CreatePlan, parse, throw_error); 148 | CustomScan *custom_scan = castNode(CustomScan, duckdb_plan); 149 | 150 | if (!duckdb_plan) { 151 | return nullptr; 152 | } 153 | 154 | /* 155 | * If creating a plan for a scrollable cursor add a Material node at the 156 | * top because our CustomScan does not support backwards scanning. 157 | */ 158 | if (cursor_options & CURSOR_OPT_SCROLL) { 159 | duckdb_plan = materialize_finished_plan(duckdb_plan); 160 | } 161 | 162 | /* 163 | * We let postgres generate a basic plan, but then completely overwrite the 164 | * actual plan with our CustomScan node. This is useful to get the correct 165 | * values for all the other many fields of the PlannedStmt. 166 | * 167 | * XXX: The primary reason we do this is that Postgres fills in permInfos 168 | * and rtable correctly. Those are needed for postgres to do its permission 169 | * checks on the used tables. 170 | * 171 | * FIXME: For some reason this needs an additional query copy to allow 172 | * re-planning of the query later during execution. But I don't really 173 | * understand why this is needed.
174 | */ 175 | Query *copied_query = (Query *)copyObjectImpl(parse); 176 | PlannedStmt *postgres_plan = standard_planner(copied_query, query_string, cursor_options, bound_params); 177 | 178 | postgres_plan->planTree = duckdb_plan; 179 | 180 | /* Put a DuckDB RTE at the end of the rtable */ 181 | RangeTblEntry *rte = DuckdbRangeTableEntry(custom_scan); 182 | postgres_plan->rtable = lappend(postgres_plan->rtable, rte); 183 | 184 | /* Update the varno of the Var nodes in the custom_scan_tlist, to point to 185 | * our new RTE. This should not be necessary anymore when we stop relying 186 | * on the standard_planner here. */ 187 | foreach_node(TargetEntry, target_entry, custom_scan->custom_scan_tlist) { 188 | Var *var = castNode(Var, target_entry->expr); 189 | 190 | var->varno = list_length(postgres_plan->rtable); 191 | } 192 | 193 | return postgres_plan; 194 | } 195 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_planner.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | #include "pgduckdb/pg/declarations.hpp" 6 | #include "pgduckdb/pgduckdb_duckdb.hpp" 7 | 8 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 9 | 10 | extern bool duckdb_explain_analyze; 11 | 12 | PlannedStmt *DuckdbPlanNode(Query *parse, const char *query_string, int cursor_options, ParamListInfo bound_params, 13 | bool throw_error); 14 | duckdb::unique_ptr DuckdbPrepare(const Query *query); 15 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_process_lock.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace pgduckdb { 6 | 7 | /* 8 | * DuckdbProcessLock is used to synchronize calls to PG functions that modify global variables.
Examples 9 | * for this synchronization are functions that read buffers/etc. This lock is shared between all threads and all 10 | * replacement scans. 11 | */ 12 | struct DuckdbProcessLock { 13 | public: 14 | static std::mutex & 15 | GetLock() { 16 | static std::mutex lock; 17 | return lock; 18 | } 19 | }; 20 | 21 | } // namespace pgduckdb 22 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_ruleutils.h: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | 3 | char *pgduckdb_relation_name(Oid relid); 4 | char *pgduckdb_function_name(Oid function_oid); 5 | char *pgduckdb_get_querydef(Query *); 6 | char *pgduckdb_get_tabledef(Oid relation_id); 7 | bool pgduckdb_is_not_default_expr(Node *node, void *context); 8 | List *pgduckdb_db_and_schema(const char *postgres_schema_name, bool is_duckdb_table, bool is_columnstore_table); 9 | const char *pgduckdb_db_and_schema_string(const char *postgres_schema_name, bool is_duckdb_table, 10 | bool is_columnstore_table); 11 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_table_am.hpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/pg/declarations.hpp" 2 | 3 | namespace pgduckdb { 4 | bool IsDuckdbTableAm(const TableAmRoutine *am); 5 | } 6 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/types/data_chunk.hpp" 4 | #include "pgduckdb/pg/declarations.hpp" 5 | 6 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 
7 | 8 | namespace pgduckdb { 9 | 10 | class PostgresScanGlobalState; 11 | class PostgresScanLocalState; 12 | 13 | // DuckDB has date starting from 1/1/1970 while PG starts from 1/1/2000 14 | constexpr int32_t PGDUCKDB_DUCK_DATE_OFFSET = 10957; 15 | constexpr int64_t PGDUCKDB_DUCK_TIMESTAMP_OFFSET = 16 | static_cast(PGDUCKDB_DUCK_DATE_OFFSET) * static_cast(86400000000) /* USECS_PER_DAY */; 17 | 18 | duckdb::LogicalType ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute); 19 | Oid GetPostgresDuckDBType(const duckdb::LogicalType &type); 20 | int32_t GetPostgresDuckDBTypemod(const duckdb::LogicalType &type); 21 | duckdb::Value ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type); 22 | void ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, uint64_t offset); 23 | bool ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, uint64_t col); 24 | void InsertTupleIntoChunk(duckdb::DataChunk &output, duckdb::shared_ptr scan_global_state, 25 | duckdb::shared_ptr scan_local_state, HeapTupleData *tuple); 26 | 27 | } // namespace pgduckdb 28 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_utils.cpp: -------------------------------------------------------------------------------- 1 | #include "pgduckdb/pgduckdb_utils.hpp" 2 | 3 | namespace pgduckdb { 4 | 5 | duckdb::unique_ptr 6 | DuckDBQueryOrThrow(duckdb::ClientContext &context, const std::string &query) { 7 | auto res = context.Query(query, false); 8 | if (res->HasError()) { 9 | res->ThrowError(); 10 | } 11 | return res; 12 | } 13 | 14 | duckdb::unique_ptr 15 | DuckDBQueryOrThrow(duckdb::Connection &connection, const std::string &query) { 16 | return DuckDBQueryOrThrow(*connection.context, query); 17 | } 18 | 19 | duckdb::unique_ptr 20 | DuckDBQueryOrThrow(const std::string &query) { 21 | auto connection = pgduckdb::DuckDBManager::GetConnection(); 22 | return DuckDBQueryOrThrow(*connection, query); 23 | 
} 24 | 25 | } // namespace pgduckdb 26 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_utils.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/exception.hpp" 4 | #include "duckdb/common/error_data.hpp" 5 | #include "pgduckdb/pgduckdb_duckdb.hpp" 6 | #include "pgduckdb/pg/error_data.hpp" 7 | #include "pgduckdb/logger.hpp" 8 | 9 | #include 10 | 11 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 12 | 13 | extern "C" { 14 | // Note: these forward-declarations could live in a header under the `pg/` folder. 15 | // But since they are (hopefully) only used in this file, we keep them here. 16 | struct ErrorContextCallback; 17 | struct MemoryContextData; 18 | 19 | typedef struct MemoryContextData *MemoryContext; 20 | 21 | extern sigjmp_buf *PG_exception_stack; 22 | extern MemoryContext CurrentMemoryContext; 23 | extern ErrorContextCallback *error_context_stack; 24 | extern ErrorData *CopyErrorData(); 25 | extern void FlushErrorState(); 26 | } 27 | 28 | namespace pgduckdb { 29 | 30 | struct PgExceptionGuard { 31 | PgExceptionGuard() : _save_exception_stack(PG_exception_stack), _save_context_stack(error_context_stack) { 32 | } 33 | 34 | ~PgExceptionGuard() noexcept { 35 | RestoreStacks(); 36 | } 37 | 38 | void 39 | RestoreStacks() const noexcept { 40 | PG_exception_stack = _save_exception_stack; 41 | error_context_stack = _save_context_stack; 42 | } 43 | 44 | sigjmp_buf *_save_exception_stack; 45 | ErrorContextCallback *_save_context_stack; 46 | }; 47 | 48 | /* 49 | * DuckdbGlobalLock should be held before calling. 50 | */ 51 | template 52 | typename std::invoke_result::type 53 | __PostgresFunctionGuard__(const char *func_name, FuncArgs... 
args) { 54 | MemoryContext ctx = CurrentMemoryContext; 55 | ErrorData *edata = nullptr; 56 | { // Scope for PG_END_TRY 57 | PgExceptionGuard g; 58 | sigjmp_buf _local_sigjmp_buf; 59 | if (sigsetjmp(_local_sigjmp_buf, 0) == 0) { 60 | PG_exception_stack = &_local_sigjmp_buf; 61 | return func(std::forward(args)...); 62 | } else { 63 | g.RestoreStacks(); 64 | CurrentMemoryContext = ctx; 65 | edata = CopyErrorData(); 66 | FlushErrorState(); 67 | } 68 | } // PG_END_TRY(); 69 | 70 | auto message = duckdb::StringUtil::Format("(PGDuckDB/%s) %s", func_name, pg::GetErrorDataMessage(edata)); 71 | throw duckdb::Exception(duckdb::ExceptionType::EXECUTOR, message); 72 | } 73 | 74 | #define PostgresFunctionGuard(FUNC, ...) \ 75 | pgduckdb::__PostgresFunctionGuard__(__func__, __VA_ARGS__) 76 | 77 | duckdb::unique_ptr DuckDBQueryOrThrow(duckdb::ClientContext &context, const std::string &query); 78 | 79 | duckdb::unique_ptr DuckDBQueryOrThrow(duckdb::Connection &connection, const std::string &query); 80 | 81 | duckdb::unique_ptr DuckDBQueryOrThrow(const std::string &query); 82 | 83 | } // namespace pgduckdb 84 | -------------------------------------------------------------------------------- /src/pgduckdb/pgduckdb_xact.hpp: -------------------------------------------------------------------------------- 1 | namespace pgduckdb { 2 | 3 | namespace pg { 4 | bool IsInTransactionBlock(); 5 | void PreventInTransactionBlock(const char *statement_type); 6 | } // namespace pg 7 | 8 | void ClaimCurrentCommandId(); 9 | void RegisterDuckdbXactCallback(); 10 | void AutocommitSingleStatementQueries(); 11 | void MarkStatementNotTopLevel(); 12 | } // namespace pgduckdb 13 | -------------------------------------------------------------------------------- /src/pgduckdb/scan/heap_reader.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb.hpp" 2 | 3 | #include "pgduckdb/scan/heap_reader.hpp" 4 | #include "pgduckdb/pgduckdb_types.hpp" 5 | #include 
"pgduckdb/pgduckdb_utils.hpp" 6 | 7 | extern "C" { 8 | #include "postgres.h" 9 | #include "pgstat.h" 10 | #include "access/heapam.h" 11 | #include "storage/bufmgr.h" 12 | #include "storage/bufpage.h" 13 | #include "utils/rel.h" 14 | } 15 | 16 | #include "pgduckdb/pgduckdb_process_lock.hpp" 17 | 18 | #include 19 | 20 | namespace pgduckdb { 21 | 22 | // 23 | // HeapReaderGlobalState 24 | // 25 | 26 | HeapReaderGlobalState::HeapReaderGlobalState(Relation rel) 27 | : m_nblocks(RelationGetNumberOfBlocks(rel)), m_last_assigned_block_number(InvalidBlockNumber) { 28 | } 29 | 30 | BlockNumber 31 | HeapReaderGlobalState::AssignNextBlockNumber(std::mutex &lock) { 32 | lock.lock(); 33 | BlockNumber block_number = InvalidBlockNumber; 34 | if (m_nblocks > 0 && m_last_assigned_block_number == InvalidBlockNumber) { 35 | block_number = m_last_assigned_block_number = 0; 36 | } else if (m_nblocks > 0 && m_last_assigned_block_number < m_nblocks - 1) { 37 | block_number = ++m_last_assigned_block_number; 38 | } 39 | lock.unlock(); 40 | return block_number; 41 | } 42 | 43 | // 44 | // HeapReader 45 | // 46 | 47 | HeapReader::HeapReader(Relation rel, duckdb::shared_ptr heap_reader_global_state, 48 | duckdb::shared_ptr global_state, 49 | duckdb::shared_ptr local_state) 50 | : m_global_state(global_state), m_heap_reader_global_state(heap_reader_global_state), m_local_state(local_state), 51 | m_rel(rel), m_inited(false), m_read_next_page(true), m_block_number(InvalidBlockNumber), m_buffer(InvalidBuffer), 52 | m_current_tuple_index(InvalidOffsetNumber), m_page_tuples_left(0) { 53 | m_tuple = duckdb::make_uniq(); 54 | m_tuple->t_data = NULL; 55 | m_tuple->t_tableOid = RelationGetRelid(m_rel); 56 | ItemPointerSetInvalid(&m_tuple->t_self); 57 | DuckdbProcessLock::GetLock().lock(); 58 | m_buffer_access_strategy = GetAccessStrategy(BAS_BULKREAD); 59 | DuckdbProcessLock::GetLock().unlock(); 60 | } 61 | 62 | HeapReader::~HeapReader() { 63 | DuckdbProcessLock::GetLock().lock(); 64 | /* If execution 
is interrupted and buffer is still opened close it now */ 65 | if (m_buffer != InvalidBuffer) { 66 | UnlockReleaseBuffer(m_buffer); 67 | } 68 | FreeAccessStrategy(m_buffer_access_strategy); 69 | DuckdbProcessLock::GetLock().unlock(); 70 | } 71 | 72 | Page 73 | HeapReader::PreparePageRead() { 74 | Page page = BufferGetPage(m_buffer); 75 | #if PG_VERSION_NUM < 170000 76 | TestForOldSnapshot(m_global_state->m_snapshot, m_rel, page); 77 | #endif 78 | m_page_tuples_all_visible = PageIsAllVisible(page) && !m_global_state->m_snapshot->takenDuringRecovery; 79 | m_page_tuples_left = PageGetMaxOffsetNumber(page) - FirstOffsetNumber + 1; 80 | m_current_tuple_index = FirstOffsetNumber; 81 | return page; 82 | } 83 | 84 | bool 85 | HeapReader::ReadPageTuples(duckdb::DataChunk &output) { 86 | BlockNumber block = InvalidBlockNumber; 87 | Page page = nullptr; 88 | 89 | if (!m_inited) { 90 | block = m_block_number = m_heap_reader_global_state->AssignNextBlockNumber(m_global_state->m_lock); 91 | if (m_block_number == InvalidBlockNumber) { 92 | return false; 93 | } 94 | m_inited = true; 95 | m_read_next_page = true; 96 | } else { 97 | block = m_block_number; 98 | if (!m_read_next_page) { 99 | page = BufferGetPage(m_buffer); 100 | } 101 | } 102 | 103 | while (block != InvalidBlockNumber) { 104 | if (m_read_next_page) { 105 | CHECK_FOR_INTERRUPTS(); 106 | std::lock_guard lock(DuckdbProcessLock::GetLock()); 107 | block = m_block_number; 108 | 109 | m_buffer = PostgresFunctionGuard(ReadBufferExtended, m_rel, MAIN_FORKNUM, block, RBM_NORMAL, 110 | m_buffer_access_strategy); 111 | 112 | PostgresFunctionGuard(LockBuffer, m_buffer, BUFFER_LOCK_SHARE); 113 | 114 | page = PreparePageRead(); 115 | m_read_next_page = false; 116 | } 117 | 118 | for (; m_page_tuples_left > 0 && m_local_state->m_output_vector_size < STANDARD_VECTOR_SIZE; 119 | m_page_tuples_left--, m_current_tuple_index++) { 120 | bool visible = true; 121 | ItemId lpp = PageGetItemId(page, m_current_tuple_index); 122 | 123 | if 
(!ItemIdIsNormal(lpp)) 124 | continue; 125 | 126 | m_tuple->t_data = (HeapTupleHeader)PageGetItem(page, lpp); 127 | m_tuple->t_len = ItemIdGetLength(lpp); 128 | ItemPointerSet(&(m_tuple->t_self), block, m_current_tuple_index); 129 | 130 | if (!m_page_tuples_all_visible) { 131 | std::lock_guard lock(DuckdbProcessLock::GetLock()); 132 | visible = HeapTupleSatisfiesVisibility(m_tuple.get(), m_global_state->m_snapshot, m_buffer); 133 | /* skip tuples not visible to this snapshot */ 134 | if (!visible) 135 | continue; 136 | } 137 | 138 | pgstat_count_heap_getnext(m_rel); 139 | InsertTupleIntoChunk(output, m_global_state, m_local_state, m_tuple.get()); 140 | } 141 | 142 | /* No more items on current page */ 143 | if (!m_page_tuples_left) { 144 | DuckdbProcessLock::GetLock().lock(); 145 | UnlockReleaseBuffer(m_buffer); 146 | DuckdbProcessLock::GetLock().unlock(); 147 | m_buffer = InvalidBuffer; 148 | m_read_next_page = true; 149 | /* Handle cancel request */ 150 | if (QueryCancelPending) { 151 | block = m_block_number = InvalidBlockNumber; 152 | } else { 153 | block = m_block_number = m_heap_reader_global_state->AssignNextBlockNumber(m_global_state->m_lock); 154 | } 155 | } 156 | 157 | /* We have collected STANDARD_VECTOR_SIZE */ 158 | if (m_local_state->m_output_vector_size == STANDARD_VECTOR_SIZE) { 159 | output.SetCardinality(m_local_state->m_output_vector_size); 160 | output.Verify(); 161 | m_local_state->m_output_vector_size = 0; 162 | return true; 163 | } 164 | } 165 | 166 | /* Next assigned block number is InvalidBlockNumber so we check did we write any tuples in output vector */ 167 | if (m_local_state->m_output_vector_size) { 168 | output.SetCardinality(m_local_state->m_output_vector_size); 169 | output.Verify(); 170 | m_local_state->m_output_vector_size = 0; 171 | } 172 | 173 | m_buffer = InvalidBuffer; 174 | m_block_number = InvalidBlockNumber; 175 | m_tuple->t_data = NULL; 176 | m_read_next_page = false; 177 | 178 | return false; 179 | } 180 | } // namespace 
pgduckdb 181 | -------------------------------------------------------------------------------- /src/pgduckdb/scan/heap_reader.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | #include "pgduckdb/scan/postgres_scan.hpp" 6 | #include "pgduckdb/pg/declarations.hpp" 7 | 8 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 9 | 10 | namespace pgduckdb { 11 | 12 | // HeapReaderGlobalState 13 | 14 | class HeapReaderGlobalState { 15 | public: 16 | HeapReaderGlobalState(Relation rel); 17 | BlockNumber AssignNextBlockNumber(std::mutex &lock); 18 | 19 | private: 20 | BlockNumber m_nblocks; 21 | BlockNumber m_last_assigned_block_number; 22 | }; 23 | 24 | // HeapReader 25 | 26 | class HeapReader { 27 | public: 28 | HeapReader(Relation rel, duckdb::shared_ptr heap_reader_global_state, 29 | duckdb::shared_ptr global_state, 30 | duckdb::shared_ptr local_state); 31 | ~HeapReader(); 32 | HeapReader(const HeapReader &other) = delete; 33 | HeapReader &operator=(const HeapReader &other) = delete; 34 | HeapReader &operator=(HeapReader &&other) = delete; 35 | HeapReader(HeapReader &&other) = delete; 36 | bool ReadPageTuples(duckdb::DataChunk &output); 37 | BlockNumber 38 | GetCurrentBlockNumber() { 39 | return m_block_number; 40 | } 41 | 42 | private: 43 | Page PreparePageRead(); 44 | 45 | duckdb::shared_ptr m_global_state; 46 | duckdb::shared_ptr m_heap_reader_global_state; 47 | duckdb::shared_ptr m_local_state; 48 | Relation m_rel; 49 | bool m_inited; 50 | bool m_read_next_page; 51 | bool m_page_tuples_all_visible; 52 | BlockNumber m_block_number; 53 | Buffer m_buffer; 54 | OffsetNumber m_current_tuple_index; 55 | int m_page_tuples_left; 56 | duckdb::unique_ptr m_tuple; 57 | BufferAccessStrategy m_buffer_access_strategy; 58 | }; 59 | 60 | } // namespace pgduckdb 61 | -------------------------------------------------------------------------------- 
/src/pgduckdb/scan/postgres_scan.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb/main/client_context.hpp" 2 | #include "duckdb/function/replacement_scan.hpp" 3 | #include "duckdb/parser/tableref/table_function_ref.hpp" 4 | #include "duckdb/parser/parser.hpp" 5 | #include "duckdb/parser/tableref/subqueryref.hpp" 6 | #include "duckdb/parser/expression/function_expression.hpp" 7 | #include "duckdb/parser/statement/select_statement.hpp" 8 | #include "duckdb/parser/expression/constant_expression.hpp" 9 | #include "duckdb/parser/expression/comparison_expression.hpp" 10 | #include "duckdb/parser/expression/columnref_expression.hpp" 11 | #include "duckdb/parser/qualified_name.hpp" 12 | #include "duckdb/common/enums/statement_type.hpp" 13 | #include "duckdb/common/enums/expression_type.hpp" 14 | 15 | #include "pgduckdb/scan/postgres_scan.hpp" 16 | #include "pgduckdb/pgduckdb_types.hpp" 17 | #include "pgduckdb/pgduckdb_utils.hpp" 18 | 19 | extern "C" { 20 | #include "postgres.h" 21 | #include "access/htup_details.h" 22 | #include "catalog/namespace.h" 23 | #include "catalog/pg_class.h" 24 | #include "optimizer/planmain.h" 25 | #include "optimizer/planner.h" 26 | #include "utils/builtins.h" 27 | #include "utils/regproc.h" 28 | #include "utils/snapmgr.h" 29 | #include "utils/syscache.h" 30 | } 31 | 32 | #include "pgduckdb/pgduckdb_process_lock.hpp" 33 | 34 | namespace pgduckdb { 35 | 36 | void 37 | PostgresScanGlobalState::InitGlobalState(duckdb::TableFunctionInitInput &input) { 38 | /* SELECT COUNT(*) FROM */ 39 | if (input.column_ids.size() == 1 && input.column_ids[0] == UINT64_MAX) { 40 | m_count_tuples_only = true; 41 | return; 42 | } 43 | 44 | /* 45 | * We need to read columns from the Postgres tuple in column order, but for 46 | * outputting them we care about the DuckDB order. 
A map automatically 47 | * orders them based on key, which in this case is the Postgres column 48 | * order 49 | */ 50 | duckdb::map pg_column_order; 51 | duckdb::idx_t scan_index = 0; 52 | for (const auto &pg_column : input.column_ids) { 53 | /* Postgres AttrNumbers are 1-based */ 54 | pg_column_order[pg_column + 1] = scan_index++; 55 | } 56 | 57 | auto table_filters = input.filters.get(); 58 | m_column_filters.resize(input.column_ids.size(), 0); 59 | 60 | for (auto const &[att_num, duckdb_scanned_index] : pg_column_order) { 61 | m_columns_to_scan.emplace_back(att_num, duckdb_scanned_index); 62 | 63 | if (!table_filters) { 64 | continue; 65 | } 66 | 67 | auto column_filter_it = table_filters->filters.find(duckdb_scanned_index); 68 | if (column_filter_it != table_filters->filters.end()) { 69 | m_column_filters[duckdb_scanned_index] = column_filter_it->second.get(); 70 | } 71 | } 72 | 73 | /* We need to check do we consider projection_ids or column_ids list to be used 74 | * for writing to output vector. Projection ids list will be used when 75 | * columns that are used for query filtering are not used afterwards; otherwise 76 | * column ids list will be used and all read tuple columns need to passed 77 | * to upper layers of query execution. 
78 | */ 79 | if (input.CanRemoveFilterColumns()) { 80 | for (const auto &projection_id : input.projection_ids) { 81 | m_output_columns.emplace_back(projection_id, input.column_ids[projection_id] + 1); 82 | } 83 | } else { 84 | duckdb::idx_t output_index = 0; 85 | for (const auto &column_id : input.column_ids) { 86 | m_output_columns.emplace_back(output_index++, column_id + 1); 87 | } 88 | } 89 | } 90 | 91 | void 92 | PostgresScanGlobalState::InitRelationMissingAttrs(TupleDesc tuple_desc) { 93 | std::lock_guard lock(DuckdbProcessLock::GetLock()); 94 | for (int attnum = 0; attnum < tuple_desc->natts; attnum++) { 95 | bool is_null = false; 96 | Datum attr = PostgresFunctionGuard(getmissingattr, tuple_desc, attnum + 1, &is_null); 97 | /* Add missing attr datum if not null*/ 98 | if (!is_null) { 99 | m_relation_missing_attrs[attnum] = attr; 100 | } 101 | } 102 | } 103 | 104 | static Oid 105 | FindMatchingRelation(const duckdb::string &schema, const duckdb::string &table) { 106 | List *name_list = NIL; 107 | if (!schema.empty()) { 108 | name_list = lappend(name_list, makeString(pstrdup(schema.c_str()))); 109 | } 110 | 111 | name_list = lappend(name_list, makeString(pstrdup(table.c_str()))); 112 | 113 | RangeVar *table_range_var = makeRangeVarFromNameList(name_list); 114 | return RangeVarGetRelid(table_range_var, AccessShareLock, true); 115 | } 116 | 117 | const char * 118 | pgduckdb_pg_get_viewdef(Oid view) { 119 | auto oid = ObjectIdGetDatum(view); 120 | Datum viewdef = DirectFunctionCall1(pg_get_viewdef, oid); 121 | return text_to_cstring(DatumGetTextP(viewdef)); 122 | } 123 | 124 | duckdb::unique_ptr 125 | ReplaceView(Oid view) { 126 | const auto view_definition = PostgresFunctionGuard(pgduckdb_pg_get_viewdef, view); 127 | 128 | if (!view_definition) { 129 | throw duckdb::InvalidInputException("Could not retrieve view definition for Relation with relid: %u", view); 130 | } 131 | 132 | duckdb::Parser parser; 133 | parser.ParseQuery(view_definition); 134 | auto 
&statements = parser.statements; 135 | if (statements.size() != 1) { 136 | throw duckdb::InvalidInputException("View definition contained more than 1 statement!"); 137 | } 138 | 139 | if (statements[0]->type != duckdb::StatementType::SELECT_STATEMENT) { 140 | throw duckdb::InvalidInputException("View definition (%s) did not contain a SELECT statement!", 141 | view_definition); 142 | } 143 | 144 | auto select = duckdb::unique_ptr_cast(std::move(statements[0])); 145 | return duckdb::make_uniq(std::move(select)); 146 | } 147 | 148 | duckdb::unique_ptr 149 | PostgresReplacementScan(duckdb::ClientContext &, duckdb::ReplacementScanInput &input, 150 | duckdb::optional_ptr) { 151 | 152 | auto &schema_name = input.schema_name; 153 | auto &table_name = input.table_name; 154 | 155 | auto relid = PostgresFunctionGuard(FindMatchingRelation, schema_name, table_name); 156 | if (relid == InvalidOid) { 157 | return nullptr; 158 | } 159 | 160 | auto tuple = PostgresFunctionGuard(SearchSysCache1, RELOID, ObjectIdGetDatum(relid)); 161 | if (!HeapTupleIsValid(tuple)) { 162 | elog(WARNING, "(PGDuckDB/PostgresReplacementScan) Cache lookup failed for relation %u", relid); 163 | return nullptr; 164 | } 165 | 166 | auto relForm = (Form_pg_class)GETSTRUCT(tuple); 167 | if (relForm->relkind != RELKIND_VIEW) { 168 | PostgresFunctionGuard(ReleaseSysCache, tuple); 169 | return nullptr; 170 | } 171 | 172 | PostgresFunctionGuard(ReleaseSysCache, tuple); 173 | return ReplaceView(relid); 174 | } 175 | 176 | } // namespace pgduckdb 177 | -------------------------------------------------------------------------------- /src/pgduckdb/scan/postgres_scan.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | #include "pgduckdb/pg/declarations.hpp" 6 | #include "pgduckdb/utility/allocator.hpp" 7 | 8 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 
9 | 10 | namespace pgduckdb { 11 | 12 | class PostgresScanGlobalState { 13 | public: 14 | PostgresScanGlobalState() : m_snapshot(nullptr), m_count_tuples_only(false), m_total_row_count(0) { 15 | } 16 | 17 | void InitGlobalState(duckdb::TableFunctionInitInput &input); 18 | 19 | void InitRelationMissingAttrs(TupleDesc tuple_desc); 20 | 21 | Snapshot m_snapshot; 22 | TupleDesc m_tuple_desc; 23 | std::mutex m_lock; // Lock for one replacement scan 24 | bool m_count_tuples_only; 25 | /* Postgres column id to duckdb scanned index. The scanned index is DuckDB 26 | * its scan order of the columns. */ 27 | std::vector> m_columns_to_scan; 28 | /* These are indexed by the DuckDB scan index */ 29 | std::vector m_column_filters; 30 | /* Duckdb output vector idx with information about postgres column id */ 31 | duckdb::vector> m_output_columns; 32 | std::atomic m_total_row_count; 33 | duckdb::map m_relation_missing_attrs; 34 | }; 35 | 36 | class PostgresScanLocalState { 37 | public: 38 | PostgresScanLocalState(const PostgresScanGlobalState *psgs) : m_output_vector_size(0), m_exhausted_scan(false) { 39 | if (!psgs->m_count_tuples_only) { 40 | const auto s = psgs->m_columns_to_scan.size(); 41 | values.resize(s); 42 | nulls.resize(s); 43 | } 44 | } 45 | 46 | uint32_t m_output_vector_size; 47 | bool m_exhausted_scan; 48 | std::vector> values; 49 | std::vector> nulls; 50 | }; 51 | 52 | duckdb::unique_ptr PostgresReplacementScan(duckdb::ClientContext &context, 53 | duckdb::ReplacementScanInput &input, 54 | duckdb::optional_ptr data); 55 | 56 | } // namespace pgduckdb 57 | -------------------------------------------------------------------------------- /src/pgduckdb/scan/postgres_seq_scan.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb.hpp" 2 | 3 | #include "pgduckdb/scan/postgres_seq_scan.hpp" 4 | #include "pgduckdb/pgduckdb_types.hpp" 5 | #include "pgduckdb/logger.hpp" 6 | #include "pgduckdb/scan/heap_reader.hpp" 7 | #include 
"pgduckdb/pg/relations.hpp" 8 | 9 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 10 | 11 | namespace pgduckdb { 12 | 13 | // 14 | // PostgresSeqScanGlobalState 15 | // 16 | 17 | PostgresSeqScanGlobalState::PostgresSeqScanGlobalState(Relation rel, duckdb::TableFunctionInitInput &input) 18 | : m_global_state(duckdb::make_shared_ptr()), 19 | m_heap_reader_global_state(duckdb::make_shared_ptr(rel)), m_rel(rel) { 20 | m_global_state->InitGlobalState(input); 21 | m_global_state->m_tuple_desc = RelationGetDescr(m_rel); 22 | m_global_state->InitRelationMissingAttrs(m_global_state->m_tuple_desc); 23 | pd_log(DEBUG2, "(DuckDB/PostgresSeqScanGlobalState) Running %" PRIu64 " threads -- ", (uint64_t)MaxThreads()); 24 | } 25 | 26 | PostgresSeqScanGlobalState::~PostgresSeqScanGlobalState() { 27 | } 28 | 29 | // 30 | // PostgresSeqScanLocalState 31 | // 32 | 33 | PostgresSeqScanLocalState::PostgresSeqScanLocalState(Relation rel, 34 | duckdb::shared_ptr heap_reder_global_state, 35 | duckdb::shared_ptr global_state) { 36 | m_local_state = duckdb::make_shared_ptr(global_state.get()); 37 | m_heap_table_reader = duckdb::make_uniq(rel, heap_reder_global_state, global_state, m_local_state); 38 | } 39 | 40 | PostgresSeqScanLocalState::~PostgresSeqScanLocalState() { 41 | } 42 | 43 | // 44 | // PostgresSeqScanFunctionData 45 | // 46 | 47 | PostgresSeqScanFunctionData::PostgresSeqScanFunctionData(Relation rel, uint64_t cardinality, Snapshot snapshot) 48 | : m_rel(rel), m_cardinality(cardinality), m_snapshot(snapshot) { 49 | } 50 | 51 | PostgresSeqScanFunctionData::~PostgresSeqScanFunctionData() { 52 | } 53 | 54 | // 55 | // PostgresSeqScanFunction 56 | // 57 | 58 | PostgresSeqScanFunction::PostgresSeqScanFunction() 59 | : TableFunction("postgres_seq_scan", {}, PostgresSeqScanFunc, nullptr, PostgresSeqScanInitGlobal, 60 | PostgresSeqScanInitLocal) { 61 | named_parameters["cardinality"] = duckdb::LogicalType::UBIGINT; 62 | named_parameters["relid"] = 
duckdb::LogicalType::UINTEGER; 63 | named_parameters["snapshot"] = duckdb::LogicalType::POINTER; 64 | projection_pushdown = true; 65 | filter_pushdown = true; 66 | filter_prune = true; 67 | cardinality = PostgresSeqScanCardinality; 68 | } 69 | 70 | duckdb::unique_ptr 71 | PostgresSeqScanFunction::PostgresSeqScanInitGlobal(duckdb::ClientContext &, duckdb::TableFunctionInitInput &input) { 72 | auto &bind_data = input.bind_data->CastNoConst(); 73 | auto global_state = duckdb::make_uniq(bind_data.m_rel, input); 74 | global_state->m_global_state->m_snapshot = bind_data.m_snapshot; 75 | #pragma GCC diagnostic push 76 | #pragma GCC diagnostic ignored "-Wredundant-move" 77 | return std::move(global_state); 78 | #pragma GCC diagnostic pop 79 | } 80 | 81 | duckdb::unique_ptr 82 | PostgresSeqScanFunction::PostgresSeqScanInitLocal(duckdb::ExecutionContext &, duckdb::TableFunctionInitInput &, 83 | duckdb::GlobalTableFunctionState *gstate) { 84 | auto global_state = reinterpret_cast(gstate); 85 | return duckdb::make_uniq(global_state->m_rel, global_state->m_heap_reader_global_state, 86 | global_state->m_global_state); 87 | } 88 | 89 | void 90 | PostgresSeqScanFunction::PostgresSeqScanFunc(duckdb::ClientContext &, duckdb::TableFunctionInput &data, 91 | duckdb::DataChunk &output) { 92 | auto &local_state = data.local_state->Cast(); 93 | 94 | local_state.m_local_state->m_output_vector_size = 0; 95 | 96 | /* We have exhausted seq scan of heap table so we can return */ 97 | if (local_state.m_local_state->m_exhausted_scan) { 98 | output.SetCardinality(0); 99 | return; 100 | } 101 | 102 | auto hasTuple = local_state.m_heap_table_reader->ReadPageTuples(output); 103 | 104 | if (!hasTuple || !IsValidBlockNumber(local_state.m_heap_table_reader->GetCurrentBlockNumber())) { 105 | local_state.m_local_state->m_exhausted_scan = true; 106 | } 107 | } 108 | 109 | duckdb::unique_ptr 110 | PostgresSeqScanFunction::PostgresSeqScanCardinality(duckdb::ClientContext &, const duckdb::FunctionData *data) 
{ 111 | auto &bind_data = data->Cast(); 112 | return duckdb::make_uniq(bind_data.m_cardinality, bind_data.m_cardinality); 113 | } 114 | 115 | } // namespace pgduckdb 116 | -------------------------------------------------------------------------------- /src/pgduckdb/scan/postgres_seq_scan.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | #include "pgduckdb/pgduckdb_guc.h" 6 | #include "pgduckdb/pg/declarations.hpp" 7 | 8 | #include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. 9 | 10 | namespace pgduckdb { 11 | 12 | class HeapReaderGlobalState; 13 | class HeapReader; 14 | class PostgresScanGlobalState; 15 | class PostgresScanLocalState; 16 | 17 | // Global State 18 | 19 | struct PostgresSeqScanGlobalState : public duckdb::GlobalTableFunctionState { 20 | explicit PostgresSeqScanGlobalState(Relation rel, duckdb::TableFunctionInitInput &input); 21 | ~PostgresSeqScanGlobalState(); 22 | idx_t 23 | MaxThreads() const override { 24 | return duckdb_max_threads_per_postgres_scan; 25 | } 26 | 27 | public: 28 | duckdb::shared_ptr m_global_state; 29 | duckdb::shared_ptr m_heap_reader_global_state; 30 | Relation m_rel; 31 | }; 32 | 33 | // Local State 34 | 35 | struct PostgresSeqScanLocalState : public duckdb::LocalTableFunctionState { 36 | public: 37 | PostgresSeqScanLocalState(Relation rel, duckdb::shared_ptr heap_reader_global_state, 38 | duckdb::shared_ptr global_state); 39 | ~PostgresSeqScanLocalState() override; 40 | 41 | public: 42 | duckdb::shared_ptr m_local_state; 43 | duckdb::unique_ptr m_heap_table_reader; 44 | }; 45 | 46 | // PostgresSeqScanFunctionData 47 | 48 | struct PostgresSeqScanFunctionData : public duckdb::TableFunctionData { 49 | public: 50 | PostgresSeqScanFunctionData(Relation rel, uint64_t cardinality, Snapshot snapshot); 51 | ~PostgresSeqScanFunctionData() override; 52 | 53 | public: 54 | Relation m_rel; 55 | uint64_t m_cardinality; 56 | Snapshot 
m_snapshot; 57 | }; 58 | 59 | // PostgresSeqScanFunction 60 | 61 | struct PostgresSeqScanFunction : public duckdb::TableFunction { 62 | public: 63 | PostgresSeqScanFunction(); 64 | 65 | public: 66 | static duckdb::unique_ptr 67 | PostgresSeqScanInitGlobal(duckdb::ClientContext &context, duckdb::TableFunctionInitInput &input); 68 | static duckdb::unique_ptr 69 | PostgresSeqScanInitLocal(duckdb::ExecutionContext &context, duckdb::TableFunctionInitInput &input, 70 | duckdb::GlobalTableFunctionState *gstate); 71 | static void PostgresSeqScanFunc(duckdb::ClientContext &context, duckdb::TableFunctionInput &data, 72 | duckdb::DataChunk &output); 73 | 74 | static duckdb::unique_ptr PostgresSeqScanCardinality(duckdb::ClientContext &context, 75 | const duckdb::FunctionData *data); 76 | }; 77 | 78 | } // namespace pgduckdb 79 | -------------------------------------------------------------------------------- /src/pgduckdb/utility/allocator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace pgduckdb { 4 | 5 | template 6 | struct DuckDBMallocator { 7 | typedef T value_type; 8 | 9 | DuckDBMallocator() = default; 10 | 11 | template 12 | constexpr DuckDBMallocator(const DuckDBMallocator &) noexcept { 13 | } 14 | 15 | [[nodiscard]] T * 16 | allocate(std::size_t n) { 17 | if (n > std::numeric_limits::max() / sizeof(T)) { 18 | throw std::bad_array_new_length(); 19 | } 20 | 21 | auto p = static_cast(duckdb_malloc(n * sizeof(T))); 22 | if (p == nullptr) { 23 | throw std::bad_alloc(); 24 | } 25 | 26 | return p; 27 | } 28 | 29 | void 30 | deallocate(T *p, std::size_t) noexcept { 31 | duckdb_free(p); 32 | } 33 | }; 34 | 35 | template 36 | bool 37 | operator==(const DuckDBMallocator &, const DuckDBMallocator &) { 38 | return true; 39 | } 40 | 41 | template 42 | bool 43 | operator!=(const DuckDBMallocator &, const DuckDBMallocator &) { 44 | return false; 45 | } 46 | 47 | } // namespace pgduckdb 48 | 
-------------------------------------------------------------------------------- /src/pgduckdb/utility/copy.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" { 4 | #include "postgres.h" 5 | #include "nodes/plannodes.h" 6 | } 7 | 8 | const char *MakeDuckdbCopyQuery(PlannedStmt *pstmt, const char *query_string, struct QueryEnvironment *query_env); 9 | -------------------------------------------------------------------------------- /src/pgduckdb/utility/cpp_only_file.hpp: -------------------------------------------------------------------------------- 1 | #if defined(POSTGRES_H) 2 | static_assert(false, "No Postgres header should be included in this file."); 3 | #endif 4 | -------------------------------------------------------------------------------- /src/pgduckdb/utility/cpp_wrapper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" { 4 | #include "postgres.h" 5 | } 6 | 7 | namespace pgduckdb { 8 | 9 | template 10 | typename std::invoke_result::type 11 | __CPPFunctionGuard__(const char *func_name, FuncArgs... args) { 12 | const char *error_message = nullptr; 13 | try { 14 | return func(args...); 15 | } catch (duckdb::Exception &ex) { 16 | duckdb::ErrorData edata(ex.what()); 17 | error_message = pstrdup(edata.Message().c_str()); 18 | } catch (std::exception &ex) { 19 | const auto msg = ex.what(); 20 | if (msg[0] == '{') { 21 | duckdb::ErrorData edata(ex.what()); 22 | error_message = pstrdup(edata.Message().c_str()); 23 | } else { 24 | error_message = pstrdup(ex.what()); 25 | } 26 | } 27 | 28 | elog(ERROR, "(PGDuckDB/%s) %s", func_name, error_message); 29 | } 30 | 31 | } // namespace pgduckdb 32 | 33 | #define InvokeCPPFunc(FUNC, ...) 
pgduckdb::__CPPFunctionGuard__(__FUNCTION__, ##__VA_ARGS__) 34 | 35 | // Wrappers 36 | 37 | #define DECLARE_PG_FUNCTION(func_name) \ 38 | PG_FUNCTION_INFO_V1(func_name); \ 39 | Datum func_name##_cpp(PG_FUNCTION_ARGS); \ 40 | Datum func_name(PG_FUNCTION_ARGS) { \ 41 | return InvokeCPPFunc(func_name##_cpp, fcinfo); \ 42 | } \ 43 | Datum func_name##_cpp(PG_FUNCTION_ARGS __attribute__((unused))) 44 | -------------------------------------------------------------------------------- /src/pgduckdb/utility/rename_ruleutils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * This file contains the renaming of the functions exposed by 5 | * vendor/pg_ruleutils.h functions to avoid conflicts with the PostgreSQL 6 | * functions. 7 | */ 8 | #define pg_get_indexdef_string pgduckdb_pg_get_indexdef_string 9 | #define pg_get_indexdef_columns pgduckdb_pg_get_indexdef_columns 10 | #define pg_get_indexdef_columns_extended pgduckdb_pg_get_indexdef_columns_extended 11 | #define pg_get_querydef pgduckdb_pg_get_querydef_internal 12 | #define pg_get_partkeydef_columns pgduckdb_pg_get_partkeydef_columns 13 | #define pg_get_partconstrdef_string pgduckdb_pg_get_partconstrdef_string 14 | #define pg_get_constraintdef_command pgduckdb_pg_get_constraintdef_command 15 | #define deparse_expression pgduckdb_deparse_expression 16 | #define deparse_context_for pgduckdb_deparse_context_for 17 | #define deparse_context_for_plan_tree pgduckdb_deparse_context_for_plan_tree 18 | #define set_deparse_context_plan pgduckdb_set_deparse_context_plan 19 | #define select_rtable_names_for_explain pgduckdb_select_rtable_names_for_explain 20 | #define generate_collation_name pgduckdb_generate_collation_name 21 | #define generate_opclass_name pgduckdb_generate_opclass_name 22 | #define get_range_partbound_string pgduckdb_get_range_partbound_string 23 | #define pg_get_statisticsobjdef_string pgduckdb_pg_get_statisticsobjdef_string 24 | #define 
get_list_partvalue_string pgduckdb_get_list_partvalue_string 25 | 26 | /* 27 | * The following replaces all usages of generate_qualified_relation_name and 28 | * generate_relation_name with calls to the pgduckdb_relation_name function 29 | */ 30 | #define generate_qualified_relation_name pgduckdb_relation_name 31 | #define generate_relation_name(relid, namespaces) pgduckdb_relation_name(relid) 32 | -------------------------------------------------------------------------------- /src/pgduckdb/vendor/.clang-format: -------------------------------------------------------------------------------- 1 | DisableFormat: true 2 | SortIncludes: false 3 | -------------------------------------------------------------------------------- /src/pgduckdb/vendor/pg_explain.cpp: -------------------------------------------------------------------------------- 1 | /*------------------------------------------------------------------------- 2 | * 3 | * pg_explain.cpp 4 | * Explain query execution plans 5 | * 6 | * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group 7 | * Portions Copyright (c) 1994-5, Regents of the University of California 8 | * 9 | *------------------------------------------------------------------------- 10 | */ 11 | extern "C" { 12 | #include "postgres.h" 13 | 14 | #include "optimizer/planner.h" 15 | #include "tcop/tcopprot.h" 16 | 17 | #include "pgduckdb/vendor/pg_explain.hpp" 18 | 19 | #if PG_VERSION_NUM < 170000 20 | 21 | /* 22 | * standard_ExplainOneQuery - 23 | * print out the execution plan for one Query, without calling a hook. 24 | * 25 | * This is a PG16 version of the standard ExplainOneQuery function that was 26 | * introduced in PG17. 
27 | */ 28 | void 29 | standard_ExplainOneQuery(Query *query, int cursorOptions, IntoClause *into, ExplainState *es, const char *queryString, 30 | ParamListInfo params, QueryEnvironment *queryEnv) { 31 | PlannedStmt *plan; 32 | instr_time planstart, planduration; 33 | BufferUsage bufusage_start, bufusage; 34 | 35 | if (es->buffers) 36 | bufusage_start = pgBufferUsage; 37 | INSTR_TIME_SET_CURRENT(planstart); 38 | 39 | /* plan the query */ 40 | plan = pg_plan_query(query, queryString, cursorOptions, params); 41 | 42 | INSTR_TIME_SET_CURRENT(planduration); 43 | INSTR_TIME_SUBTRACT(planduration, planstart); 44 | 45 | /* calc differences of buffer counters. */ 46 | if (es->buffers) { 47 | memset(&bufusage, 0, sizeof(BufferUsage)); 48 | BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); 49 | } 50 | 51 | /* run it (if needed) and produce output */ 52 | ExplainOnePlan(plan, into, es, queryString, params, queryEnv, &planduration, (es->buffers ? &bufusage : NULL)); 53 | } 54 | #endif 55 | } 56 | -------------------------------------------------------------------------------- /src/pgduckdb/vendor/pg_explain.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | extern "C" { 3 | #include "postgres.h" 4 | 5 | #include "commands/explain.h" 6 | 7 | #if PG_VERSION_NUM < 170000 8 | void standard_ExplainOneQuery(Query *query, int cursorOptions, IntoClause *into, ExplainState *es, 9 | const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv); 10 | #endif 11 | } 12 | -------------------------------------------------------------------------------- /src/pgduckdb/vendor/pg_list.hpp: -------------------------------------------------------------------------------- 1 | /*------------------------------------------------------------------------- 2 | * 3 | * pg_list.hpp 4 | * PG17 list macros from pg_list.h backported to lower versions, with a 5 | * small modification to make foreach_ptr work in C++ (which has 
stricter 6 | * casting rules from void *) 7 | * 8 | * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group 9 | * Portions Copyright (c) 1994, Regents of the University of California 10 | * 11 | *------------------------------------------------------------------------- 12 | */ 13 | 14 | #pragma once 15 | 16 | #include "c.h" 17 | 18 | #include "nodes/pg_list.h" 19 | /* 20 | * Remove the original definition of foreach_delete_current so we can redefine 21 | * it below in a way that works for the easier to use foreach_* macros. 22 | */ 23 | #undef foreach_delete_current 24 | 25 | #if PG_VERSION_NUM >= 170000 26 | #undef foreach_ptr 27 | #undef foreach_int 28 | #undef foreach_oid 29 | #undef foreach_xid 30 | #undef foreach_internal 31 | #undef foreach_node 32 | #endif 33 | 34 | // clang-format off 35 | /* 36 | * foreach_delete_current - 37 | * delete the current list element from the List associated with a 38 | * surrounding foreach() or foreach_*() loop, returning the new List 39 | * pointer; pass the name of the iterator variable. 40 | * 41 | * This is similar to list_delete_cell(), but it also adjusts the loop's state 42 | * so that no list elements will be missed. Do not delete elements from an 43 | * active foreach or foreach_* loop's list in any other way! 44 | */ 45 | #define foreach_delete_current(lst, var_or_cell) \ 46 | ((List *) (var_or_cell##__state.l = list_delete_nth_cell(lst, var_or_cell##__state.i--))) 47 | 48 | /* 49 | * Convenience macros that loop through a list without needing a separate 50 | * "ListCell *" variable. Instead, the macros declare a locally-scoped loop 51 | * variable with the provided name and the appropriate type. 52 | * 53 | * Since the variable is scoped to the loop, it's not possible to detect an 54 | * early break by checking its value after the loop completes, as is common 55 | * practice. 
If you need to do this, you can either use foreach() instead or 56 | * manually track early breaks with a separate variable declared outside of the 57 | * loop. 58 | * 59 | * Note that the caveats described in the comment above the foreach() macro 60 | * also apply to these convenience macros. 61 | */ 62 | #define foreach_ptr(type, var, lst) foreach_internal(type, *, var, lst, lfirst) 63 | #define foreach_int(var, lst) foreach_internal(int, , var, lst, lfirst_int) 64 | #define foreach_oid(var, lst) foreach_internal(Oid, , var, lst, lfirst_oid) 65 | #define foreach_xid(var, lst) foreach_internal(TransactionId, , var, lst, lfirst_xid) 66 | 67 | /* 68 | * The internal implementation of the above macros. Do not use directly. 69 | * 70 | * This macro actually generates two loops in order to declare two variables of 71 | * different types. The outer loop only iterates once, so we expect optimizing 72 | * compilers will unroll it, thereby optimizing it away. 73 | */ 74 | #define foreach_internal(type, pointer, var, lst, func) \ 75 | for (type pointer var = 0, pointer var##__outerloop = (type pointer) 1; \ 76 | var##__outerloop; \ 77 | var##__outerloop = 0) \ 78 | for (ForEachState var##__state = {(lst), 0}; \ 79 | (var##__state.l != NIL && \ 80 | var##__state.i < var##__state.l->length && \ 81 | (var = (type pointer) func(&var##__state.l->elements[var##__state.i]), true)); \ 82 | var##__state.i++) 83 | 84 | /* 85 | * foreach_node - 86 | * The same as foreach_ptr, but asserts that the element is of the specified 87 | * node type. 
88 | */ 89 | #define foreach_node(type, var, lst) \ 90 | for (type * var = 0, *var##__outerloop = (type *) 1; \ 91 | var##__outerloop; \ 92 | var##__outerloop = 0) \ 93 | for (ForEachState var##__state = {(lst), 0}; \ 94 | (var##__state.l != NIL && \ 95 | var##__state.i < var##__state.l->length && \ 96 | (var = lfirst_node(type, &var##__state.l->elements[var##__state.i]), true)); \ 97 | var##__state.i++) 98 | // clang-format on 99 | -------------------------------------------------------------------------------- /src/pgduckdb/vendor/pg_ruleutils.h: -------------------------------------------------------------------------------- 1 | /*------------------------------------------------------------------------- 2 | * 3 | * pg_ruleutils.h 4 | * Declarations for ruleutils.c 5 | * 6 | * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group 7 | * Portions Copyright (c) 1994, Regents of the University of California 8 | * 9 | *------------------------------------------------------------------------- 10 | */ 11 | #ifndef RULEUTILS_H 12 | #define RULEUTILS_H 13 | 14 | #include "postgres.h" 15 | 16 | #include "nodes/nodes.h" 17 | #include "nodes/parsenodes.h" 18 | #include "nodes/pg_list.h" 19 | 20 | struct Plan; /* avoid including plannodes.h here */ 21 | struct PlannedStmt; 22 | 23 | /* Flags for pg_get_indexdef_columns_extended() */ 24 | #define RULE_INDEXDEF_PRETTY 0x01 25 | #define RULE_INDEXDEF_KEYS_ONLY 0x02 /* ignore included attributes */ 26 | 27 | extern char *pgduckdb_pg_get_indexdef_string(Oid indexrelid); 28 | extern char *pgduckdb_pg_get_indexdef_columns(Oid indexrelid, bool pretty); 29 | extern char *pgduckdb_pg_get_indexdef_columns_extended(Oid indexrelid, 30 | bits16 flags); 31 | extern char *pgduckdb_pg_get_querydef_internal(Query *query, bool pretty); 32 | 33 | extern char *pgduckdb_pg_get_partkeydef_columns(Oid relid, bool pretty); 34 | extern char *pgduckdb_pg_get_partconstrdef_string(Oid partitionId, char *aliasname); 35 | 36 | extern char 
*pgduckdb_pg_get_constraintdef_command(Oid constraintId); 37 | extern char *pgduckdb_deparse_expression(Node *expr, List *dpcontext, 38 | bool forceprefix, bool showimplicit); 39 | extern List *pgduckdb_deparse_context_for(const char *aliasname, Oid relid); 40 | extern List *pgduckdb_deparse_context_for_plan_tree(struct PlannedStmt *pstmt, 41 | List *rtable_names); 42 | extern List *pgduckdb_set_deparse_context_plan(List *dpcontext, 43 | struct Plan *plan, List *ancestors); 44 | extern List *pgduckdb_select_rtable_names_for_explain(List *rtable, 45 | Bitmapset *rels_used); 46 | extern char *pgduckdb_generate_collation_name(Oid collid); 47 | extern char *pgduckdb_generate_opclass_name(Oid opclass); 48 | extern char *pgduckdb_get_range_partbound_string(List *bound_datums); 49 | 50 | extern char *pgduckdb_pg_get_statisticsobjdef_string(Oid statextid); 51 | 52 | extern char *pgduckdb_get_list_partvalue_string(Const *val); 53 | 54 | #endif /* RULEUTILS_H */ 55 | -------------------------------------------------------------------------------- /src/pgmooncake.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb/common/file_system.hpp" 2 | #include "pgduckdb/pgduckdb_xact.hpp" 3 | 4 | extern "C" { 5 | #include "postgres.h" 6 | 7 | #include "fmgr.h" 8 | #include "utils/guc.h" 9 | } 10 | 11 | void MooncakeInitGUC(); 12 | void DuckdbInitHooks(); 13 | 14 | const char *x_mooncake_local_cache = "mooncake_local_cache/"; 15 | 16 | bool mooncake_allow_local_tables = true; 17 | char *mooncake_default_bucket = strdup(""); 18 | bool mooncake_enable_local_cache = true; 19 | bool mooncake_enable_memory_metadata_cache = false; 20 | const char *mooncake_timeline_id = "main"; 21 | 22 | extern "C" { 23 | PG_MODULE_MAGIC; 24 | 25 | void DuckdbInitNode(); 26 | 27 | // Module load hook: calls the GUC, hook and node initializers, registers the DuckDB 28 | // transaction callback, applies the "neon.timeline_id" override when that option is set, 29 | // and creates the local cache and local tables directories. 30 | void _PG_init() { 31 | MooncakeInitGUC(); 32 | DuckdbInitHooks(); 33 | DuckdbInitNode(); 34 | pgduckdb::RegisterDuckdbXactCallback(); 35 | 36 | const char *neon_timeline_id = 37
| GetConfigOption("neon.timeline_id", true /*missing_ok*/, false /*restrict_privileged*/); 38 | if (neon_timeline_id) { 39 | // GetConfigOption() hands back GUC-owned storage that is only valid until the next 40 | // configuration-related call, so keep a private copy for the global to point at. 41 | char *timeline_id_copy = strdup(neon_timeline_id); 42 | if (timeline_id_copy == nullptr) { 43 | elog(ERROR, "out of memory"); 44 | } 45 | mooncake_allow_local_tables = false; 46 | mooncake_timeline_id = timeline_id_copy; 47 | } 48 | 49 | auto local_fs = duckdb::FileSystem::CreateLocal(); 50 | local_fs->CreateDirectory("mooncake_local_cache"); 51 | local_fs->CreateDirectory("mooncake_local_tables"); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/pgmooncake_guc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern const char *x_mooncake_local_cache; 4 | 5 | extern bool mooncake_allow_local_tables; 6 | extern char *mooncake_default_bucket; 7 | extern bool mooncake_enable_local_cache; 8 | extern bool mooncake_enable_memory_metadata_cache; 9 | extern const char *mooncake_timeline_id; 10 | -------------------------------------------------------------------------------- /test/expected/approx_count_distinct.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b text) USING columnstore; 2 | INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); 3 | INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); 4 | SELECT mooncake.approx_count_distinct(a), mooncake.approx_count_distinct(b) FROM t; 5 | approx_count_distinct | approx_count_distinct 6 | -----------------------+----------------------- 7 | 5 | 9 8 | (1 row) 9 | 10 | SELECT a, mooncake.approx_count_distinct(b) FROM t GROUP BY a ORDER BY a; 11 | a | approx_count_distinct 12 | ---+----------------------- 13 | 1 | 1 14 | 2 | 2 15 | 3 | 2 16 | 4 | 2 17 | 5 | 1 18 | (5 rows) 19 | 20 | SELECT a, mooncake.approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a; 21 | a | approx_count_distinct 22 | ---+----------------------- 23 | 1 | 1 24 | 2 | 2 25 | 2 | 2 26 | 3 | 2 27 | 3 | 2 28 | 4 | 2 29 | 4 | 2 30 | 5 | 1 31 | (8 rows) 32 | 33 | DROP
TABLE t; 34 | -------------------------------------------------------------------------------- /test/expected/cardinality.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int) USING columnstore; 2 | INSERT INTO t SELECT * FROM generate_series(1, 100); 3 | EXPLAIN SELECT * FROM t; 4 | QUERY PLAN 5 | -------------------------------------------------------------------- 6 | Custom Scan (MooncakeDuckDBScan) (cost=0.00..0.00 rows=0 width=0) 7 | DuckDB Execution Plan: 8 | 9 | ┌───────────────────────────┐ 10 | │ COLUMNSTORE_SCAN │ 11 | │ ──────────────────── │ 12 | │ Function: │ 13 | │ COLUMNSTORE_SCAN │ 14 | │ │ 15 | │ Projections: a │ 16 | │ │ 17 | │ ~100 Rows │ 18 | └───────────────────────────┘ 19 | 20 | 21 | (15 rows) 22 | 23 | INSERT INTO t SELECT * FROM generate_series(1, 200); 24 | EXPLAIN SELECT * FROM t; 25 | QUERY PLAN 26 | -------------------------------------------------------------------- 27 | Custom Scan (MooncakeDuckDBScan) (cost=0.00..0.00 rows=0 width=0) 28 | DuckDB Execution Plan: 29 | 30 | ┌───────────────────────────┐ 31 | │ COLUMNSTORE_SCAN │ 32 | │ ──────────────────── │ 33 | │ Function: │ 34 | │ COLUMNSTORE_SCAN │ 35 | │ │ 36 | │ Projections: a │ 37 | │ │ 38 | │ ~300 Rows │ 39 | └───────────────────────────┘ 40 | 41 | 42 | (15 rows) 43 | 44 | DROP TABLE t; 45 | -------------------------------------------------------------------------------- /test/expected/columns.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int DEFAULT NULL, b int) USING columnstore; 2 | INSERT INTO t VALUES (DEFAULT, 1), (DEFAULT, 2); 3 | INSERT INTO t VALUES (3, 3), (4, 4); 4 | INSERT INTO t SELECT 5, 5; 5 | SELECT * FROM t; 6 | a | b 7 | ---+--- 8 | | 1 9 | | 2 10 | 3 | 3 11 | 4 | 4 12 | 5 | 5 13 | (5 rows) 14 | 15 | DROP TABLE t; 16 | CREATE TABLE t (a int DEFAULT 3, b int) USING columnstore; 17 | INSERT INTO t VALUES (DEFAULT, 1), (DEFAULT, 2); 18 | INSERT 
INTO t VALUES (3, 3), (4, 4); 19 | INSERT INTO t SELECT 5, 5; 20 | SELECT * FROM t; 21 | a | b 22 | ---+--- 23 | 3 | 1 24 | 3 | 2 25 | 3 | 3 26 | 4 | 4 27 | 5 | 5 28 | (5 rows) 29 | 30 | DROP TABLE t; 31 | CREATE TABLE t (a int DEFAULT 1 + 2, b int) USING columnstore; 32 | INSERT INTO t (b) VALUES (1), (2); 33 | ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Not implemented Error: column "a" has unsupported default value 34 | DROP TABLE t; 35 | CREATE TABLE t (a int GENERATED ALWAYS AS IDENTITY, b int) USING columnstore; 36 | INSERT INTO t (b) VALUES (1), (2); 37 | INSERT INTO t (b) SELECT 3; 38 | SELECT * FROM t; 39 | a | b 40 | ---+--- 41 | 1 | 1 42 | 2 | 2 43 | 3 | 3 44 | (3 rows) 45 | 46 | DROP TABLE t; 47 | CREATE TABLE t (a int) USING columnstore; 48 | INSERT INTO t VALUES (1), (2); 49 | CREATE TABLE r (b int DEFAULT 1 + 2); 50 | INSERT INTO r VALUES (1), (2); 51 | SELECT * FROM t JOIN r ON a = b; 52 | a | b 53 | ---+--- 54 | 1 | 1 55 | 2 | 2 56 | (2 rows) 57 | 58 | DROP TABLE t, r; 59 | -------------------------------------------------------------------------------- /test/expected/constraints.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b int NOT NULL, c text NOT NULL) USING columnstore; 2 | INSERT INTO t VALUES (1, 2, 'a'), (3, 4, 'b'), (5, 6, 'c'); 3 | INSERT INTO t VALUES (7, NULL, 'd'); 4 | ERROR: (PGDuckDB/Duckdb_ExecCustomScan) Constraint Error: NOT NULL constraint failed: t.b 5 | INSERT INTO t (a, b) VALUES (7, 8); 6 | ERROR: (PGDuckDB/Duckdb_ExecCustomScan) Constraint Error: NOT NULL constraint failed: t.c 7 | INSERT INTO t (b, c) VALUES (8, 'd'); 8 | UPDATE t SET b = 0 WHERE a = 1; 9 | UPDATE t SET c = NULL WHERE a = 1; 10 | ERROR: (PGDuckDB/Duckdb_ExecCustomScan) Constraint Error: NOT NULL constraint failed: t.c 11 | SELECT * FROM t; 12 | a | b | c 13 | ---+---+--- 14 | | 8 | d 15 | 1 | 0 | a 16 | 3 | 4 | b 17 | 5 | 6 | c 18 | (4 rows) 19 | 20 | DROP TABLE t; 21 | 
-------------------------------------------------------------------------------- /test/expected/copy.out: -------------------------------------------------------------------------------- 1 | \set pwd `pwd` 2 | \set csv_file '\'' :pwd '/results/copy.csv' '\'' 3 | COPY (SELECT * FROM generate_series(1, 5)) TO :csv_file; 4 | CREATE TABLE t (a int) USING columnstore; 5 | COPY t FROM :csv_file; 6 | SELECT * FROM t; 7 | a 8 | --- 9 | 1 10 | 2 11 | 3 12 | 4 13 | 5 14 | (5 rows) 15 | 16 | COPY t TO :csv_file; 17 | COPY (SELECT * FROM t) TO :csv_file; 18 | DROP TABLE t; 19 | -------------------------------------------------------------------------------- /test/expected/create_table_as.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE r (a int); 2 | INSERT INTO r VALUES (1), (2), (3); 3 | CREATE TABLE c (a int) USING columnstore; 4 | INSERT INTO c VALUES (1), (2), (3); 5 | CREATE TABLE t AS SELECT * FROM r; 6 | SELECT * FROM t; 7 | a 8 | --- 9 | 1 10 | 2 11 | 3 12 | (3 rows) 13 | 14 | DROP TABLE t; 15 | CREATE TABLE t AS SELECT * FROM c; 16 | SELECT * FROM t; 17 | a 18 | --- 19 | 1 20 | 2 21 | 3 22 | (3 rows) 23 | 24 | DROP TABLE t; 25 | CREATE TABLE t USING columnstore AS SELECT * FROM r; 26 | SELECT * FROM t; 27 | a 28 | --- 29 | 1 30 | 2 31 | 3 32 | (3 rows) 33 | 34 | DROP TABLE t; 35 | CREATE TABLE t USING columnstore AS SELECT * FROM c; 36 | SELECT * FROM t; 37 | a 38 | --- 39 | 1 40 | 2 41 | 3 42 | (3 rows) 43 | 44 | DROP TABLE t; 45 | CREATE TABLE t USING columnstore AS SELECT * FROM r WITH NO DATA; 46 | SELECT * FROM t; 47 | a 48 | --- 49 | (0 rows) 50 | 51 | DROP TABLE t; 52 | CREATE TABLE t USING columnstore AS TABLE r; 53 | SELECT * FROM t; 54 | a 55 | --- 56 | 1 57 | 2 58 | 3 59 | (3 rows) 60 | 61 | DROP TABLE t; 62 | CREATE TABLE t USING columnstore AS VALUES (123, 'abc'); 63 | SELECT * FROM t; 64 | column1 | column2 65 | ---------+--------- 66 | 123 | abc 67 | (1 row) 68 | 69 | DROP TABLE t; 70 | DROP TABLE 
r, c; 71 | -------------------------------------------------------------------------------- /test/expected/cursor.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int); 2 | INSERT INTO t VALUES (1); 3 | BEGIN; 4 | DECLARE c SCROLL CURSOR FOR SELECT a FROM t; 5 | FETCH NEXT FROM c; 6 | a 7 | --- 8 | 1 9 | (1 row) 10 | 11 | FETCH NEXT FROM c; 12 | a 13 | --- 14 | (0 rows) 15 | 16 | FETCH PRIOR FROM c; 17 | a 18 | --- 19 | 1 20 | (1 row) 21 | 22 | COMMIT; 23 | DROP TABLE t; 24 | CREATE TABLE t (a int) USING columnstore; 25 | INSERT INTO t VALUES (1); 26 | BEGIN; 27 | DECLARE c SCROLL CURSOR FOR SELECT a FROM t; 28 | FETCH NEXT FROM c; 29 | a 30 | --- 31 | 1 32 | (1 row) 33 | 34 | FETCH NEXT FROM c; 35 | a 36 | --- 37 | (0 rows) 38 | 39 | FETCH PRIOR FROM c; 40 | a 41 | --- 42 | 1 43 | (1 row) 44 | 45 | COMMIT; 46 | DROP TABLE t; 47 | -------------------------------------------------------------------------------- /test/expected/non_superuser.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int) USING columnstore; 2 | INSERT INTO t VALUES (1), (2), (3); 3 | CREATE USER user1; 4 | GRANT SELECT ON t TO user1; 5 | SET ROLE user1; 6 | SELECT * FROM t; 7 | a 8 | --- 9 | 1 10 | 2 11 | 3 12 | (3 rows) 13 | 14 | RESET ROLE; 15 | DROP TABLE t; 16 | DROP USER user1; 17 | -------------------------------------------------------------------------------- /test/expected/returning.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int) USING columnstore; 2 | INSERT INTO t VALUES (1), (2), (3); 3 | INSERT INTO t VALUES (123) RETURNING a + 1; 4 | ?column? 5 | ---------- 6 | 124 7 | (1 row) 8 | 9 | UPDATE t SET a = -a RETURNING a + 1; 10 | ?column? 11 | ---------- 12 | 0 13 | -1 14 | -2 15 | -122 16 | (4 rows) 17 | 18 | DELETE FROM t RETURNING a + 1; 19 | ?column? 
20 | ---------- 21 | 0 22 | -1 23 | -2 24 | -122 25 | (4 rows) 26 | 27 | DROP TABLE t; 28 | -------------------------------------------------------------------------------- /test/expected/sanity.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b text) USING columnstore; 2 | INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); 3 | INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); 4 | UPDATE t SET b = a + 1 WHERE a > 3; 5 | DELETE FROM t WHERE a < 3; 6 | INSERT INTO t SELECT 2, b FROM t WHERE a = 3; 7 | SELECT * FROM t; 8 | a | b 9 | ---+--- 10 | 4 | 5 11 | 5 | 6 12 | 4 | 5 13 | 3 | c 14 | 3 | g 15 | 2 | c 16 | 2 | g 17 | (7 rows) 18 | 19 | DROP TABLE t; 20 | CREATE TABLE t (a int) USING columnstore; 21 | ANALYZE t; 22 | BEGIN; 23 | INSERT INTO t VALUES (123); 24 | SELECT * FROM t; 25 | a 26 | ----- 27 | 123 28 | (1 row) 29 | 30 | ROLLBACK; 31 | SELECT * FROM t; 32 | a 33 | --- 34 | (0 rows) 35 | 36 | BEGIN; 37 | INSERT INTO t VALUES (123); 38 | SELECT * FROM t; 39 | a 40 | ----- 41 | 123 42 | (1 row) 43 | 44 | COMMIT; 45 | SELECT * FROM t; 46 | a 47 | ----- 48 | 123 49 | (1 row) 50 | 51 | DROP TABLE t; 52 | -------------------------------------------------------------------------------- /test/expected/segment_elimination.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b text) USING columnstore; 2 | INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'); 3 | INSERT INTO t VALUES (123, 'abc'); 4 | UPDATE t SET b = 'def' WHERE a = 123; 5 | SELECT * FROM t; 6 | a | b 7 | -----+----- 8 | 1 | a 9 | 2 | b 10 | 3 | c 11 | 123 | def 12 | (4 rows) 13 | 14 | DROP TABLE t; 15 | -------------------------------------------------------------------------------- /test/expected/transaction.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE s (a int); 2 | CREATE TABLE t (b int) USING 
columnstore; 3 | BEGIN; 4 | INSERT INTO s VALUES (1); 5 | INSERT INTO t VALUES (2); 6 | COMMIT; 7 | SELECT * FROM s; 8 | a 9 | --- 10 | 1 11 | (1 row) 12 | 13 | SELECT * FROM t; 14 | b 15 | --- 16 | 2 17 | (1 row) 18 | 19 | DROP TABLE s, t; 20 | -------------------------------------------------------------------------------- /test/expected/trigger.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b text); 2 | CREATE TABLE t_log (operation text, a int, b text) USING COLUMNSTORE; 3 | CREATE FUNCTION t_trigger_handler() 4 | RETURNS TRIGGER LANGUAGE 'plpgsql' 5 | AS 6 | $func$ 7 | BEGIN 8 | IF TG_OP = 'INSERT' THEN 9 | INSERT INTO t_log VALUES ('INSERT', NEW.a, NEW.b); 10 | ELSIF TG_OP = 'DELETE' THEN 11 | INSERT INTO t_log VALUES ('DELETE', OLD.a, OLD.b); 12 | ELSIF TG_OP = 'UPDATE' THEN 13 | INSERT INTO t_log VALUES ('DELETE', OLD.a, OLD.b), ('INSERT', NEW.a, NEW.b); 14 | END IF; 15 | RETURN NEW; 16 | END; 17 | $func$; 18 | CREATE TRIGGER t_trigger 19 | AFTER INSERT OR DELETE OR UPDATE ON t 20 | FOR EACH ROW 21 | EXECUTE FUNCTION t_trigger_handler(); 22 | INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); 23 | INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); 24 | UPDATE t SET b = a + 1 WHERE a > 3; 25 | DELETE FROM t WHERE a < 3; 26 | SELECT * FROM t; 27 | a | b 28 | ---+--- 29 | 3 | c 30 | 3 | g 31 | 4 | 5 32 | 5 | 6 33 | 4 | 5 34 | (5 rows) 35 | 36 | SELECT * FROM t_log; 37 | operation | a | b 38 | -----------+---+--- 39 | INSERT | 1 | a 40 | INSERT | 2 | b 41 | INSERT | 3 | c 42 | INSERT | 4 | d 43 | INSERT | 5 | e 44 | INSERT | 2 | f 45 | INSERT | 3 | g 46 | INSERT | 4 | h 47 | DELETE | 4 | d 48 | INSERT | 4 | 5 49 | DELETE | 5 | e 50 | INSERT | 5 | 6 51 | DELETE | 4 | h 52 | INSERT | 4 | 5 53 | DELETE | 1 | a 54 | DELETE | 2 | b 55 | DELETE | 2 | f 56 | (17 rows) 57 | 58 | DROP TABLE t, t_log; 59 | DROP FUNCTION t_trigger_handler; 60 | 
-------------------------------------------------------------------------------- /test/expected/unsupported/alter.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int) USING columnstore; 2 | ALTER TABLE t ADD COLUMN b int; 3 | ERROR: ALTER TABLE on columnstore table is not supported 4 | DROP TABLE t; 5 | -------------------------------------------------------------------------------- /test/expected/unsupported/columns.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int GENERATED ALWAYS AS (b + 1) STORED, b int) USING columnstore; 2 | ERROR: unsupported generated column "a" 3 | -------------------------------------------------------------------------------- /test/expected/unsupported/types.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a jsonb) USING columnstore; 2 | ERROR: column "a" has unsupported type 3 | CREATE TYPE point AS (x int, y int); 4 | CREATE TABLE t (a point) USING columnstore; 5 | ERROR: column "a" has unsupported type 6 | -------------------------------------------------------------------------------- /test/expected/unsupported/update.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b int) USING columnstore; 2 | UPDATE t SET (a, b) = (SELECT 1, 2); 3 | ERROR: DuckDB does not support UPDATE with multi-column assignment 4 | UPDATE t SET (b, a) = (SELECT 1, 2); 5 | ERROR: DuckDB does not support UPDATE with multi-column assignment 6 | DROP TABLE t; 7 | -------------------------------------------------------------------------------- /test/expected/update_delete_with_join.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE s (a int, b int); 2 | INSERT INTO s VALUES (1, 123), (1, 456); 3 | CREATE TABLE t (c int, d int) USING columnstore; 4 | INSERT INTO t VALUES (1, 
0), (2, 0); 5 | UPDATE t SET d = b FROM s WHERE c = a; 6 | SELECT * FROM t; 7 | c | d 8 | ---+----- 9 | 1 | 123 10 | 2 | 0 11 | (2 rows) 12 | 13 | DELETE FROM t USING s WHERE c = a; 14 | SELECT * FROM t; 15 | c | d 16 | ---+--- 17 | 2 | 0 18 | (1 row) 19 | 20 | DROP TABLE s, t; 21 | -------------------------------------------------------------------------------- /test/sql/approx_count_distinct.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b text) USING columnstore; 2 | INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); 3 | INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); 4 | SELECT mooncake.approx_count_distinct(a), mooncake.approx_count_distinct(b) FROM t; 5 | SELECT a, mooncake.approx_count_distinct(b) FROM t GROUP BY a ORDER BY a; 6 | SELECT a, mooncake.approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a; 7 | DROP TABLE t; 8 | -------------------------------------------------------------------------------- /test/sql/cardinality.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int) USING columnstore; 2 | INSERT INTO t SELECT * FROM generate_series(1, 100); 3 | EXPLAIN SELECT * FROM t; 4 | INSERT INTO t SELECT * FROM generate_series(1, 200); 5 | EXPLAIN SELECT * FROM t; 6 | DROP TABLE t; 7 | -------------------------------------------------------------------------------- /test/sql/columns.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int DEFAULT NULL, b int) USING columnstore; 2 | INSERT INTO t VALUES (DEFAULT, 1), (DEFAULT, 2); 3 | INSERT INTO t VALUES (3, 3), (4, 4); 4 | INSERT INTO t SELECT 5, 5; 5 | SELECT * FROM t; 6 | DROP TABLE t; 7 | 8 | CREATE TABLE t (a int DEFAULT 3, b int) USING columnstore; 9 | INSERT INTO t VALUES (DEFAULT, 1), (DEFAULT, 2); 10 | INSERT INTO t VALUES (3, 3), (4, 4); 11 | INSERT INTO t SELECT 5, 5; 12 | SELECT * 
FROM t; 13 | DROP TABLE t; 14 | 15 | CREATE TABLE t (a int DEFAULT 1 + 2, b int) USING columnstore; 16 | INSERT INTO t (b) VALUES (1), (2); 17 | DROP TABLE t; 18 | 19 | CREATE TABLE t (a int GENERATED ALWAYS AS IDENTITY, b int) USING columnstore; 20 | INSERT INTO t (b) VALUES (1), (2); 21 | INSERT INTO t (b) SELECT 3; 22 | SELECT * FROM t; 23 | DROP TABLE t; 24 | 25 | CREATE TABLE t (a int) USING columnstore; 26 | INSERT INTO t VALUES (1), (2); 27 | CREATE TABLE r (b int DEFAULT 1 + 2); 28 | INSERT INTO r VALUES (1), (2); 29 | SELECT * FROM t JOIN r ON a = b; 30 | DROP TABLE t, r; 31 | -------------------------------------------------------------------------------- /test/sql/constraints.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b int NOT NULL, c text NOT NULL) USING columnstore; 2 | INSERT INTO t VALUES (1, 2, 'a'), (3, 4, 'b'), (5, 6, 'c'); 3 | INSERT INTO t VALUES (7, NULL, 'd'); 4 | INSERT INTO t (a, b) VALUES (7, 8); 5 | INSERT INTO t (b, c) VALUES (8, 'd'); 6 | UPDATE t SET b = 0 WHERE a = 1; 7 | UPDATE t SET c = NULL WHERE a = 1; 8 | SELECT * FROM t; 9 | DROP TABLE t; 10 | -------------------------------------------------------------------------------- /test/sql/copy.sql: -------------------------------------------------------------------------------- 1 | \set pwd `pwd` 2 | \set csv_file '\'' :pwd '/results/copy.csv' '\'' 3 | 4 | COPY (SELECT * FROM generate_series(1, 5)) TO :csv_file; 5 | 6 | CREATE TABLE t (a int) USING columnstore; 7 | COPY t FROM :csv_file; 8 | SELECT * FROM t; 9 | 10 | COPY t TO :csv_file; 11 | COPY (SELECT * FROM t) TO :csv_file; 12 | 13 | DROP TABLE t; 14 | -------------------------------------------------------------------------------- /test/sql/create_table_as.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE r (a int); 2 | INSERT INTO r VALUES (1), (2), (3); 3 | 4 | CREATE TABLE c (a int) USING columnstore; 5 | 
INSERT INTO c VALUES (1), (2), (3); 6 | 7 | CREATE TABLE t AS SELECT * FROM r; 8 | SELECT * FROM t; 9 | DROP TABLE t; 10 | 11 | CREATE TABLE t AS SELECT * FROM c; 12 | SELECT * FROM t; 13 | DROP TABLE t; 14 | 15 | CREATE TABLE t USING columnstore AS SELECT * FROM r; 16 | SELECT * FROM t; 17 | DROP TABLE t; 18 | 19 | CREATE TABLE t USING columnstore AS SELECT * FROM c; 20 | SELECT * FROM t; 21 | DROP TABLE t; 22 | 23 | CREATE TABLE t USING columnstore AS SELECT * FROM r WITH NO DATA; 24 | SELECT * FROM t; 25 | DROP TABLE t; 26 | 27 | CREATE TABLE t USING columnstore AS TABLE r; 28 | SELECT * FROM t; 29 | DROP TABLE t; 30 | 31 | CREATE TABLE t USING columnstore AS VALUES (123, 'abc'); 32 | SELECT * FROM t; 33 | DROP TABLE t; 34 | 35 | DROP TABLE r, c; 36 | -------------------------------------------------------------------------------- /test/sql/cursor.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int); 2 | INSERT INTO t VALUES (1); 3 | 4 | BEGIN; 5 | DECLARE c SCROLL CURSOR FOR SELECT a FROM t; 6 | FETCH NEXT FROM c; 7 | FETCH NEXT FROM c; 8 | FETCH PRIOR FROM c; 9 | COMMIT; 10 | 11 | DROP TABLE t; 12 | 13 | CREATE TABLE t (a int) USING columnstore; 14 | INSERT INTO t VALUES (1); 15 | 16 | BEGIN; 17 | DECLARE c SCROLL CURSOR FOR SELECT a FROM t; 18 | FETCH NEXT FROM c; 19 | FETCH NEXT FROM c; 20 | FETCH PRIOR FROM c; 21 | COMMIT; 22 | 23 | DROP TABLE t; 24 | -------------------------------------------------------------------------------- /test/sql/non_superuser.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int) USING columnstore; 2 | INSERT INTO t VALUES (1), (2), (3); 3 | CREATE USER user1; 4 | GRANT SELECT ON t TO user1; 5 | SET ROLE user1; 6 | SELECT * FROM t; 7 | RESET ROLE; 8 | DROP TABLE t; 9 | DROP USER user1; 10 | -------------------------------------------------------------------------------- /test/sql/returning.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int) USING columnstore; 2 | INSERT INTO t VALUES (1), (2), (3); 3 | 4 | INSERT INTO t VALUES (123) RETURNING a + 1; 5 | UPDATE t SET a = -a RETURNING a + 1; 6 | DELETE FROM t RETURNING a + 1; 7 | 8 | DROP TABLE t; 9 | -------------------------------------------------------------------------------- /test/sql/sanity.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b text) USING columnstore; 2 | INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); 3 | INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); 4 | UPDATE t SET b = a + 1 WHERE a > 3; 5 | DELETE FROM t WHERE a < 3; 6 | INSERT INTO t SELECT 2, b FROM t WHERE a = 3; 7 | SELECT * FROM t; 8 | DROP TABLE t; 9 | 10 | CREATE TABLE t (a int) USING columnstore; 11 | ANALYZE t; 12 | 13 | BEGIN; 14 | INSERT INTO t VALUES (123); 15 | SELECT * FROM t; 16 | ROLLBACK; 17 | SELECT * FROM t; 18 | 19 | BEGIN; 20 | INSERT INTO t VALUES (123); 21 | SELECT * FROM t; 22 | COMMIT; 23 | SELECT * FROM t; 24 | 25 | DROP TABLE t; 26 | -------------------------------------------------------------------------------- /test/sql/segment_elimination.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b text) USING columnstore; 2 | INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'); 3 | INSERT INTO t VALUES (123, 'abc'); 4 | UPDATE t SET b = 'def' WHERE a = 123; 5 | SELECT * FROM t; 6 | DROP TABLE t; 7 | -------------------------------------------------------------------------------- /test/sql/transaction.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE s (a int); 2 | CREATE TABLE t (b int) USING columnstore; 3 | 4 | BEGIN; 5 | INSERT INTO s VALUES (1); 6 | INSERT INTO t VALUES (2); 7 | COMMIT; 8 | 9 | SELECT * FROM s; 10 | SELECT * FROM t; 11 | 12 | 
DROP TABLE s, t; 13 | -------------------------------------------------------------------------------- /test/sql/trigger.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b text); 2 | CREATE TABLE t_log (operation text, a int, b text) USING COLUMNSTORE; 3 | 4 | CREATE FUNCTION t_trigger_handler() 5 | RETURNS TRIGGER LANGUAGE 'plpgsql' 6 | AS 7 | $func$ 8 | BEGIN 9 | IF TG_OP = 'INSERT' THEN 10 | INSERT INTO t_log VALUES ('INSERT', NEW.a, NEW.b); 11 | ELSIF TG_OP = 'DELETE' THEN 12 | INSERT INTO t_log VALUES ('DELETE', OLD.a, OLD.b); 13 | ELSIF TG_OP = 'UPDATE' THEN 14 | INSERT INTO t_log VALUES ('DELETE', OLD.a, OLD.b), ('INSERT', NEW.a, NEW.b); 15 | END IF; 16 | RETURN NEW; 17 | END; 18 | $func$; 19 | 20 | CREATE TRIGGER t_trigger 21 | AFTER INSERT OR DELETE OR UPDATE ON t 22 | FOR EACH ROW 23 | EXECUTE FUNCTION t_trigger_handler(); 24 | 25 | INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'); 26 | INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h'); 27 | UPDATE t SET b = a + 1 WHERE a > 3; 28 | DELETE FROM t WHERE a < 3; 29 | 30 | SELECT * FROM t; 31 | SELECT * FROM t_log; 32 | 33 | DROP TABLE t, t_log; 34 | DROP FUNCTION t_trigger_handler; 35 | -------------------------------------------------------------------------------- /test/sql/unsupported/alter.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int) USING columnstore; 2 | ALTER TABLE t ADD COLUMN b int; 3 | DROP TABLE t; 4 | -------------------------------------------------------------------------------- /test/sql/unsupported/columns.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int GENERATED ALWAYS AS (b + 1) STORED, b int) USING columnstore; 2 | -------------------------------------------------------------------------------- /test/sql/unsupported/types.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE t (a jsonb) USING columnstore; 2 | 3 | CREATE TYPE point AS (x int, y int); 4 | CREATE TABLE t (a point) USING columnstore; 5 | -------------------------------------------------------------------------------- /test/sql/unsupported/update.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE t (a int, b int) USING columnstore; 2 | UPDATE t SET (a, b) = (SELECT 1, 2); 3 | UPDATE t SET (b, a) = (SELECT 1, 2); 4 | DROP TABLE t; 5 | -------------------------------------------------------------------------------- /test/sql/update_delete_with_join.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE s (a int, b int); 2 | INSERT INTO s VALUES (1, 123), (1, 456); 3 | 4 | CREATE TABLE t (c int, d int) USING columnstore; 5 | INSERT INTO t VALUES (1, 0), (2, 0); 6 | 7 | UPDATE t SET d = b FROM s WHERE c = a; 8 | SELECT * FROM t; 9 | 10 | DELETE FROM t USING s WHERE c = a; 11 | SELECT * FROM t; 12 | 13 | DROP TABLE s, t; 14 | -------------------------------------------------------------------------------- /third_party/pg_mooncake_extensions.cmake: -------------------------------------------------------------------------------- 1 | duckdb_extension_load(httpfs 2 | GIT_URL https://github.com/duckdb/duckdb-httpfs 3 | GIT_TAG 85ac4667bcb0d868199e156f8dd918b0278db7b9 4 | INCLUDE_DIR extension/httpfs/include 5 | ) 6 | duckdb_extension_load(icu) 7 | duckdb_extension_load(json) 8 | --------------------------------------------------------------------------------