├── NOTICE ├── .gitignore ├── log_fdw.conf ├── log_fdw.control ├── changelog.md ├── .github ├── PULL_REQUEST_TEMPLATE.md ├── ISSUE_TEMPLATE │ ├── support.md │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── semgrep.yml │ └── build_and_test.yml ├── CODE_OF_CONDUCT.md ├── .editorconfig ├── Makefile ├── log_fdw--1.4--1.5.sql ├── CONTRIBUTING.md ├── log_fdw--1.4.sql ├── sql └── log_fdw.sql ├── expected └── log_fdw.out ├── README.md ├── LICENSE └── log_fdw.c /NOTICE: -------------------------------------------------------------------------------- 1 | log_fdw 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated subdirectories and files 2 | /log/ 3 | /results/ 4 | /tmp_check/ 5 | *.so 6 | *.o 7 | -------------------------------------------------------------------------------- /log_fdw.conf: -------------------------------------------------------------------------------- 1 | logging_collector = on 2 | log_destination = 'stderr, csvlog' 3 | # these ensure stability of test results: 4 | log_rotation_age = 0 5 | lc_messages = 'C' 6 | -------------------------------------------------------------------------------- /log_fdw.control: -------------------------------------------------------------------------------- 1 | # log_fdw extension 2 | comment = 'foreign-data wrapper for Postgres log file access' 3 | default_version = '1.4' 4 | module_pathname = '$libdir/log_fdw' 5 | relocatable = true 6 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | 6 | ## [Unreleased] -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Issue #, if available: 2 | 3 | Description of changes: 4 | 5 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/support.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General 3 | about: Have a general question? We're here to help. 4 | title: '' 5 | labels: general 6 | assignees: '' 7 | --- 8 | 9 | ## Describe the problem 10 | 11 | * What are you trying to solve? 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | trim_trailing_whitespace = true 8 | 9 | [*.out] 10 | trim_trailing_whitespace = false 11 | 12 | [*.{c,h}] 13 | indent_style = space 14 | indent_size = 4 15 | 16 | [*.{sql,sql.in}] 17 | indent_style = space 18 | indent_size = 2 19 | 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Have an idea for a new log_fdw feature? 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | --- 8 | 9 | ## Describe the problem 10 | 11 | * What are you trying to solve? 12 | 13 | ## Describe the proposal 14 | 15 | * What is the feature you are proposing? 16 | * How would it solve the problem? 17 | 18 | ## Describe alternatives 19 | 20 | * How would you solve the problem today if the feature is not built? 21 | * What other alternatives are there? 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Report a bug 3 | about: Found a bug? Let us know! 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | --- 8 | 9 | ## Description 10 | 11 | A brief description on what the bug is. 12 | 13 | ## Steps to reproduce 14 | 15 | 1. Step 1... 16 | 1. Step 2... 17 | 18 | ## Expected outcome 19 | 20 | A description of the expected outcome. 21 | 22 | ## Actual outcome 23 | 24 | A description of what the outcome should be 25 | 26 | ## Analysis 27 | 28 | Any additional thoughts you have on the issue, or a recommended solution. 29 | 30 | If applicable, please provide logs that demonstrate the issue. 31 | Please remove any sensitive information from the logs. 32 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # postgresql-logfdw/Makefile 2 | 3 | MODULES = log_fdw 4 | 5 | EXTENSION = log_fdw 6 | DATA = log_fdw--1.4.sql log_fdw--1.4--1.5.sql 7 | PGFILEDESC = "log_fdw - foreign data wrapper for Postgres log files" 8 | 9 | REGRESS = log_fdw 10 | 11 | REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/postgresql-logfdw/log_fdw.conf 12 | 13 | # Disabled because these tests require extra parameters to be set 14 | # (see log_fdw.conf), which some installcheck users do not have 15 | # (e.g. buildfarm clients). 16 | NO_INSTALLCHECK = 1 17 | 18 | ifdef USE_PGXS 19 | PG_CONFIG = pg_config 20 | PGXS := $(shell $(PG_CONFIG) --pgxs) 21 | include $(PGXS) 22 | else 23 | subdir = contrib/postgresql-logfdw 24 | top_builddir = ../.. 25 | include $(top_builddir)/src/Makefile.global 26 | include $(top_srcdir)/contrib/contrib-global.mk 27 | endif 28 | -------------------------------------------------------------------------------- /.github/workflows/semgrep.yml: -------------------------------------------------------------------------------- 1 | # Name of this GitHub Actions workflow. 2 | name: Semgrep 3 | 4 | on: 5 | # Scan changed files in PRs (diff-aware scanning): 6 | pull_request: {} 7 | # Scan mainline branches and report all findings: 8 | push: 9 | branches: ["main"] 10 | # Schedule the CI job (this method uses cron syntax): 11 | schedule: 12 | - cron: '20 17 * * *' # Sets Semgrep to scan every day at 17:20 UTC. 13 | # It is recommended to change the schedule to a random time. 14 | 15 | jobs: 16 | semgrep: 17 | # User-definable name of this GitHub Actions job: 18 | name: Scan 19 | # If you are self-hosting, change the following `runs-on` value: 20 | runs-on: ubuntu-latest 21 | 22 | container: 23 | # A Docker image with Semgrep installed. Do not change this. 24 | image: returntocorp/semgrep 25 | 26 | # Skip any PR created by dependabot to avoid permission issues: 27 | if: (github.actor != 'dependabot[bot]') 28 | 29 | steps: 30 | # Fetch project source with GitHub Actions Checkout. 31 | - uses: actions/checkout@v3 32 | # Run the "semgrep ci" command on the command line of the docker image. 33 | - run: semgrep ci 34 | env: 35 | # Add the rules that Semgrep uses by setting the SEMGREP_RULES environment variable. 36 | SEMGREP_RULES: p/default # more at semgrep.dev/explore -------------------------------------------------------------------------------- /log_fdw--1.4--1.5.sql: -------------------------------------------------------------------------------- 1 | /* log_fdw--1.4--1.5 */ 2 | 3 | /* 4 | * Creates foreign table for a given server log file. 5 | */ 6 | CREATE OR REPLACE FUNCTION create_foreign_table_for_log_file( 7 | table_name TEXT, 8 | server_name TEXT, 9 | log_file_name TEXT, 10 | if_not_exists BOOL) 11 | RETURNS void AS 12 | $BODY$ 13 | DECLARE 14 | l_exists_str text := ''; 15 | BEGIN 16 | IF if_not_exists 17 | THEN 18 | l_exists_str := 'IF NOT EXISTS'; 19 | END IF; 20 | 21 | IF $3 LIKE '%.csv' or $3 LIKE '%.csv.gz' 22 | THEN 23 | EXECUTE format('CREATE FOREIGN TABLE %s %I ( 24 | log_time timestamp(3) with time zone, 25 | user_name text, 26 | database_name text, 27 | process_id integer, 28 | connection_from text, 29 | session_id text, 30 | session_line_num bigint, 31 | command_tag text, 32 | session_start_time timestamp with time zone, 33 | virtual_transaction_id text, 34 | transaction_id bigint, 35 | error_severity text, 36 | sql_state_code text, 37 | message text, 38 | detail text, 39 | hint text, 40 | internal_query text, 41 | internal_query_pos integer, 42 | context text, 43 | query text, 44 | query_pos integer, 45 | location text, 46 | application_name text, 47 | backend_type text, 48 | leader_pid integer, 49 | query_id bigint 50 | ) SERVER %I 51 | OPTIONS (filename %L)', 52 | l_exists_str, $1, $2, $3); 53 | ELSE 54 | EXECUTE format('CREATE FOREIGN TABLE %s %I ( 55 | log_entry text 56 | ) SERVER %I 57 | OPTIONS (filename %L)', 58 | l_exists_str, $1, $2, $3); 59 | END IF; 60 | END 61 | $BODY$ 62 | LANGUAGE plpgsql; 63 | 64 | CREATE OR REPLACE FUNCTION create_foreign_table_for_log_file( 65 | table_name TEXT, 66 | server_name TEXT, 67 | log_file_name TEXT) 68 | RETURNS void 69 | LANGUAGE sql 70 | BEGIN ATOMIC 71 | SELECT create_foreign_table_for_log_file(table_name, server_name, log_file_name, false); 72 | END; 73 | -------------------------------------------------------------------------------- /.github/workflows/build_and_test.yml: -------------------------------------------------------------------------------- 1 | name: log_fdw CI 2 | on: 3 | #schedule: 4 | # # Runs at 00:00 UTC on every Sunday. 5 | # - cron: '0 0 * * Sun' 6 | push: 7 | pull_request: 8 | jobs: 9 | build-and-test: 10 | strategy: 11 | matrix: 12 | os: [ubuntu-latest] 13 | version: [master, REL_17_STABLE, REL_16_STABLE, REL_15_STABLE, REL_14_STABLE] 14 | runs-on: ${{ matrix.os }} 15 | timeout-minutes: 10 16 | 17 | steps: 18 | - name: Test details 19 | run: echo Build and test log_fdw on ${{ matrix.os }} with PostgreSQL ${{ matrix.version }} branch 20 | 21 | - name: Checkout and build PostgreSQL code 22 | run: | 23 | sudo apt-get update -qq 24 | sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 25 | sudo apt-get install -qq acl bison flex libssl-dev libreadline-dev 26 | rm -rf postgres 27 | git clone --branch ${{ matrix.version }} https://github.com/postgres/postgres.git 28 | pushd postgres 29 | git branch 30 | ./configure --prefix=$PWD/inst/ 31 | make -j4 install 32 | 33 | # Here we just initialize the cluster and see if postgres comes up 34 | # and stop. We could further run basic postgres tests with 35 | # "make installcheck", but that's not the intention of this test. 36 | cd inst/bin 37 | ./initdb -D data 38 | ./pg_ctl -D data -l logfile start 39 | ./pg_ctl -D data -l logfile stop 40 | cd ../.. 41 | 42 | # Create the directory for log_fdw extension checkout. 43 | mkdir contrib/postgresql-logfdw 44 | popd 45 | 46 | - name: Checkout logfdw extension code 47 | uses: actions/checkout@v3 48 | with: 49 | path: postgres/contrib/postgresql-logfdw 50 | 51 | - name: Build and test logfdw extension 52 | run: | 53 | pushd postgres 54 | cd contrib/postgresql-logfdw 55 | make check || (cat regression.diffs && false) 56 | popd 57 | # Clean the directory 58 | rm -rf postgres 59 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /log_fdw--1.4.sql: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"). 5 | * You may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 18 | \echo Use "ALTER EXTENSION log_fdw UPDATE TO '1.4'" to load this file. \quit 19 | 20 | CREATE FUNCTION log_fdw_handler() 21 | RETURNS fdw_handler 22 | AS 'MODULE_PATHNAME' 23 | LANGUAGE C STRICT; 24 | 25 | CREATE FUNCTION log_fdw_validator(text[], oid) 26 | RETURNS void 27 | AS 'MODULE_PATHNAME' 28 | LANGUAGE C STRICT; 29 | 30 | CREATE FOREIGN DATA WRAPPER log_fdw 31 | HANDLER log_fdw_handler 32 | VALIDATOR log_fdw_validator; 33 | 34 | /* 35 | * Although pg_ls_logdir() can be used instead of this function, we maintain 36 | * list_postgres_log_files() as a wrapper around that to not to break 37 | * applications built around log_fdw extension. 38 | */ 39 | CREATE OR REPLACE FUNCTION list_postgres_log_files( 40 | OUT file_name TEXT, 41 | OUT file_size_bytes BIGINT) 42 | RETURNS SETOF record 43 | LANGUAGE sql 44 | VOLATILE PARALLEL SAFE STRICT 45 | BEGIN ATOMIC 46 | SELECT name AS file_name, size AS file_size_bytes FROM pg_ls_logdir(); 47 | END; 48 | 49 | /* 50 | * Creates foreign table for a given server log file. 51 | */ 52 | CREATE OR REPLACE FUNCTION create_foreign_table_for_log_file( 53 | table_name TEXT, 54 | server_name TEXT, 55 | log_file_name TEXT) 56 | RETURNS void AS 57 | $BODY$ 58 | BEGIN 59 | IF $3 LIKE '%.csv' or $3 LIKE '%.csv.gz' 60 | THEN 61 | EXECUTE format('CREATE FOREIGN TABLE %I ( 62 | log_time timestamp(3) with time zone, 63 | user_name text, 64 | database_name text, 65 | process_id integer, 66 | connection_from text, 67 | session_id text, 68 | session_line_num bigint, 69 | command_tag text, 70 | session_start_time timestamp with time zone, 71 | virtual_transaction_id text, 72 | transaction_id bigint, 73 | error_severity text, 74 | sql_state_code text, 75 | message text, 76 | detail text, 77 | hint text, 78 | internal_query text, 79 | internal_query_pos integer, 80 | context text, 81 | query text, 82 | query_pos integer, 83 | location text, 84 | application_name text, 85 | backend_type text, 86 | leader_pid integer, 87 | query_id bigint 88 | ) SERVER %I 89 | OPTIONS (filename %L)', 90 | $1, $2, $3); 91 | ELSE 92 | EXECUTE format('CREATE FOREIGN TABLE %I ( 93 | log_entry text 94 | ) SERVER %I 95 | OPTIONS (filename %L)', 96 | $1, $2, $3); 97 | END IF; 98 | END 99 | $BODY$ 100 | LANGUAGE plpgsql; 101 | 102 | REVOKE ALL ON FUNCTION log_fdw_handler() FROM PUBLIC; 103 | REVOKE ALL ON FUNCTION log_fdw_validator(text[], oid) FROM PUBLIC; 104 | REVOKE ALL ON FOREIGN DATA WRAPPER log_fdw FROM PUBLIC; 105 | REVOKE ALL ON FUNCTION list_postgres_log_files() FROM PUBLIC; 106 | REVOKE ALL ON FUNCTION create_foreign_table_for_log_file(text, text, text) FROM PUBLIC; 107 | -------------------------------------------------------------------------------- /sql/log_fdw.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- Test foreign-data wrapper log_fdw. 3 | -- 4 | 5 | -- Install log_fdw 6 | CREATE EXTENSION log_fdw; 7 | 8 | -- Create foreign server 9 | CREATE SERVER log_fdw_server FOREIGN DATA WRAPPER log_fdw; 10 | 11 | -- Invalid option name 12 | CREATE FOREIGN TABLE log_fdw_ftbl_err () SERVER log_fdw_server OPTIONS (foo 'bar'); -- ERROR 13 | 14 | -- Invalid option value i.e. absolute path as value 15 | CREATE FOREIGN TABLE log_fdw_ftbl_err () SERVER log_fdw_server OPTIONS (filename '/foo/bar'); -- ERROR 16 | 17 | -- Option provided more than once 18 | CREATE FOREIGN TABLE log_fdw_ftbl_err () SERVER log_fdw_server OPTIONS (filename 'foo', filename 'bar'); -- ERROR 19 | 20 | -- Provided no option 21 | CREATE FOREIGN TABLE log_fdw_ftbl_err () SERVER log_fdw_server; -- ERROR 22 | 23 | -- Check permissions 24 | CREATE ROLE regress_log_fdw_nsuperuser; 25 | 26 | -- A non-superuser has no permission to use log_fdw by default 27 | SELECT has_function_privilege('regress_log_fdw_nsuperuser', 28 | 'create_foreign_table_for_log_file(text, text, text)', 'EXECUTE'); -- no 29 | 30 | SELECT has_function_privilege('regress_log_fdw_nsuperuser', 31 | 'list_postgres_log_files()', 'EXECUTE'); -- no 32 | 33 | -- A non-superuser can be granted permission to use log_fdw 34 | GRANT EXECUTE ON FUNCTION create_foreign_table_for_log_file(text, text, text) 35 | TO regress_log_fdw_nsuperuser; 36 | 37 | GRANT EXECUTE ON FUNCTION list_postgres_log_files() 38 | TO regress_log_fdw_nsuperuser; 39 | 40 | SELECT has_function_privilege('regress_log_fdw_nsuperuser', 41 | 'create_foreign_table_for_log_file(text, text, text)', 'EXECUTE'); -- yes 42 | 43 | SELECT has_function_privilege('regress_log_fdw_nsuperuser', 44 | 'list_postgres_log_files()', 'EXECUTE'); -- yes 45 | 46 | -- Ensure that something gets logged to the server log file 47 | SELECT 1/0; 48 | 49 | -- Ensure there exists at least one server log file to work on 50 | SElECT pg_rotate_logfile(); 51 | DO $$ 52 | DECLARE 53 | log_file INT := 0; 54 | loops INT := 0; 55 | BEGIN 56 | LOOP 57 | log_file := COUNT(*) FROM list_postgres_log_files(); 58 | -- Check if both .csv and .log files get created 59 | IF log_file >= 2 OR loops > 180 * 100 THEN EXIT; END IF; 60 | PERFORM pg_sleep(0.01); 61 | loops := loops + 1; 62 | END LOOP; 63 | END 64 | $$; 65 | 66 | -- Check if .csv file gets created 67 | SELECT COUNT(*) > 0 AS OK FROM list_postgres_log_files() 68 | WHERE file_name LIKE '%.csv'; 69 | 70 | -- Check if .log file gets created 71 | SELECT COUNT(*) > 0 AS OK FROM list_postgres_log_files() 72 | WHERE file_name LIKE '%.log'; 73 | 74 | -- Get .csv server log file name and create a foreign table for it 75 | DO $$ 76 | DECLARE 77 | log_file_name TEXT; 78 | BEGIN 79 | log_file_name := file_name FROM list_postgres_log_files() 80 | WHERE file_name LIKE '%.csv' ORDER BY file_name ASC LIMIT 1; 81 | PERFORM create_foreign_table_for_log_file('log_fdw_ftbl_csv','log_fdw_server', log_file_name); 82 | END 83 | $$; 84 | 85 | -- Ensure that the foreign table gets created 86 | SELECT COUNT(*) > 0 AS OK FROM pg_foreign_table WHERE ftrelid = 'log_fdw_ftbl_csv'::regclass; 87 | 88 | -- Ensure that the foreign table has read data from .csv server log file 89 | SELECT COUNT(*) > 0 AS OK FROM log_fdw_ftbl_csv; 90 | 91 | -- Get .log server log file name and create a foreign table for it 92 | DO $$ 93 | DECLARE 94 | log_file_name TEXT; 95 | BEGIN 96 | log_file_name := file_name FROM list_postgres_log_files() 97 | WHERE file_name LIKE '%.log' ORDER BY file_name ASC LIMIT 1; 98 | PERFORM create_foreign_table_for_log_file('log_fdw_ftbl_log','log_fdw_server', log_file_name); 99 | END 100 | $$; 101 | 102 | -- Ensure that the foreign table gets created 103 | SELECT COUNT(*) > 0 AS OK FROM pg_foreign_table WHERE ftrelid = 'log_fdw_ftbl_log'::regclass; 104 | 105 | -- Ensure that the foreign table has read data from .log server log file 106 | SELECT COUNT(*) > 0 AS OK FROM log_fdw_ftbl_log; 107 | 108 | -- Clean up 109 | DROP EXTENSION log_fdw CASCADE; 110 | -------------------------------------------------------------------------------- /expected/log_fdw.out: -------------------------------------------------------------------------------- 1 | -- 2 | -- Test foreign-data wrapper log_fdw. 3 | -- 4 | -- Install log_fdw 5 | CREATE EXTENSION log_fdw; 6 | -- Create foreign server 7 | CREATE SERVER log_fdw_server FOREIGN DATA WRAPPER log_fdw; 8 | -- Invalid option name 9 | CREATE FOREIGN TABLE log_fdw_ftbl_err () SERVER log_fdw_server OPTIONS (foo 'bar'); -- ERROR 10 | ERROR: invalid option "foo" 11 | HINT: Valid options in this context are: filename 12 | -- Invalid option value i.e. absolute path as value 13 | CREATE FOREIGN TABLE log_fdw_ftbl_err () SERVER log_fdw_server OPTIONS (filename '/foo/bar'); -- ERROR 14 | ERROR: absolute path is not allowed as filename for log_fdw foreign tables 15 | -- Option provided more than once 16 | CREATE FOREIGN TABLE log_fdw_ftbl_err () SERVER log_fdw_server OPTIONS (filename 'foo', filename 'bar'); -- ERROR 17 | ERROR: option "filename" provided more than once 18 | -- Provided no option 19 | CREATE FOREIGN TABLE log_fdw_ftbl_err () SERVER log_fdw_server; -- ERROR 20 | ERROR: filename is required for log_fdw foreign tables 21 | -- Check permissions 22 | CREATE ROLE regress_log_fdw_nsuperuser; 23 | -- A non-superuser has no permission to use log_fdw by default 24 | SELECT has_function_privilege('regress_log_fdw_nsuperuser', 25 | 'create_foreign_table_for_log_file(text, text, text)', 'EXECUTE'); -- no 26 | has_function_privilege 27 | ------------------------ 28 | f 29 | (1 row) 30 | 31 | SELECT has_function_privilege('regress_log_fdw_nsuperuser', 32 | 'list_postgres_log_files()', 'EXECUTE'); -- no 33 | has_function_privilege 34 | ------------------------ 35 | f 36 | (1 row) 37 | 38 | -- A non-superuser can be granted permission to use log_fdw 39 | GRANT EXECUTE ON FUNCTION create_foreign_table_for_log_file(text, text, text) 40 | TO regress_log_fdw_nsuperuser; 41 | GRANT EXECUTE ON FUNCTION list_postgres_log_files() 42 | TO regress_log_fdw_nsuperuser; 43 | SELECT has_function_privilege('regress_log_fdw_nsuperuser', 44 | 'create_foreign_table_for_log_file(text, text, text)', 'EXECUTE'); -- yes 45 | has_function_privilege 46 | ------------------------ 47 | t 48 | (1 row) 49 | 50 | SELECT has_function_privilege('regress_log_fdw_nsuperuser', 51 | 'list_postgres_log_files()', 'EXECUTE'); -- yes 52 | has_function_privilege 53 | ------------------------ 54 | t 55 | (1 row) 56 | 57 | -- Ensure that something gets logged to the server log file 58 | SELECT 1/0; 59 | ERROR: division by zero 60 | -- Ensure there exists at least one server log file to work on 61 | SElECT pg_rotate_logfile(); 62 | pg_rotate_logfile 63 | ------------------- 64 | t 65 | (1 row) 66 | 67 | DO $$ 68 | DECLARE 69 | log_file INT := 0; 70 | loops INT := 0; 71 | BEGIN 72 | LOOP 73 | log_file := COUNT(*) FROM list_postgres_log_files(); 74 | -- Check if both .csv and .log files get created 75 | IF log_file >= 2 OR loops > 180 * 100 THEN EXIT; END IF; 76 | PERFORM pg_sleep(0.01); 77 | loops := loops + 1; 78 | END LOOP; 79 | END 80 | $$; 81 | -- Check if .csv file gets created 82 | SELECT COUNT(*) > 0 AS OK FROM list_postgres_log_files() 83 | WHERE file_name LIKE '%.csv'; 84 | ok 85 | ---- 86 | t 87 | (1 row) 88 | 89 | -- Check if .log file gets created 90 | SELECT COUNT(*) > 0 AS OK FROM list_postgres_log_files() 91 | WHERE file_name LIKE '%.log'; 92 | ok 93 | ---- 94 | t 95 | (1 row) 96 | 97 | -- Get .csv server log file name and create a foreign table for it 98 | DO $$ 99 | DECLARE 100 | log_file_name TEXT; 101 | BEGIN 102 | log_file_name := file_name FROM list_postgres_log_files() 103 | WHERE file_name LIKE '%.csv' ORDER BY file_name ASC LIMIT 1; 104 | PERFORM create_foreign_table_for_log_file('log_fdw_ftbl_csv','log_fdw_server', log_file_name); 105 | END 106 | $$; 107 | -- Ensure that the foreign table gets created 108 | SELECT COUNT(*) > 0 AS OK FROM pg_foreign_table WHERE ftrelid = 'log_fdw_ftbl_csv'::regclass; 109 | ok 110 | ---- 111 | t 112 | (1 row) 113 | 114 | -- Ensure that the foreign table has read data from .csv server log file 115 | SELECT COUNT(*) > 0 AS OK FROM log_fdw_ftbl_csv; 116 | ok 117 | ---- 118 | t 119 | (1 row) 120 | 121 | -- Get .log server log file name and create a foreign table for it 122 | DO $$ 123 | DECLARE 124 | log_file_name TEXT; 125 | BEGIN 126 | log_file_name := file_name FROM list_postgres_log_files() 127 | WHERE file_name LIKE '%.log' ORDER BY file_name ASC LIMIT 1; 128 | PERFORM create_foreign_table_for_log_file('log_fdw_ftbl_log','log_fdw_server', log_file_name); 129 | END 130 | $$; 131 | -- Ensure that the foreign table gets created 132 | SELECT COUNT(*) > 0 AS OK FROM pg_foreign_table WHERE ftrelid = 'log_fdw_ftbl_log'::regclass; 133 | ok 134 | ---- 135 | t 136 | (1 row) 137 | 138 | -- Ensure that the foreign table has read data from .log server log file 139 | SELECT COUNT(*) > 0 AS OK FROM log_fdw_ftbl_log; 140 | ok 141 | ---- 142 | t 143 | (1 row) 144 | 145 | -- Clean up 146 | DROP EXTENSION log_fdw CASCADE; 147 | NOTICE: drop cascades to 3 other objects 148 | DETAIL: drop cascades to server log_fdw_server 149 | drop cascades to foreign table log_fdw_ftbl_csv 150 | drop cascades to foreign table log_fdw_ftbl_log 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # log_fdw 2 | 3 | This is a PostgreSQL extension built using Foreign-Data Wrapper facility to 4 | enable reading log files via SQL. It basically provides SQL interface to create 5 | foreign tables for each PostgreSQL log file through which the file contents can 6 | be read and analyzed. Only superusers are allowed to create this extension. 7 | 8 | ## SQL functions 9 | To create foreign table, use: 10 | ``` 11 | create_foreign_table_for_log_file(IN table_name TEXT, IN server_name TEXT, IN log_file_name TEXT) 12 | ``` 13 | To list files and their sizes present in PostgreSQL log directory, use: 14 | ``` 15 | list_postgres_log_files(OUT file_name TEXT, OUT file_size_bytes BIGINT) 16 | ``` 17 | Note that `list_postgres_log_files()` function is a wrapper around PostgreSQL's 18 | core function [pg_ls_logdir](https://www.postgresql.org/docs/current/functions-admin.html#FUNCTIONS-ADMIN-GENFILE) 19 | and exists for compatibility reasons. 20 | 21 | By default, use of this extension's functions is restricted to superusers. 22 | Access may be granted by superusers to others using GRANT as needed. 23 | For instance, following are the minimal things that one needs to do for 24 | enabling others to use the extension's functions: 25 | ``` 26 | CREATE ROLE foo; -- a non-superuser 27 | GRANT pg_monitor TO foo; -- do this only when list_postgres_log_files() is used because the underlying function pg_ls_logdir() needs it 28 | GRANT CREATE ON SCHEMA bar TO foo; -- to create foreign tables in schema named bar 29 | GRANT USAGE ON FOREIGN SERVER log_fdw_server TO foo; -- to use log_fdw foreign server 30 | SET ROLE foo; 31 | SELECT * FROM create_foreign_table_for_log_file('log_file_tbl', 'log_fdw_server', 'log_file.csv'); 32 | ``` 33 | 34 | ## Quick install instructions 35 | 36 | Clone the repository from https://github.com/aws/postgresql-logfdw: 37 | 38 | ``` 39 | git clone https://github.com/aws/postgresql-logfdw.git 40 | ``` 41 | 42 | The extension can be installed in two different ways. As a stand alone project, 43 | first set the `PATH` environment variable to point to `pg_config`. Then run the 44 | following in the postgresql-logfdw directory: 45 | 46 | ``` 47 | export USE_PGXS=1 48 | make 49 | make install 50 | ``` 51 | 52 | Alternatively, if the extension needs to be part of a larger PostgreSQL 53 | distrubution, the extension source code can be copied to the `contrib` directory 54 | under PostgreSQL source tree and installed from there. 55 | 56 | ## Usage 57 | 58 | ### Create extension: 59 | 60 | ``` 61 | postgres=# create extension log_fdw; 62 | CREATE EXTENSION 63 | ``` 64 | 65 | ### See functions created by extension: 66 | 67 | ``` 68 | postgres=# \df 69 | List of functions 70 | Schema | Name | Result data type | Argument data types | Type 71 | --------+-----------------------------------+------------------+------------------------------------------------------------------------------+------ 72 | public | create_foreign_table_for_log_file | void | table_name text, server_name text, log_file_name text | func 73 | public | create_foreign_table_for_log_file | void | table_name text, server_name text, log_file_name text, if_not_exists boolean | func 74 | public | list_postgres_log_files | SETOF record | OUT file_name text, OUT file_size_bytes bigint | func 75 | public | log_fdw_handler | fdw_handler | | func 76 | public | log_fdw_validator | void | text[], oid | func 77 | (5 rows) 78 | ``` 79 | 80 | ``` 81 | postgres=# SELECT * FROM list_postgres_log_files() LIMIT 10; 82 | file_name | file_size_bytes 83 | ---------------------------+----------------- 84 | postgresql-2022-10-13.csv | 0 85 | postgresql-2022-11-14.log | 8006 86 | postgresql-2022-11-01.csv | 4025 87 | postgresql-2022-10-27.csv | 0 88 | postgresql-2022-10-24.log | 0 89 | postgresql-2022-11-05.log | 0 90 | postgresql-2022-11-23.log | 789872 91 | postgresql-2022-11-07.csv | 0 92 | postgresql-2022-11-04.csv | 3943 93 | postgresql-2022-11-16.log | 0 94 | (10 rows) 95 | ``` 96 | 97 | ``` 98 | postgres=# SELECT * FROM list_postgres_log_files() ORDER BY 1 DESC LIMIT 2; 99 | file_name | file_size_bytes 100 | ---------------------------+----------------- 101 | postgresql-2022-11-28.log | 1754 102 | postgresql-2022-11-28.csv | 1948 103 | (2 rows) 104 | ``` 105 | 106 | ### Create server: 107 | 108 | ``` 109 | postgres=# CREATE SERVER log_fdw_server FOREIGN DATA WRAPPER log_fdw; 110 | CREATE SERVER 111 | ``` 112 | 113 | ### Create foreign tables from csv files and log files: 114 | 115 | ``` 116 | postgres=# SELECT * FROM create_foreign_table_for_log_file('postgresql_2022_11_28_csv','log_fdw_server','postgresql-2022-11-28.csv'); 117 | create_foreign_table_for_log_file 118 | ----------------------------------- 119 | 120 | (1 row) 121 | ``` 122 | 123 | ``` 124 | postgres=# SELECT * FROM create_foreign_table_for_log_file('postgresql_2022_11_28_log','log_fdw_server','postgresql-2022-11-28.log'); 125 | create_foreign_table_for_log_file 126 | ----------------------------------- 127 | 128 | (1 row) 129 | ``` 130 | 131 | ### See foreign tables created: 132 | 133 | ``` 134 | postgres=# \detr 135 | List of foreign tables 136 | Schema | Table | Server 137 | --------+---------------------------+---------------- 138 | public | postgresql_2022_11_28_csv | log_fdw_server 139 | public | postgresql_2022_11_28_log | log_fdw_server 140 | (2 rows) 141 | ``` 142 | 143 | ### Read log file contents via foreign tables created: 144 | 145 | SELECT * FROM postgresql_2022_11_14_log LIMIT 2; 146 | 147 | ``` 148 | postgres=# \x 149 | Expanded display is on. 150 | postgres=# select * from postgresql_2022_11_28_log limit 2; 151 | -[ RECORD 1 ]--------------------------------------------------------------------------------------------------------------------------- 152 | log_entry | 2022-11-28 20:37:51.767 UTC 14170 637e8d69.375a 7 2022-11-23 21:15:21 UTC 0 00000LOG: received fast shutdown request 153 | -[ RECORD 2 ]--------------------------------------------------------------------------------------------------------------------------- 154 | log_entry | 2022-11-28 20:37:51.769 UTC 14170 637e8d69.375a 8 2022-11-23 21:15:21 UTC 0 00000LOG: aborting any active transactions 155 | ``` 156 | 157 | SELECT * FROM postgresql_2022_11_28_csv LIMIT 2; 158 | 159 | ``` 160 | postgres=# select * from postgresql_2022_11_28_csv limit 2; 161 | -[ RECORD 1 ]----------+--------------------------------- 162 | log_time | 2022-11-28 20:37:51.767+00 163 | user_name | 164 | database_name | 165 | process_id | 14170 166 | connection_from | 167 | session_id | 637e8d69.375a 168 | session_line_num | 5 169 | command_tag | 170 | session_start_time | 2022-11-23 21:15:21+00 171 | virtual_transaction_id | 172 | transaction_id | 0 173 | error_severity | LOG 174 | sql_state_code | 00000 175 | message | received fast shutdown request 176 | detail | 177 | hint | 178 | internal_query | 179 | internal_query_pos | 180 | context | 181 | query | 182 | query_pos | 183 | location | 184 | application_name | 185 | backend_type | postmaster 186 | leader_pid | 187 | query_id | 0 188 | -[ RECORD 2 ]----------+--------------------------------- 189 | log_time | 2022-11-28 20:37:51.769+00 190 | user_name | 191 | database_name | 192 | process_id | 14170 193 | connection_from | 194 | session_id | 637e8d69.375a 195 | session_line_num | 6 196 | command_tag | 197 | session_start_time | 2022-11-23 21:15:21+00 198 | virtual_transaction_id | 199 | transaction_id | 0 200 | error_severity | LOG 201 | sql_state_code | 00000 202 | message | aborting any active transactions 203 | detail | 204 | hint | 205 | internal_query | 206 | internal_query_pos | 207 | context | 208 | query | 209 | query_pos | 210 | location | 211 | application_name | 212 | backend_type | postmaster 213 | leader_pid | 214 | query_id | 0 215 | ``` 216 | 217 | ### Remove extension: 218 | 219 | DROP EXTENSION log_fdw CASCADE; 220 | 221 | ``` 222 | postgres=# DROP EXTENSION log_fdw CASCADE; 223 | NOTICE: drop cascades to 3 other objects 224 | DETAIL: drop cascades to server log_fdw_server 225 | drop cascades to foreign table postgresql_2022_11_28_csv 226 | drop cascades to foreign table postgresql_2022_11_28_log 227 | DROP EXTENSION 228 | ``` 229 | 230 | ## Compatibility with PostgreSQL 231 | 232 | This extension currently works well with PostgreSQL version 14, 15 and 16devel. 233 | 234 | ## LICENSE 235 | 236 | See [LICENSE](https://github.com/aws/postgresql-logfdw/blob/main/LICENSE) for 237 | detailed information. 238 | 239 | ## Contributing 240 | 241 | See [CODE_OF_CONDUCT](https://github.com/aws/postgresql-logfdw/blob/main/CODE_OF_CONDUCT.md) 242 | and [CONTRIBUTING](https://github.com/aws/postgresql-logfdw/blob/main/CONTRIBUTING.md) 243 | for detailed information. 244 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /log_fdw.c: -------------------------------------------------------------------------------- 1 | /*------------------------------------------------------------------------- 2 | * 3 | * log_fdw.c 4 | * foreign-data wrapper for Postgres log files. 5 | * 6 | * Portions Copyright (c) 2022, Amazon Web Services 7 | * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group 8 | * 9 | * Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 10 | * 11 | * IDENTIFICATION 12 | * postgresql-log_fdw/log_fdw.c 13 | * 14 | *------------------------------------------------------------------------- 15 | */ 16 | #include "postgres.h" 17 | 18 | #include 19 | #include 20 | 21 | #include "access/htup_details.h" 22 | #include "access/reloptions.h" 23 | #include "access/sysattr.h" 24 | #include "access/table.h" 25 | #include "catalog/pg_foreign_table.h" 26 | #include "commands/copy.h" 27 | #include "commands/defrem.h" 28 | #if (PG_VERSION_NUM >= 180000) 29 | #include "commands/explain_state.h" 30 | #include "commands/explain_format.h" 31 | #else 32 | #include "commands/explain.h" 33 | #endif 34 | #include "commands/vacuum.h" 35 | #include "common/string.h" 36 | #include "foreign/fdwapi.h" 37 | #include "foreign/foreign.h" 38 | #include "funcapi.h" 39 | #include "miscadmin.h" 40 | #include "nodes/makefuncs.h" 41 | #include "optimizer/optimizer.h" 42 | #include "optimizer/pathnode.h" 43 | #include "optimizer/planmain.h" 44 | #include "optimizer/restrictinfo.h" 45 | #include "postmaster/syslogger.h" 46 | #include "utils/memutils.h" 47 | #include "utils/rel.h" 48 | #include "utils/sampling.h" 49 | 50 | #define CSV_FILE_EXTENSION ".csv" 51 | #define CSV_GZ_FILE_EXTENSION ".csv.gz" 52 | 53 | PG_MODULE_MAGIC; 54 | 55 | /* 56 | * Describes the valid options for objects that use this wrapper. 57 | */ 58 | struct FileFdwOption 59 | { 60 | const char *optname; 61 | Oid optcontext; /* Oid of catalog in which option may appear */ 62 | }; 63 | 64 | /* 65 | * Valid options for log_fdw. 66 | * These options are based on the options for the COPY FROM command. 67 | * But note that force_not_null and force_null are handled as boolean options 68 | * attached to a column, not as table options. 69 | * 70 | * Note: If you are adding new option for user mapping, you need to modify 71 | * fileGetOptions(), which currently doesn't bother to look at user mappings. 72 | */ 73 | static const struct FileFdwOption valid_options[] = { 74 | /* File options */ 75 | {"filename", ForeignTableRelationId}, 76 | 77 | /* Sentinel */ 78 | {NULL, InvalidOid} 79 | }; 80 | 81 | /* 82 | * FDW-specific information for RelOptInfo.fdw_private. 83 | */ 84 | typedef struct FileFdwPlanState 85 | { 86 | char *filename; /* file to read */ 87 | List *options; /* merged COPY options, excluding filename */ 88 | BlockNumber pages; /* estimate of file's physical size */ 89 | double ntuples; /* estimate of number of rows in file */ 90 | } FileFdwPlanState; 91 | 92 | /* 93 | * FDW-specific information for ForeignScanState.fdw_state. 94 | */ 95 | typedef struct FileFdwExecutionState 96 | { 97 | char *filename; /* file to read */ 98 | List *options; /* merged COPY options, excluding filename */ 99 | CopyFromState cstate; /* state of reading file */ 100 | } FileFdwExecutionState; 101 | 102 | /* 103 | * SQL functions 104 | */ 105 | PG_FUNCTION_INFO_V1(log_fdw_handler); 106 | PG_FUNCTION_INFO_V1(log_fdw_validator); 107 | 108 | /* 109 | * FDW callback routines 110 | */ 111 | static void fileGetForeignRelSize(PlannerInfo *root, 112 | RelOptInfo *baserel, 113 | Oid foreigntableid); 114 | static void fileGetForeignPaths(PlannerInfo *root, 115 | RelOptInfo *baserel, 116 | Oid foreigntableid); 117 | static ForeignScan *fileGetForeignPlan(PlannerInfo *root, 118 | RelOptInfo *baserel, 119 | Oid foreigntableid, 120 | ForeignPath *best_path, 121 | List *tlist, 122 | List *scan_clauses, 123 | Plan *outer_plan); 124 | static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es); 125 | static void fileBeginForeignScan(ForeignScanState *node, int eflags); 126 | static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node); 127 | static void fileReScanForeignScan(ForeignScanState *node); 128 | static void fileEndForeignScan(ForeignScanState *node); 129 | static bool fileAnalyzeForeignTable(Relation relation, 130 | AcquireSampleRowsFunc *func, 131 | BlockNumber *totalpages); 132 | static bool fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, 133 | RangeTblEntry *rte); 134 | 135 | /* 136 | * Helper functions 137 | */ 138 | static bool is_valid_option(const char *option, Oid context); 139 | static void fileGetOptions(Oid foreigntableid, 140 | char **filename, List **other_options); 141 | static bool check_selective_binary_conversion(RelOptInfo *baserel, 142 | Oid foreigntableid, 143 | List **columns); 144 | static void estimate_size(PlannerInfo *root, RelOptInfo *baserel, 145 | FileFdwPlanState *fdw_private); 146 | static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, 147 | FileFdwPlanState *fdw_private, 148 | Cost *startup_cost, Cost *total_cost); 149 | static int file_acquire_sample_rows(Relation onerel, int elevel, 150 | HeapTuple *rows, int targrows, 151 | double *totalrows, double *totaldeadrows); 152 | 153 | /* 154 | * Foreign-data wrapper handler function: return a struct with pointers 155 | * to my callback routines. 156 | */ 157 | Datum 158 | log_fdw_handler(PG_FUNCTION_ARGS) 159 | { 160 | FdwRoutine *fdwroutine = makeNode(FdwRoutine); 161 | 162 | fdwroutine->GetForeignRelSize = fileGetForeignRelSize; 163 | fdwroutine->GetForeignPaths = fileGetForeignPaths; 164 | fdwroutine->GetForeignPlan = fileGetForeignPlan; 165 | fdwroutine->ExplainForeignScan = fileExplainForeignScan; 166 | fdwroutine->BeginForeignScan = fileBeginForeignScan; 167 | fdwroutine->IterateForeignScan = fileIterateForeignScan; 168 | fdwroutine->ReScanForeignScan = fileReScanForeignScan; 169 | fdwroutine->EndForeignScan = fileEndForeignScan; 170 | fdwroutine->AnalyzeForeignTable = fileAnalyzeForeignTable; 171 | fdwroutine->IsForeignScanParallelSafe = fileIsForeignScanParallelSafe; 172 | 173 | PG_RETURN_POINTER(fdwroutine); 174 | } 175 | 176 | /* 177 | * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER, 178 | * USER MAPPING or FOREIGN TABLE that uses log_fdw. 179 | * 180 | * Raise an ERROR if the option or its value is considered invalid. 181 | */ 182 | Datum 183 | log_fdw_validator(PG_FUNCTION_ARGS) 184 | { 185 | List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); 186 | Oid catalog = PG_GETARG_OID(1); 187 | char *filename = NULL; 188 | ListCell *cell; 189 | 190 | /* 191 | * Check that only options supported by log_fdw, and allowed for the 192 | * current object type, are given. 193 | */ 194 | foreach(cell, options_list) 195 | { 196 | DefElem *def = (DefElem *) lfirst(cell); 197 | 198 | if (!is_valid_option(def->defname, catalog)) 199 | { 200 | const struct FileFdwOption *opt; 201 | StringInfoData buf; 202 | 203 | /* 204 | * Unknown option specified, complain about it. Provide a hint 205 | * with list of valid options for the object. 206 | */ 207 | initStringInfo(&buf); 208 | for (opt = valid_options; opt->optname; opt++) 209 | { 210 | if (catalog == opt->optcontext) 211 | appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "", 212 | opt->optname); 213 | } 214 | 215 | ereport(ERROR, 216 | (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), 217 | errmsg("invalid option \"%s\"", def->defname), 218 | buf.len > 0 219 | ? errhint("Valid options in this context are: %s", 220 | buf.data) 221 | : errhint("There are no valid options in this context."))); 222 | } 223 | 224 | /* Separate out filename. */ 225 | if (strcmp(def->defname, "filename") == 0) 226 | { 227 | if (filename) 228 | ereport(ERROR, 229 | (errcode(ERRCODE_SYNTAX_ERROR), 230 | errmsg("conflicting or redundant options"))); 231 | 232 | filename = defGetString(def); 233 | 234 | if (is_absolute_path(filename)) 235 | ereport(ERROR, 236 | (errcode(ERRCODE_SYNTAX_ERROR), 237 | errmsg("absolute path is not allowed as filename for log_fdw foreign tables"))); 238 | } 239 | } 240 | 241 | /* Filename option is required for log_fdw foreign tables. */ 242 | if (catalog == ForeignTableRelationId && filename == NULL) 243 | ereport(ERROR, 244 | (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), 245 | errmsg("filename is required for log_fdw foreign tables"))); 246 | 247 | PG_RETURN_VOID(); 248 | } 249 | 250 | /* 251 | * Check if the provided option is one of the valid options. 252 | * context is the Oid of the catalog holding the object the option is for. 253 | */ 254 | static bool 255 | is_valid_option(const char *option, Oid context) 256 | { 257 | const struct FileFdwOption *opt; 258 | 259 | for (opt = valid_options; opt->optname; opt++) 260 | { 261 | if (context == opt->optcontext && strcmp(opt->optname, option) == 0) 262 | return true; 263 | } 264 | return false; 265 | } 266 | 267 | /* 268 | * Fetch the options for a log_fdw foreign table. 269 | * 270 | * We have to separate out "filename" from the other options because 271 | * it must not appear in the options list passed to the core COPY code. 272 | */ 273 | static void 274 | fileGetOptions(Oid foreigntableid, 275 | char **filename, List **other_options) 276 | { 277 | ForeignTable *table; 278 | ForeignServer *server; 279 | ForeignDataWrapper *wrapper; 280 | List *options; 281 | ListCell *lc; 282 | char *full_filename; 283 | 284 | /* 285 | * Extract options from FDW objects. We ignore user mappings because 286 | * log_fdw doesn't have any options that can be specified there. 287 | * 288 | * (XXX Actually, given the current contents of valid_options[], there's 289 | * no point in examining anything except the foreign table's own options. 290 | * Simplify?) 291 | */ 292 | table = GetForeignTable(foreigntableid); 293 | server = GetForeignServer(table->serverid); 294 | wrapper = GetForeignDataWrapper(server->fdwid); 295 | 296 | options = NIL; 297 | options = list_concat(options, wrapper->options); 298 | options = list_concat(options, server->options); 299 | options = list_concat(options, table->options); 300 | 301 | /* 302 | * Separate out the filename. 303 | */ 304 | *filename = NULL; 305 | foreach(lc, options) 306 | { 307 | DefElem *def = (DefElem *) lfirst(lc); 308 | 309 | if (strcmp(def->defname, "filename") == 0) 310 | { 311 | *filename = defGetString(def); 312 | options = foreach_delete_current(options, lc); 313 | break; 314 | } 315 | } 316 | 317 | /* 318 | * The validator should have checked that a filename was included in the 319 | * options, but check again, just in case. 320 | */ 321 | if (*filename == NULL) 322 | elog(ERROR, "filename is required for log_fdw foreign tables"); 323 | 324 | if (is_absolute_path(*filename)) 325 | ereport(ERROR, 326 | (errcode(ERRCODE_SYNTAX_ERROR), 327 | errmsg("absolute path is not allowed as filename for log_fdw foreign tables"))); 328 | 329 | full_filename = (char *) palloc(MAXPGPATH); 330 | 331 | if (strlen(Log_directory) > 0 && is_absolute_path(Log_directory)) 332 | snprintf(full_filename, MAXPGPATH, "%s/%s", Log_directory, *filename); 333 | else 334 | snprintf(full_filename, MAXPGPATH, "%s/%s/%s", DataDir, Log_directory, *filename); 335 | 336 | *filename = full_filename; 337 | 338 | /* Determine the format based on the file name. */ 339 | if (pg_str_endswith(full_filename, CSV_FILE_EXTENSION) 340 | || pg_str_endswith(full_filename, CSV_GZ_FILE_EXTENSION)) 341 | options = lappend(options, makeDefElem("format", (Node *) makeString("csv"), -1)); 342 | else 343 | { 344 | options = lappend(options, makeDefElem("format", (Node *) makeString("text"), -1)); 345 | 346 | /* 347 | * The default delimiter in text mode is the tab character. This can 348 | * cause problems if the tab character is used in the log file. To 349 | * get around this, set the delimiter to be the EOT character. This 350 | * is a much rarer character to encounter in a log file, so it should 351 | * virtually remove the delimiter. 352 | * 353 | * TODO: find a better way to remove the delimiter character 354 | */ 355 | options = lappend(options, makeDefElem("delimiter", (Node *) makeString("\x4"), -1)); 356 | } 357 | 358 | *other_options = options; 359 | } 360 | 361 | /* 362 | * fileGetForeignRelSize 363 | * Obtain relation size estimates for a foreign table 364 | */ 365 | static void 366 | fileGetForeignRelSize(PlannerInfo *root, 367 | RelOptInfo *baserel, 368 | Oid foreigntableid) 369 | { 370 | FileFdwPlanState *fdw_private; 371 | 372 | /* 373 | * Fetch options. We only need filename at this point, but we might as 374 | * well get everything and not need to re-fetch it later in planning. 375 | */ 376 | fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); 377 | fileGetOptions(foreigntableid, 378 | &fdw_private->filename, 379 | &fdw_private->options); 380 | baserel->fdw_private = (void *) fdw_private; 381 | 382 | /* Estimate relation size */ 383 | estimate_size(root, baserel, fdw_private); 384 | } 385 | 386 | /* 387 | * fileGetForeignPaths 388 | * Create possible access paths for a scan on the foreign table 389 | * 390 | * Currently we don't support any push-down feature, so there is only one 391 | * possible access path, which simply returns all records in the order in 392 | * the data file. 393 | */ 394 | static void 395 | fileGetForeignPaths(PlannerInfo *root, 396 | RelOptInfo *baserel, 397 | Oid foreigntableid) 398 | { 399 | FileFdwPlanState *fdw_private = (FileFdwPlanState *) baserel->fdw_private; 400 | Cost startup_cost; 401 | Cost total_cost; 402 | List *columns; 403 | List *coptions = NIL; 404 | 405 | /* Decide whether to selectively perform binary conversion */ 406 | if (check_selective_binary_conversion(baserel, 407 | foreigntableid, 408 | &columns)) 409 | coptions = list_make1(makeDefElem("convert_selectively", 410 | (Node *) columns, -1)); 411 | 412 | /* Estimate costs */ 413 | estimate_costs(root, baserel, fdw_private, 414 | &startup_cost, &total_cost); 415 | 416 | /* 417 | * Create a ForeignPath node and add it as only possible path. We use the 418 | * fdw_private list of the path to carry the convert_selectively option; 419 | * it will be propagated into the fdw_private list of the Plan node. 420 | */ 421 | add_path(baserel, (Path *) 422 | create_foreignscan_path(root, baserel, 423 | NULL, /* default pathtarget */ 424 | baserel->rows, 425 | #if (PG_VERSION_NUM >= 180000) 426 | 0, 427 | #endif 428 | startup_cost, 429 | total_cost, 430 | NIL, /* no pathkeys */ 431 | baserel->lateral_relids, 432 | NULL, /* no extra plan */ 433 | #if (PG_VERSION_NUM >= 170000) 434 | NIL, /* no fdw_restrictinfo list */ 435 | #endif 436 | coptions)); 437 | 438 | /* 439 | * If data file was sorted, and we knew it somehow, we could insert 440 | * appropriate pathkeys into the ForeignPath node to tell the planner 441 | * that. 442 | */ 443 | } 444 | 445 | /* 446 | * fileGetForeignPlan 447 | * Create a ForeignScan plan node for scanning the foreign table 448 | */ 449 | static ForeignScan * 450 | fileGetForeignPlan(PlannerInfo *root, 451 | RelOptInfo *baserel, 452 | Oid foreigntableid, 453 | ForeignPath *best_path, 454 | List *tlist, 455 | List *scan_clauses, 456 | Plan *outer_plan) 457 | { 458 | Index scan_relid = baserel->relid; 459 | 460 | /* 461 | * We have no native ability to evaluate restriction clauses, so we just 462 | * put all the scan_clauses into the plan node's qual list for the 463 | * executor to check. So all we have to do here is strip RestrictInfo 464 | * nodes from the clauses and ignore pseudoconstants (which will be 465 | * handled elsewhere). 466 | */ 467 | scan_clauses = extract_actual_clauses(scan_clauses, false); 468 | 469 | /* Create the ForeignScan node */ 470 | return make_foreignscan(tlist, 471 | scan_clauses, 472 | scan_relid, 473 | NIL, /* no expressions to evaluate */ 474 | best_path->fdw_private, 475 | NIL, /* no custom tlist */ 476 | NIL, /* no remote quals */ 477 | outer_plan); 478 | } 479 | 480 | /* 481 | * fileExplainForeignScan 482 | * Produce extra output for EXPLAIN 483 | */ 484 | static void 485 | fileExplainForeignScan(ForeignScanState *node, ExplainState *es) 486 | { 487 | char *filename; 488 | List *options; 489 | 490 | /* Fetch options --- we only need filename at this point */ 491 | fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), 492 | &filename, &options); 493 | 494 | ExplainPropertyText("Foreign File", filename, es); 495 | 496 | /* Suppress file size if we're not showing cost details */ 497 | if (es->costs) 498 | { 499 | struct stat stat_buf; 500 | 501 | if (stat(filename, &stat_buf) == 0) 502 | ExplainPropertyInteger("Foreign File Size", "b", 503 | (int64) stat_buf.st_size, es); 504 | } 505 | } 506 | 507 | /* 508 | * fileBeginForeignScan 509 | * Initiate access to the file by creating CopyFromState 510 | */ 511 | static void 512 | fileBeginForeignScan(ForeignScanState *node, int eflags) 513 | { 514 | ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; 515 | char *filename; 516 | List *options; 517 | CopyFromState cstate; 518 | FileFdwExecutionState *festate; 519 | 520 | /* 521 | * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL. 522 | */ 523 | if (eflags & EXEC_FLAG_EXPLAIN_ONLY) 524 | return; 525 | 526 | /* Fetch options of foreign table */ 527 | fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), 528 | &filename, &options); 529 | 530 | /* Add any options from the plan (currently only convert_selectively) */ 531 | options = list_concat(options, plan->fdw_private); 532 | 533 | /* 534 | * Create CopyFromState from FDW options. We always acquire all columns, 535 | * so as to match the expected ScanTupleSlot signature. 536 | */ 537 | cstate = BeginCopyFrom(NULL, 538 | node->ss.ss_currentRelation, 539 | NULL, 540 | filename, 541 | false, 542 | NULL, 543 | NIL, 544 | options); 545 | 546 | /* 547 | * Save state in node->fdw_state. We must save enough information to call 548 | * BeginCopyFrom() again. 549 | */ 550 | festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState)); 551 | festate->filename = filename; 552 | festate->options = options; 553 | festate->cstate = cstate; 554 | 555 | node->fdw_state = (void *) festate; 556 | } 557 | 558 | /* 559 | * fileIterateForeignScan 560 | * Read next record from the data file and store it into the 561 | * ScanTupleSlot as a virtual tuple 562 | */ 563 | static TupleTableSlot * 564 | fileIterateForeignScan(ForeignScanState *node) 565 | { 566 | FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; 567 | TupleTableSlot *slot = node->ss.ss_ScanTupleSlot; 568 | bool found; 569 | ErrorContextCallback errcallback; 570 | MemoryContext ccxt = CurrentMemoryContext; 571 | 572 | /* Set up callback to identify error line number. */ 573 | errcallback.callback = CopyFromErrorCallback; 574 | errcallback.arg = (void *) festate->cstate; 575 | errcallback.previous = error_context_stack; 576 | error_context_stack = &errcallback; 577 | 578 | /* 579 | * The protocol for loading a virtual tuple into a slot is first 580 | * ExecClearTuple, then fill the values/isnull arrays, then 581 | * ExecStoreVirtualTuple. If we don't find another row in the file, we 582 | * just skip the last step, leaving the slot empty as required. 583 | * 584 | * We can pass ExprContext = NULL because we read all columns from the 585 | * file, so no need to evaluate default expressions. 586 | * 587 | * We can also pass tupleOid = NULL because we don't allow oids for 588 | * foreign tables. 589 | */ 590 | ExecClearTuple(slot); 591 | 592 | /* 593 | * In Postgres version 13, we add one additional column "backend_type" in 594 | * csvlog file, thus we need to update log_fdw to 1.2 handle it. But if 595 | * still use log_fdw 1.1 to read a log file part of which have this 596 | * additional column , we will get error from NextCopyFrom(), about 597 | * "ERROR: extra data after last expected column"; similarly if we use 598 | * log_fdw 1.2 to read a log file part of which do not contain this 599 | * column, we will get error from NextCopyFrom(), about "ERROR: missing 600 | * data after last expected column". Thus we wanna give proper hint to 601 | * customer about why it happens by catching these 2 errors. Notice that 602 | * some other problems like wrong format for some certain columns will be 603 | * catched as well, for which we did not replace original error message 604 | * and error hints, as their errors are irrelevant with "backend_type" 605 | * issue, thus we just reuse existing hints. Moreover this code change is 606 | * generically applicable to any log formatting change in the future, 607 | * which we should keep an eye on it for future update. 608 | */ 609 | PG_TRY(); 610 | { 611 | found = NextCopyFrom(festate->cstate, NULL, 612 | slot->tts_values, slot->tts_isnull); 613 | } 614 | PG_CATCH(); 615 | { 616 | ErrorData *errdata; 617 | char *customhint = "This could mean that the log file or a portion of the log " 618 | "file was created by a version of PostgreSQL that the installed " 619 | "version of log_fdw cannot read. It may be possible to read the " 620 | "file after running the command 'ALTER EXTENSION log_fdw UPDATE' " 621 | "and recreating the foreign table."; 622 | 623 | MemoryContextSwitchTo(ccxt); 624 | errdata = CopyErrorData(); 625 | FlushErrorState(); 626 | ereport(ERROR, 627 | (errcode(errdata->sqlerrcode), 628 | errmsg("%s", errdata->message), 629 | errdata->hint ? errhint("%s", errdata->hint) : errhint("%s", customhint))); 630 | FreeErrorData(errdata); 631 | } 632 | PG_END_TRY(); 633 | 634 | if (found) 635 | ExecStoreVirtualTuple(slot); 636 | 637 | /* Remove error callback. */ 638 | error_context_stack = errcallback.previous; 639 | 640 | return slot; 641 | } 642 | 643 | /* 644 | * fileReScanForeignScan 645 | * Rescan table, possibly with new parameters 646 | */ 647 | static void 648 | fileReScanForeignScan(ForeignScanState *node) 649 | { 650 | FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; 651 | 652 | EndCopyFrom(festate->cstate); 653 | 654 | festate->cstate = BeginCopyFrom(NULL, 655 | node->ss.ss_currentRelation, 656 | NULL, 657 | festate->filename, 658 | false, 659 | NULL, 660 | NIL, 661 | festate->options); 662 | } 663 | 664 | /* 665 | * fileEndForeignScan 666 | * Finish scanning foreign table and dispose objects used for this scan 667 | */ 668 | static void 669 | fileEndForeignScan(ForeignScanState *node) 670 | { 671 | FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; 672 | 673 | /* if festate is NULL, we are in EXPLAIN; nothing to do */ 674 | if (festate) 675 | EndCopyFrom(festate->cstate); 676 | } 677 | 678 | /* 679 | * fileAnalyzeForeignTable 680 | * Test whether analyzing this foreign table is supported 681 | */ 682 | static bool 683 | fileAnalyzeForeignTable(Relation relation, 684 | AcquireSampleRowsFunc *func, 685 | BlockNumber *totalpages) 686 | { 687 | char *filename; 688 | List *options; 689 | struct stat stat_buf; 690 | 691 | /* Fetch options of foreign table */ 692 | fileGetOptions(RelationGetRelid(relation), &filename, &options); 693 | 694 | /* 695 | * Get size of the file. (XXX if we fail here, would it be better to just 696 | * return false to skip analyzing the table?) 697 | */ 698 | if (stat(filename, &stat_buf) < 0) 699 | ereport(ERROR, 700 | (errcode_for_file_access(), 701 | errmsg("could not stat file \"%s\": %m", 702 | filename))); 703 | 704 | /* 705 | * Convert size to pages. Must return at least 1 so that we can tell 706 | * later on that pg_class.relpages is not default. 707 | */ 708 | *totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ; 709 | if (*totalpages < 1) 710 | *totalpages = 1; 711 | 712 | *func = file_acquire_sample_rows; 713 | 714 | return true; 715 | } 716 | 717 | /* 718 | * fileIsForeignScanParallelSafe 719 | * Reading a file in a parallel worker should work just the same as 720 | * reading it in the leader, so mark scans safe. 721 | */ 722 | static bool 723 | fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, 724 | RangeTblEntry *rte) 725 | { 726 | return true; 727 | } 728 | 729 | /* 730 | * check_selective_binary_conversion 731 | * 732 | * Check to see if it's useful to convert only a subset of the file's columns 733 | * to binary. If so, construct a list of the column names to be converted, 734 | * return that at *columns, and return TRUE. (Note that it's possible to 735 | * determine that no columns need be converted, for instance with a COUNT(*) 736 | * query. So we can't use returning a NIL list to indicate failure.) 737 | */ 738 | static bool 739 | check_selective_binary_conversion(RelOptInfo *baserel, 740 | Oid foreigntableid, 741 | List **columns) 742 | { 743 | ListCell *lc; 744 | Relation rel; 745 | TupleDesc tupleDesc; 746 | #if (PG_VERSION_NUM >= 160000) 747 | int attidx; 748 | #else 749 | AttrNumber attnum; 750 | #endif 751 | Bitmapset *attrs_used = NULL; 752 | bool has_wholerow = false; 753 | int numattrs; 754 | int i; 755 | 756 | *columns = NIL; /* default result */ 757 | 758 | /* Collect all the attributes needed for joins or final output. */ 759 | pull_varattnos((Node *) baserel->reltarget->exprs, baserel->relid, 760 | &attrs_used); 761 | 762 | /* Add all the attributes used by restriction clauses. */ 763 | foreach(lc, baserel->baserestrictinfo) 764 | { 765 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); 766 | 767 | pull_varattnos((Node *) rinfo->clause, baserel->relid, 768 | &attrs_used); 769 | } 770 | 771 | /* Convert attribute numbers to column names. */ 772 | rel = table_open(foreigntableid, AccessShareLock); 773 | tupleDesc = RelationGetDescr(rel); 774 | 775 | #if (PG_VERSION_NUM >= 160000) 776 | attidx = -1; 777 | while ((attidx = bms_next_member(attrs_used, attidx)) >= 0) 778 | { 779 | /* attidx is zero-based, attnum is the normal attribute number */ 780 | AttrNumber attnum = attidx + FirstLowInvalidHeapAttributeNumber; 781 | #else 782 | while ((attnum = bms_first_member(attrs_used)) >= 0) 783 | { 784 | /* Adjust for system attributes. */ 785 | attnum += FirstLowInvalidHeapAttributeNumber; 786 | #endif 787 | if (attnum == 0) 788 | { 789 | has_wholerow = true; 790 | break; 791 | } 792 | 793 | /* Ignore system attributes. */ 794 | if (attnum < 0) 795 | continue; 796 | 797 | /* Get user attributes. */ 798 | if (attnum > 0) 799 | { 800 | Form_pg_attribute attr = TupleDescAttr(tupleDesc, attnum - 1); 801 | char *attname = NameStr(attr->attname); 802 | 803 | /* Skip dropped attributes (probably shouldn't see any here). */ 804 | if (attr->attisdropped) 805 | continue; 806 | *columns = lappend(*columns, makeString(pstrdup(attname))); 807 | } 808 | } 809 | 810 | /* Count non-dropped user attributes while we have the tupdesc. */ 811 | numattrs = 0; 812 | for (i = 0; i < tupleDesc->natts; i++) 813 | { 814 | Form_pg_attribute attr = TupleDescAttr(tupleDesc, i); 815 | 816 | if (attr->attisdropped) 817 | continue; 818 | numattrs++; 819 | } 820 | 821 | table_close(rel, AccessShareLock); 822 | 823 | /* If there's a whole-row reference, fail: we need all the columns. */ 824 | if (has_wholerow) 825 | { 826 | *columns = NIL; 827 | return false; 828 | } 829 | 830 | /* If all the user attributes are needed, fail. */ 831 | if (numattrs == list_length(*columns)) 832 | { 833 | *columns = NIL; 834 | return false; 835 | } 836 | 837 | return true; 838 | } 839 | 840 | /* 841 | * Estimate size of a foreign table. 842 | * 843 | * The main result is returned in baserel->rows. We also set 844 | * fdw_private->pages and fdw_private->ntuples for later use in the cost 845 | * calculation. 846 | */ 847 | static void 848 | estimate_size(PlannerInfo *root, RelOptInfo *baserel, 849 | FileFdwPlanState *fdw_private) 850 | { 851 | struct stat stat_buf; 852 | BlockNumber pages; 853 | double ntuples; 854 | double nrows; 855 | 856 | /* 857 | * Get size of the file. It might not be there at plan time, though, in 858 | * which case we have to use a default estimate. 859 | */ 860 | if (stat(fdw_private->filename, &stat_buf) < 0) 861 | stat_buf.st_size = 10 * BLCKSZ; 862 | 863 | /* 864 | * Convert size to pages for use in I/O cost estimate later. 865 | */ 866 | pages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ; 867 | if (pages < 1) 868 | pages = 1; 869 | fdw_private->pages = pages; 870 | 871 | /* 872 | * Estimate the number of tuples in the file. 873 | */ 874 | if (baserel->pages > 0) 875 | { 876 | /* 877 | * We have # of pages and # of tuples from pg_class (that is, from a 878 | * previous ANALYZE), so compute a tuples-per-page estimate and scale 879 | * that by the current file size. 880 | */ 881 | double density; 882 | 883 | density = baserel->tuples / (double) baserel->pages; 884 | ntuples = clamp_row_est(density * (double) pages); 885 | } 886 | else 887 | { 888 | /* 889 | * Otherwise we have to fake it. We back into this estimate using the 890 | * planner's idea of the relation width; which is bogus if not all 891 | * columns are being read, not to mention that the text representation 892 | * of a row probably isn't the same size as its internal 893 | * representation. Possibly we could do something better, but the 894 | * real answer to anyone who complains is "ANALYZE" ... 895 | */ 896 | int tuple_width; 897 | 898 | tuple_width = MAXALIGN(baserel->reltarget->width) + 899 | MAXALIGN(SizeofHeapTupleHeader); 900 | ntuples = clamp_row_est((double) stat_buf.st_size / 901 | (double) tuple_width); 902 | } 903 | fdw_private->ntuples = ntuples; 904 | 905 | /* 906 | * Now estimate the number of rows returned by the scan after applying the 907 | * baserestrictinfo quals. 908 | */ 909 | nrows = ntuples * 910 | clauselist_selectivity(root, 911 | baserel->baserestrictinfo, 912 | 0, 913 | JOIN_INNER, 914 | NULL); 915 | 916 | nrows = clamp_row_est(nrows); 917 | 918 | /* Save the output-rows estimate for the planner */ 919 | baserel->rows = nrows; 920 | } 921 | 922 | /* 923 | * Estimate costs of scanning a foreign table. 924 | * 925 | * Results are returned in *startup_cost and *total_cost. 926 | */ 927 | static void 928 | estimate_costs(PlannerInfo *root, RelOptInfo *baserel, 929 | FileFdwPlanState *fdw_private, 930 | Cost *startup_cost, Cost *total_cost) 931 | { 932 | BlockNumber pages = fdw_private->pages; 933 | double ntuples = fdw_private->ntuples; 934 | Cost run_cost = 0; 935 | Cost cpu_per_tuple; 936 | 937 | /* 938 | * We estimate costs almost the same way as cost_seqscan(), thus assuming 939 | * that I/O costs are equivalent to a regular table file of the same size. 940 | * However, we take per-tuple CPU costs as 10x of a seqscan, to account 941 | * for the cost of parsing records. 942 | */ 943 | run_cost += seq_page_cost * pages; 944 | 945 | *startup_cost = baserel->baserestrictcost.startup; 946 | cpu_per_tuple = cpu_tuple_cost * 10 + baserel->baserestrictcost.per_tuple; 947 | run_cost += cpu_per_tuple * ntuples; 948 | *total_cost = *startup_cost + run_cost; 949 | } 950 | 951 | /* 952 | * file_acquire_sample_rows -- acquire a random sample of rows from the table 953 | * 954 | * Selected rows are returned in the caller-allocated array rows[], 955 | * which must have at least targrows entries. 956 | * The actual number of rows selected is returned as the function result. 957 | * We also count the total number of rows in the file and return it into 958 | * *totalrows. Note that *totaldeadrows is always set to 0. 959 | * 960 | * Note that the returned list of rows is not always in order by physical 961 | * position in the file. Therefore, correlation estimates derived later 962 | * may be meaningless, but it's OK because we don't use the estimates 963 | * currently (the planner only pays attention to correlation for indexscans). 964 | */ 965 | static int 966 | file_acquire_sample_rows(Relation onerel, int elevel, 967 | HeapTuple *rows, int targrows, 968 | double *totalrows, double *totaldeadrows) 969 | { 970 | int numrows = 0; 971 | double rowstoskip = -1; /* -1 means not set yet */ 972 | ReservoirStateData rstate; 973 | TupleDesc tupDesc; 974 | Datum *values; 975 | bool *nulls; 976 | bool found; 977 | char *filename; 978 | List *options; 979 | CopyFromState cstate; 980 | ErrorContextCallback errcallback; 981 | MemoryContext oldcontext = CurrentMemoryContext; 982 | MemoryContext tupcontext; 983 | 984 | Assert(onerel); 985 | Assert(targrows > 0); 986 | 987 | tupDesc = RelationGetDescr(onerel); 988 | values = (Datum *) palloc(tupDesc->natts * sizeof(Datum)); 989 | nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); 990 | 991 | /* Fetch options of foreign table */ 992 | fileGetOptions(RelationGetRelid(onerel), &filename, &options); 993 | 994 | /* 995 | * Create CopyFromState from FDW options. 996 | */ 997 | cstate = BeginCopyFrom(NULL, onerel, NULL, filename, false, NULL, NIL, options); 998 | 999 | /* 1000 | * Use per-tuple memory context to prevent leak of memory used to read 1001 | * rows from the file with Copy routines. 1002 | */ 1003 | tupcontext = AllocSetContextCreate(CurrentMemoryContext, 1004 | "log_fdw temporary context", 1005 | ALLOCSET_DEFAULT_SIZES); 1006 | 1007 | /* Prepare for sampling rows */ 1008 | reservoir_init_selection_state(&rstate, targrows); 1009 | 1010 | /* Set up callback to identify error line number. */ 1011 | errcallback.callback = CopyFromErrorCallback; 1012 | errcallback.arg = (void *) cstate; 1013 | errcallback.previous = error_context_stack; 1014 | error_context_stack = &errcallback; 1015 | 1016 | *totalrows = 0; 1017 | *totaldeadrows = 0; 1018 | for (;;) 1019 | { 1020 | /* Check for user-requested abort or sleep */ 1021 | #if (PG_VERSION_NUM >= 180000) 1022 | vacuum_delay_point(true); 1023 | #else 1024 | vacuum_delay_point(); 1025 | #endif 1026 | 1027 | /* Fetch next row */ 1028 | MemoryContextReset(tupcontext); 1029 | MemoryContextSwitchTo(tupcontext); 1030 | 1031 | found = NextCopyFrom(cstate, NULL, values, nulls); 1032 | 1033 | MemoryContextSwitchTo(oldcontext); 1034 | 1035 | if (!found) 1036 | break; 1037 | 1038 | /* 1039 | * The first targrows sample rows are simply copied into the 1040 | * reservoir. Then we start replacing tuples in the sample until we 1041 | * reach the end of the relation. This algorithm is from Jeff Vitter's 1042 | * paper (see more info in commands/analyze.c). 1043 | */ 1044 | if (numrows < targrows) 1045 | { 1046 | rows[numrows++] = heap_form_tuple(tupDesc, values, nulls); 1047 | } 1048 | else 1049 | { 1050 | /* 1051 | * t in Vitter's paper is the number of records already processed. 1052 | * If we need to compute a new S value, we must use the 1053 | * not-yet-incremented value of totalrows as t. 1054 | */ 1055 | if (rowstoskip < 0) 1056 | rowstoskip = reservoir_get_next_S(&rstate, *totalrows, targrows); 1057 | 1058 | if (rowstoskip <= 0) 1059 | { 1060 | /* 1061 | * Found a suitable tuple, so save it, replacing one old tuple 1062 | * at random 1063 | */ 1064 | #if (PG_VERSION_NUM < 150000) 1065 | int k = (int) (targrows * sampler_random_fract(rstate.randstate)); 1066 | #else 1067 | int k = (int) (targrows * sampler_random_fract(&rstate.randstate)); 1068 | #endif 1069 | Assert(k >= 0 && k < targrows); 1070 | heap_freetuple(rows[k]); 1071 | rows[k] = heap_form_tuple(tupDesc, values, nulls); 1072 | } 1073 | 1074 | rowstoskip -= 1; 1075 | } 1076 | 1077 | *totalrows += 1; 1078 | } 1079 | 1080 | /* Remove error callback. */ 1081 | error_context_stack = errcallback.previous; 1082 | 1083 | /* Clean up. */ 1084 | MemoryContextDelete(tupcontext); 1085 | 1086 | EndCopyFrom(cstate); 1087 | 1088 | pfree(values); 1089 | pfree(nulls); 1090 | 1091 | /* 1092 | * Emit some interesting relation info 1093 | */ 1094 | ereport(elevel, 1095 | (errmsg("\"%s\": file contains %.0f rows; " 1096 | "%d rows in sample", 1097 | RelationGetRelationName(onerel), 1098 | *totalrows, numrows))); 1099 | 1100 | return numrows; 1101 | } 1102 | --------------------------------------------------------------------------------