├── .github └── workflows │ └── test.yml ├── .gitignore ├── Changes ├── META.json ├── Makefile ├── README.md ├── doc └── pgtelemetry.html ├── expected └── definitions.out ├── extension ├── pgtelemetry--0.1--0.2.sql ├── pgtelemetry--0.1--1.0.sql ├── pgtelemetry--0.1.sql ├── pgtelemetry--0.2--1.0.sql ├── pgtelemetry--0.2.sql ├── pgtelemetry--1.0--1.1.sql ├── pgtelemetry--1.0.sql ├── pgtelemetry--1.1--1.2.sql ├── pgtelemetry--1.1.sql ├── pgtelemetry--1.2--1.3.sql ├── pgtelemetry--1.2.sql ├── pgtelemetry--1.3--1.4.sql ├── pgtelemetry--1.3.sql ├── pgtelemetry--1.4--1.4a.sql ├── pgtelemetry--1.4.sql ├── pgtelemetry--1.4a--1.4b.sql ├── pgtelemetry--1.4a.sql ├── pgtelemetry--1.4b--1.5.sql ├── pgtelemetry--1.4b.sql ├── pgtelemetry--1.5--1.6.sql ├── pgtelemetry--1.5.sql ├── pgtelemetry--1.6.sql └── pgtelemetry-head.sql ├── extras ├── prometheus │ ├── queries-statements.yaml │ ├── queries-wal.yaml │ └── queries.yaml ├── regression │ └── postgresql.conf └── zabbix │ ├── checkNoLongRunningQueries.pl │ ├── checkReplicationLag.pl │ ├── checkTablespaceSize.pl │ └── checkWaitingQueries.pl ├── pgtelemetry.control └── sql └── definitions.sql /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test extension 2 | 3 | # Run this workflow every time a new commit pushed to your repository 4 | on: 5 | push: 6 | branches: ['*'] 7 | #branches-ignore: 'master' 8 | 9 | pull_request: 10 | branches: ['*'] 11 | #branches-ignore: 'master' 12 | 13 | workflow_dispatch: 14 | 15 | 16 | defaults: 17 | run: 18 | shell: bash 19 | 20 | 21 | jobs: 22 | pr-test: 23 | name: Test the extension 24 | runs-on: ${{ matrix.os }} 25 | env: 26 | EXTENSION_NAME: pgtelemetry 27 | EXTENSION_DB: ajtest 28 | EXTENSION_BRANCH: master 29 | EXTENSION_SUBDIRECTORY: "" 30 | EXTENSION_TEST_QUERY: "" 31 | strategy: 32 | matrix: 33 | # also test 'latest', eventually this will be upgraded to a newer version and might fail early 34 | #os: [ubuntu-18.04, 
ubuntu-20.04, ubuntu-latest] 35 | os: [ubuntu-latest] 36 | postgresql: [16, 15, 14, 13, 12, 11, 10] 37 | 38 | steps: 39 | - name: Checkout code 40 | uses: actions/checkout@v3 41 | 42 | - name: Versions 43 | run: echo "${{ matrix.os }} - ${{ matrix.postgresql }}" 44 | 45 | - name: Calculate working directory 46 | run: echo "PWD=$(realpath ./$EXTENSION_SUBDIRECTORY)" >> $GITHUB_OUTPUT 47 | id: pwd 48 | 49 | - name: Working directory 50 | run: echo "${{ steps.pwd.outputs.PWD }}" 51 | 52 | 53 | # GitHub does only checkout the current branch 54 | # in case this is a PR the check also needs $EXTENSION_BRANCH for the .control file 55 | - name: get branch 56 | run: git fetch --depth=5 origin $EXTENSION_BRANCH 57 | 58 | - name: See the .control file 59 | run: git show origin/$EXTENSION_BRANCH:$EXTENSION_SUBDIRECTORY$EXTENSION_NAME.control 60 | 61 | 62 | # there might be PostgreSQL packages pre-installed, remove them 63 | - name: Installed PostgreSQL packages 64 | run: dpkg --list | grep postgresql 65 | 66 | - name: Get list of PostgreSQL packages 67 | run: echo "Packages=$(dpkg-query -f '${Package}\n' -W | grep ^postgresql | xargs)" >> $GITHUB_OUTPUT 68 | id: preinstalled_packages 69 | 70 | - name: Remove preinstalled PostgreSQL packages 71 | run: sudo dpkg --purge ${{ steps.preinstalled_packages.outputs.Packages }} 72 | 73 | 74 | # verify result 75 | - name: Installed PostgreSQL packages 76 | run: dpkg --list | grep postgresql 77 | continue-on-error: true 78 | 79 | 80 | # install build tools 81 | - name: Install build-essential and other tools 82 | run: sudo apt-get install -y build-essential ruby curl ca-certificates gnupg 83 | 84 | 85 | # enable PostgreSQL APT repository 86 | - name: Install GPG Key for PostgreSQL repository 87 | run: curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - 88 | 89 | - name: Install repository 90 | run: sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > 
/etc/apt/sources.list.d/pgdg.list' 91 | 92 | - name: Update repository 93 | run: sudo apt-get update 94 | 95 | # install the requested version 96 | - name: Install PostgreSQL 97 | run: sudo apt-get install -y postgresql-${{ matrix.postgresql }} postgresql-server-dev-${{ matrix.postgresql }} postgresql-client-${{ matrix.postgresql }} 98 | 99 | # debug output 100 | - name: Path of pg_config 101 | run: which pg_config 102 | 103 | - name: pg_config output 104 | run: pg_config 105 | 106 | - name: Update pg_hba.conf 107 | run: sudo bash -c "echo 'local all all trust' > /etc/postgresql/${{ matrix.postgresql }}/main/pg_hba.conf" 108 | 109 | - name: Update pg_hba.conf 110 | run: sudo bash -c "echo 'host all all 0/0 trust' >> /etc/postgresql/${{ matrix.postgresql }}/main/pg_hba.conf" 111 | 112 | - name: Restart PostgreSQL 113 | run: sudo service postgresql reload 114 | 115 | 116 | # do the actual compilation 117 | - name: Compile the extension 118 | run: cd ${{ steps.pwd.outputs.PWD }} && make 119 | 120 | - name: Test the extension 121 | run: cd ${{ steps.pwd.outputs.PWD }} && make check 122 | 123 | # install extension 124 | - name: Install the extension 125 | run: cd ${{ steps.pwd.outputs.PWD }} && sudo make install 126 | 127 | - name: Update postgresql.auto.conf 128 | run: sudo bash -c "echo 'shared_preload_libraries = ''pg_stat_statements''' >> /etc/postgresql/${{ matrix.postgresql }}/main/postgresql.auto.conf" 129 | 130 | - name: Restart PostgreSQL 131 | run: sudo service postgresql restart 132 | 133 | - name: Test the extension 134 | run: cd ${{ steps.pwd.outputs.PWD }} && make PGUSER=postgres installcheck 135 | 136 | - name: Show regression.out 137 | run: cat ${{ steps.pwd.outputs.PWD }}/regression.out 138 | if: ${{ failure() }} 139 | 140 | - name: Show regression.diffs 141 | run: cat ${{ steps.pwd.outputs.PWD }}/regression.diffs 142 | if: ${{ failure() }} 143 | 144 | 145 | # start testing 146 | 147 | - name: Get current branch name 148 | run: echo "Packages=$(git 
branch --show-current)" >> $GITHUB_OUTPUT 149 | id: current_branch 150 | 151 | # in a PR this version might be different 152 | - name: Get current extension version 153 | run: echo "Version=$(cat $EXTENSION_SUBDIRECTORY$EXTENSION_NAME.control | grep default_version | sed 's/[^0-9\.]*//g')" >> $GITHUB_OUTPUT 154 | id: current_extension_version 155 | 156 | # the version from the branch in $EXTENSION_BRANCH 157 | - name: Get installed extension version 158 | run: echo "Version=$(git show origin/$EXTENSION_BRANCH:$EXTENSION_SUBDIRECTORY$EXTENSION_NAME.control | grep default_version | sed 's/[^0-9\.]*//g')" >> $GITHUB_OUTPUT 159 | id: installed_extension_version 160 | 161 | - name: Show versions 162 | run: echo "${{ steps.installed_extension_version.outputs.Version }} - ${{ steps.current_extension_version.outputs.Version }}" 163 | 164 | - name: Test current version string 165 | run: exit 1 166 | if: steps.current_extension_version.outputs.Version == '' 167 | 168 | - name: Test installed version string 169 | run: exit 1 170 | if: steps.installed_extension_version.outputs.Version == '' 171 | 172 | - name: Create test database 173 | run: createdb -U postgres $EXTENSION_DB 174 | 175 | # install the version from $EXTENSION_BRANCH 176 | - name: Install pg_stat_statements extension in database 177 | run: psql -U postgres -c "CREATE EXTENSION pg_stat_statements" $EXTENSION_DB 178 | 179 | - name: Install extension in database 180 | run: psql -U postgres -c "CREATE EXTENSION $EXTENSION_NAME VERSION '${{ steps.installed_extension_version.outputs.Version }}'" $EXTENSION_DB 181 | 182 | - name: Get extension version installed in the database - Step 1 183 | run: psql -U postgres -A -q -t -o /tmp/installed_version_step_1.txt -c "SELECT extversion FROM pg_catalog.pg_extension WHERE extname='$EXTENSION_NAME'" $EXTENSION_DB 184 | 185 | - name: Get extension version installed in the database - Step 2 186 | run: echo "Version=$(cat /tmp/installed_version_step_1.txt)" >> $GITHUB_OUTPUT 187 | 
id: installed_version_step_1 188 | 189 | - name: Show installed version - after extension install 190 | run: echo "${{ steps.installed_version_step_1.outputs.Version }}" 191 | 192 | # if this is a PR, the version might be different - try an extension upgrade in this case 193 | - name: Upgrade extension in database 194 | run: psql -U postgres -c "ALTER EXTENSION $EXTENSION_NAME UPDATE TO '${{ steps.current_extension_version.outputs.Version }}'" $EXTENSION_DB 195 | if: steps.installed_extension_version.outputs.Version != steps.current_extension_version.outputs.Version 196 | 197 | - name: Get extension version installed in the database - Step 1 198 | run: psql -U postgres -A -q -t -o /tmp/installed_version_step_2.txt -c "SELECT extversion FROM pg_catalog.pg_extension WHERE extname='$EXTENSION_NAME'" $EXTENSION_DB 199 | 200 | - name: Get extension version installed in the database - Step 2 201 | run: echo "Version=$(cat /tmp/installed_version_step_2.txt)" >> $GITHUB_OUTPUT 202 | id: installed_version_step_2 203 | 204 | - name: Show installed version - after extension update 205 | run: echo "${{ steps.installed_version_step_2.outputs.Version }}" 206 | 207 | - name: Run test query 208 | run: psql -U postgres -c "$EXTENSION_TEST_QUERY" $EXTENSION_DB 209 | if: env.EXTENSION_TEST_QUERY != '' 210 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | log/ 2 | results/ 3 | -------------------------------------------------------------------------------- /Changes: -------------------------------------------------------------------------------- 1 | PGTelemetry Change Log 2 | 3 | 2020-02-28 1.4.b Improve the long-running queries report 4 | * Make sure the report contains both query and current state duration. 5 | * Count running in the current state, as opposed to the total query time, if state is listed in the rules. 
6 | * Use a prefix match for pg_dump, as some distros prepend a version to it (i.e. pg_dump11). 7 | 8 | 2017-09-30 1.0 First full stable release 9 | * Fixed wal telemetry on slaves 10 | * Added lock detail metrics 11 | 12 | 2017-08-23 0.2 First release on pgxn 13 | * Support for PostgreSQL 10 added 14 | * Support for wal telemetry on slaves 15 | 16 | 2017-08-08 0.1 First semi-public release 17 | * Basic metrics 18 | * Some Zabbix alert scripts 19 | * A sample Prometheus postgres_exporter config 20 | -------------------------------------------------------------------------------- /META.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pgtelemetry", 3 | "abstract": "Useful monitoring and metrics views for PostgreSQL", 4 | "version": "1.6.0", 5 | "maintainer" : [ 6 | "adjustgmbh" 7 | ], 8 | "license": { 9 | "PostgreSQL": "http://www.postgresql.org/about/licence" 10 | }, 11 | "provides": { 12 | "pgtelemetry": { 13 | "file": "extension/pgtelemetry-HEAD.sql", 14 | "docfile": "doc/pgtelemetry.html", 15 | "version": "1.6.0", 16 | "abstract": "Useful monitoring and metrics for PostgreSQL" 17 | } 18 | }, 19 | "meta-spec": { 20 | "version": "1.0.0", 21 | "url": "http://pgxn.org/meta/spec.txt" 22 | }, 23 | "description": "pgtelemetry is a suite of views, functions and tables which gather and produce a variety of useful metrics for monitoring health of clusters including locks, state of queries, query plans, object size, replication health, and WAL telemetry.", 24 | "tags": [ "monitoring"], 25 | "prereqs": { 26 | "runtime": { 27 | "requires": { 28 | "PostgreSQL": "11.0.0", 29 | "pg_stat_statements": 0, 30 | "plpgsql": 0 31 | } 32 | } 33 | }, 34 | "resources": { 35 | "homepage": "http://big-elephants.com/pg-telemetry/", 36 | "bugtracker": { 37 | "web": "http://github.com/adjust/pg-telemetry/issues" 38 | }, 39 | "repository": { 40 | "url": "git://github.com/adjust/pg-telemetry.git", 41 | "web": 
"http://github.com/adjust/pg-telemetry", 42 | "type": "git" 43 | } 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | EXTENSION = pgtelemetry 2 | DATA = extension/* 3 | 4 | ifeq ($(PG_CONFIG),) 5 | PG_CONFIG = pg_config 6 | endif 7 | REGRESS = definitions 8 | REGRESS_OPTS = --load-extension=pg_stat_statements --temp-config=extras/regression/postgresql.conf --temp-instance=tmp-regression-test 9 | PGXS := $(shell $(PG_CONFIG) --pgxs) 10 | include $(PGXS) 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![TEST](https://github.com/adjust/pg-telemetry/actions/workflows/test.yml/badge.svg)](https://github.com/adjust/pg-telemetry/actions/workflows/test.yml) 2 | 3 | 4 | # pg_telemetry, an extension for getting usage states from PostgreSQL 5 | 6 | Welcome to pg_telemetry, an extension for monitoring usage of PostgreSQL. 7 | 8 | This module provides functions for deriving usage stats from system stats 9 | and other system administration functions. It aims to provide a generally 10 | useful and reusable series of metrics functions for monitoring purposes. 11 | 12 | Most of the data is set to export numbers and can be used to supply SNMP 13 | agents, Zabbix agents, and other monitoring programs with data. 14 | 15 | ## Intended Usage 16 | 17 | The module is intended to be used with monitoring and visualizaiton 18 | programs such as pganalyze, zabbix, and munin. These tend to use an agent 19 | which collects performance data and sends it to a server for graphics 20 | generation. Some of the functions here are wrapped in views. 21 | 22 | A second class of functions are in place for administrators to use in 23 | troubleshooting and debugging performance issues. 
However these are to be 24 | used as needed while the general stats functions are assumed to be run 25 | every few minutes at least. 26 | 27 | We expect this extension usually to be installed in its own schema. 28 | However, it can be safely installed anywhere its names don't conflict 29 | with anything else. 30 | 31 | ## Areas of focus 32 | 33 | In the initial phase, there are several areas of focus for this project: 34 | 35 | * Disk usage 36 | * Sources of concurrent queries 37 | * WAL throughput 38 | * Replication Monitoring 39 | 40 | ## Requirements 41 | 42 | Currently this is expected to require PostgreSQL 10+. 43 | 44 | This module also requires that pg_stat_statements is installed. 45 | 46 | ## Documentation of monitoring views, relations, and functions 47 | 48 | See the [pgtelemetry.html](doc/pgtelemetry.html) in the doc folder. 49 | 50 | ## Installation 51 | 52 | pg_stat_statements must be installed and preloaded. Once that is the case, 53 | you can use the standard make/make install process to install this extension: 54 | 55 | make install 56 | 57 | That copies the files into the PostgreSQL extensions directory. In some 58 | cases (Windows) you may need to install appropriate development tools 59 | such as MinGW and on Linux in some cases you may need the development 60 | libraries. 61 | 62 | After that, in the database you want to base your monitoring on: 63 | 64 | create extension pgtelemetry; 65 | 66 | This will create the pgtelemetry schema and place all the objects there. 67 | 68 | ## Extras 69 | 70 | In the extras directory there are a number of important integration examples. 
71 | 72 | extras/prometheus: 73 | 74 | * queries.yaml includes a basic yaml for pulling stats into Prometheus 75 | * queries_wal.yaml is a basic wal telemetry yaml that can be run on 76 | master databasess 77 | 78 | extras/zabbix: 79 | 80 | * checkTablespaceSize.pl raises an alarm if any tablespace is too big 81 | * checkNoLongRunningQueries.pl raises an alarm if queries have been 82 | running too long. 83 | * checkWaitingQueries.pl raises an alarm if too many queries are 84 | waiting on locks and latches. 85 | * checkReplicationLag.pl raises an alarm if replication lag is too 86 | high (checks downstream). 87 | 88 | What is too big? To high? Too long? These are set in the beginning 89 | of the scripts making this easy to integrate with Zabbix. 90 | -------------------------------------------------------------------------------- /expected/definitions.out: -------------------------------------------------------------------------------- 1 | set client_min_messages to warning; 2 | create extension pgtelemetry cascade; 3 | select count(*) > 0 from pgtelemetry.database_size; 4 | ?column? 5 | ---------- 6 | t 7 | (1 row) 8 | 9 | SELECT count(*) > 0 from pgtelemetry.relation_total_size; 10 | ?column? 11 | ---------- 12 | t 13 | (1 row) 14 | 15 | SELECT count(*) > 0 from pgtelemetry.catalog_total_size; 16 | ?column? 17 | ---------- 18 | t 19 | (1 row) 20 | 21 | SELECT count(*) > 0 from pgtelemetry.index_size; 22 | ?column? 23 | ---------- 24 | t 25 | (1 row) 26 | 27 | SELECT count(*) > 0 from pgtelemetry.relation_toast_size; 28 | ?column? 29 | ---------- 30 | t 31 | (1 row) 32 | 33 | SELECT count(*) > 0 from pgtelemetry.tablespace_size; 34 | ?column? 35 | ---------- 36 | t 37 | (1 row) 38 | 39 | SELECT count(*) > 0 from pgtelemetry.database_size; 40 | ?column? 41 | ---------- 42 | t 43 | (1 row) 44 | 45 | SELECT count(*) > 0 from pgtelemetry.connections_by_application; 46 | ?column? 
47 | ---------- 48 | t 49 | (1 row) 50 | 51 | SELECT count(*) > 0 from pgtelemetry.connections_by_state; 52 | ?column? 53 | ---------- 54 | t 55 | (1 row) 56 | 57 | SELECT count(*) > 0 from pgtelemetry.connections_by_ip_source; 58 | ?column? 59 | ---------- 60 | t 61 | (1 row) 62 | 63 | SELECT count(*) > 0 from pgtelemetry.longest_running_active_queries; 64 | ?column? 65 | ---------- 66 | t 67 | (1 row) 68 | 69 | SELECT count(*) > -1 from pgtelemetry.waiting_connections_by_event_type; 70 | ?column? 71 | ---------- 72 | t 73 | (1 row) 74 | 75 | SELECT count(*) > 0 from pgtelemetry.locks_by_type; 76 | ?column? 77 | ---------- 78 | t 79 | (1 row) 80 | 81 | SELECT count(*) > 0 from pgtelemetry.locks_by_mode; 82 | ?column? 83 | ---------- 84 | t 85 | (1 row) 86 | 87 | SELECT pgtelemetry.count_waiting_on_locks_more_than_seconds(300) >= 0; 88 | ?column? 89 | ---------- 90 | t 91 | (1 row) 92 | 93 | SELECT count(*) > 0 from pgtelemetry.tuple_access_stats; 94 | ?column? 95 | ---------- 96 | t 97 | (1 row) 98 | 99 | SELECT count(*) > 0 from pgtelemetry.autovacuum_stats; 100 | ?column? 101 | ---------- 102 | t 103 | (1 row) 104 | 105 | SELECT count(*) > 0 from pgtelemetry.statement_query_rows_time; 106 | ?column? 107 | ---------- 108 | t 109 | (1 row) 110 | 111 | SELECT count(*) > 0 from pgtelemetry.statement_query_buffers; 112 | ?column? 113 | ---------- 114 | t 115 | (1 row) 116 | 117 | SELECT count(*) > 0 from pgtelemetry.wal_telemetry(); 118 | ?column? 119 | ---------- 120 | t 121 | (1 row) 122 | 123 | SELECT count(*) > 0 from pgtelemetry.replication_slot_lag; 124 | ?column? 125 | ---------- 126 | f 127 | (1 row) 128 | 129 | SELECT count(*) > 0 from pgtelemetry.get_autovacuum_vacuum_info(); 130 | ?column? 131 | ---------- 132 | t 133 | (1 row) 134 | 135 | SELECT count(*) > 0 from pgtelemetry.get_autovacuum_analyze_info(); 136 | ?column? 137 | ---------- 138 | t 139 | (1 row) 140 | 141 | SELECT count(*) > 0 from pgtelemetry.vacuum_progress(); 142 | ?column? 
143 | ---------- 144 | f 145 | (1 row) 146 | 147 | SELECT count(*) > 0 from pgtelemetry.long_running_queries; 148 | ?column? 149 | ---------- 150 | f 151 | (1 row) 152 | 153 | -------------------------------------------------------------------------------- /extension/pgtelemetry--0.1--0.2.sql: -------------------------------------------------------------------------------- 1 | 2 | -- cheating but needed to make plans safe on replicas. 3 | create function is_replica() returns bool language sql IMMUTABLE 4 | AS $$ 5 | select pg_is_in_recovery(); 6 | $$; 7 | 8 | 9 | -- 9.6-compatibility for PostgreSQL 10 and above 10 | do $d$ begin 11 | if version() not like 'PostgreSQL 9.%' then 12 | CREATE FUNCTION pg_current_xlog_location() RETURNS pg_lsn language sql as $$ 13 | select pg_current_wal_lsn(); 14 | $$; 15 | CREATE FUNCTION pg_last_xlog_replay_location() returns pg_lsn language sql as $$ 16 | select pg_last_wal_replay_lsn(); 17 | $$; 18 | end if; 19 | end;$d$ language plpgsql; 20 | 21 | create or replace function wal_telemetry() returns table ( 22 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 23 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 24 | bytes_per_sec numeric 25 | ) language sql as $$ 26 | WITH insert_record AS ( 27 | insert into pg_telemetry_wal_log 28 | select extract('epoch' from now()), now(), 29 | pg_current_xlog_location() as wal_location 30 | WHERE NOT is_replica() 31 | returning * 32 | ), current_record AS ( 33 | select * from insert_record where not is_replica() 34 | UNION 35 | SELECT * from pg_telemetry_wal_log 36 | WHERE is_replica() 37 | order by run_time desc limit 1 38 | ) 39 | select c.run_time as current_epoch, l.run_time as last_epoch, 40 | c.run_time - l.run_time as secs_elapsed, 41 | c.lsn as current_lsn, l.lsn as last_lsn, 42 | c.lsn - l.lsn as bytes_elapsed, 43 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 44 | FROM current_record c, 45 | lateral (select * from 
pg_telemetry_wal_log where run_time < c.run_time 46 | order by run_time desc limit 1) l; 47 | $$ set search_path from current; 48 | 49 | select wal_telemetry(); 50 | 51 | comment on function wal_telemetry() is $$ 52 | The wal_telemetry() function checks the current wal location and compares 53 | with the last entry in the pg_telemetry_wal_log. It then provides for you 54 | both current and last data, and the differences between them. These include 55 | bytes elapsed and seconds elapsed, and bytes per sec. 56 | 57 | The function is designed so that you can export delta information to a monitoring 58 | solution such as munin or prometheus without the latter having to know anything 59 | about lsn representation or losing information in the process. 60 | 61 | On a replica this does not write to the table and measures deltas from the last 62 | this was written on the master. 63 | $$; 64 | 65 | -------------------------------------------------------------------------------- /extension/pgtelemetry--0.1--1.0.sql: -------------------------------------------------------------------------------- 1 | 2 | -- cheating but needed to make plans safe on replicas. 
3 | create function is_replica() returns bool language sql IMMUTABLE 4 | AS $$ 5 | select pg_is_in_recovery(); 6 | $$; 7 | 8 | 9 | -- 9.6-compatibility for PostgreSQL 10 and above 10 | do $d$ begin 11 | if version() not like 'PostgreSQL 9.%' then 12 | CREATE FUNCTION pg_current_xlog_location() RETURNS pg_lsn language sql as $$ 13 | select pg_current_wal_lsn(); 14 | $$; 15 | CREATE FUNCTION pg_last_xlog_replay_location() returns pg_lsn language sql as $$ 16 | select pg_last_wal_replay_lsn(); 17 | $$; 18 | end if; 19 | end;$d$ language plpgsql; 20 | 21 | create or replace function wal_telemetry() returns table ( 22 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 23 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 24 | bytes_per_sec numeric 25 | ) language sql as $$ 26 | WITH insert_record AS ( 27 | insert into pg_telemetry_wal_log 28 | select extract('epoch' from now()), now(), 29 | pg_current_xlog_location() as wal_location 30 | WHERE NOT is_replica() 31 | returning * 32 | ), current_record AS ( 33 | select * from insert_record where not is_replica() 34 | UNION 35 | SELECT * from pg_telemetry_wal_log 36 | WHERE is_replica() 37 | order by run_time desc limit 1 38 | ) 39 | select c.run_time as current_epoch, l.run_time as last_epoch, 40 | c.run_time - l.run_time as secs_elapsed, 41 | c.lsn as current_lsn, l.lsn as last_lsn, 42 | c.lsn - l.lsn as bytes_elapsed, 43 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 44 | FROM current_record c, 45 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 46 | order by run_time desc limit 1) l; 47 | $$ set search_path from current; 48 | 49 | select wal_telemetry(); 50 | 51 | comment on function wal_telemetry() is $$ 52 | The wal_telemetry() function checks the current wal location and compares 53 | with the last entry in the pg_telemetry_wal_log. It then provides for you 54 | both current and last data, and the differences between them. 
These include 55 | bytes elapsed and seconds elapsed, and bytes per sec. 56 | 57 | The function is designed so that you can export delta information to a monitoring 58 | solution such as munin or prometheus without the latter having to know anything 59 | about lsn representation or losing information in the process. 60 | 61 | On a replica this does not write to the table and measures deltas from the last 62 | this was written on the master. 63 | $$; 64 | 65 | 66 | create function wal_telemetry_create_or_select_record() 67 | returns pg_telemetry_wal_log language plpgsql as 68 | $$ 69 | declare log_entry pg_telemetry_wal_log; 70 | begin 71 | if pg_is_in_recovery() then 72 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 73 | else 74 | insert into pg_telemetry_wal_log 75 | select extract('epoch' from now()), now(), 76 | pg_current_xlog_location() as wal_location 77 | returning * into log_entry; 78 | end if; 79 | return log_entry; 80 | end; 81 | $$; 82 | 83 | create or replace function wal_telemetry() returns table ( 84 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 85 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 86 | bytes_per_sec numeric 87 | ) language sql as $$ 88 | select c.run_time as current_epoch, l.run_time as last_epoch, 89 | c.run_time - l.run_time as secs_elapsed, 90 | c.lsn as current_lsn, l.lsn as last_lsn, 91 | c.lsn - l.lsn as bytes_elapsed, 92 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 93 | FROM wal_telemetry_create_or_select_record() c, 94 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 95 | order by run_time desc limit 1) l; 96 | $$ set search_path from current; 97 | 98 | select wal_telemetry(); 99 | 100 | CREATE VIEW waiting_queries_reason_details AS 101 | select wait_event_type, wait_event, count(*) from pg_stat_activity 102 | WHERE wait_event is not null 103 | GROUP BY wait_event_type, wait_event; 104 | 
-------------------------------------------------------------------------------- /extension/pgtelemetry--0.1.sql: -------------------------------------------------------------------------------- 1 | 2 | -- disk space 3 | 4 | CREATE VIEW relation_total_size AS 5 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 6 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 7 | pg_relation_size(c.oid) as exclusive_bytes, 8 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 9 | from pg_class c 10 | join pg_namespace n ON c.relnamespace = n.oid 11 | WHERE relkind = 'r' 12 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 13 | 14 | COMMENT ON VIEW relation_total_size IS 15 | $$ 16 | This view provides basic information on relation size. Catalogs and tables 17 | in the information schema are exclused, as are TOAST tables. 18 | 19 | The inclusive metrics show the relation along with indexes and TOAST. The 20 | exclusiove metrics show without these things. The bytes metrics are intended 21 | for graph drawing, while the sizes are there for administrators who want to 22 | quickly query this information and make decisions. 23 | $$; 24 | 25 | CREATE VIEW catalog_total_size AS 26 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 27 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 28 | pg_relation_size(c.oid) as exclusive_bytes, 29 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 30 | from pg_class c 31 | join pg_namespace n ON c.relnamespace = n.oid 32 | WHERE relkind = 'r' 33 | and n.nspname in ('pg_catalog', 'information_schema'); 34 | 35 | COMMENT ON VIEW relation_total_size IS 36 | $$ 37 | This view provides basic information on relation size in PostgreSQL system 38 | tables (those in pg_catalog and information_schema). 39 | 40 | The inclusive metrics show the relation along with indexes and TOAST. 
The 41 | exclusive metrics show without these things. The bytes metrics are intended 42 | for graph drawing, while the sizes are there for administrators who want to 43 | quickly query this information and make decisions. 44 | $$; 45 | 46 | -- biggest indexes 47 | 48 | CREATE VIEW index_size AS 49 | select c.oid, c.oid::regclass as index, 50 | pg_relation_size(c.oid) as bytes, 51 | pg_size_pretty(pg_relation_size(c.oid)) as size 52 | from pg_class c 53 | join pg_namespace n ON c.relnamespace = n.oid 54 | WHERE relkind = 'i' 55 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 56 | 57 | COMMENT ON VIEW index_size IS 58 | $$ 59 | This table is most useful in tracking down questions of bloat, fill factor, and 60 | performance of GIN indexes among other things. 61 | $$; 62 | 63 | -- Tables by size (TOAST) 64 | 65 | CREATE VIEW relation_toast_size AS 66 | select c.oid, c.oid::regclass as relation, 67 | pg_relation_size(t.oid) as exclusive_bytes, 68 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 69 | from pg_class c 70 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 71 | join pg_namespace n ON c.relnamespace = n.oid; 72 | 73 | COMMENT ON VIEW relation_toast_size IS 74 | $$ 75 | This measures the amount of space in a relation's TOAST tables. These are 76 | populated when data exceeds what can be reasonably stored inline in the main 77 | heap pages. You would expect to see this non-zero where you have large fields 78 | being stored, particularly arrays of composite types. 79 | 80 | Performance-wise moving data to TOAST improves sequential scans where the data 81 | is not required (count(*) for example) at the cost of making the data that has 82 | been moved far more expensive to retrieve and process. 
83 | $$; 84 | 85 | -- tablespaces size 86 | 87 | CREATE VIEW tablespace_size AS 88 | select spcname as name, pg_tablespace_size(oid) as bytes, 89 | pg_size_pretty(pg_tablespace_size(oid)) as size 90 | from pg_tablespace; 91 | 92 | COMMENT ON VIEW tablespace_size IS 93 | $$ 94 | This provides database-cluster-wide statistics on disk usage by tablespace. 95 | 96 | Note that tablespaces and databases are orthogonal. Typically if you are 97 | running out of disk space, you want to check this one first, then database_size 98 | and then the size of the relations in the largest database in that order. 99 | $$; 100 | 101 | -- database size 102 | 103 | CREATE VIEW database_size AS 104 | SELECT datname as name, pg_database_size(oid) as bytes, 105 | pg_size_pretty(pg_database_size(oid)) as size 106 | FROM pg_database; 107 | 108 | comment on view database_size is 109 | $$ 110 | This provides cluster-wide size statistics of databases. 111 | $$; 112 | 113 | -- connections by application_name 114 | 115 | CREATE VIEW connections_by_application AS 116 | select application_name, count(*) 117 | from pg_stat_activity group by application_name; 118 | 119 | comment on view connections_by_application is 120 | $$ 121 | This gives you the number of connections (cluster-wide) by application name. 122 | 123 | By default the application name is the program name that connected to the db. 124 | $$; 125 | 126 | -- connections by state 127 | 128 | CREATE VIEW connections_by_state AS 129 | select case when wait_event is null then state else 'waiting' end as state, 130 | count(*) 131 | from pg_stat_activity group by 1; 132 | 133 | 134 | comment on view connections_by_state is 135 | $$ 136 | This gives you the number of connections (cluster-wide) by state (active, idle, 137 | idle in transaction, etc). If the query is active but is waiting on a lock or 138 | latch, we change this to 'waiting.' 
139 | $$; 140 | 141 | 142 | -- connections by ip address source 143 | 144 | CREATE VIEW connections_by_ip_source as 145 | SELECT client_addr, count(*) from pg_stat_activity 146 | GROUP BY client_addr; 147 | 148 | comment on view connections_by_ip_source is 149 | $$ 150 | This is a cluster-wide breakdown of connections by IP source. Between this and 151 | the applicaiton_name it is a good indication of where server laod is coming from 152 | as well as porblems like connection handle leaks. 153 | $$; 154 | 155 | -- table access stats 156 | -- longest-running active queries 157 | 158 | CREATE VIEW longest_running_active_queries AS 159 | select application_name, state, wait_event_type, wait_event, query, pid, 160 | client_addr, 161 | age(now(), query_start) as running_for 162 | from pg_stat_activity where state = 'active' 163 | ORDER BY age(now(), query_start) desc; 164 | 165 | comment on view longest_running_active_queries is 166 | $$ 167 | This view is intended to be typically used by administrators in determining 168 | which queries to focus on. However it can be used for reporting and alerting 169 | as well. 170 | $$; 171 | 172 | -- waiting connections 173 | 174 | CREATE VIEW waiting_connections_by_event_type AS 175 | select wait_event_type, count(*) from pg_stat_activity 176 | WHERE wait_event is not null 177 | GROUP BY wait_event_type; 178 | 179 | comment on view waiting_connections_by_event_type is 180 | $$ 181 | This view provides basic, cluster-global, statistics on why queries are waiting 182 | on other queries. 183 | $$; 184 | 185 | -- locks by type 186 | 187 | CREATE VIEW locks_by_type AS 188 | SELECT locktype, count(*) from pg_locks 189 | GROUP BY locktype; 190 | 191 | COMMENT ON VIEW locks_by_type is 192 | $$ 193 | This view provides cluster-wide statistics on what sorts of locks are present. 194 | 195 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 196 | helpful in determining where the locks are coming from. 
197 | $$; 198 | 199 | -- locks by mode 200 | CREATE VIEW locks_by_mode AS 201 | SELECT mode, count(*) from pg_locks 202 | GROUP BY mode; 203 | 204 | COMMENT ON view locks_by_mode is 205 | $$ 206 | This view provides cluster-wide statistics on locks by lock mode (access share 207 | vs exclusive for example). Combined with the locks_by_type view, this view 208 | provides a some opportunities to spot locking problems. 209 | $$; 210 | 211 | 212 | CREATE VIEW tuple_access_stats AS 213 | select schemaname, relname, 214 | seq_scan, seq_tup_read, 215 | idx_scan, idx_tup_fetch, 216 | n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, 217 | n_mod_since_analyze 218 | FROM pg_stat_user_tables; 219 | 220 | comment on view tuple_access_stats is 221 | $$ 222 | This view provides statistcs for scans (index and sequential) along with 223 | numbers of tuples updated through various means. It allows you to get a pretty 224 | good idea of where you may need indexes or where IO-related problems may be 225 | coming from. 226 | $$; 227 | 228 | -- autovacuum stats 229 | CREATE VIEW autovacuum_stats AS 230 | select schemaname, relname, 231 | last_vacuum, 232 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 233 | vacuum_count, 234 | last_autovacuum, 235 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 236 | autovacuum_count, 237 | last_analyze, 238 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 239 | analyze_count, 240 | last_autoanalyze, 241 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 242 | autoanalyze_count 243 | FROM pg_stat_user_tables; 244 | 245 | comment on view autovacuum_stats is 246 | $$ 247 | This provides basic metrics per table in the current database for when 248 | autovacuum and analyze were last run (as well as manual maintenance). 
249 | $$; 250 | 251 | -- query stats 252 | 253 | -- call, time, rows 254 | 255 | CREATE VIEW statement_query_rows_time AS 256 | SELECT datname, queryid, query, sum(calls) as calls, 257 | sum(total_time) as total_time, sum(rows) as rows 258 | FROM pg_stat_statements 259 | JOIN pg_database d ON d.oid = dbid 260 | GROUP BY datname, queryid, query; 261 | 262 | comment on view statement_query_rows_time is 263 | $$ 264 | This gives aggregated of stats for a given query (cluster-wide) 265 | per query and database name. This view provides high level timing and row 266 | statistics. 267 | $$; 268 | 269 | -- buffers 270 | CREATE VIEW statement_query_buffers AS 271 | SELECT datname, queryid, query, sum(calls), 272 | sum(shared_blks_hit) as shared_blks_hit, 273 | sum(shared_blks_read) as shared_blks_read, 274 | sum(shared_blks_dirtied) as shared_blks_dirtied, 275 | sum(shared_blks_written) as shared_blks_written, 276 | sum(temp_blks_read) as tmp_blkd_read, 277 | sum(temp_blks_written) as tmp_blkd_written 278 | FROM pg_stat_statements 279 | JOIN pg_database d ON d.oid = dbid 280 | GROUP BY datname, queryid, query; 281 | 282 | comment on view statement_query_buffers is 283 | $$ 284 | This gives aggregated of stats for a given query (cluster-wide) 285 | per query and database name. This view provides low-level IO statistics. 286 | $$; 287 | 288 | -- WAL last state 289 | 290 | CREATE TABLE pg_telemetry_wal_log ( 291 | run_time numeric unique, -- epoch timestamp 292 | timestamp timestamp unique, 293 | lsn pg_lsn 294 | ); 295 | 296 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 297 | This table logs the times and results of wal telemetry readings so that 298 | deltas can be calculated. At least one row must be present to get any useful 299 | data out of the wal_telemetry() function at all. 300 | 301 | If you get one telemetry entry a minute, over the course of a year you will get 302 | just over half a million entries. 
These are indexed on both epoch and timestamp 303 | so access is not impaired, but if you want ot purge, be careful to leave at 304 | least one entry at the end. 305 | 306 | You can also process these as a time series using WINDOW functions like lag. 307 | $$; 308 | 309 | -- WAL telemetry 310 | 311 | create function wal_telemetry() returns table ( 312 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 313 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 314 | bytes_per_sec numeric 315 | ) language sql as $$ 316 | WITH current_record AS ( 317 | insert into pg_telemetry_wal_log 318 | select extract('epoch' from now()), now(), 319 | case when pg_is_in_recovery() 320 | then pg_last_xlog_replay_location() 321 | else pg_current_xlog_location() end as wal_location 322 | returning * 323 | ) 324 | select c.run_time as current_epoch, l.run_time as last_epoch, 325 | c.run_time - l.run_time as secs_elapsed, 326 | c.lsn as current_lsn, l.lsn as last_lsn, 327 | c.lsn - l.lsn as bytes_elapsed, 328 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 329 | FROM current_record c, 330 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 331 | order by run_time desc limit 1) l; 332 | $$ set search_path from current; 333 | 334 | select wal_telemetry(); 335 | 336 | comment on function wal_telemetry() is $$ 337 | The wal_telemetry() function checks the current wal location and compares 338 | with the last entry in the pg_telemetry_wal_log. It then provides for you 339 | both current and last data, and the differences between them. These include 340 | bytes elapsed and seconds elapsed, and bytes per sec. 341 | 342 | The function is designed so that you can export delta information to a monitoring 343 | solution such as munin or prometheus without the latter having to know anything 344 | about lsn representation or losing information in the process. 
This function cannot be run on a replica, though you can analyze the WAL logs.
371 | $$; 372 | -------------------------------------------------------------------------------- /extension/pgtelemetry--0.2--1.0.sql: -------------------------------------------------------------------------------- 1 | 2 | create function wal_telemetry_create_or_select_record() 3 | returns pg_telemetry_wal_log language plpgsql as 4 | $$ 5 | declare log_entry pg_telemetry_wal_log; 6 | begin 7 | if pg_is_in_recovery() then 8 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 9 | else 10 | insert into pg_telemetry_wal_log 11 | select extract('epoch' from now()), now(), 12 | pg_current_xlog_location() as wal_location 13 | returning * into log_entry; 14 | end if; 15 | return log_entry; 16 | end; 17 | $$; 18 | 19 | create or replace function wal_telemetry() returns table ( 20 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 21 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 22 | bytes_per_sec numeric 23 | ) language sql as $$ 24 | select c.run_time as current_epoch, l.run_time as last_epoch, 25 | c.run_time - l.run_time as secs_elapsed, 26 | c.lsn as current_lsn, l.lsn as last_lsn, 27 | c.lsn - l.lsn as bytes_elapsed, 28 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 29 | FROM wal_telemetry_create_or_select_record() c, 30 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 31 | order by run_time desc limit 1) l; 32 | $$ set search_path from current; 33 | 34 | select wal_telemetry(); 35 | 36 | CREATE VIEW waiting_queries_reason_details AS 37 | select wait_event_type, wait_event, count(*) from pg_stat_activity 38 | WHERE wait_event is not null 39 | GROUP BY wait_event_type, wait_event; 40 | -------------------------------------------------------------------------------- /extension/pgtelemetry--0.2.sql: -------------------------------------------------------------------------------- 1 | 2 | -- cheating but needed to make plans safe on replicas. 
3 | create function is_replica() returns bool language sql IMMUTABLE 4 | AS $$ 5 | select pg_is_in_recovery(); 6 | $$; 7 | 8 | 9 | -- 9.6-compatibility for PostgreSQL 10 and above 10 | do $d$ begin 11 | if version() not like 'PostgreSQL 9.%' then 12 | CREATE FUNCTION pg_current_xlog_location() RETURNS pg_lsn language sql as $$ 13 | select pg_current_wal_lsn(); 14 | $$; 15 | CREATE FUNCTION pg_last_xlog_replay_location() returns pg_lsn language sql as $$ 16 | select pg_last_wal_replay_lsn(); 17 | $$; 18 | end if; 19 | end;$d$ language plpgsql; 20 | 21 | -- disk space 22 | 23 | CREATE VIEW relation_total_size AS 24 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 25 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 26 | pg_relation_size(c.oid) as exclusive_bytes, 27 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 28 | from pg_class c 29 | join pg_namespace n ON c.relnamespace = n.oid 30 | WHERE relkind = 'r' 31 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 32 | 33 | COMMENT ON VIEW relation_total_size IS 34 | $$ 35 | This view provides basic information on relation size. Catalogs and tables 36 | in the information schema are exclused, as are TOAST tables. 37 | 38 | The inclusive metrics show the relation along with indexes and TOAST. The 39 | exclusiove metrics show without these things. The bytes metrics are intended 40 | for graph drawing, while the sizes are there for administrators who want to 41 | quickly query this information and make decisions. 
42 | $$; 43 | 44 | CREATE VIEW catalog_total_size AS 45 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 46 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 47 | pg_relation_size(c.oid) as exclusive_bytes, 48 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 49 | from pg_class c 50 | join pg_namespace n ON c.relnamespace = n.oid 51 | WHERE relkind = 'r' 52 | and n.nspname in ('pg_catalog', 'information_schema'); 53 | 54 | COMMENT ON VIEW relation_total_size IS 55 | $$ 56 | This view provides basic information on relation size in PostgreSQL system 57 | tables (those in pg_catalog and information_schema). 58 | 59 | The inclusive metrics show the relation along with indexes and TOAST. The 60 | exclusiove metrics show without these things. The bytes metrics are intended 61 | for graph drawing, while the sizes are there for administrators who want to 62 | quickly query this information and make decisions. 63 | $$; 64 | 65 | -- biggest indexes 66 | 67 | CREATE VIEW index_size AS 68 | select c.oid, c.oid::regclass as index, 69 | pg_relation_size(c.oid) as bytes, 70 | pg_size_pretty(pg_relation_size(c.oid)) as size 71 | from pg_class c 72 | join pg_namespace n ON c.relnamespace = n.oid 73 | WHERE relkind = 'i' 74 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 75 | 76 | COMMENT ON VIEW index_size IS 77 | $$ 78 | This table is most useful in tracking down questions of bloat, fill factor, and 79 | performance of GIN indexes among other things. 
80 | $$; 81 | 82 | -- Tables by size (TOAST) 83 | 84 | CREATE VIEW relation_toast_size AS 85 | select c.oid, c.oid::regclass as relation, 86 | pg_relation_size(t.oid) as exclusive_bytes, 87 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 88 | from pg_class c 89 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 90 | join pg_namespace n ON c.relnamespace = n.oid; 91 | 92 | COMMENT ON VIEW relation_toast_size IS 93 | $$ 94 | This measures the amount of space in a relation's TOAST tables. These are 95 | populated when data exceeds what can be reasonably stored inline in the main 96 | heap pages. You would expect to see this non-zero where you have large fields 97 | being stored, particularly arrays of composite types. 98 | 99 | Performance-wise moving data to TOAST improves sequential scans where the data 100 | is not required (count(*) for example) at the cost of making the data that has 101 | been moved far more expensive to retrieve and process. 102 | $$; 103 | 104 | -- tablespaces size 105 | 106 | CREATE VIEW tablespace_size AS 107 | select spcname as name, pg_tablespace_size(oid) as bytes, 108 | pg_size_pretty(pg_tablespace_size(oid)) as size 109 | from pg_tablespace; 110 | 111 | COMMENT ON VIEW tablespace_size IS 112 | $$ 113 | This provides database-cluster-wide statistics on disk usage by tablespace. 114 | 115 | Note that tablespaces and databases are orthogonal. Typically if you are 116 | running out of disk space, you want to check this one first, then database_size 117 | and then the size of the relations in the largest database in that order. 118 | $$; 119 | 120 | -- database size 121 | 122 | CREATE VIEW database_size AS 123 | SELECT datname as name, pg_database_size(oid) as bytes, 124 | pg_size_pretty(pg_database_size(oid)) as size 125 | FROM pg_database; 126 | 127 | comment on view database_size is 128 | $$ 129 | This provides cluser-wide size statistics of databases. 
130 | $$; 131 | 132 | -- connections by application_name 133 | 134 | CREATE VIEW connections_by_application AS 135 | select application_name, count(*) 136 | from pg_stat_activity group by application_name; 137 | 138 | comment on view connections_by_application is 139 | $$ 140 | This gives you the number of connections (cluster-wide) by application name. 141 | 142 | By default the application name is the program name that connected to the db. 143 | $$; 144 | 145 | -- connections by state 146 | 147 | CREATE VIEW connections_by_state AS 148 | select case when wait_event is null then state else 'waiting' end as state, 149 | count(*) 150 | from pg_stat_activity group by 1; 151 | 152 | 153 | comment on view connections_by_state is 154 | $$ 155 | This gives you the number of connections (cluster-wide) by state (active, idle, 156 | idle in transaction, etc). If the query is active but is waiting on a lock or 157 | latch, we change this to 'waiting.' 158 | $$; 159 | 160 | 161 | -- connections by ip address source 162 | 163 | CREATE VIEW connections_by_ip_source as 164 | SELECT client_addr, count(*) from pg_stat_activity 165 | GROUP BY client_addr; 166 | 167 | comment on view connections_by_ip_source is 168 | $$ 169 | This is a cluster-wide breakdown of connections by IP source. Between this and 170 | the applicaiton_name it is a good indication of where server laod is coming from 171 | as well as porblems like connection handle leaks. 
172 | $$; 173 | 174 | -- table access stats 175 | -- longest-running active queries 176 | 177 | CREATE VIEW longest_running_active_queries AS 178 | select application_name, state, wait_event_type, wait_event, query, pid, 179 | client_addr, 180 | age(now(), query_start) as running_for 181 | from pg_stat_activity where state = 'active' 182 | ORDER BY age(now(), query_start) desc; 183 | 184 | comment on view longest_running_active_queries is 185 | $$ 186 | This view is intended to be typically used by administrators in determining 187 | which queries to focus on. However it can be used for reporting and alerting 188 | as well. 189 | $$; 190 | 191 | -- waiting connections 192 | 193 | CREATE VIEW waiting_connections_by_event_type AS 194 | select wait_event_type, count(*) from pg_stat_activity 195 | WHERE wait_event is not null 196 | GROUP BY wait_event_type; 197 | 198 | comment on view waiting_connections_by_event_type is 199 | $$ 200 | This view provides basic, cluster-global, statistics on why queries are waiting 201 | on other queries. 202 | $$; 203 | 204 | -- locks by type 205 | 206 | CREATE VIEW locks_by_type AS 207 | SELECT locktype, count(*) from pg_locks 208 | GROUP BY locktype; 209 | 210 | COMMENT ON VIEW locks_by_type is 211 | $$ 212 | This view provides cluster-wide statistics on what sorts of locks are present. 213 | 214 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 215 | helpful in determining where the locks are coming from. 216 | $$; 217 | 218 | -- locks by mode 219 | CREATE VIEW locks_by_mode AS 220 | SELECT mode, count(*) from pg_locks 221 | GROUP BY mode; 222 | 223 | COMMENT ON view locks_by_mode is 224 | $$ 225 | This view provides cluster-wide statistics on locks by lock mode (access share 226 | vs exclusive for example). Combined with the locks_by_type view, this view 227 | provides a some opportunities to spot locking problems. 
228 | $$; 229 | 230 | 231 | CREATE VIEW tuple_access_stats AS 232 | select schemaname, relname, 233 | seq_scan, seq_tup_read, 234 | idx_scan, idx_tup_fetch, 235 | n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, 236 | n_mod_since_analyze 237 | FROM pg_stat_user_tables; 238 | 239 | comment on view tuple_access_stats is 240 | $$ 241 | This view provides statistcs for scans (index and sequential) along with 242 | numbers of tuples updated through various means. It allows you to get a pretty 243 | good idea of where you may need indexes or where IO-related problems may be 244 | coming from. 245 | $$; 246 | 247 | -- autovacuum stats 248 | CREATE VIEW autovacuum_stats AS 249 | select schemaname, relname, 250 | last_vacuum, 251 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 252 | vacuum_count, 253 | last_autovacuum, 254 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 255 | autovacuum_count, 256 | last_analyze, 257 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 258 | analyze_count, 259 | last_autoanalyze, 260 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 261 | autoanalyze_count 262 | FROM pg_stat_user_tables; 263 | 264 | comment on view autovacuum_stats is 265 | $$ 266 | This provides basic metrics per table in the current database for when 267 | autovacuum and analyze were last run (as well as manual maintenance). 268 | $$; 269 | 270 | -- query stats 271 | 272 | -- call, time, rows 273 | 274 | CREATE VIEW statement_query_rows_time AS 275 | SELECT datname, queryid, query, sum(calls) as calls, 276 | sum(total_time) as total_time, sum(rows) as rows 277 | FROM pg_stat_statements 278 | JOIN pg_database d ON d.oid = dbid 279 | GROUP BY datname, queryid, query; 280 | 281 | comment on view statement_query_rows_time is 282 | $$ 283 | This gives aggregated of stats for a given query (cluster-wide) 284 | per query and database name. 
This view provides high level timing and row 285 | statistics. 286 | $$; 287 | 288 | -- buffers 289 | CREATE VIEW statement_query_buffers AS 290 | SELECT datname, queryid, query, sum(calls), 291 | sum(shared_blks_hit) as shared_blks_hit, 292 | sum(shared_blks_read) as shared_blks_read, 293 | sum(shared_blks_dirtied) as shared_blks_dirtied, 294 | sum(shared_blks_written) as shared_blks_written, 295 | sum(temp_blks_read) as tmp_blkd_read, 296 | sum(temp_blks_written) as tmp_blkd_written 297 | FROM pg_stat_statements 298 | JOIN pg_database d ON d.oid = dbid 299 | GROUP BY datname, queryid, query; 300 | 301 | comment on view statement_query_buffers is 302 | $$ 303 | This gives aggregated of stats for a given query (cluster-wide) 304 | per query and database name. This view provides low-level IO statistics. 305 | $$; 306 | 307 | -- WAL last state 308 | 309 | CREATE TABLE pg_telemetry_wal_log ( 310 | run_time numeric unique, -- epoch timestamp 311 | timestamp timestamp unique, 312 | lsn pg_lsn 313 | ); 314 | 315 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 316 | This table logs the times and results of wal telemetry readings so that 317 | deltas can be calculated. At least one row must be present to get any useful 318 | data out of the wal_telemetry() function at all. 319 | 320 | If you get one telemetry entry a minute, over the course of a year you will get 321 | just over half a million entries. These are indexed on both epoch and timestamp 322 | so access is not impaired, but if you want ot purge, be careful to leave at 323 | least one entry at the end. 324 | 325 | You can also process these as a time series using WINDOW functions like lag. 
326 | $$; 327 | 328 | -- WAL telemetry 329 | 330 | create or replace function wal_telemetry() returns table ( 331 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 332 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 333 | bytes_per_sec numeric 334 | ) language sql as $$ 335 | WITH insert_record AS ( 336 | insert into pg_telemetry_wal_log 337 | select extract('epoch' from now()), now(), 338 | pg_current_xlog_location() as wal_location 339 | WHERE NOT is_replica() 340 | returning * 341 | ), current_record AS ( 342 | select * from insert_record where not is_replica() 343 | UNION 344 | SELECT * from pg_telemetry_wal_log 345 | WHERE is_replica() 346 | order by run_time desc limit 1 347 | ) 348 | select c.run_time as current_epoch, l.run_time as last_epoch, 349 | c.run_time - l.run_time as secs_elapsed, 350 | c.lsn as current_lsn, l.lsn as last_lsn, 351 | c.lsn - l.lsn as bytes_elapsed, 352 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 353 | FROM current_record c, 354 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 355 | order by run_time desc limit 1) l; 356 | $$ set search_path from current; 357 | 358 | select wal_telemetry(); 359 | 360 | comment on function wal_telemetry() is $$ 361 | The wal_telemetry() function checks the current wal location and compares 362 | with the last entry in the pg_telemetry_wal_log. It then provides for you 363 | both current and last data, and the differences between them. These include 364 | bytes elapsed and seconds elapsed, and bytes per sec. 365 | 366 | The function is designed so that you can export delta information to a monitoring 367 | solution such as munin or prometheus without the latter having to know anything 368 | about lsn representation or losing information in the process. 369 | 370 | On a replica this does not write to the table and measures deltas from the last 371 | this was written on the master. 
372 | $$; 373 | 374 | -- Replication slots 375 | 376 | CREATE OR REPLACE VIEW replication_slot_lag as 377 | SELECT slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 378 | now() as querytime, CASE WHEN pg_is_in_recovery() 379 | THEN pg_last_xlog_replay_location() 380 | ELSE pg_current_xlog_location() END 381 | AS pg_current_xlog_location, 382 | CASE WHEN pg_is_in_recovery() THEN null::int 383 | ELSE pg_current_xlog_location() - restart_lsn END 384 | AS current_lag_bytes 385 | FROM pg_replication_slots s 386 | ORDER BY s.slot_name; 387 | 388 | COMMENT ON VIEW replication_slot_lag IS 389 | $$ 390 | This view monitors lag on downstream slots. It compares the last sent wal 391 | segment to the current known wal location. 392 | 393 | For master database, the current wal location is self-explanatory. For replicas 394 | we use the last received WAL location instead. Note that replicas can have 395 | replication slots for downstream replication tracking. 396 | $$; 397 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.0--1.1.sql: -------------------------------------------------------------------------------- 1 | create or replace function get_autovacuum_vacuum_info 2 | ( 3 | _except regclass[] default null, 4 | out queue_depth int8, 5 | out total_dead_tup int8 6 | ) 7 | returns record 8 | language plpgsql stable as 9 | $fnc$ 10 | begin 11 | select 12 | count(*), 13 | sum(n_dead_tup) 14 | into queue_depth, total_dead_tup 15 | from 16 | pg_class c, --lateral 17 | pg_stat_get_dead_tuples(c.oid) n_dead_tup 18 | where 19 | coalesce(c.oid != any (_except), true) 20 | and n_dead_tup > 21 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_threshold'), current_setting('autovacuum_vacuum_threshold')::int4)+ 22 | reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 
'autovacuum_vacuum_scale_factor'), current_setting('autovacuum_vacuum_scale_factor')::float4) 23 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 24 | ; 25 | return; 26 | end; 27 | $fnc$; 28 | 29 | create or replace function get_autovacuum_analyze_info 30 | ( 31 | _except regclass[] default array['pg_catalog.pg_statistic'], 32 | out queue_depth int8, 33 | out total_mod_since_analyze int8 34 | ) 35 | returns record 36 | language plpgsql stable as 37 | $fnc$ 38 | begin 39 | select 40 | count(*), 41 | sum(n_mod_since_analyze) 42 | into queue_depth, total_mod_since_analyze 43 | from 44 | pg_class c, 45 | pg_stat_get_mod_since_analyze(c.oid) n_mod_since_analyze 46 | where 47 | c.relnamespace != 'pg_toast'::regnamespace 48 | and coalesce(c.oid != any (_except), true) 49 | and n_mod_since_analyze > 50 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_threshold'), current_setting('autovacuum_analyze_threshold')::int4)+ 51 | reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_scale_factor'), current_setting('autovacuum_analyze_scale_factor')::float4) 52 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 53 | 54 | ; 55 | return; 56 | end; 57 | $fnc$; 58 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.0.sql: -------------------------------------------------------------------------------- 1 | 2 | -- cheating but needed to make plans safe on replicas. 
3 | create function is_replica() returns bool language sql IMMUTABLE 4 | AS $$ 5 | select pg_is_in_recovery(); 6 | $$; 7 | 8 | -- version compatibility 9 | do $d$ begin 10 | if version() not like 'PostgreSQL 9.%' then 11 | CREATE FUNCTION pg_current_xlog_location() RETURNS pg_lsn language sql as $$ 12 | select pg_current_wal_lsn(); 13 | $$; 14 | CREATE FUNCTION pg_last_xlog_replay_location() returns pg_lsn language sql as $$ 15 | select pg_last_wal_replay_lsn(); 16 | $$; 17 | end if; 18 | end;$d$ language plpgsql; 19 | 20 | -- disk space 21 | 22 | CREATE VIEW relation_total_size AS 23 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 24 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 25 | pg_relation_size(c.oid) as exclusive_bytes, 26 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 27 | from pg_class c 28 | join pg_namespace n ON c.relnamespace = n.oid 29 | WHERE relkind = 'r' 30 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 31 | 32 | COMMENT ON VIEW relation_total_size IS 33 | $$ 34 | This view provides basic information on relation size. Catalogs and tables 35 | in the information schema are exclused, as are TOAST tables. 36 | 37 | The inclusive metrics show the relation along with indexes and TOAST. The 38 | exclusiove metrics show without these things. The bytes metrics are intended 39 | for graph drawing, while the sizes are there for administrators who want to 40 | quickly query this information and make decisions. 
41 | $$; 42 | 43 | CREATE VIEW catalog_total_size AS 44 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 45 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 46 | pg_relation_size(c.oid) as exclusive_bytes, 47 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 48 | from pg_class c 49 | join pg_namespace n ON c.relnamespace = n.oid 50 | WHERE relkind = 'r' 51 | and n.nspname in ('pg_catalog', 'information_schema'); 52 | 53 | COMMENT ON VIEW relation_total_size IS 54 | $$ 55 | This view provides basic information on relation size in PostgreSQL system 56 | tables (those in pg_catalog and information_schema). 57 | 58 | The inclusive metrics show the relation along with indexes and TOAST. The 59 | exclusiove metrics show without these things. The bytes metrics are intended 60 | for graph drawing, while the sizes are there for administrators who want to 61 | quickly query this information and make decisions. 62 | $$; 63 | 64 | -- biggest indexes 65 | 66 | CREATE VIEW index_size AS 67 | select c.oid, c.oid::regclass as index, 68 | pg_relation_size(c.oid) as bytes, 69 | pg_size_pretty(pg_relation_size(c.oid)) as size 70 | from pg_class c 71 | join pg_namespace n ON c.relnamespace = n.oid 72 | WHERE relkind = 'i' 73 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 74 | 75 | COMMENT ON VIEW index_size IS 76 | $$ 77 | This table is most useful in tracking down questions of bloat, fill factor, and 78 | performance of GIN indexes among other things. 
79 | $$; 80 | 81 | -- Tables by size (TOAST) 82 | 83 | CREATE VIEW relation_toast_size AS 84 | select c.oid, c.oid::regclass as relation, 85 | pg_relation_size(t.oid) as exclusive_bytes, 86 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 87 | from pg_class c 88 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 89 | join pg_namespace n ON c.relnamespace = n.oid; 90 | 91 | COMMENT ON VIEW relation_toast_size IS 92 | $$ 93 | This measures the amount of space in a relation's TOAST tables. These are 94 | populated when data exceeds what can be reasonably stored inline in the main 95 | heap pages. You would expect to see this non-zero where you have large fields 96 | being stored, particularly arrays of composite types. 97 | 98 | Performance-wise moving data to TOAST improves sequential scans where the data 99 | is not required (count(*) for example) at the cost of making the data that has 100 | been moved far more expensive to retrieve and process. 101 | $$; 102 | 103 | -- tablespaces size 104 | 105 | CREATE VIEW tablespace_size AS 106 | select spcname as name, pg_tablespace_size(oid) as bytes, 107 | pg_size_pretty(pg_tablespace_size(oid)) as size 108 | from pg_tablespace; 109 | 110 | COMMENT ON VIEW tablespace_size IS 111 | $$ 112 | This provides database-cluster-wide statistics on disk usage by tablespace. 113 | 114 | Note that tablespaces and databases are orthogonal. Typically if you are 115 | running out of disk space, you want to check this one first, then database_size 116 | and then the size of the relations in the largest database in that order. 117 | $$; 118 | 119 | -- database size 120 | 121 | CREATE VIEW database_size AS 122 | SELECT datname as name, pg_database_size(oid) as bytes, 123 | pg_size_pretty(pg_database_size(oid)) as size 124 | FROM pg_database; 125 | 126 | comment on view database_size is 127 | $$ 128 | This provides cluser-wide size statistics of databases. 
129 | $$; 130 | 131 | -- connections by application_name 132 | 133 | CREATE VIEW connections_by_application AS 134 | select application_name, count(*) 135 | from pg_stat_activity group by application_name; 136 | 137 | comment on view connections_by_application is 138 | $$ 139 | This gives you the number of connections (cluster-wide) by application name. 140 | 141 | By default the application name is the program name that connected to the db. 142 | $$; 143 | 144 | -- connections by state 145 | 146 | CREATE VIEW connections_by_state AS 147 | select case when wait_event is null then state else 'waiting' end as state, 148 | count(*) 149 | from pg_stat_activity group by 1; 150 | 151 | CREATE VIEW waiting_queries_reason_details AS 152 | select wait_event_type, wait_event, count(*) from pg_stat_activity 153 | WHERE wait_event is not null 154 | GROUP BY wait_event_type, wait_event; 155 | 156 | 157 | comment on view connections_by_state is 158 | $$ 159 | This gives you the number of connections (cluster-wide) by state (active, idle, 160 | idle in transaction, etc). If the query is active but is waiting on a lock or 161 | latch, we change this to 'waiting.' 162 | $$; 163 | 164 | 165 | -- connections by ip address source 166 | 167 | CREATE VIEW connections_by_ip_source as 168 | SELECT client_addr, count(*) from pg_stat_activity 169 | GROUP BY client_addr; 170 | 171 | comment on view connections_by_ip_source is 172 | $$ 173 | This is a cluster-wide breakdown of connections by IP source. Between this and 174 | the application_name it is a good indication of where server load is coming from 175 | as well as problems like connection handle leaks. 
176 | $$; 177 | 178 | -- table access stats 179 | -- longest-running active queries 180 | 181 | CREATE VIEW longest_running_active_queries AS 182 | select application_name, state, wait_event_type, wait_event, query, pid, 183 | client_addr, 184 | age(now(), query_start) as running_for 185 | from pg_stat_activity where state = 'active' 186 | ORDER BY age(now(), query_start) desc; 187 | 188 | comment on view longest_running_active_queries is 189 | $$ 190 | This view is intended to be typically used by administrators in determining 191 | which queries to focus on. However it can be used for reporting and alerting 192 | as well. 193 | $$; 194 | 195 | -- waiting connections 196 | 197 | CREATE VIEW waiting_connections_by_event_type AS 198 | select wait_event_type, count(*) from pg_stat_activity 199 | WHERE wait_event is not null 200 | GROUP BY wait_event_type; 201 | 202 | comment on view waiting_connections_by_event_type is 203 | $$ 204 | This view provides basic, cluster-global, statistics on why queries are waiting 205 | on other queries. 206 | $$; 207 | 208 | -- locks by type 209 | 210 | CREATE VIEW locks_by_type AS 211 | SELECT locktype, count(*) from pg_locks 212 | GROUP BY locktype; 213 | 214 | COMMENT ON VIEW locks_by_type is 215 | $$ 216 | This view provides cluster-wide statistics on what sorts of locks are present. 217 | 218 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 219 | helpful in determining where the locks are coming from. 220 | $$; 221 | 222 | -- locks by mode 223 | CREATE VIEW locks_by_mode AS 224 | SELECT mode, count(*) from pg_locks 225 | GROUP BY mode; 226 | 227 | COMMENT ON view locks_by_mode is 228 | $$ 229 | This view provides cluster-wide statistics on locks by lock mode (access share 230 | vs exclusive for example). Combined with the locks_by_type view, this view 231 | provides a some opportunities to spot locking problems. 
232 | $$; 233 | 234 | 235 | CREATE VIEW tuple_access_stats AS 236 | select schemaname, relname, 237 | seq_scan, seq_tup_read, 238 | idx_scan, idx_tup_fetch, 239 | n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, 240 | n_mod_since_analyze 241 | FROM pg_stat_user_tables; 242 | 243 | comment on view tuple_access_stats is 244 | $$ 245 | This view provides statistics for scans (index and sequential) along with 246 | numbers of tuples updated through various means. It allows you to get a pretty 247 | good idea of where you may need indexes or where IO-related problems may be 248 | coming from. 249 | $$; 250 | 251 | -- autovacuum stats 252 | CREATE VIEW autovacuum_stats AS 253 | select schemaname, relname, 254 | last_vacuum, 255 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 256 | vacuum_count, 257 | last_autovacuum, 258 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 259 | autovacuum_count, 260 | last_analyze, 261 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 262 | analyze_count, 263 | last_autoanalyze, 264 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 265 | autoanalyze_count 266 | FROM pg_stat_user_tables; 267 | 268 | comment on view autovacuum_stats is 269 | $$ 270 | This provides basic metrics per table in the current database for when 271 | autovacuum and analyze were last run (as well as manual maintenance). 272 | $$; 273 | 274 | -- query stats 275 | 276 | -- call, time, rows 277 | 278 | CREATE VIEW statement_query_rows_time AS 279 | SELECT datname, queryid, query, sum(calls) as calls, 280 | sum(total_time) as total_time, sum(rows) as rows 281 | FROM pg_stat_statements 282 | JOIN pg_database d ON d.oid = dbid 283 | GROUP BY datname, queryid, query; 284 | 285 | comment on view statement_query_rows_time is 286 | $$ 287 | This gives aggregated stats for a given query (cluster-wide) 288 | per query and database name. 
This view provides high level timing and row 289 | statistics. 290 | $$; 291 | 292 | -- buffers 293 | CREATE VIEW statement_query_buffers AS 294 | SELECT datname, queryid, query, sum(calls), 295 | sum(shared_blks_hit) as shared_blks_hit, 296 | sum(shared_blks_read) as shared_blks_read, 297 | sum(shared_blks_dirtied) as shared_blks_dirtied, 298 | sum(shared_blks_written) as shared_blks_written, 299 | sum(temp_blks_read) as tmp_blkd_read, 300 | sum(temp_blks_written) as tmp_blkd_written 301 | FROM pg_stat_statements 302 | JOIN pg_database d ON d.oid = dbid 303 | GROUP BY datname, queryid, query; 304 | 305 | comment on view statement_query_buffers is 306 | $$ 307 | This gives aggregated of stats for a given query (cluster-wide) 308 | per query and database name. This view provides low-level IO statistics. 309 | $$; 310 | 311 | -- WAL last state 312 | 313 | CREATE TABLE pg_telemetry_wal_log ( 314 | run_time numeric unique, -- epoch timestamp 315 | timestamp timestamp unique, 316 | lsn pg_lsn 317 | ); 318 | 319 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 320 | This table logs the times and results of wal telemetry readings so that 321 | deltas can be calculated. At least one row must be present to get any useful 322 | data out of the wal_telemetry() function at all. 323 | 324 | If you get one telemetry entry a minute, over the course of a year you will get 325 | just over half a million entries. These are indexed on both epoch and timestamp 326 | so access is not impaired, but if you want ot purge, be careful to leave at 327 | least one entry at the end. 328 | 329 | You can also process these as a time series using WINDOW functions like lag. 
330 | $$; 331 | 332 | -- WAL telemetry 333 | 334 | create function wal_telemetry_create_or_select_record() 335 | returns pg_telemetry_wal_log language plpgsql as 336 | $$ 337 | declare log_entry pg_telemetry_wal_log; 338 | begin 339 | if pg_is_in_recovery() then 340 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 341 | else 342 | insert into pg_telemetry_wal_log 343 | select extract('epoch' from now()), now(), 344 | pg_current_xlog_location() as wal_location 345 | returning * into log_entry; 346 | end if; 347 | return log_entry; 348 | end; 349 | $$; 350 | 351 | create function wal_telemetry() returns table ( 352 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 353 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 354 | bytes_per_sec numeric 355 | ) language sql as $$ 356 | select c.run_time as current_epoch, l.run_time as last_epoch, 357 | c.run_time - l.run_time as secs_elapsed, 358 | c.lsn as current_lsn, l.lsn as last_lsn, 359 | c.lsn - l.lsn as bytes_elapsed, 360 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 361 | FROM wal_telemetry_create_or_select_record() c, 362 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 363 | order by run_time desc limit 1) l; 364 | $$ set search_path from current; 365 | 366 | select wal_telemetry(); 367 | 368 | comment on function wal_telemetry() is $$ 369 | The wal_telemetry() function checks the current wal location and compares 370 | with the last entry in the pg_telemetry_wal_log. It then provides for you 371 | both current and last data, and the differences between them. These include 372 | bytes elapsed and seconds elapsed, and bytes per sec. 373 | 374 | The function is designed so that you can export delta information to a monitoring 375 | solution such as munin or prometheus without the latter having to know anything 376 | about lsn representation or losing information in the process. 
377 | 378 | On a replica this does not write to the table and measures deltas from the last 379 | this was written on the master. 380 | $$; 381 | 382 | -- Replication slots 383 | 384 | CREATE OR REPLACE VIEW replication_slot_lag as 385 | SELECT slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 386 | now() as querytime, CASE WHEN pg_is_in_recovery() 387 | THEN pg_last_xlog_replay_location() 388 | ELSE pg_current_xlog_location() END 389 | AS pg_current_xlog_location, 390 | CASE WHEN pg_is_in_recovery() THEN null::int 391 | ELSE pg_current_xlog_location() - restart_lsn END 392 | AS current_lag_bytes 393 | FROM pg_replication_slots s 394 | ORDER BY s.slot_name; 395 | 396 | COMMENT ON VIEW replication_slot_lag IS 397 | $$ 398 | This view monitors lag on downstream slots. It compares the last sent wal 399 | segment to the current known wal location. 400 | 401 | For master database, the current wal location is self-explanatory. For replicas 402 | we use the last received WAL location instead. Note that replicas can have 403 | replication slots for downstream replication tracking. 404 | $$; 405 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.1--1.2.sql: -------------------------------------------------------------------------------- 1 | create view client_stat_activity as select * from pg_stat_activity where backend_type = 'client backend'; 2 | -- the following functions and views needs to be recreated 3 | -- in order to delete three obsolete functions they used to reference. 
4 | 5 | create or replace function wal_telemetry_create_or_select_record() 6 | returns pg_telemetry_wal_log language plpgsql as 7 | $$ 8 | declare log_entry pg_telemetry_wal_log; 9 | begin 10 | if pg_is_in_recovery() then 11 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 12 | else 13 | insert into pg_telemetry_wal_log 14 | select extract('epoch' from now()), now(), 15 | pg_current_wal_lsn() as wal_location 16 | returning * into log_entry; 17 | end if; 18 | return log_entry; 19 | end; 20 | $$; 21 | 22 | create or replace view replication_slot_lag as 23 | select slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 24 | now() as querytime, case when pg_is_in_recovery() 25 | then pg_last_wal_replay_lsn() 26 | else pg_current_wal_lsn() end 27 | as pg_current_xlog_location, 28 | case when pg_is_in_recovery() then null::int 29 | else pg_current_wal_lsn() - restart_lsn end 30 | as current_lag_bytes 31 | from pg_replication_slots s 32 | order by s.slot_name; 33 | 34 | create or replace function wal_telemetry_create_or_select_record() 35 | returns pg_telemetry_wal_log language plpgsql as 36 | $$ 37 | declare log_entry pg_telemetry_wal_log; 38 | begin 39 | if pg_is_in_recovery() then 40 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 41 | else 42 | insert into pg_telemetry_wal_log 43 | select extract('epoch' from now()), now(), 44 | pg_current_wal_lsn() as wal_location 45 | returning * into log_entry; 46 | end if; 47 | return log_entry; 48 | end; 49 | $$; 50 | 51 | drop function is_replica(); 52 | drop function if exists pg_current_xlog_location(); 53 | drop function if exists pg_last_xlog_replay_location(); 54 | 55 | create or replace view connections_by_ip_source as 56 | select client_addr, count(*) from client_stat_activity 57 | group by client_addr; 58 | 59 | -- count client backends waiting on a lock for more than given number of seconds 60 | create or replace function 
count_waiting_on_locks_more_than_seconds(int default 300) 61 | returns bigint 62 | as 63 | $$ 64 | select count(1) 65 | from pg_locks 66 | join @extschema@.client_stat_activity using(pid) 67 | where granted = 'f' and 68 | extract('epoch' from now() - query_start) > $1; 69 | $$ language sql; 70 | comment on function count_waiting_on_locks_more_than_seconds is 71 | $$ 72 | this function provides the number of client backend processes waiting on a lock 73 | for more than given number of seconds (5 minutes if not supplied). can be used to spot 74 | locking conflicts. 75 | $$; 76 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.1.sql: -------------------------------------------------------------------------------- 1 | create or replace function get_autovacuum_vacuum_info 2 | ( 3 | _except regclass[] default null, 4 | out queue_depth int8, 5 | out total_dead_tup int8 6 | ) 7 | returns record 8 | language plpgsql stable as 9 | $fnc$ 10 | begin 11 | select 12 | count(*), 13 | sum(n_dead_tup) 14 | into queue_depth, total_dead_tup 15 | from 16 | pg_class c, --lateral 17 | pg_stat_get_dead_tuples(c.oid) n_dead_tup 18 | where 19 | coalesce(c.oid != any (_except), true) 20 | and n_dead_tup > 21 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_threshold'), current_setting('autovacuum_vacuum_threshold')::int4)+ 22 | reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_scale_factor'), current_setting('autovacuum_vacuum_scale_factor')::float4) 23 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 24 | ; 25 | return; 26 | end; 27 | $fnc$; 28 | 29 | create or replace function get_autovacuum_analyze_info 30 | ( 31 | _except regclass[] default array['pg_catalog.pg_statistic'], 32 | out queue_depth int8, 33 | 
out total_mod_since_analyze int8 34 | ) 35 | returns record 36 | language plpgsql stable as 37 | $fnc$ 38 | begin 39 | select 40 | count(*), 41 | sum(n_mod_since_analyze) 42 | into queue_depth, total_mod_since_analyze 43 | from 44 | pg_class c, 45 | pg_stat_get_mod_since_analyze(c.oid) n_mod_since_analyze 46 | where 47 | c.relnamespace != 'pg_toast'::regnamespace 48 | and coalesce(c.oid != any (_except), true) 49 | and n_mod_since_analyze > 50 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_threshold'), current_setting('autovacuum_analyze_threshold')::int4)+ 51 | reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_scale_factor'), current_setting('autovacuum_analyze_scale_factor')::float4) 52 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 53 | ; 54 | return; 55 | end; 56 | $fnc$; 57 | 58 | -- cheating but needed to make plans safe on replicas. 
59 | create function is_replica() returns bool language sql IMMUTABLE 60 | AS $$ 61 | select pg_is_in_recovery(); 62 | $$; 63 | 64 | -- version compatibility 65 | do $d$ begin 66 | if version() not like 'PostgreSQL 9.%' then 67 | CREATE FUNCTION pg_current_xlog_location() RETURNS pg_lsn language sql as $$ 68 | select pg_current_wal_lsn(); 69 | $$; 70 | CREATE FUNCTION pg_last_xlog_replay_location() returns pg_lsn language sql as $$ 71 | select pg_last_wal_replay_lsn(); 72 | $$; 73 | end if; 74 | end;$d$ language plpgsql; 75 | 76 | -- disk space 77 | 78 | CREATE VIEW relation_total_size AS 79 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 80 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 81 | pg_relation_size(c.oid) as exclusive_bytes, 82 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 83 | from pg_class c 84 | join pg_namespace n ON c.relnamespace = n.oid 85 | WHERE relkind = 'r' 86 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 87 | 88 | COMMENT ON VIEW relation_total_size IS 89 | $$ 90 | This view provides basic information on relation size. Catalogs and tables 91 | in the information schema are exclused, as are TOAST tables. 92 | 93 | The inclusive metrics show the relation along with indexes and TOAST. The 94 | exclusiove metrics show without these things. The bytes metrics are intended 95 | for graph drawing, while the sizes are there for administrators who want to 96 | quickly query this information and make decisions. 
97 | $$; 98 | 99 | CREATE VIEW catalog_total_size AS 100 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 101 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 102 | pg_relation_size(c.oid) as exclusive_bytes, 103 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 104 | from pg_class c 105 | join pg_namespace n ON c.relnamespace = n.oid 106 | WHERE relkind = 'r' 107 | and n.nspname in ('pg_catalog', 'information_schema'); 108 | -- BUGFIX: this comment previously targeted relation_total_size, silently overwriting that view's comment. 109 | COMMENT ON VIEW catalog_total_size IS 110 | $$ 111 | This view provides basic information on relation size in PostgreSQL system 112 | tables (those in pg_catalog and information_schema). 113 | 114 | The inclusive metrics show the relation along with indexes and TOAST. The 115 | exclusive metrics show without these things. The bytes metrics are intended 116 | for graph drawing, while the sizes are there for administrators who want to 117 | quickly query this information and make decisions. 118 | $$; 119 | 120 | -- biggest indexes 121 | 122 | CREATE VIEW index_size AS 123 | select c.oid, c.oid::regclass as index, 124 | pg_relation_size(c.oid) as bytes, 125 | pg_size_pretty(pg_relation_size(c.oid)) as size 126 | from pg_class c 127 | join pg_namespace n ON c.relnamespace = n.oid 128 | WHERE relkind = 'i' 129 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 130 | 131 | COMMENT ON VIEW index_size IS 132 | $$ 133 | This table is most useful in tracking down questions of bloat, fill factor, and 134 | performance of GIN indexes among other things. 
135 | $$; 136 | 137 | -- Tables by size (TOAST) 138 | 139 | CREATE VIEW relation_toast_size AS 140 | select c.oid, c.oid::regclass as relation, 141 | pg_relation_size(t.oid) as exclusive_bytes, 142 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 143 | from pg_class c 144 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 145 | join pg_namespace n ON c.relnamespace = n.oid; 146 | 147 | COMMENT ON VIEW relation_toast_size IS 148 | $$ 149 | This measures the amount of space in a relation's TOAST tables. These are 150 | populated when data exceeds what can be reasonably stored inline in the main 151 | heap pages. You would expect to see this non-zero where you have large fields 152 | being stored, particularly arrays of composite types. 153 | 154 | Performance-wise moving data to TOAST improves sequential scans where the data 155 | is not required (count(*) for example) at the cost of making the data that has 156 | been moved far more expensive to retrieve and process. 157 | $$; 158 | 159 | -- tablespaces size 160 | 161 | CREATE VIEW tablespace_size AS 162 | select spcname as name, pg_tablespace_size(oid) as bytes, 163 | pg_size_pretty(pg_tablespace_size(oid)) as size 164 | from pg_tablespace; 165 | 166 | COMMENT ON VIEW tablespace_size IS 167 | $$ 168 | This provides database-cluster-wide statistics on disk usage by tablespace. 169 | 170 | Note that tablespaces and databases are orthogonal. Typically if you are 171 | running out of disk space, you want to check this one first, then database_size 172 | and then the size of the relations in the largest database in that order. 173 | $$; 174 | 175 | -- database size 176 | 177 | CREATE VIEW database_size AS 178 | SELECT datname as name, pg_database_size(oid) as bytes, 179 | pg_size_pretty(pg_database_size(oid)) as size 180 | FROM pg_database; 181 | 182 | comment on view database_size is 183 | $$ 184 | This provides cluser-wide size statistics of databases. 
185 | $$; 186 | 187 | -- connections by application_name 188 | 189 | CREATE VIEW connections_by_application AS 190 | select application_name, count(*) 191 | from pg_stat_activity group by application_name; 192 | 193 | comment on view connections_by_application is 194 | $$ 195 | This gives you the number of connections (cluster-wide) by application name. 196 | 197 | By default the application name is the program name that connected to the db. 198 | $$; 199 | 200 | -- connections by state 201 | 202 | CREATE VIEW connections_by_state AS 203 | select case when wait_event is null then state else 'waiting' end as state, 204 | count(*) 205 | from pg_stat_activity group by 1; 206 | 207 | CREATE VIEW waiting_queries_reason_details AS 208 | select wait_event_type, wait_event, count(*) from pg_stat_activity 209 | WHERE wait_event is not null 210 | GROUP BY wait_event_type, wait_event; 211 | 212 | 213 | comment on view connections_by_state is 214 | $$ 215 | This gives you the number of connections (cluster-wide) by state (active, idle, 216 | idle in transaction, etc). If the query is active but is waiting on a lock or 217 | latch, we change this to 'waiting.' 218 | $$; 219 | 220 | 221 | -- connections by ip address source 222 | 223 | CREATE VIEW connections_by_ip_source as 224 | SELECT client_addr, count(*) from pg_stat_activity 225 | GROUP BY client_addr; 226 | 227 | comment on view connections_by_ip_source is 228 | $$ 229 | This is a cluster-wide breakdown of connections by IP source. Between this and 230 | the applicaiton_name it is a good indication of where server laod is coming from 231 | as well as porblems like connection handle leaks. 
232 | $$; 233 | 234 | -- table access stats 235 | -- longest-running active queries 236 | 237 | CREATE VIEW longest_running_active_queries AS 238 | select application_name, state, wait_event_type, wait_event, query, pid, 239 | client_addr, 240 | age(now(), query_start) as running_for 241 | from pg_stat_activity where state = 'active' 242 | ORDER BY age(now(), query_start) desc; 243 | 244 | comment on view longest_running_active_queries is 245 | $$ 246 | This view is intended to be typically used by administrators in determining 247 | which queries to focus on. However it can be used for reporting and alerting 248 | as well. 249 | $$; 250 | 251 | -- waiting connections 252 | 253 | CREATE VIEW waiting_connections_by_event_type AS 254 | select wait_event_type, count(*) from pg_stat_activity 255 | WHERE wait_event is not null 256 | GROUP BY wait_event_type; 257 | 258 | comment on view waiting_connections_by_event_type is 259 | $$ 260 | This view provides basic, cluster-global, statistics on why queries are waiting 261 | on other queries. 262 | $$; 263 | 264 | -- locks by type 265 | 266 | CREATE VIEW locks_by_type AS 267 | SELECT locktype, count(*) from pg_locks 268 | GROUP BY locktype; 269 | 270 | COMMENT ON VIEW locks_by_type is 271 | $$ 272 | This view provides cluster-wide statistics on what sorts of locks are present. 273 | 274 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 275 | helpful in determining where the locks are coming from. 276 | $$; 277 | 278 | -- locks by mode 279 | CREATE VIEW locks_by_mode AS 280 | SELECT mode, count(*) from pg_locks 281 | GROUP BY mode; 282 | 283 | COMMENT ON view locks_by_mode is 284 | $$ 285 | This view provides cluster-wide statistics on locks by lock mode (access share 286 | vs exclusive for example). Combined with the locks_by_type view, this view 287 | provides a some opportunities to spot locking problems. 
288 | $$; 289 | 290 | 291 | CREATE VIEW tuple_access_stats AS 292 | select schemaname, relname, 293 | seq_scan, seq_tup_read, 294 | idx_scan, idx_tup_fetch, 295 | n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, 296 | n_mod_since_analyze 297 | FROM pg_stat_user_tables; 298 | 299 | comment on view tuple_access_stats is 300 | $$ 301 | This view provides statistcs for scans (index and sequential) along with 302 | numbers of tuples updated through various means. It allows you to get a pretty 303 | good idea of where you may need indexes or where IO-related problems may be 304 | coming from. 305 | $$; 306 | 307 | -- autovacuum stats 308 | CREATE VIEW autovacuum_stats AS 309 | select schemaname, relname, 310 | last_vacuum, 311 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 312 | vacuum_count, 313 | last_autovacuum, 314 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 315 | autovacuum_count, 316 | last_analyze, 317 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 318 | analyze_count, 319 | last_autoanalyze, 320 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 321 | autoanalyze_count 322 | FROM pg_stat_user_tables; 323 | 324 | comment on view autovacuum_stats is 325 | $$ 326 | This provides basic metrics per table in the current database for when 327 | autovacuum and analyze were last run (as well as manual maintenance). 328 | $$; 329 | 330 | -- query stats 331 | 332 | -- call, time, rows 333 | 334 | CREATE VIEW statement_query_rows_time AS 335 | SELECT datname, queryid, query, sum(calls) as calls, 336 | sum(total_time) as total_time, sum(rows) as rows 337 | FROM pg_stat_statements 338 | JOIN pg_database d ON d.oid = dbid 339 | GROUP BY datname, queryid, query; 340 | 341 | comment on view statement_query_rows_time is 342 | $$ 343 | This gives aggregated of stats for a given query (cluster-wide) 344 | per query and database name. 
This view provides high level timing and row 345 | statistics. 346 | $$; 347 | 348 | -- buffers 349 | CREATE VIEW statement_query_buffers AS 350 | SELECT datname, queryid, query, sum(calls), 351 | sum(shared_blks_hit) as shared_blks_hit, 352 | sum(shared_blks_read) as shared_blks_read, 353 | sum(shared_blks_dirtied) as shared_blks_dirtied, 354 | sum(shared_blks_written) as shared_blks_written, 355 | sum(temp_blks_read) as tmp_blkd_read, 356 | sum(temp_blks_written) as tmp_blkd_written 357 | FROM pg_stat_statements 358 | JOIN pg_database d ON d.oid = dbid 359 | GROUP BY datname, queryid, query; 360 | 361 | comment on view statement_query_buffers is 362 | $$ 363 | This gives aggregated of stats for a given query (cluster-wide) 364 | per query and database name. This view provides low-level IO statistics. 365 | $$; 366 | 367 | -- WAL last state 368 | 369 | CREATE TABLE pg_telemetry_wal_log ( 370 | run_time numeric unique, -- epoch timestamp 371 | timestamp timestamp unique, 372 | lsn pg_lsn 373 | ); 374 | 375 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 376 | This table logs the times and results of wal telemetry readings so that 377 | deltas can be calculated. At least one row must be present to get any useful 378 | data out of the wal_telemetry() function at all. 379 | 380 | If you get one telemetry entry a minute, over the course of a year you will get 381 | just over half a million entries. These are indexed on both epoch and timestamp 382 | so access is not impaired, but if you want ot purge, be careful to leave at 383 | least one entry at the end. 384 | 385 | You can also process these as a time series using WINDOW functions like lag. 
386 | $$; 387 | 388 | -- WAL telemetry 389 | 390 | create function wal_telemetry_create_or_select_record() 391 | returns pg_telemetry_wal_log language plpgsql as 392 | $$ 393 | declare log_entry pg_telemetry_wal_log; 394 | begin 395 | if pg_is_in_recovery() then 396 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 397 | else 398 | insert into pg_telemetry_wal_log 399 | select extract('epoch' from now()), now(), 400 | pg_current_xlog_location() as wal_location 401 | returning * into log_entry; 402 | end if; 403 | return log_entry; 404 | end; 405 | $$; 406 | 407 | create function wal_telemetry() returns table ( 408 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 409 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 410 | bytes_per_sec numeric 411 | ) language sql as $$ 412 | select c.run_time as current_epoch, l.run_time as last_epoch, 413 | c.run_time - l.run_time as secs_elapsed, 414 | c.lsn as current_lsn, l.lsn as last_lsn, 415 | c.lsn - l.lsn as bytes_elapsed, 416 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 417 | FROM wal_telemetry_create_or_select_record() c, 418 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 419 | order by run_time desc limit 1) l; 420 | $$ set search_path from current; 421 | 422 | select wal_telemetry(); 423 | 424 | comment on function wal_telemetry() is $$ 425 | The wal_telemetry() function checks the current wal location and compares 426 | with the last entry in the pg_telemetry_wal_log. It then provides for you 427 | both current and last data, and the differences between them. These include 428 | bytes elapsed and seconds elapsed, and bytes per sec. 429 | 430 | The function is designed so that you can export delta information to a monitoring 431 | solution such as munin or prometheus without the latter having to know anything 432 | about lsn representation or losing information in the process. 
433 | 434 | On a replica this does not write to the table and measures deltas from the last 435 | this was written on the master. 436 | $$; 437 | 438 | -- Replication slots 439 | 440 | CREATE OR REPLACE VIEW replication_slot_lag as 441 | SELECT slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 442 | now() as querytime, CASE WHEN pg_is_in_recovery() 443 | THEN pg_last_xlog_replay_location() 444 | ELSE pg_current_xlog_location() END 445 | AS pg_current_xlog_location, 446 | CASE WHEN pg_is_in_recovery() THEN null::int 447 | ELSE pg_current_xlog_location() - restart_lsn END 448 | AS current_lag_bytes 449 | FROM pg_replication_slots s 450 | ORDER BY s.slot_name; 451 | 452 | COMMENT ON VIEW replication_slot_lag IS 453 | $$ 454 | This view monitors lag on downstream slots. It compares the last sent wal 455 | segment to the current known wal location. 456 | 457 | For master database, the current wal location is self-explanatory. For replicas 458 | we use the last received WAL location instead. Note that replicas can have 459 | replication slots for downstream replication tracking. 
460 | $$; 461 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.2--1.3.sql: -------------------------------------------------------------------------------- 1 | create or replace function @extschema@.vacuum_progress() 2 | returns table 3 | ( 4 | datname name, 5 | schemaname name, 6 | relname name, 7 | phase text, 8 | heap_blks_total int8, 9 | heap_blks_scanned int8, 10 | heap_blks_vacuumed int8, 11 | index_vacuum_count int8, 12 | max_dead_tuples int8, 13 | num_dead_tuples int8 14 | ) 15 | language plpgsql stable as 16 | $fnc$ 17 | begin 18 | /* too lazy to check which version of pg the view was added in, so 19 | * just check if its there and return null if its not 20 | */ 21 | if exists (select from pg_class c where c.relnamespace='pg_catalog'::regnamespace and c.relname = 'pg_stat_progress_vacuum') then 22 | return query 23 | select 24 | v.datname, 25 | c.relnamespace::regnamespace::name as schemaname, 26 | c.relname, 27 | v.phase, 28 | v.heap_blks_total, 29 | v.heap_blks_scanned, 30 | v.heap_blks_vacuumed, 31 | v.index_vacuum_count, 32 | v.max_dead_tuples, 33 | v.num_dead_tuples 34 | from pg_stat_progress_vacuum v 35 | left join pg_class c on c.oid=v.relid; 36 | end if; 37 | 38 | return; 39 | end; 40 | $fnc$; 41 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.2.sql: -------------------------------------------------------------------------------- 1 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 2 | --\echo Use "CREATE EXTENSION pgtelemetry" to load this file. 
\quit 3 | 4 | -- filter non-backend connections from pg_stat_activity 5 | create view client_stat_activity as select * from pg_stat_activity where backend_type = 'client backend'; 6 | 7 | -- disk space 8 | CREATE VIEW relation_total_size AS 9 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 10 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 11 | pg_relation_size(c.oid) as exclusive_bytes, 12 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 13 | from pg_class c 14 | join pg_namespace n ON c.relnamespace = n.oid 15 | WHERE relkind = 'r' 16 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 17 | 18 | COMMENT ON VIEW relation_total_size IS 19 | $$ 20 | This view provides basic information on relation size. Catalogs and tables 21 | in the information schema are excluded, as are TOAST tables. 22 | 23 | The inclusive metrics show the relation along with indexes and TOAST. The 24 | exclusive metrics show without these things. The bytes metrics are intended 25 | for graph drawing, while the sizes are there for administrators who want to 26 | quickly query this information and make decisions. 27 | $$; 28 | 29 | CREATE VIEW catalog_total_size AS 30 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 31 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 32 | pg_relation_size(c.oid) as exclusive_bytes, 33 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 34 | from pg_class c 35 | join pg_namespace n ON c.relnamespace = n.oid 36 | WHERE relkind = 'r' 37 | and n.nspname in ('pg_catalog', 'information_schema'); 38 | 39 | COMMENT ON VIEW catalog_total_size IS 40 | $$ 41 | This view provides basic information on relation size in PostgreSQL system 42 | tables (those in pg_catalog and information_schema). 43 | 44 | The inclusive metrics show the relation along with indexes and TOAST. 
The 45 | exclusiove metrics show without these things. The bytes metrics are intended 46 | for graph drawing, while the sizes are there for administrators who want to 47 | quickly query this information and make decisions. 48 | $$; 49 | 50 | -- biggest indexes 51 | 52 | CREATE VIEW index_size AS 53 | select c.oid, c.oid::regclass as index, 54 | pg_relation_size(c.oid) as bytes, 55 | pg_size_pretty(pg_relation_size(c.oid)) as size 56 | from pg_class c 57 | join pg_namespace n ON c.relnamespace = n.oid 58 | WHERE relkind = 'i' 59 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 60 | 61 | COMMENT ON VIEW index_size IS 62 | $$ 63 | This table is most useful in tracking down questions of bloat, fill factor, and 64 | performance of GIN indexes among other things. 65 | $$; 66 | 67 | -- Tables by size (TOAST) 68 | 69 | CREATE VIEW relation_toast_size AS 70 | select c.oid, c.oid::regclass as relation, 71 | pg_relation_size(t.oid) as exclusive_bytes, 72 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 73 | from pg_class c 74 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 75 | join pg_namespace n ON c.relnamespace = n.oid; 76 | 77 | COMMENT ON VIEW relation_toast_size IS 78 | $$ 79 | This measures the amount of space in a relation's TOAST tables. These are 80 | populated when data exceeds what can be reasonably stored inline in the main 81 | heap pages. You would expect to see this non-zero where you have large fields 82 | being stored, particularly arrays of composite types. 83 | 84 | Performance-wise moving data to TOAST improves sequential scans where the data 85 | is not required (count(*) for example) at the cost of making the data that has 86 | been moved far more expensive to retrieve and process. 
87 | $$; 88 | 89 | -- tablespaces size 90 | 91 | CREATE VIEW tablespace_size AS 92 | select spcname as name, pg_tablespace_size(oid) as bytes, 93 | pg_size_pretty(pg_tablespace_size(oid)) as size 94 | from pg_tablespace; 95 | 96 | COMMENT ON VIEW tablespace_size IS 97 | $$ 98 | This provides database-cluster-wide statistics on disk usage by tablespace. 99 | 100 | Note that tablespaces and databases are orthogonal. Typically if you are 101 | running out of disk space, you want to check this one first, then database_size 102 | and then the size of the relations in the largest database in that order. 103 | $$; 104 | 105 | -- database size 106 | 107 | CREATE VIEW database_size AS 108 | SELECT datname as name, pg_database_size(oid) as bytes, 109 | pg_size_pretty(pg_database_size(oid)) as size 110 | FROM pg_database; 111 | 112 | comment on view database_size is 113 | $$ 114 | This provides cluser-wide size statistics of databases. 115 | $$; 116 | 117 | -- connections by application_name 118 | 119 | CREATE VIEW connections_by_application AS 120 | select application_name, count(*) 121 | from pg_stat_activity group by application_name; 122 | 123 | comment on view connections_by_application is 124 | $$ 125 | This gives you the number of connections (cluster-wide) by application name. 126 | 127 | By default the application name is the program name that connected to the db. 
128 | $$; 129 | 130 | -- connections by state 131 | 132 | CREATE VIEW connections_by_state AS 133 | select case when wait_event is null then state else 'waiting' end as state, 134 | count(*) 135 | from pg_stat_activity group by 1; 136 | 137 | CREATE VIEW waiting_queries_reason_details AS 138 | select wait_event_type, wait_event, count(*) from pg_stat_activity 139 | WHERE wait_event is not null 140 | GROUP BY wait_event_type, wait_event; 141 | 142 | 143 | comment on view connections_by_state is 144 | $$ 145 | This gives you the number of connections (cluster-wide) by state (active, idle, 146 | idle in transaction, etc). If the query is active but is waiting on a lock or 147 | latch, we change this to 'waiting.' 148 | $$; 149 | 150 | 151 | -- connections by ip address source 152 | 153 | CREATE VIEW connections_by_ip_source as 154 | SELECT client_addr, 155 | count(*) as count, 156 | count(*) filter(where state = 'active') as active_count, 157 | count(*) filter(where state = 'idle in transaction' or state = 'idle in transaction (aborted)') as idle_in_transaction_count, 158 | count(*) filter(where state = 'idle') as idle_count 159 | from @extschema@.client_stat_activity 160 | GROUP BY client_addr; 161 | 162 | comment on view connections_by_ip_source is 163 | $$ 164 | This is a cluster-wide breakdown of connections by IP source. Between this and 165 | the applicaiton_name it is a good indication of where server laod is coming from 166 | as well as porblems like connection handle leaks. 
167 | $$; 168 | 169 | -- table access stats 170 | -- longest-running active queries 171 | 172 | CREATE VIEW longest_running_active_queries AS 173 | select application_name, state, wait_event_type, wait_event, query, pid, 174 | client_addr, 175 | age(now(), query_start) as running_for 176 | from pg_stat_activity where state = 'active' 177 | ORDER BY age(now(), query_start) desc; 178 | 179 | comment on view longest_running_active_queries is 180 | $$ 181 | This view is intended to be typically used by administrators in determining 182 | which queries to focus on. However it can be used for reporting and alerting 183 | as well. 184 | $$; 185 | 186 | -- waiting connections 187 | 188 | CREATE VIEW waiting_connections_by_event_type AS 189 | select wait_event_type, count(*) from pg_stat_activity 190 | WHERE wait_event is not null 191 | GROUP BY wait_event_type; 192 | 193 | comment on view waiting_connections_by_event_type is 194 | $$ 195 | This view provides basic, cluster-global, statistics on why queries are waiting 196 | on other queries. 197 | $$; 198 | 199 | -- locks by type 200 | 201 | CREATE VIEW locks_by_type AS 202 | SELECT locktype, count(*) from pg_locks 203 | GROUP BY locktype; 204 | 205 | COMMENT ON VIEW locks_by_type is 206 | $$ 207 | This view provides cluster-wide statistics on what sorts of locks are present. 208 | 209 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 210 | helpful in determining where the locks are coming from. 211 | $$; 212 | 213 | -- locks by mode 214 | CREATE VIEW locks_by_mode AS 215 | SELECT mode, count(*) from pg_locks 216 | GROUP BY mode; 217 | 218 | COMMENT ON view locks_by_mode is 219 | $$ 220 | This view provides cluster-wide statistics on locks by lock mode (access share 221 | vs exclusive for example). Combined with the locks_by_type view, this view 222 | provides a some opportunities to spot locking problems. 
223 | $$; 224 | 225 | -- count client backends waiting on a lock for more than given number of seconds 226 | create or replace function count_waiting_on_locks_more_than_seconds(int default 300) 227 | returns bigint 228 | as 229 | $$ 230 | select count(1) 231 | from pg_locks 232 | join @extschema@.client_stat_activity using(pid) 233 | where granted = 'f' and 234 | extract('epoch' from now() - query_start) > $1; 235 | $$ language sql; 236 | 237 | comment on function count_waiting_on_locks_more_than_seconds is 238 | $$ 239 | This function provides the number of client backend processes waiting on a lock 240 | for more than given number of seconds (5 minutes if not supplied). Can be used to spot 241 | locking conflicts. 242 | $$; 243 | 244 | 245 | CREATE VIEW tuple_access_stats AS 246 | select schemaname, relname, 247 | seq_scan, seq_tup_read, 248 | idx_scan, idx_tup_fetch, 249 | n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, 250 | n_mod_since_analyze 251 | FROM pg_stat_user_tables; 252 | 253 | comment on view tuple_access_stats is 254 | $$ 255 | This view provides statistcs for scans (index and sequential) along with 256 | numbers of tuples updated through various means. It allows you to get a pretty 257 | good idea of where you may need indexes or where IO-related problems may be 258 | coming from. 
259 | $$; 260 | 261 | -- autovacuum stats 262 | CREATE VIEW autovacuum_stats AS 263 | select schemaname, relname, 264 | last_vacuum, 265 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 266 | vacuum_count, 267 | last_autovacuum, 268 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 269 | autovacuum_count, 270 | last_analyze, 271 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 272 | analyze_count, 273 | last_autoanalyze, 274 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 275 | autoanalyze_count 276 | FROM pg_stat_user_tables; 277 | 278 | comment on view autovacuum_stats is 279 | $$ 280 | This provides basic metrics per table in the current database for when 281 | autovacuum and analyze were last run (as well as manual maintenance). 282 | $$; 283 | 284 | -- query stats 285 | 286 | -- call, time, rows 287 | 288 | CREATE VIEW statement_query_rows_time AS 289 | SELECT datname, queryid, query, sum(calls) as calls, 290 | sum(total_time) as total_time, sum(rows) as rows 291 | FROM pg_stat_statements 292 | JOIN pg_database d ON d.oid = dbid 293 | GROUP BY datname, queryid, query; 294 | 295 | comment on view statement_query_rows_time is 296 | $$ 297 | This gives aggregated of stats for a given query (cluster-wide) 298 | per query and database name. This view provides high level timing and row 299 | statistics. 
300 | $$; 301 | 302 | -- buffers 303 | CREATE VIEW statement_query_buffers AS 304 | SELECT datname, queryid, query, sum(calls), 305 | sum(shared_blks_hit) as shared_blks_hit, 306 | sum(shared_blks_read) as shared_blks_read, 307 | sum(shared_blks_dirtied) as shared_blks_dirtied, 308 | sum(shared_blks_written) as shared_blks_written, 309 | sum(temp_blks_read) as tmp_blkd_read, 310 | sum(temp_blks_written) as tmp_blkd_written 311 | FROM pg_stat_statements 312 | JOIN pg_database d ON d.oid = dbid 313 | GROUP BY datname, queryid, query; 314 | 315 | comment on view statement_query_buffers is 316 | $$ 317 | This gives aggregated of stats for a given query (cluster-wide) 318 | per query and database name. This view provides low-level IO statistics. 319 | $$; 320 | 321 | -- WAL last state 322 | 323 | CREATE TABLE pg_telemetry_wal_log ( 324 | run_time numeric unique, -- epoch timestamp 325 | timestamp timestamp unique, 326 | lsn pg_lsn 327 | ); 328 | 329 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 330 | This table logs the times and results of wal telemetry readings so that 331 | deltas can be calculated. At least one row must be present to get any useful 332 | data out of the wal_telemetry() function at all. 333 | 334 | If you get one telemetry entry a minute, over the course of a year you will get 335 | just over half a million entries. These are indexed on both epoch and timestamp 336 | so access is not impaired, but if you want ot purge, be careful to leave at 337 | least one entry at the end. 338 | 339 | You can also process these as a time series using WINDOW functions like lag. 
340 | $$; 341 | 342 | -- WAL telemetry 343 | 344 | create function wal_telemetry_create_or_select_record() 345 | returns pg_telemetry_wal_log language plpgsql as 346 | $$ 347 | declare log_entry pg_telemetry_wal_log; 348 | begin 349 | if pg_is_in_recovery() then 350 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 351 | else 352 | insert into pg_telemetry_wal_log 353 | select extract('epoch' from now()), now(), 354 | pg_current_wal_lsn() as wal_location 355 | returning * into log_entry; 356 | end if; 357 | return log_entry; 358 | end; 359 | $$; 360 | 361 | create function wal_telemetry() returns table ( 362 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 363 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 364 | bytes_per_sec numeric 365 | ) language sql as $$ 366 | select c.run_time as current_epoch, l.run_time as last_epoch, 367 | c.run_time - l.run_time as secs_elapsed, 368 | c.lsn as current_lsn, l.lsn as last_lsn, 369 | c.lsn - l.lsn as bytes_elapsed, 370 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 371 | FROM wal_telemetry_create_or_select_record() c, 372 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 373 | order by run_time desc limit 1) l; 374 | $$ set search_path from current; 375 | 376 | select wal_telemetry(); 377 | 378 | comment on function wal_telemetry() is $$ 379 | The wal_telemetry() function checks the current wal location and compares 380 | with the last entry in the pg_telemetry_wal_log. It then provides for you 381 | both current and last data, and the differences between them. These include 382 | bytes elapsed and seconds elapsed, and bytes per sec. 383 | 384 | The function is designed so that you can export delta information to a monitoring 385 | solution such as munin or prometheus without the latter having to know anything 386 | about lsn representation or losing information in the process. 
387 | 388 | On a replica this does not write to the table and measures deltas from the last 389 | time this was written on the master. 390 | $$; 391 | 392 | -- Replication slots 393 | 394 | CREATE OR REPLACE VIEW replication_slot_lag as 395 | SELECT slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 396 | now() as querytime, CASE WHEN pg_is_in_recovery() 397 | THEN pg_last_wal_replay_lsn() 398 | ELSE pg_current_wal_lsn() END 399 | AS pg_current_xlog_location, 400 | CASE WHEN pg_is_in_recovery() THEN null::int 401 | ELSE pg_current_wal_lsn() - restart_lsn END 402 | AS current_lag_bytes 403 | FROM pg_replication_slots s 404 | ORDER BY s.slot_name; 405 | 406 | COMMENT ON VIEW replication_slot_lag IS 407 | $$ 408 | This view monitors lag on downstream slots. It compares the last sent wal 409 | segment to the current known wal location. 410 | 411 | For master database, the current wal location is self-explanatory. For replicas 412 | we use the last received WAL location instead. Note that replicas can have 413 | replication slots for downstream replication tracking. 
414 | $$; 415 | 416 | create or replace function get_autovacuum_vacuum_info 417 | ( 418 | _except regclass[] default null, 419 | out queue_depth int8, 420 | out total_dead_tup int8 421 | ) 422 | returns record 423 | language plpgsql stable as 424 | $fnc$ 425 | begin 426 | select 427 | count(*), 428 | sum(n_dead_tup) 429 | into queue_depth, total_dead_tup 430 | from 431 | pg_class c, --lateral 432 | pg_stat_get_dead_tuples(c.oid) n_dead_tup 433 | where 434 | coalesce(c.oid != any (_except), true) 435 | and n_dead_tup > 436 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_threshold'), current_setting('autovacuum_vacuum_threshold')::int4)+ 437 | reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_scale_factor'), current_setting('autovacuum_vacuum_scale_factor')::float4) 438 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 439 | ; 440 | return; 441 | end; 442 | $fnc$; 443 | 444 | create or replace function get_autovacuum_analyze_info 445 | ( 446 | _except regclass[] default array['pg_catalog.pg_statistic'], 447 | out queue_depth int8, 448 | out total_mod_since_analyze int8 449 | ) 450 | returns record 451 | language plpgsql stable as 452 | $fnc$ 453 | begin 454 | select 455 | count(*), 456 | sum(n_mod_since_analyze) 457 | into queue_depth, total_mod_since_analyze 458 | from 459 | pg_class c, 460 | pg_stat_get_mod_since_analyze(c.oid) n_mod_since_analyze 461 | where 462 | c.relnamespace != 'pg_toast'::regnamespace 463 | and coalesce(c.oid != any (_except), true) 464 | and n_mod_since_analyze > 465 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_threshold'), current_setting('autovacuum_analyze_threshold')::int4)+ 466 | reltuples*coalesce((select option_value::float4 from 
pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_scale_factor'), current_setting('autovacuum_analyze_scale_factor')::float4) 467 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 468 | ; 469 | return; 470 | end; 471 | $fnc$; -------------------------------------------------------------------------------- /extension/pgtelemetry--1.3--1.4.sql: -------------------------------------------------------------------------------- 1 | create table @extschema@.long_running_query_rules 2 | ( 3 | priority int4, 4 | application_name_ilike text, 5 | usename name, 6 | state text, 7 | alert_threshold interval not null, 8 | active_since timestamptz, 9 | active_until timestamptz, 10 | created_at timestamptz default now(), 11 | comment text 12 | ); 13 | 14 | insert into @extschema@.long_running_query_rules(priority, application_name_ilike, usename, state, alert_threshold) 15 | values 16 | (0, 'pg\_dump', null, null, interval'6 hours'), -- pg_dump 6 hours 17 | (0, 'pg2ch', null, null, interval'3 hours'), -- pg2ch 3 hours 18 | (100, null, null, 'idle in transaction', interval'5 minutes'), -- any idle transaction 5 minutes 19 | (100, null, null, 'idle in transaction (aborted)', interval'5 minutes'), -- same as above, except one of the statements in the transaction caused an error 20 | (1000, null, null, null, interval'1 hour'); -- anything else 1 hour 21 | 22 | create or replace view @extschema@.long_running_queries 23 | ( 24 | duration, 25 | pid, 26 | is_slave, 27 | application_name, 28 | username, 29 | database, 30 | backend_type, 31 | client_addr, 32 | state, 33 | wait_event, 34 | wait_event_type, 35 | query 36 | ) as 37 | select 38 | now() - p.query_start as duration, 39 | p.pid, 40 | pg_is_in_recovery() as is_slave, 41 | p.application_name, 42 | p.usename, 43 | p.datname as database, 44 | p.backend_type, 45 | p.client_addr, 46 | p.state, 47 | p.wait_event, 48 | 
p.wait_event_type, 49 | p.query 50 | from pg_catalog.pg_stat_activity p, 51 | lateral 52 | ( 53 | select 54 | alert_threshold, 55 | active_since, 56 | active_until 57 | from @extschema@.long_running_query_rules l 58 | where 59 | coalesce(p.application_name ilike l.application_name_ilike, true) 60 | and coalesce(p.usename = l.usename, true) 61 | and coalesce(p.state = l.state, true) 62 | order by priority asc 63 | limit 1 64 | ) l 65 | where 66 | p.state != 'idle' 67 | and backend_type = 'client backend' 68 | and age(now(), query_start) > l.alert_threshold 69 | and coalesce(now() >= l.active_since, true) 70 | and coalesce(now() <= l.active_until, true) 71 | order by duration desc; 72 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.3.sql: -------------------------------------------------------------------------------- 1 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 2 | --\echo Use "CREATE EXTENSION pgtelemetry" to load this file. \quit 3 | 4 | -- filter non-backend connections from pg_stat_activity 5 | create view client_stat_activity as select * from pg_stat_activity where backend_type = 'client backend'; 6 | 7 | -- disk space 8 | CREATE VIEW relation_total_size AS 9 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 10 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 11 | pg_relation_size(c.oid) as exclusive_bytes, 12 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 13 | from pg_class c 14 | join pg_namespace n ON c.relnamespace = n.oid 15 | WHERE relkind = 'r' 16 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 17 | 18 | COMMENT ON VIEW relation_total_size IS 19 | $$ 20 | This view provides basic information on relation size. Catalogs and tables 21 | in the information schema are exclused, as are TOAST tables. 
22 | 23 | The inclusive metrics show the relation along with indexes and TOAST. The 24 | exclusive metrics show without these things. The bytes metrics are intended 25 | for graph drawing, while the sizes are there for administrators who want to 26 | quickly query this information and make decisions. 27 | $$; 28 | 29 | CREATE VIEW catalog_total_size AS 30 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 31 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 32 | pg_relation_size(c.oid) as exclusive_bytes, 33 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 34 | from pg_class c 35 | join pg_namespace n ON c.relnamespace = n.oid 36 | WHERE relkind = 'r' 37 | and n.nspname in ('pg_catalog', 'information_schema'); 38 | 39 | COMMENT ON VIEW catalog_total_size IS 40 | $$ 41 | This view provides basic information on relation size in PostgreSQL system 42 | tables (those in pg_catalog and information_schema). 43 | 44 | The inclusive metrics show the relation along with indexes and TOAST. The 45 | exclusive metrics show without these things. The bytes metrics are intended 46 | for graph drawing, while the sizes are there for administrators who want to 47 | quickly query this information and make decisions. 48 | $$; 49 | 50 | -- biggest indexes 51 | 52 | CREATE VIEW index_size AS 53 | select c.oid, c.oid::regclass as index, 54 | pg_relation_size(c.oid) as bytes, 55 | pg_size_pretty(pg_relation_size(c.oid)) as size 56 | from pg_class c 57 | join pg_namespace n ON c.relnamespace = n.oid 58 | WHERE relkind = 'i' 59 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 60 | 61 | COMMENT ON VIEW index_size IS 62 | $$ 63 | This table is most useful in tracking down questions of bloat, fill factor, and 64 | performance of GIN indexes among other things. 
65 | $$; 66 | 67 | -- Tables by size (TOAST) 68 | 69 | CREATE VIEW relation_toast_size AS 70 | select c.oid, c.oid::regclass as relation, 71 | pg_relation_size(t.oid) as exclusive_bytes, 72 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 73 | from pg_class c 74 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 75 | join pg_namespace n ON c.relnamespace = n.oid; 76 | 77 | COMMENT ON VIEW relation_toast_size IS 78 | $$ 79 | This measures the amount of space in a relation's TOAST tables. These are 80 | populated when data exceeds what can be reasonably stored inline in the main 81 | heap pages. You would expect to see this non-zero where you have large fields 82 | being stored, particularly arrays of composite types. 83 | 84 | Performance-wise moving data to TOAST improves sequential scans where the data 85 | is not required (count(*) for example) at the cost of making the data that has 86 | been moved far more expensive to retrieve and process. 87 | $$; 88 | 89 | -- tablespaces size 90 | 91 | CREATE VIEW tablespace_size AS 92 | select spcname as name, pg_tablespace_size(oid) as bytes, 93 | pg_size_pretty(pg_tablespace_size(oid)) as size 94 | from pg_tablespace; 95 | 96 | COMMENT ON VIEW tablespace_size IS 97 | $$ 98 | This provides database-cluster-wide statistics on disk usage by tablespace. 99 | 100 | Note that tablespaces and databases are orthogonal. Typically if you are 101 | running out of disk space, you want to check this one first, then database_size 102 | and then the size of the relations in the largest database in that order. 103 | $$; 104 | 105 | -- database size 106 | 107 | CREATE VIEW database_size AS 108 | SELECT datname as name, pg_database_size(oid) as bytes, 109 | pg_size_pretty(pg_database_size(oid)) as size 110 | FROM pg_database; 111 | 112 | comment on view database_size is 113 | $$ 114 | This provides cluser-wide size statistics of databases. 
115 | $$; 116 | 117 | -- connections by application_name 118 | 119 | CREATE VIEW connections_by_application AS 120 | select application_name, count(*) 121 | from pg_stat_activity group by application_name; 122 | 123 | comment on view connections_by_application is 124 | $$ 125 | This gives you the number of connections (cluster-wide) by application name. 126 | 127 | By default the application name is the program name that connected to the db. 128 | $$; 129 | 130 | -- connections by state 131 | 132 | CREATE VIEW connections_by_state AS 133 | select case when wait_event is null then state else 'waiting' end as state, 134 | count(*) 135 | from pg_stat_activity group by 1; 136 | 137 | CREATE VIEW waiting_queries_reason_details AS 138 | select wait_event_type, wait_event, count(*) from pg_stat_activity 139 | WHERE wait_event is not null 140 | GROUP BY wait_event_type, wait_event; 141 | 142 | 143 | comment on view connections_by_state is 144 | $$ 145 | This gives you the number of connections (cluster-wide) by state (active, idle, 146 | idle in transaction, etc). If the query is active but is waiting on a lock or 147 | latch, we change this to 'waiting.' 148 | $$; 149 | 150 | 151 | -- connections by ip address source 152 | 153 | CREATE VIEW connections_by_ip_source as 154 | SELECT client_addr, 155 | count(*) as count, 156 | count(*) filter(where state = 'active') as active_count, 157 | count(*) filter(where state = 'idle in transaction' or state = 'idle in transaction (aborted)') as idle_in_transaction_count, 158 | count(*) filter(where state = 'idle') as idle_count 159 | from @extschema@.client_stat_activity 160 | GROUP BY client_addr; 161 | 162 | comment on view connections_by_ip_source is 163 | $$ 164 | This is a cluster-wide breakdown of connections by IP source. Between this and 165 | the applicaiton_name it is a good indication of where server laod is coming from 166 | as well as porblems like connection handle leaks. 
167 | $$; 168 | 169 | -- table access stats 170 | -- longest-running active queries 171 | 172 | CREATE VIEW longest_running_active_queries AS 173 | select application_name, state, wait_event_type, wait_event, query, pid, 174 | client_addr, 175 | age(now(), query_start) as running_for 176 | from pg_stat_activity where state = 'active' 177 | ORDER BY age(now(), query_start) desc; 178 | 179 | comment on view longest_running_active_queries is 180 | $$ 181 | This view is intended to be typically used by administrators in determining 182 | which queries to focus on. However it can be used for reporting and alerting 183 | as well. 184 | $$; 185 | 186 | -- waiting connections 187 | 188 | CREATE VIEW waiting_connections_by_event_type AS 189 | select wait_event_type, count(*) from pg_stat_activity 190 | WHERE wait_event is not null 191 | GROUP BY wait_event_type; 192 | 193 | comment on view waiting_connections_by_event_type is 194 | $$ 195 | This view provides basic, cluster-global, statistics on why queries are waiting 196 | on other queries. 197 | $$; 198 | 199 | -- locks by type 200 | 201 | CREATE VIEW locks_by_type AS 202 | SELECT locktype, count(*) from pg_locks 203 | GROUP BY locktype; 204 | 205 | COMMENT ON VIEW locks_by_type is 206 | $$ 207 | This view provides cluster-wide statistics on what sorts of locks are present. 208 | 209 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 210 | helpful in determining where the locks are coming from. 211 | $$; 212 | 213 | -- locks by mode 214 | CREATE VIEW locks_by_mode AS 215 | SELECT mode, count(*) from pg_locks 216 | GROUP BY mode; 217 | 218 | COMMENT ON view locks_by_mode is 219 | $$ 220 | This view provides cluster-wide statistics on locks by lock mode (access share 221 | vs exclusive for example). Combined with the locks_by_type view, this view 222 | provides a some opportunities to spot locking problems. 
223 | $$; 224 | 225 | -- count client backends waiting on a lock for more than given number of seconds 226 | create or replace function count_waiting_on_locks_more_than_seconds(int default 300) 227 | returns bigint 228 | as 229 | $$ 230 | select count(1) 231 | from pg_locks 232 | join @extschema@.client_stat_activity using(pid) 233 | where granted = 'f' and 234 | extract('epoch' from now() - query_start) > $1; 235 | $$ language sql; 236 | 237 | comment on function count_waiting_on_locks_more_than_seconds is 238 | $$ 239 | This function provides the number of client backend processes waiting on a lock 240 | for more than given number of seconds (5 minutes if not supplied). Can be used to spot 241 | locking conflicts. 242 | $$; 243 | 244 | 245 | CREATE VIEW tuple_access_stats AS 246 | select schemaname, relname, 247 | seq_scan, seq_tup_read, 248 | idx_scan, idx_tup_fetch, 249 | n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, 250 | n_mod_since_analyze 251 | FROM pg_stat_user_tables; 252 | 253 | comment on view tuple_access_stats is 254 | $$ 255 | This view provides statistcs for scans (index and sequential) along with 256 | numbers of tuples updated through various means. It allows you to get a pretty 257 | good idea of where you may need indexes or where IO-related problems may be 258 | coming from. 
259 | $$; 260 | 261 | -- autovacuum stats 262 | CREATE VIEW autovacuum_stats AS 263 | select schemaname, relname, 264 | last_vacuum, 265 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 266 | vacuum_count, 267 | last_autovacuum, 268 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 269 | autovacuum_count, 270 | last_analyze, 271 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 272 | analyze_count, 273 | last_autoanalyze, 274 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 275 | autoanalyze_count 276 | FROM pg_stat_user_tables; 277 | 278 | comment on view autovacuum_stats is 279 | $$ 280 | This provides basic metrics per table in the current database for when 281 | autovacuum and analyze were last run (as well as manual maintenance). 282 | $$; 283 | 284 | -- query stats 285 | 286 | -- call, time, rows 287 | 288 | CREATE VIEW statement_query_rows_time AS 289 | SELECT datname, queryid, query, sum(calls) as calls, 290 | sum(total_time) as total_time, sum(rows) as rows 291 | FROM pg_stat_statements 292 | JOIN pg_database d ON d.oid = dbid 293 | GROUP BY datname, queryid, query; 294 | 295 | comment on view statement_query_rows_time is 296 | $$ 297 | This gives aggregated of stats for a given query (cluster-wide) 298 | per query and database name. This view provides high level timing and row 299 | statistics. 
300 | $$; 301 | 302 | -- buffers 303 | CREATE VIEW statement_query_buffers AS 304 | SELECT datname, queryid, query, sum(calls), 305 | sum(shared_blks_hit) as shared_blks_hit, 306 | sum(shared_blks_read) as shared_blks_read, 307 | sum(shared_blks_dirtied) as shared_blks_dirtied, 308 | sum(shared_blks_written) as shared_blks_written, 309 | sum(temp_blks_read) as tmp_blkd_read, 310 | sum(temp_blks_written) as tmp_blkd_written 311 | FROM pg_stat_statements 312 | JOIN pg_database d ON d.oid = dbid 313 | GROUP BY datname, queryid, query; 314 | 315 | comment on view statement_query_buffers is 316 | $$ 317 | This gives aggregated of stats for a given query (cluster-wide) 318 | per query and database name. This view provides low-level IO statistics. 319 | $$; 320 | 321 | -- WAL last state 322 | 323 | CREATE TABLE pg_telemetry_wal_log ( 324 | run_time numeric unique, -- epoch timestamp 325 | timestamp timestamp unique, 326 | lsn pg_lsn 327 | ); 328 | 329 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 330 | This table logs the times and results of wal telemetry readings so that 331 | deltas can be calculated. At least one row must be present to get any useful 332 | data out of the wal_telemetry() function at all. 333 | 334 | If you get one telemetry entry a minute, over the course of a year you will get 335 | just over half a million entries. These are indexed on both epoch and timestamp 336 | so access is not impaired, but if you want ot purge, be careful to leave at 337 | least one entry at the end. 338 | 339 | You can also process these as a time series using WINDOW functions like lag. 
$$;

-- WAL telemetry

-- Returns the pg_telemetry_wal_log row to diff against: on a primary this
-- inserts (and returns) a fresh reading of pg_current_wal_lsn(); on a replica
-- (pg_is_in_recovery()) it only reads back the newest existing row, since the
-- table cannot be written to during recovery.
create function wal_telemetry_create_or_select_record()
returns pg_telemetry_wal_log language plpgsql as
$$
declare log_entry pg_telemetry_wal_log;
begin
    if pg_is_in_recovery() then
       select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1;
    else
       insert into pg_telemetry_wal_log
       select extract('epoch' from now()), now(),
              pg_current_wal_lsn() as wal_location
       returning * into log_entry;
    end if;
    return log_entry;
end;
$$;

-- Current vs. previous WAL position with deltas (bytes, seconds, bytes/sec).
-- The lateral subquery picks the newest log row strictly older than the row
-- just created/selected by wal_telemetry_create_or_select_record(), so the
-- function yields no row until at least two readings exist.
create function wal_telemetry() returns table (
    current_epoch numeric, last_epoch numeric, secs_elapsed numeric,
    current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric,
    bytes_per_sec numeric
) language sql as $$
   select c.run_time as current_epoch, l.run_time as last_epoch,
          c.run_time - l.run_time as secs_elapsed,
          c.lsn as current_lsn, l.lsn as last_lsn,
          c.lsn - l.lsn as bytes_elapsed,
          (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec
     FROM wal_telemetry_create_or_select_record() c,
  lateral (select * from pg_telemetry_wal_log where run_time < c.run_time
            order by run_time desc limit 1) l;
$$ set search_path from current;

-- Seed the log with a first baseline row at extension creation time so later
-- monitoring calls have something to diff against.
select wal_telemetry();

comment on function wal_telemetry() is $$
The wal_telemetry() function checks the current wal location and compares
with the last entry in the pg_telemetry_wal_log. It then provides for you
both current and last data, and the differences between them. These include
bytes elapsed and seconds elapsed, and bytes per sec.

The function is designed so that you can export delta information to a monitoring
solution such as munin or prometheus without the latter having to know anything
about lsn representation or losing information in the process.
387 | 388 | On a replica this does not write to the table and measures deltas from the last 389 | this was written on the master. 390 | $$; 391 | 392 | -- Replication slots 393 | 394 | CREATE OR REPLACE VIEW replication_slot_lag as 395 | SELECT slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 396 | now() as querytime, CASE WHEN pg_is_in_recovery() 397 | THEN pg_last_wal_replay_lsn() 398 | ELSE pg_current_wal_lsn() END 399 | AS pg_current_xlog_location, 400 | CASE WHEN pg_is_in_recovery() THEN null::int 401 | ELSE pg_current_wal_lsn() - restart_lsn END 402 | AS current_lag_bytes 403 | FROM pg_replication_slots s 404 | ORDER BY s.slot_name; 405 | 406 | COMMENT ON VIEW replication_slot_lag IS 407 | $$ 408 | This view monitors lag on downstream slots. It compares the last sent wal 409 | segment to the current known wal location. 410 | 411 | For master database, the current wal location is self-explanatory. For replicas 412 | we use the last received WAL location instead. Note that replicas can have 413 | replication slots for downstream replication tracking. 
$$;

-- Autovacuum queue depth: how many tables currently qualify for autovacuum
-- (per the same threshold + scale_factor * reltuples formula autovacuum
-- itself uses) and the total number of dead tuples across them.
-- _except lists relations to ignore; pass null (the default) to check all.
create or replace function get_autovacuum_vacuum_info
(
    _except regclass[] default null,
    out queue_depth int8,
    out total_dead_tup int8
)
returns record
language plpgsql stable as
$fnc$
begin
    select
        count(*),
        sum(n_dead_tup)
    into queue_depth, total_dead_tup
    from
        pg_class c, --lateral
        pg_stat_get_dead_tuples(c.oid) n_dead_tup
    where
        -- Exclude every relation listed in _except.  This must be "!= all":
        -- "oid != any(array)" is true whenever the array holds at least one
        -- element different from oid, so it only filtered single-element
        -- arrays.  coalesce() keeps the row when _except is null.
        coalesce(c.oid != all (_except), true)
        -- per-table reloptions override the global GUCs, mirroring
        -- autovacuum's own trigger condition
        and n_dead_tup >
            coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_threshold'), current_setting('autovacuum_vacuum_threshold')::int4)+
            reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_scale_factor'), current_setting('autovacuum_vacuum_scale_factor')::float4)
        -- skip tables where autovacuum has been explicitly disabled
        and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false)
    ;
    return;
end;
$fnc$;

-- Autoanalyze queue depth: same idea as get_autovacuum_vacuum_info above, but
-- counts tables whose modification count exceeds the analyze threshold
-- formula, plus the total rows modified since the last analyze.
create or replace function get_autovacuum_analyze_info
(
    _except regclass[] default array['pg_catalog.pg_statistic'],
    out queue_depth int8,
    out total_mod_since_analyze int8
)
returns record
language plpgsql stable as
$fnc$
begin
    select
        count(*),
        sum(n_mod_since_analyze)
    into queue_depth, total_mod_since_analyze
    from
        pg_class c,
        pg_stat_get_mod_since_analyze(c.oid) n_mod_since_analyze
    where
        c.relnamespace != 'pg_toast'::regnamespace
        -- "!= all", not "!= any": see get_autovacuum_vacuum_info above
        and coalesce(c.oid != all (_except), true)
        and n_mod_since_analyze >
            coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_threshold'), current_setting('autovacuum_analyze_threshold')::int4)+
            reltuples*coalesce((select option_value::float4 from
pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_scale_factor'), current_setting('autovacuum_analyze_scale_factor')::float4) 467 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 468 | ; 469 | return; 470 | end; 471 | $fnc$; 472 | 473 | create or replace function vacuum_progress() 474 | returns table 475 | ( 476 | datname name, 477 | schemaname name, 478 | relname name, 479 | phase text, 480 | heap_blks_total int8, 481 | heap_blks_scanned int8, 482 | heap_blks_vacuumed int8, 483 | index_vacuum_count int8, 484 | max_dead_tuples int8, 485 | num_dead_tuples int8 486 | ) 487 | language plpgsql stable as 488 | $fnc$ 489 | begin 490 | /* too lazy to check which version of pg the view was added in, so 491 | * just check if its there and return null if its not 492 | */ 493 | if exists (select from pg_class c where c.relnamespace='pg_catalog'::regnamespace and c.relname = 'pg_stat_progress_vacuum') then 494 | return query 495 | select 496 | v.datname, 497 | c.relnamespace::regnamespace::name as schemaname, 498 | c.relname, 499 | v.phase, 500 | v.heap_blks_total, 501 | v.heap_blks_scanned, 502 | v.heap_blks_vacuumed, 503 | v.index_vacuum_count, 504 | v.max_dead_tuples, 505 | v.num_dead_tuples 506 | from pg_stat_progress_vacuum v 507 | left join pg_class c on c.oid=v.relid; 508 | end if; 509 | 510 | return; 511 | end; 512 | $fnc$; 513 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.4--1.4a.sql: -------------------------------------------------------------------------------- 1 | create or replace view @extschema@.long_running_queries 2 | ( 3 | duration, 4 | pid, 5 | is_slave, 6 | application_name, 7 | username, 8 | database, 9 | backend_type, 10 | client_addr, 11 | state, 12 | wait_event, 13 | wait_event_type, 14 | query 15 | ) as 16 | select 17 | now() - coalesce(p.state_change, p.query_start) as duration, 18 
| p.pid, 19 | pg_is_in_recovery() as is_slave, 20 | p.application_name, 21 | p.usename, 22 | p.datname as database, 23 | p.backend_type, 24 | p.client_addr, 25 | p.state, 26 | p.wait_event, 27 | p.wait_event_type, 28 | p.query 29 | from pg_catalog.pg_stat_activity p, 30 | lateral 31 | ( 32 | select 33 | alert_threshold, 34 | active_since, 35 | active_until 36 | from @extschema@.long_running_query_rules l 37 | where 38 | coalesce(p.application_name ilike l.application_name_ilike, true) 39 | and coalesce(p.usename = l.usename, true) 40 | and coalesce(p.state = l.state, true) 41 | and coalesce(now() >= l.active_since, true) 42 | and coalesce(now() <= l.active_until, true) 43 | order by priority asc 44 | limit 1 45 | ) l 46 | where 47 | p.state != 'idle' 48 | and backend_type = 'client backend' 49 | and age(now(), query_start) > l.alert_threshold 50 | order by duration desc; 51 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.4.sql: -------------------------------------------------------------------------------- 1 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 2 | --\echo Use "CREATE EXTENSION pgtelemetry" to load this file. 
\quit 3 | 4 | -- filter non-backend connections from pg_stat_activity 5 | create view client_stat_activity as select * from pg_stat_activity where backend_type = 'client backend'; 6 | 7 | -- disk space 8 | CREATE VIEW relation_total_size AS 9 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 10 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 11 | pg_relation_size(c.oid) as exclusive_bytes, 12 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 13 | from pg_class c 14 | join pg_namespace n ON c.relnamespace = n.oid 15 | WHERE relkind = 'r' 16 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 17 | 18 | COMMENT ON VIEW relation_total_size IS 19 | $$ 20 | This view provides basic information on relation size. Catalogs and tables 21 | in the information schema are exclused, as are TOAST tables. 22 | 23 | The inclusive metrics show the relation along with indexes and TOAST. The 24 | exclusiove metrics show without these things. The bytes metrics are intended 25 | for graph drawing, while the sizes are there for administrators who want to 26 | quickly query this information and make decisions. 27 | $$; 28 | 29 | CREATE VIEW catalog_total_size AS 30 | select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes, 31 | pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size, 32 | pg_relation_size(c.oid) as exclusive_bytes, 33 | pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size 34 | from pg_class c 35 | join pg_namespace n ON c.relnamespace = n.oid 36 | WHERE relkind = 'r' 37 | and n.nspname in ('pg_catalog', 'information_schema'); 38 | 39 | COMMENT ON VIEW relation_total_size IS 40 | $$ 41 | This view provides basic information on relation size in PostgreSQL system 42 | tables (those in pg_catalog and information_schema). 43 | 44 | The inclusive metrics show the relation along with indexes and TOAST. 
The 45 | exclusiove metrics show without these things. The bytes metrics are intended 46 | for graph drawing, while the sizes are there for administrators who want to 47 | quickly query this information and make decisions. 48 | $$; 49 | 50 | -- biggest indexes 51 | 52 | CREATE VIEW index_size AS 53 | select c.oid, c.oid::regclass as index, 54 | pg_relation_size(c.oid) as bytes, 55 | pg_size_pretty(pg_relation_size(c.oid)) as size 56 | from pg_class c 57 | join pg_namespace n ON c.relnamespace = n.oid 58 | WHERE relkind = 'i' 59 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 60 | 61 | COMMENT ON VIEW index_size IS 62 | $$ 63 | This table is most useful in tracking down questions of bloat, fill factor, and 64 | performance of GIN indexes among other things. 65 | $$; 66 | 67 | -- Tables by size (TOAST) 68 | 69 | CREATE VIEW relation_toast_size AS 70 | select c.oid, c.oid::regclass as relation, 71 | pg_relation_size(t.oid) as exclusive_bytes, 72 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 73 | from pg_class c 74 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 75 | join pg_namespace n ON c.relnamespace = n.oid; 76 | 77 | COMMENT ON VIEW relation_toast_size IS 78 | $$ 79 | This measures the amount of space in a relation's TOAST tables. These are 80 | populated when data exceeds what can be reasonably stored inline in the main 81 | heap pages. You would expect to see this non-zero where you have large fields 82 | being stored, particularly arrays of composite types. 83 | 84 | Performance-wise moving data to TOAST improves sequential scans where the data 85 | is not required (count(*) for example) at the cost of making the data that has 86 | been moved far more expensive to retrieve and process. 
87 | $$; 88 | 89 | -- tablespaces size 90 | 91 | CREATE VIEW tablespace_size AS 92 | select spcname as name, pg_tablespace_size(oid) as bytes, 93 | pg_size_pretty(pg_tablespace_size(oid)) as size 94 | from pg_tablespace; 95 | 96 | COMMENT ON VIEW tablespace_size IS 97 | $$ 98 | This provides database-cluster-wide statistics on disk usage by tablespace. 99 | 100 | Note that tablespaces and databases are orthogonal. Typically if you are 101 | running out of disk space, you want to check this one first, then database_size 102 | and then the size of the relations in the largest database in that order. 103 | $$; 104 | 105 | -- database size 106 | 107 | CREATE VIEW database_size AS 108 | SELECT datname as name, pg_database_size(oid) as bytes, 109 | pg_size_pretty(pg_database_size(oid)) as size 110 | FROM pg_database; 111 | 112 | comment on view database_size is 113 | $$ 114 | This provides cluser-wide size statistics of databases. 115 | $$; 116 | 117 | -- connections by application_name 118 | 119 | CREATE VIEW connections_by_application AS 120 | select application_name, count(*) 121 | from pg_stat_activity group by application_name; 122 | 123 | comment on view connections_by_application is 124 | $$ 125 | This gives you the number of connections (cluster-wide) by application name. 126 | 127 | By default the application name is the program name that connected to the db. 
128 | $$; 129 | 130 | -- connections by state 131 | 132 | CREATE VIEW connections_by_state AS 133 | select case when wait_event is null then state else 'waiting' end as state, 134 | count(*) 135 | from pg_stat_activity group by 1; 136 | 137 | CREATE VIEW waiting_queries_reason_details AS 138 | select wait_event_type, wait_event, count(*) from pg_stat_activity 139 | WHERE wait_event is not null 140 | GROUP BY wait_event_type, wait_event; 141 | 142 | 143 | comment on view connections_by_state is 144 | $$ 145 | This gives you the number of connections (cluster-wide) by state (active, idle, 146 | idle in transaction, etc). If the query is active but is waiting on a lock or 147 | latch, we change this to 'waiting.' 148 | $$; 149 | 150 | 151 | -- connections by ip address source 152 | 153 | CREATE VIEW connections_by_ip_source as 154 | SELECT client_addr, 155 | count(*) as count, 156 | count(*) filter(where state = 'active') as active_count, 157 | count(*) filter(where state = 'idle in transaction' or state = 'idle in transaction (aborted)') as idle_in_transaction_count, 158 | count(*) filter(where state = 'idle') as idle_count 159 | from @extschema@.client_stat_activity 160 | GROUP BY client_addr; 161 | 162 | comment on view connections_by_ip_source is 163 | $$ 164 | This is a cluster-wide breakdown of connections by IP source. Between this and 165 | the applicaiton_name it is a good indication of where server laod is coming from 166 | as well as porblems like connection handle leaks. 
167 | $$; 168 | 169 | -- table access stats 170 | -- longest-running active queries 171 | 172 | CREATE VIEW longest_running_active_queries AS 173 | select application_name, state, wait_event_type, wait_event, query, pid, 174 | client_addr, 175 | age(now(), query_start) as running_for 176 | from pg_stat_activity where state = 'active' 177 | ORDER BY age(now(), query_start) desc; 178 | 179 | comment on view longest_running_active_queries is 180 | $$ 181 | This view is intended to be typically used by administrators in determining 182 | which queries to focus on. However it can be used for reporting and alerting 183 | as well. 184 | $$; 185 | 186 | -- waiting connections 187 | 188 | CREATE VIEW waiting_connections_by_event_type AS 189 | select wait_event_type, count(*) from pg_stat_activity 190 | WHERE wait_event is not null 191 | GROUP BY wait_event_type; 192 | 193 | comment on view waiting_connections_by_event_type is 194 | $$ 195 | This view provides basic, cluster-global, statistics on why queries are waiting 196 | on other queries. 197 | $$; 198 | 199 | -- locks by type 200 | 201 | CREATE VIEW locks_by_type AS 202 | SELECT locktype, count(*) from pg_locks 203 | GROUP BY locktype; 204 | 205 | COMMENT ON VIEW locks_by_type is 206 | $$ 207 | This view provides cluster-wide statistics on what sorts of locks are present. 208 | 209 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 210 | helpful in determining where the locks are coming from. 211 | $$; 212 | 213 | -- locks by mode 214 | CREATE VIEW locks_by_mode AS 215 | SELECT mode, count(*) from pg_locks 216 | GROUP BY mode; 217 | 218 | COMMENT ON view locks_by_mode is 219 | $$ 220 | This view provides cluster-wide statistics on locks by lock mode (access share 221 | vs exclusive for example). Combined with the locks_by_type view, this view 222 | provides a some opportunities to spot locking problems. 
$$;

-- count client backends waiting on a lock for more than given number of seconds
-- pg_locks rows with granted = 'f' are lock requests still being waited on;
-- joining on client_stat_activity restricts the count to client backends
-- (no autovacuum/background workers) and supplies query_start for the age test.
create or replace function count_waiting_on_locks_more_than_seconds(int default 300)
returns bigint
as
$$
select count(1)
from pg_locks
join @extschema@.client_stat_activity using(pid)
where granted = 'f' and
extract('epoch' from now() - query_start) > $1;
$$ language sql;

comment on function count_waiting_on_locks_more_than_seconds is
$$
This function provides the number of client backend processes waiting on a lock
for more than given number of seconds (5 minutes if not supplied). Can be used to spot
locking conflicts.
$$;


-- Per-table scan and tuple-modification counters, straight from
-- pg_stat_user_tables (current database, user tables only).
CREATE VIEW tuple_access_stats AS
select schemaname, relname,
seq_scan, seq_tup_read,
idx_scan, idx_tup_fetch,
n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup,
n_mod_since_analyze
FROM pg_stat_user_tables;

comment on view tuple_access_stats is
$$
This view provides statistcs for scans (index and sequential) along with
numbers of tuples updated through various means. It allows you to get a pretty
good idea of where you may need indexes or where IO-related problems may be
coming from.
259 | $$; 260 | 261 | -- autovacuum stats 262 | CREATE VIEW autovacuum_stats AS 263 | select schemaname, relname, 264 | last_vacuum, 265 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 266 | vacuum_count, 267 | last_autovacuum, 268 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 269 | autovacuum_count, 270 | last_analyze, 271 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 272 | analyze_count, 273 | last_autoanalyze, 274 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 275 | autoanalyze_count 276 | FROM pg_stat_user_tables; 277 | 278 | comment on view autovacuum_stats is 279 | $$ 280 | This provides basic metrics per table in the current database for when 281 | autovacuum and analyze were last run (as well as manual maintenance). 282 | $$; 283 | 284 | -- query stats 285 | 286 | -- call, time, rows 287 | 288 | CREATE VIEW statement_query_rows_time AS 289 | SELECT datname, queryid, query, sum(calls) as calls, 290 | sum(total_time) as total_time, sum(rows) as rows 291 | FROM pg_stat_statements 292 | JOIN pg_database d ON d.oid = dbid 293 | GROUP BY datname, queryid, query; 294 | 295 | comment on view statement_query_rows_time is 296 | $$ 297 | This gives aggregated of stats for a given query (cluster-wide) 298 | per query and database name. This view provides high level timing and row 299 | statistics. 
300 | $$; 301 | 302 | -- buffers 303 | CREATE VIEW statement_query_buffers AS 304 | SELECT datname, queryid, query, sum(calls), 305 | sum(shared_blks_hit) as shared_blks_hit, 306 | sum(shared_blks_read) as shared_blks_read, 307 | sum(shared_blks_dirtied) as shared_blks_dirtied, 308 | sum(shared_blks_written) as shared_blks_written, 309 | sum(temp_blks_read) as tmp_blkd_read, 310 | sum(temp_blks_written) as tmp_blkd_written 311 | FROM pg_stat_statements 312 | JOIN pg_database d ON d.oid = dbid 313 | GROUP BY datname, queryid, query; 314 | 315 | comment on view statement_query_buffers is 316 | $$ 317 | This gives aggregated of stats for a given query (cluster-wide) 318 | per query and database name. This view provides low-level IO statistics. 319 | $$; 320 | 321 | -- WAL last state 322 | 323 | CREATE TABLE pg_telemetry_wal_log ( 324 | run_time numeric unique, -- epoch timestamp 325 | timestamp timestamp unique, 326 | lsn pg_lsn 327 | ); 328 | 329 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 330 | This table logs the times and results of wal telemetry readings so that 331 | deltas can be calculated. At least one row must be present to get any useful 332 | data out of the wal_telemetry() function at all. 333 | 334 | If you get one telemetry entry a minute, over the course of a year you will get 335 | just over half a million entries. These are indexed on both epoch and timestamp 336 | so access is not impaired, but if you want ot purge, be careful to leave at 337 | least one entry at the end. 338 | 339 | You can also process these as a time series using WINDOW functions like lag. 
$$;

-- WAL telemetry

-- Returns the pg_telemetry_wal_log row to diff against: on a primary this
-- inserts (and returns) a fresh reading of pg_current_wal_lsn(); on a replica
-- (pg_is_in_recovery()) it only reads back the newest existing row, since the
-- table cannot be written to during recovery.
create function wal_telemetry_create_or_select_record()
returns pg_telemetry_wal_log language plpgsql as
$$
declare log_entry pg_telemetry_wal_log;
begin
    if pg_is_in_recovery() then
       select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1;
    else
       insert into pg_telemetry_wal_log
       select extract('epoch' from now()), now(),
              pg_current_wal_lsn() as wal_location
       returning * into log_entry;
    end if;
    return log_entry;
end;
$$;

-- Current vs. previous WAL position with deltas (bytes, seconds, bytes/sec).
-- The lateral subquery picks the newest log row strictly older than the row
-- just created/selected by wal_telemetry_create_or_select_record(), so the
-- function yields no row until at least two readings exist.
create function wal_telemetry() returns table (
    current_epoch numeric, last_epoch numeric, secs_elapsed numeric,
    current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric,
    bytes_per_sec numeric
) language sql as $$
   select c.run_time as current_epoch, l.run_time as last_epoch,
          c.run_time - l.run_time as secs_elapsed,
          c.lsn as current_lsn, l.lsn as last_lsn,
          c.lsn - l.lsn as bytes_elapsed,
          (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec
     FROM wal_telemetry_create_or_select_record() c,
  lateral (select * from pg_telemetry_wal_log where run_time < c.run_time
            order by run_time desc limit 1) l;
$$ set search_path from current;

-- Seed the log with a first baseline row at extension creation time so later
-- monitoring calls have something to diff against.
select wal_telemetry();

comment on function wal_telemetry() is $$
The wal_telemetry() function checks the current wal location and compares
with the last entry in the pg_telemetry_wal_log. It then provides for you
both current and last data, and the differences between them. These include
bytes elapsed and seconds elapsed, and bytes per sec.

The function is designed so that you can export delta information to a monitoring
solution such as munin or prometheus without the latter having to know anything
about lsn representation or losing information in the process.
387 | 388 | On a replica this does not write to the table and measures deltas from the last 389 | this was written on the master. 390 | $$; 391 | 392 | -- Replication slots 393 | 394 | CREATE OR REPLACE VIEW replication_slot_lag as 395 | SELECT slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 396 | now() as querytime, CASE WHEN pg_is_in_recovery() 397 | THEN pg_last_wal_replay_lsn() 398 | ELSE pg_current_wal_lsn() END 399 | AS pg_current_xlog_location, 400 | CASE WHEN pg_is_in_recovery() THEN null::int 401 | ELSE pg_current_wal_lsn() - restart_lsn END 402 | AS current_lag_bytes 403 | FROM pg_replication_slots s 404 | ORDER BY s.slot_name; 405 | 406 | COMMENT ON VIEW replication_slot_lag IS 407 | $$ 408 | This view monitors lag on downstream slots. It compares the last sent wal 409 | segment to the current known wal location. 410 | 411 | For master database, the current wal location is self-explanatory. For replicas 412 | we use the last received WAL location instead. Note that replicas can have 413 | replication slots for downstream replication tracking. 
$$;

-- Autovacuum queue depth: how many tables currently qualify for autovacuum
-- (per the same threshold + scale_factor * reltuples formula autovacuum
-- itself uses) and the total number of dead tuples across them.
-- _except lists relations to ignore; pass null (the default) to check all.
create or replace function get_autovacuum_vacuum_info
(
    _except regclass[] default null,
    out queue_depth int8,
    out total_dead_tup int8
)
returns record
language plpgsql stable as
$fnc$
begin
    select
        count(*),
        sum(n_dead_tup)
    into queue_depth, total_dead_tup
    from
        pg_class c, --lateral
        pg_stat_get_dead_tuples(c.oid) n_dead_tup
    where
        -- Exclude every relation listed in _except.  This must be "!= all":
        -- "oid != any(array)" is true whenever the array holds at least one
        -- element different from oid, so it only filtered single-element
        -- arrays.  coalesce() keeps the row when _except is null.
        coalesce(c.oid != all (_except), true)
        -- per-table reloptions override the global GUCs, mirroring
        -- autovacuum's own trigger condition
        and n_dead_tup >
            coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_threshold'), current_setting('autovacuum_vacuum_threshold')::int4)+
            reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_scale_factor'), current_setting('autovacuum_vacuum_scale_factor')::float4)
        -- skip tables where autovacuum has been explicitly disabled
        and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false)
    ;
    return;
end;
$fnc$;

-- Autoanalyze queue depth: same idea as get_autovacuum_vacuum_info above, but
-- counts tables whose modification count exceeds the analyze threshold
-- formula, plus the total rows modified since the last analyze.
create or replace function get_autovacuum_analyze_info
(
    _except regclass[] default array['pg_catalog.pg_statistic'],
    out queue_depth int8,
    out total_mod_since_analyze int8
)
returns record
language plpgsql stable as
$fnc$
begin
    select
        count(*),
        sum(n_mod_since_analyze)
    into queue_depth, total_mod_since_analyze
    from
        pg_class c,
        pg_stat_get_mod_since_analyze(c.oid) n_mod_since_analyze
    where
        c.relnamespace != 'pg_toast'::regnamespace
        -- "!= all", not "!= any": see get_autovacuum_vacuum_info above
        and coalesce(c.oid != all (_except), true)
        and n_mod_since_analyze >
            coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_threshold'), current_setting('autovacuum_analyze_threshold')::int4)+
            reltuples*coalesce((select option_value::float4 from
pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_scale_factor'), current_setting('autovacuum_analyze_scale_factor')::float4) 467 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 468 | ; 469 | return; 470 | end; 471 | $fnc$; 472 | 473 | create or replace function @extschema@.vacuum_progress() 474 | returns table 475 | ( 476 | datname name, 477 | schemaname name, 478 | relname name, 479 | phase text, 480 | heap_blks_total int8, 481 | heap_blks_scanned int8, 482 | heap_blks_vacuumed int8, 483 | index_vacuum_count int8, 484 | max_dead_tuples int8, 485 | num_dead_tuples int8 486 | ) 487 | language plpgsql stable as 488 | $fnc$ 489 | begin 490 | /* too lazy to check which version of pg the view was added in, so 491 | * just check if its there and return null if its not 492 | */ 493 | if exists (select from pg_class c where c.relnamespace='pg_catalog'::regnamespace and c.relname = 'pg_stat_progress_vacuum') then 494 | return query 495 | select 496 | v.datname, 497 | c.relnamespace::regnamespace::name as schemaname, 498 | c.relname, 499 | v.phase, 500 | v.heap_blks_total, 501 | v.heap_blks_scanned, 502 | v.heap_blks_vacuumed, 503 | v.index_vacuum_count, 504 | v.max_dead_tuples, 505 | v.num_dead_tuples 506 | from pg_stat_progress_vacuum v 507 | left join pg_class c on c.oid=v.relid; 508 | end if; 509 | 510 | return; 511 | end; 512 | $fnc$; 513 | 514 | create table @extschema@.long_running_query_rules 515 | ( 516 | priority int4, 517 | application_name_ilike text, 518 | usename name, 519 | state text, 520 | alert_threshold interval not null, 521 | active_since timestamptz, 522 | active_until timestamptz, 523 | created_at timestamptz default now(), 524 | comment text 525 | ); 526 | 527 | insert into @extschema@.long_running_query_rules(priority, application_name_ilike, usename, state, alert_threshold) 528 | values 529 | (0, 'pg\_dump', null, null, interval'6 hours'), 
-- pg_dump 6 hours 530 | (0, 'pg2ch', null, null, interval'3 hours'), -- pg2ch 3 hours 531 | (100, null, null, 'idle in transaction', interval'5 minutes'), -- any idle transaction 5 minutes 532 | (100, null, null, 'idle in transaction (aborted)', interval'5 minutes'), -- same as above, except one of the statements in the transaction caused an error 533 | (1000, null, null, null, interval'1 hour'); -- anything else 1 hour 534 | 535 | 536 | create or replace view @extschema@.long_running_queries 537 | ( 538 | duration, 539 | pid, 540 | is_slave, 541 | application_name, 542 | username, 543 | database, 544 | backend_type, 545 | client_addr, 546 | state, 547 | wait_event, 548 | wait_event_type, 549 | query 550 | ) as 551 | select 552 | now() - p.query_start as duration, 553 | p.pid, 554 | pg_is_in_recovery() as is_slave, 555 | p.application_name, 556 | p.usename, 557 | p.datname as database, 558 | p.backend_type, 559 | p.client_addr, 560 | p.state, 561 | p.wait_event, 562 | p.wait_event_type, 563 | p.query 564 | from pg_catalog.pg_stat_activity p, 565 | lateral 566 | ( 567 | select 568 | alert_threshold, 569 | active_since, 570 | active_until 571 | from @extschema@.long_running_query_rules l 572 | where 573 | coalesce(p.application_name ilike l.application_name_ilike, true) 574 | and coalesce(p.usename = l.usename, true) 575 | and coalesce(p.state = l.state, true) 576 | order by priority asc 577 | limit 1 578 | ) l 579 | where 580 | p.state != 'idle' 581 | and backend_type = 'client backend' 582 | and age(now(), query_start) > l.alert_threshold 583 | and coalesce(now() >= l.active_since, true) 584 | and coalesce(now() <= l.active_until, true) 585 | order by duration desc; 586 | 587 | 588 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.4a--1.4b.sql: -------------------------------------------------------------------------------- 1 | update 2 | @extschema@.long_running_query_rules 3 | set 4 | 
application_name_ilike = 'pg\_dump%' 5 | where 6 | application_name_ilike = 'pg\_dump'; 7 | 8 | drop view @extschema@.long_running_queries; -- cannot change the view column name during create or replace. 9 | create view @extschema@.long_running_queries 10 | ( 11 | current_state_duration, 12 | query_duration, 13 | pid, 14 | is_slave, 15 | application_name, 16 | username, 17 | database, 18 | backend_type, 19 | client_addr, 20 | state, 21 | wait_event, 22 | wait_event_type, 23 | query 24 | ) as 25 | select 26 | now() - p.state_change as current_state_duration, 27 | now() - p.query_start as query_duration, 28 | p.pid, 29 | pg_is_in_recovery() as is_slave, 30 | p.application_name, 31 | p.usename, 32 | p.datname as database, 33 | p.backend_type, 34 | p.client_addr, 35 | p.state, 36 | p.wait_event, 37 | p.wait_event_type, 38 | p.query 39 | from pg_catalog.pg_stat_activity p, 40 | lateral 41 | ( 42 | select 43 | alert_threshold, 44 | active_since, 45 | active_until, 46 | state 47 | from @extschema@.long_running_query_rules l 48 | where 49 | coalesce(p.application_name ilike l.application_name_ilike, true) 50 | and coalesce(p.usename = l.usename, true) 51 | and coalesce(p.state = l.state, true) 52 | and coalesce(now() >= l.active_since, true) 53 | and coalesce(now() <= l.active_until, true) 54 | order by priority asc 55 | limit 1 56 | ) l 57 | where 58 | p.state != 'idle' 59 | and backend_type = 'client backend' 60 | and ((l.state is NOT NULL and age(now(), state_change) > l.alert_threshold) 61 | or (l.state is NULL and age(now(), query_start) > l.alert_threshold)) 62 | order by current_state_duration desc; 63 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.4a.sql: -------------------------------------------------------------------------------- 1 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 2 | --\echo Use "CREATE EXTENSION pgtelemetry" to load this file. 
\quit

-- filter non-backend connections from pg_stat_activity
create view client_stat_activity as select * from pg_stat_activity where backend_type = 'client backend';

-- disk space
CREATE VIEW relation_total_size AS
select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes,
       pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size,
       pg_relation_size(c.oid) as exclusive_bytes,
       pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size
  from pg_class c
  join pg_namespace n ON c.relnamespace = n.oid
 WHERE relkind = 'r'
       and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema');

COMMENT ON VIEW relation_total_size IS
$$
This view provides basic information on relation size.  Catalogs and tables
in the information schema are excluded, as are TOAST tables.

The inclusive metrics show the relation along with indexes and TOAST.  The
exclusive metrics show without these things.  The bytes metrics are intended
for graph drawing, while the sizes are there for administrators who want to
quickly query this information and make decisions.
$$;

CREATE VIEW catalog_total_size AS
select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes,
       pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size,
       pg_relation_size(c.oid) as exclusive_bytes,
       pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size
  from pg_class c
  join pg_namespace n ON c.relnamespace = n.oid
 WHERE relkind = 'r'
       and n.nspname in ('pg_catalog', 'information_schema');

-- Bug fix: this COMMENT previously targeted relation_total_size (copy-paste
-- error), which clobbered that view's comment and left catalog_total_size
-- undocumented.  The text below describes catalog_total_size.
COMMENT ON VIEW catalog_total_size IS
$$
This view provides basic information on relation size in PostgreSQL system
tables (those in pg_catalog and information_schema).

The inclusive metrics show the relation along with indexes and TOAST.
The 45 | exclusiove metrics show without these things. The bytes metrics are intended 46 | for graph drawing, while the sizes are there for administrators who want to 47 | quickly query this information and make decisions. 48 | $$; 49 | 50 | -- biggest indexes 51 | 52 | CREATE VIEW index_size AS 53 | select c.oid, c.oid::regclass as index, 54 | pg_relation_size(c.oid) as bytes, 55 | pg_size_pretty(pg_relation_size(c.oid)) as size 56 | from pg_class c 57 | join pg_namespace n ON c.relnamespace = n.oid 58 | WHERE relkind = 'i' 59 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 60 | 61 | COMMENT ON VIEW index_size IS 62 | $$ 63 | This table is most useful in tracking down questions of bloat, fill factor, and 64 | performance of GIN indexes among other things. 65 | $$; 66 | 67 | -- Tables by size (TOAST) 68 | 69 | CREATE VIEW relation_toast_size AS 70 | select c.oid, c.oid::regclass as relation, 71 | pg_relation_size(t.oid) as exclusive_bytes, 72 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 73 | from pg_class c 74 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 75 | join pg_namespace n ON c.relnamespace = n.oid; 76 | 77 | COMMENT ON VIEW relation_toast_size IS 78 | $$ 79 | This measures the amount of space in a relation's TOAST tables. These are 80 | populated when data exceeds what can be reasonably stored inline in the main 81 | heap pages. You would expect to see this non-zero where you have large fields 82 | being stored, particularly arrays of composite types. 83 | 84 | Performance-wise moving data to TOAST improves sequential scans where the data 85 | is not required (count(*) for example) at the cost of making the data that has 86 | been moved far more expensive to retrieve and process. 
87 | $$; 88 | 89 | -- tablespaces size 90 | 91 | CREATE VIEW tablespace_size AS 92 | select spcname as name, pg_tablespace_size(oid) as bytes, 93 | pg_size_pretty(pg_tablespace_size(oid)) as size 94 | from pg_tablespace; 95 | 96 | COMMENT ON VIEW tablespace_size IS 97 | $$ 98 | This provides database-cluster-wide statistics on disk usage by tablespace. 99 | 100 | Note that tablespaces and databases are orthogonal. Typically if you are 101 | running out of disk space, you want to check this one first, then database_size 102 | and then the size of the relations in the largest database in that order. 103 | $$; 104 | 105 | -- database size 106 | 107 | CREATE VIEW database_size AS 108 | SELECT datname as name, pg_database_size(oid) as bytes, 109 | pg_size_pretty(pg_database_size(oid)) as size 110 | FROM pg_database; 111 | 112 | comment on view database_size is 113 | $$ 114 | This provides cluser-wide size statistics of databases. 115 | $$; 116 | 117 | -- connections by application_name 118 | 119 | CREATE VIEW connections_by_application AS 120 | select application_name, count(*) 121 | from pg_stat_activity group by application_name; 122 | 123 | comment on view connections_by_application is 124 | $$ 125 | This gives you the number of connections (cluster-wide) by application name. 126 | 127 | By default the application name is the program name that connected to the db. 
128 | $$; 129 | 130 | -- connections by state 131 | 132 | CREATE VIEW connections_by_state AS 133 | select case when wait_event is null then state else 'waiting' end as state, 134 | count(*) 135 | from pg_stat_activity group by 1; 136 | 137 | CREATE VIEW waiting_queries_reason_details AS 138 | select wait_event_type, wait_event, count(*) from pg_stat_activity 139 | WHERE wait_event is not null 140 | GROUP BY wait_event_type, wait_event; 141 | 142 | 143 | comment on view connections_by_state is 144 | $$ 145 | This gives you the number of connections (cluster-wide) by state (active, idle, 146 | idle in transaction, etc). If the query is active but is waiting on a lock or 147 | latch, we change this to 'waiting.' 148 | $$; 149 | 150 | 151 | -- connections by ip address source 152 | 153 | CREATE VIEW connections_by_ip_source as 154 | SELECT client_addr, 155 | count(*) as count, 156 | count(*) filter(where state = 'active') as active_count, 157 | count(*) filter(where state = 'idle in transaction' or state = 'idle in transaction (aborted)') as idle_in_transaction_count, 158 | count(*) filter(where state = 'idle') as idle_count 159 | from @extschema@.client_stat_activity 160 | GROUP BY client_addr; 161 | 162 | comment on view connections_by_ip_source is 163 | $$ 164 | This is a cluster-wide breakdown of connections by IP source. Between this and 165 | the applicaiton_name it is a good indication of where server laod is coming from 166 | as well as porblems like connection handle leaks. 
167 | $$; 168 | 169 | -- table access stats 170 | -- longest-running active queries 171 | 172 | CREATE VIEW longest_running_active_queries AS 173 | select application_name, state, wait_event_type, wait_event, query, pid, 174 | client_addr, 175 | age(now(), query_start) as running_for 176 | from pg_stat_activity where state = 'active' 177 | ORDER BY age(now(), query_start) desc; 178 | 179 | comment on view longest_running_active_queries is 180 | $$ 181 | This view is intended to be typically used by administrators in determining 182 | which queries to focus on. However it can be used for reporting and alerting 183 | as well. 184 | $$; 185 | 186 | -- waiting connections 187 | 188 | CREATE VIEW waiting_connections_by_event_type AS 189 | select wait_event_type, count(*) from pg_stat_activity 190 | WHERE wait_event is not null 191 | GROUP BY wait_event_type; 192 | 193 | comment on view waiting_connections_by_event_type is 194 | $$ 195 | This view provides basic, cluster-global, statistics on why queries are waiting 196 | on other queries. 197 | $$; 198 | 199 | -- locks by type 200 | 201 | CREATE VIEW locks_by_type AS 202 | SELECT locktype, count(*) from pg_locks 203 | GROUP BY locktype; 204 | 205 | COMMENT ON VIEW locks_by_type is 206 | $$ 207 | This view provides cluster-wide statistics on what sorts of locks are present. 208 | 209 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 210 | helpful in determining where the locks are coming from. 211 | $$; 212 | 213 | -- locks by mode 214 | CREATE VIEW locks_by_mode AS 215 | SELECT mode, count(*) from pg_locks 216 | GROUP BY mode; 217 | 218 | COMMENT ON view locks_by_mode is 219 | $$ 220 | This view provides cluster-wide statistics on locks by lock mode (access share 221 | vs exclusive for example). Combined with the locks_by_type view, this view 222 | provides a some opportunities to spot locking problems. 
223 | $$; 224 | 225 | -- count client backends waiting on a lock for more than given number of seconds 226 | create or replace function count_waiting_on_locks_more_than_seconds(int default 300) 227 | returns bigint 228 | as 229 | $$ 230 | select count(1) 231 | from pg_locks 232 | join @extschema@.client_stat_activity using(pid) 233 | where granted = 'f' and 234 | extract('epoch' from now() - query_start) > $1; 235 | $$ language sql; 236 | 237 | comment on function count_waiting_on_locks_more_than_seconds is 238 | $$ 239 | This function provides the number of client backend processes waiting on a lock 240 | for more than given number of seconds (5 minutes if not supplied). Can be used to spot 241 | locking conflicts. 242 | $$; 243 | 244 | 245 | CREATE VIEW tuple_access_stats AS 246 | select schemaname, relname, 247 | seq_scan, seq_tup_read, 248 | idx_scan, idx_tup_fetch, 249 | n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, 250 | n_mod_since_analyze 251 | FROM pg_stat_user_tables; 252 | 253 | comment on view tuple_access_stats is 254 | $$ 255 | This view provides statistcs for scans (index and sequential) along with 256 | numbers of tuples updated through various means. It allows you to get a pretty 257 | good idea of where you may need indexes or where IO-related problems may be 258 | coming from. 
259 | $$; 260 | 261 | -- autovacuum stats 262 | CREATE VIEW autovacuum_stats AS 263 | select schemaname, relname, 264 | last_vacuum, 265 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 266 | vacuum_count, 267 | last_autovacuum, 268 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 269 | autovacuum_count, 270 | last_analyze, 271 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 272 | analyze_count, 273 | last_autoanalyze, 274 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 275 | autoanalyze_count 276 | FROM pg_stat_user_tables; 277 | 278 | comment on view autovacuum_stats is 279 | $$ 280 | This provides basic metrics per table in the current database for when 281 | autovacuum and analyze were last run (as well as manual maintenance). 282 | $$; 283 | 284 | -- query stats 285 | 286 | -- call, time, rows 287 | 288 | CREATE VIEW statement_query_rows_time AS 289 | SELECT datname, queryid, query, sum(calls) as calls, 290 | sum(total_time) as total_time, sum(rows) as rows 291 | FROM pg_stat_statements 292 | JOIN pg_database d ON d.oid = dbid 293 | GROUP BY datname, queryid, query; 294 | 295 | comment on view statement_query_rows_time is 296 | $$ 297 | This gives aggregated of stats for a given query (cluster-wide) 298 | per query and database name. This view provides high level timing and row 299 | statistics. 
300 | $$; 301 | 302 | -- buffers 303 | CREATE VIEW statement_query_buffers AS 304 | SELECT datname, queryid, query, sum(calls), 305 | sum(shared_blks_hit) as shared_blks_hit, 306 | sum(shared_blks_read) as shared_blks_read, 307 | sum(shared_blks_dirtied) as shared_blks_dirtied, 308 | sum(shared_blks_written) as shared_blks_written, 309 | sum(temp_blks_read) as tmp_blkd_read, 310 | sum(temp_blks_written) as tmp_blkd_written 311 | FROM pg_stat_statements 312 | JOIN pg_database d ON d.oid = dbid 313 | GROUP BY datname, queryid, query; 314 | 315 | comment on view statement_query_buffers is 316 | $$ 317 | This gives aggregated of stats for a given query (cluster-wide) 318 | per query and database name. This view provides low-level IO statistics. 319 | $$; 320 | 321 | -- WAL last state 322 | 323 | CREATE TABLE pg_telemetry_wal_log ( 324 | run_time numeric unique, -- epoch timestamp 325 | timestamp timestamp unique, 326 | lsn pg_lsn 327 | ); 328 | 329 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 330 | This table logs the times and results of wal telemetry readings so that 331 | deltas can be calculated. At least one row must be present to get any useful 332 | data out of the wal_telemetry() function at all. 333 | 334 | If you get one telemetry entry a minute, over the course of a year you will get 335 | just over half a million entries. These are indexed on both epoch and timestamp 336 | so access is not impaired, but if you want ot purge, be careful to leave at 337 | least one entry at the end. 338 | 339 | You can also process these as a time series using WINDOW functions like lag. 
340 | $$; 341 | 342 | -- WAL telemetry 343 | 344 | create function wal_telemetry_create_or_select_record() 345 | returns pg_telemetry_wal_log language plpgsql as 346 | $$ 347 | declare log_entry pg_telemetry_wal_log; 348 | begin 349 | if pg_is_in_recovery() then 350 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 351 | else 352 | insert into pg_telemetry_wal_log 353 | select extract('epoch' from now()), now(), 354 | pg_current_wal_lsn() as wal_location 355 | returning * into log_entry; 356 | end if; 357 | return log_entry; 358 | end; 359 | $$; 360 | 361 | create function wal_telemetry() returns table ( 362 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 363 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 364 | bytes_per_sec numeric 365 | ) language sql as $$ 366 | select c.run_time as current_epoch, l.run_time as last_epoch, 367 | c.run_time - l.run_time as secs_elapsed, 368 | c.lsn as current_lsn, l.lsn as last_lsn, 369 | c.lsn - l.lsn as bytes_elapsed, 370 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 371 | FROM wal_telemetry_create_or_select_record() c, 372 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 373 | order by run_time desc limit 1) l; 374 | $$ set search_path from current; 375 | 376 | select wal_telemetry(); 377 | 378 | comment on function wal_telemetry() is $$ 379 | The wal_telemetry() function checks the current wal location and compares 380 | with the last entry in the pg_telemetry_wal_log. It then provides for you 381 | both current and last data, and the differences between them. These include 382 | bytes elapsed and seconds elapsed, and bytes per sec. 383 | 384 | The function is designed so that you can export delta information to a monitoring 385 | solution such as munin or prometheus without the latter having to know anything 386 | about lsn representation or losing information in the process. 
387 | 388 | On a replica this does not write to the table and measures deltas from the last 389 | this was written on the master. 390 | $$; 391 | 392 | -- Replication slots 393 | 394 | CREATE OR REPLACE VIEW replication_slot_lag as 395 | SELECT slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 396 | now() as querytime, CASE WHEN pg_is_in_recovery() 397 | THEN pg_last_wal_replay_lsn() 398 | ELSE pg_current_wal_lsn() END 399 | AS pg_current_xlog_location, 400 | CASE WHEN pg_is_in_recovery() THEN null::int 401 | ELSE pg_current_wal_lsn() - restart_lsn END 402 | AS current_lag_bytes 403 | FROM pg_replication_slots s 404 | ORDER BY s.slot_name; 405 | 406 | COMMENT ON VIEW replication_slot_lag IS 407 | $$ 408 | This view monitors lag on downstream slots. It compares the last sent wal 409 | segment to the current known wal location. 410 | 411 | For master database, the current wal location is self-explanatory. For replicas 412 | we use the last received WAL location instead. Note that replicas can have 413 | replication slots for downstream replication tracking. 
414 | $$; 415 | 416 | create or replace function get_autovacuum_vacuum_info 417 | ( 418 | _except regclass[] default null, 419 | out queue_depth int8, 420 | out total_dead_tup int8 421 | ) 422 | returns record 423 | language plpgsql stable as 424 | $fnc$ 425 | begin 426 | select 427 | count(*), 428 | sum(n_dead_tup) 429 | into queue_depth, total_dead_tup 430 | from 431 | pg_class c, --lateral 432 | pg_stat_get_dead_tuples(c.oid) n_dead_tup 433 | where 434 | coalesce(c.oid != any (_except), true) 435 | and n_dead_tup > 436 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_threshold'), current_setting('autovacuum_vacuum_threshold')::int4)+ 437 | reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_scale_factor'), current_setting('autovacuum_vacuum_scale_factor')::float4) 438 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 439 | ; 440 | return; 441 | end; 442 | $fnc$; 443 | 444 | create or replace function get_autovacuum_analyze_info 445 | ( 446 | _except regclass[] default array['pg_catalog.pg_statistic'], 447 | out queue_depth int8, 448 | out total_mod_since_analyze int8 449 | ) 450 | returns record 451 | language plpgsql stable as 452 | $fnc$ 453 | begin 454 | select 455 | count(*), 456 | sum(n_mod_since_analyze) 457 | into queue_depth, total_mod_since_analyze 458 | from 459 | pg_class c, 460 | pg_stat_get_mod_since_analyze(c.oid) n_mod_since_analyze 461 | where 462 | c.relnamespace != 'pg_toast'::regnamespace 463 | and coalesce(c.oid != any (_except), true) 464 | and n_mod_since_analyze > 465 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_threshold'), current_setting('autovacuum_analyze_threshold')::int4)+ 466 | reltuples*coalesce((select option_value::float4 from 
pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_scale_factor'), current_setting('autovacuum_analyze_scale_factor')::float4) 467 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 468 | ; 469 | return; 470 | end; 471 | $fnc$; 472 | 473 | create or replace function @extschema@.vacuum_progress() 474 | returns table 475 | ( 476 | datname name, 477 | schemaname name, 478 | relname name, 479 | phase text, 480 | heap_blks_total int8, 481 | heap_blks_scanned int8, 482 | heap_blks_vacuumed int8, 483 | index_vacuum_count int8, 484 | max_dead_tuples int8, 485 | num_dead_tuples int8 486 | ) 487 | language plpgsql stable as 488 | $fnc$ 489 | begin 490 | /* too lazy to check which version of pg the view was added in, so 491 | * just check if its there and return null if its not 492 | */ 493 | if exists (select from pg_class c where c.relnamespace='pg_catalog'::regnamespace and c.relname = 'pg_stat_progress_vacuum') then 494 | return query 495 | select 496 | v.datname, 497 | c.relnamespace::regnamespace::name as schemaname, 498 | c.relname, 499 | v.phase, 500 | v.heap_blks_total, 501 | v.heap_blks_scanned, 502 | v.heap_blks_vacuumed, 503 | v.index_vacuum_count, 504 | v.max_dead_tuples, 505 | v.num_dead_tuples 506 | from pg_stat_progress_vacuum v 507 | left join pg_class c on c.oid=v.relid; 508 | end if; 509 | 510 | return; 511 | end; 512 | $fnc$; 513 | 514 | create table @extschema@.long_running_query_rules 515 | ( 516 | priority int4, 517 | application_name_ilike text, 518 | usename name, 519 | state text, 520 | alert_threshold interval not null, 521 | active_since timestamptz, 522 | active_until timestamptz, 523 | created_at timestamptz default now(), 524 | comment text 525 | ); 526 | 527 | insert into @extschema@.long_running_query_rules(priority, application_name_ilike, usename, state, alert_threshold) 528 | values 529 | (0, 'pg\_dump', null, null, interval'6 hours'), 
-- pg_dump 6 hours 530 | (0, 'pg2ch', null, null, interval'3 hours'), -- pg2ch 3 hours 531 | (100, null, null, 'idle in transaction', interval'5 minutes'), -- any idle transaction 5 minutes 532 | (100, null, null, 'idle in transaction (aborted)', interval'5 minutes'), -- same as above, except one of the statements in the transaction caused an error 533 | (1000, null, null, null, interval'1 hour'); -- anything else 1 hour 534 | 535 | 536 | create or replace view @extschema@.long_running_queries 537 | ( 538 | duration, 539 | pid, 540 | is_slave, 541 | application_name, 542 | username, 543 | database, 544 | backend_type, 545 | client_addr, 546 | state, 547 | wait_event, 548 | wait_event_type, 549 | query 550 | ) as 551 | select 552 | now() - coalesce(p.state_change, p.query_start) as duration, 553 | p.pid, 554 | pg_is_in_recovery() as is_slave, 555 | p.application_name, 556 | p.usename, 557 | p.datname as database, 558 | p.backend_type, 559 | p.client_addr, 560 | p.state, 561 | p.wait_event, 562 | p.wait_event_type, 563 | p.query 564 | from pg_catalog.pg_stat_activity p, 565 | lateral 566 | ( 567 | select 568 | alert_threshold, 569 | active_since, 570 | active_until 571 | from @extschema@.long_running_query_rules l 572 | where 573 | coalesce(p.application_name ilike l.application_name_ilike, true) 574 | and coalesce(p.usename = l.usename, true) 575 | and coalesce(p.state = l.state, true) 576 | and coalesce(now() >= l.active_since, true) 577 | and coalesce(now() <= l.active_until, true) 578 | order by priority asc 579 | limit 1 580 | ) l 581 | where 582 | p.state != 'idle' 583 | and backend_type = 'client backend' 584 | and age(now(), query_start) > l.alert_threshold 585 | order by duration desc; 586 | 587 | 588 | 589 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.4b--1.5.sql: -------------------------------------------------------------------------------- 1 | DROP VIEW statement_query_rows_time; 2 | 3 | DO 
4 | $$ 5 | DECLARE 6 | total_time TEXT; 7 | BEGIN 8 | /* 9 | * Starting with v1.8 pg_stat_statements defines two separate fields: 10 | * `total_exec_time` and `total_plan_time` (see documentation for details) 11 | * while older versions only have `total_time`. 12 | */ 13 | IF EXISTS ( 14 | SELECT attname FROM pg_attribute 15 | WHERE attrelid = 'pg_stat_statements'::regclass 16 | AND attname = 'total_exec_time') 17 | THEN 18 | total_time = 'total_exec_time + total_plan_time'; 19 | ELSE 20 | total_time = 'total_time'; 21 | END IF; 22 | 23 | EXECUTE format( 24 | $query$ 25 | CREATE VIEW statement_query_rows_time AS 26 | SELECT datname, queryid, query, sum(calls) as calls, 27 | sum(%s) as total_time, sum(rows) as rows 28 | FROM pg_stat_statements 29 | JOIN pg_database d ON d.oid = dbid 30 | GROUP BY datname, queryid, query 31 | $query$, 32 | total_time); 33 | END 34 | $$ LANGUAGE plpgsql; 35 | -------------------------------------------------------------------------------- /extension/pgtelemetry--1.4b.sql: -------------------------------------------------------------------------------- 1 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 2 | --\echo Use "CREATE EXTENSION pgtelemetry" to load this file. 
\quit

-- filter non-backend connections from pg_stat_activity
create view client_stat_activity as select * from pg_stat_activity where backend_type = 'client backend';

-- disk space
CREATE VIEW relation_total_size AS
select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes,
       pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size,
       pg_relation_size(c.oid) as exclusive_bytes,
       pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size
  from pg_class c
  join pg_namespace n ON c.relnamespace = n.oid
 WHERE relkind = 'r'
       and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema');

COMMENT ON VIEW relation_total_size IS
$$
This view provides basic information on relation size.  Catalogs and tables
in the information schema are excluded, as are TOAST tables.

The inclusive metrics show the relation along with indexes and TOAST.  The
exclusive metrics show without these things.  The bytes metrics are intended
for graph drawing, while the sizes are there for administrators who want to
quickly query this information and make decisions.
$$;

CREATE VIEW catalog_total_size AS
select c.oid, c.oid::regclass as relation, pg_total_relation_size(c.oid) as inclusive_bytes,
       pg_size_pretty(pg_total_relation_size(c.oid)) as inclusive_size,
       pg_relation_size(c.oid) as exclusive_bytes,
       pg_size_pretty(pg_relation_size(c.oid)) as exclusive_size
  from pg_class c
  join pg_namespace n ON c.relnamespace = n.oid
 WHERE relkind = 'r'
       and n.nspname in ('pg_catalog', 'information_schema');

-- Bug fix: this COMMENT previously targeted relation_total_size (copy-paste
-- error), which clobbered that view's comment and left catalog_total_size
-- undocumented.  The text below describes catalog_total_size.
COMMENT ON VIEW catalog_total_size IS
$$
This view provides basic information on relation size in PostgreSQL system
tables (those in pg_catalog and information_schema).

The inclusive metrics show the relation along with indexes and TOAST.
The 45 | exclusiove metrics show without these things. The bytes metrics are intended 46 | for graph drawing, while the sizes are there for administrators who want to 47 | quickly query this information and make decisions. 48 | $$; 49 | 50 | -- biggest indexes 51 | 52 | CREATE VIEW index_size AS 53 | select c.oid, c.oid::regclass as index, 54 | pg_relation_size(c.oid) as bytes, 55 | pg_size_pretty(pg_relation_size(c.oid)) as size 56 | from pg_class c 57 | join pg_namespace n ON c.relnamespace = n.oid 58 | WHERE relkind = 'i' 59 | and n.nspname not in ('pg_toast', 'pg_catalog', 'information_schema'); 60 | 61 | COMMENT ON VIEW index_size IS 62 | $$ 63 | This table is most useful in tracking down questions of bloat, fill factor, and 64 | performance of GIN indexes among other things. 65 | $$; 66 | 67 | -- Tables by size (TOAST) 68 | 69 | CREATE VIEW relation_toast_size AS 70 | select c.oid, c.oid::regclass as relation, 71 | pg_relation_size(t.oid) as exclusive_bytes, 72 | pg_size_pretty(pg_relation_size(t.oid)) as exclusive_size 73 | from pg_class c 74 | join pg_class t ON t.relname = 'pg_toast_' || c.oid::text 75 | join pg_namespace n ON c.relnamespace = n.oid; 76 | 77 | COMMENT ON VIEW relation_toast_size IS 78 | $$ 79 | This measures the amount of space in a relation's TOAST tables. These are 80 | populated when data exceeds what can be reasonably stored inline in the main 81 | heap pages. You would expect to see this non-zero where you have large fields 82 | being stored, particularly arrays of composite types. 83 | 84 | Performance-wise moving data to TOAST improves sequential scans where the data 85 | is not required (count(*) for example) at the cost of making the data that has 86 | been moved far more expensive to retrieve and process. 
87 | $$; 88 | 89 | -- tablespaces size 90 | 91 | CREATE VIEW tablespace_size AS 92 | select spcname as name, pg_tablespace_size(oid) as bytes, 93 | pg_size_pretty(pg_tablespace_size(oid)) as size 94 | from pg_tablespace; 95 | 96 | COMMENT ON VIEW tablespace_size IS 97 | $$ 98 | This provides database-cluster-wide statistics on disk usage by tablespace. 99 | 100 | Note that tablespaces and databases are orthogonal. Typically if you are 101 | running out of disk space, you want to check this one first, then database_size 102 | and then the size of the relations in the largest database in that order. 103 | $$; 104 | 105 | -- database size 106 | 107 | CREATE VIEW database_size AS 108 | SELECT datname as name, pg_database_size(oid) as bytes, 109 | pg_size_pretty(pg_database_size(oid)) as size 110 | FROM pg_database; 111 | 112 | comment on view database_size is 113 | $$ 114 | This provides cluser-wide size statistics of databases. 115 | $$; 116 | 117 | -- connections by application_name 118 | 119 | CREATE VIEW connections_by_application AS 120 | select application_name, count(*) 121 | from pg_stat_activity group by application_name; 122 | 123 | comment on view connections_by_application is 124 | $$ 125 | This gives you the number of connections (cluster-wide) by application name. 126 | 127 | By default the application name is the program name that connected to the db. 
128 | $$; 129 | 130 | -- connections by state 131 | 132 | CREATE VIEW connections_by_state AS 133 | select case when wait_event is null then state else 'waiting' end as state, 134 | count(*) 135 | from pg_stat_activity group by 1; 136 | 137 | CREATE VIEW waiting_queries_reason_details AS 138 | select wait_event_type, wait_event, count(*) from pg_stat_activity 139 | WHERE wait_event is not null 140 | GROUP BY wait_event_type, wait_event; 141 | 142 | 143 | comment on view connections_by_state is 144 | $$ 145 | This gives you the number of connections (cluster-wide) by state (active, idle, 146 | idle in transaction, etc). If the query is active but is waiting on a lock or 147 | latch, we change this to 'waiting.' 148 | $$; 149 | 150 | 151 | -- connections by ip address source 152 | 153 | CREATE VIEW connections_by_ip_source as 154 | SELECT client_addr, 155 | count(*) as count, 156 | count(*) filter(where state = 'active') as active_count, 157 | count(*) filter(where state = 'idle in transaction' or state = 'idle in transaction (aborted)') as idle_in_transaction_count, 158 | count(*) filter(where state = 'idle') as idle_count 159 | from @extschema@.client_stat_activity 160 | GROUP BY client_addr; 161 | 162 | comment on view connections_by_ip_source is 163 | $$ 164 | This is a cluster-wide breakdown of connections by IP source. Between this and 165 | the applicaiton_name it is a good indication of where server laod is coming from 166 | as well as porblems like connection handle leaks. 
167 | $$; 168 | 169 | -- table access stats 170 | -- longest-running active queries 171 | 172 | CREATE VIEW longest_running_active_queries AS 173 | select application_name, state, wait_event_type, wait_event, query, pid, 174 | client_addr, 175 | age(now(), query_start) as running_for 176 | from pg_stat_activity where state = 'active' 177 | ORDER BY age(now(), query_start) desc; 178 | 179 | comment on view longest_running_active_queries is 180 | $$ 181 | This view is intended to be typically used by administrators in determining 182 | which queries to focus on. However it can be used for reporting and alerting 183 | as well. 184 | $$; 185 | 186 | -- waiting connections 187 | 188 | CREATE VIEW waiting_connections_by_event_type AS 189 | select wait_event_type, count(*) from pg_stat_activity 190 | WHERE wait_event is not null 191 | GROUP BY wait_event_type; 192 | 193 | comment on view waiting_connections_by_event_type is 194 | $$ 195 | This view provides basic, cluster-global, statistics on why queries are waiting 196 | on other queries. 197 | $$; 198 | 199 | -- locks by type 200 | 201 | CREATE VIEW locks_by_type AS 202 | SELECT locktype, count(*) from pg_locks 203 | GROUP BY locktype; 204 | 205 | COMMENT ON VIEW locks_by_type is 206 | $$ 207 | This view provides cluster-wide statistics on what sorts of locks are present. 208 | 209 | These incude advisory locks, relation, tuple, transaction id, etc. This can be 210 | helpful in determining where the locks are coming from. 211 | $$; 212 | 213 | -- locks by mode 214 | CREATE VIEW locks_by_mode AS 215 | SELECT mode, count(*) from pg_locks 216 | GROUP BY mode; 217 | 218 | COMMENT ON view locks_by_mode is 219 | $$ 220 | This view provides cluster-wide statistics on locks by lock mode (access share 221 | vs exclusive for example). Combined with the locks_by_type view, this view 222 | provides a some opportunities to spot locking problems. 
223 | $$; 224 | 225 | -- count client backends waiting on a lock for more than given number of seconds 226 | create or replace function count_waiting_on_locks_more_than_seconds(int default 300) 227 | returns bigint 228 | as 229 | $$ 230 | select count(1) 231 | from pg_locks 232 | join @extschema@.client_stat_activity using(pid) 233 | where granted = 'f' and 234 | extract('epoch' from now() - query_start) > $1; 235 | $$ language sql; 236 | 237 | comment on function count_waiting_on_locks_more_than_seconds is 238 | $$ 239 | This function provides the number of client backend processes waiting on a lock 240 | for more than given number of seconds (5 minutes if not supplied). Can be used to spot 241 | locking conflicts. 242 | $$; 243 | 244 | 245 | CREATE VIEW tuple_access_stats AS 246 | select schemaname, relname, 247 | seq_scan, seq_tup_read, 248 | idx_scan, idx_tup_fetch, 249 | n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, 250 | n_mod_since_analyze 251 | FROM pg_stat_user_tables; 252 | 253 | comment on view tuple_access_stats is 254 | $$ 255 | This view provides statistcs for scans (index and sequential) along with 256 | numbers of tuples updated through various means. It allows you to get a pretty 257 | good idea of where you may need indexes or where IO-related problems may be 258 | coming from. 
259 | $$; 260 | 261 | -- autovacuum stats 262 | CREATE VIEW autovacuum_stats AS 263 | select schemaname, relname, 264 | last_vacuum, 265 | extract (epoch from age(now(), last_vacuum)) as age_last_vacuum, 266 | vacuum_count, 267 | last_autovacuum, 268 | extract (epoch from age(now(), last_autovacuum)) as age_last_autovacuum, 269 | autovacuum_count, 270 | last_analyze, 271 | extract (epoch from age(now(), last_analyze)) as age_last_analyze, 272 | analyze_count, 273 | last_autoanalyze, 274 | extract (epoch from age(now(), last_autoanalyze)) as age_last_autoanalyze, 275 | autoanalyze_count 276 | FROM pg_stat_user_tables; 277 | 278 | comment on view autovacuum_stats is 279 | $$ 280 | This provides basic metrics per table in the current database for when 281 | autovacuum and analyze were last run (as well as manual maintenance). 282 | $$; 283 | 284 | -- query stats 285 | 286 | -- call, time, rows 287 | 288 | CREATE VIEW statement_query_rows_time AS 289 | SELECT datname, queryid, query, sum(calls) as calls, 290 | sum(total_time) as total_time, sum(rows) as rows 291 | FROM pg_stat_statements 292 | JOIN pg_database d ON d.oid = dbid 293 | GROUP BY datname, queryid, query; 294 | 295 | comment on view statement_query_rows_time is 296 | $$ 297 | This gives aggregated of stats for a given query (cluster-wide) 298 | per query and database name. This view provides high level timing and row 299 | statistics. 
300 | $$; 301 | 302 | -- buffers 303 | CREATE VIEW statement_query_buffers AS 304 | SELECT datname, queryid, query, sum(calls) as calls, -- alias matches statement_query_rows_time; without it the column is named "sum" 305 | sum(shared_blks_hit) as shared_blks_hit, 306 | sum(shared_blks_read) as shared_blks_read, 307 | sum(shared_blks_dirtied) as shared_blks_dirtied, 308 | sum(shared_blks_written) as shared_blks_written, 309 | sum(temp_blks_read) as tmp_blkd_read, 310 | sum(temp_blks_written) as tmp_blkd_written 311 | FROM pg_stat_statements 312 | JOIN pg_database d ON d.oid = dbid 313 | GROUP BY datname, queryid, query; 314 | 315 | comment on view statement_query_buffers is 316 | $$ 317 | This gives aggregated of stats for a given query (cluster-wide) 318 | per query and database name. This view provides low-level IO statistics. 319 | $$; 320 | 321 | -- WAL last state 322 | 323 | CREATE TABLE pg_telemetry_wal_log ( 324 | run_time numeric unique, -- epoch timestamp 325 | timestamp timestamp unique, 326 | lsn pg_lsn 327 | ); 328 | 329 | COMMENT ON TABLE pg_telemetry_wal_log IS $$ 330 | This table logs the times and results of wal telemetry readings so that 331 | deltas can be calculated. At least one row must be present to get any useful 332 | data out of the wal_telemetry() function at all. 333 | 334 | If you get one telemetry entry a minute, over the course of a year you will get 335 | just over half a million entries. These are indexed on both epoch and timestamp 336 | so access is not impaired, but if you want ot purge, be careful to leave at 337 | least one entry at the end. 338 | 339 | You can also process these as a time series using WINDOW functions like lag. 
340 | $$; 341 | 342 | -- WAL telemetry 343 | 344 | create function wal_telemetry_create_or_select_record() 345 | returns pg_telemetry_wal_log language plpgsql as 346 | $$ 347 | declare log_entry pg_telemetry_wal_log; 348 | begin 349 | if pg_is_in_recovery() then 350 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 351 | else 352 | insert into pg_telemetry_wal_log 353 | select extract('epoch' from now()), now(), 354 | pg_current_wal_lsn() as wal_location 355 | returning * into log_entry; 356 | end if; 357 | return log_entry; 358 | end; 359 | $$; 360 | 361 | create function wal_telemetry() returns table ( 362 | current_epoch numeric, last_epoch numeric, secs_elapsed numeric, 363 | current_lsn pg_lsn, last_lsn pg_lsn, bytes_elapsed numeric, 364 | bytes_per_sec numeric 365 | ) language sql as $$ 366 | select c.run_time as current_epoch, l.run_time as last_epoch, 367 | c.run_time - l.run_time as secs_elapsed, 368 | c.lsn as current_lsn, l.lsn as last_lsn, 369 | c.lsn - l.lsn as bytes_elapsed, 370 | (c.lsn - l.lsn)::numeric / (c.run_time - l.run_time) as bytes_per_sec 371 | FROM wal_telemetry_create_or_select_record() c, 372 | lateral (select * from pg_telemetry_wal_log where run_time < c.run_time 373 | order by run_time desc limit 1) l; 374 | $$ set search_path from current; 375 | 376 | select wal_telemetry(); 377 | 378 | comment on function wal_telemetry() is $$ 379 | The wal_telemetry() function checks the current wal location and compares 380 | with the last entry in the pg_telemetry_wal_log. It then provides for you 381 | both current and last data, and the differences between them. These include 382 | bytes elapsed and seconds elapsed, and bytes per sec. 383 | 384 | The function is designed so that you can export delta information to a monitoring 385 | solution such as munin or prometheus without the latter having to know anything 386 | about lsn representation or losing information in the process. 
387 | 388 | On a replica this does not write to the table and measures deltas from the last 389 | this was written on the master. 390 | $$; 391 | 392 | -- Replication slots 393 | 394 | CREATE OR REPLACE VIEW replication_slot_lag as 395 | SELECT slot_name, slot_type, active, restart_lsn, to_jsonb(s) as full_data, 396 | now() as querytime, CASE WHEN pg_is_in_recovery() 397 | THEN pg_last_wal_replay_lsn() 398 | ELSE pg_current_wal_lsn() END 399 | AS pg_current_xlog_location, 400 | CASE WHEN pg_is_in_recovery() THEN null::int 401 | ELSE pg_current_wal_lsn() - restart_lsn END 402 | AS current_lag_bytes 403 | FROM pg_replication_slots s 404 | ORDER BY s.slot_name; 405 | 406 | COMMENT ON VIEW replication_slot_lag IS 407 | $$ 408 | This view monitors lag on downstream slots. It compares the last sent wal 409 | segment to the current known wal location. 410 | 411 | For master database, the current wal location is self-explanatory. For replicas 412 | we use the last received WAL location instead. Note that replicas can have 413 | replication slots for downstream replication tracking. 
414 | $$; 415 | 416 | create or replace function get_autovacuum_vacuum_info 417 | ( 418 | _except regclass[] default null, 419 | out queue_depth int8, 420 | out total_dead_tup int8 421 | ) 422 | returns record 423 | language plpgsql stable as 424 | $fnc$ 425 | begin 426 | select 427 | count(*), 428 | sum(n_dead_tup) 429 | into queue_depth, total_dead_tup 430 | from 431 | pg_class c, --lateral 432 | pg_stat_get_dead_tuples(c.oid) n_dead_tup 433 | where 434 | coalesce(c.oid != any (_except), true) 435 | and n_dead_tup > 436 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_threshold'), current_setting('autovacuum_vacuum_threshold')::int4)+ 437 | reltuples*coalesce((select option_value::float4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_vacuum_scale_factor'), current_setting('autovacuum_vacuum_scale_factor')::float4) 438 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 439 | ; 440 | return; 441 | end; 442 | $fnc$; 443 | 444 | create or replace function get_autovacuum_analyze_info 445 | ( 446 | _except regclass[] default array['pg_catalog.pg_statistic'], 447 | out queue_depth int8, 448 | out total_mod_since_analyze int8 449 | ) 450 | returns record 451 | language plpgsql stable as 452 | $fnc$ 453 | begin 454 | select 455 | count(*), 456 | sum(n_mod_since_analyze) 457 | into queue_depth, total_mod_since_analyze 458 | from 459 | pg_class c, 460 | pg_stat_get_mod_since_analyze(c.oid) n_mod_since_analyze 461 | where 462 | c.relnamespace != 'pg_toast'::regnamespace 463 | and coalesce(c.oid != any (_except), true) 464 | and n_mod_since_analyze > 465 | coalesce((select option_value::int4 from pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_threshold'), current_setting('autovacuum_analyze_threshold')::int4)+ 466 | reltuples*coalesce((select option_value::float4 from 
pg_options_to_table(reloptions) where option_name = 'autovacuum_analyze_scale_factor'), current_setting('autovacuum_analyze_scale_factor')::float4) 467 | and not exists (select from pg_options_to_table(reloptions) where option_name = 'autovacuum_enabled' and option_value::bool = false) 468 | ; 469 | return; 470 | end; 471 | $fnc$; 472 | 473 | create or replace function @extschema@.vacuum_progress() 474 | returns table 475 | ( 476 | datname name, 477 | schemaname name, 478 | relname name, 479 | phase text, 480 | heap_blks_total int8, 481 | heap_blks_scanned int8, 482 | heap_blks_vacuumed int8, 483 | index_vacuum_count int8, 484 | max_dead_tuples int8, 485 | num_dead_tuples int8 486 | ) 487 | language plpgsql stable as 488 | $fnc$ 489 | begin 490 | /* too lazy to check which version of pg the view was added in, so 491 | * just check if its there and return null if its not 492 | */ 493 | if exists (select from pg_class c where c.relnamespace='pg_catalog'::regnamespace and c.relname = 'pg_stat_progress_vacuum') then 494 | return query 495 | select 496 | v.datname, 497 | c.relnamespace::regnamespace::name as schemaname, 498 | c.relname, 499 | v.phase, 500 | v.heap_blks_total, 501 | v.heap_blks_scanned, 502 | v.heap_blks_vacuumed, 503 | v.index_vacuum_count, 504 | v.max_dead_tuples, 505 | v.num_dead_tuples 506 | from pg_stat_progress_vacuum v 507 | left join pg_class c on c.oid=v.relid; 508 | end if; 509 | 510 | return; 511 | end; 512 | $fnc$; 513 | 514 | create table @extschema@.long_running_query_rules 515 | ( 516 | priority int4, 517 | application_name_ilike text, 518 | usename name, 519 | state text, 520 | alert_threshold interval not null, 521 | active_since timestamptz, 522 | active_until timestamptz, 523 | created_at timestamptz default now(), 524 | comment text 525 | ); 526 | 527 | insert into @extschema@.long_running_query_rules(priority, application_name_ilike, usename, state, alert_threshold) 528 | values 529 | (0, 'pg\_dump%', null, null, interval'6 hours'), 
-- pg_dump 6 hours 530 | (0, 'pg2ch', null, null, interval'3 hours'), -- pg2ch 3 hours 531 | (100, null, null, 'idle in transaction', interval'5 minutes'), -- any idle transaction 5 minutes 532 | (100, null, null, 'idle in transaction (aborted)', interval'5 minutes'), -- same as above, except one of the statements in the transaction caused an error 533 | (1000, null, null, null, interval'1 hour'); -- anything else 1 hour 534 | 535 | 536 | create or replace view @extschema@.long_running_queries 537 | ( 538 | current_state_duration, 539 | query_duration, 540 | pid, 541 | is_slave, 542 | application_name, 543 | username, 544 | database, 545 | backend_type, 546 | client_addr, 547 | state, 548 | wait_event, 549 | wait_event_type, 550 | query 551 | ) as 552 | select 553 | now() - p.state_change as current_state_duration, 554 | now() - p.query_start as query_duration, 555 | p.pid, 556 | pg_is_in_recovery() as is_slave, 557 | p.application_name, 558 | p.usename, 559 | p.datname as database, 560 | p.backend_type, 561 | p.client_addr, 562 | p.state, 563 | p.wait_event, 564 | p.wait_event_type, 565 | p.query 566 | from pg_catalog.pg_stat_activity p, 567 | lateral 568 | ( 569 | select 570 | alert_threshold, 571 | active_since, 572 | active_until, 573 | state 574 | from @extschema@.long_running_query_rules l 575 | where 576 | coalesce(p.application_name ilike l.application_name_ilike, true) 577 | and coalesce(p.usename = l.usename, true) 578 | and coalesce(p.state = l.state, true) 579 | and coalesce(now() >= l.active_since, true) 580 | and coalesce(now() <= l.active_until, true) 581 | order by priority asc 582 | limit 1 583 | ) l 584 | where 585 | p.state != 'idle' 586 | and backend_type = 'client backend' 587 | and ((l.state is NOT NULL and age(now(), state_change) > l.alert_threshold) 588 | or (l.state is NULL and age(now(), query_start) > l.alert_threshold)) 589 | order by current_state_duration desc; 590 | 
-------------------------------------------------------------------------------- /extension/pgtelemetry--1.5--1.6.sql: -------------------------------------------------------------------------------- 1 | -- WAL telemetry 2 | 3 | create or replace function wal_telemetry_create_or_select_record() 4 | returns pg_telemetry_wal_log language plpgsql as 5 | $$ 6 | declare log_entry pg_telemetry_wal_log; 7 | begin 8 | if pg_is_in_recovery() then 9 | select * into log_entry from pg_telemetry_wal_log order by run_time desc limit 1; 10 | else 11 | delete from pg_telemetry_wal_log where timestamp < (current_timestamp - interval '1 month'); 12 | insert into pg_telemetry_wal_log 13 | select extract('epoch' from now()), now(), 14 | pg_current_wal_lsn() as wal_location 15 | returning * into log_entry; 16 | end if; 17 | return log_entry; 18 | end; 19 | $$; 20 | -------------------------------------------------------------------------------- /extras/prometheus/queries-statements.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adjust/pg-telemetry/b0e1ccb8c5c6e3df5025ab6b402bfdbabe7458c4/extras/prometheus/queries-statements.yaml -------------------------------------------------------------------------------- /extras/prometheus/queries-wal.yaml: -------------------------------------------------------------------------------- 1 | wal_telemetry: 2 | query: 3 | "select now(), bytes_per_sec from pgtelemetry.wal_telemetry()" 4 | metrics: 5 | - now: 6 | usage: "LABEL" 7 | description: "Time of measurement" 8 | - bytes_per_sec: 9 | usage: "GAUGE" 10 | description: "Bytes per sec to WAL" 11 | -------------------------------------------------------------------------------- /extras/prometheus/queries.yaml: -------------------------------------------------------------------------------- 1 | # A queries.yaml file for postgres_exporter to prometheus 2 | 3 | relation_total_size: 4 | query: "select relation, inclusive_bytes, 
exclusive_bytes from pgtelemetry.relation_total_size" 5 | metrics: 6 | - relation: 7 | usage: "LABEL" 8 | description: "Table Name" 9 | - inclusive_bytes: 10 | usage: "GAUGE" 11 | description: "Size of table, including indexes and toast" 12 | - exclusive_bytes: 13 | usage: "GAUGE" 14 | description: "Size of table, excluding indexes and toast" 15 | 16 | catalog_total_size: 17 | query: "select relation, inclusive_bytes, exclusive_bytes from pgtelemetry.catalog_total_size" 18 | metrics: 19 | - relation: 20 | usage: "LABEL" 21 | description: "Table Name" 22 | - inclusive_bytes: 23 | usage: "GAUGE" 24 | description: "Size of table, including indexes and toast" 25 | - exclusive_bytes: 26 | usage: "GAUGE" 27 | description: "Size of table, excluding indexes and toast" 28 | 29 | toast_total_size: 30 | query: "select relation, exclusive_bytes from pgtelemetry.relation_toast_size" 31 | metrics: 32 | - relation: 33 | usage: "LABEL" 34 | description: "Table Name" 35 | - exclusive_bytes: 36 | usage: "GAUGE" 37 | description: "Size of the table's TOAST data" 38 | 39 | tablespace_total_size: 40 | query: "select name, bytes from pgtelemetry.tablespace_size" 41 | metrics: 42 | - name: 43 | usage: "LABEL" 44 | description: "Tablespace Name" 45 | - bytes: 46 | usage: "GAUGE" 47 | description: "Size of tablespace" 48 | 49 | database_total_size: 50 | query: "select name, bytes from pgtelemetry.database_size" 51 | metrics: 52 | - name: 53 | usage: "LABEL" 54 | description: "Database Name" 55 | - bytes: 56 | usage: "GAUGE" 57 | description: "Size of database" 58 | 59 | connections_by_application: 60 | query: 'select application_name, "count" from pgtelemetry.connections_by_application' 61 | metrics: 62 | - application_name: 63 | usage: "LABEL" 64 | description: "Application name of connections" 65 | - count: 66 | usage: "GAUGE" 67 | description: "Number of connections current" 68 | 69 | connections_by_state: 70 | query: 'select state, 
"count" from pgtelemetry.connections_by_state' 71 | metrics: 72 | - state: 73 | usage: "LABEL" 74 | description: "Query state" 75 | - count: 76 | usage: "GAUGE" 77 | description: "Number of connections current" 78 | 79 | connections_by_ip_source: 80 | query: 'select client_addr, "count" from pgtelemetry.connections_by_ip_source' 81 | metrics: 82 | - client_addr: 83 | usage: "LABEL" 84 | - count: 85 | usage: "GAUGE" 86 | description: "Number of connections current" 87 | 88 | waiting_connections_by_event_type: 89 | query: 'select wait_event_type, "count" from pgtelemetry.waiting_connections_by_event_type' 90 | metrics: 91 | - wait_event_type: 92 | usage: "LABEL" 93 | description: "General Reason for Wait" 94 | - count: 95 | usage: "GAUGE" 96 | description: "Number of waiting connections (cluster)" 97 | 98 | locks_by_type: 99 | query: 'select locktype, "count" from pgtelemetry.locks_by_type' 100 | metrics: 101 | - locktype: 102 | usage: "LABEL" 103 | description: "Type of lock" 104 | - count: 105 | usage: "GAUGE" 106 | description: "Number of locks (cluster)" 107 | 108 | locks_by_mode: 109 | query: 'select mode, "count" from pgtelemetry.locks_by_mode' 110 | metrics: 111 | - mode: 112 | usage: "LABEL" 113 | description: "Lock mode" 114 | - count: 115 | usage: "GAUGE" 116 | description: "Number of locks (cluster)" 117 | 118 | tuple_access_stats: 119 | query: "SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze from pgtelemetry.tuple_access_stats" 120 | metrics: 121 | - schemaname: 122 | usage: "LABEL" 123 | description: "Schema of relation" 124 | - relname: 125 | usage: "LABEL" 126 | description: "Name of relation" 127 | - seq_scan: 128 | usage: "COUNTER" 129 | description: "Number of sequential scans" 130 | - seq_tup_read: 131 | usage: "COUNTER" 132 | description: "Tuples fetched by sequential scan" 133 | - idx_scan: 134 | usage: "COUNTER" 135 | description: "Number of index scans" 136 | - 
idx_tup_fetch: 137 | usage: "COUNTER" 138 | description: "Number of tuples fetched by index scan" 139 | - n_tup_ins: 140 | usage: "COUNTER" 141 | description: "Number of tuples inserted" 142 | - n_tup_upd: 143 | usage: "COUNTER" 144 | description: "Number of tuples updated" 145 | - n_tup_del: 146 | usage: "COUNTER" 147 | description: "Number of tuples deleted" 148 | - n_tup_hot_upd: 149 | usage: "COUNTER" 150 | description: "Number of tuples HOT updated (i.e. no index update)" 151 | - n_live_tup: 152 | usage: "GAUGE" 153 | description: "Estimated live tuples" 154 | - n_dead_tup: 155 | usage: "GAUGE" 156 | description: "Estimated dead tuples" 157 | - n_mod_since_analyze: 158 | usage: "GAUGE" 159 | description: "Estimated modified tuples since analyze" 160 | 161 | autovacuum_stats: 162 | query: "select schemaname, relname, age_last_vacuum, vacuum_count, age_last_autovacuum, autovacuum_count, age_last_analyze, analyze_count, age_last_autoanalyze, autoanalyze_count from pgtelemetry.autovacuum_stats" 163 | metrics: 164 | - schemaname: 165 | usage: "LABEL" 166 | description: "Schema of relation" 167 | - relname: 168 | usage: "LABEL" 169 | description: "Name of relation" 170 | - age_last_vacuum: 171 | usage: "GAUGE" 172 | description: "Seconds since last vacuum" 173 | - vacuum_count: 174 | usage: "COUNTER" 175 | description: "Number of vacuums of table" 176 | - age_last_autovacuum: 177 | usage: "GAUGE" 178 | description: "Seconds since last autovacuum" 179 | - autovacuum_count: 180 | usage: "COUNTER" 181 | description: "Number of autovacuums of table" 182 | - age_last_analyze: 183 | usage: "GAUGE" 184 | description: "Seconds since last analyze" 185 | - analyze_count: 186 | usage: "COUNTER" 187 | description: "Number of analyze runs of table" 188 | - age_last_autoanalyze: 189 | usage: "GAUGE" 190 | description: "Seconds since last autoanalyze" 191 | - autoanalyze_count: 192 | usage: "COUNTER" 193 | description: "Number of autoanalyze runs of table" 194 | 195 | 
replication_slot_lag: 196 | query: "select slot_name, slot_type, current_lag_bytes from pgtelemetry.replication_slot_lag" 197 | metrics: 198 | - slot_name: 199 | usage: "LABEL" 200 | description: "Name" 201 | - slot_type: 202 | usage: "LABEL" 203 | description: "Logical or Physical" 204 | - current_lag_bytes: 205 | usage: "GAUGE" 206 | description: "WAL bytes not guaranteed received" 207 | 208 | -------------------------------------------------------------------------------- /extras/regression/postgresql.conf: -------------------------------------------------------------------------------- 1 | shared_preload_libraries = 'pg_stat_statements' 2 | -------------------------------------------------------------------------------- /extras/zabbix/checkNoLongRunningQueries.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use DBI; 4 | use strict; 5 | use warnings; 6 | 7 | # BEGIN CONF 8 | ## set up your limits here: 9 | 10 | my $max_duration = '900'; # in seconds 11 | 12 | # set your host and db here. 
Ideally use a .pgpass for authentication 13 | 14 | my $host = 'localhost'; # hosts starting with / are paths to sockets 15 | my $port = 5432; 16 | my $db = 'postgres'; # doesn't matter since this is a db cluster global 17 | my $dbuser = 'postgres'; 18 | 19 | # END CONF 20 | 21 | my $dbh = DBI->connect("dbi:Pg:host=$host port=$port dbname=$db", $dbuser) or die 'No db connection'; 22 | 23 | my $sth = $dbh->prepare('select extract(epoch from max(running_for)) as secs from pgtelemetry.longest_running_active_queries'); 24 | $sth->execute(); 25 | while (my $row = $sth->fetchrow_hashref('NAME_lc')){ 26 | if ($row->{secs} > $max_duration){ 27 | warn "Query running duration exceeded maximum duration of $max_duration"; 28 | exit 1; 29 | } 30 | } 31 | exit 0; 32 | 33 | -------------------------------------------------------------------------------- /extras/zabbix/checkReplicationLag.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use DBI; 4 | use strict; 5 | use warnings; 6 | 7 | # BEGIN CONF 8 | ## set up your limits hash here: 9 | 10 | my $max_lag_bytes = 1024 * 1024; #1MB 11 | 12 | # set your host and db here. 
Ideally use a .pgpass for authentication 13 | 14 | my $host = 'localhost'; # hosts starting with / are paths to sockets 15 | my $port = 5432; 16 | my $db = 'postgres'; # doesn't matter since this is a db cluster global 17 | my $dbuser = 'postgres'; 18 | 19 | # END CONF 20 | 21 | my $dbh = DBI->connect("dbi:Pg:host=$host port=$port dbname=$db", $dbuser) or die 'No db connection'; 22 | 23 | my $sth = $dbh->prepare('select max(current_lag_bytes) from pgtelemetry.replication_slot_lag'); 24 | $sth->execute(); 25 | while (my $lag = $sth->fetchrow_array){ 26 | if (defined $lag and $lag > $max_lag_bytes){ 27 | warn "Replication lag limit $max_lag_bytes bytes exceeded"; 28 | exit 1; 29 | } 30 | } 31 | exit 0; 32 | 33 | -------------------------------------------------------------------------------- /extras/zabbix/checkTablespaceSize.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use DBI; 4 | use strict; 5 | use warnings; 6 | 7 | # BEGIN CONF 8 | ## set up your limits hash here: 9 | 10 | my %tslimits = ( 11 | pg_default => 1024**4, # 1TB; ** is exponentiation (^ would be bitwise XOR = 1028) 12 | ); 13 | 14 | # set your host and db here. 
Ideally use a .pgpass for authentication 15 | 16 | my $host = 'localhost'; # hosts starting with / are paths to sockets 17 | my $port = 5432; 18 | my $db = 'postgres'; # doesn't matter since this is a db cluster global 19 | my $dbuser = 'postgres'; 20 | 21 | # END CONF 22 | 23 | my $dbh = DBI->connect("dbi:Pg:host=$host port=$port dbname=$db", $dbuser) or die 'No db connection'; 24 | 25 | my $sth = $dbh->prepare('select name, bytes from pgtelemetry.tablespace_size'); 26 | $sth->execute(); 27 | while (my $row = $sth->fetchrow_hashref('NAME_lc')){ 28 | if (exists $tslimits{$row->{name}} and $row->{bytes} > $tslimits{$row->{name}} ){ # fire only when actual size exceeds the configured limit 29 | warn "Tablespace $row->{name} is above limit"; 30 | exit 1; 31 | } 32 | } 33 | exit 0; 34 | 35 | -------------------------------------------------------------------------------- /extras/zabbix/checkWaitingQueries.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use DBI; 4 | use strict; 5 | use warnings; 6 | 7 | # BEGIN CONF 8 | ## set up your limits here: 9 | 10 | my $max_waiting = 100; 11 | 12 | # set your host and db here. 
Ideally use a .pgpass for authentication 13 | 14 | my $host = 'localhost'; # hosts starting with / are paths to sockets 15 | my $port = 5432; 16 | my $db = 'postgres'; # doesn't matter since this is a db cluster global 17 | my $dbuser = 'postgres'; 18 | 19 | # END CONF 20 | 21 | my $dbh = DBI->connect("dbi:Pg:host=$host port=$port dbname=$db", $dbuser) or die 'No db connection'; 22 | 23 | my $sth = $dbh->prepare('select sum(count) from pgtelemetry.waiting_connections_by_event_type'); 24 | $sth->execute(); 25 | while (my ($count) = $sth->fetchrow_array){ 26 | if ( defined $count and $count > $max_waiting){ 27 | warn "Too many waiting queries"; 28 | exit 1; 29 | } 30 | } 31 | exit 0; 32 | 33 | -------------------------------------------------------------------------------- /pgtelemetry.control: -------------------------------------------------------------------------------- 1 | default_version = '1.6' 2 | directory = 'extension' 3 | requires = 'pg_stat_statements, plpgsql' 4 | relocatable = 'false' 5 | superuser = 'true' 6 | schema = 'pgtelemetry' 7 | 8 | -------------------------------------------------------------------------------- /sql/definitions.sql: -------------------------------------------------------------------------------- 1 | set client_min_messages to warning; 2 | create extension pgtelemetry cascade; 3 | select count(*) > 0 from pgtelemetry.database_size; 4 | SELECT count(*) > 0 from pgtelemetry.relation_total_size; 5 | SELECT count(*) > 0 from pgtelemetry.catalog_total_size; 6 | SELECT count(*) > 0 from pgtelemetry.index_size; 7 | SELECT count(*) > 0 from pgtelemetry.relation_toast_size; 8 | SELECT count(*) > 0 from pgtelemetry.tablespace_size; 9 | SELECT count(*) > 0 from pgtelemetry.database_size; 10 | SELECT count(*) > 0 from pgtelemetry.connections_by_application; 11 | SELECT count(*) > 0 from pgtelemetry.connections_by_state; 12 | SELECT count(*) > 0 from pgtelemetry.connections_by_ip_source; 13 | SELECT count(*) > 0 from 
pgtelemetry.longest_running_active_queries; 14 | SELECT count(*) > -1 from pgtelemetry.waiting_connections_by_event_type; 15 | SELECT count(*) > 0 from pgtelemetry.locks_by_type; 16 | SELECT count(*) > 0 from pgtelemetry.locks_by_mode; 17 | SELECT pgtelemetry.count_waiting_on_locks_more_than_seconds(300) >= 0; 18 | SELECT count(*) > 0 from pgtelemetry.tuple_access_stats; 19 | SELECT count(*) > 0 from pgtelemetry.autovacuum_stats; 20 | SELECT count(*) > 0 from pgtelemetry.statement_query_rows_time; 21 | SELECT count(*) > 0 from pgtelemetry.statement_query_buffers; 22 | SELECT count(*) > 0 from pgtelemetry.wal_telemetry(); 23 | SELECT count(*) > 0 from pgtelemetry.replication_slot_lag; 24 | SELECT count(*) > 0 from pgtelemetry.get_autovacuum_vacuum_info(); 25 | SELECT count(*) > 0 from pgtelemetry.get_autovacuum_analyze_info(); 26 | SELECT count(*) > 0 from pgtelemetry.vacuum_progress(); 27 | SELECT count(*) > 0 from pgtelemetry.long_running_queries; 28 | --------------------------------------------------------------------------------