├── .dockerignore ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── RELEASENOTE.md ├── config ├── 0000-doc.yml ├── 0110-pg.yml ├── 0120-pg_meta.yml ├── 0130-pg_setting.yml ├── 0210-pg_repl.yml ├── 0220-pg_sync_standby.yml ├── 0230-pg_downstream.yml ├── 0240-pg_slot.yml ├── 0250-pg_recv.yml ├── 0260-pg_sub.yml ├── 0270-pg_origin.yml ├── 0300-pg_io.yml ├── 0310-pg_size.yml ├── 0320-pg_archiver.yml ├── 0330-pg_bgwriter.yml ├── 0331-pg_checkpointer.yml ├── 0340-pg_ssl.yml ├── 0350-pg_checkpoint.yml ├── 0360-pg_recovery.yml ├── 0370-pg_slru.yml ├── 0380-pg_shmem.yml ├── 0390-pg_wal.yml ├── 0410-pg_activity.yml ├── 0420-pg_wait.yml ├── 0430-pg_backend.yml ├── 0440-pg_xact.yml ├── 0450-pg_lock.yml ├── 0460-pg_query.yml ├── 0510-pg_vacuuming.yml ├── 0520-pg_indexing.yml ├── 0530-pg_clustering.yml ├── 0540-pg_backup.yml ├── 0610-pg_db.yml ├── 0620-pg_db_confl.yml ├── 0640-pg_pubrel.yml ├── 0650-pg_subrel.yml ├── 0700-pg_table.yml ├── 0710-pg_index.yml ├── 0720-pg_func.yml ├── 0730-pg_seq.yml ├── 0740-pg_relkind.yml ├── 0750-pg_defpart.yml ├── 0810-pg_table_size.yml ├── 0820-pg_table_bloat.yml ├── 0830-pg_index_bloat.yml ├── 0910-pgbouncer_list.yml ├── 0920-pgbouncer_database.yml ├── 0930-pgbouncer_stat.yml ├── 0940-pgbouncer_pool.yml ├── 1000-pg_wait_event.yml ├── 1800-pg_tsdb_hypertable.yml ├── 1900-pg_citus.yml └── 2000-pg_heartbeat.yml ├── exporter ├── arg.go ├── collector.go ├── column.go ├── config.go ├── exporter.go ├── global.go ├── main.go ├── pgurl.go ├── query.go ├── server.go └── utils.go ├── go.mod ├── go.sum ├── logo.png ├── main.go ├── monitor ├── initdb.sh ├── pgrds-instance.json └── pgsql-exporter.json ├── package ├── nfpm-amd64-deb.yaml ├── nfpm-amd64-rpm.yaml ├── nfpm-arm64-deb.yaml ├── nfpm-arm64-rpm.yaml ├── pg_exporter.default ├── pg_exporter.service └── preinstall.sh └── pg_exporter.yml /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !go.mod 3 | !go.sum 4 | !main.go 5 | 
!exporter 6 | !pg_exporter.yml 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # binary files 2 | pg_exporter 3 | 4 | # tmp files 5 | test/ 6 | deploy/ 7 | upload.sh 8 | temp/ 9 | dist/ 10 | .DS_Store 11 | 12 | # IDE files 13 | .vscode/ 14 | .idea/ 15 | .code/ 16 | pg_exporter.iml 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | FROM golang:1.24-alpine AS builder-env 3 | 4 | # Build a self-contained pg_exporter container with a clean environment and no 5 | # dependencies. 6 | # 7 | # build with 8 | # 9 | # docker buildx build -f Dockerfile --tag pg_exporter . 10 | # 11 | 12 | WORKDIR /build 13 | 14 | COPY go.mod go.sum ./ 15 | RUN \ 16 | --mount=type=cache,target=/go/pkg/mod \ 17 | --mount=type=cache,target=/root/.cache/go-build \ 18 | CGO_ENABLED=0 GOOS=linux go mod download 19 | 20 | COPY . /build 21 | RUN \ 22 | --mount=type=cache,target=/go/pkg/mod \ 23 | --mount=type=cache,target=/root/.cache/go-build \ 24 | CGO_ENABLED=0 GOOS=linux go build -a -o /pg_exporter . 
25 | 26 | FROM scratch 27 | LABEL org.opencontainers.image.authors="Ruohang Feng , Craig Ringer " \ 28 | org.opencontainers.image.url="https://github.com/pgsty/pg_exporter" \ 29 | org.opencontainers.image.source="https://github.com/pgsty/pg_exporter" \ 30 | org.opencontainers.image.licenses="Apache-2.0" \ 31 | org.opencontainers.image.title="pg_exporter" \ 32 | org.opencontainers.image.description="PostgreSQL/Pgbouncer metrics exporter for Prometheus" 33 | 34 | WORKDIR /bin 35 | COPY --from=builder-env /pg_exporter /bin/pg_exporter 36 | COPY pg_exporter.yml /etc/pg_exporter.yml 37 | EXPOSE 9630/tcp 38 | ENTRYPOINT ["/bin/pg_exporter"] 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [2019-2025] [Ruohang Feng](rh@vonng.com) 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # File : Makefile 3 | # Mtime : 2025-04-21 4 | # License : Apache-2.0 @ https://github.com/pgsty/pg_exporter 5 | # Copyright : 2018-2025 Ruohang Feng / Vonng (rh@vonng.com) 6 | #==============================================================# 7 | VERSION ?= v1.0.0 8 | BUILD_DATE := $(shell date '+%Y%m%d%H%M%S') 9 | GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown") 10 | GIT_REVISION := $(shell git rev-parse --short HEAD 2>/dev/null || echo "HEAD") 11 | LDFLAGS_META := -X 'pg_exporter/exporter.Version=$(VERSION)' \ 12 | -X 'pg_exporter/exporter.Branch=$(GIT_BRANCH)' \ 13 | -X 'pg_exporter/exporter.Revision=$(GIT_REVISION)' \ 14 | -X 'pg_exporter/exporter.BuildDate=$(BUILD_DATE)' 15 | LDFLAGS_STATIC := -s -w -extldflags 
\"-static\" $(LDFLAGS_META) 16 | 17 | # Release Dir 18 | LINUX_AMD_DIR:=dist/$(VERSION)/pg_exporter-$(VERSION).linux-amd64 19 | LINUX_ARM_DIR:=dist/$(VERSION)/pg_exporter-$(VERSION).linux-arm64 20 | DARWIN_AMD_DIR:=dist/$(VERSION)/pg_exporter-$(VERSION).darwin-amd64 21 | DARWIN_ARM_DIR:=dist/$(VERSION)/pg_exporter-$(VERSION).darwin-arm64 22 | WINDOWS_DIR:=dist/$(VERSION)/pg_exporter-$(VERSION).windows-amd64 23 | 24 | 25 | ############################################################### 26 | # Shortcuts # 27 | ############################################################### 28 | build: 29 | go build -ldflags "$(LDFLAGS_META)" -o pg_exporter 30 | clean: 31 | rm -rf pg_exporter 32 | build-darwin-amd64: 33 | CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -a -ldflags "$(LDFLAGS_STATIC)" -o pg_exporter 34 | build-darwin-arm64: 35 | CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -a -ldflags "$(LDFLAGS_STATIC)" -o pg_exporter 36 | build-linux-amd64: 37 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -ldflags "$(LDFLAGS_STATIC)" -o pg_exporter 38 | build-linux-arm64: 39 | CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -a -ldflags "$(LDFLAGS_STATIC)" -o pg_exporter 40 | 41 | r: release 42 | release: release-linux release-darwin 43 | 44 | release-linux: linux-amd64 linux-arm64 45 | linux-amd64: clean build-linux-amd64 46 | rm -rf $(LINUX_AMD_DIR) && mkdir -p $(LINUX_AMD_DIR) 47 | nfpm package --packager rpm --config package/nfpm-amd64-rpm.yaml --target dist/$(VERSION) 48 | nfpm package --packager deb --config package/nfpm-amd64-deb.yaml --target dist/$(VERSION) 49 | cp -r pg_exporter $(LINUX_AMD_DIR)/pg_exporter 50 | cp -f pg_exporter.yml $(LINUX_AMD_DIR)/pg_exporter.yml 51 | cp -f LICENSE $(LINUX_AMD_DIR)/LICENSE 52 | tar -czf dist/$(VERSION)/pg_exporter-$(VERSION).linux-amd64.tar.gz -C dist/$(VERSION) pg_exporter-$(VERSION).linux-amd64 53 | rm -rf $(LINUX_AMD_DIR) 54 | 55 | linux-arm64: clean build-linux-arm64 56 | rm -rf $(LINUX_ARM_DIR) && mkdir -p $(LINUX_ARM_DIR) 57 
| nfpm package --packager rpm --config package/nfpm-arm64-rpm.yaml --target dist/$(VERSION) 58 | nfpm package --packager deb --config package/nfpm-arm64-deb.yaml --target dist/$(VERSION) 59 | cp -r pg_exporter $(LINUX_ARM_DIR)/pg_exporter 60 | cp -f pg_exporter.yml $(LINUX_ARM_DIR)/pg_exporter.yml 61 | cp -f LICENSE $(LINUX_ARM_DIR)/LICENSE 62 | tar -czf dist/$(VERSION)/pg_exporter-$(VERSION).linux-arm64.tar.gz -C dist/$(VERSION) pg_exporter-$(VERSION).linux-arm64 63 | rm -rf $(LINUX_ARM_DIR) 64 | 65 | release-darwin: darwin-amd64 darwin-arm64 66 | darwin-amd64: clean build-darwin-amd64 67 | rm -rf $(DARWIN_AMD_DIR) && mkdir -p $(DARWIN_AMD_DIR) 68 | cp -r pg_exporter $(DARWIN_AMD_DIR)/pg_exporter 69 | cp -f pg_exporter.yml $(DARWIN_AMD_DIR)/pg_exporter.yml 70 | cp -f LICENSE $(DARWIN_AMD_DIR)/LICENSE 71 | tar -czf dist/$(VERSION)/pg_exporter-$(VERSION).darwin-amd64.tar.gz -C dist/$(VERSION) pg_exporter-$(VERSION).darwin-amd64 72 | rm -rf $(DARWIN_AMD_DIR) 73 | 74 | darwin-arm64: clean build-darwin-arm64 75 | rm -rf $(DARWIN_ARM_DIR) && mkdir -p $(DARWIN_ARM_DIR) 76 | cp -r pg_exporter $(DARWIN_ARM_DIR)/pg_exporter 77 | cp -f pg_exporter.yml $(DARWIN_ARM_DIR)/pg_exporter.yml 78 | cp -f LICENSE $(DARWIN_ARM_DIR)/LICENSE 79 | tar -czf dist/$(VERSION)/pg_exporter-$(VERSION).darwin-arm64.tar.gz -C dist/$(VERSION) pg_exporter-$(VERSION).darwin-arm64 80 | rm -rf $(DARWIN_ARM_DIR) 81 | 82 | 83 | 84 | ############################################################### 85 | # Configuration # 86 | ############################################################### 87 | # generate merged config from separated configuration 88 | conf: 89 | rm -rf pg_exporter.yml 90 | cat config/*.yml >> pg_exporter.yml 91 | 92 | 93 | ############################################################### 94 | # Release # 95 | ############################################################### 96 | release-dir: 97 | mkdir -p dist/$(VERSION) 98 | 99 | release-clean: 100 | rm -rf dist/$(VERSION) 101 | 102 | # build 
docker image 103 | docker: docker-build 104 | docker-build: 105 | docker build -t pgsty/pg_exporter . 106 | docker image tag pgsty/pg_exporter pgsty/pg_exporter:$(VERSION) 107 | docker image tag pgsty/pg_exporter pgsty/pg_exporter:latest 108 | docker-push: 109 | docker image push --all-tags pgsty/pg_exporter 110 | 111 | ############################################################### 112 | # Develop # 113 | ############################################################### 114 | install: build 115 | sudo install -m 0755 pg_exporter /usr/bin/pg_exporter 116 | 117 | uninstall: 118 | sudo rm -rf /usr/bin/pg_exporter 119 | 120 | runb: 121 | ./pg_exporter --log.level=info --config=pg_exporter.yml --auto-discovery 122 | run: 123 | go run main.go --log.level=info --config=pg_exporter.yml --auto-discovery 124 | 125 | debug: 126 | go run main.go --log.level=debug --config=pg_exporter.yml --auto-discovery 127 | 128 | curl: 129 | curl localhost:9630/metrics | grep -v '#' | grep pg_ 130 | 131 | upload: 132 | ./upload.sh 133 | 134 | .PHONY: build clean build-darwin build-linux\ 135 | release release-darwin release-linux release-windows docker docker-build docker-push \ 136 | install uninstall debug curl upload 137 | -------------------------------------------------------------------------------- /RELEASENOTE.md: -------------------------------------------------------------------------------- 1 | # Release Note 2 | 3 | 4 | ------ 5 | 6 | ## 1.0.0 7 | 8 | Add PostgreSQL 18 metrics support 9 | 10 | - new collector branch `pg_wal_18`: 11 | - remove `write`, `sync`, `write_time`, `sync_time` metrics 12 | - move to `pg_stat_io` 13 | - new collector branch `pg_checkpointer_18`: 14 | - new metric `num_done` 15 | - new metric `slru_written` 16 | - new collector branch `pg_db_18`: 17 | - new metric `parallel_workers_to_launch` 18 | - new metric `parallel_workers_launched` 19 | - new collector branch `pg_table_18`: 20 | - `table_parallel_workers_to_launch` 21 | - 
* move `Vonng/pg_exporter` repo to `pgsty/pg_exporter`
https://github.com/pgsty/pg_exporter/releases/tag/v0.8.1
- [Do a clean build in the dockerfile](https://github.com/pgsty/pg_exporter/pull/54) by [@ringerc](https://github.com/ringerc)
- remove pg_class collector and move it to pg_table & pg_index
- now `pg_exporter.yaml` is renamed as `pg_exporter.yml` in rpm package.
This project is still under rapid evolution. I would say, if you want to use it in production, try with caution.
336 | 337 | https://github.com/pgsty/pg_exporter/releases/tag/v0.0.3 338 | 339 | 340 | 341 | ------ 342 | 343 | ## 0.0.2 344 | 345 | It's ok to try now 346 | 347 | https://github.com/pgsty/pg_exporter/releases/tag/v0.0.2 348 | 349 | 350 | 351 | ------ 352 | 353 | ## 0.0.1 354 | 355 | Add pgbouncer mode 356 | 357 | https://github.com/pgsty/pg_exporter/releases/tag/v0.0.1 -------------------------------------------------------------------------------- /config/0110-pg.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0110 pg 3 | #==============================================================# 4 | pg_primary_only: 5 | name: pg 6 | desc: PostgreSQL basic information (on primary) 7 | query: |- 8 | SELECT 9 | extract(EPOCH FROM CURRENT_TIMESTAMP) AS timestamp, 10 | extract(EPOCH FROM now() - pg_postmaster_start_time()) AS uptime, 11 | extract(EPOCH FROM pg_postmaster_start_time()) AS boot_time, 12 | pg_current_wal_lsn() - '0/0' AS lsn, 13 | pg_current_wal_insert_lsn() - '0/0' AS insert_lsn, 14 | pg_current_wal_lsn() - '0/0' AS write_lsn, 15 | pg_current_wal_flush_lsn() - '0/0' AS flush_lsn, 16 | NULL::BIGINT AS receive_lsn, 17 | NULL::BIGINT AS replay_lsn, 18 | extract(EPOCH FROM pg_conf_load_time()) AS reload_time, 19 | extract(EPOCH FROM now() - pg_conf_load_time()) AS conf_reload_time, 20 | NULL::FLOAT AS last_replay_time, 21 | 0::FLOAT AS lag, 22 | pg_is_in_recovery() AS is_in_recovery, 23 | FALSE AS is_wal_replay_paused; 24 | tags: [ cluster, primary ] 25 | ttl: 1 26 | min_version: 100000 27 | fatal: true 28 | skip: false 29 | metrics: 30 | - timestamp: { usage: GAUGE ,description: current database timestamp in unix epoch } 31 | - uptime: { usage: GAUGE ,description: seconds since postmaster start } 32 | - boot_time: { usage: GAUGE ,description: postmaster boot timestamp in unix epoch } 33 | - lsn: { usage: COUNTER ,description: log sequence number, current 
write location } 34 | - insert_lsn: { usage: COUNTER ,description: primary only, location of current wal inserting } 35 | - write_lsn: { usage: COUNTER ,description: primary only, location of current wal writing } 36 | - flush_lsn: { usage: COUNTER ,description: primary only, location of current wal syncing } 37 | - receive_lsn: { usage: COUNTER ,description: replica only, location of wal synced to disk } 38 | - replay_lsn: { usage: COUNTER ,description: replica only, location of wal applied } 39 | - reload_time: { usage: GAUGE ,description: time when configuration was last reloaded } 40 | - conf_reload_time: { usage: GAUGE ,description: seconds since last configuration reload } 41 | - last_replay_time: { usage: GAUGE ,description: time when last transaction been replayed } 42 | - lag: { usage: GAUGE ,description: replica only, replication lag in seconds } 43 | - is_in_recovery: { usage: GAUGE ,description: 1 if in recovery mode } 44 | - is_wal_replay_paused: { usage: GAUGE ,description: 1 if wal play is paused } 45 | 46 | pg_replica_only: 47 | name: pg 48 | desc: PostgreSQL basic information (on replica) 49 | query: |- 50 | SELECT 51 | extract(EPOCH FROM CURRENT_TIMESTAMP) AS timestamp, 52 | extract(EPOCH FROM now() - pg_postmaster_start_time()) AS uptime, 53 | extract(EPOCH FROM pg_postmaster_start_time()) AS boot_time, 54 | pg_last_wal_replay_lsn() - '0/0' AS lsn, 55 | NULL::BIGINT AS insert_lsn, 56 | NULL::BIGINT AS write_lsn, 57 | NULL::BIGINT AS flush_lsn, 58 | pg_last_wal_receive_lsn() - '0/0' AS receive_lsn, 59 | pg_last_wal_replay_lsn() - '0/0' AS replay_lsn, 60 | extract(EPOCH FROM pg_conf_load_time()) AS reload_time, 61 | extract(EPOCH FROM now() - pg_conf_load_time()) AS conf_reload_time, 62 | extract(EPOCH FROM pg_last_xact_replay_timestamp()) AS last_replay_time, 63 | CASE WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 64 | ELSE EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp()) END AS lag, 65 | pg_is_in_recovery() AS 
is_in_recovery, 66 | pg_is_wal_replay_paused() AS is_wal_replay_paused; 67 | 68 | tags: [ cluster, replica ] 69 | ttl: 1 70 | min_version: 100000 71 | fatal: true 72 | skip: false 73 | metrics: 74 | - timestamp: { usage: GAUGE ,description: database current timestamp } 75 | - uptime: { usage: GAUGE ,description: seconds since postmaster start } 76 | - boot_time: { usage: GAUGE ,description: unix timestamp when postmaster boot } 77 | - lsn: { usage: COUNTER ,description: log sequence number, current write location } 78 | - insert_lsn: { usage: COUNTER ,description: primary only, location of current wal inserting } 79 | - write_lsn: { usage: COUNTER ,description: primary only, location of current wal writing } 80 | - flush_lsn: { usage: COUNTER ,description: primary only, location of current wal syncing } 81 | - receive_lsn: { usage: COUNTER ,description: replica only, location of wal synced to disk } 82 | - replay_lsn: { usage: COUNTER ,description: replica only, location of wal applied } 83 | - reload_time: { usage: GAUGE ,description: time when configuration was last reloaded } 84 | - conf_reload_time: { usage: GAUGE ,description: seconds since last configuration reload } 85 | - last_replay_time: { usage: GAUGE ,description: time when last transaction been replayed } 86 | - lag: { usage: GAUGE ,description: replica only, replication lag in seconds } 87 | - is_in_recovery: { usage: GAUGE ,description: 1 if in recovery mode } 88 | - is_wal_replay_paused: { usage: GAUGE ,description: 1 if wal play paused } 89 | 90 | 91 | -------------------------------------------------------------------------------- /config/0120-pg_meta.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0120 pg_meta 3 | #==============================================================# 4 | pg_meta_13: 5 | name: pg_meta 6 | desc: PostgreSQL meta info for pg 13, with extra primary conninfo 7 | 
query: | 8 | SELECT 9 | (SELECT system_identifier FROM pg_control_system()) AS cluster_id, 10 | current_setting('cluster_name') AS cluster_name, 11 | current_setting('port') AS listen_port, 12 | current_setting('data_directory', true) AS data_dir, 13 | current_setting('config_file', true) AS conf_path, 14 | current_setting('hba_file', true) AS hba_path, 15 | current_setting('wal_level') AS wal_level, 16 | current_setting('server_encoding') AS encoding, 17 | current_setting('server_version') AS version, 18 | current_setting('server_version_num') AS ver_num, 19 | version() AS ver_str, 20 | current_setting('shared_preload_libraries', true) AS extensions, 21 | current_setting('primary_conninfo', true) AS primary_conninfo, 22 | 1 AS info 23 | ttl: 10 24 | min_version: 130000 25 | tags: [ cluster ] 26 | metrics: 27 | - cluster_id: { usage: LABEL ,description: cluster system identifier } 28 | - cluster_name: { usage: LABEL ,description: cluster name } 29 | - listen_port: { usage: LABEL ,description: listen port } 30 | - data_dir: { usage: LABEL ,description: path to data directory } 31 | - conf_path: { usage: LABEL ,description: path to postgresql.conf } 32 | - hba_path: { usage: LABEL ,description: path to pg_hba.conf } 33 | - wal_level: { usage: LABEL ,description: wal level } 34 | - encoding: { usage: LABEL ,description: server encoding } 35 | - version: { usage: LABEL ,description: server version in human-readable format } 36 | - ver_num: { usage: LABEL ,description: server version number in machine-readable format } 37 | - ver_str: { usage: LABEL ,description: complete version string } 38 | - extensions: { usage: LABEL ,description: server installed preload libraries } 39 | - primary_conninfo: { usage: LABEL ,description: connection string to upstream (do not set password here) } 40 | - info: { usage: GAUGE ,description: constant 1 } 41 | 42 | pg_meta_10: 43 | name: pg_meta 44 | desc: PostgreSQL meta info 45 | query: | 46 | SELECT 47 | (SELECT system_identifier FROM 
pg_control_system()) AS cluster_id, 48 | current_setting('cluster_name') AS cluster_name, 49 | current_setting('port') AS listen_port, 50 | current_setting('data_directory', true) AS data_dir, 51 | current_setting('config_file', true) AS conf_path, 52 | current_setting('hba_file', true) AS hba_path, 53 | current_setting('wal_level') AS wal_level, 54 | current_setting('server_encoding') AS encoding, 55 | current_setting('server_version') AS version, 56 | current_setting('server_version_num') AS ver_num, 57 | version() AS ver_str, 58 | current_setting('shared_preload_libraries', true) AS extensions, 59 | 'N/A' AS primary_conninfo, 60 | 1 AS info 61 | ttl: 10 62 | min_version: 090600 63 | max_version: 130000 64 | tags: [ cluster ] 65 | metrics: 66 | - cluster_id: { usage: LABEL ,description: cluster system identifier } 67 | - cluster_name: { usage: LABEL ,description: cluster name } 68 | - listen_port: { usage: LABEL ,description: listen port } 69 | - data_dir: { usage: LABEL ,description: path to data directory } 70 | - conf_path: { usage: LABEL ,description: path to postgresql.conf } 71 | - hba_path: { usage: LABEL ,description: path to pg_hba.conf } 72 | - wal_level: { usage: LABEL ,description: wal level } 73 | - encoding: { usage: LABEL ,description: server encoding } 74 | - version: { usage: LABEL ,description: server version in human-readable format } 75 | - ver_num: { usage: LABEL ,description: server version number in machine-readable format } 76 | - ver_str: { usage: LABEL ,description: complete version string } 77 | - extensions: { usage: LABEL ,description: server installed preload libraries } 78 | - primary_conninfo: { usage: LABEL ,description: connection string to upstream (do not set password here) } 79 | - info: { usage: GAUGE ,description: constant 1 } 80 | 81 | 82 | -------------------------------------------------------------------------------- /config/0130-pg_setting.yml: 
-------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0130 pg_setting 3 | #==============================================================# 4 | pg_setting: 5 | name: pg_setting 6 | desc: Important postgres setting parameters 7 | query: | 8 | SELECT 9 | current_setting('max_connections') AS max_connections, 10 | current_setting('max_prepared_transactions') AS max_prepared_transactions, 11 | current_setting('max_locks_per_transaction') AS max_locks_per_transaction, 12 | current_setting('max_worker_processes') AS max_worker_processes, 13 | current_setting('max_replication_slots') AS max_replication_slots, 14 | current_setting('max_wal_senders') AS max_wal_senders, 15 | current_setting('block_size') AS block_size, 16 | current_setting('wal_block_size') AS wal_block_size, 17 | pg_size_bytes(current_setting('segment_size')) AS segment_size, 18 | pg_size_bytes(current_setting('wal_segment_size')) AS wal_segment_size, 19 | CASE current_setting('data_checksums') WHEN 'on' THEN 1 ELSE 0 END AS data_checksums, 20 | CASE current_setting('wal_log_hints') WHEN 'on' THEN 1 ELSE 0 END AS wal_log_hints, 21 | CASE current_setting('wal_level') WHEN 'logical' THEN 3 WHEN 'replica' THEN 2 WHEN 'minimal' THEN 1 ELSE 0 END AS wal_level, 22 | pg_size_bytes(current_setting('work_mem', true)) AS work_memory_size, 23 | pg_size_bytes(current_setting('shared_memory_size', true)) AS shared_memory_size, 24 | CASE current_setting('huge_pages_status', true) WHEN 'on' THEN 1 WHEN 'off' THEN 0 WHEN 'unknown' THEN -1 ELSE -2 END AS hugepage_status, 25 | current_setting('shared_memory_size_in_huge_pages', true) AS hugepage_count, 26 | pg_size_bytes(current_setting('min_wal_size')) AS min_wal_size, 27 | pg_size_bytes(current_setting('max_wal_size')) AS max_wal_size, 28 | pg_size_bytes(current_setting('max_slot_wal_keep_size', true)) AS max_slot_wal_keep_size, 29 | CASE current_setting('archive_mode') 
WHEN 'off' THEN 0 WHEN 'on' THEN 1 WHEN 'always' THEN 2 ELSE -1 END AS archive_mode; 30 | ttl: 10 31 | min_version: 090600 32 | tags: [ cluster ] 33 | metrics: 34 | - max_connections: { usage: GAUGE ,description: number of concurrent connections to the database server } 35 | - max_prepared_transactions: { usage: GAUGE ,description: maximum number of transactions that can be in the prepared state simultaneously } 36 | - max_locks_per_transaction: { usage: GAUGE ,description: no more than this many distinct objects can be locked at any one time } 37 | - max_worker_processes: { usage: GAUGE ,description: maximum number of background processes that the system can support } 38 | - max_replication_slots: { usage: GAUGE ,description: maximum number of replication slots } 39 | - max_wal_senders: { usage: GAUGE ,description: maximum number of concurrent connections from standby servers } 40 | - block_size: { usage: GAUGE ,description: pg page block size, 8192 by default } 41 | - wal_block_size: { usage: GAUGE ,description: block size in WAL files } 42 | - segment_size: { usage: GAUGE ,description: segment size for database files } 43 | - wal_segment_size: { usage: GAUGE ,description: segment size for WAL files } 44 | - data_checksums: { usage: GAUGE ,description: whether data checksum is enabled, 1 enabled 0 disabled } 45 | - wal_log_hints: { usage: GAUGE ,description: whether wal_log_hints is enabled, 1 enabled 0 disabled } 46 | - wal_level: { usage: GAUGE ,description: WAL level, 1=minimal, 2=replica, 3=logical } 47 | - work_memory_size: { usage: GAUGE ,description: size of work memory in bytes } 48 | - shared_memory_size: { usage: GAUGE ,description: size of shared memory segment in bytes } 49 | - hugepage_count: { usage: GAUGE ,description: Reports the number of huge pages that are needed for the main shared memory area } 50 | - hugepage_status: { usage: GAUGE ,description: huge pages configuration status, 0=off, 1=on, -1=unknown } 51 | - min_wal_size: { usage: GAUGE 
,description: minimum size that the WAL will shrink to in bytes } 52 | - max_wal_size: { usage: GAUGE ,description: maximum size that the WAL will grow to in bytes } 53 | - max_slot_wal_keep_size: { usage: GAUGE ,description: maximum WAL size that replication slots are allowed to retain in bytes, -1 means no limit } 54 | - archive_mode: { usage: GAUGE ,description: archive mode status, 0=off, 1=on, 2=always, -1=other } 55 | 56 | 57 | -------------------------------------------------------------------------------- /config/0210-pg_repl.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0210 pg_repl 3 | #==============================================================# 4 | pg_repl_12: 5 | name: pg_repl 6 | desc: PostgreSQL replication stat metrics 12+ 7 | query: | 8 | SELECT application_name AS appname, usename, coalesce(client_addr::TEXT,'localhost') AS address, pid::TEXT, client_port, 9 | CASE state WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state, 10 | CASE sync_state WHEN 'async' THEN 0 WHEN 'potential' THEN 1 WHEN 'sync' THEN 2 WHEN 'quorum' THEN 3 ELSE -1 END AS sync_state, 11 | sync_priority, backend_xmin::TEXT::BIGINT AS backend_xmin, current.lsn - '0/0' AS lsn, 12 | current.lsn - sent_lsn AS sent_diff, current.lsn - write_lsn AS write_diff, current.lsn - flush_lsn AS flush_diff, current.lsn - replay_lsn AS replay_diff, 13 | sent_lsn - '0/0' AS sent_lsn, write_lsn - '0/0' AS write_lsn, flush_lsn - '0/0' AS flush_lsn, replay_lsn - '0/0' AS replay_lsn, 14 | coalesce(extract(EPOCH FROM write_lag), 0) AS write_lag, coalesce(extract(EPOCH FROM flush_lag), 0) AS flush_lag, coalesce(extract(EPOCH FROM replay_lag), 0) AS replay_lag, 15 | extract(EPOCH FROM current_timestamp) AS "time", extract(EPOCH FROM backend_start) AS launch_time, extract(EPOCH FROM reply_time) AS reply_time 
16 | FROM pg_stat_replication, (SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END AS lsn) current; 17 | ttl: 10 18 | min_version: 120000 19 | tags: [ cluster ] 20 | metrics: 21 | - appname: { usage: LABEL ,description: Name of the application that is connected to this WAL sender } 22 | - usename: { usage: LABEL ,description: Name of the user logged into this WAL sender process } 23 | - address: { usage: LABEL ,description: IP address of the client connected to this WAL sender, localhost for unix socket } 24 | - pid: { usage: LABEL ,description: Process ID of the WAL sender process } 25 | - client_port: { usage: GAUGE ,description: TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used } 26 | - state: { usage: GAUGE ,description: Current WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping } 27 | - sync_state: { usage: GAUGE ,description: Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum } 28 | - sync_priority: { usage: GAUGE ,description: Priority of this standby server for being chosen as the synchronous standby } 29 | - backend_xmin: { usage: COUNTER ,description: This standby's xmin horizon reported by hot_standby_feedback. 
} 30 | - lsn: { usage: COUNTER ,description: Current log position on this server } 31 | - sent_diff: { usage: GAUGE ,description: Last log position sent to this standby server diff with current lsn } 32 | - write_diff: { usage: GAUGE ,description: Last log position written to disk by this standby server diff with current lsn } 33 | - flush_diff: { usage: GAUGE ,description: Last log position flushed to disk by this standby server diff with current lsn } 34 | - replay_diff: { usage: GAUGE ,description: Last log position replayed into the database on this standby server diff with current lsn } 35 | - sent_lsn: { usage: COUNTER ,description: Last write-ahead log location sent on this connection } 36 | - write_lsn: { usage: COUNTER ,description: Last write-ahead log location written to disk by this standby server } 37 | - flush_lsn: { usage: COUNTER ,description: Last write-ahead log location flushed to disk by this standby server } 38 | - replay_lsn: { usage: COUNTER ,description: Last write-ahead log location replayed into the database on this standby server } 39 | - write_lag: { usage: GAUGE ,description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it } 40 | - flush_lag: { usage: GAUGE ,description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it } 41 | - replay_lag: { usage: GAUGE ,description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it } 42 | - time: { usage: COUNTER ,description: Current timestamp in unix epoch } 43 | - launch_time: { usage: COUNTER ,description: Time when this process was started, i.e., when the client connected to this WAL sender } 44 | - reply_time: { usage: GAUGE ,description: Send time of last reply message received from standby server } 45 | 46 | pg_repl_10: 47 | name: pg_repl 48 | desc: PostgreSQL 
replication stat metrics v10 v11 49 | query: | 50 | SELECT application_name AS appname, usename, coalesce(client_addr::TEXT,'localhost') AS address, pid::TEXT, client_port, 51 | CASE state WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state, 52 | CASE sync_state WHEN 'async' THEN 0 WHEN 'potential' THEN 1 WHEN 'sync' THEN 2 WHEN 'quorum' THEN 3 ELSE -1 END AS sync_state, 53 | sync_priority, backend_xmin::TEXT::BIGINT AS backend_xmin, current.lsn - '0/0' AS lsn, 54 | current.lsn - sent_lsn AS sent_diff, current.lsn - write_lsn AS write_diff, current.lsn - flush_lsn AS flush_diff, current.lsn - replay_lsn AS replay_diff, 55 | sent_lsn - '0/0' AS sent_lsn, write_lsn - '0/0' AS write_lsn, flush_lsn - '0/0' AS flush_lsn, replay_lsn - '0/0' AS replay_lsn, 56 | coalesce(extract(EPOCH FROM write_lag), 0) AS write_lag, coalesce(extract(EPOCH FROM flush_lag), 0) AS flush_lag, coalesce(extract(EPOCH FROM replay_lag), 0) AS replay_lag, 57 | extract(EPOCH FROM current_timestamp) AS "time", extract(EPOCH FROM backend_start) AS launch_time 58 | FROM pg_stat_replication, (SELECT CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END AS lsn) current; 59 | ttl: 10 60 | min_version: 100000 61 | max_version: 120000 62 | tags: [ cluster ] 63 | metrics: 64 | - appname: { usage: LABEL ,description: Name of the application that is connected to this WAL sender } 65 | - usename: { usage: LABEL ,description: Name of the user logged into this WAL sender process } 66 | - address: { usage: LABEL ,description: IP address of the client connected to this WAL sender, localhost for unix socket } 67 | - pid: { usage: LABEL ,description: Process ID of the WAL sender process } 68 | - client_port: { usage: GAUGE ,description: TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used } 69 | - state: { usage: GAUGE ,description: Current 
WAL sender encoded state 0-4 for streaming|startup|catchup|backup|stopping } 70 | - sync_state: { usage: GAUGE ,description: Encoded synchronous state of this standby server, 0-3 for async|potential|sync|quorum } 71 | - sync_priority: { usage: GAUGE ,description: Priority of this standby server for being chosen as the synchronous standby } 72 | - backend_xmin: { usage: COUNTER ,description: This standby's xmin horizon reported by hot_standby_feedback. } 73 | - lsn: { usage: COUNTER ,description: Current log position on this server } 74 | - sent_diff: { usage: GAUGE ,description: Last log position sent to this standby server diff with current lsn } 75 | - write_diff: { usage: GAUGE ,description: Last log position written to disk by this standby server diff with current lsn } 76 | - flush_diff: { usage: GAUGE ,description: Last log position flushed to disk by this standby server diff with current lsn } 77 | - replay_diff: { usage: GAUGE ,description: Last log position replayed into the database on this standby server diff with current lsn } 78 | - sent_lsn: { usage: COUNTER ,description: Last write-ahead log location sent on this connection } 79 | - write_lsn: { usage: COUNTER ,description: Last write-ahead log location written to disk by this standby server } 80 | - flush_lsn: { usage: COUNTER ,description: Last write-ahead log location flushed to disk by this standby server } 81 | - replay_lsn: { usage: COUNTER ,description: Last write-ahead log location replayed into the database on this standby server } 82 | - write_lag: { usage: GAUGE ,description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it } 83 | - flush_lag: { usage: GAUGE ,description: Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it } 84 | - replay_lag: { usage: GAUGE ,description: Time elapsed between flushing recent WAL locally and receiving notification 
that this standby server has written, flushed and applied it } 85 | - time: { usage: COUNTER ,description: Current timestamp in unix epoch } 86 | - launch_time: { usage: COUNTER ,description: Time when this process was started, i.e., when the client connected to this WAL sender } 87 | 88 | 89 | -------------------------------------------------------------------------------- /config/0220-pg_sync_standby.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0220 pg_sync_standby 3 | #==============================================================# 4 | pg_sync_standby: 5 | name: pg_sync_standby 6 | desc: PostgreSQL synchronous standby status and names 7 | query: | 8 | SELECT CASE WHEN names <> '' THEN names ELSE '' END AS names, CASE WHEN names <> '' THEN 1 ELSE 0 END AS enabled FROM (SELECT current_setting('synchronous_standby_names') AS names) n; 9 | ttl: 10 10 | min_version: 090400 11 | tags: [ cluster ] 12 | metrics: 13 | - names: { usage: LABEL ,description: List of standby servers that can support synchronous replication, if not enabled } 14 | - enabled: { usage: GAUGE ,description: Synchronous commit enabled, 1 if enabled, 0 if disabled } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0230-pg_downstream.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0230 pg_downstream 3 | #==============================================================# 4 | pg_downstream: 5 | name: pg_downstream 6 | desc: PostgreSQL replication client count group by state 7 | query: | 8 | SELECT l.state, coalesce(count, 0 ) AS count FROM unnest(ARRAY ['streaming','startup','catchup', 'backup', 'stopping']) l(state) LEFT JOIN (SELECT state, count(*) AS count FROM pg_stat_replication GROUP BY state)r ON l.state = r.state; 9 | 
ttl: 10 10 | min_version: 090400 11 | tags: [ cluster ] 12 | metrics: 13 | - state: { usage: LABEL ,description: Replication client state, could be one of startup|catchup|streaming|backup|stopping } 14 | - count: { usage: GAUGE ,description: Count of corresponding state } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0250-pg_recv.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0250 pg_recv 3 | #==============================================================# 4 | pg_recv_13: 5 | name: pg_recv 6 | desc: PostgreSQL walreceiver metrics 13+ 7 | query: |- 8 | SELECT 9 | coalesce(sender_host, (regexp_match(conninfo, '.*host=(\S+).*'))[1]) AS sender_host, coalesce(sender_port::TEXT, (regexp_match(conninfo, '.*port=(\S+).*'))[1]) AS sender_port, slot_name, 10 | pid, CASE status WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state, 11 | receive_start_lsn - '0/0' AS init_lsn,receive_start_tli AS init_tli, 12 | flushed_lsn - '0/0' AS flush_lsn,written_lsn - '0/0'AS write_lsn, received_tli AS flush_tli, latest_end_lsn - '0/0' AS reported_lsn, 13 | last_msg_send_time AS msg_send_time,last_msg_receipt_time AS msg_recv_time,latest_end_time AS reported_time,now() AS time FROM pg_stat_wal_receiver; 14 | 15 | ttl: 10 16 | min_version: 130000 17 | tags: [ cluster, replica ] 18 | metrics: 19 | - sender_host: { usage: LABEL ,description: Host of the PostgreSQL instance this WAL receiver is connected to } 20 | - sender_port: { usage: LABEL ,description: Port number of the PostgreSQL instance this WAL receiver is connected to. 
} 21 | - slot_name: { usage: LABEL ,description: Replication slot name used by this WAL receiver } 22 | - pid: { usage: GAUGE ,description: Process ID of the WAL receiver process } 23 | - state: { usage: GAUGE ,description: Encoded activity status of the WAL receiver process 0-4 for streaming|startup|catchup|backup|stopping } 24 | - init_lsn: { usage: COUNTER ,description: First write-ahead log location used when WAL receiver is started } 25 | - init_tli: { usage: COUNTER ,description: First timeline number used when WAL receiver is started } 26 | - flush_lsn: { usage: COUNTER ,description: Last write-ahead log location already received and flushed to disk } 27 | - write_lsn: { usage: COUNTER ,description: Last write-ahead log location already received and written to disk, but not flushed. } 28 | - flush_tli: { usage: COUNTER ,description: Timeline number of last write-ahead log location received and flushed to disk } 29 | - reported_lsn: { usage: COUNTER ,description: Last write-ahead log location reported to origin WAL sender } 30 | - msg_send_time: { usage: GAUGE ,description: Send time of last message received from origin WAL sender } 31 | - msg_recv_time: { usage: GAUGE ,description: Receipt time of last message received from origin WAL sender } 32 | - reported_time: { usage: GAUGE ,description: Time of last write-ahead log location reported to origin WAL sender } 33 | - time: { usage: GAUGE ,description: Time of current snapshot } 34 | 35 | pg_recv_11: 36 | name: pg_recv 37 | desc: PostgreSQL walreceiver metrics v11+ 38 | query: |- 39 | SELECT 40 | coalesce(sender_host, (regexp_match(conninfo, '.*host=(\S+).*'))[1]) AS sender_host, coalesce(sender_port::TEXT, (regexp_match(conninfo, '.*port=(\S+).*'))[1]) AS sender_port, slot_name, 41 | pid, CASE status WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state, 42 | receive_start_lsn - '0/0' AS init_lsn,receive_start_tli AS init_tli, 
43 | received_lsn - '0/0' AS flush_lsn, received_tli AS flush_tli, latest_end_lsn - '0/0' AS reported_lsn, 44 | last_msg_send_time AS msg_send_time,last_msg_receipt_time AS msg_recv_time,latest_end_time AS reported_time,now() AS time FROM pg_stat_wal_receiver; 45 | 46 | ttl: 10 47 | tags: [ cluster, replica ] 48 | min_version: 110000 49 | max_version: 130000 50 | metrics: 51 | - sender_host: { usage: LABEL ,description: Host of the PostgreSQL instance this WAL receiver is connected to } 52 | - sender_port: { usage: LABEL ,description: Port number of the PostgreSQL instance this WAL receiver is connected to. } 53 | - slot_name: { usage: LABEL ,description: Replication slot name used by this WAL receiver } 54 | - pid: { usage: GAUGE ,description: Process ID of the WAL receiver process } 55 | - state: { usage: GAUGE ,description: Encoded activity status of the WAL receiver process 0-4 for streaming|startup|catchup|backup|stopping } 56 | - init_lsn: { usage: COUNTER ,description: First write-ahead log location used when WAL receiver is started } 57 | - init_tli: { usage: COUNTER ,description: First timeline number used when WAL receiver is started } 58 | - flush_lsn: { usage: COUNTER ,description: Last write-ahead log location already received and flushed to disk } 59 | - flush_tli: { usage: COUNTER ,description: Timeline number of last write-ahead log location received and flushed to disk } 60 | - reported_lsn: { usage: COUNTER ,description: Last write-ahead log location reported to origin WAL sender } 61 | - msg_send_time: { usage: GAUGE ,description: Send time of last message received from origin WAL sender } 62 | - msg_recv_time: { usage: GAUGE ,description: Receipt time of last message received from origin WAL sender } 63 | - reported_time: { usage: GAUGE ,description: Time of last write-ahead log location reported to origin WAL sender } 64 | - time: { usage: GAUGE ,description: Time of current snapshot } 65 | 66 | pg_recv_10: 67 | name: pg_recv 68 | desc: 
PostgreSQL walreceiver metrics v10+ 69 | query: |- 70 | SELECT 71 | (regexp_match(conninfo, '.*host=(\S+).*'))[1] AS sender_host, (regexp_match(conninfo, '.*port=(\S+).*'))[1] AS sender_port, slot_name, 72 | pid, CASE status WHEN 'streaming' THEN 0 WHEN 'startup' THEN 1 WHEN 'catchup' THEN 2 WHEN 'backup' THEN 3 WHEN 'stopping' THEN 4 ELSE -1 END AS state, 73 | receive_start_lsn - '0/0' AS init_lsn,receive_start_tli AS init_tli, 74 | received_lsn - '0/0' AS flush_lsn, received_tli AS flush_tli, latest_end_lsn - '0/0' AS reported_lsn, 75 | last_msg_send_time AS msg_send_time,last_msg_receipt_time AS msg_recv_time,latest_end_time AS reported_time,now() AS time FROM pg_stat_wal_receiver; 76 | 77 | ttl: 10 78 | tags: [ cluster, replica ] 79 | min_version: 090600 80 | max_version: 110000 81 | metrics: 82 | - sender_host: { usage: LABEL ,description: Host of the PostgreSQL instance this WAL receiver is connected to } 83 | - sender_port: { usage: LABEL ,description: Port number of the PostgreSQL instance this WAL receiver is connected to. 
} 84 | - slot_name: { usage: LABEL ,description: Replication slot name used by this WAL receiver } 85 | - pid: { usage: GAUGE ,description: Process ID of the WAL receiver process } 86 | - state: { usage: GAUGE ,description: Encoded activity status of the WAL receiver process 0-4 for streaming|startup|catchup|backup|stopping } 87 | - init_lsn: { usage: COUNTER ,description: First write-ahead log location used when WAL receiver is started } 88 | - init_tli: { usage: COUNTER ,description: First timeline number used when WAL receiver is started } 89 | - flush_lsn: { usage: COUNTER ,description: Last write-ahead log location already received and flushed to disk } 90 | - flush_tli: { usage: COUNTER ,description: Timeline number of last write-ahead log location received and flushed to disk } 91 | - reported_lsn: { usage: COUNTER ,description: Last write-ahead log location reported to origin WAL sender } 92 | - msg_send_time: { usage: GAUGE ,description: Send time of last message received from origin WAL sender } 93 | - msg_recv_time: { usage: GAUGE ,description: Receipt time of last message received from origin WAL sender } 94 | - reported_time: { usage: GAUGE ,description: Time of last write-ahead log location reported to origin WAL sender } 95 | - time: { usage: GAUGE ,description: Time of current snapshot } 96 | 97 | 98 | -------------------------------------------------------------------------------- /config/0260-pg_sub.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0260 pg_sub 3 | #==============================================================# 4 | pg_sub_15: 5 | name: pg_sub 6 | desc: PostgreSQL subscription statistics (15+) 7 | query: |- 8 | SELECT 9 | s1.subname, subid AS id, pid, received_lsn, reported_lsn, 10 | msg_send_time, msg_recv_time, reported_time, 11 | apply_error_count, sync_error_count 12 | FROM 13 | (SELECT 14 | subname, subid, pid, 15 | 
received_lsn - '0/0' AS received_lsn, latest_end_lsn - '0/0' AS reported_lsn, 16 | extract(epoch from last_msg_send_time) AS msg_send_time, 17 | extract(epoch from last_msg_receipt_time) AS msg_recv_time, 18 | extract(epoch from latest_end_time) AS reported_time 19 | FROM pg_stat_subscription WHERE relid ISNULL) s1 20 | LEFT OUTER JOIN pg_stat_subscription_stats s2 USING(subid); 21 | 22 | ttl: 10 23 | min_version: 150000 24 | tags: [ cluster ] 25 | metrics: 26 | - subname: { usage: LABEL ,description: Name of this subscription } 27 | - id: { usage: GAUGE ,description: OID of the subscription } 28 | - pid: { usage: GAUGE ,description: Process ID of the subscription main apply worker process } 29 | - received_lsn: { usage: COUNTER ,description: Last write-ahead log location received } 30 | - reported_lsn: { usage: COUNTER ,description: Last write-ahead log location reported to origin WAL sender } 31 | - msg_send_time: { usage: GAUGE ,description: Send time of last message received from origin WAL sender } 32 | - msg_recv_time: { usage: GAUGE ,description: Receipt time of last message received from origin WAL sender } 33 | - reported_time: { usage: GAUGE ,description: Time of last write-ahead log location reported to origin WAL sender } 34 | - apply_error_count: { usage: COUNTER ,description: Number of times an error occurred while applying changes. 
} 35 | - sync_error_count: { usage: COUNTER ,description: Number of times an error occurred during the initial table synchronization } 36 | 37 | pg_sub_10: 38 | name: pg_sub 39 | desc: PostgreSQL subscription statistics (10-14) 40 | query: |- 41 | SELECT 42 | subname, subid AS id, pid, 43 | received_lsn - '0/0' AS received_lsn, latest_end_lsn - '0/0' AS reported_lsn, 44 | extract(epoch from last_msg_send_time) AS msg_send_time, 45 | extract(epoch from last_msg_receipt_time) AS msg_recv_time, 46 | extract(epoch from latest_end_time) AS reported_time 47 | FROM pg_stat_subscription WHERE relid ISNULL; 48 | 49 | ttl: 10 50 | min_version: 100000 51 | max_version: 150000 52 | tags: [ cluster ] 53 | metrics: 54 | - subname: { usage: LABEL ,description: Name of this subscription } 55 | - id: { usage: GAUGE ,description: OID of the subscription } 56 | - pid: { usage: GAUGE ,description: Process ID of the subscription main apply worker process } 57 | - received_lsn: { usage: COUNTER ,description: Last write-ahead log location received } 58 | - reported_lsn: { usage: COUNTER ,description: Last write-ahead log location reported to origin WAL sender } 59 | - msg_send_time: { usage: GAUGE ,description: Send time of last message received from origin WAL sender } 60 | - msg_recv_time: { usage: GAUGE ,description: Receipt time of last message received from origin WAL sender } 61 | - reported_time: { usage: GAUGE ,description: Time of last write-ahead log location reported to origin WAL sender } 62 | 63 | 64 | -------------------------------------------------------------------------------- /config/0270-pg_origin.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0270 pg_origin 3 | #==============================================================# 4 | # skip by default, require additional privilege setup 5 | # GRANT SELECT ON pg_replication_origin, pg_replication_origin_status 
TO pg_monitor; 6 | pg_origin: 7 | name: pg_origin 8 | desc: PostgreSQL replay state (approximate) for a certain origin 9 | query: SELECT roname, remote_lsn - '0/0' AS remote_lsn, local_lsn - '0/0' AS local_lsn FROM pg_replication_origin o LEFT JOIN pg_replication_origin_status os ON o.roident = os.local_id; 10 | ttl: 10 11 | min_version: 090500 12 | skip: true 13 | tags: [ cluster ] 14 | metrics: 15 | - roname: { usage: LABEL ,description: The external, user defined, name of a replication origin. } 16 | - remote_lsn: { usage: COUNTER ,description: The origin node's LSN up to which data has been replicated. } 17 | - local_lsn: { usage: COUNTER ,description: This node's LSN at which remote_lsn has been replicated. } 18 | 19 | 20 | -------------------------------------------------------------------------------- /config/0300-pg_io.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0300 pg_io 3 | #==============================================================# 4 | pg_io_18: 5 | name: pg_io 6 | desc: PostgreSQL I/O stats since v18 7 | query: |- 8 | SELECT backend_type AS "type",object,context,reads,read_bytes,read_time,writes,write_bytes,write_time,writebacks,writeback_time, 9 | extends,extend_bytes,extend_time,hits,evictions,reuses,fsyncs,fsync_time,extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_io; 10 | 11 | ttl: 10 12 | timeout: 1 13 | min_version: 180000 14 | tags: [ cluster ] 15 | metrics: 16 | - type: { usage: LABEL ,description: Type of backend } 17 | - object: { usage: LABEL ,description: Target object of an I/O operation, relation or temp } 18 | - context: { usage: LABEL ,description: The context of an I/O operation. normal,vacuum,bulkread,bulkwrite } 19 | - reads: { usage: COUNTER ,default: 0 ,description: Number of read operations, each of the size specified in op_bytes. 
} 20 | - read_bytes: { usage: COUNTER ,default: 0 ,description: Number of read bytes } 21 | - read_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in read operations in seconds } 22 | - writes: { usage: COUNTER ,default: 0 ,description: Number of write operations, each of the size specified in op_bytes. } 23 | - write_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in write operations in seconds } 24 | - write_bytes: { usage: COUNTER ,default: 0 ,description: Number of write bytes } 25 | - writebacks: { usage: COUNTER ,default: 0 ,description: Number of units of size op_bytes which the process requested the kernel write out to permanent storage. } 26 | - writeback_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in writeback operations in seconds } 27 | - extends: { usage: COUNTER ,default: 0 ,description: Number of relation extend operations, each of the size specified in op_bytes. } 28 | - extend_bytes: { usage: COUNTER ,default: 0 ,description: Number of extend bytes } 29 | - extend_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in extend operations in seconds } 30 | - hits: { usage: COUNTER ,default: 0 ,description: The number of times a desired block was found in a shared buffer. } 31 | - evictions: { usage: COUNTER ,default: 0 ,description: Number of times a block has been written out from a shared or local buffer } 32 | - reuses: { usage: COUNTER ,default: 0 ,description: The number of times an existing buffer is reused } 33 | - fsyncs: { usage: COUNTER ,default: 0 ,description: Number of fsync calls. 
These are only tracked in context normal } 34 | - fsync_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in fsync operations in seconds } 35 | - reset_time: { usage: GAUGE ,description: Timestamp at which these statistics were last reset } 36 | 37 | pg_io_16: 38 | name: pg_io 39 | desc: PostgreSQL I/O stats 40 | query: |- 41 | SELECT backend_type AS "type", object, context, reads, read_time,writes,write_time,writebacks,writeback_time,extends, 42 | extend_time,hits,evictions,reuses,fsyncs,fsync_time,extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_io; 43 | 44 | ttl: 10 45 | timeout: 1 46 | min_version: 160000 47 | max_version: 180000 48 | tags: [ cluster ] 49 | metrics: 50 | - type: { usage: LABEL ,description: Type of backend } 51 | - object: { usage: LABEL ,description: Target object of an I/O operation, relation or temp } 52 | - context: { usage: LABEL ,description: The context of an I/O operation. normal,vacuum,bulkread,bulkwrite } 53 | - reads: { usage: COUNTER ,default: 0 ,description: Number of read operations, each of the size specified in op_bytes. } 54 | - read_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in read operations in seconds } 55 | - writes: { usage: COUNTER ,default: 0 ,description: Number of write operations, each of the size specified in op_bytes. } 56 | - write_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in write operations in seconds } 57 | - writebacks: { usage: COUNTER ,default: 0 ,description: Number of units of size op_bytes which the process requested the kernel write out to permanent storage. } 58 | - writeback_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in writeback operations in seconds } 59 | - extends: { usage: COUNTER ,default: 0 ,description: Number of relation extend operations, each of the size specified in op_bytes. 
} 60 | - extend_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in extend operations in seconds } 61 | - hits: { usage: COUNTER ,default: 0 ,description: The number of times a desired block was found in a shared buffer. } 62 | - evictions: { usage: COUNTER ,default: 0 ,description: Number of times a block has been written out from a shared or local buffer } 63 | - reuses: { usage: COUNTER ,default: 0 ,description: The number of times an existing buffer is reused } 64 | - fsyncs: { usage: COUNTER ,default: 0 ,description: Number of fsync calls. These are only tracked in context normal } 65 | - fsync_time: { usage: COUNTER ,default: 0 ,scale: 1e-3 ,description: Time spent in fsync operations in seconds } 66 | - reset_time: { usage: GAUGE ,description: Timestamp at which these statistics were last reset } 67 | 68 | 69 | -------------------------------------------------------------------------------- /config/0310-pg_size.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0310 pg_size 3 | #==============================================================# 4 | pg_size: 5 | name: pg_size 6 | desc: PostgreSQL Database, WAL, Log size since v10 7 | query: |- 8 | SELECT 9 | datname, pg_database_size(oid) AS bytes FROM pg_database UNION ALL SELECT 'log' AS datname, 10 | (CASE WHEN EXISTS (SELECT setting FROM pg_settings WHERE name = 'log_directory' and setting like '/%') 11 | THEN (SELECT COALESCE(SUM(size), 0) FROM pg_catalog.pg_ls_logdir()) ELSE 0 END) AS bytes 12 | UNION ALL SELECT 'wal' AS datname, (SELECT COALESCE(SUM(size), 0) FROM pg_catalog.pg_ls_waldir()) AS bytes; 13 | 14 | ttl: 60 15 | timeout: 1 16 | min_version: 100000 17 | tags: [ cluster ] 18 | metrics: 19 | - datname: { usage: LABEL ,description: Database name, or special category 'wal' , 'log' } 20 | - bytes: { usage: GAUGE ,description: File size in bytes } 21 | 22 | 23 | 
-------------------------------------------------------------------------------- /config/0320-pg_archiver.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0320 pg_archiver 3 | #==============================================================# 4 | pg_archiver: 5 | name: pg_archiver 6 | desc: PostgreSQL archiver process statistics 7 | query: |- 8 | SELECT archived_count AS finish_count,failed_count, 9 | extract(epoch FROM last_archived_time) AS finish_time, 10 | extract(epoch FROM last_failed_time) AS failed_time, 11 | extract(epoch FROM stats_reset) AS reset_time 12 | FROM pg_stat_archiver; 13 | 14 | ttl: 60 15 | min_version: 090400 16 | tags: [ cluster ] 17 | metrics: 18 | - finish_count: { usage: COUNTER ,description: Number of WAL files that have been successfully archived } 19 | - failed_count: { usage: COUNTER ,description: Number of failed attempts for archiving WAL files } 20 | - finish_time: { usage: GAUGE ,description: Time of the last successful archive operation } 21 | - failed_time: { usage: GAUGE ,description: Time of the last failed archival operation } 22 | - reset_time: { usage: GAUGE ,description: Time at which archive statistics were last reset } 23 | 24 | 25 | -------------------------------------------------------------------------------- /config/0330-pg_bgwriter.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0330 pg_bgwriter 3 | #==============================================================# 4 | # https://pgpedia.info/p/pg_stat_bgwriter.html 5 | pg_bgwriter_17: 6 | name: pg_bgwriter 7 | desc: "PostgreSQL background writer metrics PG 17+" 8 | query: SELECT buffers_clean, maxwritten_clean, buffers_alloc, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_bgwriter; 9 | ttl: 10 10 | min_version: 170000 11 | tags: [ 
cluster ] 12 | metrics: 13 | - buffers_clean: { usage: COUNTER ,description: Number of buffers written by the background writer } 14 | - maxwritten_clean: { usage: COUNTER ,description: Number of times the background writer stopped a cleaning scan because it had written too many buffers } 15 | - buffers_alloc: { usage: COUNTER ,description: Number of buffers allocated } 16 | - reset_time: { usage: GAUGE ,description: Time at which bgwriter statistics were last reset } 17 | 18 | pg_bgwriter_10: 19 | name: pg_bgwriter 20 | desc: "PostgreSQL background writer metrics (PG17-)" 21 | query: SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, buffers_backend, maxwritten_clean, buffers_backend_fsync, buffers_alloc, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_bgwriter; 22 | ttl: 10 23 | min_version: 090400 24 | max_version: 170000 25 | tags: [ cluster ] 26 | metrics: 27 | - checkpoints_timed: { usage: COUNTER ,description: Number of scheduled checkpoints that have been performed } 28 | - checkpoints_req: { usage: COUNTER ,description: Number of requested checkpoints that have been performed } 29 | - checkpoint_write_time: { usage: COUNTER ,scale: 1e-3 ,description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in seconds } 30 | - checkpoint_sync_time: { usage: COUNTER ,scale: 1e-3 ,description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in seconds } 31 | - buffers_checkpoint: { usage: COUNTER ,description: Number of buffers written during checkpoints } 32 | - buffers_clean: { usage: COUNTER ,description: Number of buffers written by the background writer } 33 | - buffers_backend: { usage: COUNTER ,description: Number of buffers written directly by a backend } 34 | - maxwritten_clean: { usage: COUNTER ,description: Number of times the background 
writer stopped a cleaning scan because it had written too many buffers } 35 | - buffers_backend_fsync: { usage: COUNTER ,description: Number of times a backend had to execute its own fsync call } 36 | - buffers_alloc: { usage: COUNTER ,description: Number of buffers allocated } 37 | - reset_time: { usage: GAUGE ,description: Time at which bgwriter statistics were last reset } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config/0331-pg_checkpointer.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0331 pg_checkpointer 3 | #==============================================================# 4 | pg_checkpointer_18: 5 | name: pg_checkpointer 6 | desc: "PostgreSQL checkpointer stat metrics for pg 18+" 7 | query: SELECT num_timed, num_requested, num_done, restartpoints_timed, restartpoints_req, restartpoints_done, write_time, sync_time, buffers_written, slru_written, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_checkpointer; 8 | ttl: 10 9 | min_version: 180000 10 | tags: [ cluster ] 11 | metrics: 12 | - num_timed: { usage: COUNTER ,rename: timed ,description: Number of scheduled checkpoints that have been performed } 13 | - num_requested: { usage: COUNTER ,rename: req ,description: Number of requested checkpoints that have been performed } 14 | - num_done: { usage: COUNTER ,rename: done ,description: Number of checkpoints that have been performed } 15 | - restartpoints_timed: { usage: COUNTER ,description: Number of scheduled restartpoints due to timeout or after a failed attempt to perform it } 16 | - restartpoints_req: { usage: COUNTER ,description: Number of requested restartpoints } 17 | - restartpoints_done: { usage: COUNTER ,description: Number of restartpoints that have been performed } 18 | - write_time: { usage: COUNTER ,scale: 1e-3 ,description: Total amount of time that has been spent in 
the portion of checkpoint processing where files are written to disk, in seconds } 19 | - sync_time: { usage: COUNTER ,scale: 1e-3 ,description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in seconds } 20 | - buffers_written: { usage: COUNTER ,description: Number of buffers written during checkpoints and restartpoints } 21 | - slru_written: { usage: COUNTER ,description: Number of SLRU buffers written during checkpoints and restartpoints } 22 | - reset_time: { usage: GAUGE ,description: Time at which checkpointer statistics were last reset } 23 | 24 | pg_checkpointer_17: 25 | name: pg_checkpointer 26 | desc: "PostgreSQL checkpointer stat metrics for pg 17+" 27 | query: SELECT num_timed, num_requested, restartpoints_timed, restartpoints_req, restartpoints_done, write_time, sync_time, buffers_written, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_checkpointer; 28 | ttl: 10 29 | min_version: 170000 30 | max_version: 180000 31 | tags: [ cluster ] 32 | metrics: 33 | - num_timed: { usage: COUNTER ,rename: timed ,description: Number of scheduled checkpoints that have been performed } 34 | - num_requested: { usage: COUNTER ,rename: req ,description: Number of requested checkpoints that have been performed } 35 | - restartpoints_timed: { usage: COUNTER ,description: Number of scheduled restartpoints due to timeout or after a failed attempt to perform it } 36 | - restartpoints_req: { usage: COUNTER ,description: Number of requested restartpoints } 37 | - restartpoints_done: { usage: COUNTER ,description: Number of restartpoints that have been performed } 38 | - write_time: { usage: COUNTER ,scale: 1e-3 ,description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in seconds } 39 | - sync_time: { usage: COUNTER ,scale: 1e-3 ,description: Total amount of time that has been spent in the portion of checkpoint processing where 
files are synchronized to disk, in seconds } 40 | - buffers_written: { usage: COUNTER ,description: Number of buffers written during checkpoints and restartpoints } 41 | - reset_time: { usage: GAUGE ,description: Time at which checkpointer statistics were last reset } 42 | 43 | pg_checkpointer_10: 44 | name: pg_checkpointer 45 | desc: "PostgreSQL checkpointer stat metrics for pg 10+" 46 | query: SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_bgwriter; 47 | ttl: 10 48 | min_version: 090400 49 | max_version: 170000 50 | tags: [ cluster ] 51 | metrics: 52 | - checkpoints_timed: { usage: COUNTER ,rename: timed ,description: Number of scheduled checkpoints that have been performed } 53 | - checkpoints_req: { usage: COUNTER ,rename: req ,description: Number of requested checkpoints that have been performed } 54 | - checkpoint_write_time: { usage: COUNTER ,rename: write_time ,scale: 1e-3 ,description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in seconds } 55 | - checkpoint_sync_time: { usage: COUNTER ,rename: sync_time ,scale: 1e-3 ,description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in seconds } 56 | - buffers_checkpoint: { usage: COUNTER ,rename: buffers_written ,description: Number of buffers written during checkpoints and restartpoints } 57 | - reset_time: { usage: GAUGE ,description: Time at which checkpointer statistics were last reset } 58 | 59 | 60 | -------------------------------------------------------------------------------- /config/0340-pg_ssl.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0340 pg_ssl 3 | #==============================================================# 4 | 
pg_ssl: 5 | name: pg_ssl 6 | desc: PostgreSQL SSL client connection count 7 | query: | 8 | SELECT count(*) FILTER (WHERE ssl) AS enabled, count(*) FILTER ( WHERE NOT ssl) AS disabled FROM pg_stat_ssl; 9 | ttl: 10 10 | min_version: 090500 11 | tags: [ cluster ] 12 | metrics: 13 | - enabled: { usage: GAUGE ,description: Number of client connection that use ssl } 14 | - disabled: { usage: GAUGE ,description: Number of client connection that does not use ssl } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0350-pg_checkpoint.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0350 pg_checkpoint 3 | #==============================================================# 4 | pg_checkpoint: 5 | name: pg_checkpoint 6 | desc: checkpoint information from pg_control_checkpoint since 10 7 | query: |- 8 | SELECT 9 | checkpoint_lsn - '0/0' AS checkpoint_lsn, 10 | redo_lsn - '0/0' AS redo_lsn, 11 | timeline_id AS tli, 12 | prev_timeline_id AS prev_tli, 13 | full_page_writes, 14 | split_part(next_xid, ':', 1) AS next_xid_epoch, 15 | split_part(next_xid, ':', 2) AS next_xid, 16 | next_oid::BIGINT, 17 | next_multixact_id::text::BIGINT, 18 | next_multi_offset::text::BIGINT, 19 | oldest_xid::text::BIGINT, 20 | oldest_xid_dbid::text::BIGINT, 21 | oldest_active_xid::text::BIGINT, 22 | oldest_multi_xid::text::BIGINT, 23 | oldest_multi_dbid::BIGINT, 24 | oldest_commit_ts_xid::text::BIGINT, 25 | newest_commit_ts_xid::text::BIGINT, 26 | checkpoint_time AS time, 27 | extract(epoch from now() - checkpoint_time) AS elapse 28 | FROM pg_control_checkpoint(); 29 | 30 | ttl: 60 31 | min_version: 100000 32 | tags: [ cluster ] 33 | metrics: 34 | - checkpoint_lsn: { usage: COUNTER ,description: Latest checkpoint location } 35 | - redo_lsn: { usage: COUNTER ,description: Latest checkpoint's REDO location } 36 | - tli: { usage: COUNTER 
,description: Latest checkpoint's TimeLineID } 37 | - prev_tli: { usage: COUNTER ,description: Latest checkpoint's PrevTimeLineID } 38 | - full_page_writes: { usage: GAUGE ,description: Latest checkpoint's full_page_writes enabled } 39 | - next_xid_epoch: { usage: COUNTER ,description: Latest checkpoint's NextXID epoch } 40 | - next_xid: { usage: COUNTER ,description: Latest checkpoint's NextXID xid } 41 | - next_oid: { usage: COUNTER ,description: Latest checkpoint's NextOID } 42 | - next_multixact_id: { usage: COUNTER ,description: Latest checkpoint's NextMultiXactId } 43 | - next_multi_offset: { usage: COUNTER ,description: Latest checkpoint's NextMultiOffset } 44 | - oldest_xid: { usage: COUNTER ,description: Latest checkpoint's oldestXID } 45 | - oldest_xid_dbid: { usage: GAUGE ,description: Latest checkpoint's oldestXID's DB OID } 46 | - oldest_active_xid: { usage: COUNTER ,description: Latest checkpoint's oldestActiveXID } 47 | - oldest_multi_xid: { usage: COUNTER ,description: Latest checkpoint's oldestMultiXid } 48 | - oldest_multi_dbid: { usage: GAUGE ,description: Latest checkpoint's oldestMulti's DB OID } 49 | - oldest_commit_ts_xid: { usage: COUNTER ,description: Latest checkpoint's oldestCommitTsXid } 50 | - newest_commit_ts_xid: { usage: COUNTER ,description: Latest checkpoint's newestCommitTsXid } 51 | - time: { usage: COUNTER ,description: Time of latest checkpoint } 52 | - elapse: { usage: GAUGE ,description: Seconds elapsed since latest checkpoint in seconds } 53 | 54 | 55 | -------------------------------------------------------------------------------- /config/0360-pg_recovery.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0360 pg_recovery 3 | #==============================================================# 4 | pg_recovery: 5 | name: pg_recovery 6 | desc: PostgreSQL control recovery metrics (9.6+) 7 | query: | 8 | SELECT 
min_recovery_end_timeline AS min_timeline, 9 | min_recovery_end_lsn - '0/0' AS min_lsn, 10 | backup_start_lsn - '0/0' AS backup_start_lsn, 11 | backup_end_lsn - '0/0' AS backup_end_lsn, 12 | end_of_backup_record_required AS require_record 13 | FROM pg_control_recovery(); 14 | ttl: 10 15 | min_version: 090600 16 | tags: [ cluster, replica ] 17 | metrics: 18 | - min_timeline: { usage: COUNTER ,description: Min recovery ending loc's timeline } 19 | - min_lsn: { usage: COUNTER ,description: Minimum recovery ending location } 20 | - backup_start_lsn: { usage: COUNTER ,description: Backup start location } 21 | - backup_end_lsn: { usage: COUNTER ,description: Backup end location } 22 | - require_record: { usage: GAUGE ,description: End-of-backup record required } 23 | 24 | pg_recovery_prefetch: 25 | name: pg_recovery_prefetch 26 | desc: PostgreSQL recovery prefetch metrics (15+) 27 | query: SELECT prefetch,hit,skip_init,skip_new,skip_fpw,skip_rep,wal_distance,block_distance,io_depth,extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_recovery_prefetch; 28 | ttl: 10 29 | min_version: 150000 30 | tags: [ cluster, replica ] 31 | metrics: 32 | - prefetch: { usage: COUNTER ,description: Number of blocks prefetched because they were not in the buffer pool } 33 | - hit: { usage: COUNTER ,description: Number of blocks not prefetched because they were already in the buffer pool } 34 | - skip_init: { usage: COUNTER ,description: Number of blocks not prefetched because they would be zero-initialized } 35 | - skip_new: { usage: COUNTER ,description: Number of blocks not prefetched because they didn't exist yet } 36 | - skip_fpw: { usage: COUNTER ,description: Number of blocks not prefetched because a full page image was included in the WAL } 37 | - skip_rep: { usage: COUNTER ,description: Number of blocks not prefetched because they were already recently prefetched } 38 | - wal_distance: { usage: GAUGE ,description: How many bytes ahead the prefetcher is looking } 39 | - 
block_distance: { usage: GAUGE ,description: How many blocks ahead the prefetcher is looking } 40 | - io_depth: { usage: GAUGE ,description: How many prefetches have been initiated but are not yet known to have completed } 41 | - reset_time: { usage: GAUGE ,description: Time at which these recovery prefetch statistics were last reset } 42 | 43 | 44 | -------------------------------------------------------------------------------- /config/0370-pg_slru.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0370 pg_slru 3 | #==============================================================# 4 | pg_slru_13: 5 | name: pg_slru 6 | desc: PostgreSQL simple-least-recently-used (SLRU) cache statistics v13 7 | query: SELECT name, blks_zeroed, blks_hit, blks_read, blks_written, blks_exists, flushes, truncates, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_slru; 8 | ttl: 60 9 | min_version: 130000 10 | tags: [ cluster ] 11 | metrics: 12 | - name: { usage: LABEL ,description: Name of the SLRU } 13 | - blks_zeroed: { usage: COUNTER ,description: Number of blocks zeroed during initializations } 14 | - blks_hit: { usage: COUNTER ,description: Number of times disk blocks were found already in the SLRU, so that a read was not necessary } 15 | - blks_read: { usage: COUNTER ,description: Number of disk blocks read for this SLRU } 16 | - blks_written: { usage: COUNTER ,description: Number of disk blocks written for this SLRU } 17 | - blks_exists: { usage: COUNTER ,description: Number of blocks checked for existence for this SLRU } 18 | - flushes: { usage: COUNTER ,description: Number of flushes of dirty data for this SLRU } 19 | - truncates: { usage: COUNTER ,description: Number of truncates for this SLRU } 20 | - reset_time: { usage: GAUGE ,description: Time at which these statistics were last reset } 21 | 22 | 23 | 
-------------------------------------------------------------------------------- /config/0380-pg_shmem.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0380 pg_shmem 3 | #==============================================================# 4 | # pg_shmem require su privilege to work. Disable it or create auxiliary function with su before use: 5 | # CREATE OR REPLACE FUNCTION monitor.pg_shmem() RETURNS SETOF pg_shmem_allocations AS $$ SELECT * FROM pg_shmem_allocations;$$ LANGUAGE SQL SECURITY DEFINER; 6 | pg_shmem: 7 | name: pg_shmem 8 | desc: Allocations made from the server's main shared memory segment 9 | query: SELECT coalesce(name, 'Free') AS name, off AS offset, size, allocated_size FROM monitor.pg_shmem(); 10 | ttl: 60 11 | min_version: 130000 12 | skip: true # disable it by default 13 | tags: [cluster, "schema:monitor" ] 14 | metrics: 15 | - name: { usage: LABEL ,description: Name of the shared memory allocation } 16 | - offset: { usage: GAUGE ,description: The offset at which the allocation starts } 17 | - size: { usage: GAUGE ,description: Size of the allocation } 18 | - allocated_size: { usage: GAUGE ,description: Size of the allocation including padding } 19 | 20 | 21 | -------------------------------------------------------------------------------- /config/0390-pg_wal.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0390 pg_wal 3 | #==============================================================# 4 | pg_wal_18: 5 | name: pg_wal 6 | desc: PostgreSQL WAL statistics since v18 with some col removed 7 | query: SELECT wal_records AS records, wal_fpi AS fpi, wal_bytes AS bytes, wal_buffers_full AS buffers_full,extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_wal; 8 | ttl: 10 9 | tags: [ cluster ] 10 | min_version: 180000 11 | metrics: 
12 | - records: { usage: COUNTER ,description: Total number of WAL records generated } 13 | - fpi: { usage: COUNTER ,description: Total number of WAL full page images generated } 14 | - bytes: { usage: COUNTER ,description: Total amount of WAL generated in bytes } 15 | - buffers_full: { usage: COUNTER ,description: Number of times WAL data was written to disk because WAL buffers became full } 16 | - reset_time: { usage: GAUGE ,description: When statistics were last reset } 17 | 18 | pg_wal_14: 19 | name: pg_wal 20 | desc: PostgreSQL WAL statistics since v14 21 | query: SELECT wal_records AS records, wal_fpi AS fpi, wal_bytes AS bytes, wal_buffers_full AS buffers_full, wal_write AS write, wal_sync AS sync, wal_write_time AS write_time, wal_sync_time AS sync_time, extract(EPOCH FROM stats_reset) AS reset_time FROM pg_stat_wal; 22 | ttl: 10 23 | tags: [ cluster ] 24 | min_version: 140000 25 | max_version: 180000 26 | metrics: 27 | - records: { usage: COUNTER ,description: Total number of WAL records generated } 28 | - fpi: { usage: COUNTER ,description: Total number of WAL full page images generated } 29 | - bytes: { usage: COUNTER ,description: Total amount of WAL generated in bytes } 30 | - buffers_full: { usage: COUNTER ,description: Number of times WAL data was written to disk because WAL buffers became full } 31 | - write: { usage: COUNTER ,description: Number of times WAL buffers were written out to disk via XLogWrite request. 
} 32 | - sync: { usage: COUNTER ,description: Number of times WAL files were synced to disk via issue_xlog_fsync request } 33 | - write_time: { usage: COUNTER ,scale: 1e-3 ,description: Total amount of time spent writing WAL buffers to disk via XLogWrite request in seconds } 34 | - sync_time: { usage: COUNTER ,scale: 1e-3 ,description: Total amount of time spent syncing WAL files to disk via issue_xlog_fsync request, in seconds } 35 | - reset_time: { usage: GAUGE ,description: When statistics were last reset } 36 | 37 | 38 | -------------------------------------------------------------------------------- /config/0410-pg_activity.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0410 pg_activity 3 | #==============================================================# 4 | pg_activity: 5 | name: pg_activity 6 | desc: PostgreSQL backend activity group by database and state 7 | query: |- 8 | SELECT datname, state, coalesce(count, 0) AS count, coalesce(max_duration, 0) AS max_duration, coalesce(max_tx_duration, 0) AS max_tx_duration, coalesce(max_conn_duration, 0) AS max_conn_duration FROM 9 | (SELECT d.datname, a.state FROM pg_database d, unnest(ARRAY ['active','idle','idle in transaction','idle in transaction (aborted)','fastpath function call','disabled']) a(state) WHERE d.datallowconn AND NOT d.datistemplate) base 10 | LEFT JOIN (SELECT datname, state, count(*) AS count, max(extract(epoch from now() - state_change)) AS max_duration, max(extract(epoch from now() - xact_start)) 11 | AS max_tx_duration, max(extract(epoch from now() - backend_start)) AS max_conn_duration FROM pg_stat_activity WHERE pid <> pg_backend_pid() GROUP BY 1,2) data USING (datname,state); 12 | ttl: 10 13 | min_version: 090400 14 | tags: [ cluster ] 15 | metrics: 16 | - datname: { usage: LABEL ,description: Name of the database this backend is connected to } 17 | - state: { usage: LABEL 
,description: Current overall state of this backend. } 18 | - count: { usage: GAUGE ,description: Count of connection among (datname,state) } 19 | - max_duration: { usage: GAUGE ,description: Max duration since last state change among (datname, state) } 20 | - max_tx_duration: { usage: GAUGE ,description: Max transaction duration since state change among (datname, state) } 21 | - max_conn_duration: { usage: GAUGE ,description: Max backend session duration since state change among (datname, state) } 22 | 23 | 24 | -------------------------------------------------------------------------------- /config/0420-pg_wait.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0420 pg_wait 3 | #==============================================================# 4 | pg_wait: 5 | name: pg_wait 6 | desc: PostgreSQL backend client count group by wait event type since 9.6 7 | query: | 8 | SELECT coalesce(datname, '_system') AS datname, coalesce(wait_event_type, 'Running') AS event, count(*) AS count FROM pg_stat_activity GROUP BY 1, 2; 9 | ttl: 10 10 | min_version: 090600 11 | tags: [ cluster ] 12 | metrics: 13 | - datname: { usage: LABEL ,description: Name of the database, _system for global process } 14 | - event: { usage: LABEL ,description: Wait event type } 15 | - count: { usage: GAUGE ,description: Count of WaitEvent on target database } 16 | 17 | 18 | -------------------------------------------------------------------------------- /config/0430-pg_backend.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0430 pg_backend 3 | #==============================================================# 4 | pg_backend: 5 | name: pg_backend 6 | desc: PostgreSQL backend process count group by backend type since 10 7 | query: SELECT backend_type AS "type", count(*) AS count FROM 
pg_stat_activity GROUP BY backend_type; 8 | ttl: 10 9 | min_version: 100000 10 | tags: [ cluster ] 11 | metrics: 12 | - type: { usage: LABEL ,description: Database backend process type } 13 | - count: { usage: GAUGE ,description: Database backend process count by backend_type } 14 | 15 | 16 | -------------------------------------------------------------------------------- /config/0440-pg_xact.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0440 pg_xact 3 | #==============================================================# 4 | pg_xact: 5 | name: pg_xact 6 | desc: PostgreSQL transaction identifier metrics 7 | query: WITH snap(v) AS (SELECT txid_current_snapshot()), xset(v) AS (SELECT txid_snapshot_xip(v) FROM snap), xnum(v) AS (SELECT count(*) from xset), xmin(v) AS (SELECT txid_snapshot_xmin(v) FROM snap), xmax(v) AS (SELECT txid_snapshot_xmax(v) FROM snap) SELECT xmin.v AS xmin, xmax.v AS xmax, xnum.v AS xnum FROM xmin, xmax, xnum; 8 | ttl: 10 9 | min_version: 090400 10 | tags: [ cluster ] 11 | metrics: 12 | - xmin: { usage: COUNTER ,description: Earliest txid that is still active } 13 | - xmax: { usage: COUNTER ,description: First as-yet-unassigned txid. txid >= this are invisible. 
} 14 | - xnum: { usage: GAUGE ,description: Current active transaction count } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0450-pg_lock.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0450 pg_lock 3 | #==============================================================# 4 | pg_lock: 5 | name: pg_lock 6 | desc: PostgreSQL lock distribution by mode and database 7 | query: | 8 | SELECT datname, mode, coalesce(count, 0) AS count 9 | FROM (SELECT d.oid AS database, d.datname, l.mode FROM pg_database d, unnest(ARRAY ['AccessShareLock','RowShareLock','RowExclusiveLock','ShareUpdateExclusiveLock', 'ShareLock','ShareRowExclusiveLock','ExclusiveLock','AccessExclusiveLock']) l(mode) WHERE d.datallowconn AND NOT d.datistemplate) base 10 | LEFT JOIN (SELECT database, mode, count(*) AS count FROM pg_locks WHERE database IS NOT NULL GROUP BY 1, 2) cnt USING (database, mode); 11 | ttl: 10 12 | min_version: 090400 13 | tags: [ cluster ] 14 | metrics: 15 | - datname: { usage: LABEL ,description: Name of the database this backend is connected to } 16 | - mode: { usage: LABEL ,description: Name of the lock mode held or desired by this process } 17 | - count: { usage: GAUGE ,description: Number of locks of corresponding mode and database } 18 | 19 | 20 | -------------------------------------------------------------------------------- /config/0460-pg_query.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0460 pg_query 3 | #==============================================================# 4 | pg_query_17: 5 | name: pg_query 6 | desc: PostgreSQL Query metrics, require pg_stat_statements installed, 17+ 7 | query: |- 8 | SELECT datname, queryid AS query, sum(calls) AS calls, sum(rows) AS rows, sum(total_exec_time) AS 
exec_time, sum(shared_blk_read_time) + sum(shared_blk_write_time) AS io_time, sum(wal_bytes) AS wal_bytes 9 | ,sum(shared_blks_hit) AS sblk_hit, sum(shared_blks_read) AS sblk_read, sum(shared_blks_dirtied) AS sblk_dirtied, sum(shared_blks_written) AS sblk_written 10 | FROM pg_stat_statements(false) s JOIN pg_database d ON s.dbid = d.oid WHERE userid != 10 AND calls > 4 GROUP BY 1, 2 ORDER BY 3 DESC LIMIT 128; 11 | 12 | ttl: 10 13 | timeout: 1 14 | min_version: 170000 15 | tags: [ cluster, "extension:pg_stat_statements" ] 16 | metrics: 17 | - datname: { usage: LABEL ,description: Name of database } 18 | - query: { usage: LABEL ,description: QueryID generated from internal hash code, computed from the statement's parse tree } 19 | - calls: { usage: COUNTER ,description: Number of times the statement was executed } 20 | - rows: { usage: COUNTER ,description: Total number of rows retrieved or affected by the statement } 21 | - exec_time: { usage: COUNTER ,scale: 1e-3 ,description: Total time spent executing the statement, in seconds } 22 | - io_time: { usage: COUNTER ,scale: 1e-3 ,description: Total time the statement spent reading and writing blocks, in seconds } 23 | - wal_bytes: { usage: COUNTER ,description: Total amount of WAL bytes generated by the statement } 24 | - sblk_hit: { usage: COUNTER ,description: Total number of shared block cache hits by the statement } 25 | - sblk_read: { usage: COUNTER ,description: Total number of shared blocks read by the statement } 26 | - sblk_dirtied: { usage: COUNTER ,description: Total number of shared blocks dirtied by the statement } 27 | - sblk_written: { usage: COUNTER ,description: Total number of shared blocks written by the statement } 28 | 29 | pg_query_13: 30 | name: pg_query 31 | desc: PostgreSQL Query metrics, require pg_stat_statements installed, 13 - 16 32 | query: |- 33 | SELECT datname, queryid AS query, sum(calls) AS calls, sum(rows) AS rows, sum(total_exec_time) AS exec_time, sum(blk_read_time) + 
sum(blk_write_time) AS io_time, sum(wal_bytes) AS wal_bytes 34 | ,sum(shared_blks_hit) AS sblk_hit, sum(shared_blks_read) AS sblk_read, sum(shared_blks_dirtied) AS sblk_dirtied, sum(shared_blks_written) AS sblk_written 35 | FROM pg_stat_statements(false) s JOIN pg_database d ON s.dbid = d.oid WHERE userid != 10 AND calls > 4 GROUP BY 1, 2 ORDER BY 3 DESC LIMIT 128; 36 | 37 | ttl: 10 38 | timeout: 1 39 | min_version: 130000 40 | max_version: 170000 41 | tags: [ cluster, "extension:pg_stat_statements" ] 42 | metrics: 43 | - datname: { usage: LABEL ,description: Name of database } 44 | - query: { usage: LABEL ,description: QueryID generated from internal hash code, computed from the statement's parse tree } 45 | - calls: { usage: COUNTER ,description: Number of times the statement was executed } 46 | - rows: { usage: COUNTER ,description: Total number of rows retrieved or affected by the statement } 47 | - exec_time: { usage: COUNTER ,scale: 1e-3 ,description: Total time spent executing the statement, in seconds } 48 | - io_time: { usage: COUNTER ,scale: 1e-3 ,description: Total time the statement spent reading and writing blocks, in seconds } 49 | - wal_bytes: { usage: COUNTER ,description: Total amount of WAL bytes generated by the statement } 50 | - sblk_hit: { usage: COUNTER ,description: Total number of shared block cache hits by the statement } 51 | - sblk_read: { usage: COUNTER ,description: Total number of shared blocks read by the statement } 52 | - sblk_dirtied: { usage: COUNTER ,description: Total number of shared blocks dirtied by the statement } 53 | - sblk_written: { usage: COUNTER ,description: Total number of shared blocks written by the statement } 54 | 55 | pg_query_10: 56 | name: pg_query 57 | desc: PostgreSQL query statement metrics, require pg_stat_statements installed, 9.4 ~ 12 58 | query: |- 59 | SELECT datname, queryid AS query, sum(calls) AS calls, sum(rows) AS rows, sum(total_time) AS exec_time, sum(blk_read_time) + sum(blk_write_time) AS 
io_time, 60 | sum(shared_blks_hit) AS sblk_hit, sum(shared_blks_read) AS sblk_read, sum(shared_blks_dirtied) AS sblk_dirtied, sum(shared_blks_written) AS sblk_written 61 | FROM pg_stat_statements(false) s JOIN pg_database d ON s.dbid = d.oid WHERE userid != 10 AND calls > 4 GROUP BY 1, 2 ORDER BY 3 DESC LIMIT 128; 62 | 63 | ttl: 10 64 | timeout: 1 65 | min_version: 090400 66 | max_version: 130000 67 | tags: [ cluster, "extension:pg_stat_statements" ] 68 | metrics: 69 | - datname: { usage: LABEL ,description: Name of database } 70 | - query: { usage: LABEL ,description: QueryID generated from internal hash code, computed from the statement's parse tree } 71 | - calls: { usage: COUNTER ,description: Number of times the statement was executed } 72 | - rows: { usage: COUNTER ,description: Total number of rows retrieved or affected by the statement } 73 | - exec_time: { usage: COUNTER ,scale: 1e-3 ,description: Total time spent executing the statement, in seconds } 74 | - io_time: { usage: COUNTER ,scale: 1e-3 ,description: Total time the statement spent reading and writing blocks, in seconds } 75 | - sblk_hit: { usage: COUNTER ,description: Total number of shared block cache hits by the statement } 76 | - sblk_read: { usage: COUNTER ,description: Total number of shared blocks read by the statement } 77 | - sblk_dirtied: { usage: COUNTER ,description: Total number of shared blocks dirtied by the statement } 78 | - sblk_written: { usage: COUNTER ,description: Total number of shared blocks written by the statement } 79 | 80 | 81 | -------------------------------------------------------------------------------- /config/0510-pg_vacuuming.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0510 pg_vacuuming 3 | #==============================================================# 4 | pg_vacuuming_18: 5 | name: pg_vacuuming 6 | desc: PostgreSQL vacuum progress 18+ 7 | 
query: |- 8 | SELECT datname, pid, relid::RegClass AS relname, 9 | CASE phase WHEN 'scanning heap' THEN (CASE WHEN heap_blks_total > 0 THEN 1.0 * heap_blks_scanned / heap_blks_total ELSE 0.0 END) 10 | WHEN 'vacuuming heap' THEN (CASE WHEN heap_blks_total > 0 THEN 1.0 * heap_blks_vacuumed / heap_blks_total ELSE 0 END) ELSE NULL END AS progress, delay_time 11 | FROM pg_stat_progress_vacuum pspv; 12 | 13 | ttl: 10 14 | min_version: 180000 15 | tags: [ cluster, primary ] 16 | metrics: 17 | - datname: { usage: LABEL ,description: database name } 18 | - pid: { usage: LABEL ,description: process id of indexing table } 19 | - relname: { usage: LABEL ,description: relation name of indexed table } 20 | - progress: { usage: GAUGE ,description: the actual progress } 21 | - delay_time: { usage: COUNTER ,scale: 1e-3 ,description: Total time spent sleeping due to cost-based delay } 22 | 23 | 24 | pg_vacuuming_12: 25 | name: pg_vacuuming 26 | desc: PostgreSQL vacuum progress since 12 27 | query: |- 28 | SELECT 29 | datname, pid, relid::RegClass AS relname, 30 | CASE phase WHEN 'scanning heap' THEN (CASE WHEN heap_blks_total > 0 THEN 1.0 * heap_blks_scanned / heap_blks_total ELSE 0.0 END) 31 | WHEN 'vacuuming heap' THEN (CASE WHEN heap_blks_total > 0 THEN 1.0 * heap_blks_vacuumed / heap_blks_total ELSE 0 END) 32 | ELSE NULL END AS progress FROM pg_stat_progress_vacuum pspv; 33 | 34 | ttl: 10 35 | min_version: 120000 36 | max_version: 180000 37 | tags: [ cluster, primary ] 38 | metrics: 39 | - datname: { usage: LABEL, description: database name } 40 | - pid: { usage: LABEL, description: process id of indexing table } 41 | - relname: { usage: LABEL, description: relation name of indexed table } 42 | - progress: { usage: GAUGE, description: the actual progress } 43 | 44 | 45 | -------------------------------------------------------------------------------- /config/0520-pg_indexing.yml: -------------------------------------------------------------------------------- 1 | 
#==============================================================# 2 | # 0520 pg_indexing 3 | #==============================================================# 4 | pg_indexing: 5 | name: pg_indexing 6 | desc: PostgreSQL index creating progress (v12+) 7 | query: |- 8 | SELECT datname, pid, relid::RegClass AS relname, 9 | (CASE WHEN blocks_total > 0 THEN 1.0 * blocks_done / blocks_total ELSE NULL END) AS blocks, 10 | (CASE WHEN tuples_total > 0 THEN 1.0 * tuples_done / tuples_total ELSE NULL END) AS tuples, 11 | (CASE WHEN partitions_total > 0 THEN 1.0 * partitions_done / partitions_total ELSE NULL END) AS partitions, 12 | (CASE WHEN lockers_total > 0 THEN 1.0 * lockers_done / lockers_total ELSE NULL END) AS lockers 13 | FROM pg_stat_progress_create_index pspci; 14 | 15 | ttl: 10 16 | min_version: 120000 17 | tags: [ cluster, primary ] 18 | metrics: 19 | - datname: { usage: LABEL, description: Name of the database } 20 | - pid: { usage: LABEL, description: Process id of indexing table } 21 | - relname: { usage: LABEL, description: Relation name of indexed table } 22 | - blocks: { usage: GAUGE, description: Percent of blocks been proceeded } 23 | - tuples: { usage: GAUGE, description: Percent of tuples been proceeded } 24 | - partitions: { usage: GAUGE, description: Percent of partitions been proceeded } 25 | - lockers: { usage: GAUGE, description: Percent of lockers been proceeded } 26 | 27 | 28 | -------------------------------------------------------------------------------- /config/0530-pg_clustering.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0530 pg_clustering 3 | #==============================================================# 4 | pg_clustering: 5 | name: pg_clustering 6 | desc: PostgreSQL cluster or vacuum full progress (v12+) 7 | query: SELECT datname, pid, relid::RegClass AS relname, param4 AS tup_scan, CASE WHEN param6 > 0 THEN 1.0 * param7 / 
param6 ELSE 0 END AS progress FROM pg_stat_get_progress_info('cluster') s LEFT JOIN pg_database d ON s.datid = d.oid; 8 | ttl: 10 9 | min_version: 120000 10 | tags: [ cluster, primary ] 11 | metrics: 12 | - datname: { usage: LABEL, description: Name of database being clustered } 13 | - pid: { usage: LABEL, description: Process id of the clustering backend } 14 | - relname: { usage: LABEL, description: Relation name of the table being clustered } 15 | - tup_scan: { usage: GAUGE, description: Number of tuples scanned } 16 | - progress: { usage: GAUGE, description: Progress of heap blocks processed } 17 | 18 | 19 | -------------------------------------------------------------------------------- /config/0540-pg_backup.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0540 pg_backup 3 | #==============================================================# 4 | pg_backup: 5 | name: pg_backup 6 | desc: PostgreSQL basebackup progress since 13 7 | query: SELECT pid, param1 AS phase, CASE param2 WHEN -1::integer THEN NULL::bigint ELSE param2 END AS total_bytes, param3 AS sent_bytes FROM pg_stat_get_progress_info('BASEBACKUP'); 8 | ttl: 10 9 | min_version: 130000 10 | tags: [ cluster ] 11 | metrics: 12 | - pid: { usage: LABEL, description: process id of basebackup sender } 13 | - phase: { usage: GAUGE, description: Phase encoded in 0~5 initial, wait checkpoint, estimate, streaming, waiting archive, transfer archive } 14 | - total_bytes: { usage: GAUGE, description: Total amount of data that will be streamed } 15 | - sent_bytes: { usage: GAUGE, description: Amount of data streamed } 16 | 17 | 18 | -------------------------------------------------------------------------------- /config/0620-pg_db_confl.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0620 pg_db_confl 3 | 
#==============================================================# 4 | # https://pgpedia.info/p/pg_stat_database_conflicts.html 5 | 6 | pg_db_confl_16: 7 | name: pg_db_confl 8 | desc: PostgreSQL database conflicts metrics for PG16+ 9 | query: SELECT * FROM pg_stat_database_conflicts; 10 | ttl: 10 11 | min_version: 160000 12 | tags: [ cluster, replica ] 13 | metrics: 14 | - datid: { usage: DISCARD } 15 | - datname: { usage: LABEL ,description: Name of this database } 16 | - confl_tablespace: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to dropped tablespaces } 17 | - confl_lock: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to lock timeouts } 18 | - confl_snapshot: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to old snapshots } 19 | - confl_bufferpin: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to pinned buffers } 20 | - confl_deadlock: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to deadlocks } 21 | - confl_active_logicalslot: { usage: COUNTER ,description: Number of uses of logical slots in this database that have been canceled due to old snapshots or too low a wal_level on the primary } 22 | 23 | pg_db_confl_15: 24 | name: pg_db_confl 25 | desc: PostgreSQL database conflicts metrics for pg 9.1 - 16 26 | query: SELECT * FROM pg_stat_database_conflicts; 27 | ttl: 10 28 | min_version: 90100 29 | max_version: 160000 30 | tags: [ cluster, replica ] 31 | metrics: 32 | - datid: { usage: DISCARD } 33 | - datname: { usage: LABEL ,description: Name of this database } 34 | - confl_tablespace: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to dropped tablespaces } 35 | - confl_lock: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to lock 
timeouts } 36 | - confl_snapshot: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to old snapshots } 37 | - confl_bufferpin: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to pinned buffers } 38 | - confl_deadlock: { usage: COUNTER ,description: Number of queries in this database that have been canceled due to deadlocks } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config/0640-pg_pubrel.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0640 pg_pubrel 3 | #==============================================================# 4 | pg_pubrel: 5 | name: pg_pubrel 6 | desc: PostgreSQL publication and relation count 7 | query: SELECT CURRENT_CATALOG AS datname, pubname, count(*) AS count FROM pg_publication p, LATERAL pg_get_publication_tables(pubname) GROUP BY pubname; 8 | ttl: 10 9 | min_version: 100000 10 | metrics: 11 | - datname: { usage: LABEL ,description: Name of the database which publication belonged } 12 | - pubname: { usage: LABEL ,description: Name of the publication } 13 | - count: { usage: GAUGE ,description: Count of relation in the publication } 14 | 15 | 16 | -------------------------------------------------------------------------------- /config/0650-pg_subrel.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0650 pg_subrel 3 | #==============================================================# 4 | pg_subrel: 5 | name: pg_subrel 6 | desc: PostgreSQL subscripted relation group by state 7 | query: SELECT CURRENT_CATALOG AS datname, subname, srsubstate::TEXT AS state, count(*) AS count FROM pg_subscription_rel sr LEFT JOIN pg_stat_subscription ss ON sr.srsubid = ss.subid GROUP BY 2, 3; 8 | ttl: 10 9 | 
min_version: 100000 10 | metrics: 11 | - datname: { usage: LABEL ,description: Name of the database this subscription belongs to } 12 | - subname: { usage: LABEL ,description: Name of the subscription } 13 | - state: { usage: LABEL ,description: State of table in subscription, i=initialize, d=data copy, s=sync, r=ready } 14 | - count: { usage: GAUGE ,description: Count of relation in this subscription and corresponding state } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0710-pg_index.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0710 pg_index 3 | #==============================================================# 4 | pg_index: 5 | name: pg_index 6 | desc: PostgreSQL index metrics 7 | query: |- 8 | SELECT CURRENT_CATALOG AS datname, schemaname || '.' || indexrelname AS idxname, schemaname || '.' || relname AS relname ,indexrelid AS relid, 9 | relpages, reltuples, idx_scan, idx_tup_read, idx_tup_fetch, idx_blks_read, idx_blks_hit 10 | FROM pg_stat_user_indexes psui, LATERAL (SELECT idx_blks_read, idx_blks_hit FROM pg_statio_user_indexes psio WHERE psio.indexrelid = psui.indexrelid LIMIT 1) p2, 11 | LATERAL (SELECT relpages,reltuples FROM pg_class c WHERE c.oid = psui.indexrelid LIMIT 1) p3 12 | WHERE schemaname !~ '^pg_' AND schemaname !~ '^_' AND schemaname !~ '^timescaledb' AND schemaname !~ '^citus' AND schemaname !~ '^columnar' AND schemaname NOT IN ('pg_catalog','information_schema','pg_toast','repack','monitor') 13 | ORDER BY idx_tup_read DESC LIMIT 512; 14 | 15 | ttl: 10 16 | timeout: 1 17 | min_version: 090400 18 | metrics: 19 | - datname: { usage: LABEL ,description: Database name of this index } 20 | - idxname: { usage: LABEL ,description: Name of this index (fully-qualified schema name) } 21 | - relname: { usage: LABEL ,description: Name of the table for this index (fully-qualified schema 
name) } 22 | - relid: { usage: LABEL ,description: Relation oid of this index } 23 | - relpages: { usage: GAUGE ,description: Size of the on-disk representation of this index in pages } 24 | - reltuples: { usage: GAUGE ,description: Estimate relation tuples } 25 | - idx_scan: { usage: COUNTER ,description: Number of index scans initiated on this index } 26 | - idx_tup_read: { usage: COUNTER ,description: Number of index entries returned by scans on this index } 27 | - idx_tup_fetch: { usage: COUNTER ,description: Number of live table rows fetched by simple index scans using this index } 28 | - idx_blks_read: { usage: COUNTER ,description: Number of disk blocks read from this index } 29 | - idx_blks_hit: { usage: COUNTER ,description: Number of buffer hits in this index } 30 | 31 | 32 | -------------------------------------------------------------------------------- /config/0720-pg_func.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0720 pg_func 3 | #==============================================================# 4 | pg_func: 5 | desc: PostgreSQL function metrics 6 | query: SELECT CURRENT_CATALOG AS datname, schemaname || '.' 
|| funcname AS funcname, sum(calls) AS calls, sum(total_time) AS total_time, sum(self_time) AS self_time FROM pg_stat_user_functions GROUP BY 2 ORDER BY 4 DESC LIMIT 128; 7 | ttl: 10 8 | min_version: 090400 9 | metrics: 10 | - datname: { usage: LABEL ,description: Name of belonged database } 11 | - funcname: { usage: LABEL ,description: Name of this function, may have multiple override } 12 | - calls: { usage: COUNTER ,description: Number of times this function has been called } 13 | - total_time: { usage: COUNTER ,description: Total time spent in this function and all other functions called by it, in ms } 14 | - self_time: { usage: COUNTER ,description: Total time spent in this function itself, not including other functions called by it, in ms } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0730-pg_seq.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0730 pg_seq 3 | #==============================================================# 4 | pg_seq: 5 | desc: PostgreSQL sequence metrics 6 | query: SELECT CURRENT_CATALOG AS datname, schemaname || '.' 
|| sequencename AS seqname, last_value, blks_read, blks_hit FROM pg_sequences s, LATERAL (SELECT relid, blks_read, blks_hit FROM pg_statio_all_sequences sio WHERE s.schemaname = sio.schemaname AND s.sequencename = sio.relname LIMIT 1) d LIMIT 128; 7 | ttl: 10 8 | min_version: 100000 9 | metrics: 10 | - datname: { usage: LABEL ,description: Database name of this sequence } 11 | - seqname: { usage: LABEL ,description: Fully schema qualified sequence name } 12 | - last_value: { usage: COUNTER ,description: The last sequence value written to disk } 13 | - blks_read: { usage: COUNTER ,description: Number of disk blocks read from this sequence } 14 | - blks_hit: { usage: COUNTER ,description: Number of buffer hits in this sequence } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0740-pg_relkind.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0740 pg_relkind 3 | #==============================================================# 4 | pg_relkind: 5 | name: pg_relkind 6 | desc: Postgres relation count by kind (category, r,i,m,t,...) 
7 | query: SELECT CURRENT_CATALOG AS datname, relkind, count(*) AS count FROM pg_class GROUP BY relkind; 8 | ttl: 60 9 | timeout: 1 10 | min_version: 090400 11 | metrics: 12 | - datname: { usage: LABEL ,description: Name of database } 13 | - relkind: { usage: LABEL ,description: Kind of this relation, could be r,i,S,t,v,m,c,f,p,I } 14 | - count: { usage: GAUGE ,description: Number of relations of corresponding relkind } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0750-pg_defpart.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0750 pg_defpart 3 | #==============================================================# 4 | pg_defpart: 5 | name: pg_defpart 6 | desc: PostgreSQL default partition tuples 7 | query: SELECT CURRENT_CATALOG AS datname, relnamespace::RegNamespace || '.' || relname AS relname, reltuples AS tuples FROM pg_class WHERE relpartbound IS NOT NULL AND pg_catalog.pg_get_expr(relpartbound, oid) = 'DEFAULT' ORDER BY reltuples DESC LIMIT 64; 8 | ttl: 60 9 | timeout: 1 10 | min_version: 110000 11 | metrics: 12 | - datname: { usage: LABEL ,description: Database name of this default partition } 13 | - relname: { usage: LABEL ,description: Schema qualified default partition relation name } 14 | - tuples: { usage: GAUGE ,description: Number of tuples in this default partition } 15 | 16 | 17 | -------------------------------------------------------------------------------- /config/0810-pg_table_size.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0810 pg_table_size 3 | #==============================================================# 4 | pg_table_size: 5 | desc: PostgreSQL table size metrics, quite slow 6 | query: |- 7 | SELECT CURRENT_CATALOG AS datname, nsp.nspname || '.' 
|| rel.relname AS relname, 8 | pg_total_relation_size(rel.oid) AS bytes, 9 | pg_relation_size(rel.oid) AS relsize, 10 | pg_indexes_size(rel.oid) AS indexsize, 11 | pg_total_relation_size(reltoastrelid) AS toastsize 12 | FROM pg_namespace nsp JOIN pg_class rel ON nsp.oid = rel.relnamespace 13 | WHERE nspname <> ALL(ARRAY['pg_catalog', 'information_schema']) AND rel.relkind = 'r' 14 | ORDER BY 3 DESC NULLS LAST LIMIT 256; 15 | 16 | ttl: 300 17 | timeout: 2 18 | min_version: 100000 19 | metrics: 20 | - datname: { usage: LABEL ,description: Database name of this table } 21 | - relname: { usage: LABEL ,description: Schema qualified table name } 22 | - bytes: { usage: GAUGE ,default: 0 ,description: Total bytes of this table (including toast, index, toast index) } 23 | - relsize: { usage: GAUGE ,default: 0 ,description: Bytes of this table itself (main, vm, fsm) } 24 | - indexsize: { usage: GAUGE ,default: 0 ,description: Bytes of all related indexes of this table } 25 | - toastsize: { usage: GAUGE ,default: 0 ,description: Bytes of toast tables of this table } 26 | 27 | 28 | -------------------------------------------------------------------------------- /config/0820-pg_table_bloat.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0820 pg_table_bloat 3 | #==============================================================# 4 | # pg_table_bloat require auxiliary view to work. Disable it or create auxiliary view before use: 5 | pg_table_bloat: 6 | name: pg_table_bloat 7 | desc: PostgreSQL table bloat metrics, require auxiliary view pg_table_bloat to work 8 | query: SELECT datname, nspname || '.' 
|| relname AS relname, size, ratio FROM pg_table_bloat ORDER BY size DESC LIMIT 64; 9 | ttl: 300 10 | timeout: 2 11 | min_version: 090400 12 | skip: true 13 | metrics: 14 | - datname: { usage: LABEL ,description: Database name of this table } 15 | - relname: { usage: LABEL ,description: Schema qualified name of this table } 16 | - size: { usage: GAUGE ,description: Total bytes of this table } 17 | - ratio: { usage: GAUGE ,description: Estimated bloat ratio of this table from 0 to 1 } 18 | 19 | 20 | -------------------------------------------------------------------------------- /config/0830-pg_index_bloat.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0830 pg_index_bloat 3 | #==============================================================# 4 | # pg_index_bloat require auxiliary view to work. Disable it or create auxiliary view before use: 5 | pg_index_bloat: 6 | name: pg_index_bloat 7 | desc: PostgreSQL index bloat metrics (btree only), require pg_index_bloat 8 | query: SELECT datname, nspname || '.' 
|| relname AS relname, size, ratio FROM pg_index_bloat ORDER BY size DESC LIMIT 64; 9 | ttl: 300 10 | timeout: 2 11 | min_version: 090400 12 | skip: true 13 | metrics: 14 | - datname: { usage: LABEL ,description: Database name of this index } 15 | - relname: { usage: LABEL ,description: Schema qualified index name } 16 | - size: { usage: GAUGE ,description: Total bytes of this index } 17 | - ratio: { usage: GAUGE ,description: Estimated bloat ratio of this index, 0~1 } 18 | 19 | 20 | -------------------------------------------------------------------------------- /config/0910-pgbouncer_list.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0910 pgbouncer_list 3 | #==============================================================# 4 | # http://www.pgbouncer.org/usage.html#show-lists 5 | pgbouncer_list: 6 | name: pgbouncer_list 7 | desc: Pgbouncer entry list 8 | query: SHOW LISTS; 9 | ttl: 10 10 | min_version: 10800 11 | fatal: true 12 | tags: [ pgbouncer ] 13 | metrics: 14 | - list: { usage: LABEL ,description: Pgbouncer internal list name } 15 | - items: { usage: GAUGE ,description: Number of corresponding pgbouncer object } 16 | 17 | 18 | -------------------------------------------------------------------------------- /config/0920-pgbouncer_database.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0920 pgbouncer_database 3 | #==============================================================# 4 | # http://www.pgbouncer.org/usage.html#show-databases 5 | pgbouncer_database_124: 6 | name: pgbouncer_database 7 | desc: Pgbouncer database stats (since 1.24) 8 | query: SHOW DATABASES; 9 | ttl: 10 10 | min_version: 12400 11 | tags: [ pgbouncer ] 12 | metrics: 13 | - name: { usage: LABEL ,rename: datname ,description: Name of configured database entry } 14 | - 
host: { usage: LABEL ,description: Host that pgbouncer will connects to } 15 | - port: { usage: LABEL ,description: Port that pgbouncer will connects to } 16 | - database: { usage: LABEL ,rename: real_datname ,description: The real database name pgbouncer connects to } 17 | - force_user: { usage: DISCARD } 18 | - pool_size: { usage: GAUGE ,description: Maximum number of server connections } 19 | - min_pool_size: { usage: GAUGE ,description: Minimum number of server connections } 20 | - reserve_pool_size: { usage: GAUGE ,rename: reserve_pool ,description: Maximum number of additional connections for this database } 21 | - server_lifetime: { usage: GAUGE ,description: The maximum lifetime of a server connection for this database } 22 | - pool_mode: { usage: DISCARD } 23 | - load_balance_hosts: { usage: DISCARD } 24 | - max_connections: { usage: GAUGE ,description: Maximum number of allowed connections for this database } 25 | - current_connections: { usage: GAUGE ,description: Current number of connections for this database } 26 | - max_client_connections: { usage: GAUGE ,description: Maximum number of allowed client connections for this pgbouncer instance } 27 | - current_client_connections: { usage: GAUGE ,description: Current number of client connections for this database } 28 | - paused: { usage: GAUGE ,description: True(1) if this database is currently paused, else 0 } 29 | - disabled: { usage: GAUGE ,description: True(1) if this database is currently disabled, else 0 } 30 | 31 | pgbouncer_database_123: 32 | name: pgbouncer_database 33 | desc: Pgbouncer database stats 1.23 34 | query: SHOW DATABASES; 35 | ttl: 10 36 | min_version: 12300 37 | max_version: 12400 38 | tags: [ pgbouncer ] 39 | metrics: 40 | - name: { usage: LABEL ,rename: datname ,description: Name of configured database entry } 41 | - host: { usage: LABEL ,description: Host that pgbouncer will connects to } 42 | - port: { usage: LABEL ,description: Port that pgbouncer will connects to } 43 | - 
database: { usage: LABEL ,rename: real_datname ,description: The real database name pgbouncer connects to } 44 | - force_user: { usage: DISCARD } 45 | - pool_size: { usage: GAUGE ,description: Maximum number of server connections } 46 | - min_pool_size: { usage: GAUGE ,description: Minimum number of server connections } 47 | - reserve_pool: { usage: GAUGE ,description: Maximum number of additional connections for this database } 48 | - server_lifetime: { usage: GAUGE ,description: The maximum lifetime of a server connection for this database } 49 | - pool_mode: { usage: DISCARD } 50 | - max_connections: { usage: GAUGE ,description: Maximum number of allowed connections for this database } 51 | - current_connections: { usage: GAUGE ,description: Current number of connections for this database } 52 | - paused: { usage: GAUGE ,description: True(1) if this database is currently paused, else 0 } 53 | - disabled: { usage: GAUGE ,description: True(1) if this database is currently disabled, else 0 } 54 | 55 | pgbouncer_database_116: 56 | name: pgbouncer_database 57 | desc: Pgbouncer database stats (1.16-1.22) 58 | query: SHOW DATABASES; 59 | ttl: 10 60 | min_version: 11600 61 | max_version: 12300 62 | tags: [ pgbouncer ] 63 | metrics: 64 | - name: { usage: LABEL ,rename: datname ,description: Name of configured database entry } 65 | - host: { usage: LABEL ,description: Host that pgbouncer will connects to } 66 | - port: { usage: LABEL ,description: Port that pgbouncer will connects to } 67 | - database: { usage: LABEL ,rename: real_datname ,description: The real database name pgbouncer connects to } 68 | - force_user: { usage: DISCARD } 69 | - pool_size: { usage: GAUGE ,description: Maximum number of server connections } 70 | - min_pool_size: { usage: GAUGE ,description: Minimum number of server connections } 71 | - reserve_pool: { usage: GAUGE ,description: Maximum number of additional connections for this database } 72 | - pool_mode: { usage: DISCARD } 73 | - 
max_connections: { usage: GAUGE ,description: Maximum number of allowed connections for this database } 74 | - current_connections: { usage: GAUGE ,description: Current number of connections for this database } 75 | - paused: { usage: GAUGE ,description: True(1) if this database is currently paused, else 0 } 76 | - disabled: { usage: GAUGE ,description: True(1) if this database is currently disabled, else 0 } 77 | 78 | pgbouncer_database_108: 79 | name: pgbouncer_database 80 | desc: Pgbouncer database stats (1.08-1.15) 81 | query: SHOW DATABASES; 82 | ttl: 10 83 | min_version: 10800 84 | max_version: 11600 85 | tags: [ pgbouncer ] 86 | metrics: 87 | - name: { usage: LABEL ,rename: datname ,description: Name of configured database entry } 88 | - host: { usage: LABEL ,description: Host that pgbouncer will connects to } 89 | - port: { usage: LABEL ,description: Port that pgbouncer will connects to } 90 | - database: { usage: LABEL ,rename: real_datname ,description: The real database name pgbouncer connects to } 91 | - force_user: { usage: DISCARD } 92 | - pool_size: { usage: COUNTER ,description: Maximum number of server connections } 93 | - reserve_pool: { usage: GAUGE ,description: Maximum number of additional connections for this database } 94 | - pool_mode: { usage: DISCARD } 95 | - max_connections: { usage: GAUGE ,description: Maximum number of allowed connections for this database } 96 | - current_connections: { usage: GAUGE ,description: Current number of connections for this database } 97 | - paused: { usage: GAUGE ,description: True(1) if this database is currently paused, else 0 } 98 | - disabled: { usage: GAUGE ,description: True(1) if this database is currently disabled, else 0 } 99 | 100 | 101 | -------------------------------------------------------------------------------- /config/0930-pgbouncer_stat.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 
2 | # 0930 pgbouncer_stat 3 | #==============================================================# 4 | # http://www.pgbouncer.org/usage.html#show-stats 5 | pgbouncer_stat_124: 6 | name: pgbouncer_stat 7 | desc: Pgbouncer stats per database (since 1.24) 8 | query: SHOW STATS; 9 | ttl: 10 10 | min_version: 12400 11 | tags: [ pgbouncer ] 12 | metrics: 13 | - database: { usage: LABEL ,rename: datname ,description: Name of database } 14 | - total_xact_count: { usage: COUNTER ,description: Total number of SQL transactions pooled by pgbouncer } 15 | - total_query_count: { usage: COUNTER ,description: Total number of SQL queries pooled by pgbouncer } 16 | - total_server_assignment_count: { usage: COUNTER ,description: Total times a server was assigned to a client } 17 | - total_received: { usage: COUNTER ,description: Total volume in bytes of network traffic received by pgbouncer } 18 | - total_sent: { usage: COUNTER ,description: Total volume in bytes of network traffic sent by pgbouncer } 19 | - total_xact_time: { usage: COUNTER ,scale: 1e-6 ,description: Total number of seconds spent when in a transaction } 20 | - total_query_time: { usage: COUNTER ,scale: 1e-6 ,description: Total number of seconds spent when executing queries } 21 | - total_wait_time: { usage: COUNTER ,scale: 1e-6 ,description: Time spent by clients waiting for a server, in seconds } 22 | - total_client_parse_count: { usage: COUNTER ,description: Total number of prepared statements created by clients } 23 | - total_server_parse_count: { usage: COUNTER ,description: Total number of prepared statements created on a server. 
} 24 | - total_bind_count: { usage: COUNTER ,description: Total number of prepared statements readied for execution by clients and forwarded to postgres } 25 | - avg_xact_count: { usage: GAUGE ,description: Average transactions per second in last stat period } 26 | - avg_query_count: { usage: GAUGE ,description: Average queries per second in last stat period } 27 | - avg_server_assignment_count: { usage: GAUGE ,description: Average number of times a server as assigned to a client per second in the last stat period. } 28 | - avg_recv: { usage: GAUGE ,description: Average received (from clients) bytes per second } 29 | - avg_sent: { usage: GAUGE ,description: Average sent (to clients) bytes per second } 30 | - avg_xact_time: { usage: GAUGE ,scale: 1e-6 ,description: Average transaction duration, in seconds } 31 | - avg_query_time: { usage: GAUGE ,scale: 1e-6 ,description: Average query duration, in seconds } 32 | - avg_wait_time: { usage: GAUGE ,scale: 1e-6 ,description: Time spent by clients waiting for a server, in seconds (average per second). } 33 | - avg_client_parse_count: { usage: COUNTER ,description: Average number of prepared statements created by clients } 34 | - avg_server_parse_count: { usage: COUNTER ,description: Average number of prepared statements created on a server. 
} 35 | - avg_bind_count: { usage: COUNTER ,description: Average number of prepared statements readied for execution by clients and forwarded to postgres } 36 | 37 | pgbouncer_stat_123: 38 | name: pgbouncer_stat 39 | desc: Pgbouncer stats per database (1.23) 40 | query: SHOW STATS; 41 | ttl: 10 42 | min_version: 12300 43 | max_version: 12400 44 | tags: [ pgbouncer ] 45 | metrics: 46 | - database: { usage: LABEL ,rename: datname ,description: Name of database } 47 | - total_xact_count: { usage: COUNTER ,description: Total number of SQL transactions pooled by pgbouncer } 48 | - total_query_count: { usage: COUNTER ,description: Total number of SQL queries pooled by pgbouncer } 49 | - total_server_assignment_count: { usage: COUNTER ,description: Total times a server was assigned to a client } 50 | - total_received: { usage: COUNTER ,description: Total volume in bytes of network traffic received by pgbouncer } 51 | - total_sent: { usage: COUNTER ,description: Total volume in bytes of network traffic sent by pgbouncer } 52 | - total_xact_time: { usage: COUNTER ,scale: 1e-6 ,description: Total number of seconds spent when in a transaction } 53 | - total_query_time: { usage: COUNTER ,scale: 1e-6 ,description: Total number of seconds spent when executing queries } 54 | - total_wait_time: { usage: COUNTER ,scale: 1e-6 ,description: Time spent by clients waiting for a server, in seconds } 55 | - avg_xact_count: { usage: GAUGE ,description: Average transactions per second in last stat period } 56 | - avg_query_count: { usage: GAUGE ,description: Average queries per second in last stat period } 57 | - avg_server_assignment_count: { usage: GAUGE ,description: Average number of times a server as assigned to a client per second in the last stat period. 
} 58 | - avg_recv: { usage: GAUGE ,description: Average received (from clients) bytes per second } 59 | - avg_sent: { usage: GAUGE ,description: Average sent (to clients) bytes per second } 60 | - avg_xact_time: { usage: GAUGE ,scale: 1e-6 ,description: Average transaction duration, in seconds } 61 | - avg_query_time: { usage: GAUGE ,scale: 1e-6 ,description: Average query duration, in seconds } 62 | - avg_wait_time: { usage: GAUGE ,scale: 1e-6 ,description: Time spent by clients waiting for a server, in seconds (average per second). } 63 | 64 | pgbouncer_stat_108: 65 | name: pgbouncer_stat 66 | desc: Pgbouncer stats per database (1.08 - 1.22) 67 | query: SHOW STATS; 68 | ttl: 10 69 | min_version: 10800 70 | max_version: 12300 71 | tags: [ pgbouncer ] 72 | metrics: 73 | - database: { usage: LABEL ,rename: datname ,description: Name of database } 74 | - total_xact_count: { usage: COUNTER ,description: Total number of SQL transactions pooled by pgbouncer } 75 | - total_query_count: { usage: COUNTER ,description: Total number of SQL queries pooled by pgbouncer } 76 | - total_received: { usage: COUNTER ,description: Total volume in bytes of network traffic received by pgbouncer } 77 | - total_sent: { usage: COUNTER ,description: Total volume in bytes of network traffic sent by pgbouncer } 78 | - total_xact_time: { usage: COUNTER ,scale: 1e-6 ,description: Total number of seconds spent when in a transaction } 79 | - total_query_time: { usage: COUNTER ,scale: 1e-6 ,description: Total number of seconds spent when executing queries } 80 | - total_wait_time: { usage: COUNTER ,scale: 1e-6 ,description: Time spent by clients waiting for a server, in seconds } 81 | - avg_xact_count: { usage: GAUGE ,description: Average transactions per second in last stat period } 82 | - avg_query_count: { usage: GAUGE ,description: Average queries per second in last stat period } 83 | - avg_recv: { usage: GAUGE ,description: Average received (from clients) bytes per second } 84 | - avg_sent: 
{ usage: GAUGE ,description: Average sent (to clients) bytes per second } 85 | - avg_xact_time: { usage: GAUGE ,scale: 1e-6 ,description: Average transaction duration, in seconds } 86 | - avg_query_time: { usage: GAUGE ,scale: 1e-6 ,description: Average query duration, in seconds } 87 | - avg_wait_time: { usage: GAUGE ,scale: 1e-6 ,description: Time spent by clients waiting for a server, in seconds (average per second). } 88 | 89 | 90 | -------------------------------------------------------------------------------- /config/0940-pgbouncer_pool.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 0940 pgbouncer_pool 3 | #==============================================================# 4 | # http://www.pgbouncer.org/usage.html#show-pools 5 | pgbouncer_pool_124: 6 | name: pgbouncer_pool 7 | desc: Pgbouncer pool stats (1.24+) 8 | query: SHOW POOLS; 9 | ttl: 10 10 | min_version: 12400 11 | tags: [ pgbouncer ] 12 | metrics: 13 | - database: { usage: LABEL, rename: datname, description: Database name of this pool } 14 | - user: { usage: LABEL, description: User name of this pool } 15 | - cl_active: { usage: GAUGE, rename: active_clients, description: Client connections that are linked to server connection and can process queries } 16 | - cl_waiting: { usage: GAUGE, rename: waiting_clients, description: Client connections that have sent queries but have not yet got a server connection } 17 | - cl_active_cancel_req: { usage: GAUGE, rename: active_cancel_clients, description: Client connections that have forwarded query cancellations to the server and are waiting for the server response. } 18 | - cl_waiting_cancel_req: { usage: GAUGE, rename: cancel_clients, description: Client connections that have not forwarded query cancellations to the server yet. 
} 19 | - sv_active: { usage: GAUGE, rename: active_servers, description: Server connections that are linked to a client } 20 | - sv_active_cancel: { usage: GAUGE, rename: active_cancel_servers, description: Server connections that are currently forwarding a cancel request } 21 | - sv_being_canceled: { usage: GAUGE, rename: cancel_servers, description: cancel requests have completed that were sent to cancel a query on this server } 22 | - sv_idle: { usage: GAUGE, rename: idle_servers, description: Server connections that are unused and immediately usable for client queries } 23 | - sv_used: { usage: GAUGE, rename: used_servers, description: Server connections that have been idle for more than server_check_delay (means have to run check query) } 24 | - sv_tested: { usage: GAUGE, rename: tested_servers, description: Server connections that are currently running reset or check query } 25 | - sv_login: { usage: GAUGE, rename: login_servers, description: Server connections currently in the process of logging in } 26 | - maxwait: { usage: GAUGE, description: How long the first(oldest) client in the queue has waited, in seconds, key metric } 27 | - maxwait_us: { usage: GAUGE, description: Microsecond part of the maximum waiting time. 
} 28 | - pool_mode: { usage: LABEL, description: Pooling mode in use } 29 | - load_balance_hosts: { usage: LABEL, description: The load_balance_hosts in use } 30 | 31 | pgbouncer_pool_118: 32 | name: pgbouncer_pool 33 | desc: Pgbouncer pool stats (1.18-1.23) 34 | query: SHOW POOLS; 35 | ttl: 10 36 | min_version: 11800 37 | max_version: 12400 38 | tags: [ pgbouncer ] 39 | metrics: 40 | - database: { usage: LABEL, rename: datname, description: Database name of this pool } 41 | - user: { usage: LABEL, description: User name of this pool } 42 | - cl_active: { usage: GAUGE, rename: active_clients, description: Client connections that are linked to server connection and can process queries } 43 | - cl_waiting: { usage: GAUGE, rename: waiting_clients, description: Client connections that have sent queries but have not yet got a server connection } 44 | - cl_active_cancel_req: { usage: GAUGE, rename: active_cancel_clients, description: Client connections that have forwarded query cancellations to the server and are waiting for the server response. } 45 | - cl_waiting_cancel_req: { usage: GAUGE, rename: cancel_clients, description: Client connections that have not forwarded query cancellations to the server yet. 
} 46 | - sv_active: { usage: GAUGE, rename: active_servers, description: Server connections that are linked to a client } 47 | - sv_active_cancel: { usage: GAUGE, rename: active_cancel_servers, description: Server connections that are currently forwarding a cancel request } 48 | - sv_being_canceled: { usage: GAUGE, rename: cancel_servers, description: cancel requests have completed that were sent to cancel a query on this server } 49 | - sv_idle: { usage: GAUGE, rename: idle_servers, description: Server connections that are unused and immediately usable for client queries } 50 | - sv_used: { usage: GAUGE, rename: used_servers, description: Server connections that have been idle for more than server_check_delay (means have to run check query) } 51 | - sv_tested: { usage: GAUGE, rename: tested_servers, description: Server connections that are currently running reset or check query } 52 | - sv_login: { usage: GAUGE, rename: login_servers, description: Server connections currently in the process of logging in } 53 | - maxwait: { usage: GAUGE, description: How long the first(oldest) client in the queue has waited, in seconds, key metric } 54 | - maxwait_us: { usage: GAUGE, description: Microsecond part of the maximum waiting time. 
} 55 | - pool_mode: { usage: LABEL, description: Pooling mode in use } 56 | 57 | pgbouncer_pool_116: 58 | name: pgbouncer_pool 59 | desc: Pgbouncer pool stats (1.16-1.17) 60 | query: SHOW POOLS; 61 | ttl: 10 62 | min_version: 11600 63 | max_version: 11800 64 | tags: [ pgbouncer ] 65 | metrics: 66 | - database: { usage: LABEL, rename: datname, description: Database name of this pool } 67 | - user: { usage: LABEL, description: User name of this pool } 68 | - cl_active: { usage: GAUGE, rename: active_clients, description: Client connections that are linked to server connection and can process queries } 69 | - cl_waiting: { usage: GAUGE, rename: waiting_clients, description: Client connections that have sent queries but have not yet got a server connection } 70 | - cl_cancel_req: { usage: GAUGE, rename: cancel_clients, description: Client connections that have not forwarded query cancellations to the server yet. } 71 | - sv_active: { usage: GAUGE, rename: active_servers, description: Server connections that are linked to a client } 72 | - sv_idle: { usage: GAUGE, rename: idle_servers, description: Server connections that are unused and immediately usable for client queries } 73 | - sv_used: { usage: GAUGE, rename: used_servers, description: Server connections that have been idle for more than server_check_delay (means have to run check query) } 74 | - sv_tested: { usage: GAUGE, rename: tested_servers, description: Server connections that are currently running reset or check query } 75 | - sv_login: { usage: GAUGE, rename: login_servers, description: Server connections currently in the process of logging in } 76 | - maxwait: { usage: GAUGE, description: How long the first(oldest) client in the queue has waited, in seconds, key metric } 77 | - maxwait_us: { usage: GAUGE, description: Microsecond part of the maximum waiting time. 
} 78 | - pool_mode: { usage: LABEL, description: Pooling mode in use } 79 | 80 | pgbouncer_pool_108: 81 | name: pgbouncer_pool 82 | desc: Pgbouncer pool stats (1.08-1.15) 83 | query: SHOW POOLS; 84 | ttl: 10 85 | min_version: 10800 86 | max_version: 11600 87 | tags: [ pgbouncer ] 88 | metrics: 89 | - database: { usage: LABEL, rename: datname, description: Database name of this pool } 90 | - user: { usage: LABEL, description: User name of this pool } 91 | - cl_active: { usage: GAUGE, rename: active_clients, description: Client connections that are linked to server connection and can process queries } 92 | - cl_waiting: { usage: GAUGE, rename: waiting_clients, description: Client connections that have sent queries but have not yet got a server connection } 93 | - sv_active: { usage: GAUGE, rename: active_servers, description: Server connections that are linked to a client } 94 | - sv_idle: { usage: GAUGE, rename: idle_servers, description: Server connections that are unused and immediately usable for client queries } 95 | - sv_used: { usage: GAUGE, rename: used_servers, description: Server connections that have been idle for more than server_check_delay (means have to run check query) } 96 | - sv_tested: { usage: GAUGE, rename: tested_servers, description: Server connections that are currently running reset or check query } 97 | - sv_login: { usage: GAUGE, rename: login_servers, description: Server connections currently in the process of logging in } 98 | - maxwait: { usage: GAUGE, description: How long the first(oldest) client in the queue has waited, in seconds, key metric } 99 | - maxwait_us: { usage: GAUGE, description: Microsecond part of the maximum waiting time. 
} 100 | - pool_mode: { usage: LABEL, description: Pooling mode in use } 101 | 102 | 103 | -------------------------------------------------------------------------------- /config/1000-pg_wait_event.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 1000 pg_wait_event 3 | #==============================================================# 4 | pg_wait_event: 5 | name: pg_wait_event 6 | desc: PostgreSQL wait event sampling based on pg_wait_sampling extension 7 | query: SELECT coalesce(event_type, 'Running') AS etype, coalesce(event, 'Running') AS event, sum(count) AS count FROM pg_wait_sampling_profile GROUP BY 1,2; 8 | ttl: 10 9 | min_version: 100000 10 | tags: [ cluster, "extension:pg_wait_sampling" ] 11 | metrics: 12 | - etype: { usage: "LABEL" ,description: "wait event type" } 13 | - event: { usage: "LABEL" ,description: "wait event name" } 14 | - count: { usage: "COUNTER" ,description: "Total count of wait events sampled" } 15 | 16 | pg_wait_event_1s: 17 | name: pg_wait_event_1s 18 | desc: PostgreSQL wait event sampling based on pg_wait_sampling extension 19 | query: SELECT coalesce(event_type, 'Running') AS etype, coalesce(event, 'Running') AS event, count(*) FROM pg_wait_sampling_history WHERE ts BETWEEN now() - '1s'::INTERVAL AND now() GROUP BY 1,2; 20 | ttl: 10 21 | min_version: 100000 22 | tags: [ cluster, "extension:pg_wait_sampling" ] 23 | metrics: 24 | - etype: { usage: "LABEL" ,description: "wait event type" } 25 | - event: { usage: "LABEL" ,description: "wait event name" } 26 | - count: { usage: "GAUGE" ,description: "Number of wait events in last second" } 27 | 28 | 29 | -------------------------------------------------------------------------------- /config/1800-pg_tsdb_hypertable.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 1800 
pg_tsdb_hypertable 3 | #==============================================================# 4 | # this collector reqires timescaledb extension to be installed 5 | pg_tsdb_hypertable: 6 | name: pg_tsdb_hypertable 7 | desc: TimescaleDB hypertable overview 8 | query: |- 9 | SELECT 10 | current_database() AS datname, 11 | format('%I.%I', hypertable_schema, hypertable_name) AS relname, 12 | num_dimensions AS dimensions, num_chunks AS chunks, 13 | compression_enabled::BOOLEAN::int AS compressed, 14 | hypertable_size(format('"%I"."%I"', hypertable_schema, hypertable_name)::RegClass) AS bytes 15 | FROM timescaledb_information.hypertables; 16 | 17 | ttl: 60 18 | timeout: 2 19 | min_version: 100000 20 | tags: [ "extension:timescaledb", "schema:timescaledb_information" ] 21 | metrics: 22 | - datname: { usage: LABEL ,description: database name } 23 | - relname: { usage: LABEL ,description: Hypertable relation name } 24 | - dimensions: { usage: GAUGE ,description: Number of partitioning dimensions } 25 | - chunks: { usage: GAUGE ,description: Total chunks of this hypertable } 26 | - compressed: { usage: GAUGE ,description: 1 if compression enabled } 27 | - bytes: { usage: GAUGE ,description: Total size of hypertable in bytes } 28 | 29 | 30 | -------------------------------------------------------------------------------- /config/1900-pg_citus.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 1900 pg_citus_node 3 | #==============================================================# 4 | # https://docs.citusdata.com/en/latest/develop/api_metadata.html#worker-node-table 5 | pg_citus_node: 6 | name: pg_citus_node 7 | desc: Citus worker coordinator node inventory 8 | query: |- 9 | SELECT 10 | CONCAT(nodename, ':', nodeport) AS node, 11 | current_database() AS datname, 12 | nodeid AS id, 13 | groupid AS group, 14 | hasmetadata::BOOLEAN::INT AS has_meta, 15 | isactive::BOOLEAN::INT 
AS is_active, 16 | metadatasynced::BOOLEAN::INT AS meta_synced, 17 | shouldhaveshards::BOOLEAN::INT AS have_shards 18 | FROM pg_dist_node; 19 | ttl: 60 20 | min_version: 100000 21 | tags: [ "extension:citus" ] 22 | metrics: 23 | - node: { usage: LABEL ,description: nodename:port of the PostgreSQL instance } 24 | - datname: { usage: LABEL ,description: database name } 25 | - id: { usage: GAUGE ,description: auto‑generated node identifier } 26 | - group: { usage: GAUGE ,description: replication group id (primary + secondaries) } 27 | - has_meta: { usage: GAUGE ,description: 1 = internal use flag set } 28 | - is_active: { usage: GAUGE ,description: 1 = node currently accepts shards } 29 | - meta_synced: { usage: GAUGE ,description: 1 = metadata fully synced to node } 30 | - have_shards: { usage: GAUGE ,description: 1 = rebalancer may place shards here } 31 | 32 | 33 | -------------------------------------------------------------------------------- /config/2000-pg_heartbeat.yml: -------------------------------------------------------------------------------- 1 | #==============================================================# 2 | # 1000 heartbeat 3 | #==============================================================# 4 | # this is a example of application monitoring and predicate queries 5 | pg_heartbeat: 6 | name: pg_heartbeat 7 | desc: monitoring heartbeat in monitor.heartbeat table 8 | predicate_queries: 9 | - name: if heartbeat table exists 10 | predicate_query: | 11 | SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'monitor' AND table_name = 'heartbeat'); 12 | query: |- 13 | SELECT id AS cluster_name, extract(EPOCH FROM ts) AS ts, lsn, txid FROM monitor.heartbeat; 14 | 15 | ttl: 10 16 | min_version: 090100 17 | tags: [ "dbname:postgres", "schema:monitor" ] 18 | skip: true 19 | metrics: 20 | - cluster_name: { usage: LABEL, description: cluster_name param of this database cluster } 21 | - ts: { usage: GAUGE, description: unix timestamp of 
the heartbeat } 22 | - lsn: { usage: GAUGE, description: lsn of the heartbeat } 23 | - txid: { usage: GAUGE, description: txid of the heartbeat } 24 | 25 | 26 | -------------------------------------------------------------------------------- /exporter/arg.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | 7 | "github.com/alecthomas/kingpin/v2" 8 | "github.com/prometheus/exporter-toolkit/web/kingpinflag" 9 | ) 10 | 11 | var ( 12 | // exporter settings 13 | pgURL = kingpin.Flag("url", "postgres target url").Short('d').Short('u').String() 14 | configPath = kingpin.Flag("config", "path to config dir or file").Short('c').String() 15 | webConfig = kingpinflag.AddFlags(kingpin.CommandLine, ":9630") 16 | constLabels = kingpin.Flag("label", "constant lables:comma separated list of label=value pair").Short('l').Default("").Envar("PG_EXPORTER_LABEL").String() 17 | serverTags = kingpin.Flag("tag", "tags,comma separated list of server tag").Default("").Short('t').Envar("PG_EXPORTER_TAG").String() 18 | disableCache = kingpin.Flag("disable-cache", "force not using cache").Default("false").Short('C').Envar("PG_EXPORTER_DISABLE_CACHE").Bool() 19 | disableIntro = kingpin.Flag("disable-intro", "disable collector level introspection metrics").Short('m').Default("false").Envar("PG_EXPORTER_DISABLE_INTRO").Bool() 20 | autoDiscovery = kingpin.Flag("auto-discovery", "automatically scrape all database for given server").Short('a').Default("false").Envar("PG_EXPORTER_AUTO_DISCOVERY").Bool() 21 | excludeDatabase = kingpin.Flag("exclude-database", "excluded databases when enabling auto-discovery").Short('x').Default("template0,template1,postgres").Envar("PG_EXPORTER_EXCLUDE_DATABASE").String() 22 | includeDatabase = kingpin.Flag("include-database", "included databases when enabling auto-discovery").Short('i').Default("").Envar("PG_EXPORTER_INCLUDE_DATABASE").String() 23 | exporterNamespace = 
kingpin.Flag("namespace", "prefix of built-in metrics, (pg|pgbouncer) by default").Short('n').Default("").Envar("PG_EXPORTER_NAMESPACE").String() 24 | failFast = kingpin.Flag("fail-fast", "fail fast instead of waiting during start-up").Short('f').Envar("PG_EXPORTER_FAIL_FAST").Default("false").Bool() 25 | connectTimeout = kingpin.Flag("connect-timeout", "connect timeout in ms, 100 by default").Short('T').Envar("PG_EXPORTER_CONNECT_TIMEOUT").Default("100").Int() 26 | 27 | // prometheus http 28 | // listenAddress = kingpin.Flag("web.listen-address", "prometheus web server listen address").Short('L').Default(":9630").Envar("PG_EXPORTER_LISTEN_ADDRESS").String() 29 | metricPath = kingpin.Flag("web.telemetry-path", "URL path under which to expose metrics.").Short('P').Default("/metrics").Envar("PG_EXPORTER_TELEMETRY_PATH").String() 30 | 31 | // action 32 | dryRun = kingpin.Flag("dry-run", "dry run and print raw configs").Default("false").Short('D').Bool() 33 | explainOnly = kingpin.Flag("explain", "explain server planned queries").Default("false").Short('E').Bool() 34 | 35 | // logger setting 36 | logLevel = kingpin.Flag("log.level", "log level: debug|info|warn|error]").Default("info").String() 37 | logFormat = kingpin.Flag("log.format", "log format: logfmt|json").Default("logfmt").String() 38 | ) 39 | 40 | // ParseArgs will parse cli args with kingpin. 
url and config have special treatment 41 | func ParseArgs() { 42 | kingpin.Version(fmt.Sprintf("pg_exporter %s (built with %s on %s/%s)\n", Version, runtime.Version(), runtime.GOOS, runtime.GOARCH)) 43 | kingpin.HelpFlag.Short('h') 44 | kingpin.Parse() 45 | Logger = configureLogger(*logLevel, *logFormat) 46 | logDebugf("init pg_exporter, configPath=%v constLabels=%v disableCache=%v autoDiscovery=%v excludeDatabase=%v includeDatabase=%v connectTimeout=%vms webConfig=%v metricPath=%v", 47 | *configPath, *constLabels, *disableCache, *autoDiscovery, *excludeDatabase, *includeDatabase, *connectTimeout, *webConfig.WebListenAddresses, *metricPath) 48 | *pgURL = GetPGURL() 49 | *configPath = GetConfig() 50 | } 51 | -------------------------------------------------------------------------------- /exporter/column.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/prometheus/client_golang/prometheus" 8 | ) 9 | 10 | /* ================ Column ================ */ 11 | 12 | const ( 13 | DISCARD = "DISCARD" // Ignore this column (when SELECT *) 14 | LABEL = "LABEL" // Use this column as a label 15 | COUNTER = "COUNTER" // Use this column as a counter 16 | GAUGE = "GAUGE" // Use this column as a gauge 17 | HISTOGRAM = "HISTOGRAM" // Use this column as a histogram 18 | ) 19 | 20 | // ColumnUsage determine how to use query result column 21 | var ColumnUsage = map[string]bool{ 22 | DISCARD: false, 23 | LABEL: false, 24 | COUNTER: true, 25 | GAUGE: true, 26 | HISTOGRAM: true, 27 | } 28 | 29 | // Column holds the metadata of query result 30 | type Column struct { 31 | Name string `yaml:"name"` 32 | Usage string `yaml:"usage,omitempty"` // column usage 33 | Rename string `yaml:"rename,omitempty"` // rename column 34 | Bucket []float64 `yaml:"bucket,omitempty"` // histogram bucket 35 | Scale string `yaml:"scale,omitempty"` // scale factor 36 | Default string 
`yaml:"default,omitempty"` // default value 37 | Desc string `yaml:"description,omitempty"` 38 | } 39 | 40 | // PrometheusValueType returns column's corresponding prometheus value type 41 | func (c *Column) PrometheusValueType() prometheus.ValueType { 42 | switch strings.ToUpper(c.Usage) { 43 | case GAUGE: 44 | return prometheus.GaugeValue 45 | case COUNTER: 46 | return prometheus.CounterValue 47 | default: 48 | // it's user's responsibility to make sure this is a value column 49 | panic(fmt.Errorf("column %s does not have a valid value type %s", c.Name, c.Usage)) 50 | } 51 | } 52 | 53 | // String turns column into a one-line text representation 54 | func (c *Column) String() string { 55 | return fmt.Sprintf("%-8s %-20s %s", c.Usage, c.Name, c.Desc) 56 | } 57 | 58 | // MetricDesc will generate MetricDesc from column and additional information 59 | func (c *Column) MetricDesc(prefix string, labels []string) *MetricDesc { 60 | metricName := fmt.Sprintf("%s_%s{%s}", prefix, c.Name, strings.Join(labels, ",")) 61 | if c.Rename != "" { 62 | metricName = fmt.Sprintf("%s_%s{%s}", prefix, c.Rename, labels) 63 | } 64 | return &MetricDesc{ 65 | metricName, 66 | labels, 67 | c, 68 | } 69 | } 70 | 71 | // MetricDesc is generated by collector's column definition 72 | type MetricDesc struct { 73 | Name string 74 | Labels []string 75 | Column *Column 76 | } 77 | 78 | // Signature will print metric signature such as pg_db_age{datname} 79 | func (m *MetricDesc) String() string { 80 | return fmt.Sprintf("%s %-8s %s", m.Name, m.Column.Usage, m.Column.Desc) 81 | } 82 | -------------------------------------------------------------------------------- /exporter/config.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path" 7 | "strings" 8 | 9 | "gopkg.in/yaml.v3" 10 | ) 11 | 12 | // GetConfig will try load config from target path 13 | func GetConfig() (res string) { 14 | // priority: cli-args 
> env > default settings (check exist) 15 | if res = *configPath; res != "" { 16 | logInfof("retrieve config path %s from command line", res) 17 | return res 18 | } 19 | if res = os.Getenv("PG_EXPORTER_CONFIG"); res != "" { 20 | logInfof("retrieve config path %s from PG_EXPORTER_CONFIG", res) 21 | return res 22 | } 23 | 24 | candidate := []string{"pg_exporter.yml", "/etc/pg_exporter.yml", "/etc/pg_exporter"} 25 | for _, res = range candidate { 26 | if _, err := os.Stat(res); err == nil { // default1 exist 27 | logInfof("fallback on default config path: %s", res) 28 | return res 29 | } 30 | } 31 | return "" 32 | } 33 | 34 | // ParseConfig turn config content into Query struct 35 | func ParseConfig(content []byte) (queries map[string]*Query, err error) { 36 | queries = make(map[string]*Query) 37 | if err = yaml.Unmarshal(content, &queries); err != nil { 38 | return nil, fmt.Errorf("malformed config: %w", err) 39 | } 40 | 41 | // parse additional fields 42 | for name, query := range queries { 43 | if query.Name == "" { 44 | query.Name = name 45 | } 46 | // parse query column info 47 | columns := make(map[string]*Column, len(query.Metrics)) 48 | var allColumns, labelColumns, metricColumns []string 49 | for _, colMap := range query.Metrics { 50 | for colName, column := range colMap { // one-entry map 51 | if column.Name == "" { 52 | column.Name = colName 53 | } 54 | if _, isValid := ColumnUsage[column.Usage]; !isValid { 55 | return nil, fmt.Errorf("column %s have unsupported usage: %s", colName, column.Desc) 56 | } 57 | column.Usage = strings.ToUpper(column.Usage) 58 | switch column.Usage { 59 | case LABEL: 60 | labelColumns = append(labelColumns, column.Name) 61 | case GAUGE, COUNTER: 62 | metricColumns = append(metricColumns, column.Name) 63 | } 64 | allColumns = append(allColumns, column.Name) 65 | columns[column.Name] = column 66 | } 67 | } 68 | query.Columns, query.ColumnNames, query.LabelNames, query.MetricNames = columns, allColumns, labelColumns, metricColumns 
69 | } 70 | return 71 | } 72 | 73 | // ParseQuery generate a single query from config string 74 | func ParseQuery(config string) (*Query, error) { 75 | queries, err := ParseConfig([]byte(config)) 76 | if err != nil { 77 | return nil, err 78 | } 79 | if len(queries) == 0 { 80 | return nil, fmt.Errorf("no query definition found") 81 | } 82 | if len(queries) > 1 { 83 | return nil, fmt.Errorf("multiple query definition found") 84 | } 85 | for _, q := range queries { 86 | return q, nil // return the only query instance 87 | } 88 | return nil, fmt.Errorf("no query definition found") 89 | } 90 | 91 | // LoadConfig will read single conf file or read multiple conf file if a dir is given 92 | // conf file in a dir will be load in alphabetic order, query with same name will overwrite predecessor 93 | func LoadConfig(configPath string) (queries map[string]*Query, err error) { 94 | stat, err := os.Stat(configPath) 95 | if err != nil { 96 | return nil, fmt.Errorf("invalid config path: %s: %w", configPath, err) 97 | } 98 | if stat.IsDir() { // recursively iterate conf files if a dir is given 99 | files, err := os.ReadDir(configPath) 100 | if err != nil { 101 | return nil, fmt.Errorf("fail reading config dir: %s: %w", configPath, err) 102 | } 103 | 104 | logDebugf("load config from dir: %s", configPath) 105 | confFiles := make([]string, 0) 106 | for _, conf := range files { 107 | if !(strings.HasSuffix(conf.Name(), ".yaml") || strings.HasSuffix(conf.Name(), ".yml")) && !conf.IsDir() { // depth = 1 108 | continue // skip non yaml files 109 | } 110 | confFiles = append(confFiles, path.Join(configPath, conf.Name())) 111 | } 112 | 113 | // make global config map and assign priority according to config file alphabetic orders 114 | // priority is an integer range from 1 to 999, where 1 - 99 is reserved for user 115 | queries = make(map[string]*Query) 116 | var queryCount, configCount int 117 | for _, confPath := range confFiles { 118 | if singleQueries, err := LoadConfig(confPath); err 
!= nil { 119 | logWarnf("skip config %s due to error: %s", confPath, err.Error()) 120 | } else { 121 | configCount++ 122 | for name, query := range singleQueries { 123 | queryCount++ 124 | if query.Priority == 0 { // set to config rank if not manually set 125 | query.Priority = 100 + configCount 126 | } 127 | queries[name] = query // so the later one will overwrite former one 128 | } 129 | } 130 | } 131 | logDebugf("load %d of %d queries from %d config files", len(queries), queryCount, configCount) 132 | return queries, nil 133 | } 134 | 135 | // single file case: recursive exit condition 136 | content, err := os.ReadFile(configPath) 137 | if err != nil { 138 | return nil, fmt.Errorf("fail reading config file %s: %w", configPath, err) 139 | } 140 | queries, err = ParseConfig(content) 141 | if err != nil { 142 | return nil, err 143 | } 144 | for branch, q := range queries { 145 | q.Path = stat.Name() 146 | q.Branch = branch 147 | // if timeout is not set, set to 100ms by default 148 | // if timeout is set to a neg number, set to 0, so it's actually disabled 149 | if q.Timeout == 0 { 150 | q.Timeout = 0.1 151 | } 152 | if q.Timeout < 0 { 153 | q.Timeout = 0 154 | } 155 | } 156 | logDebugf("load %d queries from %s, ", len(queries), configPath) 157 | return queries, nil 158 | 159 | } 160 | -------------------------------------------------------------------------------- /exporter/global.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "log/slog" 5 | "runtime" 6 | "sync" 7 | ) 8 | 9 | /* ================ Parameters ================ */ 10 | 11 | // Version is read by make build procedure 12 | var Version = "1.0.0" 13 | 14 | // Build information. Populated at build-time. 
15 | var ( 16 | Branch = "main" 17 | Revision = "HEAD" 18 | BuildDate = "20250421212100" 19 | GoVersion = runtime.Version() 20 | GOOS = runtime.GOOS 21 | GOARCH = runtime.GOARCH 22 | ) 23 | 24 | var defaultPGURL = "postgresql:///?sslmode=disable" 25 | 26 | /* ================ Global Vars ================ */ 27 | 28 | // PgExporter is the global singleton of Exporter 29 | var ( 30 | PgExporter *Exporter 31 | ReloadLock sync.Mutex 32 | Logger *slog.Logger 33 | ) 34 | -------------------------------------------------------------------------------- /exporter/main.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "net/http" 7 | "os" 8 | "os/signal" 9 | "runtime" 10 | "sort" 11 | "strings" 12 | "syscall" 13 | 14 | "github.com/prometheus/client_golang/prometheus" 15 | "github.com/prometheus/client_golang/prometheus/promhttp" 16 | "github.com/prometheus/exporter-toolkit/web" 17 | ) 18 | 19 | // DryRun will explain all query fetched from configs 20 | func DryRun() { 21 | configs, err := LoadConfig(*configPath) 22 | if err != nil { 23 | logErrorf("fail loading config %s, %v", *configPath, err) 24 | os.Exit(1) 25 | } 26 | 27 | var queries []*Query 28 | for _, query := range configs { 29 | queries = append(queries, query) 30 | } 31 | sort.Slice(queries, func(i, j int) bool { 32 | return queries[i].Priority < queries[j].Priority 33 | }) 34 | for _, query := range queries { 35 | fmt.Println(query.Explain()) 36 | } 37 | fmt.Println() 38 | os.Exit(0) 39 | 40 | } 41 | 42 | // Reload will launch a new pg exporter instance 43 | func Reload() error { 44 | ReloadLock.Lock() 45 | defer ReloadLock.Unlock() 46 | logDebugf("reload request received, launch new exporter instance") 47 | 48 | // create a new exporter 49 | newExporter, err := NewExporter( 50 | *pgURL, 51 | WithConfig(*configPath), 52 | WithConstLabels(*constLabels), 53 | WithCacheDisabled(*disableCache), 54 | 
WithIntroDisabled(*disableIntro), 55 | WithFailFast(*failFast), 56 | WithNamespace(*exporterNamespace), 57 | WithAutoDiscovery(*autoDiscovery), 58 | WithExcludeDatabase(*excludeDatabase), 59 | WithIncludeDatabase(*includeDatabase), 60 | WithTags(*serverTags), 61 | WithConnectTimeout(*connectTimeout), 62 | ) 63 | // if launch new exporter failed, do nothing 64 | if err != nil { 65 | logErrorf("fail to reload exporter: %s", err.Error()) 66 | return err 67 | } 68 | 69 | logDebugf("shutdown old exporter instance") 70 | // if older one exists, close and unregister it 71 | if PgExporter != nil { 72 | // DO NOT MANUALLY CLOSE OLD EXPORTER INSTANCE because the stupid implementation of sql.DB 73 | // there connection will be automatically released after 1 min 74 | // PgExporter.Close() 75 | prometheus.Unregister(PgExporter) 76 | } 77 | PgExporter = newExporter 78 | runtime.GC() 79 | logInfof("server reloaded") 80 | return nil 81 | } 82 | 83 | // DummyServer response with a dummy metrics pg_up 0 or pgbouncer_up 0 84 | func DummyServer() (s *http.Server, exit <-chan bool) { 85 | mux := http.NewServeMux() 86 | namespace := `pg` 87 | if ParseDatname(*pgURL) == `pgbouncer` { 88 | namespace = `pgbouncer` 89 | } 90 | // setup pg_up / pgbouncer_up metrics 91 | dummyMetricName := namespace + `_up` 92 | mux.HandleFunc(*metricPath, func(w http.ResponseWriter, req *http.Request) { 93 | userLabels := parseConstLabels(*constLabels) 94 | output := fmt.Sprintf("# HELP %s last scrape was able to connect to the server: 1 for yes, 0 for no\n# TYPE %s gauge\n", dummyMetricName, dummyMetricName) 95 | if len(userLabels) > 0 { 96 | labelStrs := make([]string, 0, len(userLabels)) 97 | for k, v := range userLabels { 98 | labelStrs = append(labelStrs, fmt.Sprintf("%s=%q", k, v)) 99 | } 100 | output += fmt.Sprintf("%s{%s} 0\n", dummyMetricName, strings.Join(labelStrs, ",")) 101 | } else { 102 | output += fmt.Sprintf("%s 0\n", dummyMetricName) 103 | } 104 | 105 | // setup build info metrics 106 | 
buildInfoName := namespace + `_exporter_build_info` 107 | output += fmt.Sprintf("# HELP %s A metric with a constant '1' value labeled with version, revision, branch, goversion, builddate, goos, and goarch from which %s_exporter was built.\n", buildInfoName, namespace) 108 | output += fmt.Sprintf("# TYPE %s gauge\n", buildInfoName) 109 | buildInfoLabels := map[string]string{ 110 | "version": Version, 111 | "revision": Revision, 112 | "branch": Branch, 113 | "goversion": GoVersion, 114 | "builddate": BuildDate, 115 | "goos": GOOS, 116 | "goarch": GOARCH, 117 | } 118 | for k, v := range userLabels { 119 | buildInfoLabels[k] = v 120 | } 121 | allLabelStrs := make([]string, 0, len(buildInfoLabels)) 122 | for k, v := range buildInfoLabels { 123 | allLabelStrs = append(allLabelStrs, fmt.Sprintf("%s=%q", k, v)) 124 | } 125 | output += fmt.Sprintf("%s{%s} 1\n", buildInfoName, strings.Join(allLabelStrs, ",")) 126 | _, _ = fmt.Fprint(w, output) 127 | }) 128 | 129 | listenAddr := (*webConfig.WebListenAddresses)[0] 130 | httpServer := &http.Server{ 131 | Addr: listenAddr, 132 | Handler: mux, 133 | } 134 | exitChan := make(chan bool, 1) 135 | go func() { 136 | if err := httpServer.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) { 137 | logDebugf("shutdown dummy server") 138 | } 139 | exitChan <- true 140 | }() 141 | return httpServer, exitChan 142 | } 143 | 144 | // Run pg_exporter 145 | func Run() { 146 | ParseArgs() 147 | 148 | // explain config only 149 | if *dryRun { 150 | DryRun() 151 | } 152 | 153 | if *configPath == "" { 154 | Logger.Error("no valid config path, exit") 155 | os.Exit(1) 156 | } 157 | 158 | if len(*webConfig.WebListenAddresses) == 0 { 159 | Logger.Error("invalid listen address", "addresses", *webConfig.WebListenAddresses) 160 | os.Exit(1) 161 | } 162 | listenAddr := (*webConfig.WebListenAddresses)[0] 163 | 164 | // DummyServer will server a constant pg_up 165 | // launch a dummy server to check listen address availability 166 | // and fake a pg_up 0 
metrics before PgExporter connecting to target instance 167 | // otherwise, exporter API is not available until target instance online 168 | dummySrv, closeChan := DummyServer() 169 | 170 | // create exporter: if target is down, exporter creation will wait until it backup online 171 | var err error 172 | PgExporter, err = NewExporter( 173 | *pgURL, 174 | WithConfig(*configPath), 175 | WithConstLabels(*constLabels), 176 | WithCacheDisabled(*disableCache), 177 | WithFailFast(*failFast), 178 | WithNamespace(*exporterNamespace), 179 | WithAutoDiscovery(*autoDiscovery), 180 | WithExcludeDatabase(*excludeDatabase), 181 | WithIncludeDatabase(*includeDatabase), 182 | WithTags(*serverTags), 183 | WithConnectTimeout(*connectTimeout), 184 | ) 185 | if err != nil { 186 | logFatalf("fail creating pg_exporter: %s", err.Error()) 187 | os.Exit(2) 188 | } 189 | 190 | // trigger a manual planning before explain 191 | if *explainOnly { 192 | PgExporter.server.Plan() 193 | fmt.Println(PgExporter.Explain()) 194 | os.Exit(0) 195 | } 196 | 197 | prometheus.MustRegister(PgExporter) 198 | defer PgExporter.Close() 199 | 200 | // reload conf when receiving SIGHUP or SIGUSR1 201 | sigs := make(chan os.Signal, 1) 202 | signal.Notify(sigs, syscall.SIGHUP) 203 | go func() { 204 | for sig := range sigs { 205 | switch sig { 206 | case syscall.SIGHUP: 207 | logInfof("%v received, reloading", sig) 208 | _ = Reload() 209 | } 210 | } 211 | }() 212 | 213 | /* ================ REST API ================ */ 214 | // basic 215 | http.HandleFunc("/", TitleFunc) 216 | http.HandleFunc("/version", VersionFunc) 217 | // reload 218 | http.HandleFunc("/reload", ReloadFunc) 219 | // explain & stat 220 | http.HandleFunc("/stat", PgExporter.StatFunc) 221 | http.HandleFunc("/explain", PgExporter.ExplainFunc) 222 | // alive 223 | http.HandleFunc("/up", PgExporter.UpCheckFunc) 224 | http.HandleFunc("/read", PgExporter.UpCheckFunc) 225 | http.HandleFunc("/health", PgExporter.UpCheckFunc) 226 | 
http.HandleFunc("/liveness", PgExporter.UpCheckFunc) 227 | http.HandleFunc("/readiness", PgExporter.UpCheckFunc) 228 | // primary 229 | http.HandleFunc("/primary", PgExporter.PrimaryCheckFunc) 230 | http.HandleFunc("/leader", PgExporter.PrimaryCheckFunc) 231 | http.HandleFunc("/master", PgExporter.PrimaryCheckFunc) 232 | http.HandleFunc("/read-write", PgExporter.PrimaryCheckFunc) 233 | http.HandleFunc("/rw", PgExporter.PrimaryCheckFunc) 234 | // replica 235 | http.HandleFunc("/replica", PgExporter.ReplicaCheckFunc) 236 | http.HandleFunc("/standby", PgExporter.ReplicaCheckFunc) 237 | http.HandleFunc("/slave", PgExporter.ReplicaCheckFunc) 238 | http.HandleFunc("/read-only", PgExporter.ReplicaCheckFunc) 239 | http.HandleFunc("/ro", PgExporter.ReplicaCheckFunc) 240 | 241 | // metric 242 | _ = dummySrv.Close() 243 | <-closeChan 244 | http.Handle(*metricPath, promhttp.Handler()) 245 | 246 | logInfof("pg_exporter for %s start, listen on %s%s", ShadowPGURL(*pgURL), listenAddr, *metricPath) 247 | 248 | srv := &http.Server{} 249 | if err := web.ListenAndServe(srv, webConfig, Logger); err != nil { 250 | logFatalf("http server failed: %s", err.Error()) 251 | } 252 | 253 | } 254 | -------------------------------------------------------------------------------- /exporter/pgurl.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "net/url" 5 | "os" 6 | "strings" 7 | ) 8 | 9 | // GetPGURL will retrieve, parse, modify postgres connection string 10 | func GetPGURL() string { 11 | return ProcessPGURL(RetrievePGURL()) 12 | } 13 | 14 | // RetrievePGURL retrieve pg target url from multiple sources according to precedence 15 | // priority: cli-args > env > env file path 16 | // 1. Command Line Argument (--url -u -d) 17 | // 2. Environment PG_EXPORTER_URL 18 | // 3. From file specified via Environment PG_EXPORTER_URL_FILE 19 | // 4. 
Default url 20 | func RetrievePGURL() (res string) { 21 | // command line args 22 | if *pgURL != "" { 23 | logInfof("retrieve target url %s from command line", ShadowPGURL(*pgURL)) 24 | return *pgURL 25 | } 26 | // env PG_EXPORTER_URL 27 | if res = os.Getenv("PG_EXPORTER_URL"); res != "" { 28 | logInfof("retrieve target url %s from PG_EXPORTER_URL", ShadowPGURL(*pgURL)) 29 | return res 30 | } 31 | // env PGURL 32 | if res = os.Getenv("PGURL"); res != "" { 33 | logInfof("retrieve target url %s from PGURL", ShadowPGURL(*pgURL)) 34 | return res 35 | } 36 | // file content from file PG_EXPORTER_URL_FILE 37 | if filename := os.Getenv("PG_EXPORTER_URL_FILE"); filename != "" { 38 | if fileContents, err := os.ReadFile(filename); err != nil { 39 | logFatalf("PG_EXPORTER_URL_FILE=%s is specified, fail loading url, exit", err.Error()) 40 | os.Exit(-1) 41 | } else { 42 | res = strings.TrimSpace(string(fileContents)) 43 | logInfof("retrieve target url %s from PG_EXPORTER_URL_FILE", ShadowPGURL(res)) 44 | return res 45 | } 46 | } 47 | // DEFAULT 48 | logWarnf("fail retrieving target url, fallback on default url: %s", defaultPGURL) 49 | return defaultPGURL 50 | } 51 | 52 | // ProcessPGURL will fix URL with default options 53 | func ProcessPGURL(pgurl string) string { 54 | u, err := url.Parse(pgurl) 55 | if err != nil { 56 | logErrorf("invalid url format %s", pgurl) 57 | return "" 58 | } 59 | 60 | // add sslmode = disable if not exists 61 | qs := u.Query() 62 | if sslmode := qs.Get(`sslmode`); sslmode == "" { 63 | qs.Set(`sslmode`, `disable`) 64 | } 65 | var buf strings.Builder 66 | for k, v := range qs { 67 | if len(v) == 0 { 68 | continue 69 | } 70 | if buf.Len() > 0 { 71 | buf.WriteByte('&') 72 | } 73 | buf.WriteString(k) 74 | buf.WriteByte('=') 75 | buf.WriteString(v[0]) 76 | } 77 | u.RawQuery = buf.String() 78 | return u.String() 79 | } 80 | 81 | // ShadowPGURL will hide password part of dsn 82 | func ShadowPGURL(pgurl string) string { 83 | parsedURL, err := url.Parse(pgurl) 
// ParseDatname extract database name part of a pgurl.
// An unparseable url yields the empty string.
func ParseDatname(pgurl string) string {
	if parsed, err := url.Parse(pgurl); err == nil {
		return strings.TrimLeft(parsed.Path, "/")
	}
	return ""
}
metric prefix 18 | Desc string `yaml:"desc,omitempty"` // description of this metric query 19 | SQL string `yaml:"query"` // SQL command to fetch metrics 20 | PredicateQueries []PredicateQuery `yaml:"predicate_queries,omitempty"` // SQL command to filter metrics 21 | Branch string `yaml:"-"` // branch name, top layer key of config file 22 | 23 | // control query behaviour 24 | Tags []string `yaml:"tags,omitempty"` // tags are used for execution control 25 | TTL float64 `yaml:"ttl,omitempty"` // caching ttl in seconds 26 | Timeout float64 `yaml:"timeout,omitempty"` // query execution timeout in seconds 27 | Priority int `yaml:"priority,omitempty"` // execution priority, from 1 to 999 28 | MinVersion int `yaml:"min_version,omitempty"` // minimal supported version, include 29 | MaxVersion int `yaml:"max_version,omitempty"` // maximal supported version, not include 30 | Fatal bool `yaml:"fatal,omitempty"` // if query marked fatal fail, entire scrape will fail 31 | Skip bool `yaml:"skip,omitempty"` // if query marked skip, it will be omit while loading 32 | 33 | Metrics []map[string]*Column `yaml:"metrics"` // metric definition list 34 | 35 | // metrics parsing auxiliaries 36 | Path string `yaml:"-"` // where am I from ? 37 | Columns map[string]*Column `yaml:"-"` // column map 38 | ColumnNames []string `yaml:"-"` // column names in origin orders 39 | LabelNames []string `yaml:"-"` // column (name) that used as label, sequences matters 40 | MetricNames []string `yaml:"-"` // column (name) that used as metric 41 | } 42 | 43 | // A PredicateQuery is a query that returns a 1-column resultset that's used to decide whether 44 | // to run the main query. 
45 | type PredicateQuery struct { 46 | Name string `yaml:"name,omitempty"` // predicate query name, only used for logging 47 | SQL string `yaml:"predicate_query"` // SQL command to return a predicate 48 | TTL float64 `yaml:"ttl,omitempty"` // How long to cache results for 49 | } 50 | 51 | var queryTemplate, _ = template.New("Query").Parse(`## 52 | # SYNOPSIS 53 | # {{ .Name }}{{ if ne .Name .Branch }}.{{ .Branch }}{{ end }}_* 54 | # 55 | # DESCRIPTION 56 | # {{ with .Desc }}{{ . }}{{ else }}N/A{{ end }} 57 | # 58 | # OPTIONS 59 | # Tags [{{ range $i, $e := .Tags }}{{ if $i }}, {{ end }}{{ $e }}{{ end }}] 60 | # TTL {{ .TTL }} 61 | # Priority {{ .Priority }} 62 | # Timeout {{ .TimeoutDuration }} 63 | # Fatal {{ .Fatal }} 64 | # Version {{ if ne .MinVersion 0 }}{{ .MinVersion }}{{ else }}lower{{ end }} ~ {{ if ne .MaxVersion 0 }}{{ .MaxVersion }}{{ else }}higher{{ end }} 65 | # Source {{ .Path }} 66 | # 67 | # METRICS 68 | {{- range .ColumnList }} 69 | # {{ .Name }} ({{ .Usage }}) 70 | # {{ with .Desc }}{{ . }}{{ else }}N/A{{ end }}{{ end }} 71 | # 72 | {{.MarshalYAML -}} 73 | `) 74 | 75 | var htmlTemplate, _ = template.New("Query").Parse(` 76 |
77 | 78 |

{{ .Name }}

79 |

{{ .Desc }}

80 | {{ if len .PredicateQueries }} 81 |

Predicate queries

82 | 83 | 84 | 85 | {{ range .PredicateQueries }} 86 | 87 | {{ end }} 88 |
Name SQL Cache TTL
{{ .Name }}{{ html .SQL }}{{if ne .TTL 0}}{{ .TTL }}s{{else}}not cached{{end}}
89 | {{ end }} 90 |

Query

91 |
{{ .SQL }}
92 | 93 |

Attribution

94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 |
Branch {{ .Branch }}
TTL {{ .TTL }}
Priority {{ .Priority }}
Timeout {{ .TimeoutDuration }}
Fatal {{ .Fatal }}
Version {{if ne .MinVersion 0}}{{ .MinVersion }}{{else}}lower{{end}} ~ {{if ne .MaxVersion 0}}{{ .MaxVersion }}{{else}}higher{{end}}
Tags {{ .Tags }}
Source {{ .Path }}
104 | 105 |

Columns

106 | 107 | {{ range .ColumnList }}{{ end }} 108 |
Name Usage Rename Bucket Scale Default Description
{{ .Name }}{{ .Usage }}{{ .Rename }}{{ .Bucket }}{{ .Scale }}{{ .Default }}{{ .Desc }}
109 | 110 |

Metrics

111 | 112 | {{ range .MetricList }}{{ end }} 113 |
Name Usage Desc
{{ .Name }}{{ .Column.Usage }}{{ .Column.Desc }}
114 |
115 | `) 116 | 117 | // MarshalYAML will turn query into YAML format 118 | func (q *Query) MarshalYAML() string { 119 | // buf := new(bytes.Buffer) 120 | v := make(map[string]Query, 1) 121 | v[q.Branch] = *q 122 | buf, err := yaml.Marshal(v) 123 | if err != nil { 124 | msg := fmt.Sprintf("fail to marshall query yaml: %s", err.Error()) 125 | logError(msg) 126 | return msg 127 | } 128 | return string(buf) 129 | } 130 | 131 | // Explain will turn query into text format 132 | func (q *Query) Explain() string { 133 | buf := new(bytes.Buffer) 134 | err := queryTemplate.Execute(buf, q) 135 | if err != nil { 136 | msg := fmt.Sprintf("fail to explain query: %s", err.Error()) 137 | logError(msg) 138 | return msg 139 | } 140 | return buf.String() 141 | } 142 | 143 | // HTML will turn Query into HTML format 144 | func (q *Query) HTML() string { 145 | buf := new(bytes.Buffer) 146 | err := htmlTemplate.Execute(buf, q) 147 | if err != nil { 148 | msg := fmt.Sprintf("fail to generate query html: %s", err.Error()) 149 | logError(msg) 150 | return msg 151 | } 152 | return buf.String() 153 | } 154 | 155 | // HasTag tells whether this query have specific tag 156 | // since only few tags is provided, we don't really need a map here 157 | func (q *Query) HasTag(tag string) bool { 158 | return slices.Contains(q.Tags, tag) 159 | } 160 | 161 | // ColumnList return ordered column list 162 | func (q *Query) ColumnList() (res []*Column) { 163 | res = make([]*Column, len(q.ColumnNames)) 164 | for i, colName := range q.ColumnNames { 165 | res[i] = q.Columns[colName] 166 | } 167 | return 168 | } 169 | 170 | // LabelList returns a list of label column names 171 | func (q *Query) LabelList() []string { 172 | labelNames := make([]string, len(q.LabelNames)) 173 | for i, labelName := range q.LabelNames { 174 | labelColumn := q.Columns[labelName] 175 | if labelColumn.Rename != "" { 176 | labelNames[i] = labelColumn.Rename 177 | } else { 178 | labelNames[i] = labelColumn.Name 179 | } 180 | } 181 | 
return labelNames 182 | } 183 | 184 | // MetricList returns a list of MetricDesc generated by this query 185 | func (q *Query) MetricList() (res []*MetricDesc) { 186 | res = make([]*MetricDesc, len(q.MetricNames)) 187 | for i, metricName := range q.MetricNames { 188 | column := q.Columns[metricName] 189 | res[i] = column.MetricDesc(q.Name, q.LabelList()) 190 | } 191 | return 192 | } 193 | 194 | // TimeoutDuration will turn timeout settings into time.Duration 195 | func (q *Query) TimeoutDuration() time.Duration { 196 | return time.Duration(float64(time.Second) * q.Timeout) 197 | } 198 | -------------------------------------------------------------------------------- /exporter/utils.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "math" 7 | "os" 8 | "strconv" 9 | "strings" 10 | "time" 11 | 12 | "github.com/prometheus/client_golang/prometheus" 13 | ) 14 | 15 | /* ================ Logger ================ */ 16 | 17 | func configureLogger(levelStr, formatStr string) *slog.Logger { 18 | var level slog.Level 19 | switch strings.ToLower(levelStr) { 20 | case "debug": 21 | level = slog.LevelDebug 22 | case "info": 23 | level = slog.LevelInfo 24 | case "warn": 25 | level = slog.LevelWarn 26 | case "error": 27 | level = slog.LevelError 28 | default: 29 | level = slog.LevelInfo // fallback to default info level 30 | } 31 | 32 | opts := &slog.HandlerOptions{ 33 | Level: level, 34 | } 35 | 36 | var handler slog.Handler 37 | switch formatStr { 38 | case "json": 39 | handler = slog.NewJSONHandler(os.Stderr, opts) 40 | case "logfmt", "": 41 | handler = slog.NewTextHandler(os.Stderr, opts) 42 | default: 43 | panic("unknown log format: " + formatStr) 44 | } 45 | 46 | return slog.New(handler) 47 | } 48 | 49 | // logDebugf will log debug message 50 | func logDebugf(format string, v ...interface{}) { 51 | Logger.Debug(fmt.Sprintf(format, v...)) 52 | } 53 | 54 | // logInfof will log 
info message 55 | func logInfof(format string, v ...interface{}) { 56 | Logger.Info(fmt.Sprintf(format, v...)) 57 | } 58 | 59 | // logWarnf will log warning message 60 | func logWarnf(format string, v ...interface{}) { 61 | Logger.Warn(fmt.Sprintf(format, v...)) 62 | } 63 | 64 | // logErrorf will log error message 65 | func logErrorf(format string, v ...interface{}) { 66 | Logger.Error(fmt.Sprintf(format, v...)) 67 | } 68 | 69 | // logError will print error message directly 70 | func logError(msg string) { 71 | Logger.Error(msg) 72 | } 73 | 74 | // logFatalf will log error message 75 | func logFatalf(format string, v ...interface{}) { 76 | Logger.Error(fmt.Sprintf(format, v...)) 77 | os.Exit(1) 78 | } 79 | 80 | /* ================ Auxiliaries ================ */ 81 | 82 | // castFloat64 will cast datum into float64 with scale & default value 83 | func castFloat64(t interface{}, s string, d string) float64 { 84 | var scale = 1.0 85 | if s != "" { 86 | if scaleFactor, err := strconv.ParseFloat(s, 64); err != nil { 87 | logWarnf("invalid column scale: %v ", s) 88 | } else { 89 | scale = scaleFactor 90 | } 91 | } 92 | 93 | switch v := t.(type) { 94 | case int64: 95 | return float64(v) * scale 96 | case float64: 97 | return v * scale 98 | case time.Time: 99 | return float64(v.Unix()) 100 | case []byte: 101 | strV := string(v) 102 | result, err := strconv.ParseFloat(strV, 64) 103 | if err != nil { 104 | logWarnf("fail casting []byte to float64: %v", t) 105 | return math.NaN() 106 | } 107 | return result * scale 108 | case string: 109 | result, err := strconv.ParseFloat(v, 64) 110 | if err != nil { 111 | logWarnf("fail casting string to float64: %v", t) 112 | return math.NaN() 113 | } 114 | return result * scale 115 | case bool: 116 | if v { 117 | return 1.0 118 | } 119 | return 0.0 120 | case nil: 121 | if d != "" { 122 | result, err := strconv.ParseFloat(d, 64) 123 | if err != nil { 124 | logWarnf("invalid column default: %v", d) 125 | return math.NaN() 126 | } 127 | 
return result 128 | } 129 | return math.NaN() 130 | default: 131 | logWarnf("fail casting unknown to float64: %v", t) 132 | return math.NaN() 133 | } 134 | } 135 | 136 | // castString will force interface{} into string 137 | func castString(t interface{}) string { 138 | switch v := t.(type) { 139 | case int64: 140 | return fmt.Sprintf("%v", v) 141 | case float64: 142 | return fmt.Sprintf("%v", v) 143 | case time.Time: 144 | return fmt.Sprintf("%v", v.Unix()) 145 | case nil: 146 | return "" 147 | case []byte: 148 | // Try and convert to string 149 | return string(v) 150 | case string: 151 | return v 152 | case bool: 153 | if v { 154 | return "true" 155 | } 156 | return "false" 157 | default: 158 | logWarnf("fail casting unknown to string: %v", t) 159 | return "" 160 | } 161 | } 162 | 163 | // parseConstLabels turns a comma separated "key=value" param string into prometheus.Labels 164 | func parseConstLabels(s string) prometheus.Labels { 165 | labels := make(prometheus.Labels) 166 | s = strings.TrimSpace(s) 167 | if len(s) == 0 { 168 | return nil 169 | } 170 | 171 | parts := strings.Split(s, ",") 172 | for _, p := range parts { 173 | // SplitN keeps any '=' inside the value intact (e.g. "k=a=b" -> {"k": "a=b"}) 174 | keyValue := strings.SplitN(strings.TrimSpace(p), "=", 2) 175 | if len(keyValue) != 2 { 176 | logErrorf(`malformed labels format %q, should be "key=value"`, p) 177 | continue 178 | } 179 | key := strings.TrimSpace(keyValue[0]) 180 | value := strings.TrimSpace(keyValue[1]) 181 | if key == "" || value == "" { 182 | continue 183 | } 184 | labels[key] = value 185 | } 186 | if len(labels) == 0 { 187 | return nil 188 | } 189 | 190 | return labels 191 | } 192 | 193 | // parseCSV will turn a comma separated string into a []string 194 | func parseCSV(s string) (tags []string) { 195 | s = strings.TrimSpace(s) 196 | if len(s) == 0 { 197 | return nil 198 | } 199 | 200 | parts := strings.Split(s, ",") 201 | for _, p := range parts { 202 | if tag := strings.TrimSpace(p); len(tag) > 0 { 203 | tags = append(tags, tag) 204 | } 205 | } 206 | 207 | if len(tags) == 0 { 208 | return nil 209 |
} 209 | return 210 | } 211 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module pg_exporter 2 | 3 | go 1.24.2 4 | 5 | require ( 6 | github.com/alecthomas/kingpin/v2 v2.4.0 7 | github.com/lib/pq v1.10.9 8 | github.com/prometheus/client_golang v1.22.0 9 | github.com/prometheus/exporter-toolkit v0.14.0 10 | gopkg.in/yaml.v3 v3.0.1 11 | ) 12 | 13 | require ( 14 | github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect 15 | github.com/beorn7/perks v1.0.1 // indirect 16 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 17 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 18 | github.com/jpillora/backoff v1.0.0 // indirect 19 | github.com/mdlayher/socket v0.5.1 // indirect 20 | github.com/mdlayher/vsock v1.2.1 // indirect 21 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 22 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect 23 | github.com/prometheus/client_model v0.6.2 // indirect 24 | github.com/prometheus/common v0.63.0 // indirect 25 | github.com/prometheus/procfs v0.16.1 // indirect 26 | github.com/xhit/go-str2duration/v2 v2.1.0 // indirect 27 | golang.org/x/crypto v0.37.0 // indirect 28 | golang.org/x/net v0.39.0 // indirect 29 | golang.org/x/oauth2 v0.29.0 // indirect 30 | golang.org/x/sync v0.13.0 // indirect 31 | golang.org/x/sys v0.32.0 // indirect 32 | golang.org/x/text v0.24.0 // indirect 33 | google.golang.org/protobuf v1.36.6 // indirect 34 | gopkg.in/yaml.v2 v2.4.0 // indirect 35 | ) 36 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= 2 | github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= 3 
| github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0= 4 | github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= 5 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 6 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 7 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 8 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 9 | github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= 10 | github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 11 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 12 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 13 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 14 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 15 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 16 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 17 | github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= 18 | github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= 19 | github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= 20 | github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 21 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 22 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 23 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 24 | github.com/kr/text v0.2.0/go.mod 
h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 25 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= 26 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= 27 | github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= 28 | github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= 29 | github.com/mdlayher/socket v0.5.1 h1:VZaqt6RkGkt2OE9l3GcC6nZkqD3xKeQLyfleW/uBcos= 30 | github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ= 31 | github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= 32 | github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= 33 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 34 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 35 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= 36 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 37 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 38 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 39 | github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= 40 | github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= 41 | github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= 42 | github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= 43 | github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA98k= 44 | github.com/prometheus/common v0.63.0/go.mod 
h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18= 45 | github.com/prometheus/exporter-toolkit v0.14.0 h1:NMlswfibpcZZ+H0sZBiTjrA3/aBFHkNZqE+iCj5EmRg= 46 | github.com/prometheus/exporter-toolkit v0.14.0/go.mod h1:Gu5LnVvt7Nr/oqTBUC23WILZepW0nffNo10XdhQcwWA= 47 | github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= 48 | github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= 49 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 50 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 51 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 52 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 53 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 54 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 55 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 56 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 57 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 58 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 59 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 60 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 61 | github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= 62 | github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= 63 | golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= 64 | golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= 65 | golang.org/x/net v0.39.0 
h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= 66 | golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= 67 | golang.org/x/oauth2 v0.29.0 h1:WdYw2tdTK1S8olAzWHdgeqfy+Mtm9XNhv/xJsY65d98= 68 | golang.org/x/oauth2 v0.29.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= 69 | golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= 70 | golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 71 | golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= 72 | golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 73 | golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= 74 | golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= 75 | google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= 76 | google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= 77 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 78 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 79 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 80 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 81 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 82 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 83 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 84 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 85 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pgsty/pg_exporter/197454a0c87790efbff52c57655894619b8d7829/logo.png -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /***********************************************************************\ 2 | Copyright © 2019-2025 Ruohang Feng 3 | Contributors: https://github.com/pgsty/pg_exporter/graphs/contributors 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | \***********************************************************************/ 17 | 18 | package main 19 | 20 | import "pg_exporter/exporter" 21 | 22 | func main() { 23 | exporter.Run() 24 | } 25 | -------------------------------------------------------------------------------- /package/nfpm-amd64-deb.yaml: -------------------------------------------------------------------------------- 1 | name: "pg-exporter" 2 | arch: "amd64" 3 | platform: "linux" 4 | version: "v1.0.0" 5 | version_schema: semver 6 | maintainer: Ruohang Feng 7 | description: | 8 | Prometheus exporter for PostgreSQL / Pgbouncer server metrics. 
9 | Supported version: Postgres9.4 - 17+ & Pgbouncer 1.8 - 1.24+ 10 | Part of Project Pigsty -- Battery Included PostgreSQL Distribution 11 | with ultimate observability support: https://pigsty.io 12 | 13 | vendor: "Vonng" 14 | homepage: "https://github.com/pgsty/pg_exporter" 15 | license: "Apache-2.0 License" 16 | 17 | rpm: 18 | compression: gzip 19 | prefixes: 20 | - /usr/bin 21 | 22 | contents: 23 | - src: pg_exporter 24 | dst: /usr/bin/pg_exporter 25 | file_info: 26 | mode: 0755 27 | 28 | - src: pg_exporter.yml 29 | dst: /etc/pg_exporter.yml 30 | type: config|noreplace 31 | file_info: 32 | mode: 0700 33 | owner: prometheus 34 | group: prometheus 35 | 36 | - src: package/pg_exporter.default 37 | dst: /etc/default/pg_exporter 38 | type: config|noreplace 39 | file_info: 40 | mode: 0700 41 | owner: prometheus 42 | group: prometheus 43 | 44 | - src: package/pg_exporter.service 45 | dst: /lib/systemd/system/pg_exporter.service 46 | type: config 47 | 48 | - src: LICENSE 49 | dst: /usr/share/doc/pg_exporter/LICENSE 50 | file_info: 51 | mode: 0644 52 | 53 | scripts: 54 | preinstall: package/preinstall.sh -------------------------------------------------------------------------------- /package/nfpm-amd64-rpm.yaml: -------------------------------------------------------------------------------- 1 | name: "pg_exporter" 2 | arch: "amd64" 3 | platform: "linux" 4 | version: "v1.0.0" 5 | version_schema: semver 6 | maintainer: Ruohang Feng 7 | description: | 8 | Prometheus exporter for PostgreSQL / Pgbouncer server metrics. 
9 | Supported version: Postgres9.4 - 17+ & Pgbouncer 1.8 - 1.24+ 10 | Part of Project Pigsty -- Battery Included PostgreSQL Distribution 11 | with ultimate observability support: https://pigsty.io 12 | 13 | vendor: "Vonng" 14 | homepage: "https://github.com/pgsty/pg_exporter" 15 | license: "Apache-2.0 License" 16 | 17 | rpm: 18 | compression: gzip 19 | prefixes: 20 | - /usr/bin 21 | 22 | contents: 23 | - src: pg_exporter 24 | dst: /usr/bin/pg_exporter 25 | file_info: 26 | mode: 0755 27 | 28 | - src: pg_exporter.yml 29 | dst: /etc/pg_exporter.yml 30 | type: config|noreplace 31 | file_info: 32 | mode: 0700 33 | owner: prometheus 34 | group: prometheus 35 | 36 | - src: package/pg_exporter.default 37 | dst: /etc/default/pg_exporter 38 | type: config|noreplace 39 | file_info: 40 | mode: 0700 41 | owner: prometheus 42 | group: prometheus 43 | 44 | - src: package/pg_exporter.service 45 | dst: /usr/lib/systemd/system/pg_exporter.service 46 | type: config 47 | 48 | - src: LICENSE 49 | dst: /usr/share/doc/pg_exporter/LICENSE 50 | file_info: 51 | mode: 0644 52 | 53 | scripts: 54 | preinstall: package/preinstall.sh -------------------------------------------------------------------------------- /package/nfpm-arm64-deb.yaml: -------------------------------------------------------------------------------- 1 | name: "pg-exporter" 2 | arch: "arm64" 3 | platform: "linux" 4 | version: "v1.0.0" 5 | version_schema: semver 6 | maintainer: Ruohang Feng 7 | description: | 8 | Prometheus exporter for PostgreSQL / Pgbouncer server metrics. 
9 | Supported version: Postgres9.4 - 17+ & Pgbouncer 1.8 - 1.24+ 10 | Part of Project Pigsty -- Battery Included PostgreSQL Distribution 11 | with ultimate observability support: https://pigsty.io 12 | 13 | vendor: "Vonng" 14 | homepage: "https://github.com/pgsty/pg_exporter" 15 | license: "Apache-2.0 License" 16 | 17 | rpm: 18 | compression: gzip 19 | prefixes: 20 | - /usr/bin 21 | 22 | contents: 23 | - src: pg_exporter 24 | dst: /usr/bin/pg_exporter 25 | file_info: 26 | mode: 0755 27 | 28 | - src: pg_exporter.yml 29 | dst: /etc/pg_exporter.yml 30 | type: config|noreplace 31 | file_info: 32 | mode: 0700 33 | owner: prometheus 34 | group: prometheus 35 | 36 | - src: package/pg_exporter.default 37 | dst: /etc/default/pg_exporter 38 | type: config|noreplace 39 | file_info: 40 | mode: 0700 41 | owner: prometheus 42 | group: prometheus 43 | 44 | - src: package/pg_exporter.service 45 | dst: /lib/systemd/system/pg_exporter.service 46 | type: config 47 | 48 | - src: LICENSE 49 | dst: /usr/share/doc/pg_exporter/LICENSE 50 | file_info: 51 | mode: 0644 52 | 53 | scripts: 54 | preinstall: package/preinstall.sh -------------------------------------------------------------------------------- /package/nfpm-arm64-rpm.yaml: -------------------------------------------------------------------------------- 1 | name: "pg_exporter" 2 | arch: "arm64" 3 | platform: "linux" 4 | version: "v1.0.0" 5 | version_schema: semver 6 | maintainer: Ruohang Feng 7 | description: | 8 | Prometheus exporter for PostgreSQL / Pgbouncer server metrics. 
9 | Supported version: Postgres9.4 - 17+ & Pgbouncer 1.8 - 1.24+ 10 | Part of Project Pigsty -- Battery Included PostgreSQL Distribution 11 | with ultimate observability support: https://pigsty.io 12 | 13 | vendor: "Vonng" 14 | homepage: "https://github.com/pgsty/pg_exporter" 15 | license: "Apache-2.0 License" 16 | 17 | rpm: 18 | compression: gzip 19 | prefixes: 20 | - /usr/bin 21 | 22 | contents: 23 | - src: pg_exporter 24 | dst: /usr/bin/pg_exporter 25 | file_info: 26 | mode: 0755 27 | 28 | - src: pg_exporter.yml 29 | dst: /etc/pg_exporter.yml 30 | type: config|noreplace 31 | file_info: 32 | mode: 0700 33 | owner: prometheus 34 | group: prometheus 35 | 36 | - src: package/pg_exporter.default 37 | dst: /etc/default/pg_exporter 38 | type: config|noreplace 39 | file_info: 40 | mode: 0700 41 | owner: prometheus 42 | group: prometheus 43 | 44 | - src: package/pg_exporter.service 45 | dst: /usr/lib/systemd/system/pg_exporter.service 46 | type: config 47 | 48 | - src: LICENSE 49 | dst: /usr/share/doc/pg_exporter/LICENSE 50 | file_info: 51 | mode: 0644 52 | 53 | scripts: 54 | preinstall: package/preinstall.sh -------------------------------------------------------------------------------- /package/pg_exporter.default: -------------------------------------------------------------------------------- 1 | PG_EXPORTER_URL='postgres://:5432/?sslmode=disable' 2 | PG_EXPORTER_CONFIG=/etc/pg_exporter.yml 3 | PG_EXPORTER_LABEL="" 4 | PG_EXPORTER_TAG="" 5 | PG_EXPORTER_DISABLE_CACHE=false 6 | PG_EXPORTER_AUTO_DISCOVERY=true 7 | PG_EXPORTER_EXCLUDE_DATABASE="template0,template1,postgres" 8 | PG_EXPORTER_INCLUDE_DATABASE="" 9 | PG_EXPORTER_NAMESPACE="pg" 10 | PG_EXPORTER_FAIL_FAST=false 11 | PG_EXPORTER_CONNECT_TIMEOUT=100 12 | PG_EXPORTER_TELEMETRY_PATH="/metrics" 13 | PG_EXPORTER_OPTS='--log.level=info' -------------------------------------------------------------------------------- /package/pg_exporter.service: 
-------------------------------------------------------------------------------- 1 | # -*- mode: conf -*- 2 | 3 | [Unit] 4 | Description=Prometheus exporter for PostgreSQL/Pgbouncer server metrics 5 | Documentation=https://github.com/pgsty/pg_exporter 6 | After=network.target 7 | 8 | [Service] 9 | EnvironmentFile=-/etc/default/pg_exporter 10 | User=prometheus 11 | ExecStart=/usr/bin/pg_exporter $PG_EXPORTER_OPTS 12 | Restart=on-failure 13 | 14 | [Install] 15 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /package/preinstall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # create a group & user named prometheus if not exists 4 | getent group prometheus >/dev/null || groupadd -r prometheus ; /bin/true 5 | getent passwd prometheus >/dev/null || useradd -r -g prometheus -s /sbin/nologin -c "Prometheus services" prometheus 6 | exit 0 --------------------------------------------------------------------------------