├── .github └── workflows │ ├── goreleaser.yaml │ ├── latest.yaml │ ├── lint.yaml │ └── test-and-release.yaml ├── .gitignore ├── .gitmodules ├── .goreleaser.yml ├── LICENSE ├── Makefile ├── README.md ├── docker ├── release.Dockerfile └── test.Dockerfile ├── gatherer ├── cmd │ ├── pg_gatherer │ │ ├── config.go │ │ ├── http.go │ │ ├── main.go │ │ └── prometheus.go │ └── testing │ │ └── main.go └── internal │ ├── cache │ ├── README.md │ ├── cache.go │ ├── cache_lua.go │ ├── cache_test.go │ ├── sqlite │ │ ├── cache.go │ │ ├── rotate.go │ │ └── set_get.go │ └── tests │ │ ├── .gitignore │ │ ├── cache.lua │ │ └── rotate.lua │ ├── connection │ ├── README.md │ ├── background_query_lua.go │ ├── connection.go │ ├── connection_lua.go │ ├── connection_metric.go │ ├── connection_test.go │ ├── driver.go │ ├── metric.go │ ├── pool.go │ ├── pool_test.go │ ├── result.go │ └── tests │ │ └── connection.lua │ ├── plugins │ ├── README.md │ ├── plugin.go │ ├── plugin_lua.go │ ├── pool.go │ ├── pool_test.go │ └── tests │ │ ├── .gitignore │ │ ├── init.lua │ │ ├── pl_cache │ │ └── plugin.lua │ │ ├── pl_pg │ │ └── plugin.lua │ │ ├── pl_rds │ │ └── plugin.lua │ │ ├── pl_restarts │ │ └── plugin.lua │ │ ├── pl_run_every │ │ └── plugin.lua │ │ ├── pl_stat │ │ └── plugin.lua │ │ └── pl_stop │ │ ├── .gitignore │ │ └── plugin.lua │ ├── prometheus │ ├── README.md │ ├── metric.go │ ├── metric_registered.go │ ├── metric_util.go │ └── prometheus.go │ ├── secrets │ ├── README.md │ ├── secret.go │ ├── secret_lua.go │ ├── secret_test.go │ └── tests │ │ ├── .gitignore │ │ ├── reload_1.lua │ │ ├── reload_2.lua │ │ ├── secrets.lua │ │ └── secrets.yaml │ └── testing_framework │ ├── README.md │ ├── framework.go │ ├── framework_lua.go │ ├── framework_test.go │ └── tests │ ├── cache │ └── testing-1 │ │ └── testing-1 │ │ └── cache.sqlite │ ├── init.lua │ └── testing-1 │ ├── plugin.lua │ └── test.lua ├── go.mod ├── go.sum ├── grafana ├── Makefile ├── README.md ├── dashboard.json ├── images │ ├── 1.png │ ├── 10.png │ ├── 11.png │ ├── 12.png │ ├── 13.png │ ├── 14.png │ ├── 15.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ └── 9.png └── jsonnet │ ├── dashboard.jsonnet │ └── dashboard_sql.libsonnet ├── plugins ├── .gitignore ├── README.md ├── activity │ ├── activity_10.sql │ ├── activity_9.sql │ ├── linux_helper_proc_stat.lua │ ├── plugin.lua │ ├── states.sql │ ├── test.lua │ └── waits.sql ├── alerts │ ├── bloat.lua │ ├── bloat.sql │ ├── errors.lua │ ├── errors.sql │ ├── healthcheck.lua │ ├── healthcheck.sql │ ├── last_created_at.sql │ ├── long_running_transactions.lua │ ├── long_running_transactions.sql │ ├── plugin.lua │ ├── replication_slots.lua │ ├── replication_slots.sql │ ├── sequences.lua │ ├── sequences.sql │ ├── test.lua │ ├── uptime.lua │ ├── uptime.sql │ ├── waits.lua │ ├── waits.sql │ ├── wraparound.lua │ └── wraparound.sql ├── bgwriter │ ├── bgwriter.sql │ ├── plugin.lua │ └── test.lua ├── block │ ├── block.sql │ ├── plugin.lua │ └── test.lua ├── btree_bloat │ ├── btree_bloat.sql │ ├── plugin.lua │ └── test.lua ├── buffercache │ ├── buffercache.sql │ ├── plugin.lua │ └── test.lua ├── cloudwatch │ └── plugin.lua ├── databases │ ├── databases.sql │ ├── plugin.lua │ └── test.lua ├── healthcheck │ ├── healthcheck.sql │ ├── plugin.lua │ └── test.lua ├── init.lua ├── init.test.lua ├── linux_cpu │ ├── plugin.lua │ └── test.lua ├── linux_diskstats │ ├── helper_disk_stat.lua │ ├── plugin.lua │ └── test.lua ├── linux_memory │ ├── plugin.lua │ └── test.lua ├── replication_slots │ ├── plugin.lua │ 
├── replication_slots_10.sql │ ├── replication_slots_9.sql │ └── test.lua ├── sender │ ├── list_of_alerts.sql │ ├── pagerduty.lua │ ├── pagerduty_routing.lua │ ├── plugin.lua │ └── telegram.lua ├── sequences │ ├── plugin.lua │ ├── sequences.sql │ └── test.lua ├── statements │ ├── plugin.lua │ ├── statements.sql │ └── test.lua ├── uptime │ ├── checkpointer_uptime_10.sql │ ├── plugin.lua │ ├── test.lua │ └── uptime.sql ├── user_tables │ ├── plugin.lua │ ├── test.lua │ ├── user_tables.sql │ └── user_tables_io.sql └── wal │ ├── plugin.lua │ ├── test.lua │ ├── wal_master_10.sql │ ├── wal_master_9.sql │ ├── wal_replica_10.sql │ └── wal_replica_9.sql └── schema └── schema.sql /.github/workflows/goreleaser.yaml: -------------------------------------------------------------------------------- 1 | name: gorelease 2 | on: 3 | push: 4 | tags: 5 | - '*' 6 | jobs: 7 | 8 | test: 9 | runs-on: ubuntu-latest 10 | steps: 11 | 12 | - name: Checkout repo 13 | uses: actions/checkout@v2 14 | 15 | - name: Set up Go 16 | uses: actions/setup-go@v2 17 | with: 18 | go-version: 1.14 19 | 20 | - name: Run GoReleaser 21 | uses: goreleaser/goreleaser-action@v2 22 | with: 23 | version: latest 24 | args: release --rm-dist 25 | env: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 27 | -------------------------------------------------------------------------------- /.github/workflows/latest.yaml: -------------------------------------------------------------------------------- 1 | name: latest 2 | on: 3 | push: 4 | branches: 5 | - master 6 | schedule: 7 | - cron: '0 0 * * *' 8 | 9 | jobs: 10 | 11 | test: 12 | runs-on: ubuntu-latest 13 | steps: 14 | 15 | - name: Checkout repo 16 | uses: actions/checkout@v2 17 | 18 | - name: Build and push Docker images 19 | uses: elgohr/Publish-Docker-Github-Action@v5 20 | with: 21 | name: vadv/pg_gatherer_test 22 | dockerfile: ./docker/test.Dockerfile 23 | username: vadv 24 | password: ${{ secrets.DOCKER_TOKEN }} 25 | cache: ${{ github.event_name != 'schedule' }} 26 | tags: latest 27 | 28 | - name: Test in docker image 29 | run: docker run --rm vadv/pg_gatherer:test bash -ec "make test_in_docker" 30 | 31 | - name: Build and push Docker images 32 | uses: elgohr/Publish-Docker-Github-Action@v5 33 | with: 34 | name: vadv/pg_gatherer 35 | dockerfile: ./docker/release.Dockerfile 36 | username: vadv 37 | password: ${{ secrets.DOCKER_TOKEN }} 38 | cache: ${{ github.event_name != 'schedule' }} 39 | tags: "latest" 40 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: lint 2 | on: [push] 3 | jobs: 4 | golint: 5 | name: golint 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v2 9 | - name: golangci-lint 10 | uses: golangci/golangci-lint-action@v1 11 | with: 12 | version: v1.28 13 | -------------------------------------------------------------------------------- /.github/workflows/test-and-release.yaml: -------------------------------------------------------------------------------- 1 | name: test-and-release 2 | on: [push] 3 | jobs: 4 | 5 | test: 6 | runs-on: ubuntu-latest 7 | steps: 8 | 9 | - name: Checkout repo 10 | uses: actions/checkout@v2 11 | 12 | - name: Build test image 13 | uses: elgohr/Publish-Docker-Github-Action@v5 14 | with: 15 | name: vadv/pg_gatherer_test 16 | dockerfile: ./docker/test.Dockerfile 17 | username: vadv 18 | password: ${{ secrets.DOCKER_TOKEN }} 19 | cache: true 20 | tags: latest 21 | 22 | - name: Test in 
docker image 23 | run: docker run --rm vadv/pg_gatherer_test bash -ec "make test_in_docker" 24 | 25 | - name: Build release docker image 26 | uses: elgohr/Publish-Docker-Github-Action@v5 27 | with: 28 | name: vadv/pg_gatherer 29 | dockerfile: ./docker/release.Dockerfile 30 | username: vadv 31 | password: ${{ secrets.DOCKER_TOKEN }} 32 | tag_names: true 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | .DS_Store 3 | dist 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "grafana/jsonnet/vendor/grafonnet-lib"] 2 | path = grafana/jsonnet/vendor/grafonnet-lib 3 | url = https://github.com/grafana/grafonnet-lib.git 4 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | builds: 2 | - main: ./gatherer/cmd/pg_gatherer 3 | id: "pg_gatherer" 4 | binary: pg_gatherer 5 | goos: 6 | - linux 7 | - darwin 8 | - windows 9 | changelog: 10 | sort: asc 11 | filters: 12 | exclude: 13 | - '^docs:' 14 | - '^test:' 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Dmitry Vasiliev 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: build 2 | 3 | build: 4 | go build -o ./bin/pg_gatherer --tags netcgo ./gatherer/cmd/pg_gatherer/ 5 | 6 | dashboard: 7 | $(MAKE) -C grafana 8 | 9 | test_in_docker: 10 | # init && start database 11 | sudo -H -u postgres bash -l -c '/usr/pgsql-12/bin/initdb -D /tmp/db' 12 | sudo -H -u postgres bash -l -c '/usr/pgsql-12/bin/pg_ctl start -W -D /tmp/db' 13 | sleep 3 14 | # change preload libraries 15 | psql -U postgres -Atc "alter system set shared_preload_libraries TO pg_stat_statements, timescaledb" 16 | sudo -H -u postgres bash -l -c '/usr/pgsql-12/bin/pg_ctl restart -W -D /tmp/db' 17 | sleep 3 18 | # create user && database 19 | psql -U postgres -Atc "create user gatherer" 20 | psql -U postgres -Atc "create database gatherer owner gatherer" 21 | psql -U postgres -Atc "grant pg_monitor to gatherer" 22 | psql -U postgres -Atc "create database nobuffercache" 23 | # install extensions 24 | psql -U postgres -d gatherer -Atc "create extension pg_buffercache" 25 | psql -U postgres -d gatherer -Atc "create extension pg_stat_statements" 26 | psql -U postgres -d postgres -Atc "create extension pg_buffercache" 27 | psql -U postgres -d postgres -Atc "create extension pg_stat_statements" 28 | psql -U postgres -d gatherer -Atc "create extension timescaledb" 29 | # update statistics 30 | /usr/pgsql-12/bin/pgbench -U postgres -h /tmp -i -s 2 postgres 31 | /usr/pgsql-12/bin/pgbench -U postgres -h /tmp -i -s 2 gatherer 32 | /usr/pgsql-12/bin/pgbench -U postgres -h /tmp -T 5 postgres 33 | /usr/pgsql-12/bin/pgbench -U postgres -h /tmp -T 5 gatherer 34 | /usr/pgsql-12/bin/vacuumdb --analyze-only -U postgres -h /tmp --all 35 | # deploy schema gatherer 36 | psql -U gatherer -At -1 -f ./schema/schema.sql -d gatherer 37 | # start tests 38 | psql -U postgres -d gatherer -Atc "insert into host (name) values ('hostname-not-found-healthcheck-must-failed');" 39 | psql -U postgres -d postgres -Atc "select pg_create_physical_replication_slot('standby_slot')" 40 | timeout 10 /usr/pgsql-12/bin/pg_receivewal -h /tmp -U postgres -D /tmp/ -S standby_slot || echo ok 41 | go test -v -race ./... 42 | go build -o ./bin/testing --tags netcgo ./gatherer/cmd/testing/ 43 | ./bin/testing --plugin-dir ./plugins --cache-dir /tmp/cache --host /tmp --dbname gatherer --username gatherer 44 | 45 | .DEFAULT_GOAL: all 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pg_gatherer 2 | 3 | The project collects statistics from PostgreSQL instances and stores them in another PostgreSQL database. 4 | 5 | # Architecture 6 | 7 | ``` 8 | +------------+ +---------------+ 9 | +---+ Grafana | +--->+ Target # 1 | 10 | | +------------+ | +---------------+ 11 | v | 12 | +------+-------+ | +---------------+ 13 | | Storage | +-------------+--->+ Target # N | 14 | +------+-------+ | +---------------+ 15 | ^ | 16 | | +-------+--------+ +---------------+ 17 | +-----+ pg_gatherer +<--------+ Prometheus | 18 | +---------+------+ +---------------+ 19 | | 20 | +----------------+ | +-----------------------+ 21 | |PagerDuty API   +<----+-------> | Other API (Zabbix, ..)| 22 | +----------------+ +-----------------------+ 23 | ``` 24 | 25 | ## Targets 26 | 27 | Target databases that the agent monitors.
28 | 29 | ## Storage 30 | 31 | A PostgreSQL database (the [TimescaleDB](https://docs.timescale.com/latest/introduction) extension is recommended) in which the collected information is stored. 32 | 33 | ## Agent pg_gatherer 34 | 35 | The agent is a single Go binary, with plugins written in [Gopher-lua](https://github.com/yuin/gopher-lua) (no system dependencies). 36 | 37 | You can run the agent locally on the `Target` machine, 38 | which gives you additional statistics, for example linking `/proc/{pid}/io` stats with queries. 39 | 40 | ## Why? 41 | 42 | There are a lot of monitoring systems in the world that are known and proven. Why make another system? 43 | 44 | The answers: 45 | * firstly, pg_gatherer does not claim to be a full-fledged monitoring system; it was developed as a tool for in-depth analysis. 46 | * it seems natural to store not only float/int metrics with tags, but also query texts, in the database. 47 | * the system should be easily extensible with [plugins](/plugins). 48 | * it keeps integration with [Grafana](/grafana). 49 | 50 | ## Installation 51 | 52 | * Install the storage database. 53 | * Apply the [migration](/schema/schema.sql) to the storage database. 54 | * Create a user on the targets with [pg_monitor](https://www.postgresql.org/docs/10/default-roles.html) rights. 55 | * Get and run the agent. 56 | * Populate the `host` table in the storage database. 57 | * Also, if you use TimescaleDB, you can use the [Grafana dashboard](/grafana). 58 | 59 | ```bash 60 | go get github.com/vadv/pg_gatherer/gatherer/cmd/pg_gatherer 61 | pg_gatherer \ 62 | --host-config-file host-config.yaml \ 63 | --plugins-dir /etc/pg_gatherer/plugins \ 64 | --cache-dir /var/cache \ 65 | --http-listen :8080 \ 66 | --secret-file /path/to/secrets.yaml \ 67 | --max-open-conns 1 68 | ``` 69 | 70 | Host config example: 71 | 72 | ```yaml 73 | peripheral-db-1: # name of target in storage-db 74 | 75 | plugins: # list of plugins which can be activated on this target 76 | - activity 77 | - databases 78 | ... 79 | 80 | connections: 81 | target: # target agent connection 82 | host: 192.168.1.1 83 | dbname: your_database 84 | username: monitor 85 | port: 5432 86 | storage: # storage connection 87 | host: /tmp 88 | dbname: gatherer 89 | username: storage 90 | port: 5432 91 | replica: # another target agent connection 92 | host: 192.168.1.2 93 | dbname: your_database 94 | username: monitor 95 | port: 5432 96 | ``` 97 | 98 | ## Plugins 99 | 100 | More information here: [plugins](/plugins) 101 | 102 | ## Build status 103 | 104 | ![Status](https://github.com/vadv/pg_gatherer/workflows/test-and-release/badge.svg) 105 | -------------------------------------------------------------------------------- /docker/release.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.14 as builder 2 | 3 | WORKDIR /go/github.com/vadv/pg_gatherer 4 | 5 | COPY go.mod go.mod 6 | COPY go.sum go.sum 7 | RUN go mod download 8 | 9 | COPY . .
10 | RUN make build 11 | 12 | FROM centos:7 13 | 14 | ENV LC_ALL=en_US.UTF-8 15 | COPY --from=builder /go/github.com/vadv/pg_gatherer/bin/pg_gatherer /usr/bin/pg_gatherer 16 | COPY --from=builder /go/github.com/vadv/pg_gatherer/plugins /etc/pg_gatherer/plugins 17 | -------------------------------------------------------------------------------- /docker/test.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | RUN echo $'[timescale_timescaledb] \n\ 4 | name=timescale_timescaledb \n\ 5 | baseurl=https://packagecloud.io/timescale/timescaledb/el/7/\$basearch \n\ 6 | repo_gpgcheck=1 \n\ 7 | gpgcheck=0 \n\ 8 | enabled=1 \n\ 9 | gpgkey=https://packagecloud.io/timescale/timescaledb/gpgkey \n\ 10 | sslverify=1 \n\ 11 | sslcacert=/etc/pki/tls/certs/ca-bundle.crt \n\ 12 | metadata_expire=300' > /etc/yum.repos.d/timescale_timescaledb.repo 13 | 14 | RUN yum install -y epel-release && \ 15 | yum install -y golang && \ 16 | yum install -y @"Development tools" && \ 17 | yum install -y sudo && \ 18 | yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm && \ 19 | yum install -y postgresql12-server && \ 20 | yum install -y postgresql12-contrib && \ 21 | yum install -y timescaledb-postgresql-12 && \ 22 | yum clean all 23 | 24 | ENV GO111MODULE=on 25 | ENV PATH="/usr/local/go/bin:${PATH}" 26 | 27 | WORKDIR /opt/pg_gatherer/ 28 | 29 | # download go modules 30 | COPY go.mod . 31 | COPY go.sum . 32 | RUN go mod download 33 | 34 | # copy project 35 | COPY . . 36 | -------------------------------------------------------------------------------- /gatherer/cmd/pg_gatherer/config.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "log" 7 | 8 | "gopkg.in/yaml.v2" 9 | 10 | "github.com/vadv/pg_gatherer/gatherer/internal/plugins" 11 | "github.com/vadv/pg_gatherer/gatherer/internal/secrets" 12 | ) 13 | 14 | // Config represent configuration of plugins 15 | type Config map[string]HostConfiguration 16 | 17 | // HostConfiguration represent configurations of hosts 18 | type HostConfiguration struct { 19 | Plugins []string `yaml:"plugins,omitempty"` 20 | Connections map[string]*plugins.Connection `yaml:"connections"` 21 | } 22 | 23 | func readConfig(configFile string) (*Config, error) { 24 | data, err := ioutil.ReadFile(configFile) 25 | if err != nil { 26 | return nil, err 27 | } 28 | config := new(Config) 29 | if errMarshal := yaml.Unmarshal(data, &config); errMarshal != nil { 30 | return nil, errMarshal 31 | } 32 | if errConfig := config.validate(); errConfig != nil { 33 | return nil, errConfig 34 | } 35 | return config, nil 36 | } 37 | 38 | func (c Config) registerHostsAndPlugins(pool *plugins.Pool, secretStorage *secrets.Storage) error { 39 | for host, hostConfig := range c { 40 | log.Printf("[INFO] register host: '%s'\n", host) 41 | pool.RegisterHost(host, hostConfig.Connections) 42 | for _, pl := range hostConfig.Plugins { 43 | log.Printf("[INFO] register plugin '%s' for host: '%s'\n", pl, host) 44 | if errPl := pool.AddPluginToHost(pl, host, secretStorage); errPl != nil { 45 | return fmt.Errorf("register plugin '%s' for host '%s': %s\n", 46 | pl, host, errPl.Error()) 47 | } 48 | } 49 | } 50 | return nil 51 | } 52 | 53 | func (c Config) unregisterAll(pool *plugins.Pool) error { 54 | for host, hostConfig := range c { 55 | for _, pl := range hostConfig.Plugins { 56 | log.Printf("[INFO] unregister 
plugin '%s' for host: '%s'\n", pl, host) 57 | if errRemove := pool.StopAndRemovePluginFromHost(pl, host); errRemove != nil { 58 | return fmt.Errorf("stop plugin '%s' for host '%s': %s\n", pl, host, errRemove.Error()) 59 | } 60 | } 61 | } 62 | return nil 63 | } 64 | 65 | func (c Config) validate() error { 66 | if len(c) == 0 { 67 | return fmt.Errorf("hosts configuration is empty") 68 | } 69 | for host, config := range c { 70 | if len(config.Plugins) == 0 { 71 | return fmt.Errorf("plugins is empty for host: %s", host) 72 | } 73 | } 74 | return nil 75 | } 76 | -------------------------------------------------------------------------------- /gatherer/cmd/pg_gatherer/http.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net/http" 5 | "net/http/pprof" 6 | "strings" 7 | ) 8 | 9 | func handleHTTP(w http.ResponseWriter, req *http.Request) { 10 | 11 | // debug 12 | if strings.HasPrefix(req.URL.Path, "/debug/") { 13 | if strings.HasPrefix(req.URL.Path, "/debug/pprof/symbol") { 14 | pprof.Symbol(w, req) 15 | return 16 | } 17 | if strings.HasPrefix(req.URL.Path, "/debug/pprof/profile") { 18 | pprof.Profile(w, req) 19 | return 20 | } 21 | if strings.HasPrefix(req.URL.Path, "/debug/pprof/cmdline") { 22 | pprof.Cmdline(w, req) 23 | return 24 | } 25 | pprof.Index(w, req) 26 | return 27 | } 28 | 29 | http.RedirectHandler("/metrics", http.StatusFound) 30 | 31 | } 32 | -------------------------------------------------------------------------------- /gatherer/cmd/pg_gatherer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "os" 9 | "os/signal" 10 | "syscall" 11 | 12 | "github.com/prometheus/client_golang/prometheus/promhttp" 13 | 14 | "github.com/vadv/pg_gatherer/gatherer/internal/connection" 15 | 16 | "github.com/vadv/pg_gatherer/gatherer/internal/secrets" 17 | 18 | "github.com/vadv/pg_gatherer/gatherer/internal/plugins" 19 | ) 20 | 21 | var ( 22 | version = `v1.0.0` 23 | hostConfigFile = flag.String(`host-config-file`, `host.yaml`, `Path to config file with host configurations.`) 24 | pluginDir = flag.String(`plugins-dir`, `./plugins`, `Path to plugins directory`) 25 | cacheDir = flag.String(`cache-dir`, `./cache`, `Path to cache directory`) 26 | httpListen = flag.String(`http-listen`, `:8080`, `Lister address`) 27 | secretsFile = flag.String(`secret-file`, ``, `Path to yaml file with secrets (key:value)`) 28 | maxOpenConns = flag.Uint(`max-open-conns`, 1, `Set max open connections`) 29 | versionFlag = flag.Bool(`version`, false, `Print version and exit`) 30 | ) 31 | 32 | func main() { 33 | 34 | if !flag.Parsed() { 35 | flag.Parse() 36 | } 37 | 38 | if *versionFlag { 39 | fmt.Printf("version: %s\n", version) 40 | os.Exit(0) 41 | } 42 | 43 | connection.SetMaxOpenConns(*maxOpenConns) 44 | 45 | config, errConfig := readConfig(*hostConfigFile) 46 | if errConfig != nil { 47 | log.Printf("[FATAL] config file error: %s\n", errConfig.Error()) 48 | os.Exit(1) 49 | } 50 | 51 | // http 52 | http.HandleFunc("/", handleHTTP) 53 | http.Handle("/metrics", promhttp.Handler()) 54 | go func() { 55 | if errListen := http.ListenAndServe(*httpListen, nil); errListen != nil { 56 | log.Printf("[FATAL] http listen: %s\n", errListen.Error()) 57 | os.Exit(2) 58 | } 59 | }() 60 | 61 | // secrets for pool 62 | secretStorage := secrets.New(*secretsFile) 63 | // pool of plugins 64 | pool := plugins.NewPool(*pluginDir, *cacheDir) 65 | // 
register plugins 66 | if errRegister := config.registerHostsAndPlugins(pool, secretStorage); errRegister != nil { 67 | log.Printf("[FATAL] register: %s\n", errRegister.Error()) 68 | os.Exit(3) 69 | } 70 | go prometheusCollectPoolStatistics(pool) 71 | log.Printf("[INFO] started\n") 72 | 73 | sig := make(chan os.Signal, 1) 74 | signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP) 75 | 76 | for { 77 | s := <-sig 78 | switch s { 79 | case syscall.SIGHUP: 80 | // reload 81 | log.Printf("[INFO] reloading\n") 82 | secretStorage.Read() 83 | newConfig, errConfigReRead := readConfig(*hostConfigFile) 84 | if errConfigReRead != nil { 85 | log.Printf("[FATAL] config file error: %s\n", errConfigReRead.Error()) 86 | os.Exit(4) 87 | } 88 | if errUnRegister := config.unregisterAll(pool); errUnRegister != nil { 89 | log.Printf("[FATAL] unregister error: %s\n", errUnRegister.Error()) 90 | os.Exit(5) 91 | } 92 | if errRegister := newConfig.registerHostsAndPlugins(pool, secretStorage); errRegister != nil { 93 | // TODO: rollback 94 | log.Printf("[FATAL] register error: %s\n", errRegister.Error()) 95 | os.Exit(5) 96 | } 97 | config = newConfig 98 | log.Printf("[INFO] reloaded\n") 99 | case syscall.SIGINT, syscall.SIGTERM: 100 | // stop 101 | log.Printf("[INFO] shutdown\n") 102 | for host := range *config { 103 | pool.RemoveHostAndPlugins(host) 104 | } 105 | log.Printf("[INFO] stopped\n") 106 | return 107 | } 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /gatherer/cmd/pg_gatherer/prometheus.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/prometheus/client_golang/prometheus" 7 | 8 | "github.com/vadv/pg_gatherer/gatherer/internal/plugins" 9 | ) 10 | 11 | func prometheusCollectPoolStatistics(pool *plugins.Pool) { 12 | 13 | errorsVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{ 14 | Namespace: "pg_gatherer", 15 | Name: "plugin_errors", 16 | Help: "Count of plugins errors", 17 | }, []string{"host", "plugin"}) 18 | prometheus.MustRegister(errorsVec) 19 | 20 | for { 21 | errorStat := make(map[string]map[string]int) 22 | for host, hostStats := range pool.PluginStatisticPerHost() { 23 | errorStat[host] = make(map[string]int) 24 | for _, stat := range hostStats { 25 | errorStat[host][stat.PluginName] = stat.Errors 26 | } 27 | } 28 | for host, pluginStat := range errorStat { 29 | for plugin, errors := range pluginStat { 30 | errorsVec.With(map[string]string{"host": host, "plugin": plugin}).Set(float64(errors)) 31 | } 32 | } 33 | time.Sleep(5 * time.Second) 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /gatherer/cmd/testing/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | "sync" 11 | "sync/atomic" 12 | "time" 13 | 14 | "github.com/vadv/pg_gatherer/gatherer/internal/connection" 15 | 16 | libs "github.com/vadv/gopher-lua-libs" 17 | "github.com/vadv/pg_gatherer/gatherer/internal/testing_framework" 18 | lua "github.com/yuin/gopher-lua" 19 | ) 20 | 21 | var ( 22 | pluginPath = flag.String(`plugin-dir`, `plugins`, `Path to plugin directory`) 23 | cachePath = flag.String(`cache-dir`, `plugins\cache`, `Path to cache directory`) 24 | stopOnFirstError = flag.Bool(`stop-on-first-error`, false, `Stop on first error`) 25 | hostName = 
flag.String(`host`, os.Getenv(`PGHOST`), `PostgreSQL host`) 26 | dbName = flag.String(`dbname`, os.Getenv(`PGDATABASE`), `PostgreSQL database`) 27 | userName = flag.String(`username`, os.Getenv(`PGUSER`), `PostgreSQL user`) 28 | password = flag.String(`password`, os.Getenv(`PGPASSWORD`), `PostgreSQL password`) 29 | dbPort = flag.Int(`port`, 5432, `PostgreSQL port`) 30 | ) 31 | 32 | type testResult struct { 33 | pluginName string 34 | testFile string 35 | err error 36 | } 37 | 38 | func main() { 39 | if !flag.Parsed() { 40 | flag.Parse() 41 | } 42 | startAt := time.Now() 43 | // load embedded plugins 44 | plugins, err := listOfPluginsAndTestFiles(filepath.Join(*pluginPath, "embedded")) 45 | if err != nil { 46 | log.Printf("[ERROR] get embedded plugins: %s\n", err.Error()) 47 | plugins = make(map[string]string) 48 | } 49 | // load override plugins 50 | overridePlugins, errOverridePlugins := listOfPluginsAndTestFiles(*pluginPath) 51 | if errOverridePlugins == nil { 52 | for plugin, file := range overridePlugins { 53 | plugins[plugin] = file 54 | } 55 | } 56 | 57 | var wg sync.WaitGroup 58 | wg.Add(len(plugins)) 59 | log.Printf("[INFO] test %d plugins\n", len(plugins)) 60 | testResultChan := make(chan *testResult) 61 | 62 | for plugin, testFile := range plugins { 63 | go func(plugin, testFile string) { 64 | log.Printf("[INFO] start testing plugin %s via file %s\n", plugin, testFile) 65 | errTest := testPlugin(*pluginPath, *cachePath, plugin, testFile) 66 | log.Printf("[INFO] test plugin %s via file %s: was completed\n", plugin, testFile) 67 | testResultChan <- &testResult{ 68 | pluginName: plugin, 69 | testFile: testFile, 70 | err: errTest, 71 | } 72 | }(plugin, testFile) 73 | } 74 | 75 | failed, completed := int32(0), int32(0) 76 | go func() { 77 | ticker := time.NewTicker(time.Second) 78 | for { 79 | select { 80 | case result := <-testResultChan: 81 | atomic.AddInt32(&completed, 1) 82 | if result != nil && result.err != nil { 83 | atomic.AddInt32(&failed, 1) 84 | log.Printf("[ERROR] plugin '%s' file '%s' error:\n%s\n", 85 | result.pluginName, result.testFile, result.err.Error()) 86 | if *stopOnFirstError { 87 | os.Exit(1) 88 | } 89 | } 90 | wg.Done() 91 | case <-ticker.C: 92 | log.Printf("[INFO] already processing\t%.0fs:\ttotal: %d\tcompleted: %d\tfailed: %d\n", 93 | time.Since(startAt).Seconds(), len(plugins), completed, failed) 94 | } 95 | } 96 | }() 97 | 98 | wg.Wait() 99 | if failed > 0 { 100 | log.Printf("[ERROR] %d plugin(s) was failed\n", failed) 101 | os.Exit(int(failed)) 102 | } else { 103 | log.Printf("[INFO] was competed after: %v\n", time.Since(startAt)) 104 | } 105 | } 106 | 107 | func testPlugin(pluginDir, cacheDir, pluginName, testFile string) error { 108 | state := lua.NewState() 109 | libs.Preload(state) 110 | testing_framework.Preload(state) 111 | if err := testing_framework.New(state, pluginDir, cacheDir, pluginName, 112 | *hostName, *dbName, *userName, *password, *dbPort, nil); err != nil { 113 | return err 114 | } 115 | connection.Preload(state) 116 | connection.New(state, `target`, 117 | *hostName, *dbName, *userName, *password, *dbPort, nil) 118 | connection.New(state, `storage`, 119 | *hostName, *dbName, *userName, *password, *dbPort, nil) 120 | if err := state.DoFile(filepath.Join(pluginDir, "init.test.lua")); err != nil { 121 | return err 122 | } 123 | return state.DoFile(testFile) 124 | } 125 | 126 | func listOfPluginsAndTestFiles(dir string) (map[string]string, error) { 127 | pattern := filepath.Join(dir, "*", "test.lua") 128 | testFiles, err := 
filepath.Glob(pattern) 129 | if err != nil { 130 | return nil, err 131 | } 132 | result := make(map[string]string) 133 | for _, f := range testFiles { 134 | pluginDir, _ := filepath.Split(f) 135 | split := strings.Split(pluginDir, string(filepath.Separator)) 136 | if len(split) < 3 { 137 | return nil, fmt.Errorf("splited: %v", split) 138 | } 139 | plugin := split[len(split)-2] 140 | result[plugin] = f 141 | } 142 | return result, nil 143 | } 144 | -------------------------------------------------------------------------------- /gatherer/internal/cache/README.md: -------------------------------------------------------------------------------- 1 | Creates lua user data `cache_ud`. 2 | 3 | # Golang 4 | 5 | ```go 6 | state := lua.NewState() 7 | cache.Preload(state) 8 | // register user data "cache" 9 | cache.NewSqlite(state, "cache", "/tmp/db.sqlite") 10 | ``` 11 | 12 | # Lua 13 | 14 | ## cache:set(string, number) 15 | 16 | Set `number` to cache by key `string`, raise error. 17 | 18 | ## cache:get(string) 19 | 20 | Get `number` (value) and `number` (time updated_at) from cache by key `string`. Return two `number` or two `nil`, raise error. 21 | 22 | ## cache:diff_and_set(string, value) 23 | 24 | Get `number`, diff from previous value. Return `number` or `nil`, raise error. 25 | 26 | ## cache:speed_and_set(string, value) 27 | 28 | Get `number`, speed based on previous value. Return `number` or nil, raise error. -------------------------------------------------------------------------------- /gatherer/internal/cache/cache.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "github.com/vadv/pg_gatherer/gatherer/internal/cache/sqlite" 5 | lua "github.com/yuin/gopher-lua" 6 | ) 7 | 8 | type cache interface { 9 | Set(key string, value float64) error 10 | Get(key string) (float64, int64, bool, error) 11 | Delete(key string) error 12 | } 13 | 14 | type cacheUserData struct { 15 | cache 16 | } 17 | 18 | // Preload is the preloader of user data cache_ud. 
19 | func Preload(L *lua.LState) int { 20 | connectionUd := L.NewTypeMetatable(`cache_ud`) 21 | L.SetGlobal(`cache_ud`, connectionUd) 22 | L.SetField(connectionUd, "__index", L.SetFuncs(L.NewTable(), map[string]lua.LGFunction{ 23 | "set": set, 24 | "get": get, 25 | "diff_and_set": diffAndSet, 26 | "speed_and_set": speedAndSet, 27 | })) 28 | return 0 29 | } 30 | 31 | // NewSqlite open new sqlite cache 32 | func NewSqlite(L *lua.LState, userDataName, fileName, prefix string) error { 33 | s, err := sqlite.New(fileName, prefix) 34 | if err != nil { 35 | return err 36 | } 37 | c := &cacheUserData{cache: s} 38 | ud := L.NewUserData() 39 | ud.Value = c 40 | L.SetMetatable(ud, L.GetTypeMetatable(`cache_ud`)) 41 | L.SetGlobal(userDataName, ud) 42 | return nil 43 | } 44 | -------------------------------------------------------------------------------- /gatherer/internal/cache/cache_lua.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "time" 5 | 6 | lua "github.com/yuin/gopher-lua" 7 | ) 8 | 9 | // checkUserDataCache return connection from lua state 10 | func checkUserDataCache(L *lua.LState, n int) *cacheUserData { 11 | ud := L.CheckUserData(n) 12 | if v, ok := ud.Value.(*cacheUserData); ok { 13 | return v 14 | } 15 | L.ArgError(n, "cache_ud expected") 16 | return nil 17 | } 18 | 19 | // set key with value 20 | func set(L *lua.LState) int { 21 | ud := checkUserDataCache(L, 1) 22 | key := L.CheckString(2) 23 | if valueT := L.CheckAny(3); valueT.Type() == lua.LTNil { 24 | if err := ud.Delete(key); err != nil { 25 | L.RaiseError("cache error: %s", err.Error()) 26 | } 27 | return 0 28 | } 29 | value := L.CheckNumber(3) 30 | if err := ud.Set(key, float64(value)); err != nil { 31 | L.RaiseError("cache error: %s", err.Error()) 32 | } 33 | return 0 34 | } 35 | 36 | // get key 37 | func get(L *lua.LState) int { 38 | ud := checkUserDataCache(L, 1) 39 | key := L.CheckString(2) 40 | value, updatedAt, found, err := ud.Get(key) 41 | if err != nil { 42 | L.RaiseError("cache error: %s", err.Error()) 43 | return 0 44 | } 45 | if !found { 46 | L.Push(lua.LNil) 47 | L.Push(lua.LNil) 48 | return 2 49 | } 50 | L.Push(lua.LNumber(value)) 51 | L.Push(lua.LNumber(updatedAt)) 52 | return 2 53 | } 54 | 55 | // diff key 56 | func diffAndSet(L *lua.LState) int { 57 | ud := checkUserDataCache(L, 1) 58 | key := L.CheckString(2) 59 | if currentValueT := L.CheckAny(3); currentValueT.Type() == lua.LTNil { 60 | if err := ud.Delete(key); err != nil { 61 | L.RaiseError("cache error: %s", err.Error()) 62 | } 63 | return 0 64 | } 65 | currentValue := L.CheckNumber(3) 66 | prevValue, _, found, err := ud.Get(key) 67 | if err != nil { 68 | L.RaiseError("cache error: %s", err.Error()) 69 | return 0 70 | } 71 | if errSet := ud.Set(key, float64(currentValue)); errSet != nil { 72 | L.RaiseError("cache error: %s", errSet.Error()) 73 | return 0 74 | } 75 | if !found { 76 | // not found, return nil 77 | L.Push(lua.LNil) 78 | return 1 79 | } 80 | // found, calc diff 81 | result := float64(currentValue) - prevValue 82 | L.Push(lua.LNumber(result)) 83 | return 1 84 | } 85 | 86 | func speedAndSet(L *lua.LState) int { 87 | ud := checkUserDataCache(L, 1) 88 | key := L.CheckString(2) 89 | if currentValueT := L.CheckAny(3); currentValueT.Type() == lua.LTNil { 90 | if err := ud.Delete(key); err != nil { 91 | L.RaiseError("cache error: %s", err.Error()) 92 | } 93 | return 0 94 | } 95 | currentValue := L.CheckNumber(3) 96 | prevValue, updatedAt, found, err := ud.Get(key) 97 | if err 
!= nil { 98 | L.RaiseError("cache error: %s", err.Error()) 99 | return 0 100 | } 101 | if errSet := ud.Set(key, float64(currentValue)); errSet != nil { 102 | L.RaiseError("cache error: %s", errSet.Error()) 103 | return 0 104 | } 105 | if !found { 106 | // not found, return nil 107 | L.Push(lua.LNil) 108 | return 1 109 | } 110 | // found, calc diff 111 | result := (float64(currentValue) - prevValue) / float64(time.Now().Unix()-updatedAt) 112 | L.Push(lua.LNumber(result)) 113 | return 1 114 | } 115 | -------------------------------------------------------------------------------- /gatherer/internal/cache/cache_test.go: -------------------------------------------------------------------------------- 1 | package cache_test 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | 8 | "github.com/vadv/gopher-lua-libs/inspect" 9 | "github.com/vadv/gopher-lua-libs/time" 10 | "github.com/vadv/pg_gatherer/gatherer/internal/cache" 11 | lua "github.com/yuin/gopher-lua" 12 | ) 13 | 14 | func TestCacheRotate(t *testing.T) { 15 | 16 | state := lua.NewState() 17 | cache.Preload(state) 18 | os.RemoveAll("./tests/rotate.sqlite") 19 | if err := cache.NewSqlite(state, "cache", "./tests/rotate.sqlite", "prefix_"); err != nil { 20 | t.Fatalf(err.Error()) 21 | } 22 | time.Preload(state) 23 | inspect.Preload(state) 24 | if err := state.DoFile("./tests/rotate.lua"); err != nil { 25 | t.Fatalf("error: %s\n", err.Error()) 26 | } 27 | 28 | } 29 | 30 | func TestCache(t *testing.T) { 31 | 32 | state := lua.NewState() 33 | cache.Preload(state) 34 | os.RemoveAll("./tests/db.sqlite") 35 | if err := cache.NewSqlite(state, "cache", "./tests/db.sqlite", "prefix_"); err != nil { 36 | t.Fatalf(err.Error()) 37 | } 38 | time.Preload(state) 39 | inspect.Preload(state) 40 | if err := state.DoFile("./tests/cache.lua"); err != nil { 41 | t.Fatalf("error: %s\n", err.Error()) 42 | } 43 | 44 | } 45 | 46 | func TestCorrupt(t *testing.T) { 47 | state := lua.NewState() 48 | cache.Preload(state) 49 | os.RemoveAll("./tests/corrupt.sqlite") 50 | if err := ioutil.WriteFile("./tests/corrupt.sqlite", []byte(""), 0600); err != nil { 51 | t.Fatalf(err.Error()) 52 | } 53 | if err := cache.NewSqlite(state, "cache", "./tests/corrupt.sqlite", "prefix_"); err != nil { 54 | t.Fatalf(err.Error()) 55 | } 56 | time.Preload(state) 57 | inspect.Preload(state) 58 | if err := state.DoFile("./tests/cache.lua"); err != nil { 59 | t.Fatalf("error: %s\n", err.Error()) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /gatherer/internal/cache/sqlite/cache.go: -------------------------------------------------------------------------------- 1 | package sqlite 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "log" 8 | "os" 9 | "path/filepath" 10 | "strconv" 11 | "sync" 12 | "time" 13 | 14 | // sqlite 15 | _ "github.com/mattn/go-sqlite3" 16 | ) 17 | 18 | // cache in sqlite database 19 | // this is a trade-off between memory usage and disk iops usage 20 | 21 | const ( 22 | // EnvCacheRotateTable env value for override default value 23 | EnvCacheRotateTable = `CACHE_ROTATE_TABLE` 24 | // DefaultCacheRotateTable default value for rotate tables 25 | DefaultCacheRotateTable = int64(60 * 60 * 2) 26 | createQuery = `create table if not exists "%s" (key text primary key, value real, updated_at real)` 27 | ) 28 | 29 | // Cache sqlite cache 30 | type Cache struct { 31 | path string 32 | prefix string 33 | db *sql.DB 34 | tables map[string]bool 35 | tableMutex sync.Mutex 36 | } 37 | 38 | var listOfOpenCaches = 
&listOfCaches{list: make(map[string]*sql.DB)} 39 | 40 | type listOfCaches struct { 41 | mutex sync.Mutex 42 | list map[string]*sql.DB 43 | } 44 | 45 | func (c *Cache) getCacheTableNamePrefix() string { 46 | return c.prefix 47 | } 48 | 49 | // New init new cache 50 | func New(path, prefix string) (*Cache, error) { 51 | if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil { 52 | return nil, err 53 | } 54 | result := &Cache{path: path, prefix: prefix, tables: make(map[string]bool)} 55 | listOfOpenCaches.mutex.Lock() 56 | defer listOfOpenCaches.mutex.Unlock() 57 | db, ok := listOfOpenCaches.list[path] 58 | if ok { 59 | result.db = db 60 | return result, nil 61 | } 62 | retries := 0 63 | OpenSqlite: 64 | if retries > 3 { 65 | return nil, fmt.Errorf("too many errors while prepare sqlite database") 66 | } 67 | // https://github.com/mattn/go-sqlite3/tree/v2.0.3#connection-string 68 | connectionString := fmt.Sprintf("file:%s?_synchronous=0&_journal_mode=OFF", path) 69 | newDB, err := sql.Open(`sqlite3`, connectionString) 70 | if err != nil { 71 | log.Printf("[ERROR] delete db %#v, because: %#v while open\n", connectionString, err.Error()) 72 | os.RemoveAll(path) 73 | retries++ 74 | goto OpenSqlite 75 | } 76 | if _, testQuery := newDB.Exec(`select 1`); testQuery != nil { 77 | newDB.Close() 78 | log.Printf("[ERROR] delete db %#v, because: %#v while exec test query\n", connectionString, testQuery.Error()) 79 | os.RemoveAll(path) 80 | retries++ 81 | goto OpenSqlite 82 | } 83 | newDB.SetMaxOpenConns(1) 84 | newDB.SetMaxIdleConns(1) 85 | listOfOpenCaches.list[path] = newDB 86 | result.db = newDB 87 | go result.rotateOldTablesRoutine() 88 | return result, nil 89 | } 90 | 91 | // TODO: to save syscall, get variable from cache 92 | func (c *Cache) getCacheRotateTable() int64 { 93 | result := DefaultCacheRotateTable 94 | envVar := os.Getenv(EnvCacheRotateTable) 95 | if envVar != `` { 96 | if value, err := strconv.ParseUint(envVar, 10, 64); err == nil { 97 | if value > 0 { 98 | result = int64(value) 99 | } 100 | } else { 101 | log.Printf("[ERROR] cache %s env variable %s has bad value: '%s': '%s' ignoring\n", 102 | c.path, EnvCacheRotateTable, envVar, err.Error()) 103 | } 104 | } 105 | return result 106 | } 107 | 108 | // current table name 109 | func (c *Cache) currentTableName() string { 110 | now := time.Now().Unix() 111 | return fmt.Sprintf("%s_%d", c.getCacheTableNamePrefix(), now-(now%c.getCacheRotateTable())) 112 | } 113 | 114 | // prev table name 115 | func (c *Cache) prevTableName() string { 116 | now := time.Now().Unix() 117 | return fmt.Sprintf("%s_%d", c.getCacheTableNamePrefix(), now-(now%c.getCacheRotateTable())-c.getCacheRotateTable()) 118 | } 119 | 120 | func (c *Cache) createTable(tableName string) error { 121 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 122 | defer cancel() 123 | _, err := c.db.ExecContext(ctx, fmt.Sprintf(createQuery, tableName)) 124 | return err 125 | } 126 | -------------------------------------------------------------------------------- /gatherer/internal/cache/sqlite/rotate.go: -------------------------------------------------------------------------------- 1 | package sqlite 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "log" 8 | "strconv" 9 | "strings" 10 | "time" 11 | ) 12 | 13 | const ( 14 | listTablesQuery = ` 15 | select 16 | name 17 | from sqlite_master 18 | where type = 'table' and name like '%%_%%' order by name desc;` 19 | ) 20 | 21 | func (c *Cache) rotateOldTablesRoutine() { 22 | for { 23 | if err := 
c.rotateOldTables(); err != nil { 24 | log.Printf("[ERROR] cache %s rotate old tables: %s\n", c.path, err.Error()) 25 | time.Sleep(100 * time.Millisecond) 26 | continue 27 | } 28 | time.Sleep(time.Second * time.Duration(c.getCacheRotateTable()/2)) 29 | } 30 | } 31 | 32 | func (c *Cache) rotateOldTables() error { 33 | time.Sleep(100 * time.Millisecond) 34 | deadline := time.Now().Unix() - 2*c.getCacheRotateTable() 35 | tables, err := listOfTables(c.db, time.Second) 36 | if err != nil { 37 | return err 38 | } 39 | ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 40 | defer cancel() 41 | for _, tableName := range tables { 42 | if timeSlice := strings.Split(tableName, "_"); len(timeSlice) > 0 { 43 | timeStr := timeSlice[len(timeSlice)-1] 44 | t, err := strconv.ParseInt(timeStr, 10, 64) 45 | if err == nil { 46 | if deadline > t { 47 | log.Printf("[INFO] cache drop table: %#v\n", tableName) 48 | _, errExec := c.db.ExecContext(ctx, fmt.Sprintf(`drop table %#v`, tableName)) 49 | if errExec != nil { 50 | return errExec 51 | } 52 | c.tableMutex.Lock() 53 | delete(c.tables, tableName) 54 | c.tableMutex.Unlock() 55 | } 56 | } 57 | } 58 | } 59 | return nil 60 | } 61 | 62 | func listOfTables(db *sql.DB, timeout time.Duration) ([]string, error) { 63 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 64 | defer cancel() 65 | rows, err := db.QueryContext(ctx, listTablesQuery) 66 | if err != nil { 67 | return nil, err 68 | } 69 | defer rows.Close() 70 | var tables []string 71 | for rows.Next() { 72 | tableName := "" 73 | errScan := rows.Scan(&tableName) 74 | if errScan != nil { 75 | return nil, errScan 76 | } 77 | tables = append(tables, tableName) 78 | } 79 | return tables, nil 80 | } 81 | -------------------------------------------------------------------------------- /gatherer/internal/cache/sqlite/set_get.go: -------------------------------------------------------------------------------- 1 | package sqlite 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "time" 8 | ) 9 | 10 | const ( 11 | setQuery = ` 12 | insert 13 | into "%s" (key, value, updated_at) 14 | values (?, ?, ?) 
on conflict (key) do 15 | update set value=excluded.value, updated_at=excluded.updated_at 16 | ` 17 | getQuery = `select value, updated_at from "%s" where key = ?` 18 | deleteQuery = `delete from "%s" where key = ?` 19 | ) 20 | 21 | func (c *Cache) checkTableExists(tableName string) error { 22 | // check to table is created 23 | c.tableMutex.Lock() 24 | defer c.tableMutex.Unlock() 25 | if _, ok := c.tables[tableName]; !ok { 26 | if err := c.createTable(tableName); err != nil { 27 | return err 28 | } 29 | c.tables[tableName] = true 30 | } 31 | return nil 32 | } 33 | 34 | // Set new value in cache 35 | func (c *Cache) Set(key string, value float64) error { 36 | if err := c.checkTableExists(c.currentTableName()); err != nil { 37 | return err 38 | } 39 | // query 40 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 41 | defer cancel() 42 | _, err := c.db.ExecContext(ctx, fmt.Sprintf(setQuery, c.currentTableName()), key, value, time.Now().Unix()) 43 | return err 44 | } 45 | 46 | // get return value, updatedAt, error 47 | func (c *Cache) getFromTable(key, tableName string) (float64, int64, bool, error) { 48 | if err := c.checkTableExists(tableName); err != nil { 49 | return 0, 0, false, err 50 | } 51 | query := fmt.Sprintf(getQuery, tableName) 52 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 53 | defer cancel() 54 | row := c.db.QueryRowContext(ctx, query, key) 55 | var value, updatedAt float64 56 | err := row.Scan(&value, &updatedAt) 57 | if err != nil && err == sql.ErrNoRows { 58 | return 0, 0, false, nil 59 | } 60 | if err != nil { 61 | return 0, 0, false, err 62 | } 63 | return value, int64(updatedAt), true, nil 64 | } 65 | 66 | // Get return value, updatedAt, error 67 | // we get data from prev table, if not found key in current 68 | func (c *Cache) Get(key string) (value float64, updatedAt int64, found bool, err error) { 69 | value, updatedAt, found, err = c.getFromTable(key, c.currentTableName()) 70 | if !found { 71 | return c.getFromTable(key, c.prevTableName()) 72 | } 73 | return 74 | } 75 | 76 | // Delete value by key 77 | func (c *Cache) Delete(key string) error { 78 | query := fmt.Sprintf(deleteQuery, c.currentTableName()) 79 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 80 | defer cancel() 81 | if _, err := c.db.ExecContext(ctx, query, key); err != nil { 82 | return err 83 | } 84 | query = fmt.Sprintf(deleteQuery, c.prevTableName()) 85 | if _, err := c.db.ExecContext(ctx, query, key); err != nil { 86 | return err 87 | } 88 | return nil 89 | } 90 | -------------------------------------------------------------------------------- /gatherer/internal/cache/tests/.gitignore: -------------------------------------------------------------------------------- 1 | *.sqlite -------------------------------------------------------------------------------- /gatherer/internal/cache/tests/cache.lua: -------------------------------------------------------------------------------- 1 | local time = require("time") 2 | 3 | local value = cache:get("unknown") 4 | if value then error("must be unknown") end 5 | 6 | -- set get 7 | local err = cache:set("value_set_get", 42) 8 | if err then error(err) end 9 | local value_set_get = cache:get("value_set_get") 10 | if not(value_set_get == 42) then error("value_set_get must be 42, but get: "..tostring(value_set_get)) end 11 | 12 | -- check diff 13 | local err = cache:set("value_diff", 0) 14 | if err then error(err) end 15 | local diff = cache:diff_and_set("value_diff", 1) 16 | if not(diff == 
1) then error("diff must be 1, but get: "..tostring(diff)) end 17 | local diff = cache:get("value_diff") 18 | if not(diff == 1) then error("diff must be 1, but get: "..tostring(diff)) end 19 | 20 | -- check speed 21 | local err = cache:set("value_speed", 0) 22 | if err then error(err) end 23 | time.sleep(1) 24 | local speed = cache:speed_and_set("value_speed", 1) 25 | if speed == 0 then error("speed: "..tostring(speed)) end 26 | if not(speed > 0) then error("speed: "..tostring(speed)) end 27 | local speed = cache:get("value_speed") 28 | if not(speed == 1) then error("diff must be 1, but get: "..tostring(speed)) end 29 | 30 | -- check nil 31 | local err = cache:set("value_nil", nil) 32 | if err then error(err) end 33 | local value = cache:get("value_nil") 34 | if value then error("must be unknown") end 35 | cache:diff_and_set("value_nil", nil) 36 | local value = cache:get("value_nil") 37 | if value then error("must be unknown") end 38 | cache:speed_and_set("value_nil", nil) 39 | local value = cache:get("value_nil") 40 | if value then error("must be unknown") end 41 | -------------------------------------------------------------------------------- /gatherer/internal/cache/tests/rotate.lua: -------------------------------------------------------------------------------- 1 | local time = require("time") 2 | -- set rotate table value = 2 sec 3 | os.setenv("CACHE_ROTATE_TABLE", "2") 4 | 5 | local err = cache:set("must_be_rotated_after_2_second", 1) 6 | if err then error(err) end 7 | 8 | time.sleep(1.5) 9 | local value = cache:get("must_be_rotated_after_2_second") 10 | if not(value == 1) then error("must be get from prev table") end 11 | 12 | time.sleep(3) 13 | local value = cache:get("must_be_rotated_after_2_second") 14 | if value then error("table must be rotated") end 15 | 16 | time.sleep(3) 17 | local value = cache:get("must_be_rotated_after_2_second") 18 | if value then error("table must be rotated") end 19 | -------------------------------------------------------------------------------- /gatherer/internal/connection/README.md: -------------------------------------------------------------------------------- 1 | Creates lua user data `connection_ud`. 2 | 3 | # Golang 4 | 5 | ```go 6 | state := lua.NewState() 7 | connection.Preload(state) 8 | connection.SetMaxOpenConns(1) 9 | // register user data "test" 10 | connection.New(state, "test", "/tmp", "gatherer-db-test", 5432, "gatherer-user-test", "gatherer-password", params map[string]string) 11 | // register user data "new" 12 | connection.New(state, "new", "/tmp", "gatherer-db", 5432, "gatherer-user", "gatherer-password", params map[string]string) 13 | ``` 14 | 15 | # Lua 16 | 17 | ## connection:query(string, args...) 18 | 19 | Execute read-only query with args. Return table with `rows` and `columns`, raise error. 20 | 21 | ```lua 22 | local result = connection:query("select $1::integer, $1::text, $2", 1, "tests") 23 | --[[ 24 | result: 25 | { 26 | columns = { "int4", "text", "?column?" }, 27 | rows = { { 1, "1", "1" } } 28 | } 29 | --]] 30 | ``` 31 | 32 | ## connection:available_connections() 33 | 34 | List of available connections in this PostgreSQL instance. Return list of user data `connection_ud`, raise error. 35 | 36 | ```lua 37 | local connections, err = connection:available_connections() 38 | if err then error(err) end 39 | connections[1]:query("select 1") 40 | ``` 41 | 42 | ## connection:insert_metric({plugin="", [int=0,float=0,json=""]}) 43 | 44 | Save metric to database, raises error. 
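A minimal usage sketch of `insert_metric`, assuming a registered `connection_ud` named `connection` (as in the examples above); the plugin names and values below are illustrative and follow only the fields listed in the signature:

```lua
-- store a single integer metric for an illustrative "uptime" plugin
connection:insert_metric({plugin = "uptime", int = 42})
-- a float or a json string can be stored instead of an integer
connection:insert_metric({plugin = "activity", json = '{"idle": 3}'})
```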
45 | 46 | ## connection:background_query(string, args...) 47 | 48 | Execute query with args in background, return user data `background_query_ud`. 49 | 50 | ## background_query_ud:cancel() 51 | 52 | Cancel background query. 53 | 54 | ## background_query_ud:is_running() 55 | 56 | Return true if query is already running. 57 | 58 | ## background_query_ud:result() 59 | 60 | Return `result` table like `connection:query()`, raise error if error was canceled on not executed. -------------------------------------------------------------------------------- /gatherer/internal/connection/background_query_lua.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "sync" 7 | 8 | lua "github.com/yuin/gopher-lua" 9 | ) 10 | 11 | type backgroundQuery struct { 12 | mutex sync.Mutex 13 | running bool 14 | tx *sql.Tx 15 | sqlRows *sql.Rows 16 | query string 17 | queryArgs []interface{} 18 | err error 19 | cancelFunc context.CancelFunc 20 | } 21 | 22 | // checkUserDataBackgroundQuery return background_query_ud from lua state 23 | func checkUserDataBackgroundQuery(L *lua.LState, n int) *backgroundQuery { 24 | ud := L.CheckUserData(n) 25 | if v, ok := ud.Value.(*backgroundQuery); ok { 26 | return v 27 | } 28 | L.ArgError(n, "background_query_ud expected") 29 | return nil 30 | } 31 | 32 | // create background_query 33 | func runBackgroundQuery(L *lua.LState) int { 34 | conn := checkUserDataConnection(L, 1) 35 | sqlQuery := L.CheckString(2) 36 | args := parseArgs(L, 3) 37 | ctx, cancelFunc := context.WithCancel(context.Background()) 38 | result := &backgroundQuery{ 39 | running: true, 40 | cancelFunc: cancelFunc, 41 | query: sqlQuery, 42 | queryArgs: args, 43 | } 44 | go func(result *backgroundQuery) { 45 | 46 | tx, err := getTx(conn.db, ctx) 47 | if err != nil { 48 | result.mutex.Lock() 49 | result.err = err 50 | result.running = false 51 | result.mutex.Unlock() 52 | return 53 | } 54 | 55 | result.mutex.Lock() 56 | result.tx = tx 57 | result.mutex.Unlock() 58 | 59 | sqlRows, errQuery := tx.Query(result.query, result.queryArgs...) 
60 | if errQuery != nil { 61 | result.mutex.Lock() 62 | result.err = errQuery 63 | result.running = false 64 | result.mutex.Unlock() 65 | return 66 | } 67 | 68 | result.mutex.Lock() 69 | result.sqlRows = sqlRows 70 | result.running = false 71 | result.mutex.Unlock() 72 | }(result) 73 | ud := L.NewUserData() 74 | ud.Value = result 75 | L.SetMetatable(ud, L.GetTypeMetatable(`background_query_ud`)) 76 | L.Push(ud) 77 | return 1 78 | } 79 | 80 | // cancel background query 81 | func backgroundQueryCancel(L *lua.LState) int { 82 | ud := checkUserDataBackgroundQuery(L, 1) 83 | ud.mutex.Lock() 84 | defer ud.mutex.Unlock() 85 | ud.cancelFunc() 86 | return 0 87 | } 88 | 89 | // background query is running 90 | func backgroundQueryIsRunning(L *lua.LState) int { 91 | ud := checkUserDataBackgroundQuery(L, 1) 92 | ud.mutex.Lock() 93 | defer ud.mutex.Unlock() 94 | L.Push(lua.LBool(ud.running)) 95 | return 1 96 | } 97 | 98 | // background query result 99 | func backgroundQueryResult(L *lua.LState) int { 100 | ud := checkUserDataBackgroundQuery(L, 1) 101 | ud.mutex.Lock() 102 | defer ud.mutex.Unlock() 103 | if ud.running { 104 | L.RaiseError("query already running") 105 | return 0 106 | } 107 | if ud.sqlRows != nil { 108 | defer ud.sqlRows.Close() 109 | } 110 | if ud.tx != nil { 111 | defer func() { _ = ud.tx.Commit() }() 112 | } 113 | if ud.err != nil { 114 | L.RaiseError("query has error: %s", ud.err.Error()) 115 | return 0 116 | } 117 | execResult, err := parseRows(ud.sqlRows, L) 118 | if err != nil { 119 | L.RaiseError("parse query: %s", err.Error()) 120 | return 0 121 | } 122 | result := L.NewTable() 123 | result.RawSetString(`rows`, execResult.Rows) 124 | result.RawSetString(`columns`, execResult.Columns) 125 | L.Push(result) 126 | return 1 127 | } 128 | -------------------------------------------------------------------------------- /gatherer/internal/connection/connection.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "sort" 7 | "strings" 8 | 9 | lua "github.com/yuin/gopher-lua" 10 | ) 11 | 12 | // connection to PostgreSQL 13 | type connection struct { 14 | db *sql.DB 15 | host string 16 | dbname string 17 | port int 18 | user string 19 | password string 20 | params map[string]string 21 | } 22 | 23 | // Preload is the preloader of user data connection_ud. 
24 | func Preload(L *lua.LState) int { 25 | connectionUd := L.NewTypeMetatable(`connection_ud`) 26 | L.SetGlobal(`connection_ud`, connectionUd) 27 | L.SetField(connectionUd, "__index", L.SetFuncs(L.NewTable(), map[string]lua.LGFunction{ 28 | "query": query, 29 | "available_connections": availableConnections, 30 | "background_query": runBackgroundQuery, 31 | "insert_metric": insertMetric, 32 | })) 33 | backgroundQueryUd := L.NewTypeMetatable(`background_query_ud`) 34 | L.SetGlobal(`background_query_ud`, backgroundQueryUd) 35 | L.SetField(backgroundQueryUd, "__index", L.SetFuncs(L.NewTable(), map[string]lua.LGFunction{ 36 | "is_running": backgroundQueryIsRunning, 37 | "result": backgroundQueryResult, 38 | "cancel": backgroundQueryCancel, 39 | })) 40 | return 0 41 | } 42 | 43 | // New create new connection into lua state as user data 44 | func New(L *lua.LState, userDataName, host, dbname, user, password string, port int, params map[string]string) { 45 | c := &connection{ 46 | host: host, 47 | dbname: dbname, 48 | port: port, 49 | user: user, 50 | password: password, 51 | params: params, 52 | } 53 | ud := c.userData(L) 54 | L.SetGlobal(userDataName, ud) 55 | } 56 | 57 | // ConnectionString return connection string 58 | func (c *connection) connectionString() string { 59 | return BuildConnectionString(c.host, c.dbname, c.port, c.user, c.password, c.params) 60 | } 61 | 62 | // BuildConnectionString create connection string 63 | func BuildConnectionString(host, dbname string, port int, user, password string, params map[string]string) string { 64 | kvs := make([]string, 0) 65 | escaper := strings.NewReplacer(` `, `\ `, `'`, `\'`, `\`, `\\`) 66 | accrue := func(k, v string) { 67 | if v != "" { 68 | kvs = append(kvs, k+"="+escaper.Replace(v)) 69 | } 70 | } 71 | // prevent random map iteration 72 | paramKeys := make([]string, 0, len(params)) 73 | for k := range params { 74 | paramKeys = append(paramKeys, k) 75 | } 76 | sort.Strings(paramKeys) 77 | for _, k := range paramKeys { 78 | accrue(k, params[k]) 79 | } 80 | return fmt.Sprintf("host='%s' port=%d dbname='%s' user='%s' password='%s' %s", 81 | host, port, dbname, user, password, strings.Join(kvs, " ")) 82 | } 83 | -------------------------------------------------------------------------------- /gatherer/internal/connection/connection_lua.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "context" 5 | 6 | lua "github.com/yuin/gopher-lua" 7 | ) 8 | 9 | const ( 10 | listConnections = `select 11 | d.datname 12 | from 13 | pg_catalog.pg_database d 14 | where has_database_privilege(d.datname, 'connect') and not d.datistemplate 15 | ` 16 | ) 17 | 18 | // userData represent connection in lua state 19 | func (c *connection) userData(L *lua.LState) *lua.LUserData { 20 | ud := L.NewUserData() 21 | if c.db == nil { 22 | db, err := getDBFromPool(c) 23 | if err != nil { 24 | L.RaiseError("open connection error: %s", err.Error()) 25 | return nil 26 | } 27 | c.db = db 28 | } 29 | ud.Value = c 30 | L.SetMetatable(ud, L.GetTypeMetatable(`connection_ud`)) 31 | return ud 32 | } 33 | 34 | // checkUserDataConnection return connection from lua state 35 | func checkUserDataConnection(L *lua.LState, n int) *connection { 36 | ud := L.CheckUserData(n) 37 | if v, ok := ud.Value.(*connection); ok { 38 | return v 39 | } 40 | L.ArgError(n, "connection_ud expected") 41 | return nil 42 | } 43 | 44 | func parseArgs(L *lua.LState, n int) []interface{} { 45 | args := make([]interface{}, 0) 46 | if count 
:= L.GetTop(); count >= n { 47 | for i := n; i <= count; i++ { 48 | arg := L.CheckAny(i) 49 | switch arg.Type() { 50 | case lua.LTString: 51 | args = append(args, L.CheckString(i)) 52 | case lua.LTNumber: 53 | args = append(args, float64(L.CheckNumber(i))) 54 | case lua.LTBool: 55 | args = append(args, L.CheckBool(i)) 56 | default: 57 | L.RaiseError("unsupported type for sqlQuery args") 58 | return nil 59 | } 60 | } 61 | } 62 | return args 63 | } 64 | 65 | // query execute query from connection 66 | func query(L *lua.LState) int { 67 | ud := checkUserDataConnection(L, 1) 68 | sqlQuery := L.CheckString(2) 69 | args := parseArgs(L, 3) 70 | execResult, err := processQuery(L, ud.db, context.Background(), sqlQuery, args...) 71 | if err != nil { 72 | L.RaiseError("error: %s", err.Error()) 73 | return 0 74 | } 75 | result := L.NewTable() 76 | result.RawSetString(`rows`, execResult.Rows) 77 | result.RawSetString(`columns`, execResult.Columns) 78 | L.Push(result) 79 | return 1 80 | } 81 | 82 | // availableConnections push list of all connections 83 | func availableConnections(L *lua.LState) int { 84 | ud := checkUserDataConnection(L, 1) 85 | sqlRows, err := ud.db.Query(listConnections) 86 | if err != nil { 87 | L.RaiseError("error: %s", err.Error()) 88 | return 0 89 | } 90 | defer sqlRows.Close() 91 | result := L.NewTable() 92 | for sqlRows.Next() { 93 | dbname := "" 94 | if errScan := sqlRows.Scan(&dbname); errScan != nil { 95 | L.Push(lua.LNil) 96 | L.Push(lua.LString(errScan.Error())) 97 | return 2 98 | } 99 | c := &connection{ 100 | host: ud.host, 101 | dbname: dbname, 102 | port: ud.port, 103 | user: ud.user, 104 | password: ud.password, 105 | params: ud.params, 106 | } 107 | newUd := c.userData(L) 108 | result.Append(newUd) 109 | } 110 | L.Push(result) 111 | return 1 112 | } 113 | -------------------------------------------------------------------------------- /gatherer/internal/connection/connection_metric.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | lua "github.com/yuin/gopher-lua" 8 | ) 9 | 10 | const ( 11 | queryInsert = ` 12 | insert into metric 13 | (host, plugin, snapshot, value_bigint, value_double, value_jsonb) 14 | values (md5($1)::uuid, md5($2)::uuid, $3, $4, $5, $6::jsonb) 15 | ` 16 | ) 17 | 18 | func insertMetric(L *lua.LState) int { 19 | ud := checkUserDataConnection(L, 1) 20 | table := L.CheckTable(2) 21 | m, err := parseMetric(table) 22 | if err != nil { 23 | L.RaiseError("parse metric: %s", err.Error()) 24 | return 0 25 | } 26 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 27 | defer cancel() 28 | _, err = ud.db.ExecContext(ctx, queryInsert, 29 | m.host, m.plugin, m.snapshot, m.valueInteger, m.valueFloat64, m.valueJson) 30 | if err != nil { 31 | L.RaiseError("save metric: %s", err.Error()) 32 | } 33 | return 0 34 | } 35 | -------------------------------------------------------------------------------- /gatherer/internal/connection/connection_test.go: -------------------------------------------------------------------------------- 1 | package connection_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/vadv/gopher-lua-libs/time" 7 | 8 | "github.com/vadv/gopher-lua-libs/inspect" 9 | 10 | "github.com/vadv/pg_gatherer/gatherer/internal/connection" 11 | lua "github.com/yuin/gopher-lua" 12 | ) 13 | 14 | func TestBuildConnectionString(t *testing.T) { 15 | params := make(map[string]string) 16 | params[`10`] = "10" 17 | params[`11`] = "11" 
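// BuildConnectionString must be deterministic for identical inputs: the connection pool keys sql.DB handles by the generated string, so param keys are sorted to avoid random map iteration order.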
18 | first := connection.BuildConnectionString(`host`, `dbname`, 5432, `user`, `password`, params) 19 | for i := 0; i < 100; i++ { 20 | second := connection.BuildConnectionString(`host`, `dbname`, 5432, `user`, `password`, params) 21 | if first != second { 22 | t.Fatalf("first != second:\nfirst: %s\nsecond: %s\n", first, second) 23 | } 24 | } 25 | } 26 | 27 | func TestConnection(t *testing.T) { 28 | 29 | state := lua.NewState() 30 | connection.Preload(state) 31 | connection.New(state, `connection`, 32 | "/tmp", "gatherer", "gatherer", "", 5432, nil) 33 | 34 | inspect.Preload(state) 35 | time.Preload(state) 36 | if err := state.DoFile("./tests/connection.lua"); err != nil { 37 | t.Fatalf("error: %s\n", err.Error()) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /gatherer/internal/connection/driver.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "database/sql" 5 | "database/sql/driver" 6 | "net" 7 | "time" 8 | 9 | "github.com/lib/pq" 10 | ) 11 | 12 | // original: https://gist.github.com/tejasmanohar/fdaafe17d7ac1c083147055f2c03959b 13 | 14 | // KeepAliveDuration is the duration between keepalives for all new Postgres connections. 15 | var KeepAliveDuration = 5 * time.Second 16 | 17 | func init() { 18 | sql.Register("gatherer-pq", &enhancedDriver{}) 19 | } 20 | 21 | // enhancedDriver is a wrapper over lib/pq to mimic jackc/pgx's keepalive policy. 22 | // This avoids: 23 | // * an issue with connections leak 24 | // * an issue where the NAT kills an "idle" connection while it is waiting on a long-running query. 25 | type enhancedDriver struct{} 26 | type dialer struct{} 27 | 28 | // Open returns a new SQL driver connection with our custom settings. 
29 | func (d *enhancedDriver) Open(name string) (driver.Conn, error) { 30 | return pq.DialOpen(&dialer{}, name) 31 | } 32 | 33 | // Dial returns network connection 34 | func (d dialer) Dial(ntw, addr string) (net.Conn, error) { 35 | customDialer := net.Dialer{KeepAlive: KeepAliveDuration} 36 | return customDialer.Dial(ntw, addr) 37 | } 38 | 39 | // DialTimeout returns network connection 40 | func (d dialer) DialTimeout(ntw, addr string, timeout time.Duration) (net.Conn, error) { 41 | customDialer := net.Dialer{Timeout: timeout, KeepAlive: KeepAliveDuration} 42 | return customDialer.Dial(ntw, addr) 43 | } 44 | -------------------------------------------------------------------------------- /gatherer/internal/connection/metric.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | lua "github.com/yuin/gopher-lua" 8 | ) 9 | 10 | type metric struct { 11 | host string 12 | plugin string 13 | snapshot *int64 14 | valueInteger *int64 15 | valueFloat64 *float64 16 | valueJson *string 17 | } 18 | 19 | func parseMetric(table *lua.LTable) (*metric, error) { 20 | m := &metric{} 21 | var err error 22 | table.ForEach(func(k lua.LValue, v lua.LValue) { 23 | switch strings.ToLower(k.String()) { 24 | case `host`: 25 | m.host = v.String() 26 | case `plugin`: 27 | m.plugin = v.String() 28 | case `snapshot`: 29 | if v.Type() != lua.LTNumber { 30 | err = fmt.Errorf("`snapshot` must be number") 31 | return 32 | } 33 | n := int64(v.(lua.LNumber)) 34 | m.snapshot = &n 35 | case `json`: 36 | value := v.String() 37 | m.valueJson = &value 38 | case `int`: 39 | if v.Type() != lua.LTNumber { 40 | err = fmt.Errorf("`int` must be number") 41 | return 42 | } 43 | n := int64(v.(lua.LNumber)) 44 | m.valueInteger = &n 45 | case `float`: 46 | if v.Type() != lua.LTNumber { 47 | err = fmt.Errorf("`float` must be number") 48 | return 49 | } 50 | n := float64(v.(lua.LNumber)) 51 | m.valueFloat64 = &n 52 | } 53 | }) 54 | if err != nil { 55 | return nil, err 56 | } 57 | if m.host == `` { 58 | return nil, fmt.Errorf("empty `host` info") 59 | } 60 | if m.plugin == `` { 61 | return nil, fmt.Errorf("empty `plugin` info") 62 | } 63 | // lua `[]` -> json `{}` 64 | if m.valueJson != nil { 65 | valueJson := *m.valueJson 66 | if valueJson == `[]` { 67 | valueJson = `{}` 68 | m.valueJson = &valueJson 69 | } 70 | } 71 | if m.valueInteger == nil && m.valueFloat64 == nil && (m.valueJson == nil || *m.valueJson == `{}`) { 72 | return nil, fmt.Errorf("empty value") 73 | } 74 | return m, err 75 | } 76 | -------------------------------------------------------------------------------- /gatherer/internal/connection/pool.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "database/sql" 5 | "sync" 6 | "sync/atomic" 7 | ) 8 | 9 | var ( 10 | connectionPool *connPool 11 | poolDatabasesOpen *int32 // for testing 12 | maxOpenConns uint 13 | ) 14 | 15 | func init() { 16 | maxOpenConns = 5 17 | connectionPool = &connPool{ 18 | mutex: sync.Mutex{}, 19 | pool: make(map[string]*sql.DB), 20 | } 21 | zero := int32(0) 22 | poolDatabasesOpen = &zero 23 | } 24 | 25 | // SetMaxOpenConns set max open connections 26 | func SetMaxOpenConns(i uint) { 27 | maxOpenConns = i 28 | connectionPool.mutex.Lock() 29 | defer connectionPool.mutex.Unlock() 30 | for _, db := range connectionPool.pool { 31 | db.SetMaxOpenConns(int(maxOpenConns)) 32 | db.SetMaxIdleConns(int(maxOpenConns)) 33 | } 34 | } 35 | 36 
| type connPool struct { 37 | mutex sync.Mutex 38 | pool map[string]*sql.DB 39 | } 40 | 41 | func newPostgresConnection(connectionString string) (*sql.DB, error) { 42 | atomic.AddInt32(poolDatabasesOpen, 1) 43 | db, err := sql.Open(`gatherer-pq`, connectionString) 44 | if err != nil { 45 | return nil, err 46 | } 47 | db.SetMaxIdleConns(int(maxOpenConns)) 48 | db.SetMaxOpenConns(int(maxOpenConns)) 49 | return db, err 50 | } 51 | 52 | // get sql.DB from connection pool 53 | func getDBFromPool(c *connection) (*sql.DB, error) { 54 | connectionPool.mutex.Lock() 55 | defer connectionPool.mutex.Unlock() 56 | if db, ok := connectionPool.pool[c.connectionString()]; ok { 57 | return db, nil 58 | } else { 59 | // open 60 | newDB, err := newPostgresConnection(c.connectionString()) 61 | if err != nil { 62 | return nil, err 63 | } 64 | // store 65 | connectionPool.pool[c.connectionString()] = newDB 66 | return newDB, nil 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /gatherer/internal/connection/pool_test.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "sync" 7 | "testing" 8 | 9 | lua "github.com/yuin/gopher-lua" 10 | ) 11 | 12 | const doAvailableConnections = ` 13 | connection:query('select 1') 14 | for _, conn in pairs(connection:available_connections()) do 15 | conn:query('select 1') 16 | end 17 | ` 18 | 19 | func TestNew(t *testing.T) { 20 | wait := sync.WaitGroup{} 21 | count := 100 22 | wait.Add(count) 23 | countOfDatabases := getCountOfDatabases(t) 24 | SetMaxOpenConns(1) 25 | for i := 0; i < count; i++ { 26 | go func() { 27 | defer wait.Done() 28 | state := lua.NewState() 29 | Preload(state) 30 | params := make(map[string]string) 31 | params[`fallback_application_name`] = `test` 32 | params[`connect_timeout`] = `5` 33 | New(state, `connection`, 34 | "/tmp", "gatherer", "gatherer", "", 5432, params) 35 | if err := state.DoString(doAvailableConnections); err != nil { 36 | panic(fmt.Sprintf("do: %s\n", err.Error())) 37 | } 38 | }() 39 | } 40 | wait.Wait() 41 | if *poolDatabasesOpen != int32(countOfDatabases) { 42 | t.Fatalf("open: %d count: %d\n", *poolDatabasesOpen, countOfDatabases) 43 | } 44 | if len(connectionPool.pool) != countOfDatabases { 45 | t.Fatalf("pool: %#v\n", connectionPool.pool) 46 | } 47 | if connections := getCountOfApplicationNameTest(t); connections != countOfDatabases { 48 | t.Fatalf("databases: %d connections: %d\n", countOfDatabases, connections) 49 | } 50 | } 51 | 52 | func getCountOfDatabases(t *testing.T) int { 53 | db, err := sql.Open(`postgres`, `host=/tmp dbname=gatherer user=gatherer port=5432`) 54 | if err != nil { 55 | t.Fatalf("open: %s\n", err.Error()) 56 | } 57 | row := db.QueryRow(`select 58 | count(d.datname) 59 | from 60 | pg_catalog.pg_database d 61 | where has_database_privilege(d.datname, 'connect') and not d.datistemplate 62 | `) 63 | defer db.Close() 64 | var result int 65 | if errScan := row.Scan(&result); errScan != nil { 66 | t.Fatalf("scan: %s\n", errScan.Error()) 67 | } 68 | return result 69 | } 70 | 71 | func getCountOfApplicationNameTest(t *testing.T) int { 72 | db, err := sql.Open(`postgres`, `host=/tmp dbname=gatherer user=gatherer port=5432`) 73 | if err != nil { 74 | t.Fatalf("open: %s\n", err.Error()) 75 | } 76 | row := db.QueryRow(`select count(*) from pg_stat_activity where application_name = 'test'`) 77 | defer db.Close() 78 | var result int 79 | if errScan := row.Scan(&result); 
errScan != nil { 80 | t.Fatalf("scan: %s\n", errScan.Error()) 81 | } 82 | return result 83 | } 84 | -------------------------------------------------------------------------------- /gatherer/internal/connection/result.go: -------------------------------------------------------------------------------- 1 | package connection 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "time" 8 | 9 | "github.com/lib/pq" 10 | lua "github.com/yuin/gopher-lua" 11 | ) 12 | 13 | // queryResult is sql-query result 14 | type queryResult struct { 15 | Rows *lua.LTable 16 | Columns *lua.LTable 17 | } 18 | 19 | func processQuery(L *lua.LState, db *sql.DB, ctx context.Context, query string, args ...interface{}) (*queryResult, error) { 20 | tx, err := getTx(db, ctx) 21 | if err != nil { 22 | return nil, err 23 | } 24 | defer func() { _ = tx.Commit() }() 25 | sqlRows, errQuery := tx.Query(query, args...) 26 | if errQuery != nil { 27 | return nil, errQuery 28 | } 29 | return parseRows(sqlRows, L) 30 | } 31 | 32 | func getTx(db *sql.DB, ctx context.Context) (*sql.Tx, error) { 33 | tx, err := db.BeginTx(ctx, &sql.TxOptions{ 34 | Isolation: sql.LevelReadCommitted, 35 | ReadOnly: true, 36 | }) 37 | if err != nil { 38 | return nil, err 39 | } 40 | return tx, nil 41 | } 42 | 43 | func parseRows(sqlRows *sql.Rows, L *lua.LState) (*queryResult, error) { 44 | if sqlRows == nil { 45 | return nil, fmt.Errorf("rows is nil, program bug") 46 | } 47 | cols, err := sqlRows.Columns() 48 | if err != nil { 49 | return nil, err 50 | } 51 | columns := L.CreateTable(len(cols), 1) 52 | for _, col := range cols { 53 | columns.Append(lua.LString(col)) 54 | } 55 | luaRows := L.CreateTable(0, len(cols)) 56 | rowCount := 1 57 | for sqlRows.Next() { 58 | col := make([]interface{}, len(cols)) 59 | pointers := make([]interface{}, len(cols)) 60 | for i := range col { 61 | pointers[i] = &col[i] 62 | } 63 | errScan := sqlRows.Scan(pointers...) 
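// Each column is scanned into a generic interface{}; the type switch below maps the scanned Go value (bool, numbers, strings, arrays, timestamps, NULL) to the corresponding Lua value.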
64 | if errScan != nil { 65 | return nil, errScan 66 | } 67 | luaRow := L.CreateTable(0, len(cols)) 68 | for i := range cols { 69 | valueP := pointers[i].(*interface{}) 70 | value := *valueP 71 | switch converted := value.(type) { 72 | case bool: 73 | luaRow.RawSetInt(i+1, lua.LBool(converted)) 74 | case float64: 75 | luaRow.RawSetInt(i+1, lua.LNumber(converted)) 76 | case int64: 77 | luaRow.RawSetInt(i+1, lua.LNumber(converted)) 78 | case []uint8: 79 | strArr := make([]string, 0) 80 | pqArr := pq.Array(&strArr) 81 | if errConv := pqArr.Scan(converted); errConv != nil { 82 | // todo: new type of array 83 | luaRow.RawSetInt(i+1, lua.LString(converted)) 84 | } else { 85 | tbl := L.NewTable() 86 | for _, v := range strArr { 87 | tbl.Append(lua.LString(v)) 88 | } 89 | luaRow.RawSetInt(i+1, tbl) 90 | } 91 | case string: 92 | luaRow.RawSetInt(i+1, lua.LString(converted)) 93 | case time.Time: 94 | tt := float64(converted.UTC().UnixNano()) / float64(time.Second) 95 | luaRow.RawSetInt(i+1, lua.LNumber(tt)) 96 | case nil: 97 | luaRow.RawSetInt(i+1, lua.LNil) 98 | default: 99 | return nil, fmt.Errorf("unknown type (value: `%#v`, converted: `%#v`)\n", value, converted) 100 | } 101 | } 102 | luaRows.RawSet(lua.LNumber(rowCount), luaRow) 103 | rowCount++ 104 | } 105 | return &queryResult{ 106 | Rows: luaRows, 107 | Columns: columns, 108 | }, sqlRows.Close() 109 | } 110 | -------------------------------------------------------------------------------- /gatherer/internal/connection/tests/connection.lua: -------------------------------------------------------------------------------- 1 | local inspect = require("inspect") 2 | local time = require("time") 3 | 4 | function check_ud(ud) 5 | local result = ud:query("select $1::integer, $1::text, $2", 1, "1") 6 | print(inspect( result )) 7 | 8 | if not(result.rows[1][1] == 1) then error("helpers") end 9 | if not(result.rows[1][2] == "1") then error("helpers") end 10 | end 11 | 12 | -- check connection 13 | check_ud(connection) 14 | 15 | -- check available_connections 16 | local connections = connection:available_connections() 17 | for _, v in pairs(connections) do 18 | check_ud(v) 19 | end 20 | 21 | local err = connection:insert_metric({host="tmp", plugin="plugin.int", int=10}) 22 | if err then error(err) end 23 | 24 | local result, err = connection:query("select value_bigint from metric where plugin = md5('plugin.int')::uuid ") 25 | if err then error(err) end 26 | if not(result.rows[1][1] == 10) then error("value must be 10, but get: "..tostring(result.rows[1][1])) end 27 | 28 | -- background query 29 | local bg_query = connection:background_query("select pg_sleep($1), 1", 10) 30 | time.sleep(1) 31 | if not(bg_query:is_running()) then 32 | error("must be running") 33 | end 34 | bg_query:cancel() 35 | time.sleep(1) 36 | if bg_query:is_running() then 37 | error("must be not running") 38 | end 39 | 40 | bg_query = connection:background_query("select pg_sleep($1), 1", 2) 41 | time.sleep(1) 42 | if not(bg_query:is_running()) then 43 | error("must be running") 44 | end 45 | time.sleep(2) 46 | if bg_query:is_running() then 47 | error("must be not running") 48 | end 49 | local result = bg_query:result() 50 | if not(result.rows[1][2] == 1) then 51 | error("result: "..inspect(result)) 52 | end -------------------------------------------------------------------------------- /gatherer/internal/plugins/README.md: -------------------------------------------------------------------------------- 1 | # Lua 2 | 3 | ## plugin_stat:name() 4 | 5 | Return plugin name. 
6 | 7 | ## plugin_stat:dir() 8 | 9 | Return current plugin directory. 10 | 11 | ## plugin:error_count() 12 | 13 | Return count of errors in current plugin. 14 | 15 | ## plugin:start_count() 16 | 17 | Return count of starts of current plugin. 18 | 19 | ## plugin:last_error() 20 | 21 | Return last error (nil or string). -------------------------------------------------------------------------------- /gatherer/internal/plugins/plugin_lua.go: -------------------------------------------------------------------------------- 1 | package plugins 2 | 3 | import ( 4 | "path/filepath" 5 | 6 | lua "github.com/yuin/gopher-lua" 7 | ) 8 | 9 | func checkPlugin(L *lua.LState, n int) *plugin { 10 | ud := L.CheckUserData(n) 11 | if v, ok := ud.Value.(*plugin); ok { 12 | return v 13 | } 14 | L.ArgError(n, "plugin_status_ud expected") 15 | return nil 16 | } 17 | 18 | // return plugin name 19 | func pluginName(L *lua.LState) int { 20 | p := checkPlugin(L, 1) 21 | p.mutex.Lock() 22 | defer p.mutex.Unlock() 23 | L.Push(lua.LString(p.config.pluginName)) 24 | return 1 25 | } 26 | 27 | // return plugin dir 28 | func pluginDir(L *lua.LState) int { 29 | p := checkPlugin(L, 1) 30 | p.mutex.Lock() 31 | defer p.mutex.Unlock() 32 | dir := filepath.Join(p.config.rootDir, p.config.pluginName) 33 | L.Push(lua.LString(dir)) 34 | return 1 35 | } 36 | 37 | // plugin host 38 | func pluginHost(L *lua.LState) int { 39 | p := checkPlugin(L, 1) 40 | p.mutex.Lock() 41 | defer p.mutex.Unlock() 42 | L.Push(lua.LString(p.config.host)) 43 | return 1 44 | } 45 | 46 | // error count 47 | func pluginErrorCount(L *lua.LState) int { 48 | p := checkPlugin(L, 1) 49 | p.mutex.Lock() 50 | defer p.mutex.Unlock() 51 | L.Push(lua.LNumber(p.statistics.Errors)) 52 | return 1 53 | } 54 | 55 | // start count 56 | func pluginStartCount(L *lua.LState) int { 57 | p := checkPlugin(L, 1) 58 | p.mutex.Lock() 59 | defer p.mutex.Unlock() 60 | L.Push(lua.LNumber(p.statistics.Starts)) 61 | return 1 62 | } 63 | 64 | // last error 65 | func pluginLastError(L *lua.LState) int { 66 | p := checkPlugin(L, 1) 67 | p.mutex.Lock() 68 | defer p.mutex.Unlock() 69 | if p.statistics.LastError != `` { 70 | L.Push(lua.LString(p.statistics.LastError)) 71 | return 1 72 | } 73 | return 0 74 | } 75 | -------------------------------------------------------------------------------- /gatherer/internal/plugins/pool.go: -------------------------------------------------------------------------------- 1 | package plugins 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "sync" 7 | "time" 8 | 9 | "github.com/vadv/pg_gatherer/gatherer/internal/secrets" 10 | ) 11 | 12 | // Pool of plugins 13 | type Pool struct { 14 | mutex sync.Mutex 15 | hosts map[string]*pluginsForHost 16 | rootDir string 17 | globalCacheDir string 18 | } 19 | 20 | // pluginsForHost list of plugins for host 21 | type pluginsForHost struct { 22 | connections map[string]*Connection 23 | plugins []*plugin 24 | } 25 | 26 | // NewPool return new Pool 27 | func NewPool(rootDir string, globalCacheDir string) *Pool { 28 | result := &Pool{ 29 | hosts: make(map[string]*pluginsForHost), 30 | rootDir: rootDir, 31 | globalCacheDir: globalCacheDir, 32 | } 33 | go result.supervisor() 34 | return result 35 | } 36 | 37 | func (p *Pool) supervisor() { 38 | for { 39 | time.Sleep(time.Second) 40 | p.mutex.Lock() 41 | for host, pls := range p.hosts { 42 | for _, pl := range pls.plugins { 43 | running, plErr := pl.check() 44 | pl.updateStatisticCheck() 45 | if !running { 46 | log.Printf("[INFO] host: %s, plugin: %s was not running, start it\n", 47 | 
host, pl.config.pluginName) 48 | if plErr != nil { 49 | log.Printf("[ERROR] host: %s, plugin: %s has error: %s\n", 50 | host, pl.config.pluginName, plErr.Error()) 51 | } 52 | if err := pl.prepareState(); err == nil { 53 | go pl.execute() 54 | } else { 55 | log.Printf("[ERROR] host: %s, plugin: %s can't start: %s\n", 56 | host, pl.config.pluginName, err.Error()) 57 | } 58 | } 59 | } 60 | } 61 | p.mutex.Unlock() 62 | } 63 | } 64 | 65 | // RegisterHost register new host 66 | func (p *Pool) RegisterHost(host string, connections map[string]*Connection) { 67 | p.mutex.Lock() 68 | defer p.mutex.Unlock() 69 | if _, ok := p.hosts[host]; !ok { 70 | p.hosts[host] = &pluginsForHost{ 71 | connections: connections, 72 | plugins: make([]*plugin, 0), 73 | } 74 | } 75 | } 76 | 77 | // RemoveHostAndPlugins stop all plugins and remove host 78 | func (p *Pool) RemoveHostAndPlugins(host string) { 79 | p.mutex.Lock() 80 | defer p.mutex.Unlock() 81 | for plHostName, pls := range p.hosts { 82 | if plHostName == host { 83 | // stop all plugins 84 | for _, pl := range pls.plugins { 85 | pl.stop() 86 | } 87 | delete(p.hosts, host) 88 | } 89 | } 90 | } 91 | 92 | // AddPluginToHost add plugin to host 93 | func (p *Pool) AddPluginToHost(pluginName, host string, secrets *secrets.Storage) error { 94 | p.mutex.Lock() 95 | defer p.mutex.Unlock() 96 | // check 97 | if pls, ok := p.hosts[host]; !ok { 98 | return fmt.Errorf("host not registered") 99 | } else { 100 | for _, pl := range pls.plugins { 101 | if pl.config.pluginName == pluginName { 102 | return fmt.Errorf("plugin already registered") 103 | } 104 | } 105 | } 106 | plConfig := &pluginConfig{ 107 | host: host, 108 | rootDir: p.rootDir, 109 | pluginName: pluginName, 110 | globalCacheDir: p.globalCacheDir, 111 | secrets: secrets, 112 | connections: p.hosts[host].connections, 113 | } 114 | pl, err := createPlugin(plConfig) 115 | if err != nil { 116 | return err 117 | } 118 | p.hosts[host].plugins = append(p.hosts[host].plugins, pl) 119 | if errPrepare := pl.prepareState(); errPrepare != nil { 120 | return errPrepare 121 | } 122 | return nil 123 | } 124 | 125 | // StopAndRemovePluginFromHost stop plugin on host 126 | func (p *Pool) StopAndRemovePluginFromHost(pluginName, host string) error { 127 | p.mutex.Lock() 128 | defer p.mutex.Unlock() 129 | // check 130 | if pls, ok := p.hosts[host]; !ok { 131 | return fmt.Errorf("host '%s' not registered", host) 132 | } else { 133 | var found bool 134 | for _, pl := range pls.plugins { 135 | if pl.config.pluginName == pluginName { 136 | found = true 137 | } 138 | } 139 | if !found { 140 | return fmt.Errorf("plugin '%s' for host '%s' not found", pluginName, host) 141 | } 142 | } 143 | // stop 144 | plugins := make([]*plugin, 0) 145 | for _, pl := range p.hosts[host].plugins { 146 | if pl.config.pluginName == pluginName { 147 | pl.stop() 148 | } else { 149 | plugins = append(plugins, pl) 150 | } 151 | } 152 | p.hosts[host].plugins = plugins 153 | return nil 154 | } 155 | 156 | // PluginStatisticPerHost statistic information about all host 157 | func (p *Pool) PluginStatisticPerHost() map[string][]PluginStatistic { 158 | p.mutex.Lock() 159 | defer p.mutex.Unlock() 160 | result := make(map[string][]PluginStatistic) 161 | for host, pls := range p.hosts { 162 | if _, ok := result[host]; !ok { 163 | result[host] = make([]PluginStatistic, 0) 164 | } 165 | for _, pl := range pls.plugins { 166 | result[host] = append(result[host], pl.getStatistics()) 167 | } 168 | } 169 | return result 170 | } 171 | 
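The pool above is the core scheduling unit: it keeps per-host plugin lists and a supervisor goroutine that restarts any plugin that exits. A minimal usage sketch follows (directory layout, host name, plugin name and connection values are illustrative placeholders, not prescribed configuration; see pool_test.go below and cmd/pg_gatherer for the real wiring):

```go
package main

import (
	"log"
	"time"

	"github.com/vadv/pg_gatherer/gatherer/internal/plugins"
	"github.com/vadv/pg_gatherer/gatherer/internal/secrets"
)

func main() {
	// Plugins are loaded from subdirectories of rootDir; per-plugin caches live under the cache dir.
	pool := plugins.NewPool("./plugins", "./cache")

	// Each named connection becomes a Lua global inside the plugin state (here: `target` and `storage`).
	conn := &plugins.Connection{Host: "127.0.0.1", DBName: "gatherer", Port: 5432, UserName: "gatherer"}
	pool.RegisterHost("example-host", map[string]*plugins.Connection{
		"target":  conn,
		"storage": conn,
	})

	// AddPluginToHost prepares <rootDir>/<plugin>/plugin.lua; the supervisor starts it and restarts it on exit.
	if err := pool.AddPluginToHost("uptime", "example-host", secrets.New("")); err != nil {
		log.Fatalf("add plugin: %s", err)
	}

	time.Sleep(time.Minute) // keep the process alive while the supervisor runs the plugin
}
```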
-------------------------------------------------------------------------------- /gatherer/internal/plugins/pool_test.go: -------------------------------------------------------------------------------- 1 | package plugins_test 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "strings" 7 | "testing" 8 | "time" 9 | 10 | "github.com/vadv/pg_gatherer/gatherer/internal/secrets" 11 | 12 | "github.com/vadv/pg_gatherer/gatherer/internal/plugins" 13 | ) 14 | 15 | const hostname = "localhost-test" 16 | 17 | func TestPool(t *testing.T) { 18 | 19 | pool := plugins.NewPool("./tests/", "./tests/cache") 20 | conn := &plugins.Connection{ 21 | Host: "/tmp", 22 | DBName: "gatherer", 23 | Port: 5432, 24 | UserName: "gatherer", 25 | } 26 | connections := make(map[string]*plugins.Connection) 27 | connections[`target`] = conn 28 | connections[`storage`] = conn 29 | pool.RegisterHost(hostname, connections) 30 | secretStorage := secrets.New(``) 31 | 32 | // delete caches 33 | os.RemoveAll("./tests/cache") 34 | 35 | // add pl_cache 36 | if err := pool.AddPluginToHost("pl_cache", hostname, secretStorage); err != nil { 37 | t.Fatalf("add plugin: %s\n", err.Error()) 38 | } 39 | 40 | // add pl_restarts 41 | if err := pool.AddPluginToHost("pl_restarts", hostname, secretStorage); err != nil { 42 | t.Fatalf("add plugin: %s\n", err.Error()) 43 | } 44 | 45 | // add pl_pg 46 | if err := pool.AddPluginToHost("pl_pg", hostname, secretStorage); err != nil { 47 | t.Fatalf("add plugin: %s\n", err.Error()) 48 | } 49 | 50 | // add pl_rds 51 | if err := pool.AddPluginToHost("pl_rds", hostname, secretStorage); err != nil { 52 | t.Fatalf("add plugin: %s\n", err.Error()) 53 | } 54 | 55 | // add pl_run_every 56 | if err := pool.AddPluginToHost("pl_run_every", hostname, secretStorage); err != nil { 57 | t.Fatalf("add plugin: %s\n", err.Error()) 58 | } 59 | 60 | // add pl_stat 61 | if err := pool.AddPluginToHost("pl_stat", hostname, secretStorage); err != nil { 62 | t.Fatalf("add plugin: %s\n", err.Error()) 63 | } 64 | 65 | // add pl_stop 66 | os.RemoveAll("./tests/pl_stop/must_exist.txt") 67 | os.RemoveAll("./tests/pl_stop/must_not_exist.txt") 68 | if err := pool.AddPluginToHost("pl_stop", hostname, secretStorage); err != nil { 69 | t.Fatalf("add plugin: %s\n", err.Error()) 70 | } 71 | 72 | time.Sleep(5 * time.Second) 73 | 74 | stat := pool.PluginStatisticPerHost() 75 | for _, pl := range stat[hostname] { 76 | 77 | // pl_pg 78 | if pl.PluginName == `pl_pg` { 79 | if pl.Errors > 0 { 80 | t.Fatalf("must not restarted with error: %d\n", pl.Errors) 81 | } 82 | } 83 | 84 | // pl_run_every 85 | if pl.PluginName == `pl_run_every` { 86 | if pl.Errors != 1 { 87 | t.Fatalf("must be 1 time errored: %d\n", pl.Errors) 88 | } 89 | if !strings.Contains(pl.LastError, "first error") { 90 | t.Fatalf("must be error 'first error, but get: %s\n'", pl.LastError) 91 | } 92 | if pl.Starts != 2 { 93 | t.Fatalf("must started 2 times: %d\n", pl.Starts) 94 | } 95 | } 96 | 97 | // pl_rds 98 | if pl.PluginName == `pl_rds` { 99 | if pl.Errors > 0 { 100 | t.Fatalf("must not restarted with error: %d\n", pl.Errors) 101 | } 102 | } 103 | 104 | // pl_cache 105 | if pl.PluginName == `pl_cache` { 106 | if pl.Errors > 0 { 107 | t.Fatalf("must not restarted with error: %d\n", pl.Errors) 108 | } 109 | if pl.Starts != 3 { 110 | t.Fatalf("must start 3 times: %d\n", pl.Starts) 111 | } 112 | } 113 | 114 | // pl_stat 115 | if pl.PluginName == `pl_stat` { 116 | if pl.Errors > 0 { 117 | t.Fatalf("must not restarted with error: %d\n", pl.Errors) 118 | } 119 | } 120 | 121 | // pl_restarts 122 
| if pl.PluginName == `pl_restarts` { 123 | if pl.Errors != 1 { 124 | t.Fatalf("must errored 1 times: %d\n", pl.Errors) 125 | } 126 | if pl.Starts != 3 { 127 | t.Fatalf("must start only 3 times: %d\n", pl.Starts) 128 | } 129 | if !strings.Contains(pl.LastError, `error anchor-test-restarts`) { 130 | t.Fatalf("get error: %s\n", pl.LastError) 131 | } 132 | } 133 | } 134 | 135 | time.Sleep(2 * time.Second) 136 | if _, err := os.Stat("./tests/pl_stop/must_exist.txt"); err != nil { 137 | t.Fatalf("file must exist\n") 138 | } 139 | if _, err := os.Stat("./tests/pl_stop/must_not_exist.txt"); err == nil { 140 | t.Fatalf("file must not exist\n") 141 | } 142 | 143 | // stop all plugins 144 | for _, pl := range stat[hostname] { 145 | log.Printf("stop plugin: %s\n", pl.PluginName) 146 | if err := pool.StopAndRemovePluginFromHost(pl.PluginName, hostname); err != nil { 147 | t.Fatalf("stop %s: %s\n", pl.PluginName, err.Error()) 148 | } 149 | } 150 | 151 | stat = pool.PluginStatisticPerHost() 152 | if len(stat[hostname]) != 0 { 153 | t.Fatalf("all plugins must be stopped\n") 154 | } 155 | pool.RemoveHostAndPlugins(hostname) 156 | 157 | } 158 | -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/.gitignore: -------------------------------------------------------------------------------- 1 | cache 2 | -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/init.lua: -------------------------------------------------------------------------------- 1 | -- this file loaded on first start of plugin 2 | 3 | filepath = require("filepath") 4 | time = require("time") 5 | inspect = require("inspect") 6 | json = require("json") 7 | ioutil = require("ioutil") 8 | 9 | -- current directory (root) 10 | root = filepath.dir(debug.getinfo(1).source) 11 | 12 | -- return true if database hosted on rds 13 | function is_rds(conn) 14 | conn = conn or agent 15 | return not(not( 16 | pcall(function() 17 | target:query("show rds.extensions") 18 | end) 19 | )) 20 | end 21 | 22 | -- return postgresql version 23 | function get_pg_server_version(conn) 24 | conn = conn or agent 25 | if pg_server_version then return pg_server_version end 26 | local version = target:query("show server_version") 27 | pg_server_version = tonumber(version.rows[1][1]) 28 | return pg_server_version 29 | end 30 | 31 | -- run function f every sec 32 | -- this function run in plugin context, then we use cache key `last_run` 33 | function run_every(f, every) 34 | while true do 35 | 36 | local _, updated_at = cache:get("last_run") 37 | updated_at = updated_at or 0 38 | 39 | if time.unix() >= updated_at + every then 40 | local start_at = time.unix() 41 | cache:set("last_run", 0) 42 | f() 43 | local exec_time = (time.unix() - start_at) 44 | if exec_time > every then 45 | print(debug.getinfo(2).source, "execution timeout:", exec_time) 46 | time.sleep(1) 47 | end 48 | else 49 | -- wait 50 | time.sleep(1) 51 | end 52 | 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/pl_cache/plugin.lua: -------------------------------------------------------------------------------- 1 | print("plugin cache was started") 2 | 3 | local count_of_start = cache:get("key") 4 | if not(count_of_start) then count_of_start = 1 end 5 | print("cache count_of_start: ", count_of_start) 6 | 7 | if count_of_start == 3 then 8 | time.sleep(1000) 9 | end 10 | 11 | cache:set("key", count_of_start+1) 
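-- The supervisor restarts this plugin each time it returns; the cache persists `key` across restarts, so on the third start the plugin blocks in time.sleep (pool_test.go asserts Starts == 3 and Errors == 0).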
-------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/pl_pg/plugin.lua: -------------------------------------------------------------------------------- 1 | target:query("select 1") 2 | storage:insert_metric({host="test", plugin="test_pg", int=10}) 3 | 4 | time.sleep(1000) 5 | -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/pl_rds/plugin.lua: -------------------------------------------------------------------------------- 1 | if is_rds() then 2 | error("is_rds true") 3 | end 4 | 5 | time.sleep(1000) -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/pl_restarts/plugin.lua: -------------------------------------------------------------------------------- 1 | print("plugin restarts was started") 2 | 3 | local count_of_start = cache:get("key") 4 | if not(count_of_start) then count_of_start = 1 end 5 | print("restarts count_of_start: ", count_of_start) 6 | 7 | if count_of_start == 2 then 8 | local stop_err = cache:get("stop_key") 9 | if not(stop_err) then 10 | cache:set("stop_key", 1) 11 | cache:set("key", count_of_start+1) 12 | error("error anchor-test-restarts") 13 | end 14 | end 15 | 16 | if count_of_start == 3 then 17 | time.sleep(1000) 18 | end 19 | 20 | cache:set("key", count_of_start+1) -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/pl_run_every/plugin.lua: -------------------------------------------------------------------------------- 1 | local count_of_start = cache:get("key") 2 | if not(count_of_start) then count_of_start = 1 end 3 | 4 | function collect() 5 | time.sleep(1) 6 | 7 | if count_of_start == 1 then 8 | cache:set("key", count_of_start+1) 9 | error("first error") 10 | end 11 | 12 | if count_of_start == 2 then 13 | error("must not be this error") 14 | end 15 | 16 | cache:set("key", count_of_start+1) 17 | end 18 | 19 | run_every(collect, 10) -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/pl_stat/plugin.lua: -------------------------------------------------------------------------------- 1 | if not (plugin:name() == "pl_stat") then 2 | error("name") 3 | end 4 | 5 | if not (plugin:start_count() == 1) then 6 | error("start_count: " .. tostring(plugin:start_count())) 7 | end 8 | 9 | if not (plugin:error_count() == 0) then 10 | error("err_count: " .. tostring(plugin:error_count())) 11 | end 12 | 13 | time.sleep(10000) -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/pl_stop/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt -------------------------------------------------------------------------------- /gatherer/internal/plugins/tests/pl_stop/plugin.lua: -------------------------------------------------------------------------------- 1 | ioutil.write_file(filepath.join(root, "pl_stop", "must_exist.txt"), "") 2 | time.sleep(7) 3 | ioutil.write_file(filepath.join(root, "pl_stop", "must_not_exist.txt"), "") -------------------------------------------------------------------------------- /gatherer/internal/prometheus/README.md: -------------------------------------------------------------------------------- 1 | Creates lua user data `prometheus_metric_ud`. 
2 | 3 | # Golang 4 | 5 | ```go 6 | state := lua.NewState() 7 | prometheus.Preload(state) 8 | ``` 9 | 10 | # Lua 11 | 12 | ## local gauge = prometheus:gauge({name="",namespace="",subsystem="",name="",help=""}) 13 | 14 | Register prometheus `gauge`. 15 | 16 | ## gauge:set(number), gauge:inc(), gauge:add(1) 17 | 18 | Set value to `gauge`. 19 | 20 | ## local counter = prometheus:counter({name="",namespace="",subsystem="",name="",help=""}) 21 | 22 | Register prometheus `counter`. 23 | 24 | ## counter:inc(), counter:add(1) 25 | 26 | Set value to `counter`. 27 | 28 | ## local gauge_vec = prometheus:gauge({name="",namespace="",subsystem="",name="",help="", labels={"label1", "label2"}) 29 | 30 | Register prometheus `gauge` vector. 31 | 32 | ## gauge_vec:set(number, {label1="",label2=""}), gauge_vec:inc({label1="",label2=""}), gauge_vec:add(1, {label1="",label2=""}) 33 | 34 | Set value to `gauge` vector. 35 | 36 | ## local counter_vec = prometheus:gauge({name="",namespace="",subsystem="",name="",help="", labels={"label1", "label2"}) 37 | 38 | Register prometheus `counter` vector. 39 | 40 | ## counter_vec:inc({label1="",label2=""}), counter_vec:add(1, {label1="",label2=""}) 41 | 42 | Set value to `counter` vector. -------------------------------------------------------------------------------- /gatherer/internal/prometheus/metric.go: -------------------------------------------------------------------------------- 1 | package prometheus 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | lua "github.com/yuin/gopher-lua" 6 | ) 7 | 8 | type luaPrometheusMetricConfig struct { 9 | namespace string 10 | name string 11 | subsystem string 12 | help string 13 | labels []string 14 | } 15 | 16 | type luaMetric struct { 17 | ud *lua.LUserData 18 | isGauge bool 19 | gauge prometheus.Gauge 20 | gaugeVec *prometheus.GaugeVec 21 | counter prometheus.Counter 22 | counterVec *prometheus.CounterVec 23 | config *luaPrometheusMetricConfig 24 | } 25 | 26 | // Gauge 27 | // prometheus.gauge(config) return lua (user data, error) 28 | // config table: 29 | // { 30 | // namespace="pg_gatherer", 31 | // subsystem="pg", 32 | // name="transactions_per_second", 33 | // help="transactions_per_second per database", 34 | // labels={"database"}, -- optional 35 | // } 36 | func Gauge(L *lua.LState) int { 37 | return newMetric(L, true) 38 | } 39 | 40 | // Counter 41 | // prometheus.counter(config) return lua (user data, error) 42 | // config table: 43 | // { 44 | // namespace="pg_gatherer", 45 | // subsystem="pg", 46 | // name="transactions_per_second", 47 | // help="transactions_per_second per database", 48 | // labels={"database"}, -- optional 49 | // } 50 | func Counter(L *lua.LState) int { 51 | return newMetric(L, false) 52 | } 53 | 54 | // Set lua prometheus_metric_ud:set(value) 55 | func Set(L *lua.LState) int { 56 | metric := checkMetric(L, 1) 57 | if !metric.isGauge { 58 | L.ArgError(1, "unsupported operations for counter") 59 | } 60 | value := float64(L.CheckNumber(2)) 61 | if metric.config.hasLabels() { 62 | labels := luaTableToPrometheusLabels(L.CheckTable(3)) 63 | metric.gaugeVec.With(labels).Set(value) 64 | } else { 65 | metric.gauge.Set(value) 66 | } 67 | return 0 68 | } 69 | 70 | // Add lua prometheus_metric_ud:add(value) 71 | func Add(L *lua.LState) int { 72 | metric := checkMetric(L, 1) 73 | value := float64(L.CheckNumber(2)) 74 | if metric.isGauge { 75 | if metric.config.hasLabels() { 76 | labels := luaTableToPrometheusLabels(L.CheckTable(3)) 77 | metric.gaugeVec.With(labels).Add(value) 78 | } 
else { 79 | metric.gauge.Add(value) 80 | } 81 | } else { 82 | if metric.config.hasLabels() { 83 | labels := luaTableToPrometheusLabels(L.CheckTable(3)) 84 | metric.counterVec.With(labels).Add(value) 85 | } else { 86 | metric.counter.Add(value) 87 | } 88 | } 89 | return 0 90 | } 91 | 92 | // Inc lua prometheus_metric_ud:inc() 93 | func Inc(L *lua.LState) int { 94 | metric := checkMetric(L, 1) 95 | if metric.isGauge { 96 | if metric.config.hasLabels() { 97 | labels := luaTableToPrometheusLabels(L.CheckTable(2)) 98 | metric.gaugeVec.With(labels).Inc() 99 | } else { 100 | metric.gauge.Inc() 101 | } 102 | } else { 103 | if metric.config.hasLabels() { 104 | labels := luaTableToPrometheusLabels(L.CheckTable(2)) 105 | metric.counterVec.With(labels).Inc() 106 | } else { 107 | metric.counter.Inc() 108 | } 109 | } 110 | return 0 111 | } 112 | 113 | func checkMetric(L *lua.LState, n int) *luaMetric { 114 | ud := L.CheckUserData(n) 115 | if v, ok := ud.Value.(*luaMetric); ok { 116 | return v 117 | } 118 | L.ArgError(n, "prometheus_metric_ud expected") 119 | return nil 120 | } 121 | 122 | func newMetric(L *lua.LState, isGauge bool) int { 123 | config := L.CheckTable(1) 124 | mConfig := luaTableToMetricConfig(config, L) 125 | fullKey := mConfig.getKey() 126 | if m, ok := registeredMetrics.get(fullKey); ok { 127 | if m.config.equal(mConfig) { 128 | L.Push(m.ud) 129 | return 1 130 | } 131 | L.Push(lua.LNil) 132 | L.Push(lua.LString("already created with another config")) 133 | return 2 134 | } 135 | ud := L.NewUserData() 136 | metric := &luaMetric{config: mConfig} 137 | ud.Value = metric 138 | metric.ud = ud 139 | if isGauge { 140 | // is Gauge 141 | metric.isGauge = true 142 | if mConfig.hasLabels() { 143 | // is GaugeVec 144 | gaugeVec := prometheus.NewGaugeVec(mConfig.getGaugeOpts(), mConfig.labels) 145 | if err := prometheus.Register(gaugeVec); err != nil { 146 | L.Push(lua.LNil) 147 | L.Push(lua.LString(err.Error())) 148 | return 2 149 | } 150 | metric.gaugeVec = gaugeVec 151 | } else { 152 | // is Gauge 153 | gauge := prometheus.NewGauge(mConfig.getGaugeOpts()) 154 | if err := prometheus.Register(gauge); err != nil { 155 | L.Push(lua.LNil) 156 | L.Push(lua.LString(err.Error())) 157 | return 2 158 | } 159 | metric.gauge = gauge 160 | } 161 | } else { 162 | // is Counter 163 | metric.isGauge = false 164 | if mConfig.hasLabels() { 165 | // is CounterVec 166 | counterVec := prometheus.NewCounterVec(mConfig.getCounterOpts(), mConfig.labels) 167 | if err := prometheus.Register(counterVec); err != nil { 168 | L.Push(lua.LNil) 169 | L.Push(lua.LString(err.Error())) 170 | return 2 171 | } 172 | metric.counterVec = counterVec 173 | } else { 174 | // is Counter 175 | counter := prometheus.NewCounter(mConfig.getCounterOpts()) 176 | if err := prometheus.Register(counter); err != nil { 177 | L.Push(lua.LNil) 178 | L.Push(lua.LString(err.Error())) 179 | return 2 180 | } 181 | metric.counter = counter 182 | } 183 | } 184 | 185 | L.SetMetatable(ud, L.GetTypeMetatable("prometheus_metric_ud")) 186 | L.Push(ud) 187 | registeredMetrics.set(fullKey, metric) 188 | return 1 189 | } 190 | -------------------------------------------------------------------------------- /gatherer/internal/prometheus/metric_registered.go: -------------------------------------------------------------------------------- 1 | package prometheus 2 | 3 | import "sync" 4 | 5 | var registeredMetrics = newPrometheusMetricList() 6 | 7 | type prometheusMetricList struct { 8 | lock *sync.Mutex 9 | cache map[string]*luaMetric 10 | } 11 | 12 | func 
newPrometheusMetricList() *prometheusMetricList { 13 | return &prometheusMetricList{ 14 | lock: &sync.Mutex{}, 15 | cache: make(map[string]*luaMetric), 16 | } 17 | } 18 | 19 | func (c *prometheusMetricList) get(key string) (*luaMetric, bool) { 20 | c.lock.Lock() 21 | defer c.lock.Unlock() 22 | m, ok := c.cache[key] 23 | return m, ok 24 | } 25 | 26 | func (c *prometheusMetricList) set(key string, m *luaMetric) { 27 | c.lock.Lock() 28 | defer c.lock.Unlock() 29 | c.cache[key] = m 30 | } 31 | -------------------------------------------------------------------------------- /gatherer/internal/prometheus/metric_util.go: -------------------------------------------------------------------------------- 1 | package prometheus 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "strings" 7 | 8 | "github.com/prometheus/client_golang/prometheus" 9 | 10 | lua "github.com/yuin/gopher-lua" 11 | ) 12 | 13 | const ( 14 | // DefaultNamespace is default namespace for prometheus 15 | DefaultNamespace = `pg` 16 | // DefaultSubsystem is default subsystem for prometheus 17 | DefaultSubsystem = `gatherer` 18 | ) 19 | 20 | // convert lua table to luaPrometheusMetricConfig 21 | func luaTableToMetricConfig(config *lua.LTable, L *lua.LState) *luaPrometheusMetricConfig { 22 | result := &luaPrometheusMetricConfig{ 23 | namespace: DefaultNamespace, 24 | subsystem: DefaultSubsystem, 25 | } 26 | config.ForEach(func(k lua.LValue, v lua.LValue) { 27 | switch k.String() { 28 | case `namespace`: 29 | result.namespace = v.String() 30 | case `subsystem`: 31 | result.subsystem = v.String() 32 | case `name`: 33 | result.name = v.String() 34 | case `help`: 35 | result.help = v.String() 36 | case `labels`: 37 | tbl, ok := v.(*lua.LTable) 38 | if !ok { 39 | L.ArgError(1, "labels must be string") 40 | } 41 | result.labels = luaTableToSlice(tbl) 42 | } 43 | }) 44 | return result 45 | } 46 | 47 | // convert lua table to sorted []string 48 | func luaTableToSlice(tbl *lua.LTable) []string { 49 | result := make([]string, 0) 50 | tbl.ForEach(func(k lua.LValue, v lua.LValue) { 51 | result = append(result, v.String()) 52 | }) 53 | sort.Strings(result) 54 | return result 55 | } 56 | 57 | func (m *luaPrometheusMetricConfig) hasLabels() bool { 58 | return len(m.labels) > 0 59 | } 60 | 61 | func (m *luaPrometheusMetricConfig) getKey() string { 62 | return fmt.Sprintf("%s_%s_%s", m.namespace, m.subsystem, m.name) 63 | } 64 | 65 | func (m *luaPrometheusMetricConfig) equal(m2 *luaPrometheusMetricConfig) bool { 66 | if m.getKey() != m2.getKey() { 67 | return false 68 | } 69 | if len(m.labels) == 0 && len(m2.labels) == 0 { 70 | return true 71 | } 72 | if len(m.labels) != len(m2.labels) { 73 | return false 74 | } 75 | sort.Strings(m.labels) 76 | sort.Strings(m2.labels) 77 | return strings.Join(m.labels, "") == strings.Join(m2.labels, "") 78 | } 79 | 80 | func (m *luaPrometheusMetricConfig) getGaugeOpts() prometheus.GaugeOpts { 81 | return prometheus.GaugeOpts{ 82 | Namespace: m.namespace, 83 | Subsystem: m.subsystem, 84 | Name: m.name, 85 | Help: m.help, 86 | } 87 | } 88 | 89 | func (m *luaPrometheusMetricConfig) getCounterOpts() prometheus.CounterOpts { 90 | return prometheus.CounterOpts{ 91 | Namespace: m.namespace, 92 | Subsystem: m.subsystem, 93 | Name: m.name, 94 | Help: m.help, 95 | } 96 | } 97 | 98 | //convert lua table to prometheus.Label 99 | func luaTableToPrometheusLabels(tbl *lua.LTable) prometheus.Labels { 100 | result := make(map[string]string) 101 | tbl.ForEach(func(k lua.LValue, v lua.LValue) { 102 | result[k.String()] = v.String() 103 | }) 104 | 
return result 105 | } 106 | -------------------------------------------------------------------------------- /gatherer/internal/prometheus/prometheus.go: -------------------------------------------------------------------------------- 1 | package prometheus 2 | 3 | import lua "github.com/yuin/gopher-lua" 4 | 5 | // Preload is the preloader of user data prometheus_metric_ud. 6 | func Preload(L *lua.LState) int { 7 | prometheusUD := L.NewTypeMetatable(`prometheus_metric_ud`) 8 | L.SetGlobal(`prometheus_metric_ud`, prometheusUD) 9 | L.SetField(prometheusUD, "__index", L.SetFuncs(L.NewTable(), map[string]lua.LGFunction{ 10 | "set": Set, 11 | "get": Inc, 12 | "add": Add, 13 | })) 14 | L.SetGlobal("prometheus_counter", L.NewFunction(Counter)) 15 | L.SetGlobal("prometheus_gauge", L.NewFunction(Gauge)) 16 | return 0 17 | } 18 | -------------------------------------------------------------------------------- /gatherer/internal/secrets/README.md: -------------------------------------------------------------------------------- 1 | Creates lua user data `secret_ud`. 2 | 3 | # Golang 4 | 5 | ```go 6 | state := lua.NewState() 7 | secrets.Preload(state) 8 | s := secrets.New(filename) 9 | // register user data "secrets" 10 | s.Register(state, "secrets") 11 | ``` 12 | 13 | # Lua 14 | 15 | ## secret:get(key) 16 | 17 | Get secret, return string or nil. -------------------------------------------------------------------------------- /gatherer/internal/secrets/secret.go: -------------------------------------------------------------------------------- 1 | package secrets 2 | 3 | import ( 4 | "io/ioutil" 5 | "log" 6 | "sync" 7 | 8 | "gopkg.in/yaml.v2" 9 | ) 10 | 11 | // Storage for secrets 12 | type Storage struct { 13 | mutex sync.Mutex 14 | filename string 15 | data map[string]string 16 | } 17 | 18 | // New return new Secret 19 | func New(filename string) *Storage { 20 | result := &Storage{ 21 | mutex: sync.Mutex{}, 22 | filename: filename, 23 | data: make(map[string]string), 24 | } 25 | result.Read() 26 | return result 27 | } 28 | 29 | // Read secrets from file 30 | func (s *Storage) Read() { 31 | if s.filename != `` { 32 | log.Printf("[INFO] reading secret file: %s\n", s.filename) 33 | } else { 34 | log.Printf("[INFO] skip read secret file: not specified\n") 35 | } 36 | s.mutex.Lock() 37 | defer s.mutex.Unlock() 38 | result := make(map[string]string) 39 | data, err := ioutil.ReadFile(s.filename) 40 | if err != nil { 41 | log.Printf("[ERROR] read secret file: %s\n", err.Error()) 42 | return 43 | } 44 | if errYaml := yaml.Unmarshal(data, &result); errYaml != nil { 45 | log.Printf("[ERROR] parse secret file: %s\n", errYaml.Error()) 46 | } else { 47 | log.Printf("[INFO] secret file %s readed\n", s.filename) 48 | } 49 | s.data = result 50 | } 51 | 52 | // get value 53 | func (s *Storage) get(key string) *string { 54 | s.mutex.Lock() 55 | defer s.mutex.Unlock() 56 | result, ok := s.data[key] 57 | if !ok { 58 | return nil 59 | } 60 | return &result 61 | } 62 | -------------------------------------------------------------------------------- /gatherer/internal/secrets/secret_lua.go: -------------------------------------------------------------------------------- 1 | package secrets 2 | 3 | import ( 4 | lua "github.com/yuin/gopher-lua" 5 | ) 6 | 7 | // Preload is the preloader of user data secret_ud. 
8 | func Preload(L *lua.LState) int { 9 | secretUD := L.NewTypeMetatable(`secret_ud`) 10 | L.SetGlobal(`secret_ud`, secretUD) 11 | L.SetField(secretUD, "__index", L.SetFuncs(L.NewTable(), map[string]lua.LGFunction{ 12 | "get": getSecret, 13 | })) 14 | return 0 15 | } 16 | 17 | // Register secrets_ud in state 18 | func (s *Storage) Register(L *lua.LState, userDataName string) { 19 | ud := L.NewUserData() 20 | ud.Value = s 21 | L.SetMetatable(ud, L.GetTypeMetatable(`secret_ud`)) 22 | L.SetGlobal(userDataName, ud) 23 | } 24 | 25 | func checkSecret(L *lua.LState, n int) *Storage { 26 | ud := L.CheckUserData(n) 27 | if v, ok := ud.Value.(*Storage); ok { 28 | return v 29 | } 30 | L.ArgError(n, "secret_ud expected") 31 | return nil 32 | } 33 | 34 | // return plugin dir 35 | func getSecret(L *lua.LState) int { 36 | s := checkSecret(L, 1) 37 | key := L.CheckString(2) 38 | secret := s.get(key) 39 | if secret != nil { 40 | result := *secret 41 | L.Push(lua.LString(result)) 42 | return 1 43 | } 44 | return 0 45 | } 46 | -------------------------------------------------------------------------------- /gatherer/internal/secrets/secret_test.go: -------------------------------------------------------------------------------- 1 | package secrets_test 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | 8 | "github.com/vadv/pg_gatherer/gatherer/internal/secrets" 9 | 10 | "github.com/vadv/gopher-lua-libs/inspect" 11 | "github.com/vadv/gopher-lua-libs/time" 12 | lua "github.com/yuin/gopher-lua" 13 | ) 14 | 15 | func TestCache(t *testing.T) { 16 | 17 | state := lua.NewState() 18 | secrets.Preload(state) 19 | 20 | s1 := secrets.New("./tests/secrets.yaml") 21 | s1.Register(state, `secrets_1`) 22 | time.Preload(state) 23 | inspect.Preload(state) 24 | 25 | if err := state.DoFile("./tests/secrets.lua"); err != nil { 26 | t.Fatalf("error: %s\n", err.Error()) 27 | } 28 | 29 | reloadSecretFile := "./tests/secrets_reload.yaml" 30 | reloadSecretFileData := "set_after_reload: ok\n" 31 | os.RemoveAll(reloadSecretFile) 32 | s2 := secrets.New(reloadSecretFile) 33 | s2.Register(state, `secrets_2`) 34 | if err := state.DoFile("./tests/reload_1.lua"); err != nil { 35 | t.Fatalf("error: %s\n", err.Error()) 36 | } 37 | if err := ioutil.WriteFile(reloadSecretFile, []byte(reloadSecretFileData), 0644); err != nil { 38 | t.Fatalf("error: %s\n", err.Error()) 39 | } 40 | s2.Read() 41 | if err := state.DoFile("./tests/reload_2.lua"); err != nil { 42 | t.Fatalf("error: %s\n", err.Error()) 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /gatherer/internal/secrets/tests/.gitignore: -------------------------------------------------------------------------------- 1 | secrets_reload.yaml -------------------------------------------------------------------------------- /gatherer/internal/secrets/tests/reload_1.lua: -------------------------------------------------------------------------------- 1 | local data = secrets_2:get("set_after_reload") 2 | if data then error("must be nil") end -------------------------------------------------------------------------------- /gatherer/internal/secrets/tests/reload_2.lua: -------------------------------------------------------------------------------- 1 | local data = secrets_2:get("set_after_reload") 2 | if not(data == "ok") then error("data: "..tostring(data)) end -------------------------------------------------------------------------------- /gatherer/internal/secrets/tests/secrets.lua: 
-------------------------------------------------------------------------------- 1 | local data = secrets_1:get("unknown") 2 | if data then error("must be nil") end 3 | 4 | local data = secrets_1:get("test_1") 5 | if not(data == "ok") then error("data: "..tostring(data)) end 6 | -------------------------------------------------------------------------------- /gatherer/internal/secrets/tests/secrets.yaml: -------------------------------------------------------------------------------- 1 | test_1: ok -------------------------------------------------------------------------------- /gatherer/internal/testing_framework/README.md: -------------------------------------------------------------------------------- 1 | Way to test plugin. 2 | 3 | # Golang 4 | 5 | ```go 6 | state := lua.NewState() 7 | testing_framework.Preload(state) 8 | testing_framework.New(state, "./root_of_plugins/", "plugin_name", 9 | "host", "gatherer-db-test", 5432, "gatherer-user-test", "gatherer-password") 10 | ``` 11 | 12 | # Lua 13 | 14 | ## plugin:create() 15 | 16 | Start "plugin.lua" in background, raise error if plugin already created. 17 | 18 | ## plugin:remove() 19 | 20 | Stop "plugin.lua" (raise error 'context canceled' in plugin), raise error if plugin was removed. 21 | 22 | ## plugin:restart_count() 23 | 24 | Get restart count of plugin, raise error if plugin was removed. 25 | 26 | ## plugin:error_count() 27 | 28 | Get restart with error count of plugin, raise error if plugin was removed. 29 | 30 | ## plugin:last_error() 31 | 32 | Get string with error text, raise error if plugin was removed. 33 | 34 | ## connection:query() 35 | 36 | Execute read-only query in manager-db with args. Return table with `rows` and `columns`, raise error. 37 | 38 | ```lua 39 | local result = connection:query("select $1::integer, $1::text, $2", 1, "tests") 40 | --[[ 41 | result: 42 | { 43 | columns = { "int4", "text", "?column?" }, 44 | rows = { { 1, "1", "1" } } 45 | } 46 | --]] 47 | ``` 48 | -------------------------------------------------------------------------------- /gatherer/internal/testing_framework/framework.go: -------------------------------------------------------------------------------- 1 | package testing_framework 2 | 3 | import ( 4 | "github.com/vadv/pg_gatherer/gatherer/internal/plugins" 5 | "github.com/vadv/pg_gatherer/gatherer/internal/secrets" 6 | lua "github.com/yuin/gopher-lua" 7 | ) 8 | 9 | type framework struct { 10 | pool *plugins.Pool 11 | secrets *secrets.Storage 12 | pluginName string 13 | host string 14 | } 15 | 16 | // Preload is the preloader of user data connection_ud. 
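// It registers the testing_framework_ud metatable with the create, remove, restart_count, error_count, last_error and host methods listed below.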
17 | func Preload(L *lua.LState) int { 18 | frameworkUD := L.NewTypeMetatable(`testing_framework_ud`) 19 | L.SetGlobal(`testing_framework_ud`, frameworkUD) 20 | L.SetField(frameworkUD, "__index", L.SetFuncs(L.NewTable(), map[string]lua.LGFunction{ 21 | "create": createPlugin, 22 | "remove": removePlugin, 23 | "restart_count": restartCount, 24 | "error_count": errorCount, 25 | "last_error": lastError, 26 | "host": hostName, 27 | })) 28 | return 0 29 | } 30 | 31 | // New create new testing_framework into lua state as user data 32 | func New(L *lua.LState, rootDir, cacheDir, pluginName, host, dbname, user, password string, port int, params map[string]string) error { 33 | pool := plugins.NewPool(rootDir, cacheDir) 34 | conn := &plugins.Connection{ 35 | Host: host, 36 | DBName: dbname, 37 | Port: port, 38 | UserName: user, 39 | Password: password, 40 | Params: params, 41 | } 42 | connections := make(map[string]*plugins.Connection) 43 | connections[`target`] = conn 44 | connections[`storage`] = conn 45 | f := &framework{ 46 | pool: pool, 47 | pluginName: pluginName, 48 | host: pluginName, 49 | secrets: secrets.New(``), 50 | } 51 | pool.RegisterHost(f.host, connections) 52 | ud := L.NewUserData() 53 | ud.Value = f 54 | L.SetMetatable(ud, L.GetTypeMetatable(`testing_framework_ud`)) 55 | L.SetGlobal("tested_plugin", ud) 56 | return nil 57 | } 58 | -------------------------------------------------------------------------------- /gatherer/internal/testing_framework/framework_lua.go: -------------------------------------------------------------------------------- 1 | package testing_framework 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/vadv/pg_gatherer/gatherer/internal/plugins" 7 | lua "github.com/yuin/gopher-lua" 8 | ) 9 | 10 | // checkUserDataFramework return testing_framework from lua state 11 | func checkUserDataFramework(L *lua.LState, n int) *framework { 12 | ud := L.CheckUserData(n) 13 | if v, ok := ud.Value.(*framework); ok { 14 | return v 15 | } 16 | L.ArgError(n, "testing_framework_ud expected") 17 | return nil 18 | } 19 | 20 | func createPlugin(L *lua.LState) int { 21 | ud := checkUserDataFramework(L, 1) 22 | if err := ud.pool.AddPluginToHost(ud.pluginName, ud.host, ud.secrets); err != nil { 23 | L.RaiseError("register error: %s", err.Error()) 24 | } 25 | return 0 26 | } 27 | 28 | func removePlugin(L *lua.LState) int { 29 | ud := checkUserDataFramework(L, 1) 30 | if err := ud.pool.StopAndRemovePluginFromHost(ud.pluginName, ud.host); err != nil { 31 | L.RaiseError("remove error: %s", err.Error()) 32 | } 33 | time.Sleep(time.Second) 34 | return 0 35 | } 36 | 37 | func restartCount(L *lua.LState) int { 38 | ud := checkUserDataFramework(L, 1) 39 | stat := getStatistic(L, ud) 40 | L.Push(lua.LNumber(stat.Starts)) 41 | return 1 42 | } 43 | 44 | func errorCount(L *lua.LState) int { 45 | ud := checkUserDataFramework(L, 1) 46 | stat := getStatistic(L, ud) 47 | L.Push(lua.LNumber(stat.Errors)) 48 | return 1 49 | } 50 | 51 | func lastError(L *lua.LState) int { 52 | ud := checkUserDataFramework(L, 1) 53 | stat := getStatistic(L, ud) 54 | L.Push(lua.LString(stat.LastError)) 55 | return 1 56 | } 57 | 58 | func hostName(L *lua.LState) int { 59 | ud := checkUserDataFramework(L, 1) 60 | L.Push(lua.LString(ud.host)) 61 | return 1 62 | } 63 | 64 | func getStatistic(L *lua.LState, ud *framework) *plugins.PluginStatistic { 65 | statistic := ud.pool.PluginStatisticPerHost() 66 | hostPluginStat, okPluginStatForHost := statistic[ud.host] 67 | if !okPluginStatForHost { 68 | L.RaiseError("host '%s' not 
registered", ud.host) 69 | return nil 70 | } 71 | var stat *plugins.PluginStatistic 72 | for _, plStat := range hostPluginStat { 73 | if plStat.PluginName == ud.pluginName { 74 | stat = &plStat 75 | } 76 | } 77 | if stat == nil { 78 | L.RaiseError("plugin '%s' not found", ud.pluginName) 79 | return nil 80 | } 81 | return stat 82 | } 83 | -------------------------------------------------------------------------------- /gatherer/internal/testing_framework/framework_test.go: -------------------------------------------------------------------------------- 1 | package testing_framework_test 2 | 3 | import ( 4 | "testing" 5 | 6 | libs "github.com/vadv/gopher-lua-libs" 7 | 8 | "github.com/vadv/pg_gatherer/gatherer/internal/testing_framework" 9 | lua "github.com/yuin/gopher-lua" 10 | ) 11 | 12 | func TestFramework(t *testing.T) { 13 | 14 | state := lua.NewState() 15 | 16 | libs.Preload(state) 17 | testing_framework.Preload(state) 18 | if err := testing_framework.New(state, `./tests`, `./tests/cache`, `testing-1`, 19 | `/tmp`, "gatherer", "gatherer", "", 5432, nil); err != nil { 20 | t.Fatalf(err.Error()) 21 | } 22 | 23 | if err := state.DoFile("./tests/testing-1/test.lua"); err != nil { 24 | t.Fatalf("error: %s\n", err.Error()) 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /gatherer/internal/testing_framework/tests/cache/testing-1/testing-1/cache.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/gatherer/internal/testing_framework/tests/cache/testing-1/testing-1/cache.sqlite -------------------------------------------------------------------------------- /gatherer/internal/testing_framework/tests/init.lua: -------------------------------------------------------------------------------- 1 | time = require("time") -------------------------------------------------------------------------------- /gatherer/internal/testing_framework/tests/testing-1/plugin.lua: -------------------------------------------------------------------------------- 1 | target:query("select pg_sleep(1000)") 2 | -------------------------------------------------------------------------------- /gatherer/internal/testing_framework/tests/testing-1/test.lua: -------------------------------------------------------------------------------- 1 | local time = require("time") 2 | 3 | tested_plugin:create() 4 | time.sleep(3) 5 | if not (tested_plugin:error_count() == 0) then 6 | error("error count: " .. 
tostring(tested_plugin:error_count())) 7 | end 8 | 9 | tested_plugin:remove() -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/vadv/pg_gatherer 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/lib/pq v1.7.0 7 | github.com/mattn/go-sqlite3 v1.14.3 8 | github.com/prometheus/client_golang v1.5.1 9 | github.com/vadv/gopher-lua-libs v0.1.1 10 | github.com/yuin/gopher-lua v0.0.0-20200603152657-dc2b0ca8b37e 11 | gopkg.in/yaml.v2 v2.3.0 12 | ) 13 | -------------------------------------------------------------------------------- /grafana/Makefile: -------------------------------------------------------------------------------- 1 | dashboard: submodule_check 2 | jsonnet -J jsonnet jsonnet/dashboard.jsonnet -o dashboard.json 3 | 4 | submodules: 5 | git submodule init 6 | git submodule update 7 | 8 | submodule_update: 9 | git submodule update 10 | 11 | submodule_pull: 12 | git submodule foreach "git pull" 13 | 14 | submodule_check: 15 | @-test -d .git -a .gitmodules && \ 16 | git submodule status \ 17 | | grep -q "^-" \ 18 | && $(MAKE) submodules || true 19 | @-test -d .git -a .gitmodules && \ 20 | git submodule status \ 21 | | grep -q "^+" \ 22 | && $(MAKE) submodule_update || true -------------------------------------------------------------------------------- /grafana/README.md: -------------------------------------------------------------------------------- 1 | # Install 2 | 3 | Import dashboard json file [dashboard.json](dashboard.json) and configure storage-db as datasource. 4 | 5 | # Build 6 | 7 | Requires jsonnet binary. 8 | 9 | ```bash 10 | go get github.com/google/go-jsonnet/cmd/jsonnet 11 | make dashboard 12 | ``` 13 | 14 | # Examples 15 | ## status indicators 16 | ![status indicators](images/1.png) 17 | ## databases 18 | ![databases](images/2.png) 19 | ## tables 20 | ![tables](images/3.png) 21 | ## operations 22 | ![operations](images/4.png) 23 | ## buffer pool 24 | ![buffer pool-1](images/14.png) 25 | ![buffer pool-2](images/15.png) 26 | ## backend states 27 | ![backend states](images/5.png) 28 | ## stat statements 29 | ![statements-1](images/6.png) 30 | ![statements-2](images/7.png) 31 | ## logged statements 32 | ![logs](images/8.png) 33 | ## vacuum 34 | ![vacuum](images/9.png) 35 | ## sequential scans 36 | ![seq scans](images/10.png) 37 | ## tuples/blocks access 38 | ![tuples](images/11.png) 39 | ## wal 40 | ![wal](images/12.png) 41 | ## rds 42 | ![rds](images/13.png) -------------------------------------------------------------------------------- /grafana/images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/1.png -------------------------------------------------------------------------------- /grafana/images/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/10.png -------------------------------------------------------------------------------- /grafana/images/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/11.png -------------------------------------------------------------------------------- 
/grafana/images/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/12.png -------------------------------------------------------------------------------- /grafana/images/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/13.png -------------------------------------------------------------------------------- /grafana/images/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/14.png -------------------------------------------------------------------------------- /grafana/images/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/15.png -------------------------------------------------------------------------------- /grafana/images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/2.png -------------------------------------------------------------------------------- /grafana/images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/3.png -------------------------------------------------------------------------------- /grafana/images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/4.png -------------------------------------------------------------------------------- /grafana/images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/5.png -------------------------------------------------------------------------------- /grafana/images/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/6.png -------------------------------------------------------------------------------- /grafana/images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/7.png -------------------------------------------------------------------------------- /grafana/images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/8.png -------------------------------------------------------------------------------- /grafana/images/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vadv/pg_gatherer/40826872b965790aa0c67f76acdebd96ffdda7c5/grafana/images/9.png 
-------------------------------------------------------------------------------- /plugins/.gitignore: -------------------------------------------------------------------------------- 1 | cache 2 | -------------------------------------------------------------------------------- /plugins/README.md: -------------------------------------------------------------------------------- 1 | # Plugin 2 | 3 | * A plugin is a directory in which a plugin.lua file must be located. 4 | * If a plugin raises an error, it is automatically restarted. 5 | * Each plugin lives in a separate Lua state. 6 | * Before each start of a plugin, [init.lua](init.lua) is executed. 7 | 8 | 9 | Lua 5.1 and all libraries from [glua-libs](https://github.com/vadv/gopher-lua-libs) are available inside a plugin. 10 | 11 | ## Connection 12 | 13 | For each connection described in the host config file you can: 14 | 15 | ```lua 16 | -- execute a read-only query: 17 | connection_name:query(query, args1, argN) 18 | -- returns {columns={"name-1", ...}, rows={ {0="", 1=""}, {} } } 19 | 20 | -- insert a metric, table: {plugin="", host="", int=, float=, json=} 21 | -- relevant only for the storage connection 22 | connection_name:insert_metric() 23 | -- returns nil, raises an error on failure 24 | ``` 25 | 26 | ## Cache 27 | 28 | For each plugin a table is created in SQLite where information can be stored in key-value format. 29 | `cache` is registered as a global user-data object. 30 | 31 | ```lua 32 | -- key: string, value: float 33 | cache:set(key, value) 34 | -- returns nil, raises an error on failure 35 | 36 | -- key: string 37 | cache:get(key) 38 | -- returns value: float, updated_at: float (unix ts) 39 | 40 | -- key: string, value: float 41 | cache:diff_and_set(key, value) 42 | -- returns the diff between the current and the previously set value: float, or nil if no previous value was set. 43 | 44 | -- key: string, value: float 45 | cache:speed_and_set(key, value) 46 | -- returns the speed (current-previous)/(current_time - previous_time): float, or nil if no previous value was set. 47 | ``` 48 | 49 | ## Plugin alerts 50 | 51 | This is a special plugin that must run as a single instance, for example on the storage host. 52 | The plugin analyzes the information saved by other plugins and creates entries in the storage database, 53 | which are then sent by the sender plugin. 54 | 55 | ## Plugin sender 56 | 57 | This plugin integrates with external monitoring: currently it sends information from the `alerts` plugin to [PagerDuty](https://pagerduty.com) 58 | and can also send messages to [Telegram](http://telegram.org).
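
## Example

A minimal sketch of how the pieces above are usually combined in a plugin. The helpers `run_every`, `storage_insert_metric` and the globals `target`, `cache` are the ones used by the bundled plugins below; the plugin name and query here are illustrative only, not part of the repository:

```lua
-- hypothetical plugins/example/plugin.lua: report the commit rate once a minute
local plugin_name = 'pg.example'
local every = 60

local function collect()
  -- read-only query against the monitored (target) database
  local result = target:query([[select
      extract(epoch from now())::int - (extract(epoch from now())::int % $1),
      xact_commit
    from pg_catalog.pg_stat_database where datname = current_database()]], every)
  local snapshot, commits = result.rows[1][1], result.rows[1][2]
  -- speed_and_set turns the monotonically growing counter into commits per second
  local rate = cache:speed_and_set("xact_commit", commits)
  if rate then
    storage_insert_metric({ plugin = plugin_name, snapshot = snapshot, float = rate })
  end
end

run_every(collect, every)
```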
-------------------------------------------------------------------------------- /plugins/activity/activity_10.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'sql_id', md5(query)::UUID, 5 | 'query_id', md5(query || query_start::text)::UUID, 6 | 'datname', datname::text, 7 | 'pid', pid, 8 | 'username', usename, 9 | 'application_name', application_name, 10 | 'client_addr', client_addr, 11 | 'client_hostname', client_hostname, 12 | 'client_port', client_port, 13 | 'xact_start_duration', extract(epoch from now() - xact_start)::int, 14 | 'query_start_duration', extract(epoch from now() - query_start)::int, 15 | 'state_change_duration', extract(epoch from now() - state_change)::int, 16 | 'wait_event_type', wait_event_type::text, 17 | 'wait_event', wait_event::text, 18 | 'state', state, 19 | 'query', query::text, 20 | 'backend_type', backend_type::text 21 | ) as result 22 | from 23 | pg_catalog.pg_stat_activity 24 | where 25 | state <> 'idle' 26 | and query is not null 27 | and backend_type not in ('walsender', 'checkpointer', 'walwriter') 28 | and extract(epoch from now() - state_change)::int > $2 -------------------------------------------------------------------------------- /plugins/activity/activity_9.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'sql_id', md5(query)::UUID, 5 | 'query_id', md5(query || query_start::text)::UUID, 6 | 'datname', datname::text, 7 | 'pid', pid, 8 | 'username', usename, 9 | 'application_name', application_name, 10 | 'client_addr', client_addr, 11 | 'client_hostname', client_hostname, 12 | 'client_port', client_port, 13 | 'xact_start_duration', extract(epoch from now() - xact_start)::int, 14 | 'query_start_duration', extract(epoch from now() - query_start)::int, 15 | 'state_change_duration', extract(epoch from now() - state_change)::int, 16 | 'wait_event_type', wait_event_type::text, 17 | 'wait_event', wait_event::text, 18 | 'state', state, 19 | 'query', query::text 20 | ) as result 21 | from 22 | pg_catalog.pg_stat_activity 23 | where 24 | state <> 'idle' 25 | and query is not null 26 | and extract(epoch from now() - state_change)::int > $2; -------------------------------------------------------------------------------- /plugins/activity/linux_helper_proc_stat.lua: -------------------------------------------------------------------------------- 1 | local helpers = {} 2 | 3 | -- /proc//io 4 | function helpers.read_linux_io_real(pid) 5 | local result = {} 6 | local filename = string.format(HOST_PROC_DIR.."/%d/io", pid) 7 | local fh = io.open(filename, "rb") 8 | if not fh then return result end 9 | local content = fh:read("*a"); 10 | fh:close() 11 | for x in string.gmatch(content, "read_bytes: (%d+)\n") do result.read_bytes = tonumber(x) end 12 | for x in string.gmatch(content, "write_bytes: (%d+)\n") do result.write_bytes = tonumber(x) end 13 | for x in string.gmatch(content, "rchar: (%d+)\n") do result.rchar = tonumber(x) end 14 | for x in string.gmatch(content, "wchar: (%d+)\n") do result.wchar = tonumber(x) end 15 | for x in string.gmatch(content, "syscr: (%d+)\n") do result.syscr = tonumber(x) end 16 | for x in string.gmatch(content, "syscw: (%d+)\n") do result.syscw = tonumber(x) end 17 | return result 18 | end 19 | 20 | function helpers.read_linux_io(pid) 
21 | local result = {} 22 | pcall(function() result = helpers.read_linux_io_real(pid) end) 23 | return result 24 | end 25 | 26 | -- /proc/<pid>/stat 27 | function helpers.read_linux_cpu_real(pid) 28 | local result = {} 29 | local filename = string.format(HOST_PROC_DIR.."/%d/stat", pid) 30 | local fh = io.open(filename, "rb") 31 | if not fh then return result end 32 | local content = fh:read("*a"); 33 | fh:close() 34 | -- 13 fields, counting state: (R|S|...) 35 | -- state ppid pgrp session tty_nr tpgid flags minflt cminflt majflt cmajflt utime stime 36 | for utime, stime in string.gmatch(content, " %a %d+ %d+ %d+ %d+ %-%d+ %d+ %d+ %d+ %d+ %d+ (%d+) (%d+)") do 37 | result.utime = tonumber(utime) 38 | result.stime = tonumber(stime) 39 | end 40 | return result 41 | end 42 | 43 | function helpers.read_linux_cpu(pid) 44 | local result = {} 45 | pcall(function() result = helpers.read_linux_cpu_real(pid) end) 46 | return result 47 | end 48 | 49 | helpers.calc_diff_t, helpers.count_calc_diff_call = {}, 0 50 | function helpers.calc_diff(id, value) 51 | 52 | if not value then return nil end 53 | 54 | local now = os.time() 55 | local prev = helpers.calc_diff_t[id] 56 | if (prev == nil) then 57 | helpers.calc_diff_t[id] = { touch = now, begin_value = value } 58 | return nil 59 | end 60 | 61 | -- update the last-access time 62 | helpers.calc_diff_t[id]["touch"] = now 63 | 64 | -- compact calc_diff_t if needed 65 | helpers.count_calc_diff_call = helpers.count_calc_diff_call + 1 66 | if (helpers.count_calc_diff_call % 100) == 0 then helpers.gc_calc_diff() end 67 | 68 | return value - prev["begin_value"] 69 | end 70 | 71 | function helpers.gc_calc_diff() 72 | local new_calc_diff_t = {} 73 | local garbage_time = os.time() - (15 * 60) 74 | for id, val in pairs(helpers.calc_diff_t) do 75 | if val["touch"] > garbage_time then new_calc_diff_t[id] = val end 76 | end 77 | helpers.calc_diff_t = new_calc_diff_t 78 | end 79 | 80 | return helpers -------------------------------------------------------------------------------- /plugins/activity/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.activity' 2 | local every = 60 3 | 4 | local activity_file = "activity_9.sql" 5 | if get_pg_server_version() >= 10 then activity_file = "activity_10.sql" end 6 | 7 | local sql_activity = read_file_in_plugin_dir(activity_file) 8 | local sql_states = read_file_in_plugin_dir("states.sql") 9 | local sql_waits = read_file_in_plugin_dir("waits.sql") 10 | 11 | local helpers = dofile(filepath.join(plugin:dir(), "linux_helper_proc_stat.lua")) 12 | 13 | -- process states 14 | local function states() 15 | local result = target:query(sql_states, every) 16 | local jsonb = {} 17 | local snapshot = nil 18 | for _, row in pairs(result.rows) do 19 | if not (snapshot) then snapshot = row[1] end 20 | jsonb[row[2]] = tonumber(row[3]) 21 | gauge_set("activity_states", row[3], { state = row[2] }) 22 | end 23 | local jsonb, err = json.encode(jsonb) 24 | if err then error(err) end 25 | storage_insert_metric({ plugin = plugin_name ..
".states", snapshot = snapshot, json = jsonb }) 26 | end 27 | 28 | -- process waits 29 | local function waits() 30 | local result = target:query(sql_waits, every) 31 | for _, row in pairs(result.rows) do 32 | local jsonb, err = json.decode(row[2]) 33 | if err then error(err) end 34 | gauge_set("activity_waits", jsonb.count, 35 | { state = jsonb.state, wait_event = jsonb.wait_event, wait_event_type = jsonb.wait_event_type }) 36 | storage_insert_metric({ plugin = plugin_name .. ".waits", snapshot = row[1], json = row[2] }) 37 | end 38 | end 39 | 40 | -- collect on rds 41 | local function collect_rds() 42 | local result = target:query(sql_activity, every, 30) 43 | for _, row in pairs(result.rows) do 44 | storage_insert_metric({ plugin = plugin_name, snapshot = row[1], json = row[2] }) 45 | end 46 | states() 47 | waits() 48 | end 49 | 50 | -- collect on localhost 51 | local function collect_local() 52 | -- process activity 53 | local result = target:query(sql_activity, every, 30) 54 | for _, row in pairs(result.rows) do 55 | local jsonb, err = json.decode(row[2]) 56 | if err then error(err) end 57 | local pid = tonumber(jsonb.pid) 58 | if pid then 59 | -- IO for query 60 | local pid_io = helpers.read_linux_io(pid) 61 | local rchar, wchar = pid_io.rchar, pid_io.wchar 62 | if rchar and wchar then 63 | -- need root rights 64 | jsonb.rchar = helpers.calc_diff("rchar-" .. tostring(pid), rchar) 65 | jsonb.wchar = helpers.calc_diff("wchar-" .. tostring(pid), wchar) 66 | end 67 | -- CPU for query 68 | local pid_cpu = helpers.read_linux_cpu(pid) 69 | local utime, stime = pid_cpu.utime, pid_cpu.stime 70 | if utime or stime then 71 | -- need root rights for stime 72 | jsonb.utime = helpers.calc_diff("utime-" .. tostring(pid), utime) 73 | jsonb.stime = helpers.calc_diff("stime-" .. 
tostring(pid), stime) 74 | end 75 | end 76 | local jsonb, err = json.encode(jsonb) 77 | if err then error(err) end 78 | storage_insert_metric({ plugin = plugin_name, snapshot = row[1], json = jsonb }) 79 | end 80 | states() 81 | waits() 82 | end 83 | 84 | -- choose function to collect 85 | local f = collect_local 86 | if is_rds() then f = collect_rds end 87 | 88 | run_every(f, every) 89 | -------------------------------------------------------------------------------- /plugins/activity/states.sql: -------------------------------------------------------------------------------- 1 | with states as ( 2 | select * 3 | from 4 | unnest('{active,idle,idle in transaction,idle in transaction (aborted),fastpath function call}'::text[]) 5 | ) 6 | select 7 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 8 | s.unnest, 9 | count(a.pid) as count 10 | from 11 | states s 12 | left join pg_catalog.pg_stat_activity a on s.unnest = a.state 13 | group by 14 | s.unnest; -------------------------------------------------------------------------------- /plugins/activity/test.lua: -------------------------------------------------------------------------------- 1 | target:background_query("select pg_sleep(120)") 2 | 3 | local activity_metric_exists = function() 4 | return ( 5 | metric_exists('pg.activity') 6 | and metric_exists('pg.activity.states') 7 | and metric_exists('pg.activity.waits') 8 | ) 9 | end 10 | 11 | run_plugin_test(120, activity_metric_exists) 12 | -------------------------------------------------------------------------------- /plugins/activity/waits.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'state', a.state::text, 5 | 'wait_event', a.wait_event::text, 6 | 'wait_event_type', a.wait_event_type::text, 7 | 'count', count(a.pid)::bigint 8 | ) as result 9 | from 10 | pg_catalog.pg_stat_activity a 11 | where 12 | state <> 'idle' 13 | and a.wait_event is not null 14 | group by 15 | a.wait_event, a.wait_event_type, a.state; -------------------------------------------------------------------------------- /plugins/alerts/bloat.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("bloat.sql") 2 | local key = "bloat" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if not(result.rows[1] == nil) and not(result.rows[1][1] == nil) then 7 | local table_name, bloat = result.rows[1][1], result.rows[1][2] 8 | local jsonb = { 9 | host = host, 10 | key = key, 11 | created_at = get_last_created_at(host, key, unix_ts), 12 | custom_details ={table_name=table_name, bloat=bloat} 13 | } 14 | local jsonb, err = json.encode(jsonb) 15 | if err then error(err) end 16 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 17 | end 18 | end 19 | 20 | return check 21 | -------------------------------------------------------------------------------- /plugins/alerts/bloat.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | select 3 | snapshot, 4 | jsonb_array_elements(value_jsonb) as value_jsonb 5 | from 6 | metric 7 | where 8 | host = md5($1::text)::uuid 9 | and plugin = md5('pg.user_tables')::uuid 10 | and ts > ($2 - 10 * 60) 11 | and ts < $2 12 | ) 13 | select 14 | value_jsonb ->> 'full_table_name' as full_table_name, 15 | round( 16 | 100 * 
coalesce((value_jsonb ->> 'n_dead_tup')::float8, 0) / ( 17 | coalesce((value_jsonb ->> 'n_live_tup')::float8, 0) 18 | + coalesce((value_jsonb ->> 'n_dead_tup')::float8, 0) 19 | ) 20 | ) as bloat 21 | from 22 | data 23 | where 24 | coalesce((value_jsonb ->> 'n_dead_tup')::bigint, 0) > 0 25 | and (coalesce((value_jsonb ->> 'relpages')::bigint, 0) * 8 * 1024) > (256 * 1024 * 1024) 26 | and round( 27 | 100 * coalesce((value_jsonb ->> 'n_dead_tup')::float8, 0) / ( 28 | coalesce((value_jsonb ->> 'n_live_tup')::float8, 0) 29 | + coalesce((value_jsonb ->> 'n_dead_tup')::float8, 0) 30 | ) 31 | ) > 10 32 | order by 33 | 2 desc 34 | limit 1; -------------------------------------------------------------------------------- /plugins/alerts/errors.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("errors.sql") 2 | local key = "errors" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if not (result.rows[1] == nil) and not (result.rows[1][1] == nil) then 7 | local percentile_90_rollbacks, percentile_90_conflicts = result.rows[1][1], result.rows[1][2] 8 | if (percentile_90_rollbacks > 500) or (percentile_90_conflicts > 100) then 9 | local jsonb = { 10 | host = host, 11 | key = key, 12 | created_at = get_last_created_at(host, key, unix_ts), 13 | custom_details = { 14 | percentile_90_rollbacks = percentile_90_rollbacks, 15 | percentile_90_conflicts = percentile_90_conflicts, 16 | } 17 | } 18 | local jsonb, err = json.encode(jsonb) 19 | if err then error(err) end 20 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 21 | end 22 | end 23 | end 24 | 25 | return check 26 | -------------------------------------------------------------------------------- /plugins/alerts/errors.sql: -------------------------------------------------------------------------------- 1 | with sum_errors as ( 2 | select 3 | ts as ts, 4 | sum(coalesce((value_jsonb ->> 'xact_rollback')::float8, 0)) as rollback, 5 | sum(coalesce((value_jsonb ->> 'conflicts')::float8, 0)) as conflicts 6 | from 7 | metric 8 | where 9 | host = md5($1::text)::uuid 10 | and plugin = md5('pg.databases')::uuid 11 | and ts > ($2 - (10 * 60)) 12 | and ts < $2 13 | group by ts 14 | order by ts desc 15 | ) 16 | select 17 | percentile_cont(0.9) within group (order by rollback asc) as rolback, 18 | percentile_cont(0.9) within group (order by conflicts asc) as conflicts 19 | from 20 | sum_errors; -------------------------------------------------------------------------------- /plugins/alerts/healthcheck.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("healthcheck.sql") 2 | local key = "healthcheck" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if (result.rows[1] == nil) or (result.rows[1][1] == nil) or 7 | math.abs(result.rows[1][2] - result.rows[1][1]) > 5 * 60 then 8 | local jsonb = { 9 | host = host, 10 | key = key, 11 | created_at = get_last_created_at(host, key, unix_ts) 12 | } 13 | local jsonb, err = json.encode(jsonb) 14 | if err then error(err) end 15 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 16 | end 17 | end 18 | 19 | return check 20 | -------------------------------------------------------------------------------- /plugins/alerts/healthcheck.sql: -------------------------------------------------------------------------------- 1 | select 2 | 
max(ts), 3 | extract(epoch from current_timestamp)::bigint 4 | from 5 | metric 6 | where 7 | host = md5($1::text)::uuid 8 | and plugin = md5('pg.healthcheck')::uuid 9 | and ts > ($2 - 10 * 60) 10 | and ts < $2; -------------------------------------------------------------------------------- /plugins/alerts/last_created_at.sql: -------------------------------------------------------------------------------- 1 | select 2 | coalesce((value_jsonb ->> 'created_at')::bigint, 0) as created_at 3 | from 4 | metric 5 | where 6 | host = md5($1::text)::uuid 7 | and plugin = md5('pg.alerts')::uuid 8 | and value_jsonb ->> 'key' = $2 9 | and ts > ($3 - (10 * 60)) 10 | and ts < $3 11 | order by 12 | 1 desc -------------------------------------------------------------------------------- /plugins/alerts/long_running_transactions.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("long_running_transactions.sql") 2 | local key = "long_running_transactions" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if not (result.rows[1] == nil) and not (result.rows[1][1] == nil) then 7 | local info, err = json.decode(result.rows[1][1]) 8 | if err then error(err) end 9 | local jsonb = { 10 | host = host, 11 | key = key, 12 | created_at = get_last_created_at(host, key, unix_ts), 13 | custom_details = info, 14 | } 15 | local jsonb, err = json.encode(jsonb) 16 | if err then error(err) end 17 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 18 | end 19 | end 20 | 21 | return check 22 | -------------------------------------------------------------------------------- /plugins/alerts/long_running_transactions.sql: -------------------------------------------------------------------------------- 1 | select 2 | value_jsonb::text 3 | from 4 | metric 5 | where 6 | host = md5($1::text)::uuid 7 | and plugin = md5('pg.activity')::uuid 8 | and ts > ($2 - 10 * 60) 9 | and ts < $2 10 | and (value_jsonb ->> 'state_change_duration')::bigint > 20 * 60 11 | and (value_jsonb ->> 'backend_type' <> 'autovacuum worker') 12 | and not (value_jsonb ->> 'query' ~ '^autovacuum: VACUUM') 13 | limit 1; -------------------------------------------------------------------------------- /plugins/alerts/plugin.lua: -------------------------------------------------------------------------------- 1 | plugin_name = 'pg.alerts' 2 | local every = 60 3 | 4 | -- list of files 5 | local files, err = filepath.glob(filepath.join(plugin:dir(), "*.lua")) 6 | if err then error(err) end 7 | 8 | -- load checks 9 | local checks = {} 10 | for _, file in pairs(files) do 11 | if not(strings.has_suffix(file, "plugin.lua")) 12 | and not(strings.has_suffix(file, "test.lua")) then 13 | checks[file] = dofile(file) 14 | end 15 | end 16 | 17 | -- get last value 18 | local last_created_at_sql = read_file_in_plugin_dir("last_created_at.sql") 19 | function get_last_created_at(host, key, unix_ts) 20 | if storage:query(last_created_at_sql, host, key, unix_ts).rows[1] then 21 | return storage:query(last_created_at_sql, host, key, unix_ts).rows[1][1] 22 | end 23 | return unix_ts 24 | end 25 | 26 | -- collect function 27 | function collect() 28 | local unix_ts = get_unix_ts(storage) 29 | local result = storage:query("select name from host where not maintenance") 30 | for _, row in pairs(result.rows) do 31 | local host = row[1] 32 | for _, check in pairs(checks) do 33 | local status, err = pcall(check, host, unix_ts) 34 | if not status then 
35 | plugin_log:printf("[ERROR] plugin '%s' on host '%s' error: %s\n", plugin:name(), plugin:host(), err) 36 | end 37 | end 38 | end 39 | end 40 | 41 | run_every(collect, every) -------------------------------------------------------------------------------- /plugins/alerts/replication_slots.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("replication_slots.sql") 2 | local key = "replication_slots" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if not (result.rows[1] == nil) and not (result.rows[1][1] == nil) then 7 | local info, err = json.decode(result.rows[1][1]) 8 | if err then error(err) end 9 | -- calc max_size 10 | local max_size = 0 11 | for _, size in pairs(info) do 12 | if size > max_size then max_size = size end 13 | end 14 | if max_size > 1024 * 1024 * 1024 then 15 | -- humanize info 16 | local humanize_info = {} 17 | for slot_name, size in pairs(info) do 18 | local size_string = humanize.ibytes(size) 19 | humanize_info[slot_name] = size_string 20 | end 21 | local jsonb = { 22 | host = host, 23 | key = key, 24 | created_at = get_last_created_at(host, key, unix_ts), 25 | custom_details = humanize_info, 26 | } 27 | local jsonb, err = json.encode(jsonb) 28 | if err then error(err) end 29 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 30 | end 31 | end 32 | end 33 | 34 | return check -------------------------------------------------------------------------------- /plugins/alerts/replication_slots.sql: -------------------------------------------------------------------------------- 1 | select 2 | value_jsonb::text 3 | from 4 | metric 5 | where 6 | host = md5($1::text)::uuid 7 | and plugin = md5('pg.replication_slots')::uuid 8 | and ts > ($2 - 10 * 60) 9 | and ts < $2 10 | order by 11 | ts desc 12 | limit 1; -------------------------------------------------------------------------------- /plugins/alerts/sequences.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("sequences.sql") 2 | local key = "sequences" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if not(result.rows[1] == nil) and not(result.rows[1][1] == nil) then 7 | local sequence_name, remaining_capacity = result.rows[1][1], result.rows[1][2] 8 | local jsonb = { 9 | host = host, 10 | key = key, 11 | created_at = get_last_created_at(host, key, unix_ts), 12 | custom_details ={sequence_name=sequence_name, remaining_capacity=remaining_capacity} 13 | } 14 | local jsonb, err = json.encode(jsonb) 15 | if err then error(err) end 16 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 17 | end 18 | end 19 | 20 | return check 21 | -------------------------------------------------------------------------------- /plugins/alerts/sequences.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | select 3 | snapshot, 4 | value_jsonb 5 | from 6 | metric 7 | where 8 | host = md5($1::text)::uuid 9 | and plugin = md5('pg.sequences')::uuid 10 | and ts > ($2 - 10 * 60) 11 | and ts < $2 12 | ) 13 | select (value_jsonb->'sequence_name')::text as full_sequence_name, (value_jsonb->'remaining_capacity')::text::float as remaining_capacity 14 | from data 15 | where (value_jsonb->'remaining_capacity')::text::float < 20.0 16 | 
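
Every check in this directory (bloat.lua, errors.lua, sequences.lua, waits.lua, ...) follows the same contract that plugin.lua relies on when it loads the `*.lua` files with `dofile` (skipping plugin.lua and test.lua): the module returns a single `check(host, unix_ts)` function that queries the storage connection and, when a threshold is crossed, inserts a `pg.alerts` metric. A minimal skeleton for reference; the SQL file name, the key and the threshold below are placeholders, not part of the repository:

```lua
-- skeleton of an alerts check module; my_check.sql, "my_check" and the threshold are hypothetical
local sql = read_file_in_plugin_dir("my_check.sql")
local key = "my_check"

local function check(host, unix_ts)
  -- query the storage database for metrics written by other plugins
  local result = storage:query(sql, host, unix_ts)
  if not (result.rows[1] == nil) and not (result.rows[1][1] == nil)
      and result.rows[1][1] > 42 then
    -- record the alert, reusing the created_at of an already open alert if present
    local jsonb, err = json.encode({
      host = host,
      key = key,
      created_at = get_last_created_at(host, key, unix_ts),
      custom_details = { value = result.rows[1][1] },
    })
    if err then error(err) end
    storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb })
  end
end

return check
```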
-------------------------------------------------------------------------------- /plugins/alerts/test.lua: -------------------------------------------------------------------------------- 1 | function alerts_metric_exists() 2 | local sql_query = string.format([[ 3 | select 4 | count(*) 5 | from 6 | metric m 7 | where 8 | plugin = md5('pg.alerts')::uuid 9 | and ts > extract( epoch from (now()-'3 minute'::interval) ) 10 | and ts > (value_jsonb->'created_at')::bigint + 10 11 | ]]) 12 | local result = target:query(sql_query).rows[1] 13 | if result and result[1] then 14 | return result[1] > 0 15 | end 16 | return false 17 | end 18 | 19 | local timeout = 120 20 | tested_plugin:create() 21 | while timeout > 0 do 22 | if tested_plugin:error_count() > 0 then 23 | error(tested_plugin:last_error()) 24 | end 25 | if alerts_metric_exists() then 26 | tested_plugin:remove() 27 | return 28 | end 29 | time.sleep(1) 30 | timeout = timeout - 1 31 | end 32 | error("execution timeout") -------------------------------------------------------------------------------- /plugins/alerts/uptime.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("uptime.sql") 2 | local key = "checkpointer.uptime" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if not (result.rows[1] == nil) and not (result.rows[1][1] == nil) then 7 | local uptime = result.rows[1][1] 8 | if uptime < 300 then 9 | local jsonb = { 10 | host = host, 11 | key = key, 12 | created_at = get_last_created_at(host, key, unix_ts), 13 | custom_details = { uptime = uptime } 14 | } 15 | local jsonb, err = json.encode(jsonb) 16 | if err then error(err) end 17 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 18 | end 19 | end 20 | end 21 | 22 | return check 23 | -------------------------------------------------------------------------------- /plugins/alerts/uptime.sql: -------------------------------------------------------------------------------- 1 | select 2 | min(value_bigint) as uptime 3 | from 4 | metric 5 | where 6 | host = md5($1::text)::uuid 7 | and plugin = md5('pg.uptime.checkpointer')::uuid 8 | and ts > ($2 - 20 * 60) 9 | and ts < $2; -------------------------------------------------------------------------------- /plugins/alerts/waits.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("waits.sql") 2 | local key = "waits" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if not (result.rows[1] == nil) and not (result.rows[1][1] == nil) 7 | and result.rows[1][1] > 100 then 8 | local jsonb = { 9 | host = host, 10 | key = key, 11 | created_at = get_last_created_at(host, key, unix_ts), 12 | custom_details = { percentile_90 = result.rows[1][1] } 13 | } 14 | local jsonb, err = json.encode(jsonb) 15 | if err then error(err) end 16 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 17 | end 18 | end 19 | 20 | return check 21 | -------------------------------------------------------------------------------- /plugins/alerts/waits.sql: -------------------------------------------------------------------------------- 1 | with sum_waits as ( 2 | select 3 | ts as ts, 4 | sum(coalesce((value_jsonb ->> 'count')::bigint, 0)) as waits 5 | from 6 | metric 7 | where 8 | host = md5($1::text)::uuid 9 | and plugin = md5('pg.activity.waits')::uuid 10 | and ts > ($2 - (20 * 60)) 
11 | and ts < $2 12 | and value_jsonb ->> 'state' <> 'idle in transaction' 13 | group by ts 14 | order by ts desc 15 | ) 16 | select 17 | percentile_cont(0.9) within group (order by waits asc) 18 | from 19 | sum_waits; -------------------------------------------------------------------------------- /plugins/alerts/wraparound.lua: -------------------------------------------------------------------------------- 1 | local sql = read_file_in_plugin_dir("wraparound.sql") 2 | local key = "wraparound" 3 | 4 | local function check(host, unix_ts) 5 | local result = storage:query(sql, host, unix_ts) 6 | if not (result.rows[1] == nil) and not (result.rows[1][1] == nil) then 7 | local database, age = result.rows[1][1], result.rows[1][2] 8 | local jsonb = { 9 | host = host, 10 | key = key, 11 | created_at = get_last_created_at(host, key, unix_ts), 12 | custom_details = { database=database, age=age } 13 | } 14 | local jsonb, err = json.encode(jsonb) 15 | if err then error(err) end 16 | storage:insert_metric({ host = host, plugin = plugin_name, json = jsonb }) 17 | end 18 | end 19 | 20 | return check 21 | -------------------------------------------------------------------------------- /plugins/alerts/wraparound.sql: -------------------------------------------------------------------------------- 1 | select 2 | value_jsonb ->> 'datname' as datname, 3 | coalesce((value_jsonb ->> 'age')::bigint, 0) as age 4 | from 5 | metric 6 | where 7 | host = md5($1::text)::uuid 8 | and plugin = md5('pg.databases')::uuid 9 | and ts > ($2 - 10 * 60) 10 | and ts < $2 11 | and coalesce((value_jsonb ->> 'age')::bigint, 0) > 300 * 1000 * 1000 12 | order by 13 | 2 desc 14 | limit 1; -------------------------------------------------------------------------------- /plugins/bgwriter/bgwriter.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'checkpoints_timed', checkpoints_timed, 5 | 'checkpoints_req', checkpoints_req, 6 | 'checkpoint_write_time', checkpoint_write_time, 7 | 'checkpoint_sync_time', checkpoint_sync_time, 8 | 'maxwritten_clean', maxwritten_clean, 9 | 'buffers_backend_fsync', buffers_backend_fsync, 10 | 'buffers_alloc', buffers_alloc, 11 | 'buffers_checkpoint', buffers_checkpoint, 12 | 'buffers_clean', buffers_clean, 13 | 'buffers_backend', buffers_backend 14 | ) as result 15 | from 16 | pg_catalog.pg_stat_bgwriter; -------------------------------------------------------------------------------- /plugins/bgwriter/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.bgwriter' 2 | local every = 60 3 | 4 | local sql_bgwriter = read_file_in_plugin_dir("bgwriter.sql") 5 | 6 | local function collect() 7 | local result = target:query(sql_bgwriter, every) 8 | for _, row in pairs(result.rows) do 9 | local jsonb, err = json.decode(row[2]) 10 | if err then error(err) end 11 | jsonb.checkpoints_timed = cache:diff_and_set("checkpoints_timed", jsonb.checkpoints_timed) 12 | jsonb.checkpoints_req = cache:diff_and_set("checkpoints_req", jsonb.checkpoints_req) 13 | jsonb.checkpoint_write_time = cache:diff_and_set("checkpoint_write_time", jsonb.checkpoint_write_time) 14 | jsonb.checkpoint_sync_time = cache:diff_and_set("checkpoint_sync_time", jsonb.checkpoint_sync_time) 15 | jsonb.maxwritten_clean = cache:diff_and_set("maxwritten_clean", jsonb.maxwritten_clean) 16 | jsonb.buffers_backend_fsync = 
cache:diff_and_set("buffers_backend_fsync", jsonb.buffers_backend_fsync) 17 | jsonb.buffers_alloc = cache:diff_and_set("buffers_alloc", jsonb.buffers_alloc) 18 | jsonb.buffers_checkpoint = cache:speed_and_set("buffers_checkpoint", jsonb.buffers_checkpoint) 19 | jsonb.buffers_clean = cache:speed_and_set("buffers_clean", jsonb.buffers_clean) 20 | jsonb.buffers_backend = cache:speed_and_set("buffers_backend", jsonb.buffers_backend) 21 | jsonb, err = json.encode(jsonb) 22 | if err then error(err) end 23 | storage_insert_metric({ plugin = plugin_name, snapshot = row[1], json = jsonb }) 24 | end 25 | end 26 | 27 | run_every(collect, every) 28 | -------------------------------------------------------------------------------- /plugins/bgwriter/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(120, function() return metric_exists('pg.bgwriter') end) 2 | -------------------------------------------------------------------------------- /plugins/block/block.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'database', blocked_activity.datname, 5 | 'blocked_query_id', md5(blocked_activity.query || blocked_activity.query_start::text)::UUID, 6 | 'blocked_pid', blocked_locks.pid, 7 | 'blocked_user', blocked_activity.usename, 8 | 'blocked_duration', extract(epoch from now() - blocked_activity.query_start)::int, 9 | 'blocking_query_id', md5(blocking_activity.query || blocking_activity.query_start::text)::UUID, 10 | 'blocking_pid', blocking_locks.pid, 11 | 'blocking_user', blocking_activity.usename, 12 | 'blocking_duration', extract(epoch from now() - blocking_activity.query_start)::int, 13 | 'blocked_statement', blocked_activity.query, 14 | 'current_statement_in_blocking_process', blocking_activity.query, 15 | 'blocked_application', blocked_activity.application_name, 16 | 'blocking_application', blocking_activity.application_name 17 | ) as result 18 | from 19 | pg_catalog.pg_locks blocked_locks 20 | join pg_catalog.pg_stat_activity blocked_activity on blocked_activity.pid = blocked_locks.pid 21 | join pg_catalog.pg_locks blocking_locks on blocking_locks.locktype = blocked_locks.locktype 22 | and blocking_locks.database is not distinct from blocked_locks.database 23 | and blocking_locks.relation is not distinct from blocked_locks.relation 24 | and blocking_locks.page is not distinct from blocked_locks.page 25 | and blocking_locks.tuple is not distinct from blocked_locks.tuple 26 | and blocking_locks.virtualxid is not distinct from blocked_locks.virtualxid 27 | and blocking_locks.transactionid is not distinct from blocked_locks.transactionid 28 | and blocking_locks.classid is not distinct from blocked_locks.classid 29 | and blocking_locks.objid is not distinct from blocked_locks.objid 30 | and blocking_locks.objsubid is not distinct from blocked_locks.objsubid 31 | and blocking_locks.pid != blocked_locks.pid 32 | join pg_catalog.pg_stat_activity blocking_activity on blocking_activity.pid = blocking_locks.pid 33 | where 34 | not blocked_locks.granted; -------------------------------------------------------------------------------- /plugins/block/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.block' 2 | local every = 15 3 | 4 | local sql_block = read_file_in_plugin_dir("block.sql") 5 | 6 | local function collect() 7 | local result 
= target:query(sql_block, every) 8 | for _, row in pairs(result.rows) do 9 | storage_insert_metric({ plugin = plugin_name, snapshot = row[1], json = row[2] }) 10 | end 11 | end 12 | 13 | run_every(collect, every) 14 | -------------------------------------------------------------------------------- /plugins/block/test.lua: -------------------------------------------------------------------------------- 1 | target:background_query("select pg_advisory_xact_lock(1), pg_sleep(10);") 2 | target:background_query("select pg_advisory_xact_lock(1), pg_sleep(10);") 3 | 4 | run_plugin_test(120, function() return metric_exists('pg.block') end) 5 | -------------------------------------------------------------------------------- /plugins/btree_bloat/btree_bloat.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'datname', current_database(), 5 | 'schemaname', nspname, 6 | 'tablename', tblname, 7 | 'indexname', idxname, 8 | 'bloat_ratio', 100 * (relpages-est_pages_ff)::float / relpages, 9 | 'extra_size', bs*(relpages-est_pages)::bigint, 10 | 'extra_ratio', 100 * (relpages-est_pages)::float / relpages, 11 | 'fillfactor', fillfactor, 12 | 'is_na', is_na, 13 | 'bloat_size', CASE WHEN relpages > est_pages_ff THEN bs*(relpages-est_pages_ff) ELSE 0 END 14 | ) as result 15 | FROM ( 16 | SELECT coalesce(1 + 17 | ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth) 18 | ) AS est_pages, 19 | coalesce(1 + 20 | ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0 21 | ) AS est_pages_ff, 22 | bs, nspname, tblname, idxname, relpages, fillfactor, is_na 23 | -- , pgstatindex(idxoid) AS pst, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples -- (DEBUG INFO) 24 | FROM ( 25 | SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, idxoid, fillfactor, 26 | ( index_tuple_hdr_bm + 27 | maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN 28 | WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign 29 | ELSE index_tuple_hdr_bm%maxalign 30 | END 31 | + nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN 32 | WHEN nulldatawidth = 0 THEN 0 33 | WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign 34 | ELSE nulldatawidth::integer%maxalign 35 | END 36 | )::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na 37 | -- , index_tuple_hdr_bm, nulldatawidth -- (DEBUG INFO) 38 | FROM ( 39 | SELECT n.nspname, i.tblname, i.idxname, i.reltuples, i.relpages, 40 | i.idxoid, i.fillfactor, current_setting('block_size')::numeric AS bs, 41 | CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?) 
42 | WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8 43 | ELSE 4 44 | END AS maxalign, 45 | /* per page header, fixed size: 20 for 7.X, 24 for others */ 46 | 24 AS pagehdr, 47 | /* per page btree opaque data */ 48 | 16 AS pageopqdata, 49 | /* per tuple header: add IndexAttributeBitMapData if some cols are null-able */ 50 | CASE WHEN max(coalesce(s.null_frac,0)) = 0 51 | THEN 2 -- IndexTupleData size 52 | ELSE 2 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8) 53 | END AS index_tuple_hdr_bm, 54 | /* data len: we remove null values save space using it fractionnal part from stats */ 55 | sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS nulldatawidth, 56 | max( CASE WHEN i.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na 57 | FROM ( 58 | SELECT ct.relname AS tblname, ct.relnamespace, ic.idxname, ic.attpos, ic.indkey, ic.indkey[ic.attpos], ic.reltuples, ic.relpages, ic.tbloid, ic.idxoid, ic.fillfactor, 59 | coalesce(a1.attnum, a2.attnum) AS attnum, coalesce(a1.attname, a2.attname) AS attname, coalesce(a1.atttypid, a2.atttypid) AS atttypid, 60 | CASE WHEN a1.attnum IS NULL 61 | THEN ic.idxname 62 | ELSE ct.relname 63 | END AS attrelname 64 | FROM ( 65 | SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, indkey, 66 | pg_catalog.generate_series(1,indnatts) AS attpos 67 | FROM ( 68 | SELECT ci.relname AS idxname, ci.reltuples, ci.relpages, i.indrelid AS tbloid, 69 | i.indexrelid AS idxoid, 70 | coalesce(substring( 71 | array_to_string(ci.reloptions, ' ') 72 | from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor, 73 | i.indnatts, 74 | pg_catalog.string_to_array(pg_catalog.textin( 75 | pg_catalog.int2vectorout(i.indkey)),' ')::int[] AS indkey 76 | FROM pg_catalog.pg_index i 77 | JOIN pg_catalog.pg_class ci ON ci.oid = i.indexrelid 78 | WHERE ci.relam=(SELECT oid FROM pg_am WHERE amname = 'btree') 79 | AND ci.relpages > 0 80 | ) AS idx_data 81 | ) AS ic 82 | JOIN pg_catalog.pg_class ct ON ct.oid = ic.tbloid 83 | LEFT JOIN pg_catalog.pg_attribute a1 ON 84 | ic.indkey[ic.attpos] <> 0 85 | AND a1.attrelid = ic.tbloid 86 | AND a1.attnum = ic.indkey[ic.attpos] 87 | LEFT JOIN pg_catalog.pg_attribute a2 ON 88 | ic.indkey[ic.attpos] = 0 89 | AND a2.attrelid = ic.idxoid 90 | AND a2.attnum = ic.attpos 91 | ) i 92 | JOIN pg_catalog.pg_namespace n ON n.oid = i.relnamespace 93 | JOIN pg_catalog.pg_stats s ON s.schemaname = n.nspname 94 | AND s.tablename = i.attrelname 95 | AND s.attname = i.attname 96 | GROUP BY 1,2,3,4,5,6,7,8,9,10,11 97 | ) AS rows_data_stats 98 | ) AS rows_hdr_pdg_stats 99 | ) AS relation_stats 100 | -------------------------------------------------------------------------------- /plugins/btree_bloat/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.btree_bloat' 2 | local every = 60 3 | 4 | local sql_btree_bloat = read_file_in_plugin_dir("btree_bloat.sql") 5 | 6 | local function collect() 7 | local result = target:query(sql_btree_bloat, 60) 8 | for _, row in pairs(result.rows) do 9 | storage_insert_metric({ plugin = plugin_name, snapshot = row[1], json = row[2] }) 10 | end 11 | end 12 | 13 | run_every(collect, every) 14 | -------------------------------------------------------------------------------- /plugins/btree_bloat/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(120, function() return 
metric_exists('pg.btree_bloat') end) 2 | -------------------------------------------------------------------------------- /plugins/buffercache/buffercache.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'current_database', current_database(), 5 | 'full_relation_name', current_database() || '.' || n.nspname || '.' || c.relname, 6 | 'relation_type', case c.relkind 7 | when 'r' then 'table' 8 | when 'v' then 'view' 9 | when 'm' then 'materialized view' 10 | when 'i' then 'index' 11 | when 'S' then 'sequence' 12 | when 's' then 'special' 13 | when 'f' then 'foreign table' 14 | when 'p' then 'table' 15 | when 'I' then 'index' 16 | end, 17 | 'buffers', count(b.*), 18 | 'usagecount', b.usagecount, 19 | 'dirty', b.isdirty 20 | ) as result 21 | from 22 | pg_buffercache b 23 | inner join pg_catalog.pg_database d on d.oid = b.reldatabase and d.datname = current_database() 24 | left join pg_catalog.pg_class c on b.relfilenode = pg_relation_filenode(c.oid) 25 | left join pg_catalog.pg_namespace n on n.oid = c.relnamespace 26 | group by 27 | c.relname, n.nspname, b.usagecount, b.isdirty, c.relkind -------------------------------------------------------------------------------- /plugins/buffercache/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.buffercache' 2 | local every = 300 3 | 4 | local sql_buffercache = read_file_in_plugin_dir("buffercache.sql") 5 | 6 | local snapshot = nil 7 | 8 | local function collect_for_db(conn) 9 | local result = conn:query(sql_buffercache, every) 10 | local per_relation_stat = {} 11 | --[===[ 12 | convert from: 13 | {"full_relation_name" = "x", buffers = 1, usage_count = 3, dirty = true}, 14 | {"full_relation_name" = "x", buffers = 1, usage_count = 3, dirty = false}, 15 | {"full_relation_name" = "x", buffers = 1, usage_count = 5, dirty = false}, 16 | {"full_relation_name" = "x", buffers = 2, usage_count = 2, dirty = false}, 17 | to: 18 | {"full_relation_name" = "x", buffers = 5, usage_count_3 = 3, usage_count_0 = 0, dirty_count = 1} 19 | --]===] 20 | local database_state = { 21 | datname = nil, 22 | buffers_count = 0, 23 | dirty_count = 0, 24 | usage_count_0 = 0, 25 | usage_count_3 = 3, 26 | } 27 | for _, row in pairs(result.rows) do 28 | 29 | if not snapshot then snapshot = row[1] end 30 | 31 | local jsonb, err = json.decode(row[2]) 32 | if err then error(err) end 33 | 34 | database_state.datname = database_state.datname or jsonb.current_database 35 | local relation = jsonb.full_relation_name 36 | local buffers = tonumber(jsonb.buffers) or 0 37 | local usage_count = tonumber(jsonb.usagecount) or 0 38 | local is_dirty = jsonb.dirty 39 | 40 | -- calc database_state 41 | database_state.buffers_count = database_state.buffers_count + buffers 42 | if is_dirty then 43 | database_state.dirty_count = database_state.dirty_count + buffers 44 | end 45 | if usage_count == 0 then 46 | database_state.usage_count_0 = database_state.usage_count_0 + buffers 47 | end 48 | if usage_count >= 3 then 49 | database_state.usage_count_3 = database_state.usage_count_3 + buffers 50 | end 51 | 52 | if relation then 53 | if per_relation_stat[relation] == nil then 54 | per_relation_stat[relation] = { 55 | full_relation_name = relation, 56 | buffers = 0, 57 | dirty_count = 0, 58 | usage_count_0 = 0, 59 | usage_count_3 = 3, 60 | } 61 | end 62 | per_relation_stat[relation].buffers = 
per_relation_stat[relation].buffers + buffers 63 | if is_dirty then 64 | per_relation_stat[relation].dirty_count = per_relation_stat[relation].dirty_count + buffers 65 | end 66 | if usage_count == 0 then 67 | per_relation_stat[relation].usage_count_0 = per_relation_stat[relation].usage_count_0 + buffers 68 | end 69 | if usage_count >= 3 then 70 | per_relation_stat[relation].usage_count_3 = per_relation_stat[relation].usage_count_3 + buffers 71 | end 72 | end 73 | end 74 | local jsonb = database_state 75 | jsonb.per_relation_stat = per_relation_stat 76 | local jsonb, err = json.encode(jsonb) 77 | if err then error(err) end 78 | storage_insert_metric({ plugin = plugin_name, snapshot = snapshot, json = jsonb }) 79 | end 80 | 81 | local function collect() 82 | for _, conn in pairs(target:available_connections()) do 83 | if extension_present(conn, 'pg_buffercache') then 84 | collect_for_db(conn) 85 | end 86 | end 87 | end 88 | 89 | run_every(collect, every) 90 | -------------------------------------------------------------------------------- /plugins/buffercache/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(120, function() return metric_exists('pg.buffercache') end) -------------------------------------------------------------------------------- /plugins/cloudwatch/plugin.lua: -------------------------------------------------------------------------------- 1 | local cloudwatch = require("cloudwatch") 2 | local plugin_name = 'pg.cloudwatch' 3 | local every = 5 * 60 4 | local last_time = time.unix() - every - 60 5 | local metrics = { 6 | burst_balance = "BurstBalance", cpu_credit_balance = "CPUCreditBalance", 7 | cpu_credit_usage = "CPUCreditUsage", cpu_surplus_credit_balance = "CPUSurplusCreditBalance", 8 | cpu_surplus_credits_charged = "CPUSurplusCreditsCharged", 9 | cpu_utilization = "CPUUtilization", 10 | db_load = "DBLoad", db_load_cpu = "DBLoadCPU", 11 | db_load_non_cpu = "DBLoadNonCPU", 12 | disk_queue_depth = "DiskQueueDepth", 13 | free_storage_space = "FreeStorageSpace", 14 | freeable_memory = "FreeableMemory", 15 | network_receive_throughput = "NetworkReceiveThroughput", network_transmit_throughput = "NetworkTransmitThroughput", 16 | read_iops = "ReadIOPS", read_latency = "ReadLatency", read_throughput = "ReadThroughput", 17 | replication_slot_disk_usage = "ReplicationSlotDiskUsage", 18 | transaction_logs_disk_usage = "TransactionLogsDiskUsage", 19 | swap_usage = "SwapUsage", 20 | write_iops = "WriteIOPS", write_latency = "WriteLatency", write_throughput = "WriteThroughput", 21 | } 22 | 23 | local clw, err = cloudwatch.new() 24 | if err then error(err) end 25 | 26 | local queries = {} 27 | for name, metric in pairs(metrics) do 28 | queries[name] = { 29 | namespace = "AWS/RDS", 30 | metric = metric, 31 | dimension_name = "DBInstanceIdentifier", 32 | dimension_value = plugin:host(), 33 | stat = "Average", 34 | period = 60, 35 | } 36 | end 37 | 38 | function collect() 39 | local end_time = time.unix() - 60 40 | local result, err = clw:get_metric_data({ 41 | start_time = last_time, 42 | end_time = end_time, 43 | queries = queries, 44 | }) 45 | if err then error(err) end 46 | for name, time_value in pairs(result) do 47 | for t, v in pairs(time_value) do 48 | -- print(name, t, v) 49 | storage_insert_metric({ plugin = plugin_name .. "." .. 
name, snapshot = t, float = v }) 50 | end 51 | end 52 | last_time = end_time 53 | end 54 | 55 | run_every(collect, every) -------------------------------------------------------------------------------- /plugins/databases/databases.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'datname', d.datname::text, 5 | 'size', pg_catalog.pg_database_size(d.datname)::bigint, 6 | 'age', age(s.datfrozenxid)::bigint, 7 | 'numbackends', d.numbackends, 8 | 'xact_commit', d.xact_commit, 9 | 'xact_rollback', d.xact_rollback, 10 | 'blks_read', d.blks_read, 11 | 'blks_hit', d.blks_hit, 12 | 'tup_returned', d.tup_returned, 13 | 'tup_fetched', d.tup_fetched, 14 | 'tup_inserted', d.tup_inserted, 15 | 'tup_updated', d.tup_updated, 16 | 'tup_deleted', d.tup_deleted, 17 | 'conflicts', d.conflicts, 18 | 'temp_files', d.temp_files, 19 | 'temp_bytes', d.temp_bytes, 20 | 'deadlocks', d.deadlocks, 21 | 'blk_read_time', d.blk_read_time, 22 | 'blk_write_time', d.blk_write_time 23 | ) as result 24 | from 25 | pg_catalog.pg_stat_database d 26 | inner join pg_catalog.pg_database s on s.datname = d.datname; -------------------------------------------------------------------------------- /plugins/databases/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.databases' 2 | local every = 60 3 | 4 | local sql_databases = read_file_in_plugin_dir("databases.sql") 5 | 6 | local function collect() 7 | local result = target:query(sql_databases, 60) 8 | for _, row in pairs(result.rows) do 9 | local jsonb, err = json.decode(row[2]) 10 | if err then error(err) end 11 | local database = jsonb.datname 12 | jsonb.xact_commit = cache:speed_and_set(database .. ".xact_commit", jsonb.xact_commit) 13 | jsonb.xact_rollback = cache:speed_and_set(database .. ".xact_rollback", jsonb.xact_rollback) 14 | jsonb.blks_read = cache:speed_and_set(database .. ".blks_read", jsonb.blks_read) 15 | jsonb.blks_hit = cache:speed_and_set(database .. ".blks_hit", jsonb.blks_hit) 16 | jsonb.tup_returned = cache:speed_and_set(database .. ".tup_returned", jsonb.tup_returned) 17 | jsonb.tup_fetched = cache:speed_and_set(database .. ".tup_fetched", jsonb.tup_fetched) 18 | jsonb.tup_inserted = cache:speed_and_set(database .. ".tup_inserted", jsonb.tup_inserted) 19 | jsonb.tup_updated = cache:speed_and_set(database .. ".tup_updated", jsonb.tup_updated) 20 | jsonb.tup_deleted = cache:speed_and_set(database .. ".tup_deleted", jsonb.tup_deleted) 21 | jsonb.conflicts = cache:speed_and_set(database .. ".conflicts", jsonb.conflicts) 22 | jsonb.temp_files = cache:diff_and_set(database .. ".temp_files", jsonb.temp_files) 23 | jsonb.temp_bytes = cache:speed_and_set(database .. ".temp_bytes", jsonb.temp_bytes) 24 | jsonb.deadlocks = cache:speed_and_set(database .. ".deadlocks", jsonb.deadlocks) 25 | jsonb.blk_read_time = cache:diff_and_set(database .. ".blk_read_time", jsonb.blk_read_time) 26 | jsonb.blk_write_time = cache:diff_and_set(database .. 
".blk_write_time", jsonb.blk_write_time) 27 | 28 | local jsonb, err = json.encode(jsonb) 29 | if err then error(err) end 30 | storage_insert_metric({ plugin = plugin_name, snapshot = row[1], json = jsonb }) 31 | end 32 | end 33 | 34 | run_every(collect, every) 35 | -------------------------------------------------------------------------------- /plugins/databases/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(120, function() return metric_exists('pg.databases') end) 2 | -------------------------------------------------------------------------------- /plugins/healthcheck/healthcheck.sql: -------------------------------------------------------------------------------- 1 | select extract(epoch from now())::int - (extract(epoch from now())::int % $1); -------------------------------------------------------------------------------- /plugins/healthcheck/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.healthcheck' 2 | local every = 60 3 | 4 | local sql_healthcheck = read_file_in_plugin_dir("healthcheck.sql") 5 | 6 | local function collect() 7 | local result = target:query(sql_healthcheck, every) 8 | storage_insert_metric({ plugin = plugin_name, snapshot = result.rows[1][1], int = result.rows[1][1] }) 9 | end 10 | 11 | run_every(collect, every) 12 | -------------------------------------------------------------------------------- /plugins/healthcheck/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(120, function() return metric_exists('pg.healthcheck') end) -------------------------------------------------------------------------------- /plugins/init.lua: -------------------------------------------------------------------------------- 1 | -- this file loaded on first start of plugin 2 | 3 | filepath = require("filepath") 4 | time = require("time") 5 | inspect = require("inspect") 6 | json = require("json") 7 | ioutil = require("ioutil") 8 | crypto = require("crypto") 9 | goos = require("goos") 10 | log = require("log") 11 | humanize = require("humanize") 12 | strings = require("strings") 13 | 14 | plugin_log = log.new() 15 | plugin_log:set_flags({ date = true, time = true }) 16 | 17 | -- current directory (root) 18 | root = filepath.dir(debug.getinfo(1).source) 19 | 20 | HOST_DEV_DIR = os.getenv('HOST_DEV_DIR') or '/dev' 21 | HOST_SYS_DIR = os.getenv('HOST_SYS_DIR') or '/sys' 22 | HOST_PROC_DIR = os.getenv('HOST_PROC_DIR') or '/proc' 23 | 24 | -- read file in plugin dir 25 | function read_file_in_plugin_dir(filename) 26 | local data, err = ioutil.read_file(filepath.join(plugin:dir(), filename)) 27 | if err then error(err) end 28 | return data 29 | end 30 | 31 | -- return true if database hosted on rds 32 | function is_rds() 33 | return not (not ( 34 | pcall(function() 35 | target:query("show rds.extensions") 36 | end) 37 | )) 38 | end 39 | 40 | -- return unix ts from connection 41 | function get_unix_ts(conn, ts) 42 | conn = conn or target 43 | ts = ts or 1 44 | return conn:query("select extract(epoch from now())::int - (extract(epoch from now())::int % $1)", ts).rows[1][1] 45 | end 46 | 47 | -- insert metric with plugin:host() 48 | local count_empty_metrics = 0 49 | function storage_insert_metric(metric) 50 | if not (metric.host) then metric.host = plugin:host() end 51 | if (metric.int == nil) and (metric.float == nil) and not (metric.json == nil) then 52 | local jsonb, err = json.decode(metric.json) 
53 | if err then error(err) end 54 | if next(jsonb) == nil then 55 | count_empty_metrics = count_empty_metrics + 1 56 | if (count_empty_metrics % 10) == 0 then 57 | plugin_log:printf("[ERROR] plugin '%s' on host '%s': empty metric (%d times)\n", plugin:name(), plugin:host(), count_empty_metrics) 58 | end 59 | return 60 | end 61 | end 62 | storage:insert_metric(metric) 63 | end 64 | 65 | -- return postgresql version 66 | function get_pg_server_version() 67 | if pg_server_version then return pg_server_version end 68 | local version = target:query("show server_version") 69 | pg_server_version = tonumber(version.rows[1][1]) 70 | return pg_server_version 71 | end 72 | 73 | -- return result of pg_is_in_recovery() 74 | function get_pg_is_in_recovery() 75 | local pg_is_in_recovery = target:query("select pg_catalog.pg_is_in_recovery()") 76 | return pg_is_in_recovery.rows[1][1] 77 | end 78 | 79 | -- return true if extension installed 80 | function extension_present(conn, extname) 81 | local extension = conn:query("select count(extname) from pg_catalog.pg_extension where extname = $1", extname) 82 | return (extension.rows[1][1] == 1) 83 | end 84 | 85 | -- prometheus_gauge:set() 86 | function gauge_set(name, value, labels) 87 | local value = tonumber(value) 88 | if (value == nil) then return end 89 | local labels = labels or {} 90 | if (labels.host == nil) then labels.host = plugin:host() end 91 | local label_keys = {}; for k, _ in pairs(labels) do table.insert(label_keys, k) end 92 | local gauge, err = prometheus_gauge({ 93 | namespace = "pg", 94 | subsystem = "gatherer", 95 | name = name, 96 | labels = label_keys 97 | }) 98 | if err then error(err) end 99 | gauge:set(value, labels) 100 | end 101 | 102 | -- run function f every `every` seconds 103 | -- this function runs in the plugin context, so we can use the cache key `last_run` 104 | function run_every(f, every) 105 | while true do 106 | local last_run_at = cache:get("last_run") or 0 107 | if time.unix() >= last_run_at + every then 108 | local start_at = time.unix() 109 | cache:set("last_run", start_at) 110 | f() 111 | local exec_time = (time.unix() - start_at) 112 | if exec_time > every then 113 | plugin_log:printf("[ERROR] plugin '%s' on host '%s' execution timeout: %.2f s\n", plugin:name(), plugin:host(), exec_time) 114 | time.sleep(1) 115 | else 116 | if exec_time > 1 then 117 | plugin_log:printf("[INFO] plugin '%s' on host '%s' execution time: %.2f s\n", plugin:name(), plugin:host(), exec_time) 118 | end 119 | end 120 | else 121 | -- wait (every/10 + 1) seconds between checks to reduce CPU spikes 122 | local rand = tonumber(string.format("%.0f", every / 10) +1) 123 | time.sleep(rand) 124 | end 125 | end 126 | end 127 | 128 | -- sleep a random 0-1s on start to reduce CPU spikes 129 | time.sleep(math.random(100)/100) 130 | -------------------------------------------------------------------------------- /plugins/init.test.lua: -------------------------------------------------------------------------------- 1 | -- load init.lua 2 | local filepath = require("filepath") 3 | local root = filepath.dir(debug.getinfo(1).source) 4 | dofile(filepath.join(root, "init.lua")) 5 | 6 | function metric_exists(metric) 7 | local sql_query = string.format([[ 8 | select 9 | count(*) 10 | from 11 | metric m 12 | where 13 | plugin = md5('%s')::uuid 14 | and host = md5('%s')::uuid 15 | and ts > extract( epoch from (now()-'3 minute'::interval) ) 16 | and (value_jsonb::text <> '{}' or value_jsonb is null) 17 | ]], metric, tested_plugin:host()) 18 | local result = target:query(sql_query).rows[1] 19 
| if result and result[1] then 20 | return result[1] > 0 21 | end 22 | return false 23 | end 24 | 25 | function plugin_check_error() 26 | if tested_plugin:error_count() > 0 then 27 | error(tested_plugin:last_error()) 28 | end 29 | end 30 | 31 | function run_plugin_test(timeout, success_exit_function, check_error_function) 32 | check_error_function = check_error_function or plugin_check_error 33 | success_exit_function = success_exit_function or function() return false end 34 | tested_plugin:create() 35 | timeout = timeout or 120 36 | while timeout > 0 do 37 | check_error_function() 38 | if success_exit_function() then 39 | tested_plugin:remove() 40 | return 41 | end 42 | time.sleep(1) 43 | timeout = timeout - 1 44 | end 45 | tested_plugin:remove() 46 | error("execution timeout") 47 | end 48 | -------------------------------------------------------------------------------- /plugins/linux_cpu/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'linux.cpu' 2 | local every = 60 3 | 4 | -- read line from /proc/stat 5 | local function read_cpu_values(str) 6 | -- https://www.kernel.org/doc/Documentation/filesystems/proc.txt 7 | local fields = { "user", "nice", "system", "idle", "iowait", "irq", "softirq", "steal", "guest", "guest_nice" } 8 | local row, offset = {}, 1 9 | for value in str:gmatch("(%d+)") do 10 | row[fields[offset]] = tonumber(value) 11 | offset = offset + 1 12 | end 13 | return row 14 | end 15 | 16 | local function collect() 17 | for line in io.lines(HOST_PROC_DIR .. "/stat") do 18 | 19 | -- all cpu 20 | local cpu_all_line = line:match("^cpu%s+(.*)") 21 | if cpu_all_line then 22 | local cpu_all_values = read_cpu_values(cpu_all_line) 23 | local jsonb = {} 24 | for key, value in pairs(cpu_all_values) do 25 | jsonb[key] = cache:speed_and_set(key, value) 26 | end 27 | local jsonb, err = json.encode(jsonb) 28 | if err then error(err) end 29 | storage_insert_metric({ plugin = plugin_name, json = jsonb }) 30 | end 31 | 32 | -- running, blocked 33 | local processes = line:match("^procs_(.*)") 34 | if processes then 35 | local key, val = string.match(processes, "^(%S+)%s+(%d+)") 36 | storage_insert_metric({ plugin = plugin_name .. "." .. key, int = tonumber(val) }) 37 | end 38 | 39 | -- context switching 40 | local ctxt = line:match("^ctxt (%d+)") 41 | if ctxt then 42 | local diff = cache:speed_and_set("ctxt", tonumber(ctxt)) 43 | if diff then storage_insert_metric({ plugin = plugin_name .. ".ctxt", float = diff }) end 44 | end 45 | 46 | -- fork rate 47 | local processes = line:match("^processes (%d+)") 48 | if processes then 49 | local diff = cache:speed_and_set("processes", tonumber(processes)) 50 | if diff then storage_insert_metric({ plugin = plugin_name .. ".fork_rate", float = diff }) end 51 | end 52 | 53 | -- interrupts 54 | local intr = line:match("^intr (%d+)") 55 | if intr then 56 | local diff = cache:speed_and_set("intr", tonumber(intr)) 57 | if diff then storage_insert_metric({ plugin = plugin_name .. 
".intr", float = diff }) end 58 | end 59 | 60 | end 61 | end 62 | 63 | run_every(collect, every) 64 | -------------------------------------------------------------------------------- /plugins/linux_cpu/test.lua: -------------------------------------------------------------------------------- 1 | if not goos.stat(HOST_PROC_DIR..'/stat') then 2 | print('disabled plugin, because /proc/stat not found') 3 | return 4 | end 5 | 6 | run_plugin_test(120, function() return metric_exists('linux.cpu') end) -------------------------------------------------------------------------------- /plugins/linux_diskstats/helper_disk_stat.lua: -------------------------------------------------------------------------------- 1 | local filepath = require('filepath') 2 | local ioutil = require('ioutil') 3 | local helper = {} 4 | 5 | local function read_diskstat() 6 | local result = {} 7 | -- https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats 8 | local pattern = "(%d+)%s+(%d+)%s+(%S+)%s+(%d+)%s+(%d+)%s+(%d+)%s+(%d+)%s+(%d+)%s+(%d+)%s+(%d+)%s+(%d+)%s+(%d+)%s+(%d+)%s+(%d+)" 9 | local data, err = ioutil.read_file(HOST_PROC_DIR .. "/diskstats") 10 | if err then error(err) end 11 | for _, line in pairs(strings.split(data, "\n")) do 12 | local major, minor, dev_name, 13 | rd_ios, rd_merges_or_rd_sec, rd_sec_or_wr_ios, rd_ticks_or_wr_sec, 14 | wr_ios, wr_merges, wr_sec, wr_ticks, ios_pgr, tot_ticks, rq_ticks = line:match(pattern) 15 | if dev_name then 16 | result[dev_name] = { 17 | major = tonumber(major), minor = tonumber(minor), 18 | rd_ios = tonumber(rd_ios), rd_merges_or_rd_sec = tonumber(rd_merges_or_rd_sec), 19 | rd_sec_or_wr_ios = tonumber(rd_sec_or_wr_ios), rd_ticks_or_wr_sec = tonumber(rd_ticks_or_wr_sec), 20 | wr_ios = tonumber(wr_ios), wr_merges = tonumber(wr_merges), 21 | wr_sec = tonumber(wr_sec), wr_ticks = tonumber(wr_ticks), 22 | ios_pgr = tonumber(ios_pgr), tot_ticks = tonumber(tot_ticks), 23 | rq_ticks = tonumber(rq_ticks) 24 | } 25 | end 26 | end 27 | return result 28 | end 29 | helper.read_diskstat = read_diskstat 30 | 31 | -- /dev/sda => mountpoint, /dev/mapper/vg0-lv_slashroot => / 32 | -- мы ищем только прямое совпадение! 33 | local function get_mountpoint_from_mounts(full_dev_name) 34 | for line in io.lines(HOST_PROC_DIR .. "/mounts") do 35 | local reg_full_dev_name = full_dev_name:gsub("%-", "%S") 36 | local mountpoint = line:match("^" .. reg_full_dev_name .. "%s+(%S+)%s+") 37 | if mountpoint then return mountpoint end 38 | end 39 | end 40 | 41 | -- sdXD => mountpoint 42 | local function sd_mountpoint(sdX) 43 | return get_mountpoint_from_mounts(HOST_DEV_DIR .. "/" .. sdX) 44 | end 45 | 46 | -- dm-X => mountpoint 47 | local function dm_mountpoint(dmX) 48 | local name = ioutil.read_file(HOST_SYS_DIR .. "/block/" .. dmX .. "/dm/name"):gsub("^%s+", ""):gsub("%s+$", "") 49 | if not name then return nil end 50 | return get_mountpoint_from_mounts(HOST_DEV_DIR .. "/mapper/" .. name) 51 | end 52 | 53 | -- mdX => mountpoint 54 | local function md_mountpoint(mdX) 55 | return get_mountpoint_from_mounts(HOST_DEV_DIR .. "/" .. 
mdX) 56 | end 57 | 58 | -- sd, md, dm => mountpoint 59 | local function get_mountpoint_by_dev(dev) 60 | if dev:match("^sd") then return sd_mountpoint(dev) end 61 | if dev:match("^nvme") then return sd_mountpoint(dev) end 62 | if dev:match("^xvd") then return sd_mountpoint(dev) end 63 | if dev:match("^dm") then return dm_mountpoint(dev) end 64 | if dev:match("^md") then return md_mountpoint(dev) end 65 | end 66 | helper.get_mountpoint_by_dev = get_mountpoint_by_dev 67 | 68 | local function md_device_sizes(mdX) 69 | local result = {} 70 | for _, path in pairs(filepath.glob(HOST_SYS_DIR .. "/block/" .. mdX .. "/slaves/*")) do 71 | local dev = path:match(HOST_SYS_DIR .. "/block/" .. mdX .. "/slaves/(%S+)$") 72 | result[dev] = tonumber(ioutil.read_file(path .. "/size")) 73 | end 74 | return result 75 | end 76 | helper.md_device_sizes = md_device_sizes 77 | 78 | -- mdX => raid0, raid1, ... 79 | local function md_level(mdX) 80 | local data = ioutil.read_file(HOST_SYS_DIR .. "/block/" .. mdX .. "/md/level") 81 | if data then 82 | return data:gsub("%s+$", "") 83 | else 84 | return nil 85 | end 86 | end 87 | helper.md_level = md_level 88 | 89 | helper.calc_values = {} 90 | local function calc_value(dev, values) 91 | if helper.calc_values[dev] == nil then helper.calc_values[dev] = {} end 92 | if helper.calc_values[dev]["data"] == nil then helper.calc_values[dev]["data"] = {} end 93 | -- first run 94 | if helper.calc_values[dev]["data"]["previous"] == nil then 95 | helper.calc_values[dev]["data"]["previous"] = values; 96 | return ; 97 | end 98 | 99 | local previous, current = helper.calc_values[dev]["data"]["previous"], values 100 | 101 | -- await https://github.com/sysstat/sysstat/blob/v11.5.6/common.c#L816 102 | local ticks = ((current.rd_ticks_or_wr_sec - previous.rd_ticks_or_wr_sec) + (current.wr_ticks - previous.wr_ticks)) 103 | local io_sec = (current.rd_ios + current.wr_ios) - (previous.rd_ios + previous.wr_ios) 104 | if (io_sec > 0) and (ticks > 0) then helper.calc_values[dev]["await"] = ticks / io_sec end 105 | if (io_sec == 0) or (ticks == 0) then helper.calc_values[dev]["await"] = 0 end 106 | 107 | -- save 108 | helper.calc_values[dev]["data"]["previous"] = values 109 | end 110 | helper.calc_value = calc_value 111 | 112 | return helper 113 | -------------------------------------------------------------------------------- /plugins/linux_diskstats/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'linux.diskstats' 2 | local every = 10 3 | 4 | if not goos.stat(HOST_PROC_DIR .. 
'/diskstats') then 5 | print('disabled diskstats plugin, because /proc/diskstats not found') 6 | while true do 7 | time.sleep(1) 8 | end 9 | end 10 | 11 | local helper = dofile(filepath.join(plugin:dir(), "helper_disk_stat.lua")) 12 | 13 | local function collect() 14 | 15 | local devices_info, all_stats = {}, {} 16 | for dev, values in pairs(helper.read_diskstat()) do 17 | local mountpoint = helper.get_mountpoint_by_dev(dev) 18 | if mountpoint then 19 | devices_info[dev] = {} 20 | devices_info[dev]["mountpoint"] = mountpoint 21 | all_stats[dev] = { 22 | utilization = values.tot_ticks / 10, 23 | read_bytes = values.rd_sec_or_wr_ios * 512, read_ops = values.rd_ios, 24 | write_bytes = values.wr_sec * 512, write_ops = values.wr_ios 25 | } 26 | helper.calc_value(dev, values) 27 | end 28 | end 29 | 30 | for dev, info in pairs(devices_info) do 31 | local mountpoint = info["mountpoint"] 32 | local utilization, await = 0, nil 33 | if dev:match("^md") then 34 | local slaves_info = helper.md_device_sizes(dev) 35 | local total_slave_size = 0; 36 | for _, size in pairs(slaves_info) do total_slave_size = total_slave_size + size end 37 | local raid_level = helper.md_level(dev) 38 | if raid_level then 39 | -- for raid{0,1} we calculate utilization with a weight 40 | -- weight = (slave size) / (sum of all slave device sizes) 41 | if (raid_level == "raid0") or (raid_level == "raid1") then 42 | for slave, size in pairs(slaves_info) do 43 | local weight = size / total_slave_size 44 | utilization = utilization + (all_stats[slave]["utilization"] * weight) 45 | local slave_await = helper.calc_values[slave]["await"] 46 | if slave_await then 47 | if await == nil then await = 0 end 48 | await = await + (slave_await * weight) 49 | end 50 | end 51 | end 52 | end 53 | else 54 | utilization = all_stats[dev]["utilization"] 55 | await = helper.calc_values[dev]["await"] 56 | end 57 | -- send calculated values 58 | local jsonb = { mountpoint = mountpoint } 59 | jsonb.utilization = cache:speed_and_set(plugin_name .. "utilization" .. mountpoint, utilization) 60 | jsonb.await = await 61 | for _, key in pairs({ 'read_bytes', 'write_bytes', 'read_ops', 'write_ops' }) do 62 | jsonb[key] = cache:speed_and_set(plugin_name .. key .. mountpoint, all_stats[dev][key]) 63 | end 64 | local jsonb, err = json.encode(jsonb) 65 | if err then error(err) end 66 | storage_insert_metric({ plugin = plugin_name, json = jsonb }) 67 | end 68 | 69 | end 70 | 71 | run_every(collect, every) 72 | -------------------------------------------------------------------------------- /plugins/linux_diskstats/test.lua: -------------------------------------------------------------------------------- 1 | if not goos.stat(HOST_PROC_DIR .. 
'/diskstats') then 2 | print('disabled plugin, because /proc/diskstats not found') 3 | return 4 | end 5 | 6 | if os.getenv('CI') then 7 | local dir = filepath.dir(debug.getinfo(1).source) 8 | local helper = dofile(filepath.join(dir, "helper_disk_stat.lua")) 9 | local count = 0 10 | for dev, value in pairs(helper.read_diskstat()) do 11 | print("disk stat:", "dev:", dev, "value:", inspect(value)) 12 | count = count + 1 13 | end 14 | if count == 0 then 15 | print('disabled plugin: diskstat is empty') 16 | return 17 | end 18 | end 19 | 20 | run_plugin_test(120, function() return metric_exists('linux.diskstats') end) 21 | -------------------------------------------------------------------------------- /plugins/linux_memory/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'linux.memory' 2 | local every = 60 3 | 4 | local function collect() 5 | local jsonb = {} 6 | for line in io.lines(HOST_PROC_DIR .. "/meminfo") do 7 | local key, value = line:match("(%S+)%:%s+%d+%s+kB"), line:match("%S%:%s+(%d+)%s+kB") 8 | if key and value then 9 | jsonb[key] = tonumber(value * 1024) 10 | end 11 | end 12 | local jsonb, err = json.encode(jsonb) 13 | if err then error(err) end 14 | storage_insert_metric({ plugin = plugin_name, json = jsonb }) 15 | end 16 | 17 | run_every(collect, every) 18 | -------------------------------------------------------------------------------- /plugins/linux_memory/test.lua: -------------------------------------------------------------------------------- 1 | if not goos.stat(HOST_PROC_DIR..'/meminfo') then 2 | print('disabled plugin, because /proc/meminfo not found') 3 | return 4 | end 5 | 6 | run_plugin_test(120, function() return metric_exists('linux.memory') end) -------------------------------------------------------------------------------- /plugins/replication_slots/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.replication_slots' 2 | local every = 60 3 | 4 | local replication_slots_file = "replication_slots_9.sql" 5 | if get_pg_server_version() >= 10 then replication_slots_file = "replication_slots_10.sql" end 6 | local sql_replication_slots = read_file_in_plugin_dir(replication_slots_file) 7 | 8 | local function collect() 9 | local result = target:query(sql_replication_slots) 10 | local jsonb = {} 11 | for _, row in pairs(result.rows) do 12 | jsonb[row[1]] = tonumber(row[2]) 13 | end 14 | local jsonb, err = json.encode(jsonb) 15 | if err then error(err) end 16 | storage_insert_metric({ plugin = plugin_name, json = jsonb }) 17 | end 18 | 19 | run_every(collect, every) 20 | -------------------------------------------------------------------------------- /plugins/replication_slots/replication_slots_10.sql: -------------------------------------------------------------------------------- 1 | select 2 | s.slot_name::text as slot_name, 3 | pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), coalesce(s.confirmed_flush_lsn, s.restart_lsn))::bigint as size 4 | from 5 | pg_catalog.pg_replication_slots s; -------------------------------------------------------------------------------- /plugins/replication_slots/replication_slots_9.sql: -------------------------------------------------------------------------------- 1 | select 2 | s.slot_name::text as slot_name, 3 | pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_location(), coalesce(s.confirmed_flush_lsn, s.restart_lsn))::bigint as size 4 | from 5 | pg_catalog.pg_replication_slots 
s; -------------------------------------------------------------------------------- /plugins/replication_slots/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(120, function() return metric_exists('pg.replication_slots') end) -------------------------------------------------------------------------------- /plugins/sender/list_of_alerts.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | select 3 | value_jsonb ->> 'key' as key, 4 | value_jsonb ->> 'host' as host_text, 5 | host as host, 6 | max(ts) as ts 7 | from 8 | metric 9 | where 10 | host = md5($1::text)::uuid 11 | and plugin = md5('pg.alerts')::uuid 12 | and ts > ($2 - 5 * 60) 13 | and ts < $2 14 | group by 1, 2, 3 15 | ) 16 | select 17 | d.key, 18 | d.ts, 19 | d.host_text, 20 | m.value_jsonb ->> 'custom_details' as custom_details, 21 | (m.value_jsonb ->> 'created_at')::bigint as created_at 22 | from 23 | data d 24 | inner join metric m on m.ts = d.ts and m.host = d.host 25 | where 26 | m.host = md5($1::text)::uuid 27 | and m.plugin = md5('pg.alerts')::uuid 28 | and m.ts > ($2 - 5 * 60) 29 | and m.ts < $2; 30 | -------------------------------------------------------------------------------- /plugins/sender/pagerduty.lua: -------------------------------------------------------------------------------- 1 | local token = secrets:get("pagerduty_token") 2 | local key_default = secrets:get("pagerduty_key_default") 3 | 4 | if not (token) or not (key_default) then 5 | return function() end 6 | end 7 | 8 | plugin_log:printf("[INFO] start pagerduty sender\n") 9 | 10 | -- load routing rules 11 | local routing_file = filepath.join(plugin:dir(), "pagerduty_routing.lua") 12 | if goos.stat(filepath.join(plugin:dir(), "pagerduty_routing_overrides.lua")) then 13 | routing_file = filepath.join(plugin:dir(), "pagerduty_routing_overrides.lua") 14 | end 15 | local get_routing = dofile(routing_file) 16 | 17 | local http = require("http") 18 | local crypto = require("crypto") 19 | local http_client = http.client({}) 20 | 21 | local function send(info) 22 | local jsonb, err = json.encode(info) 23 | if err then error(err) end 24 | plugin_log:printf("[INFO] send to pagerduty: %s\n", jsonb) 25 | local request, err = http.request("POST", "https://events.pagerduty.com/v2/enqueue", jsonb) 26 | if err then error(err) end 27 | request:header_set("Content-Type", "application/json") 28 | request:header_set("Accept", "application/vnd.pagerduty+json;version=2") 29 | request:header_set("Authorization", "Token token=" .. token) 30 | local result, err = http_client:do_request(request) 31 | if err then error(err) end 32 | if result.code > 300 then 33 | error("response : " .. inspect(result)) 34 | end 35 | end 36 | 37 | local function process_alert_row(alert) 38 | local cache_key = alert.host .. alert.key .. "pagerduty" 39 | local silence_to = cache:get(cache_key) or 0 40 | if time.unix() > silence_to then 41 | local routing = get_routing(alert) 42 | local jsonb = { 43 | routing_key = routing.key, 44 | dedup_key = crypto.md5(alert.key .. alert.host), 45 | event_action = "trigger", 46 | payload = { 47 | summary = alert.key .. " [" .. alert.host .. "]", 48 | source = "pg_gatherer for " .. 
alert.host, 49 | severity = routing.severity, 50 | component = "postgresql", 51 | custom_details = json.decode(alert.custom_details or '{}') 52 | } 53 | } 54 | send(jsonb) 55 | silence_to = time.unix() + 5 * 60 56 | cache:set(cache_key, silence_to) 57 | end 58 | end 59 | 60 | return process_alert_row 61 | -------------------------------------------------------------------------------- /plugins/sender/pagerduty_routing.lua: -------------------------------------------------------------------------------- 1 | local dict = { 2 | critical = "critical", 3 | error = "error", 4 | warning = "warning", 5 | info = "info", 6 | } 7 | 8 | local key_default = secrets:get("pagerduty_key_default") 9 | local routing_keys = { 10 | default = key_default, 11 | critical = secrets:get("pagerduty_key_critical") or key_default, 12 | error = secrets:get("pagerduty_key_error") or key_default, 13 | warning = secrets:get("pagerduty_key_warning") or key_default, 14 | info = secrets:get("pagerduty_key_info") or key_default, 15 | } 16 | 17 | -- return routing 18 | local function routing(alert) 19 | return {severity=dict.critical, key=routing_keys[dict.critical]} 20 | end 21 | 22 | return routing 23 | -------------------------------------------------------------------------------- /plugins/sender/plugin.lua: -------------------------------------------------------------------------------- 1 | local processors = {} 2 | processors.telegram = dofile(filepath.join(plugin:dir(), "telegram.lua")) 3 | processors.pagerduty = dofile(filepath.join(plugin:dir(), "pagerduty.lua")) 4 | 5 | local sql = read_file_in_plugin_dir("list_of_alerts.sql") 6 | 7 | function process() 8 | local result = storage:query("select name from host where not maintenance") 9 | for _, rowHost in pairs(result.rows) do 10 | local host = rowHost[1] 11 | local result = storage:query(sql, host, get_unix_ts(storage)) 12 | for _, row in pairs(result.rows) do 13 | local alert = { 14 | key = row[1], ts = row[2], 15 | host = row[3], custom_details = row[4], 16 | created_at = row[5] 17 | } 18 | for name, process_f in pairs(processors) do 19 | local status, err = pcall(process_f, alert) 20 | if not status then 21 | plugin_log:printf("[ERROR] plugin '%s' processor '%s' error: %s\n", plugin:name(), name, err) 22 | end 23 | end 24 | end 25 | end 26 | end 27 | 28 | run_every(process, 5) 29 | -------------------------------------------------------------------------------- /plugins/sender/telegram.lua: -------------------------------------------------------------------------------- 1 | local token = secrets:get("telegram_token") 2 | local chat_id = secrets:get("telegram_chat_id") 3 | if chat_id then chat_id = tonumber(chat_id) end 4 | 5 | if not (token) or not (chat_id) then 6 | return function() end 7 | end 8 | 9 | plugin_log:printf("[INFO] start telegram sender\n") 10 | 11 | local telegram = require("telegram") 12 | local http = require("http") 13 | local client = http.client({}) 14 | local template = require("template") 15 | 16 | local mustache, err = template.choose("mustache") 17 | if err then error(err) end 18 | 19 | local telegram_bot = telegram.bot(token, client) 20 | local telegram_message_firing_template = [[ 21 | 🔥 FIRING 🔥 22 | Host: {{ host }} 23 | Problem: {{ key }} 24 | Created: {{ created_at }} 25 | Description: {{ description }} 26 | ]] 27 | 28 | function process_alert_row(alert) 29 | local cache_key = alert.key .. alert.host .. 
"telegram" 30 | local silence_to = cache:get(cache_key) or 0 31 | -- send message 32 | if time.unix() > silence_to then 33 | alert.created_at = alert.created_at or alert.ts 34 | local alert = { 35 | host = alert.host, 36 | key = alert.key, 37 | created_at = humanize.time(alert.created_at), 38 | description = alert.custom_details 39 | } 40 | local message = mustache:render(telegram_message_firing_template, alert) 41 | local _, err = telegram_bot:sendMessage({ 42 | chat_id = chat_id, 43 | text = message, 44 | parse_mode = "html" 45 | }) 46 | if err then error(err) end 47 | silence_to = time.unix() + 60 * 60 48 | cache:set(cache_key, silence_to) 49 | end 50 | end 51 | 52 | return process_alert_row 53 | -------------------------------------------------------------------------------- /plugins/sequences/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.sequences' 2 | local every = 60 3 | local sql_sequences = read_file_in_plugin_dir("sequences.sql") 4 | 5 | local function collect_for_db(conn) 6 | local result = conn:query(sql_sequences, every) 7 | for _, row in pairs(result.rows) do 8 | storage_insert_metric({ plugin = plugin_name, snapshot = row[1], json = row[2] }) 9 | end 10 | end 11 | 12 | local function collect() 13 | for _, conn in pairs(target:available_connections()) do 14 | collect_for_db(conn) 15 | end 16 | end 17 | 18 | run_every(collect, every) 19 | -------------------------------------------------------------------------------- /plugins/sequences/sequences.sql: -------------------------------------------------------------------------------- 1 | with raw_sequences as ( 2 | select sq.relname, pn.nspname, sqs.increment_by, sqs.min_value, 3 | case 4 | when min_value < 0 then -1 5 | else 256 ^ pa.attlen / 2 6 | end 7 | as max_value, 8 | coalesce(sqs.last_value, sqs.min_value) as last_value 9 | from pg_class sq 10 | join pg_depend dp on sq.oid = dp.objid 11 | join pg_class pc on pc.oid = dp.refobjid 12 | join pg_attribute pa on pa.attrelid = pc.oid and pa.attnum = dp.refobjsubid 13 | join pg_type pt on pa.atttypid = pt.oid 14 | join pg_namespace pn on pn.oid = pc.relnamespace 15 | join pg_sequences sqs on sqs.sequencename = sq.relname and pn.nspname = sqs.schemaname 16 | where sq.relkind = 'S' 17 | ), 18 | qsequences as( 19 | select *, (max_value - min_value) / increment_by as total_values, 20 | (max_value - coalesce(last_value, min_value)) / increment_by as remain_values from raw_sequences 21 | ) 22 | select 23 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 24 | jsonb_build_object( 25 | 'sequence_name', current_database() || '.' || nspname || '.' 
|| relname, 26 | 'last_value', last_value, 27 | 'max_value', max_value, 28 | 'remaining_capacity', remain_values::float / total_values::float * 100 29 | ) 30 | from qsequences 31 | -------------------------------------------------------------------------------- /plugins/sequences/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(120, function() return metric_exists('pg.sequences') end) 2 | -------------------------------------------------------------------------------- /plugins/statements/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.statements' 2 | local every = 300 3 | 4 | local sql_statements = read_file_in_plugin_dir("statements.sql") 5 | 6 | local function collect() 7 | local result = target:query(sql_statements, every) 8 | local statements_data, snapshot = {}, nil 9 | for _, row in pairs(result.rows) do 10 | if not snapshot then snapshot = row[1] end 11 | local jsonb, err = json.decode(row[2]) 12 | if err then error(err) end 13 | local key = crypto.md5(tostring(jsonb.queryid) .. tostring(jsonb.query) .. tostring(jsonb.dbname) .. tostring(jsonb.user)) 14 | jsonb.calls = cache:diff_and_set(key .. ".calls", jsonb.calls) 15 | jsonb.rows = cache:diff_and_set(key .. ".rows", jsonb.rows) 16 | jsonb.shared_blks_hit = cache:diff_and_set(key .. ".shared_blks_hit", jsonb.shared_blks_hit) 17 | jsonb.shared_blks_read = cache:diff_and_set(key .. ".shared_blks_read", jsonb.shared_blks_read) 18 | jsonb.shared_blks_dirtied = cache:diff_and_set(key .. ".shared_blks_dirtied", jsonb.shared_blks_dirtied) 19 | jsonb.shared_blks_written = cache:diff_and_set(key .. ".shared_blks_written", jsonb.shared_blks_written) 20 | jsonb.local_blks_hit = cache:diff_and_set(key .. ".local_blks_hit", jsonb.local_blks_hit) 21 | jsonb.local_blks_read = cache:diff_and_set(key .. ".local_blks_read", jsonb.local_blks_read) 22 | jsonb.local_blks_dirtied = cache:diff_and_set(key .. ".local_blks_dirtied", jsonb.local_blks_dirtied) 23 | jsonb.local_blks_written = cache:diff_and_set(key .. ".local_blks_written", jsonb.local_blks_written) 24 | jsonb.temp_blks_read = cache:diff_and_set(key .. ".temp_blks_read", jsonb.temp_blks_read) 25 | jsonb.temp_blks_written = cache:diff_and_set(key .. ".temp_blks_written", jsonb.temp_blks_written) 26 | jsonb.total_time = cache:diff_and_set(key .. ".total_time", jsonb.total_time) 27 | jsonb.blk_read_time = cache:diff_and_set(key .. ".blk_read_time", jsonb.blk_read_time) 28 | jsonb.blk_write_time = cache:diff_and_set(key .. 
".blk_write_time", jsonb.blk_write_time) 29 | if jsonb.calls and (jsonb.calls > 0) then 30 | table.insert(statements_data, jsonb) 31 | end 32 | end 33 | local jsonb, err = json.encode(statements_data) 34 | if err then error(err) end 35 | storage_insert_metric({ plugin = plugin_name, snapshot = snapshot, json = jsonb }) 36 | end 37 | 38 | run_every(collect, every) 39 | -------------------------------------------------------------------------------- /plugins/statements/statements.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'queryid', queryid::bigint, 5 | 'dbname', d.datname::text, 6 | 'user', pg_catalog.pg_get_userbyid(userid)::text, 7 | 'query', query::text, 8 | 'calls', calls::bigint, 9 | 'total_time', total_time::float8, 10 | 'rows', rows::bigint, 11 | 'shared_blks_hit', shared_blks_hit::bigint, 12 | 'shared_blks_read', shared_blks_read::bigint, 13 | 'shared_blks_written', shared_blks_written::bigint, 14 | 'shared_blks_dirtied', shared_blks_dirtied::bigint, 15 | 'local_blks_hit', local_blks_hit::bigint, 16 | 'local_blks_read', local_blks_read::bigint, 17 | 'local_blks_dirtied', local_blks_dirtied::bigint, 18 | 'local_blks_written', local_blks_written::bigint, 19 | 'temp_blks_read', temp_blks_read::bigint, 20 | 'temp_blks_written', temp_blks_written::bigint, 21 | 'blk_read_time', blk_read_time::float8, 22 | 'blk_write_time', blk_write_time::float8 23 | ) as result 24 | from 25 | pg_stat_statements s 26 | inner join pg_database d on s.dbid = d.oid 27 | where 28 | not (query ~ '^SAVEPOINT ') 29 | and not (query ~ '^RELEASE SAVEPOINT'); -------------------------------------------------------------------------------- /plugins/statements/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(300+30+3, function() return metric_exists('pg.statements') end) 2 | -------------------------------------------------------------------------------- /plugins/uptime/checkpointer_uptime_10.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from (now() - backend_start))::bigint 3 | from 4 | pg_catalog.pg_stat_activity 5 | where 6 | backend_type = 'checkpointer' 7 | limit 1; -------------------------------------------------------------------------------- /plugins/uptime/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.uptime' 2 | local every = 300 3 | 4 | local sql_uptime = read_file_in_plugin_dir("uptime.sql") 5 | local sql_checkpointer_uptime = read_file_in_plugin_dir("checkpointer_uptime_10.sql") 6 | 7 | local function collect_9() 8 | local result = target:query(sql_uptime) 9 | for _, row in pairs(result.rows) do 10 | storage_insert_metric({ plugin = plugin_name, int = row[1] }) 11 | end 12 | end 13 | 14 | local function collect_10() 15 | local result = target:query(sql_uptime) 16 | for _, row in pairs(result.rows) do 17 | storage_insert_metric({ plugin = plugin_name, int = row[1] }) 18 | end 19 | local result = target:query(sql_checkpointer_uptime) 20 | for _, row in pairs(result.rows) do 21 | storage_insert_metric({ plugin = plugin_name .. 
".checkpointer", int = row[1] }) 22 | end 23 | end 24 | 25 | local collect = collect_9 26 | if get_pg_server_version() >= 10 then collect = collect_10 end 27 | run_every(collect, every) 28 | -------------------------------------------------------------------------------- /plugins/uptime/test.lua: -------------------------------------------------------------------------------- 1 | local uptime_metric_exists = function() 2 | return (metric_exists('pg.uptime') and metric_exists('pg.uptime.checkpointer')) 3 | end 4 | 5 | run_plugin_test(120, uptime_metric_exists) -------------------------------------------------------------------------------- /plugins/uptime/uptime.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from (current_timestamp - pg_catalog.pg_postmaster_start_time()))::bigint; -------------------------------------------------------------------------------- /plugins/user_tables/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.user_tables' 2 | local every = 60 3 | 4 | local sql_user_tables = read_file_in_plugin_dir("user_tables.sql") 5 | local sql_user_tables_io = read_file_in_plugin_dir("user_tables_io.sql") 6 | 7 | snapshot = nil 8 | user_tables_stat_data, user_tables_io_data = {}, {} 9 | 10 | local function collect_for_db(conn) 11 | 12 | local result = conn:query(sql_user_tables, every) 13 | for _, row in pairs(result.rows) do 14 | if not snapshot then snapshot = row[1] end 15 | local jsonb, err = json.decode(row[2]) 16 | if err then error(err) end 17 | 18 | local table_name = jsonb.full_table_name 19 | jsonb.vacuum_count = cache:diff_and_set(table_name .. ".vacuum_count", jsonb.vacuum_count) 20 | jsonb.autovacuum_count = cache:diff_and_set(table_name .. ".autovacuum_count", jsonb.autovacuum_count) 21 | jsonb.analyze_count = cache:diff_and_set(table_name .. ".analyze_count", jsonb.analyze_count) 22 | jsonb.autoanalyze_count = cache:diff_and_set(table_name .. ".autoanalyze_count", jsonb.autoanalyze_count) 23 | jsonb.seq_scan = cache:diff_and_set(table_name .. ".seq_scan", jsonb.seq_scan) 24 | jsonb.seq_tup_read = cache:diff_and_set(table_name .. ".seq_tup_read", jsonb.seq_tup_read) 25 | jsonb.idx_scan = cache:diff_and_set(table_name .. ".idx_scan", jsonb.idx_scan) 26 | jsonb.idx_tup_fetch = cache:diff_and_set(table_name .. ".idx_tup_fetch", jsonb.idx_tup_fetch) 27 | jsonb.n_tup_ins = cache:diff_and_set(table_name .. ".n_tup_ins", jsonb.n_tup_ins) 28 | jsonb.n_tup_upd = cache:diff_and_set(table_name .. ".n_tup_upd", jsonb.n_tup_upd) 29 | jsonb.n_tup_del = cache:diff_and_set(table_name .. ".n_tup_del", jsonb.n_tup_del) 30 | jsonb.n_tup_hot_upd = cache:diff_and_set(table_name .. ".n_tup_hot_upd", jsonb.n_tup_hot_upd) 31 | jsonb.n_live_tup = jsonb.n_live_tup 32 | jsonb.n_dead_tup = jsonb.n_dead_tup 33 | jsonb.n_mod_since_analyze = cache:diff_and_set(table_name .. 
".n_mod_since_analyze", jsonb.n_mod_since_analyze) 34 | jsonb.relpages = jsonb.relpages 35 | 36 | if jsonb.vacuum_count or jsonb.autovacuum_count or jsonb.analyze_count or 37 | jsonb.autoanalyze_count or jsonb.seq_scan or jsonb.seq_tup_read or jsonb.idx_scan or 38 | jsonb.idx_tup_fetch or jsonb.idx_tup_fetch or jsonb.n_tup_ins or jsonb.n_tup_upd or 39 | jsonb.n_tup_del or jsonb.n_tup_hot_upd then 40 | table.insert(user_tables_stat_data, jsonb) 41 | end 42 | end 43 | 44 | local result = conn:query(sql_user_tables_io, every) 45 | for _, row in pairs(result.rows) do 46 | if not snapshot then snapshot = row[1] end 47 | local jsonb, err = json.decode(row[2]) 48 | if err then error(err) end 49 | 50 | local table_name = jsonb.full_table_name 51 | jsonb.heap_blks_read = cache:diff_and_set(table_name .. ".heap_blks_read", jsonb.heap_blks_read) 52 | jsonb.heap_blks_hit = cache:diff_and_set(table_name .. ".heap_blks_hit", jsonb.heap_blks_hit) 53 | jsonb.idx_blks_read = cache:diff_and_set(table_name .. ".idx_blks_read", jsonb.idx_blks_read) 54 | jsonb.idx_blks_hit = cache:diff_and_set(table_name .. ".idx_blks_hit", jsonb.idx_blks_hit) 55 | jsonb.toast_blks_read = cache:diff_and_set(table_name .. ".toast_blks_read", jsonb.toast_blks_read) 56 | jsonb.toast_blks_hit = cache:diff_and_set(table_name .. ".toast_blks_hit", jsonb.toast_blks_hit) 57 | jsonb.tidx_blks_read = cache:diff_and_set(table_name .. ".tidx_blks_read", jsonb.tidx_blks_read) 58 | jsonb.tidx_blks_hit = cache:diff_and_set(table_name .. ".tidx_blks_hit", jsonb.tidx_blks_hit) 59 | 60 | if jsonb.heap_blks_read or jsonb.heap_blks_hit or jsonb.idx_blks_read or jsonb.idx_blks_hit or 61 | jsonb.toast_blks_read or jsonb.toast_blks_hit or jsonb.tidx_blks_read or jsonb.tidx_blks_hit then 62 | table.insert(user_tables_io_data, jsonb) 63 | end 64 | end 65 | 66 | end 67 | 68 | local function collect() 69 | 70 | snapshot, user_tables_stat_data, user_tables_io_data = nil, {}, {} 71 | 72 | for _, conn in pairs(target:available_connections()) do 73 | collect_for_db(conn) 74 | end 75 | 76 | if snapshot then 77 | local jsonb, err = json.encode(user_tables_stat_data) 78 | if err then error(err) end 79 | storage_insert_metric({ plugin = plugin_name, snapshot = snapshot, json = jsonb }) 80 | 81 | local jsonb, err = json.encode(user_tables_io_data) 82 | if err then error(err) end 83 | storage_insert_metric({ plugin = plugin_name .. ".io", snapshot = snapshot, json = jsonb }) 84 | else 85 | plugin_log:printf("[ERROR] user_tables information is empty, host %s\n", plugin:host()) 86 | end 87 | end 88 | 89 | run_every(collect, every) 90 | -------------------------------------------------------------------------------- /plugins/user_tables/test.lua: -------------------------------------------------------------------------------- 1 | local user_tables_metric_exists = function() 2 | return (metric_exists('pg.user_tables') and metric_exists('pg.user_tables.io')) 3 | end 4 | 5 | run_plugin_test(120, user_tables_metric_exists) -------------------------------------------------------------------------------- /plugins/user_tables/user_tables.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'relid', relid::bigint, 5 | 'full_table_name', current_database() || '.' || p.schemaname || '.' 
|| p.relname, 6 | 'last_vacuum', extract(epoch from p.last_vacuum)::bigint, 7 | 'last_autovacuum', extract(epoch from p.last_autovacuum)::bigint, 8 | 'last_analyze', extract(epoch from p.last_analyze)::bigint, 9 | 'last_autoanalyze', extract(epoch from p.last_autoanalyze)::bigint, 10 | 'vacuum_count', p.vacuum_count, 11 | 'autovacuum_count', p.autovacuum_count, 12 | 'analyze_count', p.analyze_count, 13 | 'autoanalyze_count', p.autoanalyze_count, 14 | 'seq_scan', p.seq_scan, 15 | 'seq_tup_read', p.seq_tup_read, 16 | 'idx_scan', p.idx_scan, 17 | 'idx_tup_fetch', p.idx_tup_fetch, 18 | 'n_tup_ins', p.n_tup_ins, 19 | 'n_tup_upd', p.n_tup_upd, 20 | 'n_tup_del', p.n_tup_del, 21 | 'n_tup_hot_upd', p.n_tup_hot_upd, 22 | 'n_live_tup', p.n_live_tup, 23 | 'n_dead_tup', p.n_dead_tup, 24 | 'n_mod_since_analyze', p.n_mod_since_analyze, 25 | 'relpages', c.relpages, 26 | 'reltuples', c.reltuples 27 | ) as result 28 | from 29 | pg_catalog.pg_stat_user_tables p 30 | inner join pg_catalog.pg_class c on c.oid = p.relid; -------------------------------------------------------------------------------- /plugins/user_tables/user_tables_io.sql: -------------------------------------------------------------------------------- 1 | select 2 | extract(epoch from now())::int - (extract(epoch from now())::int % $1), 3 | jsonb_build_object( 4 | 'relid', relid::bigint, 5 | 'full_table_name', current_database() || '.' || schemaname || '.' || relname, 6 | 'heap_blks_read', heap_blks_read, 7 | 'heap_blks_hit', heap_blks_hit, 8 | 'idx_blks_read', idx_blks_read, 9 | 'idx_blks_hit', idx_blks_hit, 10 | 'toast_blks_read', toast_blks_read, 11 | 'toast_blks_hit', toast_blks_hit, 12 | 'tidx_blks_read', tidx_blks_read, 13 | 'tidx_blks_hit', tidx_blks_hit 14 | ) as result 15 | from 16 | pg_catalog.pg_statio_user_tables; -------------------------------------------------------------------------------- /plugins/wal/plugin.lua: -------------------------------------------------------------------------------- 1 | local plugin_name = 'pg.wal' 2 | local every = 60 3 | 4 | local function get_sql() 5 | local filename = "" 6 | if get_pg_is_in_recovery() then 7 | -- slave 8 | if get_pg_server_version() >= 10 then 9 | filename = "wal_replica_10.sql" 10 | else 11 | filename = "wal_replica_9.sql" 12 | end 13 | else 14 | -- master 15 | if get_pg_server_version() >= 10 then 16 | filename = "wal_master_10.sql" 17 | else 18 | filename = "wal_master_10.sql" 19 | end 20 | end 21 | return read_file_in_plugin_dir(filename) 22 | end 23 | 24 | local function collect() 25 | local result = target:query(get_sql()) 26 | for _, row in pairs(result.rows) do 27 | local wal_position, pg_is_in_recovery, time_lag = row[1], row[2], row[3] 28 | local wal_speed = cache:speed_and_set("wal_speed", wal_position) 29 | if wal_speed then 30 | storage_insert_metric({ plugin = plugin_name .. ".speed", float = wal_speed }) 31 | end 32 | if pg_is_in_recovery then 33 | storage_insert_metric({ plugin = plugin_name .. 
".replication_time_lag", float = time_lag }) 34 | end 35 | end 36 | end 37 | 38 | run_every(collect, every) 39 | -------------------------------------------------------------------------------- /plugins/wal/test.lua: -------------------------------------------------------------------------------- 1 | run_plugin_test(120, function() return metric_exists('pg.wal.speed') end) -------------------------------------------------------------------------------- /plugins/wal/wal_master_10.sql: -------------------------------------------------------------------------------- 1 | select 2 | pg_wal_lsn_diff(pg_current_wal_lsn(), '0/00000000')::bigint as wal_position, 3 | pg_catalog.pg_is_in_recovery() as pg_is_in_recovery, 4 | 0::float8 as time_lag; -------------------------------------------------------------------------------- /plugins/wal/wal_master_9.sql: -------------------------------------------------------------------------------- 1 | select 2 | pg_xlog_location_diff(pg_current_xlog_location(), '0/00000000')::bigint as wal_position, 3 | pg_catalog.pg_is_in_recovery() as pg_is_in_recovery, 4 | 0::float8 as time_lag; -------------------------------------------------------------------------------- /plugins/wal/wal_replica_10.sql: -------------------------------------------------------------------------------- 1 | select 2 | pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/00000000')::bigint as wal_position, 3 | pg_catalog.pg_is_in_recovery() as pg_is_in_recovery, 4 | extract(epoch from now() - pg_last_xact_replay_timestamp())::float8 as time_lag; -------------------------------------------------------------------------------- /plugins/wal/wal_replica_9.sql: -------------------------------------------------------------------------------- 1 | select 2 | pg_xlog_location_diff(pg_last_xlog_replay_location(), '0/00000000')::bigint as wal_position, 3 | pg_catalog.pg_is_in_recovery() as pg_is_in_recovery, 4 | extract(epoch from now() - pg_last_xact_replay_timestamp())::float8 as time_lag; -------------------------------------------------------------------------------- /schema/schema.sql: -------------------------------------------------------------------------------- 1 | create table metric ( 2 | id bigserial, 3 | host uuid not null, 4 | plugin uuid not null, 5 | ts bigint not null default extract(epoch from current_timestamp)::bigint, 6 | snapshot bigint, 7 | value_bigint bigint, 8 | value_double float8, 9 | value_jsonb jsonb 10 | ); 11 | select create_hypertable('metric', 'ts', chunk_time_interval => 43200); /* comment if you don't use timescaledb */ 12 | create index on metric (ts, plugin, host); 13 | 14 | create table host ( 15 | name text primary key, 16 | maintenance bool default false, 17 | created_at timestamptz default now() 18 | ); --------------------------------------------------------------------------------
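Note on reading the schema back: the metric table above is where every plugin's storage_insert_metric() call ends up; host and plugin names are matched as md5(...)::uuid (the same convention used by init.test.lua and sender/list_of_alerts.sql), and values appear to land in value_bigint, value_double or value_jsonb. A minimal sketch of querying recent rows for one plugin, assuming a host registered under the placeholder name 'db1':

select ts, snapshot, value_bigint, value_double, value_jsonb
from metric
where host = md5('db1')::uuid
  and plugin = md5('pg.databases')::uuid
  and ts > extract(epoch from (now() - '5 minute'::interval))
order by ts desc;

The create index on metric (ts, plugin, host) statement in schema.sql is what keeps this kind of time-bounded lookup cheap.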