├── destination
│   ├── file
│   │   ├── __init__.py
│   │   └── jsonfile.py
│   ├── net
│   │   ├── __init__.py
│   │   └── http.py
│   ├── application
│   │   ├── __init__.py
│   │   └── zabbix.py
│   ├── __init__.py
│   └── base.py
├── database
│   ├── __init__.py
│   ├── base.py
│   └── postgresql.py
├── .gitignore
├── pgmon.service
├── configure.sh
├── pgmon.rc.in
├── main.py.src
├── Dockerfile
├── LICENSE
├── Makefile.in
├── pgmon.conf.sample
├── items.py
├── config.py
├── daemon.py
├── pgmon.py
└── README.md

/destination/file/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/destination/net/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["http"]
--------------------------------------------------------------------------------
/database/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | __all__ = ["base", "postgresql"]
4 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | pgmon.conf
3 | pgmon.rc
4 | pgmon
5 | Makefile
--------------------------------------------------------------------------------
/destination/application/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | __all__ = ["zabbix"]
4 | 
--------------------------------------------------------------------------------
/destination/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | __all__ = ["base", "application", "file", "net"]
4 | 
--------------------------------------------------------------------------------
/pgmon.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=PostgreSQL monitoring service
3 | After=network.target auditd.service
4 | 
5 | [Service]
6 | ExecStart=/usr/local/bin/pgmon -c /usr/local/pgmon/etc/pgmon.conf
7 | ExecReload=/bin/kill -HUP $MAINPID
8 | User=pgmon
9 | RuntimeDirectory=pgmon
10 | KillMode=process
11 | Restart=on-failure
12 | Type=forking
13 | PIDFile=/var/run/pgmon/pgmon.pid
14 | 
15 | [Install]
16 | WantedBy=multi-user.target
17 | Alias=pgmon.service
18 | 
19 | 
--------------------------------------------------------------------------------
/configure.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | PYTHON_PATH=$( which python3 || which python || which python2 || echo 0 )
4 | SYSTEM=$( uname -s )
5 | 
6 | if [ "$PYTHON_PATH" = "0" ]; then
7 | echo "No python detected! Please ensure it is installed in \$PATH"
8 | exit 1
9 | fi
10 | 
11 | if [ $SYSTEM = "FreeBSD" ]
12 | then
13 | INSTALL_OS="install-bsd"
14 | elif [ $SYSTEM = "Linux" ]
15 | then
16 | INSTALL_OS="install-linux"
17 | else
18 | echo "Unrecognized system. Aborting."
19 | exit 1 20 | fi 21 | 22 | cat Makefile.in | sed "s|##PYTHON_PATH##|${PYTHON_PATH}|" | sed "s|##INSTALL_OS##|${INSTALL_OS}|" > Makefile 23 | cat pgmon.rc.in | sed "s|##PYTHON_PATH##|${PYTHON_PATH}|" > pgmon.rc 24 | 25 | -------------------------------------------------------------------------------- /pgmon.rc.in: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # PROVIDE: pgmon 4 | # REQUIRE: DAEMON SSH 5 | # KEYWORD: shutdown 6 | 7 | # 8 | # Add the following line to /etc/rc.conf to enable pgmon: 9 | # 10 | # pgmon_enable="YES" 11 | # 12 | # Configuration variables and their default values: 13 | # 14 | 15 | . /etc/rc.subr 16 | 17 | name="pgmon" 18 | rcvar=`set_rcvar` 19 | 20 | command="/usr/local/bin/pgmon" 21 | 22 | # read configuration and set defaults 23 | load_rc_config ${name} 24 | 25 | : ${pgmon_enable:="NO"} 26 | : ${pgmon_config:="/usr/local/pgmon/etc/pgmon.conf"} 27 | : ${pgmon_pidfile:="/var/run/pgmon/pgmon.pid"} 28 | 29 | pidfile=${pgmon_pidfile} 30 | command_args="-c ${pgmon_config}" 31 | command_interpreter="##PYTHON_PATH##" 32 | 33 | 34 | run_rc_command "$1" 35 | 36 | -------------------------------------------------------------------------------- /destination/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base for destinations 3 | """ 4 | 5 | from abc import ABCMeta 6 | 7 | class BaseDestination(object): 8 | __metaclass__ = ABCMeta 9 | 10 | def __init__(self,config): 11 | self.name = "" 12 | self.type = "" 13 | self.results = {} 14 | 15 | @property 16 | def has_results(self): 17 | if len(self.results) > 0: 18 | return True 19 | else: 20 | return False 21 | 22 | @classmethod 23 | def send(self): 24 | pass 25 | 26 | @classmethod 27 | def add_result(self,id,value,timestamp): 28 | pass 29 | 30 | #def Destination(config): 31 | # print ("loading destination of type {0}".format(config['Type'])) 32 | # module = import_module("destination.{0}".format(config['Type'])) 33 | # return getattr(module, "load")(config) 34 | 35 | -------------------------------------------------------------------------------- /main.py.src: -------------------------------------------------------------------------------- 1 | #{PYTHON_PATH} 2 | 3 | import sys, os, signal 4 | import syslog 5 | from argparse import ArgumentParser 6 | 7 | sys.path.append('#{APP_LIB_PATH}') 8 | 9 | import daemon 10 | import pgmon 11 | 12 | APPNAME = "pgmon" 13 | 14 | if __name__ == "__main__": 15 | 16 | opts = pgmon.setup_args() 17 | pgmon.init_logging(APPNAME) 18 | 19 | try: 20 | with pgmon.PGMon(opts.config,opts.forground) as mon: 21 | try: 22 | mon.run() 23 | except Exception as e: 24 | pgmon.log_error("Error: {exc}".format(exc=e)) 25 | except Exception as exc: 26 | print("Fatal Error: {exc}".format(exc=exc)) 27 | pgmon.log_error("Error: {exc}. Terminating program.".format(exc=exc)) 28 | except KeyboardInterrupt: 29 | print("Interrupted! 
Exiting") 30 | finally: 31 | syslog.closelog() 32 | -------------------------------------------------------------------------------- /database/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base class for the datbase configuration 3 | """ 4 | 5 | from abc import ABCMeta 6 | 7 | class DBConnectionError(Exception): 8 | """ 9 | There was some kind of issue with the db connection 10 | """ 11 | pass 12 | 13 | class DBQueryError(Exception): 14 | """ 15 | A query failed for some reason 16 | """ 17 | pass 18 | 19 | class BaseDatabase(object): 20 | __metaclass__ = ABCMeta 21 | 22 | def __init__(self,db_dict): 23 | self.name = "postgres" 24 | self.type = "postgresql" 25 | self.host = "127.0.0.1" 26 | self.port = 5432 27 | self.user = "postgres" 28 | self.password = "postgres" 29 | 30 | @classmethod 31 | def connect(self): 32 | pass 33 | 34 | @classmethod 35 | def disconnect(self): 36 | pass 37 | 38 | @classmethod 39 | def query(self,string): 40 | pass 41 | 42 | -------------------------------------------------------------------------------- /destination/file/jsonfile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Store results in a json file 3 | """ 4 | 5 | import json 6 | from ..base import BaseDestination 7 | 8 | class JsonDestination(BaseDestination): 9 | 10 | def __init__(self,config): 11 | super(JsonDestination,self).__init__(config) 12 | self.type = "file.json" 13 | self.location = "/tmp/pgmon.out" 14 | self.append = False 15 | self.parse_cfg(config) 16 | 17 | def parse_cfg(self,config): 18 | self.name = config["Name"] 19 | self.location = config["Location"] 20 | if config["Append"] == 1: 21 | self.apend = True 22 | 23 | 24 | def add_result(self,name,result,ts): 25 | self.results[name] = {"data":result,"ts":ts} 26 | 27 | def send(self): 28 | if self.append is True: 29 | mode = "a" 30 | else: 31 | mode = "w" 32 | 33 | with open(self.location,mode) as jf: 34 | json.dump(self.results,jf) 35 | 36 | self.results = {} 37 | 38 | 39 | def load(config): 40 | return JsonDestination(config) 41 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.5 2 | 3 | LABEL maintainer="Adam Schumacher " \ 4 | org.label-schema.schema-version="1.0" \ 5 | org.label-schema.name="pgmon.flightaware.com" \ 6 | org.label-schema.vcs-url="https://github.com/flightaware/pgmon" \ 7 | org.label-schema.description="pgmon" 8 | 9 | 10 | RUN apk update && \ 11 | apk add --no-cache --virtual build-deps gcc python3 python3-dev musl-dev && \ 12 | apk add postgresql-dev && \ 13 | apk add py3-psycopg2 && \ 14 | apk add py3-requests && \ 15 | apk add zabbix-utils 16 | 17 | 18 | COPY destination /usr/local/pgmon/lib/destination/ 19 | COPY database /usr/local/pgmon/lib/database/ 20 | COPY *.py /usr/local/pgmon/lib/ 21 | COPY main.py.src /tmp/main.py.src 22 | RUN cd /tmp && sed "s|#{PYTHON_PATH}|#!/usr/bin/python3|" main.py.src | sed "s|#{APP_LIB_PATH}|/usr/local/pgmon/lib|" > main.py && rm main.py.src && mv main.py /usr/local/bin/pgmon && chmod +x /usr/local/bin/pgmon 23 | 24 | ENTRYPOINT ["/usr/local/bin/pgmon", "-f"] 25 | CMD ["-c", "/usr/local/pgmon/etc/pgmon.conf"] 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, FlightAware LLC 2 | All rights 
reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials provided 13 | with the distribution. 14 | 15 | * Neither the name of the FlightAware LLC nor the names of its 16 | contributors may be used to endorse or promote products derived 17 | from this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile.in: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile for pgmon.py 3 | # 4 | 5 | NAME ?= pgmon 6 | PREFIX ?= /usr/local 7 | BINDIR ?= $(PREFIX)/bin 8 | LIBDIR ?= $(PREFIX)/$(NAME)/lib 9 | CNFDIR ?= $(PREFIX)/$(NAME)/etc 10 | RCDIR ?= $(PREFIX)/etc/rc.d 11 | RUNDIR ?= /var/run/$(NAME) 12 | TARGET ?= $(NAME) 13 | PGUSER ?= $(NAME) 14 | 15 | PYTHON_PATH ?= ##PYTHON_PATH## 16 | 17 | install: install-app ##INSTALL_OS## 18 | 19 | install-app: 20 | mkdir -p $(LIBDIR) 21 | mkdir -p $(CNFDIR) 22 | mkdir -p $(RUNDIR) 23 | sed "s|#{PYTHON_PATH}|#!$(PYTHON_PATH)|" main.py.src | sed "s|#{APP_LIB_PATH}|$(LIBDIR)|" > main.py 24 | 25 | install-bsd: 26 | rsync -rq --chown=root:wheel --chmod=o=rwX --chmod=g=rwX --chmod=o=rX --exclude "*.pyc" destination $(LIBDIR) 27 | rsync -rq --chown=root:wheel --chmod=o=rwX --chmod=g=rwX --chmod=o=rX --exclude "*.pyc" database $(LIBDIR) 28 | install -o root -g wheel -m 644 $(TARGET).conf.sample $(CNFDIR) 29 | @echo "Config file is in $(CNFDIR)" 30 | install -o root -g wheel -m 755 main.py $(BINDIR)/$(TARGET) 31 | install -o root -g wheel -m 644 *.py $(LIBDIR) 32 | pw usershow $(PGUSER) || pw useradd $(PGUSER) -u 2059 -c "Postgres Monitor" -s /sbin/nologin 33 | chown $(PGUSER) $(RUNDIR) 34 | install -o root -g wheel -m 755 $(TARGET).rc $(RCDIR)/$(TARGET) 35 | @echo "Don't forget to set pgmon_enable in rc.conf" 36 | 37 | install-linux: 38 | rsync -rq --chown=root:root --chmod=o=rwX --chmod=g=rwX --chmod=o=rX --exclude "*.pyc" destination $(LIBDIR) 39 | rsync -rq --chown=root:root --chmod=o=rwX --chmod=g=rwX --chmod=o=rX --exclude "*.pyc" database $(LIBDIR) 40 | install -o root -g root -m 644 $(TARGET).conf.sample $(CNFDIR) 41 | @echo "Config file is in $(CNFDIR)" 42 | install -o root -g root -m 755 main.py $(BINDIR)/$(TARGET) 43 | install -o root -g root -m 644 *.py $(LIBDIR) 44 | useradd -u 2059 -c 
"Postgres Monitor" $(PGUSER) || echo "User already exists" 45 | chown $(PGUSER) $(RUNDIR) 46 | install -o root -g root -m 644 pgmon.service /lib/systemd/system/ 47 | systemctl enable pgmon.service 48 | 49 | uninstall: 50 | rm -f $(BINDIR)/$(TARGET) 51 | rm -f $(LIBDIR)/*.py* 52 | rm -rf $(LIBDIR) 53 | rm -f $(RCDIR)/$(TARGET) 54 | @echo "Don't forget to remove configuration lines from rc.conf" 55 | 56 | clean: 57 | find ./ -name "*.pyc" -type f -exec rm -f {} \; 58 | rm -f main.py 59 | rm -f Makefile 60 | rm -f $(TARGET).rc 61 | 62 | -------------------------------------------------------------------------------- /database/postgresql.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface for postgres database 3 | """ 4 | 5 | from .base import * 6 | import psycopg2 7 | import time 8 | 9 | class PostgresDatabase(BaseDatabase): 10 | 11 | def __init__(self,config): 12 | self.name = config["Name"] 13 | self.type = config["Type"] 14 | self.host = config["Host"] 15 | self.port = config["Port"] 16 | self.user = config["Username"] 17 | self.password = config["Password"] 18 | self.statement_timeout = 10000 #10 seconds 19 | self._last_query_time = None 20 | 21 | self._dbconn = None 22 | 23 | DEC2FLOAT = psycopg2.extensions.new_type( 24 | psycopg2.extensions.DECIMAL.values, 25 | 'DEC2FLOAT', 26 | lambda value, curs: float(value) if value is not None else None) 27 | psycopg2.extensions.register_type(DEC2FLOAT) 28 | 29 | @property 30 | def is_connected(self): 31 | try: 32 | if self._dbconn.closed == 0: 33 | return True 34 | else: 35 | return False 36 | except AttributeError: 37 | return False 38 | 39 | @property 40 | def query_time(self): 41 | return self._last_query_time 42 | 43 | def connect(self): 44 | try: 45 | self._dbconn = psycopg2.connect(host=self.host,port=self.port,dbname=self.name,user=self.user,password=self.password,options="-c statement_timeout={0}".format(self.statement_timeout)) 46 | self._dbconn.autocommit = False 47 | except psycopg2.OperationalError as e: 48 | raise DBConnectionError(e) 49 | 50 | def disconnect(self): 51 | try: 52 | self._dbconn.rollback() 53 | self._dbconn.close() 54 | except: 55 | pass 56 | 57 | 58 | def query(self,query_string,multi_row=False): 59 | 60 | if self.is_connected is False: 61 | self.connect() 62 | 63 | try: 64 | output = [] 65 | cur = self._dbconn.cursor() 66 | start = time.time() 67 | cur.execute(query_string) 68 | end = time.time() 69 | self._last_query_time = end - start 70 | colnames = [desc[0] for desc in cur.description] 71 | for r, result in enumerate(cur): 72 | output.append(dict(zip(colnames,result))) 73 | 74 | finally: 75 | """ 76 | we are only doing selects and shouldn't be modifying the database 77 | this ensures that, and allows future queries to happen in case one fails 78 | """ 79 | self._dbconn.rollback() 80 | return output 81 | 82 | def load(config): 83 | return PostgresDatabase(config) 84 | -------------------------------------------------------------------------------- /pgmon.conf.sample: -------------------------------------------------------------------------------- 1 | { 2 | "Config": { 3 | "Pgmon": { 4 | "PidFile":"/var/run/pgmon/pgmon.pid", 5 | "User":"pgmon", 6 | "Group":"pgmon", 7 | "CheckInterval":60, 8 | "Daemon":1 9 | }, 10 | "Database": { 11 | "Name":"pgdb", 12 | "Type":"postgresql", 13 | "Host":"localhost", 14 | "Port":5432, 15 | "Username":"pguser", 16 | "Password":"pgpass" 17 | }, 18 | "Destinations": [ 19 | {"Name":"Zabbix", 20 | "Type":"application.zabbix", 21 | 
"Server":"zabbix-server.example.com", 22 | "Host":"myhostname", 23 | "Port":10051, 24 | "SenderLocation":"/usr/local/bin/zabbix_sender" 25 | }, 26 | {"Name":"WWW", 27 | "Type":"net.http", 28 | "Location":"https://monitoring.server.com/pgmon", 29 | "Verb": "POST", 30 | "PostVariable": "pgmon" 31 | } 32 | ], 33 | "Items": [ 34 | {"Query":"select sum(numbackends) as count from pg_stat_database", 35 | "Destination":"WWW", 36 | "Id":"postgres.process" 37 | }, 38 | {"Query":"select pg_database_size('pgdb') as \"size[pgdb]\"", 39 | "Destination":"Zabbix ", 40 | "Id":"postgres.db" 41 | }, 42 | {"Query":"select sum(xact_commit) committed, sum(xact_rollback) as rollback FROM pg_stat_database", 43 | "Destination":"WWW", 44 | "Id":"postgres.transactions" 45 | }, 46 | {"Query":"select sum(tup_updated) as updated, sum(tup_returned) as returned, sum(tup_inserted) as inserted, sum(tup_fetched) as fetched, sum(tup_deleted) as deleted FROM pg_stat_database", 47 | "Destination":"WWW", 48 | "Id":"postgres.tuples" 49 | }, 50 | {"Query":"select count(*) as count from pg_locks where granted", 51 | "Destination":"WWW", 52 | "Id":"postgres.locks.granted" 53 | }, 54 | {"Query":"select count(*) as count from pg_locks where not granted", 55 | "Destination":"WWW", 56 | "Id":"postgres.locks.not_granted" 57 | }, 58 | {"Query":"select count(1) as count from pg_stat_activity where state = 'active'", 59 | "Destination":"WWW", 60 | "Id":"postgres.active_queries" 61 | }, 62 | {"Query":"select client_addr, extract(epoch from replay_lag) as lag_ms from pg_stat_replication", 63 | "Destination":"Zabbix", 64 | "Id":"postgres.replication_delay", 65 | "MultiRow": "client_addr" 66 | }, 67 | 68 | ] 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /destination/application/zabbix.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for zabbix 3 | """ 4 | 5 | import json 6 | import subprocess 7 | import socket 8 | from datetime import datetime 9 | from tempfile import NamedTemporaryFile 10 | from ..base import BaseDestination 11 | 12 | class ZabbixSendError(Exception): 13 | def __init__(self,popts,data,result): 14 | self.process_args = popts 15 | self.data = data 16 | self.result = result 17 | 18 | def __str__(self): 19 | return "Call to zabbix_sender failed. 
Command: ({0}) Data: ({1}) Result: ({2})".format(" ".join(self.popts), self.data, self.result) 20 | 21 | class ZabbixItemError(Exception): 22 | #DO SOMETHING HERE 23 | pass 24 | 25 | class ZabbixConfigError(ValueError): 26 | pass 27 | 28 | class ZabbixDestination(BaseDestination): 29 | def __init__(self,jsonobj): 30 | self.results = {} 31 | self.zabbix_host = "" 32 | self.local_host = socket.getfqdn() 33 | self.port = 10051 34 | self.type = "application.zabbix" 35 | self.zabbix_sender = "/usr/bin/zabbix_sender" 36 | self.parse_config(jsonobj) 37 | 38 | def parse_config(self,jsonobj): 39 | if "Name" in jsonobj: 40 | self.name = jsonobj["Name"] 41 | else: 42 | raise ZabbixConfigError('Missing "Name" in zabbix destination config.') 43 | 44 | if "Server" in jsonobj: 45 | self.zabbix_host = jsonobj["Server"] 46 | else: 47 | raise ZabbixConfigError('Missing "Server" from zabbix destination config') 48 | 49 | if "Host" in jsonobj: 50 | self.local_host = jsonobj["Host"] 51 | 52 | if "Port" in jsonobj: 53 | self.port = jsonobj["Port"] 54 | 55 | if "SenderLocation" in jsonobj: 56 | self.zabbix_sender = jsonobj["SenderLocation"] 57 | 58 | def add_result(self,base_id,results,ts): 59 | for result in results: 60 | for (id,value) in result.items(): 61 | try: 62 | for (colname, colval) in value.items(): 63 | final_id = f'{base_id}.{colname}[{id}]' 64 | self.results[final_id] = [colval,ts] 65 | except AttributeError: 66 | final_id = f'{base_id}.{id}' 67 | self.results[final_id] = [value,ts] 68 | 69 | def send(self): 70 | with NamedTemporaryFile() as tmpfile: 71 | for (id,result) in self.results.items(): 72 | tmpfile.write("{0} {1} {2} {3}\n".format(self.local_host, id, result[1], result[0]).encode()) 73 | tmpfile.flush() 74 | self.call_zabbix(tmpfile.name) 75 | self.results = {} 76 | 77 | def call_zabbix(self,tmpfile): 78 | commandops = [self.zabbix_sender 79 | ,'-z',self.zabbix_host 80 | ,'-p',str(self.port) 81 | ,'-s',self.local_host 82 | ,'-T' 83 | ,'-i',tmpfile] 84 | try: 85 | result = subprocess.check_output(commandops, stderr=subprocess.STDOUT) 86 | except subprocess.CalledProcessError as cpe: 87 | if cpe.returncode == 1: 88 | # Failed to Send 89 | raise ZabbixSendError(commandops, cpe.output) 90 | elif cpe.returncode == 2: 91 | # Sent fine, an item wasn't accepted (not configured/bad value/etc) 92 | raise ZabbixItemError("Some items failed to update. Check zabbix logs") 93 | else: 94 | # Unknown Error 95 | raise Exception("Unknown error occurred running zabbix_sender. 
Code: {0}".format(cpe.returncode)) 96 | 97 | def load(config): 98 | return ZabbixDestination(config) 99 | -------------------------------------------------------------------------------- /items.py: -------------------------------------------------------------------------------- 1 | """ 2 | An item to check 3 | """ 4 | 5 | from datetime import datetime 6 | import database 7 | 8 | 9 | class Item(object): 10 | 11 | def __init__(self,item_dict): 12 | self.id = "" 13 | self.query = "select 1" 14 | self.destinations = [] 15 | self._last_check = datetime.min 16 | self._check_result = None 17 | self._multi_item = False 18 | self._multi_row = False 19 | self.parse_cfg(item_dict) 20 | 21 | def parse_cfg(self,item_dict): 22 | if "Id" in item_dict: 23 | self.id = item_dict["Id"] 24 | else: 25 | raise ValueError("Item must contain an Id") 26 | 27 | if "Query" in item_dict: 28 | self.query = item_dict["Query"] 29 | else: 30 | raise ValueError("Item must contain a Query") 31 | 32 | if "Destination" in item_dict: 33 | """ 34 | Python 2/3 compat 35 | """ 36 | try: 37 | is_string = isinstance(item_dict["Destination"], basestring) 38 | except NameError: 39 | is_string = isinstance(item_dict["Destination"], str) 40 | 41 | if is_string: 42 | self.destinations.append(item_dict["Destination"]) 43 | elif isinstance(item_dict["Destination"], list): 44 | self.destinations = item_dict["Destination"] 45 | else: 46 | raise ValueError("Destination must be a list or a single item") 47 | 48 | if "MultiItem" in item_dict: 49 | if item_dict["MultiItem"] == 1: 50 | self._multi_item = True 51 | else: 52 | self._multi_item = False 53 | 54 | if "MultiRow" in item_dict: 55 | """ 56 | Python 2/3 compat 57 | """ 58 | try: 59 | is_string = isinstance(item_dict["MultiRow"], basestring) 60 | except NameError: 61 | is_string = isinstance(item_dict["MultiRow"],str) 62 | 63 | if (is_string and item_dict["MultiRow"] != "") or (isinstance(item_dict["MultiRow"],int) and item_dict["MultiRow"] > 0): 64 | self._multi_row = item_dict["MultiRow"] 65 | else: 66 | raise ValueError("MultiRow must be either a string column name or integer representing the column to use for item key") 67 | 68 | def parse_results(self,results): 69 | # we need to take a row from the multi_row result, pull out the column that is going to be the id 70 | if self.multi_row: 71 | try: 72 | parsed_results = [] 73 | for row in results: 74 | rowid = row.pop(self._multi_row) 75 | parsed_row = {rowid: {}} 76 | for key,value in row.items(): 77 | parsed_row[rowid].update({key: value}) 78 | parsed_results.append(parsed_row) 79 | return parsed_results 80 | except KeyError as e: 81 | raise Exception(f'Configured MultiRow column {self._multi_row} does not exist in the result set') 82 | else: 83 | return results 84 | 85 | def check(self,db): 86 | self._check_result = self.parse_results(db.query(self.query,self.multi_row)) 87 | self._last_check = datetime.utcnow() 88 | 89 | def result(self): 90 | return self._check_result 91 | 92 | def last_check(self): 93 | return self._last_check 94 | 95 | def multi_item(self): 96 | return self._multi_item 97 | 98 | def multi_row(self): 99 | return self._multi_row 100 | 101 | last_check = property(fget=last_check) 102 | result = property(fget=result) 103 | multi_item = property(fget=multi_item) 104 | multi_row = property(fget=multi_row) 105 | -------------------------------------------------------------------------------- /destination/net/http.py: -------------------------------------------------------------------------------- 1 | """ 2 | Send 
results to a http endpoint 3 | """ 4 | 5 | from ..base import BaseDestination 6 | import requests 7 | import json 8 | 9 | 10 | class HttpConfigError(Exception): 11 | pass 12 | 13 | 14 | class HttpDestination(BaseDestination): 15 | def __init__(self, config): 16 | super(HttpDestination, self).__init__(config) 17 | self.type = "net.http" 18 | self.location = "http://127.0.0.1" 19 | self.http_verb = "POST" 20 | self.persist_connection = False 21 | self.format = "json" 22 | self._verb_map = {"GET": self._get, "POST": self._post, "PUT": self._put} 23 | self._formatter = {"json": self.json_formatter, "csv":self.csv_formatter} 24 | self.parse_config(config) 25 | 26 | def __verb_is_valid(self, verb): 27 | return verb.upper() in self._verb_map.keys() 28 | 29 | def parse_config(self, config): 30 | if "Name" in config: 31 | self.name = config["Name"] 32 | else: 33 | raise HttpConfigError('Missing "Name" in http destination config.') 34 | 35 | if "Location" in config: 36 | self.location = config["Location"] 37 | else: 38 | raise HttpConfigError('Missing "Location" from http destination config') 39 | 40 | if "Format" in config: 41 | if config["Format"] in self._formatter.keys(): 42 | self.format = config["Format"] 43 | else: 44 | raise HTTPConfigError( 45 | f"Invalid Format of {config['Format']} specified. Must be one of {self._formatter.keys()}") 46 | 47 | if "Verb" in config: 48 | if self.__verb_is_valid(config["Verb"]): 49 | self.http_verb = f'{config["Verb"]}'.upper() 50 | else: 51 | raise HTTPConfigError( 52 | f"Invalid HTTP verb ({config['Verb']}) in config. Must be one of {self._verb_map.keys()}" 53 | ) 54 | 55 | if "PostVariable" in config: 56 | self.parent_key = config["PostVariable"] 57 | 58 | if "Persist" in config: 59 | self.persist_connection = config["Persist"] 60 | 61 | def _get(self,url,params=None): 62 | return requests.get(url,params=params) 63 | 64 | def _post(self,url,params=None): 65 | return requests.post(url,data=params) 66 | 67 | def _put(self,url,params=None): 68 | return requests.put(url,data=params) 69 | 70 | def _delete(self,url): 71 | # no reason to implement this 72 | pass 73 | 74 | def add_result(self, base_id, results, ts): 75 | self.results[base_id] = {'timestamp': ts, 'values': {}} 76 | for result in results: 77 | for (id,value) in result.items(): 78 | self.results[base_id]['values'].update({id: value}) 79 | 80 | def format_data(self,fmt): 81 | return self._formatter[fmt](self.data) 82 | 83 | def json_formatter(self,data): 84 | return json.dumps(data) 85 | 86 | def csv_formatter(self,data): 87 | return f'{f",".join(data.keys())}\n{f",".join(data.items())}' 88 | 89 | def send(self): 90 | if self.has_results is True: 91 | response = self.__make_request() 92 | if response.status_code != requests.codes.ok: 93 | raise HttpDestinationError( 94 | f"Unable to {self.http_verb} to {self.location}. 
Received {response.status_code} code" 95 | ) 96 | else: 97 | print(response.json()) 98 | 99 | def __make_request(self): 100 | data = {} 101 | if self.parent_key: 102 | data[self.parent_key] = self._formatter[self.format](self.results) 103 | else: 104 | data = self._formatter[self.format](self.results) 105 | return self._verb_map[self.http_verb](self.location, data) 106 | 107 | 108 | def load(config): 109 | return HttpDestination(config) 110 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class for storing configuration data 3 | """ 4 | 5 | from datetime import timedelta 6 | from datetime import datetime 7 | from importlib import import_module 8 | import destination 9 | import database 10 | import items 11 | import json 12 | 13 | class PgmonConfig: 14 | 15 | def __init__(self,cfgfile=None): 16 | self.destinations = dict() 17 | self.items = dict() 18 | self.connections = dict() 19 | self.db = dict() 20 | self.pidfile = "/var/run/pgmon/pgmon.pid" 21 | self.user = "pgmon" 22 | self.group = "pgmon" 23 | self.daemon = True 24 | self.interval = timedelta(seconds=30) 25 | 26 | if cfgfile: 27 | with open(cfgfile,'r') as cf: 28 | cf = open(cfgfile,'r') 29 | self.parse_cfg(json.load(cf)) 30 | 31 | def parse_cfg(self,config): 32 | if "Config" not in config: 33 | raise ValueError("Missing Config section") 34 | else: 35 | config = config["Config"] 36 | 37 | if isinstance(config["Destinations"], list): 38 | for d in config["Destinations"]: 39 | self.add_destination(d) 40 | else: 41 | raise TypeError('"Destinations" section must contain an array/list') 42 | 43 | if isinstance(config["Items"], list): 44 | for item in config["Items"]: 45 | self.add_item(item) 46 | else: 47 | raise TypeError('"Items" section must contain an array/list') 48 | 49 | if isinstance(config["Database"], dict): 50 | self.add_db(config["Database"]) 51 | else: 52 | raise TypeError(' "Database" section must be a dictionary') 53 | 54 | if isinstance(config["Pgmon"], dict): 55 | if "PidFile" in config["Pgmon"]: 56 | self.pidfile = config["Pgmon"]["PidFile"] 57 | 58 | if "User" in config["Pgmon"]: 59 | self.user = config["Pgmon"]["User"] 60 | 61 | if "Group" in config["Pgmon"]: 62 | self.group = config["Pgmon"]["Group"] 63 | 64 | if "Daemon" in config["Pgmon"]: 65 | if config["Pgmon"]["Daemon"] != 1: 66 | self.daemon = False 67 | else: 68 | self.daemon = True 69 | 70 | if "CheckInterval" in config["Pgmon"]: 71 | """ 72 | Python 2/3 compat 73 | """ 74 | try: 75 | is_int = isinstance(config["Pgmon"]["CheckInterval"], (int, long)) 76 | except NameError: 77 | is_int = isinstance(config["Pgmon"]["CheckInterval"], int) 78 | if is_int: 79 | if config["Pgmon"]["CheckInterval"] > 0: 80 | self.interval = timedelta(seconds=int(config["Pgmon"]["CheckInterval"])) 81 | else: 82 | raise ValueError("CheckInterval must be an integer greater than zero") 83 | else: 84 | raise TypeError("interval must be an integer") 85 | else: 86 | raise TypeError('"Pgmon" section must be a dictionary') 87 | 88 | def add_destination(self,dest,overwrite=False): 89 | if dest["Name"] in self.destinations and overwrite == False: 90 | raise ValueError('Destination {0} already exists in configuration.'.format(dest["Name"])) 91 | else: 92 | try: 93 | module = import_module("destination.{0}".format(dest['Type'])) 94 | destcls = getattr(module, "load") 95 | self.destinations[dest["Name"]] = destcls(dest) 96 | except Exception as e: 97 | raise 
Exception("Unable to add destination: {exc}".format(exc=e)) 98 | 99 | def get_destination(self,name): 100 | if name in self.destinations: 101 | return self.destinations[name] 102 | else: 103 | raise ValueError('Destination {0} does not exist in configuration.'.format(name)) 104 | 105 | def add_item(self,item,overwrite=False): 106 | if item["Id"] in self.items and overwrite == False: 107 | raise ValueError('Item {0} already exists in configuration.'.format(item["Id"])) 108 | 109 | """ 110 | Python 2/3 compat 111 | """ 112 | try: 113 | is_string = isinstance(item["Destination"], basestring) 114 | except NameError: 115 | is_string = isinstance(item["Destination"], str) 116 | 117 | if isinstance(item["Destination"],list): 118 | for dest in item["Destination"]: 119 | if dest not in self.destinations: 120 | raise ValueError('Item references destination that does not exist') 121 | elif is_string: 122 | if item["Destination"] not in self.destinations: 123 | raise ValueError('Item references destination that does not exist') 124 | else: 125 | raise TypeError('Item destination must be a string or an array') 126 | 127 | i = items.Item(item) 128 | self.items[i.id] = i 129 | 130 | def get_item(self,id): 131 | if id in self.items: 132 | return self.items[id] 133 | else: 134 | raise ValueError('Item {0} does not exist in configuration.'.format(id)) 135 | 136 | def add_db(self,db,overwrite=False): 137 | if db["Name"] in self.db and overwrite == False: 138 | raise ValueError('Database {0} already exists in configuration.'.format(db["Name"])) 139 | else: 140 | module = import_module("database.{0}".format(db['Type'])) 141 | dbcls = getattr(module, "load") 142 | self.db = dbcls(db) 143 | 144 | -------------------------------------------------------------------------------- /daemon.py: -------------------------------------------------------------------------------- 1 | """ 2 | Daemon class that manages all the stuff a daemon manages 3 | """ 4 | 5 | import os 6 | import sys 7 | import atexit 8 | import pwd, grp 9 | import resource 10 | import signal 11 | 12 | class Daemon(object): 13 | 14 | def __init__(self,appname="pyDaemon",background=True,chrootdir=None,workdir="/",umask=0,pidfile=None,uid=None,gid=None,signals=None,stdin=None,stdout=None,stderr=None): 15 | self.appname = appname 16 | self.background = background 17 | self.chrootdir = chrootdir 18 | self.workdir = workdir 19 | self.umask = umask 20 | self.pidfile = pidfile 21 | self.signal_map = signals 22 | self.stdin = stdin 23 | self.stdout = stdout 24 | self.stderr = stderr 25 | 26 | if uid is None: 27 | uid = os.getuid() 28 | self.uid = uid 29 | 30 | if gid is None: 31 | gid = os.getgid() 32 | self.gid = gid 33 | 34 | self._is_open = False 35 | 36 | @property 37 | def is_open(self): 38 | return self._is_open 39 | 40 | 41 | def __enter__(self): 42 | self.open() 43 | return self 44 | 45 | def open(self): 46 | 47 | if self.is_open: 48 | return 49 | 50 | if self.chrootdir is not None: 51 | os.chdir(self.chrootdir) 52 | os.chroot(self.chrootdir) 53 | 54 | if self.workdir is not None: 55 | os.chdir(self.workdir) 56 | 57 | self.set_umask() 58 | self.change_process_owner() 59 | 60 | self.map_signals() 61 | 62 | if self.background is True: 63 | self.bg() 64 | self.close_files() 65 | self.redirect(sys.stdin, self.stdin) 66 | self.redirect(sys.stdout, self.stdout) 67 | self.redirect(sys.stderr, self.stderr) 68 | 69 | if self.pidfile is not None: 70 | self.write_pidfile(str(os.getpid())) 71 | 72 | 73 | 74 | self._is_open = True 75 | 76 | 
self.register_atexit(self.close) 77 | 78 | def __exit__(self, etype, evalue, traceback): 79 | self.close() 80 | 81 | def close(self): 82 | if not self.is_open: 83 | return 84 | 85 | if self.pidfile is not None: 86 | self.remove_pidfile() 87 | self._is_open = False 88 | 89 | def bg(self): 90 | try: 91 | pid = os.fork() 92 | if pid > 0: 93 | os._exit(0) 94 | except OSError as exc: 95 | raise Exception("Unable to fork: {exc}".format(exc=exc)) 96 | exit(1) 97 | 98 | os.setsid() 99 | 100 | try: 101 | pid = os.fork() 102 | if pid > 0: 103 | os._exit(0) 104 | except OSError as exc: 105 | raise Exception("Unable to fork again: {exc}".format(exc=exc)) 106 | exit(1) 107 | 108 | 109 | def set_umask(self): 110 | try: 111 | os.umask(self.umask) 112 | except Exception as e: 113 | raise Exception('Unable to set umask: ({exc})'.format(exc=e)) 114 | 115 | def change_process_owner(self): 116 | try: 117 | os.setgid(self.gid) 118 | os.setuid(self.uid) 119 | except Exception as e: 120 | raise Exception("Unable to change process owner: ({exc})".format(exc=e)) 121 | 122 | def write_pidfile(self,pid): 123 | try: 124 | pidfp = os.open(self.pidfile,os.O_WRONLY|os.O_CREAT|os.O_EXCL,0o644) 125 | os.write(pidfp, pid.encode()) 126 | os.close(pidfp) 127 | except Exception as e: 128 | raise Exception("Unable to create pidfile: {exc}".format(exc=e)) 129 | 130 | def remove_pidfile(self): 131 | try: 132 | os.remove(self.pidfile) 133 | except Exception as e: 134 | raise Exception("Unable to remove pidfile: {exc}".format(exc=e)) 135 | 136 | def close_files(self): 137 | C_MAXFD = 1024 138 | 139 | maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1] 140 | if (maxfd == resource.RLIM_INFINITY): 141 | maxfd = C_MAXFD 142 | 143 | for fd in range(0, maxfd): 144 | try: 145 | os.close(fd) 146 | except OSError: #Ignore files that weren't open 147 | pass 148 | 149 | def map_signals(self): 150 | for (signal_num, handler) in self.signal_map.items(): 151 | if handler is None or handler == "": 152 | handler = signal.SIG_IGN 153 | try: 154 | signal.signal(signal_num, handler) 155 | except RuntimeError as e: 156 | print("Error: Cannot handle signal") 157 | 158 | def redirect(self,srcStream, dstStream): 159 | 160 | if dstStream is None: 161 | if hasattr(os, "devnull"): 162 | null_to = os.devnull 163 | else: 164 | null_to = "/dev/null" 165 | 166 | targetStream = os.open(null_to, os.O_RDWR) 167 | else: 168 | targetStream = dstStream.fileno() 169 | 170 | os.dup2(targetStream, srcStream.fileno()) 171 | 172 | 173 | def register_atexit(self, function): 174 | atexit.register(function) 175 | 176 | @classmethod 177 | def run(self): 178 | raise NotImplementedError 179 | 180 | def get_uid_from_name(name): 181 | pwd_entry = pwd.getpwnam(name) 182 | return pwd_entry.pw_uid 183 | 184 | def get_gid_from_name(name): 185 | grp_entry = grp.getgrnam(name) 186 | return grp_entry.gr_gid 187 | 188 | def get_name_from_pid(pid): 189 | pwd_entry = pwd.getpwuid(uid) 190 | return pwd_entry.pw_name 191 | -------------------------------------------------------------------------------- /pgmon.py: -------------------------------------------------------------------------------- 1 | """ 2 | The main PGMon program 3 | """ 4 | 5 | import os 6 | import sys 7 | import errno 8 | import syslog 9 | import random 10 | from argparse import ArgumentParser 11 | from datetime import datetime 12 | from datetime import timedelta 13 | from calendar import timegm 14 | from time import sleep 15 | from config import PgmonConfig 16 | from database.base import DBConnectionError 17 | from 
database import * 18 | from daemon import * 19 | 20 | VERSION = "1.0.0" 21 | 22 | class CriticalError(Exception): 23 | """ 24 | Error that cannot be recovered from and needs to exit immediately. 25 | """ 26 | pass 27 | 28 | class PGMon(Daemon): 29 | def __init__(self,configFile="pgmon.conf",foreground=False): 30 | #,appname="pyDaemon",background=True,chrootdir=None,workdir="/",umask=0,pidfile=None,uid=None,gid=None,signals=None,stdin=None,stdout=None,stderr=None 31 | self.appname = "pgmon" 32 | self.lastCheck = datetime.min 33 | self.checkResults = dict() 34 | self.dbConn = None 35 | self.config_file = configFile 36 | self.load_config() 37 | if foreground is True: 38 | self.background = False 39 | self.signal_map = dict({signal.SIGHUP:self.reload,signal.SIGTERM:self.close}) 40 | super(PGMon,self).__init__(appname=self.appname,background=self.background,pidfile=self.pidfile,uid=self.uid,gid=self.gid,signals=self.signal_map) 41 | log_info("pgmon initialized",self.background) 42 | 43 | 44 | def run(self): 45 | ## Run the program 46 | 47 | self.db_connect() 48 | 49 | while True: 50 | try: 51 | self.lastCheck = datetime.utcnow() 52 | if self.do_checks() > 0: 53 | self.send_results() 54 | sleep(((self.last_check + self.interval) - datetime.utcnow()).total_seconds()) 55 | except CriticalError as exc: 56 | raise exc 57 | except DBConnectionError as exc: 58 | self.db_connect() 59 | except Exception as exc: 60 | log_error("{exc}".format(exc=exc),self.background) 61 | 62 | def db_connect(self): 63 | attempts = 0 64 | while self.dbConn.is_connected is False: 65 | try: 66 | attempts+=1 67 | self.dbConn.connect() 68 | log_info("Connected to the database",self.background) 69 | except DBConnectionError as exc: 70 | log_error("Unable to connect to the database: {exc}. Retrying".format(exc=exc),self.background) 71 | sleep(min(300,((2 * attempts) + (random.randint(0, 1000) / 1000.0)))) 72 | 73 | def load_config(self): 74 | ## load the configuration file 75 | try: 76 | config = PgmonConfig(self.config_file) 77 | self.background = config.daemon 78 | self.pidfile = config.pidfile 79 | self.interval = config.interval 80 | self.destinations = config.destinations 81 | self.items = config.items 82 | self.connections = config.connections 83 | self.dbConn = config.db 84 | self.uid = get_uid_from_name(config.user) 85 | self.gid = get_gid_from_name(config.group) 86 | except OSError as exc: 87 | if exc.errno == errno.ENOENT: 88 | raise CriticalError("Configuration file not found at {0}".format(self.config_file)) 89 | else: 90 | raise CriticalError(exc) 91 | except TypeError as exc: 92 | raise CriticalError("Unable to parsse configuration: {exc}".format(exc=exc)) 93 | 94 | 95 | def do_checks(self): 96 | ## run the configured queries and return the results 97 | did_check = 0 98 | for (id,item) in self.items.items(): 99 | if datetime.utcnow() - self.interval > item.last_check: 100 | try: 101 | item.check(self.dbConn) 102 | did_check+=1 103 | """ 104 | Add the results of the item check to each destionation configured. 
105 | If the item is "MultiItem", then add the value of each column in the result and append the column name to the base item name 106 | Eg, if you've got an item name of "postgres.transactions" and your multiItem has a column named "committed" the destination will get "postgres.transactions.committed" 107 | """ 108 | for item_destination in item.destinations: 109 | self.destinations[item_destination].add_result(item.id, item.result,timegm(item.last_check.timetuple())) 110 | except DBConnectionError as dbe: 111 | log_error("Db connection lost, attempting to reconnect",self.background) 112 | self.db_connect() 113 | except Exception as e: 114 | ie = Exception('Unable to run check {id}: ({etype}) {exc}'.format(id=id,exc=e,etype=type(e))) 115 | log_error("{exc}".format(exc=ie),self.background) 116 | else: 117 | log_info("Didn't wait long enough. Skipping check {0}".format(item.id),self.background) 118 | self.last_check = datetime.utcnow() 119 | log_info("Ran {0} checks of {1} configured".format(did_check, len(self.items)),self.background) 120 | return did_check 121 | 122 | def send_results(self): 123 | ## send the results of the check to the configured destination 124 | did_send = 0 125 | for (id,dest) in self.destinations.items(): 126 | if dest.has_results is True: 127 | try: 128 | dest.send() 129 | did_send+=1 130 | except Exception as e: 131 | de = Exception('Unable to send results to {id}: {exc}'.format(id=id,exc=e)) 132 | log_error("{exc}".format(exc=de),self.background) 133 | else: 134 | log_info("Destination {0} has no available results, skipping".format(dest.name),self.background) 135 | log_info("Sent items to {0} destinations of {1} configured".format(did_send, len(self.destinations)),self.background) 136 | 137 | def reload(self, etype=None, evalue=None, traceback=None): 138 | log_info("Re-reading configuration file") 139 | self.load_config() 140 | 141 | def close(self, etype=None, evalue=None, traceback=None): 142 | ## close DB 143 | ## terminate subprocesses 144 | self.dbConn.disconnect() 145 | super(PGMon,self).close() 146 | if traceback is not None: 147 | log_info("Exiting with error: {tb}".format(tb=traceback),self.background) 148 | else: 149 | log_info("Exiting") 150 | print("Exiting!") 151 | os._exit(0) 152 | 153 | def init_logging(appname): 154 | syslog.openlog(ident=appname,logoption=syslog.LOG_PID,facility=syslog.LOG_LOCAL0) 155 | 156 | def log_info(message,dosyslog=True): 157 | if dosyslog: 158 | syslog.syslog(syslog.LOG_INFO,message) 159 | else: 160 | print("INFO: {0}".format(message)) 161 | 162 | def log_warn(message,dosyslog=True): 163 | if dosyslog: 164 | syslog.syslog(syslog.LOG_WARNING,message) 165 | else: 166 | print("WARN: {0}".format(message)) 167 | 168 | def log_error(message,dosyslog=True): 169 | if dosyslog: 170 | syslog.syslog(syslog.LOG_ERR,message) 171 | else: 172 | print("ERROR: {0}".format(message)) 173 | 174 | 175 | def setup_args(): 176 | parser = ArgumentParser(description="Monitor database using a persistent connection") 177 | 178 | parser.add_argument ( 179 | "-f", 180 | "--forground", 181 | action = "store_true", 182 | dest = "forground", 183 | help = "Run in the forground and don't daemonize. 
If specified, this overrides the configuration file",
184 |     )
185 | 
186 |     parser.add_argument (
187 |         "-c",
188 |         "--config",
189 |         action = "store",
190 |         dest = "config",
191 |         default = "pgmon.conf",
192 |         help = "The location of the configuration file to load",
193 |     )
194 | 
195 |     parser.add_argument (
196 |         "-v",
197 |         "--version",
198 |         action = "version",
199 |         version = VERSION
200 |     )
201 | 
202 |     return parser.parse_args()
203 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PGMON #
2 | 
3 | 
4 | `PGMON` is a small daemon written in Python that maintains a persistent connection to a PostgreSQL database and periodically queries it for stats/performance data. It can send that data to external systems for monitoring/alerting purposes. By default, it is configured to use Zabbix with zabbix_sender to get the data out, but you could write a module to send the data anywhere in any format you'd like. It also comes with support for writing the data in JSON format to a file or via HTTP POST/PUT to a web service. The queries to run and what to do with the results are stored in a config file, so you don't need to rewrite the app just to add new monitoring capabilities. The config file is in JSON format, so it is easy to edit, parse, and even generate programmatically if you want.
5 | 
6 | ## Changes ##
7 | 
8 | #### v1.0.0 ####
9 | 1. Initial Public Release
10 | 
11 | ## Requirements ##
12 | 
13 | * OS: Linux or FreeBSD are the only ones supported at the moment, though anywhere Python will run could work in theory
14 | * Python: version 3.4+ works. It also works with v2.7; however, the shebang line has been changed to default to python3, if found
15 | * Database: For connecting to PostgreSQL (the only supported DB at the moment), psycopg2 is required.
16 | * Destinations: For the Zabbix destination, zabbix_sender (usually part of the zabbix agent package on most systems) must be installed.
17 | 
18 | ## Install Process ##
19 | 1. Ensure psycopg2 is installed via `pip3 install psycopg2`
20 | 2. Ensure the zabbix_sender program is installed via your OS's preferred package manager
21 | 3. Run the `./configure.sh` script to determine your Python path and what OS (Linux or FreeBSD) you're on. It will use OS-specific applications to add the 'pgmon' user and group, which is the only reason it cares.
22 | 4. Run `sudo make install` to put the library in the standard place (/usr/local/pgmon/lib) and the main program in the standard place (/usr/local/bin/pgmon)
23 | 5. Create /usr/local/pgmon/etc/pgmon.conf. A pgmon.conf.sample is available in the same dir to get you started
24 | 
25 | ## Running Pgmon ##
26 | You can run pgmon from the command line as `pgmon -c /usr/local/pgmon/etc/pgmon.conf`. If you have it configured to switch to another user and you don't have the ability to do that, you'll probably need to run it as a user who does (e.g. root).
27 | 
28 | ### FreeBSD ###
29 | For FreeBSD systems, there is an included rc script. Assuming you've got `pgmon_enable="YES"` in your rc.conf, you can do `[sudo] service pgmon start`.
30 | 
31 | ### Linux ###
32 | On Linux there is a systemd unit file. You can do `[sudo] systemctl start pgmon`
33 | 
34 | ### Docker ###
35 | You need to mount the directory where the configuration file lives. Otherwise, it's pretty much just a standard docker run:
36 | `docker run -d -h $(hostname) --name=pgmon --net=host --mount 'type=bind,src=/usr/local/flightaware/etc/pgmon/pgmon_.conf,dst=/usr/local/pgmon/etc/pgmon.conf,readonly' pgmon`
37 | 
38 | ## Config File Format ##
39 | 
40 | The config file is a JSON object with a top-level name of "Config". Beneath that are four required sections with a number of sub-elements that may or may not be required. If they've got a (Default: *), then they aren't required; otherwise, they are.
41 | 
42 | 1. Pgmon - Configures the application itself
43 |    a. PidFile - Location of the pidfile (Default: /var/run/pgmon/pgmon.pid)
44 |    b. User - Username to run pgmon as (Default: pgmon)
45 |    c. Group - Group to run pgmon as (Default: pgmon)
46 |    d. CheckInterval - Period of time (in seconds) to wait before checking again (Default: 60)
47 |    e. Daemon - Whether or not to daemonize. 1 means yes, 0 means no (Default: 1)
48 | 2. Database - Parameters for the database you want to monitor
49 |    a. Name - Name of the database you want to monitor
50 |    b. Type - The type of database you want to monitor. Currently, the only valid option is "postgresql"
51 |    c. Host - The hostname/IP of the database you want to monitor
52 |    d. Port - The port to connect to
53 |    e. Username - The database user to connect as
54 |    f. Password - The password to use for the connection
55 | 3. Destinations - Where to send the results.
56 |    a. Name - Something descriptive to identify the destination. Can be any valid string, but you've got to reference it in the item section, so it's probably best not to go too crazy
57 |    b. Type - What kind of destination you want to use. This corresponds to the module name in the `destination` package.
58 |    c. (Type-specific options) - Each Type may have one or more additional options that go here. View the section for the type to learn more
59 | 4. Items - The specific queries you want to run
60 |    a. Query - The SQL query to run. Note that unless MultiRow is set, only a single row of values is expected, so make sure the data you want is in that row
61 |    b. Destination - The name of the destination to send the results to. Can also be an array of destination names.
62 |    c. Id - An identifier for this item. Must be unique in the configuration. Will be combined with the column name and sent along with the value to the destination
63 |    d. MultiRow - Whether or not this query returns multiple rows of values. If set, it must be the name of the column whose value will be used to build the item key in combination with `Id`
64 | 
65 | ### Example Config: ###
66 | ```
67 | {
68 |   "Config": {
69 |     "Pgmon": {
70 |       "PidFile":"/var/run/pgmon.pid",
71 |       "User":"pgmon",
72 |       "Group":"pgmon",
73 |       "CheckInterval":60,
74 |       "Daemon":1
75 |     },
76 |     "Database": {
77 |       "Name":"pgdata",
78 |       "Type":"postgresql",
79 |       "Host":"db.domain.com",
80 |       "Port":5432,
81 |       "Username":"pgsql",
82 |       "Password":"mypass"},
83 |     "Destinations": [
84 |       {"Name":"Zabbix",
85 |        "Type":"application.zabbix",
86 |        "Server":"zabbix.domain.com",
87 |        "Port":10051
88 |       },
89 |       {"Name":"www",
90 |        "Type":"net.http",
91 |        "Location":"https://domain.com/pgmon_endpoint",
92 |        "PostVariable":"pgmon",
93 |        "Format":"json"
94 |       }
95 |     ],
96 |     "Items": [
97 |       {"Query":"SELECT count(1) as count from table",
98 |        "Destination":"Zabbix",
99 |        "Id":"postgres.table"
100 |       },
101 |       {"Query":"SELECT sum(bytes) as total_bytes from stats_table",
102 |        "Destination":"www",
103 |        "Id":"postgres.stats_table"
104 |       },
105 |       {"Query":"SELECT count(1) as num_items from table2",
106 |        "Destination":["Zabbix","www"],
107 |        "Id":"postgres.table2"
108 |       },
109 |       {"Query":"select sum(xact_commit) as committed, sum(xact_rollback) as rollback from pg_stat_database",
110 |        "Destination":"Zabbix",
111 |        "Id":"postgres.transactions"
112 |       },
113 |       {"Query":"select sum(tup_updated) as updated, sum(tup_returned) as returned, sum(tup_inserted) as inserted, sum(tup_fetched) as fetched, sum(tup_deleted) as deleted FROM pg_stat_database",
114 |        "Destination":"www",
115 |        "Id":"postgres.tuples"
116 |       },
117 |       {"Query":"select client_addr, extract(epoch from replay_lag) as lag_ms from pg_stat_replication",
118 |        "Destination":"www",
119 |        "Id":"postgres.replication_delay",
120 |        "MultiRow": "client_addr"
121 |       }
122 |     ]
123 |   }
124 | }
125 | ```
126 | 
127 | 
128 | ## Item identification ##
129 | 
130 | `PGMON` will combine the item Id with the column names so that each value can be uniquely identified. In the example configuration above, we are aliasing some of the computed columns to give them a more friendly name. If you don't do this, you may end up with wonky item Ids:
131 | `postgres.tuples.sum(tup_updated)`
132 | 
133 | If you specify an item as a `MultiRow` item, you need to identify a column that will be used to construct the Id so each row can be uniquely identified. See the `MultiRow` section below for more details.
134 | 
135 | ## MultiRow ##
136 | 
137 | MultiRow allows you to perform a single query and get results across multiple rows. This is useful if you have a table that holds stats for multiple things of the same kind, for example if you want to get the replication latency for each streaming-replication backend from pg_stat_replication.
138 | 
139 | You set MultiRow to the name of the column to use for building the item key. `PGMON` will build the key by combining the `Id` configuration item with the value in the specified column for each row. So if you have a table that looks like this:
140 | ```
141 | username          | login_count
142 | ------------------+----------------
143 | bob               | 10
144 | sally             | 11
145 | jane              | 3
146 | ```
147 | 
148 | and the configuration has
149 | 
150 | ```
151 | { "Query": "select username, login_count from table",
152 |   "Destination": "MyDest",
153 |   "Id": "login.count",
154 |   "MultiRow": "username"
155 | }
156 | ```
157 | 
158 | Then your item keys will end up like `login.count.bob`, `login.count.sally`, and so on.
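
For illustration, here is a minimal, hedged sketch of that key-building step in Python. The sample rows and the `build_keys` helper are hypothetical; in the real code the restructuring happens in `items.Item.parse_results` and the final key formatting happens in each destination's `add_result`:

```
# Hypothetical sketch of how a MultiRow result turns into per-row item keys.
rows = [
    {"username": "bob", "login_count": 10},
    {"username": "sally", "login_count": 11},
    {"username": "jane", "login_count": 3},
]

def build_keys(base_id, rows, key_column):
    """Return {item_key: value} pairs, keying each row by `key_column`."""
    keyed = {}
    for row in rows:
        data = dict(row)                # don't mutate the original row
        row_id = data.pop(key_column)   # e.g. "bob"
        for _column, value in data.items():
            # With a single value column this yields keys like login.count.bob;
            # real destinations may also fold the column name into the key.
            keyed[f"{base_id}.{row_id}"] = value
    return keyed

print(build_keys("login.count", rows, "username"))
# {'login.count.bob': 10, 'login.count.sally': 11, 'login.count.jane': 3}
```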
159 | 
160 | Note that exactly how these keys get built when sending to a specific destination may differ; for example, Zabbix requires values in a different format than the http module does.
161 | 
162 | ## Destinations ##
163 | Info about the available destinations.
164 | 
165 | ### application.zabbix ###
166 | This destination uses zabbix_sender to send items to a Zabbix server. The items must be configured as the type "Zabbix Trapper". `PGMON` will send the configured item Id as the Zabbix item id, a unix timestamp for when the value was collected, and the raw value itself. In the destination config you can choose the hostname the checks are for, so you don't have to run `PGMON` on the host you are monitoring.
167 | 
168 | #### Configuration Items ####
169 | 1. Server - The hostname of the Zabbix server to send the checks to
170 | 2. Host - The name of the host the checks are for. Must match what is configured in Zabbix. (Default: local host's fqdn)
171 | 3. Port - The port on the Zabbix server to connect to for sending Zabbix trapper items (Default: 10051)
172 | 4. SenderLocation - The path to the `zabbix_sender` binary. (Default: /usr/bin/zabbix_sender)
173 | 
174 | ### net.http ###
175 | This destination will send an HTTP GET/POST to a web endpoint. The data can be formatted in a number of ways (extensible), but the default is json.
176 | 
177 | #### Configuration Items ####
178 | 1. Location - HTTP(S) URL that will receive the data
179 | 2. Format - How the submitted data should be formatted. Currently valid options are `json` or `csv` (Default: json)
180 | 3. Verb - The HTTP verb to use, e.g. GET or POST (Default: POST)
181 | 4. PostVariable - The GET or POST variable to set all the data to
182 | 5. Persist - Whether to keep the connection open persistently or reconnect for each delivery. (Default: false)
183 | 
184 | ### file.jsonfile ###
185 | This destination outputs a file in JSON format containing the item id, a unix timestamp from when the value was collected, and the raw value itself.
186 | 
187 | #### Configuration Items ####
188 | 1. Location - The path and filename to write the results to. Must be writable by the user `PGMON` runs as, obviously
189 | 2. Append - Whether to append the latest values to the file or overwrite it. 1 will append, 0 will not. (Default: 0)
190 | 
191 | 
--------------------------------------------------------------------------------
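
For reference, a hypothetical `Destinations` array combining the three destination types documented in the README might look like the following. The server names, hostname, and file path are placeholders rather than project defaults; the keys themselves come from the README sections above and pgmon.conf.sample:

```
"Destinations": [
  {"Name":"Zabbix",
   "Type":"application.zabbix",
   "Server":"zabbix.example.com",
   "Host":"myhostname",
   "Port":10051,
   "SenderLocation":"/usr/bin/zabbix_sender"
  },
  {"Name":"www",
   "Type":"net.http",
   "Location":"https://monitoring.example.com/pgmon",
   "Verb":"POST",
   "PostVariable":"pgmon",
   "Format":"json"
  },
  {"Name":"LocalJson",
   "Type":"file.jsonfile",
   "Location":"/var/tmp/pgmon.json",
   "Append":0
  }
]
```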