├── etc └── zabbix │ ├── scripts │ ├── postgresql │ │ ├── pgsql.ping.time.sql │ │ ├── pgsql.replication.recovery_role.sql │ │ ├── pgsql.connections.prepared.sql │ │ ├── pgsql.uptime.sql │ │ ├── pgsql.cache.hit.sql │ │ ├── pgsql.discovery.db.sql │ │ ├── pgsql.scans.sql │ │ ├── pgsql.dbstat.sql │ │ ├── pgsql.config.hash.sql │ │ ├── pgsql.bgwriter.sql │ │ ├── pgsql.frozenxid.sql │ │ ├── pgsql.replication.status.sql │ │ ├── pgsql.dbstat.sum.sql │ │ ├── pgsql.wal.stat.sql │ │ ├── pgsql.replication.lag.sql │ │ ├── pgsql.locks.sql │ │ ├── pgsql.connections.sum.sql │ │ ├── pgsql.connections.sql │ │ ├── pgsql.transactions.sql │ │ └── pgsql.query.time.sql │ ├── zabbix_files.sh │ ├── script_version.sh │ ├── kontena_grid.sh │ ├── process.sh │ ├── db2snapshot.pl │ ├── discover_subprocess.sh │ ├── discover_certificates.py │ ├── db2stat.pl │ ├── docker_swarm.py │ ├── check_certificate.py │ ├── zabbix_sender_psk.py │ ├── docker.sh │ ├── kubernetes_monitoring.py │ └── pacemaker.py │ └── zabbix_agentd.d │ ├── pacemaker.conf │ ├── script_version.conf │ ├── zabbix_files.conf │ ├── kontena_grid.conf │ ├── docker_swarm.conf │ ├── mysql.conf │ ├── process.conf │ ├── docker.conf │ ├── certificates.conf │ ├── kubernetes_monitoring.conf │ ├── postgresql_monitoring.conf │ └── galera.conf ├── custom ├── conf │ ├── elastizabbix.conf │ ├── fileTimestamp.conf │ ├── curl.conf │ ├── alfresco.conf │ ├── zapache.conf │ ├── nginx_monitoring.conf │ └── discover_apache.conf └── scripts │ ├── curl.sh │ ├── alfresco-pdf.sh │ ├── alfresco-pdfa.sh │ ├── discover_apache-backends.sh │ ├── discover_responsecodes.sh │ ├── fileTimestamp.vbs │ ├── elastizabbix.py │ └── zapache ├── documentation ├── docker.png ├── process.png ├── certificates.png ├── kubernetes_monitoring │ ├── csr.yml │ ├── config │ └── access.yml ├── kontena_grid.md ├── process.md ├── docker_swarm.md ├── pacemaker.md ├── certificates.md ├── mysql-galera.md ├── db2stat.md ├── docker.md ├── db2stat-testing.md └── kubernetes_monitoring.md ├── README.md ├── opt └── cron │ └── docker_stats.sh └── templates ├── process.xml ├── docker_trapper.xml ├── process_active.xml ├── pacemaker.xml └── pacemaker_active.xml /etc/zabbix/scripts/postgresql/pgsql.ping.time.sql: -------------------------------------------------------------------------------- 1 | \timing 2 | SELECT 1; 3 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/zabbix_files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Version: 1.0 3 | ls -la $1 4 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.replication.recovery_role.sql: -------------------------------------------------------------------------------- 1 | SELECT pg_is_in_recovery()::int 2 | -------------------------------------------------------------------------------- /custom/conf/elastizabbix.conf: -------------------------------------------------------------------------------- 1 | UserParameter=elastizabbix[*],/etc/zabbix/scripts/elastizabbix.py $1 $2 $3 2 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.connections.prepared.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) 2 | FROM pg_prepared_xacts 3 | -------------------------------------------------------------------------------- /custom/conf/fileTimestamp.conf: 
-------------------------------------------------------------------------------- 1 | UserParameter=fileTimestamp[*],cscript c:\zabbix\scripts\fileTimestamp.vbs //nologo $1 -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.uptime.sql: -------------------------------------------------------------------------------- 1 | SELECT date_part('epoch', now() - pg_postmaster_start_time())::int 2 | -------------------------------------------------------------------------------- /documentation/docker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digiapulssi/zabbix-monitoring-scripts/HEAD/documentation/docker.png -------------------------------------------------------------------------------- /documentation/process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digiapulssi/zabbix-monitoring-scripts/HEAD/documentation/process.png -------------------------------------------------------------------------------- /custom/conf/curl.conf: -------------------------------------------------------------------------------- 1 | # First parameter is the header. Second is the URL. 2 | UserParameter=curl[*],/etc/zabbix/scripts/curl.sh "$1" "$2" 3 | -------------------------------------------------------------------------------- /documentation/certificates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digiapulssi/zabbix-monitoring-scripts/HEAD/documentation/certificates.png -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.cache.hit.sql: -------------------------------------------------------------------------------- 1 | SELECT round(sum(blks_hit)*100/sum(blks_hit+blks_read), 2) 2 | FROM pg_stat_database 3 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/pacemaker.conf: -------------------------------------------------------------------------------- 1 | UserParameter=pacemaker.status[*],/etc/zabbix/scripts/pacemaker.py $1 $2 $3 $4 $5 $6 $7 $8 2 | 3 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/script_version.conf: -------------------------------------------------------------------------------- 1 | UserParameter=script.version[*],/etc/zabbix/scripts/script_version.sh /etc/zabbix/scripts/ 2 | -------------------------------------------------------------------------------- /custom/scripts/curl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Description: 5 | # Simple curl with one header 6 | 7 | set -e 8 | curl -s -H "$1" "$2" 9 | 10 | -------------------------------------------------------------------------------- /custom/conf/alfresco.conf: -------------------------------------------------------------------------------- 1 | UserParameter=alfresco.pdf[*],/etc/zabbix/scripts/alfresco-pdf.sh $1 $2 $3 2 | UserParameter=alfresco.pdfa[*],/etc/zabbix/scripts/alfresco-pdfa.sh $1 $2 $3 3 | -------------------------------------------------------------------------------- /custom/conf/zapache.conf: -------------------------------------------------------------------------------- 1 | # 2 | # This is a sample zabbix_agentd config file. 3 | # Edit to your needs.
4 | # 5 | UserParameter=zapache[*],/etc/zabbix/scripts/zapache "$1" "$2" "$3" 6 | -------------------------------------------------------------------------------- /custom/conf/nginx_monitoring.conf: -------------------------------------------------------------------------------- 1 | # ~> Zabbix3.4 2 | UserParameter=nginx.json[*],curl -s 'http://$1:$2/nginx_status' | tr -d a-zA-Z\\n | tr ' :' ',' | sed -e s/',,*'/,/g -e s/'^,'/'{"nginx":['/g -e s/',$'/']}'/g 3 | -------------------------------------------------------------------------------- /custom/conf/discover_apache.conf: -------------------------------------------------------------------------------- 1 | UserParameter=discover.backends[*],/etc/zabbix/scripts/discover_apache-backends.sh "$1" 2 | UserParameter=discover.responsecodes[*],/etc/zabbix/scripts/discover_responsecodes.sh "$1" 3 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/zabbix_files.conf: -------------------------------------------------------------------------------- 1 | UserParameter=agent.scripts[*],/etc/zabbix/scripts/zabbix_files.sh /etc/zabbix/scripts 2 | UserParameter=agent.confs[*],/etc/zabbix/scripts/zabbix_files.sh /etc/zabbix/zabbix_agentd.d 3 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/script_version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Version: 1.0 3 | set -e 4 | 5 | echo -n '{"data":[' 6 | # format to json with sed 7 | ls $1 | sed 's/\(.*\)/{"{#SCRIPT}":"\1"}/g' | sed '$!s/$/,/' | tr '\n' ' ' 8 | echo -n ']}' 9 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/kontena_grid.conf: -------------------------------------------------------------------------------- 1 | UserParameter=kontena.grid.discover_nodes[*],/etc/zabbix/scripts/kontena_grid.sh discover "$1" "$2" "$3" 2 | UserParameter=kontena.grid.node.connected[*],/etc/zabbix/scripts/kontena_grid.sh stat "$1" "$2" "$3" "$4" connected 3 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.discovery.db.sql: -------------------------------------------------------------------------------- 1 | WITH T AS ( 2 | SELECT 3 | datname AS "{#DBNAME}" 4 | FROM pg_database 5 | WHERE 6 | NOT datistemplate 7 | AND datname != 'postgres' 8 | ) 9 | SELECT '{"data":'|| regexp_replace(coalesce(json_agg(T), '[]'::json)::text, E'[\\n\\r\\s]+', '', 'g') || '}' 10 | FROM T 11 | -------------------------------------------------------------------------------- /documentation/kubernetes_monitoring/csr.yml: -------------------------------------------------------------------------------- 1 | apiVersion: certificates.k8s.io/v1beta1 2 | kind: CertificateSigningRequest 3 | metadata: 4 | name: zabbix 5 | spec: 6 | groups: 7 | - system:authenticated 8 | request: 9 | signerName: kubernetes.io/kube-apiserver-client 10 | usages: 11 | - client auth 12 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.scans.sql: -------------------------------------------------------------------------------- 1 | WITH T AS ( 2 | SELECT 3 | sum(CASE WHEN relkind IN ('r', 't', 'm') THEN pg_stat_get_numscans(oid) END) seq, 4 | sum(CASE WHEN relkind = 'i' THEN pg_stat_get_numscans(oid) END) idx 5 | FROM pg_class 6 | WHERE relkind IN ('r', 't', 'm', 'i') 7 | ) 8 | SELECT row_to_json(T) 9 | 
FROM T 10 | -------------------------------------------------------------------------------- /custom/scripts/alfresco-pdf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Takes IP and port as arguments. 3 | # The script gets content from the given URL and returns only lines containing "SUMMARY". 4 | set -e 5 | url='http://'${1}':'${2}'/alfresco/s/enterprise/admin/admin-testtransform-test?operation=getTransformationStatistics&arg1=&arg2=&arg3=pdf' 6 | header=${3} 7 | curl -v -s -H "$header" "$url" --stderr - | grep SUMMARY 8 | -------------------------------------------------------------------------------- /custom/scripts/alfresco-pdfa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Takes IP and port as arguments. 3 | # The script gets content from the given URL and returns only lines containing "SUMMARY". 4 | set -e 5 | url='http://'${1}':'${2}'/alfresco/s/enterprise/admin/admin-testtransform-test?operation=getTransformationStatistics&arg1=&arg2=&arg3=pdfa' 6 | header=${3} 7 | curl -v -s -H "$header" "$url" --stderr - | grep SUMMARY 8 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.dbstat.sql: -------------------------------------------------------------------------------- 1 | SELECT json_object_agg(datname, row_to_json(T)) FROM ( 2 | SELECT datname, 3 | numbackends, 4 | xact_commit, 5 | xact_rollback, 6 | blks_read, 7 | blks_hit, 8 | tup_returned, 9 | tup_fetched, 10 | tup_inserted, 11 | tup_updated, 12 | tup_deleted, 13 | conflicts, 14 | temp_files, 15 | temp_bytes, 16 | deadlocks 17 | FROM pg_stat_database 18 | WHERE datname IS NOT NULL) T 19 | -------------------------------------------------------------------------------- /documentation/kontena_grid.md: -------------------------------------------------------------------------------- 1 | # Kontena Grid Monitoring 2 | 3 | Monitor Kontena grid nodes.
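For reference, the discovery item returns standard Zabbix low-level discovery JSON built from the grid's node names (the node names below are illustrative):
```
{"data":[{"{#NODE}":"node-1"},{"{#NODE}":"node-2"}]}
```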
4 | 5 | Requirements: 6 | - jq 7 | - curl 8 | 9 | ## Usage 10 | 11 | Item Syntax | Description | Units | 12 | ----------- | ----------- | ----- | 13 | kontena.grid.discover_nodes[<master address>, <auth token>, <grid>] | Discover nodes in Kontena grid | | 14 | kontena.grid.node.connected[<master address>, <auth token>, <grid>, {#NODE}] | Node connection status | true/false | 15 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.config.hash.sql: -------------------------------------------------------------------------------- 1 | SELECT md5( 2 | json_build_object( 3 | 'extensions', ( 4 | SELECT array_agg(extname) FROM ( 5 | SELECT extname 6 | FROM pg_extension 7 | ORDER BY extname 8 | ) AS e 9 | ), 10 | 'settings', ( 11 | SELECT json_object(array_agg(name), array_agg(setting)) FROM ( 12 | SELECT name, setting 13 | FROM pg_settings 14 | WHERE name != 'application_name' 15 | ORDER BY name 16 | ) AS s 17 | ) 18 | )::text); 19 | -------------------------------------------------------------------------------- /documentation/kubernetes_monitoring/config: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | clusters: 3 | - cluster: 4 | certificate-authority: /path/to/ca.crt 5 | server: https://127.0.0.1:8443 6 | name: <cluster> 7 | contexts: 8 | - context: 9 | cluster: <cluster> 10 | user: <user> 11 | name: <context> 12 | current-context: <context> 13 | kind: Config 14 | preferences: {} 15 | users: 16 | - name: <user> 17 | user: 18 | client-certificate: /path/to/client.crt 19 | client-key: /path/to/client.key 20 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/docker_swarm.conf: -------------------------------------------------------------------------------- 1 | UserParameter=docker.swarm.discover.services[*],/etc/zabbix/scripts/docker_swarm.py "discovery" 2 | 3 | # Metric retrievals for Zabbix 4.0 compatibility. Use dependent discoveries on Zabbix 4.2+ 4 | UserParameter=docker.swarm.hostname[*],/etc/zabbix/scripts/docker_swarm.py "hostname" --service "$1" 5 | UserParameter=docker.swarm.status[*],/etc/zabbix/scripts/docker_swarm.py "status" --service "$1" 6 | UserParameter=docker.swarm.uptime[*],/etc/zabbix/scripts/docker_swarm.py "uptime" --service "$1" 7 | -------------------------------------------------------------------------------- /documentation/kubernetes_monitoring/access.yml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: zabbix-role 5 | rules: 6 | - apiGroups: ["", "batch"] 7 | resources: ["pods", "nodes", "services", "jobs"] 8 | verbs: ["get", "list"] 9 | --- 10 | apiVersion: rbac.authorization.k8s.io/v1 11 | kind: ClusterRoleBinding 12 | metadata: 13 | name: zabbix-role 14 | subjects: 15 | - kind: User 16 | name: zabbix 17 | roleRef: 18 | kind: ClusterRole 19 | name: zabbix-role 20 | apiGroup: rbac.authorization.k8s.io 21 | -------------------------------------------------------------------------------- /custom/scripts/discover_apache-backends.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Version: 1.0 3 | 4 | # This script takes the path to an Apache configuration folder as an argument and reads only lines containing 'ProxyPass' or 'Location'. 5 | # It extracts each backend between the first and second slash and prints them out in JSON format.
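# Example output, matching the JSON built below (the URIs are illustrative):
# {"data":[{"{#URI}":"/app"}, {"{#URI}":"/service"}]}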
6 | 7 | set -e 8 | 9 | echo -n '{"data":[' 10 | 11 | # Removes duplicates with awk and formats to json with sed 12 | grep -r 'ProxyPass\|Location' $1 | grep -Po '(?<=[[:blank:]])\/[^\/ \s]*' | awk '!a[$0]++' | sed 's/\(.*\)/{"{#URI}":"\1"}/g' | sed '$!s/$/,/' | tr '\n' ' ' 13 | echo -n ']}' 14 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.bgwriter.sql: -------------------------------------------------------------------------------- 1 | SELECT row_to_json(T) 2 | FROM 3 | (SELECT checkpoints_timed, 4 | checkpoints_req, 5 | checkpoint_write_time, 6 | checkpoint_sync_time, 7 | current_setting('block_size')::int*buffers_checkpoint AS buffers_checkpoint, 8 | current_setting('block_size')::int*buffers_clean AS buffers_clean, 9 | maxwritten_clean, 10 | current_setting('block_size')::int*buffers_backend AS buffers_backend, 11 | buffers_backend_fsync, 12 | current_setting('block_size')::int*buffers_alloc AS buffers_alloc 13 | FROM pg_stat_bgwriter) T 14 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.frozenxid.sql: -------------------------------------------------------------------------------- 1 | WITH T AS ( 2 | SELECT 3 | age(relfrozenxid), 4 | current_setting('autovacuum_freeze_max_age')::integer fma 5 | FROM pg_class 6 | WHERE relkind IN ('r', 't')) 7 | SELECT row_to_json(T2) 8 | FROM ( 9 | SELECT extract(epoch FROM now())::integer ts, 10 | ( 11 | SELECT ((1 - max(age)::double precision / current_setting('autovacuum_freeze_max_age')::integer) * 100)::numeric(9,6) 12 | FROM T 13 | WHERE age < fma 14 | ) prc_before_av, 15 | ( 16 | SELECT ((1 - max(age)::double precision / -((1 << 31) + 1)) * 100)::numeric(9,6) 17 | FROM T 18 | ) prc_before_stop 19 | ) T2 20 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.replication.status.sql: -------------------------------------------------------------------------------- 1 | DO LANGUAGE plpgsql $$ 2 | DECLARE 3 | ver integer; 4 | res text := 2; 5 | BEGIN 6 | SELECT current_setting('server_version_num') INTO ver; 7 | 8 | IF (SELECT pg_is_in_recovery()) THEN 9 | IF (ver >= 90600) THEN 10 | SELECT * INTO res from ( 11 | SELECT COUNT(*) FROM pg_stat_wal_receiver 12 | ) T; 13 | ELSE 14 | res := 'ZBX_NOTSUPPORTED: Requires PostgreSQL version 9.6 or higher'; 15 | END IF; 16 | END IF; 17 | 18 | perform set_config('zbx_tmp.repl_status_res', res, false); 19 | END $$; 20 | 21 | SELECT current_setting('zbx_tmp.repl_status_res'); 22 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/kontena_grid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | kontena_api_v1() { 5 | RESPONSE=$(curl -k -s \ 6 | -H "Authorization: Bearer $AUTH_TOKEN" \ 7 | -H "Accept: application/json" \ 8 | "$1") 9 | 10 | echo $RESPONSE 11 | } 12 | 13 | CMD=$1 14 | MASTER_ADDRESS=$2 15 | AUTH_TOKEN=$3 16 | GRID=$4 17 | 18 | if [ "$CMD" == "discover" ]; then 19 | kontena_api_v1 https://$MASTER_ADDRESS/v1/grids/$GRID/nodes | jq '.nodes | map({"{#NODE}": .name}) | { "data": . 
}' 20 | elif [ "$CMD" == "stat" ]; then 21 | NODE=$5 22 | STAT=$6 23 | kontena_api_v1 https://$MASTER_ADDRESS/v1/nodes/$GRID/$NODE | jq '.'$STAT 24 | fi 25 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.dbstat.sum.sql: -------------------------------------------------------------------------------- 1 | SELECT row_to_json(T) from ( 2 | SELECT sum(numbackends) AS numbackends, 3 | sum(xact_commit) AS xact_commit, 4 | sum(xact_rollback) AS xact_rollback, 5 | sum(blks_read) AS blks_read, 6 | sum(blks_hit) AS blks_hit, 7 | sum(tup_returned) AS tup_returned, 8 | sum(tup_fetched) AS tup_fetched, 9 | sum(tup_inserted) AS tup_inserted, 10 | sum(tup_updated) AS tup_updated, 11 | sum(tup_deleted) AS tup_deleted, 12 | sum(conflicts) AS conflicts, 13 | sum(temp_files) AS temp_files, 14 | sum(temp_bytes) AS temp_bytes, 15 | sum(deadlocks) AS deadlocks 16 | FROM pg_stat_database) T 17 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/mysql.conf: -------------------------------------------------------------------------------- 1 | #copied from https://github.com/zabbix/zabbix/blob/master/conf/zabbix_agentd/userparameter_mysql.conf 2 | 3 | UserParameter=mysql.ping, HOME=/var/lib/zabbix mysqladmin ping 4 | UserParameter=mysql.get_status_variables, HOME=/var/lib/zabbix mysql -sNX -e "show global status" 5 | UserParameter=mysql.version, HOME=/var/lib/zabbix mysqladmin -s version 6 | UserParameter=mysql.db.discovery, HOME=/var/lib/zabbix mysql -sN -e "show databases" 7 | UserParameter=mysql.db.size[*], HOME=/var/lib/zabbix mysql -sN -e "SELECT SUM(DATA_LENGTH + INDEX_LENGTH) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='$1'" 8 | UserParameter=mysql.replication.discovery, HOME=/var/lib/zabbix mysql -sNX -e "show slave status" 9 | UserParameter=mysql.slave_status, HOME=/var/lib/zabbix mysql -sNX -e "show slave status" 10 | -------------------------------------------------------------------------------- /documentation/process.md: -------------------------------------------------------------------------------- 1 | # Process Monitoring 2 | 3 | Discover running processes in Unix/Linux/AIX environments. 
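The discovery item returns standard Zabbix low-level discovery JSON; for example (process names are illustrative):
```
{"data":[{"{#COMMAND}":"sshd"},{"{#COMMAND}":"zabbix_agentd"}]}
```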
4 | 5 | Discovery uses the process name in /proc/pid/status that's truncated to 15 characters 6 | because it's the most reliable name used by Zabbix process monitoring items (see https://www.zabbix.com/documentation/3.0/manual/appendix/items/proc_mem_num_notes) 7 | 8 | ## Usage 9 | 10 | Item Syntax | Description | Units | 11 | ----------- | ----------- | ----- | 12 | discover.processes | Discover all processes | Provides the following template variables: {#COMMAND} | 13 | proc.cpu.util["{#COMMAND}"] | Process CPU utilization | % | 14 | proc.mem["{#COMMAND}"] | Process memory usage | bytes | 15 | proc.num["{#COMMAND}"] | Number of processes with the same command | (number) | 16 | 17 | ## Example 18 | 19 | ![Screenshot](process.png) 20 | 21 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.wal.stat.sql: -------------------------------------------------------------------------------- 1 | DO LANGUAGE plpgsql $$ 2 | DECLARE 3 | ver integer; 4 | res text := '{"write":0,"count":0}'; 5 | BEGIN 6 | SELECT current_setting('server_version_num') INTO ver; 7 | 8 | IF (SELECT NOT pg_is_in_recovery()) THEN 9 | IF (ver >= 100000) THEN 10 | SELECT row_to_json(T) INTO res FROM ( 11 | SELECT pg_wal_lsn_diff(pg_current_wal_lsn(),'0/00000000') AS WRITE, 12 | count(*) FROM pg_ls_waldir() AS COUNT 13 | ) T; 14 | 15 | ELSE 16 | SELECT row_to_json(T) INTO res FROM ( 17 | SELECT pg_xlog_location_diff(pg_current_xlog_location(),'0/00000000') AS WRITE, 18 | count(*) FROM pg_ls_dir('pg_xlog') AS COUNT 19 | ) T; 20 | END IF; 21 | END IF; 22 | 23 | perform set_config('zbx_tmp.wal_json_res', res, false); 24 | END $$; 25 | 26 | select current_setting('zbx_tmp.wal_json_res'); 27 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.replication.lag.sql: -------------------------------------------------------------------------------- 1 | DO LANGUAGE plpgsql $$ 2 | DECLARE 3 | ver integer; 4 | res text; 5 | BEGIN 6 | SELECT current_setting('server_version_num') INTO ver; 7 | 8 | IF (ver >= 100000) THEN 9 | SELECT * INTO res from ( 10 | SELECT 11 | CASE WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() 12 | THEN 0 13 | ELSE COALESCE(EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp())::integer, 0) 14 | END 15 | ) T; 16 | 17 | ELSE 18 | SELECT * INTO res from ( 19 | SELECT 20 | CASE WHEN pg_last_xlog_receive_location() = pg_last_xlog_replay_location() 21 | THEN 0 22 | ELSE COALESCE(EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp())::integer, 0) 23 | END 24 | ) T; 25 | END IF; 26 | 27 | perform set_config('zbx_tmp.repl_lag_res', res, false); 28 | END $$; 29 | 30 | select current_setting('zbx_tmp.repl_lag_res'); 31 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/process.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Version: 1.0 3 | set -e 4 | 5 | # Discover all running process names 6 | # Use the process name in /proc/pid/status that's truncated to 15 characters 7 | # because it's the most reliable name used by Zabbix process monitoring items 8 | # See https://www.zabbix.com/documentation/3.0/manual/appendix/items/proc_mem_num_notes 9 | 10 | echo -n '{"data":[' 11 | # Filter away processes with no cumulative CPU time with grep -v 12 | # Filter away kernel processes (and also zombie processes) by filtering out processes that don't use any user memory (vsz == 0) 
13 | # Take only 15 characters (to leave out the time portion) with cut 14 | # Remove duplicates with awk, format to json with sed 15 | ps -A -o comm= -o time= -o vsz= | grep -v ' 00:00:00' | awk '$3 != 0' | cut -c-15 | sed 's/ *$//' | awk '!a[$0]++' | sed 's/\(.*\)/{"{#COMMAND}":"\1"}/g' | sed '$!s/$/,/' | tr '\n' ' ' 16 | echo -n ']}' 17 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/db2snapshot.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -wT 2 | # Version: 1.0 3 | # Usage: db2snapshot <dbname> <lines> 4 | # 5 | # dbname - Name of db2 database 6 | # lines - How many lines to output 7 | 8 | use File::Spec; 9 | 10 | # Directory where snapshots are cached. 11 | my $SNAPSHOT_DIR = File::Spec->tmpdir(); 12 | 13 | # Get database name and line count args 14 | my $dbname = shift @ARGV; 15 | my $lines = shift @ARGV; 16 | 17 | # Untaint 18 | if ($lines =~ /^(\d+)$/) { 19 | $lines = $1; 20 | } else { 21 | die "Bad lines value"; 22 | } 23 | 24 | if ($dbname =~ /^([-\w.]+)$/) { 25 | $dbname = $1; 26 | } else { 27 | die "Bad dbname argument"; 28 | } 29 | 30 | # Generate stat file name 31 | my $statfile = "$SNAPSHOT_DIR/$dbname.txt"; 32 | 33 | # Open db2 snapshot file and output x lines 34 | my $i=1; 35 | open FILE, "<$statfile"; 36 | while (<FILE>) { 37 | if ($i > $lines) { 38 | last; 39 | } 40 | print $_; 41 | $i++; 42 | } 43 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/process.conf: -------------------------------------------------------------------------------- 1 | UserParameter=discover.processes,/etc/zabbix/scripts/process.sh 2 | 3 | # This is exactly the same as discover.processes but can be used in Zabbix as a separate discovery with filtering for critical processes 4 | UserParameter=discover.critical.processes,/etc/zabbix/scripts/process.sh 5 | 6 | # Discover sub-processes, arguments: 7 | # - Main process name (eg. DataFlowEngine) by which sub-processes are filtered from ps command output 8 | # - Argument order number to return as {#PARAM1} LLD macro; 1 for the first command-line argument to the process, 2 for the second etc. 9 | # - Argument order number to return as {#PARAM2} LLD macro (if not used you can use eg.
0 to return the process path as macro value) 10 | # 11 | # The LLD macros can then be used as argument for proc.mem, proc.cpu.util and proc.num items 12 | UserParameter=discover.subprocess[*],/etc/zabbix/scripts/discover_subprocess.sh "$1" "$2" "$3" 13 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/docker.conf: -------------------------------------------------------------------------------- 1 | UserParameter=docker.containers.discovery,/etc/zabbix/scripts/docker.sh discovery 2 | UserParameter=docker.containers.count,/etc/zabbix/scripts/docker.sh count 3 | UserParameter=docker.containers.discovery.all,/etc/zabbix/scripts/docker.sh discovery_all 4 | UserParameter=docker.containers.count.all,/etc/zabbix/scripts/docker.sh count_all 5 | 6 | # First parameter: container id 7 | # Second parameter: one of netin, netout, cpu, disk, memory, uptime, up or status 8 | UserParameter=docker.containers[*],/etc/zabbix/scripts/docker.sh "$1" "$2" 9 | 10 | ####################################################################### 11 | # Compatibility with www.monitoringartist.com docker templates 12 | 13 | UserParameter=docker.discovery,/etc/zabbix/scripts/docker.sh discovery 14 | UserParameter=docker.up[*],/etc/zabbix/scripts/docker.sh "$1" up 15 | 16 | # Ignore the second argument for docker.cpu (system vs user) 17 | UserParameter=docker.cpu[*],/etc/zabbix/scripts/docker.sh "$1" cpu 18 | 19 | # Ignore the second argument for docker.mem (total_cache vs total_rss vs total_swap) 20 | UserParameter=docker.mem[*],/etc/zabbix/scripts/docker.sh "$1" memory 21 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/certificates.conf: -------------------------------------------------------------------------------- 1 | UserParameter=certificates.discovery[*],/etc/zabbix/scripts/discover_certificates.py "$1" 2 | UserParameter=certificate.status[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" status 3 | UserParameter=certificate.startdate[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" startdate 4 | UserParameter=certificate.enddate[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" enddate 5 | UserParameter=certificate.lifetime[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" lifetime 6 | UserParameter=certificate.lifetime_days[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" lifetime_days 7 | UserParameter=certificate.serial[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" serial 8 | UserParameter=certificate.subject[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" subject 9 | UserParameter=certificate.issuer[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" issuer 10 | UserParameter=certificate.subject_hash[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" subject_hash 11 | UserParameter=certificate.issuer_hash[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" issuer_hash 12 | UserParameter=certificate.fingerprint[*],/etc/zabbix/scripts/check_certificate.py "$1" "$2" fingerprint 13 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.locks.sql: -------------------------------------------------------------------------------- 1 | WITH T AS 2 | (SELECT db.datname dbname, 3 | lower(replace(Q.mode, 'Lock', '')) AS MODE, 4 | coalesce(T.qty, 0) val 5 | FROM pg_database db 6 | JOIN ( 7 | VALUES ('AccessShareLock') ,('RowShareLock') ,('RowExclusiveLock') ,('ShareUpdateExclusiveLock') ,('ShareLock') 
,('ShareRowExclusiveLock') ,('ExclusiveLock') ,('AccessExclusiveLock')) Q(MODE) ON TRUE NATURAL 8 | LEFT JOIN 9 | (SELECT datname, 10 | MODE, 11 | count(MODE) qty 12 | FROM pg_locks lc 13 | RIGHT JOIN pg_database db ON db.oid = lc.database 14 | GROUP BY 1, 2) T 15 | WHERE NOT db.datistemplate 16 | ORDER BY 1, 2) 17 | SELECT json_object_agg(dbname, row_to_json(T2)) 18 | FROM 19 | (SELECT dbname, 20 | sum(val) AS total, 21 | sum(CASE 22 | WHEN MODE = 'accessexclusive' THEN val 23 | END) AS accessexclusive, 24 | sum(CASE 25 | WHEN MODE = 'accessshare' THEN val 26 | END) AS accessshare, 27 | sum(CASE 28 | WHEN MODE = 'exclusive' THEN val 29 | END) AS EXCLUSIVE, 30 | sum(CASE 31 | WHEN MODE = 'rowexclusive' THEN val 32 | END) AS rowexclusive, 33 | sum(CASE 34 | WHEN MODE = 'rowshare' THEN val 35 | END) AS rowshare, 36 | sum(CASE 37 | WHEN MODE = 'share' THEN val 38 | END) AS SHARE, 39 | sum(CASE 40 | WHEN MODE = 'sharerowexclusive' THEN val 41 | END) AS sharerowexclusive, 42 | sum(CASE 43 | WHEN MODE = 'shareupdateexclusive' THEN val 44 | END) AS shareupdateexclusive 45 | FROM T 46 | GROUP BY dbname) T2 47 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/discover_subprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Version: 1.0 3 | set -e 4 | 5 | 6 | if [ "$#" -ne 3 ] 7 | then 8 | echo "Missing or too many command line arguments. Usage: discover.subprocess[, , ]" 9 | exit 1 10 | fi 11 | 12 | # Discover all running processes with the given process name (first cmdline parameter) 13 | # Print the startup arguments of the processes using awk 14 | # The first argument is fifth column in ps command output 15 | PROCESS="$1" 16 | PARAM1_COL=`expr $2 + 4` 17 | PARAM2_COL=`expr $3 + 4` 18 | 19 | echo -n '{"data":[' 20 | 21 | # Uses first command line argument to filter processes 22 | # Filter away processes with no cumulative CPU time with grep -v 23 | # Filter away kernel processes (and also zombie processes) by filtering out processes that don't use any user memory (vsz == 0) 24 | 25 | # Example output of ps command: 26 | # DataFlowEngine 00:01:16 2822924 DataFlowEngine ACEBET1 00000000-0000-0000-0000-000000000000 EJSGRP1 27 | # DataFlowEngine 00:00:22 3427168 DataFlowEngine ACEBET1 00000000-0000-0000-0000-000000000000 HTTPGRP1 28 | # DataFlowEngine 00:00:07 2466424 DataFlowEngine ACEBET1 00000000-0000-0000-0000-000000000000 MONITORGRP1 29 | 30 | ps -A -o comm= -o time= -o vsz= -o args= | egrep "^$PROCESS " | grep -v ' 00:00:00' | awk '$3 != 0' | awk -v a="$PARAM1_COL" -v b="$PARAM2_COL" '{print $1 " " $a " " $b}' | sed 's/\(.*\) \(.*\) \(.*\)/{"{#COMMAND}":"\1", "{#PARAM1}":"\2", "{#PARAM2}":"\3"}/g' | sed '$!s/$/,/' | tr '\n' ' ' 31 | 32 | echo -n ']}' 33 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.connections.sum.sql: -------------------------------------------------------------------------------- 1 | DO LANGUAGE plpgsql $$ 2 | DECLARE 3 | ver integer; 4 | res text; 5 | BEGIN 6 | SELECT current_setting('server_version_num') INTO ver; 7 | 8 | IF (ver >= 90600) THEN 9 | SELECT row_to_json(T) INTO res from ( 10 | SELECT 11 | sum(CASE WHEN state = 'active' THEN 1 ELSE 0 END) AS active, 12 | sum(CASE WHEN state = 'idle' THEN 1 ELSE 0 END) AS idle, 13 | sum(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END) AS idle_in_transaction, 14 | count(*) AS total, 15 | count(*)*100/(SELECT 
current_setting('max_connections')::int) AS total_pct, 16 | sum(CASE WHEN wait_event IS NOT NULL THEN 1 ELSE 0 END) AS waiting, 17 | (SELECT count(*) FROM pg_prepared_xacts) AS prepared 18 | FROM pg_stat_activity WHERE datid is not NULL 19 | ) T; 20 | 21 | ELSE 22 | SELECT row_to_json(T) INTO res from ( 23 | SELECT 24 | sum(CASE WHEN state = 'active' THEN 1 ELSE 0 END) AS active, 25 | sum(CASE WHEN state = 'idle' THEN 1 ELSE 0 END) AS idle, 26 | sum(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END) AS idle_in_transaction, 27 | count(*) AS total, 28 | count(*)*100/(SELECT current_setting('max_connections')::int) AS total_pct, 29 | sum(CASE WHEN waiting IS TRUE THEN 1 ELSE 0 END) AS waiting, 30 | (SELECT count(*) FROM pg_prepared_xacts) AS prepared 31 | FROM pg_stat_activity 32 | ) T; 33 | END IF; 34 | 35 | perform set_config('zbx_tmp.conn_json_res', res, false); 36 | END $$; 37 | 38 | select current_setting('zbx_tmp.conn_json_res'); 39 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.connections.sql: -------------------------------------------------------------------------------- 1 | DO LANGUAGE plpgsql $$ 2 | DECLARE 3 | ver integer; 4 | res text; 5 | 6 | BEGIN 7 | SELECT current_setting('server_version_num') INTO ver; 8 | 9 | IF (ver >= 90600) THEN 10 | SELECT json_object_agg(datname, row_to_json(T)) INTO res from ( 11 | SELECT 12 | datname, 13 | sum(CASE WHEN state = 'active' THEN 1 ELSE 0 END) AS active, 14 | sum(CASE WHEN state = 'idle' THEN 1 ELSE 0 END) AS idle, 15 | sum(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END) AS idle_in_transaction, 16 | count(*) AS total, 17 | count(*)*100/(SELECT current_setting('max_connections')::int) AS total_pct, 18 | sum(CASE WHEN wait_event IS NOT NULL THEN 1 ELSE 0 END) AS waiting 19 | FROM pg_stat_activity WHERE datid is not NULL GROUP BY datname ) T; 20 | 21 | ELSE 22 | SELECT json_object_agg(datname, row_to_json(T)) INTO res from ( 23 | SELECT 24 | datname, 25 | sum(CASE WHEN state = 'active' THEN 1 ELSE 0 END) AS active, 26 | sum(CASE WHEN state = 'idle' THEN 1 ELSE 0 END) AS idle, 27 | sum(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END) AS idle_in_transaction, 28 | count(*) AS total, 29 | count(*)*100/(SELECT current_setting('max_connections')::int) AS total_pct, 30 | sum(CASE WHEN waiting IS TRUE THEN 1 ELSE 0 END) AS waiting 31 | FROM pg_stat_activity GROUP BY datname ) T; 32 | END IF; 33 | 34 | perform set_config('zbx_tmp.db_conn_json_res', res, false); 35 | 36 | END $$; 37 | 38 | SELECT current_setting('zbx_tmp.db_conn_json_res'); 39 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.transactions.sql: -------------------------------------------------------------------------------- 1 | DO LANGUAGE plpgsql $$ 2 | DECLARE 3 | ver integer; 4 | res text; 5 | BEGIN 6 | SELECT current_setting('server_version_num') INTO ver; 7 | 8 | IF (ver >= 90600) THEN 9 | SELECT row_to_json(T) INTO res from ( 10 | SELECT 11 | coalesce(extract(epoch FROM max(CASE WHEN state = 'idle in transaction' THEN age(now(), query_start) END)), 0) AS idle, 12 | coalesce(extract(epoch FROM max(CASE WHEN state <> 'idle in transaction' AND state <> 'idle' THEN age(now(), query_start) END)), 0) AS active, 13 | coalesce(extract(epoch FROM max(CASE WHEN wait_event IS NOT NULL THEN age(now(), query_start) END)), 0) AS waiting, 14 | (SELECT coalesce(extract(epoch FROM max(age(now(), prepared))), 0) FROM 
pg_prepared_xacts) AS prepared 15 | FROM pg_stat_activity) T; 16 | 17 | ELSE 18 | SELECT row_to_json(T) INTO res from ( 19 | SELECT 20 | coalesce(extract(epoch FROM max(CASE WHEN state = 'idle in transaction' THEN age(now(), query_start) END)), 0) AS idle, 21 | coalesce(extract(epoch FROM max(CASE WHEN state <> 'idle in transaction' AND state <> 'idle' THEN age(now(), query_start) END)), 0) AS active, 22 | coalesce(extract(epoch FROM max(CASE WHEN waiting IS TRUE THEN age(now(), query_start) END)), 0) AS waiting, 23 | (SELECT coalesce(extract(epoch FROM max(age(now(), prepared))), 0) FROM pg_prepared_xacts) AS prepared 24 | FROM pg_stat_activity) T; 25 | END IF; 26 | 27 | perform set_config('zbx_tmp.trans_json_res', res, false); 28 | END $$; 29 | 30 | SELECT current_setting('zbx_tmp.trans_json_res'); 31 | -------------------------------------------------------------------------------- /documentation/docker_swarm.md: -------------------------------------------------------------------------------- 1 | # Docker Swarm service discovery and monitoring 2 | 3 | Requirements: 4 | - Python 2.7.13 or Python 3.6.8 5 | - Libraries for Python: docker, requests, urllib3, python-dateutil. 6 | 7 | 8 | ## For Python version 3, install dependencies using pip: 9 | ``` 10 | pip3 install docker requests urllib3 python-dateutil 11 | ``` 12 | 13 | 14 | ## For Python version 2, install specific versions of libraries: 15 | ``` 16 | pip install docker==2.7.0 requests==2.23.0 urllib3==1.24.3 python-dateutil==2.8.1 17 | ``` 18 | 19 | 20 | The zabbix user must have sufficient privileges to monitor Docker: 21 | 22 | * Add the zabbix user to the docker group: `sudo usermod -aG docker zabbix` 23 | 24 | 25 | ## Usage 26 | 27 | Item Syntax | Description | Units | 28 | ----------- | ----------- | ----- | 29 | docker.swarm.discover.services | Discover all running Docker services | Provides the following template variables: {#SERVICE}. Also provides service information in an array: hostname, status, uptime. | 30 | docker.swarm.hostname[<service>] | Retrieve hostname(s) for specified service. | Hostname(s) as a comma separated list. | 31 | docker.swarm.status[<service>] | Current service status. | String containing either "running" or "not running". | 32 | docker.swarm.uptime[<service>] | Retrieve uptime for specified service. | Seconds. | 33 | 34 | 35 | ## Retrieving data from discovery using JSONPath 36 | 37 | In this example, service data can be retrieved using JSONPath: 38 | ``` 39 | $.data[?(@.service == "<service>")].hostname 40 | $.data[?(@.service == "<service>")].status 41 | $.data[?(@.service == "<service>")].uptime 42 | ``` 43 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/kubernetes_monitoring.conf: -------------------------------------------------------------------------------- 1 | # Discoveries. Possible arguments are: pods/nodes/services/cronjobs, config_file, field-selector.
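# Example item key (the config path and field selector are illustrative):
#   kubernetes.discover.pods[/etc/zabbix/kubernetes_monitoring/config,status.phase=Running]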
2 | UserParameter=kubernetes.discover.pods[*],source /opt/virtualenv/kube-monitoring/bin/activate && python /etc/zabbix/scripts/kubernetes_monitoring.py "pods" --config "$1" --field-selector "$2" 3 | UserParameter=kubernetes.discover.nodes[*],source /opt/virtualenv/kube-monitoring/bin/activate && python /etc/zabbix/scripts/kubernetes_monitoring.py "nodes" --config "$1" --field-selector "$2" 4 | UserParameter=kubernetes.discover.services[*],source /opt/virtualenv/kube-monitoring/bin/activate && python /etc/zabbix/scripts/kubernetes_monitoring.py "services" --config "$1" --field-selector "$2" 5 | UserParameter=kubernetes.discover.cronjobs[*],source /opt/virtualenv/kube-monitoring/bin/activate && python /etc/zabbix/scripts/kubernetes_monitoring.py "cronjobs" --config "$1" 6 | 7 | # Poller(s) for trapper item data. Possible arguments are: config_file, field-selector, host name, minutes. 8 | UserParameter=kubernetes.poller.cronjobs[*],source /opt/virtualenv/kube-monitoring/bin/activate && python /etc/zabbix/scripts/kubernetes_monitoring.py "cronjobs" --config "$1" --host-name "$2" --minutes "$3" 9 | 10 | # Default field selectors for pods. 11 | # Possible status phase values are: Pending, Running, Succeeded, Failed or Unknown. 12 | UserParameter=kubernetes.discover.pods.default[*],source /opt/virtualenv/kube-monitoring/bin/activate && python /etc/zabbix/scripts/kubernetes_monitoring.py "pods" --config "$1" --field-selector "metadata.namespace!=kube-system,status.phase=Running" 13 | -------------------------------------------------------------------------------- /custom/scripts/discover_responsecodes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Version: 1.0 3 | 4 | # This script takes path to apache configuration folder as an argument and reads lines only with 'ProxyPass' or 'Location'. 5 | # It saves string between first and second slash to "URIS" array. 6 | # After that it loops through "STATUSCODES" array and "URIS" array and prints all combinations as json. 7 | 8 | set -e 9 | STATUSCODES=(100 101 102 200 201 202 203 204 205 206 207 208 226 300 301 302 303 304 305 306 307 308 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 426 428 429 431 440 444 449 450 451 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 520 598 599) 10 | 11 | # Reads files from folder line by line and takes backends from ProxyPass or Location lines and push them into array. 12 | URIS=() 13 | while IFS= read -r line; do 14 | URIS+="$line " 15 | done < <( grep -r 'ProxyPass\|Location' $1 | grep -Po '(?<=[[:blank:]])\/[^\/ \s]*' | awk '!a[$0]++' ) 16 | 17 | IFS=$' ' read -ra URIS <<< "$URIS" 18 | 19 | echo -n '{"data":[' 20 | 21 | var1=0 22 | var3=$[ ${#URIS[@]} - 1 ] 23 | 24 | while [ $var1 -lt "${#URIS[@]}" ] 25 | do 26 | for (( var2 = 0; $var2 <= 76; var2++ )) 27 | do 28 | # trims last "," from last line. 
29 | if [[ $var1 -eq $var3 && $var2 -eq 76 ]] 30 | then 31 | echo '{"{#URI}":"'${URIS[$var1]}'","{#RESPONSE}": "'${STATUSCODES[$var2]}'"}' | tr '\n' ' ' 32 | else 33 | echo '{"{#URI}":"'${URIS[$var1]}'","{#RESPONSE}": "'${STATUSCODES[$var2]}'"},' | tr '\n' ' ' 34 | fi 35 | done 36 | var1=$[ $var1 + 1 ] 37 | done 38 | echo -n ']}' 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Zabbix Monitoring Scripts 2 | 3 | This project contains various custom Zabbix monitoring scripts used as user parameters by Zabbix agent. 4 | 5 | ## Installation 6 | 7 | The repository includes ready-to-install files for Zabbix Agent. 8 | 9 | * Copy the files under [etc/zabbix/scripts](etc/zabbix/scripts) to `/etc/zabbix/scripts` 10 | * Copy the files under [etc/zabbix/zabbix_agentd.d](etc/zabbix/zabbix_agentd.d) to `/etc/zabbix/zabbix_agentd.d` 11 | 12 | ## Templates 13 | 14 | Each monitoring script has a corresponding template that can be imported to Zabbix Server. Templates can be found under [templates](templates). 15 | 16 | ## Version Numbering Scheme 17 | 18 | Each script has version information at the beginning of the script. 19 | [Semantic versioning](https://semver.org/) scheme is used with major.minor syntax. 20 | 21 | * Major version changes when you make incompatible changes with existing items / configuration syntax 22 | * Minor version changes when you add functionality or bug-fixes in a backwards-compatible manner 23 | 24 | ## Usage 25 | 26 | See the below documentation for each monitoring script. 27 | 28 | - [DB2 database snapshot statistics](documentation/db2stat.md) 29 | - [Docker discovery and monitoring](documentation/docker.md) 30 | - [Docker Swarm service discovery and monitoring](documentation/docker_swarm.md) 31 | - [Process discovery and monitoring](documentation/process.md) 32 | - [Pacemaker monitoring](documentation/pacemaker.md) 33 | - [PEM file certificate monitoring](documentation/certificates.md) 34 | - [Kontena grid monitoring](documentation/kontena_grid.md) 35 | - [Kubernetes monitoring](documentation/kubernetes_monitoring.md) 36 | - [MySQL & Galera monitoring](documentation/mysql-galera.md) 37 | 38 | -------------------------------------------------------------------------------- /documentation/pacemaker.md: -------------------------------------------------------------------------------- 1 | # Pacemaker Monitoring 2 | 3 | Get Pacemaker status. Adding the -v option to the command prints a more verbose string. Otherwise the script returns numeric or single-word statuses. 4 | 5 | See [user parameter configuration file](../etc/zabbix/zabbix_agentd.d/pacemaker.conf) for Zabbix item format. 6 | 7 | ## Script Usage 8 | 9 | | Command | Description | Units | 10 | | ------- | ----------- | ----- | 11 | pacemaker_status.py -i cluster -v | Get the cluster status in verbose format | text | 12 | pacemaker_status.py -i cluster | Cluster status in integer format | 0 if no nodes, 1 if running ok, 2 if any in standby, 3 if any in maintenance, 4 if any in shutdown | 13 | pacemaker_status.py -i cluster -p failed | Count the resources in a given state, e.g. how many failed | number | 14 | pacemaker_status.py -i resource -n Grafana | Get status of a single resource. Returns count of resources running | number | 15 | pacemaker_status.py -i resource -n Grafana -N application1 -p managed | Get the property value for a single resource on a given node.
| If node is not given, returns true if all the nodes have the property set to "true" | 16 | pacemaker_status.py -i node -n application1 | Get the status on a node | returns count of services running | 17 | pacemaker_status.py -i node -n application1 -v | Get the status on a node | returns verbose string of resource status | 18 | pacemaker_status.py -i resource -n Grafana -l | Get the nodes where resource is active. | Text format resource:node1,node2 | 19 | pacemaker_status.py -i cluster -l | Get all resources in the cluster and nodes where they are active. | Returns each resource and the nodes, separated by space | 20 | 21 | ## Example verbose output 22 | 23 | `application1:online:standby:resources_running=0 application2:online:resources_running=10 resources=10/12` 24 | -------------------------------------------------------------------------------- /documentation/certificates.md: -------------------------------------------------------------------------------- 1 | # Certificate Monitoring 2 | 3 | Discover PEM files and monitor certificates stored within. 4 | 5 | The discovery item scans the configured path recursively for files containing PEM 6 | formatted certificates. Directories and files that are not readable to the agent 7 | are skipped. Make sure the Zabbix agent user has access to the monitored files. 8 | 9 | The monitoring script requires the following Python modules to be installed on the system (confirmed working with the versions in parentheses): 10 | 11 | * pyOpenSSL (17.3.0) URL: https://pypi.python.org/pypi/pyOpenSSL 12 | * pem (17.1.0) URL: https://pypi.python.org/pypi/pem 13 | 14 | ## Usage 15 | 16 | Item Syntax | Description | Units | 17 | ----------- | ----------- | ----- | 18 | certificates.discovery[{$CERT_FILE_PATH}] | Discover certificates from path | Provides the following template variables: {#CRT_SUBJECT} {#CRT_FILE} {#CRT_INDEX}, {#CRT_CN} | 19 | certificate.status[{#CRT_FILE},{#CRT_INDEX}] | Certificate status | 0 = Valid, 1 = Not yet valid, 2 = Expired | 20 | certificate.startdate[{#CRT_FILE},{#CRT_INDEX}] | Certificate not before | ISO Date | 21 | certificate.enddate[{#CRT_FILE},{#CRT_INDEX}] | Certificate not after | ISO Date | 22 | certificate.lifetime[{#CRT_FILE},{#CRT_INDEX}] | Certificate lifetime until expiration (seconds) | | 23 | certificate.lifetime_days[{#CRT_FILE},{#CRT_INDEX}] | Certificate lifetime until expiration (days) | | 24 | certificate.serial[{#CRT_FILE},{#CRT_INDEX}] | Certificate serial | | 25 | certificate.subject[{#CRT_FILE},{#CRT_INDEX}] | Certificate subject | | 26 | certificate.issuer[{#CRT_FILE},{#CRT_INDEX}] | Certificate issuer | | 27 | certificate.subject_hash[{#CRT_FILE},{#CRT_INDEX}] | Certificate subject hash | | 28 | certificate.issuer_hash[{#CRT_FILE},{#CRT_INDEX}] | Certificate issuer hash | | 29 | certificate.fingerprint[{#CRT_FILE},{#CRT_INDEX}] | Certificate fingerprint | SHA-1 | 30 | 31 | ## Example 32 | 33 | ![Screenshot](certificates.png) 34 | -------------------------------------------------------------------------------- /documentation/mysql-galera.md: -------------------------------------------------------------------------------- 1 | This documentation describes how to configure MySQL and Galera monitoring permissions. 2 | 3 | **Use sudo/root to configure the following steps** 4 | 5 | 1. Install Zabbix agent and MySQL client. If necessary, add the path to the mysql and mysqladmin utilities to the global environment variable PATH. 2.
Create a MySQL user for monitoring (<password> at your discretion): 7 | ``` 8 | CREATE USER 'zbx_monitor'@'%' IDENTIFIED BY '<password>'; 9 | GRANT REPLICATION CLIENT,PROCESS,SHOW DATABASES,SHOW VIEW ON *.* TO 'zbx_monitor'@'%'; 10 | ``` 11 | For more information, please see the MySQL documentation: https://dev.mysql.com/doc/refman/8.0/en/grant.html 12 | 13 | 3. Create .my.cnf in the home directory of the Zabbix agent for Linux (/var/lib/zabbix by default) or my.cnf in c:\ for Windows. The file must contain the following three lines: 14 | ``` 15 | [client] 16 | user='zbx_monitor' 17 | password='<password>' 18 | ``` 19 | 20 | 21 | Add the rule to the SELinux policy (example for CentOS): 22 | ``` 23 | # cat <<EOF > zabbix_home.te 24 | 25 | module zabbix_home 1.0; 26 | 27 | require { 28 | type zabbix_agent_t; 29 | type zabbix_var_lib_t; 30 | type mysqld_etc_t; 31 | type mysqld_port_t; 32 | type mysqld_var_run_t; 33 | class file { open read }; 34 | class tcp_socket name_connect; 35 | class sock_file write; 36 | } 37 | 38 | #============= zabbix_agent_t ============== 39 | 40 | allow zabbix_agent_t zabbix_var_lib_t:file read; 41 | allow zabbix_agent_t zabbix_var_lib_t:file open; 42 | allow zabbix_agent_t mysqld_etc_t:file read; 43 | allow zabbix_agent_t mysqld_port_t:tcp_socket name_connect; 44 | allow zabbix_agent_t mysqld_var_run_t:sock_file write; 45 | EOF 46 | # checkmodule -M -m -o zabbix_home.mod zabbix_home.te 47 | # semodule_package -o zabbix_home.pp -m zabbix_home.mod 48 | # semodule -i zabbix_home.pp 49 | # restorecon -R /var/lib/zabbix 50 | ``` 51 | 52 | 4. To test the MySQL connection, run 53 | ``` 54 | zabbix_agentd -t mysql.version 55 | zabbix_agentd -t mysql.get_status_variables 56 | ``` 57 | 5. To test the Galera connection, run 58 | ``` 59 | zabbix_agentd -t galera.cluster_status 60 | ``` 61 | -------------------------------------------------------------------------------- /opt/cron/docker_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for running docker monitoring script actions and posting results via trapper items.
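# Intended to be run periodically from cron; an illustrative crontab entry (the schedule is an assumption):
#   */5 * * * * /opt/cron/docker_stats.sh stats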
3 | # 4 | # USAGE: 5 | # Discover running containers: 6 | # /opt/cron/docker_stats.sh discovery 7 | # Discover all containers: 8 | # /opt/cron/docker_stats.sh discovery_all 9 | # Count running containers: 10 | # /opt/cron/docker_stats.sh count 11 | # Count all containers: 12 | # /opt/cron/docker_stats.sh count_all 13 | # Send stats to trapper items: 14 | # /opt/cron/docker_stats.sh stats "<stats>" "<containers>" 15 | # - OPTIONAL stats: space-delimited list of stats, defaults to all supported stats (cpu disk netin netout memory status uptime) 16 | # - OPTIONAL containers: space-delimited list of container names or ids, defaults to all containers 17 | # 18 | set -e 19 | 20 | # Path to Zabbix agent script docker.sh 21 | ZBX_DOCKER_SCRIPT=/etc/zabbix/scripts/docker.sh 22 | # Path to temporary stats file 23 | STATS_FILE=/tmp/docker_stats.txt 24 | 25 | SCRIPT_ACTION=$1 26 | shift 27 | 28 | rm -f $STATS_FILE 29 | if [ "$SCRIPT_ACTION" == "stats" ]; then 30 | stats=${1:-cpu disk netin netout memory status uptime} 31 | containers=$2 32 | if [ -z "$containers" ]; then 33 | containers=$($ZBX_DOCKER_SCRIPT discovery_all | jq -r '.[][]["{#CONTAINERNAME}"]') 34 | fi 35 | 36 | for c in $containers; do 37 | for s in $stats; do 38 | value=$($ZBX_DOCKER_SCRIPT $c $s) 39 | echo "- docker.containers[$c,$s] $value" >>$STATS_FILE 40 | done 41 | done 42 | elif [ "$SCRIPT_ACTION" == "discovery" ]; then 43 | value=$($ZBX_DOCKER_SCRIPT discovery) 44 | echo "- docker.containers.discovery $value" >>$STATS_FILE 45 | elif [ "$SCRIPT_ACTION" == "discovery_all" ]; then 46 | value=$($ZBX_DOCKER_SCRIPT discovery_all) 47 | echo "- docker.containers.discovery.all $value" >>$STATS_FILE 48 | elif [ "$SCRIPT_ACTION" == "count" ]; then 49 | value=$($ZBX_DOCKER_SCRIPT count) 50 | echo "- docker.containers.count $value" >>$STATS_FILE 51 | elif [ "$SCRIPT_ACTION" == "count_all" ]; then 52 | value=$($ZBX_DOCKER_SCRIPT count_all) 53 | echo "- docker.containers.count.all $value" >>$STATS_FILE 54 | fi 55 | 56 | # Send results if we got some 57 | if [ -e $STATS_FILE ]; then 58 | zabbix_sender -vv -c /etc/zabbix/zabbix_agentd.conf -i $STATS_FILE 59 | rm -f $STATS_FILE 60 | fi -------------------------------------------------------------------------------- /custom/scripts/fileTimestamp.vbs: -------------------------------------------------------------------------------- 1 | ' SCRIPT NAME 2 | ' fileTimestamp.vbs 3 | ' SUMMARY 4 | ' This script loops through the given folder's subfolders recursively 5 | ' and returns the latest timestamp it can find, including the starting folder 6 | ' PARAMS 7 | ' starting folder 8 | ' RETURNS 9 | ' timestamp in seconds (unix time) in UTC time 10 | ' If starting folder doesn't exist, returns 0. 11 | ' If some other error occurs, returns compiler's error message and code.
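' EXAMPLE
' Agent-side invocation as wired in custom/conf/fileTimestamp.conf (the folder is illustrative):
'   cscript c:\zabbix\scripts\fileTimestamp.vbs //nologo c:\data\incoming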
12 | 13 | Public latestTime 'the latest timestamp from all folders' files 14 | Dim objFSO, objFolder, startFolder 15 | 16 | On Error Resume Next 17 | 18 | Set objFSO = CreateObject("Scripting.FileSystemObject") 19 | startFolder = Replace(WScript.Arguments(0),"/","\") 20 | 'startFolder = "f:\startFolder" 'for testing 21 | 22 | Set objFolder = objFSO.GetFolder(startFolder) 23 | 'checking that the given path exists 24 | If Err.Number <> 0 Then 25 | 'if path doesn't exist, returns 0 26 | WScript.StdOut.Write 0 27 | WScript.Quit 28 | End If 29 | 30 | 'goes through given folder's subfolders and hunts for the latest timestamp 31 | FindLatestTimestamp objFolder 32 | 33 | 'sets the latest local timestamp to UTC time 34 | Set dateTime = CreateObject("WbemScripting.SWbemDateTime") 35 | dateTime.SetVarDate(latestTime) 36 | 37 | 'writes the latest timestamp in unix time (seconds from 1.1.1970) 38 | WScript.StdOut.Write DateDiff("s", "1/1/1970", dateTime.GetVarDate(false)) 39 | 40 | 'Method that loops given folder's subfolders recursively and finds the latest timestamp of all files 41 | Sub FindLatestTimestamp(Folder) 42 | 43 | 'first check the files for given folder 44 | Set objFolder = objFSO.GetFolder(Folder) 45 | If objFolder.Files.Count > 0 Then 46 | 47 | Set colFiles = objFolder.Files 48 | 49 | For Each objFile In colFiles 50 | 'check if the file is the latest so far 51 | If DateDiff("s", objFile.DateLastModified, latestTime) < 0 Then 52 | latestTime = objFile.DateLastModified 53 | End If 54 | Next 55 | 56 | End If 57 | 58 | 'then recursively start to check another folder 59 | For Each Subfolder In Folder.SubFolders 60 | FindLatestTimestamp Subfolder 61 | Next 62 | 63 | End Sub 64 | 65 | 'checking if other errors have occurred 66 | If Err.Number <> 0 Then 67 | WScript.StdOut.Write "Error: " & Err.Description & " (" & Err.Number & ")" 68 | WScript.Quit 69 | End If 70 | -------------------------------------------------------------------------------- /documentation/db2stat.md: -------------------------------------------------------------------------------- 1 | 2 | # DB2 Database Snapshot Statistics (db2stat) 3 | 4 | This script generates database snapshots (i.e. get snapshot for database) from 5 | DB2 and retrieves statistics from it. 6 | 7 | Because the DB2 install location varies by system and installation method, the path 8 | to the DB2 executable must be edited into the PATH environment variable set up in the 9 | script. 10 | 11 | The Zabbix agent user must also have permission to create database snapshots. See 12 | below for how to do this. 13 | 14 | See the [script file](../etc/zabbix/scripts/db2stat.pl) for detailed information. 15 | 16 | ## Enabling DB2 Snapshots for Zabbix User 17 | 18 | To allow the Zabbix agent user to create snapshots, it must have the capability in DB2 19 | to do that. For monitoring purposes the best match is the SYSMON permission. The 20 | operating system group for this is set via DB2 configuration parameters. 21 | 22 | To enable snapshots: 23 | 24 | 1. Create a sysmon group in the operating system and add the zabbix agent user to it (Zabbix agent must be installed so that the zabbix user is present). 25 | - Linux systems: `groupadd sysmon && usermod -a -G sysmon zabbix` 26 | - AIX systems: `mkgroup sysmon` 27 | `chgrpmem -m + zabbix sysmon` 28 | 2.
2. Configure the sysmon group to have the SYSMON authority in the database by executing the following *as the db2 user*:
   - Configure sysmon group: `db2 update dbm cfg using sysmon_group sysmon`
   - Restart database: `db2stop && db2start`

To test taking the snapshot with the zabbix user in Linux as root:
`su -s /bin/bash -c "/bin/db2 get snapshot for database on <dbname>" zabbix`
To test taking the snapshot with the zabbix user in AIX as root:
`su - zabbix -c "/bin/db2 get snapshot for database on <dbname>"`

## Installing Items from Template

A Zabbix template for all items supported in the configuration is
[included](../templates/db2stat.xml). To configure it, at least the macro
value for DATABASE_NAME must be updated.

## Manual Item Configuration

The provided user parameter configuration contains several parameters. Consult the
[configuration file](../etc/zabbix/zabbix_agentd.d/db2stat.conf) for a full list.

Simple statistics can be retrieved with two parameters, the maximum snapshot age in seconds and the database name:

`db2stat.database_status[60,SAMPLE]`

Retrieving memory statistics additionally requires a node number:

`db2stat.package_cache_heap_size[60,SAMPLE,0]`
--------------------------------------------------------------------------------
/etc/zabbix/scripts/discover_certificates.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
# Version: 1.0
"""
Zabbix discovery for certificates stored in PEM files.
"""

from __future__ import print_function
from OpenSSL.crypto import load_certificate, FILETYPE_PEM
import sys
import pem
import os
import json


class CertEntry():
    """Certificate entry model."""

    def __init__(self, file_name, index, cert):
        self.file_name = file_name
        self.index = index
        self.cert = cert

    def __str__(self):
        return (self.file_name + "[" + str(self.index) + "]:" +
                str(self.cert.get_subject()))


def get_certificates_from_pem(file_name, certificates):
    """Finds all certificate entries from PEM file and adds them to given list.
    """
    entries = pem.parse_file(file_name)
    index = 0
    for entry in entries:
        if type(entry) == pem.Certificate:
            cert = load_certificate(FILETYPE_PEM, entry.as_bytes())
            certificates.append(CertEntry(file_name, index, cert))
        index = index + 1


def format_x509_name(x509_name):
    """Formats X509Name object into string representation."""
    name = ""
    for c in x509_name.get_components():
        name += '/'
        name += c[0].decode("utf-8")
        name += '='
        name += c[1].decode("utf-8")
    return name


def get_name_component(x509_name, component):
    """Gets single name component from X509 name."""
    value = ""
    for c in x509_name.get_components():
        if c[0].decode("utf-8") == component:
            value = c[1].decode("utf-8")
    return value


def json_output(entries):
    """Outputs list of certificate entries as Zabbix compatible discovery JSON.
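    Example output shape (file name and values invented for illustration):
    {"data": [{"{#CRT_FILE}": "/etc/ssl/certs/example.pem", "{#CRT_INDEX}": 0,
               "{#CRT_SUBJECT}": "/CN=example", "{#CRT_CN}": "example"}]}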
    """
    data = []
    output = {
        'data': data
    }
    for entry in entries:
        data.append({
            '{#CRT_FILE}': entry.file_name,
            '{#CRT_INDEX}': entry.index,
            '{#CRT_SUBJECT}': format_x509_name(entry.cert.get_subject()),
            '{#CRT_CN}': get_name_component(entry.cert.get_subject(), 'CN')
        })
    print(json.dumps(output))


def search_certificates(path, entries):
    """Searches certificates from PEM files in given path recursively.
    """
    if os.path.isdir(path) and os.access(path, os.R_OK):
        for child in os.listdir(path):
            search_certificates(os.path.join(path, child), entries)
    elif os.path.isfile(path) and os.access(path, os.R_OK):
        get_certificates_from_pem(path, entries)


if __name__ == '__main__':
    entries = []
    for path in sys.argv[1:]:
        search_certificates(path, entries)

    json_output(entries)
--------------------------------------------------------------------------------
/custom/scripts/elastizabbix.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3

import os
import sys
import json
import urllib.request
import time
import errno

# Check parameter count
if len(sys.argv) < 4:
    sys.exit('This script needs at least 3 parameters: ip, api, stat.')

ttl = 60
ip = sys.argv[1]

cluster_url = 'http://%s:9200/_cluster/stats' % ip
nodes_url = 'http://%s:9200/_nodes/stats' % ip
indices_url = 'http://%s:9200/_stats' % ip
health_url = 'http://%s:9200/_cluster/health' % ip

stats = {
    'cluster': cluster_url,
    'nodes': nodes_url,
    'indices': indices_url,
    'health': health_url
}

def created_file(name):
    try:
        fd = os.open(name, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
        os.close(fd)
        return True
    except OSError as e:
        if e.errno == errno.EEXIST:
            return False
        raise

def is_older_than(name, ttl):
    age = time.time() - os.path.getmtime(name)
    return age > ttl

def get_cache(api):
    cache = '/tmp/elastizabbix-{0}.json'.format(api)
    lock = '/tmp/elastizabbix-{0}.lock'.format(api)
    should_update = (not os.path.exists(cache)) or is_older_than(cache, ttl)
    if should_update and created_file(lock):
        try:
            # read() returns bytes; decode before writing to the text-mode cache file
            d = urllib.request.urlopen(stats[api]).read().decode()
            with open(cache, 'w') as f:
                f.write(d)
        except Exception:
            pass
        if os.path.exists(lock):
            os.remove(lock)
    # Remove a stale lock left behind by a crashed updater
    if os.path.exists(lock) and is_older_than(lock, 300):
        os.remove(lock)
    ret_data = {}
    try:
        with open(cache) as data_file:
            ret_data = json.load(data_file)
    except Exception:
        ret_data = json.loads(urllib.request.urlopen(stats[api]).read())
    return ret_data

def get_stat(api, stat):
    d = get_cache(api)
    keys = []
    for i in stat.split('.'):
        keys.append(i)
        key = '.'.join(keys)
        if key in d:
            d = d.get(key)
            keys = []
    return d

def discover_nodes():
    d = {'data': []}
    for k, v in get_stat('nodes', 'nodes').items():
        d['data'].append({'{#NAME}': v['name'], '{#NODE}': k})
    return json.dumps(d)

def discover_indices():
    d = {'data': []}
    for k, v in get_stat('indices', 'indices').items():
        d['data'].append({'{#NAME}': k})
    return json.dumps(d)


if __name__ == '__main__':
    api = sys.argv[2]
    stat = sys.argv[3]
if api == 'discover': 94 | if stat == 'nodes': 95 | print(discover_nodes()) 96 | if stat == 'indices': 97 | print(discover_indices()) 98 | 99 | else: 100 | stat = get_stat(api, stat) 101 | if isinstance(stat, dict): 102 | print('') 103 | else: 104 | print(stat) 105 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/postgresql_monitoring.conf: -------------------------------------------------------------------------------- 1 | UserParameter=pgsql.bgwriter[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.bgwriter.sql" 2 | 3 | UserParameter=pgsql.connections.sum[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.connections.sum.sql" 4 | UserParameter=pgsql.connections[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.connections.sql" 5 | UserParameter=pgsql.connections.prepared[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.connections.prepared.sql" 6 | 7 | UserParameter=pgsql.dbstat.sum[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.dbstat.sum.sql" 8 | UserParameter=pgsql.dbstat[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.dbstat.sql" 9 | 10 | UserParameter=pgsql.transactions[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.transactions.sql" 11 | UserParameter=pgsql.config.hash[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.config.hash.sql" 12 | UserParameter=pgsql.wal.stat[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.wal.stat.sql" 13 | UserParameter=pgsql.locks[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.locks.sql" 14 | UserParameter=pgsql.queries[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -v tmax=$5 -f "/etc/zabbix/scripts/postgresql/pgsql.query.time.sql" 15 | UserParameter=pgsql.uptime[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.uptime.sql" 16 | UserParameter=pgsql.cache.hit[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.cache.hit.sql" 17 | UserParameter=pgsql.scans[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.scans.sql" 18 | UserParameter=pgsql.frozenxid[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.frozenxid.sql" 19 | 20 | UserParameter=pgsql.discovery.db[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.discovery.db.sql" 21 | UserParameter=pgsql.db.size[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -c "SELECT pg_database_size('$5')" 22 | UserParameter=pgsql.ping[*], pg_isready -h "$1" -p "$2" -U "$3" -d "$4" 23 | UserParameter=pgsql.ping.time[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.ping.time.sql" 24 | UserParameter=pgsql.version[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -c "SELECT version();" 25 | 26 | UserParameter=pgsql.replication.count[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -c "SELECT count(*) FROM pg_stat_replication" 27 | UserParameter=pgsql.replication.recovery_role[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.replication.recovery_role.sql" 28 | UserParameter=pgsql.replication.lag.sec[*], psql -qtAX -h "$1" -p 
"$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.replication.lag.sql" 29 | UserParameter=pgsql.replication.status[*], psql -qtAX -h "$1" -p "$2" -U "$3" -d "$4" -f "/etc/zabbix/scripts/postgresql/pgsql.replication.status.sql" 30 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/postgresql/pgsql.query.time.sql: -------------------------------------------------------------------------------- 1 | WITH T AS 2 | (SELECT db.datname, 3 | coalesce(T.query_time_max, 0) query_time_max, 4 | coalesce(T.tx_time_max, 0) tx_time_max, 5 | coalesce(T.mro_time_max, 0) mro_time_max, 6 | coalesce(T.query_time_sum, 0) query_time_sum, 7 | coalesce(T.tx_time_sum, 0) tx_time_sum, 8 | coalesce(T.mro_time_sum, 0) mro_time_sum, 9 | coalesce(T.query_slow_count, 0) query_slow_count, 10 | coalesce(T.tx_slow_count, 0) tx_slow_count, 11 | coalesce(T.mro_slow_count, 0) mro_slow_count 12 | FROM pg_database db NATURAL 13 | LEFT JOIN ( 14 | SELECT datname, 15 | extract(epoch FROM now())::integer ts, 16 | coalesce(max(extract('epoch' FROM (clock_timestamp() - query_start))::integer * (state NOT IN ('idle', 'idle in transaction', 'idle in transaction (aborted)') AND query !~* E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) query_time_max, 17 | coalesce(max(extract('epoch' FROM (clock_timestamp() - query_start))::integer * (state NOT IN ('idle') AND query !~* E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) tx_time_max, 18 | coalesce(max(extract('epoch' FROM (clock_timestamp() - query_start))::integer * (state NOT IN ('idle') AND query ~* E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) mro_time_max, 19 | coalesce(sum(extract('epoch' FROM (clock_timestamp() - query_start))::integer * (state NOT IN ('idle', 'idle in transaction', 'idle in transaction (aborted)') AND query !~* E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) query_time_sum, 20 | coalesce(sum(extract('epoch' FROM (clock_timestamp() - query_start))::integer * (state NOT IN ('idle') AND query !~* E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) tx_time_sum, 21 | coalesce(sum(extract('epoch' FROM (clock_timestamp() - query_start))::integer * (state NOT IN ('idle') AND query ~* E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) mro_time_sum, 22 | 23 | coalesce(sum((extract('epoch' FROM (clock_timestamp() - query_start)) > :tmax)::integer * (state NOT IN ('idle', 'idle in transaction', 'idle in transaction (aborted)') AND query !~* E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) query_slow_count, 24 | coalesce(sum((extract('epoch' FROM (clock_timestamp() - query_start)) > :tmax)::integer * (state NOT IN ('idle') AND query !~* E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) tx_slow_count, 25 | coalesce(sum((extract('epoch' FROM (clock_timestamp() - query_start)) > :tmax)::integer * (state NOT IN ('idle') AND query ~* 
E'^(\\s*(--[^\\n]*\\n|/\\*.*\\*/|\\n))*(autovacuum|VACUUM|ANALYZE|REINDEX|CLUSTER|CREATE|ALTER|TRUNCATE|DROP)')::integer), 0) mro_slow_count
   FROM pg_stat_activity
   WHERE pid <> pg_backend_pid()
   GROUP BY 1) T
   WHERE NOT db.datistemplate )
SELECT json_object_agg(datname, row_to_json(T))
FROM T
--------------------------------------------------------------------------------
/etc/zabbix/scripts/db2stat.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl -wT
# Version: 1.2
# Usage: db2stat <timeout> <db2instance> <dbpath> <dbname> [<key> <value> ...] <stat>
#
# timeout - Timeout of snapshot in seconds (new snapshot will be taken if
#           previous is older than timeout)
# db2instance - Db2 instance
# dbpath - Path of db2 database
# dbname - Name of db2 database
# key/value - Key value pairs to match (e.g. "Node number" "0")
# stat - Actual stat to search for (e.g. "Database status")
#
# Generates snapshot from db2 into file in tmpdir directory if one does not
# already exist. The file is updated if it is older than specified timeout by
# taking new snapshot.
#
# After that searches for named stat line from generated file and returns its
# value. If key value pairs are specified, those must be found preceding the
# stat line in specified order and not have empty line in between.
#
# Note that key, value and stat are case sensitive and the way db2 uses
# capitalization is quite inconsistent.
#
# Examples:
#
# Retrieve simple stat "Database status" that is at most 10 seconds old:
# db2stat 10 db2instance "/usr/bin" mydb "Database status"
#
# Retrieve current size of package cache heap on node 0 at most 60 seconds old:
# db2stat 60 db2instance "/usr/bin" mydb "Node number" "0" "Memory Pool Type" "Package Cache Heap"
# "Current size (bytes)"

use File::Spec;

# Directory where snapshots are cached.
my $SNAPSHOT_DIR = File::Spec->tmpdir();

# Get database path, name and timeout args
my $timeout = shift @ARGV;
my $dbinstance = shift @ARGV;
my $dbpath = shift @ARGV;
my $dbname = shift @ARGV;

# Untaint
if ($timeout =~ /^(\d+)$/) {
    $timeout = $1;
} else {
    die "Bad timeout value";
}

if ($dbinstance =~ /^([-\/\w.]+)$/) {
    $dbinstance = $1;
} else {
    die "Bad dbinstance argument";
}

if ($dbpath =~ /^([-\/\w.]+)$/) {
    $dbpath = $1;
} else {
    die "Bad dbpath argument";
}

if ($dbname =~ /^([-\w.]+)$/) {
    $dbname = $1;
} else {
    die "Bad dbname argument";
}

# Set environment variable for db2 instance
$ENV{'DB2INSTANCE'} = "$dbinstance";

# Set path of db2 executable
$ENV{'PATH'} = "$dbpath";

# Generate stat file name
my $statfile = "$SNAPSHOT_DIR/$dbname.txt";
my $tmpstatfile = "$SNAPSHOT_DIR/$dbname.txt.tmp";

# Regenerate stats if file too old
if (!-f $statfile or (time - (stat($statfile))[10]) > $timeout) {
    # first touch the file to prevent another run from regenerating it in parallel
    system("/usr/bin/touch $statfile");
    # then generate tmp data
    system("db2 get snapshot for database on $dbname >$tmpstatfile");
    # finally swap files
    system("/usr/bin/cp -p $tmpstatfile $statfile");
}

# Generate regular expressions to match from args
while (@ARGV) {
    my $key = shift(@ARGV);
    my $value = shift(@ARGV);
    if (defined $value) {
        # Add preceding line match
        push(@RE, qr/\s*\Q$key\E\s*=\s*\Q$value\E\s*/);
    } else {
        # Add stat line match
        push(@RE, qr/\s*\Q$key\E\s*=\s*(.+)\s*/);
    }
}

# Try to find value with preceding line matches and stat line match
my $idx = 0;
open(DATA, "<$statfile");
while (<DATA>) {
    my $line = $_;
    if ($line =~ $RE[$idx]) {
        my $match = $1;

        # Return the match if all expressions have been matched
        $idx++;
        if ($idx == scalar @RE) {
            print "$match\n";
            exit 0;
        }
    }

    # Reset matches on an empty line (lines still carry their newline here)
    if ($line =~ /^\s*$/) {
        $idx = 0;
    }
}
--------------------------------------------------------------------------------
/documentation/docker.md:
--------------------------------------------------------------------------------
# Docker containers discovery and monitoring

Requirements:
- Python 2.7.13
- netcat (ubuntu: `sudo apt-get install netcat`)
- jq (ubuntu: `sudo apt-get install jq`)

In addition to the provided [template](../templates), the script is compatible with the www.monitoringartist.com docker monitoring templates that are included by default in [zabbix-xxl](https://github.com/monitoringartist/dockbix-xxl).

The zabbix user must have enough privileges to monitor docker:

* Either add the zabbix user to the docker group: `sudo usermod -aG docker zabbix`
* Or add a file under `/etc/sudoers.d` containing the line `zabbix ALL=(ALL:ALL) NOPASSWD: /bin/netcat`

## Usage

Item Syntax | Description | Units |
----------- | ----------- | ----- |
docker.containers.discovery | Discover all running Docker containers | Provides the following template variables: {#CONTAINERID}, {#CONTAINERNAME}, {#HCONTAINERID}, {#IMAGENAME}, {#IMAGETAG} |
docker.containers.count | Number of all running Docker containers | (number) |
docker.containers.discovery.all | Discover all Docker containers | Provides the following template variables: {#CONTAINERID}, {#CONTAINERNAME}, {#HCONTAINERID}, {#IMAGENAME}, {#IMAGETAG} |
docker.containers.count.all | Number of all Docker containers | (number) |
docker.containers[{#CONTAINERID}, netin] | Incoming network traffic (eth0) of the container | bytes per second (B/s) |
docker.containers[{#CONTAINERID}, netout] | Outgoing network traffic (eth0) of the container | bytes per second (B/s) |
docker.containers[{#CONTAINERID}, cpu] | Container CPU usage | % |
docker.containers[{#CONTAINERID}, disk] | Container disk usage | bytes |
docker.containers[{#CONTAINERID}, memory] | Container memory usage | bytes |
docker.containers[{#CONTAINERID}, uptime] | Container uptime | uptime (seconds) |
docker.containers[{#CONTAINERID}, up] | Is the container up and running? | 1 (yes), 0 (no) |
docker.containers[{#CONTAINERID}, status] | Container status | 0 (exited with error or no such container), 1 (running), 2 (not started or shut down) |
docker.containers[{#IMAGENAME}, image_netin] | Incoming network traffic (eth0) of the only container running the given image | bytes per second (B/s) |
docker.containers[{#IMAGENAME}, image_netout] | Outgoing network traffic (eth0) of the only container running the given image | bytes per second (B/s) |
docker.containers[{#IMAGENAME}, image_cpu] | CPU usage of the only container running the given image | % |
docker.containers[{#IMAGENAME}, image_disk] | Disk usage of the only container running the given image | bytes |
docker.containers[{#IMAGENAME}, image_memory] | Memory usage of the only container running the given image | bytes |
docker.containers[{#IMAGENAME}, image_uptime] | Uptime of the only container running the given image | uptime (seconds) |
docker.containers[{#IMAGENAME}, image_up] | Is the single container running the given image up and running? | 1 (yes), 0 (no) |
docker.containers[{#IMAGENAME}, image_containerids] | List of running container IDs with the image name | container IDs, one per line |
docker.containers[{#IMAGENAME}, image_containerids_all] | List of all container IDs with the image name | container IDs, one per line |

* Items returning container metrics or status by image name will produce an error if multiple containers running the image are up
* Items with an image name also allow specifying imagename + tag (i.e. {#IMAGENAME}:{#IMAGETAG}); see the example below
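For example, with a hypothetical container named `my-nginx` running the image `nginx:1.19` (names invented for illustration), concrete item keys would look like:
```
docker.containers[my-nginx, cpu]
docker.containers[nginx:1.19, image_cpu]
```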
### Trapper Based Execution

The folder /opt/cron includes a wrapper script that allows posting stats into trapper items instead.

It has some benefits over the standard approach:
- All container stats are sent in one bulk request to Zabbix
- It can be set up to run under a separate account from zabbix, to avoid granting docker permissions to the Zabbix agent

The main disadvantage is that it requires setting up separate cron jobs to execute discovery and stats gathering (also container count if necessary).

Additional requirements:
- zabbix_sender installed in the system and available in the user's path
- Hostname set in the Zabbix agent configuration file (/etc/zabbix/zabbix_agentd.conf)

The Zabbix template for the trapper version of monitoring is named docker_trapper.xml.
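For reference, the wrapper writes one metric line per container/stat pair into a temporary file and sends the whole file with `zabbix_sender` in a single batch. A hypothetical file for two containers (names and values invented for illustration) would look like this; the leading `-` tells `zabbix_sender` to use the Hostname from the agent configuration file, which is why that setting is required:
```
- docker.containers[web1,cpu] 3.2
- docker.containers[web1,memory] 104857600
- docker.containers[db1,status] 1
```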
*Example crontab setup:*
```
0 * * * * /opt/cron/docker_stats.sh discovery_all >>/var/log/docker_stats.log 2>&1
* * * * * /opt/cron/docker_stats.sh count >>/var/log/docker_stats.log 2>&1
* * * * * /opt/cron/docker_stats.sh stats >>/var/log/docker_stats.log 2>&1
```

## Example

![Screenshot](docker.png)
--------------------------------------------------------------------------------
/etc/zabbix/scripts/docker_swarm.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2

"""
Docker Swarm service monitoring
Version: 1.0.1

Usage:
python3 docker_swarm.py discovery
python3 docker_swarm.py <mode> --service <service>

Discover Docker Swarm services (with service data as an array):
python3 docker_swarm.py discovery

Retrieve service hostname, status or uptime:
python3 docker_swarm.py hostname --service <service>
python3 docker_swarm.py status --service <service>
python3 docker_swarm.py uptime --service <service>
"""

# Python imports
from argparse import ArgumentParser
import datetime
import json

# 3rd party imports
import dateutil.parser
import docker

# Declare variables
modes = ["discovery", "hostname", "status", "uptime"]  # Available modes
services = {}  # Dictionary for Docker service(s) data

# Parse command-line arguments
parser = ArgumentParser(
    description="Discover or retrieve metrics from Docker Swarm services."
)
parser.add_argument("mode", choices=modes, help="Discovery or metric: " +
                    ", ".join(modes))
parser.add_argument("-s", "--service", type=str,
                    help="Service name to retrieve information from.")
args = parser.parse_args()

# Retrieve docker client instance using environment settings
client = docker.from_env()

# Parse system time from Docker
system_time = dateutil.parser.parse(client.info().get("SystemTime"))

# Limit results to specific service if service parameter is used
service_filters = {}
if args.service:
    service_filters["name"] = args.service

# Loop services and tasks and retrieve information
for service in client.services.list(filters=service_filters):

    # Reset task variables for each service
    created_date = None  # Task's creation date
    nodes = []  # A list of nodes where task is currently running
    task_created = None  # A datetime object for latest task's creation date
    task_status = "not running"  # Task status, default is "not running"
    uptime = datetime.timedelta()  # A datetime object for latest task's uptime

    # Loop tasks to collect data, but only from running tasks
    for task in service.tasks({"desired-state": "running"}):

        # Parse task creation date for comparison
        created_date = dateutil.parser.parse(task.get("CreatedAt"))

        # First time around, grab the first task
        if not task_created:
            task_created = created_date
            task_status = task.get("Status").get("State")
        # Compare previous task's date to current one
        elif task_created < created_date:
            task_created = created_date
            task_status = task.get("Status").get("State")

        # Grab node ID for later matching from nodes list
        nodes.append(task.get("NodeID"))

    # Count uptime
    if task_created:
        uptime = system_time - task_created

    # Append service data to dictionary
services[service.name] = { 88 | "hostname": "", 89 | "nodes": nodes, 90 | "status": task_status, 91 | "uptime": uptime.total_seconds() 92 | } 93 | 94 | # Loop services and nodes to retrieve additional information 95 | for node in client.nodes.list(): 96 | for name, service in services.items(): 97 | 98 | # Match node ID to service's node IDs 99 | if node.attrs.get("ID") in service.get("nodes"): 100 | 101 | # Add comma if hostnames already have items 102 | if services[name].get("hostname"): 103 | services[name]["hostname"] += ", " 104 | 105 | # Add node hostname to services dictionary 106 | services[name]["hostname"] += "{}".format( 107 | node.attrs.get("Description").get("Hostname") 108 | ) 109 | 110 | # Loop service data and create discovery 111 | if args.mode == "discovery": 112 | output = [] 113 | for name, service in services.items(): 114 | output.append({ 115 | "{#SERVICE}": name, 116 | "hostname": service.get("hostname"), 117 | "uptime": service.get("uptime"), 118 | "service": name, 119 | "status": service.get("status") 120 | }) 121 | 122 | # Dump discovery 123 | discovery = {"data": output} 124 | print(json.dumps(discovery)) 125 | 126 | # Retrieve service information using command-line arguments 127 | else: 128 | if not services.get(args.service): 129 | print("Invalid service name.") 130 | elif not services[args.service].get(args.mode): 131 | print("Invalid mode argument.") 132 | else: 133 | print(services[args.service].get(args.mode)) 134 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/check_certificate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # Version: 1.0 3 | """ 4 | Script for retrieving certificate information items from PEM file. 5 | """ 6 | 7 | from __future__ import print_function 8 | from OpenSSL.crypto import load_certificate, FILETYPE_PEM 9 | from datetime import datetime 10 | import argparse 11 | import pem 12 | import sys 13 | import os 14 | 15 | 16 | def format_x509_name(x509_name): 17 | """Formats X509Name object into string representation.""" 18 | name = "" 19 | for c in x509_name.get_components(): 20 | name += '/' 21 | name += c[0].decode("utf-8") 22 | name += '=' 23 | name += c[1].decode("utf-8") 24 | return name 25 | 26 | 27 | def from_asn1_date(asn1date): 28 | """Converts ASN1 formatted datetime into datetime object. 29 | """ 30 | return datetime.strptime(asn1date.decode("utf-8"), '%Y%m%d%H%M%SZ') 31 | 32 | 33 | def get_certificate(file_name, index): 34 | """Retrieves certificate from PEM file. Returns None if certificate cannot 35 | be extracted and writes reason to stdout. 36 | """ 37 | if os.path.isfile(file_name) and os.access(file_name, os.R_OK): 38 | entries = pem.parse_file(file_name) 39 | if index >= len(entries): 40 | print('Requested entry at index', index, 'while file has only', 41 | len(entries), 'entries.') 42 | elif type(entries[index]) == pem.Certificate: 43 | return load_certificate(FILETYPE_PEM, entries[index].as_bytes()) 44 | else: 45 | print('Entry at index', index, 'is not a certificate.') 46 | else: 47 | print('Unable to read file [' + file_name + '].') 48 | 49 | 50 | def execute_command(command, cert): 51 | """Dispatches certificate to command function. 52 | """ 53 | module = sys.modules[__name__] 54 | return getattr(module, 'cmd_' + command, None)(cert) 55 | 56 | 57 | def cmd_status(cert): 58 | """Returns certificate status based on certificate validity period. 
Values: 59 | 0 - valid 60 | 1 - not yet valid 61 | 2 - expired 62 | """ 63 | not_before = from_asn1_date(cert.get_notBefore()) 64 | not_after = from_asn1_date(cert.get_notAfter()) 65 | at = datetime.now() 66 | if at < not_before: 67 | return 1 68 | elif at > not_after: 69 | return 2 70 | else: 71 | return 0 72 | 73 | 74 | def cmd_startdate(cert): 75 | """Returns not before date of certificate. 76 | """ 77 | return from_asn1_date(cert.get_notBefore()) 78 | 79 | 80 | def cmd_enddate(cert): 81 | """Returns not after date of certificate. 82 | """ 83 | return from_asn1_date(cert.get_notAfter()) 84 | 85 | 86 | def cmd_serial(cert): 87 | """Returns serial number of certificate. 88 | """ 89 | return cert.get_serial_number() 90 | 91 | 92 | def cmd_subject(cert): 93 | """Returns subject of certificate. 94 | """ 95 | return format_x509_name(cert.get_subject()) 96 | 97 | 98 | def cmd_subject_hash(cert): 99 | """Returns hash of certificate subject. 100 | """ 101 | return cert.get_subject().hash() 102 | 103 | 104 | def cmd_issuer(cert): 105 | """Returns issuer of certificate. 106 | """ 107 | return format_x509_name(cert.get_issuer()) 108 | 109 | 110 | def cmd_issuer_hash(cert): 111 | """Returns hash of certificate issuer. 112 | """ 113 | return cert.get_issuer().hash() 114 | 115 | 116 | def cmd_fingerprint(cert): 117 | """Returns SHA-1 fingerprint of certificate. 118 | """ 119 | return cert.digest('sha1') 120 | 121 | 122 | def cmd_lifetime(cert): 123 | """Returns remaining lifetime of certificate in seconds. 124 | """ 125 | delta = (from_asn1_date(cert.get_notAfter()) - datetime.now()) 126 | return delta.days * 86400 + delta.seconds 127 | 128 | 129 | def cmd_lifetime_days(cert): 130 | """Returns remaining lifetime of certificate in full days. 131 | """ 132 | delta = (from_asn1_date(cert.get_notAfter()) - datetime.now()) 133 | return delta.days 134 | 135 | 136 | if __name__ == '__main__': 137 | commands = ['status', 'startdate', 'enddate', 'lifetime', 'lifetime_days', 138 | 'serial', 'subject', 'issuer', 'subject_hash', 'issuer_hash', 139 | 'fingerprint'] 140 | # Define and parse arguments 141 | parser = argparse.ArgumentParser( 142 | description='Check certificate in PEM file') 143 | parser.add_argument('file', nargs='?', help='PEM file') 144 | parser.add_argument('index', nargs='?', type=int, 145 | help='Certificate\'s index in file') 146 | parser.add_argument('stat', nargs='?', default='status', 147 | choices=commands, help='Information to return') 148 | args = parser.parse_args() 149 | 150 | # Retrieve certificate and execute command function on it 151 | cert = get_certificate(args.file, args.index) 152 | if cert is not None: 153 | print(execute_command(args.stat, cert)) 154 | -------------------------------------------------------------------------------- /documentation/db2stat-testing.md: -------------------------------------------------------------------------------- 1 | # How to test db2stat.pl in a docker container 2 | 3 | IBM Db2 docker container URL 4 | https://hub.docker.com/r/ibmcom/db2 5 | 6 | Download and run docker container 7 | ``` 8 | docker run -itd --name mydb2 --privileged=true -p 50000:50000 -e LICENSE=accept -e DB2INST1_PASSWORD=root -e DBNAME=testdb -v /tmp/database:/database ibmcom/db2 9 | ``` 10 | 11 | 12 | Log on to the container 13 | ``` 14 | docker exec -ti mydb2 bash 15 | ``` 16 | 17 | 18 | Set correct rights for database configurations 19 | ``` 20 | groupadd sysmon 21 | usermod -a -G sysmon db2inst1 22 | ``` 23 | 24 | 25 | Install nano and perl 26 | ``` 27 | yum install -y 
nano perl
```


Switch to user db2inst1
```
su - db2inst1
```


Modify the database manager configuration
```
db2 update dbm cfg using sysmon_group sysmon
```


Start the Db2 command line application
```
db2
```


Connect to database
```
connect to testdb user db2inst1 using root
```


Create table and insert test data into it
```
CREATE TABLE test (sarake1 INT PRIMARY KEY NOT NULL);
INSERT INTO test VALUES (1);
INSERT INTO test VALUES (2);
INSERT INTO test VALUES (3);
```


Test database snapshot
```
get snapshot for database on testdb
```


Exit db2 command line
```
CTRL+D
```


Change directory to /tmp
```
cd /tmp
```


Create db2stat.pl using nano
- In the script, change the db2 application path to "/opt/ibm/db2/V11.5/bin/db2"
```
nano db2stat.pl
```


Test db2stat script
```
perl -T ./db2stat.pl 60 db2inst1 /opt/ibm/db2/V11.5/ testdb "Database status"
```


The script should return the output:
```
Active
```


## Alternative way using a docker with multiple Db2 instances

Angoca's Db2 docker container URL:
https://github.com/angoca/db2-docker/tree/master/db2-install/expc


Download and run docker container
```
sudo docker run -i -t --privileged=true --name="db2inst1" -p 50000:50000 angoca/db2-install
```


Change directory to /tmp/db2_conf
```
cd /tmp/db2_conf
```


Update apt lists
```
apt update
```


Download and install nano and perl
```
apt install -y nano perl
```


Create a new database instance
```
./createInstance db2inst1
```


Create a new user for the second instance
(User db2inst1 exists in the docker container already)
```
useradd -g db2grp1 -m db2inst2
```


Set db2inst2 user password
```
passwd db2inst2
```


Create new Db2 instance
```
./createInstance db2inst2
```

(If you receive an error about an invalid UID for user "db2inst2", change the
user UID value in the file "/tmp/db2_conf/db2expc_instance.rsp". The value is
set on the line "DB2_INST.UID". You can check the user's UID by running the
command "id db2inst2".)


Change directory to /tmp
```
cd /tmp
```


Create db2stat.pl using nano
- In the script, change the db2 application path to "/opt/ibm/db2/V11.5/bin/db2"
- Change the cp and touch command paths to "/bin/cp" and "/bin/touch".
170 | ``` 171 | nano db2stat.pl 172 | ``` 173 | 174 | 175 | Change group and permissions for db2stat.pl file 176 | ``` 177 | chgrp db2grp1 db2stat.pl 178 | chmod 770 db2stat.pl 179 | ``` 180 | 181 | 182 | Switch user 183 | ``` 184 | su - db2inst1 185 | ``` 186 | 187 | 188 | Start the database manager 189 | ``` 190 | db2start 191 | ``` 192 | 193 | 194 | Start the Db2 command line application 195 | ``` 196 | db2 197 | ``` 198 | 199 | 200 | Create a new database 201 | ``` 202 | create database testdb1 using codeset UTF-8 territory en 203 | ``` 204 | 205 | 206 | Connect to database 207 | ``` 208 | connect to testdb1 user db2inst1 using db2inst1 209 | ``` 210 | 211 | 212 | Create table and insert test data into it 213 | ``` 214 | CREATE TABLE test (sarake1 INT PRIMARY KEY NOT NULL) 215 | INSERT INTO test VALUES (1) 216 | INSERT INTO test VALUES (2) 217 | INSERT INTO test VALUES (3) 218 | ``` 219 | 220 | 221 | Exit db2 command line 222 | ``` 223 | CTRL+D 224 | ``` 225 | 226 | 227 | Test db2stat.pl script 228 | ``` 229 | perl -T ./db2stat.pl 60 db2inst1 /opt/ibm/db2/V11.5/ testdb1 "Database status" 230 | ``` 231 | 232 | 233 | The script should return the output: 234 | ``` 235 | Active 236 | ``` 237 | 238 | 239 | Exit db2inst1 user shell 240 | ``` 241 | exit 242 | ``` 243 | 244 | 245 | Switch user 246 | ``` 247 | su - db2inst2 248 | ``` 249 | 250 | 251 | Start the database manager 252 | ``` 253 | db2start 254 | ``` 255 | 256 | 257 | Start the Db2 command line application 258 | ``` 259 | db2 260 | ``` 261 | 262 | 263 | Create a new database 264 | ``` 265 | create database testdb2 using codeset UTF-8 territory en 266 | ``` 267 | 268 | 269 | Connect to database 270 | ``` 271 | connect to testdb2 user db2inst2 using db2inst2 272 | ``` 273 | 274 | 275 | Create table and insert test data into it 276 | ``` 277 | CREATE TABLE test (sarake1 INT PRIMARY KEY NOT NULL) 278 | INSERT INTO test VALUES (1) 279 | INSERT INTO test VALUES (2) 280 | INSERT INTO test VALUES (3) 281 | ``` 282 | 283 | 284 | Exit db2 command line 285 | ``` 286 | CTRL+D 287 | ``` 288 | 289 | 290 | Test db2stat.pl script 291 | ``` 292 | perl -T ./db2stat.pl 60 db2inst2 /opt/ibm/db2/V11.5/ testdb2 "Database status" 293 | ``` 294 | 295 | 296 | The script should return the output: 297 | ``` 298 | Active 299 | ``` 300 | 301 | 302 | Exit db2inst2 user shell 303 | ``` 304 | exit 305 | ``` 306 | -------------------------------------------------------------------------------- /etc/zabbix/zabbix_agentd.d/galera.conf: -------------------------------------------------------------------------------- 1 | # Copied and adapted from https://github.com/MogiePete/zabbix-galera-template/blob/master/userparameter_galera.conf 2 | 3 | #Total number of cluster membership changes happened. 4 | UserParameter=galera.cluster_conf_id,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_cluster_conf_id';" | HOME=/var/lib/zabbix mysql -N 5 | 6 | #Current number of members in the cluster. 7 | UserParameter=galera.cluster_size,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_cluster_size';" | HOME=/var/lib/zabbix mysql -N 8 | 9 | #Status of this cluster component. That is, whether the node is part of a PRIMARY or NON_PRIMARY component. 
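#A healthy node normally reports "Primary"; "non-Primary" or "Disconnected" indicates a partitioned node.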
10 | UserParameter=galera.cluster_status,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_cluster_status';" | HOME=/var/lib/zabbix mysql -N 11 | 12 | #If the value is OFF, the node has not yet connected to any of the cluster components. 13 | UserParameter=galera.wsrep_connected,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_connected';" | HOME=/var/lib/zabbix mysql -N 14 | 15 | #Shows the internal state of the EVS Protocol 16 | UserParameter=galera.wsrep_evs_state,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_evs_state';" | HOME=/var/lib/zabbix mysql -N 17 | 18 | #How much the slave lag is slowing down the cluster. 19 | UserParameter=galera.wsrep_flow_control_paused,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_flow_control_paused';" | HOME=/var/lib/zabbix mysql -N 20 | 21 | #The total time spent in a paused state measured in nanoseconds. 22 | UserParameter=galera.wsrep_flow_control_paused_ns,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_flow_control_paused_ns';" | HOME=/var/lib/zabbix mysql -N 23 | 24 | #Returns the number of FC_PAUSE events the node has received. Does not reset over time 25 | UserParameter=galera.wsrep_flow_control_recv,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_flow_control_recv';" | HOME=/var/lib/zabbix mysql -N 26 | 27 | #Returns the number of FC_PAUSE events the node has sent. Does not reset over time 28 | UserParameter=galera.wsrep_flow_control_sent,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_flow_control_sent';" | HOME=/var/lib/zabbix mysql -N 29 | 30 | #Displays the group communications UUID. 31 | UserParameter=galera.wsrep_gcom_uuid,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_gcomm_uuid';" | HOME=/var/lib/zabbix mysql -N 32 | 33 | #The sequence number, or seqno, of the last committed transaction. 34 | UserParameter=galera.wsrep_last_committed,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_last_committed';" | HOME=/var/lib/zabbix mysql -N 35 | 36 | #Internal Galera Cluster FSM state number. 37 | UserParameter=galera.wsrep_local_state,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_local_state';" | HOME=/var/lib/zabbix mysql -N 38 | 39 | #Total number of local transactions that were aborted by slave transactions while in execution. 40 | UserParameter=galera.wsrep_local_bf_aborts,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_local_bf_aborts';" | HOME=/var/lib/zabbix mysql -N 41 | 42 | #Current (instantaneous) length of the recv queue. 43 | UserParameter=galera.wsrep_local_recv_queue,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_local_recv_queue';" | HOME=/var/lib/zabbix mysql -N 44 | 45 | #Current (instantaneous) length of the send queue. 46 | UserParameter=galera.wsrep_local_send_queue,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_local_send_queue';" | HOME=/var/lib/zabbix mysql -N 47 | 48 | #Human-readable explanation of the state. 
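#Typical values include: Joining, Waiting on SST, Joined, Synced and Donor/Desynced.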
UserParameter=galera.wsrep_local_state_comment,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_local_state_comment';" | HOME=/var/lib/zabbix mysql -N

#The UUID of the state stored on this node.
UserParameter=galera.wsrep_local_state_uuid,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_local_state_uuid';" | HOME=/var/lib/zabbix mysql -N

#Whether the server is ready to accept queries.
UserParameter=galera.wsrep_ready,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_ready';" | HOME=/var/lib/zabbix mysql -N

#Total size of write-sets received from other nodes.
UserParameter=galera.wsrep_received_bytes,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_received_bytes';" | HOME=/var/lib/zabbix mysql -N

#Total size of write-sets replicated.
UserParameter=galera.replicated_bytes,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_replicated_bytes';" | HOME=/var/lib/zabbix mysql -N

#Total size of data replicated.
UserParameter=galera.wsrep_repl_data_bytes,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_repl_data_bytes';" | HOME=/var/lib/zabbix mysql -N

#Total number of keys replicated.
UserParameter=galera.wsrep_repl_keys,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_repl_keys';" | HOME=/var/lib/zabbix mysql -N

#Total size of keys replicated in bytes.
UserParameter=galera.wsrep_repl_keys_bytes,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_repl_keys_bytes';" | HOME=/var/lib/zabbix mysql -N

#Total size of other bits replicated.
UserParameter=galera.wsrep_repl_other_bytes,echo "select VARIABLE_VALUE from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_repl_other_bytes';" | HOME=/var/lib/zabbix mysql -N
--------------------------------------------------------------------------------
/documentation/kubernetes_monitoring.md:
--------------------------------------------------------------------------------
# Kubernetes pods and nodes discovery and monitoring

Requirements:
- Python 3.6.8 or later
- VirtualEnv 15.1.0 or later
- 3rd party libraries for Python: kubernetes and py-zabbix


## Creating and activating VirtualEnv for Python:
```
mkdir /opt/virtualenv
cd /opt/virtualenv
virtualenv -p python3 kubernetes-monitoring
cd kubernetes-monitoring
source bin/activate
```


## Install Python dependencies using pip3:
```
pip3 install kubernetes py-zabbix
```

PSK key support additionally requires sslpsk; installing it requires development
packages for Python 3 and OpenSSL. The development packages can be removed after
installation.

(RedHat/CentOS):
```bash
sudo yum install python3-devel openssl-devel
pip3 install sslpsk
sudo yum remove python3-devel openssl-devel
```


## Configuring access for user zabbix

The zabbix user must have enough privileges to read Kubernetes configurations
and access the Kubernetes objects.
It is recommended that you create a context
that specifies the cluster, the user and the namespace that the monitoring
script will use when making calls to the API server. To achieve this, you need
to create a kubeconfig file. A kubeconfig file requires the URL of the API
server, a cluster CA certificate and credentials in the form of a key and a
certificate signed by the cluster CA.

This documentation provides steps to create certificates and to have them
accepted by an existing Kubernetes cluster. If you already have existing
certificates and configurations, you may skip the first part where certificates
are created and approved. There is a template for the configuration file in case
you already have the certificates but do not have a configuration file:
[There is an example file here](kubernetes_monitoring/config).

### Creating a certificate signing request (CSR) and retrieving certificates

First we run the OpenSSL command to generate a new private key and CSR. You may
change the subject fields to suit your needs. At least the "/CN=zabbix" field
should be checked, since the role based access control (RBAC) sub-system will
determine the username from that field:
```
openssl req -new -newkey rsa:4096 -nodes -keyout zabbix.key -out zabbix.csr -subj "/C=FI/ST=Pirkanmaa/L=Tampere/O=Digia Oyj/OU=Digia Iiris/CN=zabbix"
```

Then we can take the CSR file and encode it using the base64 command:
```
cat zabbix.csr | base64 | tr -d '\n'
```

Then we paste the base64-encoded CSR into the certificate signing request YAML file.
[There is an example file here](kubernetes_monitoring/csr.yml).

Then we send the request to the API server:
```
kubectl create -f csr.yml
```

Then we check the condition of the request using the following command:
```
kubectl get csr
```

We should receive output that looks somewhat like this:
```
NAME     AGE   SIGNERNAME                            REQUESTOR       CONDITION
zabbix   10s   kubernetes.io/kube-apiserver-client   minikube-user   Pending
```

The next thing we need to do is approve the request:
```
kubectl certificate approve zabbix
```

When we check the status of the request again, the request should be approved:
```
kubectl get csr

NAME     AGE   SIGNERNAME                            REQUESTOR       CONDITION
zabbix   30s   kubernetes.io/kube-apiserver-client   minikube-user   Approved,Issued
```

Now that our request is approved, we can retrieve the certificate. We pipe the
output to the base64 command for decoding and finally save it to a file:
```
kubectl get csr zabbix -o jsonpath='{.status.certificate}' | base64 --decode > zabbix.crt
```

The next thing we need is the cluster CA certificate.
We pipe it to the base64
command for decoding and save it into a file as with the previous command:
```
kubectl get secret -o jsonpath="{.items[?(@.type==\"kubernetes.io/service-account-token\")].data['ca\.crt']}" | base64 --decode >ca.crt
```


### Setting up the configuration using existing certificates

Retrieve the cluster name:
```
kubectl config view -o jsonpath='{.clusters[0].name}'
```

Retrieve the cluster server address:
```
kubectl config view -o jsonpath='{.clusters[0].cluster.server}'
```

Pull details from the existing Kubernetes configuration (the angle-bracket placeholders stand for your own values from the previous steps):
```
kubectl config set-cluster <cluster name> --server=<server address> --certificate-authority=<path to ca.crt> --kubeconfig=<path to config file> --embed-certs
```

Set up the user:
```
kubectl config set-credentials zabbix --client-certificate=<path to zabbix.crt> --client-key=<path to zabbix.key> --kubeconfig=<path to config file> --embed-certs
```

Create a context:
```
kubectl config set-context zabbix --cluster=<cluster name> --namespace=default --user=zabbix --kubeconfig=<path to config file>
```

Specify the context for user zabbix:
```
kubectl config use-context zabbix --kubeconfig=<path to config file>
```

Test the configuration:
```
kubectl version --kubeconfig=<path to config file>
```

You should now see a version listing from client and server, similar to the following:
```
Client Version: version.Info{Major:"1", Minor:"17", GitVersion:"v1.17.4", GitCommit:"", GitTreeState:"clean", BuildDate:"2020-03-12T21:03:42Z", GoVersion:"go1.13.8", Compiler:"gc", Platform:"linux/amd64"}
Server Version: version.Info{Major:"1", Minor:"18", GitVersion:"v1.18.0", GitCommit:"", GitTreeState:"clean", BuildDate:"2020-03-25T14:50:46Z", GoVersion:"go1.13.8", Compiler:"gc", Platform:"linux/amd64"}
```


### Authorize user zabbix to list pods, nodes and services from the Kubernetes cluster

[There is an example file here](kubernetes_monitoring/access.yml).

```
kubectl create -f kubernetes_monitoring/access.yml
```


## Usage

Item Syntax | Description | Units |
----------- | ----------- | ----- |
kubernetes.discover.pods | Discover all Kubernetes pods | Provides the following template variables: {#POD}. Also provides pod information in an array: ip, namespace, pod, restart_count, uptime. |
kubernetes.discover.pods.default | Discover all Kubernetes pods using default field selectors | Provides the following template variables: {#POD}. Also provides pod information in an array: ip, namespace, pod, restart_count, uptime. |
kubernetes.discover.nodes | Discover all Kubernetes nodes | Provides the following template variables: {#NODE}. Also provides node information in an array: node, machine_id, status, system_uuid. |
kubernetes.discover.services | Discover all Kubernetes services | Provides the following template variables: {#SERVICE}. Also provides service information in an array: namespace, service, uid. |

## Retrieving data from discovery using JSONPath

In this example, data can be retrieved from the discovery arrays using JSONPath (the angle-bracket placeholders stand for a concrete pod, node or service name):
```
$.data[?(@.pod == "<pod name>")].pod
$.data[?(@.pod == "<pod name>")].restart_count

$.data[?(@.node == "<node name>")].node
$.data[?(@.node == "<node name>")].status

$.data[?(@.service == "<service name>")].service
$.data[?(@.service == "<service name>")].uid
```
--------------------------------------------------------------------------------
/custom/scripts/zapache:
--------------------------------------------------------------------------------
#! /bin/bash
#
# Name: zapache
#
# Checks Apache activity.
#
# Author: Alejandro Michavila
# Modified for Scoreboard Values: Murat Koc, murat@profelis.com.tr
# Modified for using also as external script: Murat Koc, murat@profelis.com.tr
# Modified for outputting usage or ZBX_NOTSUPPORTED: Alejandro Michavila
# Modified to do caching for performance, dmitry.frolov@gmail.com
#
# Version: 1.5
#

zapachever="1.5"
rval=0
value=""
cache_seconds="60"
HEADER_PARAM=""
curl="`which curl`"
wget="`which wget`"
[ "$TMPDIR" ] || TMPDIR=/tmp

function usage()
{
	echo "zapache version: $zapachever"
	echo "usage:"
	echo "	$0 [<url>] [<header>] TotalAccesses - Check total accesses."
	echo "	$0 [<url>] [<header>] TotalKBytes - Check total KBytes."
	echo "	$0 [<url>] [<header>] CPULoad - Check CPU load."
	echo "	$0 [<url>] [<header>] Uptime - Check uptime."
	echo "	$0 [<url>] [<header>] ReqPerSec - Check requests per second."
	echo "	$0 [<url>] [<header>] BytesPerSec - Check Bytes per second."
	echo "	$0 [<url>] [<header>] BytesPerReq - Check Bytes per request."
	echo "	$0 [<url>] [<header>] BusyWorkers - Check busy workers."
	echo "	$0 [<url>] [<header>] IdleWorkers - Check idle workers."
	echo "	$0 [<url>] [<header>] version - Version of this script."
	echo "	$0 [<url>] [<header>] ping - Check if Apache is up."
	echo "	$0 [<url>] [<header>] WaitingForConnection - Check Waiting for Connection processes."
	echo "	$0 [<url>] [<header>] StartingUp - Check Starting Up processes."
	echo "	$0 [<url>] [<header>] ReadingRequest - Check Reading Request processes."
	echo "	$0 [<url>] [<header>] SendingReply - Check Sending Reply processes."
	echo "	$0 [<url>] [<header>] KeepAlive - Check KeepAlive processes."
	echo "	$0 [<url>] [<header>] DNSLookup - Check DNSLookup processes."
	echo "	$0 [<url>] [<header>] ClosingConnection - Check Closing Connection processes."
	echo "	$0 [<url>] [<header>] Logging - Check Logging processes."
	echo "	$0 [<url>] [<header>] GracefullyFinishing - Check Gracefully Finishing processes."
	echo "	$0 [<url>] [<header>] IdleCleanupOfWorker - Check Idle Cleanup of Worker processes."
	echo "	$0 [<url>] [<header>] OpenSlotWithNoCurrentProcess - Check Open Slots with No Current Process."
}

########
# Main #
########

if [[ $# == 1 ]];then
	#Agent Mode
	STATUS_URL="http://localhost/server-status?auto"
	CASE_VALUE="$1"
elif [[ $# == 2 ]];then
	#External Script Mode
	STATUS_URL="$1"
	case "$STATUS_URL" in
		http://*|https://*) ;;
		*) STATUS_URL="http://$STATUS_URL/server-status?auto";;
	esac
	CASE_VALUE="$2"
elif [[ $# == 3 ]];then
	#External Script Mode with custom header
	STATUS_URL="$1"
	case "$STATUS_URL" in
		http://*|https://*) ;;
		*) STATUS_URL="http://$STATUS_URL/server-status?auto";;
	esac
	if [ "$curl" ]; then
		HEADER_PARAM="-H \"$2\""
	else
		HEADER_PARAM="--header \"$2\""
	fi
	CASE_VALUE="$3"
else
	#No Parameter
	usage
	exit 0
fi

case "$CASE_VALUE" in
	'version')
		echo "$zapachever"
		exit 0;;
esac

umask 077

# $UID is bash-specific
cache_prefix="zapache-$UID-${STATUS_URL//[^a-zA-Z0-9_-]/_}"
cache="$TMPDIR/$cache_prefix.cache"
cache_timestamp_check="$TMPDIR/$cache_prefix.ts"
# This assumes touch from coreutils
touch -d "@$((`date +%s` - ($cache_seconds - 1)))" "$cache_timestamp_check"

if [ "$cache" -ot "$cache_timestamp_check" ]; then
	if [ "$curl" ]; then
		fetch_url() { $curl --insecure --silent --location $HEADER_PARAM -H "Cache-Control: no-cache" "$@"; }
	else
		if [ "$wget" ]; then
			fetch_url() { $wget --no-check-certificate --quiet $HEADER_PARAM --header "Cache-Control: no-cache" -O - "$@"; }
		else
			echo "ZBX_NOTSUPPORTED"
			exit 1
		fi
	fi

	fetch_url "$STATUS_URL" > "$cache"
	rval=$?
	if [ $rval != 0 ]; then
		echo "ZBX_NOTSUPPORTED"
		exit 1
	fi
fi

case "$CASE_VALUE" in
	'ping')
		if [ ! -s "$cache" -o "$cache" -ot "$cache_timestamp_check" ]; then
			echo "0"
		else
			echo "1"
		fi
		exit 0;;
esac

[ -s "$cache" ]; then 134 | echo "ZBX_NOTSUPPORTED" 135 | exit 1 136 | fi 137 | 138 | case "$CASE_VALUE" in 139 | 'TotalAccesses') 140 | value="`awk '/^Total Accesses:/ {print $3}' < \"$cache\"`" 141 | rval=$?;; 142 | 'TotalKBytes') 143 | value="`awk '/^Total kBytes:/ {print $3}' < \"$cache\"`" 144 | rval=$?;; 145 | 'CPULoad') 146 | value="`awk '/^CPULoad:/ {print $2}' < \"$cache\"`" 147 | rval=$?;; 148 | 'Uptime') 149 | value="`awk '/^Uptime:/ {print $2}' < \"$cache\"`" 150 | rval=$?;; 151 | 'ReqPerSec') 152 | value="`awk '/^ReqPerSec:/ {print $2}' < \"$cache\"`" 153 | rval=$?;; 154 | 'BytesPerSec') 155 | value="`awk '/^BytesPerSec:/ {print $2}' < \"$cache\"`" 156 | rval=$?;; 157 | 'BytesPerReq') 158 | value="`awk '/^BytesPerReq:/ {print $2}' < \"$cache\"`" 159 | rval=$?;; 160 | 'BusyWorkers') 161 | value="`awk '/^BusyWorkers:/ {print $2}' < \"$cache\"`" 162 | rval=$?;; 163 | 'IdleWorkers') 164 | value="`awk '/^IdleWorkers:/ {print $2}' < \"$cache\"`" 165 | rval=$?;; 166 | 'WaitingForConnection') 167 | value="`awk '/^Scoreboard:/ {print split($2,notused,"_")-1}' < \"$cache\"`" 168 | rval=$?;; 169 | 'StartingUp') 170 | value="`awk '/^Scoreboard:/ {print split($2,notused,"S")-1}' < \"$cache\"`" 171 | rval=$?;; 172 | 'ReadingRequest') 173 | value="`awk '/^Scoreboard:/ {print split($2,notused,"R")-1}' < \"$cache\"`" 174 | rval=$?;; 175 | 'SendingReply') 176 | value="`awk '/^Scoreboard:/ {print split($2,notused,"W")-1}' < \"$cache\"`" 177 | rval=$?;; 178 | 'KeepAlive') 179 | value="`awk '/^Scoreboard:/ {print split($2,notused,"K")-1}' < \"$cache\"`" 180 | rval=$?;; 181 | 'DNSLookup') 182 | value="`awk '/^Scoreboard:/ {print split($2,notused,"D")-1}' < \"$cache\"`" 183 | rval=$?;; 184 | 'ClosingConnection') 185 | value="`awk '/^Scoreboard:/ {print split($2,notused,"C")-1}' < \"$cache\"`" 186 | rval=$?;; 187 | 'Logging') 188 | value="`awk '/^Scoreboard:/ {print split($2,notused,"L")-1}' < \"$cache\"`" 189 | rval=$?;; 190 | 'GracefullyFinishing') 191 | value="`awk '/^Scoreboard:/ {print split($2,notused,"G")-1}' < \"$cache\"`" 192 | rval=$?;; 193 | 'IdleCleanupOfWorker') 194 | value="`awk '/^Scoreboard:/ {print split($2,notused,"I")-1}' < \"$cache\"`" 195 | rval=$?;; 196 | 'OpenSlotWithNoCurrentProcess') 197 | value="`awk '/^Scoreboard:/ {print split($2,notused,".")-1}' < \"$cache\"`" 198 | rval=$?;; 199 | *) 200 | usage 201 | exit 1;; 202 | esac 203 | 204 | if [ "$rval" -eq 0 -a -z "$value" ]; then 205 | case "$CASE_VALUE" in 206 | # Theese metrics are output only if non-zero 207 | 'CPULoad' | 'ReqPerSec' | 'BytesPerSec' | 'BytesPerReq') 208 | value=0 209 | ;; 210 | *) 211 | rval=1 212 | ;; 213 | esac 214 | fi 215 | 216 | if [ "$rval" -ne 0 ]; then 217 | echo "ZBX_NOTSUPPORTED" 218 | fi 219 | 220 | echo "$value" 221 | exit $rval 222 | 223 | # 224 | # end zapache 225 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/zabbix_sender_psk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Provides functionally extended version of py-zabbix ZabbixSender and pure python utility for 4 | sending trapper data. 
5 | """ 6 | from argparse import ArgumentParser 7 | from configparser import RawConfigParser 8 | from datetime import datetime 9 | from io import StringIO 10 | from typing import Callable, List, Optional, Tuple 11 | import functools 12 | import sys 13 | 14 | from pyzabbix import ZabbixSender, ZabbixMetric, ZabbixResponse 15 | 16 | # NOTE: Python 3 and OpenSSL development files required to install sslpsk 17 | # Packages needed only during installation and can be removed afterwards 18 | # --------------------------------------------------------------------- 19 | # RedHat/CentOS: sudo yum install python3-devel openssl-devel ; pip install sslpsk 20 | 21 | # Socket wrapper implementation adapted from GitHub issue: 22 | # https://github.com/adubkov/py-zabbix/issues/114 23 | class PyZabbixPSKSocketWrapper: 24 | """Implements ssl.wrap_socket with PSK instead of certificates. 25 | 26 | Proxies calls to a `socket` instance. 27 | """ 28 | 29 | def __init__(self, sock, *, identity, psk): 30 | self.__sock = sock 31 | self.__identity = identity 32 | self.__psk = psk 33 | 34 | def connect(self, *args, **kwargs): 35 | """ 36 | Opens socket connection. 37 | """ 38 | # PSK is optional to use so SSL dependencies are only imported when actually needed 39 | # Otherwise script would require bunch of system packages to be installed unnecessarily 40 | import ssl # pylint: disable=import-outside-toplevel 41 | import sslpsk # pylint: disable=import-outside-toplevel 42 | 43 | # `sslpsk.wrap_socket` must be called *after* socket.connect, 44 | # while the `ssl.wrap_socket` must be called *before* socket.connect. 45 | self.__sock.connect(*args, **kwargs) 46 | 47 | # `sslv3 alert bad record mac` exception means incorrect PSK 48 | self.__sock = sslpsk.wrap_socket( 49 | self.__sock, 50 | # https://github.com/zabbix/zabbix/blob/f0a1ad397e5653238638cd1a65a25ff78c6809bb/src/libs/zbxcrypto/tls.c#L3231 51 | ssl_version=ssl.PROTOCOL_TLSv1_2, 52 | # https://github.com/zabbix/zabbix/blob/f0a1ad397e5653238638cd1a65a25ff78c6809bb/src/libs/zbxcrypto/tls.c#L3179 53 | ciphers="PSK-AES128-CBC-SHA", 54 | psk=(self.__psk, self.__identity), 55 | ) 56 | 57 | def __getattr__(self, name): 58 | return getattr(self.__sock, name) 59 | 60 | 61 | class ZabbixSenderPSK(ZabbixSender): 62 | """ 63 | Extends py-zabbix library's ZabbixSender by implementing PSK support and sending semantics 64 | of command line sender (command line version =>4.2). 65 | 66 | User can also specify error_listener function which is called in case send call fail. If 67 | listener raises another error, the send is terminated. 68 | 69 | This version always uses Zabbix agent configuration file. 
70 | """ 71 | 72 | def __init__(self, 73 | config_file: str = None, 74 | error_listener: Callable[[OSError], None] = None): 75 | if config_file is None: 76 | config_file = '/etc/zabbix/zabbix_agentd.conf' 77 | self.config_file = config_file 78 | self.error_listener = error_listener 79 | self._config = None 80 | 81 | psk_info = self._get_psk_info() 82 | if psk_info: 83 | wrapper = functools.partial( 84 | PyZabbixPSKSocketWrapper, 85 | identity=psk_info[0], 86 | psk=psk_info[1]) 87 | ZabbixSender.__init__(self, use_config=config_file, socket_wrapper=wrapper) 88 | else: 89 | ZabbixSender.__init__(self, use_config=config_file) 90 | 91 | def _load_agent_config(self): 92 | if self._config is None: 93 | with open(self.config_file, 'r') as file_handle: 94 | config_file_data = '[root]\n' + file_handle.read() 95 | 96 | config_file_fp = StringIO(config_file_data) 97 | config = RawConfigParser(strict=False) 98 | config.read_file(config_file_fp) 99 | self._config = config 100 | return self._config 101 | 102 | def _get_psk_info(self) -> Optional[Tuple[str, bytearray]]: 103 | config = self._load_agent_config() 104 | 105 | tls_connect = config.get('root', 'TLSConnect', fallback=None) 106 | if tls_connect and tls_connect == 'psk': 107 | psk_identity = config.get('root', 'TLSPSKIdentity', fallback=None) 108 | if psk_identity is None: 109 | raise ValueError('Error in config file, TLSPSKIdentity missing') 110 | 111 | psk_file = config.get('root', 'TLSPSKFile', fallback=None) 112 | if psk_file is None: 113 | raise ValueError('Error in config file, TLSPSKFile missing.') 114 | 115 | with open(psk_file, 'r') as file_handle: 116 | psk_key = bytes.fromhex(file_handle.read().strip()) 117 | 118 | return (psk_identity, psk_key) 119 | 120 | return None 121 | 122 | def get_agent_config(self): 123 | """ 124 | Returns the agent configuration. 125 | """ 126 | return self._load_agent_config() 127 | 128 | def send(self, metrics: List[ZabbixMetric]) -> ZabbixResponse: 129 | zabbix_uris = self.zabbix_uri 130 | response = None 131 | for uri in zabbix_uris: 132 | try: 133 | self.zabbix_uri = [uri] 134 | response = ZabbixSender.send(self, metrics) 135 | except OSError as ex: 136 | if self.error_listener: 137 | self.error_listener(ex) 138 | self.zabbix_uri = zabbix_uris 139 | 140 | # Only last successful response is returned, this follows ZabbixSender semantics 141 | if response is None: 142 | raise OSError('Could not send values to any Zabbix server.') 143 | return response 144 | 145 | def _print_cfg_value(config: RawConfigParser, key: str): 146 | print(f"{key}: {config.get('root', key, fallback='-')}") 147 | 148 | 149 | 150 | def display_config(sender: ZabbixSenderPSK): 151 | """ 152 | Prints trapper related Zabbix agent configuration options to stdout. 153 | """ 154 | config = sender.get_agent_config() 155 | _print_cfg_value(config, 'ServerActive') 156 | _print_cfg_value(config, 'Hostname') 157 | _print_cfg_value(config, 'TLSConnect') 158 | _print_cfg_value(config, 'TLSPSKIdentity') 159 | _print_cfg_value(config, 'TLSPSKFile') 160 | 161 | 162 | def send_from_file(sender: ZabbixSenderPSK, input_file: str, with_timestamps: bool = False): 163 | """ 164 | Sends values from file to Zabbix server. 
165 | """ 166 | metrics = [] 167 | with sys.stdin if input_file == '-' else open(input_file, 'r') as file_handle: 168 | for line in file_handle: 169 | line = line.strip() # Remove newline 170 | if with_timestamps: 171 | parts = line.split(' ', 4) 172 | metrics.append(ZabbixMetric(parts[0], parts[1], parts[3], clock(parts[2]))) 173 | else: 174 | parts = line.split(' ', 3) 175 | metrics.append(ZabbixMetric(parts[0], parts[1], parts[2])) 176 | 177 | response = sender.send(metrics) 178 | print(response) 179 | 180 | 181 | def send_value(sender: ZabbixSenderPSK, host: str, key: str, value: str, clock_value: int): 182 | """ 183 | Sends single value to Zabbix server. 184 | """ 185 | if host is None: 186 | config = sender.get_agent_config() 187 | host = config.get('root', 'Hostname', fallback=None) 188 | if host is None: 189 | raise ValueError('Cannot resolve trapper hostname.') 190 | metric = ZabbixMetric(host, key, value, clock_value) 191 | response = sender.send([metric]) 192 | print(response) 193 | 194 | 195 | def run_sender(args): 196 | """ 197 | Executes the sender utility. 198 | """ 199 | sender = ZabbixSenderPSK(args.config) 200 | if args.display_config: 201 | display_config(sender) 202 | sys.exit(0) 203 | 204 | if args.input_file: 205 | send_from_file(sender, args.input_file, args.with_timestamps) 206 | else: 207 | if args.key and args.value: 208 | send_value(sender, args.host, args.key, args.value, args.clock) 209 | else: 210 | sys.exit('Invalid arguments: specify either key and value or input file.') 211 | 212 | 213 | def clock(value: str) -> int: 214 | """ 215 | Tries to parse clock value from string in multiple formats. 216 | 217 | Supported formats: 218 | - Bare clock seconds value from unix epoch 219 | - ISO datetime without timezone 220 | """ 221 | try: 222 | return int(value) 223 | except ValueError: 224 | return int(datetime.strptime(value, '%Y-%m-%dT%H:%M:%S').timestamp()) 225 | 226 | 227 | if __name__ == '__main__': 228 | parser = ArgumentParser() 229 | parser.add_argument('-c', '--config', default=None, 230 | help='Path to Zabbix agentd configuration file') 231 | parser.add_argument('-s', '--host', default=None, 232 | help='Specify host name the item belongs to') 233 | parser.add_argument('-k', '--key', 234 | help='Specify item key') 235 | parser.add_argument('-o', '--value', 236 | help='Specify item value') 237 | parser.add_argument('-t', '--clock', type=clock, default=None, 238 | help='Specify item clock') 239 | parser.add_argument('-i', '--input-file', 240 | help='Load values from input file. 
Specify - for standard input.')
241 | parser.add_argument('-T', '--with-timestamps', action='store_true',
242 | help='Each line of file contains whitespace delimited:\n' \
243 | ' <host> <key> <clock> <value>')
244 | parser.add_argument('-d', '--display-config', action='store_true',
245 | help='Print trapper-related Zabbix agent configuration')
246 | cmd_args = parser.parse_args()
247 |
248 | run_sender(cmd_args)
249 |
-------------------------------------------------------------------------------- /templates/process.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 3.2 4 | 2017-09-28T11:28:48Z 5 | 6 | 7 | Templates 8 | 9 | 10 | 11 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /templates/docker_trapper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5.0 4 | 2020-11-19T08:26:56Z 5 | 6 | 7 | Templates 8 | 9 | 10 | 11 | 205 | 206 | 207 | 208 | Service state 209 | 210 | 211 | 0 212 | Down 213 | 214 | 215 | 1 216 | Up 217 | 218 | 219 | 220 | 221 | 222 | -------------------------------------------------------------------------------- /templates/process_active.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 3.2 4 | 2017-09-28T11:28:55Z 5 | 6 | 7 | Templates 8 | 9 | 10 | 11 | 224 | 225 | 226 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/docker.sh: --------------------------------------------------------------------------------

1 | #!/bin/bash
2 | # Version: 1.0
3 | set -e
4 |
5 | # Find netcat command or die
6 | NC_CMD=$(command -v netcat || command -v nc || exit 1)
7 | # Path of docker socket
8 | DOCKER_SOCKET=/var/run/docker.sock
9 | # Statistics directory (parent directory must exist and be writable for user running script)
10 | STATS_DIR=/tmp/zabbix-docker-stats
11 |
12 | # Check if docker socket is writable with the current user
13 | if [ -w "$DOCKER_SOCKET" ]; then
14 | NC="$NC_CMD"
15 | else
16 | # Current user does not belong to docker group, use sudo (requires that sudo rights are given correctly in the system)
17 | NC="sudo $NC_CMD"
18 | fi
19 |
20 | # Create statistics directory if it does not exist
21 | if [ ! -e "$STATS_DIR" ]; then
22 | mkdir -p $STATS_DIR
23 | fi
24 |
25 | # Executes GET command to docker socket
26 | # Parameters: 1 - docker command
27 | docker_get() {
28 | RESPONSE=$(printf "GET $1 HTTP/1.0\r\n\r\n" | $NC -U $DOCKER_SOCKET | tail -n 1)
29 | }
30 |
31 | # Executes command in docker container
32 | # Parameters: 1 - Container name
33 | # 2 - Command and arguments in quoted comma separated list (e.g.
"ls", "-l") 34 | docker_exec() { 35 | # Create command execution 36 | local BODY="{\"AttachStdout\": true, \"Cmd\": [$2]}" 37 | local CREATE_RESPONSE 38 | CREATE_RESPONSE=$(printf "POST /containers/$1/exec HTTP/1.0\r\nContent-Type: application/json\r\nContent-Length: ${#BODY}\r\n\r\n${BODY}" | $NC -U $DOCKER_SOCKET | tail -n 1) 39 | local RUN_ID=$(echo $CREATE_RESPONSE | jq ".Id // empty " | sed -e 's/"//g') 40 | 41 | # Start execution 42 | if [ "$RUN_ID" != "" ]; then 43 | # tr at end is used to suppress warning on bash >=4.4 44 | RESPONSE=$(printf "POST /exec/$RUN_ID/start HTTP/1.0\r\nContent-Type: application/json\r\nContent-Length: 2\r\n\r\n{}" | $NC -U $DOCKER_SOCKET | tr -d '\0') 45 | else 46 | RESPONSE="" 47 | fi 48 | } 49 | 50 | # Obtains last line from execution of cat file on docker container 51 | # Parameters:: 1 - Container name 52 | # 2 - File in container 53 | cat_single_value() { 54 | local CMD="\"cat\", \"$2\"" 55 | docker_exec $1 "$CMD" 56 | local VALUE=$(echo "$RESPONSE" | tail -n 1 | tr -cd "[:print:]") 57 | echo $VALUE 58 | } 59 | 60 | # Updates timestamp of statistic and returns the time elapsed since last update 61 | # in nanoseconds 62 | # Parameters: 1 - Container name 63 | # 2 - Statistic name 64 | update_stat_time() { 65 | local UTIME_FILE="$STATS_DIR/$1/$2.utime" 66 | local NEW_VALUE=$(date +%s%N) 67 | 68 | if [ ! -e "$UTIME_FILE" ]; then 69 | printf "0" >$UTIME_FILE 70 | fi 71 | local OLD_VALUE=$(cat $UTIME_FILE) 72 | 73 | printf "$NEW_VALUE" >$UTIME_FILE 74 | TIMEDIFF=$((NEW_VALUE-OLD_VALUE)) 75 | printf $TIMEDIFF 76 | } 77 | 78 | # Updates statistic value and prints the old value 79 | # Parameters: 1 - Container name 80 | # 2 - Statistic name 81 | # 3 - New monitored value 82 | update_stat() { 83 | local STAT_FILE="$STATS_DIR/$1/$2" 84 | local NEW_VALUE=$3 85 | if [ ! -e "$STATS_DIR/$1" ]; then 86 | mkdir -p "$STATS_DIR/$1" 87 | fi 88 | 89 | if [ ! 
-e "$STAT_FILE" ]; then 90 | printf "0" >$STAT_FILE 91 | fi 92 | 93 | cat $STAT_FILE 94 | printf "$NEW_VALUE" >$STAT_FILE 95 | } 96 | 97 | # Statistic: Number of running docker containers 98 | # Parameters: 1 - all or running; defaults to running 99 | count() { 100 | if [ "$1" = "all" ]; then 101 | docker_get "/containers/json?all=true" 102 | else 103 | docker_get "/containers/json" 104 | fi 105 | echo $RESPONSE | jq "length" 106 | } 107 | 108 | count_all() { 109 | count all 110 | } 111 | 112 | # Docker container discovery 113 | # Parameters: 1 - all or running; defaults to running 114 | discovery() { 115 | if [ "$1" = "all" ]; then 116 | docker_get "/containers/json?all=true" 117 | else 118 | docker_get "/containers/json" 119 | fi 120 | LEN=$(echo $RESPONSE | jq "length") 121 | for I in $(seq 0 $((LEN-1))) 122 | do 123 | NAME=$(echo "$RESPONSE"|jq --raw-output ".[$I].Names[0]"|sed -e 's/^\///') 124 | ID=$(echo "$RESPONSE"|jq --raw-output ".[$I].Id") 125 | IMAGENAME=$(echo "$RESPONSE"|jq --raw-output ".[$I].Image"|sed -e 's/:.*//') 126 | IMAGETAG=$(echo "$RESPONSE"|jq --raw-output ".[$I].Image"|sed -e 's/.*://') 127 | 128 | DATA="$DATA,"'{"{#CONTAINERNAME}":"'$NAME'","{#CONTAINERID}":"'$ID'","{#IMAGENAME}":"'$IMAGENAME'","{#IMAGETAG}":"'$IMAGETAG'"' 129 | 130 | # Compatibility with www.monitoringartist.com Docker template 131 | DATA="$DATA,"'"{#HCONTAINERID}":"'$ID'"}' 132 | 133 | done 134 | echo '{"data":['${DATA#,}']}' 135 | } 136 | 137 | discovery_all() { 138 | discovery all 139 | } 140 | 141 | # Statistic: Container status 142 | status() { 143 | docker_get "/containers/$1/json" 144 | STATUS=$(echo $RESPONSE | jq ".State.Status" 2>/dev/null | sed -e 's/\"//g') 145 | 146 | if [ "$STATUS" = "running" ]; then 147 | # Running 148 | echo "1" 149 | elif [ "$STATUS" = "created" ] || [ "$STATUS" = "paused" ] || [ "$STATUS" = "restarting" ]; then 150 | # Not started (purposefully) 151 | echo "2" 152 | elif [ "$STATUS" = "exited" ] && [ "$(echo $RESPONSE | jq '.State.ExitCode')" = "0" ]; then 153 | # Stopped purposefully with ok exit code => status is "not started" 154 | echo "2" 155 | elif [ "$STATUS" = "exited" ] && [ "$(echo $RESPONSE | jq '.State.ExitCode')" = "137" ]; then 156 | # Stopped purposefully with sigkill (exit code 137) => status is "not started" 157 | echo "2" 158 | else 159 | # Exited with error (accidentally) or no such container exists 160 | echo "0" 161 | fi 162 | } 163 | 164 | # Container up and runnig? 
1 (yes) or 0 (no)
165 | up() {
166 | docker_get "/containers/$1/json"
167 | STATUS=$(echo $RESPONSE | jq ".State.Status" 2>/dev/null | sed -e 's/\"//g')
168 |
169 | # Running
170 | if [ "$STATUS" = "running" ]; then
171 | echo "1"
172 | else
173 | echo "0"
174 | fi
175 | }
176 |
177 | # Statistic: Container uptime
178 | uptime() {
179 | docker_get "/containers/$1/json"
180 | # if running
181 | if [ "$(echo $RESPONSE | jq '.State.Running')" = "true" ]; then
182 | local STARTED=$(echo $RESPONSE | jq ".State.StartedAt" | sed -e 's/\"//g')
183 | local STARTED_S=$(date -d $STARTED +%s)
184 | local NOW_S=$(date +%s)
185 | UPTIME=$((NOW_S-STARTED_S))
186 | echo $UPTIME
187 | else
188 | # not running, uptime always zero (must output some number so that zabbix item won't get into error state)
189 | echo "0"
190 | fi
191 | }
192 |
193 | # Statistic: Container memory
194 | memory() {
195 | NEW_VALUE=$(cat_single_value $1 "/sys/fs/cgroup/memory/memory.usage_in_bytes")
196 | if [ "$NEW_VALUE" = "" ]; then
197 | echo "0"
198 | else
199 | echo $NEW_VALUE
200 | fi
201 | }
202 |
203 | # Statistic: Container disk usage
204 | disk() {
205 | docker_get "/containers/$1/json?size=1"
206 | echo $RESPONSE | jq ".SizeRootFs"
207 | }
208 |
209 | # Statistic: Container CPU usage
210 | cpu() {
211 | NEW_VALUE=$(cat_single_value $1 "/sys/fs/cgroup/cpuacct/cpuacct.usage")
212 | if [ "$NEW_VALUE" = "" ]; then
213 | echo "0.0000"
214 | else
215 | OLD_VALUE=$(update_stat $1 "cpuacct.usage" "$NEW_VALUE")
216 | TIMEDIFF=$(update_stat_time $1 "cpuacct.usage")
217 | perl -e "print sprintf(\"%.4f\", (($NEW_VALUE-$OLD_VALUE)<0?0:($NEW_VALUE-$OLD_VALUE)/$TIMEDIFF*100))" # cpu percent
218 | fi
219 | }
220 |
221 | # Statistic: Container network traffic in
222 | netin() {
223 | NEW_VALUE=$(cat_single_value $1 "/sys/devices/virtual/net/eth0/statistics/rx_bytes")
224 | if [ "$NEW_VALUE" = "" ]; then
225 | echo "0"
226 | else
227 | OLD_VALUE=$(update_stat $1 "rx_bytes" "$NEW_VALUE")
228 | TIMEDIFF=$(update_stat_time $1 "rx_bytes")
229 | perl -e "print int(($NEW_VALUE-$OLD_VALUE)<0?0:($NEW_VALUE-$OLD_VALUE)/$TIMEDIFF*1000000000)" # bytes per second; TIMEDIFF is in nanoseconds
230 | fi
231 | }
232 |
233 | # Statistic: Container network traffic out
234 | netout() {
235 | NEW_VALUE=$(cat_single_value $1 "/sys/devices/virtual/net/eth0/statistics/tx_bytes")
236 | if [ "$NEW_VALUE" = "" ]; then
237 | echo "0"
238 | else
239 | OLD_VALUE=$(update_stat $1 "tx_bytes" "$NEW_VALUE")
240 | TIMEDIFF=$(update_stat_time $1 "tx_bytes")
241 | perl -e "print int(($NEW_VALUE-$OLD_VALUE)<0?0:($NEW_VALUE-$OLD_VALUE)/$TIMEDIFF*1000000000)" # bytes per second; TIMEDIFF is in nanoseconds
242 | fi
243 | }
244 |
245 | # Container image up and running?
1 (yes) or 0 (no)
246 | image_up() {
247 | running_containerid $1
248 | if [ "$CONTAINER_ID" = "" ]; then
249 | echo 0
250 | else
251 | echo 1
252 | fi
253 | }
254 |
255 | # Statistic: Container image uptime
256 | image_uptime() {
257 | running_containerid $1
258 | if [ "$CONTAINER_ID" = "" ]; then
259 | echo 0
260 | else
261 | uptime $CONTAINER_ID
262 | fi
263 | }
264 |
265 | # Statistic: Container image memory
266 | image_memory() {
267 | running_containerid $1
268 | if [ "$CONTAINER_ID" = "" ]; then
269 | echo 0
270 | else
271 | memory $CONTAINER_ID
272 | fi
273 | }
274 |
275 | # Statistic: Container image disk usage
276 | image_disk() {
277 | running_containerid $1
278 | if [ "$CONTAINER_ID" = "" ]; then
279 | echo 0
280 | else
281 | disk $CONTAINER_ID
282 | fi
283 | }
284 |
285 | # Statistic: Container image CPU usage
286 | image_cpu() {
287 | running_containerid $1
288 | if [ "$CONTAINER_ID" = "" ]; then
289 | echo 0
290 | else
291 | cpu $CONTAINER_ID
292 | fi
293 | }
294 |
295 | # Statistic: Container image network traffic in
296 | image_netin() {
297 | running_containerid $1
298 | if [ "$CONTAINER_ID" = "" ]; then
299 | echo 0
300 | else
301 | netin $CONTAINER_ID
302 | fi
303 | }
304 |
305 | # Statistic: Container image network traffic out
306 | image_netout() {
307 | running_containerid $1
308 | if [ "$CONTAINER_ID" = "" ]; then
309 | echo 0
310 | else
311 | netout $CONTAINER_ID
312 | fi
313 | }
314 |
315 | # Returns all running container IDs for image, one per line
316 | image_containerids() {
317 | containerids $1
318 | for i in $CONTAINER_IDS; do
319 | echo $i
320 | done
321 | }
322 |
323 | # Returns all existing container IDs for image, one per line
324 | image_containerids_all() {
325 | containerids $1 all
326 | for i in $CONTAINER_IDS; do
327 | echo $i
328 | done
329 | }
330 |
331 | # Get sole running container ID for image into CONTAINER_ID
332 | # - Exit with message if multiple running
333 | # - Empty CONTAINER_ID if there are no running containers
334 | running_containerid() {
335 | CONTAINER_ID=""
336 | containerids $1
337 | for i in $CONTAINER_IDS; do
338 | if [ "$CONTAINER_ID" = "" ]; then
339 | CONTAINER_ID=$i
340 | else
341 | echo "Multiple running containers for image"
342 | exit 1
343 | fi
344 | done
345 | }
346 |
347 | # Get container IDs for imagename into CONTAINER_IDS
348 | containerids() {
349 | IMAGENAME=$1
350 | if [ "$2" = "all" ]; then
351 | docker_get "/containers/json?all=true"
352 | else
353 | docker_get "/containers/json"
354 | fi
355 | CONTAINER_IDS=$(echo $RESPONSE | jq '.[]|select(.Image|test("^'$IMAGENAME'(:.*)?$"))|.Id' | sed -e 's/\"//g')
356 | }
357 |
358 | if [ $# -eq 0 ]; then
359 | echo "No arguments"
360 | exit 1
361 | elif [ $# -eq 1 ]; then
362 | $1
363 | elif [ $# -eq 2 ]; then
364 | # Compatibility with www.monitoringartist.com docker template:
365 | # Remove leading slash from container id
366 | CONT_ID=$(echo "$1" | sed 's/^\///')
367 |
368 | # Execute statistic function with container argument
369 | $2 "$CONT_ID"
370 | fi
371 |
-------------------------------------------------------------------------------- /etc/zabbix/scripts/kubernetes_monitoring.py: --------------------------------------------------------------------------------

1 | #!/usr/bin/env python3
2 |
3 | """
4 | Kubernetes monitoring
5 | Version: 1.2
6 |
7 | Usage:
8 | python kubernetes_monitoring.py pods
9 | python kubernetes_monitoring.py pods -c <config file> -f <field selector>
10 |
11 | python kubernetes_monitoring.py nodes
12 |
13 | python kubernetes_monitoring.py services
14 |
15 | python
kubernetes_monitoring.py cronjobs
16 | python kubernetes_monitoring.py cronjobs -c <config file> -f <field selector>
17 | python kubernetes_monitoring.py cronjobs -c <config file> -f <field selector>
18 | --host-name <Zabbix host name>
19 | --minutes <interval in minutes>
20 | """
21 |
22 | # Python imports
23 | from argparse import ArgumentParser
24 | import datetime
25 | import json
26 | import os
27 | import sys
28 |
29 | # 3rd party imports
30 | from kubernetes import client, config
31 | from pyzabbix import ZabbixMetric
32 |
33 | from zabbix_sender_psk import ZabbixSenderPSK as ZabbixSender
34 |
35 | epoch_start = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
36 | system_time = datetime.datetime.now(datetime.timezone.utc)
37 |
38 | # Loop cron jobs and create discovery
39 | def cronjobs(args, v1):
40 |
41 | # Retrieve cron jobs from Kubernetes API v1
42 | api_client = client.ApiClient()
43 | api_instance = client.BatchV1Api(api_client)
44 | api_response = api_instance.list_job_for_all_namespaces(
45 | watch=False,
46 | field_selector=args.field_selector
47 | )
48 |
49 | # Declare variables (packet is declared here, outside the loop, so it is
50 | # defined even when the API returns no items)
51 | cronjobs = {}
52 | packet = []
53 | start_interval = int(((
54 | system_time - datetime.timedelta(minutes=args.minutes)) - epoch_start
55 | ).total_seconds())
56 |
57 | # Check API response before listing
58 | if not api_response:
59 | raise Exception("Unable to retrieve API response.")
60 |
61 | # Loop API response items
62 | for item in api_response.items:
63 |
64 | # Reset loop variables
65 | completion_time = None
66 | job_length = 0
67 | job_name = None
68 | job_status = 0
69 | start_time = None
70 |
71 | # Discard active cron jobs
72 | if item.status.active is not None:
73 | continue
74 |
75 | # Check and convert completion time to epoch
76 | if item.status.completion_time:
77 | completion_time = int(
78 | (item.status.completion_time - epoch_start).total_seconds()
79 | )
80 |
81 | # Skip completed jobs that are outside the interval range
82 | if completion_time and completion_time < start_interval:
83 | continue
84 |
85 | # Check and convert start time to epoch
86 | if item.status.start_time:
87 | start_time = int(
88 | (item.status.start_time - epoch_start).total_seconds()
89 | )
90 |
91 | # Calculate cron job length
92 | if completion_time and start_time:
93 | job_length = int(completion_time - start_time)
94 |
95 | # Only retrieve data from cron jobs (jobs without owner references are skipped)
96 | for owner_reference in item.metadata.owner_references or []:
97 | if owner_reference.kind != "CronJob":
98 | continue
99 |
100 | # Retrieve job name
101 | job_name = owner_reference.name
102 |
103 | # If job name was not retrieved, kind was not CronJob
104 | if not job_name:
105 | continue
106 |
107 | # Check job status comparing succeeded and status fields
108 | if item.status.succeeded and item.status.succeeded > 0 and item.status.failed is None:
109 | job_status = 1
110 |
111 | # Set job data to dictionary
112 | cronjobs[job_name] = {
113 | "{#CRONJOB}": job_name,
114 | "completion_time": completion_time,
115 | "length": job_length,
116 | "name": job_name,
117 | "start_time": start_time,
118 | "status": job_status,
119 | "uid": item.metadata.uid
120 | }
121 |
122 | # If instance name is not set, we output discovery
123 | if not args.host_name:
124 |
125 | # Loop and append jobs to output list
126 | for cron_job in cronjobs:
127 | output.append(cronjobs[cron_job])
128 |
129 | # Dump discovery
130 | discovery = {"data": output}
131 | print(json.dumps(discovery))
132 |
133 | else:
134 | # Append item data to list
135 | for cron_job in cronjobs:
136 | packet.append(ZabbixMetric(
137 | args.host_name,
f'kubernetes.cronjob["{cron_job}"]', 138 | json.dumps(cronjobs[cron_job]), 139 | cronjobs[cron_job].get("completion_time") 140 | )) 141 | 142 | # Send data using ZabbixSender 143 | result = ZabbixSender().send(packet) 144 | 145 | # Print result 146 | print(result) 147 | 148 | 149 | # Loop pods and create discovery 150 | def pods(args, v1): 151 | 152 | # Retrieve pods from Kubernetes API v1 153 | pods = v1.list_pod_for_all_namespaces( 154 | watch=False, 155 | field_selector=args.field_selector 156 | ) 157 | 158 | # Check pods before listing 159 | if pods: 160 | for pod in pods.items: 161 | 162 | # Retrieve container's restart counts 163 | container_started = None # Container's start time 164 | kind = None # Pod's kind found under metadata.owner_references 165 | restart_count = 0 # Container's restart count 166 | started_at = None # Latest start time 167 | uptime = datetime.timedelta() # Datetime object for latest uptime 168 | 169 | # Loop possible owner_references and retrieve "kind"-field 170 | if pod.metadata.owner_references: 171 | for ref in pod.metadata.owner_references: 172 | kind = ref.kind 173 | 174 | # Pods that are identified as "Job" are skipped 175 | if kind == "Job": 176 | continue 177 | 178 | # Check if container_statuses is available 179 | if pod.status.container_statuses: 180 | 181 | # Loop containers and retrieve information 182 | for container in pod.status.container_statuses: 183 | restart_count = int(container.restart_count) 184 | 185 | # Check "running"-state first, then "terminated"-state 186 | if container.state.running is not None: 187 | container_started = container.state.running.started_at 188 | elif container.state.terminated is not None: 189 | container_started = container.state.terminated.started_at 190 | else: 191 | continue 192 | 193 | # First time around, grab the first start time 194 | if not started_at: 195 | started_at = container_started 196 | # Compare previous container's start time to current one 197 | elif started_at < container_started: 198 | started_at = container_started 199 | 200 | # Count uptime 201 | if started_at: 202 | uptime = system_time - started_at 203 | 204 | # Append information to output list 205 | output.append({ 206 | "{#POD}": pod.metadata.name, 207 | "restart_count": restart_count, 208 | "ip": pod.status.pod_ip, 209 | "namespace": pod.metadata.namespace, 210 | "pod": pod.metadata.name, 211 | "uptime": uptime.total_seconds() 212 | }) 213 | 214 | # Dump discovery 215 | discovery = {"data": output} 216 | print(json.dumps(discovery)) 217 | 218 | 219 | # Loop nodes and create discovery 220 | def nodes(args, v1): 221 | 222 | # Retrieve nodes from Kubernetes API v1 223 | nodes = v1.list_node( 224 | watch=False, 225 | field_selector=args.field_selector 226 | ) 227 | 228 | # Check nodes before listing 229 | if nodes: 230 | for node in nodes.items: 231 | 232 | # Node status is retrieved from node's conditions. Possible 233 | # conditions are: Ready, MemoryPressure, PIDPressure, DiskPressure 234 | # and NetworkUnavailable. We are interested only in the main one, 235 | # "Ready", which describes if the node is healthy and ready to 236 | # accept pods. 
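# For illustration, a healthy node typically reports a condition object along
# the lines of {"type": "Ready", "status": "True", ...} (assumed shape, based
# on the Kubernetes NodeCondition API), so the loop below reduces the whole
# condition list to that single status string.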
237 | status = "" 238 | for condition in node.status.conditions: 239 | if condition.type == "Ready": 240 | status = condition.status 241 | 242 | # Append information to output list 243 | output.append({ 244 | "{#NODE}": node.status.node_info.machine_id, 245 | "node": next((i.address for i in node.status.addresses if i.type == "Hostname"), node.status.node_info.machine_id), 246 | "allocatable_cpu": node.status.allocatable.get("cpu"), 247 | "allocatable_storage": node.status.allocatable.get("ephemeral-storage"), 248 | "allocatable_memory": node.status.allocatable.get("memory"), 249 | "capacity_cpu": node.status.capacity.get("cpu"), 250 | "capacity_storage": node.status.capacity.get("ephemeral-storage"), 251 | "capacity_memory": node.status.capacity.get("memory"), 252 | "external_ip": next((i.address for i in node.status.addresses if i.type == "ExternalIP"), ""), 253 | "machine_id": node.status.node_info.machine_id, 254 | "status": status, 255 | "system_uuid": node.status.node_info.system_uuid 256 | }) 257 | 258 | # Dump discovery 259 | discovery = {"data": output} 260 | print(json.dumps(discovery)) 261 | 262 | 263 | # Loop services and create discovery 264 | def services(args, v1): 265 | 266 | # Retrieve services from Kubernetes API v1 267 | services = v1.list_service_for_all_namespaces( 268 | watch=False, 269 | field_selector=args.field_selector 270 | ) 271 | 272 | # Check services before listing 273 | if services: 274 | for service in services.items: 275 | 276 | # Append information to output list 277 | output.append({ 278 | "{#SERVICE}": service.metadata.name, 279 | "namespace": service.metadata.namespace, 280 | "service": service.metadata.name, 281 | "uid": service.metadata.uid 282 | }) 283 | 284 | # Dump discovery 285 | discovery = {"data": output} 286 | print(json.dumps(discovery)) 287 | 288 | 289 | if __name__ == "__main__": 290 | 291 | # Declare variables 292 | output = [] # List for output data 293 | 294 | # Parse command-line arguments 295 | parser = ArgumentParser( 296 | description="Discover and retrieve metrics from Kubernetes.", 297 | ) 298 | 299 | # Use sub-parsers run functions using mandatory positional argument 300 | subparsers = parser.add_subparsers() 301 | parser_cronjobs = subparsers.add_parser("cronjobs") 302 | parser_cronjobs.set_defaults(func=cronjobs) 303 | parser_pods = subparsers.add_parser("pods") 304 | parser_pods.set_defaults(func=pods) 305 | parser_services = subparsers.add_parser("services") 306 | parser_services.set_defaults(func=services) 307 | parser_nodes = subparsers.add_parser("nodes") 308 | parser_nodes.set_defaults(func=nodes) 309 | 310 | # Each subparser has the same optional arguments. For now. 
311 | for item in [parser_cronjobs, parser_pods, parser_nodes, parser_services]: 312 | item.add_argument("-c", "--config", default="", dest="config", 313 | type=str, 314 | help="Configuration file for Kubernetes client.") 315 | item.add_argument("-f", "--field-selector", default="", 316 | dest="field_selector", type=str, 317 | help="Filter results using field selectors.") 318 | item.add_argument("-hn", "--host-name", default="", 319 | dest="host_name", type=str, 320 | help="Zabbix host name for sending item data.") 321 | item.add_argument("-m", "--minutes", default=5, 322 | dest="minutes", type=int, 323 | help="Interval for cron job retrieval.") 324 | 325 | args = parser.parse_args() 326 | 327 | # Check configuration file 328 | if args.config != "": 329 | if not os.path.isfile(args.config): 330 | print("Configuration file is not valid.") 331 | sys.exit() 332 | 333 | # Load kubernetes configuration 334 | try: 335 | if args.config != "": 336 | config.load_kube_config(config_file=args.config) 337 | else: 338 | config.load_kube_config() 339 | except Exception as e: 340 | print(f"Unable to load Kubernetes configuration file. Error: {e}") 341 | sys.exit() 342 | 343 | # Initialize Kubernetes client 344 | v1 = client.CoreV1Api() 345 | 346 | # Run specified mode 347 | args.func(args, v1) 348 | -------------------------------------------------------------------------------- /etc/zabbix/scripts/pacemaker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # Version: 1.0 3 | # Get Pacemaker status. Adding -v option to command prints a more verbose string 4 | # Otherwise returns decimal or single word statuses. 5 | # 6 | # Get the cluster status in verbose format: 7 | # pacemaker_status.py -i cluster -v 8 | # Get the simple cluster status, 0 if no nodes, 1 if running ok, 2 if any in standby 9 | # 3 if any in maintenance, 4 if any in shutdown 10 | # pacemaker_status.py -i cluster 11 | # Count the resources in given state. e.g. how many failed: 12 | # pacemaker_status.py -i cluster -p failed 13 | # Sum the failcount in cluster: 14 | # pacemaker_status.py -i cluster -p fail-count 15 | # Get status of the single resource. Returns count of resources running 16 | # pacemaker_status.py -i resource -n Grafana 17 | # Get the property value for single resource in given node. If node is not given 18 | # returns true if all the nodes have the property set to "true". 19 | # pacemaker_status.py -i resource -n Grafana -N application1 -p managed 20 | # Get the status on node, returns count of services running 21 | # pacemaker_status.py -i node -n application1 22 | # Get the status on node, returns verbose string of resource status 23 | # pacemaker_status.py -i node -n application1 -v 24 | # Get the nodes where resource is active. Returns in format resource:node1,node2 25 | # pacemaker_status.py -i resource -n Grafana -l 26 | # Get all resources in the cluster and nodes where they are active. 
Returns each 27 | # resource and the nodes, separated by space 28 | # pacemaker_status.py -i cluster -l 29 | # Get last failure in cluster 30 | # pacemaker_status.py -i cluster -f 31 | 32 | 33 | import argparse 34 | import sys 35 | import subprocess 36 | from datetime import datetime 37 | from lxml import etree 38 | 39 | def process_xml(): 40 | command = "sudo crm_mon -X" 41 | process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) 42 | xml, error = process.communicate() 43 | if error: 44 | print("Could not read command output: " + error.decode("utf-8")) 45 | exit() 46 | try: 47 | root = etree.fromstring(xml) 48 | except Exception as e: 49 | if ("Connection to cluster failed: Transport endpoint is not connected" in xml): 50 | # cluster is not running, all queries default to 0 51 | print("0") 52 | exit() 53 | else: 54 | print("Could not get xml from crm_mon, check command righs.") 55 | print(xml) 56 | raise e 57 | return root 58 | 59 | # simple check, return count of active nodes that are running 60 | # or return true if property is true for the nodeset or node, otherwise false 61 | # for fail-count, return the summed up fail-count 62 | def resource_status_simple(args): 63 | root = process_xml() 64 | if args.property and args.property == "fail-count": 65 | xpath = "sum(/crm_mon/node_history/node/resource_history[@id = '" + args.name +"']/@fail-count)" 66 | if args.node: 67 | xpath = "sum(/crm_mon/node_history/node[@name='" + args.node + "']/resource_history[@id = '" + args.name +"']/@fail-count)" 68 | fail_count = root.xpath(xpath) 69 | print(fail_count) 70 | elif args.property: 71 | prop_status = "true" 72 | xpath = "/crm_mon/resources//resource[@id='" + args.name + "']/@" + args.property 73 | if args.node: 74 | # if a node was defined, check only that one with xpath, otherwise print false if 75 | # any of the nodes had false status 76 | xpath = "/crm_mon/resources//resource[node/@name = '" +args.node+ "'][@id='" + args.name + "']/@" + args.property 77 | props = root.xpath(xpath) 78 | for prop in props: 79 | if prop == "false": 80 | prop_status = "false" 81 | print(prop_status) 82 | else: 83 | xpath = "count(/crm_mon/resources//resource[@id='" + args.name + "' and (@role = 'Started' or 'Master')][@active='true'][@orphaned='false'][@managed='true'][@failed='false'][@failure_ignored='false'][@nodes_running_on > 0])" 84 | if args.node: 85 | # if a node was defined, check only that one with xpath 86 | xpath = "count(/crm_mon/resources//resource[node/@name = '" +args.node+ "'][@id='" + args.name + "' and (@role = 'Started' or 'Master')][@active='true'][@orphaned='false'][@managed='true'][@failed='false'][@failure_ignored='false'][@nodes_running_on > 0][node/@name = '" + args.node + "'])" 87 | count = root.xpath(xpath) 88 | print(count) 89 | 90 | # check the status of given resource, return node:status for resources 91 | def resource_status(args): 92 | root = process_xml() 93 | resource_status = "" 94 | if args.node: 95 | status = resource_verbose(root, args.node, args.name) 96 | xpath = "/crm_mon/resources//resource[@id='" + args.name + "'][node/@name = '" +args.node+ "']/@role" 97 | role_query = root.xpath(xpath) 98 | 99 | if role_query: 100 | role = role_query[0] 101 | else: 102 | role = "NotRunning" 103 | 104 | resource_status = args.node + ":" + role 105 | 106 | if status != "": 107 | resource_status += "[" + status + "]" 108 | 109 | else: 110 | # get list of nodes 111 | xpath = "/crm_mon/nodes/node/@name" 112 | nodes = root.xpath(xpath) 113 | 114 | # 
get status for each node 115 | for i in range(len(nodes)): 116 | if i > 0: 117 | resource_status += " " 118 | status = resource_verbose(root, nodes[i], args.name) 119 | xpath = "/crm_mon/resources//resource[@id='" + args.name + "'][node/@name = '" +nodes[i]+ "']/@role" 120 | role_query = root.xpath(xpath) 121 | if role_query: 122 | role = role_query[0] 123 | else: 124 | role = "NotRunning" 125 | 126 | resource_status += nodes[i] + ":" + role 127 | 128 | if status != "": 129 | resource_status += "[" + status + "]" 130 | 131 | print(resource_status) 132 | 133 | # verbose resource printout, used for node and cluster also 134 | def resource_verbose(root,node,resource): 135 | resource_status = "" 136 | resource_statuses = [] 137 | 138 | xpath = "/crm_mon/resources//resource[@id='" + resource + "'][node/@name = '" + node + "']/@active" 139 | active = root.xpath(xpath) 140 | xpath = "/crm_mon/resources//resource[@id='" + resource + "'][node/@name = '" + node + "']/@orphaned" 141 | orphaned = root.xpath(xpath) 142 | xpath = "/crm_mon/resources//resource[@id='" + resource + "'][node/@name = '" + node + "']/@managed" 143 | managed = root.xpath(xpath) 144 | xpath = "/crm_mon/resources//resource[@id='" + resource + "'][node/@name = '" + node + "']/@failed" 145 | failed = root.xpath(xpath) 146 | xpath = "/crm_mon/resources//resource[@id='" + resource + "'][node/@name = '" + node + "']/@failure_ignored" 147 | failure_ignored = root.xpath(xpath) 148 | xpath = "/crm_mon/resources//resource[@id='" + resource + "'][node/@name = '" + node + "']/@nodes_running_on" 149 | nodes_running = root.xpath(xpath) 150 | xpath = "/crm_mon/node_history/node[@name='" + node + "']/resource_history[@id = '" + resource +"']/@fail-count" 151 | fail_count = root.xpath(xpath) 152 | 153 | 154 | if "false" in active: 155 | resource_statuses.append("inactive") 156 | if "false" in managed: 157 | resource_statuses.append("unmanaged") 158 | if "true" in orphaned: 159 | resource_statuses.append("orphaned") 160 | if "true" in failed: 161 | resource_statuses.append("failed") 162 | if "true" in failure_ignored: 163 | resource_statuses.append("failure_ignored") 164 | if "0" in nodes_running: 165 | resource_statuses.append("nodes_running_on=0") 166 | if fail_count: 167 | resource_statuses.append("fail-count=" + fail_count[0]) 168 | 169 | resource_status += ",".join(resource_statuses) 170 | 171 | return resource_status 172 | 173 | def node_status_simple(args): 174 | root = process_xml() 175 | xpath = "/crm_mon/nodes/node[@name='" + args.name + "']/@resources_running" 176 | count = root.xpath(xpath) 177 | if len(count) > 0: 178 | print(count[0]) 179 | else: 180 | print("0") 181 | 182 | # used also in cluster status 183 | def node_verbose(root,node): 184 | xpath = "/crm_mon/nodes/node[@name='" + node + "']/@online" 185 | online = root.xpath(xpath) 186 | xpath = "/crm_mon/nodes/node[@name='" + node + "']/@standby" 187 | standby = root.xpath(xpath) 188 | xpath = "/crm_mon/nodes/node[@name='" + node + "']/@maintenance" 189 | maintenance = root.xpath(xpath) 190 | xpath = "/crm_mon/nodes/node[@name='" + node + "']/@resources_running" 191 | resource_count = root.xpath(xpath) 192 | node_status = node 193 | 194 | if not(online): 195 | node_status += ":Not found" 196 | else: 197 | if (online[0] == 'true'): 198 | node_status += ":online" 199 | if (standby[0] == 'true'): 200 | node_status += ":standby" 201 | if (maintenance[0] == 'true'): 202 | node_status += ":maintenance" 203 | 204 | # prepare resources dict 205 | resources_status = {} 206 | xpath = 
"/crm_mon/resources//resource[node/@name = '" + node + "']/@id" 207 | resources = root.xpath(xpath) 208 | for resource in resources: 209 | resources_status[resource] = [] 210 | 211 | # include also node history 212 | xpath = "/crm_mon/node_history/node[@name='"+node+"']/resource_history/@id" 213 | resources_history = root.xpath(xpath) 214 | for resource in resources_history: 215 | resources_status[resource] = [] 216 | 217 | for resource in resources_status: 218 | status = resource_verbose(root,node,resource) 219 | if len(status) > 0: 220 | node_status += ":" + resource + "[" + status + "]" 221 | 222 | node_status += ":resources_running=" + resource_count[0] 223 | 224 | return node_status 225 | 226 | def node_status(args): 227 | root = process_xml() 228 | node_status = node_verbose(root,args.name) 229 | print(node_status) 230 | 231 | # print cluster status in a string of data 232 | # includes resources information 233 | def cluster_status(): 234 | root = process_xml() 235 | cluster_status = "" 236 | 237 | xpath = "/crm_mon/nodes/node/@name" 238 | nodes = root.xpath(xpath) 239 | xpath = "/crm_mon/nodes/node/@resources_running" 240 | res_running = root.xpath(xpath) 241 | xpath = "/crm_mon/summary/resources_configured/@number" 242 | res_configured = root.xpath(xpath) 243 | res_running_total = 0 244 | 245 | # gather a string of status data 246 | for i in range(len(nodes)): 247 | res_running_total += int(res_running[i]) 248 | if i > 0: 249 | cluster_status += " " 250 | cluster_status += node_verbose(root,nodes[i]) 251 | 252 | cluster_status += " resources=" + str(res_running_total) + "/" + str(res_configured[0]) 253 | print(cluster_status) 254 | 255 | 256 | # simple status, return 0 if no nodes, 1 if at running, 2 if any in standby 257 | # 3 if any in maintenance, 4 if any in shutdown. 5 if status cannot be determined 258 | # Does not care about resource level statuses. 259 | def cluster_status_simple(): 260 | root = process_xml() 261 | 262 | # if no nodes are found, or all nodes are offline 263 | cluster_status = "5" 264 | xpath = "/crm_mon/nodes/node/@online" 265 | online = root.xpath(xpath) 266 | xpath = "/crm_mon/nodes/node/@maintenance" 267 | maintenance = root.xpath(xpath) 268 | xpath = "/crm_mon/nodes/node/@standby" 269 | standby = root.xpath(xpath) 270 | xpath = "/crm_mon/nodes/node/@shutdown" 271 | shutdown = root.xpath(xpath) 272 | 273 | # any one node causes the status to increase 274 | for state in online: 275 | if state == "true": 276 | cluster_status = "1" 277 | for state in standby: 278 | if state == "true": 279 | cluster_status = "2" 280 | for state in maintenance: 281 | if state == "true": 282 | cluster_status = "3" 283 | for state in shutdown: 284 | if state == "true": 285 | cluster_status = "4" 286 | 287 | print(cluster_status) 288 | 289 | # count statuses from all resources for given property 290 | # e.g. how many failed, how many managed. 
291 | def cluster_statuses_simple(args):
292 | root = process_xml()
293 |
294 | if args.property == "nodes_running_on":
295 | print("Nonsensical parameter for cluster property count.")
296 | exit()
297 | elif args.property == "fail-count":
298 | xpath = "sum(/crm_mon/node_history/node/resource_history/@fail-count)"
299 | property_count = root.xpath(xpath)
300 |
301 | else:
302 | xpath = "count(/crm_mon/resources//resource[@" + args.property + " = 'true'])"
303 | property_count = root.xpath(xpath)
304 |
305 | print(property_count)
306 |
307 | # print the resource locations where resources are active
308 | def resource_location(args):
309 | root = process_xml()
310 |
311 | resource_locations = {}
312 | locations = ""
313 | xpath = "/crm_mon/resources//resource[@active = 'true']/@id"
314 | resources = root.xpath(xpath)
315 |
316 | for resource in resources:
317 | xpath = "/crm_mon/resources//resource[@active = 'true'][@id = '"+resource+"']/node/@name"
318 | nodes = root.xpath(xpath)
319 | resource_locations[resource] = nodes
320 |
321 |
322 | if (args.item == "cluster"):
323 | for resource in resource_locations:
324 | locations += resource + ":" + ",".join(resource_locations[resource]) + " "
325 |
326 | else:
327 | if args.name not in resource_locations:
328 | print(args.name+":Not found")
329 | exit()
330 | locations = args.name +":"+ ",".join(resource_locations[args.name])
331 |
332 | print(locations)
333 |
334 | # print last failures
335 | def cluster_failures():
336 | root = process_xml()
337 |
338 | xpath = "/crm_mon/failures/failure/@op_key"
339 | failures = root.xpath(xpath)
340 | # get the latest failure
341 | if len(failures) > 0:
342 | failure_info = ""
343 | newest = datetime(1970, 1, 1, 0, 0)
344 | for failure in failures:
345 |
346 | xpath = "/crm_mon/failures/failure[@op_key = '"+failure+"']"
347 | element = root.xpath(xpath)
348 | # last-rc-change format: Sun Apr 16 21:46:27 2017
349 | failure_time = datetime.strptime(element[0].get("last-rc-change"), "%a %b %d %H:%M:%S %Y")
350 | if failure_time > newest:
351 | newest = failure_time
352 | # keep only the newest failure seen so far
353 | failure_info = element[0].get("node")
354 | failure_info += ":" + element[0].get("op_key")
355 | failure_info += ":" + element[0].get("status")
356 | failure_info += ":" + element[0].get("last-rc-change")
357 |
358 | print(failure_info)
359 |
360 | else:
361 | # no failures
362 | exit()
363 |
364 |
365 |
366 | if __name__ == "__main__":
367 |
368 | parser = argparse.ArgumentParser(prog="pacemaker_status.py", description="Check the pacemaker cluster status")
369 | parser.add_argument("-i", "--item", help="Item type to check", choices=["resource", "node", "cluster"])
370 | parser.add_argument("-n", "--name", help="Resource or node name to check.")
371 | parser.add_argument("-l", "--location", help="Return the node where the resource is running.", action="store_true")
372 | parser.add_argument("-N", "--node", help="Node to check the resource in.
Default checks all nodes.") 372 | parser.add_argument("-p", "--property", help="Check status of resource property", choices=["active","orphaned","managed","failed","failure_ignored","nodes_running_on","fail-count"]) 373 | parser.add_argument("-v", "--verbose", help="Verbose status", action="store_true") 374 | parser.add_argument("-f", "--failures", help="Failures", action="store_true") 375 | 376 | if len(sys.argv) > 1: 377 | 378 | args = parser.parse_args() 379 | 380 | if (args.item == "resource"): 381 | if not(args.name): 382 | print("Must define resource name.") 383 | elif (args.verbose): 384 | resource_status(args) 385 | elif (args.location): 386 | resource_location(args) 387 | else: 388 | resource_status_simple(args) 389 | elif (args.item == "node"): 390 | if not(args.name): 391 | print("Must define node name.") 392 | elif (args.verbose): 393 | node_status(args) 394 | else: 395 | node_status_simple(args) 396 | elif (args.item == "cluster"): 397 | if (args.property): 398 | cluster_statuses_simple(args) 399 | elif (args.failures): 400 | cluster_failures() 401 | elif (args.verbose): 402 | cluster_status() 403 | elif (args.location): 404 | resource_location(args) 405 | else: 406 | cluster_status_simple() 407 | else: 408 | print("No arguments given. Nothing to do.") 409 | parser.print_help() 410 | -------------------------------------------------------------------------------- /templates/pacemaker.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 3.2 4 | 2017-09-28T11:28:33Z 5 | 6 | 7 | Templates 8 | 9 | 10 | 11 | 377 | 378 | 379 | -------------------------------------------------------------------------------- /templates/pacemaker_active.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 3.2 4 | 2017-09-28T11:28:40Z 5 | 6 | 7 | Templates 8 | 9 | 10 | 11 | 377 | 378 | 379 | --------------------------------------------------------------------------------