├── version.py
├── requirements.txt
├── .gitignore
├── requirements-dev.txt
├── k8s-sa-config
├── 00_service_account.yaml
├── 01_clusterrole.yaml
├── 02_clusterrolebinding.yaml
└── get_config.sh
├── check_nodes.spec
├── .travis.yml
├── readme.md
├── check_nodes.py
├── check_pods.py
├── test_check_pods.py
├── .github
└── workflows
│ └── test.yml
├── test_check_nodes.py
├── INSTALL.md
└── LICENSE
/version.py:
--------------------------------------------------------------------------------
1 | __version__ = 'dev'
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | kubernetes>=11.0.0
2 | nagiosplugin
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /dist
2 | *.rpm
3 | /.idea
4 | /*.iml
5 | *.pyc
6 | /env
7 | /build
8 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | kubernetes>=11.0.0
2 | nagiosplugin
3 | mock
4 | pyinstaller
5 | six>=0.11
6 | setuptools
7 |
--------------------------------------------------------------------------------
/k8s-sa-config/00_service_account.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: v1
3 | kind: ServiceAccount
4 | metadata:
5 | name: icinga-monitoring-sa
--------------------------------------------------------------------------------
/k8s-sa-config/01_clusterrole.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: icinga-monitoring
6 | rules:
7 | - apiGroups: [""]
8 | resources: ["pods", "nodes"]
9 | verbs: ["get", "watch", "list"]
10 |
--------------------------------------------------------------------------------
/k8s-sa-config/02_clusterrolebinding.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRoleBinding
4 | metadata:
5 | name: icinga-monitor-pods
6 | subjects:
7 | - kind: ServiceAccount
8 | name: icinga-monitoring-sa
9 | namespace: default
10 | roleRef:
11 | kind: ClusterRole
12 | name: icinga-monitoring
13 | apiGroup: rbac.authorization.k8s.io
14 |
--------------------------------------------------------------------------------
/check_nodes.spec:
--------------------------------------------------------------------------------
# -*- mode: python -*-
#
# PyInstaller spec file that bundles check_nodes.py into a single
# executable named "check_nodes".

block_cipher = None


a = Analysis(['check_nodes.py'],
             # SPECPATH is a global injected by PyInstaller holding the
             # directory of this spec file; using it instead of a developer's
             # absolute path keeps the build reproducible on any machine.
             pathex=[SPECPATH],
             binaries=[],
             datas=[],
             # nagiosplugin picks its platform module at runtime, so it has
             # to be listed explicitly for the bundle.
             hiddenimports=['nagiosplugin.platform.posix'],
             hookspath=[],
             runtime_hooks=[],
             excludes=[],
             win_no_prefer_redirects=False,
             win_private_assemblies=False,
             cipher=block_cipher)
pyz = PYZ(a.pure, a.zipped_data,
          cipher=block_cipher)
exe = EXE(pyz,
          a.scripts,
          a.binaries,
          a.zipfiles,
          a.datas,
          name='check_nodes',
          debug=False,
          strip=False,
          upx=True,
          runtime_tmpdir=None,
          console=True)
30 |
--------------------------------------------------------------------------------
/k8s-sa-config/get_config.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Print a kubeconfig for the icinga monitoring service account on stdout.
# Diagnostics go to stderr so that stdout can be redirected into a file.
#
# Both settings below may be overridden from the environment:
#   API_SERVER           URL of the Kubernetes API server
#   SERVICEACCOUNT_NAME  name of the service account to export

API_SERVER="${API_SERVER:-https://localhost:6443}"

# Auto-detect the service account unless one was given; only the first match
# is used so that several "icinga" accounts do not yield a multi-line name.
if [[ -z "${SERVICEACCOUNT_NAME}" ]]; then
    SERVICEACCOUNT_NAME=$(kubectl get sa | grep icinga | awk '{ print $1 }' | head -n 1)
fi

if [[ -z "${SERVICEACCOUNT_NAME}" ]]; then
    >&2 echo "Service account not found!"
    exit 1
fi
>&2 echo "Found icinga Service Account: ${SERVICEACCOUNT_NAME}"

# Looked up only after the service account is known; "--" keeps grep from
# treating the pattern as an option.
SECRET_NAME=$(kubectl get secrets | grep -- "${SERVICEACCOUNT_NAME}-token" | awk '{ print $1 }' | head -n 1)

if [[ -z "${SECRET_NAME}" ]]; then
    >&2 echo "No token secret found for service account ${SERVICEACCOUNT_NAME}!"
    exit 1
fi
>&2 echo "Using token secret: ${SECRET_NAME}"

# ca.crt stays base64 encoded because certificate-authority-data expects it
# that way; token and namespace are decoded.
CA_CERT=$(kubectl get secret/"${SECRET_NAME}" -o jsonpath='{.data.ca\.crt}')
SA_TOKEN=$(kubectl get secret/"${SECRET_NAME}" -o jsonpath='{.data.token}' | base64 --decode)
NS=$(kubectl get secret/"${SECRET_NAME}" -o jsonpath='{.data.namespace}' | base64 --decode)

cat <<EOF
apiVersion: v1
kind: Config
clusters:
- name: default-cluster
  cluster:
    certificate-authority-data: ${CA_CERT}
    server: ${API_SERVER}
contexts:
- name: default-context
  context:
    cluster: default-cluster
    namespace: ${NS}
    user: default-user
current-context: default-context
users:
- name: default-user
  user:
    token: ${SA_TOKEN}
EOF
38 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: xenial
2 | language: python
3 |
4 | python:
5 | - '2.7'
6 | - '3.6'
7 | - '3.7'
8 | - '3.8'
9 | - '3.9-dev'
10 |
11 | matrix:
12 | allow_failures:
13 | - python: '3.9-dev'
14 |
15 | install:
16 | - pip install -U -r requirements-dev.txt
17 |
18 | script:
19 | - |
20 | if [ -z "$TRAVIS_TAG" ]; then
21 | export version_number="dev-${TRAVIS_COMMIT}"
22 | else
23 | export version_number="${TRAVIS_TAG}"
24 | fi
25 |
26 | echo "Version ${version_number}"
27 |
28 | echo "__version__ = '${version_number}'" > ./version.py
29 |
30 | - python ./test_check_nodes.py
31 | - python ./test_check_pods.py
32 | - pyinstaller check_pods.py --clean --onefile --hidden-import nagiosplugin.platform.posix
33 | - pyinstaller check_nodes.py --clean --onefile --hidden-import nagiosplugin.platform.posix
34 |
35 | deploy:
36 | provider: releases
37 | api_key:
38 | secure: XslaczKUxjcyP2+ooJ6bVMnF/B/mA9WacDxdqB+WW3oVRoxHMrp6ZDPCj4xfH3N/P+sqCgbGskxz1w0Rxt9JEdyeC31uCkPPzaPbVQIryiJewh5XkScZ7DyBgsFCR93NwYUBFeGo81D/sPFWkz+mWXtvZBnvdDboL2OOwxZTLyPYaQjsaxELc1s2LEUrwc+lV6LPWK52bdOUXlgyxLL9QiZJH3Y/KRehNe6ev23VxVLCb9UMnm/VofI7C4L14sV17Fz1VmvriTpWjAsj36m4cG3dacYzAdYaxE6ul88b5sbXIKG7kgkvBn3eU6MYJ2ZLrUjUFBMo4xFve6pwLJ76vTqOz81l+9FxRt+QJ9bq5/Hn5DSuoQZG3u+IL5AmzErS/FmxtC9MpEGDsHHzxLk4USziDx+S6ZYBGRx2QEsx70ut4biPbtC5jCMTOL2GI8EcdEyFZCMALOmF56JJeD1wL63748u1Diy/SvGXJcNySozx64RNflvlMmmBUN9XRtCX6MyAV/90o76nrNI/1u0+lEfwkPJT1ePwWdWeMPhZBxvczJ5QUyp+3prlx7pkyE2aUD9KvNueijTsh47DxX4FDAvpwbT5l/ZiD/VeE/ZdLagGFvPlzxQN6/DgkmKaahODs1VQkRZ4OUr5i8Lsm4HK9zU/IqLU5kfG/gccTcmCOzo=
39 | skip_cleanup: true
40 | file_glob: true
41 | file: dist/*
42 | on:
43 | repo: T-Systems-MMS/check_kubernetes
44 | tags: true
45 | branch: master
46 | python: '3.8'
47 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Nagios/Icinga Checks for Kubernetes
2 |
3 | [![Build Status](https://travis-ci.org/T-Systems-MMS/check_kubernetes.svg?branch=master)](https://travis-ci.org/T-Systems-MMS/check_kubernetes)
4 |
5 | You will need a kubeconfig file for both checks.
6 |
7 | ## Python Compatibility
8 |
9 | Python 2.7.x or Python >= 3.4
10 |
11 | ## check_nodes.py
12 |
13 | Checks the state of your nodes via the Kubernetes API. The thresholds are Nagios ranges (warning `1`, critical `2`): more than one node with problems is a warning, more than two are critical. Perfdata is supplied.
14 |
15 | ### Usage
16 | ```
17 | usage: check_nodes.py [-h] [--kube-config KUBE_CONFIG]
18 |
19 | optional arguments:
20 | -h, --help show this help message and exit
21 | --kube-config KUBE_CONFIG
22 | Kubernetes Config File
23 | ```
24 |
25 | ## check_pods.py
26 |
27 | Checks the State of all pods in the Kubernetes Cluster.
28 |
29 | ### Usage
30 | ```
31 | usage: check_pods.py [-h] [--kube-config KUBE_CONFIG]
32 | [--warning-pending WARNING_PENDING]
33 | [--critical-pending CRITICAL_PENDING]
34 | [--warning-running WARNING_RUNNING]
35 | [--critical-running CRITICAL_RUNNING]
36 | [--warning-succeeded WARNING_SUCCEEDED]
37 | [--critical-succeeded CRITICAL_SUCCEEDED]
38 | [--warning-failed WARNING_FAILED]
39 | [--critical-failed CRITICAL_FAILED]
40 | [--warning-unknown WARNING_UNKNOWN]
41 | [--critical-unknown CRITICAL_UNKNOWN]
42 |
43 | optional arguments:
44 | -h, --help show this help message and exit
45 | --kube-config KUBE_CONFIG
46 | Kubernetes Config File
47 | --warning-pending WARNING_PENDING
48 | --critical-pending CRITICAL_PENDING
49 | --warning-running WARNING_RUNNING
50 | --critical-running CRITICAL_RUNNING
51 | --warning-succeeded WARNING_SUCCEEDED
52 | --critical-succeeded CRITICAL_SUCCEEDED
53 | --warning-failed WARNING_FAILED
54 | --critical-failed CRITICAL_FAILED
55 | --warning-unknown WARNING_UNKNOWN
56 | --critical-unknown CRITICAL_UNKNOWN
57 | ```
58 |
--------------------------------------------------------------------------------
/check_nodes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Check for Kubernetes Nodes
5 | """
6 |
7 | import argparse
8 |
9 | from kubernetes import config, client
10 | import nagiosplugin
11 |
12 | from version import __version__ as version
13 |
class Nodes(nagiosplugin.Resource):
    """
    Nagios resource that counts Kubernetes nodes and nodes with problems.

    A node counts as having a problem when its ``Ready`` condition is not
    ``'True'`` or when any other condition is not ``'False'``.
    """

    # OutOfDisk is not posted in k8s > 1.12, but is still listed in the node
    # status conditions, see https://github.com/kubernetes/kubernetes/pull/72507
    _IGNORED_CONDITIONS = ('OutOfDisk',)

    def __init__(self, kube_config):
        """
        :param kube_config: path of the kubeconfig file handed to
            ``config.load_kube_config``
        """
        self.kube_config = kube_config
        self.nodes = []
        self.nodes_with_problems = []

    @classmethod
    def _has_problem(cls, node):
        """Return True if any non-ignored condition of ``node`` is unhealthy."""
        # ``conditions`` may be None; treat that like an empty list instead of
        # failing with a TypeError.
        for condition in node.status.conditions or []:
            if condition.type in cls._IGNORED_CONDITIONS:
                continue

            healthy_status = 'True' if condition.type == 'Ready' else 'False'
            if condition.status != healthy_status:
                return True
        return False

    def probe(self):
        """
        Query all nodes and report the problem and total counts.

        :return: list with the ``problem_nodes`` and ``all_nodes`` metrics
        """
        config.load_kube_config(self.kube_config)
        kube = client.CoreV1Api()

        # Reset the state so that calling probe() again does not double count.
        self.nodes = []
        self.nodes_with_problems = []

        for node in kube.list_node().items:
            self.nodes.append(node)
            if self._has_problem(node):
                self.nodes_with_problems.append(node)

        return [
            nagiosplugin.Metric('problem_nodes', len(self.nodes_with_problems), min=0),
            nagiosplugin.Metric('all_nodes', len(self.nodes), min=0),
        ]
45 |
46 |
@nagiosplugin.guarded
def main():
    """
    Parse the command line and run the node check.

    :return: None; the result is reported by ``nagiosplugin.Check.main``
    """
    parser = argparse.ArgumentParser(description='Nagios/Icinga check for Kubernetes Nodes')
    parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
    parser.add_argument('--kube-config', help='Kubernetes Config File')
    options = parser.parse_args()

    resource = Nodes(options.kube_config)
    # NOTE(review): 1 and 2 are handed to nagiosplugin as warning/critical
    # ranges; presumably Nagios range syntax applies - confirm against the
    # behavior described in the readme.
    contexts = (
        nagiosplugin.ScalarContext('problem_nodes', 1, 2),
        nagiosplugin.ScalarContext('all_nodes'),
    )
    nagiosplugin.Check(resource, *contexts).main()


if __name__ == '__main__':
    main()
67 |
--------------------------------------------------------------------------------
/check_pods.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Check for Kubernetes Pods
5 | """
6 |
7 | import argparse
8 |
9 | from kubernetes import config, client
10 | import nagiosplugin
11 |
12 | from version import __version__ as version
13 |
14 |
class Pods(nagiosplugin.Resource):
    """
    Nagios resource that counts the pods of all namespaces per phase.

    One metric is produced for every entry of ``phases``.
    """
    phases = [
        'Pending',
        'Running',
        'Succeeded',
        'Failed',
        'Unknown'
    ]

    def __init__(self, kube_config=None):
        """
        :param kube_config: path of the kubeconfig file handed to
            ``config.load_kube_config``
        """
        self.kube_config = kube_config
        self.pods = []
        self.counts = dict.fromkeys(self.phases, 0)

    def probe(self):
        """
        Query all pods and count them per phase.

        :return: list with one metric per phase, in the order of ``phases``
        """
        config.load_kube_config(self.kube_config)
        kube = client.CoreV1Api()
        self.pods = kube.list_pod_for_all_namespaces().items

        # Start from zero so that calling probe() again does not double count.
        self.counts = dict.fromkeys(self.phases, 0)
        for pod in self.pods:
            phase = pod.status.phase
            # A missing or unexpected phase must not abort the whole check
            # with a KeyError; account for it as 'Unknown' instead.
            if phase not in self.counts:
                phase = 'Unknown'
            self.counts[phase] += 1

        # Iterate over ``phases`` (not the dict) for a stable metric order on
        # every Python version.
        return [
            nagiosplugin.Metric(phase, self.counts[phase], min=0)
            for phase in self.phases
        ]
45 |
46 |
class PodsSummary(nagiosplugin.Summary):
    """
    Status line for the pod check, listing the metric of every phase.
    """

    def ok(self, results):
        """
        Build the text shown when the check is OK.

        :param results: check results, looked up by phase name
        :return: comma separated "<metric> Pods <phase>" entries
        """
        return ', '.join(
            "%s Pods %s" % (str(results[phase].metric), phase)
            for phase in Pods.phases
        )
57 |
58 |
@nagiosplugin.guarded
def main():
    """
    Parse the command line and run the pod check.

    For every phase in ``Pods.phases`` a ``--warning-<phase>`` and a
    ``--critical-<phase>`` option is registered and passed on to the
    matching ``ScalarContext``.

    :return: None; the result is reported by ``nagiosplugin.Check.main``
    """
    parser = argparse.ArgumentParser(description='Nagios/Icinga check for Kubernetes Pods')
    parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
    parser.add_argument('--kube-config', help='Kubernetes Config File')

    # Option names use the lower-cased phase, e.g. --warning-pending.
    option_names = [phase.lower() for phase in Pods.phases]
    for name in option_names:
        parser.add_argument('--warning-' + name)
        parser.add_argument('--critical-' + name)

    options = parser.parse_args()

    components = [Pods(options.kube_config)]
    components.extend(
        nagiosplugin.ScalarContext(
            phase,
            getattr(options, 'warning_' + name),
            getattr(options, 'critical_' + name),
        )
        for phase, name in zip(Pods.phases, option_names)
    )
    components.append(PodsSummary())

    nagiosplugin.Check(*components).main()


if __name__ == '__main__':
    main()
88 |
--------------------------------------------------------------------------------
/test_check_pods.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import mock
3 |
4 | import check_pods
5 |
def _pods_in_phases(*phases):
    """Build one mocked pod per given ``status.phase`` value."""
    return [mock.Mock(status=mock.Mock(phase=phase)) for phase in phases]


# Two pods, both running.
nodes_all_ok = _pods_in_phases('Running', 'Running')

# One failed pod next to one running pod.
nodes_one_failed = _pods_in_phases('Failed', 'Running')
31 |
class TestCheckNodes(unittest.TestCase):
    """
    Tests for ``check_pods.Pods.probe`` with a mocked Kubernetes client.

    The patches target the names as seen from ``check_pods``, the module
    under test, so the tests do not depend on ``check_nodes``.
    """

    @mock.patch('check_pods.nagiosplugin.Metric')
    @mock.patch('check_pods.client.CoreV1Api')
    @mock.patch('check_pods.config.load_kube_config')
    def test_pods_all_ok(self, mock_config, mock_client, mock_metric):
        """Two running pods are reported as Running=2, everything else 0."""
        mock_kube = mock.Mock()

        type(mock_kube.list_pod_for_all_namespaces.return_value).items = \
            mock.PropertyMock(return_value=nodes_all_ok)

        mock_client.return_value = mock_kube

        cls = check_pods.Pods(kube_config='empty')
        cls.probe()

        mock_config.assert_called_with('empty')
        mock_kube.list_pod_for_all_namespaces.assert_called()

        mock_metric.assert_any_call('Pending', 0, min=0)
        mock_metric.assert_any_call('Running', 2, min=0)
        mock_metric.assert_any_call('Succeeded', 0, min=0)
        mock_metric.assert_any_call('Failed', 0, min=0)
        mock_metric.assert_any_call('Unknown', 0, min=0)

    @mock.patch('check_pods.nagiosplugin.Metric')
    @mock.patch('check_pods.client.CoreV1Api')
    @mock.patch('check_pods.config.load_kube_config')
    def test_pods_one_failed(self, mock_config, mock_client, mock_metric):
        """One failed and one running pod are counted in their own phase."""
        mock_kube = mock.Mock()

        type(mock_kube.list_pod_for_all_namespaces.return_value).items = \
            mock.PropertyMock(return_value=nodes_one_failed)

        mock_client.return_value = mock_kube

        cls = check_pods.Pods(kube_config='empty')
        cls.probe()

        mock_config.assert_called_with('empty')
        mock_kube.list_pod_for_all_namespaces.assert_called()

        mock_metric.assert_any_call('Pending', 0, min=0)
        mock_metric.assert_any_call('Running', 1, min=0)
        mock_metric.assert_any_call('Succeeded', 0, min=0)
        mock_metric.assert_any_call('Failed', 1, min=0)
        mock_metric.assert_any_call('Unknown', 0, min=0)


if __name__ == '__main__':
    unittest.main()
81 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Python package
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-16.04
8 | strategy:
9 | matrix:
10 | python-version: [2.7, 3.5, 3.6, 3.7, 3.8]
11 | steps:
12 | - uses: actions/checkout@v2
13 | - name: Set up Python ${{ matrix.python-version }}
14 | uses: actions/setup-python@v2
15 | with:
16 | python-version: ${{ matrix.python-version }}
17 | - name: Get the version
18 | id: vars
19 | run: echo ::set-output name=tag::$(echo ${GITHUB_REF##*/})
20 | - name: Install dependencies
21 | run: |
22 | pip install -U -r requirements-dev.txt
23 | - name: Test
24 | run: |
25 | echo "__version__ = '${{steps.vars.outputs.tag}}'" > ./version.py
26 | python ./test_check_nodes.py
27 | python ./test_check_pods.py
28 | pyinstaller check_pods.py --clean --onefile --hidden-import nagiosplugin.platform.posix
29 | pyinstaller check_nodes.py --clean --onefile --hidden-import nagiosplugin.platform.posix
30 | - name: Upload Artifacts
31 | if: ${{ matrix.python-version == '3.8' }}
32 | uses: actions/upload-artifact@v2
33 | with:
34 | name: binaries
35 | path: dist/*
36 | release:
37 | needs: build
38 | if: startsWith(github.ref, 'refs/tags/v')
39 | runs-on: ubuntu-latest
40 | steps:
41 | - name: Get the version
42 | id: vars
43 | run: echo ::set-output name=tag::$(echo ${GITHUB_REF##*/})
44 | - uses: actions/download-artifact@v2
45 | with:
46 | name: binaries
47 | path: dist/
48 | - name: ls
49 | run: |
50 | ls -la
51 | ls -la dist/
52 | - name: Create Release
53 | id: create_release
54 | uses: actions/create-release@v1
55 | env:
56 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
57 | with:
58 | tag_name: ${{ steps.vars.outputs.tag }}
59 | release_name: Release ${{ steps.vars.outputs.tag }}
60 | draft: false
61 | prerelease: false
62 | - name: Upload Release Asset check_nodes
63 | uses: actions/upload-release-asset@v1
64 | env:
65 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
66 | with:
67 | upload_url: ${{ steps.create_release.outputs.upload_url }}
68 | asset_path: dist/check_nodes
69 | asset_name: check_nodes
70 | asset_content_type: application/octet-stream
71 | - name: Upload Release Asset check_pods
72 | uses: actions/upload-release-asset@v1
73 | env:
74 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
75 | with:
76 | upload_url: ${{ steps.create_release.outputs.upload_url }}
77 | asset_path: dist/check_pods
78 | asset_name: check_pods
79 | asset_content_type: application/octet-stream
80 |
--------------------------------------------------------------------------------
/test_check_nodes.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import mock
3 |
4 | import check_nodes
5 |
def _node(*conditions):
    """Build a mocked node from ``(type, status)`` condition pairs."""
    return mock.Mock(
        status=mock.Mock(
            conditions=[
                mock.Mock(type=cond_type, status=cond_status)
                for cond_type, cond_status in conditions
            ],
        )
    )


# Two nodes, both ready and without disk pressure.
nodes_all_ok = [
    _node(('Ready', 'True'), ('DiskPressure', 'False')),
    _node(('Ready', 'True'), ('DiskPressure', 'False')),
]

# The first node reports disk pressure, the second one is fine.
nodes_one_problem = [
    _node(('Ready', 'True'), ('DiskPressure', 'True')),
    _node(('Ready', 'True'), ('DiskPressure', 'False')),
]

# The first node reports two problems at once, the second one is fine.
nodes_two_problem = [
    _node(('Ready', 'True'), ('DiskPressure', 'True'), ('PIDPressure', 'True')),
    _node(('Ready', 'True'), ('DiskPressure', 'False'), ('PIDPressure', 'False')),
]
64 |
class TestCheckNodes(unittest.TestCase):
    """Tests for ``check_nodes.Nodes.probe`` with a mocked Kubernetes client."""

    def _assert_probe_metrics(self, nodes, problem_nodes, all_nodes):
        """Probe against ``nodes`` and verify the calls and both metrics."""
        with mock.patch('check_nodes.config.load_kube_config') as mock_config, \
                mock.patch('check_nodes.client.CoreV1Api') as mock_client, \
                mock.patch('check_nodes.nagiosplugin.Metric') as mock_metric:
            mock_kube = mock.Mock()
            type(mock_kube.list_node.return_value).items = \
                mock.PropertyMock(return_value=nodes)
            mock_client.return_value = mock_kube

            check_nodes.Nodes(kube_config='empty').probe()

            mock_config.assert_called_with('empty')
            mock_kube.list_node.assert_called()

            mock_metric.assert_any_call('problem_nodes', problem_nodes, min=0)
            mock_metric.assert_any_call('all_nodes', all_nodes, min=0)

    def test_node_all_ok(self):
        """No node has a problem."""
        self._assert_probe_metrics(nodes_all_ok, problem_nodes=0, all_nodes=2)

    def test_node_one_problem(self):
        """A single unhealthy condition marks the node as a problem."""
        self._assert_probe_metrics(nodes_one_problem, problem_nodes=1, all_nodes=2)

    def test_node_two_problem(self):
        """A node with two unhealthy conditions is still counted only once."""
        self._assert_probe_metrics(nodes_two_problem, problem_nodes=1, all_nodes=2)


if __name__ == '__main__':
    unittest.main()
127 |
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | # Installation Instructions
2 | ## Pre-Requisites
3 | - Monitoring user (nagios) with home directory (/home/nagios) is set up
4 | - NRPE is working correctly
5 |
6 | ## Install from release
7 | ### download release versions (for release 1.2.1)
8 | ```bash
9 | [nagios@host ~]$ cd /home/nagios
10 | [nagios@host nagios]$ mkdir check_kubernetes
11 | [nagios@host nagios]$ cd check_kubernetes
12 | [nagios@host check_kubernetes]$ RELEASE_URL="https://github.com/T-Systems-MMS/check_kubernetes/releases/download/"
13 | [nagios@host check_kubernetes]$ VERSION="v1.2.1"
14 | [nagios@host check_kubernetes]$ wget ${RELEASE_URL}/${VERSION}/check_pods
15 | [nagios@host check_kubernetes]$ wget ${RELEASE_URL}/${VERSION}/check_nodes
16 | [nagios@host check_kubernetes]$ chmod 0750 check_pods check_nodes
17 | ```
18 |
19 | ## Install from Source
20 | ### Python environment
21 | The following steps have to be executed as Nagios/NRPE user (user who will run the checks).
22 |
23 | #### venv setup and clone
24 | ```bash
25 | [nagios@host ~]$ cd /home/nagios
26 | [nagios@host nagios]$ python3 -m venv k8s_mon_venv
27 | [nagios@host nagios]$ source k8s_mon_venv/bin/activate
28 | [nagios@host nagios]$ git clone https://github.com/T-Systems-MMS/check_kubernetes.git
29 | [nagios@host nagios]$ cd check_kubernetes
30 | [nagios@host check_kubernetes]$ pip install -r requirements.txt
31 | ```
32 |
33 | ## Kubernetes Service Account Setup
34 | All files shown here can be found in folder k8s-sa-config.
35 |
36 | ### Service Account - 00_service_account.yaml
37 | ```yaml
38 | apiVersion: v1
39 | kind: ServiceAccount
40 | metadata:
41 | name: icinga-monitoring-sa
42 | ```
43 |
44 | ### ClusterRole - 01_clusterrole.yaml
45 | ```yaml
46 | ---
47 | apiVersion: rbac.authorization.k8s.io/v1
48 | kind: ClusterRole
49 | metadata:
50 | name: icinga-monitoring
51 | rules:
52 | - apiGroups: [""]
53 | resources: ["pods", "nodes"]
54 | verbs: ["get", "watch", "list"]
55 | ```
56 |
57 | ### ClusterRoleBinding - 02_clusterrolebinding.yaml
58 | ```yaml
59 | ---
60 | apiVersion: rbac.authorization.k8s.io/v1
61 | kind: ClusterRoleBinding
62 | metadata:
63 | name: icinga-monitor-pods
64 | subjects:
65 | - kind: ServiceAccount
66 | name: icinga-monitoring-sa
67 | namespace: default
68 | roleRef:
69 | kind: ClusterRole
70 | name: icinga-monitoring
71 | apiGroup: rbac.authorization.k8s.io
72 | ```
73 |
74 | ## Kubernetes - Get kube-config for service Account
75 | ### get_config.sh
76 | We assume here that the script is executed on a master node. If this is not the case you
77 | must change `API_SERVER` here.
78 | If you've used another service account name also change `SERVICEACCOUNT_NAME` to reflect the change.
79 |
80 | ```bash
81 | #!/bin/bash
82 |
83 | API_SERVER="https://localhost:6443"
84 | SERVICEACCOUNT_NAME=$(kubectl get sa | grep icinga | awk '{ print $1 }')
85 | SECRET_NAME=$(kubectl get secrets | grep "${SERVICEACCOUNT_NAME}-token" | awk '{ print $1 }')
86 |
87 | if [[ ${SERVICEACCOUNT_NAME} == "" ]]; then
88 | >&2 echo "Service account not found!"
89 | exit 1
90 | else
91 | >&2 echo "Found icinga Service Account: ${SECRET_NAME}"
92 | fi
93 |
94 | CA_CERT=$(kubectl get secret/"${SECRET_NAME}" -o jsonpath='{.data.ca\.crt}')
95 | SA_TOKEN=$(kubectl get secret/"${SECRET_NAME}" -o jsonpath='{.data.token}' | base64 --decode)
96 | NS=$(kubectl get secret/"${SECRET_NAME}" -o jsonpath='{.data.namespace}' | base64 --decode)
97 |
98 | echo "
99 | apiVersion: v1
100 | kind: Config
101 | clusters:
102 | - name: default-cluster
103 | cluster:
104 | certificate-authority-data: ${CA_CERT}
105 | server: ${API_SERVER}
106 | contexts:
107 | - name: default-context
108 | context:
109 | cluster: default-cluster
110 | namespace: ${NS}
111 | user: default-user
112 | current-context: default-context
113 | users:
114 | - name: default-user
115 | user:
116 | token: ${SA_TOKEN}
117 | "
118 | ```
119 |
120 | To generate the kube-config for the service account, run the script and redirect its output to a location that is
121 | accessible for nagios/nrpe.
122 | The user running the script must be able to connect to the Kubernetes cluster and have the privileges to run
123 | kubectl commands at cluster level (e.g. root).
124 |
125 | ```bash
126 | [root@host ~]# chmod u+x get_config.sh
127 | [root@host ~]# ./get_config.sh > /home/nagios/kube-config
128 | [root@host ~]# chown nagios.nagios /home/nagios/kube-config
129 | [root@host ~]# chmod 0600 /home/nagios/kube-config
130 | ```
131 |
132 | ## Testing the newly created service account
133 | You can test service account configuration by running the check manually.
134 |
135 | ```bash
136 | [nagios@host ~]$ cd /home/nagios/
137 | [nagios@host nagios]$ ls
138 | bin check_kubernetes k8s_mon_venv kube-config
139 | [nagios@host nagios]$ cd check_kubernetes/
140 | [nagios@host check_kubernetes]$ ./check_nodes --kube-config ../kube-config
141 | NODES OK - problem_nodes is 0 | all_nodes=5;;;0 problem_nodes=0;1;2;0
142 | [nagios@host check_kubernetes]$ ./check_pods --kube-config ../kube-config
143 | PODS OK - 0 Pods Pending, 28 Pods Running, 0 Pods Succeeded, 0 Pods Failed, 0 Pods Unknown | Failed=0;;;0 Pending=0;;;0 Running=28;;;0 Succeeded=0;;;0 Unknown=0;;;0
144 | [nagios@host check_kubernetes]$
145 | ```
146 |
147 | # Troubleshooting
148 | ## Error: Hostname doesn't match
149 | If you get an exception like this:
150 | ```
151 | NODES UNKNOWN: urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='localhost', port=6443): Max retries exceeded with url: /api/v1/nodes (Caused by SSLError(SSLCertVerificationError("hostname 'localhost' doesn't match either of '', 'kubernetes', 'kubernetes.default', 'kubernetes.default.svc', 'kubernetes.default.svc.cluster.local', '', ''")))
152 | ```
153 | check the "server" line under "cluster" in ``kube-config`` and replace localhost with one of the names listed in the error message.
154 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------