├── src ├── kubelet_stats_exporter │ ├── __init__.py │ ├── config.py │ ├── logging.py │ ├── exporter.py │ └── collector.py └── main.py ├── .dockerignore ├── requirements.txt ├── extra ├── servicemonitor.yaml ├── example_metrics ├── grafana_dashboard_node.json └── grafana_dashboard_namespace.json ├── k8s-resources ├── service.yaml ├── ns-rbac.yaml └── deploy.yaml ├── Dockerfile ├── .github └── workflows │ ├── pylint.yml │ └── docker.yml ├── LICENSE.md ├── .gitignore └── README.md /src/kubelet_stats_exporter/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | /extra/* 2 | /.github/* 3 | /k8s-resources/* 4 | /kube-prometheus/* 5 | Dockerfile 6 | .DS_Store 7 | .git 8 | .gitignore 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.0.2 2 | kubernetes==17.17.0 3 | prometheus_client==0.11.0 4 | python-json-logger==2.0.2 5 | requests==2.26.0 6 | -------------------------------------------------------------------------------- /src/kubelet_stats_exporter/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # application 4 | EXPORTER_PORT = int(os.getenv('EXPORTER_PORT', "9118")) 5 | 6 | # exporter 7 | KUBERNETES_API_TIMEOUT = int(os.getenv("KUBERNETES_API_TIMEOUT", "5")) 8 | 9 | # logging 10 | JSON_LOGGER = os.getenv('JSON_LOGGER', "false" ) 11 | LOG_LEVEL = os.getenv('LOG_LEVEL', "INFO" ) 12 | -------------------------------------------------------------------------------- /extra/servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: kubelet-stats-exporter 5 | spec: 6 | endpoints: 7 | - interval: 30s 8 | port: metrics 9 | namespaceSelector: 10 | matchNames: 11 | - monitoring 12 | selector: 13 | matchLabels: 14 | name: kubelet-stats-exporter -------------------------------------------------------------------------------- /k8s-resources/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: kubelet-stats-exporter 5 | labels: 6 | name: kubelet-stats-exporter # ServiceMonitor reqires a labelon the service 7 | spec: 8 | selector: 9 | name: kubelet-stats-exporter 10 | ports: 11 | - name: metrics 12 | port: 9113 13 | protocol: TCP 14 | targetPort: metrics 15 | type: ClusterIP 16 | 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | ARG UID=1000 4 | ARG GID=1000 5 | 6 | WORKDIR /app 7 | 8 | COPY requirements.txt src ./ 9 | 10 | RUN pip install -r requirements.txt 11 | 12 | RUN groupadd --system -g 1001 kubeletstats && \ 13 | useradd --system --gid kubeletstats --no-create-home \ 14 | --home-dir /app --shell /usr/sbin/nologin \ 15 | --uid 1001 kubeletstats 16 | 17 | RUN chown -R kubeletstats:kubeletstats /app 18 | 19 | USER 1001:1001 20 | 21 | ENV PYTHONUNBUFFERED=1 22 | 23 | EXPOSE 9118 24 | 25 | ENTRYPOINT ["python", "-u", "/app/main.py"] 26 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from kubelet_stats_exporter.config import EXPORTER_PORT 3 | from kubelet_stats_exporter.exporter import bp 4 | from kubelet_stats_exporter.logging import logger 5 | 6 | def main(): 7 | """Main Function 8 | Flask Application handling 9 | """ 10 | logger.info(f"Starting Kubelet Stats Exporter App in port: {EXPORTER_PORT}") 11 | application = Flask(__name__) 12 | application.register_blueprint(bp) 13 | application.run(host='0.0.0.0', port=EXPORTER_PORT) 14 | 15 | if __name__ == "__main__": 16 | main() 17 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 3.9 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: 3.9 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install pylint 20 | pip install -r requirements.txt 21 | - name: Analysing the code with pylint 22 | run: | 23 | pylint src 24 | -------------------------------------------------------------------------------- /k8s-resources/ns-rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: kubelet-stats-exporter 5 | --- 6 | apiVersion: rbac.authorization.k8s.io/v1 7 | kind: ClusterRole 8 | metadata: 9 | name: kubelet-stats-exporter 10 | rules: 11 | - apiGroups: 12 | - "" 13 | resources: 14 | - nodes/metrics 15 | - nodes/stats 16 | - nodes/proxy 17 | - nodes # Required when running as a Deployment, to query API for list of nodes 18 | verbs: 19 | - get 20 | - list # Required when running as Deployment 21 | --- 22 | apiVersion: rbac.authorization.k8s.io/v1 23 | kind: ClusterRoleBinding 24 | metadata: 25 | name: kubelet-stats-exporter 26 | roleRef: 27 | apiGroup: rbac.authorization.k8s.io 28 | kind: ClusterRole 29 | name: kubelet-stats-exporter 30 | subjects: 31 | - kind: ServiceAccount 32 | name: kubelet-stats-exporter 33 | namespace: default 34 | --- 35 | apiVersion: v1 36 | kind: ServiceAccount 37 | metadata: 38 | name: kubelet-stats-exporter 39 | -------------------------------------------------------------------------------- /src/kubelet_stats_exporter/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pythonjsonlogger import jsonlogger 3 | from kubelet_stats_exporter.config import JSON_LOGGER, LOG_LEVEL 4 | 5 | # create filter for all requests 6 | class NoRequests(logging.Filter): 7 | '''Logging Filter class that filters all werkzeug requests 8 | ''' 9 | def filter(self, record): 10 | return '/' not in record.getMessage() 11 | 12 | # set root logger 13 | level = logging.getLevelName(LOG_LEVEL) 14 | if JSON_LOGGER == "true": 15 | logger = logging.getLogger() 16 | logHandler = logging.StreamHandler() 17 | formatter = jsonlogger.JsonFormatter('%(asctime)s - %(levelname)s - %(message)s') 18 | logHandler.setFormatter(formatter) 19 | logger.addHandler(logHandler) 20 | logger.setLevel(level) 21 | else: 22 | logging.basicConfig(level=level, format='%(asctime)s - %(levelname)s - %(message)s') 23 | logger = logging.getLogger() 24 | 25 | # Set werkzeug logger filter for requests when DEBUG Logging Level is not set. 26 | if LOG_LEVEL != "DEBUG": 27 | logging.getLogger("werkzeug").addFilter(NoRequests()) 28 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 petros-d 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /k8s-resources/deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: kubelet-stats-exporter 5 | spec: 6 | replicas: 1 7 | revisionHistoryLimit: 10 8 | selector: 9 | matchLabels: 10 | name: kubelet-stats-exporter 11 | template: 12 | metadata: 13 | labels: 14 | name: kubelet-stats-exporter 15 | spec: 16 | containers: 17 | - name: kubelet-stats-exporter 18 | image: petrosd/kubelet-stats-exporter:latest 19 | # imagePullPolicy: Never 20 | ports: 21 | - name: metrics 22 | containerPort: 9118 23 | # env: 24 | # - name: EXPORTER_PORT 25 | # value: "9118" 26 | # - name: JSON_LOGGER 27 | # value: "true" 28 | # - name: LOG_LEVEL 29 | # value: "INFO" 30 | # - name: SCRAPE_TIMEOUT 31 | # value: "30.0" 32 | # Increase these resource values for larger clusters 33 | resources: 34 | limits: 35 | cpu: 50m 36 | memory: 150Mi 37 | requests: 38 | cpu: 10m 39 | memory: 50Mi 40 | serviceAccountName: kubelet-stats-exporter 41 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Docker Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | 16 | - name: Docker meta 17 | id: meta 18 | uses: docker/metadata-action@v3 19 | with: 20 | # list of Docker images to use as base name for tags 21 | images: | 22 | petrosd/kubelet-stats-exporter 23 | # generate Docker tags based on the following events/attributes 24 | tags: | 25 | type=ref,event=tag 26 | 27 | - name: Set up QEMU 28 | uses: docker/setup-qemu-action@v1 29 | 30 | - name: Set up Docker Buildx 31 | uses: docker/setup-buildx-action@v1 32 | 33 | - name: Login to DockerHub 34 | uses: docker/login-action@v1 35 | with: 36 | username: ${{ secrets.DOCKERHUB_USERNAME }} 37 | password: ${{ secrets.DOCKERHUB_TOKEN }} 38 | 39 | - name: Build and push 40 | id: docker_build 41 | uses: docker/build-push-action@v2 42 | with: 43 | push: true 44 | tags: ${{ steps.meta.outputs.tags }} 45 | -------------------------------------------------------------------------------- /extra/example_metrics: -------------------------------------------------------------------------------- 1 | # HELP kube_pod_ephemeral_storage_used_bytes Kubernetes Pod ephemeral storage used in bytes 2 | # TYPE kube_pod_ephemeral_storage_used_bytes gauge 3 | kube_pod_ephemeral_storage_used_bytes{namespace="monitoring",node="docker-desktop",pod="alertmanager-main-2"} 0.0 4 | kube_pod_ephemeral_storage_used_bytes{namespace="kube-system",node="docker-desktop",pod="vpnkit-controller"} 0.0 5 | kube_pod_ephemeral_storage_used_bytes{namespace="monitoring",node="docker-desktop",pod="grafana-864f656f6f-8hk6n"} 0.0 6 | kube_pod_ephemeral_storage_used_bytes{namespace="kube-system",node="docker-desktop",pod="etcd-docker-desktop"} 0.0 7 | kube_pod_ephemeral_storage_used_bytes{namespace="monitoring",node="docker-desktop",pod="alertmanager-main-1"} 0.0 8 | kube_pod_ephemeral_storage_used_bytes{namespace="kube-system",node="docker-desktop",pod="kube-scheduler-docker-desktop"} 0.0 9 | kube_pod_ephemeral_storage_used_bytes{namespace="monitoring",node="docker-desktop",pod="node-exporter-6ntxt"} 0.0 10 | kube_pod_ephemeral_storage_used_bytes{namespace="monitoring",node="docker-desktop",pod="kube-state-metrics-65769f95d8-6gntr"} 0.0 11 | kube_pod_ephemeral_storage_used_bytes{namespace="monitoring",node="docker-desktop",pod="prometheus-adapter-5b8db7955f-xs8qx"} 0.0 -------------------------------------------------------------------------------- /src/kubelet_stats_exporter/exporter.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, Response, abort 2 | from prometheus_client import CONTENT_TYPE_LATEST, CollectorRegistry, generate_latest 3 | from kubelet_stats_exporter.collector import KubeletCollector 4 | from kubelet_stats_exporter.logging import logger 5 | 6 | bp = Blueprint('exporter', __name__) 7 | 8 | def register_metrics_collector(registry): 9 | """Registers the main collector in the registry 10 | Parameters 11 | ---------- 12 | registry: object 13 | Prometheus Exporter Collector Registry Object 14 | """ 15 | collector = KubeletCollector() 16 | registry.register(collector) 17 | 18 | # Application Paths 19 | @bp.route("/health") 20 | def health(): 21 | ''' 22 | Health Endpoint 23 | ''' 24 | return 'ok' 25 | 26 | @bp.route("/metrics") 27 | def metrics(): 28 | ''' 29 | Metrics endpoint for prometheus scraping 30 | ''' 31 | registry = CollectorRegistry() 32 | register_metrics_collector(registry) 33 | try: 34 | content = generate_latest(registry) 35 | logger.debug("Metrics payload content generated") 36 | return content, 200, {'Content-Type': CONTENT_TYPE_LATEST} 37 | except Exception as err: 38 | logger.error(f"Scrape Failed - {str(err)}") 39 | abort(Response(f"Scrape failed: {str(err)}", status=502)) 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | ./kube-prometheus -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kubelet-stats-exporter 2 | This is a Prometheus exporter to expose a Pods ephemeral storage usage stats from the Kubelets 3 | 4 | The Kubernetes Kubelet has some metrics on ephemeral storage usage that are not currently exposed elsewhere. It may be useful to present these in a format that can be collected by Prometheus. Note the kubelet of Docker Desktop does not expose these metrics, so it is not a useful environment for testing. 5 | 6 | It accesses the Kubelet on each node, via the Kubernetes API proxy. Authentication is done to the kubelet using a service account that is configured in `k8s-resources/ns-rbac.yaml`. 7 | 8 | ## Background reading: 9 | 10 | Although these metrics are exposed by the kubelet API, they are not exposed on the kubelets `/metrics` endpoint, nor are they currently collected or exported with kube-state-metrics. Some issues with similar requests can be seen here: 11 | 12 | https://github.com/kubernetes/kube-state-metrics/issues/1046 (https://github.com/kubernetes/kube-state-metrics/issues/1046#issuecomment-640161305) 13 | 14 | https://github.com/kubernetes/kubernetes/issues/69507 15 | 16 | ## Requirements 17 | The following `pip` libraries are required: 18 | - flask 19 | - kubernetes 20 | - prometheus_client 21 | - python-json-logger 22 | - requests 23 | ## Configuration 24 | 25 | The following environment variables are useful to configure the application: 26 | 27 | |Name|Type|Default|Description| 28 | |--|--|--|--| 29 | |EXPORTER_PORT|int|9118|Port exposing prometheus metrics| 30 | |JSON_LOGGER|bool|true|Application logs on JSON Format for easier centralization using log shippers such as FluentBit| 31 | |LOG_LEVEL|str|INFO|Logging Level. Werkzeug requests only logged on DEBUG level for log centralization costs saving purposes| 32 | |KUBERNETES_API_TIMEOUT|int|5|Kubernetes API Scrape timeout. **Make sure `KUBERNETES_API_TIMEOUT` is smaller than Prometheus Scrape timeout**.| 33 | 34 | ## Exporter Endpoints 35 | 36 | - `/health` - Application Status for Kubernetes readiness & liveness probes check. 37 | - `/metrics` - Prometheus metrics endpoint. 38 | 39 | ## Deployment 40 | ### Kubernetes Manifests 41 | 42 | Apply Kubernetes resources: `kubectl apply -f ./k8s-resources ` 43 | 44 | If you're using the PrometheusOperator, and want to configure a ServiceMonitor, apply the `extra/servicemonitor.yaml` file. 45 | 46 | Example dashboards for Grafana can be seen in the `extra` folder. 47 | 48 | ### Helm Chart 49 | 50 | You could use [kubelet-stats-exporter chart](https://github.com/sequra/helm-charts/tree/master/charts/prometheus-kubelet-stats-exporter) to deploy `kubelet-stats-exporter` using helm. 51 | 52 | ``` 53 | helm repo add sequra-community https://sequra.github.io/helm-charts 54 | helm repo update 55 | helm install [RELEASE_NAME] sequra-community/prometheus-kubelet-stats-exporter 56 | ``` 57 | -------------------------------------------------------------------------------- /extra/grafana_dashboard_node.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "target": { 12 | "limit": 100, 13 | "matchAny": false, 14 | "tags": [], 15 | "type": "dashboard" 16 | }, 17 | "type": "dashboard" 18 | } 19 | ] 20 | }, 21 | "editable": true, 22 | "gnetId": null, 23 | "graphTooltip": 0, 24 | "id": 25, 25 | "iteration": 1633877508594, 26 | "links": [], 27 | "panels": [ 28 | { 29 | "datasource": "prometheus", 30 | "fieldConfig": { 31 | "defaults": { 32 | "color": { 33 | "mode": "palette-classic" 34 | }, 35 | "custom": { 36 | "axisLabel": "", 37 | "axisPlacement": "auto", 38 | "barAlignment": 0, 39 | "drawStyle": "line", 40 | "fillOpacity": 0, 41 | "gradientMode": "none", 42 | "hideFrom": { 43 | "legend": false, 44 | "tooltip": false, 45 | "viz": false 46 | }, 47 | "lineInterpolation": "linear", 48 | "lineWidth": 1, 49 | "pointSize": 5, 50 | "scaleDistribution": { 51 | "type": "linear" 52 | }, 53 | "showPoints": "auto", 54 | "spanNulls": false, 55 | "stacking": { 56 | "group": "A", 57 | "mode": "none" 58 | }, 59 | "thresholdsStyle": { 60 | "mode": "off" 61 | } 62 | }, 63 | "mappings": [], 64 | "thresholds": { 65 | "mode": "absolute", 66 | "steps": [ 67 | { 68 | "color": "green", 69 | "value": null 70 | }, 71 | { 72 | "color": "red", 73 | "value": 80 74 | } 75 | ] 76 | } 77 | }, 78 | "overrides": [] 79 | }, 80 | "gridPos": { 81 | "h": 13, 82 | "w": 24, 83 | "x": 0, 84 | "y": 0 85 | }, 86 | "id": 2, 87 | "options": { 88 | "legend": { 89 | "calcs": [], 90 | "displayMode": "list", 91 | "placement": "bottom" 92 | }, 93 | "tooltip": { 94 | "mode": "single" 95 | } 96 | }, 97 | "targets": [ 98 | { 99 | "exemplar": true, 100 | "expr": "sum(kube_pod_ephemeral_storage_used_bytes{node=\"$node\"}) by (exported_pod)", 101 | "interval": "", 102 | "legendFormat": "{{exported_pod}}", 103 | "refId": "A" 104 | } 105 | ], 106 | "title": "Ephemeral Storage", 107 | "type": "timeseries" 108 | } 109 | ], 110 | "schemaVersion": 30, 111 | "style": "dark", 112 | "tags": [], 113 | "templating": { 114 | "list": [ 115 | { 116 | "allValue": null, 117 | "current": { 118 | "selected": true, 119 | "text": "docker-desktop", 120 | "value": "docker-desktop" 121 | }, 122 | "datasource": "prometheus", 123 | "definition": "label_values(kube_pod_ephemeral_storage_used_bytes, node)", 124 | "description": null, 125 | "error": null, 126 | "hide": 0, 127 | "includeAll": false, 128 | "label": null, 129 | "multi": false, 130 | "name": "node", 131 | "options": [], 132 | "query": { 133 | "query": "label_values(kube_pod_ephemeral_storage_used_bytes, node)", 134 | "refId": "StandardVariableQuery" 135 | }, 136 | "refresh": 1, 137 | "regex": "", 138 | "skipUrlSync": false, 139 | "sort": 0, 140 | "type": "query" 141 | } 142 | ] 143 | }, 144 | "time": { 145 | "from": "now-6h", 146 | "to": "now" 147 | }, 148 | "timepicker": {}, 149 | "timezone": "", 150 | "title": "Pod Ephemeral Storage (Node)", 151 | "uid": "triwsyv7k", 152 | "version": 1 153 | } -------------------------------------------------------------------------------- /extra/grafana_dashboard_namespace.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "target": { 12 | "limit": 100, 13 | "matchAny": false, 14 | "tags": [], 15 | "type": "dashboard" 16 | }, 17 | "type": "dashboard" 18 | } 19 | ] 20 | }, 21 | "editable": true, 22 | "gnetId": null, 23 | "graphTooltip": 0, 24 | "id": 25, 25 | "iteration": 1633877508594, 26 | "links": [], 27 | "panels": [ 28 | { 29 | "datasource": "prometheus", 30 | "fieldConfig": { 31 | "defaults": { 32 | "color": { 33 | "mode": "palette-classic" 34 | }, 35 | "custom": { 36 | "axisLabel": "", 37 | "axisPlacement": "auto", 38 | "barAlignment": 0, 39 | "drawStyle": "line", 40 | "fillOpacity": 0, 41 | "gradientMode": "none", 42 | "hideFrom": { 43 | "legend": false, 44 | "tooltip": false, 45 | "viz": false 46 | }, 47 | "lineInterpolation": "linear", 48 | "lineWidth": 1, 49 | "pointSize": 5, 50 | "scaleDistribution": { 51 | "type": "linear" 52 | }, 53 | "showPoints": "auto", 54 | "spanNulls": false, 55 | "stacking": { 56 | "group": "A", 57 | "mode": "none" 58 | }, 59 | "thresholdsStyle": { 60 | "mode": "off" 61 | } 62 | }, 63 | "mappings": [], 64 | "thresholds": { 65 | "mode": "absolute", 66 | "steps": [ 67 | { 68 | "color": "green", 69 | "value": null 70 | }, 71 | { 72 | "color": "red", 73 | "value": 80 74 | } 75 | ] 76 | } 77 | }, 78 | "overrides": [] 79 | }, 80 | "gridPos": { 81 | "h": 13, 82 | "w": 24, 83 | "x": 0, 84 | "y": 0 85 | }, 86 | "id": 2, 87 | "options": { 88 | "legend": { 89 | "calcs": [], 90 | "displayMode": "list", 91 | "placement": "bottom" 92 | }, 93 | "tooltip": { 94 | "mode": "single" 95 | } 96 | }, 97 | "targets": [ 98 | { 99 | "exemplar": true, 100 | "expr": "sum(kube_pod_ephemeral_storage_used_bytes{exported_namespace=\"$namespace\"}) by (exported_pod)", 101 | "interval": "", 102 | "legendFormat": "{{exported_pod}}", 103 | "refId": "A" 104 | } 105 | ], 106 | "title": "Ephemeral Storage", 107 | "type": "timeseries" 108 | } 109 | ], 110 | "schemaVersion": 30, 111 | "style": "dark", 112 | "tags": [], 113 | "templating": { 114 | "list": [ 115 | { 116 | "allValue": null, 117 | "current": { 118 | "selected": true, 119 | "text": "monitoring", 120 | "value": "monitoring" 121 | }, 122 | "datasource": "prometheus", 123 | "definition": "label_values(kube_pod_ephemeral_storage_used_bytes, exported_namespace)", 124 | "description": null, 125 | "error": null, 126 | "hide": 0, 127 | "includeAll": false, 128 | "label": null, 129 | "multi": false, 130 | "name": "namespace", 131 | "options": [], 132 | "query": { 133 | "query": "label_values(kube_pod_ephemeral_storage_used_bytes, exported_namespace)", 134 | "refId": "StandardVariableQuery" 135 | }, 136 | "refresh": 1, 137 | "regex": "", 138 | "skipUrlSync": false, 139 | "sort": 0, 140 | "type": "query" 141 | } 142 | ] 143 | }, 144 | "time": { 145 | "from": "now-6h", 146 | "to": "now" 147 | }, 148 | "timepicker": {}, 149 | "timezone": "", 150 | "title": "Pod Ephemeral Storage (Namespace)", 151 | "uid": "triwsyv7k", 152 | "version": 1 153 | } -------------------------------------------------------------------------------- /src/kubelet_stats_exporter/collector.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import random 3 | from concurrent.futures import ThreadPoolExecutor 4 | from kubernetes import client, config 5 | from prometheus_client.core import GaugeMetricFamily 6 | from kubelet_stats_exporter.config import KUBERNETES_API_TIMEOUT 7 | from kubelet_stats_exporter.logging import logger 8 | 9 | # Custom Collector 10 | class KubeletCollector(): 11 | """ 12 | Custom Collector Class 13 | Collects Kubelet Metrics from Kubernetes nodes 14 | """ 15 | def __init__(self): 16 | 17 | # k8s config 18 | config.load_incluster_config() 19 | self.k8s_client = client.CoreV1Api() 20 | self.ca_file = '/run/secrets/kubernetes.io/serviceaccount/ca.crt' 21 | self.k8s_token = open('/run/secrets/kubernetes.io/serviceaccount/token', "r").read() 22 | self.auth_headers = { 'Authorization': 'Bearer ' + str(self.k8s_token) } 23 | self.timeout = KUBERNETES_API_TIMEOUT 24 | 25 | # multithread config 26 | self.futures = [] 27 | 28 | # Prometheus metrics to collect 29 | self.metric = GaugeMetricFamily( 30 | 'kube_pod_ephemeral_storage_used_bytes', 31 | 'Kubernetes Pod ephemeral storage used in bytes', 32 | labels=['node','namespace','pod']) 33 | 34 | def collect(self): 35 | """ 36 | Main Function for Custom Collector 37 | Get the list of nodes and iterate 38 | """ 39 | logger.debug(f"Retrieving list of nodes") 40 | nodes = self.k8s_client.list_node() 41 | 42 | # Multithread executor 43 | executor = ThreadPoolExecutor(max_workers=len(nodes.items)) 44 | 45 | # Send nodes to executors 46 | for node in nodes.items: 47 | self.futures.append(executor.submit(self.scrape_node_metrics, node=node)) 48 | 49 | for future in self.futures: 50 | future.result() 51 | 52 | logger.debug(f"Save metric content.") 53 | yield self.metric 54 | 55 | def get_node_info(self, node_name): 56 | """Retrieves node information 57 | Parameters 58 | ---------- 59 | node_name: string 60 | Name of the node to retrieve the information from 61 | Returns 62 | ------- 63 | response - Mapping 64 | Response returned from node /proxy/stats/summary endpoint. 65 | None - NoneType 66 | Returned when exception is raised requesting to node /proxy/stats/summary endpoint. 67 | """ 68 | logger.debug(f"Collecting metrics from node {node_name}") 69 | try: 70 | response = requests.get( 71 | f"https://kubernetes.default.svc/api/v1/nodes/{node_name}/proxy/stats/summary", 72 | headers=self.auth_headers, verify=self.ca_file, timeout=self.timeout) 73 | logger.debug(f"Response received from kubernetes API for node {node_name}") 74 | except requests.ConnectTimeout: 75 | logger.warning(f"Connection timeout to Kubernetes API for node {node_name}") 76 | except Exception as err: 77 | logger.warning(f"Unable to request summary stats from node {node_name} - {str(err)}") 78 | return None 79 | return response.json() 80 | 81 | def get_pod_metrics(self, pod_id): 82 | """Retrieves metrics from pod 83 | Parameters 84 | ---------- 85 | pod_id: string 86 | Pod ID 87 | Returns 88 | ------- 89 | name: string 90 | Pod name 91 | namespace: string 92 | Pod namespace 93 | used_bytes: int 94 | Ephemeral Storage - Used bytes metric value 95 | """ 96 | logger.debug(f"Parsing info from pod: {pod_id}") 97 | name = pod_id['podRef']['name'] 98 | namespace = pod_id['podRef']['namespace'] 99 | try: 100 | used_bytes = pod_id['ephemeral-storage']['usedBytes'] 101 | except Exception as err: 102 | used_bytes = 0 103 | logger.warning(f"Unable to get usedBytes metrics for pod {name}, setting to 0 - {str(err)}") 104 | return name, namespace, used_bytes 105 | 106 | def scrape_node_metrics(self, node): 107 | """Scrapes information from nodes to create the metric to be exported 108 | Parameters 109 | ---------- 110 | node: object 111 | Kubernetes Node Information 112 | """ 113 | node_name = node.metadata.name 114 | logger.debug(f"Processing node {node_name}") 115 | # Check Node is in Ready status 116 | ready_status = [x for x in node.status.conditions if x.type == 'Ready'] 117 | if len(ready_status) > 0 and ready_status[0].status == 'True': 118 | logger.debug(f"Node name: {node_name}, status: {ready_status[0].status}") 119 | node_info = self.get_node_info(node_name) 120 | if node_info is not None and 'pods' in node_info: 121 | for pod in node_info['pods']: 122 | name, namespace, used_bytes = self.get_pod_metrics(pod) 123 | labels=[node_name,namespace,name] 124 | self.metric.add_metric(labels, used_bytes) 125 | else: 126 | logger.warning(f"Failed to fetch info from {node_name}") 127 | else: 128 | logger.warning(f"Node {node_name} is not in Ready status") 129 | logger.debug(f"Finished processing node {node_name}") 130 | --------------------------------------------------------------------------------