├── xzibit.jpg ├── traefik-servicemonitor.yaml ├── test-ingress.yaml ├── blackbox-exporter-values.yaml ├── traefik-prometheusrule.yaml ├── kube-prometheus-stack-values.yaml ├── README.md ├── traefik-dashboard.yaml └── blackbox-exporter-dashboard.yaml /xzibit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cablespaghetti/k3s-monitoring/HEAD/xzibit.jpg -------------------------------------------------------------------------------- /traefik-servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | app: traefik 6 | release: prometheus 7 | name: traefik 8 | spec: 9 | endpoints: 10 | - port: metrics 11 | namespaceSelector: 12 | matchNames: 13 | - kube-system 14 | selector: 15 | matchLabels: 16 | app: traefik 17 | 18 | -------------------------------------------------------------------------------- /test-ingress.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Ingress 3 | metadata: 4 | name: test-ingress 5 | labels: 6 | app: test-ingress 7 | spec: 8 | rules: 9 | - host: test.cablespaghetti.dev 10 | http: 11 | paths: 12 | - path: / 13 | backend: 14 | serviceName: test-ingress 15 | servicePort: http 16 | --- 17 | apiVersion: v1 18 | kind: Service 19 | metadata: 20 | name: test-ingress 21 | labels: 22 | app: test-ingress 23 | spec: 24 | ports: 25 | - port: 80 26 | targetPort: http 27 | protocol: TCP 28 | name: http 29 | selector: 30 | app: test-ingress 31 | --- 32 | apiVersion: apps/v1 33 | kind: Deployment 34 | metadata: 35 | name: test-ingress 36 | labels: 37 | app: test-ingress 38 | spec: 39 | replicas: 1 40 | selector: 41 | matchLabels: 42 | app: test-ingress 43 | template: 44 | metadata: 45 | labels: 46 | app: test-ingress 47 | spec: 48 | containers: 49 | - name: nginx 50 | 
image: nginx:latest 51 | ports: 52 | - containerPort: 80 53 | name: http 54 | -------------------------------------------------------------------------------- /blackbox-exporter-values.yaml: -------------------------------------------------------------------------------- 1 | config: 2 | modules: 3 | http_2xx: 4 | prober: http 5 | timeout: 30s 6 | http: 7 | valid_http_versions: ["HTTP/1.1", "HTTP/2"] 8 | no_follow_redirects: true 9 | preferred_ip_protocol: "ip4" 10 | 11 | serviceMonitor: 12 | enabled: true 13 | defaults: 14 | labels: 15 | release: prometheus 16 | interval: 30s 17 | scrapeTimeout: 30s 18 | targets: 19 | - url: https://test.cablespaghetti.dev 20 | name: test.cablespaghetti.dev 21 | 22 | prometheusRule: 23 | enabled: true 24 | additionalLabels: 25 | app: kube-prometheus-stack 26 | release: prometheus 27 | rules: 28 | - alert: UrlDown 29 | annotations: 30 | message: The status code of {{"{{ $labels.target }}"}} is 4xx or 5xx, or some other failure occurred such as a timeout (30 seconds) for the last 2 minutes. 31 | expr: probe_http_status_code >= 400 or probe_http_status_code == 0 32 | for: 2m 33 | labels: 34 | severity: critical 35 | - alert: SSLCertExpiry 36 | annotations: 37 | message: The SSL certificate for {{"{{ $labels.target }}"}} is going to expire in less than 2 weeks. 38 | expr: probe_ssl_earliest_cert_expiry - time() < 1209600 39 | for: 1m 40 | labels: 41 | severity: warning 42 | - alert: SSLCertExpiry 43 | annotations: 44 | message: The SSL certificate for {{"{{ $labels.target }}"}} is going to expire in less than 7 days. 45 | expr: probe_ssl_earliest_cert_expiry - time() < 604800 46 | for: 1m 47 | labels: 48 | severity: critical 49 | - alert: SlowResponseTime 50 | annotations: 51 | message: The response time for {{"{{ $labels.target }}"}} has been greater than 30 seconds for 5 minutes. 
52 | expr: probe_duration_seconds > 30 53 | for: 5m 54 | labels: 55 | severity: warning 56 | -------------------------------------------------------------------------------- /traefik-prometheusrule.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | app: kube-prometheus-stack 6 | release: prometheus 7 | name: traefik 8 | spec: 9 | groups: 10 | - name: traefik 11 | rules: 12 | - alert: test.cablespaghetti.dev-IngressHigh5xxRate 13 | annotations: 14 | message: The percentage of 5xx errors for test.cablespaghetti.dev over the last 5 minutes is over 1%. 15 | expr: (sum(rate(traefik_backend_request_duration_seconds_count{backend="test.cablespaghetti.dev/",code=~"5[0-9]{2}"}[1m]))/sum(rate(traefik_backend_request_duration_seconds_count{backend="test.cablespaghetti.dev/"}[1m])))*100 > 1 16 | for: 5m 17 | labels: 18 | severity: warning 19 | - alert: test.cablespaghetti.dev-IngressHigh5xxRate 20 | annotations: 21 | message: The percentage of 5xx errors for test.cablespaghetti.dev over the last 1 minute is over 5%. 22 | expr: (sum(rate(traefik_backend_request_duration_seconds_count{backend="test.cablespaghetti.dev/",code=~"5[0-9]{2}"}[1m]))/sum(rate(traefik_backend_request_duration_seconds_count{backend="test.cablespaghetti.dev/"}[1m])))*100 > 5 23 | for: 1m 24 | labels: 25 | severity: critical 26 | 27 | - alert: test.cablespaghetti.dev-IngressHighLatency 28 | annotations: 29 | message: The average latency of test.cablespaghetti.dev over the last 5 minutes is over 1s. 
30 | expr: sum(rate(traefik_backend_request_duration_seconds_sum{backend="test.cablespaghetti.dev/"}[1m]))/sum(rate(traefik_backend_request_duration_seconds_count{backend="test.cablespaghetti.dev/"}[1m])) > 1 31 | for: 5m 32 | labels: 33 | severity: warning 34 | - alert: test.cablespaghetti.dev-IngressHighLatency 35 | annotations: 36 | message: The average latency of test.cablespaghetti.dev over the last 5 minutes is over 5s. 37 | expr: sum(rate(traefik_backend_request_duration_seconds_sum{backend="test.cablespaghetti.dev/"}[1m]))/sum(rate(traefik_backend_request_duration_seconds_count{backend="test.cablespaghetti.dev/"}[1m])) > 5 38 | for: 5m 39 | labels: 40 | severity: critical 41 | -------------------------------------------------------------------------------- /kube-prometheus-stack-values.yaml: -------------------------------------------------------------------------------- 1 | # Disable etcd monitoring. See https://github.com/cablespaghetti/k3s-monitoring/issues/4 2 | kubeEtcd: 3 | enabled: false 4 | 5 | # Disable kube-controller-manager and kube-scheduler monitoring. 
See https://github.com/cablespaghetti/k3s-monitoring/issues/2 6 | kubeControllerManager: 7 | enabled: false 8 | kubeScheduler: 9 | enabled: false 10 | 11 | alertmanager: 12 | config: 13 | global: 14 | smtp_from: you@gmail.com 15 | smtp_smarthost: mailhog:1025 16 | smtp_require_tls: false 17 | # smtp_smarthost: smtp.gmail.com:587 18 | # smtp_auth_username: you@gmail.com 19 | # smtp_auth_password: yourapppassword # https://support.google.com/mail/answer/185833?hl=en-GB 20 | # smtp_auth_identity: you@gmail.com 21 | route: 22 | group_by: ['job'] 23 | group_wait: 30s 24 | group_interval: 5m 25 | repeat_interval: 1h 26 | receiver: email 27 | routes: 28 | - match: 29 | alertname: Watchdog 30 | receiver: 'null' 31 | - match: 32 | alertname: CPUThrottlingHigh 33 | receiver: 'null' 34 | - match: 35 | alertname: KubeMemoryOvercommit 36 | receiver: 'null' 37 | - match: 38 | alertname: KubeCPUOvercommit 39 | receiver: 'null' 40 | - match: 41 | alertname: KubeletTooManyPods 42 | receiver: 'null' 43 | 44 | receivers: 45 | - name: 'null' 46 | - name: email 47 | email_configs: 48 | - send_resolved: true 49 | to: youremail@gmail.com 50 | 51 | # Inhibition rules allow muting a set of alerts while another alert is firing. 52 | # We use this to mute any warning-level notifications if the same alert is already critical. 53 | inhibit_rules: 54 | - source_match: 55 | severity: 'critical' 56 | target_match: 57 | severity: 'warning' 58 | # Apply inhibition only if both the alertname and the namespace are the same. 
59 | equal: ['alertname', 'namespace'] 60 | 61 | alertmanagerSpec: 62 | # replicas: 3 63 | # podAntiAffinity: "soft" 64 | storage: 65 | volumeClaimTemplate: 66 | spec: 67 | accessModes: ["ReadWriteOnce"] 68 | resources: 69 | requests: 70 | storage: 1Gi 71 | # resources: 72 | # limits: 73 | # cpu: 500m 74 | # memory: 64Mi 75 | # requests: 76 | # cpu: 25m 77 | # memory: 32Mi 78 | # priorityClassName: high-priority 79 | 80 | 81 | prometheus: 82 | prometheusSpec: 83 | retention: 3d 84 | 85 | # replicas: 2 86 | # podAntiAffinity: "hard" 87 | storageSpec: 88 | volumeClaimTemplate: 89 | spec: 90 | accessModes: ["ReadWriteOnce"] 91 | resources: 92 | requests: 93 | storage: 10Gi 94 | 95 | # resources: 96 | # limits: 97 | # cpu: "2" 98 | # memory: 5Gi 99 | # requests: 100 | # cpu: 100m 101 | # memory: 4Gi 102 | # priorityClassName: high-priority 103 | # 104 | # service: 105 | # sessionAffinity: "ClientIP" 106 | # 107 | 108 | grafana: 109 | plugins: 110 | - grafana-piechart-panel 111 | # resources: 112 | # limits: 113 | # cpu: 500m 114 | # memory: 128Mi 115 | # requests: 116 | # cpu: 25m 117 | # memory: 64Mi 118 | # 119 | # sidecar: 120 | # resources: 121 | # limits: 122 | # cpu: 100m 123 | # memory: 128Mi 124 | # requests: 125 | # cpu: 5m 126 | # memory: 64Mi 127 | 128 | #prometheusOperator: 129 | # resources: 130 | # limits: 131 | # cpu: 1 132 | # memory: 512Mi 133 | # requests: 134 | # cpu: 50m 135 | # memory: 128Mi 136 | # priorityClassName: high-priority 137 | 138 | #prometheus-node-exporter: 139 | # resources: 140 | # limits: 141 | # cpu: 50m 142 | # memory: 50Mi 143 | # requests: 144 | # cpu: 5m 145 | # memory: 16Mi 146 | # priorityClassName: high-priority 147 | 148 | kube-state-metrics: 149 | # resources: 150 | # limits: 151 | # cpu: 1 152 | # memory: 512Mi 153 | # requests: 154 | # cpu: 5m 155 | # memory: 128Mi 156 | # priorityClassName: high-priority 157 | 158 | # Use an unofficial multi-arch image until kube-state-metrics v2 is stable 159 | image: 160 | repository: 
eddiezane/kube-state-metrics 161 | tag: v1.9.7 162 | 163 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Monitoring K3S with Prometheus Operator 2 | 3 | I originally put this guide together for a talk at a Civo Cloud Community Meetup in July 2020; [here is the video](https://youtu.be/thHzf0fmrFQ). This guide has since been updated to reflect changes in k3s and [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) but not a great deal has changed since the video was recorded. [Sign up for Civo's free KUBE100 beta here if you want a cluster to try out this guide on.](https://www.civo.com/?ref=63c625) 4 | 5 | __*I seem incapable of keeping this up to date. If anyone has a fork they are maintaining please send me a message and I'll link to it. However I have archived this repo.*__ 6 | 7 | Prometheus can be complicated to get started with, which is why many people pick hosted monitoring solutions like Datadog. However it doesn't have to be and if you're monitoring Kubernetes, Prometheus is in my opinion the best option. 8 | 9 | The great people over at CoreOS developed a Prometheus Operator for Kubernetes which allows you to define your Prometheus configuration in YAML and deploy it alongside your application manifests. This makes a lot of sense if you're deploying a lot of applications, maybe across many teams. They can all just define their own monitoring alerts. 
10 | 11 | You will need: 12 | - A k3s cluster like Civo Cloud (the "development" version is no longer needed, ignore what I say in the video) or maybe one installed on a Raspberry Pi with [k3sup](https://github.com/alexellis/k3sup) 13 | - kubectl installed on your machine and configured for that cluster 14 | - [Helm 3](https://helm.sh) installed on your machine 15 | 16 | I'm using [Mailhog](https://github.com/mailhog/MailHog) to receive my alerts for this demo because it's simple. However you might choose to hook into your mail provider to send emails (see commented settings for Gmail example) or send a Slack message (see Prometheus documentation). To install mailhog: 17 | 18 | ``` 19 | helm repo add codecentric https://codecentric.github.io/helm-charts 20 | helm upgrade --install mailhog codecentric/mailhog 21 | ``` 22 | 23 | ## Install Prometheus Operator 24 | 25 | Now installing [Prometheus Operator from the kube-prometheus-stack Helm chart](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) is as simple as running: 26 | ``` 27 | helm repo add prometheus-community https://prometheus-community.github.io/helm-charts 28 | helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --version 13.4.1 --values kube-prometheus-stack-values.yaml 29 | ``` 30 | 31 | *I've picked a specific version of the Helm Chart here which I know works with my config. Feel free to remove the `--version` parameter to get the latest version.* 32 | 33 | This deploys Prometheus, Alert Manager and Grafana with a few options disabled which don't work for k3s. You'll get a set of default Prometheus Rules (Alerts) configured which will alert you about most of the things you need to worry about when running a Kubernetes cluster. 
34 | 35 | There are a few commented out sections like [CPU and Memory resource requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) which you should definitely set when you know the resources each service needs in your environment. 36 | 37 | I also recommend setting up some [Pod Priority Classes](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/) in your cluster and making the core parts of the system a high priority so if the cluster is low on resources Prometheus will still run and alert you. 38 | 39 | Under routes you will see I've sent a few of the default Prometheus Rules to the `null` receiver which effectively mutes them. You might choose to remove some of these or add different alerts to the list. 40 | 41 | Each time you change your values file, just re-run the `helm upgrade` command above for Helm to apply your changes. 42 | 43 | ## Accessing Prometheus, Alert Manager and Grafana 44 | 45 | I haven't configured any Ingress or Load Balancers for access to the services in my values file. This is because Prometheus and Alert Manager don't support any authentication out of the box and Grafana will be spun up with default credentials (Username: `admin` and Password: `prom-operator`). In our production environments we use oauth2-proxy to put Google authentication in front of these services. You could also set up Basic Authentication using Traefik. 46 | 47 | This means you need to use `kubectl port-forward` to access the services for now. 
In separate terminal windows run the following commands: 48 | 49 | ``` 50 | kubectl port-forward svc/prometheus-grafana 8080:80 51 | kubectl port-forward svc/prometheus-kube-prometheus-prometheus 9090 52 | kubectl port-forward svc/prometheus-kube-prometheus-alertmanager 9093 53 | ``` 54 | 55 | This will make Grafana accessible on http://localhost:8080, Prometheus on http://localhost:9090 and Alert Manager on http://localhost:9093 56 | 57 | You'll see that Grafana is already configured with lots of useful dashboards and Prometheus is configured with Rules to send alerts for pretty much everything you need to monitor in a production cluster. 58 | 59 | ## The power of Prometheus Operator 60 | 61 | Because k3s uses [Traefik for ingress](https://rancher.com/docs/k3s/latest/en/networking/#traefik-ingress-controller), we want to add monitoring to that. Prometheus "scrapes" services to get metrics rather than having metrics pushed to it like many other systems. Many "cloud native" applications will expose a port for Prometheus metrics out of the box and Traefik is no exception. For any apps you build you will need a metrics endpoint and a Kubernetes Service with that port exposed. 62 | 63 | All we need to do to get Prometheus scraping Traefik is add a Prometheus Operator `ServiceMonitor` resource which tells it the details of the existing service to scrape. 64 | 65 | ``` 66 | kubectl apply -f traefik-servicemonitor.yaml 67 | ``` 68 | 69 | You can verify that Prometheus is now scraping Traefik for metrics at [http://localhost:9090/targets](http://localhost:9090/targets). 70 | 71 | You can also do something similar with Grafana dashboards. Just deploy them in a `ConfigMap` like this: 72 | 73 | ``` 74 | kubectl apply -f traefik-dashboard.yaml 75 | ``` 76 | 77 | This dashboard JSON is copied from [Grafana's amazing dashboards site](https://grafana.com/grafana/dashboards/4475). 
78 | 79 | Because dashboards can be deployed as ConfigMaps in this way, we haven't configured Grafana with any persistent storage, so any dashboards imported or created and not put in a ConfigMap will disappear if the Pod restarts. 80 | 81 | We can now create alerts with Prometheus Rules using the Prometheus Operator `PrometheusRule`: 82 | 83 | ``` 84 | kubectl apply -f traefik-prometheusrule.yaml 85 | ``` 86 | 87 | You can verify that Prometheus has got your rule configured at [http://localhost:9090/rules](http://localhost:9090/rules). 88 | 89 | ## Blackbox Exporter 90 | 91 | I've also configured [Prometheus Blackbox exporter](https://github.com/prometheus/blackbox_exporter) on my cluster which polls HTTP endpoints. These can be anywhere on the Internet. In this case I'm just monitoring my example website to check everything is working as expected. I've also deployed another dashboard to Grafana for it. 92 | 93 | ``` 94 | helm upgrade --install blackbox-exporter prometheus-community/prometheus-blackbox-exporter --version 4.10.0 --values blackbox-exporter-values.yaml 95 | kubectl apply -f blackbox-exporter-dashboard.yaml 96 | ``` 97 | 98 | *I've picked a specific version of the Helm Chart here which I know works with my config. Feel free to remove the `--version` parameter to get the latest version.* 99 | 100 | ## Monitoring the monitoring 101 | 102 | ![Xzibit Meme](https://github.com/cablespaghetti/k3s-monitoring/raw/master/xzibit.jpg) 103 | 104 | But what if my cluster goes down and my monitoring goes with it? One of the alerts we have sent to the `null` receiver in the Prometheus Operator values is `Watchdog`. This is a Prometheus Rule which always fires. If you send this to somewhere outside of your cluster, you can be alerted if this "Dead Man's Switch" stops firing. 
105 | 106 | At Pulselive we developed a simple solution using AWS Lambda for this https://github.com/PulseInnovations/prometheus-deadmansswitch 107 | 108 | -------------------------------------------------------------------------------- /traefik-dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: traefik-dashboard 5 | labels: 6 | grafana_dashboard: "true" 7 | data: 8 | traefik-dashboard.json: |- 9 | { 10 | "annotations": { 11 | "list": [ 12 | { 13 | "builtIn": 1, 14 | "datasource": "-- Grafana --", 15 | "enable": true, 16 | "hide": true, 17 | "iconColor": "rgba(0, 211, 255, 1)", 18 | "name": "Annotations & Alerts", 19 | "type": "dashboard" 20 | } 21 | ] 22 | }, 23 | "description": "Traefik dashboard prometheus", 24 | "editable": true, 25 | "gnetId": 4475, 26 | "graphTooltip": 0, 27 | "id": 22, 28 | "iteration": 1595773597259, 29 | "links": [], 30 | "panels": [ 31 | { 32 | "datasource": null, 33 | "gridPos": { 34 | "h": 1, 35 | "w": 24, 36 | "x": 0, 37 | "y": 0 38 | }, 39 | "id": 10, 40 | "title": "$backend stats", 41 | "type": "row" 42 | }, 43 | { 44 | "cacheTimeout": null, 45 | "colorBackground": false, 46 | "colorValue": true, 47 | "colors": [ 48 | "#d44a3a", 49 | "rgba(237, 129, 40, 0.89)", 50 | "#299c46" 51 | ], 52 | "datasource": "Prometheus", 53 | "fieldConfig": { 54 | "defaults": { 55 | "custom": {} 56 | }, 57 | "overrides": [] 58 | }, 59 | "format": "none", 60 | "gauge": { 61 | "maxValue": 100, 62 | "minValue": 0, 63 | "show": false, 64 | "thresholdLabels": false, 65 | "thresholdMarkers": true 66 | }, 67 | "gridPos": { 68 | "h": 7, 69 | "w": 8, 70 | "x": 0, 71 | "y": 1 72 | }, 73 | "id": 1, 74 | "interval": null, 75 | "links": [], 76 | "mappingType": 1, 77 | "mappingTypes": [ 78 | { 79 | "name": "value to text", 80 | "value": 1 81 | }, 82 | { 83 | "name": "range to text", 84 | "value": 2 85 | } 86 | ], 87 | "maxDataPoints": 100, 88 | "nullPointMode": 
"connected", 89 | "nullText": null, 90 | "postfix": "", 91 | "postfixFontSize": "50%", 92 | "prefix": "", 93 | "prefixFontSize": "50%", 94 | "rangeMaps": [ 95 | { 96 | "from": "null", 97 | "text": "N/A", 98 | "to": "null" 99 | } 100 | ], 101 | "sparkline": { 102 | "fillColor": "rgba(31, 118, 189, 0.18)", 103 | "full": false, 104 | "lineColor": "rgb(31, 120, 193)", 105 | "show": false 106 | }, 107 | "tableColumn": "", 108 | "targets": [ 109 | { 110 | "expr": "sum(traefik_backend_server_up{backend=\"$backend\"})/count(traefik_config_reloads_total)", 111 | "format": "time_series", 112 | "intervalFactor": 2, 113 | "refId": "A" 114 | } 115 | ], 116 | "thresholds": "0,1", 117 | "title": "$backend status", 118 | "type": "singlestat", 119 | "valueFontSize": "80%", 120 | "valueMaps": [ 121 | { 122 | "op": "=", 123 | "text": "OK", 124 | "value": "1" 125 | } 126 | ], 127 | "valueName": "current" 128 | }, 129 | { 130 | "aliasColors": {}, 131 | "breakPoint": "50%", 132 | "cacheTimeout": null, 133 | "combine": { 134 | "label": "Others", 135 | "threshold": 0 136 | }, 137 | "datasource": "Prometheus", 138 | "fieldConfig": { 139 | "defaults": { 140 | "custom": {} 141 | }, 142 | "overrides": [] 143 | }, 144 | "fontSize": "80%", 145 | "format": "short", 146 | "gridPos": { 147 | "h": 7, 148 | "w": 8, 149 | "x": 8, 150 | "y": 1 151 | }, 152 | "id": 2, 153 | "interval": null, 154 | "legend": { 155 | "percentage": true, 156 | "show": true, 157 | "values": true 158 | }, 159 | "legendType": "Right side", 160 | "links": [], 161 | "maxDataPoints": 3, 162 | "nullPointMode": "connected", 163 | "pieType": "pie", 164 | "strokeWidth": 1, 165 | "targets": [ 166 | { 167 | "expr": "traefik_backend_requests_total{backend=\"$backend\"}", 168 | "format": "time_series", 169 | "intervalFactor": 2, 170 | "legendFormat": "{{method}} : {{code}}", 171 | "refId": "A" 172 | } 173 | ], 174 | "title": "$backend return code", 175 | "type": "grafana-piechart-panel", 176 | "valueName": "current" 177 | }, 178 | { 
179 | "cacheTimeout": null, 180 | "colorBackground": false, 181 | "colorValue": false, 182 | "colors": [ 183 | "#299c46", 184 | "rgba(237, 129, 40, 0.89)", 185 | "#d44a3a" 186 | ], 187 | "datasource": "Prometheus", 188 | "fieldConfig": { 189 | "defaults": { 190 | "custom": {} 191 | }, 192 | "overrides": [] 193 | }, 194 | "format": "ms", 195 | "gauge": { 196 | "maxValue": 100, 197 | "minValue": 0, 198 | "show": false, 199 | "thresholdLabels": false, 200 | "thresholdMarkers": true 201 | }, 202 | "gridPos": { 203 | "h": 7, 204 | "w": 8, 205 | "x": 16, 206 | "y": 1 207 | }, 208 | "id": 4, 209 | "interval": null, 210 | "links": [], 211 | "mappingType": 1, 212 | "mappingTypes": [ 213 | { 214 | "name": "value to text", 215 | "value": 1 216 | }, 217 | { 218 | "name": "range to text", 219 | "value": 2 220 | } 221 | ], 222 | "maxDataPoints": 100, 223 | "nullPointMode": "connected", 224 | "nullText": null, 225 | "postfix": "", 226 | "postfixFontSize": "50%", 227 | "prefix": "", 228 | "prefixFontSize": "50%", 229 | "rangeMaps": [ 230 | { 231 | "from": "null", 232 | "text": "N/A", 233 | "to": "null" 234 | } 235 | ], 236 | "sparkline": { 237 | "fillColor": "rgba(31, 118, 189, 0.18)", 238 | "full": false, 239 | "lineColor": "rgb(31, 120, 193)", 240 | "show": true 241 | }, 242 | "tableColumn": "", 243 | "targets": [ 244 | { 245 | "expr": "sum(traefik_backend_request_duration_seconds_sum{backend=\"$backend\"}) / sum(traefik_backend_requests_total{backend=\"$backend\"}) * 1000", 246 | "format": "time_series", 247 | "intervalFactor": 2, 248 | "refId": "A" 249 | } 250 | ], 251 | "thresholds": "", 252 | "title": "$backend response time", 253 | "type": "singlestat", 254 | "valueFontSize": "80%", 255 | "valueMaps": [ 256 | { 257 | "op": "=", 258 | "text": "N/A", 259 | "value": "null" 260 | } 261 | ], 262 | "valueName": "avg" 263 | }, 264 | { 265 | "aliasColors": {}, 266 | "bars": true, 267 | "dashLength": 10, 268 | "dashes": false, 269 | "datasource": "Prometheus", 270 | "fieldConfig": { 
271 | "defaults": { 272 | "custom": {} 273 | }, 274 | "overrides": [] 275 | }, 276 | "fill": 1, 277 | "fillGradient": 0, 278 | "gridPos": { 279 | "h": 7, 280 | "w": 24, 281 | "x": 0, 282 | "y": 8 283 | }, 284 | "hiddenSeries": false, 285 | "id": 3, 286 | "legend": { 287 | "alignAsTable": true, 288 | "avg": true, 289 | "current": false, 290 | "max": true, 291 | "min": true, 292 | "rightSide": true, 293 | "show": true, 294 | "total": false, 295 | "values": true 296 | }, 297 | "lines": false, 298 | "linewidth": 1, 299 | "links": [], 300 | "nullPointMode": "null", 301 | "options": { 302 | "dataLinks": [] 303 | }, 304 | "percentage": false, 305 | "pointradius": 5, 306 | "points": false, 307 | "renderer": "flot", 308 | "seriesOverrides": [], 309 | "spaceLength": 10, 310 | "stack": false, 311 | "steppedLine": false, 312 | "targets": [ 313 | { 314 | "expr": "sum(rate(traefik_backend_requests_total{backend=\"$backend\"}[5m]))", 315 | "format": "time_series", 316 | "intervalFactor": 2, 317 | "legendFormat": "Total requests $backend", 318 | "refId": "A" 319 | } 320 | ], 321 | "thresholds": [], 322 | "timeFrom": null, 323 | "timeRegions": [], 324 | "timeShift": null, 325 | "title": "Total requests over 5min $backend", 326 | "tooltip": { 327 | "shared": true, 328 | "sort": 0, 329 | "value_type": "individual" 330 | }, 331 | "type": "graph", 332 | "xaxis": { 333 | "buckets": null, 334 | "mode": "time", 335 | "name": null, 336 | "show": true, 337 | "values": [] 338 | }, 339 | "yaxes": [ 340 | { 341 | "format": "short", 342 | "label": null, 343 | "logBase": 1, 344 | "max": null, 345 | "min": null, 346 | "show": true 347 | }, 348 | { 349 | "format": "short", 350 | "label": null, 351 | "logBase": 1, 352 | "max": null, 353 | "min": null, 354 | "show": true 355 | } 356 | ], 357 | "yaxis": { 358 | "align": false, 359 | "alignLevel": null 360 | } 361 | }, 362 | { 363 | "collapsed": false, 364 | "datasource": null, 365 | "gridPos": { 366 | "h": 1, 367 | "w": 24, 368 | "x": 0, 369 | "y": 
15 370 | }, 371 | "id": 12, 372 | "panels": [], 373 | "title": "Global stats", 374 | "type": "row" 375 | }, 376 | { 377 | "aliasColors": {}, 378 | "bars": true, 379 | "dashLength": 10, 380 | "dashes": false, 381 | "datasource": "Prometheus", 382 | "fieldConfig": { 383 | "defaults": { 384 | "custom": {} 385 | }, 386 | "overrides": [] 387 | }, 388 | "fill": 1, 389 | "fillGradient": 0, 390 | "gridPos": { 391 | "h": 7, 392 | "w": 12, 393 | "x": 0, 394 | "y": 16 395 | }, 396 | "hiddenSeries": false, 397 | "id": 5, 398 | "legend": { 399 | "alignAsTable": true, 400 | "avg": false, 401 | "current": true, 402 | "max": true, 403 | "min": true, 404 | "rightSide": true, 405 | "show": true, 406 | "total": false, 407 | "values": true 408 | }, 409 | "lines": false, 410 | "linewidth": 1, 411 | "links": [], 412 | "nullPointMode": "null", 413 | "options": { 414 | "dataLinks": [] 415 | }, 416 | "percentage": false, 417 | "pointradius": 5, 418 | "points": false, 419 | "renderer": "flot", 420 | "seriesOverrides": [], 421 | "spaceLength": 10, 422 | "stack": true, 423 | "steppedLine": false, 424 | "targets": [ 425 | { 426 | "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code=\"200\"}[5m])", 427 | "format": "time_series", 428 | "intervalFactor": 2, 429 | "legendFormat": "{{method}} : {{code}}", 430 | "refId": "A" 431 | } 432 | ], 433 | "thresholds": [], 434 | "timeFrom": null, 435 | "timeRegions": [], 436 | "timeShift": null, 437 | "title": "Status code 200 over 5min", 438 | "tooltip": { 439 | "shared": true, 440 | "sort": 0, 441 | "value_type": "individual" 442 | }, 443 | "type": "graph", 444 | "xaxis": { 445 | "buckets": null, 446 | "mode": "time", 447 | "name": null, 448 | "show": true, 449 | "values": [] 450 | }, 451 | "yaxes": [ 452 | { 453 | "format": "short", 454 | "label": null, 455 | "logBase": 1, 456 | "max": null, 457 | "min": null, 458 | "show": true 459 | }, 460 | { 461 | "format": "short", 462 | "label": null, 463 | "logBase": 1, 464 | "max": 
null, 465 | "min": null, 466 | "show": true 467 | } 468 | ], 469 | "yaxis": { 470 | "align": false, 471 | "alignLevel": null 472 | } 473 | }, 474 | { 475 | "aliasColors": {}, 476 | "bars": true, 477 | "dashLength": 10, 478 | "dashes": false, 479 | "datasource": "Prometheus", 480 | "fieldConfig": { 481 | "defaults": { 482 | "custom": {} 483 | }, 484 | "overrides": [] 485 | }, 486 | "fill": 1, 487 | "fillGradient": 0, 488 | "gridPos": { 489 | "h": 7, 490 | "w": 12, 491 | "x": 12, 492 | "y": 16 493 | }, 494 | "hiddenSeries": false, 495 | "id": 6, 496 | "legend": { 497 | "alignAsTable": true, 498 | "avg": false, 499 | "current": true, 500 | "max": true, 501 | "min": true, 502 | "rightSide": true, 503 | "show": true, 504 | "total": false, 505 | "values": true 506 | }, 507 | "lines": false, 508 | "linewidth": 1, 509 | "links": [], 510 | "nullPointMode": "null", 511 | "options": { 512 | "dataLinks": [] 513 | }, 514 | "percentage": false, 515 | "pointradius": 5, 516 | "points": false, 517 | "renderer": "flot", 518 | "seriesOverrides": [], 519 | "spaceLength": 10, 520 | "stack": true, 521 | "steppedLine": false, 522 | "targets": [ 523 | { 524 | "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code!=\"200\"}[5m])", 525 | "format": "time_series", 526 | "intervalFactor": 2, 527 | "legendFormat": "{{ method }} : {{code}}", 528 | "refId": "A" 529 | } 530 | ], 531 | "thresholds": [], 532 | "timeFrom": null, 533 | "timeRegions": [], 534 | "timeShift": null, 535 | "title": "Others status code over 5min", 536 | "tooltip": { 537 | "shared": true, 538 | "sort": 0, 539 | "value_type": "individual" 540 | }, 541 | "type": "graph", 542 | "xaxis": { 543 | "buckets": null, 544 | "mode": "time", 545 | "name": null, 546 | "show": true, 547 | "values": [] 548 | }, 549 | "yaxes": [ 550 | { 551 | "format": "short", 552 | "label": null, 553 | "logBase": 1, 554 | "max": null, 555 | "min": null, 556 | "show": true 557 | }, 558 | { 559 | "format": "short", 560 | "label": 
null, 561 | "logBase": 1, 562 | "max": null, 563 | "min": null, 564 | "show": true 565 | } 566 | ], 567 | "yaxis": { 568 | "align": false, 569 | "alignLevel": null 570 | } 571 | }, 572 | { 573 | "aliasColors": {}, 574 | "breakPoint": "50%", 575 | "cacheTimeout": null, 576 | "combine": { 577 | "label": "Others", 578 | "threshold": 0 579 | }, 580 | "datasource": "Prometheus", 581 | "fieldConfig": { 582 | "defaults": { 583 | "custom": {} 584 | }, 585 | "overrides": [] 586 | }, 587 | "fontSize": "80%", 588 | "format": "short", 589 | "gridPos": { 590 | "h": 7, 591 | "w": 12, 592 | "x": 0, 593 | "y": 23 594 | }, 595 | "id": 7, 596 | "interval": null, 597 | "legend": { 598 | "show": true, 599 | "values": true 600 | }, 601 | "legendType": "Right side", 602 | "links": [], 603 | "maxDataPoints": 3, 604 | "nullPointMode": "connected", 605 | "pieType": "pie", 606 | "strokeWidth": 1, 607 | "targets": [ 608 | { 609 | "expr": "sum(rate(traefik_backend_requests_total[5m])) by (backend) ", 610 | "format": "time_series", 611 | "interval": "", 612 | "intervalFactor": 2, 613 | "legendFormat": "{{ backend }}", 614 | "refId": "A" 615 | } 616 | ], 617 | "title": "Requests by service", 618 | "type": "grafana-piechart-panel", 619 | "valueName": "total" 620 | }, 621 | { 622 | "aliasColors": {}, 623 | "breakPoint": "50%", 624 | "cacheTimeout": null, 625 | "combine": { 626 | "label": "Others", 627 | "threshold": 0 628 | }, 629 | "datasource": "Prometheus", 630 | "fieldConfig": { 631 | "defaults": { 632 | "custom": {} 633 | }, 634 | "overrides": [] 635 | }, 636 | "fontSize": "80%", 637 | "format": "short", 638 | "gridPos": { 639 | "h": 7, 640 | "w": 12, 641 | "x": 12, 642 | "y": 23 643 | }, 644 | "id": 8, 645 | "interval": null, 646 | "legend": { 647 | "show": true, 648 | "values": true 649 | }, 650 | "legendType": "Right side", 651 | "links": [], 652 | "maxDataPoints": 3, 653 | "nullPointMode": "connected", 654 | "pieType": "pie", 655 | "strokeWidth": 1, 656 | "targets": [ 657 | { 658 | 
"expr": "sum(rate(traefik_entrypoint_requests_total{entrypoint =~ \"$entrypoint\"}[5m])) by (entrypoint) ", 659 | "format": "time_series", 660 | "interval": "", 661 | "intervalFactor": 2, 662 | "legendFormat": "{{ entrypoint }}", 663 | "refId": "A" 664 | } 665 | ], 666 | "title": "Requests by protocol", 667 | "type": "grafana-piechart-panel", 668 | "valueName": "total" 669 | } 670 | ], 671 | "schemaVersion": 25, 672 | "style": "dark", 673 | "tags": [ 674 | "traefik", 675 | "prometheus" 676 | ], 677 | "templating": { 678 | "list": [ 679 | { 680 | "allValue": null, 681 | "current": { 682 | "selected": true, 683 | "text": "test.cablespaghetti.dev/", 684 | "value": "test.cablespaghetti.dev/" 685 | }, 686 | "datasource": "Prometheus", 687 | "definition": "", 688 | "hide": 0, 689 | "includeAll": false, 690 | "label": null, 691 | "multi": false, 692 | "name": "backend", 693 | "options": [], 694 | "query": "label_values(backend)", 695 | "refresh": 1, 696 | "regex": "", 697 | "skipUrlSync": false, 698 | "sort": 0, 699 | "tagValuesQuery": "", 700 | "tags": [], 701 | "tagsQuery": "", 702 | "type": "query", 703 | "useTags": false 704 | }, 705 | { 706 | "allValue": null, 707 | "current": { 708 | "selected": true, 709 | "tags": [], 710 | "text": "All + http + https", 711 | "value": [ 712 | "$__all", 713 | "http", 714 | "https" 715 | ] 716 | }, 717 | "datasource": "Prometheus", 718 | "definition": "", 719 | "hide": 0, 720 | "includeAll": true, 721 | "label": null, 722 | "multi": true, 723 | "name": "entrypoint", 724 | "options": [], 725 | "query": "label_values(entrypoint)", 726 | "refresh": 1, 727 | "regex": "", 728 | "skipUrlSync": false, 729 | "sort": 0, 730 | "tagValuesQuery": "", 731 | "tags": [], 732 | "tagsQuery": "", 733 | "type": "query", 734 | "useTags": false 735 | } 736 | ] 737 | }, 738 | "time": { 739 | "from": "now-1h", 740 | "to": "now" 741 | }, 742 | "timepicker": { 743 | "refresh_intervals": [ 744 | "10s", 745 | "30s", 746 | "1m", 747 | "5m", 748 | "15m", 749 | 
"30m", 750 | "1h", 751 | "2h", 752 | "1d" 753 | ], 754 | "time_options": [ 755 | "5m", 756 | "15m", 757 | "1h", 758 | "6h", 759 | "12h", 760 | "24h", 761 | "2d", 762 | "7d", 763 | "30d" 764 | ] 765 | }, 766 | "timezone": "", 767 | "title": "Traefik", 768 | "uid": "qPdAviJmz", 769 | "version": 1 770 | } 771 | -------------------------------------------------------------------------------- /blackbox-exporter-dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: dashboard-blackbox-exporter 5 | labels: 6 | grafana_dashboard: "true" 7 | data: 8 | dashboard-blackbox-exporter.json: |- 9 | { 10 | "annotations": { 11 | "list": [ 12 | { 13 | "builtIn": 1, 14 | "datasource": "-- Grafana --", 15 | "enable": true, 16 | "hide": true, 17 | "iconColor": "rgba(0, 211, 255, 1)", 18 | "name": "Annotations & Alerts", 19 | "type": "dashboard" 20 | } 21 | ] 22 | }, 23 | "description": "Prometheus Blackbox Exporter Overview", 24 | "editable": true, 25 | "gnetId": 7587, 26 | "graphTooltip": 1, 27 | "id": 19, 28 | "iteration": 1563964248528, 29 | "links": [], 30 | "panels": [ 31 | { 32 | "aliasColors": {}, 33 | "bars": false, 34 | "dashLength": 10, 35 | "dashes": false, 36 | "datasource": "Prometheus", 37 | "fill": 1, 38 | "gridPos": { 39 | "h": 8, 40 | "w": 24, 41 | "x": 0, 42 | "y": 0 43 | }, 44 | "id": 138, 45 | "legend": { 46 | "avg": false, 47 | "current": false, 48 | "max": false, 49 | "min": false, 50 | "show": true, 51 | "total": false, 52 | "values": false 53 | }, 54 | "lines": true, 55 | "linewidth": 1, 56 | "links": [], 57 | "nullPointMode": "null", 58 | "percentage": false, 59 | "pointradius": 5, 60 | "points": false, 61 | "renderer": "flot", 62 | "seriesOverrides": [], 63 | "spaceLength": 10, 64 | "stack": false, 65 | "steppedLine": false, 66 | "targets": [ 67 | { 68 | "expr": "probe_duration_seconds{target=~\"$target\"}", 69 | "format": "time_series", 70 | "interval": 
"$interval", 71 | "intervalFactor": 1, 72 | "legendFormat": "{{ target }}", 73 | "refId": "A" 74 | } 75 | ], 76 | "thresholds": [], 77 | "timeFrom": null, 78 | "timeRegions": [], 79 | "timeShift": null, 80 | "title": "Global Probe Duration", 81 | "tooltip": { 82 | "shared": true, 83 | "sort": 1, 84 | "value_type": "individual" 85 | }, 86 | "type": "graph", 87 | "xaxis": { 88 | "buckets": null, 89 | "mode": "time", 90 | "name": null, 91 | "show": true, 92 | "values": [] 93 | }, 94 | "yaxes": [ 95 | { 96 | "format": "s", 97 | "label": null, 98 | "logBase": 1, 99 | "max": null, 100 | "min": null, 101 | "show": true 102 | }, 103 | { 104 | "format": "short", 105 | "label": null, 106 | "logBase": 1, 107 | "max": null, 108 | "min": null, 109 | "show": true 110 | } 111 | ], 112 | "yaxis": { 113 | "align": false, 114 | "alignLevel": null 115 | } 116 | }, 117 | { 118 | "collapsed": false, 119 | "gridPos": { 120 | "h": 1, 121 | "w": 24, 122 | "x": 0, 123 | "y": 8 124 | }, 125 | "id": 15, 126 | "panels": [], 127 | "repeat": "target", 128 | "scopedVars": { 129 | "target": { 130 | "selected": true, 131 | "text": "test.cablespaghetti.dev", 132 | "value": "test.cablespaghetti.dev" 133 | } 134 | }, 135 | "title": "$target status", 136 | "type": "row" 137 | }, 138 | { 139 | "cacheTimeout": null, 140 | "colorBackground": true, 141 | "colorValue": false, 142 | "colors": [ 143 | "#d44a3a", 144 | "rgba(237, 129, 40, 0.89)", 145 | "#299c46" 146 | ], 147 | "datasource": "Prometheus", 148 | "format": "none", 149 | "gauge": { 150 | "maxValue": 100, 151 | "minValue": 0, 152 | "show": false, 153 | "thresholdLabels": false, 154 | "thresholdMarkers": true 155 | }, 156 | "gridPos": { 157 | "h": 2, 158 | "w": 4, 159 | "x": 0, 160 | "y": 9 161 | }, 162 | "id": 2, 163 | "interval": null, 164 | "links": [], 165 | "mappingType": 1, 166 | "mappingTypes": [ 167 | { 168 | "name": "value to text", 169 | "value": 1 170 | }, 171 | { 172 | "name": "range to text", 173 | "value": 2 174 | } 175 | ], 176 | 
"maxDataPoints": 100, 177 | "maxPerRow": 8, 178 | "nullPointMode": "connected", 179 | "nullText": null, 180 | "postfix": "", 181 | "postfixFontSize": "50%", 182 | "prefix": "", 183 | "prefixFontSize": "50%", 184 | "rangeMaps": [ 185 | { 186 | "from": "null", 187 | "text": "N/A", 188 | "to": "null" 189 | } 190 | ], 191 | "repeat": null, 192 | "repeatDirection": "v", 193 | "scopedVars": { 194 | "target": { 195 | "selected": true, 196 | "text": "test.cablespaghetti.dev", 197 | "value": "test.cablespaghetti.dev" 198 | } 199 | }, 200 | "sparkline": { 201 | "fillColor": "rgba(31, 118, 189, 0.18)", 202 | "full": false, 203 | "lineColor": "rgb(31, 120, 193)", 204 | "show": false 205 | }, 206 | "tableColumn": "", 207 | "targets": [ 208 | { 209 | "expr": "probe_success{target=~\"$target\"}", 210 | "format": "time_series", 211 | "interval": "$interval", 212 | "intervalFactor": 1, 213 | "refId": "A" 214 | } 215 | ], 216 | "thresholds": "1,1", 217 | "title": "Status", 218 | "type": "singlestat", 219 | "valueFontSize": "80%", 220 | "valueMaps": [ 221 | { 222 | "op": "=", 223 | "text": "N/A", 224 | "value": "null" 225 | }, 226 | { 227 | "op": "=", 228 | "text": "UP", 229 | "value": "1" 230 | }, 231 | { 232 | "op": "=", 233 | "text": "DOWN", 234 | "value": "0" 235 | } 236 | ], 237 | "valueName": "current" 238 | }, 239 | { 240 | "aliasColors": {}, 241 | "bars": false, 242 | "dashLength": 10, 243 | "dashes": false, 244 | "datasource": "Prometheus", 245 | "fill": 1, 246 | "gridPos": { 247 | "h": 6, 248 | "w": 10, 249 | "x": 4, 250 | "y": 9 251 | }, 252 | "id": 25, 253 | "legend": { 254 | "avg": false, 255 | "current": false, 256 | "max": false, 257 | "min": false, 258 | "show": true, 259 | "total": false, 260 | "values": false 261 | }, 262 | "lines": true, 263 | "linewidth": 1, 264 | "links": [], 265 | "nullPointMode": "null", 266 | "percentage": false, 267 | "pointradius": 5, 268 | "points": false, 269 | "renderer": "flot", 270 | "scopedVars": { 271 | "target": { 272 | "selected": 
true, 273 | "text": "test.cablespaghetti.dev", 274 | "value": "test.cablespaghetti.dev" 275 | } 276 | }, 277 | "seriesOverrides": [], 278 | "spaceLength": 10, 279 | "stack": false, 280 | "steppedLine": false, 281 | "targets": [ 282 | { 283 | "expr": "probe_http_duration_seconds{target=~\"$target\"}", 284 | "format": "time_series", 285 | "interval": "$interval", 286 | "intervalFactor": 1, 287 | "legendFormat": "{{ phase }}", 288 | "refId": "B" 289 | } 290 | ], 291 | "thresholds": [], 292 | "timeFrom": null, 293 | "timeRegions": [], 294 | "timeShift": null, 295 | "title": "HTTP Duration", 296 | "tooltip": { 297 | "shared": true, 298 | "sort": 0, 299 | "value_type": "individual" 300 | }, 301 | "type": "graph", 302 | "xaxis": { 303 | "buckets": null, 304 | "mode": "time", 305 | "name": null, 306 | "show": true, 307 | "values": [] 308 | }, 309 | "yaxes": [ 310 | { 311 | "format": "s", 312 | "label": null, 313 | "logBase": 1, 314 | "max": null, 315 | "min": null, 316 | "show": true 317 | }, 318 | { 319 | "format": "short", 320 | "label": null, 321 | "logBase": 1, 322 | "max": null, 323 | "min": null, 324 | "show": true 325 | } 326 | ], 327 | "yaxis": { 328 | "align": false, 329 | "alignLevel": null 330 | } 331 | }, 332 | { 333 | "aliasColors": {}, 334 | "bars": false, 335 | "dashLength": 10, 336 | "dashes": false, 337 | "datasource": "Prometheus", 338 | "fill": 1, 339 | "gridPos": { 340 | "h": 6, 341 | "w": 10, 342 | "x": 14, 343 | "y": 9 344 | }, 345 | "id": 17, 346 | "legend": { 347 | "avg": false, 348 | "current": false, 349 | "max": false, 350 | "min": false, 351 | "show": true, 352 | "total": false, 353 | "values": false 354 | }, 355 | "lines": true, 356 | "linewidth": 1, 357 | "links": [], 358 | "nullPointMode": "null", 359 | "percentage": false, 360 | "pointradius": 5, 361 | "points": false, 362 | "renderer": "flot", 363 | "repeat": null, 364 | "scopedVars": { 365 | "target": { 366 | "selected": true, 367 | "text": "test.cablespaghetti.dev", 368 | "value": 
"test.cablespaghetti.dev" 369 | } 370 | }, 371 | "seriesOverrides": [], 372 | "spaceLength": 10, 373 | "stack": false, 374 | "steppedLine": false, 375 | "targets": [ 376 | { 377 | "expr": "probe_duration_seconds{target=~\"$target\"}", 378 | "format": "time_series", 379 | "interval": "$interval", 380 | "intervalFactor": 1, 381 | "legendFormat": "seconds", 382 | "refId": "A" 383 | } 384 | ], 385 | "thresholds": [], 386 | "timeFrom": null, 387 | "timeRegions": [], 388 | "timeShift": null, 389 | "title": "Probe Duration", 390 | "tooltip": { 391 | "shared": true, 392 | "sort": 0, 393 | "value_type": "individual" 394 | }, 395 | "type": "graph", 396 | "xaxis": { 397 | "buckets": null, 398 | "mode": "time", 399 | "name": null, 400 | "show": true, 401 | "values": [] 402 | }, 403 | "yaxes": [ 404 | { 405 | "format": "s", 406 | "label": null, 407 | "logBase": 1, 408 | "max": null, 409 | "min": null, 410 | "show": true 411 | }, 412 | { 413 | "format": "short", 414 | "label": null, 415 | "logBase": 1, 416 | "max": null, 417 | "min": null, 418 | "show": true 419 | } 420 | ], 421 | "yaxis": { 422 | "align": false, 423 | "alignLevel": null 424 | } 425 | }, 426 | { 427 | "cacheTimeout": null, 428 | "colorBackground": false, 429 | "colorValue": false, 430 | "colors": [ 431 | "#299c46", 432 | "rgba(237, 129, 40, 0.89)", 433 | "#d44a3a" 434 | ], 435 | "datasource": "Prometheus", 436 | "decimals": 0, 437 | "format": "none", 438 | "gauge": { 439 | "maxValue": 100, 440 | "minValue": 0, 441 | "show": false, 442 | "thresholdLabels": false, 443 | "thresholdMarkers": true 444 | }, 445 | "gridPos": { 446 | "h": 2, 447 | "w": 4, 448 | "x": 0, 449 | "y": 11 450 | }, 451 | "id": 20, 452 | "interval": null, 453 | "links": [], 454 | "mappingType": 1, 455 | "mappingTypes": [ 456 | { 457 | "name": "value to text", 458 | "value": 1 459 | }, 460 | { 461 | "name": "range to text", 462 | "value": 2 463 | } 464 | ], 465 | "maxDataPoints": 100, 466 | "maxPerRow": 8, 467 | "nullPointMode": "connected", 468 
| "nullText": null, 469 | "postfix": "", 470 | "postfixFontSize": "50%", 471 | "prefix": "", 472 | "prefixFontSize": "50%", 473 | "rangeMaps": [ 474 | { 475 | "from": "null", 476 | "text": "N/A", 477 | "to": "null" 478 | } 479 | ], 480 | "repeat": null, 481 | "repeatDirection": "h", 482 | "scopedVars": { 483 | "target": { 484 | "selected": true, 485 | "text": "test.cablespaghetti.dev", 486 | "value": "test.cablespaghetti.dev" 487 | } 488 | }, 489 | "sparkline": { 490 | "fillColor": "rgba(31, 118, 189, 0.18)", 491 | "full": false, 492 | "lineColor": "rgb(31, 120, 193)", 493 | "show": false 494 | }, 495 | "tableColumn": "", 496 | "targets": [ 497 | { 498 | "expr": "probe_http_status_code{target=~\"$target\"}", 499 | "format": "time_series", 500 | "interval": "$interval", 501 | "intervalFactor": 1, 502 | "refId": "A" 503 | } 504 | ], 505 | "thresholds": "201, 399", 506 | "title": "HTTP Status Code", 507 | "type": "singlestat", 508 | "valueFontSize": "80%", 509 | "valueMaps": [ 510 | { 511 | "op": "=", 512 | "text": "N/A", 513 | "value": "null" 514 | }, 515 | { 516 | "op": "=", 517 | "text": "YES", 518 | "value": "1" 519 | }, 520 | { 521 | "op": "=", 522 | "text": "N/A", 523 | "value": "0" 524 | } 525 | ], 526 | "valueName": "current" 527 | }, 528 | { 529 | "cacheTimeout": null, 530 | "colorBackground": false, 531 | "colorValue": false, 532 | "colors": [ 533 | "#299c46", 534 | "rgba(237, 129, 40, 0.89)", 535 | "#d44a3a" 536 | ], 537 | "datasource": "Prometheus", 538 | "format": "none", 539 | "gauge": { 540 | "maxValue": 100, 541 | "minValue": 0, 542 | "show": false, 543 | "thresholdLabels": false, 544 | "thresholdMarkers": true 545 | }, 546 | "gridPos": { 547 | "h": 2, 548 | "w": 4, 549 | "x": 0, 550 | "y": 13 551 | }, 552 | "id": 27, 553 | "interval": null, 554 | "links": [], 555 | "mappingType": 1, 556 | "mappingTypes": [ 557 | { 558 | "name": "value to text", 559 | "value": 1 560 | }, 561 | { 562 | "name": "range to text", 563 | "value": 2 564 | } 565 | ], 566 | 
"maxDataPoints": 100, 567 | "nullPointMode": "connected", 568 | "nullText": null, 569 | "postfix": "", 570 | "postfixFontSize": "50%", 571 | "prefix": "", 572 | "prefixFontSize": "50%", 573 | "rangeMaps": [ 574 | { 575 | "from": "null", 576 | "text": "N/A", 577 | "to": "null" 578 | } 579 | ], 580 | "scopedVars": { 581 | "target": { 582 | "selected": true, 583 | "text": "test.cablespaghetti.dev", 584 | "value": "test.cablespaghetti.dev" 585 | } 586 | }, 587 | "sparkline": { 588 | "fillColor": "rgba(31, 118, 189, 0.18)", 589 | "full": false, 590 | "lineColor": "rgb(31, 120, 193)", 591 | "show": false 592 | }, 593 | "tableColumn": "", 594 | "targets": [ 595 | { 596 | "expr": "probe_http_version{target=~\"$target\"}", 597 | "format": "time_series", 598 | "intervalFactor": 1, 599 | "refId": "A" 600 | } 601 | ], 602 | "thresholds": "", 603 | "title": "HTTP Version", 604 | "type": "singlestat", 605 | "valueFontSize": "80%", 606 | "valueMaps": [ 607 | { 608 | "op": "=", 609 | "text": "N/A", 610 | "value": "null" 611 | } 612 | ], 613 | "valueName": "current" 614 | }, 615 | { 616 | "cacheTimeout": null, 617 | "colorBackground": false, 618 | "colorValue": true, 619 | "colors": [ 620 | "#d44a3a", 621 | "rgba(237, 129, 40, 0.89)", 622 | "#299c46" 623 | ], 624 | "datasource": "Prometheus", 625 | "format": "none", 626 | "gauge": { 627 | "maxValue": 100, 628 | "minValue": 0, 629 | "show": false, 630 | "thresholdLabels": false, 631 | "thresholdMarkers": true 632 | }, 633 | "gridPos": { 634 | "h": 2, 635 | "w": 4, 636 | "x": 0, 637 | "y": 15 638 | }, 639 | "id": 18, 640 | "interval": null, 641 | "links": [], 642 | "mappingType": 1, 643 | "mappingTypes": [ 644 | { 645 | "name": "value to text", 646 | "value": 1 647 | }, 648 | { 649 | "name": "range to text", 650 | "value": 2 651 | } 652 | ], 653 | "maxDataPoints": 100, 654 | "maxPerRow": 8, 655 | "nullPointMode": "connected", 656 | "nullText": null, 657 | "postfix": "", 658 | "postfixFontSize": "50%", 659 | "prefix": "", 660 | 
"prefixFontSize": "50%", 661 | "rangeMaps": [ 662 | { 663 | "from": "null", 664 | "text": "N/A", 665 | "to": "null" 666 | } 667 | ], 668 | "repeat": null, 669 | "repeatDirection": "v", 670 | "scopedVars": { 671 | "target": { 672 | "selected": true, 673 | "text": "test.cablespaghetti.dev", 674 | "value": "test.cablespaghetti.dev" 675 | } 676 | }, 677 | "sparkline": { 678 | "fillColor": "rgba(31, 118, 189, 0.18)", 679 | "full": false, 680 | "lineColor": "rgb(31, 120, 193)", 681 | "show": false 682 | }, 683 | "tableColumn": "", 684 | "targets": [ 685 | { 686 | "expr": "probe_http_ssl{target=~\"$target\"}", 687 | "format": "time_series", 688 | "interval": "$interval", 689 | "intervalFactor": 1, 690 | "refId": "A" 691 | } 692 | ], 693 | "thresholds": "0, 1", 694 | "title": "SSL", 695 | "type": "singlestat", 696 | "valueFontSize": "80%", 697 | "valueMaps": [ 698 | { 699 | "op": "=", 700 | "text": "N/A", 701 | "value": "null" 702 | }, 703 | { 704 | "op": "=", 705 | "text": "YES", 706 | "value": "1" 707 | }, 708 | { 709 | "op": "=", 710 | "text": "NO", 711 | "value": "0" 712 | } 713 | ], 714 | "valueName": "current" 715 | }, 716 | { 717 | "cacheTimeout": null, 718 | "colorBackground": false, 719 | "colorValue": true, 720 | "colors": [ 721 | "#d44a3a", 722 | "rgba(237, 129, 40, 0.89)", 723 | "#299c46" 724 | ], 725 | "datasource": "Prometheus", 726 | "decimals": 2, 727 | "format": "dtdurations", 728 | "gauge": { 729 | "maxValue": 100, 730 | "minValue": 0, 731 | "show": false, 732 | "thresholdLabels": false, 733 | "thresholdMarkers": true 734 | }, 735 | "gridPos": { 736 | "h": 2, 737 | "w": 10, 738 | "x": 4, 739 | "y": 15 740 | }, 741 | "id": 19, 742 | "interval": null, 743 | "links": [], 744 | "mappingType": 1, 745 | "mappingTypes": [ 746 | { 747 | "name": "value to text", 748 | "value": 1 749 | }, 750 | { 751 | "name": "range to text", 752 | "value": 2 753 | } 754 | ], 755 | "maxDataPoints": 100, 756 | "maxPerRow": 8, 757 | "nullPointMode": "connected", 758 | "nullText": 
null, 759 | "postfix": "", 760 | "postfixFontSize": "50%", 761 | "prefix": "", 762 | "prefixFontSize": "50%", 763 | "rangeMaps": [ 764 | { 765 | "from": "null", 766 | "text": "N/A", 767 | "to": "null" 768 | } 769 | ], 770 | "repeat": null, 771 | "repeatDirection": "h", 772 | "scopedVars": { 773 | "target": { 774 | "selected": true, 775 | "text": "test.cablespaghetti.dev", 776 | "value": "test.cablespaghetti.dev" 777 | } 778 | }, 779 | "sparkline": { 780 | "fillColor": "rgba(31, 118, 189, 0.18)", 781 | "full": false, 782 | "lineColor": "rgb(31, 120, 193)", 783 | "show": false 784 | }, 785 | "tableColumn": "", 786 | "targets": [ 787 | { 788 | "expr": "probe_ssl_earliest_cert_expiry{target=~\"$target\"} - time()", 789 | "format": "time_series", 790 | "interval": "$interval", 791 | "intervalFactor": 1, 792 | "refId": "A" 793 | } 794 | ], 795 | "thresholds": "0,1209600", 796 | "timeFrom": null, 797 | "title": "SSL Expiry", 798 | "type": "singlestat", 799 | "valueFontSize": "80%", 800 | "valueMaps": [ 801 | { 802 | "op": "=", 803 | "text": "N/A", 804 | "value": "null" 805 | }, 806 | { 807 | "op": "=", 808 | "text": "YES", 809 | "value": "1" 810 | }, 811 | { 812 | "op": "=", 813 | "text": "NO", 814 | "value": "0" 815 | } 816 | ], 817 | "valueName": "current" 818 | }, 819 | { 820 | "cacheTimeout": null, 821 | "colorBackground": false, 822 | "colorValue": false, 823 | "colors": [ 824 | "#299c46", 825 | "rgba(237, 129, 40, 0.89)", 826 | "#d44a3a" 827 | ], 828 | "datasource": "Prometheus", 829 | "format": "s", 830 | "gauge": { 831 | "maxValue": 100, 832 | "minValue": 0, 833 | "show": false, 834 | "thresholdLabels": false, 835 | "thresholdMarkers": true 836 | }, 837 | "gridPos": { 838 | "h": 2, 839 | "w": 5, 840 | "x": 14, 841 | "y": 15 842 | }, 843 | "id": 23, 844 | "interval": null, 845 | "links": [], 846 | "mappingType": 1, 847 | "mappingTypes": [ 848 | { 849 | "name": "value to text", 850 | "value": 1 851 | }, 852 | { 853 | "name": "range to text", 854 | "value": 2 855 | } 
856 | ], 857 | "maxDataPoints": 100, 858 | "nullPointMode": "connected", 859 | "nullText": null, 860 | "postfix": "", 861 | "postfixFontSize": "50%", 862 | "prefix": "", 863 | "prefixFontSize": "50%", 864 | "rangeMaps": [ 865 | { 866 | "from": "null", 867 | "text": "N/A", 868 | "to": "null" 869 | } 870 | ], 871 | "repeat": null, 872 | "scopedVars": { 873 | "target": { 874 | "selected": true, 875 | "text": "test.cablespaghetti.dev", 876 | "value": "test.cablespaghetti.dev" 877 | } 878 | }, 879 | "sparkline": { 880 | "fillColor": "rgba(31, 118, 189, 0.18)", 881 | "full": false, 882 | "lineColor": "rgb(31, 120, 193)", 883 | "show": false 884 | }, 885 | "tableColumn": "", 886 | "targets": [ 887 | { 888 | "expr": "avg(probe_duration_seconds{target=~\"$target\"})", 889 | "format": "time_series", 890 | "interval": "$interval", 891 | "intervalFactor": 1, 892 | "refId": "A" 893 | } 894 | ], 895 | "thresholds": "", 896 | "title": "Average Probe Duration", 897 | "type": "singlestat", 898 | "valueFontSize": "80%", 899 | "valueMaps": [ 900 | { 901 | "op": "=", 902 | "text": "N/A", 903 | "value": "null" 904 | } 905 | ], 906 | "valueName": "current" 907 | }, 908 | { 909 | "cacheTimeout": null, 910 | "colorBackground": false, 911 | "colorValue": false, 912 | "colors": [ 913 | "#299c46", 914 | "rgba(237, 129, 40, 0.89)", 915 | "#d44a3a" 916 | ], 917 | "datasource": "Prometheus", 918 | "format": "s", 919 | "gauge": { 920 | "maxValue": 100, 921 | "minValue": 0, 922 | "show": false, 923 | "thresholdLabels": false, 924 | "thresholdMarkers": true 925 | }, 926 | "gridPos": { 927 | "h": 2, 928 | "w": 5, 929 | "x": 19, 930 | "y": 15 931 | }, 932 | "id": 24, 933 | "interval": null, 934 | "links": [], 935 | "mappingType": 1, 936 | "mappingTypes": [ 937 | { 938 | "name": "value to text", 939 | "value": 1 940 | }, 941 | { 942 | "name": "range to text", 943 | "value": 2 944 | } 945 | ], 946 | "maxDataPoints": 100, 947 | "nullPointMode": "connected", 948 | "nullText": null, 949 | "postfix": "", 
950 | "postfixFontSize": "50%", 951 | "prefix": "", 952 | "prefixFontSize": "50%", 953 | "rangeMaps": [ 954 | { 955 | "from": "null", 956 | "text": "N/A", 957 | "to": "null" 958 | } 959 | ], 960 | "repeat": null, 961 | "repeatDirection": "h", 962 | "scopedVars": { 963 | "target": { 964 | "selected": true, 965 | "text": "test.cablespaghetti.dev", 966 | "value": "test.cablespaghetti.dev" 967 | } 968 | }, 969 | "sparkline": { 970 | "fillColor": "rgba(31, 118, 189, 0.18)", 971 | "full": false, 972 | "lineColor": "rgb(31, 120, 193)", 973 | "show": false 974 | }, 975 | "tableColumn": "", 976 | "targets": [ 977 | { 978 | "expr": "avg(probe_dns_lookup_time_seconds{target=~\"$target\"})", 979 | "format": "time_series", 980 | "interval": "$interval", 981 | "intervalFactor": 1, 982 | "refId": "A" 983 | } 984 | ], 985 | "thresholds": "", 986 | "title": "Average DNS Lookup", 987 | "type": "singlestat", 988 | "valueFontSize": "80%", 989 | "valueMaps": [ 990 | { 991 | "op": "=", 992 | "text": "N/A", 993 | "value": "null" 994 | } 995 | ], 996 | "valueName": "current" 997 | } 998 | ], 999 | "refresh": "10s", 1000 | "schemaVersion": 18, 1001 | "style": "dark", 1002 | "tags": [ 1003 | "blackbox", 1004 | "prometheus" 1005 | ], 1006 | "templating": { 1007 | "list": [ 1008 | { 1009 | "auto": true, 1010 | "auto_count": 10, 1011 | "auto_min": "10s", 1012 | "current": { 1013 | "text": "10s", 1014 | "value": "10s" 1015 | }, 1016 | "hide": 0, 1017 | "label": "Interval", 1018 | "name": "interval", 1019 | "options": [ 1020 | { 1021 | "selected": false, 1022 | "text": "auto", 1023 | "value": "$__auto_interval_interval" 1024 | }, 1025 | { 1026 | "selected": false, 1027 | "text": "5s", 1028 | "value": "5s" 1029 | }, 1030 | { 1031 | "selected": true, 1032 | "text": "10s", 1033 | "value": "10s" 1034 | }, 1035 | { 1036 | "selected": false, 1037 | "text": "30s", 1038 | "value": "30s" 1039 | }, 1040 | { 1041 | "selected": false, 1042 | "text": "1m", 1043 | "value": "1m" 1044 | }, 1045 | { 1046 | 
"selected": false, 1047 | "text": "10m", 1048 | "value": "10m" 1049 | }, 1050 | { 1051 | "selected": false, 1052 | "text": "30m", 1053 | "value": "30m" 1054 | }, 1055 | { 1056 | "selected": false, 1057 | "text": "1h", 1058 | "value": "1h" 1059 | }, 1060 | { 1061 | "selected": false, 1062 | "text": "6h", 1063 | "value": "6h" 1064 | }, 1065 | { 1066 | "selected": false, 1067 | "text": "12h", 1068 | "value": "12h" 1069 | }, 1070 | { 1071 | "selected": false, 1072 | "text": "1d", 1073 | "value": "1d" 1074 | }, 1075 | { 1076 | "selected": false, 1077 | "text": "7d", 1078 | "value": "7d" 1079 | }, 1080 | { 1081 | "selected": false, 1082 | "text": "14d", 1083 | "value": "14d" 1084 | }, 1085 | { 1086 | "selected": false, 1087 | "text": "30d", 1088 | "value": "30d" 1089 | } 1090 | ], 1091 | "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 1092 | "refresh": 2, 1093 | "skipUrlSync": false, 1094 | "type": "interval" 1095 | }, 1096 | { 1097 | "allValue": null, 1098 | "current": { 1099 | "tags": [], 1100 | "text": "test.cablespaghetti.dev", 1101 | "value": [ 1102 | "test.cablespaghetti.dev" 1103 | ] 1104 | }, 1105 | "datasource": "Prometheus", 1106 | "definition": "label_values(probe_success, target)", 1107 | "hide": 0, 1108 | "includeAll": true, 1109 | "label": null, 1110 | "multi": true, 1111 | "name": "target", 1112 | "options": [], 1113 | "query": "label_values(probe_success, target)", 1114 | "refresh": 1, 1115 | "regex": "", 1116 | "skipUrlSync": false, 1117 | "sort": 0, 1118 | "tagValuesQuery": "", 1119 | "tags": [], 1120 | "tagsQuery": "", 1121 | "type": "query", 1122 | "useTags": false 1123 | } 1124 | ] 1125 | }, 1126 | "time": { 1127 | "from": "now-1h", 1128 | "to": "now" 1129 | }, 1130 | "timepicker": { 1131 | "refresh_intervals": [ 1132 | "5s", 1133 | "10s", 1134 | "30s", 1135 | "1m", 1136 | "5m", 1137 | "15m", 1138 | "30m", 1139 | "1h", 1140 | "2h", 1141 | "1d" 1142 | ], 1143 | "time_options": [ 1144 | "5m", 1145 | "15m", 1146 | "1h", 1147 | "6h", 1148 | 
"12h", 1149 | "24h", 1150 | "2d", 1151 | "7d", 1152 | "30d" 1153 | ] 1154 | }, 1155 | "timezone": "", 1156 | "title": "Prometheus Blackbox Exporter", 1157 | "uid": "xtkCtBkiz", 1158 | "version": 3 1159 | } 1160 | 1161 | --------------------------------------------------------------------------------