├── .gitignore ├── Makefile ├── README.md ├── alertmanager-config ├── .gitignore └── config.yaml.template ├── custom-metrics-hpa ├── deployment.yaml └── hpa.yaml ├── get_env.sh ├── logging ├── datasource.yaml ├── logging-cluster-flow.yaml └── loki-values.yaml ├── longhorn-monitoring ├── dashboard.yaml └── servicemonitor.yaml ├── migration ├── dashboard.yaml └── rule.yaml ├── run_on.sh ├── scrape-custom-service ├── 01-demo-shop.yaml ├── 02-redis-prometheus-exporter.yaml ├── 03-redis-servicemonitor.yaml ├── 04-redis-grafana-dashboard.yaml ├── 05-redis-prometheus-rules-force-alert.yaml ├── 05-redis-prometheus-rules.yaml ├── 06-mysql-cluster.yaml ├── 07-mysql-rules.yaml └── 08-mysql-grafana-dashboard.yaml └── terraform-setup ├── data.tf ├── lb.tf ├── main.tf ├── output.tf ├── provider.tf └── variables.tf /.gitignore: -------------------------------------------------------------------------------- 1 | *.tfvars 2 | *.tfstate 3 | *.tfstate.* 4 | .terraform 5 | kubeconfig* -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | 3 | export KUBECONFIG=kubeconfig 4 | 5 | destroy: 6 | cd terraform-setup && terraform destroy -auto-approve && rm terraform.tfstate terraform.tfstate.backup 7 | 8 | install: step_01 step_02 step_03 9 | 10 | step_01: 11 | echo "Creating infrastructure" 12 | cd terraform-setup && terraform init && terraform apply -auto-approve 13 | sleep 30 14 | 15 | step_02: 16 | echo "Creating k3s cluster on ubuntu vms 0,1,2" 17 | source get_env.sh && k3sup install \ 18 | --ip $$IP0 \ 19 | --user ubuntu \ 20 | --cluster \ 21 | --k3s-channel latest 22 | source get_env.sh && k3sup join \ 23 | --ip $$IP1 \ 24 | --user ubuntu \ 25 | --server-user ubuntu \ 26 | --server-ip $$IP0 \ 27 | --server \ 28 | --k3s-channel latest 29 | source get_env.sh && k3sup join \ 30 | --ip $$IP2 \ 31 | --user ubuntu \ 32 | --server-user ubuntu \ 33 
| --server-ip $$IP0 \ 34 | --server \ 35 | --k3s-channel latest 36 | sleep 20 37 | 38 | step_03: 39 | echo "Installing cert-manager and Rancher" 40 | helm repo update 41 | helm upgrade --install \ 42 | cert-manager jetstack/cert-manager \ 43 | --namespace cert-manager \ 44 | --version v1.0.3 --create-namespace --set installCRDs=true 45 | kubectl rollout status deployment -n cert-manager cert-manager 46 | kubectl rollout status deployment -n cert-manager cert-manager-webhook 47 | helm upgrade --install rancher rancher-latest/rancher \ 48 | --namespace cattle-system \ 49 | --version 2.5.1 \ 50 | --set hostname=rancher-demo.plgrnd.be --create-namespace 51 | kubectl rollout status deployment -n cattle-system rancher 52 | kubectl -n cattle-system wait --for=condition=ready certificate/tls-rancher-ingress 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code examples for Master Class: Monitoring and alerting in Rancher 2.5 2 | 3 | ## Installation steps 4 | 5 | Fill out `terraform-setup/terraform.tfvars` with aws and digital ocean credentials. 6 | 7 | Create infrastructure and install Rancher 8 | 9 | ``` 10 | make install 11 | ``` 12 | 13 | ## Configure Rancher 14 | 15 | Go to https://rancher-demo.plgrnd.be/login and set up admin password and server url. 16 | 17 | ## Install rancher-monitoring 18 | 19 | * In the local cluster, go to Cluster Explorer -> Apps & Marketplace. 20 | * Install the rancher-monitoring app with the default settings 21 | 22 | ! NOTE: On Rancher <2.5.2, install from dev-v2.5 branch, enable k3s in yaml, increase memory limit to 3500Mi 23 | 24 | ## Explore rancher-monitoring 25 | 26 | * See the helm install output 27 | * Have a look at the installed workloads in `cattle-monitoring-system`. 
28 | * Have a look at Prometheus 29 | * Scraping targets 30 | * Built-in alerting rules 31 | * Have a look at Grafana 32 | * Built-in dashboards 33 | * Logging into Grafana as admin 34 | 35 | ## Deploy your own workload and scrape it 36 | 37 | * Deploy shop 38 | 39 | ``` 40 | kubectl -n default apply -f scrape-custom-service/01-demo-shop.yaml 41 | ``` 42 | 43 | * Have a look at shop 44 | 45 | * Add prometheus-exporter to redis deployment 46 | 47 | ``` 48 | kubectl -n default apply -f scrape-custom-service/02-redis-prometheus-exporter.yaml 49 | ``` 50 | 51 | * Add ServiceMonitor for redis deployment 52 | 53 | ``` 54 | kubectl -n default apply -f scrape-custom-service/03-redis-servicemonitor.yaml 55 | ``` 56 | 57 | * See that Prometheus starts scraping Redis 58 | 59 | * Add Redis Grafana dashboard 60 | 61 | ``` 62 | kubectl apply -f scrape-custom-service/04-redis-grafana-dashboard.yaml 63 | ``` 64 | 65 | * Add Redis PrometheusRule 66 | 67 | ``` 68 | kubectl -n default apply -f scrape-custom-service/05-redis-prometheus-rules.yaml 69 | ``` 70 | 71 | * Configure alertmanager 72 | 73 | * Force alert 74 | 75 | ``` 76 | kubectl -n default apply -f scrape-custom-service/05-redis-prometheus-rules-force-alert.yaml 77 | ``` 78 | * See that alert fires 79 | 80 | * Reset 81 | 82 | ``` 83 | kubectl -n default apply -f scrape-custom-service/05-redis-prometheus-rules.yaml 84 | ``` 85 | 86 | ## More and more helm charts already include ServiceMonitors 87 | 88 | ``` 89 | helm repo add presslabs https://presslabs.github.io/charts 90 | helm upgrade --install mysql-operator presslabs/mysql-operator --namespace mysql-operator --set serviceMonitor.enabled=true --create-namespace 91 | ``` 92 | 93 | Add db 94 | 95 | ``` 96 | kubectl apply -f scrape-custom-service/06-mysql-cluster.yaml 97 | ``` 98 | 99 | Add rules and dashboard 100 | 101 | ``` 102 | kubectl apply -f scrape-custom-service/07-mysql-rules.yaml 103 | kubectl apply -f scrape-custom-service/08-mysql-grafana-dashboard.yaml 104 | 
``` 105 | 106 | ## Logging 107 | 108 | * Install rancher-logging 109 | 110 | * Install loki 111 | 112 | ``` 113 | helm upgrade --install loki loki/loki --namespace loki -f logging/loki-values.yaml --create-namespace 114 | ``` 115 | 116 | * Add grafana datasource 117 | 118 | ``` 119 | kubectl apply -f logging/datasource.yaml 120 | kubectl rollout restart deployment -n cattle-monitoring-system rancher-monitoring-grafana 121 | ``` 122 | 123 | * Add ClusterFlow and Output 124 | 125 | ``` 126 | kubectl apply -f logging/logging-cluster-flow.yaml 127 | ``` 128 | 129 | Wait a bit and show logs in Grafana Explorer for `{namespace="default"}` 130 | 131 | ## HPA 132 | 133 | * Deploy sample app 134 | 135 | ``` 136 | kubectl -n default apply -f custom-metrics-hpa/deployment.yaml 137 | ``` 138 | 139 | Deploy HPA 140 | 141 | ``` 142 | kubectl -n default apply -f custom-metrics-hpa/hpa.yaml 143 | ``` 144 | 145 | Create load at https://sample-app.plgrnd.be/ 146 | 147 | ``` 148 | watch kubectl describe hpa -n default 149 | watch kubectl get pods -n default 150 | ``` 151 | 152 | ## V1 to V2 migration 153 | 154 | * Notifiers 155 | * Dashboards 156 | * Non Prometheus Query Alerts 157 | * Prometheus Query Alerts 158 | 159 | 160 | ``` 161 | apiVersion: monitoring.coreos.com/v1 162 | kind: PrometheusRule 163 | metadata: 164 | name: custom-rules 165 | namespace: default 166 | spec: 167 | groups: 168 | - name: custom.rules 169 | rules: 170 | - alert: Deployment with unavailable replicas 171 | expr: kube_deployment_status_replicas_unavailable > 0 172 | for: 5m 173 | labels: 174 | severity: critical 175 | annotations: 176 | summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has {{ $value }} unavailable replicas" 177 | ``` 178 | 179 | ## Istio 180 | 181 | ``` 182 | kubectl label namespace default istio-injection=enabled 183 | ``` 184 | 185 | Redeploy everything -------------------------------------------------------------------------------- /alertmanager-config/.gitignore: 
-------------------------------------------------------------------------------- 1 | config.yaml -------------------------------------------------------------------------------- /alertmanager-config/config.yaml.template: -------------------------------------------------------------------------------- 1 | global: 2 | resolve_timeout: 5m 3 | receivers: 4 | - name: "null" 5 | - name: slack 6 | slack_configs: 7 | - api_url: 8 | route: 9 | group_by: 10 | - job 11 | group_interval: 5m 12 | group_wait: 30s 13 | receiver: slack 14 | repeat_interval: 12h 15 | routes: 16 | - match: 17 | alertname: Watchdog 18 | receiver: "null" 19 | templates: 20 | - /etc/alertmanager/config/*.tmpl -------------------------------------------------------------------------------- /custom-metrics-hpa/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: sample-app 5 | labels: 6 | app: sample-app 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: sample-app 12 | template: 13 | metadata: 14 | labels: 15 | app: sample-app 16 | spec: 17 | containers: 18 | - image: luxas/autoscale-demo:v0.1.2 19 | name: metrics-provider 20 | ports: 21 | - name: http 22 | containerPort: 8080 23 | --- 24 | apiVersion: v1 25 | kind: Service 26 | metadata: 27 | name: sample-app 28 | labels: 29 | app: sample-app 30 | spec: 31 | ports: 32 | - port: 80 33 | targetPort: 8080 34 | name: http 35 | selector: 36 | app: sample-app 37 | --- 38 | kind: Ingress 39 | apiVersion: networking.k8s.io/v1beta1 40 | metadata: 41 | name: sample-app 42 | spec: 43 | rules: 44 | - host: sample-app.plgrnd.be 45 | http: 46 | paths: 47 | - path: / 48 | backend: 49 | serviceName: sample-app 50 | servicePort: 80 51 | --- 52 | kind: ServiceMonitor 53 | apiVersion: monitoring.coreos.com/v1 54 | metadata: 55 | name: sample-app 56 | labels: 57 | app: sample-app 58 | spec: 59 | selector: 60 | matchLabels: 61 | app: sample-app 
62 | endpoints: 63 | - port: http -------------------------------------------------------------------------------- /custom-metrics-hpa/hpa.yaml: -------------------------------------------------------------------------------- 1 | kind: HorizontalPodAutoscaler 2 | apiVersion: autoscaling/v2beta1 3 | metadata: 4 | name: sample-app 5 | spec: 6 | scaleTargetRef: 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | name: sample-app 10 | minReplicas: 1 11 | maxReplicas: 10 12 | metrics: 13 | - type: Pods 14 | pods: 15 | metricName: http_requests 16 | # target 500 milli-requests per second, 17 | # which is 1 request every two seconds 18 | targetAverageValue: 500m -------------------------------------------------------------------------------- /get_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | $(terraform output -state=terraform-setup/terraform.tfstate -json all_node_ips | jq -r 'keys[] as $k | "export IP\($k)=\(.[$k])"') 4 | -------------------------------------------------------------------------------- /logging/datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: loki-datasource 5 | namespace: cattle-monitoring-system 6 | labels: 7 | grafana_datasource: "1" 8 | data: 9 | loki-stack-datasource.yaml: |- 10 | apiVersion: 1 11 | datasources: 12 | - name: Loki 13 | type: loki 14 | access: proxy 15 | url: http://loki.loki:3100 16 | version: 1 17 | -------------------------------------------------------------------------------- /logging/logging-cluster-flow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: logging.banzaicloud.io/v1beta1 2 | kind: ClusterOutput 3 | metadata: 4 | name: loki 5 | namespace: cattle-logging-system 6 | spec: 7 | loki: 8 | url: http://loki.loki:3100 9 | configure_kubernetes_labels: true 10 | --- 11 | apiVersion: 
logging.banzaicloud.io/v1beta1 12 | kind: ClusterFlow 13 | metadata: 14 | name: all-logs-to-loki 15 | namespace: cattle-logging-system 16 | spec: 17 | globalOutputRefs: 18 | - loki 19 | -------------------------------------------------------------------------------- /logging/loki-values.yaml: -------------------------------------------------------------------------------- 1 | persistence: 2 | enabled: true 3 | accessModes: 4 | - ReadWriteOnce 5 | size: 10Gi 6 | storageClassName: local-path 7 | serviceMonitor: 8 | enabled: true 9 | resources: 10 | limits: 11 | cpu: 400m 12 | memory: 1024Mi 13 | requests: 14 | cpu: 100m 15 | memory: 512Mi 16 | 17 | -------------------------------------------------------------------------------- /longhorn-monitoring/dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | longhorn.json: |- 4 | { 5 | "__inputs": [ 6 | ], 7 | "__requires": [ 8 | { 9 | "type": "grafana", 10 | "id": "grafana", 11 | "name": "Grafana", 12 | "version": "7.1.5" 13 | }, 14 | { 15 | "type": "panel", 16 | "id": "graph", 17 | "name": "Graph", 18 | "version": "" 19 | }, 20 | { 21 | "type": "datasource", 22 | "id": "prometheus", 23 | "name": "Prometheus", 24 | "version": "1.0.0" 25 | }, 26 | { 27 | "type": "panel", 28 | "id": "stat", 29 | "name": "Stat", 30 | "version": "" 31 | }, 32 | { 33 | "type": "panel", 34 | "id": "table", 35 | "name": "Table", 36 | "version": "" 37 | }, 38 | { 39 | "type": "panel", 40 | "id": "text", 41 | "name": "Text", 42 | "version": "7.1.0" 43 | } 44 | ], 45 | "annotations": { 46 | "list": [ 47 | { 48 | "builtIn": 1, 49 | "datasource": "-- Grafana --", 50 | "enable": true, 51 | "hide": true, 52 | "iconColor": "rgba(0, 211, 255, 1)", 53 | "name": "Annotations & Alerts", 54 | "type": "dashboard" 55 | } 56 | ] 57 | }, 58 | "editable": true, 59 | "gnetId": 13032, 60 | "graphTooltip": 0, 61 | "id": null, 62 | "links": [], 63 | "panels": [ 64 | { 65 | "datasource": null, 
66 | "fieldConfig": { 67 | "defaults": { 68 | "custom": {} 69 | }, 70 | "overrides": [] 71 | }, 72 | "gridPos": { 73 | "h": 3, 74 | "w": 24, 75 | "x": 0, 76 | "y": 0 77 | }, 78 | "id": 4, 79 | "options": { 80 | "content": "
\nVOLUMES\n
", 81 | "mode": "html" 82 | }, 83 | "pluginVersion": "7.1.0", 84 | "timeFrom": null, 85 | "timeShift": null, 86 | "title": "", 87 | "transparent": true, 88 | "type": "text" 89 | }, 90 | { 91 | "datasource": "Prometheus", 92 | "description": "The total number of volumes in the Longhorn storage system", 93 | "fieldConfig": { 94 | "defaults": { 95 | "custom": {}, 96 | "mappings": [], 97 | "thresholds": { 98 | "mode": "absolute", 99 | "steps": [ 100 | { 101 | "color": "green", 102 | "value": null 103 | } 104 | ] 105 | } 106 | }, 107 | "overrides": [] 108 | }, 109 | "gridPos": { 110 | "h": 6, 111 | "w": 4, 112 | "x": 0, 113 | "y": 3 114 | }, 115 | "id": 8, 116 | "options": { 117 | "colorMode": "value", 118 | "graphMode": "area", 119 | "justifyMode": "auto", 120 | "orientation": "auto", 121 | "reduceOptions": { 122 | "calcs": [ 123 | "last" 124 | ], 125 | "fields": "", 126 | "values": false 127 | }, 128 | "textMode": "auto" 129 | }, 130 | "pluginVersion": "7.1.5", 131 | "targets": [ 132 | { 133 | "expr": "count(longhorn_volume_capacity_bytes) OR on() vector(0)", 134 | "interval": "", 135 | "legendFormat": "", 136 | "refId": "A" 137 | } 138 | ], 139 | "timeFrom": null, 140 | "timeShift": null, 141 | "title": "Total Number Of Volumes", 142 | "type": "stat" 143 | }, 144 | { 145 | "datasource": "Prometheus", 146 | "description": "Healthy volumes are volumes that are attaching to a node and have the number of healthy replicas equals to the expected number of replicas.", 147 | "fieldConfig": { 148 | "defaults": { 149 | "custom": {}, 150 | "mappings": [], 151 | "thresholds": { 152 | "mode": "absolute", 153 | "steps": [ 154 | { 155 | "color": "red", 156 | "value": null 157 | }, 158 | { 159 | "color": "green", 160 | "value": 1 161 | } 162 | ] 163 | } 164 | }, 165 | "overrides": [] 166 | }, 167 | "gridPos": { 168 | "h": 6, 169 | "w": 4, 170 | "x": 4, 171 | "y": 3 172 | }, 173 | "id": 13, 174 | "options": { 175 | "colorMode": "value", 176 | "graphMode": "area", 177 | "justifyMode": 
"auto", 178 | "orientation": "auto", 179 | "reduceOptions": { 180 | "calcs": [ 181 | "last" 182 | ], 183 | "fields": "", 184 | "values": false 185 | }, 186 | "textMode": "auto" 187 | }, 188 | "pluginVersion": "7.1.5", 189 | "targets": [ 190 | { 191 | "expr": "count(longhorn_volume_robustness==1) OR on() vector(0)", 192 | "interval": "", 193 | "legendFormat": "", 194 | "refId": "A" 195 | } 196 | ], 197 | "timeFrom": null, 198 | "timeShift": null, 199 | "title": "Number Of Healthy Volumes", 200 | "type": "stat" 201 | }, 202 | { 203 | "aliasColors": {}, 204 | "bars": false, 205 | "dashLength": 10, 206 | "dashes": false, 207 | "datasource": "Prometheus", 208 | "description": "Note that Longhorn volume actual size is not the size of the filesystem inside a Longhorn volume. See more at : https://longhorn.io/docs/1.0.2/volumes-and-nodes/volume-size/#volume-actual-size", 209 | "fieldConfig": { 210 | "defaults": { 211 | "custom": {} 212 | }, 213 | "overrides": [] 214 | }, 215 | "fill": 1, 216 | "fillGradient": 0, 217 | "gridPos": { 218 | "h": 9, 219 | "w": 15, 220 | "x": 9, 221 | "y": 3 222 | }, 223 | "hiddenSeries": false, 224 | "id": 12, 225 | "legend": { 226 | "alignAsTable": true, 227 | "avg": false, 228 | "current": true, 229 | "max": false, 230 | "min": false, 231 | "rightSide": true, 232 | "show": true, 233 | "total": false, 234 | "values": true 235 | }, 236 | "lines": true, 237 | "linewidth": 1, 238 | "nullPointMode": "null", 239 | "percentage": false, 240 | "pluginVersion": "7.1.5", 241 | "pointradius": 2, 242 | "points": false, 243 | "renderer": "flot", 244 | "seriesOverrides": [], 245 | "spaceLength": 10, 246 | "stack": false, 247 | "steppedLine": false, 248 | "targets": [ 249 | { 250 | "expr": "( (avg by (volume) (longhorn_volume_actual_size_bytes))/ (avg by (volume) (longhorn_volume_capacity_bytes)) ) *100", 251 | "interval": "", 252 | "legendFormat": "{{volume}}", 253 | "refId": "A" 254 | } 255 | ], 256 | "thresholds": [ 257 | { 258 | "$$hashKey": 
"object:649", 259 | "colorMode": "critical", 260 | "fill": true, 261 | "line": true, 262 | "op": "gt", 263 | "value": 90, 264 | "yaxis": "left" 265 | } 266 | ], 267 | "timeFrom": null, 268 | "timeRegions": [], 269 | "timeShift": null, 270 | "title": "Volume Actual Size/Capacity", 271 | "tooltip": { 272 | "shared": true, 273 | "sort": 0, 274 | "value_type": "individual" 275 | }, 276 | "type": "graph", 277 | "xaxis": { 278 | "buckets": null, 279 | "mode": "time", 280 | "name": null, 281 | "show": true, 282 | "values": [] 283 | }, 284 | "yaxes": [ 285 | { 286 | "$$hashKey": "object:476", 287 | "format": "percent", 288 | "label": null, 289 | "logBase": 1, 290 | "max": null, 291 | "min": null, 292 | "show": true 293 | }, 294 | { 295 | "$$hashKey": "object:477", 296 | "format": "short", 297 | "label": null, 298 | "logBase": 1, 299 | "max": null, 300 | "min": null, 301 | "show": true 302 | } 303 | ], 304 | "yaxis": { 305 | "align": false, 306 | "alignLevel": null 307 | } 308 | }, 309 | { 310 | "datasource": "Prometheus", 311 | "description": "Degraded volumes are volumes that have the number of healthy replicas smaller than the expected number of replicas. e.g. 
User creates a volume with 2 replicas but 1 replicas is failed.", 312 | "fieldConfig": { 313 | "defaults": { 314 | "custom": {}, 315 | "mappings": [], 316 | "thresholds": { 317 | "mode": "absolute", 318 | "steps": [ 319 | { 320 | "color": "green", 321 | "value": null 322 | }, 323 | { 324 | "color": "orange", 325 | "value": 1 326 | } 327 | ] 328 | } 329 | }, 330 | "overrides": [] 331 | }, 332 | "gridPos": { 333 | "h": 6, 334 | "w": 4, 335 | "x": 0, 336 | "y": 9 337 | }, 338 | "id": 15, 339 | "options": { 340 | "colorMode": "value", 341 | "graphMode": "area", 342 | "justifyMode": "auto", 343 | "orientation": "auto", 344 | "reduceOptions": { 345 | "calcs": [ 346 | "last" 347 | ], 348 | "fields": "", 349 | "values": false 350 | }, 351 | "textMode": "auto" 352 | }, 353 | "pluginVersion": "7.1.5", 354 | "targets": [ 355 | { 356 | "expr": "count(longhorn_volume_robustness==2) OR on() vector(0)", 357 | "interval": "", 358 | "legendFormat": "", 359 | "refId": "A" 360 | } 361 | ], 362 | "timeFrom": null, 363 | "timeShift": null, 364 | "title": "Number Of Degraded Volumes", 365 | "type": "stat" 366 | }, 367 | { 368 | "datasource": "Prometheus", 369 | "description": "Fault volumes are volumes that doesn't have any healthy replica.", 370 | "fieldConfig": { 371 | "defaults": { 372 | "custom": {}, 373 | "mappings": [], 374 | "thresholds": { 375 | "mode": "absolute", 376 | "steps": [ 377 | { 378 | "color": "green", 379 | "value": null 380 | }, 381 | { 382 | "color": "red", 383 | "value": 1 384 | } 385 | ] 386 | } 387 | }, 388 | "overrides": [] 389 | }, 390 | "gridPos": { 391 | "h": 6, 392 | "w": 4, 393 | "x": 4, 394 | "y": 9 395 | }, 396 | "id": 16, 397 | "options": { 398 | "colorMode": "value", 399 | "graphMode": "area", 400 | "justifyMode": "auto", 401 | "orientation": "auto", 402 | "reduceOptions": { 403 | "calcs": [ 404 | "last" 405 | ], 406 | "fields": "", 407 | "values": false 408 | }, 409 | "textMode": "auto" 410 | }, 411 | "pluginVersion": "7.1.5", 412 | "targets": [ 413 | 
{ 414 | "expr": "count(longhorn_volume_robustness==3) OR on() vector(0)", 415 | "interval": "", 416 | "legendFormat": "", 417 | "refId": "A" 418 | } 419 | ], 420 | "timeFrom": null, 421 | "timeShift": null, 422 | "title": "Number Of Fault Volumes", 423 | "type": "stat" 424 | }, 425 | { 426 | "datasource": "Prometheus", 427 | "description": "The capacity of each Longhorn volume", 428 | "fieldConfig": { 429 | "defaults": { 430 | "custom": { 431 | "align": null, 432 | "displayMode": "auto" 433 | }, 434 | "decimals": 1, 435 | "mappings": [ 436 | { 437 | "from": "", 438 | "id": 0, 439 | "text": "", 440 | "to": "", 441 | "type": 1, 442 | "value": "" 443 | } 444 | ], 445 | "min": 0, 446 | "thresholds": { 447 | "mode": "absolute", 448 | "steps": [ 449 | { 450 | "color": "green", 451 | "value": null 452 | } 453 | ] 454 | }, 455 | "unit": "bytes" 456 | }, 457 | "overrides": [ 458 | { 459 | "matcher": { 460 | "id": "byName", 461 | "options": "volume" 462 | }, 463 | "properties": [ 464 | { 465 | "id": "custom.displayMode", 466 | "value": "auto" 467 | } 468 | ] 469 | }, 470 | { 471 | "matcher": { 472 | "id": "byName", 473 | "options": "Capacity" 474 | }, 475 | "properties": [ 476 | { 477 | "id": "custom.displayMode", 478 | "value": "lcd-gauge" 479 | } 480 | ] 481 | } 482 | ] 483 | }, 484 | "gridPos": { 485 | "h": 9, 486 | "w": 15, 487 | "x": 9, 488 | "y": 12 489 | }, 490 | "id": 10, 491 | "options": { 492 | "frameIndex": 4, 493 | "showHeader": true, 494 | "sortBy": [ 495 | { 496 | "desc": true, 497 | "displayName": "Capacity" 498 | } 499 | ] 500 | }, 501 | "pluginVersion": "7.1.5", 502 | "targets": [ 503 | { 504 | "expr": "avg by (volume) (longhorn_volume_capacity_bytes)", 505 | "format": "table", 506 | "instant": true, 507 | "interval": "", 508 | "intervalFactor": 1, 509 | "legendFormat": "{{volume}}", 510 | "refId": "A" 511 | } 512 | ], 513 | "timeFrom": null, 514 | "timeShift": null, 515 | "title": "Volume Capacity", 516 | "transformations": [ 517 | { 518 | "id": "organize", 
519 | "options": { 520 | "excludeByName": { 521 | "Time": true 522 | }, 523 | "indexByName": {}, 524 | "renameByName": { 525 | "Value": "Capacity" 526 | } 527 | } 528 | } 529 | ], 530 | "type": "table" 531 | }, 532 | { 533 | "datasource": "Prometheus", 534 | "description": "Attached volumes are volumes that are currently attaching to a node", 535 | "fieldConfig": { 536 | "defaults": { 537 | "custom": {}, 538 | "mappings": [], 539 | "thresholds": { 540 | "mode": "absolute", 541 | "steps": [ 542 | { 543 | "color": "red", 544 | "value": null 545 | }, 546 | { 547 | "color": "green", 548 | "value": 1 549 | } 550 | ] 551 | } 552 | }, 553 | "overrides": [] 554 | }, 555 | "gridPos": { 556 | "h": 6, 557 | "w": 4, 558 | "x": 0, 559 | "y": 15 560 | }, 561 | "id": 34, 562 | "options": { 563 | "colorMode": "value", 564 | "graphMode": "area", 565 | "justifyMode": "auto", 566 | "orientation": "auto", 567 | "reduceOptions": { 568 | "calcs": [ 569 | "last" 570 | ], 571 | "fields": "", 572 | "values": false 573 | }, 574 | "textMode": "auto" 575 | }, 576 | "pluginVersion": "7.1.5", 577 | "targets": [ 578 | { 579 | "expr": "count(longhorn_volume_state==2) OR on() vector(0)", 580 | "interval": "", 581 | "legendFormat": "", 582 | "refId": "A" 583 | } 584 | ], 585 | "timeFrom": null, 586 | "timeShift": null, 587 | "title": "Number Of Attached Volumes", 588 | "type": "stat" 589 | }, 590 | { 591 | "datasource": "Prometheus", 592 | "description": "Detached volumes are volumes that aren't currently attaching to a node", 593 | "fieldConfig": { 594 | "defaults": { 595 | "custom": {}, 596 | "mappings": [], 597 | "thresholds": { 598 | "mode": "absolute", 599 | "steps": [ 600 | { 601 | "color": "green", 602 | "value": null 603 | }, 604 | { 605 | "color": "yellow", 606 | "value": 1 607 | } 608 | ] 609 | } 610 | }, 611 | "overrides": [] 612 | }, 613 | "gridPos": { 614 | "h": 6, 615 | "w": 4, 616 | "x": 4, 617 | "y": 15 618 | }, 619 | "id": 14, 620 | "options": { 621 | "colorMode": "value", 622 | 
"graphMode": "area", 623 | "justifyMode": "auto", 624 | "orientation": "auto", 625 | "reduceOptions": { 626 | "calcs": [ 627 | "last" 628 | ], 629 | "fields": "", 630 | "values": false 631 | }, 632 | "textMode": "auto" 633 | }, 634 | "pluginVersion": "7.1.5", 635 | "targets": [ 636 | { 637 | "expr": "count(longhorn_volume_state==3) OR on() vector(0)", 638 | "interval": "", 639 | "legendFormat": "", 640 | "refId": "A" 641 | } 642 | ], 643 | "timeFrom": null, 644 | "timeShift": null, 645 | "title": "Number Of Detached Volumes", 646 | "type": "stat" 647 | }, 648 | { 649 | "datasource": null, 650 | "fieldConfig": { 651 | "defaults": { 652 | "custom": {} 653 | }, 654 | "overrides": [] 655 | }, 656 | "gridPos": { 657 | "h": 3, 658 | "w": 24, 659 | "x": 0, 660 | "y": 21 661 | }, 662 | "id": 6, 663 | "options": { 664 | "content": "
\nNODES\n
", 665 | "mode": "html" 666 | }, 667 | "pluginVersion": "7.1.0", 668 | "timeFrom": null, 669 | "timeShift": null, 670 | "title": "", 671 | "transparent": true, 672 | "type": "text" 673 | }, 674 | { 675 | "datasource": "Prometheus", 676 | "description": "The total number of nodes in the Longhorn storage system", 677 | "fieldConfig": { 678 | "defaults": { 679 | "custom": {}, 680 | "mappings": [], 681 | "thresholds": { 682 | "mode": "absolute", 683 | "steps": [ 684 | { 685 | "color": "green", 686 | "value": null 687 | } 688 | ] 689 | } 690 | }, 691 | "overrides": [] 692 | }, 693 | "gridPos": { 694 | "h": 6, 695 | "w": 4, 696 | "x": 0, 697 | "y": 24 698 | }, 699 | "id": 18, 700 | "options": { 701 | "colorMode": "value", 702 | "graphMode": "area", 703 | "justifyMode": "auto", 704 | "orientation": "auto", 705 | "reduceOptions": { 706 | "calcs": [ 707 | "last" 708 | ], 709 | "fields": "", 710 | "values": false 711 | }, 712 | "textMode": "auto" 713 | }, 714 | "pluginVersion": "7.1.5", 715 | "targets": [ 716 | { 717 | "expr": "avg(longhorn_node_count_total) OR on() vector(0)", 718 | "interval": "", 719 | "legendFormat": "", 720 | "refId": "A" 721 | } 722 | ], 723 | "timeFrom": null, 724 | "timeShift": null, 725 | "title": "Total Number Of Nodes", 726 | "type": "stat" 727 | }, 728 | { 729 | "datasource": "Prometheus", 730 | "description": "Disabled nodes are nodes that are disabled by the user. When users disable a node, Longhorn will not use the node's storage for replica scheduling. 
Note that Longhorn can still attach a volume to disabled nodes because the actual data of the volume could be on a different node.", 731 | "fieldConfig": { 732 | "defaults": { 733 | "custom": {}, 734 | "mappings": [], 735 | "thresholds": { 736 | "mode": "absolute", 737 | "steps": [ 738 | { 739 | "color": "green", 740 | "value": null 741 | }, 742 | { 743 | "color": "yellow", 744 | "value": 1 745 | } 746 | ] 747 | } 748 | }, 749 | "overrides": [] 750 | }, 751 | "gridPos": { 752 | "h": 6, 753 | "w": 4, 754 | "x": 4, 755 | "y": 24 756 | }, 757 | "id": 21, 758 | "options": { 759 | "colorMode": "value", 760 | "graphMode": "area", 761 | "justifyMode": "auto", 762 | "orientation": "auto", 763 | "reduceOptions": { 764 | "calcs": [ 765 | "last" 766 | ], 767 | "fields": "", 768 | "values": false 769 | }, 770 | "textMode": "auto" 771 | }, 772 | "pluginVersion": "7.1.5", 773 | "targets": [ 774 | { 775 | "expr": "count(longhorn_node_status{condition=\"allowScheduling\"}==0) OR on() vector(0)", 776 | "interval": "", 777 | "legendFormat": "", 778 | "refId": "A" 779 | } 780 | ], 781 | "timeFrom": null, 782 | "timeShift": null, 783 | "title": "Number Of Disabled Nodes", 784 | "type": "stat" 785 | }, 786 | { 787 | "aliasColors": {}, 788 | "bars": false, 789 | "dashLength": 10, 790 | "dashes": false, 791 | "datasource": "Prometheus", 792 | "description": "", 793 | "fieldConfig": { 794 | "defaults": { 795 | "custom": {} 796 | }, 797 | "overrides": [] 798 | }, 799 | "fill": 1, 800 | "fillGradient": 0, 801 | "gridPos": { 802 | "h": 10, 803 | "w": 15, 804 | "x": 9, 805 | "y": 24 806 | }, 807 | "hiddenSeries": false, 808 | "id": 24, 809 | "legend": { 810 | "alignAsTable": true, 811 | "avg": false, 812 | "current": true, 813 | "max": false, 814 | "min": false, 815 | "rightSide": true, 816 | "show": true, 817 | "total": false, 818 | "values": true 819 | }, 820 | "lines": true, 821 | "linewidth": 1, 822 | "nullPointMode": "null", 823 | "percentage": false, 824 | "pluginVersion": "7.1.5", 825 
| "pointradius": 2, 826 | "points": false, 827 | "renderer": "flot", 828 | "seriesOverrides": [], 829 | "spaceLength": 10, 830 | "stack": false, 831 | "steppedLine": false, 832 | "targets": [ 833 | { 834 | "expr": "(longhorn_node_storage_usage_bytes/longhorn_node_storage_capacity_bytes) * 100", 835 | "interval": "", 836 | "legendFormat": "{{node}}", 837 | "refId": "A" 838 | } 839 | ], 840 | "thresholds": [], 841 | "timeFrom": null, 842 | "timeRegions": [], 843 | "timeShift": null, 844 | "title": "Node Storage Usage/Capacity", 845 | "tooltip": { 846 | "shared": true, 847 | "sort": 0, 848 | "value_type": "individual" 849 | }, 850 | "type": "graph", 851 | "xaxis": { 852 | "buckets": null, 853 | "mode": "time", 854 | "name": null, 855 | "show": true, 856 | "values": [] 857 | }, 858 | "yaxes": [ 859 | { 860 | "$$hashKey": "object:530", 861 | "format": "percent", 862 | "label": null, 863 | "logBase": 1, 864 | "max": null, 865 | "min": null, 866 | "show": true 867 | }, 868 | { 869 | "$$hashKey": "object:531", 870 | "format": "short", 871 | "label": null, 872 | "logBase": 1, 873 | "max": null, 874 | "min": null, 875 | "show": true 876 | } 877 | ], 878 | "yaxis": { 879 | "align": false, 880 | "alignLevel": null 881 | } 882 | }, 883 | { 884 | "datasource": "Prometheus", 885 | "description": "Schedulable nodes are nodes that Longhorn can use their storage for replica scheduling.", 886 | "fieldConfig": { 887 | "defaults": { 888 | "custom": {}, 889 | "mappings": [], 890 | "thresholds": { 891 | "mode": "absolute", 892 | "steps": [ 893 | { 894 | "color": "red", 895 | "value": null 896 | }, 897 | { 898 | "color": "green", 899 | "value": 1 900 | } 901 | ] 902 | } 903 | }, 904 | "overrides": [] 905 | }, 906 | "gridPos": { 907 | "h": 6, 908 | "w": 4, 909 | "x": 0, 910 | "y": 30 911 | }, 912 | "id": 20, 913 | "options": { 914 | "colorMode": "value", 915 | "graphMode": "area", 916 | "justifyMode": "auto", 917 | "orientation": "auto", 918 | "reduceOptions": { 919 | "calcs": [ 920 | 
"last" 921 | ], 922 | "fields": "", 923 | "values": false 924 | }, 925 | "textMode": "auto" 926 | }, 927 | "pluginVersion": "7.1.5", 928 | "targets": [ 929 | { 930 | "expr": "(count(longhorn_node_status{condition=\"schedulable\"}==1) OR on() vector(0)) - (count(longhorn_node_status{condition=\"allowScheduling\"}==0) OR on() vector(0))", 931 | "interval": "", 932 | "legendFormat": "", 933 | "refId": "A" 934 | } 935 | ], 936 | "timeFrom": null, 937 | "timeShift": null, 938 | "title": "Number Of Schedulable Nodes", 939 | "type": "stat" 940 | }, 941 | { 942 | "datasource": "Prometheus", 943 | "description": "Failed Nodes are nodes that Longhorn cannot attach volumes to and cannot schedule replicas onto. e.g: when the nodes went down.", 944 | "fieldConfig": { 945 | "defaults": { 946 | "custom": {}, 947 | "mappings": [], 948 | "thresholds": { 949 | "mode": "absolute", 950 | "steps": [ 951 | { 952 | "color": "green", 953 | "value": null 954 | }, 955 | { 956 | "color": "red", 957 | "value": 1 958 | } 959 | ] 960 | } 961 | }, 962 | "overrides": [] 963 | }, 964 | "gridPos": { 965 | "h": 6, 966 | "w": 4, 967 | "x": 4, 968 | "y": 30 969 | }, 970 | "id": 22, 971 | "options": { 972 | "colorMode": "value", 973 | "graphMode": "area", 974 | "justifyMode": "auto", 975 | "orientation": "auto", 976 | "reduceOptions": { 977 | "calcs": [ 978 | "last" 979 | ], 980 | "fields": "", 981 | "values": false 982 | }, 983 | "textMode": "auto" 984 | }, 985 | "pluginVersion": "7.1.5", 986 | "targets": [ 987 | { 988 | "expr": "(avg(longhorn_node_count_total) OR on() vector(0)) - (count(longhorn_node_status{condition=\"ready\"}==1) OR on() vector(0))", 989 | "interval": "", 990 | "legendFormat": "", 991 | "refId": "A" 992 | } 993 | ], 994 | "timeFrom": null, 995 | "timeShift": null, 996 | "title": "Number Of Failed Nodes", 997 | "type": "stat" 998 | }, 999 | { 1000 | "datasource": "Prometheus", 1001 | "description": "", 1002 | "fieldConfig": { 1003 | "defaults": { 1004 | "custom": { 1005 | "align": 
null 1006 | }, 1007 | "decimals": 1, 1008 | "mappings": [], 1009 | "min": 0, 1010 | "thresholds": { 1011 | "mode": "absolute", 1012 | "steps": [ 1013 | { 1014 | "color": "green", 1015 | "value": null 1016 | } 1017 | ] 1018 | }, 1019 | "unit": "bytes" 1020 | }, 1021 | "overrides": [ 1022 | { 1023 | "matcher": { 1024 | "id": "byName", 1025 | "options": "Storage Capacity" 1026 | }, 1027 | "properties": [ 1028 | { 1029 | "id": "custom.displayMode", 1030 | "value": "lcd-gauge" 1031 | } 1032 | ] 1033 | } 1034 | ] 1035 | }, 1036 | "gridPos": { 1037 | "h": 10, 1038 | "w": 15, 1039 | "x": 9, 1040 | "y": 34 1041 | }, 1042 | "id": 23, 1043 | "options": { 1044 | "showHeader": true 1045 | }, 1046 | "pluginVersion": "7.1.5", 1047 | "targets": [ 1048 | { 1049 | "expr": "longhorn_node_storage_capacity_bytes", 1050 | "format": "table", 1051 | "instant": true, 1052 | "interval": "", 1053 | "legendFormat": "{{node}}", 1054 | "refId": "A" 1055 | } 1056 | ], 1057 | "timeFrom": null, 1058 | "timeShift": null, 1059 | "title": "Node Capacity", 1060 | "transformations": [ 1061 | { 1062 | "id": "organize", 1063 | "options": { 1064 | "excludeByName": { 1065 | "Time": true, 1066 | "__name__": true, 1067 | "endpoint": true, 1068 | "instance": true, 1069 | "job": true, 1070 | "namespace": true, 1071 | "pod": true, 1072 | "service": true 1073 | }, 1074 | "indexByName": {}, 1075 | "renameByName": { 1076 | "Value": "Storage Capacity" 1077 | } 1078 | } 1079 | } 1080 | ], 1081 | "type": "table" 1082 | }, 1083 | { 1084 | "aliasColors": {}, 1085 | "bars": false, 1086 | "dashLength": 10, 1087 | "dashes": false, 1088 | "datasource": "Prometheus", 1089 | "decimals": 0, 1090 | "fieldConfig": { 1091 | "defaults": { 1092 | "custom": {} 1093 | }, 1094 | "overrides": [] 1095 | }, 1096 | "fill": 1, 1097 | "fillGradient": 0, 1098 | "gridPos": { 1099 | "h": 8, 1100 | "w": 8, 1101 | "x": 0, 1102 | "y": 36 1103 | }, 1104 | "hiddenSeries": false, 1105 | "id": 26, 1106 | "legend": { 1107 | "avg": false, 1108 | 
"current": false, 1109 | "max": false, 1110 | "min": false, 1111 | "show": true, 1112 | "total": false, 1113 | "values": false 1114 | }, 1115 | "lines": true, 1116 | "linewidth": 1, 1117 | "nullPointMode": "null", 1118 | "percentage": false, 1119 | "pluginVersion": "7.1.5", 1120 | "pointradius": 2, 1121 | "points": false, 1122 | "renderer": "flot", 1123 | "seriesOverrides": [], 1124 | "spaceLength": 10, 1125 | "stack": false, 1126 | "steppedLine": false, 1127 | "targets": [ 1128 | { 1129 | "expr": "count by (node) (longhorn_volume_state==2)", 1130 | "interval": "", 1131 | "legendFormat": "{{node}}", 1132 | "refId": "A" 1133 | } 1134 | ], 1135 | "thresholds": [], 1136 | "timeFrom": null, 1137 | "timeRegions": [], 1138 | "timeShift": null, 1139 | "title": "Number of Volumes Per Node", 1140 | "tooltip": { 1141 | "shared": true, 1142 | "sort": 0, 1143 | "value_type": "individual" 1144 | }, 1145 | "type": "graph", 1146 | "xaxis": { 1147 | "buckets": null, 1148 | "mode": "time", 1149 | "name": null, 1150 | "show": true, 1151 | "values": [] 1152 | }, 1153 | "yaxes": [ 1154 | { 1155 | "$$hashKey": "object:422", 1156 | "decimals": 0, 1157 | "format": "short", 1158 | "label": null, 1159 | "logBase": 1, 1160 | "max": null, 1161 | "min": null, 1162 | "show": true 1163 | }, 1164 | { 1165 | "$$hashKey": "object:423", 1166 | "format": "short", 1167 | "label": null, 1168 | "logBase": 1, 1169 | "max": null, 1170 | "min": null, 1171 | "show": true 1172 | } 1173 | ], 1174 | "yaxis": { 1175 | "align": false, 1176 | "alignLevel": null 1177 | } 1178 | }, 1179 | { 1180 | "datasource": null, 1181 | "fieldConfig": { 1182 | "defaults": { 1183 | "custom": {} 1184 | }, 1185 | "overrides": [] 1186 | }, 1187 | "gridPos": { 1188 | "h": 3, 1189 | "w": 24, 1190 | "x": 0, 1191 | "y": 44 1192 | }, 1193 | "id": 17, 1194 | "options": { 1195 | "content": "
\nDISKS\n
", 1196 | "mode": "html" 1197 | }, 1198 | "pluginVersion": "7.1.0", 1199 | "timeFrom": null, 1200 | "timeShift": null, 1201 | "title": "", 1202 | "transparent": true, 1203 | "type": "text" 1204 | }, 1205 | { 1206 | "aliasColors": {}, 1207 | "bars": false, 1208 | "dashLength": 10, 1209 | "dashes": false, 1210 | "datasource": "Prometheus", 1211 | "description": "The capacity of each Longhorn volume", 1212 | "fieldConfig": { 1213 | "defaults": { 1214 | "custom": {} 1215 | }, 1216 | "overrides": [] 1217 | }, 1218 | "fill": 1, 1219 | "fillGradient": 0, 1220 | "gridPos": { 1221 | "h": 10, 1222 | "w": 12, 1223 | "x": 0, 1224 | "y": 47 1225 | }, 1226 | "hiddenSeries": false, 1227 | "id": 32, 1228 | "legend": { 1229 | "alignAsTable": true, 1230 | "avg": false, 1231 | "current": true, 1232 | "max": false, 1233 | "min": false, 1234 | "rightSide": true, 1235 | "show": true, 1236 | "total": false, 1237 | "values": true 1238 | }, 1239 | "lines": true, 1240 | "linewidth": 1, 1241 | "nullPointMode": "null", 1242 | "percentage": false, 1243 | "pluginVersion": "7.1.5", 1244 | "pointradius": 2, 1245 | "points": false, 1246 | "renderer": "flot", 1247 | "seriesOverrides": [], 1248 | "spaceLength": 10, 1249 | "stack": false, 1250 | "steppedLine": false, 1251 | "targets": [ 1252 | { 1253 | "expr": "(longhorn_disk_usage_bytes/longhorn_disk_capacity_bytes)*100", 1254 | "interval": "", 1255 | "legendFormat": "{{disk}}", 1256 | "refId": "A" 1257 | } 1258 | ], 1259 | "thresholds": [], 1260 | "timeFrom": null, 1261 | "timeRegions": [], 1262 | "timeShift": null, 1263 | "title": "Disk Space Usage", 1264 | "tooltip": { 1265 | "shared": true, 1266 | "sort": 0, 1267 | "value_type": "individual" 1268 | }, 1269 | "type": "graph", 1270 | "xaxis": { 1271 | "buckets": null, 1272 | "mode": "time", 1273 | "name": null, 1274 | "show": true, 1275 | "values": [] 1276 | }, 1277 | "yaxes": [ 1278 | { 1279 | "$$hashKey": "object:530", 1280 | "format": "percent", 1281 | "label": null, 1282 | "logBase": 1, 1283 | 
"max": null, 1284 | "min": null, 1285 | "show": true 1286 | }, 1287 | { 1288 | "$$hashKey": "object:531", 1289 | "format": "short", 1290 | "label": null, 1291 | "logBase": 1, 1292 | "max": null, 1293 | "min": null, 1294 | "show": true 1295 | } 1296 | ], 1297 | "yaxis": { 1298 | "align": false, 1299 | "alignLevel": null 1300 | } 1301 | }, 1302 | { 1303 | "datasource": "Prometheus", 1304 | "description": "", 1305 | "fieldConfig": { 1306 | "defaults": { 1307 | "custom": { 1308 | "align": null 1309 | }, 1310 | "decimals": 1, 1311 | "mappings": [], 1312 | "min": 0, 1313 | "thresholds": { 1314 | "mode": "absolute", 1315 | "steps": [ 1316 | { 1317 | "color": "green", 1318 | "value": null 1319 | } 1320 | ] 1321 | }, 1322 | "unit": "bytes" 1323 | }, 1324 | "overrides": [ 1325 | { 1326 | "matcher": { 1327 | "id": "byName", 1328 | "options": "Capacity" 1329 | }, 1330 | "properties": [ 1331 | { 1332 | "id": "custom.displayMode", 1333 | "value": "lcd-gauge" 1334 | } 1335 | ] 1336 | } 1337 | ] 1338 | }, 1339 | "gridPos": { 1340 | "h": 10, 1341 | "w": 12, 1342 | "x": 12, 1343 | "y": 47 1344 | }, 1345 | "id": 33, 1346 | "options": { 1347 | "showHeader": true 1348 | }, 1349 | "pluginVersion": "7.1.5", 1350 | "targets": [ 1351 | { 1352 | "expr": "longhorn_disk_capacity_bytes", 1353 | "format": "table", 1354 | "instant": true, 1355 | "interval": "", 1356 | "legendFormat": "{{disk}}", 1357 | "refId": "A" 1358 | } 1359 | ], 1360 | "timeFrom": null, 1361 | "timeShift": null, 1362 | "title": "Disk Capacity", 1363 | "transformations": [ 1364 | { 1365 | "id": "organize", 1366 | "options": { 1367 | "excludeByName": { 1368 | "Time": true, 1369 | "Value": false, 1370 | "__name__": true, 1371 | "disk": false, 1372 | "endpoint": true, 1373 | "instance": true, 1374 | "job": true, 1375 | "namespace": true, 1376 | "pod": true, 1377 | "service": true 1378 | }, 1379 | "indexByName": {}, 1380 | "renameByName": { 1381 | "Value": "Capacity" 1382 | } 1383 | } 1384 | } 1385 | ], 1386 | "type": "table" 
1387 | }, 1388 | { 1389 | "datasource": null, 1390 | "fieldConfig": { 1391 | "defaults": { 1392 | "custom": {} 1393 | }, 1394 | "overrides": [] 1395 | }, 1396 | "gridPos": { 1397 | "h": 3, 1398 | "w": 24, 1399 | "x": 0, 1400 | "y": 57 1401 | }, 1402 | "id": 5, 1403 | "options": { 1404 | "content": "
\nCPU & MEMORY\n
", 1405 | "mode": "html" 1406 | }, 1407 | "pluginVersion": "7.1.0", 1408 | "timeFrom": null, 1409 | "timeShift": null, 1410 | "title": "", 1411 | "transparent": true, 1412 | "type": "text" 1413 | }, 1414 | { 1415 | "aliasColors": {}, 1416 | "bars": false, 1417 | "dashLength": 10, 1418 | "dashes": false, 1419 | "datasource": "Prometheus", 1420 | "description": "", 1421 | "fieldConfig": { 1422 | "defaults": { 1423 | "custom": {} 1424 | }, 1425 | "overrides": [] 1426 | }, 1427 | "fill": 1, 1428 | "fillGradient": 0, 1429 | "gridPos": { 1430 | "h": 10, 1431 | "w": 12, 1432 | "x": 0, 1433 | "y": 60 1434 | }, 1435 | "hiddenSeries": false, 1436 | "id": 36, 1437 | "legend": { 1438 | "avg": false, 1439 | "current": false, 1440 | "max": false, 1441 | "min": false, 1442 | "show": true, 1443 | "total": false, 1444 | "values": false 1445 | }, 1446 | "lines": true, 1447 | "linewidth": 1, 1448 | "links": [], 1449 | "nullPointMode": "null", 1450 | "percentage": false, 1451 | "pluginVersion": "7.1.5", 1452 | "pointradius": 2, 1453 | "points": false, 1454 | "renderer": "flot", 1455 | "seriesOverrides": [], 1456 | "spaceLength": 10, 1457 | "stack": false, 1458 | "steppedLine": false, 1459 | "targets": [ 1460 | { 1461 | "expr": "(longhorn_node_cpu_usage_millicpu / longhorn_node_cpu_capacity_millicpu) * 100", 1462 | "interval": "", 1463 | "legendFormat": "{{node}}", 1464 | "refId": "A" 1465 | } 1466 | ], 1467 | "thresholds": [ 1468 | { 1469 | "$$hashKey": "object:1092", 1470 | "colorMode": "critical", 1471 | "fill": true, 1472 | "line": true, 1473 | "op": "gt", 1474 | "value": 80, 1475 | "yaxis": "left" 1476 | } 1477 | ], 1478 | "timeFrom": null, 1479 | "timeRegions": [], 1480 | "timeShift": null, 1481 | "title": "Node CPU Usage/Capacity", 1482 | "tooltip": { 1483 | "shared": true, 1484 | "sort": 0, 1485 | "value_type": "individual" 1486 | }, 1487 | "type": "graph", 1488 | "xaxis": { 1489 | "buckets": null, 1490 | "mode": "time", 1491 | "name": null, 1492 | "show": true, 1493 | 
"values": [] 1494 | }, 1495 | "yaxes": [ 1496 | { 1497 | "$$hashKey": "object:865", 1498 | "decimals": 0, 1499 | "format": "percent", 1500 | "label": null, 1501 | "logBase": 1, 1502 | "max": null, 1503 | "min": null, 1504 | "show": true 1505 | }, 1506 | { 1507 | "$$hashKey": "object:866", 1508 | "format": "none", 1509 | "label": null, 1510 | "logBase": 1, 1511 | "max": null, 1512 | "min": null, 1513 | "show": true 1514 | } 1515 | ], 1516 | "yaxis": { 1517 | "align": false, 1518 | "alignLevel": null 1519 | } 1520 | }, 1521 | { 1522 | "aliasColors": {}, 1523 | "bars": false, 1524 | "dashLength": 10, 1525 | "dashes": false, 1526 | "datasource": "Prometheus", 1527 | "description": "", 1528 | "fieldConfig": { 1529 | "defaults": { 1530 | "custom": {} 1531 | }, 1532 | "overrides": [] 1533 | }, 1534 | "fill": 1, 1535 | "fillGradient": 0, 1536 | "gridPos": { 1537 | "h": 10, 1538 | "w": 12, 1539 | "x": 12, 1540 | "y": 60 1541 | }, 1542 | "hiddenSeries": false, 1543 | "id": 38, 1544 | "legend": { 1545 | "avg": false, 1546 | "current": false, 1547 | "max": false, 1548 | "min": false, 1549 | "show": true, 1550 | "total": false, 1551 | "values": false 1552 | }, 1553 | "lines": true, 1554 | "linewidth": 1, 1555 | "links": [], 1556 | "nullPointMode": "null", 1557 | "percentage": false, 1558 | "pluginVersion": "7.1.5", 1559 | "pointradius": 2, 1560 | "points": false, 1561 | "renderer": "flot", 1562 | "seriesOverrides": [], 1563 | "spaceLength": 10, 1564 | "stack": false, 1565 | "steppedLine": false, 1566 | "targets": [ 1567 | { 1568 | "expr": "( longhorn_node_memory_usage_bytes / longhorn_node_memory_capacity_bytes ) * 100", 1569 | "interval": "", 1570 | "legendFormat": "{{node}}", 1571 | "refId": "A" 1572 | } 1573 | ], 1574 | "thresholds": [ 1575 | { 1576 | "$$hashKey": "object:1092", 1577 | "colorMode": "critical", 1578 | "fill": true, 1579 | "line": true, 1580 | "op": "gt", 1581 | "value": 80, 1582 | "yaxis": "left" 1583 | } 1584 | ], 1585 | "timeFrom": null, 1586 | 
"timeRegions": [], 1587 | "timeShift": null, 1588 | "title": "Node Memory Usage/Capacity", 1589 | "tooltip": { 1590 | "shared": true, 1591 | "sort": 0, 1592 | "value_type": "individual" 1593 | }, 1594 | "type": "graph", 1595 | "xaxis": { 1596 | "buckets": null, 1597 | "mode": "time", 1598 | "name": null, 1599 | "show": true, 1600 | "values": [] 1601 | }, 1602 | "yaxes": [ 1603 | { 1604 | "$$hashKey": "object:865", 1605 | "decimals": 0, 1606 | "format": "percent", 1607 | "label": null, 1608 | "logBase": 1, 1609 | "max": null, 1610 | "min": null, 1611 | "show": true 1612 | }, 1613 | { 1614 | "$$hashKey": "object:866", 1615 | "format": "none", 1616 | "label": null, 1617 | "logBase": 1, 1618 | "max": null, 1619 | "min": null, 1620 | "show": true 1621 | } 1622 | ], 1623 | "yaxis": { 1624 | "align": false, 1625 | "alignLevel": null 1626 | } 1627 | }, 1628 | { 1629 | "aliasColors": {}, 1630 | "bars": false, 1631 | "dashLength": 10, 1632 | "dashes": false, 1633 | "datasource": "Prometheus", 1634 | "decimals": 0, 1635 | "description": "Instance managers are pods that contains the engine and replica processes of Longhorn volumes. 
See more at https://longhorn.io/docs/1.0.2/concepts/#11-the-longhorn-manager-and-the-longhorn-engine ", 1636 | "fieldConfig": { 1637 | "defaults": { 1638 | "custom": {} 1639 | }, 1640 | "overrides": [] 1641 | }, 1642 | "fill": 1, 1643 | "fillGradient": 0, 1644 | "gridPos": { 1645 | "h": 10, 1646 | "w": 8, 1647 | "x": 0, 1648 | "y": 70 1649 | }, 1650 | "hiddenSeries": false, 1651 | "id": 28, 1652 | "legend": { 1653 | "avg": false, 1654 | "current": false, 1655 | "max": false, 1656 | "min": false, 1657 | "show": true, 1658 | "total": false, 1659 | "values": false 1660 | }, 1661 | "lines": true, 1662 | "linewidth": 1, 1663 | "links": [], 1664 | "nullPointMode": "null", 1665 | "percentage": false, 1666 | "pluginVersion": "7.1.5", 1667 | "pointradius": 2, 1668 | "points": false, 1669 | "renderer": "flot", 1670 | "seriesOverrides": [], 1671 | "spaceLength": 10, 1672 | "stack": false, 1673 | "steppedLine": false, 1674 | "targets": [ 1675 | { 1676 | "expr": "longhorn_instance_manager_cpu_usage_millicpu", 1677 | "interval": "", 1678 | "legendFormat": "{{instance_manager}}", 1679 | "refId": "A" 1680 | } 1681 | ], 1682 | "thresholds": [], 1683 | "timeFrom": null, 1684 | "timeRegions": [], 1685 | "timeShift": null, 1686 | "title": "Instance Manager CPU Usage", 1687 | "tooltip": { 1688 | "shared": true, 1689 | "sort": 0, 1690 | "value_type": "individual" 1691 | }, 1692 | "type": "graph", 1693 | "xaxis": { 1694 | "buckets": null, 1695 | "mode": "time", 1696 | "name": null, 1697 | "show": true, 1698 | "values": [] 1699 | }, 1700 | "yaxes": [ 1701 | { 1702 | "$$hashKey": "object:865", 1703 | "decimals": 0, 1704 | "format": "milicpu", 1705 | "label": null, 1706 | "logBase": 1, 1707 | "max": null, 1708 | "min": null, 1709 | "show": true 1710 | }, 1711 | { 1712 | "$$hashKey": "object:866", 1713 | "format": "none", 1714 | "label": null, 1715 | "logBase": 1, 1716 | "max": null, 1717 | "min": null, 1718 | "show": true 1719 | } 1720 | ], 1721 | "yaxis": { 1722 | "align": false, 1723 | 
"alignLevel": null 1724 | } 1725 | }, 1726 | { 1727 | "aliasColors": {}, 1728 | "bars": false, 1729 | "dashLength": 10, 1730 | "dashes": false, 1731 | "datasource": "Prometheus", 1732 | "description": "Instance managers are pods that contains the engine and replica processes of Longhorn volumes. See more at https://longhorn.io/docs/1.0.2/concepts/#11-the-longhorn-manager-and-the-longhorn-engine ", 1733 | "fieldConfig": { 1734 | "defaults": { 1735 | "custom": {} 1736 | }, 1737 | "overrides": [] 1738 | }, 1739 | "fill": 1, 1740 | "fillGradient": 0, 1741 | "gridPos": { 1742 | "h": 10, 1743 | "w": 8, 1744 | "x": 8, 1745 | "y": 70 1746 | }, 1747 | "hiddenSeries": false, 1748 | "id": 30, 1749 | "legend": { 1750 | "avg": false, 1751 | "current": false, 1752 | "max": false, 1753 | "min": false, 1754 | "show": true, 1755 | "total": false, 1756 | "values": false 1757 | }, 1758 | "lines": true, 1759 | "linewidth": 1, 1760 | "links": [], 1761 | "nullPointMode": "null", 1762 | "percentage": false, 1763 | "pluginVersion": "7.1.5", 1764 | "pointradius": 2, 1765 | "points": false, 1766 | "renderer": "flot", 1767 | "seriesOverrides": [], 1768 | "spaceLength": 10, 1769 | "stack": false, 1770 | "steppedLine": false, 1771 | "targets": [ 1772 | { 1773 | "expr": "(longhorn_instance_manager_cpu_usage_millicpu/longhorn_instance_manager_cpu_requests_millicpu)*100", 1774 | "interval": "", 1775 | "legendFormat": "{{instance_manager}}", 1776 | "refId": "A" 1777 | } 1778 | ], 1779 | "thresholds": [ 1780 | { 1781 | "$$hashKey": "object:1092", 1782 | "colorMode": "critical", 1783 | "fill": true, 1784 | "line": true, 1785 | "op": "gt", 1786 | "value": 100, 1787 | "yaxis": "left" 1788 | } 1789 | ], 1790 | "timeFrom": null, 1791 | "timeRegions": [], 1792 | "timeShift": null, 1793 | "title": "Instance Manager CPU Usage/Request", 1794 | "tooltip": { 1795 | "shared": true, 1796 | "sort": 0, 1797 | "value_type": "individual" 1798 | }, 1799 | "type": "graph", 1800 | "xaxis": { 1801 | "buckets": null, 
1802 | "mode": "time", 1803 | "name": null, 1804 | "show": true, 1805 | "values": [] 1806 | }, 1807 | "yaxes": [ 1808 | { 1809 | "$$hashKey": "object:865", 1810 | "decimals": 0, 1811 | "format": "percent", 1812 | "label": null, 1813 | "logBase": 1, 1814 | "max": null, 1815 | "min": null, 1816 | "show": true 1817 | }, 1818 | { 1819 | "$$hashKey": "object:866", 1820 | "format": "none", 1821 | "label": null, 1822 | "logBase": 1, 1823 | "max": null, 1824 | "min": null, 1825 | "show": true 1826 | } 1827 | ], 1828 | "yaxis": { 1829 | "align": false, 1830 | "alignLevel": null 1831 | } 1832 | }, 1833 | { 1834 | "aliasColors": {}, 1835 | "bars": false, 1836 | "dashLength": 10, 1837 | "dashes": false, 1838 | "datasource": "Prometheus", 1839 | "description": "Instance managers are pods that contains the engine and replica processes of Longhorn volumes. See more at https://longhorn.io/docs/1.0.2/concepts/#11-the-longhorn-manager-and-the-longhorn-engine ", 1840 | "fieldConfig": { 1841 | "defaults": { 1842 | "custom": {} 1843 | }, 1844 | "overrides": [] 1845 | }, 1846 | "fill": 1, 1847 | "fillGradient": 0, 1848 | "gridPos": { 1849 | "h": 10, 1850 | "w": 8, 1851 | "x": 16, 1852 | "y": 70 1853 | }, 1854 | "hiddenSeries": false, 1855 | "id": 29, 1856 | "legend": { 1857 | "avg": false, 1858 | "current": false, 1859 | "max": false, 1860 | "min": false, 1861 | "show": true, 1862 | "total": false, 1863 | "values": false 1864 | }, 1865 | "lines": true, 1866 | "linewidth": 1, 1867 | "links": [], 1868 | "nullPointMode": "null", 1869 | "percentage": false, 1870 | "pluginVersion": "7.1.5", 1871 | "pointradius": 2, 1872 | "points": false, 1873 | "renderer": "flot", 1874 | "seriesOverrides": [], 1875 | "spaceLength": 10, 1876 | "stack": false, 1877 | "steppedLine": false, 1878 | "targets": [ 1879 | { 1880 | "expr": "longhorn_instance_manager_memory_usage_bytes", 1881 | "interval": "", 1882 | "legendFormat": "{{instance_manager}}", 1883 | "refId": "A" 1884 | } 1885 | ], 1886 | "thresholds": 
[], 1887 | "timeFrom": null, 1888 | "timeRegions": [], 1889 | "timeShift": null, 1890 | "title": "Instance Manager Memory Usage", 1891 | "tooltip": { 1892 | "shared": true, 1893 | "sort": 0, 1894 | "value_type": "individual" 1895 | }, 1896 | "type": "graph", 1897 | "xaxis": { 1898 | "buckets": null, 1899 | "mode": "time", 1900 | "name": null, 1901 | "show": true, 1902 | "values": [] 1903 | }, 1904 | "yaxes": [ 1905 | { 1906 | "$$hashKey": "object:865", 1907 | "format": "bytes", 1908 | "label": null, 1909 | "logBase": 1, 1910 | "max": null, 1911 | "min": null, 1912 | "show": true 1913 | }, 1914 | { 1915 | "$$hashKey": "object:866", 1916 | "format": "none", 1917 | "label": null, 1918 | "logBase": 1, 1919 | "max": null, 1920 | "min": null, 1921 | "show": true 1922 | } 1923 | ], 1924 | "yaxis": { 1925 | "align": false, 1926 | "alignLevel": null 1927 | } 1928 | }, 1929 | { 1930 | "aliasColors": {}, 1931 | "bars": false, 1932 | "dashLength": 10, 1933 | "dashes": false, 1934 | "datasource": "Prometheus", 1935 | "decimals": 0, 1936 | "description": "Longhorn manager pods manage the control plane of the Longhorn system. e.g. 
Volume scheduling, attaching, detaching, backup, etc,..", 1937 | "fieldConfig": { 1938 | "defaults": { 1939 | "custom": {} 1940 | }, 1941 | "overrides": [] 1942 | }, 1943 | "fill": 1, 1944 | "fillGradient": 0, 1945 | "gridPos": { 1946 | "h": 9, 1947 | "w": 12, 1948 | "x": 0, 1949 | "y": 80 1950 | }, 1951 | "hiddenSeries": false, 1952 | "id": 2, 1953 | "legend": { 1954 | "avg": false, 1955 | "current": false, 1956 | "max": false, 1957 | "min": false, 1958 | "show": true, 1959 | "total": false, 1960 | "values": false 1961 | }, 1962 | "lines": true, 1963 | "linewidth": 1, 1964 | "nullPointMode": "null", 1965 | "percentage": false, 1966 | "pluginVersion": "7.1.5", 1967 | "pointradius": 2, 1968 | "points": false, 1969 | "renderer": "flot", 1970 | "seriesOverrides": [], 1971 | "spaceLength": 10, 1972 | "stack": false, 1973 | "steppedLine": false, 1974 | "targets": [ 1975 | { 1976 | "expr": "longhorn_manager_cpu_usage_millicpu", 1977 | "interval": "", 1978 | "legendFormat": "{{manager}}", 1979 | "refId": "A" 1980 | } 1981 | ], 1982 | "thresholds": [], 1983 | "timeFrom": null, 1984 | "timeRegions": [], 1985 | "timeShift": null, 1986 | "title": "Longhorn Manager CPU Usage", 1987 | "tooltip": { 1988 | "shared": true, 1989 | "sort": 0, 1990 | "value_type": "individual" 1991 | }, 1992 | "type": "graph", 1993 | "xaxis": { 1994 | "buckets": null, 1995 | "mode": "time", 1996 | "name": null, 1997 | "show": true, 1998 | "values": [] 1999 | }, 2000 | "yaxes": [ 2001 | { 2002 | "$$hashKey": "object:1500", 2003 | "decimals": 0, 2004 | "format": "milicpu", 2005 | "label": null, 2006 | "logBase": 1, 2007 | "max": null, 2008 | "min": null, 2009 | "show": true 2010 | }, 2011 | { 2012 | "$$hashKey": "object:1501", 2013 | "format": "short", 2014 | "label": null, 2015 | "logBase": 1, 2016 | "max": null, 2017 | "min": null, 2018 | "show": true 2019 | } 2020 | ], 2021 | "yaxis": { 2022 | "align": false, 2023 | "alignLevel": null 2024 | } 2025 | }, 2026 | { 2027 | "aliasColors": {}, 2028 | 
"bars": false, 2029 | "dashLength": 10, 2030 | "dashes": false, 2031 | "datasource": "Prometheus", 2032 | "decimals": null, 2033 | "description": "Longhorn manager pods manage the control plane of the Longhorn system. e.g. Volume scheduling, attaching, detaching, backup, etc,..", 2034 | "fieldConfig": { 2035 | "defaults": { 2036 | "custom": {} 2037 | }, 2038 | "overrides": [] 2039 | }, 2040 | "fill": 1, 2041 | "fillGradient": 0, 2042 | "gridPos": { 2043 | "h": 9, 2044 | "w": 12, 2045 | "x": 12, 2046 | "y": 80 2047 | }, 2048 | "hiddenSeries": false, 2049 | "id": 31, 2050 | "legend": { 2051 | "avg": false, 2052 | "current": false, 2053 | "max": false, 2054 | "min": false, 2055 | "show": true, 2056 | "total": false, 2057 | "values": false 2058 | }, 2059 | "lines": true, 2060 | "linewidth": 1, 2061 | "nullPointMode": "null", 2062 | "percentage": false, 2063 | "pluginVersion": "7.1.5", 2064 | "pointradius": 2, 2065 | "points": false, 2066 | "renderer": "flot", 2067 | "seriesOverrides": [], 2068 | "spaceLength": 10, 2069 | "stack": false, 2070 | "steppedLine": false, 2071 | "targets": [ 2072 | { 2073 | "expr": "longhorn_manager_memory_usage_bytes", 2074 | "interval": "", 2075 | "legendFormat": "{{manager}}", 2076 | "refId": "A" 2077 | } 2078 | ], 2079 | "thresholds": [], 2080 | "timeFrom": null, 2081 | "timeRegions": [], 2082 | "timeShift": null, 2083 | "title": "Longhorn Manager Memory Usage", 2084 | "tooltip": { 2085 | "shared": true, 2086 | "sort": 0, 2087 | "value_type": "individual" 2088 | }, 2089 | "type": "graph", 2090 | "xaxis": { 2091 | "buckets": null, 2092 | "mode": "time", 2093 | "name": null, 2094 | "show": true, 2095 | "values": [] 2096 | }, 2097 | "yaxes": [ 2098 | { 2099 | "$$hashKey": "object:1500", 2100 | "decimals": null, 2101 | "format": "bytes", 2102 | "label": null, 2103 | "logBase": 1, 2104 | "max": null, 2105 | "min": null, 2106 | "show": true 2107 | }, 2108 | { 2109 | "$$hashKey": "object:1501", 2110 | "format": "short", 2111 | "label": null, 
2112 | "logBase": 1, 2113 | "max": null, 2114 | "min": null, 2115 | "show": true 2116 | } 2117 | ], 2118 | "yaxis": { 2119 | "align": false, 2120 | "alignLevel": null 2121 | } 2122 | } 2123 | ], 2124 | "refresh": "30s", 2125 | "schemaVersion": 26, 2126 | "style": "dark", 2127 | "tags": [], 2128 | "templating": { 2129 | "list": [] 2130 | }, 2131 | "time": { 2132 | "from": "now-1h", 2133 | "to": "now" 2134 | }, 2135 | "timepicker": { 2136 | "refresh_intervals": [ 2137 | "5s", 2138 | "10s", 2139 | "30s", 2140 | "1m", 2141 | "5m", 2142 | "15m", 2143 | "30m", 2144 | "1h", 2145 | "2h", 2146 | "1d" 2147 | ] 2148 | }, 2149 | "timezone": "", 2150 | "title": "Longhorn v1.1.0", 2151 | "uid": "2BCgsldGz", 2152 | "version": 101, 2153 | "description": "A dashboard for Longhorn v1.1.0" 2154 | } 2155 | kind: ConfigMap 2156 | metadata: 2157 | labels: 2158 | grafana_dashboard: "1" 2159 | name: longhorn 2160 | namespace: cattle-dashboards 2161 | -------------------------------------------------------------------------------- /longhorn-monitoring/servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: longhorn-prometheus-servicemonitor 5 | namespace: longhorn-system 6 | labels: 7 | name: longhorn-prometheus-servicemonitor 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: longhorn-manager 12 | namespaceSelector: 13 | matchNames: 14 | - longhorn-system 15 | endpoints: 16 | - port: manager -------------------------------------------------------------------------------- /migration/dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: custom-dashboard 5 | namespace: cattle-dashboards 6 | labels: 7 | grafana_dashboard: "1" 8 | data: 9 | custom-dashboard.json: | 10 | -------------------------------------------------------------------------------- 
/migration/rule.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: custom-rules 5 | namespace: default 6 | spec: 7 | groups: 8 | - name: custom.rules 9 | rules: 10 | - alert: 11 | expr: 12 | for: 13 | labels: 14 | severity: 15 | annotations: 16 | summary: 17 | -------------------------------------------------------------------------------- /run_on.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Run a command over SSH as user "ubuntu" on one of the cluster VMs. 3 | # Usage: run_on.sh NODE_ID [COMMAND...] 4 | # NODE_ID selects the variable IP<NODE_ID> (e.g. IP0), read after sourcing get_env.sh. 5 | # With no COMMAND, ssh is started without a remote command. 6 | 7 | if [ "$#" -lt 1 ]; then 8 | echo "Usage: $0 NODE_ID [COMMAND...]" >&2 9 | exit 1 10 | fi 11 | 12 | NODE_ID=$1 13 | # Keep the remote command as an array so its words are not re-split or glob-expanded locally. 14 | CMD=("${@:2}") 15 | 16 | source get_env.sh 17 | 18 | # Indirect expansion: read the variable whose name is IP<NODE_ID>. 19 | var="IP${NODE_ID}" 20 | NODE_IP=${!var} 21 | 22 | # Fail early instead of letting ssh run against an empty host name. 23 | if [ -z "${NODE_IP}" ]; then 24 | echo "No IP address found in variable ${var}" >&2 25 | exit 1 26 | fi 27 | 28 | ssh ubuntu@"${NODE_IP}" "${CMD[@]}" 29 | -------------------------------------------------------------------------------- /scrape-custom-service/01-demo-shop.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # ---------------------------------------------------------- 16 | # WARNING: This file is autogenerated. Do not manually edit. 
17 | # ---------------------------------------------------------- 18 | 19 | apiVersion: apps/v1 20 | kind: Deployment 21 | metadata: 22 | name: emailservice 23 | spec: 24 | replicas: 2 25 | selector: 26 | matchLabels: 27 | app: emailservice 28 | template: 29 | metadata: 30 | labels: 31 | app: emailservice 32 | spec: 33 | terminationGracePeriodSeconds: 5 34 | containers: 35 | - name: server 36 | image: gcr.io/google-samples/microservices-demo/emailservice:v0.2.0 37 | ports: 38 | - containerPort: 8080 39 | env: 40 | - name: PORT 41 | value: "8080" 42 | - name: DISABLE_TRACING 43 | value: "1" 44 | - name: DISABLE_PROFILER 45 | value: "1" 46 | readinessProbe: 47 | periodSeconds: 5 48 | exec: 49 | command: ["/bin/grpc_health_probe", "-addr=:8080"] 50 | livenessProbe: 51 | periodSeconds: 5 52 | exec: 53 | command: ["/bin/grpc_health_probe", "-addr=:8080"] 54 | resources: 55 | requests: 56 | cpu: 100m 57 | memory: 64Mi 58 | limits: 59 | cpu: 200m 60 | memory: 128Mi 61 | --- 62 | apiVersion: v1 63 | kind: Service 64 | metadata: 65 | name: emailservice 66 | spec: 67 | type: ClusterIP 68 | selector: 69 | app: emailservice 70 | ports: 71 | - name: grpc 72 | port: 5000 73 | targetPort: 8080 74 | --- 75 | apiVersion: apps/v1 76 | kind: Deployment 77 | metadata: 78 | name: checkoutservice 79 | spec: 80 | replicas: 2 81 | selector: 82 | matchLabels: 83 | app: checkoutservice 84 | template: 85 | metadata: 86 | labels: 87 | app: checkoutservice 88 | spec: 89 | containers: 90 | - name: server 91 | image: gcr.io/google-samples/microservices-demo/checkoutservice:v0.2.0 92 | ports: 93 | - containerPort: 5050 94 | readinessProbe: 95 | exec: 96 | command: ["/bin/grpc_health_probe", "-addr=:5050"] 97 | livenessProbe: 98 | exec: 99 | command: ["/bin/grpc_health_probe", "-addr=:5050"] 100 | env: 101 | - name: PORT 102 | value: "5050" 103 | - name: PRODUCT_CATALOG_SERVICE_ADDR 104 | value: "productcatalogservice:3550" 105 | - name: SHIPPING_SERVICE_ADDR 106 | value: "shippingservice:50051" 
107 | - name: PAYMENT_SERVICE_ADDR 108 | value: "paymentservice:50051" 109 | - name: EMAIL_SERVICE_ADDR 110 | value: "emailservice:5000" 111 | - name: CURRENCY_SERVICE_ADDR 112 | value: "currencyservice:7000" 113 | - name: CART_SERVICE_ADDR 114 | value: "cartservice:7070" 115 | - name: DISABLE_STATS 116 | value: "1" 117 | - name: DISABLE_TRACING 118 | value: "1" 119 | - name: DISABLE_PROFILER 120 | value: "1" 121 | resources: 122 | requests: 123 | cpu: 100m 124 | memory: 64Mi 125 | limits: 126 | cpu: 200m 127 | memory: 128Mi 128 | --- 129 | apiVersion: v1 130 | kind: Service 131 | metadata: 132 | name: checkoutservice 133 | spec: 134 | type: ClusterIP 135 | selector: 136 | app: checkoutservice 137 | ports: 138 | - name: grpc 139 | port: 5050 140 | targetPort: 5050 141 | --- 142 | apiVersion: apps/v1 143 | kind: Deployment 144 | metadata: 145 | name: recommendationservice 146 | spec: 147 | replicas: 2 148 | selector: 149 | matchLabels: 150 | app: recommendationservice 151 | template: 152 | metadata: 153 | labels: 154 | app: recommendationservice 155 | spec: 156 | terminationGracePeriodSeconds: 5 157 | containers: 158 | - name: server 159 | image: gcr.io/google-samples/microservices-demo/recommendationservice:v0.2.0 160 | ports: 161 | - containerPort: 8080 162 | readinessProbe: 163 | periodSeconds: 5 164 | exec: 165 | command: ["/bin/grpc_health_probe", "-addr=:8080"] 166 | livenessProbe: 167 | periodSeconds: 5 168 | exec: 169 | command: ["/bin/grpc_health_probe", "-addr=:8080"] 170 | env: 171 | - name: PORT 172 | value: "8080" 173 | - name: PRODUCT_CATALOG_SERVICE_ADDR 174 | value: "productcatalogservice:3550" 175 | - name: DISABLE_TRACING 176 | value: "1" 177 | - name: DISABLE_PROFILER 178 | value: "1" 179 | - name: DISABLE_DEBUGGER 180 | value: "1" 181 | resources: 182 | requests: 183 | cpu: 100m 184 | memory: 220Mi 185 | limits: 186 | cpu: 200m 187 | memory: 450Mi 188 | --- 189 | apiVersion: v1 190 | kind: Service 191 | metadata: 192 | name: recommendationservice 
193 | spec: 194 | type: ClusterIP 195 | selector: 196 | app: recommendationservice 197 | ports: 198 | - name: grpc 199 | port: 8080 200 | targetPort: 8080 201 | --- 202 | apiVersion: apps/v1 203 | kind: Deployment 204 | metadata: 205 | name: frontend 206 | spec: 207 | replicas: 2 208 | selector: 209 | matchLabels: 210 | app: frontend 211 | template: 212 | metadata: 213 | labels: 214 | app: frontend 215 | annotations: 216 | sidecar.istio.io/rewriteAppHTTPProbers: "true" 217 | spec: 218 | containers: 219 | - name: server 220 | image: gcr.io/google-samples/microservices-demo/frontend:v0.2.0 221 | ports: 222 | - containerPort: 8080 223 | readinessProbe: 224 | initialDelaySeconds: 10 225 | httpGet: 226 | path: "/_healthz" 227 | port: 8080 228 | httpHeaders: 229 | - name: "Cookie" 230 | value: "shop_session-id=x-readiness-probe" 231 | livenessProbe: 232 | initialDelaySeconds: 10 233 | httpGet: 234 | path: "/_healthz" 235 | port: 8080 236 | httpHeaders: 237 | - name: "Cookie" 238 | value: "shop_session-id=x-liveness-probe" 239 | env: 240 | - name: PORT 241 | value: "8080" 242 | - name: PRODUCT_CATALOG_SERVICE_ADDR 243 | value: "productcatalogservice:3550" 244 | - name: CURRENCY_SERVICE_ADDR 245 | value: "currencyservice:7000" 246 | - name: CART_SERVICE_ADDR 247 | value: "cartservice:7070" 248 | - name: RECOMMENDATION_SERVICE_ADDR 249 | value: "recommendationservice:8080" 250 | - name: SHIPPING_SERVICE_ADDR 251 | value: "shippingservice:50051" 252 | - name: CHECKOUT_SERVICE_ADDR 253 | value: "checkoutservice:5050" 254 | - name: AD_SERVICE_ADDR 255 | value: "adservice:9555" 256 | - name: DISABLE_TRACING 257 | value: "1" 258 | - name: DISABLE_PROFILER 259 | value: "1" 260 | resources: 261 | requests: 262 | cpu: 100m 263 | memory: 64Mi 264 | limits: 265 | cpu: 200m 266 | memory: 128Mi 267 | --- 268 | apiVersion: v1 269 | kind: Service 270 | metadata: 271 | name: frontend 272 | spec: 273 | type: ClusterIP 274 | selector: 275 | app: frontend 276 | ports: 277 | - name: http 278 
| port: 80 279 | targetPort: 8080 280 | --- 281 | apiVersion: apps/v1 282 | kind: Deployment 283 | metadata: 284 | name: paymentservice 285 | spec: 286 | replicas: 2 287 | selector: 288 | matchLabels: 289 | app: paymentservice 290 | template: 291 | metadata: 292 | labels: 293 | app: paymentservice 294 | spec: 295 | terminationGracePeriodSeconds: 5 296 | containers: 297 | - name: server 298 | image: gcr.io/google-samples/microservices-demo/paymentservice:v0.2.0 299 | ports: 300 | - containerPort: 50051 301 | env: 302 | - name: PORT 303 | value: "50051" 304 | readinessProbe: 305 | exec: 306 | command: ["/bin/grpc_health_probe", "-addr=:50051"] 307 | livenessProbe: 308 | exec: 309 | command: ["/bin/grpc_health_probe", "-addr=:50051"] 310 | resources: 311 | requests: 312 | cpu: 100m 313 | memory: 64Mi 314 | limits: 315 | cpu: 200m 316 | memory: 128Mi 317 | --- 318 | apiVersion: v1 319 | kind: Service 320 | metadata: 321 | name: paymentservice 322 | spec: 323 | type: ClusterIP 324 | selector: 325 | app: paymentservice 326 | ports: 327 | - name: grpc 328 | port: 50051 329 | targetPort: 50051 330 | --- 331 | apiVersion: apps/v1 332 | kind: Deployment 333 | metadata: 334 | name: productcatalogservice 335 | spec: 336 | replicas: 2 337 | selector: 338 | matchLabels: 339 | app: productcatalogservice 340 | template: 341 | metadata: 342 | labels: 343 | app: productcatalogservice 344 | spec: 345 | terminationGracePeriodSeconds: 5 346 | containers: 347 | - name: server 348 | image: gcr.io/google-samples/microservices-demo/productcatalogservice:v0.2.0 349 | ports: 350 | - containerPort: 3550 351 | env: 352 | - name: PORT 353 | value: "3550" 354 | - name: DISABLE_STATS 355 | value: "1" 356 | - name: DISABLE_TRACING 357 | value: "1" 358 | - name: DISABLE_PROFILER 359 | value: "1" 360 | readinessProbe: 361 | exec: 362 | command: ["/bin/grpc_health_probe", "-addr=:3550"] 363 | livenessProbe: 364 | exec: 365 | command: ["/bin/grpc_health_probe", "-addr=:3550"] 366 | resources: 367 | 
requests: 368 | cpu: 100m 369 | memory: 64Mi 370 | limits: 371 | cpu: 200m 372 | memory: 128Mi 373 | --- 374 | apiVersion: v1 375 | kind: Service 376 | metadata: 377 | name: productcatalogservice 378 | spec: 379 | type: ClusterIP 380 | selector: 381 | app: productcatalogservice 382 | ports: 383 | - name: grpc 384 | port: 3550 385 | targetPort: 3550 386 | --- 387 | apiVersion: apps/v1 388 | kind: Deployment 389 | metadata: 390 | name: cartservice 391 | spec: 392 | replicas: 2 393 | selector: 394 | matchLabels: 395 | app: cartservice 396 | template: 397 | metadata: 398 | labels: 399 | app: cartservice 400 | spec: 401 | terminationGracePeriodSeconds: 5 402 | containers: 403 | - name: server 404 | image: gcr.io/google-samples/microservices-demo/cartservice:v0.2.0 405 | ports: 406 | - containerPort: 7070 407 | env: 408 | - name: REDIS_ADDR 409 | value: "redis-cart:6379" 410 | - name: PORT 411 | value: "7070" 412 | - name: LISTEN_ADDR 413 | value: "0.0.0.0" 414 | resources: 415 | requests: 416 | cpu: 200m 417 | memory: 64Mi 418 | limits: 419 | cpu: 300m 420 | memory: 128Mi 421 | readinessProbe: 422 | initialDelaySeconds: 15 423 | exec: 424 | command: ["/bin/grpc_health_probe", "-addr=:7070", "-rpc-timeout=5s"] 425 | livenessProbe: 426 | initialDelaySeconds: 15 427 | periodSeconds: 10 428 | exec: 429 | command: ["/bin/grpc_health_probe", "-addr=:7070", "-rpc-timeout=5s"] 430 | --- 431 | apiVersion: v1 432 | kind: Service 433 | metadata: 434 | name: cartservice 435 | spec: 436 | type: ClusterIP 437 | selector: 438 | app: cartservice 439 | ports: 440 | - name: grpc 441 | port: 7070 442 | targetPort: 7070 443 | --- 444 | apiVersion: apps/v1 445 | kind: Deployment 446 | metadata: 447 | name: loadgenerator 448 | spec: 449 | selector: 450 | matchLabels: 451 | app: loadgenerator 452 | replicas: 1 453 | template: 454 | metadata: 455 | labels: 456 | app: loadgenerator 457 | annotations: 458 | sidecar.istio.io/rewriteAppHTTPProbers: "true" 459 | spec: 460 | 
terminationGracePeriodSeconds: 5 461 | restartPolicy: Always 462 | containers: 463 | - name: main 464 | image: gcr.io/google-samples/microservices-demo/loadgenerator:v0.2.0 465 | env: 466 | - name: FRONTEND_ADDR 467 | value: "frontend:80" 468 | - name: USERS 469 | value: "10" 470 | resources: 471 | requests: 472 | cpu: 300m 473 | memory: 256Mi 474 | limits: 475 | cpu: 500m 476 | memory: 512Mi 477 | --- 478 | apiVersion: apps/v1 479 | kind: Deployment 480 | metadata: 481 | name: currencyservice 482 | spec: 483 | replicas: 2 484 | selector: 485 | matchLabels: 486 | app: currencyservice 487 | template: 488 | metadata: 489 | labels: 490 | app: currencyservice 491 | spec: 492 | terminationGracePeriodSeconds: 5 493 | containers: 494 | - name: server 495 | image: gcr.io/google-samples/microservices-demo/currencyservice:v0.2.0 496 | ports: 497 | - name: grpc 498 | containerPort: 7000 499 | env: 500 | - name: PORT 501 | value: "7000" 502 | - name: DISABLE_TRACING 503 | value: "1" 504 | - name: DISABLE_PROFILER 505 | value: "1" 506 | - name: DISABLE_DEBUGGER 507 | value: "1" 508 | readinessProbe: 509 | exec: 510 | command: ["/bin/grpc_health_probe", "-addr=:7000"] 511 | livenessProbe: 512 | exec: 513 | command: ["/bin/grpc_health_probe", "-addr=:7000"] 514 | resources: 515 | requests: 516 | cpu: 100m 517 | memory: 64Mi 518 | limits: 519 | cpu: 200m 520 | memory: 128Mi 521 | --- 522 | apiVersion: v1 523 | kind: Service 524 | metadata: 525 | name: currencyservice 526 | spec: 527 | type: ClusterIP 528 | selector: 529 | app: currencyservice 530 | ports: 531 | - name: grpc 532 | port: 7000 533 | targetPort: 7000 534 | --- 535 | apiVersion: apps/v1 536 | kind: Deployment 537 | metadata: 538 | name: shippingservice 539 | spec: 540 | replicas: 2 541 | selector: 542 | matchLabels: 543 | app: shippingservice 544 | template: 545 | metadata: 546 | labels: 547 | app: shippingservice 548 | spec: 549 | containers: 550 | - name: server 551 | image: 
gcr.io/google-samples/microservices-demo/shippingservice:v0.2.0 552 | ports: 553 | - containerPort: 50051 554 | env: 555 | - name: PORT 556 | value: "50051" 557 | - name: DISABLE_STATS 558 | value: "1" 559 | - name: DISABLE_TRACING 560 | value: "1" 561 | - name: DISABLE_PROFILER 562 | value: "1" 563 | readinessProbe: 564 | periodSeconds: 5 565 | exec: 566 | command: ["/bin/grpc_health_probe", "-addr=:50051"] 567 | livenessProbe: 568 | exec: 569 | command: ["/bin/grpc_health_probe", "-addr=:50051"] 570 | resources: 571 | requests: 572 | cpu: 100m 573 | memory: 64Mi 574 | limits: 575 | cpu: 200m 576 | memory: 128Mi 577 | --- 578 | apiVersion: v1 579 | kind: Service 580 | metadata: 581 | name: shippingservice 582 | spec: 583 | type: ClusterIP 584 | selector: 585 | app: shippingservice 586 | ports: 587 | - name: grpc 588 | port: 50051 589 | targetPort: 50051 590 | --- 591 | apiVersion: apps/v1 592 | kind: Deployment 593 | metadata: 594 | name: redis-cart 595 | spec: 596 | selector: 597 | matchLabels: 598 | app: redis-cart 599 | template: 600 | metadata: 601 | labels: 602 | app: redis-cart 603 | spec: 604 | containers: 605 | - name: redis 606 | image: redis:alpine 607 | ports: 608 | - containerPort: 6379 609 | readinessProbe: 610 | periodSeconds: 5 611 | tcpSocket: 612 | port: 6379 613 | livenessProbe: 614 | periodSeconds: 5 615 | tcpSocket: 616 | port: 6379 617 | volumeMounts: 618 | - mountPath: /data 619 | name: redis-data 620 | resources: 621 | limits: 622 | memory: 256Mi 623 | cpu: 125m 624 | requests: 625 | cpu: 70m 626 | memory: 200Mi 627 | volumes: 628 | - name: redis-data 629 | emptyDir: {} 630 | --- 631 | apiVersion: v1 632 | kind: Service 633 | metadata: 634 | name: redis-cart 635 | spec: 636 | type: ClusterIP 637 | selector: 638 | app: redis-cart 639 | ports: 640 | - name: redis 641 | port: 6379 642 | targetPort: 6379 643 | --- 644 | apiVersion: apps/v1 645 | kind: Deployment 646 | metadata: 647 | name: adservice 648 | spec: 649 | replicas: 2 650 | selector: 
651 | matchLabels: 652 | app: adservice 653 | template: 654 | metadata: 655 | labels: 656 | app: adservice 657 | spec: 658 | terminationGracePeriodSeconds: 5 659 | containers: 660 | - name: server 661 | image: gcr.io/google-samples/microservices-demo/adservice:v0.2.0 662 | ports: 663 | - containerPort: 9555 664 | env: 665 | - name: PORT 666 | value: "9555" 667 | - name: DISABLE_STATS 668 | value: "1" 669 | - name: DISABLE_TRACING 670 | value: "1" 671 | resources: 672 | requests: 673 | cpu: 200m 674 | memory: 180Mi 675 | limits: 676 | cpu: 300m 677 | memory: 300Mi 678 | readinessProbe: 679 | initialDelaySeconds: 20 680 | periodSeconds: 15 681 | exec: 682 | command: ["/bin/grpc_health_probe", "-addr=:9555"] 683 | livenessProbe: 684 | initialDelaySeconds: 20 685 | periodSeconds: 15 686 | exec: 687 | command: ["/bin/grpc_health_probe", "-addr=:9555"] 688 | --- 689 | apiVersion: v1 690 | kind: Service 691 | metadata: 692 | name: adservice 693 | spec: 694 | type: ClusterIP 695 | selector: 696 | app: adservice 697 | ports: 698 | - name: grpc 699 | port: 9555 700 | targetPort: 9555 701 | --- 702 | # networking.k8s.io/v1beta1 Ingress was removed in Kubernetes 1.22; the v1 API requires pathType and a service/port backend. 703 | kind: Ingress 704 | apiVersion: networking.k8s.io/v1 705 | metadata: 706 | name: demo-shop 707 | spec: 708 | rules: 709 | - host: sslip.io 710 | http: 711 | paths: 712 | - path: / 713 | pathType: Prefix 714 | backend: 715 | service: 716 | name: frontend 717 | port: 718 | number: 80 719 | -------------------------------------------------------------------------------- /scrape-custom-service/02-redis-prometheus-exporter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: redis-cart 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: redis-cart 9 | template: 10 | metadata: 11 | labels: 12 | app: redis-cart 13 | spec: 14 | containers: 15 | - name: redis 16 | image: redis:alpine 17 | ports: 18 | - containerPort: 6379 19 | readinessProbe: 20 | periodSeconds: 5 21 | tcpSocket: 22 | port: 6379 23 | livenessProbe: 24 |
periodSeconds: 5 25 | tcpSocket: 26 | port: 6379 27 | volumeMounts: 28 | - mountPath: /data 29 | name: redis-data 30 | resources: 31 | limits: 32 | memory: 256Mi 33 | cpu: 125m 34 | requests: 35 | cpu: 70m 36 | memory: 200Mi 37 | - name: redis-exporter 38 | image: oliver006/redis_exporter:latest 39 | ports: 40 | - containerPort: 9121 41 | name: metrics 42 | readinessProbe: 43 | periodSeconds: 5 44 | tcpSocket: 45 | port: 9121 46 | volumes: 47 | - name: redis-data 48 | emptyDir: {} 49 | --- 50 | apiVersion: v1 51 | kind: Service 52 | metadata: 53 | name: redis-cart 54 | labels: 55 | app: redis-cart 56 | spec: 57 | type: ClusterIP 58 | selector: 59 | app: redis-cart 60 | ports: 61 | - name: redis 62 | port: 6379 63 | targetPort: 6379 64 | - name: metrics 65 | port: 9121 66 | targetPort: 9121 -------------------------------------------------------------------------------- /scrape-custom-service/03-redis-servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: redis-cart 5 | spec: 6 | endpoints: 7 | - interval: 30s 8 | scrapeTimeout: 20s 9 | path: "/metrics" 10 | # Select the Service port by its name ("metrics"); the endpoint-level targetPort field is deprecated in prometheus-operator. 11 | port: metrics 12 | namespaceSelector: 13 | matchNames: 14 | - default 15 | selector: 16 | matchLabels: 17 | app: redis-cart 18 | -------------------------------------------------------------------------------- /scrape-custom-service/04-redis-grafana-dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: grafana-redis-cart 5 | namespace: cattle-dashboards 6 | labels: 7 | grafana_dashboard: "1" 8 | data: 9 | redis.json: | 10 | { 11 | "__inputs": [ 12 | ], 13 | "__requires": [ 14 | { 15 | "type": "grafana", 16 | "id": "grafana", 17 | "name": "Grafana", 18 | "version": "3.1.1" 19 | }, 20 | { 21 | "type": "panel", 22 | "id": "graph", 23 | "name": "Graph", 24 | "version": ""
25 | }, 26 | { 27 | "type": "datasource", 28 | "id": "prometheus", 29 | "name": "Prometheus", 30 | "version": "1.0.0" 31 | }, 32 | { 33 | "type": "panel", 34 | "id": "singlestat", 35 | "name": "Singlestat", 36 | "version": "" 37 | } 38 | ], 39 | "annotations": { 40 | "list": [ 41 | { 42 | "builtIn": 1, 43 | "datasource": "-- Grafana --", 44 | "enable": true, 45 | "hide": true, 46 | "iconColor": "rgba(0, 211, 255, 1)", 47 | "name": "Annotations & Alerts", 48 | "type": "dashboard" 49 | } 50 | ] 51 | }, 52 | "description": "Redis Dashboard for Prometheus Redis Exporter 1.x", 53 | "editable": true, 54 | "gnetId": 763, 55 | "graphTooltip": 1, 56 | "id": null, 57 | "iteration": 1583850456553, 58 | "links": [], 59 | "panels": [ 60 | { 61 | "cacheTimeout": null, 62 | "colorBackground": false, 63 | "colorValue": false, 64 | "colors": [ 65 | "rgba(245, 54, 54, 0.9)", 66 | "rgba(237, 129, 40, 0.89)", 67 | "rgba(50, 172, 45, 0.97)" 68 | ], 69 | "datasource": "Prometheus", 70 | "decimals": 0, 71 | "editable": true, 72 | "error": false, 73 | "format": "s", 74 | "gauge": { 75 | "maxValue": 100, 76 | "minValue": 0, 77 | "show": false, 78 | "thresholdLabels": false, 79 | "thresholdMarkers": true 80 | }, 81 | "gridPos": { 82 | "h": 7, 83 | "w": 3, 84 | "x": 0, 85 | "y": 0 86 | }, 87 | "id": 9, 88 | "interval": null, 89 | "isNew": true, 90 | "links": [], 91 | "mappingType": 1, 92 | "mappingTypes": [ 93 | { 94 | "name": "value to text", 95 | "value": 1 96 | }, 97 | { 98 | "name": "range to text", 99 | "value": 2 100 | } 101 | ], 102 | "maxDataPoints": 100, 103 | "nullPointMode": "connected", 104 | "nullText": null, 105 | "options": {}, 106 | "postfix": "", 107 | "postfixFontSize": "50%", 108 | "prefix": "", 109 | "prefixFontSize": "50%", 110 | "rangeMaps": [ 111 | { 112 | "from": "null", 113 | "text": "N/A", 114 | "to": "null" 115 | } 116 | ], 117 | "sparkline": { 118 | "fillColor": "rgba(31, 118, 189, 0.18)", 119 | "full": false, 120 | "lineColor": "rgb(31, 120, 193)", 121 | "show": 
true 122 | }, 123 | "tableColumn": "", 124 | "targets": [ 125 | { 126 | "expr": "max(max_over_time(redis_uptime_in_seconds{instance=~\"$instance\"}[$__interval]))", 127 | "format": "time_series", 128 | "interval": "", 129 | "intervalFactor": 2, 130 | "legendFormat": "", 131 | "metric": "", 132 | "refId": "A", 133 | "step": 1800 134 | } 135 | ], 136 | "thresholds": "", 137 | "title": "Max Uptime", 138 | "type": "singlestat", 139 | "valueFontSize": "70%", 140 | "valueMaps": [ 141 | { 142 | "op": "=", 143 | "text": "N/A", 144 | "value": "null" 145 | } 146 | ], 147 | "valueName": "current" 148 | }, 149 | { 150 | "cacheTimeout": null, 151 | "colorBackground": false, 152 | "colorValue": false, 153 | "colors": [ 154 | "rgba(245, 54, 54, 0.9)", 155 | "rgba(237, 129, 40, 0.89)", 156 | "rgba(50, 172, 45, 0.97)" 157 | ], 158 | "datasource": "Prometheus", 159 | "decimals": 0, 160 | "editable": true, 161 | "error": false, 162 | "format": "none", 163 | "gauge": { 164 | "maxValue": 100, 165 | "minValue": 0, 166 | "show": false, 167 | "thresholdLabels": false, 168 | "thresholdMarkers": true 169 | }, 170 | "gridPos": { 171 | "h": 7, 172 | "w": 2, 173 | "x": 3, 174 | "y": 0 175 | }, 176 | "hideTimeOverride": true, 177 | "id": 12, 178 | "interval": null, 179 | "isNew": true, 180 | "links": [], 181 | "mappingType": 1, 182 | "mappingTypes": [ 183 | { 184 | "name": "value to text", 185 | "value": 1 186 | }, 187 | { 188 | "name": "range to text", 189 | "value": 2 190 | } 191 | ], 192 | "maxDataPoints": 100, 193 | "nullPointMode": "connected", 194 | "nullText": null, 195 | "options": {}, 196 | "postfix": "", 197 | "postfixFontSize": "50%", 198 | "prefix": "", 199 | "prefixFontSize": "50%", 200 | "rangeMaps": [ 201 | { 202 | "from": "null", 203 | "text": "N/A", 204 | "to": "null" 205 | } 206 | ], 207 | "sparkline": { 208 | "fillColor": "rgba(31, 118, 189, 0.18)", 209 | "full": false, 210 | "lineColor": "rgb(31, 120, 193)", 211 | "show": true 212 | }, 213 | "tableColumn": "", 214 | 
"targets": [ 215 | { 216 | "expr": "sum(redis_connected_clients{instance=~\"$instance\"})", 217 | "format": "time_series", 218 | "intervalFactor": 2, 219 | "legendFormat": "", 220 | "metric": "", 221 | "refId": "A", 222 | "step": 2 223 | } 224 | ], 225 | "thresholds": "", 226 | "timeFrom": "1m", 227 | "timeShift": null, 228 | "title": "Clients", 229 | "type": "singlestat", 230 | "valueFontSize": "80%", 231 | "valueMaps": [ 232 | { 233 | "op": "=", 234 | "text": "N/A", 235 | "value": "null" 236 | } 237 | ], 238 | "valueName": "current" 239 | }, 240 | { 241 | "cacheTimeout": null, 242 | "colorBackground": false, 243 | "colorValue": false, 244 | "colors": [ 245 | "rgba(50, 172, 45, 0.97)", 246 | "rgba(237, 129, 40, 0.89)", 247 | "rgba(245, 54, 54, 0.9)" 248 | ], 249 | "datasource": "Prometheus", 250 | "decimals": 0, 251 | "editable": true, 252 | "error": false, 253 | "format": "percent", 254 | "gauge": { 255 | "maxValue": 100, 256 | "minValue": 0, 257 | "show": true, 258 | "thresholdLabels": false, 259 | "thresholdMarkers": true 260 | }, 261 | "gridPos": { 262 | "h": 7, 263 | "w": 3, 264 | "x": 5, 265 | "y": 0 266 | }, 267 | "hideTimeOverride": true, 268 | "id": 11, 269 | "interval": null, 270 | "isNew": true, 271 | "links": [], 272 | "mappingType": 1, 273 | "mappingTypes": [ 274 | { 275 | "name": "value to text", 276 | "value": 1 277 | }, 278 | { 279 | "name": "range to text", 280 | "value": 2 281 | } 282 | ], 283 | "maxDataPoints": 100, 284 | "nullPointMode": "connected", 285 | "nullText": null, 286 | "options": {}, 287 | "postfix": "", 288 | "postfixFontSize": "50%", 289 | "prefix": "", 290 | "prefixFontSize": "50%", 291 | "rangeMaps": [ 292 | { 293 | "from": "null", 294 | "text": "N/A", 295 | "to": "null" 296 | } 297 | ], 298 | "sparkline": { 299 | "fillColor": "rgba(31, 118, 189, 0.18)", 300 | "full": false, 301 | "lineColor": "rgb(31, 120, 193)", 302 | "show": true 303 | }, 304 | "tableColumn": "", 305 | "targets": [ 306 | { 307 | "expr": "sum(100 * 
(redis_memory_used_bytes{instance=~\"$instance\"} / redis_memory_max_bytes{instance=~\"$instance\"}))", 308 | "format": "time_series", 309 | "intervalFactor": 2, 310 | "legendFormat": "", 311 | "metric": "", 312 | "refId": "A", 313 | "step": 2 314 | } 315 | ], 316 | "thresholds": "80,95", 317 | "timeFrom": "1m", 318 | "timeShift": null, 319 | "title": "Memory Usage", 320 | "type": "singlestat", 321 | "valueFontSize": "80%", 322 | "valueMaps": [ 323 | { 324 | "op": "=", 325 | "text": "N/A", 326 | "value": "null" 327 | } 328 | ], 329 | "valueName": "current" 330 | }, 331 | { 332 | "aliasColors": {}, 333 | "bars": false, 334 | "dashLength": 10, 335 | "dashes": false, 336 | "datasource": "Prometheus", 337 | "editable": true, 338 | "error": false, 339 | "fill": 8, 340 | "fillGradient": 0, 341 | "grid": {}, 342 | "gridPos": { 343 | "h": 7, 344 | "w": 8, 345 | "x": 8, 346 | "y": 0 347 | }, 348 | "hiddenSeries": false, 349 | "id": 18, 350 | "isNew": true, 351 | "legend": { 352 | "avg": false, 353 | "current": false, 354 | "hideEmpty": false, 355 | "hideZero": false, 356 | "max": false, 357 | "min": false, 358 | "show": false, 359 | "total": false, 360 | "values": false 361 | }, 362 | "lines": true, 363 | "linewidth": 1, 364 | "links": [], 365 | "nullPointMode": "connected", 366 | "options": { 367 | "dataLinks": [] 368 | }, 369 | "percentage": false, 370 | "pointradius": 5, 371 | "points": false, 372 | "renderer": "flot", 373 | "seriesOverrides": [], 374 | "spaceLength": 10, 375 | "stack": true, 376 | "steppedLine": false, 377 | "targets": [ 378 | { 379 | "expr": "sum(rate(redis_commands_total{instance=~\"$instance\"} [1m])) by (cmd)", 380 | "format": "time_series", 381 | "interval": "", 382 | "intervalFactor": 2, 383 | "legendFormat": "{{ cmd }}", 384 | "metric": "redis_command_calls_total", 385 | "refId": "A", 386 | "step": 240 387 | } 388 | ], 389 | "thresholds": [], 390 | "timeFrom": null, 391 | "timeRegions": [], 392 | "timeShift": null, 393 | "title": "Total Commands 
/ sec", 394 | "tooltip": { 395 | "msResolution": true, 396 | "shared": true, 397 | "sort": 2, 398 | "value_type": "individual" 399 | }, 400 | "type": "graph", 401 | "xaxis": { 402 | "buckets": null, 403 | "mode": "time", 404 | "name": null, 405 | "show": true, 406 | "values": [] 407 | }, 408 | "yaxes": [ 409 | { 410 | "format": "short", 411 | "label": null, 412 | "logBase": 1, 413 | "max": null, 414 | "min": null, 415 | "show": true 416 | }, 417 | { 418 | "format": "short", 419 | "label": null, 420 | "logBase": 1, 421 | "max": null, 422 | "min": null, 423 | "show": true 424 | } 425 | ], 426 | "yaxis": { 427 | "align": false, 428 | "alignLevel": null 429 | } 430 | }, 431 | { 432 | "aliasColors": {}, 433 | "bars": false, 434 | "dashLength": 10, 435 | "dashes": false, 436 | "datasource": "Prometheus", 437 | "decimals": 2, 438 | "editable": true, 439 | "error": false, 440 | "fill": 1, 441 | "fillGradient": 0, 442 | "grid": {}, 443 | "gridPos": { 444 | "h": 7, 445 | "w": 8, 446 | "x": 16, 447 | "y": 0 448 | }, 449 | "hiddenSeries": false, 450 | "id": 1, 451 | "isNew": true, 452 | "legend": { 453 | "avg": false, 454 | "current": false, 455 | "max": false, 456 | "min": false, 457 | "show": false, 458 | "total": false, 459 | "values": false 460 | }, 461 | "lines": true, 462 | "linewidth": 2, 463 | "links": [], 464 | "nullPointMode": "connected", 465 | "options": { 466 | "dataLinks": [] 467 | }, 468 | "percentage": true, 469 | "pointradius": 5, 470 | "points": false, 471 | "renderer": "flot", 472 | "seriesOverrides": [], 473 | "spaceLength": 10, 474 | "stack": false, 475 | "steppedLine": false, 476 | "targets": [ 477 | { 478 | "expr": "irate(redis_keyspace_hits_total{instance=~\"$instance\"}[5m])", 479 | "format": "time_series", 480 | "hide": false, 481 | "interval": "", 482 | "intervalFactor": 2, 483 | "legendFormat": "hits, {{ instance }}", 484 | "metric": "", 485 | "refId": "A", 486 | "step": 240, 487 | "target": "" 488 | }, 489 | { 490 | "expr": 
"irate(redis_keyspace_misses_total{instance=~\"$instance\"}[5m])", 491 | "format": "time_series", 492 | "hide": false, 493 | "interval": "", 494 | "intervalFactor": 2, 495 | "legendFormat": "misses, {{ instance }}", 496 | "metric": "", 497 | "refId": "B", 498 | "step": 240, 499 | "target": "" 500 | } 501 | ], 502 | "thresholds": [], 503 | "timeFrom": null, 504 | "timeRegions": [], 505 | "timeShift": null, 506 | "title": "Hits / Misses per Sec", 507 | "tooltip": { 508 | "msResolution": false, 509 | "shared": true, 510 | "sort": 0, 511 | "value_type": "individual" 512 | }, 513 | "type": "graph", 514 | "xaxis": { 515 | "buckets": null, 516 | "mode": "time", 517 | "name": null, 518 | "show": true, 519 | "values": [] 520 | }, 521 | "yaxes": [ 522 | { 523 | "format": "short", 524 | "label": "", 525 | "logBase": 1, 526 | "max": null, 527 | "min": 0, 528 | "show": true 529 | }, 530 | { 531 | "format": "short", 532 | "label": null, 533 | "logBase": 1, 534 | "max": null, 535 | "min": null, 536 | "show": true 537 | } 538 | ], 539 | "yaxis": { 540 | "align": false, 541 | "alignLevel": null 542 | } 543 | }, 544 | { 545 | "aliasColors": { 546 | "max": "#BF1B00" 547 | }, 548 | "bars": false, 549 | "dashLength": 10, 550 | "dashes": false, 551 | "datasource": "Prometheus", 552 | "editable": true, 553 | "error": false, 554 | "fill": 1, 555 | "fillGradient": 0, 556 | "grid": {}, 557 | "gridPos": { 558 | "h": 7, 559 | "w": 12, 560 | "x": 0, 561 | "y": 7 562 | }, 563 | "hiddenSeries": false, 564 | "id": 7, 565 | "isNew": true, 566 | "legend": { 567 | "avg": false, 568 | "current": false, 569 | "hideEmpty": false, 570 | "hideZero": false, 571 | "max": false, 572 | "min": false, 573 | "show": true, 574 | "total": false, 575 | "values": false 576 | }, 577 | "lines": true, 578 | "linewidth": 2, 579 | "links": [], 580 | "nullPointMode": "null as zero", 581 | "options": { 582 | "dataLinks": [] 583 | }, 584 | "percentage": false, 585 | "pointradius": 5, 586 | "points": false, 587 | 
"renderer": "flot", 588 | "seriesOverrides": [], 589 | "spaceLength": 10, 590 | "stack": false, 591 | "steppedLine": false, 592 | "targets": [ 593 | { 594 | "expr": "redis_memory_used_bytes{instance=~\"$instance\"}", 595 | "format": "time_series", 596 | "intervalFactor": 2, 597 | "legendFormat": "used, {{ instance }}", 598 | "metric": "", 599 | "refId": "A", 600 | "step": 240, 601 | "target": "" 602 | }, 603 | { 604 | "expr": "redis_memory_max_bytes{instance=~\"$instance\"}", 605 | "format": "time_series", 606 | "hide": false, 607 | "intervalFactor": 2, 608 | "legendFormat": "max, {{ instance }}", 609 | "refId": "B", 610 | "step": 240 611 | } 612 | ], 613 | "thresholds": [], 614 | "timeFrom": null, 615 | "timeRegions": [], 616 | "timeShift": null, 617 | "title": "Total Memory Usage", 618 | "tooltip": { 619 | "msResolution": false, 620 | "shared": true, 621 | "sort": 0, 622 | "value_type": "cumulative" 623 | }, 624 | "type": "graph", 625 | "xaxis": { 626 | "buckets": null, 627 | "mode": "time", 628 | "name": null, 629 | "show": true, 630 | "values": [] 631 | }, 632 | "yaxes": [ 633 | { 634 | "format": "bytes", 635 | "label": null, 636 | "logBase": 1, 637 | "max": null, 638 | "min": 0, 639 | "show": true 640 | }, 641 | { 642 | "format": "short", 643 | "label": null, 644 | "logBase": 1, 645 | "max": null, 646 | "min": null, 647 | "show": true 648 | } 649 | ], 650 | "yaxis": { 651 | "align": false, 652 | "alignLevel": null 653 | } 654 | }, 655 | { 656 | "aliasColors": {}, 657 | "bars": false, 658 | "dashLength": 10, 659 | "dashes": false, 660 | "datasource": "Prometheus", 661 | "editable": true, 662 | "error": false, 663 | "fill": 1, 664 | "fillGradient": 0, 665 | "grid": {}, 666 | "gridPos": { 667 | "h": 7, 668 | "w": 12, 669 | "x": 12, 670 | "y": 7 671 | }, 672 | "hiddenSeries": false, 673 | "id": 10, 674 | "isNew": true, 675 | "legend": { 676 | "avg": false, 677 | "current": false, 678 | "max": false, 679 | "min": false, 680 | "show": true, 681 | "total": false, 682 
| "values": false 683 | }, 684 | "lines": true, 685 | "linewidth": 2, 686 | "links": [], 687 | "nullPointMode": "connected", 688 | "options": { 689 | "dataLinks": [] 690 | }, 691 | "percentage": false, 692 | "pointradius": 5, 693 | "points": false, 694 | "renderer": "flot", 695 | "seriesOverrides": [], 696 | "spaceLength": 10, 697 | "stack": false, 698 | "steppedLine": false, 699 | "targets": [ 700 | { 701 | "expr": "sum(rate(redis_net_input_bytes_total{instance=~\"$instance\"}[5m]))", 702 | "format": "time_series", 703 | "intervalFactor": 2, 704 | "legendFormat": "{{ input }}", 705 | "refId": "A", 706 | "step": 240 707 | }, 708 | { 709 | "expr": "sum(rate(redis_net_output_bytes_total{instance=~\"$instance\"}[5m]))", 710 | "format": "time_series", 711 | "interval": "", 712 | "intervalFactor": 2, 713 | "legendFormat": "{{ output }}", 714 | "refId": "B", 715 | "step": 240 716 | } 717 | ], 718 | "thresholds": [], 719 | "timeFrom": null, 720 | "timeRegions": [], 721 | "timeShift": null, 722 | "title": "Network I/O", 723 | "tooltip": { 724 | "msResolution": true, 725 | "shared": true, 726 | "sort": 0, 727 | "value_type": "cumulative" 728 | }, 729 | "type": "graph", 730 | "xaxis": { 731 | "buckets": null, 732 | "mode": "time", 733 | "name": null, 734 | "show": true, 735 | "values": [] 736 | }, 737 | "yaxes": [ 738 | { 739 | "format": "bytes", 740 | "label": null, 741 | "logBase": 1, 742 | "max": null, 743 | "min": null, 744 | "show": true 745 | }, 746 | { 747 | "format": "short", 748 | "label": null, 749 | "logBase": 1, 750 | "max": null, 751 | "min": null, 752 | "show": true 753 | } 754 | ], 755 | "yaxis": { 756 | "align": false, 757 | "alignLevel": null 758 | } 759 | }, 760 | { 761 | "aliasColors": {}, 762 | "bars": false, 763 | "dashLength": 10, 764 | "dashes": false, 765 | "datasource": "Prometheus", 766 | "editable": true, 767 | "error": false, 768 | "fill": 7, 769 | "fillGradient": 0, 770 | "grid": {}, 771 | "gridPos": { 772 | "h": 7, 773 | "w": 12, 774 | "x": 0, 
775 | "y": 14 776 | }, 777 | "hiddenSeries": false, 778 | "id": 5, 779 | "isNew": true, 780 | "legend": { 781 | "alignAsTable": false, 782 | "avg": false, 783 | "current": true, 784 | "hideEmpty": false, 785 | "hideZero": true, 786 | "max": false, 787 | "min": false, 788 | "rightSide": false, 789 | "show": true, 790 | "total": false, 791 | "values": true 792 | }, 793 | "lines": true, 794 | "linewidth": 2, 795 | "links": [], 796 | "nullPointMode": "connected", 797 | "options": { 798 | "dataLinks": [] 799 | }, 800 | "percentage": false, 801 | "pointradius": 5, 802 | "points": false, 803 | "renderer": "flot", 804 | "seriesOverrides": [], 805 | "spaceLength": 10, 806 | "stack": true, 807 | "steppedLine": false, 808 | "targets": [ 809 | { 810 | "expr": "sum (redis_db_keys{instance=~\"$instance\"}) by (db, instance)", 811 | "format": "time_series", 812 | "interval": "", 813 | "intervalFactor": 2, 814 | "legendFormat": "{{ db }}, {{ instance }}", 815 | "refId": "A", 816 | "step": 240, 817 | "target": "" 818 | } 819 | ], 820 | "thresholds": [], 821 | "timeFrom": null, 822 | "timeRegions": [], 823 | "timeShift": null, 824 | "title": "Total Items per DB", 825 | "tooltip": { 826 | "msResolution": false, 827 | "shared": true, 828 | "sort": 0, 829 | "value_type": "individual" 830 | }, 831 | "type": "graph", 832 | "xaxis": { 833 | "buckets": null, 834 | "mode": "time", 835 | "name": null, 836 | "show": true, 837 | "values": [] 838 | }, 839 | "yaxes": [ 840 | { 841 | "format": "none", 842 | "label": null, 843 | "logBase": 1, 844 | "max": null, 845 | "min": null, 846 | "show": true 847 | }, 848 | { 849 | "format": "short", 850 | "label": null, 851 | "logBase": 1, 852 | "max": null, 853 | "min": null, 854 | "show": true 855 | } 856 | ], 857 | "yaxis": { 858 | "align": false, 859 | "alignLevel": null 860 | } 861 | }, 862 | { 863 | "aliasColors": {}, 864 | "bars": false, 865 | "dashLength": 10, 866 | "dashes": false, 867 | "datasource": "Prometheus", 868 | "editable": true, 869 | 
"error": false, 870 | "fill": 7, 871 | "fillGradient": 0, 872 | "grid": {}, 873 | "gridPos": { 874 | "h": 7, 875 | "w": 12, 876 | "x": 12, 877 | "y": 14 878 | }, 879 | "hiddenSeries": false, 880 | "id": 13, 881 | "isNew": true, 882 | "legend": { 883 | "avg": false, 884 | "current": false, 885 | "max": false, 886 | "min": false, 887 | "show": true, 888 | "total": false, 889 | "values": false 890 | }, 891 | "lines": true, 892 | "linewidth": 2, 893 | "links": [], 894 | "nullPointMode": "connected", 895 | "options": { 896 | "dataLinks": [] 897 | }, 898 | "percentage": false, 899 | "pointradius": 5, 900 | "points": false, 901 | "renderer": "flot", 902 | "seriesOverrides": [], 903 | "spaceLength": 10, 904 | "stack": true, 905 | "steppedLine": false, 906 | "targets": [ 907 | { 908 | "expr": "sum (redis_db_keys{instance=~\"$instance\"}) by (instance) - sum (redis_db_keys_expiring{instance=~\"$instance\"}) by (instance)", 909 | "format": "time_series", 910 | "interval": "", 911 | "intervalFactor": 2, 912 | "legendFormat": "not expiring, {{ instance }}", 913 | "refId": "A", 914 | "step": 240, 915 | "target": "" 916 | }, 917 | { 918 | "expr": "sum (redis_db_keys_expiring{instance=~\"$instance\"}) by (instance)", 919 | "format": "time_series", 920 | "interval": "", 921 | "intervalFactor": 2, 922 | "legendFormat": "expiring, {{ instance }}", 923 | "metric": "", 924 | "refId": "B", 925 | "step": 240 926 | } 927 | ], 928 | "thresholds": [], 929 | "timeFrom": null, 930 | "timeRegions": [], 931 | "timeShift": null, 932 | "title": "Expiring vs Not-Expiring Keys", 933 | "tooltip": { 934 | "msResolution": false, 935 | "shared": true, 936 | "sort": 0, 937 | "value_type": "individual" 938 | }, 939 | "type": "graph", 940 | "xaxis": { 941 | "buckets": null, 942 | "mode": "time", 943 | "name": null, 944 | "show": true, 945 | "values": [] 946 | }, 947 | "yaxes": [ 948 | { 949 | "format": "short", 950 | "label": null, 951 | "logBase": 1, 952 | "max": null, 953 | "min": null, 954 | "show": 
true 955 | }, 956 | { 957 | "format": "short", 958 | "label": null, 959 | "logBase": 1, 960 | "max": null, 961 | "min": null, 962 | "show": true 963 | } 964 | ], 965 | "yaxis": { 966 | "align": false, 967 | "alignLevel": null 968 | } 969 | }, 970 | { 971 | "aliasColors": { 972 | "evicts": "#890F02", 973 | "memcached_items_evicted_total{instance=\"172.17.0.1:9150\",job=\"prometheus\"}": "#890F02", 974 | "reclaims": "#3F6833" 975 | }, 976 | "bars": false, 977 | "dashLength": 10, 978 | "dashes": false, 979 | "datasource": "Prometheus", 980 | "editable": true, 981 | "error": false, 982 | "fill": 1, 983 | "fillGradient": 0, 984 | "grid": {}, 985 | "gridPos": { 986 | "h": 7, 987 | "w": 12, 988 | "x": 0, 989 | "y": 21 990 | }, 991 | "hiddenSeries": false, 992 | "id": 8, 993 | "isNew": true, 994 | "legend": { 995 | "avg": false, 996 | "current": false, 997 | "max": false, 998 | "min": false, 999 | "show": true, 1000 | "total": false, 1001 | "values": false 1002 | }, 1003 | "lines": true, 1004 | "linewidth": 2, 1005 | "links": [], 1006 | "nullPointMode": "connected", 1007 | "options": { 1008 | "dataLinks": [] 1009 | }, 1010 | "percentage": false, 1011 | "pointradius": 5, 1012 | "points": false, 1013 | "renderer": "flot", 1014 | "seriesOverrides": [ 1015 | { 1016 | "alias": "reclaims", 1017 | "yaxis": 2 1018 | } 1019 | ], 1020 | "spaceLength": 10, 1021 | "stack": false, 1022 | "steppedLine": false, 1023 | "targets": [ 1024 | { 1025 | "expr": "sum(rate(redis_expired_keys_total{instance=~\"$instance\"}[5m])) by (instance)", 1026 | "format": "time_series", 1027 | "hide": false, 1028 | "interval": "", 1029 | "intervalFactor": 2, 1030 | "legendFormat": "expired, {{ instance }}", 1031 | "metric": "", 1032 | "refId": "A", 1033 | "step": 240, 1034 | "target": "" 1035 | }, 1036 | { 1037 | "expr": "sum(rate(redis_evicted_keys_total{instance=~\"$instance\"}[5m])) by (instance)", 1038 | "format": "time_series", 1039 | "interval": "", 1040 | "intervalFactor": 2, 1041 | "legendFormat": 
"evicted, {{ instance }}", 1042 | "refId": "B", 1043 | "step": 240 1044 | } 1045 | ], 1046 | "thresholds": [], 1047 | "timeFrom": null, 1048 | "timeRegions": [], 1049 | "timeShift": null, 1050 | "title": "Expired/Evicted Keys", 1051 | "tooltip": { 1052 | "msResolution": false, 1053 | "shared": true, 1054 | "sort": 0, 1055 | "value_type": "cumulative" 1056 | }, 1057 | "type": "graph", 1058 | "xaxis": { 1059 | "buckets": null, 1060 | "mode": "time", 1061 | "name": null, 1062 | "show": true, 1063 | "values": [] 1064 | }, 1065 | "yaxes": [ 1066 | { 1067 | "format": "short", 1068 | "label": null, 1069 | "logBase": 1, 1070 | "max": null, 1071 | "min": null, 1072 | "show": true 1073 | }, 1074 | { 1075 | "format": "short", 1076 | "label": null, 1077 | "logBase": 1, 1078 | "max": null, 1079 | "min": null, 1080 | "show": true 1081 | } 1082 | ], 1083 | "yaxis": { 1084 | "align": false, 1085 | "alignLevel": null 1086 | } 1087 | }, 1088 | { 1089 | "aliasColors": {}, 1090 | "bars": false, 1091 | "dashLength": 10, 1092 | "dashes": false, 1093 | "datasource": "Prometheus", 1094 | "fill": 1, 1095 | "fillGradient": 0, 1096 | "gridPos": { 1097 | "h": 7, 1098 | "w": 12, 1099 | "x": 12, 1100 | "y": 21 1101 | }, 1102 | "hiddenSeries": false, 1103 | "id": 16, 1104 | "legend": { 1105 | "avg": false, 1106 | "current": false, 1107 | "max": false, 1108 | "min": false, 1109 | "show": true, 1110 | "total": false, 1111 | "values": false 1112 | }, 1113 | "lines": true, 1114 | "linewidth": 1, 1115 | "links": [], 1116 | "nullPointMode": "null", 1117 | "options": { 1118 | "dataLinks": [] 1119 | }, 1120 | "percentage": false, 1121 | "pointradius": 2, 1122 | "points": false, 1123 | "renderer": "flot", 1124 | "seriesOverrides": [], 1125 | "spaceLength": 10, 1126 | "stack": false, 1127 | "steppedLine": false, 1128 | "targets": [ 1129 | { 1130 | "expr": "sum(redis_connected_clients{instance=~\"$instance\"})", 1131 | "format": "time_series", 1132 | "intervalFactor": 1, 1133 | "legendFormat": "connected", 
1134 | "refId": "A" 1135 | }, 1136 | { 1137 | "expr": "sum(redis_blocked_clients{instance=~\"$instance\"})", 1138 | "format": "time_series", 1139 | "intervalFactor": 1, 1140 | "legendFormat": "blocked", 1141 | "refId": "B" 1142 | } 1143 | ], 1144 | "thresholds": [], 1145 | "timeFrom": null, 1146 | "timeRegions": [], 1147 | "timeShift": null, 1148 | "title": "Connected/Blocked Clients", 1149 | "tooltip": { 1150 | "shared": true, 1151 | "sort": 0, 1152 | "value_type": "individual" 1153 | }, 1154 | "type": "graph", 1155 | "xaxis": { 1156 | "buckets": null, 1157 | "mode": "time", 1158 | "name": null, 1159 | "show": true, 1160 | "values": [] 1161 | }, 1162 | "yaxes": [ 1163 | { 1164 | "format": "short", 1165 | "label": null, 1166 | "logBase": 1, 1167 | "max": null, 1168 | "min": null, 1169 | "show": true 1170 | }, 1171 | { 1172 | "format": "short", 1173 | "label": null, 1174 | "logBase": 1, 1175 | "max": null, 1176 | "min": null, 1177 | "show": true 1178 | } 1179 | ], 1180 | "yaxis": { 1181 | "align": false, 1182 | "alignLevel": null 1183 | } 1184 | }, 1185 | { 1186 | "aliasColors": {}, 1187 | "bars": false, 1188 | "dashLength": 10, 1189 | "dashes": false, 1190 | "datasource": "Prometheus", 1191 | "editable": true, 1192 | "error": false, 1193 | "fill": 2, 1194 | "fillGradient": 0, 1195 | "grid": {}, 1196 | "gridPos": { 1197 | "h": 7, 1198 | "w": 12, 1199 | "x": 0, 1200 | "y": 28 1201 | }, 1202 | "hiddenSeries": false, 1203 | "id": 20, 1204 | "isNew": true, 1205 | "legend": { 1206 | "avg": false, 1207 | "current": false, 1208 | "hideEmpty": false, 1209 | "hideZero": true, 1210 | "max": false, 1211 | "min": false, 1212 | "show": true, 1213 | "total": false, 1214 | "values": false 1215 | }, 1216 | "lines": true, 1217 | "linewidth": 1, 1218 | "links": [], 1219 | "nullPointMode": "connected", 1220 | "options": { 1221 | "dataLinks": [] 1222 | }, 1223 | "percentage": false, 1224 | "pointradius": 5, 1225 | "points": false, 1226 | "renderer": "flot", 1227 | "seriesOverrides": 
[], 1228 | "spaceLength": 10, 1229 | "stack": false, 1230 | "steppedLine": false, 1231 | "targets": [ 1232 | { 1233 | "expr": "sum(irate(redis_commands_duration_seconds_total{instance =~ \"$instance\"}[1m])) by (cmd)\n /\nsum(irate(redis_commands_total{instance =~ \"$instance\"}[1m])) by (cmd)\n", 1234 | "format": "time_series", 1235 | "interval": "", 1236 | "intervalFactor": 2, 1237 | "legendFormat": "{{ cmd }}", 1238 | "metric": "redis_command_calls_total", 1239 | "refId": "A", 1240 | "step": 240 1241 | } 1242 | ], 1243 | "thresholds": [], 1244 | "timeFrom": null, 1245 | "timeRegions": [], 1246 | "timeShift": null, 1247 | "title": "Average Time Spent by Command / sec", 1248 | "tooltip": { 1249 | "msResolution": true, 1250 | "shared": true, 1251 | "sort": 2, 1252 | "value_type": "individual" 1253 | }, 1254 | "type": "graph", 1255 | "xaxis": { 1256 | "buckets": null, 1257 | "mode": "time", 1258 | "name": null, 1259 | "show": true, 1260 | "values": [] 1261 | }, 1262 | "yaxes": [ 1263 | { 1264 | "format": "s", 1265 | "label": null, 1266 | "logBase": 1, 1267 | "max": null, 1268 | "min": null, 1269 | "show": true 1270 | }, 1271 | { 1272 | "format": "short", 1273 | "label": null, 1274 | "logBase": 1, 1275 | "max": null, 1276 | "min": null, 1277 | "show": true 1278 | } 1279 | ], 1280 | "yaxis": { 1281 | "align": false, 1282 | "alignLevel": null 1283 | } 1284 | }, 1285 | { 1286 | "aliasColors": {}, 1287 | "bars": false, 1288 | "dashLength": 10, 1289 | "dashes": false, 1290 | "datasource": "Prometheus", 1291 | "editable": true, 1292 | "error": false, 1293 | "fill": 8, 1294 | "fillGradient": 0, 1295 | "grid": {}, 1296 | "gridPos": { 1297 | "h": 7, 1298 | "w": 12, 1299 | "x": 12, 1300 | "y": 28 1301 | }, 1302 | "hiddenSeries": false, 1303 | "id": 14, 1304 | "isNew": true, 1305 | "legend": { 1306 | "avg": false, 1307 | "current": false, 1308 | "hideEmpty": false, 1309 | "hideZero": false, 1310 | "max": false, 1311 | "min": false, 1312 | "show": true, 1313 | "total": false, 
1314 | "values": false 1315 | }, 1316 | "lines": true, 1317 | "linewidth": 1, 1318 | "links": [], 1319 | "nullPointMode": "connected", 1320 | "options": { 1321 | "dataLinks": [] 1322 | }, 1323 | "percentage": false, 1324 | "pointradius": 5, 1325 | "points": false, 1326 | "renderer": "flot", 1327 | "seriesOverrides": [], 1328 | "spaceLength": 10, 1329 | "stack": true, 1330 | "steppedLine": false, 1331 | "targets": [ 1332 | { 1333 | "expr": "sum(irate(redis_commands_duration_seconds_total{instance=~\"$instance\"}[1m])) by (cmd) != 0", 1334 | "format": "time_series", 1335 | "interval": "", 1336 | "intervalFactor": 2, 1337 | "legendFormat": "{{ cmd }}", 1338 | "metric": "redis_command_calls_total", 1339 | "refId": "A", 1340 | "step": 240 1341 | } 1342 | ], 1343 | "thresholds": [], 1344 | "timeFrom": null, 1345 | "timeRegions": [], 1346 | "timeShift": null, 1347 | "title": "Total Time Spent by Command / sec", 1348 | "tooltip": { 1349 | "msResolution": true, 1350 | "shared": true, 1351 | "sort": 2, 1352 | "value_type": "individual" 1353 | }, 1354 | "type": "graph", 1355 | "xaxis": { 1356 | "buckets": null, 1357 | "mode": "time", 1358 | "name": null, 1359 | "show": true, 1360 | "values": [] 1361 | }, 1362 | "yaxes": [ 1363 | { 1364 | "format": "s", 1365 | "label": null, 1366 | "logBase": 1, 1367 | "max": null, 1368 | "min": null, 1369 | "show": true 1370 | }, 1371 | { 1372 | "format": "short", 1373 | "label": null, 1374 | "logBase": 1, 1375 | "max": null, 1376 | "min": null, 1377 | "show": true 1378 | } 1379 | ], 1380 | "yaxis": { 1381 | "align": false, 1382 | "alignLevel": null 1383 | } 1384 | } 1385 | ], 1386 | "refresh": false, 1387 | "schemaVersion": 22, 1388 | "style": "dark", 1389 | "tags": [ 1390 | "prometheus", 1391 | "redis" 1392 | ], 1393 | "templating": { 1394 | "list": [ 1395 | { 1396 | "allValue": null, 1397 | "current": {}, 1398 | "datasource": "Prometheus", 1399 | "definition": "label_values(redis_up, instance)", 1400 | "hide": 0, 1401 | "includeAll": 
false, 1402 | "label": null, 1403 | "multi": true, 1404 | "name": "instance", 1405 | "options": [], 1406 | "query": "label_values(redis_up, instance)", 1407 | "refresh": 2, 1408 | "regex": "", 1409 | "skipUrlSync": false, 1410 | "sort": 1, 1411 | "tagValuesQuery": "", 1412 | "tags": [], 1413 | "tagsQuery": "", 1414 | "type": "query", 1415 | "useTags": false 1416 | } 1417 | ] 1418 | }, 1419 | "time": { 1420 | "from": "now-24h", 1421 | "to": "now" 1422 | }, 1423 | "timepicker": { 1424 | "refresh_intervals": [ 1425 | "5s", 1426 | "10s", 1427 | "30s", 1428 | "1m", 1429 | "5m", 1430 | "15m", 1431 | "30m", 1432 | "1h", 1433 | "2h", 1434 | "1d" 1435 | ], 1436 | "time_options": [ 1437 | "5m", 1438 | "15m", 1439 | "1h", 1440 | "6h", 1441 | "12h", 1442 | "24h", 1443 | "2d", 1444 | "7d", 1445 | "30d" 1446 | ] 1447 | }, 1448 | "timezone": "browser", 1449 | "title": "Redis Dashboard for Prometheus Redis Exporter 1.x", 1450 | "version": 14 1451 | } -------------------------------------------------------------------------------- /scrape-custom-service/05-redis-prometheus-rules-force-alert.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: redis-rules 5 | spec: 6 | groups: 7 | - name: redis_rules 8 | rules: 9 | - record: redis_memory_fragmentation_ratio 10 | expr: redis_memory_used_rss_bytes / redis_memory_used_bytes 11 | - name: redis 12 | rules: 13 | - alert: RedisDown 14 | expr: redis_up == 0 15 | for: 5m 16 | labels: 17 | severity: critical 18 | annotations: 19 | summary: "Redis down (instance {{ $labels.instance }})" 20 | description: "Redis instance is down\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" 21 | - alert: RedisOutOfMemory 22 | expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90 23 | for: 5m 24 | labels: 25 | severity: warning 26 | annotations: 27 | summary: "Redis out of memory (instance {{ $labels.instance }})" 28 
| description: "Redis is running out of memory (> 90%)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" 29 | - alert: RedisTooManyConnections 30 | expr: redis_connected_clients > 0 31 | for: 10s 32 | labels: 33 | severity: warning 34 | annotations: 35 | summary: "Redis too many connections (instance {{ $labels.instance }})" 36 | description: "Redis instance has too many connections\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" -------------------------------------------------------------------------------- /scrape-custom-service/05-redis-prometheus-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: redis-rules 5 | spec: 6 | groups: 7 | - name: redis_rules 8 | rules: 9 | - record: redis_memory_fragmentation_ratio 10 | expr: redis_memory_used_rss_bytes / redis_memory_used_bytes 11 | - name: redis 12 | rules: 13 | - alert: RedisDown 14 | expr: redis_up == 0 15 | for: 5m 16 | labels: 17 | severity: critical 18 | annotations: 19 | summary: "Redis down (instance {{ $labels.instance }})" 20 | description: "Redis instance is down\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" 21 | - alert: RedisOutOfMemory 22 | expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90 23 | for: 5m 24 | labels: 25 | severity: warning 26 | annotations: 27 | summary: "Redis out of memory (instance {{ $labels.instance }})" 28 | description: "Redis is running out of memory (> 90%)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" 29 | - alert: RedisTooManyConnections 30 | expr: redis_connected_clients > 100 31 | for: 5m 32 | labels: 33 | severity: warning 34 | annotations: 35 | summary: "Redis too many connections (instance {{ $labels.instance }})" 36 | description: "Redis instance has too many connections\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" -------------------------------------------------------------------------------- 
/scrape-custom-service/06-mysql-cluster.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: mysql-secret 5 | namespace: default 6 | type: Opaque 7 | data: 8 | # root password is required to be specified 9 | # lNohecTcJZJD9zN5WLHC8lJYjymtHBeMSQkUyMpn 10 | ROOT_PASSWORD: bE5vaGVjVGNKWkpEOXpONVdMSEM4bEpZanltdEhCZU1TUWtVeU1wbg== 11 | --- 12 | apiVersion: mysql.presslabs.org/v1alpha1 13 | kind: MysqlCluster 14 | metadata: 15 | name: db 16 | namespace: default 17 | spec: 18 | replicas: 1 19 | secretName: mysql-secret 20 | mysqlVersion: "5.7" 21 | volumeSpec: 22 | persistentVolumeClaim: 23 | accessModes: [ "ReadWriteOnce" ] 24 | resources: 25 | requests: 26 | storage: 1Gi 27 | -------------------------------------------------------------------------------- /scrape-custom-service/07-mysql-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | name: mysql-rules 5 | namespace: default 6 | spec: 7 | groups: 8 | - name: mysql.rules 9 | rules: 10 | - alert: MySQLReplicationNotRunning 11 | expr: mysql_slave_status_slave_io_running == 0 or mysql_slave_status_slave_sql_running 12 | == 0 13 | for: 2m 14 | labels: 15 | severity: critical 16 | annotations: 17 | description: Slave replication (IO or SQL) has been down for more than 2 minutes. 
18 | summary: Slave replication is not running 19 | - alert: MySQLReplicationLag 20 | expr: (mysql_slave_lag_seconds > 30) and on(instance) (predict_linear(mysql_slave_lag_seconds[5m], 21 | 60 * 2) > 0) 22 | for: 1m 23 | labels: 24 | severity: critical 25 | annotations: 26 | description: The mysql slave replication has fallen behind and is not recovering 27 | summary: MySQL slave replication is lagging 28 | - alert: MySQLReplicationLag 29 | expr: (mysql_heartbeat_lag_seconds > 30) and on(instance) (predict_linear(mysql_heartbeat_lag_seconds[5m], 30 | 60 * 2) > 0) 31 | for: 1m 32 | labels: 33 | severity: critical 34 | annotations: 35 | description: The mysql slave replication has fallen behind and is not recovering 36 | summary: MySQL slave replication is lagging 37 | - alert: MySQLInnoDBLogWaits 38 | expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10 39 | labels: 40 | severity: warning 41 | annotations: 42 | description: The innodb logs are waiting for disk at a rate of {{$value}} / 43 | second 44 | summary: MySQL innodb log writes stalling 45 | -------------------------------------------------------------------------------- /scrape-custom-service/08-mysql-grafana-dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: grafana-mysql 5 | namespace: cattle-dashboards 6 | labels: 7 | grafana_dashboard: "1" 8 | data: 9 | mysql.json: | 10 | { 11 | "__inputs": [ 12 | ], 13 | "__requires": [ 14 | { 15 | "type": "grafana", 16 | "id": "grafana", 17 | "name": "Grafana", 18 | "version": "5.1.2" 19 | }, 20 | { 21 | "type": "panel", 22 | "id": "graph", 23 | "name": "Graph", 24 | "version": "5.0.0" 25 | }, 26 | { 27 | "type": "datasource", 28 | "id": "prometheus", 29 | "name": "Prometheus", 30 | "version": "5.0.0" 31 | }, 32 | { 33 | "type": "panel", 34 | "id": "singlestat", 35 | "name": "Singlestat", 36 | "version": "5.0.0" 37 | } 38 | ], 39 | "annotations": { 40 | 
"list": [ 41 | { 42 | "builtIn": 1, 43 | "datasource": "-- Grafana --", 44 | "enable": true, 45 | "hide": true, 46 | "iconColor": "rgba(0, 211, 255, 1)", 47 | "name": "Annotations & Alerts", 48 | "type": "dashboard" 49 | } 50 | ] 51 | }, 52 | "editable": true, 53 | "gnetId": 6239, 54 | "graphTooltip": 0, 55 | "id": null, 56 | "iteration": 1527084642291, 57 | "links": [], 58 | "panels": [ 59 | { 60 | "collapsed": false, 61 | "gridPos": { 62 | "h": 1, 63 | "w": 24, 64 | "x": 0, 65 | "y": 0 66 | }, 67 | "id": 17, 68 | "panels": [], 69 | "title": "Global status", 70 | "type": "row" 71 | }, 72 | { 73 | "cacheTimeout": null, 74 | "colorBackground": true, 75 | "colorValue": false, 76 | "colors": [ 77 | "#bf1b00", 78 | "#508642", 79 | "#ef843c" 80 | ], 81 | "datasource": "Prometheus", 82 | "format": "none", 83 | "gauge": { 84 | "maxValue": 1, 85 | "minValue": 0, 86 | "show": false, 87 | "thresholdLabels": false, 88 | "thresholdMarkers": true 89 | }, 90 | "gridPos": { 91 | "h": 7, 92 | "w": 6, 93 | "x": 0, 94 | "y": 1 95 | }, 96 | "id": 11, 97 | "interval": null, 98 | "links": [], 99 | "mappingType": 1, 100 | "mappingTypes": [ 101 | { 102 | "name": "value to text", 103 | "value": 1 104 | }, 105 | { 106 | "name": "range to text", 107 | "value": 2 108 | } 109 | ], 110 | "maxDataPoints": 100, 111 | "nullPointMode": "connected", 112 | "nullText": null, 113 | "postfix": "", 114 | "postfixFontSize": "50%", 115 | "prefix": "", 116 | "prefixFontSize": "50%", 117 | "rangeMaps": [ 118 | { 119 | "from": "null", 120 | "text": "N/A", 121 | "to": "null" 122 | } 123 | ], 124 | "sparkline": { 125 | "fillColor": "rgba(31, 118, 189, 0.18)", 126 | "full": true, 127 | "lineColor": "rgb(31, 120, 193)", 128 | "show": true 129 | }, 130 | "tableColumn": "", 131 | "targets": [ 132 | { 133 | "expr": "mysql_up{release=\"$release\"}", 134 | "format": "time_series", 135 | "intervalFactor": 1, 136 | "refId": "A" 137 | } 138 | ], 139 | "thresholds": "1,2", 140 | "title": "Instance Up", 141 | "type": 
"singlestat", 142 | "valueFontSize": "80%", 143 | "valueMaps": [ 144 | { 145 | "op": "=", 146 | "text": "N/A", 147 | "value": "null" 148 | } 149 | ], 150 | "valueName": "current" 151 | }, 152 | { 153 | "cacheTimeout": null, 154 | "colorBackground": true, 155 | "colorValue": false, 156 | "colors": [ 157 | "#d44a3a", 158 | "rgba(237, 129, 40, 0.89)", 159 | "#508642" 160 | ], 161 | "datasource": "Prometheus", 162 | "format": "s", 163 | "gauge": { 164 | "maxValue": 100, 165 | "minValue": 0, 166 | "show": false, 167 | "thresholdLabels": false, 168 | "thresholdMarkers": true 169 | }, 170 | "gridPos": { 171 | "h": 7, 172 | "w": 6, 173 | "x": 6, 174 | "y": 1 175 | }, 176 | "id": 15, 177 | "interval": null, 178 | "links": [], 179 | "mappingType": 1, 180 | "mappingTypes": [ 181 | { 182 | "name": "value to text", 183 | "value": 1 184 | }, 185 | { 186 | "name": "range to text", 187 | "value": 2 188 | } 189 | ], 190 | "maxDataPoints": 100, 191 | "nullPointMode": "connected", 192 | "nullText": null, 193 | "postfix": "", 194 | "postfixFontSize": "50%", 195 | "prefix": "", 196 | "prefixFontSize": "50%", 197 | "rangeMaps": [ 198 | { 199 | "from": "null", 200 | "text": "N/A", 201 | "to": "null" 202 | } 203 | ], 204 | "sparkline": { 205 | "fillColor": "rgba(31, 118, 189, 0.18)", 206 | "full": false, 207 | "lineColor": "rgb(31, 120, 193)", 208 | "show": true 209 | }, 210 | "tableColumn": "", 211 | "targets": [ 212 | { 213 | "expr": "mysql_global_status_uptime{release=\"$release\"}", 214 | "format": "time_series", 215 | "intervalFactor": 1, 216 | "refId": "A" 217 | } 218 | ], 219 | "thresholds": "25200,32400", 220 | "title": "Uptime", 221 | "type": "singlestat", 222 | "valueFontSize": "80%", 223 | "valueMaps": [ 224 | { 225 | "op": "=", 226 | "text": "N/A", 227 | "value": "null" 228 | } 229 | ], 230 | "valueName": "current" 231 | }, 232 | { 233 | "aliasColors": {}, 234 | "bars": false, 235 | "dashLength": 10, 236 | "dashes": false, 237 | "datasource": "Prometheus", 238 | "fill": 1, 239 
| "gridPos": { 240 | "h": 7, 241 | "w": 12, 242 | "x": 12, 243 | "y": 1 244 | }, 245 | "id": 29, 246 | "legend": { 247 | "avg": false, 248 | "current": false, 249 | "max": false, 250 | "min": false, 251 | "show": false, 252 | "total": false, 253 | "values": false 254 | }, 255 | "lines": true, 256 | "linewidth": 1, 257 | "links": [], 258 | "nullPointMode": "null", 259 | "percentage": false, 260 | "pointradius": 5, 261 | "points": false, 262 | "renderer": "flot", 263 | "seriesOverrides": [], 264 | "spaceLength": 10, 265 | "stack": false, 266 | "steppedLine": false, 267 | "targets": [ 268 | { 269 | "expr": "mysql_global_status_max_used_connections{release=\"$release\"}", 270 | "format": "time_series", 271 | "intervalFactor": 1, 272 | "legendFormat": "current", 273 | "refId": "A" 274 | }, 275 | { 276 | "expr": "mysql_global_variables_max_connections{release=\"$release\"}", 277 | "format": "time_series", 278 | "intervalFactor": 1, 279 | "legendFormat": "Max", 280 | "refId": "B" 281 | } 282 | ], 283 | "thresholds": [], 284 | "timeFrom": null, 285 | "timeShift": null, 286 | "title": "Mysql Connections", 287 | "tooltip": { 288 | "shared": true, 289 | "sort": 0, 290 | "value_type": "individual" 291 | }, 292 | "type": "graph", 293 | "xaxis": { 294 | "buckets": null, 295 | "mode": "time", 296 | "name": null, 297 | "show": true, 298 | "values": [] 299 | }, 300 | "yaxes": [ 301 | { 302 | "format": "short", 303 | "label": null, 304 | "logBase": 1, 305 | "max": null, 306 | "min": null, 307 | "show": true 308 | }, 309 | { 310 | "format": "short", 311 | "label": null, 312 | "logBase": 1, 313 | "max": null, 314 | "min": null, 315 | "show": true 316 | } 317 | ], 318 | "yaxis": { 319 | "align": false, 320 | "alignLevel": null 321 | } 322 | }, 323 | { 324 | "collapsed": false, 325 | "gridPos": { 326 | "h": 1, 327 | "w": 24, 328 | "x": 0, 329 | "y": 8 330 | }, 331 | "id": 19, 332 | "panels": [], 333 | "title": "I/O", 334 | "type": "row" 335 | }, 336 | { 337 | "aliasColors": {}, 338 | 
"bars": false, 339 | "dashLength": 10, 340 | "dashes": false, 341 | "datasource": "Prometheus", 342 | "fill": 1, 343 | "gridPos": { 344 | "h": 9, 345 | "w": 12, 346 | "x": 0, 347 | "y": 9 348 | }, 349 | "id": 5, 350 | "legend": { 351 | "avg": false, 352 | "current": false, 353 | "max": false, 354 | "min": false, 355 | "show": true, 356 | "total": false, 357 | "values": false 358 | }, 359 | "lines": true, 360 | "linewidth": 1, 361 | "links": [], 362 | "nullPointMode": "null", 363 | "percentage": false, 364 | "pointradius": 5, 365 | "points": false, 366 | "renderer": "flot", 367 | "seriesOverrides": [ 368 | { 369 | "alias": "write", 370 | "transform": "negative-Y" 371 | } 372 | ], 373 | "spaceLength": 10, 374 | "stack": false, 375 | "steppedLine": false, 376 | "targets": [ 377 | { 378 | "expr": "irate(mysql_global_status_innodb_data_reads{release=\"$release\"}[10m])", 379 | "format": "time_series", 380 | "intervalFactor": 1, 381 | "legendFormat": "reads", 382 | "refId": "A" 383 | }, 384 | { 385 | "expr": "irate(mysql_global_status_innodb_data_writes{release=\"$release\"}[10m])", 386 | "format": "time_series", 387 | "intervalFactor": 1, 388 | "legendFormat": "write", 389 | "refId": "B" 390 | } 391 | ], 392 | "thresholds": [], 393 | "timeFrom": null, 394 | "timeShift": null, 395 | "title": "mysql disk reads vs writes", 396 | "tooltip": { 397 | "shared": true, 398 | "sort": 0, 399 | "value_type": "individual" 400 | }, 401 | "type": "graph", 402 | "xaxis": { 403 | "buckets": null, 404 | "mode": "time", 405 | "name": null, 406 | "show": true, 407 | "values": [] 408 | }, 409 | "yaxes": [ 410 | { 411 | "format": "short", 412 | "label": null, 413 | "logBase": 1, 414 | "max": null, 415 | "min": null, 416 | "show": true 417 | }, 418 | { 419 | "format": "short", 420 | "label": null, 421 | "logBase": 1, 422 | "max": null, 423 | "min": null, 424 | "show": true 425 | } 426 | ], 427 | "yaxis": { 428 | "align": false, 429 | "alignLevel": null 430 | } 431 | }, 432 | { 433 | 
"aliasColors": {}, 434 | "bars": false, 435 | "dashLength": 10, 436 | "dashes": false, 437 | "datasource": "Prometheus", 438 | "fill": 1, 439 | "gridPos": { 440 | "h": 9, 441 | "w": 12, 442 | "x": 12, 443 | "y": 9 444 | }, 445 | "id": 9, 446 | "legend": { 447 | "avg": false, 448 | "current": false, 449 | "max": false, 450 | "min": false, 451 | "show": false, 452 | "total": false, 453 | "values": false 454 | }, 455 | "lines": true, 456 | "linewidth": 1, 457 | "links": [], 458 | "nullPointMode": "null", 459 | "percentage": false, 460 | "pointradius": 5, 461 | "points": false, 462 | "renderer": "flot", 463 | "seriesOverrides": [ 464 | { 465 | "alias": "/sent/", 466 | "transform": "negative-Y" 467 | } 468 | ], 469 | "spaceLength": 10, 470 | "stack": false, 471 | "steppedLine": false, 472 | "targets": [ 473 | { 474 | "expr": "irate(mysql_global_status_bytes_received{release=\"$release\"}[5m])", 475 | "format": "time_series", 476 | "intervalFactor": 1, 477 | "legendFormat": "received", 478 | "refId": "A" 479 | }, 480 | { 481 | "expr": "irate(mysql_global_status_bytes_sent{release=\"$release\"}[5m])", 482 | "format": "time_series", 483 | "intervalFactor": 1, 484 | "legendFormat": "sent", 485 | "refId": "B" 486 | } 487 | ], 488 | "thresholds": [], 489 | "timeFrom": null, 490 | "timeShift": null, 491 | "title": "mysql network received vs sent", 492 | "tooltip": { 493 | "shared": true, 494 | "sort": 0, 495 | "value_type": "individual" 496 | }, 497 | "type": "graph", 498 | "xaxis": { 499 | "buckets": null, 500 | "mode": "time", 501 | "name": null, 502 | "show": true, 503 | "values": [] 504 | }, 505 | "yaxes": [ 506 | { 507 | "format": "short", 508 | "label": null, 509 | "logBase": 1, 510 | "max": null, 511 | "min": null, 512 | "show": true 513 | }, 514 | { 515 | "format": "short", 516 | "label": null, 517 | "logBase": 1, 518 | "max": null, 519 | "min": null, 520 | "show": true 521 | } 522 | ], 523 | "yaxis": { 524 | "align": false, 525 | "alignLevel": null 526 | } 527 | }, 
528 | { 529 | "aliasColors": {}, 530 | "bars": false, 531 | "dashLength": 10, 532 | "dashes": false, 533 | "datasource": "Prometheus", 534 | "fill": 1, 535 | "gridPos": { 536 | "h": 7, 537 | "w": 12, 538 | "x": 0, 539 | "y": 18 540 | }, 541 | "id": 2, 542 | "legend": { 543 | "avg": false, 544 | "current": false, 545 | "max": false, 546 | "min": false, 547 | "show": false, 548 | "total": false, 549 | "values": false 550 | }, 551 | "lines": true, 552 | "linewidth": 1, 553 | "links": [], 554 | "nullPointMode": "null", 555 | "percentage": false, 556 | "pointradius": 5, 557 | "points": false, 558 | "renderer": "flot", 559 | "seriesOverrides": [], 560 | "spaceLength": 10, 561 | "stack": false, 562 | "steppedLine": false, 563 | "targets": [ 564 | { 565 | "expr": "irate(mysql_global_status_commands_total{release=\"$release\"}[5m]) > 0", 566 | "format": "time_series", 567 | "intervalFactor": 1, 568 | "legendFormat": "{{ command }} - {{ release }}", 569 | "refId": "A" 570 | } 571 | ], 572 | "thresholds": [], 573 | "timeFrom": null, 574 | "timeShift": null, 575 | "title": "Query rates", 576 | "tooltip": { 577 | "shared": true, 578 | "sort": 0, 579 | "value_type": "individual" 580 | }, 581 | "type": "graph", 582 | "xaxis": { 583 | "buckets": null, 584 | "mode": "time", 585 | "name": null, 586 | "show": true, 587 | "values": [] 588 | }, 589 | "yaxes": [ 590 | { 591 | "format": "short", 592 | "label": null, 593 | "logBase": 1, 594 | "max": null, 595 | "min": null, 596 | "show": true 597 | }, 598 | { 599 | "format": "short", 600 | "label": null, 601 | "logBase": 1, 602 | "max": null, 603 | "min": null, 604 | "show": true 605 | } 606 | ], 607 | "yaxis": { 608 | "align": false, 609 | "alignLevel": null 610 | } 611 | }, 612 | { 613 | "aliasColors": {}, 614 | "bars": false, 615 | "dashLength": 10, 616 | "dashes": false, 617 | "datasource": "Prometheus", 618 | "fill": 1, 619 | "gridPos": { 620 | "h": 7, 621 | "w": 12, 622 | "x": 12, 623 | "y": 18 624 | }, 625 | "id": 25, 626 | 
"legend": { 627 | "avg": false, 628 | "current": false, 629 | "max": false, 630 | "min": false, 631 | "show": false, 632 | "total": false, 633 | "values": false 634 | }, 635 | "lines": true, 636 | "linewidth": 1, 637 | "links": [], 638 | "nullPointMode": "null", 639 | "percentage": false, 640 | "pointradius": 5, 641 | "points": false, 642 | "renderer": "flot", 643 | "seriesOverrides": [], 644 | "spaceLength": 10, 645 | "stack": false, 646 | "steppedLine": false, 647 | "targets": [ 648 | { 649 | "expr": "mysql_global_status_threads_running{release=\"$release\"} ", 650 | "format": "time_series", 651 | "intervalFactor": 1, 652 | "refId": "A" 653 | } 654 | ], 655 | "thresholds": [], 656 | "timeFrom": null, 657 | "timeShift": null, 658 | "title": "Running Threads", 659 | "tooltip": { 660 | "shared": true, 661 | "sort": 0, 662 | "value_type": "individual" 663 | }, 664 | "type": "graph", 665 | "xaxis": { 666 | "buckets": null, 667 | "mode": "time", 668 | "name": null, 669 | "show": true, 670 | "values": [] 671 | }, 672 | "yaxes": [ 673 | { 674 | "decimals": null, 675 | "format": "short", 676 | "label": null, 677 | "logBase": 1, 678 | "max": "15", 679 | "min": null, 680 | "show": true 681 | }, 682 | { 683 | "format": "short", 684 | "label": null, 685 | "logBase": 1, 686 | "max": null, 687 | "min": null, 688 | "show": true 689 | } 690 | ], 691 | "yaxis": { 692 | "align": false, 693 | "alignLevel": null 694 | } 695 | }, 696 | { 697 | "collapsed": false, 698 | "gridPos": { 699 | "h": 1, 700 | "w": 24, 701 | "x": 0, 702 | "y": 25 703 | }, 704 | "id": 21, 705 | "panels": [], 706 | "title": "Errors", 707 | "type": "row" 708 | }, 709 | { 710 | "aliasColors": {}, 711 | "bars": false, 712 | "dashLength": 10, 713 | "dashes": false, 714 | "datasource": "Prometheus", 715 | "description": "The number of connections that were aborted because the client died without closing the connection properly. 
See Section B.5.2.10, “Communication Errors and Aborted Connections”.", 716 | "fill": 1, 717 | "gridPos": { 718 | "h": 9, 719 | "w": 12, 720 | "x": 0, 721 | "y": 26 722 | }, 723 | "id": 13, 724 | "legend": { 725 | "avg": false, 726 | "current": false, 727 | "max": false, 728 | "min": false, 729 | "show": false, 730 | "total": false, 731 | "values": false 732 | }, 733 | "lines": true, 734 | "linewidth": 1, 735 | "links": [], 736 | "nullPointMode": "null", 737 | "percentage": false, 738 | "pointradius": 5, 739 | "points": false, 740 | "renderer": "flot", 741 | "seriesOverrides": [], 742 | "spaceLength": 10, 743 | "stack": false, 744 | "steppedLine": false, 745 | "targets": [ 746 | { 747 | "expr": "mysql_global_status_aborted_clients{release=\"$release\"}", 748 | "format": "time_series", 749 | "intervalFactor": 1, 750 | "refId": "B" 751 | } 752 | ], 753 | "thresholds": [], 754 | "timeFrom": null, 755 | "timeShift": null, 756 | "title": "Aborted clients", 757 | "tooltip": { 758 | "shared": true, 759 | "sort": 0, 760 | "value_type": "individual" 761 | }, 762 | "type": "graph", 763 | "xaxis": { 764 | "buckets": null, 765 | "mode": "time", 766 | "name": null, 767 | "show": true, 768 | "values": [] 769 | }, 770 | "yaxes": [ 771 | { 772 | "format": "short", 773 | "label": null, 774 | "logBase": 1, 775 | "max": null, 776 | "min": null, 777 | "show": true 778 | }, 779 | { 780 | "format": "short", 781 | "label": null, 782 | "logBase": 1, 783 | "max": null, 784 | "min": null, 785 | "show": true 786 | } 787 | ], 788 | "yaxis": { 789 | "align": false, 790 | "alignLevel": null 791 | } 792 | }, 793 | { 794 | "aliasColors": {}, 795 | "bars": false, 796 | "dashLength": 10, 797 | "dashes": false, 798 | "datasource": "Prometheus", 799 | "description": "The number of failed attempts to connect to the MySQL server. 
See Section B.5.2.10, “Communication Errors and Aborted Connections”.\n\nFor additional connection-related information, check the Connection_errors_xxx status variables and the host_cache table.", 800 | "fill": 1, 801 | "gridPos": { 802 | "h": 9, 803 | "w": 12, 804 | "x": 12, 805 | "y": 26 806 | }, 807 | "id": 4, 808 | "legend": { 809 | "avg": false, 810 | "current": false, 811 | "max": false, 812 | "min": false, 813 | "show": false, 814 | "total": false, 815 | "values": false 816 | }, 817 | "lines": true, 818 | "linewidth": 1, 819 | "links": [], 820 | "nullPointMode": "null", 821 | "percentage": false, 822 | "pointradius": 5, 823 | "points": false, 824 | "renderer": "flot", 825 | "seriesOverrides": [], 826 | "spaceLength": 10, 827 | "stack": false, 828 | "steppedLine": false, 829 | "targets": [ 830 | { 831 | "expr": "mysql_global_status_aborted_connects{release=\"$release\"}", 832 | "format": "time_series", 833 | "intervalFactor": 1, 834 | "legendFormat": "", 835 | "refId": "A" 836 | } 837 | ], 838 | "thresholds": [], 839 | "timeFrom": null, 840 | "timeShift": null, 841 | "title": "mysql aborted Connects", 842 | "tooltip": { 843 | "shared": true, 844 | "sort": 0, 845 | "value_type": "individual" 846 | }, 847 | "type": "graph", 848 | "xaxis": { 849 | "buckets": null, 850 | "mode": "time", 851 | "name": null, 852 | "show": true, 853 | "values": [] 854 | }, 855 | "yaxes": [ 856 | { 857 | "format": "short", 858 | "label": null, 859 | "logBase": 1, 860 | "max": null, 861 | "min": null, 862 | "show": true 863 | }, 864 | { 865 | "format": "short", 866 | "label": null, 867 | "logBase": 1, 868 | "max": null, 869 | "min": null, 870 | "show": true 871 | } 872 | ], 873 | "yaxis": { 874 | "align": false, 875 | "alignLevel": null 876 | } 877 | }, 878 | { 879 | "collapsed": false, 880 | "gridPos": { 881 | "h": 1, 882 | "w": 24, 883 | "x": 0, 884 | "y": 35 885 | }, 886 | "id": 23, 887 | "panels": [], 888 | "title": "Disk usage", 889 | "type": "row" 890 | }, 891 | { 892 | 
"aliasColors": {}, 893 | "bars": false, 894 | "dashLength": 10, 895 | "dashes": false, 896 | "datasource": "Prometheus", 897 | "fill": 1, 898 | "gridPos": { 899 | "h": 9, 900 | "w": 12, 901 | "x": 0, 902 | "y": 36 903 | }, 904 | "id": 27, 905 | "legend": { 906 | "avg": false, 907 | "current": false, 908 | "max": false, 909 | "min": false, 910 | "show": true, 911 | "total": false, 912 | "values": false 913 | }, 914 | "lines": true, 915 | "linewidth": 1, 916 | "links": [], 917 | "nullPointMode": "null", 918 | "percentage": false, 919 | "pointradius": 5, 920 | "points": false, 921 | "renderer": "flot", 922 | "seriesOverrides": [], 923 | "spaceLength": 10, 924 | "stack": false, 925 | "steppedLine": false, 926 | "targets": [ 927 | { 928 | "expr": "sum(mysql_info_schema_table_size{component=\"data_length\",release=\"$release\"})", 929 | "format": "time_series", 930 | "intervalFactor": 1, 931 | "legendFormat": "Tables", 932 | "refId": "A" 933 | }, 934 | { 935 | "expr": "sum(mysql_info_schema_table_size{component=\"index_length\",release=\"$release\"})", 936 | "format": "time_series", 937 | "intervalFactor": 1, 938 | "legendFormat": "Indexes", 939 | "refId": "B" 940 | } 941 | ], 942 | "thresholds": [], 943 | "timeFrom": null, 944 | "timeShift": null, 945 | "title": "Disk usage tables / indexes", 946 | "tooltip": { 947 | "shared": true, 948 | "sort": 0, 949 | "value_type": "individual" 950 | }, 951 | "type": "graph", 952 | "xaxis": { 953 | "buckets": null, 954 | "mode": "time", 955 | "name": null, 956 | "show": true, 957 | "values": [] 958 | }, 959 | "yaxes": [ 960 | { 961 | "format": "decbytes", 962 | "label": null, 963 | "logBase": 1, 964 | "max": null, 965 | "min": null, 966 | "show": true 967 | }, 968 | { 969 | "format": "short", 970 | "label": null, 971 | "logBase": 1, 972 | "max": null, 973 | "min": null, 974 | "show": true 975 | } 976 | ], 977 | "yaxis": { 978 | "align": false, 979 | "alignLevel": null 980 | } 981 | }, 982 | { 983 | "aliasColors": {}, 984 | "bars": 
false, 985 | "dashLength": 10, 986 | "dashes": false, 987 | "datasource": "Prometheus", 988 | "fill": 1, 989 | "gridPos": { 990 | "h": 9, 991 | "w": 12, 992 | "x": 12, 993 | "y": 36 994 | }, 995 | "id": 7, 996 | "legend": { 997 | "avg": false, 998 | "current": false, 999 | "max": false, 1000 | "min": false, 1001 | "show": false, 1002 | "total": false, 1003 | "values": false 1004 | }, 1005 | "lines": true, 1006 | "linewidth": 1, 1007 | "links": [], 1008 | "nullPointMode": "null", 1009 | "percentage": false, 1010 | "pointradius": 5, 1011 | "points": false, 1012 | "renderer": "flot", 1013 | "seriesOverrides": [], 1014 | "spaceLength": 10, 1015 | "stack": false, 1016 | "steppedLine": false, 1017 | "targets": [ 1018 | { 1019 | "expr": "sum(mysql_info_schema_table_rows{release=\"$release\"})", 1020 | "format": "time_series", 1021 | "intervalFactor": 1, 1022 | "refId": "A" 1023 | } 1024 | ], 1025 | "thresholds": [], 1026 | "timeFrom": null, 1027 | "timeShift": null, 1028 | "title": "Sum of all rows", 1029 | "tooltip": { 1030 | "shared": true, 1031 | "sort": 0, 1032 | "value_type": "individual" 1033 | }, 1034 | "type": "graph", 1035 | "xaxis": { 1036 | "buckets": null, 1037 | "mode": "time", 1038 | "name": null, 1039 | "show": true, 1040 | "values": [] 1041 | }, 1042 | "yaxes": [ 1043 | { 1044 | "decimals": null, 1045 | "format": "short", 1046 | "label": null, 1047 | "logBase": 1, 1048 | "max": null, 1049 | "min": null, 1050 | "show": true 1051 | }, 1052 | { 1053 | "format": "short", 1054 | "label": null, 1055 | "logBase": 1, 1056 | "max": null, 1057 | "min": null, 1058 | "show": true 1059 | } 1060 | ], 1061 | "yaxis": { 1062 | "align": false, 1063 | "alignLevel": null 1064 | } 1065 | } 1066 | ], 1067 | "schemaVersion": 16, 1068 | "style": "dark", 1069 | "tags": [ 1070 | "Databases", 1071 | "backgroundservices" 1072 | ], 1073 | "templating": { 1074 | "list": [ 1075 | { 1076 | "allValue": null, 1077 | "current": {}, 1078 | "datasource": "Prometheus", 1079 | "hide": 0, 1080 
| "includeAll": false, 1081 | "label": null, 1082 | "multi": false, 1083 | "name": "release", 1084 | "options": [], 1085 | "query": "label_values(mysql_up,release)", 1086 | "refresh": 1, 1087 | "regex": "", 1088 | "sort": 0, 1089 | "tagValuesQuery": "", 1090 | "tags": [], 1091 | "tagsQuery": "", 1092 | "type": "query", 1093 | "useTags": false 1094 | } 1095 | ] 1096 | }, 1097 | "time": { 1098 | "from": "now-1h", 1099 | "to": "now" 1100 | }, 1101 | "timepicker": { 1102 | "refresh_intervals": [ 1103 | "5s", 1104 | "10s", 1105 | "30s", 1106 | "1m", 1107 | "5m", 1108 | "15m", 1109 | "30m", 1110 | "1h", 1111 | "2h", 1112 | "1d" 1113 | ], 1114 | "time_options": [ 1115 | "5m", 1116 | "15m", 1117 | "1h", 1118 | "6h", 1119 | "12h", 1120 | "24h", 1121 | "2d", 1122 | "7d", 1123 | "30d" 1124 | ] 1125 | }, 1126 | "timezone": "", 1127 | "title": "Mysql - Prometheus", 1128 | "uid": "6-kPlS7ik", 1129 | "version": 16, 1130 | "description": "Basic Mysql dashboard for the prometheus exporter " 1131 | }
1132 |
--------------------------------------------------------------------------------
/terraform-setup/data.tf:
--------------------------------------------------------------------------------
 1 | # Most recent Ubuntu 20.04 (focal) amd64 server image owned by the Canonical
 2 | # account; main.tf uses its id as the AMI of the ubuntu_vms instances.
 3 | data "aws_ami" "ubuntu" {
 4 |   most_recent = true
 5 |   owners      = ["099720109477"] # Canonical
 6 |
 7 |   filter {
 8 |     name   = "name"
 9 |     values = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"]
10 |   }
11 |
12 |   filter {
13 |     name   = "virtualization-type"
14 |     values = ["hvm"]
15 |   }
16 | }
17 |
18 | # Same lookup for the arm64 build of Ubuntu 20.04.
19 | # NOTE(review): no resource in the terraform-setup files shown here reads this
20 | # data source - confirm it is still needed.
21 | data "aws_ami" "arm" {
22 |   most_recent = true
23 |   owners      = ["099720109477"] # Canonical
24 |
25 |   filter {
26 |     name   = "name"
27 |     values = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-arm64-server-*"]
28 |   }
29 |
30 |   filter {
31 |     name   = "virtualization-type"
32 |     values = ["hvm"]
33 |   }
34 | }
--------------------------------------------------------------------------------
/terraform-setup/lb.tf:
--------------------------------------------------------------------------------
 1 | # Classic ELB in front of the ubuntu_vms instances. Ports 80 and 443 are both
 2 | # forwarded as plain TCP, so the load balancer itself does not terminate TLS.
 3 | resource "aws_elb" "rancher-server-lb" {
 4 |   name               = "${var.prefix}-rancher-server-lb"
 5 |   availability_zones = aws_instance.ubuntu_vms[*].availability_zone
 6 |
 7 |   listener {
 8 |     instance_port     = 80
 9 |     instance_protocol = "tcp"
10 |     lb_port           = 80
11 |     lb_protocol       = "tcp"
12 |   }
13 |
14 |   listener {
15 |     instance_port     = 443
16 |     instance_protocol = "tcp"
17 |     lb_port           = 443
18 |     lb_protocol       = "tcp"
19 |   }
20 |
21 |   # Health is probed with a TCP check against port 80.
22 |   health_check {
23 |     healthy_threshold   = 2
24 |     unhealthy_threshold = 2
25 |     timeout             = 3
26 |     target              = "TCP:80"
27 |     interval            = 30
28 |   }
29 |
30 |   # Splat rather than hand-written [0], [1], [2] so the registered instances
31 |   # always follow the `count` of aws_instance.ubuntu_vms.
32 |   instances                   = aws_instance.ubuntu_vms[*].id
33 |   cross_zone_load_balancing   = true
34 |   idle_timeout                = 400
35 |   connection_draining         = true
36 |   connection_draining_timeout = 400
37 |
38 |   tags = {
39 |     Name = "${var.prefix}-rancher-server-lb"
40 |   }
41 | }
42 |
43 | # DNS zone looked up in DigitalOcean; the records below are created in it.
44 | data "digitalocean_domain" "rancher" {
45 |   name = "plgrnd.be"
46 | }
47 |
48 | # The three records below are CNAMEs to the ELB; only the host name differs.
49 | # A trailing "." is appended to the ELB DNS name, presumably because the
50 | # DigitalOcean API expects a fully-qualified CNAME target - TODO confirm.
51 | resource "digitalocean_record" "rancher" {
52 |   domain = data.digitalocean_domain.rancher.name
53 |   type   = "CNAME"
54 |   name   = "rancher-demo"
55 |   value  = "${aws_elb.rancher-server-lb.dns_name}."
56 |   ttl    = 60
57 | }
58 |
59 | resource "digitalocean_record" "shop" {
60 |   domain = data.digitalocean_domain.rancher.name
61 |   type   = "CNAME"
62 |   name   = "demo-shop"
63 |   value  = "${aws_elb.rancher-server-lb.dns_name}."
64 |   ttl    = 60
65 | }
66 |
67 | resource "digitalocean_record" "sample-app" {
68 |   domain = data.digitalocean_domain.rancher.name
69 |   type   = "CNAME"
70 |   name   = "sample-app"
71 |   value  = "${aws_elb.rancher-server-lb.dns_name}."
72 |   ttl    = 60
73 | }
--------------------------------------------------------------------------------
/terraform-setup/main.tf:
--------------------------------------------------------------------------------
 1 | # Registers the public key stored next to the private key file
 2 | # ("<ssh_key_file_name>.pub") as an EC2 key pair.
 3 | resource "aws_key_pair" "ssh_key_pair" {
 4 |   key_name_prefix = "${var.prefix}-rancher-k3s-fleet-"
 5 |   public_key      = file("${var.ssh_key_file_name}.pub")
 6 | }
 7 |
 8 | # Security group to allow all traffic
 9 | # NOTE(review): every port and protocol is open to 0.0.0.0/0 in both
10 | # directions; acceptable for a short-lived demo only.
11 | resource "aws_security_group" "sg_allowall" {
12 |   name = "${var.prefix}-rancher-k3s-fleet-allowall"
13 |
14 |   # Ports are written as numbers instead of quoted strings.
15 |   ingress {
16 |     from_port   = 0
17 |     to_port     = 0
18 |     protocol    = "-1"
19 |     cidr_blocks = ["0.0.0.0/0"]
20 |   }
21 |
22 |   egress {
23 |     from_port   = 0
24 |     to_port     = 0
25 |     protocol    = "-1"
26 |     cidr_blocks = ["0.0.0.0/0"]
27 |   }
28 | }
29 |
30 | # Three identical Ubuntu instances; the Makefile's k3sup steps build the k3s
31 | # cluster on vms 0, 1 and 2.
32 | resource "aws_instance" "ubuntu_vms" {
33 |   count         = 3
34 |   ami           = data.aws_ami.ubuntu.id
35 |   instance_type = "t3a.xlarge"
36 |
37 |   key_name        = aws_key_pair.ssh_key_pair.key_name
38 |   security_groups = [aws_security_group.sg_allowall.name]
39 |
40 |   root_block_device {
41 |     volume_size = 80
42 |   }
43 |
44 |   tags = {
45 |     Name = "${var.prefix}-rancher-k3s-fleet-ubuntu"
46 |   }
47 | }
--------------------------------------------------------------------------------
/terraform-setup/output.tf:
--------------------------------------------------------------------------------
 1 | # Public IPs of every ubuntu_vms instance.
 2 | output "ubuntu_node_ips" {
 3 |   value = aws_instance.ubuntu_vms[*].public_ip
 4 | }
 5 |
 6 | # FQDN of the "rancher-demo" DNS record.
 7 | output "rancher_domain" {
 8 |   value = digitalocean_record.rancher.fqdn
 9 | }
10 |
11 | # Public IPs of instances 0, 1 and 2, selected explicitly by index.
12 | output "rancher_cluster_ips" {
13 |   value = [
14 |     aws_instance.ubuntu_vms[0].public_ip,
15 |     aws_instance.ubuntu_vms[1].public_ip,
16 |     aws_instance.ubuntu_vms[2].public_ip,
17 |   ]
18 | }
19 |
20 | # Same value as ubuntu_node_ips, published under a second output name.
21 | output "all_node_ips" {
22 |   value = aws_instance.ubuntu_vms[*].public_ip
23 | }
--------------------------------------------------------------------------------
/terraform-setup/provider.tf:
--------------------------------------------------------------------------------
 1 | # Terraform core and provider requirements for the demo infrastructure.
 2 | terraform {
 3 |   required_version = ">= 0.13"
 4 |
 5 |   required_providers {
 6 |     aws = {
 7 |       source = "hashicorp/aws"
 8 |     }
 9 |
10 |     digitalocean = {
11 |       source = "digitalocean/digitalocean"
12 |     }
13 |   }
14 | }
15 |
16 | # AWS provider: region and static credentials are taken from input variables.
17 | provider "aws" {
18 |   region     = var.aws_region
19 |   access_key = var.aws_access_key
20 |   secret_key = var.aws_secret_key
21 | }
22 |
23 | # DigitalOcean provider, authenticated with an API token variable.
24 | provider "digitalocean" {
25 |   token = var.digitalocean_token
26 | }
--------------------------------------------------------------------------------
/terraform-setup/variables.tf:
--------------------------------------------------------------------------------
 1 | # Input variables for terraform-setup. Variables without a default must be
 2 | # supplied by the caller.
 3 | # NOTE(review): the credential variables are not marked `sensitive`; that
 4 | # argument needs Terraform >= 0.14, while this configuration still allows 0.13.
 5 |
 6 | variable "aws_access_key" {
 7 |   description = "AWS access key used to create infrastructure"
 8 |   type        = string
 9 | }
10 |
11 | variable "aws_secret_key" {
12 |   description = "AWS secret key used to create AWS infrastructure"
13 |   type        = string
14 | }
15 |
16 | variable "aws_region" {
17 |   description = "AWS region used for all resources"
18 |   type        = string
19 |   default     = "eu-central-1"
20 | }
21 |
22 | variable "digitalocean_token" {
23 |   description = "API token for DigitalOcean"
24 |   type        = string
25 | }
26 |
27 | # main.tf appends ".pub" to this path to locate the matching public key.
28 | variable "ssh_key_file_name" {
29 |   description = "File path and name of SSH private key used for infrastructure and RKE"
30 |   type        = string
31 |   default     = "~/.ssh/id_rsa"
32 | }
33 |
34 | variable "prefix" {
35 |   description = "Prefix added to names of all resources"
36 |   type        = string
37 |   default     = "bhofmann"
38 | }
--------------------------------------------------------------------------------