├── 00_hetzner-volumes ├── secrets │ └── hetzner_api_key ├── rm.sh ├── deploy.sh ├── hetzner-volumes-rm.yml └── hetzner-volumes-enable.yml ├── 01_cleanup ├── deploy.sh └── cleanup.yml ├── 02_portainer ├── deploy.sh └── portainer.yml ├── 00_docker_socket_proxy ├── deploy.sh └── docker_socket_proxy.yml ├── 01_traefik_public ├── vars.sh ├── deploy.sh ├── users.sh └── traefik_public.yml ├── 02_monitoring ├── secrets │ ├── grafana │ │ └── environment.sh │ ├── prometheus_users.sh │ ├── alertmanager_users.sh │ └── vars.sh ├── configs │ ├── grafana │ │ └── conf │ │ │ ├── docker-entrypoint.sh │ │ │ ├── dashboards.yaml │ │ │ ├── datasources │ │ │ └── prometheus.yaml │ │ │ └── dashboards │ │ │ ├── traefik-dash.json │ │ │ ├── prometheus-dash.json │ │ │ ├── services-dash.json │ │ │ └── nodes-dash.json │ ├── alertmanager │ │ └── alertmanager.yml │ ├── node-exporter │ │ └── conf │ │ │ └── docker-entrypoint.sh │ └── prometheus │ │ ├── conf │ │ ├── docker-entrypoint.sh │ │ └── prometheus.yml │ │ └── rules │ │ ├── swarm_node.rules.yml │ │ └── swarm_task.rules.yml ├── deploy.sh ├── README.md └── monitoring.yml ├── README.md └── LICENSE /00_hetzner-volumes/secrets/hetzner_api_key: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01_cleanup/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec docker-sdp stack deploy -c cleanup.yml cleanup -------------------------------------------------------------------------------- /00_hetzner-volumes/rm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec docker-sdp stack deploy -c hetzner-volumes-rm.yml hetzner-volumes-rm -------------------------------------------------------------------------------- /02_portainer/deploy.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | exec docker-sdp stack deploy --with-registry-auth -c portainer.yml portainer -------------------------------------------------------------------------------- /00_docker_socket_proxy/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec docker-sdp stack deploy -c docker_socket_proxy.yml docker_socket_proxy -------------------------------------------------------------------------------- /00_hetzner-volumes/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec docker-sdp stack deploy -c hetzner-volumes-enable.yml hetzner-volumes-enable -------------------------------------------------------------------------------- /01_traefik_public/vars.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export DOMAIN="ingress.example.tld.localhost" 4 | export USERS=$(bash users.sh) -------------------------------------------------------------------------------- /02_monitoring/secrets/grafana/environment.sh: -------------------------------------------------------------------------------- 1 | export GF_SECURITY_ADMIN_USER=admin 2 | export GF_SECURITY_ADMIN_PASSWORD=CHANGE_THIS_NOW_ALSO -------------------------------------------------------------------------------- /02_monitoring/configs/grafana/conf/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source /run/secrets/grafana_environment_sh 4 | 5 | exec /run.sh "$@" -------------------------------------------------------------------------------- /01_traefik_public/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -a 4 | 5 | source ./vars.sh 6 | 7 | exec docker-sdp stack deploy -c traefik_public.yml traefik_public 
-------------------------------------------------------------------------------- /02_monitoring/configs/alertmanager/alertmanager.yml: -------------------------------------------------------------------------------- 1 | route: 2 | receiver: 'prom2teams' 3 | 4 | receivers: 5 | - name: 'prom2teams' 6 | webhook_configs: 7 | - url: "http://prom2teams:8089" -------------------------------------------------------------------------------- /02_monitoring/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -a 4 | 5 | CUR_PWD=$(pwd) 6 | cd ./secrets 7 | source ./vars.sh 8 | cd $CUR_PWD 9 | 10 | exec docker-sdp stack deploy -c monitoring.yml monitoring -------------------------------------------------------------------------------- /01_traefik_public/users.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | declare -A USERS 4 | USERS[admin]=PASSWORD_CHANGE_ME_PLEASE 5 | 6 | for K in "${!USERS[@]}" 7 | do 8 | echo $K:$(openssl passwd -apr1 "${USERS[$K]}") 9 | done | paste -s -d, - -------------------------------------------------------------------------------- /02_monitoring/secrets/prometheus_users.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | declare -A USERS 4 | USERS[admin]=CHANGE_THIS_NOW 5 | 6 | for K in "${!USERS[@]}" 7 | do 8 | echo $K:$(openssl passwd -apr1 "${USERS[$K]}") 9 | done | paste -s -d, - -------------------------------------------------------------------------------- /02_monitoring/secrets/alertmanager_users.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | declare -A USERS 4 | USERS[admin]=CHANGE_THIS_NOW 5 | 6 | for K in "${!USERS[@]}" 7 | do 8 | echo $K:$(openssl passwd -apr1 "${USERS[$K]}") 9 | done | paste -s -d, - -------------------------------------------------------------------------------- 
/02_monitoring/configs/grafana/conf/dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'default' 5 | orgId: 1 6 | folder: '' 7 | type: file 8 | disableDeletion: false 9 | editable: true 10 | options: 11 | path: /etc/grafana/dashboards -------------------------------------------------------------------------------- /02_monitoring/configs/grafana/conf/datasources/prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | deleteDatasources: 4 | - name: Prometheus 5 | 6 | datasources: 7 | - name: Prometheus 8 | type: prometheus 9 | access: proxy 10 | url: http://prometheus:9090 11 | isDefault: true 12 | version: 1 13 | editable: true -------------------------------------------------------------------------------- /02_monitoring/secrets/vars.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this gets prefixed with the service name and a dash (prometheus-, grafana-, alertmanager-) to get each service's domain 4 | export BASE_DOMAIN="mon.example.tld.localhost" 5 | export PROMETHEUS_USERS=$(bash prometheus_users.sh) 6 | export ALERTMANAGER_USERS=$(bash alertmanager_users.sh) 7 | export PROM2TEAMS_CONNECTOR="" -------------------------------------------------------------------------------- /02_monitoring/configs/node-exporter/conf/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | NODE_NAME=$(cat /etc/nodename) 4 | echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > /etc/node-exporter/node-meta.prom 5 | 6 | set -- /bin/node_exporter "$@" 7 | 8 | exec "$@" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # swarmsible-stacks 2 | Set of default production grade 
stacks for use with https://github.com/neuroforgede/swarmsible and https://github.com/neuroforgede/swarmsible-hetzner 3 | 4 | This repository is designed to be used in a setup with an external Load Balancer that handles the routing 5 | to the actual Traefik Ingress via a private Network. 6 | 7 | This repository assumes usage of Hetzner for Cloud Volumes via https://github.com/costela/docker-volume-hetzner. 8 | 9 | # Used Software 10 | 11 | 1. docker-stack-deploy for secret rotation (https://github.com/neuroforgede/docker-stack-deploy) -------------------------------------------------------------------------------- /02_monitoring/configs/prometheus/conf/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | cat /etc/prometheus/prometheus.yml.template > /tmp/prometheus.yml 4 | 5 | #JOBS=mongo-exporter:9111 redis-exporter:9112 6 | 7 | if [ ${JOBS+x} ]; then 8 | 9 | for job in $JOBS 10 | do 11 | echo "adding job $job" 12 | 13 | SERVICE=$(echo "$job" | cut -d":" -f1) 14 | PORT=$(echo "$job" | cut -d":" -f2) 15 | 16 | cat >>/tmp/prometheus.yml < 50 7 | for: 15m 8 | labels: 9 | severity: warning 10 | annotations: 11 | description: Swarm node {{ $labels.node_name }} CPU usage is at {{ humanize 12 | $value}}%. 13 | summary: CPU alert for Swarm node '{{ $labels.node_name }}' 14 | - alert: node_memory_usage 15 | expr: sum(((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) 16 | * ON(instance) GROUP_LEFT(node_name) node_meta * 100) BY (node_name) > 80 17 | for: 1m 18 | labels: 19 | severity: warning 20 | annotations: 21 | description: Swarm node {{ $labels.node_name }} memory usage is at {{ humanize 22 | $value}}%. 
23 | summary: Memory alert for Swarm node '{{ $labels.node_name }}' 24 | 25 | - alert: node_disk_usage 26 | expr: ((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}) 27 | * 100 / node_filesystem_size_bytes{mountpoint="/"}) * ON(instance) GROUP_LEFT(node_name) 28 | node_meta > 85 29 | for: 1m 30 | labels: 31 | severity: warning 32 | annotations: 33 | description: Swarm node {{ $labels.node_name }} disk usage is at {{ humanize 34 | $value}}%. 35 | summary: Disk alert for Swarm node '{{ $labels.node_name }}' 36 | - alert: node_disk_fill_rate_6h 37 | expr: predict_linear(node_filesystem_free_bytes{mountpoint="/"}[1h], 6 * 3600) * ON(instance) 38 | GROUP_LEFT(node_name) node_meta < 0 39 | for: 1h 40 | labels: 41 | severity: critical 42 | annotations: 43 | description: Swarm node {{ $labels.node_name }} disk is going to fill up in 44 | 6h. 45 | summary: Disk fill alert for Swarm node '{{ $labels.node_name }}' 46 | 47 | - alert: hetzner_cloud_volume_usage 48 | expr: ((node_filesystem_size_bytes{device=~"/dev/disk/by-id/scsi-.*HC_Volume_.*"} - node_filesystem_free_bytes{device=~"/dev/disk/by-id/scsi-.*HC_Volume_.*"}) 49 | * 100 / node_filesystem_size_bytes{device=~"/dev/disk/by-id/scsi-.*HC_Volume_.*"}) * ON(instance) GROUP_LEFT(node_name) 50 | node_meta > 85 51 | for: 1m 52 | labels: 53 | severity: warning 54 | annotations: 55 | description: Hetzner Cloud volume on {{ $labels.device }} attached to Swarm node {{ $labels.node_name }} disk usage is at {{ humanize 56 | $value}}%. 
57 | summary: Disk alert for Swarm node '{{ $labels.node_name }}' 58 | 59 | - alert: hetzner_cloud_volume_fill_rate_6h 60 | expr: predict_linear(node_filesystem_free_bytes{device=~"/dev/disk/by-id/scsi-.*HC_Volume_.*"}[1h], 6 * 3600) * ON(instance) 61 | GROUP_LEFT(node_name) node_meta < 0 62 | for: 1h 63 | labels: 64 | severity: critical 65 | annotations: 66 | description: Hetzner Cloud volume on {{ $labels.device }} attached to Swarm node {{ $labels.node_name }} disk is going to fill up in 67 | 6h. 68 | summary: Disk fill alert for Swarm node '{{ $labels.node_name }}' -------------------------------------------------------------------------------- /02_monitoring/configs/prometheus/rules/swarm_task.rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: /1/store/projects/docker-swarm/apps/swarmprom/prometheus/rules/swarm_task.rules.yml 3 | rules: 4 | - alert: task_high_cpu_usage_50 5 | expr: sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~".+"}[1m])) 6 | BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) 7 | * 100 > 50 8 | for: 15m 9 | annotations: 10 | description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ 11 | $labels.container_label_com_docker_swarm_node_id }}'' CPU usage is at {{ humanize 12 | $value}}%.' 
13 | summary: CPU alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name 14 | }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' 15 | 16 | - alert: task_high_memory_usage_1g 17 | expr: ( 18 | sum(container_memory_rss{container_label_com_docker_swarm_task_name=~".+"}) 19 | BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) > 1e+09 20 | AND 21 | ((sum(container_spec_memory_limit_bytes{container_label_com_docker_swarm_task_name=~".+"}) 22 | BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id)) == 0) 23 | ) 24 | for: 1m 25 | annotations: 26 | description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ 27 | $labels.container_label_com_docker_swarm_node_id }}'' memory usage is {{ humanize 28 | $value}}.' 29 | summary: Memory alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name 30 | }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' 31 | 32 | - alert: task_high_memory_usage_limit 33 | expr: ( 34 | ( 35 | (sum(container_memory_rss{container_label_com_docker_swarm_task_name=~".+"}) 36 | BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id)) 37 | > 38 | ( 39 | (sum(container_spec_memory_limit_bytes{container_label_com_docker_swarm_task_name=~".+"}) 40 | BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id)) 41 | * 42 | 0.8 43 | ) 44 | ) 45 | AND 46 | ((sum(container_spec_memory_limit_bytes{container_label_com_docker_swarm_task_name=~".+"}) 47 | BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id)) > 0) 48 | ) 49 | for: 1m 50 | annotations: 51 | description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ 52 | $labels.container_label_com_docker_swarm_node_id }}'' memory usage is {{ humanize 53 | $value}} which is more than 80% of the summed up limit of all containers.' 
54 | summary: Memory alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name 55 | }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' 56 | 57 | - alert: Container (Swarm) Too Many Restarts 58 | expr: count by (instance, container_label_com_docker_swarm_service_name, image) (count_over_time(container_last_seen{container_label_com_docker_swarm_service_name!=""}[15m])) - 1 >= 10 59 | for: 5m 60 | annotations: 61 | summary: "Too many restarts ({{ $value }}) for container \"{{ $labels.container_label_com_docker_swarm_service_name }}\" and docker image \"{{ $labels.image }}\"" 62 | 63 | - alert: Container (Swarm) died/is dying with exit code other than 0 64 | expr: count by (docker_hostname, container_attributes_com_docker_swarm_service_name, container_attributes_exitcode, status) ( 65 | ( 66 | docker_events_container_total{status=~"die|.*oom.*|.*kill.*", container_attributes_exitcode != "0", container_attributes_exitcode != "" } 67 | unless 68 | docker_events_container_total{status=~"die|.*oom.*|.*kill.*", container_attributes_exitcode != "0", container_attributes_exitcode != "" } 69 | offset 10m 70 | ) OR ( 71 | increase(docker_events_container_total{status=~"die|.*oom.*|.*kill.*", container_attributes_exitcode != "0", container_attributes_exitcode != "" }[10m]) > 0 72 | ) 73 | ) 74 | annotations: 75 | summary: "Bad Exit code \"{{ $labels.container_attributes_exitcode }}\" for status \"{{ $labels.status }}\" for service \"{{ $labels.container_attributes_com_docker_swarm_service_name }}\"" 76 | -------------------------------------------------------------------------------- /02_monitoring/monitoring.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | 3 | services: 4 | prometheus: 5 | user: root 6 | entrypoint: "/etc/prometheus/docker-entrypoint.sh" 7 | image: prom/prometheus:v2.41.0 8 | healthcheck: 9 | test: ["CMD", "wget", "--tries=1", "--spider", "--quiet", 
"http://localhost:9090/metrics"] 10 | interval: 10s 11 | timeout: 10s 12 | retries: 2 13 | start_period: 10s 14 | networks: 15 | - net 16 | - traefik-public 17 | command: 18 | - '--config.file=/etc/prometheus/prometheus.yml' 19 | - '--storage.tsdb.path=/prometheus' 20 | - '--storage.tsdb.retention=${PROMETHEUS_RETENTION:-24h}' 21 | - '--web.external-url=https://prometheus-${BASE_DOMAIN}' 22 | volumes: 23 | - prometheus:/prometheus 24 | configs: 25 | - source: node_rules 26 | target: /etc/prometheus/swarm_node.rules.yml 27 | - source: task_rules 28 | target: /etc/prometheus/swarm_task.rules.yml 29 | - source: prometheus_entrypoint 30 | target: /etc/prometheus/docker-entrypoint.sh 31 | mode: 0777 32 | - source: prometheus_yml 33 | target: /etc/prometheus/prometheus.yml.template 34 | deploy: 35 | mode: replicated 36 | replicas: 1 37 | resources: 38 | limits: 39 | memory: 2048M 40 | reservations: 41 | memory: 128M 42 | labels: 43 | - traefik.enable=true 44 | - traefik.docker.network=traefik-public 45 | - traefik.constraint-label=traefik-public 46 | - traefik.http.routers.monitoring-prometheus-http.rule=Host(`prometheus-${BASE_DOMAIN}`) && !PathPrefix(`/metrics`) && !PathPrefix(`/healhtz`) 47 | - traefik.http.routers.monitoring-prometheus-http.entrypoints=http 48 | - traefik.http.services.monitoring-prometheus.loadbalancer.server.port=9090 49 | - traefik.http.middlewares.monitoring-prometheus-auth.basicauth.users=${PROMETHEUS_USERS} 50 | - traefik.http.routers.monitoring-prometheus-http.middlewares=monitoring-prometheus-auth 51 | 52 | cadvisor: 53 | image: gcr.io/cadvisor/cadvisor:v0.47.0 54 | healthcheck: 55 | test: ["CMD", "wget", "--tries=1", "--spider", "--quiet", "http://localhost:8080/metrics"] 56 | interval: 10s 57 | timeout: 10s 58 | retries: 2 59 | start_period: 10s 60 | networks: 61 | - net 62 | command: -logtostderr -docker_only 63 | volumes: 64 | - /var/run/docker.sock:/var/run/docker.sock:ro 65 | - /:/rootfs:ro 66 | - /var/run:/var/run 67 | - /sys:/sys:ro 
68 | - /var/lib/docker/:/var/lib/docker:ro 69 | deploy: 70 | mode: global 71 | resources: 72 | limits: 73 | memory: 256M 74 | reservations: 75 | memory: 64M 76 | 77 | docker-swarm-trivy-exporter: 78 | image: ghcr.io/neuroforgede/docker-swarm-trivy-exporter:latest 79 | networks: 80 | - net 81 | volumes: 82 | - /var/run/docker.sock:/var/run/docker.sock:ro 83 | environment: 84 | TRIVY_SLOW: "true" 85 | deploy: 86 | placement: 87 | constraints: 88 | - node.role==manager 89 | mode: replicated 90 | replicas: 1 91 | resources: 92 | limits: 93 | memory: 256M 94 | reservations: 95 | memory: 128M 96 | 97 | docker-engine-events-exporter: 98 | image: ghcr.io/neuroforgede/docker-engine-events-exporter:latest 99 | healthcheck: 100 | test: ["CMD", "wget", "--tries=1", "--spider", "--quiet", "http://localhost:9000/metrics"] 101 | interval: 10s 102 | timeout: 10s 103 | retries: 2 104 | start_period: 10s 105 | networks: 106 | - net 107 | environment: 108 | - DOCKER_HOSTNAME={{.Node.Hostname}} 109 | volumes: 110 | - /var/run/docker.sock:/var/run/docker.sock:ro 111 | deploy: 112 | mode: global 113 | resources: 114 | limits: 115 | memory: 256M 116 | reservations: 117 | memory: 128M 118 | 119 | docker-engine-networks-exporter: 120 | image: ghcr.io/neuroforgede/docker-engine-networks-exporter:latest 121 | healthcheck: 122 | test: ["CMD", "wget", "--tries=1", "--spider", "--quiet", "http://localhost:9000/metrics"] 123 | interval: 10s 124 | timeout: 10s 125 | retries: 2 126 | start_period: 10s 127 | networks: 128 | - net 129 | environment: 130 | - DOCKER_HOSTNAME={{.Node.Hostname}} 131 | volumes: 132 | - /var/run/docker.sock:/var/run/docker.sock:ro 133 | deploy: 134 | mode: global 135 | resources: 136 | limits: 137 | memory: 256M 138 | reservations: 139 | memory: 128M 140 | 141 | node-exporter: 142 | image: prom/node-exporter:latest 143 | entrypoint: /etc/node-exporter/docker-entrypoint.sh 144 | # healthcheck: 145 | # test: ["CMD", "wget", "--tries=1", "--spider", "--quiet", 
"http://localhost:9100/metrics"] 146 | # interval: 10s 147 | # timeout: 10s 148 | # retries: 2 149 | # start_period: 10s 150 | user: root 151 | networks: 152 | - net 153 | environment: 154 | - NODE_ID={{.Node.ID}} 155 | volumes: 156 | - /proc:/host/proc:ro 157 | - /sys:/host/sys:ro 158 | - /:/rootfs:ro 159 | - /etc/hostname:/etc/nodename 160 | command: 161 | - '--path.sysfs=/host/sys' 162 | - '--path.procfs=/host/proc' 163 | - '--path.rootfs=/rootfs' 164 | - '--collector.textfile.directory=/etc/node-exporter/' 165 | - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' 166 | - '--no-collector.ipvs' 167 | configs: 168 | - source: nodeexporter_entrypoint 169 | target: /etc/node-exporter/docker-entrypoint.sh 170 | mode: 0777 171 | deploy: 172 | mode: global 173 | resources: 174 | limits: 175 | memory: 256M 176 | reservations: 177 | memory: 64M 178 | 179 | grafana: 180 | entrypoint: /custom_entrypoint.sh 181 | image: grafana/grafana:9.3.6 182 | healthcheck: 183 | test: ["CMD", "wget", "--tries=1", "--spider", "--quiet", "http://localhost:3000/api/health"] 184 | interval: 10s 185 | timeout: 10s 186 | retries: 2 187 | start_period: 10s 188 | user: root 189 | networks: 190 | - net 191 | - traefik-public 192 | environment: 193 | - GF_USERS_ALLOW_SIGN_UP=false 194 | volumes: 195 | - grafana:/var/lib/grafana 196 | secrets: 197 | - source: grafana_environment_sh 198 | target: grafana_environment_sh 199 | configs: 200 | - source: grafana_traefik_dash 201 | target: /etc/grafana/dashboards/traefik-dash.json 202 | - source: grafana_nodes_dash 203 | target: /etc/grafana/dashboards/nodes-dash.json 204 | - source: grafana_prometheus_dash 205 | target: /etc/grafana/dashboards/prometheus-dash.json 206 | - source: grafana_services_dash 207 | target: /etc/grafana/dashboards/services-dash.json 208 | 209 | - source: grafana_dashboards 210 | target: /etc/grafana/provisioning/dashboards/dashboards.yaml 211 | 212 | - source: grafana_datasource_prometheus 213 | 
target: /etc/grafana/provisioning/datasources/prometheus.yaml 214 | 215 | - source: grafana_datasource_entrypoint 216 | target: /custom_entrypoint.sh 217 | mode: 0777 218 | deploy: 219 | mode: replicated 220 | replicas: 1 221 | resources: 222 | limits: 223 | memory: 1024M 224 | reservations: 225 | memory: 128M 226 | labels: 227 | - traefik.enable=true 228 | - traefik.docker.network=traefik-public 229 | - traefik.constraint-label=traefik-public 230 | - traefik.http.routers.monitoring-grafana-http.rule=Host(`grafana-${BASE_DOMAIN}`) && !PathPrefix(`/api/health`) && !PathPrefix(`/metrics`) && !PathPrefix(`/healhtz`) 231 | - traefik.http.routers.monitoring-grafana-http.entrypoints=http 232 | - traefik.http.services.monitoring-grafana.loadbalancer.server.port=3000 233 | 234 | alertmanager: 235 | image: prom/alertmanager:v0.25.0 236 | healthcheck: 237 | test: ["CMD", "wget", "--tries=1", "--spider", "--quiet", "http://localhost:9093/metrics"] 238 | interval: 10s 239 | timeout: 10s 240 | retries: 2 241 | start_period: 10s 242 | networks: 243 | - net 244 | - traefik-public 245 | command: 246 | - '--config.file=/etc/alertmanager/alertmanager.yml' 247 | - '--storage.path=/alertmanager' 248 | - '--web.external-url=https://alertmanager-${BASE_DOMAIN}' 249 | - '--data.retention=${ALERTMANAGER_RETENTION:-168h}' 250 | volumes: 251 | - alertmanager:/alertmanager 252 | configs: 253 | - source: alertmanager_yml 254 | target: /etc/alertmanager/alertmanager.yml 255 | deploy: 256 | mode: replicated 257 | replicas: 1 258 | resources: 259 | limits: 260 | memory: 256M 261 | reservations: 262 | memory: 64M 263 | labels: 264 | - traefik.enable=true 265 | - traefik.docker.network=traefik-public 266 | - traefik.constraint-label=traefik-public 267 | - traefik.http.routers.monitoring-alertmanager-http.rule=Host(`alertmanager-${BASE_DOMAIN}`) && !PathPrefix(`/metrics`) && !PathPrefix(`/healhtz`) 268 | - traefik.http.routers.monitoring-alertmanager-http.entrypoints=http 269 | - 
traefik.http.services.monitoring-alertmanager.loadbalancer.server.port=9093 270 | - traefik.http.middlewares.monitoring-alertmanager-auth.basicauth.users=${ALERTMANAGER_USERS} 271 | - traefik.http.routers.monitoring-alertmanager-http.middlewares=monitoring-alertmanager-auth 272 | 273 | prom2teams: 274 | image: idealista/prom2teams:3.3.0 275 | entrypoint: /bin/sh -c "python /opt/prom2teams/replace_config.py && exec prom2teams --loglevel INFO" 276 | healthcheck: 277 | test: ["CMD", "wget", "--tries=1", "--spider", "--quiet", "http://localhost:8089/metrics"] 278 | interval: 10s 279 | timeout: 10s 280 | retries: 2 281 | start_period: 10s 282 | networks: 283 | - net 284 | environment: 285 | PROM2TEAMS_CONNECTOR: ${PROM2TEAMS_CONNECTOR} 286 | deploy: 287 | mode: replicated 288 | replicas: 1 289 | resources: 290 | limits: 291 | memory: 256M 292 | reservations: 293 | memory: 64M 294 | 295 | configs: 296 | node_rules: 297 | file: ./configs/prometheus/rules/swarm_node.rules.yml 298 | task_rules: 299 | file: ./configs/prometheus/rules/swarm_task.rules.yml 300 | prometheus_entrypoint: 301 | file: ./configs/prometheus/conf/docker-entrypoint.sh 302 | prometheus_yml: 303 | file: ./configs/prometheus/conf/prometheus.yml 304 | 305 | alertmanager_yml: 306 | file: ./configs/alertmanager/alertmanager.yml 307 | 308 | nodeexporter_entrypoint: 309 | file: ./configs/node-exporter/conf/docker-entrypoint.sh 310 | 311 | grafana_traefik_dash: 312 | file: ./configs/grafana/conf/dashboards/traefik-dash.json 313 | grafana_nodes_dash: 314 | file: ./configs/grafana/conf/dashboards/nodes-dash.json 315 | grafana_prometheus_dash: 316 | file: ./configs/grafana/conf/dashboards/prometheus-dash.json 317 | grafana_services_dash: 318 | file: ./configs/grafana/conf/dashboards/services-dash.json 319 | grafana_datasource_prometheus: 320 | file: ./configs/grafana/conf/datasources/prometheus.yaml 321 | grafana_dashboards: 322 | file: ./configs/grafana/conf/dashboards.yaml 323 | grafana_datasource_entrypoint: 
324 | file: ./configs/grafana/conf/docker-entrypoint.sh 325 | 326 | secrets: 327 | grafana_environment_sh: 328 | file: ./secrets/grafana/environment.sh 329 | 330 | networks: 331 | net: 332 | driver: overlay 333 | attachable: true 334 | driver_opts: 335 | encrypted: "" 336 | com.docker.network.driver.mtu: "1350" 337 | traefik-public: 338 | name: "traefik-public" 339 | external: true 340 | 341 | volumes: 342 | prometheus: 343 | driver: hetzner-volume 344 | driver_opts: 345 | size: '25' 346 | fstype: ext4 347 | grafana: 348 | driver: hetzner-volume 349 | driver_opts: 350 | size: '25' 351 | fstype: ext4 352 | alertmanager: 353 | driver: hetzner-volume 354 | driver_opts: 355 | size: '10' 356 | fstype: ext4 357 | -------------------------------------------------------------------------------- /02_monitoring/configs/grafana/conf/dashboards/traefik-dash.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "datasource", 8 | "uid": "grafana" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "description": "Traefik dashboard prometheus", 25 | "editable": true, 26 | "fiscalYearStartMonth": 0, 27 | "gnetId": 4475, 28 | "graphTooltip": 0, 29 | "links": [], 30 | "liveNow": false, 31 | "panels": [ 32 | { 33 | "datasource": { 34 | "type": "prometheus", 35 | "uid": "PBFA97CFB590B2093" 36 | }, 37 | "gridPos": { 38 | "h": 1, 39 | "w": 24, 40 | "x": 0, 41 | "y": 0 42 | }, 43 | "id": 10, 44 | "targets": [ 45 | { 46 | "datasource": { 47 | "type": "prometheus", 48 | "uid": "PBFA97CFB590B2093" 49 | }, 50 | "refId": "A" 51 | } 52 | ], 53 | "title": "$service stats", 54 | "type": "row" 55 | }, 56 | { 57 | "datasource": { 58 | 
"type": "prometheus", 59 | "uid": "PBFA97CFB590B2093" 60 | }, 61 | "fieldConfig": { 62 | "defaults": { 63 | "color": { 64 | "mode": "thresholds" 65 | }, 66 | "mappings": [ 67 | { 68 | "options": { 69 | "1": { 70 | "text": "OK" 71 | } 72 | }, 73 | "type": "value" 74 | } 75 | ], 76 | "thresholds": { 77 | "mode": "absolute", 78 | "steps": [ 79 | { 80 | "color": "#d44a3a", 81 | "value": null 82 | }, 83 | { 84 | "color": "rgba(237, 129, 40, 0.89)", 85 | "value": 0 86 | }, 87 | { 88 | "color": "#299c46", 89 | "value": 1 90 | } 91 | ] 92 | }, 93 | "unit": "none" 94 | }, 95 | "overrides": [] 96 | }, 97 | "gridPos": { 98 | "h": 11, 99 | "w": 8, 100 | "x": 0, 101 | "y": 1 102 | }, 103 | "id": 1, 104 | "links": [], 105 | "maxDataPoints": 100, 106 | "options": { 107 | "colorMode": "value", 108 | "graphMode": "none", 109 | "justifyMode": "auto", 110 | "orientation": "horizontal", 111 | "reduceOptions": { 112 | "calcs": [ 113 | "lastNotNull" 114 | ], 115 | "fields": "", 116 | "values": false 117 | }, 118 | "textMode": "auto" 119 | }, 120 | "pluginVersion": "9.0.7", 121 | "targets": [ 122 | { 123 | "datasource": { 124 | "type": "prometheus", 125 | "uid": "PBFA97CFB590B2093" 126 | }, 127 | "expr": "count(traefik_service_open_connections)", 128 | "format": "time_series", 129 | "intervalFactor": 2, 130 | "refId": "A" 131 | } 132 | ], 133 | "title": "Number of Traefik Services ", 134 | "type": "stat" 135 | }, 136 | { 137 | "datasource": { 138 | "type": "prometheus", 139 | "uid": "PBFA97CFB590B2093" 140 | }, 141 | "fieldConfig": { 142 | "defaults": { 143 | "color": { 144 | "mode": "thresholds" 145 | }, 146 | "mappings": [ 147 | { 148 | "id": 0, 149 | "op": "=", 150 | "text": "N/A", 151 | "type": 1, 152 | "value": "null" 153 | } 154 | ], 155 | "max": 5, 156 | "min": 0, 157 | "thresholds": { 158 | "mode": "absolute", 159 | "steps": [ 160 | { 161 | "color": "green", 162 | "value": null 163 | }, 164 | { 165 | "color": "red", 166 | "value": 3 167 | } 168 | ] 169 | }, 170 | "unit": "none" 
171 | }, 172 | "overrides": [] 173 | }, 174 | "gridPos": { 175 | "h": 11, 176 | "w": 8, 177 | "x": 8, 178 | "y": 1 179 | }, 180 | "id": 2, 181 | "links": [], 182 | "options": { 183 | "orientation": "horizontal", 184 | "reduceOptions": { 185 | "calcs": [ 186 | "mean" 187 | ], 188 | "fields": "", 189 | "values": false 190 | }, 191 | "showThresholdLabels": false, 192 | "showThresholdMarkers": true 193 | }, 194 | "pluginVersion": "9.0.7", 195 | "targets": [ 196 | { 197 | "datasource": { 198 | "type": "prometheus", 199 | "uid": "PBFA97CFB590B2093" 200 | }, 201 | "expr": "traefik_service_request_duration_seconds_sum{service=\"$service\"}", 202 | "format": "time_series", 203 | "intervalFactor": 2, 204 | "legendFormat": "{{$service}} | {{method}}", 205 | "refId": "A" 206 | } 207 | ], 208 | "title": "Average response time", 209 | "type": "gauge" 210 | }, 211 | { 212 | "aliasColors": {}, 213 | "breakPoint": "50%", 214 | "combine": { 215 | "label": "Others", 216 | "threshold": 0 217 | }, 218 | "datasource": { 219 | "type": "prometheus", 220 | "uid": "PBFA97CFB590B2093" 221 | }, 222 | "fontSize": "80%", 223 | "format": "short", 224 | "gridPos": { 225 | "h": 11, 226 | "w": 8, 227 | "x": 16, 228 | "y": 1 229 | }, 230 | "id": 4, 231 | "legend": { 232 | "show": true, 233 | "values": true 234 | }, 235 | "legendType": "Under graph", 236 | "links": [], 237 | "maxDataPoints": 3, 238 | "nullPointMode": "connected", 239 | "pieType": "pie", 240 | "pluginVersion": "6.5.0", 241 | "strokeWidth": 1, 242 | "targets": [ 243 | { 244 | "datasource": { 245 | "type": "prometheus", 246 | "uid": "PBFA97CFB590B2093" 247 | }, 248 | "expr": "avg_over_time(traefik_service_request_duration_seconds_sum{service=\"$service\"}[5m])", 249 | "format": "time_series", 250 | "intervalFactor": 2, 251 | "legendFormat": "{{service}}", 252 | "refId": "A" 253 | } 254 | ], 255 | "title": "Average Service response time", 256 | "type": "grafana-piechart-panel", 257 | "valueName": "current" 258 | }, 259 | { 260 | 
"datasource": { 261 | "type": "prometheus", 262 | "uid": "PBFA97CFB590B2093" 263 | }, 264 | "fieldConfig": { 265 | "defaults": { 266 | "color": { 267 | "mode": "thresholds" 268 | }, 269 | "mappings": [], 270 | "max": 100, 271 | "min": 0, 272 | "thresholds": { 273 | "mode": "absolute", 274 | "steps": [ 275 | { 276 | "color": "green", 277 | "value": null 278 | } 279 | ] 280 | }, 281 | "unit": "short" 282 | }, 283 | "overrides": [] 284 | }, 285 | "gridPos": { 286 | "h": 7, 287 | "w": 24, 288 | "x": 0, 289 | "y": 12 290 | }, 291 | "id": 3, 292 | "links": [], 293 | "options": { 294 | "displayMode": "basic", 295 | "minVizHeight": 10, 296 | "minVizWidth": 0, 297 | "orientation": "horizontal", 298 | "reduceOptions": { 299 | "calcs": [ 300 | "range" 301 | ], 302 | "fields": "", 303 | "values": false 304 | }, 305 | "showUnfilled": true 306 | }, 307 | "pluginVersion": "9.0.7", 308 | "targets": [ 309 | { 310 | "datasource": { 311 | "type": "prometheus", 312 | "uid": "PBFA97CFB590B2093" 313 | }, 314 | "expr": "avg(traefik_service_requests_total{service=\"$service\"})", 315 | "format": "time_series", 316 | "intervalFactor": 2, 317 | "legendFormat": "{{$service}} | {{method}}", 318 | "refId": "A" 319 | } 320 | ], 321 | "title": "Total requests over 5min $service", 322 | "type": "bargauge" 323 | }, 324 | { 325 | "collapsed": false, 326 | "datasource": { 327 | "type": "prometheus", 328 | "uid": "PBFA97CFB590B2093" 329 | }, 330 | "gridPos": { 331 | "h": 1, 332 | "w": 24, 333 | "x": 0, 334 | "y": 19 335 | }, 336 | "id": 12, 337 | "panels": [], 338 | "targets": [ 339 | { 340 | "datasource": { 341 | "type": "prometheus", 342 | "uid": "PBFA97CFB590B2093" 343 | }, 344 | "refId": "A" 345 | } 346 | ], 347 | "title": "Global stats", 348 | "type": "row" 349 | }, 350 | { 351 | "aliasColors": {}, 352 | "bars": true, 353 | "dashLength": 10, 354 | "dashes": false, 355 | "datasource": { 356 | "type": "prometheus", 357 | "uid": "PBFA97CFB590B2093" 358 | }, 359 | "fieldConfig": { 360 | "defaults": 
{ 361 | "links": [] 362 | }, 363 | "overrides": [] 364 | }, 365 | "fill": 1, 366 | "fillGradient": 0, 367 | "gridPos": { 368 | "h": 7, 369 | "w": 12, 370 | "x": 0, 371 | "y": 20 372 | }, 373 | "hiddenSeries": false, 374 | "id": 5, 375 | "legend": { 376 | "alignAsTable": true, 377 | "avg": false, 378 | "current": true, 379 | "max": true, 380 | "min": true, 381 | "rightSide": true, 382 | "show": true, 383 | "total": false, 384 | "values": true 385 | }, 386 | "lines": false, 387 | "linewidth": 1, 388 | "links": [], 389 | "nullPointMode": "null", 390 | "options": { 391 | "alertThreshold": true 392 | }, 393 | "percentage": false, 394 | "pluginVersion": "9.0.7", 395 | "pointradius": 5, 396 | "points": false, 397 | "renderer": "flot", 398 | "seriesOverrides": [], 399 | "spaceLength": 10, 400 | "stack": true, 401 | "steppedLine": false, 402 | "targets": [ 403 | { 404 | "datasource": { 405 | "type": "prometheus", 406 | "uid": "PBFA97CFB590B2093" 407 | }, 408 | "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code=\"200\"}[5m])", 409 | "format": "time_series", 410 | "intervalFactor": 2, 411 | "legendFormat": "{{method}} : {{code}}", 412 | "refId": "A" 413 | } 414 | ], 415 | "thresholds": [], 416 | "timeRegions": [], 417 | "title": "Status code 200 over 5min", 418 | "tooltip": { 419 | "shared": true, 420 | "sort": 0, 421 | "value_type": "individual" 422 | }, 423 | "type": "graph", 424 | "xaxis": { 425 | "mode": "time", 426 | "show": true, 427 | "values": [] 428 | }, 429 | "yaxes": [ 430 | { 431 | "format": "short", 432 | "logBase": 1, 433 | "show": true 434 | }, 435 | { 436 | "format": "short", 437 | "label": "", 438 | "logBase": 1, 439 | "show": true 440 | } 441 | ], 442 | "yaxis": { 443 | "align": false 444 | } 445 | }, 446 | { 447 | "datasource": { 448 | "type": "prometheus", 449 | "uid": "PBFA97CFB590B2093" 450 | }, 451 | "fieldConfig": { 452 | "defaults": { 453 | "color": { 454 | "mode": "thresholds" 455 | }, 456 | "decimals": 0, 457 | 
"mappings": [], 458 | "max": 100, 459 | "min": 0, 460 | "thresholds": { 461 | "mode": "absolute", 462 | "steps": [ 463 | { 464 | "color": "green", 465 | "value": null 466 | } 467 | ] 468 | }, 469 | "unit": "short" 470 | }, 471 | "overrides": [] 472 | }, 473 | "gridPos": { 474 | "h": 7, 475 | "w": 12, 476 | "x": 12, 477 | "y": 20 478 | }, 479 | "id": 6, 480 | "links": [], 481 | "options": { 482 | "displayMode": "basic", 483 | "minVizHeight": 10, 484 | "minVizWidth": 0, 485 | "orientation": "horizontal", 486 | "reduceOptions": { 487 | "calcs": [ 488 | "mean" 489 | ], 490 | "fields": "", 491 | "limit": 10, 492 | "values": false 493 | }, 494 | "showUnfilled": true 495 | }, 496 | "pluginVersion": "9.0.7", 497 | "targets": [ 498 | { 499 | "datasource": { 500 | "type": "prometheus", 501 | "uid": "PBFA97CFB590B2093" 502 | }, 503 | "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code!=\"200\"}[5m]) * 100", 504 | "format": "time_series", 505 | "instant": false, 506 | "intervalFactor": 2, 507 | "legendFormat": "{{ method }} : {{code}}", 508 | "refId": "A" 509 | }, 510 | { 511 | "datasource": { 512 | "type": "prometheus", 513 | "uid": "PBFA97CFB590B2093" 514 | }, 515 | "expr": "avg_over_time", 516 | "refId": "B" 517 | } 518 | ], 519 | "title": "Entrypoint Error Codes", 520 | "type": "bargauge" 521 | }, 522 | { 523 | "aliasColors": {}, 524 | "breakPoint": "50%", 525 | "combine": { 526 | "label": "Others", 527 | "threshold": 0 528 | }, 529 | "datasource": { 530 | "type": "prometheus", 531 | "uid": "PBFA97CFB590B2093" 532 | }, 533 | "fontSize": "80%", 534 | "format": "short", 535 | "gridPos": { 536 | "h": 7, 537 | "w": 12, 538 | "x": 0, 539 | "y": 27 540 | }, 541 | "id": 7, 542 | "legend": { 543 | "show": true, 544 | "values": true 545 | }, 546 | "legendType": "Right side", 547 | "links": [], 548 | "maxDataPoints": 3, 549 | "nullPointMode": "connected", 550 | "pieType": "pie", 551 | "strokeWidth": 1, 552 | "targets": [ 553 | { 554 | "datasource": { 555 
| "type": "prometheus", 556 | "uid": "PBFA97CFB590B2093" 557 | }, 558 | "expr": "sum(rate(traefik_service_requests_total[5m])) by (service) ", 559 | "format": "time_series", 560 | "interval": "", 561 | "intervalFactor": 2, 562 | "legendFormat": "{{ service }}", 563 | "refId": "A" 564 | } 565 | ], 566 | "title": "Requests by service", 567 | "type": "grafana-piechart-panel", 568 | "valueName": "total" 569 | }, 570 | { 571 | "aliasColors": {}, 572 | "breakPoint": "50%", 573 | "combine": { 574 | "label": "Others", 575 | "threshold": 0 576 | }, 577 | "datasource": { 578 | "type": "prometheus", 579 | "uid": "PBFA97CFB590B2093" 580 | }, 581 | "fontSize": "80%", 582 | "format": "short", 583 | "gridPos": { 584 | "h": 7, 585 | "w": 12, 586 | "x": 12, 587 | "y": 27 588 | }, 589 | "id": 8, 590 | "legend": { 591 | "show": true, 592 | "values": true 593 | }, 594 | "legendType": "Right side", 595 | "links": [], 596 | "maxDataPoints": 3, 597 | "nullPointMode": "connected", 598 | "pieType": "pie", 599 | "strokeWidth": 1, 600 | "targets": [ 601 | { 602 | "datasource": { 603 | "type": "prometheus", 604 | "uid": "PBFA97CFB590B2093" 605 | }, 606 | "expr": "sum(rate(traefik_entrypoint_requests_total{entrypoint =~ \"$entrypoint\"}[5m])) by (entrypoint) ", 607 | "format": "time_series", 608 | "interval": "", 609 | "intervalFactor": 2, 610 | "legendFormat": "{{ entrypoint }}", 611 | "refId": "A" 612 | } 613 | ], 614 | "title": "Requests by protocol", 615 | "type": "grafana-piechart-panel", 616 | "valueName": "total" 617 | } 618 | ], 619 | "schemaVersion": 36, 620 | "style": "dark", 621 | "tags": [ 622 | "traefik", 623 | "prometheus" 624 | ], 625 | "templating": { 626 | "list": [ 627 | { 628 | "current": { 629 | "selected": true, 630 | "text": "website-frontend-prod@docker", 631 | "value": "website-frontend-prod@docker" 632 | }, 633 | "datasource": { 634 | "type": "prometheus", 635 | "uid": "PBFA97CFB590B2093" 636 | }, 637 | "definition": "label_values(service)", 638 | "hide": 0, 639 | 
"includeAll": true, 640 | "multi": false, 641 | "name": "service", 642 | "options": [], 643 | "query": { 644 | "query": "label_values(service)", 645 | "refId": "Prometheus-service-Variable-Query" 646 | }, 647 | "refresh": 1, 648 | "regex": "", 649 | "skipUrlSync": false, 650 | "sort": 0, 651 | "tagValuesQuery": "", 652 | "tagsQuery": "", 653 | "type": "query", 654 | "useTags": false 655 | }, 656 | { 657 | "current": { 658 | "selected": true, 659 | "text": [ 660 | "http" 661 | ], 662 | "value": [ 663 | "http" 664 | ] 665 | }, 666 | "datasource": { 667 | "type": "prometheus", 668 | "uid": "PBFA97CFB590B2093" 669 | }, 670 | "definition": "label_values(entrypoint)", 671 | "hide": 0, 672 | "includeAll": true, 673 | "multi": true, 674 | "name": "entrypoint", 675 | "options": [], 676 | "query": { 677 | "query": "label_values(entrypoint)", 678 | "refId": "Prometheus-entrypoint-Variable-Query" 679 | }, 680 | "refresh": 1, 681 | "regex": "", 682 | "skipUrlSync": false, 683 | "sort": 0, 684 | "tagValuesQuery": "", 685 | "tagsQuery": "", 686 | "type": "query", 687 | "useTags": false 688 | }, 689 | { 690 | "current": { 691 | "selected": false, 692 | "text": "200", 693 | "value": "200" 694 | }, 695 | "datasource": { 696 | "type": "prometheus", 697 | "uid": "PBFA97CFB590B2093" 698 | }, 699 | "definition": "label_values(code)", 700 | "hide": 2, 701 | "includeAll": false, 702 | "multi": false, 703 | "name": "code", 704 | "options": [], 705 | "query": { 706 | "query": "label_values(code)", 707 | "refId": "Prometheus-code-Variable-Query" 708 | }, 709 | "refresh": 1, 710 | "regex": "", 711 | "skipUrlSync": false, 712 | "sort": 0, 713 | "tagValuesQuery": "", 714 | "tagsQuery": "", 715 | "type": "query", 716 | "useTags": false 717 | } 718 | ] 719 | }, 720 | "time": { 721 | "from": "now-5m", 722 | "to": "now" 723 | }, 724 | "timepicker": { 725 | "refresh_intervals": [ 726 | "5s", 727 | "10s", 728 | "30s", 729 | "1m", 730 | "5m", 731 | "15m", 732 | "30m", 733 | "1h", 734 | "2h", 735 | 
"1d" 736 | ], 737 | "time_options": [ 738 | "5m", 739 | "15m", 740 | "1h", 741 | "6h", 742 | "12h", 743 | "24h", 744 | "2d", 745 | "7d", 746 | "30d" 747 | ] 748 | }, 749 | "timezone": "", 750 | "title": "Traefik2", 751 | "uid": "qPdAviJmz1", 752 | "version": 1, 753 | "weekStart": "" 754 | } -------------------------------------------------------------------------------- /02_monitoring/configs/grafana/conf/dashboards/prometheus-dash.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "$$hashKey": "object:698", 6 | "builtIn": 1, 7 | "datasource": "-- Grafana --", 8 | "enable": true, 9 | "hide": true, 10 | "iconColor": "rgba(0, 211, 255, 1)", 11 | "name": "Annotations & Alerts", 12 | "type": "dashboard" 13 | } 14 | ] 15 | }, 16 | "editable": true, 17 | "gnetId": null, 18 | "graphTooltip": 1, 19 | "links": [ 20 | { 21 | "icon": "info", 22 | "tags": [], 23 | "targetBlank": true, 24 | "title": "Grafana Docs", 25 | "tooltip": "", 26 | "type": "link", 27 | "url": "http://docs.grafana.org/" 28 | }, 29 | { 30 | "icon": "info", 31 | "tags": [], 32 | "targetBlank": true, 33 | "title": "Prometheus Docs", 34 | "type": "link", 35 | "url": "http://prometheus.io/docs/introduction/overview/" 36 | } 37 | ], 38 | "panels": [ 39 | { 40 | "aliasColors": { 41 | "prometheus": "#C15C17", 42 | "{instance=\"localhost:9090\",job=\"prometheus\"}": "#CCA300" 43 | }, 44 | "bars": false, 45 | "dashLength": 10, 46 | "dashes": false, 47 | "datasource": "Prometheus", 48 | "editable": true, 49 | "error": false, 50 | "fill": 0, 51 | "grid": {}, 52 | "gridPos": { 53 | "h": 5, 54 | "w": 6, 55 | "x": 0, 56 | "y": 0 57 | }, 58 | "id": 3, 59 | "legend": { 60 | "avg": false, 61 | "current": false, 62 | "max": false, 63 | "min": false, 64 | "show": true, 65 | "total": false, 66 | "values": false 67 | }, 68 | "lines": true, 69 | "linewidth": 1, 70 | "links": [], 71 | "nullPointMode": "connected", 72 | "percentage": false, 73 
| "pointradius": 2, 74 | "points": false, 75 | "renderer": "flot", 76 | "seriesOverrides": [], 77 | "spaceLength": 10, 78 | "stack": false, 79 | "steppedLine": false, 80 | "targets": [ 81 | { 82 | "expr": "sum(irate(prometheus_tsdb_head_samples_appended_total{job=\"prometheus\"}[5m]))", 83 | "format": "time_series", 84 | "hide": false, 85 | "interval": "", 86 | "intervalFactor": 2, 87 | "legendFormat": "samples", 88 | "metric": "", 89 | "refId": "A", 90 | "step": 20 91 | } 92 | ], 93 | "thresholds": [], 94 | "timeFrom": null, 95 | "timeShift": null, 96 | "title": "Samples Appended", 97 | "tooltip": { 98 | "shared": true, 99 | "sort": 0, 100 | "value_type": "cumulative" 101 | }, 102 | "type": "graph", 103 | "xaxis": { 104 | "buckets": null, 105 | "mode": "time", 106 | "name": null, 107 | "show": true, 108 | "values": [] 109 | }, 110 | "yaxes": [ 111 | { 112 | "format": "short", 113 | "logBase": 1, 114 | "max": null, 115 | "min": "0", 116 | "show": true 117 | }, 118 | { 119 | "format": "short", 120 | "logBase": 1, 121 | "max": null, 122 | "min": null, 123 | "show": true 124 | } 125 | ] 126 | }, 127 | { 128 | "aliasColors": {}, 129 | "bars": false, 130 | "dashLength": 10, 131 | "dashes": false, 132 | "datasource": "Prometheus", 133 | "editable": true, 134 | "error": false, 135 | "fill": 0, 136 | "grid": {}, 137 | "gridPos": { 138 | "h": 5, 139 | "w": 6, 140 | "x": 6, 141 | "y": 0 142 | }, 143 | "id": 14, 144 | "legend": { 145 | "avg": false, 146 | "current": false, 147 | "max": false, 148 | "min": false, 149 | "show": true, 150 | "total": false, 151 | "values": false 152 | }, 153 | "lines": true, 154 | "linewidth": 1, 155 | "links": [], 156 | "nullPointMode": "connected", 157 | "percentage": false, 158 | "pointradius": 5, 159 | "points": false, 160 | "renderer": "flot", 161 | "seriesOverrides": [], 162 | "spaceLength": 10, 163 | "stack": false, 164 | "steppedLine": false, 165 | "targets": [ 166 | { 167 | "expr": "topk(5, max(scrape_duration_seconds) by (job))", 168 | 
"format": "time_series", 169 | "interval": "", 170 | "intervalFactor": 2, 171 | "legendFormat": "{{job}}", 172 | "metric": "", 173 | "refId": "A", 174 | "step": 20 175 | } 176 | ], 177 | "thresholds": [], 178 | "timeFrom": null, 179 | "timeShift": null, 180 | "title": "Scrape Duration", 181 | "tooltip": { 182 | "shared": true, 183 | "sort": 0, 184 | "value_type": "cumulative" 185 | }, 186 | "type": "graph", 187 | "xaxis": { 188 | "buckets": null, 189 | "mode": "time", 190 | "name": null, 191 | "show": true, 192 | "values": [] 193 | }, 194 | "yaxes": [ 195 | { 196 | "format": "s", 197 | "logBase": 1, 198 | "max": null, 199 | "min": null, 200 | "show": true 201 | }, 202 | { 203 | "format": "short", 204 | "logBase": 1, 205 | "max": null, 206 | "min": null, 207 | "show": true 208 | } 209 | ] 210 | }, 211 | { 212 | "aliasColors": {}, 213 | "bars": false, 214 | "dashLength": 10, 215 | "dashes": false, 216 | "datasource": "Prometheus", 217 | "description": "", 218 | "fill": 0, 219 | "gridPos": { 220 | "h": 5, 221 | "w": 6, 222 | "x": 12, 223 | "y": 0 224 | }, 225 | "id": 16, 226 | "legend": { 227 | "avg": false, 228 | "current": false, 229 | "max": false, 230 | "min": false, 231 | "show": true, 232 | "total": false, 233 | "values": false 234 | }, 235 | "lines": true, 236 | "linewidth": 1, 237 | "links": [], 238 | "nullPointMode": "null", 239 | "percentage": false, 240 | "pointradius": 5, 241 | "points": false, 242 | "renderer": "flot", 243 | "seriesOverrides": [], 244 | "spaceLength": 10, 245 | "stack": false, 246 | "steppedLine": false, 247 | "targets": [ 248 | { 249 | "expr": "sum(process_resident_memory_bytes{job=\"prometheus\"})", 250 | "format": "time_series", 251 | "hide": false, 252 | "interval": "", 253 | "intervalFactor": 2, 254 | "legendFormat": "p8s process resident memory", 255 | "refId": "D", 256 | "step": 20 257 | }, 258 | { 259 | "expr": "process_virtual_memory_bytes{job=\"prometheus\"}", 260 | "format": "time_series", 261 | "hide": false, 262 | 
"intervalFactor": 2, 263 | "legendFormat": "virtual memory", 264 | "refId": "C", 265 | "step": 20 266 | } 267 | ], 268 | "thresholds": [], 269 | "timeFrom": null, 270 | "timeShift": null, 271 | "title": "Memory Profile", 272 | "tooltip": { 273 | "shared": true, 274 | "sort": 2, 275 | "value_type": "individual" 276 | }, 277 | "transparent": false, 278 | "type": "graph", 279 | "xaxis": { 280 | "buckets": null, 281 | "mode": "time", 282 | "name": null, 283 | "show": true, 284 | "values": [] 285 | }, 286 | "yaxes": [ 287 | { 288 | "format": "bytes", 289 | "label": "", 290 | "logBase": 1, 291 | "max": null, 292 | "min": "0", 293 | "show": true 294 | }, 295 | { 296 | "format": "short", 297 | "label": null, 298 | "logBase": 1, 299 | "max": null, 300 | "min": null, 301 | "show": true 302 | } 303 | ] 304 | }, 305 | { 306 | "cacheTimeout": null, 307 | "colorBackground": false, 308 | "colorValue": true, 309 | "colors": [ 310 | "rgba(50, 172, 45, 0.97)", 311 | "rgba(237, 129, 40, 0.89)", 312 | "rgba(245, 54, 54, 0.9)" 313 | ], 314 | "datasource": "Prometheus", 315 | "format": "none", 316 | "gauge": { 317 | "maxValue": 100, 318 | "minValue": 0, 319 | "show": false, 320 | "thresholdLabels": false, 321 | "thresholdMarkers": true 322 | }, 323 | "gridPos": { 324 | "h": 5, 325 | "w": 6, 326 | "x": 18, 327 | "y": 0 328 | }, 329 | "id": 37, 330 | "interval": null, 331 | "links": [], 332 | "mappingType": 1, 333 | "mappingTypes": [ 334 | { 335 | "name": "value to text", 336 | "value": 1 337 | }, 338 | { 339 | "name": "range to text", 340 | "value": 2 341 | } 342 | ], 343 | "maxDataPoints": 100, 344 | "nullPointMode": "connected", 345 | "nullText": null, 346 | "postfix": "", 347 | "postfixFontSize": "50%", 348 | "prefix": "", 349 | "prefixFontSize": "50%", 350 | "rangeMaps": [ 351 | { 352 | "from": "null", 353 | "text": "N/A", 354 | "to": "null" 355 | } 356 | ], 357 | "sparkline": { 358 | "fillColor": "rgba(31, 118, 189, 0.18)", 359 | "full": false, 360 | "lineColor": "rgb(31, 120, 
193)", 361 | "show": false 362 | }, 363 | "tableColumn": "", 364 | "targets": [ 365 | { 366 | "expr": "prometheus_tsdb_wal_corruptions_total{job=\"prometheus\"}", 367 | "format": "time_series", 368 | "intervalFactor": 2, 369 | "legendFormat": "", 370 | "refId": "A", 371 | "step": 60 372 | } 373 | ], 374 | "thresholds": "0.1,1", 375 | "title": "WAL Corruptions", 376 | "type": "singlestat", 377 | "valueFontSize": "200%", 378 | "valueMaps": [ 379 | { 380 | "op": "=", 381 | "text": "None", 382 | "value": "0" 383 | } 384 | ], 385 | "valueName": "max" 386 | }, 387 | { 388 | "aliasColors": {}, 389 | "bars": false, 390 | "dashLength": 10, 391 | "dashes": false, 392 | "datasource": "Prometheus", 393 | "fill": 0, 394 | "gridPos": { 395 | "h": 5, 396 | "w": 6, 397 | "x": 0, 398 | "y": 5 399 | }, 400 | "id": 29, 401 | "legend": { 402 | "avg": false, 403 | "current": false, 404 | "max": false, 405 | "min": false, 406 | "show": true, 407 | "total": false, 408 | "values": false 409 | }, 410 | "lines": true, 411 | "linewidth": 1, 412 | "links": [], 413 | "nullPointMode": "null", 414 | "percentage": false, 415 | "pointradius": 5, 416 | "points": false, 417 | "renderer": "flot", 418 | "seriesOverrides": [], 419 | "spaceLength": 10, 420 | "stack": false, 421 | "steppedLine": false, 422 | "targets": [ 423 | { 424 | "expr": "sum(prometheus_tsdb_head_active_appenders{job=\"prometheus\"})", 425 | "format": "time_series", 426 | "interval": "", 427 | "intervalFactor": 2, 428 | "legendFormat": "active_appenders", 429 | "metric": "", 430 | "refId": "A", 431 | "step": 20 432 | }, 433 | { 434 | "expr": "sum(process_open_fds{job=\"prometheus\"})", 435 | "format": "time_series", 436 | "interval": "", 437 | "intervalFactor": 2, 438 | "legendFormat": "open_fds", 439 | "refId": "B", 440 | "step": 20 441 | } 442 | ], 443 | "thresholds": [], 444 | "timeFrom": null, 445 | "timeShift": null, 446 | "title": "Active Appenders", 447 | "tooltip": { 448 | "shared": true, 449 | "sort": 0, 450 | "value_type": 
"individual" 451 | }, 452 | "type": "graph", 453 | "xaxis": { 454 | "buckets": null, 455 | "mode": "time", 456 | "name": null, 457 | "show": true, 458 | "values": [] 459 | }, 460 | "yaxes": [ 461 | { 462 | "format": "short", 463 | "label": null, 464 | "logBase": 1, 465 | "max": null, 466 | "min": null, 467 | "show": true 468 | }, 469 | { 470 | "format": "short", 471 | "label": null, 472 | "logBase": 1, 473 | "max": null, 474 | "min": null, 475 | "show": false 476 | } 477 | ] 478 | }, 479 | { 480 | "aliasColors": { 481 | "prometheus": "#F9BA8F", 482 | "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" 483 | }, 484 | "bars": false, 485 | "dashLength": 10, 486 | "dashes": false, 487 | "datasource": "Prometheus", 488 | "editable": true, 489 | "error": false, 490 | "fill": 0, 491 | "grid": {}, 492 | "gridPos": { 493 | "h": 5, 494 | "w": 6, 495 | "x": 6, 496 | "y": 5 497 | }, 498 | "id": 2, 499 | "legend": { 500 | "avg": false, 501 | "current": false, 502 | "max": false, 503 | "min": false, 504 | "show": true, 505 | "total": false, 506 | "values": false 507 | }, 508 | "lines": true, 509 | "linewidth": 1, 510 | "links": [], 511 | "nullPointMode": "connected", 512 | "percentage": false, 513 | "pointradius": 5, 514 | "points": false, 515 | "renderer": "flot", 516 | "seriesOverrides": [], 517 | "spaceLength": 10, 518 | "stack": false, 519 | "steppedLine": false, 520 | "targets": [ 521 | { 522 | "expr": "prometheus_tsdb_blocks_loaded{job=\"prometheus\"}", 523 | "format": "time_series", 524 | "intervalFactor": 2, 525 | "legendFormat": "blocks", 526 | "refId": "A", 527 | "step": 20 528 | } 529 | ], 530 | "thresholds": [], 531 | "timeFrom": null, 532 | "timeShift": null, 533 | "title": "Blocks Loaded", 534 | "tooltip": { 535 | "shared": true, 536 | "sort": 0, 537 | "value_type": "cumulative" 538 | }, 539 | "type": "graph", 540 | "xaxis": { 541 | "buckets": null, 542 | "mode": "time", 543 | "name": null, 544 | "show": true, 545 | "values": [] 546 | }, 
547 | "yaxes": [ 548 | { 549 | "format": "short", 550 | "logBase": 1, 551 | "max": null, 552 | "min": null, 553 | "show": true 554 | }, 555 | { 556 | "format": "short", 557 | "logBase": 1, 558 | "max": null, 559 | "min": null, 560 | "show": true 561 | } 562 | ] 563 | }, 564 | { 565 | "aliasColors": {}, 566 | "bars": false, 567 | "dashLength": 10, 568 | "dashes": false, 569 | "datasource": "Prometheus", 570 | "decimals": null, 571 | "description": "", 572 | "fill": 0, 573 | "gridPos": { 574 | "h": 5, 575 | "w": 6, 576 | "x": 12, 577 | "y": 5 578 | }, 579 | "id": 33, 580 | "legend": { 581 | "avg": false, 582 | "current": false, 583 | "max": false, 584 | "min": false, 585 | "show": true, 586 | "total": false, 587 | "values": false 588 | }, 589 | "lines": true, 590 | "linewidth": 1, 591 | "links": [], 592 | "nullPointMode": "connected", 593 | "percentage": false, 594 | "pointradius": 5, 595 | "points": false, 596 | "renderer": "flot", 597 | "seriesOverrides": [], 598 | "spaceLength": 10, 599 | "stack": false, 600 | "steppedLine": false, 601 | "targets": [ 602 | { 603 | "expr": "prometheus_tsdb_head_chunks{job=\"prometheus\"}", 604 | "format": "time_series", 605 | "interval": "", 606 | "intervalFactor": 2, 607 | "legendFormat": "chunks", 608 | "refId": "A", 609 | "step": 20 610 | } 611 | ], 612 | "thresholds": [], 613 | "timeFrom": null, 614 | "timeShift": null, 615 | "title": "Head Chunks", 616 | "tooltip": { 617 | "shared": true, 618 | "sort": 0, 619 | "value_type": "individual" 620 | }, 621 | "type": "graph", 622 | "xaxis": { 623 | "buckets": null, 624 | "mode": "time", 625 | "name": null, 626 | "show": true, 627 | "values": [] 628 | }, 629 | "yaxes": [ 630 | { 631 | "format": "short", 632 | "label": null, 633 | "logBase": 1, 634 | "max": null, 635 | "min": null, 636 | "show": true 637 | }, 638 | { 639 | "format": "bytes", 640 | "label": "", 641 | "logBase": 1, 642 | "max": null, 643 | "min": null, 644 | "show": false 645 | } 646 | ] 647 | }, 648 | { 649 | 
"aliasColors": {}, 650 | "bars": false, 651 | "dashLength": 10, 652 | "dashes": false, 653 | "datasource": "Prometheus", 654 | "fill": 1, 655 | "gridPos": { 656 | "h": 5, 657 | "w": 6, 658 | "x": 18, 659 | "y": 5 660 | }, 661 | "id": 36, 662 | "legend": { 663 | "avg": false, 664 | "current": false, 665 | "max": false, 666 | "min": false, 667 | "show": true, 668 | "total": false, 669 | "values": false 670 | }, 671 | "lines": true, 672 | "linewidth": 1, 673 | "links": [], 674 | "nullPointMode": "null", 675 | "percentage": false, 676 | "pointradius": 5, 677 | "points": false, 678 | "renderer": "flot", 679 | "seriesOverrides": [ 680 | { 681 | "alias": "duration-p99", 682 | "yaxis": 2 683 | } 684 | ], 685 | "spaceLength": 10, 686 | "stack": false, 687 | "steppedLine": false, 688 | "targets": [ 689 | { 690 | "expr": "prometheus_tsdb_head_gc_duration_seconds{job=\"prometheus\",quantile=\"0.99\"}", 691 | "format": "time_series", 692 | "intervalFactor": 2, 693 | "legendFormat": "duration-p99", 694 | "refId": "A", 695 | "step": 20 696 | }, 697 | { 698 | "expr": "irate(prometheus_tsdb_head_gc_duration_seconds_count{job=\"prometheus\"}[5m])", 699 | "format": "time_series", 700 | "intervalFactor": 2, 701 | "legendFormat": "collections", 702 | "refId": "B", 703 | "step": 20 704 | } 705 | ], 706 | "thresholds": [], 707 | "timeFrom": null, 708 | "timeShift": null, 709 | "title": "Head Block GC Activity", 710 | "tooltip": { 711 | "shared": true, 712 | "sort": 0, 713 | "value_type": "individual" 714 | }, 715 | "type": "graph", 716 | "xaxis": { 717 | "buckets": null, 718 | "mode": "time", 719 | "name": null, 720 | "show": true, 721 | "values": [] 722 | }, 723 | "yaxes": [ 724 | { 725 | "format": "short", 726 | "label": null, 727 | "logBase": 1, 728 | "max": null, 729 | "min": "0", 730 | "show": true 731 | }, 732 | { 733 | "format": "s", 734 | "label": null, 735 | "logBase": 1, 736 | "max": null, 737 | "min": "0", 738 | "show": true 739 | } 740 | ] 741 | }, 742 | { 743 | 
"aliasColors": {}, 744 | "bars": false, 745 | "dashLength": 10, 746 | "dashes": false, 747 | "datasource": "Prometheus", 748 | "decimals": null, 749 | "description": "", 750 | "fill": 0, 751 | "gridPos": { 752 | "h": 5, 753 | "w": 8, 754 | "x": 0, 755 | "y": 10 756 | }, 757 | "id": 20, 758 | "legend": { 759 | "avg": false, 760 | "current": false, 761 | "max": false, 762 | "min": false, 763 | "show": true, 764 | "total": false, 765 | "values": false 766 | }, 767 | "lines": true, 768 | "linewidth": 1, 769 | "links": [], 770 | "nullPointMode": "connected", 771 | "percentage": false, 772 | "pointradius": 5, 773 | "points": false, 774 | "renderer": "flot", 775 | "seriesOverrides": [ 776 | { 777 | "alias": "duration-p99", 778 | "yaxis": 2 779 | } 780 | ], 781 | "spaceLength": 10, 782 | "stack": false, 783 | "steppedLine": false, 784 | "targets": [ 785 | { 786 | "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_bucket{job=\"prometheus\"}[5m])) by (le))", 787 | "format": "time_series", 788 | "hide": false, 789 | "interval": "", 790 | "intervalFactor": 2, 791 | "legendFormat": "duration-{{p99}}", 792 | "refId": "A", 793 | "step": 20 794 | }, 795 | { 796 | "expr": "irate(prometheus_tsdb_compactions_total{job=\"prometheus\"}[5m])", 797 | "format": "time_series", 798 | "intervalFactor": 2, 799 | "legendFormat": "compactions", 800 | "refId": "B", 801 | "step": 20 802 | }, 803 | { 804 | "expr": "irate(prometheus_tsdb_compactions_failed_total{job=\"prometheus\"}[5m])", 805 | "format": "time_series", 806 | "intervalFactor": 2, 807 | "legendFormat": "failed", 808 | "refId": "C", 809 | "step": 20 810 | }, 811 | { 812 | "expr": "irate(prometheus_tsdb_compactions_triggered_total{job=\"prometheus\"}[5m])", 813 | "format": "time_series", 814 | "intervalFactor": 2, 815 | "legendFormat": "triggered", 816 | "refId": "D", 817 | "step": 20 818 | } 819 | ], 820 | "thresholds": [], 821 | "timeFrom": null, 822 | "timeShift": null, 823 | "title": "Compaction 
Activity", 824 | "tooltip": { 825 | "shared": true, 826 | "sort": 0, 827 | "value_type": "individual" 828 | }, 829 | "type": "graph", 830 | "xaxis": { 831 | "buckets": null, 832 | "mode": "time", 833 | "name": null, 834 | "show": true, 835 | "values": [] 836 | }, 837 | "yaxes": [ 838 | { 839 | "format": "short", 840 | "label": null, 841 | "logBase": 1, 842 | "max": null, 843 | "min": "0", 844 | "show": true 845 | }, 846 | { 847 | "format": "s", 848 | "label": "", 849 | "logBase": 1, 850 | "max": null, 851 | "min": "0", 852 | "show": true 853 | } 854 | ] 855 | }, 856 | { 857 | "aliasColors": {}, 858 | "bars": false, 859 | "dashLength": 10, 860 | "dashes": false, 861 | "datasource": "Prometheus", 862 | "fill": 1, 863 | "gridPos": { 864 | "h": 5, 865 | "w": 8, 866 | "x": 8, 867 | "y": 10 868 | }, 869 | "id": 32, 870 | "legend": { 871 | "avg": false, 872 | "current": false, 873 | "max": false, 874 | "min": false, 875 | "show": true, 876 | "total": false, 877 | "values": false 878 | }, 879 | "lines": true, 880 | "linewidth": 1, 881 | "links": [], 882 | "nullPointMode": "null", 883 | "percentage": false, 884 | "pointradius": 5, 885 | "points": false, 886 | "renderer": "flot", 887 | "seriesOverrides": [], 888 | "spaceLength": 10, 889 | "stack": false, 890 | "steppedLine": false, 891 | "targets": [ 892 | { 893 | "expr": "rate(prometheus_tsdb_reloads_total{job=\"prometheus\"}[5m])", 894 | "format": "time_series", 895 | "intervalFactor": 2, 896 | "legendFormat": "reloads", 897 | "refId": "A", 898 | "step": 20 899 | }, 900 | { 901 | "expr": "rate(prometheus_tsdb_reloads_failures_total{job=\"prometheus\"}[5m])", 902 | "format": "time_series", 903 | "hide": false, 904 | "intervalFactor": 2, 905 | "legendFormat": "failures", 906 | "refId": "B", 907 | "step": 20 908 | } 909 | ], 910 | "thresholds": [], 911 | "timeFrom": null, 912 | "timeShift": null, 913 | "title": "Reload Count", 914 | "tooltip": { 915 | "shared": true, 916 | "sort": 0, 917 | "value_type": "individual" 918 | }, 
919 | "type": "graph", 920 | "xaxis": { 921 | "buckets": null, 922 | "mode": "time", 923 | "name": null, 924 | "show": true, 925 | "values": [] 926 | }, 927 | "yaxes": [ 928 | { 929 | "format": "short", 930 | "label": null, 931 | "logBase": 1, 932 | "max": null, 933 | "min": null, 934 | "show": true 935 | }, 936 | { 937 | "format": "short", 938 | "label": null, 939 | "logBase": 1, 940 | "max": null, 941 | "min": null, 942 | "show": true 943 | } 944 | ] 945 | }, 946 | { 947 | "aliasColors": {}, 948 | "bars": false, 949 | "dashLength": 10, 950 | "dashes": false, 951 | "datasource": "Prometheus", 952 | "fill": 0, 953 | "gridPos": { 954 | "h": 5, 955 | "w": 8, 956 | "x": 16, 957 | "y": 10 958 | }, 959 | "id": 38, 960 | "legend": { 961 | "avg": false, 962 | "current": false, 963 | "max": false, 964 | "min": false, 965 | "show": true, 966 | "total": false, 967 | "values": false 968 | }, 969 | "lines": true, 970 | "linewidth": 1, 971 | "links": [], 972 | "nullPointMode": "null", 973 | "percentage": false, 974 | "pointradius": 5, 975 | "points": false, 976 | "renderer": "flot", 977 | "seriesOverrides": [], 978 | "spaceLength": 10, 979 | "stack": false, 980 | "steppedLine": false, 981 | "targets": [ 982 | { 983 | "expr": "prometheus_engine_query_duration_seconds{job=\"prometheus\", quantile=\"0.99\"}", 984 | "format": "time_series", 985 | "intervalFactor": 2, 986 | "legendFormat": "{{slice}}_p99", 987 | "refId": "A", 988 | "step": 20 989 | } 990 | ], 991 | "thresholds": [], 992 | "timeFrom": null, 993 | "timeShift": null, 994 | "title": "Query Durations", 995 | "tooltip": { 996 | "shared": true, 997 | "sort": 0, 998 | "value_type": "individual" 999 | }, 1000 | "type": "graph", 1001 | "xaxis": { 1002 | "buckets": null, 1003 | "mode": "time", 1004 | "name": null, 1005 | "show": true, 1006 | "values": [] 1007 | }, 1008 | "yaxes": [ 1009 | { 1010 | "format": "short", 1011 | "label": null, 1012 | "logBase": 1, 1013 | "max": null, 1014 | "min": null, 1015 | "show": true 1016 | }, 
1017 | { 1018 | "format": "short", 1019 | "label": null, 1020 | "logBase": 1, 1021 | "max": null, 1022 | "min": null, 1023 | "show": true 1024 | } 1025 | ] 1026 | }, 1027 | { 1028 | "aliasColors": {}, 1029 | "bars": false, 1030 | "dashLength": 10, 1031 | "dashes": false, 1032 | "datasource": "Prometheus", 1033 | "decimals": null, 1034 | "editable": true, 1035 | "error": false, 1036 | "fill": 0, 1037 | "grid": {}, 1038 | "gridPos": { 1039 | "h": 7, 1040 | "w": 12, 1041 | "x": 0, 1042 | "y": 15 1043 | }, 1044 | "id": 35, 1045 | "legend": { 1046 | "alignAsTable": false, 1047 | "avg": false, 1048 | "current": false, 1049 | "hideEmpty": true, 1050 | "max": false, 1051 | "min": false, 1052 | "show": true, 1053 | "total": false, 1054 | "values": false 1055 | }, 1056 | "lines": true, 1057 | "linewidth": 1, 1058 | "links": [], 1059 | "nullPointMode": "connected", 1060 | "percentage": false, 1061 | "pointradius": 5, 1062 | "points": false, 1063 | "renderer": "flot", 1064 | "seriesOverrides": [], 1065 | "spaceLength": 10, 1066 | "stack": false, 1067 | "steppedLine": false, 1068 | "targets": [ 1069 | { 1070 | "expr": "max(prometheus_rule_group_duration_seconds{job=\"prometheus\"}) by (quantile)", 1071 | "format": "time_series", 1072 | "interval": "", 1073 | "intervalFactor": 2, 1074 | "legendFormat": "{{quantile}}", 1075 | "refId": "A", 1076 | "step": 10 1077 | } 1078 | ], 1079 | "thresholds": [], 1080 | "timeFrom": null, 1081 | "timeShift": null, 1082 | "title": "Rule Group Eval Duration", 1083 | "tooltip": { 1084 | "shared": true, 1085 | "sort": 0, 1086 | "value_type": "cumulative" 1087 | }, 1088 | "type": "graph", 1089 | "xaxis": { 1090 | "buckets": null, 1091 | "mode": "time", 1092 | "name": null, 1093 | "show": true, 1094 | "values": [] 1095 | }, 1096 | "yaxes": [ 1097 | { 1098 | "format": "s", 1099 | "label": "", 1100 | "logBase": 1, 1101 | "max": null, 1102 | "min": null, 1103 | "show": true 1104 | }, 1105 | { 1106 | "format": "short", 1107 | "logBase": 1, 1108 | "max": 
null, 1109 | "min": null, 1110 | "show": true 1111 | } 1112 | ] 1113 | }, 1114 | { 1115 | "aliasColors": {}, 1116 | "bars": false, 1117 | "dashLength": 10, 1118 | "dashes": false, 1119 | "datasource": "Prometheus", 1120 | "fill": 1, 1121 | "gridPos": { 1122 | "h": 7, 1123 | "w": 12, 1124 | "x": 12, 1125 | "y": 15 1126 | }, 1127 | "id": 39, 1128 | "legend": { 1129 | "avg": false, 1130 | "current": false, 1131 | "max": false, 1132 | "min": false, 1133 | "show": true, 1134 | "total": false, 1135 | "values": false 1136 | }, 1137 | "lines": true, 1138 | "linewidth": 1, 1139 | "links": [], 1140 | "nullPointMode": "null", 1141 | "percentage": false, 1142 | "pointradius": 5, 1143 | "points": false, 1144 | "renderer": "flot", 1145 | "seriesOverrides": [], 1146 | "spaceLength": 10, 1147 | "stack": true, 1148 | "steppedLine": false, 1149 | "targets": [ 1150 | { 1151 | "expr": "rate(prometheus_rule_group_iterations_missed_total{job=\"prometheus\"}[5m])", 1152 | "format": "time_series", 1153 | "intervalFactor": 2, 1154 | "legendFormat": "missed", 1155 | "refId": "B", 1156 | "step": 10 1157 | }, 1158 | { 1159 | "expr": "rate(prometheus_rule_group_iterations_total{job=\"prometheus\"}[5m])", 1160 | "format": "time_series", 1161 | "intervalFactor": 2, 1162 | "legendFormat": "iterations", 1163 | "refId": "A", 1164 | "step": 10 1165 | } 1166 | ], 1167 | "thresholds": [], 1168 | "timeFrom": null, 1169 | "timeShift": null, 1170 | "title": "Rule Group Eval Activity", 1171 | "tooltip": { 1172 | "shared": true, 1173 | "sort": 0, 1174 | "value_type": "individual" 1175 | }, 1176 | "type": "graph", 1177 | "xaxis": { 1178 | "buckets": null, 1179 | "mode": "time", 1180 | "name": null, 1181 | "show": true, 1182 | "values": [] 1183 | }, 1184 | "yaxes": [ 1185 | { 1186 | "format": "short", 1187 | "label": null, 1188 | "logBase": 1, 1189 | "max": null, 1190 | "min": null, 1191 | "show": true 1192 | }, 1193 | { 1194 | "format": "short", 1195 | "label": null, 1196 | "logBase": 1, 1197 | "max": null, 
1198 | "min": null, 1199 | "show": true 1200 | } 1201 | ] 1202 | } 1203 | ], 1204 | "refresh": "1m", 1205 | "revision": "1.0", 1206 | "schemaVersion": 16, 1207 | "style": "dark", 1208 | "tags": [ 1209 | "prometheus" 1210 | ], 1211 | "templating": { 1212 | "list": [] 1213 | }, 1214 | "time": { 1215 | "from": "now-1h", 1216 | "to": "now" 1217 | }, 1218 | "timepicker": { 1219 | "now": true, 1220 | "refresh_intervals": [ 1221 | "5s", 1222 | "10s", 1223 | "30s", 1224 | "1m", 1225 | "5m", 1226 | "15m", 1227 | "30m", 1228 | "1h", 1229 | "2h", 1230 | "1d" 1231 | ], 1232 | "time_options": [ 1233 | "5m", 1234 | "15m", 1235 | "1h", 1236 | "6h", 1237 | "12h", 1238 | "24h", 1239 | "2d", 1240 | "7d", 1241 | "30d" 1242 | ] 1243 | }, 1244 | "timezone": "browser", 1245 | "title": "Prometheus 2.0 Stats", 1246 | "uid": "mGFfYSRiz", 1247 | "version": 1 1248 | } 1249 | -------------------------------------------------------------------------------- /02_monitoring/configs/grafana/conf/dashboards/services-dash.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "$$hashKey": "object:429", 6 | "builtIn": 1, 7 | "datasource": { 8 | "type": "datasource", 9 | "uid": "grafana" 10 | }, 11 | "enable": true, 12 | "hide": true, 13 | "iconColor": "rgba(0, 211, 255, 1)", 14 | "name": "Annotations & Alerts", 15 | "target": { 16 | "limit": 100, 17 | "matchAny": false, 18 | "tags": [], 19 | "type": "dashboard" 20 | }, 21 | "type": "dashboard" 22 | } 23 | ] 24 | }, 25 | "description": "Docker Swarm stacks and services metrics", 26 | "editable": true, 27 | "fiscalYearStartMonth": 0, 28 | "graphTooltip": 0, 29 | "iteration": 1655486948760, 30 | "links": [], 31 | "liveNow": false, 32 | "panels": [ 33 | { 34 | "datasource": { 35 | "type": "prometheus", 36 | "uid": "PBFA97CFB590B2093" 37 | }, 38 | "fieldConfig": { 39 | "defaults": { 40 | "color": { 41 | "mode": "thresholds" 42 | }, 43 | "decimals": 0, 44 | "mappings": [ 45 
| { 46 | "options": { 47 | "match": "null", 48 | "result": { 49 | "text": "N/A" 50 | } 51 | }, 52 | "type": "special" 53 | } 54 | ], 55 | "thresholds": { 56 | "mode": "absolute", 57 | "steps": [ 58 | { 59 | "color": "green", 60 | "value": null 61 | }, 62 | { 63 | "color": "red", 64 | "value": 80 65 | } 66 | ] 67 | }, 68 | "unit": "none" 69 | }, 70 | "overrides": [] 71 | }, 72 | "gridPos": { 73 | "h": 4, 74 | "w": 6, 75 | "x": 0, 76 | "y": 0 77 | }, 78 | "hideTimeOverride": true, 79 | "id": 1, 80 | "links": [], 81 | "maxDataPoints": 100, 82 | "options": { 83 | "colorMode": "none", 84 | "graphMode": "none", 85 | "justifyMode": "auto", 86 | "orientation": "horizontal", 87 | "reduceOptions": { 88 | "calcs": [ 89 | "mean" 90 | ], 91 | "fields": "", 92 | "values": false 93 | }, 94 | "textMode": "auto" 95 | }, 96 | "pluginVersion": "8.5.5", 97 | "targets": [ 98 | { 99 | "expr": "count(count(container_tasks_state{container_label_com_docker_swarm_node_id =~\"$node_id\"}) by (container_label_com_docker_swarm_node_id))", 100 | "format": "time_series", 101 | "intervalFactor": 2, 102 | "legendFormat": "", 103 | "refId": "A", 104 | "step": 2 105 | } 106 | ], 107 | "timeFrom": "1m", 108 | "title": "Nodes", 109 | "type": "stat" 110 | }, 111 | { 112 | "datasource": { 113 | "type": "prometheus", 114 | "uid": "PBFA97CFB590B2093" 115 | }, 116 | "fieldConfig": { 117 | "defaults": { 118 | "color": { 119 | "mode": "thresholds" 120 | }, 121 | "decimals": 0, 122 | "mappings": [ 123 | { 124 | "options": { 125 | "match": "null", 126 | "result": { 127 | "text": "N/A" 128 | } 129 | }, 130 | "type": "special" 131 | } 132 | ], 133 | "thresholds": { 134 | "mode": "absolute", 135 | "steps": [ 136 | { 137 | "color": "green", 138 | "value": null 139 | }, 140 | { 141 | "color": "red", 142 | "value": 80 143 | } 144 | ] 145 | }, 146 | "unit": "none" 147 | }, 148 | "overrides": [] 149 | }, 150 | "gridPos": { 151 | "h": 4, 152 | "w": 6, 153 | "x": 6, 154 | "y": 0 155 | }, 156 | "hideTimeOverride": true, 
157 | "id": 21, 158 | "links": [], 159 | "maxDataPoints": 100, 160 | "options": { 161 | "colorMode": "none", 162 | "graphMode": "none", 163 | "justifyMode": "auto", 164 | "orientation": "horizontal", 165 | "reduceOptions": { 166 | "calcs": [ 167 | "mean" 168 | ], 169 | "fields": "", 170 | "values": false 171 | }, 172 | "textMode": "auto" 173 | }, 174 | "pluginVersion": "8.5.5", 175 | "targets": [ 176 | { 177 | "expr": "count(count(container_tasks_state{container_label_com_docker_stack_namespace=~\".+\", container_label_com_docker_swarm_node_id=~\"$node_id\"}) by (container_label_com_docker_stack_namespace))", 178 | "format": "time_series", 179 | "intervalFactor": 2, 180 | "legendFormat": "", 181 | "refId": "A", 182 | "step": 2 183 | } 184 | ], 185 | "timeFrom": "1m", 186 | "title": "Stacks", 187 | "type": "stat" 188 | }, 189 | { 190 | "datasource": { 191 | "type": "prometheus", 192 | "uid": "PBFA97CFB590B2093" 193 | }, 194 | "fieldConfig": { 195 | "defaults": { 196 | "color": { 197 | "mode": "thresholds" 198 | }, 199 | "decimals": 0, 200 | "mappings": [ 201 | { 202 | "options": { 203 | "match": "null", 204 | "result": { 205 | "text": "N/A" 206 | } 207 | }, 208 | "type": "special" 209 | } 210 | ], 211 | "thresholds": { 212 | "mode": "absolute", 213 | "steps": [ 214 | { 215 | "color": "green", 216 | "value": null 217 | }, 218 | { 219 | "color": "red", 220 | "value": 80 221 | } 222 | ] 223 | }, 224 | "unit": "none" 225 | }, 226 | "overrides": [] 227 | }, 228 | "gridPos": { 229 | "h": 4, 230 | "w": 6, 231 | "x": 12, 232 | "y": 0 233 | }, 234 | "hideTimeOverride": true, 235 | "id": 20, 236 | "links": [], 237 | "maxDataPoints": 100, 238 | "options": { 239 | "colorMode": "none", 240 | "graphMode": "none", 241 | "justifyMode": "auto", 242 | "orientation": "horizontal", 243 | "reduceOptions": { 244 | "calcs": [ 245 | "mean" 246 | ], 247 | "fields": "", 248 | "values": false 249 | }, 250 | "textMode": "auto" 251 | }, 252 | "pluginVersion": "8.5.5", 253 | "targets": [ 254 | { 
255 | "expr": "count(count(container_tasks_state{container_label_com_docker_swarm_service_name=~\".+\", container_label_com_docker_swarm_node_id=~\"$node_id\"}) by (container_label_com_docker_swarm_service_name))", 256 | "format": "time_series", 257 | "intervalFactor": 2, 258 | "refId": "A", 259 | "step": 2 260 | } 261 | ], 262 | "timeFrom": "1m", 263 | "title": "Services", 264 | "type": "stat" 265 | }, 266 | { 267 | "datasource": { 268 | "type": "prometheus", 269 | "uid": "PBFA97CFB590B2093" 270 | }, 271 | "fieldConfig": { 272 | "defaults": { 273 | "color": { 274 | "mode": "thresholds" 275 | }, 276 | "decimals": 0, 277 | "mappings": [ 278 | { 279 | "options": { 280 | "match": "null", 281 | "result": { 282 | "text": "N/A" 283 | } 284 | }, 285 | "type": "special" 286 | } 287 | ], 288 | "thresholds": { 289 | "mode": "absolute", 290 | "steps": [ 291 | { 292 | "color": "green", 293 | "value": null 294 | }, 295 | { 296 | "color": "red", 297 | "value": 80 298 | } 299 | ] 300 | }, 301 | "unit": "none" 302 | }, 303 | "overrides": [] 304 | }, 305 | "gridPos": { 306 | "h": 4, 307 | "w": 6, 308 | "x": 18, 309 | "y": 0 310 | }, 311 | "hideTimeOverride": true, 312 | "id": 7, 313 | "links": [], 314 | "maxDataPoints": 100, 315 | "options": { 316 | "colorMode": "none", 317 | "graphMode": "none", 318 | "justifyMode": "auto", 319 | "orientation": "horizontal", 320 | "reduceOptions": { 321 | "calcs": [ 322 | "mean" 323 | ], 324 | "fields": "", 325 | "values": false 326 | }, 327 | "textMode": "auto" 328 | }, 329 | "pluginVersion": "8.5.5", 330 | "targets": [ 331 | { 332 | "expr": "count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) ", 333 | "format": "time_series", 334 | "intervalFactor": 2, 335 | "refId": "A", 336 | "step": 2 337 | } 338 | ], 339 | "timeFrom": "1m", 340 | "title": "Containers", 341 | "type": "stat" 342 | }, 343 | { 344 | "aliasColors": {}, 345 | "bars": true, 346 | "dashLength": 10, 347 | "dashes": false, 348 | "datasource": { 
349 | "type": "prometheus", 350 | "uid": "PBFA97CFB590B2093" 351 | }, 352 | "decimals": 0, 353 | "fill": 5, 354 | "fillGradient": 0, 355 | "gridPos": { 356 | "h": 7, 357 | "w": 12, 358 | "x": 0, 359 | "y": 4 360 | }, 361 | "hiddenSeries": false, 362 | "id": 12, 363 | "legend": { 364 | "alignAsTable": true, 365 | "avg": false, 366 | "current": true, 367 | "hideEmpty": true, 368 | "hideZero": true, 369 | "max": false, 370 | "min": false, 371 | "rightSide": true, 372 | "show": true, 373 | "sort": "current", 374 | "sortDesc": true, 375 | "total": false, 376 | "values": true 377 | }, 378 | "lines": false, 379 | "linewidth": 1, 380 | "links": [], 381 | "nullPointMode": "null", 382 | "options": { 383 | "alertThreshold": true 384 | }, 385 | "percentage": false, 386 | "pluginVersion": "8.5.5", 387 | "pointradius": 5, 388 | "points": false, 389 | "renderer": "flot", 390 | "seriesOverrides": [], 391 | "spaceLength": 10, 392 | "stack": true, 393 | "steppedLine": false, 394 | "targets": [ 395 | { 396 | "expr": "sum(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) by (container_label_com_docker_swarm_service_name)", 397 | "format": "time_series", 398 | "intervalFactor": 10, 399 | "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", 400 | "refId": "A", 401 | "step": 10 402 | } 403 | ], 404 | "thresholds": [], 405 | "timeRegions": [], 406 | "title": "Service Tasks", 407 | "tooltip": { 408 | "shared": true, 409 | "sort": 2, 410 | "value_type": "individual" 411 | }, 412 | "type": "graph", 413 | "xaxis": { 414 | "mode": "time", 415 | "show": true, 416 | "values": [] 417 | }, 418 | "yaxes": [ 419 | { 420 | "format": "short", 421 | "logBase": 1, 422 | "show": true 423 | }, 424 | { 425 | "format": "short", 426 | "logBase": 1, 427 | "show": true 428 | } 429 | ], 430 | "yaxis": { 431 | "align": false 432 | } 433 | }, 434 | { 435 | "aliasColors": {}, 436 | "bars": false, 437 | "dashLength": 10, 438 | "dashes": false, 439 | 
"datasource": { 440 | "type": "prometheus", 441 | "uid": "PBFA97CFB590B2093" 442 | }, 443 | "decimals": 0, 444 | "fill": 1, 445 | "fillGradient": 0, 446 | "gridPos": { 447 | "h": 7, 448 | "w": 12, 449 | "x": 12, 450 | "y": 4 451 | }, 452 | "hiddenSeries": false, 453 | "id": 32, 454 | "legend": { 455 | "alignAsTable": true, 456 | "avg": false, 457 | "current": true, 458 | "hideEmpty": true, 459 | "hideZero": true, 460 | "max": false, 461 | "min": false, 462 | "rightSide": true, 463 | "show": false, 464 | "sort": "current", 465 | "sortDesc": true, 466 | "total": false, 467 | "values": true 468 | }, 469 | "lines": true, 470 | "linewidth": 1, 471 | "links": [], 472 | "nullPointMode": "null", 473 | "options": { 474 | "alertThreshold": true 475 | }, 476 | "percentage": false, 477 | "pluginVersion": "8.5.5", 478 | "pointradius": 5, 479 | "points": false, 480 | "renderer": "flot", 481 | "seriesOverrides": [], 482 | "spaceLength": 10, 483 | "stack": false, 484 | "steppedLine": false, 485 | "targets": [ 486 | { 487 | "expr": "sum(increase(engine_daemon_health_checks_total[$interval]) * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) ", 488 | "format": "time_series", 489 | "intervalFactor": 10, 490 | "legendFormat": "checks", 491 | "refId": "A", 492 | "step": 10 493 | }, 494 | { 495 | "expr": "sum(increase(engine_daemon_health_checks_failed_total[$interval]) * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) ", 496 | "format": "time_series", 497 | "intervalFactor": 10, 498 | "legendFormat": "failed", 499 | "refId": "B", 500 | "step": 10 501 | } 502 | ], 503 | "thresholds": [], 504 | "timeRegions": [], 505 | "title": "Health Checks", 506 | "tooltip": { 507 | "shared": true, 508 | "sort": 2, 509 | "value_type": "individual" 510 | }, 511 | "type": "graph", 512 | "xaxis": { 513 | "mode": "time", 514 | "show": true, 515 | "values": [] 516 | }, 517 | "yaxes": [ 518 | { 519 | "format": "short", 520 | "logBase": 1, 521 | "show": true 
522 | }, 523 | { 524 | "format": "short", 525 | "logBase": 1, 526 | "show": true 527 | } 528 | ], 529 | "yaxis": { 530 | "align": false 531 | } 532 | }, 533 | { 534 | "aliasColors": {}, 535 | "bars": false, 536 | "dashLength": 10, 537 | "dashes": false, 538 | "datasource": { 539 | "type": "prometheus", 540 | "uid": "PBFA97CFB590B2093" 541 | }, 542 | "decimals": 2, 543 | "fill": 1, 544 | "fillGradient": 0, 545 | "gridPos": { 546 | "h": 7, 547 | "w": 20, 548 | "x": 0, 549 | "y": 11 550 | }, 551 | "hiddenSeries": false, 552 | "id": 22, 553 | "legend": { 554 | "alignAsTable": true, 555 | "avg": true, 556 | "current": false, 557 | "hideEmpty": true, 558 | "hideZero": true, 559 | "max": true, 560 | "min": true, 561 | "rightSide": true, 562 | "show": true, 563 | "sort": "avg", 564 | "sortDesc": true, 565 | "total": false, 566 | "values": true 567 | }, 568 | "lines": true, 569 | "linewidth": 1, 570 | "links": [], 571 | "nullPointMode": "null", 572 | "options": { 573 | "alertThreshold": true 574 | }, 575 | "percentage": false, 576 | "pluginVersion": "8.5.5", 577 | "pointradius": 5, 578 | "points": false, 579 | "renderer": "flot", 580 | "seriesOverrides": [], 581 | "spaceLength": 10, 582 | "stack": true, 583 | "steppedLine": false, 584 | "targets": [ 585 | { 586 | "expr": "sum(irate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[1m])) by (container_label_com_docker_swarm_service_name) * 100 ", 587 | "format": "time_series", 588 | "intervalFactor": 2, 589 | "legendFormat": "{{container_label_com_docker_swarm_service_name}}", 590 | "refId": "A", 591 | "step": 2 592 | } 593 | ], 594 | "thresholds": [], 595 | "timeRegions": [], 596 | "title": "CPU usage by Service", 597 | "tooltip": { 598 | "shared": true, 599 | "sort": 2, 600 | "value_type": "individual" 601 | }, 602 | "type": "graph", 603 | "xaxis": { 604 | "mode": "time", 605 | "show": true, 606 | "values": [] 607 | }, 608 | "yaxes": [ 609 | { 610 | "format": 
"percent", 611 | "logBase": 1, 612 | "show": true 613 | }, 614 | { 615 | "format": "short", 616 | "logBase": 1, 617 | "show": false 618 | } 619 | ], 620 | "yaxis": { 621 | "align": false 622 | } 623 | }, 624 | { 625 | "datasource": { 626 | "type": "prometheus", 627 | "uid": "PBFA97CFB590B2093" 628 | }, 629 | "fieldConfig": { 630 | "defaults": { 631 | "color": { 632 | "mode": "thresholds" 633 | }, 634 | "mappings": [ 635 | { 636 | "options": { 637 | "match": "null", 638 | "result": { 639 | "text": "N/A" 640 | } 641 | }, 642 | "type": "special" 643 | } 644 | ], 645 | "max": 100, 646 | "min": 0, 647 | "thresholds": { 648 | "mode": "absolute", 649 | "steps": [ 650 | { 651 | "color": "rgba(245, 54, 54, 0.9)", 652 | "value": null 653 | }, 654 | { 655 | "color": "rgba(237, 129, 40, 0.89)", 656 | "value": 10 657 | }, 658 | { 659 | "color": "rgba(50, 172, 45, 0.97)", 660 | "value": 25 661 | } 662 | ] 663 | }, 664 | "unit": "percent" 665 | }, 666 | "overrides": [] 667 | }, 668 | "gridPos": { 669 | "h": 7, 670 | "w": 4, 671 | "x": 20, 672 | "y": 11 673 | }, 674 | "hideTimeOverride": true, 675 | "id": 11, 676 | "links": [], 677 | "maxDataPoints": 100, 678 | "options": { 679 | "orientation": "horizontal", 680 | "reduceOptions": { 681 | "calcs": [ 682 | "mean" 683 | ], 684 | "fields": "", 685 | "values": false 686 | }, 687 | "showThresholdLabels": false, 688 | "showThresholdMarkers": true 689 | }, 690 | "pluginVersion": "8.5.5", 691 | "targets": [ 692 | { 693 | "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) ", 694 | "format": "time_series", 695 | "intervalFactor": 2, 696 | "legendFormat": "", 697 | "refId": "A", 698 | "step": 2 699 | } 700 | ], 701 | "timeFrom": "1m", 702 | "title": "CPU Idle", 703 | "type": "gauge" 704 | }, 705 | { 706 | "aliasColors": {}, 707 | 
"bars": false, 708 | "dashLength": 10, 709 | "dashes": false, 710 | "datasource": { 711 | "type": "prometheus", 712 | "uid": "PBFA97CFB590B2093" 713 | }, 714 | "decimals": 2, 715 | "fill": 1, 716 | "fillGradient": 0, 717 | "gridPos": { 718 | "h": 7, 719 | "w": 24, 720 | "x": 0, 721 | "y": 18 722 | }, 723 | "hiddenSeries": false, 724 | "id": 33, 725 | "legend": { 726 | "alignAsTable": true, 727 | "avg": true, 728 | "current": false, 729 | "hideEmpty": true, 730 | "hideZero": true, 731 | "max": false, 732 | "min": false, 733 | "rightSide": true, 734 | "show": true, 735 | "sort": "avg", 736 | "sortDesc": true, 737 | "total": false, 738 | "values": true 739 | }, 740 | "lines": true, 741 | "linewidth": 1, 742 | "links": [], 743 | "nullPointMode": "null as zero", 744 | "options": { 745 | "alertThreshold": true 746 | }, 747 | "percentage": false, 748 | "pluginVersion": "8.5.5", 749 | "pointradius": 5, 750 | "points": false, 751 | "renderer": "flot", 752 | "seriesOverrides": [], 753 | "spaceLength": 10, 754 | "stack": false, 755 | "steppedLine": false, 756 | "targets": [ 757 | { 758 | "expr": "topk(10, sum(irate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[$interval])) by (name)) * 100 ", 759 | "format": "time_series", 760 | "intervalFactor": 2, 761 | "legendFormat": "{{name}}", 762 | "refId": "A", 763 | "step": 2 764 | } 765 | ], 766 | "thresholds": [], 767 | "timeRegions": [], 768 | "title": "CPU usage by Container (top 10)", 769 | "tooltip": { 770 | "shared": true, 771 | "sort": 2, 772 | "value_type": "individual" 773 | }, 774 | "type": "graph", 775 | "xaxis": { 776 | "mode": "time", 777 | "show": true, 778 | "values": [] 779 | }, 780 | "yaxes": [ 781 | { 782 | "format": "percent", 783 | "logBase": 1, 784 | "show": true 785 | }, 786 | { 787 | "format": "short", 788 | "logBase": 1, 789 | "show": false 790 | } 791 | ], 792 | "yaxis": { 793 | "align": false 794 | } 795 | }, 796 | { 797 | "aliasColors": {}, 
798 | "bars": false, 799 | "dashLength": 10, 800 | "dashes": false, 801 | "datasource": { 802 | "type": "prometheus", 803 | "uid": "PBFA97CFB590B2093" 804 | }, 805 | "fill": 1, 806 | "fillGradient": 0, 807 | "gridPos": { 808 | "h": 7, 809 | "w": 20, 810 | "x": 0, 811 | "y": 25 812 | }, 813 | "hiddenSeries": false, 814 | "id": 24, 815 | "legend": { 816 | "alignAsTable": true, 817 | "avg": true, 818 | "current": false, 819 | "max": true, 820 | "min": true, 821 | "rightSide": true, 822 | "show": true, 823 | "sort": "avg", 824 | "sortDesc": true, 825 | "total": false, 826 | "values": true 827 | }, 828 | "lines": true, 829 | "linewidth": 1, 830 | "links": [], 831 | "nullPointMode": "null", 832 | "options": { 833 | "alertThreshold": true 834 | }, 835 | "percentage": false, 836 | "pluginVersion": "8.5.5", 837 | "pointradius": 5, 838 | "points": false, 839 | "renderer": "flot", 840 | "seriesOverrides": [], 841 | "spaceLength": 10, 842 | "stack": false, 843 | "steppedLine": false, 844 | "targets": [ 845 | { 846 | "expr": "sum(container_memory_usage_bytes{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}) by (container_label_com_docker_swarm_service_name) ", 847 | "format": "time_series", 848 | "intervalFactor": 2, 849 | "legendFormat": "Used {{container_label_com_docker_swarm_service_name}}", 850 | "refId": "A", 851 | "step": 2 852 | }, 853 | { 854 | "expr": "sum(container_memory_cache{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}) by (container_label_com_docker_swarm_service_name) ", 855 | "format": "time_series", 856 | "intervalFactor": 2, 857 | "legendFormat": "Cached {{container_label_com_docker_swarm_service_name}}", 858 | "refId": "B", 859 | "step": 2 860 | } 861 | ], 862 | "thresholds": [], 863 | "timeRegions": [], 864 | "title": "Memory usage by Service", 865 | "tooltip": { 866 | "shared": true, 867 | "sort": 0, 868 | "value_type": "individual" 869 | }, 870 | "type": "graph", 871 | "xaxis": { 872 | "mode": 
"time", 873 | "show": true, 874 | "values": [] 875 | }, 876 | "yaxes": [ 877 | { 878 | "format": "decbytes", 879 | "logBase": 1, 880 | "show": true 881 | }, 882 | { 883 | "format": "short", 884 | "logBase": 1, 885 | "show": true 886 | } 887 | ], 888 | "yaxis": { 889 | "align": false 890 | } 891 | }, 892 | { 893 | "datasource": { 894 | "type": "prometheus", 895 | "uid": "PBFA97CFB590B2093" 896 | }, 897 | "fieldConfig": { 898 | "defaults": { 899 | "color": { 900 | "mode": "thresholds" 901 | }, 902 | "mappings": [ 903 | { 904 | "options": { 905 | "match": "null", 906 | "result": { 907 | "text": "N/A" 908 | } 909 | }, 910 | "type": "special" 911 | } 912 | ], 913 | "max": 100, 914 | "min": 0, 915 | "thresholds": { 916 | "mode": "absolute", 917 | "steps": [ 918 | { 919 | "color": "rgba(245, 54, 54, 0.9)" 920 | }, 921 | { 922 | "color": "rgba(237, 129, 40, 0.89)", 923 | "value": 10 924 | }, 925 | { 926 | "color": "rgba(50, 172, 45, 0.97)", 927 | "value": 25 928 | } 929 | ] 930 | }, 931 | "unit": "percent" 932 | }, 933 | "overrides": [] 934 | }, 935 | "gridPos": { 936 | "h": 7, 937 | "w": 4, 938 | "x": 20, 939 | "y": 25 940 | }, 941 | "id": 8, 942 | "links": [], 943 | "maxDataPoints": 100, 944 | "options": { 945 | "orientation": "horizontal", 946 | "reduceOptions": { 947 | "calcs": [ 948 | "mean" 949 | ], 950 | "fields": "", 951 | "values": false 952 | }, 953 | "showThresholdLabels": false, 954 | "showThresholdMarkers": true 955 | }, 956 | "pluginVersion": "8.5.5", 957 | "targets": [ 958 | { 959 | "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 960 | "format": "time_series", 961 | "intervalFactor": 2, 962 | "legendFormat": "", 963 | "refId": "A", 964 | "step": 20 965 | } 966 | ], 967 | "title": "Available Memory", 968 | "type": "gauge" 969 | }, 970 | { 971 | "aliasColors": {}, 972 
| "bars": false, 973 | "dashLength": 10, 974 | "dashes": false, 975 | "datasource": { 976 | "type": "prometheus", 977 | "uid": "PBFA97CFB590B2093" 978 | }, 979 | "fill": 1, 980 | "fillGradient": 0, 981 | "gridPos": { 982 | "h": 7, 983 | "w": 24, 984 | "x": 0, 985 | "y": 32 986 | }, 987 | "hiddenSeries": false, 988 | "id": 34, 989 | "legend": { 990 | "alignAsTable": true, 991 | "avg": true, 992 | "current": false, 993 | "hideEmpty": false, 994 | "hideZero": false, 995 | "max": false, 996 | "min": false, 997 | "rightSide": true, 998 | "show": true, 999 | "sort": "avg", 1000 | "sortDesc": true, 1001 | "total": false, 1002 | "values": true 1003 | }, 1004 | "lines": true, 1005 | "linewidth": 1, 1006 | "links": [], 1007 | "nullPointMode": "null", 1008 | "options": { 1009 | "alertThreshold": true 1010 | }, 1011 | "percentage": false, 1012 | "pluginVersion": "8.5.5", 1013 | "pointradius": 5, 1014 | "points": false, 1015 | "renderer": "flot", 1016 | "seriesOverrides": [], 1017 | "spaceLength": 10, 1018 | "stack": false, 1019 | "steppedLine": false, 1020 | "targets": [ 1021 | { 1022 | "expr": "topk(10, avg_over_time(container_memory_usage_bytes{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[$interval]))", 1023 | "format": "time_series", 1024 | "intervalFactor": 2, 1025 | "legendFormat": "{{name}}", 1026 | "refId": "A", 1027 | "step": 2 1028 | } 1029 | ], 1030 | "thresholds": [], 1031 | "timeRegions": [], 1032 | "title": "Memory usage by Container (top 10)", 1033 | "tooltip": { 1034 | "shared": true, 1035 | "sort": 2, 1036 | "value_type": "individual" 1037 | }, 1038 | "type": "graph", 1039 | "xaxis": { 1040 | "mode": "time", 1041 | "show": true, 1042 | "values": [] 1043 | }, 1044 | "yaxes": [ 1045 | { 1046 | "format": "decbytes", 1047 | "logBase": 1, 1048 | "show": true 1049 | }, 1050 | { 1051 | "format": "short", 1052 | "logBase": 1, 1053 | "show": false 1054 | } 1055 | ], 1056 | "yaxis": { 1057 | "align": false 1058 | } 1059 | }, 1060 | { 1061 | 
"aliasColors": {}, 1062 | "bars": false, 1063 | "dashLength": 10, 1064 | "dashes": false, 1065 | "datasource": { 1066 | "type": "prometheus", 1067 | "uid": "PBFA97CFB590B2093" 1068 | }, 1069 | "fill": 1, 1070 | "fillGradient": 0, 1071 | "gridPos": { 1072 | "h": 7, 1073 | "w": 24, 1074 | "x": 0, 1075 | "y": 39 1076 | }, 1077 | "hiddenSeries": false, 1078 | "id": 17, 1079 | "legend": { 1080 | "alignAsTable": true, 1081 | "avg": true, 1082 | "current": false, 1083 | "max": true, 1084 | "min": true, 1085 | "rightSide": true, 1086 | "show": true, 1087 | "sort": "avg", 1088 | "sortDesc": true, 1089 | "total": false, 1090 | "values": true 1091 | }, 1092 | "lines": true, 1093 | "linewidth": 1, 1094 | "links": [], 1095 | "nullPointMode": "null", 1096 | "options": { 1097 | "alertThreshold": true 1098 | }, 1099 | "percentage": false, 1100 | "pluginVersion": "8.5.5", 1101 | "pointradius": 5, 1102 | "points": false, 1103 | "renderer": "flot", 1104 | "seriesOverrides": [], 1105 | "spaceLength": 10, 1106 | "stack": false, 1107 | "steppedLine": false, 1108 | "targets": [ 1109 | { 1110 | "expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval])) by (container_label_com_docker_swarm_service_name)", 1111 | "format": "time_series", 1112 | "intervalFactor": 2, 1113 | "legendFormat": "{{container_label_com_docker_swarm_service_name}}", 1114 | "refId": "A", 1115 | "step": 2 1116 | } 1117 | ], 1118 | "thresholds": [], 1119 | "timeRegions": [], 1120 | "title": "Network received by Service", 1121 | "tooltip": { 1122 | "shared": true, 1123 | "sort": 0, 1124 | "value_type": "individual" 1125 | }, 1126 | "type": "graph", 1127 | "xaxis": { 1128 | "mode": "time", 1129 | "show": true, 1130 | "values": [] 1131 | }, 1132 | "yaxes": [ 1133 | { 1134 | "format": "Bps", 1135 | "logBase": 1, 1136 | "show": true 1137 | }, 1138 | { 1139 | "format": "short", 1140 | "logBase": 1, 1141 | "show": true 1142 | } 1143 | ], 1144 | "yaxis": { 1145 | 
"align": false 1146 | } 1147 | }, 1148 | { 1149 | "aliasColors": {}, 1150 | "bars": false, 1151 | "dashLength": 10, 1152 | "dashes": false, 1153 | "datasource": { 1154 | "type": "prometheus", 1155 | "uid": "PBFA97CFB590B2093" 1156 | }, 1157 | "fill": 1, 1158 | "fillGradient": 0, 1159 | "gridPos": { 1160 | "h": 7, 1161 | "w": 24, 1162 | "x": 0, 1163 | "y": 46 1164 | }, 1165 | "hiddenSeries": false, 1166 | "id": 25, 1167 | "legend": { 1168 | "alignAsTable": true, 1169 | "avg": true, 1170 | "current": false, 1171 | "max": true, 1172 | "min": true, 1173 | "rightSide": true, 1174 | "show": true, 1175 | "sort": "avg", 1176 | "sortDesc": true, 1177 | "total": false, 1178 | "values": true 1179 | }, 1180 | "lines": true, 1181 | "linewidth": 1, 1182 | "links": [], 1183 | "nullPointMode": "null", 1184 | "options": { 1185 | "alertThreshold": true 1186 | }, 1187 | "percentage": false, 1188 | "pluginVersion": "8.5.5", 1189 | "pointradius": 5, 1190 | "points": false, 1191 | "renderer": "flot", 1192 | "seriesOverrides": [], 1193 | "spaceLength": 10, 1194 | "stack": false, 1195 | "steppedLine": false, 1196 | "targets": [ 1197 | { 1198 | "expr": "sum(rate(container_network_transmit_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval])) by (container_label_com_docker_swarm_service_name)", 1199 | "format": "time_series", 1200 | "intervalFactor": 2, 1201 | "legendFormat": "{{container_label_com_docker_swarm_service_name}}", 1202 | "metric": "", 1203 | "refId": "B", 1204 | "step": 2 1205 | } 1206 | ], 1207 | "thresholds": [], 1208 | "timeRegions": [], 1209 | "title": "Network transmitted by Service", 1210 | "tooltip": { 1211 | "shared": true, 1212 | "sort": 0, 1213 | "value_type": "individual" 1214 | }, 1215 | "type": "graph", 1216 | "xaxis": { 1217 | "mode": "time", 1218 | "show": true, 1219 | "values": [] 1220 | }, 1221 | "yaxes": [ 1222 | { 1223 | "format": "Bps", 1224 | "logBase": 1, 1225 | "show": true 1226 | }, 1227 | { 1228 | "format": "short", 1229 | 
"logBase": 1, 1230 | "show": true 1231 | } 1232 | ], 1233 | "yaxis": { 1234 | "align": false 1235 | } 1236 | }, 1237 | { 1238 | "aliasColors": {}, 1239 | "bars": false, 1240 | "dashLength": 10, 1241 | "dashes": false, 1242 | "datasource": { 1243 | "type": "prometheus", 1244 | "uid": "PBFA97CFB590B2093" 1245 | }, 1246 | "fill": 1, 1247 | "fillGradient": 0, 1248 | "gridPos": { 1249 | "h": 7, 1250 | "w": 10, 1251 | "x": 0, 1252 | "y": 53 1253 | }, 1254 | "hiddenSeries": false, 1255 | "id": 31, 1256 | "legend": { 1257 | "avg": true, 1258 | "current": false, 1259 | "max": false, 1260 | "min": false, 1261 | "show": true, 1262 | "total": false, 1263 | "values": true 1264 | }, 1265 | "lines": true, 1266 | "linewidth": 1, 1267 | "links": [], 1268 | "nullPointMode": "null", 1269 | "options": { 1270 | "alertThreshold": true 1271 | }, 1272 | "percentage": false, 1273 | "pluginVersion": "8.5.5", 1274 | "pointradius": 5, 1275 | "points": false, 1276 | "renderer": "flot", 1277 | "seriesOverrides": [], 1278 | "spaceLength": 10, 1279 | "stack": false, 1280 | "steppedLine": false, 1281 | "targets": [ 1282 | { 1283 | "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", 1284 | "format": "time_series", 1285 | "intervalFactor": 2, 1286 | "legendFormat": "Received", 1287 | "refId": "A", 1288 | "step": 4 1289 | }, 1290 | { 1291 | "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", 1292 | "format": "time_series", 1293 | "intervalFactor": 2, 1294 | "legendFormat": "Transmitted", 1295 | "refId": "B", 1296 | "step": 4 1297 | } 1298 | ], 1299 | "thresholds": [], 1300 | "timeRegions": [], 1301 | "title": "Cluster Network Traffic", 1302 | "tooltip": { 1303 | "shared": true, 1304 | "sort": 0, 1305 | "value_type": "individual" 1306 | }, 1307 | "type": "graph", 1308 | "xaxis": { 1309 | "mode": "time", 1310 | "show": true, 1311 | "values": [] 1312 | }, 1313 | "yaxes": [ 1314 | { 1315 | "format": "Bps", 1316 | "logBase": 1, 
1317 | "show": true 1318 | }, 1319 | { 1320 | "format": "short", 1321 | "logBase": 1, 1322 | "show": true 1323 | } 1324 | ], 1325 | "yaxis": { 1326 | "align": false 1327 | } 1328 | }, 1329 | { 1330 | "aliasColors": {}, 1331 | "bars": false, 1332 | "dashLength": 10, 1333 | "dashes": false, 1334 | "datasource": { 1335 | "type": "prometheus", 1336 | "uid": "PBFA97CFB590B2093" 1337 | }, 1338 | "fill": 1, 1339 | "fillGradient": 0, 1340 | "gridPos": { 1341 | "h": 7, 1342 | "w": 10, 1343 | "x": 10, 1344 | "y": 53 1345 | }, 1346 | "hiddenSeries": false, 1347 | "id": 26, 1348 | "legend": { 1349 | "alignAsTable": false, 1350 | "avg": true, 1351 | "current": false, 1352 | "max": true, 1353 | "min": true, 1354 | "rightSide": false, 1355 | "show": true, 1356 | "total": false, 1357 | "values": true 1358 | }, 1359 | "lines": true, 1360 | "linewidth": 1, 1361 | "links": [], 1362 | "nullPointMode": "null", 1363 | "options": { 1364 | "alertThreshold": true 1365 | }, 1366 | "percentage": false, 1367 | "pluginVersion": "8.5.5", 1368 | "pointradius": 5, 1369 | "points": false, 1370 | "renderer": "flot", 1371 | "seriesOverrides": [], 1372 | "spaceLength": 10, 1373 | "stack": false, 1374 | "steppedLine": false, 1375 | "targets": [ 1376 | { 1377 | "expr": "sum(irate(container_fs_reads_total[$interval]) )", 1378 | "format": "time_series", 1379 | "intervalFactor": 2, 1380 | "legendFormat": "Reads", 1381 | "refId": "A", 1382 | "step": 4 1383 | }, 1384 | { 1385 | "expr": "sum(irate(container_fs_writes_total[$interval])) ", 1386 | "format": "time_series", 1387 | "intervalFactor": 2, 1388 | "legendFormat": "Writes ", 1389 | "refId": "B", 1390 | "step": 4 1391 | } 1392 | ], 1393 | "thresholds": [], 1394 | "timeRegions": [], 1395 | "title": "Cluster IOPS", 1396 | "tooltip": { 1397 | "shared": true, 1398 | "sort": 0, 1399 | "value_type": "individual" 1400 | }, 1401 | "type": "graph", 1402 | "xaxis": { 1403 | "mode": "time", 1404 | "show": true, 1405 | "values": [] 1406 | }, 1407 | "yaxes": [ 1408 
| { 1409 | "format": "short", 1410 | "logBase": 1, 1411 | "show": true 1412 | }, 1413 | { 1414 | "format": "short", 1415 | "logBase": 1, 1416 | "show": true 1417 | } 1418 | ], 1419 | "yaxis": { 1420 | "align": false 1421 | } 1422 | }, 1423 | { 1424 | "datasource": { 1425 | "type": "prometheus", 1426 | "uid": "PBFA97CFB590B2093" 1427 | }, 1428 | "fieldConfig": { 1429 | "defaults": { 1430 | "color": { 1431 | "mode": "thresholds" 1432 | }, 1433 | "mappings": [ 1434 | { 1435 | "options": { 1436 | "match": "null", 1437 | "result": { 1438 | "text": "N/A" 1439 | } 1440 | }, 1441 | "type": "special" 1442 | } 1443 | ], 1444 | "max": 100, 1445 | "min": 0, 1446 | "thresholds": { 1447 | "mode": "absolute", 1448 | "steps": [ 1449 | { 1450 | "color": "rgba(245, 54, 54, 0.9)" 1451 | }, 1452 | { 1453 | "color": "rgba(237, 129, 40, 0.89)", 1454 | "value": 10 1455 | }, 1456 | { 1457 | "color": "rgba(50, 172, 45, 0.97)", 1458 | "value": 25 1459 | } 1460 | ] 1461 | }, 1462 | "unit": "percent" 1463 | }, 1464 | "overrides": [] 1465 | }, 1466 | "gridPos": { 1467 | "h": 7, 1468 | "w": 4, 1469 | "x": 20, 1470 | "y": 53 1471 | }, 1472 | "id": 27, 1473 | "links": [], 1474 | "maxDataPoints": 100, 1475 | "options": { 1476 | "orientation": "horizontal", 1477 | "reduceOptions": { 1478 | "calcs": [ 1479 | "mean" 1480 | ], 1481 | "fields": "", 1482 | "values": false 1483 | }, 1484 | "showThresholdLabels": false, 1485 | "showThresholdMarkers": true 1486 | }, 1487 | "pluginVersion": "8.5.5", 1488 | "targets": [ 1489 | { 1490 | "datasource": { 1491 | "type": "prometheus", 1492 | "uid": "PBFA97CFB590B2093" 1493 | }, 1494 | "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 1495 | "format": "time_series", 1496 | "intervalFactor": 2, 1497 | "legendFormat": "", 1498 | "refId": "A", 
1499 | "step": 20 1500 | } 1501 | ], 1502 | "title": "Available Disk Space", 1503 | "type": "gauge" 1504 | }, 1505 | { 1506 | "aliasColors": {}, 1507 | "bars": false, 1508 | "dashLength": 10, 1509 | "dashes": false, 1510 | "datasource": { 1511 | "type": "prometheus", 1512 | "uid": "PBFA97CFB590B2093" 1513 | }, 1514 | "decimals": 0, 1515 | "fill": 1, 1516 | "fillGradient": 0, 1517 | "gridPos": { 1518 | "h": 7, 1519 | "w": 12, 1520 | "x": 0, 1521 | "y": 60 1522 | }, 1523 | "hiddenSeries": false, 1524 | "id": 29, 1525 | "legend": { 1526 | "alignAsTable": true, 1527 | "avg": false, 1528 | "current": true, 1529 | "hideEmpty": true, 1530 | "hideZero": true, 1531 | "max": false, 1532 | "min": false, 1533 | "rightSide": true, 1534 | "show": true, 1535 | "sort": "current", 1536 | "sortDesc": true, 1537 | "total": false, 1538 | "values": true 1539 | }, 1540 | "lines": true, 1541 | "linewidth": 1, 1542 | "links": [], 1543 | "nullPointMode": "null", 1544 | "options": { 1545 | "alertThreshold": true 1546 | }, 1547 | "percentage": false, 1548 | "pluginVersion": "8.5.5", 1549 | "pointradius": 5, 1550 | "points": false, 1551 | "renderer": "flot", 1552 | "seriesOverrides": [], 1553 | "spaceLength": 10, 1554 | "stack": false, 1555 | "steppedLine": false, 1556 | "targets": [ 1557 | { 1558 | "expr": "sum(engine_daemon_container_actions_seconds_count * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) by (action)", 1559 | "format": "time_series", 1560 | "intervalFactor": 10, 1561 | "legendFormat": "{{action }}", 1562 | "refId": "A", 1563 | "step": 10 1564 | } 1565 | ], 1566 | "thresholds": [], 1567 | "timeRegions": [], 1568 | "title": "Docker Daemon Container Actions", 1569 | "tooltip": { 1570 | "shared": true, 1571 | "sort": 2, 1572 | "value_type": "individual" 1573 | }, 1574 | "type": "graph", 1575 | "xaxis": { 1576 | "mode": "time", 1577 | "show": true, 1578 | "values": [] 1579 | }, 1580 | "yaxes": [ 1581 | { 1582 | "format": "short", 1583 | "logBase": 1, 1584 
| "show": true 1585 | }, 1586 | { 1587 | "format": "short", 1588 | "logBase": 1, 1589 | "show": true 1590 | } 1591 | ], 1592 | "yaxis": { 1593 | "align": false 1594 | } 1595 | }, 1596 | { 1597 | "aliasColors": {}, 1598 | "bars": false, 1599 | "dashLength": 10, 1600 | "dashes": false, 1601 | "datasource": { 1602 | "type": "prometheus", 1603 | "uid": "PBFA97CFB590B2093" 1604 | }, 1605 | "decimals": 0, 1606 | "fill": 1, 1607 | "fillGradient": 0, 1608 | "gridPos": { 1609 | "h": 7, 1610 | "w": 12, 1611 | "x": 12, 1612 | "y": 60 1613 | }, 1614 | "hiddenSeries": false, 1615 | "id": 30, 1616 | "legend": { 1617 | "alignAsTable": true, 1618 | "avg": false, 1619 | "current": true, 1620 | "hideEmpty": true, 1621 | "hideZero": true, 1622 | "max": false, 1623 | "min": false, 1624 | "rightSide": true, 1625 | "show": true, 1626 | "sort": "current", 1627 | "sortDesc": true, 1628 | "total": false, 1629 | "values": true 1630 | }, 1631 | "lines": true, 1632 | "linewidth": 1, 1633 | "links": [], 1634 | "nullPointMode": "null", 1635 | "options": { 1636 | "alertThreshold": true 1637 | }, 1638 | "percentage": false, 1639 | "pluginVersion": "8.5.5", 1640 | "pointradius": 5, 1641 | "points": false, 1642 | "renderer": "flot", 1643 | "seriesOverrides": [], 1644 | "spaceLength": 10, 1645 | "stack": false, 1646 | "steppedLine": false, 1647 | "targets": [ 1648 | { 1649 | "expr": "sum(engine_daemon_network_actions_seconds_count * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) by (action)", 1650 | "format": "time_series", 1651 | "intervalFactor": 10, 1652 | "legendFormat": "{{action }}", 1653 | "refId": "A", 1654 | "step": 10 1655 | } 1656 | ], 1657 | "thresholds": [], 1658 | "timeRegions": [], 1659 | "title": "Docker Daemon Network Actions", 1660 | "tooltip": { 1661 | "shared": true, 1662 | "sort": 2, 1663 | "value_type": "individual" 1664 | }, 1665 | "type": "graph", 1666 | "xaxis": { 1667 | "mode": "time", 1668 | "show": true, 1669 | "values": [] 1670 | }, 1671 | 
"yaxes": [ 1672 | { 1673 | "format": "short", 1674 | "logBase": 1, 1675 | "show": true 1676 | }, 1677 | { 1678 | "format": "short", 1679 | "logBase": 1, 1680 | "show": true 1681 | } 1682 | ], 1683 | "yaxis": { 1684 | "align": false 1685 | } 1686 | }, 1687 | { 1688 | "columns": [ 1689 | { 1690 | "$$hashKey": "object:71", 1691 | "text": "Avg", 1692 | "value": "avg" 1693 | } 1694 | ], 1695 | "datasource": { 1696 | "type": "prometheus", 1697 | "uid": "PBFA97CFB590B2093" 1698 | }, 1699 | "fontSize": "100%", 1700 | "gridPos": { 1701 | "h": 7, 1702 | "w": 24, 1703 | "x": 0, 1704 | "y": 67 1705 | }, 1706 | "hideTimeOverride": true, 1707 | "id": 28, 1708 | "links": [], 1709 | "scroll": true, 1710 | "showHeader": true, 1711 | "sort": { 1712 | "col": 0, 1713 | "desc": true 1714 | }, 1715 | "styles": [ 1716 | { 1717 | "alias": "Time", 1718 | "align": "auto", 1719 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1720 | "pattern": "Time", 1721 | "type": "hidden" 1722 | }, 1723 | { 1724 | "alias": "", 1725 | "align": "auto", 1726 | "colors": [ 1727 | "rgba(245, 54, 54, 0.9)", 1728 | "rgba(237, 129, 40, 0.89)", 1729 | "rgba(50, 172, 45, 0.97)" 1730 | ], 1731 | "decimals": 2, 1732 | "pattern": "/.*/", 1733 | "thresholds": [], 1734 | "type": "number", 1735 | "unit": "short" 1736 | } 1737 | ], 1738 | "targets": [ 1739 | { 1740 | "datasource": { 1741 | "type": "prometheus", 1742 | "uid": "PBFA97CFB590B2093" 1743 | }, 1744 | "expr": "sum(engine_daemon_engine_info * on(instance) group_left(node_id) swarm_node_info) by (kernel, os, graphdriver, version, node_id)", 1745 | "format": "table", 1746 | "instant": true, 1747 | "intervalFactor": 2, 1748 | "legendFormat": "", 1749 | "refId": "A", 1750 | "step": 2 1751 | } 1752 | ], 1753 | "timeFrom": "1s", 1754 | "title": "Docker Engine Info", 1755 | "transform": "timeseries_to_rows", 1756 | "type": "table-old" 1757 | } 1758 | ], 1759 | "refresh": false, 1760 | "schemaVersion": 36, 1761 | "style": "dark", 1762 | "tags": [ 1763 | "swarmprom" 1764 | ], 1765 
| "templating": { 1766 | "list": [ 1767 | { 1768 | "allValue": ".+", 1769 | "current": { 1770 | "selected": false, 1771 | "text": "All", 1772 | "value": "$__all" 1773 | }, 1774 | "datasource": { 1775 | "type": "prometheus", 1776 | "uid": "PBFA97CFB590B2093" 1777 | }, 1778 | "definition": "", 1779 | "hide": 0, 1780 | "includeAll": true, 1781 | "label": "Swarm Node", 1782 | "multi": false, 1783 | "name": "node_id", 1784 | "options": [], 1785 | "query": { 1786 | "query": "node_meta", 1787 | "refId": "Prometheus-node_id-Variable-Query" 1788 | }, 1789 | "refresh": 2, 1790 | "regex": "/node_id=\"([^\"]+)\"/", 1791 | "skipUrlSync": false, 1792 | "sort": 0, 1793 | "type": "query" 1794 | }, 1795 | { 1796 | "auto": true, 1797 | "auto_count": 30, 1798 | "auto_min": "30s", 1799 | "current": { 1800 | "selected": false, 1801 | "text": "auto", 1802 | "value": "$__auto_interval_interval" 1803 | }, 1804 | "hide": 0, 1805 | "label": "Interval", 1806 | "name": "interval", 1807 | "options": [ 1808 | { 1809 | "selected": true, 1810 | "text": "auto", 1811 | "value": "$__auto_interval_interval" 1812 | }, 1813 | { 1814 | "selected": false, 1815 | "text": "1m", 1816 | "value": "1m" 1817 | }, 1818 | { 1819 | "selected": false, 1820 | "text": "10m", 1821 | "value": "10m" 1822 | }, 1823 | { 1824 | "selected": false, 1825 | "text": "30m", 1826 | "value": "30m" 1827 | }, 1828 | { 1829 | "selected": false, 1830 | "text": "1h", 1831 | "value": "1h" 1832 | }, 1833 | { 1834 | "selected": false, 1835 | "text": "6h", 1836 | "value": "6h" 1837 | }, 1838 | { 1839 | "selected": false, 1840 | "text": "12h", 1841 | "value": "12h" 1842 | }, 1843 | { 1844 | "selected": false, 1845 | "text": "1d", 1846 | "value": "1d" 1847 | }, 1848 | { 1849 | "selected": false, 1850 | "text": "7d", 1851 | "value": "7d" 1852 | }, 1853 | { 1854 | "selected": false, 1855 | "text": "14d", 1856 | "value": "14d" 1857 | }, 1858 | { 1859 | "selected": false, 1860 | "text": "30d", 1861 | "value": "30d" 1862 | } 1863 | ], 1864 | 
"query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 1865 | "queryValue": "", 1866 | "refresh": 2, 1867 | "skipUrlSync": false, 1868 | "type": "interval" 1869 | } 1870 | ] 1871 | }, 1872 | "time": { 1873 | "from": "now-30m", 1874 | "to": "now" 1875 | }, 1876 | "timepicker": { 1877 | "refresh_intervals": [ 1878 | "5s", 1879 | "10s", 1880 | "30s", 1881 | "1m", 1882 | "5m", 1883 | "15m", 1884 | "30m", 1885 | "1h", 1886 | "2h", 1887 | "1d" 1888 | ], 1889 | "time_options": [ 1890 | "5m", 1891 | "15m", 1892 | "1h", 1893 | "6h", 1894 | "12h", 1895 | "24h", 1896 | "2d", 1897 | "7d", 1898 | "30d" 1899 | ] 1900 | }, 1901 | "timezone": "", 1902 | "title": "Docker Swarm Services", 1903 | "uid": "zr_baSRmk", 1904 | "version": 1, 1905 | "weekStart": "" 1906 | } -------------------------------------------------------------------------------- /02_monitoring/configs/grafana/conf/dashboards/nodes-dash.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "datasource", 8 | "uid": "grafana" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "description": "Docker Swarm nodes metrics", 25 | "editable": true, 26 | "fiscalYearStartMonth": 0, 27 | "graphTooltip": 0, 28 | "iteration": 1655488639528, 29 | "links": [], 30 | "liveNow": false, 31 | "panels": [ 32 | { 33 | "datasource": { 34 | "type": "prometheus", 35 | "uid": "PBFA97CFB590B2093" 36 | }, 37 | "fieldConfig": { 38 | "defaults": { 39 | "color": { 40 | "mode": "thresholds" 41 | }, 42 | "decimals": 1, 43 | "mappings": [ 44 | { 45 | "options": { 46 | "match": "null", 47 | "result": { 48 | "text": "N/A" 49 | } 50 | }, 51 | "type": "special" 52 | } 53 | ], 54 | "thresholds": { 55 | 
"mode": "absolute", 56 | "steps": [ 57 | { 58 | "color": "green", 59 | "value": null 60 | }, 61 | { 62 | "color": "red", 63 | "value": 80 64 | } 65 | ] 66 | }, 67 | "unit": "s" 68 | }, 69 | "overrides": [] 70 | }, 71 | "gridPos": { 72 | "h": 4, 73 | "w": 6, 74 | "x": 0, 75 | "y": 0 76 | }, 77 | "hideTimeOverride": true, 78 | "id": 2, 79 | "links": [], 80 | "maxDataPoints": 100, 81 | "options": { 82 | "colorMode": "none", 83 | "graphMode": "none", 84 | "justifyMode": "auto", 85 | "orientation": "horizontal", 86 | "reduceOptions": { 87 | "calcs": [ 88 | "mean" 89 | ], 90 | "fields": "", 91 | "values": false 92 | }, 93 | "textMode": "auto" 94 | }, 95 | "pluginVersion": "8.5.5", 96 | "targets": [ 97 | { 98 | "expr": "topk(1, sum((node_time_seconds - node_boot_time_seconds) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name))", 99 | "format": "time_series", 100 | "intervalFactor": 2, 101 | "legendFormat": "", 102 | "refId": "A", 103 | "step": 2 104 | } 105 | ], 106 | "timeFrom": "1m", 107 | "title": "Uptime", 108 | "type": "stat" 109 | }, 110 | { 111 | "datasource": { 112 | "type": "prometheus", 113 | "uid": "PBFA97CFB590B2093" 114 | }, 115 | "fieldConfig": { 116 | "defaults": { 117 | "color": { 118 | "mode": "thresholds" 119 | }, 120 | "decimals": 0, 121 | "mappings": [ 122 | { 123 | "options": { 124 | "match": "null", 125 | "result": { 126 | "text": "N/A" 127 | } 128 | }, 129 | "type": "special" 130 | } 131 | ], 132 | "thresholds": { 133 | "mode": "absolute", 134 | "steps": [ 135 | { 136 | "color": "green", 137 | "value": null 138 | }, 139 | { 140 | "color": "red", 141 | "value": 80 142 | } 143 | ] 144 | }, 145 | "unit": "none" 146 | }, 147 | "overrides": [] 148 | }, 149 | "gridPos": { 150 | "h": 4, 151 | "w": 6, 152 | "x": 6, 153 | "y": 0 154 | }, 155 | "id": 1, 156 | "links": [], 157 | "maxDataPoints": 100, 158 | "options": { 159 | "colorMode": "none", 160 | "graphMode": "none", 161 | "justifyMode": "auto", 162 | "orientation": 
"horizontal", 163 | "reduceOptions": { 164 | "calcs": [ 165 | "mean" 166 | ], 167 | "fields": "", 168 | "values": false 169 | }, 170 | "textMode": "auto" 171 | }, 172 | "pluginVersion": "8.5.5", 173 | "targets": [ 174 | { 175 | "expr": "count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 176 | "format": "time_series", 177 | "intervalFactor": 2, 178 | "legendFormat": "", 179 | "refId": "A", 180 | "step": 20 181 | } 182 | ], 183 | "title": "Nodes", 184 | "type": "stat" 185 | }, 186 | { 187 | "datasource": { 188 | "type": "prometheus", 189 | "uid": "PBFA97CFB590B2093" 190 | }, 191 | "fieldConfig": { 192 | "defaults": { 193 | "color": { 194 | "mode": "thresholds" 195 | }, 196 | "decimals": 0, 197 | "mappings": [ 198 | { 199 | "options": { 200 | "match": "null", 201 | "result": { 202 | "text": "N/A" 203 | } 204 | }, 205 | "type": "special" 206 | } 207 | ], 208 | "thresholds": { 209 | "mode": "absolute", 210 | "steps": [ 211 | { 212 | "color": "green", 213 | "value": null 214 | }, 215 | { 216 | "color": "red", 217 | "value": 80 218 | } 219 | ] 220 | }, 221 | "unit": "short" 222 | }, 223 | "overrides": [] 224 | }, 225 | "gridPos": { 226 | "h": 4, 227 | "w": 6, 228 | "x": 12, 229 | "y": 0 230 | }, 231 | "hideTimeOverride": true, 232 | "id": 4, 233 | "links": [], 234 | "maxDataPoints": 100, 235 | "options": { 236 | "colorMode": "none", 237 | "graphMode": "none", 238 | "justifyMode": "auto", 239 | "orientation": "horizontal", 240 | "reduceOptions": { 241 | "calcs": [ 242 | "mean" 243 | ], 244 | "fields": "", 245 | "values": false 246 | }, 247 | "textMode": "auto" 248 | }, 249 | "pluginVersion": "8.5.5", 250 | "targets": [ 251 | { 252 | "expr": "count(node_cpu_seconds_total{mode=\"idle\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 253 | "format": "time_series", 254 | "intervalFactor": 2, 255 | "legendFormat": "", 256 | "refId": "A", 257 | "step": 2 258 | } 259 | ], 260 | "timeFrom": "1m", 261 | "title": 
"CPUs", 262 | "type": "stat" 263 | }, 264 | { 265 | "datasource": { 266 | "type": "prometheus", 267 | "uid": "PBFA97CFB590B2093" 268 | }, 269 | "fieldConfig": { 270 | "defaults": { 271 | "color": { 272 | "mode": "thresholds" 273 | }, 274 | "mappings": [ 275 | { 276 | "options": { 277 | "match": "null", 278 | "result": { 279 | "text": "N/A" 280 | } 281 | }, 282 | "type": "special" 283 | } 284 | ], 285 | "max": 100, 286 | "min": 0, 287 | "thresholds": { 288 | "mode": "absolute", 289 | "steps": [ 290 | { 291 | "color": "rgba(245, 54, 54, 0.9)", 292 | "value": null 293 | }, 294 | { 295 | "color": "rgba(237, 129, 40, 0.89)", 296 | "value": 10 297 | }, 298 | { 299 | "color": "rgba(50, 172, 45, 0.97)", 300 | "value": 25 301 | } 302 | ] 303 | }, 304 | "unit": "percent" 305 | }, 306 | "overrides": [] 307 | }, 308 | "gridPos": { 309 | "h": 4, 310 | "w": 6, 311 | "x": 18, 312 | "y": 0 313 | }, 314 | "hideTimeOverride": true, 315 | "id": 11, 316 | "links": [], 317 | "maxDataPoints": 100, 318 | "options": { 319 | "orientation": "horizontal", 320 | "reduceOptions": { 321 | "calcs": [ 322 | "mean" 323 | ], 324 | "fields": "", 325 | "values": false 326 | }, 327 | "showThresholdLabels": false, 328 | "showThresholdMarkers": true 329 | }, 330 | "pluginVersion": "8.5.5", 331 | "targets": [ 332 | { 333 | "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) ", 334 | "format": "time_series", 335 | "intervalFactor": 2, 336 | "legendFormat": "", 337 | "refId": "A", 338 | "step": 2 339 | } 340 | ], 341 | "timeFrom": "1m", 342 | "title": "CPU Idle", 343 | "type": "gauge" 344 | }, 345 | { 346 | "aliasColors": {}, 347 | "bars": false, 348 | "dashLength": 10, 349 | "dashes": false, 350 | "datasource": { 351 | "type": "prometheus", 352 | "uid": "PBFA97CFB590B2093" 353 | }, 354 | 
"decimals": 2, 355 | "fill": 1, 356 | "fillGradient": 0, 357 | "gridPos": { 358 | "h": 7, 359 | "w": 12, 360 | "x": 0, 361 | "y": 4 362 | }, 363 | "hiddenSeries": false, 364 | "id": 13, 365 | "legend": { 366 | "alignAsTable": true, 367 | "avg": true, 368 | "current": true, 369 | "hideEmpty": false, 370 | "hideZero": false, 371 | "max": true, 372 | "min": true, 373 | "rightSide": true, 374 | "show": false, 375 | "total": false, 376 | "values": true 377 | }, 378 | "lines": true, 379 | "linewidth": 1, 380 | "links": [], 381 | "nullPointMode": "null", 382 | "options": { 383 | "alertThreshold": true 384 | }, 385 | "percentage": false, 386 | "pluginVersion": "8.5.5", 387 | "pointradius": 5, 388 | "points": false, 389 | "renderer": "flot", 390 | "seriesOverrides": [], 391 | "spaceLength": 10, 392 | "stack": false, 393 | "steppedLine": false, 394 | "targets": [ 395 | { 396 | "expr": "node_load5 * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}", 397 | "format": "time_series", 398 | "intervalFactor": 2, 399 | "legendFormat": "load5 {{node_name}}", 400 | "refId": "A", 401 | "step": 2 402 | } 403 | ], 404 | "thresholds": [], 405 | "timeRegions": [], 406 | "title": "System Load by Node", 407 | "tooltip": { 408 | "shared": true, 409 | "sort": 2, 410 | "value_type": "individual" 411 | }, 412 | "type": "graph", 413 | "xaxis": { 414 | "mode": "time", 415 | "show": true, 416 | "values": [] 417 | }, 418 | "yaxes": [ 419 | { 420 | "format": "short", 421 | "logBase": 1, 422 | "show": true 423 | }, 424 | { 425 | "format": "short", 426 | "logBase": 1, 427 | "show": true 428 | } 429 | ], 430 | "yaxis": { 431 | "align": false 432 | } 433 | }, 434 | { 435 | "aliasColors": {}, 436 | "bars": false, 437 | "dashLength": 10, 438 | "dashes": false, 439 | "datasource": { 440 | "type": "prometheus", 441 | "uid": "PBFA97CFB590B2093" 442 | }, 443 | "decimals": 2, 444 | "fill": 1, 445 | "fillGradient": 0, 446 | "gridPos": { 447 | "h": 7, 448 | "w": 12, 449 | "x": 12, 450 | "y": 4 
451 | }, 452 | "hiddenSeries": false, 453 | "id": 14, 454 | "legend": { 455 | "alignAsTable": true, 456 | "avg": true, 457 | "current": true, 458 | "hideEmpty": true, 459 | "hideZero": true, 460 | "max": true, 461 | "min": true, 462 | "rightSide": true, 463 | "show": false, 464 | "total": false, 465 | "values": true 466 | }, 467 | "lines": true, 468 | "linewidth": 1, 469 | "links": [], 470 | "nullPointMode": "null as zero", 471 | "options": { 472 | "alertThreshold": true 473 | }, 474 | "percentage": false, 475 | "pluginVersion": "8.5.5", 476 | "pointradius": 5, 477 | "points": false, 478 | "renderer": "flot", 479 | "seriesOverrides": [], 480 | "spaceLength": 10, 481 | "stack": false, 482 | "steppedLine": false, 483 | "targets": [ 484 | { 485 | "expr": "100 - (avg(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) by (node_name))", 486 | "format": "time_series", 487 | "intervalFactor": 2, 488 | "legendFormat": "{{node_name}}", 489 | "refId": "A", 490 | "step": 2 491 | } 492 | ], 493 | "thresholds": [], 494 | "timeRegions": [], 495 | "title": "CPU Usage by Node", 496 | "tooltip": { 497 | "shared": true, 498 | "sort": 2, 499 | "value_type": "individual" 500 | }, 501 | "type": "graph", 502 | "xaxis": { 503 | "mode": "time", 504 | "show": true, 505 | "values": [] 506 | }, 507 | "yaxes": [ 508 | { 509 | "format": "percent", 510 | "logBase": 1, 511 | "max": "100", 512 | "show": true 513 | }, 514 | { 515 | "format": "short", 516 | "logBase": 1, 517 | "show": true 518 | } 519 | ], 520 | "yaxis": { 521 | "align": false 522 | } 523 | }, 524 | { 525 | "datasource": { 526 | "type": "prometheus", 527 | "uid": "PBFA97CFB590B2093" 528 | }, 529 | "fieldConfig": { 530 | "defaults": { 531 | "color": { 532 | "mode": "thresholds" 533 | }, 534 | "decimals": 1, 535 | "mappings": [ 536 | { 537 | "options": { 538 | "match": "null", 539 | "result": { 540 | "text": "N/A" 541 | } 542 | }, 543 | "type": "special" 
544 | } 545 | ], 546 | "thresholds": { 547 | "mode": "absolute", 548 | "steps": [ 549 | { 550 | "color": "green", 551 | "value": null 552 | }, 553 | { 554 | "color": "red", 555 | "value": 80 556 | } 557 | ] 558 | }, 559 | "unit": "decbytes" 560 | }, 561 | "overrides": [] 562 | }, 563 | "gridPos": { 564 | "h": 4, 565 | "w": 3, 566 | "x": 0, 567 | "y": 11 568 | }, 569 | "hideTimeOverride": true, 570 | "id": 3, 571 | "links": [], 572 | "maxDataPoints": 100, 573 | "options": { 574 | "colorMode": "none", 575 | "graphMode": "none", 576 | "justifyMode": "auto", 577 | "orientation": "horizontal", 578 | "reduceOptions": { 579 | "calcs": [ 580 | "mean" 581 | ], 582 | "fields": "", 583 | "values": false 584 | }, 585 | "textMode": "auto" 586 | }, 587 | "pluginVersion": "8.5.5", 588 | "targets": [ 589 | { 590 | "expr": "sum(node_memory_MemTotal_bytes * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 591 | "format": "time_series", 592 | "intervalFactor": 2, 593 | "legendFormat": "", 594 | "refId": "A", 595 | "step": 20 596 | } 597 | ], 598 | "title": "Total Memory", 599 | "type": "stat" 600 | }, 601 | { 602 | "datasource": { 603 | "type": "prometheus", 604 | "uid": "PBFA97CFB590B2093" 605 | }, 606 | "fieldConfig": { 607 | "defaults": { 608 | "color": { 609 | "mode": "thresholds" 610 | }, 611 | "mappings": [ 612 | { 613 | "options": { 614 | "match": "null", 615 | "result": { 616 | "text": "N/A" 617 | } 618 | }, 619 | "type": "special" 620 | } 621 | ], 622 | "max": 100, 623 | "min": 0, 624 | "thresholds": { 625 | "mode": "absolute", 626 | "steps": [ 627 | { 628 | "color": "rgba(245, 54, 54, 0.9)", 629 | "value": null 630 | }, 631 | { 632 | "color": "rgba(237, 129, 40, 0.89)", 633 | "value": 10 634 | }, 635 | { 636 | "color": "rgba(50, 172, 45, 0.97)", 637 | "value": 25 638 | } 639 | ] 640 | }, 641 | "unit": "percent" 642 | }, 643 | "overrides": [] 644 | }, 645 | "gridPos": { 646 | "h": 4, 647 | "w": 4, 648 | "x": 3, 649 | "y": 11 650 | }, 651 | "id": 8, 652 
| "links": [], 653 | "maxDataPoints": 100, 654 | "options": { 655 | "orientation": "horizontal", 656 | "reduceOptions": { 657 | "calcs": [ 658 | "mean" 659 | ], 660 | "fields": "", 661 | "values": false 662 | }, 663 | "showThresholdLabels": false, 664 | "showThresholdMarkers": true 665 | }, 666 | "pluginVersion": "8.5.5", 667 | "targets": [ 668 | { 669 | "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 670 | "format": "time_series", 671 | "intervalFactor": 2, 672 | "legendFormat": "", 673 | "refId": "A", 674 | "step": 20 675 | } 676 | ], 677 | "title": "Available Memory", 678 | "type": "gauge" 679 | }, 680 | { 681 | "datasource": { 682 | "type": "prometheus", 683 | "uid": "PBFA97CFB590B2093" 684 | }, 685 | "fieldConfig": { 686 | "defaults": { 687 | "color": { 688 | "mode": "thresholds" 689 | }, 690 | "decimals": 1, 691 | "mappings": [ 692 | { 693 | "options": { 694 | "match": "null", 695 | "result": { 696 | "text": "N/A" 697 | } 698 | }, 699 | "type": "special" 700 | } 701 | ], 702 | "thresholds": { 703 | "mode": "absolute", 704 | "steps": [ 705 | { 706 | "color": "green", 707 | "value": null 708 | }, 709 | { 710 | "color": "red", 711 | "value": 80 712 | } 713 | ] 714 | }, 715 | "unit": "decbytes" 716 | }, 717 | "overrides": [] 718 | }, 719 | "gridPos": { 720 | "h": 4, 721 | "w": 3, 722 | "x": 7, 723 | "y": 11 724 | }, 725 | "hideTimeOverride": true, 726 | "id": 22, 727 | "links": [], 728 | "maxDataPoints": 100, 729 | "options": { 730 | "colorMode": "none", 731 | "graphMode": "none", 732 | "justifyMode": "auto", 733 | "orientation": "horizontal", 734 | "reduceOptions": { 735 | "calcs": [ 736 | "mean" 737 | ], 738 | "fields": "", 739 | "values": false 740 | }, 741 | "textMode": "auto" 742 | }, 743 | "pluginVersion": "8.5.5", 744 | "targets": [ 745 | { 746 | "expr": 
"sum((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 747 | "format": "time_series", 748 | "intervalFactor": 2, 749 | "legendFormat": "", 750 | "refId": "A", 751 | "step": 20 752 | } 753 | ], 754 | "title": "Total swap memory used", 755 | "type": "stat" 756 | }, 757 | { 758 | "datasource": { 759 | "type": "prometheus", 760 | "uid": "PBFA97CFB590B2093" 761 | }, 762 | "fieldConfig": { 763 | "defaults": { 764 | "color": { 765 | "mode": "thresholds" 766 | }, 767 | "mappings": [ 768 | { 769 | "options": { 770 | "match": "null", 771 | "result": { 772 | "text": "N/A" 773 | } 774 | }, 775 | "type": "special" 776 | } 777 | ], 778 | "max": 100, 779 | "min": 0, 780 | "thresholds": { 781 | "mode": "absolute", 782 | "steps": [ 783 | { 784 | "color": "rgba(50, 172, 45, 0.97)", 785 | "value": null 786 | }, 787 | { 788 | "color": "rgba(237, 129, 40, 0.89)", 789 | "value": 5 790 | }, 791 | { 792 | "color": "rgba(245, 54, 54, 0.9)", 793 | "value": 10 794 | } 795 | ] 796 | }, 797 | "unit": "percent" 798 | }, 799 | "overrides": [] 800 | }, 801 | "gridPos": { 802 | "h": 4, 803 | "w": 4, 804 | "x": 10, 805 | "y": 11 806 | }, 807 | "id": 23, 808 | "links": [], 809 | "maxDataPoints": 100, 810 | "options": { 811 | "orientation": "horizontal", 812 | "reduceOptions": { 813 | "calcs": [ 814 | "mean" 815 | ], 816 | "fields": "", 817 | "values": false 818 | }, 819 | "showThresholdLabels": false, 820 | "showThresholdMarkers": true 821 | }, 822 | "pluginVersion": "8.5.5", 823 | "targets": [ 824 | { 825 | "expr": "sum(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_SwapTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 826 | "format": "time_series", 827 | "intervalFactor": 2, 828 | "legendFormat": "", 829 | "refId": "A", 830 | "step": 20 831 | } 832 | ], 833 | 
"title": "Used swap memory", 834 | "type": "gauge" 835 | }, 836 | { 837 | "datasource": { 838 | "type": "prometheus", 839 | "uid": "PBFA97CFB590B2093" 840 | }, 841 | "fieldConfig": { 842 | "defaults": { 843 | "color": { 844 | "mode": "thresholds" 845 | }, 846 | "mappings": [ 847 | { 848 | "options": { 849 | "match": "null", 850 | "result": { 851 | "text": "N/A" 852 | } 853 | }, 854 | "type": "special" 855 | } 856 | ], 857 | "max": 100, 858 | "min": 0, 859 | "thresholds": { 860 | "mode": "absolute", 861 | "steps": [ 862 | { 863 | "color": "rgba(50, 172, 45, 0.97)", 864 | "value": null 865 | }, 866 | { 867 | "color": "rgba(237, 129, 40, 0.89)", 868 | "value": 5 869 | }, 870 | { 871 | "color": "rgba(245, 54, 54, 0.9)", 872 | "value": 10 873 | } 874 | ] 875 | }, 876 | "unit": "percent" 877 | }, 878 | "overrides": [] 879 | }, 880 | "gridPos": { 881 | "h": 4, 882 | "w": 3, 883 | "x": 14, 884 | "y": 11 885 | }, 886 | "id": 24, 887 | "links": [], 888 | "maxDataPoints": 100, 889 | "options": { 890 | "orientation": "horizontal", 891 | "reduceOptions": { 892 | "calcs": [ 893 | "mean" 894 | ], 895 | "fields": "", 896 | "values": false 897 | }, 898 | "showThresholdLabels": false, 899 | "showThresholdMarkers": true 900 | }, 901 | "pluginVersion": "8.5.5", 902 | "targets": [ 903 | { 904 | "expr": "sum(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 905 | "format": "time_series", 906 | "intervalFactor": 2, 907 | "legendFormat": "", 908 | "refId": "A", 909 | "step": 20 910 | } 911 | ], 912 | "title": "Swap used / total RAM memory ratio", 913 | "type": "gauge" 914 | }, 915 | { 916 | "datasource": { 917 | "type": "prometheus", 918 | "uid": "PBFA97CFB590B2093" 919 | }, 920 | "fieldConfig": { 921 | "defaults": { 922 | "color": { 923 | "mode": "thresholds" 924 | }, 925 | 
"decimals": 1, 926 | "mappings": [ 927 | { 928 | "options": { 929 | "match": "null", 930 | "result": { 931 | "text": "N/A" 932 | } 933 | }, 934 | "type": "special" 935 | } 936 | ], 937 | "thresholds": { 938 | "mode": "absolute", 939 | "steps": [ 940 | { 941 | "color": "green", 942 | "value": null 943 | }, 944 | { 945 | "color": "red", 946 | "value": 80 947 | } 948 | ] 949 | }, 950 | "unit": "decbytes" 951 | }, 952 | "overrides": [] 953 | }, 954 | "gridPos": { 955 | "h": 4, 956 | "w": 3, 957 | "x": 17, 958 | "y": 11 959 | }, 960 | "hideTimeOverride": true, 961 | "id": 9, 962 | "links": [], 963 | "maxDataPoints": 100, 964 | "options": { 965 | "colorMode": "none", 966 | "graphMode": "none", 967 | "justifyMode": "auto", 968 | "orientation": "horizontal", 969 | "reduceOptions": { 970 | "calcs": [ 971 | "mean" 972 | ], 973 | "fields": "", 974 | "values": false 975 | }, 976 | "textMode": "auto" 977 | }, 978 | "pluginVersion": "8.5.5", 979 | "targets": [ 980 | { 981 | "datasource": { 982 | "type": "prometheus", 983 | "uid": "PBFA97CFB590B2093" 984 | }, 985 | "expr": "sum(node_filesystem_size_bytes{mountpoint=\"/\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 986 | "format": "time_series", 987 | "intervalFactor": 2, 988 | "legendFormat": "", 989 | "refId": "A", 990 | "step": 20 991 | } 992 | ], 993 | "title": "Total Disk Space", 994 | "type": "stat" 995 | }, 996 | { 997 | "datasource": { 998 | "type": "prometheus", 999 | "uid": "PBFA97CFB590B2093" 1000 | }, 1001 | "fieldConfig": { 1002 | "defaults": { 1003 | "color": { 1004 | "mode": "thresholds" 1005 | }, 1006 | "mappings": [ 1007 | { 1008 | "options": { 1009 | "match": "null", 1010 | "result": { 1011 | "text": "N/A" 1012 | } 1013 | }, 1014 | "type": "special" 1015 | } 1016 | ], 1017 | "max": 100, 1018 | "min": 0, 1019 | "thresholds": { 1020 | "mode": "absolute", 1021 | "steps": [ 1022 | { 1023 | "color": "rgba(245, 54, 54, 0.9)", 1024 | "value": null 1025 | }, 1026 | { 1027 | "color": 
"rgba(237, 129, 40, 0.89)", 1028 | "value": 10 1029 | }, 1030 | { 1031 | "color": "rgba(50, 172, 45, 0.97)", 1032 | "value": 25 1033 | } 1034 | ] 1035 | }, 1036 | "unit": "percent" 1037 | }, 1038 | "overrides": [] 1039 | }, 1040 | "gridPos": { 1041 | "h": 4, 1042 | "w": 4, 1043 | "x": 20, 1044 | "y": 11 1045 | }, 1046 | "id": 10, 1047 | "links": [], 1048 | "maxDataPoints": 100, 1049 | "options": { 1050 | "orientation": "horizontal", 1051 | "reduceOptions": { 1052 | "calcs": [ 1053 | "mean" 1054 | ], 1055 | "fields": "", 1056 | "values": false 1057 | }, 1058 | "showThresholdLabels": false, 1059 | "showThresholdMarkers": true 1060 | }, 1061 | "pluginVersion": "8.5.5", 1062 | "targets": [ 1063 | { 1064 | "datasource": { 1065 | "type": "prometheus", 1066 | "uid": "PBFA97CFB590B2093" 1067 | }, 1068 | "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 1069 | "format": "time_series", 1070 | "intervalFactor": 2, 1071 | "legendFormat": "", 1072 | "refId": "A", 1073 | "step": 20 1074 | } 1075 | ], 1076 | "title": "Available Disk Space", 1077 | "type": "gauge" 1078 | }, 1079 | { 1080 | "aliasColors": {}, 1081 | "bars": false, 1082 | "dashLength": 10, 1083 | "dashes": false, 1084 | "datasource": { 1085 | "type": "prometheus", 1086 | "uid": "PBFA97CFB590B2093" 1087 | }, 1088 | "fill": 1, 1089 | "fillGradient": 0, 1090 | "gridPos": { 1091 | "h": 7, 1092 | "w": 24, 1093 | "x": 0, 1094 | "y": 15 1095 | }, 1096 | "hiddenSeries": false, 1097 | "id": 15, 1098 | "legend": { 1099 | "alignAsTable": true, 1100 | "avg": true, 1101 | "current": false, 1102 | "max": true, 1103 | "min": true, 1104 | "rightSide": true, 1105 | "show": true, 1106 | "total": false, 1107 | "values": true 1108 | }, 1109 | "lines": true, 1110 | "linewidth": 1, 1111 | "links": [], 1112 | 
"nullPointMode": "null", 1113 | "options": { 1114 | "alertThreshold": true 1115 | }, 1116 | "percentage": false, 1117 | "pluginVersion": "8.5.5", 1118 | "pointradius": 5, 1119 | "points": false, 1120 | "renderer": "flot", 1121 | "seriesOverrides": [], 1122 | "spaceLength": 10, 1123 | "stack": true, 1124 | "steppedLine": false, 1125 | "targets": [ 1126 | { 1127 | "expr": "sum((node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Cached_bytes - node_memory_Buffers_bytes - node_memory_Slab_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1128 | "format": "time_series", 1129 | "intervalFactor": 2, 1130 | "legendFormat": "Used {{node_name}}", 1131 | "refId": "A", 1132 | "step": 2 1133 | }, 1134 | { 1135 | "expr": "sum(node_memory_Cached_bytes * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1136 | "format": "time_series", 1137 | "intervalFactor": 2, 1138 | "legendFormat": "Cached {{node_name}}", 1139 | "refId": "B", 1140 | "step": 2 1141 | } 1142 | ], 1143 | "thresholds": [], 1144 | "timeRegions": [], 1145 | "title": "Memory usage by Node", 1146 | "tooltip": { 1147 | "shared": true, 1148 | "sort": 0, 1149 | "value_type": "individual" 1150 | }, 1151 | "type": "graph", 1152 | "xaxis": { 1153 | "mode": "time", 1154 | "show": true, 1155 | "values": [] 1156 | }, 1157 | "yaxes": [ 1158 | { 1159 | "format": "decbytes", 1160 | "logBase": 1, 1161 | "show": true 1162 | }, 1163 | { 1164 | "format": "short", 1165 | "logBase": 1, 1166 | "show": true 1167 | } 1168 | ], 1169 | "yaxis": { 1170 | "align": false 1171 | } 1172 | }, 1173 | { 1174 | "aliasColors": {}, 1175 | "bars": false, 1176 | "dashLength": 10, 1177 | "dashes": false, 1178 | "datasource": { 1179 | "type": "prometheus", 1180 | "uid": "PBFA97CFB590B2093" 1181 | }, 1182 | "fill": 1, 1183 | "fillGradient": 0, 1184 | "gridPos": { 1185 | "h": 7, 1186 | "w": 24, 1187 | "x": 0, 1188 | "y": 22 1189 | }, 1190 | "hiddenSeries": false, 
1191 | "id": 21, 1192 | "legend": { 1193 | "alignAsTable": true, 1194 | "avg": true, 1195 | "current": false, 1196 | "max": true, 1197 | "min": true, 1198 | "rightSide": true, 1199 | "show": true, 1200 | "total": false, 1201 | "values": true 1202 | }, 1203 | "lines": true, 1204 | "linewidth": 1, 1205 | "links": [], 1206 | "nullPointMode": "null", 1207 | "options": { 1208 | "alertThreshold": true 1209 | }, 1210 | "percentage": false, 1211 | "pluginVersion": "8.5.5", 1212 | "pointradius": 5, 1213 | "points": false, 1214 | "renderer": "flot", 1215 | "seriesOverrides": [], 1216 | "spaceLength": 10, 1217 | "stack": true, 1218 | "steppedLine": false, 1219 | "targets": [ 1220 | { 1221 | "expr": "sum((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1222 | "format": "time_series", 1223 | "intervalFactor": 2, 1224 | "legendFormat": "Used {{node_name}}", 1225 | "refId": "A", 1226 | "step": 2 1227 | } 1228 | ], 1229 | "thresholds": [], 1230 | "timeRegions": [], 1231 | "title": "Swap memory usage by Node", 1232 | "tooltip": { 1233 | "shared": true, 1234 | "sort": 0, 1235 | "value_type": "individual" 1236 | }, 1237 | "type": "graph", 1238 | "xaxis": { 1239 | "mode": "time", 1240 | "show": true, 1241 | "values": [] 1242 | }, 1243 | "yaxes": [ 1244 | { 1245 | "format": "decbytes", 1246 | "logBase": 1, 1247 | "min": "0", 1248 | "show": true 1249 | }, 1250 | { 1251 | "format": "short", 1252 | "logBase": 1, 1253 | "show": true 1254 | } 1255 | ], 1256 | "yaxis": { 1257 | "align": false 1258 | } 1259 | }, 1260 | { 1261 | "aliasColors": {}, 1262 | "bars": false, 1263 | "dashLength": 10, 1264 | "dashes": false, 1265 | "datasource": { 1266 | "type": "prometheus", 1267 | "uid": "PBFA97CFB590B2093" 1268 | }, 1269 | "decimals": 2, 1270 | "fill": 1, 1271 | "fillGradient": 0, 1272 | "gridPos": { 1273 | "h": 7, 1274 | "w": 24, 1275 | "x": 0, 1276 | "y": 29 1277 | }, 1278 | "hiddenSeries": false, 
1279 | "id": 16, 1280 | "legend": { 1281 | "alignAsTable": true, 1282 | "avg": true, 1283 | "current": false, 1284 | "max": true, 1285 | "min": true, 1286 | "rightSide": true, 1287 | "show": true, 1288 | "total": false, 1289 | "values": true 1290 | }, 1291 | "lines": true, 1292 | "linewidth": 1, 1293 | "links": [], 1294 | "nullPointMode": "null as zero", 1295 | "options": { 1296 | "alertThreshold": true 1297 | }, 1298 | "percentage": false, 1299 | "pluginVersion": "8.5.5", 1300 | "pointradius": 5, 1301 | "points": false, 1302 | "renderer": "flot", 1303 | "seriesOverrides": [], 1304 | "spaceLength": 10, 1305 | "stack": false, 1306 | "steppedLine": false, 1307 | "targets": [ 1308 | { 1309 | "expr": "sum(irate(node_disk_read_bytes_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1310 | "format": "time_series", 1311 | "intervalFactor": 2, 1312 | "legendFormat": "Read {{node_name}}", 1313 | "refId": "A", 1314 | "step": 2 1315 | }, 1316 | { 1317 | "expr": "sum(irate(node_disk_written_bytes_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1318 | "format": "time_series", 1319 | "intervalFactor": 2, 1320 | "legendFormat": "Written {{node_name}}", 1321 | "refId": "B", 1322 | "step": 2 1323 | } 1324 | ], 1325 | "thresholds": [], 1326 | "timeRegions": [], 1327 | "title": "Disk I/O by Node", 1328 | "tooltip": { 1329 | "shared": true, 1330 | "sort": 0, 1331 | "value_type": "individual" 1332 | }, 1333 | "type": "graph", 1334 | "xaxis": { 1335 | "mode": "time", 1336 | "show": true, 1337 | "values": [] 1338 | }, 1339 | "yaxes": [ 1340 | { 1341 | "format": "Bps", 1342 | "logBase": 1, 1343 | "show": true 1344 | }, 1345 | { 1346 | "format": "short", 1347 | "logBase": 1, 1348 | "show": true 1349 | } 1350 | ], 1351 | "yaxis": { 1352 | "align": false 1353 | } 1354 | }, 1355 | { 1356 | "aliasColors": {}, 1357 | "bars": false, 1358 | "dashLength": 10, 1359 | "dashes": false, 
1360 | "datasource": { 1361 | "type": "prometheus", 1362 | "uid": "PBFA97CFB590B2093" 1363 | }, 1364 | "decimals": 2, 1365 | "fill": 1, 1366 | "fillGradient": 0, 1367 | "gridPos": { 1368 | "h": 7, 1369 | "w": 12, 1370 | "x": 0, 1371 | "y": 36 1372 | }, 1373 | "hiddenSeries": false, 1374 | "id": 18, 1375 | "legend": { 1376 | "alignAsTable": true, 1377 | "avg": true, 1378 | "current": true, 1379 | "max": true, 1380 | "min": true, 1381 | "rightSide": true, 1382 | "show": false, 1383 | "total": false, 1384 | "values": true 1385 | }, 1386 | "lines": true, 1387 | "linewidth": 1, 1388 | "links": [], 1389 | "nullPointMode": "null as zero", 1390 | "options": { 1391 | "alertThreshold": true 1392 | }, 1393 | "percentage": false, 1394 | "pluginVersion": "8.5.5", 1395 | "pointradius": 5, 1396 | "points": false, 1397 | "renderer": "flot", 1398 | "seriesOverrides": [], 1399 | "spaceLength": 10, 1400 | "stack": false, 1401 | "steppedLine": false, 1402 | "targets": [ 1403 | { 1404 | "expr": "sum(irate(node_disk_reads_completed_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1405 | "format": "time_series", 1406 | "intervalFactor": 2, 1407 | "legendFormat": "Reads {{node_name}}", 1408 | "refId": "A", 1409 | "step": 2 1410 | }, 1411 | { 1412 | "expr": "sum(irate(node_disk_writes_completed_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1413 | "format": "time_series", 1414 | "intervalFactor": 2, 1415 | "legendFormat": "Writes {{node_name}}", 1416 | "refId": "B", 1417 | "step": 2 1418 | } 1419 | ], 1420 | "thresholds": [], 1421 | "timeRegions": [], 1422 | "title": "IOPS by Node", 1423 | "tooltip": { 1424 | "shared": true, 1425 | "sort": 0, 1426 | "value_type": "individual" 1427 | }, 1428 | "type": "graph", 1429 | "xaxis": { 1430 | "mode": "time", 1431 | "show": true, 1432 | "values": [] 1433 | }, 1434 | "yaxes": [ 1435 | { 1436 | "format": "short", 1437 | "logBase": 1, 
1438 | "show": true 1439 | }, 1440 | { 1441 | "format": "short", 1442 | "logBase": 1, 1443 | "show": true 1444 | } 1445 | ], 1446 | "yaxis": { 1447 | "align": false 1448 | } 1449 | }, 1450 | { 1451 | "aliasColors": {}, 1452 | "bars": false, 1453 | "dashLength": 10, 1454 | "dashes": false, 1455 | "datasource": { 1456 | "type": "prometheus", 1457 | "uid": "PBFA97CFB590B2093" 1458 | }, 1459 | "decimals": 2, 1460 | "fill": 1, 1461 | "fillGradient": 0, 1462 | "gridPos": { 1463 | "h": 7, 1464 | "w": 12, 1465 | "x": 12, 1466 | "y": 36 1467 | }, 1468 | "hiddenSeries": false, 1469 | "id": 19, 1470 | "legend": { 1471 | "alignAsTable": true, 1472 | "avg": true, 1473 | "current": true, 1474 | "hideEmpty": true, 1475 | "hideZero": true, 1476 | "max": true, 1477 | "min": true, 1478 | "rightSide": true, 1479 | "show": false, 1480 | "total": false, 1481 | "values": true 1482 | }, 1483 | "lines": true, 1484 | "linewidth": 1, 1485 | "links": [], 1486 | "nullPointMode": "null as zero", 1487 | "options": { 1488 | "alertThreshold": true 1489 | }, 1490 | "percentage": false, 1491 | "pluginVersion": "8.5.5", 1492 | "pointradius": 5, 1493 | "points": false, 1494 | "renderer": "flot", 1495 | "seriesOverrides": [], 1496 | "spaceLength": 10, 1497 | "stack": false, 1498 | "steppedLine": false, 1499 | "targets": [ 1500 | { 1501 | "expr": "(avg(irate(node_cpu_seconds_total{mode=\"iowait\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) by (node_name))", 1502 | "format": "time_series", 1503 | "intervalFactor": 2, 1504 | "legendFormat": "{{node_name}}", 1505 | "refId": "A", 1506 | "step": 2 1507 | } 1508 | ], 1509 | "thresholds": [], 1510 | "timeRegions": [], 1511 | "title": "CPU IO Wait by Node", 1512 | "tooltip": { 1513 | "shared": true, 1514 | "sort": 2, 1515 | "value_type": "individual" 1516 | }, 1517 | "type": "graph", 1518 | "xaxis": { 1519 | "mode": "time", 1520 | "show": true, 1521 | "values": [] 1522 | }, 1523 | "yaxes": [ 1524 | { 1525 | 
"format": "percent", 1526 | "logBase": 1, 1527 | "show": true 1528 | }, 1529 | { 1530 | "format": "short", 1531 | "logBase": 1, 1532 | "show": true 1533 | } 1534 | ], 1535 | "yaxis": { 1536 | "align": false 1537 | } 1538 | }, 1539 | { 1540 | "aliasColors": {}, 1541 | "bars": false, 1542 | "dashLength": 10, 1543 | "dashes": false, 1544 | "datasource": { 1545 | "type": "prometheus", 1546 | "uid": "PBFA97CFB590B2093" 1547 | }, 1548 | "decimals": 0, 1549 | "fill": 3, 1550 | "fillGradient": 0, 1551 | "gridPos": { 1552 | "h": 7, 1553 | "w": 18, 1554 | "x": 0, 1555 | "y": 43 1556 | }, 1557 | "hiddenSeries": false, 1558 | "id": 12, 1559 | "legend": { 1560 | "alignAsTable": true, 1561 | "avg": false, 1562 | "current": true, 1563 | "hideEmpty": true, 1564 | "hideZero": true, 1565 | "max": false, 1566 | "min": false, 1567 | "rightSide": true, 1568 | "show": true, 1569 | "sort": "current", 1570 | "sortDesc": true, 1571 | "total": false, 1572 | "values": true 1573 | }, 1574 | "lines": true, 1575 | "linewidth": 1, 1576 | "links": [], 1577 | "nullPointMode": "null", 1578 | "options": { 1579 | "alertThreshold": true 1580 | }, 1581 | "percentage": false, 1582 | "pluginVersion": "8.5.5", 1583 | "pointradius": 5, 1584 | "points": false, 1585 | "renderer": "flot", 1586 | "seriesOverrides": [], 1587 | "spaceLength": 10, 1588 | "stack": true, 1589 | "steppedLine": false, 1590 | "targets": [ 1591 | { 1592 | "expr": "sum(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) by (container_label_com_docker_swarm_service_name)", 1593 | "format": "time_series", 1594 | "intervalFactor": 10, 1595 | "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", 1596 | "refId": "A", 1597 | "step": 10 1598 | } 1599 | ], 1600 | "thresholds": [], 1601 | "timeRegions": [], 1602 | "title": "Running Containers by Service", 1603 | "tooltip": { 1604 | "shared": true, 1605 | "sort": 2, 1606 | "value_type": "individual" 1607 | }, 1608 | "type": "graph", 1609 | 
"xaxis": { 1610 | "mode": "time", 1611 | "show": true, 1612 | "values": [] 1613 | }, 1614 | "yaxes": [ 1615 | { 1616 | "format": "short", 1617 | "logBase": 1, 1618 | "show": true 1619 | }, 1620 | { 1621 | "format": "short", 1622 | "logBase": 1, 1623 | "show": true 1624 | } 1625 | ], 1626 | "yaxis": { 1627 | "align": false 1628 | } 1629 | }, 1630 | { 1631 | "datasource": { 1632 | "type": "prometheus", 1633 | "uid": "PBFA97CFB590B2093" 1634 | }, 1635 | "fieldConfig": { 1636 | "defaults": { 1637 | "color": { 1638 | "fixedColor": "rgb(31, 120, 193)", 1639 | "mode": "fixed" 1640 | }, 1641 | "mappings": [ 1642 | { 1643 | "options": { 1644 | "match": "null", 1645 | "result": { 1646 | "text": "N/A" 1647 | } 1648 | }, 1649 | "type": "special" 1650 | } 1651 | ], 1652 | "thresholds": { 1653 | "mode": "absolute", 1654 | "steps": [ 1655 | { 1656 | "color": "green", 1657 | "value": null 1658 | }, 1659 | { 1660 | "color": "red", 1661 | "value": 80 1662 | } 1663 | ] 1664 | }, 1665 | "unit": "none" 1666 | }, 1667 | "overrides": [] 1668 | }, 1669 | "gridPos": { 1670 | "h": 7, 1671 | "w": 6, 1672 | "x": 18, 1673 | "y": 43 1674 | }, 1675 | "id": 7, 1676 | "links": [], 1677 | "maxDataPoints": 100, 1678 | "options": { 1679 | "colorMode": "none", 1680 | "graphMode": "area", 1681 | "justifyMode": "auto", 1682 | "orientation": "horizontal", 1683 | "reduceOptions": { 1684 | "calcs": [ 1685 | "mean" 1686 | ], 1687 | "fields": "", 1688 | "values": false 1689 | }, 1690 | "textMode": "auto" 1691 | }, 1692 | "pluginVersion": "8.5.5", 1693 | "targets": [ 1694 | { 1695 | "expr": "count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) ", 1696 | "format": "time_series", 1697 | "intervalFactor": 2, 1698 | "refId": "A", 1699 | "step": 20 1700 | } 1701 | ], 1702 | "title": "Total Containers", 1703 | "type": "stat" 1704 | }, 1705 | { 1706 | "aliasColors": {}, 1707 | "bars": false, 1708 | "dashLength": 10, 1709 | "dashes": false, 1710 | "datasource": { 1711 | "type": 
"prometheus", 1712 | "uid": "PBFA97CFB590B2093" 1713 | }, 1714 | "fill": 1, 1715 | "fillGradient": 0, 1716 | "gridPos": { 1717 | "h": 7, 1718 | "w": 24, 1719 | "x": 0, 1720 | "y": 50 1721 | }, 1722 | "hiddenSeries": false, 1723 | "id": 17, 1724 | "legend": { 1725 | "alignAsTable": true, 1726 | "avg": true, 1727 | "current": false, 1728 | "max": true, 1729 | "min": true, 1730 | "rightSide": true, 1731 | "show": true, 1732 | "total": false, 1733 | "values": true 1734 | }, 1735 | "lines": true, 1736 | "linewidth": 1, 1737 | "links": [], 1738 | "nullPointMode": "null", 1739 | "options": { 1740 | "alertThreshold": true 1741 | }, 1742 | "percentage": false, 1743 | "pluginVersion": "8.5.5", 1744 | "pointradius": 5, 1745 | "points": false, 1746 | "renderer": "flot", 1747 | "seriesOverrides": [], 1748 | "spaceLength": 10, 1749 | "stack": false, 1750 | "steppedLine": false, 1751 | "targets": [ 1752 | { 1753 | "expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_meta) by (node_name)", 1754 | "format": "time_series", 1755 | "intervalFactor": 2, 1756 | "legendFormat": "IN {{node_name}}", 1757 | "refId": "A", 1758 | "step": 2 1759 | }, 1760 | { 1761 | "expr": "- sum(rate(container_network_transmit_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_meta) by (node_name)", 1762 | "format": "time_series", 1763 | "hide": false, 1764 | "intervalFactor": 2, 1765 | "legendFormat": "OUT {{node_name}}", 1766 | "metric": "", 1767 | "refId": "B", 1768 | "step": 2 1769 | } 1770 | ], 1771 | "thresholds": [], 1772 | "timeRegions": [], 1773 | "title": "Containers Network Traffic by Node", 1774 | "tooltip": { 1775 | "shared": true, 1776 | "sort": 0, 1777 | "value_type": "individual" 1778 | }, 1779 | "type": "graph", 1780 | "xaxis": { 1781 | "mode": 
"time", 1782 | "show": true, 1783 | "values": [] 1784 | }, 1785 | "yaxes": [ 1786 | { 1787 | "format": "Bps", 1788 | "logBase": 1, 1789 | "show": true 1790 | }, 1791 | { 1792 | "format": "short", 1793 | "logBase": 1, 1794 | "show": true 1795 | } 1796 | ], 1797 | "yaxis": { 1798 | "align": false 1799 | } 1800 | }, 1801 | { 1802 | "datasource": { 1803 | "type": "prometheus", 1804 | "uid": "PBFA97CFB590B2093" 1805 | }, 1806 | "fieldConfig": { 1807 | "defaults": { 1808 | "color": { 1809 | "mode": "palette-classic" 1810 | }, 1811 | "custom": { 1812 | "axisLabel": "", 1813 | "axisPlacement": "auto", 1814 | "barAlignment": 0, 1815 | "drawStyle": "line", 1816 | "fillOpacity": 0, 1817 | "gradientMode": "none", 1818 | "hideFrom": { 1819 | "legend": false, 1820 | "tooltip": false, 1821 | "viz": false 1822 | }, 1823 | "lineInterpolation": "linear", 1824 | "lineWidth": 1, 1825 | "pointSize": 5, 1826 | "scaleDistribution": { 1827 | "type": "linear" 1828 | }, 1829 | "showPoints": "auto", 1830 | "spanNulls": false, 1831 | "stacking": { 1832 | "group": "A", 1833 | "mode": "none" 1834 | }, 1835 | "thresholdsStyle": { 1836 | "mode": "off" 1837 | } 1838 | }, 1839 | "mappings": [], 1840 | "thresholds": { 1841 | "mode": "absolute", 1842 | "steps": [ 1843 | { 1844 | "color": "green", 1845 | "value": null 1846 | }, 1847 | { 1848 | "color": "red", 1849 | "value": 80 1850 | } 1851 | ] 1852 | } 1853 | }, 1854 | "overrides": [] 1855 | }, 1856 | "gridPos": { 1857 | "h": 8, 1858 | "w": 24, 1859 | "x": 0, 1860 | "y": 57 1861 | }, 1862 | "id": 26, 1863 | "options": { 1864 | "legend": { 1865 | "calcs": [], 1866 | "displayMode": "list", 1867 | "placement": "bottom" 1868 | }, 1869 | "tooltip": { 1870 | "mode": "single", 1871 | "sort": "none" 1872 | } 1873 | }, 1874 | "targets": [ 1875 | { 1876 | "datasource": { 1877 | "type": "prometheus", 1878 | "uid": "PBFA97CFB590B2093" 1879 | }, 1880 | "editorMode": "code", 1881 | "exemplar": false, 1882 | "expr": "min by (device) 
((node_filesystem_free_bytes{device=~\"/dev/disk/by-id/scsi-.*HC_Volume_.*\"} / node_filesystem_size_bytes{device=~\"/dev/disk/by-id/scsi-.*HC_Volume_.*\"}))", 1883 | "format": "time_series", 1884 | "legendFormat": "{{device}}", 1885 | "range": true, 1886 | "refId": "A" 1887 | } 1888 | ], 1889 | "title": "Hetzner Volume Free Space", 1890 | "type": "timeseries" 1891 | }, 1892 | { 1893 | "columns": [], 1894 | "datasource": { 1895 | "type": "prometheus", 1896 | "uid": "PBFA97CFB590B2093" 1897 | }, 1898 | "fontSize": "100%", 1899 | "gridPos": { 1900 | "h": 7, 1901 | "w": 24, 1902 | "x": 0, 1903 | "y": 65 1904 | }, 1905 | "hideTimeOverride": true, 1906 | "id": 20, 1907 | "links": [], 1908 | "scroll": true, 1909 | "showHeader": true, 1910 | "sort": { 1911 | "col": 0, 1912 | "desc": true 1913 | }, 1914 | "styles": [ 1915 | { 1916 | "alias": "Time", 1917 | "align": "auto", 1918 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1919 | "pattern": "Time", 1920 | "type": "hidden" 1921 | }, 1922 | { 1923 | "alias": "", 1924 | "align": "auto", 1925 | "colors": [ 1926 | "rgba(245, 54, 54, 0.9)", 1927 | "rgba(237, 129, 40, 0.89)", 1928 | "rgba(50, 172, 45, 0.97)" 1929 | ], 1930 | "decimals": 2, 1931 | "pattern": "/.*/", 1932 | "thresholds": [], 1933 | "type": "number", 1934 | "unit": "short" 1935 | } 1936 | ], 1937 | "targets": [ 1938 | { 1939 | "expr": "sum(node_meta) by (node_id, node_name, instance)", 1940 | "format": "table", 1941 | "instant": true, 1942 | "intervalFactor": 2, 1943 | "refId": "A", 1944 | "step": 2 1945 | } 1946 | ], 1947 | "timeFrom": "1s", 1948 | "title": "Cluster members", 1949 | "transform": "table", 1950 | "type": "table-old" 1951 | } 1952 | ], 1953 | "refresh": "30s", 1954 | "schemaVersion": 36, 1955 | "style": "dark", 1956 | "tags": [ 1957 | "swarmprom" 1958 | ], 1959 | "templating": { 1960 | "list": [ 1961 | { 1962 | "allValue": ".+", 1963 | "current": { 1964 | "selected": false, 1965 | "text": "All", 1966 | "value": "$__all" 1967 | }, 1968 | "datasource": { 
1969 | "type": "prometheus", 1970 | "uid": "PBFA97CFB590B2093" 1971 | }, 1972 | "definition": "", 1973 | "hide": 0, 1974 | "includeAll": true, 1975 | "label": "Swarm Node", 1976 | "multi": false, 1977 | "name": "node_id", 1978 | "options": [], 1979 | "query": { 1980 | "query": "node_meta", 1981 | "refId": "Prometheus-node_id-Variable-Query" 1982 | }, 1983 | "refresh": 1, 1984 | "regex": "/node_id=\"([^\"]+)\"/", 1985 | "skipUrlSync": false, 1986 | "sort": 0, 1987 | "type": "query" 1988 | }, 1989 | { 1990 | "auto": true, 1991 | "auto_count": 30, 1992 | "auto_min": "30s", 1993 | "current": { 1994 | "selected": false, 1995 | "text": "auto", 1996 | "value": "$__auto_interval_interval" 1997 | }, 1998 | "hide": 0, 1999 | "label": "Interval", 2000 | "name": "interval", 2001 | "options": [ 2002 | { 2003 | "selected": true, 2004 | "text": "auto", 2005 | "value": "$__auto_interval_interval" 2006 | }, 2007 | { 2008 | "selected": false, 2009 | "text": "1m", 2010 | "value": "1m" 2011 | }, 2012 | { 2013 | "selected": false, 2014 | "text": "10m", 2015 | "value": "10m" 2016 | }, 2017 | { 2018 | "selected": false, 2019 | "text": "30m", 2020 | "value": "30m" 2021 | }, 2022 | { 2023 | "selected": false, 2024 | "text": "1h", 2025 | "value": "1h" 2026 | }, 2027 | { 2028 | "selected": false, 2029 | "text": "6h", 2030 | "value": "6h" 2031 | }, 2032 | { 2033 | "selected": false, 2034 | "text": "12h", 2035 | "value": "12h" 2036 | }, 2037 | { 2038 | "selected": false, 2039 | "text": "1d", 2040 | "value": "1d" 2041 | }, 2042 | { 2043 | "selected": false, 2044 | "text": "7d", 2045 | "value": "7d" 2046 | }, 2047 | { 2048 | "selected": false, 2049 | "text": "14d", 2050 | "value": "14d" 2051 | }, 2052 | { 2053 | "selected": false, 2054 | "text": "30d", 2055 | "value": "30d" 2056 | } 2057 | ], 2058 | "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 2059 | "queryValue": "", 2060 | "refresh": 2, 2061 | "skipUrlSync": false, 2062 | "type": "interval" 2063 | } 2064 | ] 2065 | }, 2066 | "time": { 2067 | 
"from": "now-30m", 2068 | "to": "now" 2069 | }, 2070 | "timepicker": { 2071 | "refresh_intervals": [ 2072 | "5s", 2073 | "10s", 2074 | "30s", 2075 | "1m", 2076 | "5m", 2077 | "15m", 2078 | "30m", 2079 | "1h", 2080 | "2h", 2081 | "1d" 2082 | ], 2083 | "time_options": [ 2084 | "5m", 2085 | "15m", 2086 | "1h", 2087 | "6h", 2088 | "12h", 2089 | "24h", 2090 | "2d", 2091 | "7d", 2092 | "30d" 2093 | ] 2094 | }, 2095 | "timezone": "", 2096 | "title": "Docker Swarm Nodes", 2097 | "uid": "BPlb-Sgik", 2098 | "version": 2, 2099 | "weekStart": "" 2100 | } --------------------------------------------------------------------------------