├── .env ├── .github └── workflows │ └── shell.yml ├── .gitignore ├── README.md ├── deploy.sh ├── deploy_portainer.sh ├── deploy_registry.sh ├── deploy_swarmpit.sh ├── deploy_swarmprom.sh ├── deploy_traefik.sh ├── portainer.yml ├── registry.yml ├── swarmpit.yml ├── swarmprom.yml ├── swarmprom ├── .gitattributes ├── .gitignore ├── .travis.yml ├── alertmanager │ ├── Dockerfile │ └── conf │ │ ├── alertmanager.yml │ │ └── docker-entrypoint.sh ├── dockerd-exporter │ └── Caddyfile ├── grafana │ ├── .dockerignore │ ├── Dockerfile │ ├── dashboards │ │ ├── swarmprom-nodes-dash.json │ │ ├── swarmprom-prometheus-dash.json │ │ └── swarmprom-services-dash.json │ ├── datasources │ │ └── prometheus.yaml │ ├── screens │ │ ├── alertmanager-slack-v2.png │ │ ├── swarmprom-nodes-dash-v3.png │ │ ├── swarmprom-prometheus-dash-v3.png │ │ ├── swarmprom-services-dash-v3.png │ │ ├── unsee.png │ │ ├── weave-scope-hosts-v2.png │ │ └── weave-scope.png │ └── swarmprom_dashboards.yml ├── node-exporter │ ├── Dockerfile │ └── conf │ │ └── docker-entrypoint.sh └── prometheus │ ├── Dockerfile │ ├── conf │ ├── docker-entrypoint.sh │ ├── prometheus.yml │ └── weave-cortex.yml │ └── rules │ ├── swarm_node.rules.yml │ └── swarm_task.rules.yml └── traefik.yml /.env: -------------------------------------------------------------------------------- 1 | # used for Web UI of the base services 2 | UI_DOMAIN=ssl.sre.im 3 | 4 | # http basic auth 5 | USERNAME=admin 6 | # the value was created by: openssl passwd -apr1 yourPASSWORD 7 | HASHED_PASSWORD=$apr1$XwGg3UUb$/BLwwEU/V0llXX6NKXFki0 8 | 9 | # traefik proxy shared network 10 | TRAEFIK_NETWORK=traefik-public 11 | 12 | COMMON_IP_WHITELIST=127.0.0.1/32, 192.168.0.0/16, 10.0.0.0/8, 172.16.0.0/12 13 | -------------------------------------------------------------------------------- /.github/workflows/shell.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Shell 3 | 4 | on: 5 | push: 6 | branches: [master] 7 | 
pull_request: 8 | branches: [master] 9 | 10 | jobs: 11 | shellcheck: 12 | name: Shellcheck 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@master 16 | - name: Run ShellCheck 17 | uses: ludeeus/action-shellcheck@master 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | rsync.sh 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Name 2 | ==== 3 | 4 | `docker-swarm-infras` - Docker Swarm Mode Infrastructures Setup. 5 | 6 | 7 | Description 8 | =========== 9 | 10 | Integrates Traefik, Consul, Prometheus, Grafana, Swarmpit, Portainer and some other useful open source products 11 | into the Docker Swarm Mode cluster selectively by some shell scripts and docker-compose files, 12 | and then sets up a productive container cloud platform. 13 | 14 | 15 | Deployment 16 | ========== 17 | 18 | ## Swarm mode cluster 19 | 20 | Create a Swarm mode cluster first; refer to: 21 | ``` 22 | https://docs.docker.com/engine/swarm/swarm-tutorial/create-swarm/ 23 | ``` 24 | 25 | ## Deploy all infra stacks at one time 26 | 27 | ```bash 28 | ./deploy.sh 29 | ``` 30 | 31 | Or you can deploy one by one as follows. 32 | 33 | ## Traefik & Consul 34 | 35 | Set up Traefik as a global load balancer/proxy and Consul to store configurations and HTTPS certificates. 
36 | 37 | ### Deploy traefik-consul stack on one manager node 38 | 39 | ```bash 40 | ./deploy_traefik.sh 41 | ``` 42 | 43 | Then execute some commands to check: 44 | ```bash 45 | # all nodes in swarm mode cluster 46 | docker node ls 47 | 48 | # all stacks in swarm mode cluster 49 | docker stack ls 50 | 51 | # all services in swarm mode cluster 52 | docker service ls 53 | 54 | # services in one stack 55 | docker stack services traefik-consul 56 | 57 | # tasks in one stack 58 | docker stack ps traefik-consul 59 | 60 | # tasks in one service 61 | docker service ps traefik-consul_traefik 62 | 63 | # tasks on a given node 64 | docker node ps self/node_id/node_hostname 65 | 66 | # logs of a service 67 | docker service logs traefik-consul_traefik -f 68 | 69 | # logs of a task/container 70 | docker container logs traefik-consul_traefik.2.8bn0pn4jg2c0y2bu94sftj12l 71 | ``` 72 | 73 | Relevant destroy commands: 74 | ```bash 75 | # destroy a stack 76 | docker stack rm traefik-consul 77 | 78 | # destroy a service 79 | docker service rm traefik-consul_traefik 80 | 81 | # destroying a task/container has no lasting effect, 82 | # because the task/container will be restarted automatically right away. 83 | docker container rm -f traefik-consul_traefik.2.8bn0pn4jg2c0y2bu94sftj12l 84 | ``` 85 | 86 | If traefik.yml or .env variables have been changed, just execute the shell script again, 87 | and the stack will be updated automatically: 88 | ``` 89 | ./deploy_traefik.sh 90 | ``` 91 | 92 | ### Put your domain cert and key into Consul 93 | 94 | ```bash 95 | # cert 96 | docker container exec -it traefik-consul_consul-leader... consul kv put traefik/tls/certificates/wildcard.$UI_DOMAIN/certFile "your cert content" 97 | # key 98 | docker container exec -it traefik-consul_consul-leader... 
consul kv put traefik/tls/certificates/wildcard.$UI_DOMAIN/keyFile "your key content" 99 | ``` 100 | 101 | > https://www.consul.io/docs/commands/kv 102 | 103 | ### Browser 104 | ``` 105 | https://traefik.$UI_DOMAIN 106 | https://consul.$UI_DOMAIN 107 | ``` 108 | 109 | ## Registry 110 | 111 | Simple private image registry. 112 | 113 | ### Deploy 114 | 115 | ```bash 116 | ./deploy_registry.sh 117 | ``` 118 | 119 | ### Browser 120 | ``` 121 | https://reg.$UI_DOMAIN/v2/ 122 | ``` 123 | 124 | ## Swarmprom (Prometheus & Grafana & Unsee & Alertmanager) 125 | 126 | Swarmprom is actually just a set of tools pre-configured in a smart way for a Docker Swarm cluster. 127 | 128 | ### Deploy 129 | 130 | ```bash 131 | ./deploy_swarmprom.sh 132 | ``` 133 | 134 | ### Browser 135 | ``` 136 | https://grafana.$UI_DOMAIN 137 | https://alertmanager.$UI_DOMAIN 138 | https://unsee.$UI_DOMAIN 139 | https://prometheus.$UI_DOMAIN 140 | ``` 141 | 142 | ## Swarmpit 143 | 144 | Swarmpit provides a nice and clean way to manage your Docker Swarm cluster. 145 | 146 | ### Deploy 147 | 148 | ```bash 149 | ./deploy_swarmpit.sh 150 | ``` 151 | 152 | ### Browser 153 | ``` 154 | https://swarmpit.$UI_DOMAIN 155 | ``` 156 | 157 | ## Portainer 158 | 159 | Portainer is a web UI (user interface) that allows you to see the state of your Docker services in a Docker Swarm mode cluster and manage it. 
160 | 161 | ### Deploy 162 | 163 | ```bash 164 | ./deploy_portainer.sh 165 | ``` 166 | 167 | ### Browser 168 | ``` 169 | https://portainer.$UI_DOMAIN 170 | ``` 171 | 172 | References 173 | ========== 174 | 175 | https://dockerswarm.rocks/ 176 | 177 | https://docs.traefik.io/ 178 | 179 | https://github.com/stefanprodan/swarmprom 180 | 181 | https://github.com/swarmpit/swarmpit 182 | 183 | https://github.com/portainer/portainer 184 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Deploy all infra stacks in order: Traefik, Registry, Swarmprom, Swarmpit, Portainer. 3 | # With set -e the first failing sub-script aborts the whole run. 4 | set -e 5 | 6 | # Switch to the directory containing this script before invoking the ./deploy_*.sh scripts. 7 | ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) 8 | cd "$ROOT_DIR" || exit 1 9 | 10 | echo "Start deploying Traefik ..." 11 | ./deploy_traefik.sh 12 | 13 | echo "Start deploying Registry ..." 14 | ./deploy_registry.sh 15 | 16 | echo "Start deploying Swarmprom ..." 17 | ./deploy_swarmprom.sh 18 | 19 | echo "Start deploying Swarmpit ..." 20 | ./deploy_swarmpit.sh 21 | 22 | echo "Start deploying Portainer ..." 
23 | ./deploy_portainer.sh 24 | -------------------------------------------------------------------------------- /deploy_portainer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Deploy the portainer stack from portainer.yml on the current swarm node. 3 | 4 | set -e 5 | 6 | # Export every non-comment, non-blank line of ./.env (path is relative to the current directory). 7 | IFS=$'\n' 8 | while read -r var;do 9 | # shellcheck disable=SC2163 10 | export "$var" 11 | done < <(grep -Ev '^#|^$' .env) 12 | 13 | 14 | export STACK_NAME=portainer 15 | # shellcheck disable=SC2155 16 | export NODE_ID=$(docker info -f '{{.Swarm.NodeID}}') 17 | # Label this node; portainer.yml constrains the portainer service to node.labels.portainer.portainer-data == true. 18 | docker node update --label-add $STACK_NAME.portainer-data=true "$NODE_ID" 19 | # Rewrite the traefik-public network name in portainer.yml to $TRAEFIK_NETWORK (edits the file in place). 20 | sed -i "s/traefik-public/$TRAEFIK_NETWORK/" portainer.yml 21 | docker stack deploy -c portainer.yml $STACK_NAME 22 | 23 | 24 | echo "Next access follows in browser: 25 | https://portainer.$UI_DOMAIN 26 | " 27 | -------------------------------------------------------------------------------- /deploy_registry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Deploy the registry stack from registry.yml on the current swarm node. 3 | 4 | set -e 5 | 6 | # Export every non-comment, non-blank line of ./.env (path is relative to the current directory). 7 | IFS=$'\n' 8 | while read -r var;do 9 | # shellcheck disable=SC2163 10 | export "$var" 11 | done < <(grep -Ev '^#|^$' .env) 12 | 13 | 14 | export STACK_NAME=registry 15 | # shellcheck disable=SC2155 16 | export NODE_ID=$(docker info -f '{{.Swarm.NodeID}}') 17 | 18 | # Label this node; registry.yml constrains the registry service to node.labels.${STACK_NAME}.image-data == true. 19 | docker node update --label-add ${STACK_NAME}.image-data=true "$NODE_ID" 20 | # Rewrite the traefik-public network name in registry.yml to $TRAEFIK_NETWORK (edits the file in place). 21 | sed -i "s/traefik-public/$TRAEFIK_NETWORK/" registry.yml 22 | docker stack deploy -c registry.yml $STACK_NAME 23 | 24 | 25 | echo "Next access follows in browser: 26 | https://reg.$UI_DOMAIN/v2 27 | " 28 | -------------------------------------------------------------------------------- /deploy_swarmpit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Deploy the swarmpit stack from swarmpit.yml on the current swarm node. 3 | 4 | set -e 5 | 6 | # Export every non-comment, non-blank line of ./.env (path is relative to the current directory). 7 | IFS=$'\n' 8 | while read -r var;do 9 | # shellcheck disable=SC2163 10 | export "$var" 11 | done < <(grep -Ev '^#|^$' .env) 12 | 13 | 14 | export 
STACK_NAME=swarmpit 15 | # shellcheck disable=SC2155 16 | export NODE_ID=$(docker info -f '{{.Swarm.NodeID}}') 17 | 18 | 19 | docker node update --label-add swarmpit.db-data=true "$NODE_ID" 20 | docker node update --label-add swarmpit.influx-data=true "$NODE_ID" 21 | 22 | sed -i "s/traefik-public/$TRAEFIK_NETWORK/" swarmpit.yml 23 | docker stack deploy -c swarmpit.yml $STACK_NAME 24 | 25 | 26 | echo "Next access follows in browser: 27 | https://swarmpit.$UI_DOMAIN 28 | " 29 | -------------------------------------------------------------------------------- /deploy_swarmprom.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # https://github.com/stefanprodan/swarmprom 4 | # https://prometheus.io/docs/prometheus/latest/getting_started/ 5 | # 6 | 7 | set -e 8 | 9 | 10 | IFS=$'\n' 11 | while read -r var;do 12 | # shellcheck disable=SC2163 13 | export "$var" 14 | done < <(grep -Ev '^#|^$' .env) 15 | 16 | export STACK_NAME=swarmprom 17 | 18 | # NOTE: If you forge the address and username of the mail sender, dest email server may reject the mail. 19 | #export GF_SMTP_FROM_ADDRESS=admin@test.com 20 | #export GF_SMTP_FROM_NAME=admin 21 | 22 | 23 | sed -i "s/traefik-public/$TRAEFIK_NETWORK/" swarmprom.yml 24 | docker stack deploy -c swarmprom.yml $STACK_NAME 25 | 26 | 27 | echo "Next access follows in browser: 28 | https://grafana.$UI_DOMAIN 29 | https://alertmanager.$UI_DOMAIN 30 | https://unsee.$UI_DOMAIN 31 | https://prometheus.$UI_DOMAIN 32 | " 33 | -------------------------------------------------------------------------------- /deploy_traefik.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Deploy the traefik-consul stack from traefik.yml on the current swarm manager node. 3 | set -e 4 | 5 | # Export every non-comment, non-blank line of ./.env (path is relative to the current directory). 6 | IFS=$'\n' 7 | while read -r var;do 8 | # shellcheck disable=SC2163 9 | export "$var" 10 | done < <(grep -Ev '^#|^$' .env) 11 | 12 | export STACK_NAME=traefik-consul 13 | # 3 or 5, not more. 
if you have a single node, set 0 14 | export CONSUL_REPLICAS=3 15 | # the value number is equal to the count of swarm mode managers. 16 | # if you just have a single manager node, set 1 17 | export TRAEFIK_REPLICAS=3 18 | # id of the current manager node 19 | # shellcheck disable=SC2155 20 | export NODE_ID=$(docker info -f '{{.Swarm.NodeID}}') 21 | 22 | # Create the shared overlay network only when it is missing, so re-running the script does not abort under set -e. 23 | docker network inspect "$TRAEFIK_NETWORK" >/dev/null 2>&1 || docker network create --driver=overlay "$TRAEFIK_NETWORK" 24 | 25 | docker node update --label-add ${STACK_NAME}.consul-data-leader=true "$NODE_ID" 26 | 27 | sed -i "s/traefik-public/$TRAEFIK_NETWORK/" traefik.yml 28 | docker stack deploy -c traefik.yml $STACK_NAME 29 | 30 | 31 | echo "Next please put your domain certs in consul as follows: 32 | docker container exec -it traefik-consul_consul-leader... consul kv put traefik/tls/certificates/wildcard.$UI_DOMAIN/certFile \"your cert content\" 33 | docker container exec -it traefik-consul_consul-leader... consul kv put traefik/tls/certificates/wildcard.$UI_DOMAIN/keyFile \"your key content\" 34 | 35 | Then access follows in browser: 36 | https://traefik.$UI_DOMAIN 37 | https://consul.$UI_DOMAIN 38 | " 39 | -------------------------------------------------------------------------------- /portainer.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | services: 4 | agent: 5 | image: portainer/agent 6 | environment: 7 | AGENT_CLUSTER_ADDR: tasks.agent 8 | volumes: 9 | - /var/run/docker.sock:/var/run/docker.sock 10 | - /var/lib/docker/volumes:/var/lib/docker/volumes 11 | networks: 12 | - agent-network 13 | deploy: 14 | mode: global 15 | placement: 16 | constraints: 17 | - node.platform.os == linux 18 | 19 | portainer: 20 | image: portainer/portainer 21 | command: -H tcp://tasks.agent:9001 --tlsskipverify 22 | volumes: 23 | - portainer-data:/data 24 | networks: 25 | - agent-network 26 | - $TRAEFIK_NETWORK 27 | deploy: 28 | placement: 29 | constraints: 30 | - node.role == manager 31 | - 
node.labels.portainer.portainer-data == true 32 | labels: 33 | - "traefik.enable=true" 34 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 35 | - "traefik.http.routers.portainer.rule=Host(`portainer.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 36 | - "traefik.http.services.portainer.loadbalancer.server.port=9000" 37 | - "traefik.http.routers.portainer.entryPoints=web, websecure" 38 | - "traefik.http.routers.portainer.tls=true" 39 | - "traefik.http.routers.portainer.middlewares=IpWhiteList" 40 | - "traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 41 | #- "traefik.http.middlewares.auth.basicauth.users=${USERNAME?Variable USERNAME not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set}" 42 | 43 | networks: 44 | agent-network: 45 | attachable: true 46 | traefik-public: 47 | external: true 48 | 49 | volumes: 50 | portainer-data: 51 | -------------------------------------------------------------------------------- /registry.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | services: 4 | 5 | registry: 6 | image: registry:2 7 | networks: 8 | - default 9 | - ${TRAEFIK_NETWORK} 10 | volumes: 11 | - image-data:/var/lib/registry 12 | deploy: 13 | placement: 14 | constraints: 15 | - node.labels.${STACK_NAME}.image-data == true 16 | labels: 17 | - "traefik.enable=true" 18 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 19 | - "traefik.http.routers.registry.rule=Host(`reg.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 20 | - "traefik.http.services.registry.loadbalancer.server.port=5000" 21 | - "traefik.http.routers.registry.entryPoints=web, websecure" 22 | - "traefik.http.routers.registry.tls=true" 23 | - "traefik.http.routers.registry.middlewares=IpWhiteList" 24 | - "traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 25 | 26 | volumes: 27 | image-data: 28 | 29 | networks: 30 | traefik-public: 31 | external: true 32 | 
-------------------------------------------------------------------------------- /swarmpit.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | services: 4 | app: 5 | image: swarmpit/swarmpit:latest 6 | environment: 7 | - SWARMPIT_DB=http://db:5984 8 | - SWARMPIT_INFLUXDB=http://influxdb:8086 9 | volumes: 10 | - /var/run/docker.sock:/var/run/docker.sock:ro 11 | #ports: 12 | #- 888:8080 13 | networks: 14 | - net 15 | - $TRAEFIK_NETWORK 16 | deploy: 17 | resources: 18 | limits: 19 | cpus: '0.50' 20 | memory: 1024M 21 | reservations: 22 | cpus: '0.25' 23 | memory: 512M 24 | placement: 25 | constraints: 26 | - node.role == manager 27 | labels: 28 | - "traefik.enable=true" 29 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 30 | - "traefik.http.routers.swarmpit.rule=Host(`swarmpit.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 31 | - "traefik.http.services.swarmpit.loadbalancer.server.port=8080" 32 | - "traefik.http.routers.swarmpit.entryPoints=web, websecure" 33 | - "traefik.http.routers.swarmpit.tls=true" 34 | - "traefik.http.routers.swarmpit.middlewares=IpWhiteList" 35 | - "traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 36 | #- "traefik.http.middlewares.auth.basicauth.users=${USERNAME?Variable USERNAME not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set}" 37 | 38 | db: 39 | image: couchdb:2.3.0 40 | volumes: 41 | - db-data:/opt/couchdb/data 42 | networks: 43 | - net 44 | deploy: 45 | resources: 46 | limits: 47 | cpus: '0.30' 48 | memory: 512M 49 | reservations: 50 | cpus: '0.15' 51 | memory: 256M 52 | placement: 53 | constraints: 54 | - node.labels.${STACK_NAME}.db-data == true 55 | 56 | influxdb: 57 | image: influxdb:1.7 58 | volumes: 59 | - influx-data:/var/lib/influxdb 60 | networks: 61 | - net 62 | deploy: 63 | resources: 64 | reservations: 65 | cpus: '0.3' 66 | memory: 128M 67 | limits: 68 | cpus: '0.6' 69 | memory: 512M 70 | placement: 71 | constraints: 72 | - 
node.labels.${STACK_NAME}.influx-data == true 73 | 74 | agent: 75 | image: swarmpit/agent:latest 76 | environment: 77 | - DOCKER_API_VERSION=1.35 78 | volumes: 79 | - /var/run/docker.sock:/var/run/docker.sock:ro 80 | networks: 81 | - net 82 | deploy: 83 | mode: global 84 | resources: 85 | limits: 86 | cpus: '0.10' 87 | memory: 64M 88 | reservations: 89 | cpus: '0.05' 90 | memory: 32M 91 | 92 | networks: 93 | net: 94 | driver: overlay 95 | attachable: true 96 | traefik-public: 97 | external: true 98 | 99 | volumes: 100 | db-data: 101 | driver: local 102 | influx-data: 103 | driver: local 104 | -------------------------------------------------------------------------------- /swarmprom.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | 3 | networks: 4 | # the dedicated network of this stack 5 | net: 6 | driver: overlay 7 | attachable: true 8 | # the network of the global traefik proxy 9 | traefik-public: 10 | external: true 11 | 12 | volumes: 13 | prometheus: {} 14 | grafana: {} 15 | alertmanager: {} 16 | 17 | configs: 18 | dockerd_config: 19 | file: ./swarmprom/dockerd-exporter/Caddyfile 20 | node_rules: 21 | file: ./swarmprom/prometheus/rules/swarm_node.rules.yml 22 | task_rules: 23 | file: ./swarmprom/prometheus/rules/swarm_task.rules.yml 24 | 25 | services: 26 | 27 | # dockerd-exporter (Docker daemon metrics collector, requires Docker experimental metrics-addr to be enabled) 28 | dockerd-exporter: 29 | image: stefanprodan/caddy 30 | networks: 31 | - net 32 | environment: 33 | - DOCKER_GWBRIDGE_IP=172.18.0.1 34 | configs: 35 | - source: dockerd_config 36 | target: /etc/caddy/Caddyfile 37 | deploy: 38 | mode: global 39 | resources: 40 | limits: 41 | memory: 128M 42 | reservations: 43 | memory: 64M 44 | 45 | # cadvisor (containers metrics collector) 46 | cadvisor: 47 | image: google/cadvisor 48 | networks: 49 | - net 50 | command: -logtostderr -docker_only 51 | volumes: 52 | - 
/var/run/docker.sock:/var/run/docker.sock:ro 53 | - /:/rootfs:ro 54 | - /var/run:/var/run 55 | - /sys:/sys:ro 56 | - /var/lib/docker/:/var/lib/docker:ro 57 | deploy: 58 | mode: global 59 | resources: 60 | limits: 61 | memory: 128M 62 | reservations: 63 | memory: 64M 64 | 65 | # grafana (visualize metrics) http://:3000 66 | grafana: 67 | image: stefanprodan/swarmprom-grafana:5.3.4 68 | networks: 69 | - default 70 | - net 71 | - $TRAEFIK_NETWORK 72 | environment: 73 | - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin} 74 | - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} 75 | - GF_USERS_ALLOW_SIGN_UP=false 76 | - GF_SERVER_ROOT_URL=https://grafana.${UI_DOMAIN} 77 | - GF_SMTP_ENABLED=${GF_SMTP_ENABLED:-true} 78 | - GF_SMTP_HOST=${GF_SMTP_HOST:-smtp:25} 79 | #- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS:-grafana@test.com} 80 | #- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME:-Grafana} 81 | #- GF_SMTP_USER=${GF_SMTP_USER} 82 | #- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD} 83 | volumes: 84 | - grafana:/var/lib/grafana 85 | deploy: 86 | mode: replicated 87 | replicas: 1 88 | placement: 89 | constraints: 90 | - node.role == manager 91 | resources: 92 | limits: 93 | memory: 128M 94 | reservations: 95 | memory: 64M 96 | labels: 97 | - "traefik.enable=true" 98 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 99 | - "traefik.http.routers.grafana.rule=Host(`grafana.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 100 | - "traefik.http.services.grafana.loadbalancer.server.port=3000" 101 | - "traefik.http.routers.grafana.entryPoints=web, websecure" 102 | - "traefik.http.routers.grafana.tls=true" 103 | - "traefik.http.routers.grafana.middlewares=IpWhiteList" 104 | - "traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 105 | #- "traefik.http.middlewares.auth.basicauth.users=${USERNAME?Variable USERNAME not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set}" 106 | 107 | smtp: 108 | image: namshi/smtp 109 | networks: 110 | - default 111 | 112 | # alertmanager 
(alerts dispatcher) http://:9093 113 | alertmanager: 114 | image: stefanprodan/swarmprom-alertmanager:v0.14.0 115 | networks: 116 | - default 117 | - net 118 | - $TRAEFIK_NETWORK 119 | environment: 120 | - SLACK_URL=${SLACK_URL:-https://hooks.slack.com/services/TOKEN} 121 | - SLACK_CHANNEL=${SLACK_CHANNEL:-general} 122 | - SLACK_USER=${SLACK_USER:-alertmanager} 123 | command: 124 | - '--config.file=/etc/alertmanager/alertmanager.yml' 125 | - '--storage.path=/alertmanager' 126 | volumes: 127 | - alertmanager:/alertmanager 128 | deploy: 129 | mode: replicated 130 | replicas: 1 131 | placement: 132 | constraints: 133 | - node.role == manager 134 | resources: 135 | limits: 136 | memory: 128M 137 | reservations: 138 | memory: 64M 139 | labels: 140 | - "traefik.enable=true" 141 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 142 | - "traefik.http.routers.alertmanager.rule=Host(`alertmanager.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 143 | - "traefik.http.services.alertmanager.loadbalancer.server.port=9093" 144 | - "traefik.http.routers.alertmanager.entryPoints=web, websecure" 145 | - "traefik.http.routers.alertmanager.tls=true" 146 | - "traefik.http.routers.alertmanager.middlewares=IpWhiteList, auth" 147 | - "traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 148 | - "traefik.http.middlewares.auth.basicauth.users=${USERNAME?Variable USERNAME not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set}" 149 | 150 | # unsee (alert manager dashboard) http://:9094 151 | unsee: 152 | image: cloudflare/unsee:v0.8.0 153 | networks: 154 | - default 155 | - net 156 | - $TRAEFIK_NETWORK 157 | environment: 158 | - "ALERTMANAGER_URIS=default:http://alertmanager:9093" 159 | deploy: 160 | mode: replicated 161 | replicas: 1 162 | labels: 163 | - "traefik.enable=true" 164 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 165 | - "traefik.http.routers.unsee.rule=Host(`unsee.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 166 | - 
"traefik.http.services.unsee.loadbalancer.server.port=8080" 167 | - "traefik.http.routers.unsee.entryPoints=web, websecure" 168 | - "traefik.http.routers.unsee.tls=true" 169 | - "traefik.http.routers.unsee.middlewares=IpWhiteList, auth" 170 | - "traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 171 | - "traefik.http.middlewares.auth.basicauth.users=${USERNAME?Variable USERNAME not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set}" 172 | 173 | # node-exporter (host metrics collector) 174 | node-exporter: 175 | image: stefanprodan/swarmprom-node-exporter:v0.16.0 176 | networks: 177 | - net 178 | environment: 179 | - NODE_ID={{.Node.ID}} 180 | volumes: 181 | - /proc:/host/proc:ro 182 | - /sys:/host/sys:ro 183 | - /:/rootfs:ro 184 | - /etc/hostname:/etc/nodename 185 | command: 186 | - '--path.sysfs=/host/sys' 187 | - '--path.procfs=/host/proc' 188 | - '--collector.textfile.directory=/etc/node-exporter/' 189 | - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' 190 | - '--no-collector.ipvs' 191 | deploy: 192 | mode: global 193 | resources: 194 | limits: 195 | memory: 128M 196 | reservations: 197 | memory: 64M 198 | 199 | # prometheus (metrics database) http://:9090 200 | prometheus: 201 | image: stefanprodan/swarmprom-prometheus:v2.5.0 202 | networks: 203 | - default 204 | - net 205 | - $TRAEFIK_NETWORK 206 | command: 207 | - '--config.file=/etc/prometheus/prometheus.yml' 208 | - '--storage.tsdb.path=/prometheus' 209 | - '--storage.tsdb.retention=${PROMETHEUS_RETENTION:-24h}' 210 | volumes: 211 | - prometheus:/prometheus 212 | configs: 213 | - source: node_rules 214 | target: /etc/prometheus/swarm_node.rules.yml 215 | - source: task_rules 216 | target: /etc/prometheus/swarm_task.rules.yml 217 | deploy: 218 | mode: replicated 219 | replicas: 1 220 | placement: 221 | constraints: 222 | - node.role == manager 223 | resources: 224 | limits: 225 | memory: 2048M 226 | reservations: 227 | memory: 128M 228 | 
labels: 229 | - "traefik.enable=true" 230 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 231 | - "traefik.http.routers.prometheus.rule=Host(`prometheus.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 232 | - "traefik.http.services.prometheus.loadbalancer.server.port=9090" 233 | - "traefik.http.routers.prometheus.entryPoints=web, websecure" 234 | - "traefik.http.routers.prometheus.tls=true" 235 | - "traefik.http.routers.prometheus.middlewares=IpWhiteList, auth" 236 | - "traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 237 | - "traefik.http.middlewares.auth.basicauth.users=${USERNAME?Variable USERNAME not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set}" 238 | -------------------------------------------------------------------------------- /swarmprom/.gitattributes: -------------------------------------------------------------------------------- 1 | # Denote all files that are truly binary and should not be modified. 2 | *.png binary 3 | *.jpg binary 4 | -------------------------------------------------------------------------------- /swarmprom/.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | .idea/ 17 | .DS_Store 18 | -------------------------------------------------------------------------------- /swarmprom/.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | services: 4 | - docker 5 | 6 | before_install: 7 | - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 8 | - sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu 
$(lsb_release -cs) stable" 9 | - sudo apt-get update 10 | - sudo apt-get -y install docker-ce 11 | - sudo service docker restart 12 | 13 | script: 14 | - cd prometheus && docker build -t stefanprodan/swarmprom-prometheus:$TRAVIS_BUILD_NUMBER . 15 | - cd .. && cd node-exporter && docker build -t stefanprodan/swarmprom-node-exporter:$TRAVIS_BUILD_NUMBER . 16 | - cd .. && cd alertmanager && docker build -t stefanprodan/swarmprom-alertmanager:$TRAVIS_BUILD_NUMBER . 17 | - cd .. && cd grafana && docker build -t stefanprodan/swarmprom-grafana:$TRAVIS_BUILD_NUMBER . 18 | 19 | after_success: 20 | - if [ -z "$DOCKER_USER" ]; then 21 | echo "PR build, skipping Docker Hub push"; 22 | else 23 | docker login -u "$DOCKER_USER" -p "$DOCKER_PASS"; 24 | docker tag stefanprodan/swarmprom-prometheus:$TRAVIS_BUILD_NUMBER stefanprodan/swarmprom-prometheus:v2.5.0; 25 | docker push stefanprodan/swarmprom-prometheus:v2.5.0; 26 | docker tag stefanprodan/swarmprom-node-exporter:$TRAVIS_BUILD_NUMBER stefanprodan/swarmprom-node-exporter:v0.16.0; 27 | docker push stefanprodan/swarmprom-node-exporter:v0.16.0; 28 | docker tag stefanprodan/swarmprom-alertmanager:$TRAVIS_BUILD_NUMBER stefanprodan/swarmprom-alertmanager:v0.15.3; 29 | docker push stefanprodan/swarmprom-alertmanager:v0.15.3; 30 | docker tag stefanprodan/swarmprom-grafana:$TRAVIS_BUILD_NUMBER stefanprodan/swarmprom-grafana:5.3.4; 31 | docker push stefanprodan/swarmprom-grafana:5.3.4; 32 | fi 33 | -------------------------------------------------------------------------------- /swarmprom/alertmanager/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM prom/alertmanager:v0.15.3 2 | 3 | COPY conf /etc/alertmanager/ 4 | 5 | ENTRYPOINT [ "/etc/alertmanager/docker-entrypoint.sh" ] 6 | CMD [ "--config.file=/etc/alertmanager/alertmanager.yml", \ 7 | "--storage.path=/alertmanager" ] 8 | -------------------------------------------------------------------------------- 
/swarmprom/alertmanager/conf/alertmanager.yml: -------------------------------------------------------------------------------- 1 | route: 2 | receiver: 'slack' 3 | 4 | receivers: 5 | - name: 'slack' 6 | slack_configs: 7 | - send_resolved: true 8 | text: "{{ .CommonAnnotations.description }}" 9 | #username: # 10 | #channel: # 11 | #api_url: # 12 | -------------------------------------------------------------------------------- /swarmprom/alertmanager/conf/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | # shellcheck disable=SC2002 4 | cat /etc/alertmanager/alertmanager.yml | 5 | sed "s@#api_url: #@api_url: '$SLACK_URL'@g" | 6 | sed "s@#channel: #@channel: '#$SLACK_CHANNEL'@g" | 7 | sed "s@#username: #@username: '$SLACK_USER'@g" > /tmp/alertmanager.yml 8 | 9 | mv /tmp/alertmanager.yml /etc/alertmanager/alertmanager.yml 10 | 11 | set -- /bin/alertmanager "$@" 12 | 13 | exec "$@" 14 | -------------------------------------------------------------------------------- /swarmprom/dockerd-exporter/Caddyfile: -------------------------------------------------------------------------------- 1 | :9323 { 2 | proxy / {$DOCKER_GWBRIDGE_IP}:9323 { 3 | transparent 4 | } 5 | 6 | errors stderr 7 | tls off 8 | } 9 | -------------------------------------------------------------------------------- /swarmprom/grafana/.dockerignore: -------------------------------------------------------------------------------- 1 | screens/ 2 | -------------------------------------------------------------------------------- /swarmprom/grafana/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM grafana/grafana:5.3.4 2 | # https://hub.docker.com/r/grafana/grafana/tags/ 3 | 4 | COPY datasources /etc/grafana/provisioning/datasources/ 5 | COPY swarmprom_dashboards.yml /etc/grafana/provisioning/dashboards/ 6 | COPY dashboards /etc/grafana/dashboards/ 7 | 8 | ENV 
GF_SECURITY_ADMIN_PASSWORD=admin \ 9 | GF_SECURITY_ADMIN_USER=admin \ 10 | GF_PATHS_PROVISIONING=/etc/grafana/provisioning/ 11 | -------------------------------------------------------------------------------- /swarmprom/grafana/dashboards/swarmprom-nodes-dash.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "description": "Docker Swarm nodes metrics", 16 | "editable": true, 17 | "gnetId": null, 18 | "graphTooltip": 0, 19 | "iteration": 1547535746076, 20 | "links": [], 21 | "panels": [ 22 | { 23 | "cacheTimeout": null, 24 | "colorBackground": false, 25 | "colorValue": false, 26 | "colors": [ 27 | "rgba(245, 54, 54, 0.9)", 28 | "rgba(237, 129, 40, 0.89)", 29 | "rgba(50, 172, 45, 0.97)" 30 | ], 31 | "datasource": "Prometheus", 32 | "decimals": 1, 33 | "format": "s", 34 | "gauge": { 35 | "maxValue": 100, 36 | "minValue": 0, 37 | "show": false, 38 | "thresholdLabels": false, 39 | "thresholdMarkers": true 40 | }, 41 | "gridPos": { 42 | "h": 4, 43 | "w": 6, 44 | "x": 0, 45 | "y": 0 46 | }, 47 | "hideTimeOverride": true, 48 | "id": 2, 49 | "interval": null, 50 | "links": [], 51 | "mappingType": 1, 52 | "mappingTypes": [ 53 | { 54 | "name": "value to text", 55 | "value": 1 56 | }, 57 | { 58 | "name": "range to text", 59 | "value": 2 60 | } 61 | ], 62 | "maxDataPoints": 100, 63 | "nullPointMode": "connected", 64 | "nullText": null, 65 | "postfix": "", 66 | "postfixFontSize": "50%", 67 | "prefix": "", 68 | "prefixFontSize": "50%", 69 | "rangeMaps": [ 70 | { 71 | "from": "null", 72 | "text": "N/A", 73 | "to": "null" 74 | } 75 | ], 76 | "sparkline": { 77 | "fillColor": "rgba(31, 118, 189, 0.18)", 78 | "full": false, 79 | "lineColor": "rgb(31, 120, 193)", 80 | "show": false 81 
| }, 82 | "tableColumn": "", 83 | "targets": [ 84 | { 85 | "expr": "topk(1, sum((node_time_seconds - node_boot_time_seconds) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name))", 86 | "format": "time_series", 87 | "intervalFactor": 2, 88 | "legendFormat": "", 89 | "refId": "A", 90 | "step": 2 91 | } 92 | ], 93 | "thresholds": "", 94 | "timeFrom": "1m", 95 | "timeShift": null, 96 | "title": "Uptime", 97 | "type": "singlestat", 98 | "valueFontSize": "80%", 99 | "valueMaps": [ 100 | { 101 | "op": "=", 102 | "text": "N/A", 103 | "value": "null" 104 | } 105 | ], 106 | "valueName": "avg" 107 | }, 108 | { 109 | "cacheTimeout": null, 110 | "colorBackground": false, 111 | "colorValue": false, 112 | "colors": [ 113 | "rgba(245, 54, 54, 0.9)", 114 | "rgba(237, 129, 40, 0.89)", 115 | "rgba(50, 172, 45, 0.97)" 116 | ], 117 | "datasource": null, 118 | "decimals": 0, 119 | "format": "none", 120 | "gauge": { 121 | "maxValue": 100, 122 | "minValue": 0, 123 | "show": false, 124 | "thresholdLabels": false, 125 | "thresholdMarkers": true 126 | }, 127 | "gridPos": { 128 | "h": 4, 129 | "w": 6, 130 | "x": 6, 131 | "y": 0 132 | }, 133 | "id": 1, 134 | "interval": null, 135 | "links": [], 136 | "mappingType": 1, 137 | "mappingTypes": [ 138 | { 139 | "name": "value to text", 140 | "value": 1 141 | }, 142 | { 143 | "name": "range to text", 144 | "value": 2 145 | } 146 | ], 147 | "maxDataPoints": 100, 148 | "nullPointMode": "connected", 149 | "nullText": null, 150 | "postfix": "", 151 | "postfixFontSize": "50%", 152 | "prefix": "", 153 | "prefixFontSize": "50%", 154 | "rangeMaps": [ 155 | { 156 | "from": "null", 157 | "text": "N/A", 158 | "to": "null" 159 | } 160 | ], 161 | "sparkline": { 162 | "fillColor": "rgba(31, 118, 189, 0.18)", 163 | "full": false, 164 | "lineColor": "rgb(31, 120, 193)", 165 | "show": false 166 | }, 167 | "tableColumn": "", 168 | "targets": [ 169 | { 170 | "expr": "count(node_meta * on(instance) group_left(node_name) 
node_meta{node_id=~\"$node_id\"})", 171 | "format": "time_series", 172 | "intervalFactor": 2, 173 | "legendFormat": "", 174 | "refId": "A", 175 | "step": 20 176 | } 177 | ], 178 | "thresholds": "", 179 | "title": "Nodes", 180 | "type": "singlestat", 181 | "valueFontSize": "80%", 182 | "valueMaps": [ 183 | { 184 | "op": "=", 185 | "text": "N/A", 186 | "value": "null" 187 | } 188 | ], 189 | "valueName": "avg" 190 | }, 191 | { 192 | "cacheTimeout": null, 193 | "colorBackground": false, 194 | "colorValue": false, 195 | "colors": [ 196 | "rgba(245, 54, 54, 0.9)", 197 | "rgba(237, 129, 40, 0.89)", 198 | "rgba(50, 172, 45, 0.97)" 199 | ], 200 | "datasource": null, 201 | "decimals": 0, 202 | "format": "short", 203 | "gauge": { 204 | "maxValue": 100, 205 | "minValue": 0, 206 | "show": false, 207 | "thresholdLabels": false, 208 | "thresholdMarkers": true 209 | }, 210 | "gridPos": { 211 | "h": 4, 212 | "w": 6, 213 | "x": 12, 214 | "y": 0 215 | }, 216 | "hideTimeOverride": true, 217 | "id": 4, 218 | "interval": null, 219 | "links": [], 220 | "mappingType": 1, 221 | "mappingTypes": [ 222 | { 223 | "name": "value to text", 224 | "value": 1 225 | }, 226 | { 227 | "name": "range to text", 228 | "value": 2 229 | } 230 | ], 231 | "maxDataPoints": 100, 232 | "nullPointMode": "connected", 233 | "nullText": null, 234 | "postfix": "", 235 | "postfixFontSize": "50%", 236 | "prefix": "", 237 | "prefixFontSize": "50%", 238 | "rangeMaps": [ 239 | { 240 | "from": "null", 241 | "text": "N/A", 242 | "to": "null" 243 | } 244 | ], 245 | "sparkline": { 246 | "fillColor": "rgba(31, 118, 189, 0.18)", 247 | "full": false, 248 | "lineColor": "rgb(31, 120, 193)", 249 | "show": false 250 | }, 251 | "tableColumn": "", 252 | "targets": [ 253 | { 254 | "expr": "count(node_cpu_seconds_total{mode=\"idle\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 255 | "format": "time_series", 256 | "intervalFactor": 2, 257 | "legendFormat": "", 258 | "refId": "A", 259 | "step": 2 260 | } 261 
| ], 262 | "thresholds": "", 263 | "timeFrom": "1m", 264 | "timeShift": null, 265 | "title": "CPUs", 266 | "type": "singlestat", 267 | "valueFontSize": "80%", 268 | "valueMaps": [ 269 | { 270 | "op": "=", 271 | "text": "N/A", 272 | "value": "null" 273 | } 274 | ], 275 | "valueName": "avg" 276 | }, 277 | { 278 | "cacheTimeout": null, 279 | "colorBackground": false, 280 | "colorValue": false, 281 | "colors": [ 282 | "rgba(245, 54, 54, 0.9)", 283 | "rgba(237, 129, 40, 0.89)", 284 | "rgba(50, 172, 45, 0.97)" 285 | ], 286 | "datasource": null, 287 | "decimals": null, 288 | "format": "percent", 289 | "gauge": { 290 | "maxValue": 100, 291 | "minValue": 0, 292 | "show": true, 293 | "thresholdLabels": false, 294 | "thresholdMarkers": true 295 | }, 296 | "gridPos": { 297 | "h": 4, 298 | "w": 6, 299 | "x": 18, 300 | "y": 0 301 | }, 302 | "hideTimeOverride": true, 303 | "id": 11, 304 | "interval": null, 305 | "links": [], 306 | "mappingType": 1, 307 | "mappingTypes": [ 308 | { 309 | "name": "value to text", 310 | "value": 1 311 | }, 312 | { 313 | "name": "range to text", 314 | "value": 2 315 | } 316 | ], 317 | "maxDataPoints": 100, 318 | "nullPointMode": "connected", 319 | "nullText": null, 320 | "postfix": "", 321 | "postfixFontSize": "50%", 322 | "prefix": "", 323 | "prefixFontSize": "50%", 324 | "rangeMaps": [ 325 | { 326 | "from": "null", 327 | "text": "N/A", 328 | "to": "null" 329 | } 330 | ], 331 | "sparkline": { 332 | "fillColor": "rgba(31, 118, 189, 0.18)", 333 | "full": false, 334 | "lineColor": "rgb(31, 120, 193)", 335 | "show": false 336 | }, 337 | "tableColumn": "", 338 | "targets": [ 339 | { 340 | "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) ", 341 | "format": "time_series", 342 | "intervalFactor": 2, 343 | "legendFormat": "", 344 | "refId": "A", 345 
| "step": 2 346 | } 347 | ], 348 | "thresholds": "10,25,100", 349 | "timeFrom": "1m", 350 | "timeShift": null, 351 | "title": "CPU Idle", 352 | "type": "singlestat", 353 | "valueFontSize": "80%", 354 | "valueMaps": [ 355 | { 356 | "op": "=", 357 | "text": "N/A", 358 | "value": "null" 359 | } 360 | ], 361 | "valueName": "avg" 362 | }, 363 | { 364 | "aliasColors": {}, 365 | "bars": false, 366 | "dashLength": 10, 367 | "dashes": false, 368 | "datasource": null, 369 | "decimals": 2, 370 | "fill": 1, 371 | "gridPos": { 372 | "h": 7, 373 | "w": 12, 374 | "x": 0, 375 | "y": 4 376 | }, 377 | "id": 13, 378 | "legend": { 379 | "alignAsTable": true, 380 | "avg": true, 381 | "current": true, 382 | "hideEmpty": false, 383 | "hideZero": false, 384 | "max": true, 385 | "min": true, 386 | "rightSide": true, 387 | "show": false, 388 | "total": false, 389 | "values": true 390 | }, 391 | "lines": true, 392 | "linewidth": 1, 393 | "links": [], 394 | "nullPointMode": "null", 395 | "percentage": false, 396 | "pointradius": 5, 397 | "points": false, 398 | "renderer": "flot", 399 | "seriesOverrides": [], 400 | "spaceLength": 10, 401 | "stack": false, 402 | "steppedLine": false, 403 | "targets": [ 404 | { 405 | "expr": "node_load5 * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}", 406 | "format": "time_series", 407 | "intervalFactor": 2, 408 | "legendFormat": "load5 {{node_name}}", 409 | "refId": "A", 410 | "step": 2 411 | } 412 | ], 413 | "thresholds": [], 414 | "timeFrom": null, 415 | "timeShift": null, 416 | "title": "System Load by Node", 417 | "tooltip": { 418 | "shared": true, 419 | "sort": 2, 420 | "value_type": "individual" 421 | }, 422 | "type": "graph", 423 | "xaxis": { 424 | "buckets": null, 425 | "mode": "time", 426 | "name": null, 427 | "show": true, 428 | "values": [] 429 | }, 430 | "yaxes": [ 431 | { 432 | "format": "short", 433 | "label": null, 434 | "logBase": 1, 435 | "max": null, 436 | "min": null, 437 | "show": true 438 | }, 439 | { 440 | "format": 
"short", 441 | "label": null, 442 | "logBase": 1, 443 | "max": null, 444 | "min": null, 445 | "show": true 446 | } 447 | ], 448 | "yaxis": { 449 | "align": false, 450 | "alignLevel": null 451 | } 452 | }, 453 | { 454 | "aliasColors": {}, 455 | "bars": false, 456 | "dashLength": 10, 457 | "dashes": false, 458 | "datasource": null, 459 | "decimals": 2, 460 | "fill": 1, 461 | "gridPos": { 462 | "h": 7, 463 | "w": 12, 464 | "x": 12, 465 | "y": 4 466 | }, 467 | "id": 14, 468 | "legend": { 469 | "alignAsTable": true, 470 | "avg": true, 471 | "current": true, 472 | "hideEmpty": true, 473 | "hideZero": true, 474 | "max": true, 475 | "min": true, 476 | "rightSide": true, 477 | "show": false, 478 | "total": false, 479 | "values": true 480 | }, 481 | "lines": true, 482 | "linewidth": 1, 483 | "links": [], 484 | "nullPointMode": "null as zero", 485 | "percentage": false, 486 | "pointradius": 5, 487 | "points": false, 488 | "renderer": "flot", 489 | "seriesOverrides": [], 490 | "spaceLength": 10, 491 | "stack": false, 492 | "steppedLine": false, 493 | "targets": [ 494 | { 495 | "expr": "100 - (avg(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) by (node_name))", 496 | "format": "time_series", 497 | "intervalFactor": 2, 498 | "legendFormat": "{{node_name}}", 499 | "refId": "A", 500 | "step": 2 501 | } 502 | ], 503 | "thresholds": [], 504 | "timeFrom": null, 505 | "timeShift": null, 506 | "title": "CPU Usage by Node", 507 | "tooltip": { 508 | "shared": true, 509 | "sort": 2, 510 | "value_type": "individual" 511 | }, 512 | "type": "graph", 513 | "xaxis": { 514 | "buckets": null, 515 | "mode": "time", 516 | "name": null, 517 | "show": true, 518 | "values": [] 519 | }, 520 | "yaxes": [ 521 | { 522 | "format": "percent", 523 | "label": null, 524 | "logBase": 1, 525 | "max": "100", 526 | "min": null, 527 | "show": true 528 | }, 529 | { 530 | "format": "short", 531 | "label": null, 532 | "logBase": 1, 
533 | "max": null, 534 | "min": null, 535 | "show": true 536 | } 537 | ], 538 | "yaxis": { 539 | "align": false, 540 | "alignLevel": null 541 | } 542 | }, 543 | { 544 | "cacheTimeout": null, 545 | "colorBackground": false, 546 | "colorValue": false, 547 | "colors": [ 548 | "rgba(245, 54, 54, 0.9)", 549 | "rgba(237, 129, 40, 0.89)", 550 | "rgba(50, 172, 45, 0.97)" 551 | ], 552 | "datasource": null, 553 | "decimals": 1, 554 | "format": "decbytes", 555 | "gauge": { 556 | "maxValue": 100, 557 | "minValue": 0, 558 | "show": false, 559 | "thresholdLabels": false, 560 | "thresholdMarkers": true 561 | }, 562 | "gridPos": { 563 | "h": 4, 564 | "w": 3, 565 | "x": 0, 566 | "y": 11 567 | }, 568 | "hideTimeOverride": true, 569 | "id": 3, 570 | "interval": null, 571 | "links": [], 572 | "mappingType": 1, 573 | "mappingTypes": [ 574 | { 575 | "name": "value to text", 576 | "value": 1 577 | }, 578 | { 579 | "name": "range to text", 580 | "value": 2 581 | } 582 | ], 583 | "maxDataPoints": 100, 584 | "nullPointMode": "connected", 585 | "nullText": null, 586 | "postfix": "", 587 | "postfixFontSize": "50%", 588 | "prefix": "", 589 | "prefixFontSize": "50%", 590 | "rangeMaps": [ 591 | { 592 | "from": "null", 593 | "text": "N/A", 594 | "to": "null" 595 | } 596 | ], 597 | "sparkline": { 598 | "fillColor": "rgba(31, 118, 189, 0.18)", 599 | "full": false, 600 | "lineColor": "rgb(31, 120, 193)", 601 | "show": false 602 | }, 603 | "tableColumn": "", 604 | "targets": [ 605 | { 606 | "expr": "sum(node_memory_MemTotal_bytes * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 607 | "format": "time_series", 608 | "intervalFactor": 2, 609 | "legendFormat": "", 610 | "refId": "A", 611 | "step": 20 612 | } 613 | ], 614 | "thresholds": "", 615 | "timeFrom": null, 616 | "timeShift": null, 617 | "title": "Total Memory", 618 | "type": "singlestat", 619 | "valueFontSize": "80%", 620 | "valueMaps": [ 621 | { 622 | "op": "=", 623 | "text": "N/A", 624 | "value": "null" 625 | } 626 | ], 
627 | "valueName": "avg" 628 | }, 629 | { 630 | "cacheTimeout": null, 631 | "colorBackground": false, 632 | "colorValue": false, 633 | "colors": [ 634 | "rgba(245, 54, 54, 0.9)", 635 | "rgba(237, 129, 40, 0.89)", 636 | "rgba(50, 172, 45, 0.97)" 637 | ], 638 | "datasource": null, 639 | "format": "percent", 640 | "gauge": { 641 | "maxValue": 100, 642 | "minValue": 0, 643 | "show": true, 644 | "thresholdLabels": false, 645 | "thresholdMarkers": true 646 | }, 647 | "gridPos": { 648 | "h": 4, 649 | "w": 4, 650 | "x": 3, 651 | "y": 11 652 | }, 653 | "id": 8, 654 | "interval": null, 655 | "links": [], 656 | "mappingType": 1, 657 | "mappingTypes": [ 658 | { 659 | "name": "value to text", 660 | "value": 1 661 | }, 662 | { 663 | "name": "range to text", 664 | "value": 2 665 | } 666 | ], 667 | "maxDataPoints": 100, 668 | "nullPointMode": "connected", 669 | "nullText": null, 670 | "postfix": "", 671 | "postfixFontSize": "50%", 672 | "prefix": "", 673 | "prefixFontSize": "50%", 674 | "rangeMaps": [ 675 | { 676 | "from": "null", 677 | "text": "N/A", 678 | "to": "null" 679 | } 680 | ], 681 | "sparkline": { 682 | "fillColor": "rgba(31, 118, 189, 0.18)", 683 | "full": false, 684 | "lineColor": "rgb(31, 120, 193)", 685 | "show": false 686 | }, 687 | "tableColumn": "", 688 | "targets": [ 689 | { 690 | "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 691 | "format": "time_series", 692 | "intervalFactor": 2, 693 | "legendFormat": "", 694 | "refId": "A", 695 | "step": 20 696 | } 697 | ], 698 | "thresholds": "10,25,100", 699 | "title": "Available Memory", 700 | "type": "singlestat", 701 | "valueFontSize": "80%", 702 | "valueMaps": [ 703 | { 704 | "op": "=", 705 | "text": "N/A", 706 | "value": "null" 707 | } 708 | ], 709 | "valueName": "avg" 710 | }, 711 | { 712 | "cacheTimeout": null, 713 | 
"colorBackground": false, 714 | "colorValue": false, 715 | "colors": [ 716 | "rgba(245, 54, 54, 0.9)", 717 | "rgba(237, 129, 40, 0.89)", 718 | "rgba(50, 172, 45, 0.97)" 719 | ], 720 | "datasource": null, 721 | "decimals": 1, 722 | "format": "decbytes", 723 | "gauge": { 724 | "maxValue": 100, 725 | "minValue": 0, 726 | "show": false, 727 | "thresholdLabels": false, 728 | "thresholdMarkers": true 729 | }, 730 | "gridPos": { 731 | "h": 4, 732 | "w": 3, 733 | "x": 7, 734 | "y": 11 735 | }, 736 | "hideTimeOverride": true, 737 | "id": 22, 738 | "interval": null, 739 | "links": [], 740 | "mappingType": 1, 741 | "mappingTypes": [ 742 | { 743 | "name": "value to text", 744 | "value": 1 745 | }, 746 | { 747 | "name": "range to text", 748 | "value": 2 749 | } 750 | ], 751 | "maxDataPoints": 100, 752 | "nullPointMode": "connected", 753 | "nullText": null, 754 | "postfix": "", 755 | "postfixFontSize": "50%", 756 | "prefix": "", 757 | "prefixFontSize": "50%", 758 | "rangeMaps": [ 759 | { 760 | "from": "null", 761 | "text": "N/A", 762 | "to": "null" 763 | } 764 | ], 765 | "sparkline": { 766 | "fillColor": "rgba(31, 118, 189, 0.18)", 767 | "full": false, 768 | "lineColor": "rgb(31, 120, 193)", 769 | "show": false 770 | }, 771 | "tableColumn": "", 772 | "targets": [ 773 | { 774 | "expr": "sum((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 775 | "format": "time_series", 776 | "intervalFactor": 2, 777 | "legendFormat": "", 778 | "refId": "A", 779 | "step": 20 780 | } 781 | ], 782 | "thresholds": "", 783 | "timeFrom": null, 784 | "timeShift": null, 785 | "title": "Total swap memory used", 786 | "type": "singlestat", 787 | "valueFontSize": "80%", 788 | "valueMaps": [ 789 | { 790 | "op": "=", 791 | "text": "N/A", 792 | "value": "null" 793 | } 794 | ], 795 | "valueName": "avg" 796 | }, 797 | { 798 | "cacheTimeout": null, 799 | "colorBackground": false, 800 | "colorValue": false, 801 | "colors": [ 802 | 
"rgba(50, 172, 45, 0.97)", 803 | "rgba(237, 129, 40, 0.89)", 804 | "rgba(245, 54, 54, 0.9)" 805 | ], 806 | "datasource": null, 807 | "format": "percent", 808 | "gauge": { 809 | "maxValue": 100, 810 | "minValue": 0, 811 | "show": true, 812 | "thresholdLabels": false, 813 | "thresholdMarkers": true 814 | }, 815 | "gridPos": { 816 | "h": 4, 817 | "w": 4, 818 | "x": 10, 819 | "y": 11 820 | }, 821 | "id": 23, 822 | "interval": null, 823 | "links": [], 824 | "mappingType": 1, 825 | "mappingTypes": [ 826 | { 827 | "name": "value to text", 828 | "value": 1 829 | }, 830 | { 831 | "name": "range to text", 832 | "value": 2 833 | } 834 | ], 835 | "maxDataPoints": 100, 836 | "nullPointMode": "connected", 837 | "nullText": null, 838 | "postfix": "", 839 | "postfixFontSize": "50%", 840 | "prefix": "", 841 | "prefixFontSize": "50%", 842 | "rangeMaps": [ 843 | { 844 | "from": "null", 845 | "text": "N/A", 846 | "to": "null" 847 | } 848 | ], 849 | "sparkline": { 850 | "fillColor": "rgba(31, 118, 189, 0.18)", 851 | "full": false, 852 | "lineColor": "rgb(31, 120, 193)", 853 | "show": false 854 | }, 855 | "tableColumn": "", 856 | "targets": [ 857 | { 858 | "expr": "sum(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_SwapTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 859 | "format": "time_series", 860 | "intervalFactor": 2, 861 | "legendFormat": "", 862 | "refId": "A", 863 | "step": 20 864 | } 865 | ], 866 | "thresholds": "5,10,100", 867 | "title": "Used swap memory", 868 | "type": "singlestat", 869 | "valueFontSize": "80%", 870 | "valueMaps": [ 871 | { 872 | "op": "=", 873 | "text": "N/A", 874 | "value": "null" 875 | } 876 | ], 877 | "valueName": "avg" 878 | }, 879 | { 880 | "cacheTimeout": null, 881 | "colorBackground": false, 882 | "colorValue": false, 883 | "colors": [ 884 | "rgba(50, 172, 45, 0.97)", 885 | "rgba(237, 129, 40, 
0.89)", 886 | "rgba(245, 54, 54, 0.9)" 887 | ], 888 | "datasource": null, 889 | "format": "percent", 890 | "gauge": { 891 | "maxValue": 100, 892 | "minValue": 0, 893 | "show": true, 894 | "thresholdLabels": false, 895 | "thresholdMarkers": true 896 | }, 897 | "gridPos": { 898 | "h": 4, 899 | "w": 3, 900 | "x": 14, 901 | "y": 11 902 | }, 903 | "id": 24, 904 | "interval": null, 905 | "links": [], 906 | "mappingType": 1, 907 | "mappingTypes": [ 908 | { 909 | "name": "value to text", 910 | "value": 1 911 | }, 912 | { 913 | "name": "range to text", 914 | "value": 2 915 | } 916 | ], 917 | "maxDataPoints": 100, 918 | "nullPointMode": "connected", 919 | "nullText": null, 920 | "postfix": "", 921 | "postfixFontSize": "50%", 922 | "prefix": "", 923 | "prefixFontSize": "50%", 924 | "rangeMaps": [ 925 | { 926 | "from": "null", 927 | "text": "N/A", 928 | "to": "null" 929 | } 930 | ], 931 | "sparkline": { 932 | "fillColor": "rgba(31, 118, 189, 0.18)", 933 | "full": false, 934 | "lineColor": "rgb(31, 120, 193)", 935 | "show": false 936 | }, 937 | "tableColumn": "", 938 | "targets": [ 939 | { 940 | "expr": "sum(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 941 | "format": "time_series", 942 | "intervalFactor": 2, 943 | "legendFormat": "", 944 | "refId": "A", 945 | "step": 20 946 | } 947 | ], 948 | "thresholds": "5,10,100", 949 | "title": "Swap used / total RAM memory ratio", 950 | "type": "singlestat", 951 | "valueFontSize": "80%", 952 | "valueMaps": [ 953 | { 954 | "op": "=", 955 | "text": "N/A", 956 | "value": "null" 957 | } 958 | ], 959 | "valueName": "avg" 960 | }, 961 | { 962 | "cacheTimeout": null, 963 | "colorBackground": false, 964 | "colorValue": false, 965 | "colors": [ 966 | "rgba(245, 54, 54, 0.9)", 967 | "rgba(237, 129, 40, 0.89)", 968 | "rgba(50, 172, 45, 
0.97)" 969 | ], 970 | "datasource": null, 971 | "decimals": 1, 972 | "format": "decbytes", 973 | "gauge": { 974 | "maxValue": 100, 975 | "minValue": 0, 976 | "show": false, 977 | "thresholdLabels": false, 978 | "thresholdMarkers": true 979 | }, 980 | "gridPos": { 981 | "h": 4, 982 | "w": 3, 983 | "x": 17, 984 | "y": 11 985 | }, 986 | "hideTimeOverride": true, 987 | "id": 9, 988 | "interval": null, 989 | "links": [], 990 | "mappingType": 1, 991 | "mappingTypes": [ 992 | { 993 | "name": "value to text", 994 | "value": 1 995 | }, 996 | { 997 | "name": "range to text", 998 | "value": 2 999 | } 1000 | ], 1001 | "maxDataPoints": 100, 1002 | "nullPointMode": "connected", 1003 | "nullText": null, 1004 | "postfix": "", 1005 | "postfixFontSize": "50%", 1006 | "prefix": "", 1007 | "prefixFontSize": "50%", 1008 | "rangeMaps": [ 1009 | { 1010 | "from": "null", 1011 | "text": "N/A", 1012 | "to": "null" 1013 | } 1014 | ], 1015 | "sparkline": { 1016 | "fillColor": "rgba(31, 118, 189, 0.18)", 1017 | "full": false, 1018 | "lineColor": "rgb(31, 120, 193)", 1019 | "show": false 1020 | }, 1021 | "tableColumn": "", 1022 | "targets": [ 1023 | { 1024 | "expr": "sum(node_filesystem_size_bytes{mountpoint=\"/rootfs\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 1025 | "format": "time_series", 1026 | "intervalFactor": 2, 1027 | "legendFormat": "", 1028 | "refId": "A", 1029 | "step": 20 1030 | } 1031 | ], 1032 | "thresholds": "", 1033 | "timeFrom": null, 1034 | "timeShift": null, 1035 | "title": "Total Disk Space", 1036 | "type": "singlestat", 1037 | "valueFontSize": "80%", 1038 | "valueMaps": [ 1039 | { 1040 | "op": "=", 1041 | "text": "N/A", 1042 | "value": "null" 1043 | } 1044 | ], 1045 | "valueName": "avg" 1046 | }, 1047 | { 1048 | "cacheTimeout": null, 1049 | "colorBackground": false, 1050 | "colorValue": false, 1051 | "colors": [ 1052 | "rgba(245, 54, 54, 0.9)", 1053 | "rgba(237, 129, 40, 0.89)", 1054 | "rgba(50, 172, 45, 0.97)" 1055 | ], 1056 | "datasource": 
null, 1057 | "format": "percent", 1058 | "gauge": { 1059 | "maxValue": 100, 1060 | "minValue": 0, 1061 | "show": true, 1062 | "thresholdLabels": false, 1063 | "thresholdMarkers": true 1064 | }, 1065 | "gridPos": { 1066 | "h": 4, 1067 | "w": 4, 1068 | "x": 20, 1069 | "y": 11 1070 | }, 1071 | "id": 10, 1072 | "interval": null, 1073 | "links": [], 1074 | "mappingType": 1, 1075 | "mappingTypes": [ 1076 | { 1077 | "name": "value to text", 1078 | "value": 1 1079 | }, 1080 | { 1081 | "name": "range to text", 1082 | "value": 2 1083 | } 1084 | ], 1085 | "maxDataPoints": 100, 1086 | "nullPointMode": "connected", 1087 | "nullText": null, 1088 | "postfix": "", 1089 | "postfixFontSize": "50%", 1090 | "prefix": "", 1091 | "prefixFontSize": "50%", 1092 | "rangeMaps": [ 1093 | { 1094 | "from": "null", 1095 | "text": "N/A", 1096 | "to": "null" 1097 | } 1098 | ], 1099 | "sparkline": { 1100 | "fillColor": "rgba(31, 118, 189, 0.18)", 1101 | "full": false, 1102 | "lineColor": "rgb(31, 120, 193)", 1103 | "show": false 1104 | }, 1105 | "tableColumn": "", 1106 | "targets": [ 1107 | { 1108 | "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/rootfs\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 1109 | "format": "time_series", 1110 | "intervalFactor": 2, 1111 | "legendFormat": "", 1112 | "refId": "A", 1113 | "step": 20 1114 | } 1115 | ], 1116 | "thresholds": "10,25,100", 1117 | "title": "Available Disk Space", 1118 | "type": "singlestat", 1119 | "valueFontSize": "80%", 1120 | "valueMaps": [ 1121 | { 1122 | "op": "=", 1123 | "text": "N/A", 1124 | "value": "null" 1125 | } 1126 | ], 1127 | "valueName": "avg" 1128 | }, 1129 | { 1130 | "aliasColors": {}, 1131 | "bars": false, 1132 | "dashLength": 10, 1133 | "dashes": false, 1134 | "datasource": null, 1135 | "fill": 1, 1136 | "gridPos": { 1137 | "h": 7, 1138 | "w": 
24, 1139 | "x": 0, 1140 | "y": 15 1141 | }, 1142 | "id": 15, 1143 | "legend": { 1144 | "alignAsTable": true, 1145 | "avg": true, 1146 | "current": false, 1147 | "max": true, 1148 | "min": true, 1149 | "rightSide": true, 1150 | "show": true, 1151 | "total": false, 1152 | "values": true 1153 | }, 1154 | "lines": true, 1155 | "linewidth": 1, 1156 | "links": [], 1157 | "nullPointMode": "null", 1158 | "percentage": false, 1159 | "pointradius": 5, 1160 | "points": false, 1161 | "renderer": "flot", 1162 | "seriesOverrides": [], 1163 | "spaceLength": 10, 1164 | "stack": true, 1165 | "steppedLine": false, 1166 | "targets": [ 1167 | { 1168 | "expr": "sum((node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Cached_bytes - node_memory_Buffers_bytes - node_memory_Slab_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1169 | "format": "time_series", 1170 | "intervalFactor": 2, 1171 | "legendFormat": "Used {{node_name}}", 1172 | "refId": "A", 1173 | "step": 2 1174 | }, 1175 | { 1176 | "expr": "sum(node_memory_Cached_bytes * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1177 | "format": "time_series", 1178 | "intervalFactor": 2, 1179 | "legendFormat": "Cached {{node_name}}", 1180 | "refId": "B", 1181 | "step": 2 1182 | } 1183 | ], 1184 | "thresholds": [], 1185 | "timeFrom": null, 1186 | "timeShift": null, 1187 | "title": "Memory usage by Node", 1188 | "tooltip": { 1189 | "shared": true, 1190 | "sort": 0, 1191 | "value_type": "individual" 1192 | }, 1193 | "type": "graph", 1194 | "xaxis": { 1195 | "buckets": null, 1196 | "mode": "time", 1197 | "name": null, 1198 | "show": true, 1199 | "values": [] 1200 | }, 1201 | "yaxes": [ 1202 | { 1203 | "format": "decbytes", 1204 | "label": null, 1205 | "logBase": 1, 1206 | "max": null, 1207 | "min": null, 1208 | "show": true 1209 | }, 1210 | { 1211 | "format": "short", 1212 | "label": null, 1213 | "logBase": 1, 1214 | "max": null, 1215 | "min": null, 
1216 | "show": true 1217 | } 1218 | ], 1219 | "yaxis": { 1220 | "align": false, 1221 | "alignLevel": null 1222 | } 1223 | }, 1224 | { 1225 | "aliasColors": {}, 1226 | "bars": false, 1227 | "dashLength": 10, 1228 | "dashes": false, 1229 | "datasource": null, 1230 | "fill": 1, 1231 | "gridPos": { 1232 | "h": 7, 1233 | "w": 24, 1234 | "x": 0, 1235 | "y": 22 1236 | }, 1237 | "id": 21, 1238 | "legend": { 1239 | "alignAsTable": true, 1240 | "avg": true, 1241 | "current": false, 1242 | "max": true, 1243 | "min": true, 1244 | "rightSide": true, 1245 | "show": true, 1246 | "total": false, 1247 | "values": true 1248 | }, 1249 | "lines": true, 1250 | "linewidth": 1, 1251 | "links": [], 1252 | "nullPointMode": "null", 1253 | "percentage": false, 1254 | "pointradius": 5, 1255 | "points": false, 1256 | "renderer": "flot", 1257 | "seriesOverrides": [], 1258 | "spaceLength": 10, 1259 | "stack": true, 1260 | "steppedLine": false, 1261 | "targets": [ 1262 | { 1263 | "expr": "sum((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1264 | "format": "time_series", 1265 | "intervalFactor": 2, 1266 | "legendFormat": "Used {{node_name}}", 1267 | "refId": "A", 1268 | "step": 2 1269 | } 1270 | ], 1271 | "thresholds": [], 1272 | "timeFrom": null, 1273 | "timeShift": null, 1274 | "title": "Swap memory usage by Node", 1275 | "tooltip": { 1276 | "shared": true, 1277 | "sort": 0, 1278 | "value_type": "individual" 1279 | }, 1280 | "type": "graph", 1281 | "xaxis": { 1282 | "buckets": null, 1283 | "mode": "time", 1284 | "name": null, 1285 | "show": true, 1286 | "values": [] 1287 | }, 1288 | "yaxes": [ 1289 | { 1290 | "format": "decbytes", 1291 | "label": null, 1292 | "logBase": 1, 1293 | "max": null, 1294 | "min": "0", 1295 | "show": true 1296 | }, 1297 | { 1298 | "format": "short", 1299 | "label": null, 1300 | "logBase": 1, 1301 | "max": null, 1302 | "min": null, 1303 | "show": true 1304 | } 1305 | ], 
1306 | "yaxis": { 1307 | "align": false, 1308 | "alignLevel": null 1309 | } 1310 | }, 1311 | { 1312 | "aliasColors": {}, 1313 | "bars": false, 1314 | "dashLength": 10, 1315 | "dashes": false, 1316 | "datasource": null, 1317 | "decimals": 2, 1318 | "fill": 1, 1319 | "gridPos": { 1320 | "h": 7, 1321 | "w": 24, 1322 | "x": 0, 1323 | "y": 29 1324 | }, 1325 | "id": 16, 1326 | "legend": { 1327 | "alignAsTable": true, 1328 | "avg": true, 1329 | "current": false, 1330 | "max": true, 1331 | "min": true, 1332 | "rightSide": true, 1333 | "show": true, 1334 | "total": false, 1335 | "values": true 1336 | }, 1337 | "lines": true, 1338 | "linewidth": 1, 1339 | "links": [], 1340 | "nullPointMode": "null as zero", 1341 | "percentage": false, 1342 | "pointradius": 5, 1343 | "points": false, 1344 | "renderer": "flot", 1345 | "seriesOverrides": [], 1346 | "spaceLength": 10, 1347 | "stack": false, 1348 | "steppedLine": false, 1349 | "targets": [ 1350 | { 1351 | "expr": "sum(irate(node_disk_read_bytes_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1352 | "format": "time_series", 1353 | "intervalFactor": 2, 1354 | "legendFormat": "Read {{node_name}}", 1355 | "refId": "A", 1356 | "step": 2 1357 | }, 1358 | { 1359 | "expr": "sum(irate(node_disk_written_bytes_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1360 | "format": "time_series", 1361 | "intervalFactor": 2, 1362 | "legendFormat": "Written {{node_name}}", 1363 | "refId": "B", 1364 | "step": 2 1365 | } 1366 | ], 1367 | "thresholds": [], 1368 | "timeFrom": null, 1369 | "timeShift": null, 1370 | "title": "Disk I/O by Node", 1371 | "tooltip": { 1372 | "shared": true, 1373 | "sort": 0, 1374 | "value_type": "individual" 1375 | }, 1376 | "type": "graph", 1377 | "xaxis": { 1378 | "buckets": null, 1379 | "mode": "time", 1380 | "name": null, 1381 | "show": true, 1382 | "values": [] 1383 | }, 1384 | "yaxes": [ 1385 | { 1386 | 
"format": "Bps", 1387 | "label": null, 1388 | "logBase": 1, 1389 | "max": null, 1390 | "min": null, 1391 | "show": true 1392 | }, 1393 | { 1394 | "format": "short", 1395 | "label": null, 1396 | "logBase": 1, 1397 | "max": null, 1398 | "min": null, 1399 | "show": true 1400 | } 1401 | ], 1402 | "yaxis": { 1403 | "align": false, 1404 | "alignLevel": null 1405 | } 1406 | }, 1407 | { 1408 | "aliasColors": {}, 1409 | "bars": false, 1410 | "dashLength": 10, 1411 | "dashes": false, 1412 | "datasource": null, 1413 | "decimals": 2, 1414 | "fill": 1, 1415 | "gridPos": { 1416 | "h": 7, 1417 | "w": 12, 1418 | "x": 0, 1419 | "y": 36 1420 | }, 1421 | "id": 18, 1422 | "legend": { 1423 | "alignAsTable": true, 1424 | "avg": true, 1425 | "current": true, 1426 | "max": true, 1427 | "min": true, 1428 | "rightSide": true, 1429 | "show": false, 1430 | "total": false, 1431 | "values": true 1432 | }, 1433 | "lines": true, 1434 | "linewidth": 1, 1435 | "links": [], 1436 | "nullPointMode": "null as zero", 1437 | "percentage": false, 1438 | "pointradius": 5, 1439 | "points": false, 1440 | "renderer": "flot", 1441 | "seriesOverrides": [], 1442 | "spaceLength": 10, 1443 | "stack": false, 1444 | "steppedLine": false, 1445 | "targets": [ 1446 | { 1447 | "expr": "sum(irate(node_disk_reads_completed_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1448 | "format": "time_series", 1449 | "intervalFactor": 2, 1450 | "legendFormat": "Reads {{node_name}}", 1451 | "refId": "A", 1452 | "step": 2 1453 | }, 1454 | { 1455 | "expr": "sum(irate(node_disk_writes_completed_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", 1456 | "format": "time_series", 1457 | "intervalFactor": 2, 1458 | "legendFormat": "Writes {{node_name}}", 1459 | "refId": "B", 1460 | "step": 2 1461 | } 1462 | ], 1463 | "thresholds": [], 1464 | "timeFrom": null, 1465 | "timeShift": null, 1466 | "title": "IOPS by Node", 1467 | 
"tooltip": { 1468 | "shared": true, 1469 | "sort": 0, 1470 | "value_type": "individual" 1471 | }, 1472 | "type": "graph", 1473 | "xaxis": { 1474 | "buckets": null, 1475 | "mode": "time", 1476 | "name": null, 1477 | "show": true, 1478 | "values": [] 1479 | }, 1480 | "yaxes": [ 1481 | { 1482 | "format": "short", 1483 | "label": null, 1484 | "logBase": 1, 1485 | "max": null, 1486 | "min": null, 1487 | "show": true 1488 | }, 1489 | { 1490 | "format": "short", 1491 | "label": null, 1492 | "logBase": 1, 1493 | "max": null, 1494 | "min": null, 1495 | "show": true 1496 | } 1497 | ], 1498 | "yaxis": { 1499 | "align": false, 1500 | "alignLevel": null 1501 | } 1502 | }, 1503 | { 1504 | "aliasColors": {}, 1505 | "bars": false, 1506 | "dashLength": 10, 1507 | "dashes": false, 1508 | "datasource": null, 1509 | "decimals": 2, 1510 | "fill": 1, 1511 | "gridPos": { 1512 | "h": 7, 1513 | "w": 12, 1514 | "x": 12, 1515 | "y": 36 1516 | }, 1517 | "id": 19, 1518 | "legend": { 1519 | "alignAsTable": true, 1520 | "avg": true, 1521 | "current": true, 1522 | "hideEmpty": true, 1523 | "hideZero": true, 1524 | "max": true, 1525 | "min": true, 1526 | "rightSide": true, 1527 | "show": false, 1528 | "total": false, 1529 | "values": true 1530 | }, 1531 | "lines": true, 1532 | "linewidth": 1, 1533 | "links": [], 1534 | "nullPointMode": "null as zero", 1535 | "percentage": false, 1536 | "pointradius": 5, 1537 | "points": false, 1538 | "renderer": "flot", 1539 | "seriesOverrides": [], 1540 | "spaceLength": 10, 1541 | "stack": false, 1542 | "steppedLine": false, 1543 | "targets": [ 1544 | { 1545 | "expr": "(avg(irate(node_cpu_seconds_total{mode=\"iowait\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) by (node_name))", 1546 | "format": "time_series", 1547 | "intervalFactor": 2, 1548 | "legendFormat": "{{node_name}}", 1549 | "refId": "A", 1550 | "step": 2 1551 | } 1552 | ], 1553 | "thresholds": [], 1554 | "timeFrom": null, 1555 | "timeShift": null, 1556 | 
"title": "CPU IO Wait by Node", 1557 | "tooltip": { 1558 | "shared": true, 1559 | "sort": 2, 1560 | "value_type": "individual" 1561 | }, 1562 | "type": "graph", 1563 | "xaxis": { 1564 | "buckets": null, 1565 | "mode": "time", 1566 | "name": null, 1567 | "show": true, 1568 | "values": [] 1569 | }, 1570 | "yaxes": [ 1571 | { 1572 | "format": "percent", 1573 | "label": null, 1574 | "logBase": 1, 1575 | "max": null, 1576 | "min": null, 1577 | "show": true 1578 | }, 1579 | { 1580 | "format": "short", 1581 | "label": null, 1582 | "logBase": 1, 1583 | "max": null, 1584 | "min": null, 1585 | "show": true 1586 | } 1587 | ], 1588 | "yaxis": { 1589 | "align": false, 1590 | "alignLevel": null 1591 | } 1592 | }, 1593 | { 1594 | "aliasColors": {}, 1595 | "bars": false, 1596 | "dashLength": 10, 1597 | "dashes": false, 1598 | "datasource": null, 1599 | "decimals": 0, 1600 | "fill": 3, 1601 | "gridPos": { 1602 | "h": 7, 1603 | "w": 18, 1604 | "x": 0, 1605 | "y": 43 1606 | }, 1607 | "id": 12, 1608 | "legend": { 1609 | "alignAsTable": true, 1610 | "avg": false, 1611 | "current": true, 1612 | "hideEmpty": true, 1613 | "hideZero": true, 1614 | "max": false, 1615 | "min": false, 1616 | "rightSide": true, 1617 | "show": true, 1618 | "sort": "current", 1619 | "sortDesc": true, 1620 | "total": false, 1621 | "values": true 1622 | }, 1623 | "lines": true, 1624 | "linewidth": 1, 1625 | "links": [], 1626 | "nullPointMode": "null", 1627 | "percentage": false, 1628 | "pointradius": 5, 1629 | "points": false, 1630 | "renderer": "flot", 1631 | "seriesOverrides": [], 1632 | "spaceLength": 10, 1633 | "stack": true, 1634 | "steppedLine": false, 1635 | "targets": [ 1636 | { 1637 | "expr": "sum(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) by (container_label_com_docker_swarm_service_name)", 1638 | "format": "time_series", 1639 | "intervalFactor": 10, 1640 | "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", 1641 | "refId": "A", 1642 | 
"step": 10 1643 | } 1644 | ], 1645 | "thresholds": [], 1646 | "timeFrom": null, 1647 | "timeShift": null, 1648 | "title": "Running Containers by Service", 1649 | "tooltip": { 1650 | "shared": true, 1651 | "sort": 2, 1652 | "value_type": "individual" 1653 | }, 1654 | "type": "graph", 1655 | "xaxis": { 1656 | "buckets": null, 1657 | "mode": "time", 1658 | "name": null, 1659 | "show": true, 1660 | "values": [] 1661 | }, 1662 | "yaxes": [ 1663 | { 1664 | "format": "short", 1665 | "label": null, 1666 | "logBase": 1, 1667 | "max": null, 1668 | "min": null, 1669 | "show": true 1670 | }, 1671 | { 1672 | "format": "short", 1673 | "label": null, 1674 | "logBase": 1, 1675 | "max": null, 1676 | "min": null, 1677 | "show": true 1678 | } 1679 | ], 1680 | "yaxis": { 1681 | "align": false, 1682 | "alignLevel": null 1683 | } 1684 | }, 1685 | { 1686 | "cacheTimeout": null, 1687 | "colorBackground": false, 1688 | "colorValue": false, 1689 | "colors": [ 1690 | "rgba(245, 54, 54, 0.9)", 1691 | "rgba(237, 129, 40, 0.89)", 1692 | "rgba(50, 172, 45, 0.97)" 1693 | ], 1694 | "datasource": null, 1695 | "format": "none", 1696 | "gauge": { 1697 | "maxValue": 100, 1698 | "minValue": 0, 1699 | "show": false, 1700 | "thresholdLabels": false, 1701 | "thresholdMarkers": true 1702 | }, 1703 | "gridPos": { 1704 | "h": 7, 1705 | "w": 6, 1706 | "x": 18, 1707 | "y": 43 1708 | }, 1709 | "id": 7, 1710 | "interval": null, 1711 | "links": [], 1712 | "mappingType": 1, 1713 | "mappingTypes": [ 1714 | { 1715 | "name": "value to text", 1716 | "value": 1 1717 | }, 1718 | { 1719 | "name": "range to text", 1720 | "value": 2 1721 | } 1722 | ], 1723 | "maxDataPoints": 100, 1724 | "nullPointMode": "connected", 1725 | "nullText": null, 1726 | "postfix": "", 1727 | "postfixFontSize": "50%", 1728 | "prefix": "", 1729 | "prefixFontSize": "50%", 1730 | "rangeMaps": [ 1731 | { 1732 | "from": "null", 1733 | "text": "N/A", 1734 | "to": "null" 1735 | } 1736 | ], 1737 | "sparkline": { 1738 | "fillColor": "rgba(31, 118, 189, 
0.18)", 1739 | "full": false, 1740 | "lineColor": "rgb(31, 120, 193)", 1741 | "show": true 1742 | }, 1743 | "tableColumn": "", 1744 | "targets": [ 1745 | { 1746 | "expr": "count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) ", 1747 | "format": "time_series", 1748 | "intervalFactor": 2, 1749 | "refId": "A", 1750 | "step": 20 1751 | } 1752 | ], 1753 | "thresholds": "", 1754 | "title": "Total Containers", 1755 | "type": "singlestat", 1756 | "valueFontSize": "80%", 1757 | "valueMaps": [ 1758 | { 1759 | "op": "=", 1760 | "text": "N/A", 1761 | "value": "null" 1762 | } 1763 | ], 1764 | "valueName": "avg" 1765 | }, 1766 | { 1767 | "aliasColors": {}, 1768 | "bars": false, 1769 | "dashLength": 10, 1770 | "dashes": false, 1771 | "datasource": null, 1772 | "fill": 1, 1773 | "gridPos": { 1774 | "h": 7, 1775 | "w": 24, 1776 | "x": 0, 1777 | "y": 50 1778 | }, 1779 | "id": 17, 1780 | "legend": { 1781 | "alignAsTable": true, 1782 | "avg": true, 1783 | "current": false, 1784 | "max": true, 1785 | "min": true, 1786 | "rightSide": true, 1787 | "show": true, 1788 | "total": false, 1789 | "values": true 1790 | }, 1791 | "lines": true, 1792 | "linewidth": 1, 1793 | "links": [], 1794 | "nullPointMode": "null", 1795 | "percentage": false, 1796 | "pointradius": 5, 1797 | "points": false, 1798 | "renderer": "flot", 1799 | "seriesOverrides": [], 1800 | "spaceLength": 10, 1801 | "stack": false, 1802 | "steppedLine": false, 1803 | "targets": [ 1804 | { 1805 | "expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_meta) by (node_name)", 1806 | "format": "time_series", 1807 | "intervalFactor": 2, 1808 | "legendFormat": "IN {{node_name}}", 1809 | "refId": "A", 1810 | "step": 2 1811 | }, 1812 | { 1813 | "expr": "- 
sum(rate(container_network_transmit_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_meta) by (node_name)", 1814 | "format": "time_series", 1815 | "hide": false, 1816 | "intervalFactor": 2, 1817 | "legendFormat": "OUT {{node_name}}", 1818 | "metric": "", 1819 | "refId": "B", 1820 | "step": 2 1821 | } 1822 | ], 1823 | "thresholds": [], 1824 | "timeFrom": null, 1825 | "timeShift": null, 1826 | "title": "Containers Network Traffic by Node", 1827 | "tooltip": { 1828 | "shared": true, 1829 | "sort": 0, 1830 | "value_type": "individual" 1831 | }, 1832 | "type": "graph", 1833 | "xaxis": { 1834 | "buckets": null, 1835 | "mode": "time", 1836 | "name": null, 1837 | "show": true, 1838 | "values": [] 1839 | }, 1840 | "yaxes": [ 1841 | { 1842 | "format": "Bps", 1843 | "label": null, 1844 | "logBase": 1, 1845 | "max": null, 1846 | "min": null, 1847 | "show": true 1848 | }, 1849 | { 1850 | "format": "short", 1851 | "label": null, 1852 | "logBase": 1, 1853 | "max": null, 1854 | "min": null, 1855 | "show": true 1856 | } 1857 | ], 1858 | "yaxis": { 1859 | "align": false, 1860 | "alignLevel": null 1861 | } 1862 | }, 1863 | { 1864 | "columns": [], 1865 | "datasource": null, 1866 | "fontSize": "100%", 1867 | "gridPos": { 1868 | "h": 7, 1869 | "w": 24, 1870 | "x": 0, 1871 | "y": 57 1872 | }, 1873 | "hideTimeOverride": true, 1874 | "id": 20, 1875 | "links": [], 1876 | "pageSize": null, 1877 | "scroll": true, 1878 | "showHeader": true, 1879 | "sort": { 1880 | "col": 0, 1881 | "desc": true 1882 | }, 1883 | "styles": [ 1884 | { 1885 | "alias": "Time", 1886 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1887 | "pattern": "Time", 1888 | "type": "hidden" 1889 | }, 1890 | { 1891 | "alias": "", 1892 | "colorMode": null, 1893 | "colors": [ 1894 | "rgba(245, 54, 54, 0.9)", 1895 | "rgba(237, 129, 40, 0.89)", 1896 | "rgba(50, 172, 45, 0.97)" 1897 | ], 1898 | "decimals": 2, 1899 | "pattern": "/.*/", 1900 
| "thresholds": [], 1901 | "type": "number", 1902 | "unit": "short" 1903 | } 1904 | ], 1905 | "targets": [ 1906 | { 1907 | "expr": "sum(node_meta) by (node_id, node_name, instance)", 1908 | "format": "table", 1909 | "instant": true, 1910 | "intervalFactor": 2, 1911 | "refId": "A", 1912 | "step": 2 1913 | } 1914 | ], 1915 | "timeFrom": "1s", 1916 | "title": "Cluster members", 1917 | "transform": "table", 1918 | "type": "table" 1919 | } 1920 | ], 1921 | "refresh": "30s", 1922 | "schemaVersion": 16, 1923 | "style": "dark", 1924 | "tags": [ 1925 | "swarmprom" 1926 | ], 1927 | "templating": { 1928 | "list": [ 1929 | { 1930 | "allValue": ".+", 1931 | "current": { 1932 | "text": "All", 1933 | "value": "$__all" 1934 | }, 1935 | "datasource": "Prometheus", 1936 | "hide": 0, 1937 | "includeAll": true, 1938 | "label": "Swarm Node", 1939 | "multi": false, 1940 | "name": "node_id", 1941 | "options": [], 1942 | "query": "node_meta", 1943 | "refresh": 1, 1944 | "regex": "/node_id=\"([^\"]+)\"/", 1945 | "skipUrlSync": false, 1946 | "sort": 0, 1947 | "tagValuesQuery": "label_values({node_id=\"$tag\"},node_name)", 1948 | "tags": [ 1949 | "ofdocker", 1950 | "ofmon" 1951 | ], 1952 | "tagsQuery": "label_values(node_meta, node_name)", 1953 | "type": "query", 1954 | "useTags": true 1955 | }, 1956 | { 1957 | "auto": true, 1958 | "auto_count": 30, 1959 | "auto_min": "30s", 1960 | "current": { 1961 | "text": "auto", 1962 | "value": "$__auto_interval_interval" 1963 | }, 1964 | "hide": 0, 1965 | "label": "Interval", 1966 | "name": "interval", 1967 | "options": [ 1968 | { 1969 | "selected": true, 1970 | "text": "auto", 1971 | "value": "$__auto_interval_interval" 1972 | }, 1973 | { 1974 | "selected": false, 1975 | "text": "1m", 1976 | "value": "1m" 1977 | }, 1978 | { 1979 | "selected": false, 1980 | "text": "10m", 1981 | "value": "10m" 1982 | }, 1983 | { 1984 | "selected": false, 1985 | "text": "30m", 1986 | "value": "30m" 1987 | }, 1988 | { 1989 | "selected": false, 1990 | "text": "1h", 1991 | 
"value": "1h" 1992 | }, 1993 | { 1994 | "selected": false, 1995 | "text": "6h", 1996 | "value": "6h" 1997 | }, 1998 | { 1999 | "selected": false, 2000 | "text": "12h", 2001 | "value": "12h" 2002 | }, 2003 | { 2004 | "selected": false, 2005 | "text": "1d", 2006 | "value": "1d" 2007 | }, 2008 | { 2009 | "selected": false, 2010 | "text": "7d", 2011 | "value": "7d" 2012 | }, 2013 | { 2014 | "selected": false, 2015 | "text": "14d", 2016 | "value": "14d" 2017 | }, 2018 | { 2019 | "selected": false, 2020 | "text": "30d", 2021 | "value": "30d" 2022 | } 2023 | ], 2024 | "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 2025 | "refresh": 2, 2026 | "skipUrlSync": false, 2027 | "type": "interval" 2028 | } 2029 | ] 2030 | }, 2031 | "time": { 2032 | "from": "now-15m", 2033 | "to": "now" 2034 | }, 2035 | "timepicker": { 2036 | "refresh_intervals": [ 2037 | "5s", 2038 | "10s", 2039 | "30s", 2040 | "1m", 2041 | "5m", 2042 | "15m", 2043 | "30m", 2044 | "1h", 2045 | "2h", 2046 | "1d" 2047 | ], 2048 | "time_options": [ 2049 | "5m", 2050 | "15m", 2051 | "1h", 2052 | "6h", 2053 | "12h", 2054 | "24h", 2055 | "2d", 2056 | "7d", 2057 | "30d" 2058 | ] 2059 | }, 2060 | "timezone": "", 2061 | "title": "Docker Swarm Nodes", 2062 | "uid": "BPlb-Sgik", 2063 | "version": 3 2064 | } 2065 | -------------------------------------------------------------------------------- /swarmprom/grafana/dashboards/swarmprom-prometheus-dash.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "$$hashKey": "object:698", 6 | "builtIn": 1, 7 | "datasource": "-- Grafana --", 8 | "enable": true, 9 | "hide": true, 10 | "iconColor": "rgba(0, 211, 255, 1)", 11 | "name": "Annotations & Alerts", 12 | "type": "dashboard" 13 | } 14 | ] 15 | }, 16 | "editable": true, 17 | "gnetId": null, 18 | "graphTooltip": 1, 19 | "links": [ 20 | { 21 | "icon": "info", 22 | "tags": [], 23 | "targetBlank": true, 24 | "title": "Grafana Docs", 25 | "tooltip": "", 
26 | "type": "link", 27 | "url": "http://docs.grafana.org/" 28 | }, 29 | { 30 | "icon": "info", 31 | "tags": [], 32 | "targetBlank": true, 33 | "title": "Prometheus Docs", 34 | "type": "link", 35 | "url": "http://prometheus.io/docs/introduction/overview/" 36 | } 37 | ], 38 | "panels": [ 39 | { 40 | "aliasColors": { 41 | "prometheus": "#C15C17", 42 | "{instance=\"localhost:9090\",job=\"prometheus\"}": "#CCA300" 43 | }, 44 | "bars": false, 45 | "dashLength": 10, 46 | "dashes": false, 47 | "datasource": "Prometheus", 48 | "editable": true, 49 | "error": false, 50 | "fill": 0, 51 | "grid": {}, 52 | "gridPos": { 53 | "h": 5, 54 | "w": 6, 55 | "x": 0, 56 | "y": 0 57 | }, 58 | "id": 3, 59 | "legend": { 60 | "avg": false, 61 | "current": false, 62 | "max": false, 63 | "min": false, 64 | "show": true, 65 | "total": false, 66 | "values": false 67 | }, 68 | "lines": true, 69 | "linewidth": 1, 70 | "links": [], 71 | "nullPointMode": "connected", 72 | "percentage": false, 73 | "pointradius": 2, 74 | "points": false, 75 | "renderer": "flot", 76 | "seriesOverrides": [], 77 | "spaceLength": 10, 78 | "stack": false, 79 | "steppedLine": false, 80 | "targets": [ 81 | { 82 | "expr": "sum(irate(prometheus_tsdb_head_samples_appended_total{job=\"prometheus\"}[5m]))", 83 | "format": "time_series", 84 | "hide": false, 85 | "interval": "", 86 | "intervalFactor": 2, 87 | "legendFormat": "samples", 88 | "metric": "", 89 | "refId": "A", 90 | "step": 20 91 | } 92 | ], 93 | "thresholds": [], 94 | "timeFrom": null, 95 | "timeShift": null, 96 | "title": "Samples Appended", 97 | "tooltip": { 98 | "shared": true, 99 | "sort": 0, 100 | "value_type": "cumulative" 101 | }, 102 | "type": "graph", 103 | "xaxis": { 104 | "buckets": null, 105 | "mode": "time", 106 | "name": null, 107 | "show": true, 108 | "values": [] 109 | }, 110 | "yaxes": [ 111 | { 112 | "format": "short", 113 | "logBase": 1, 114 | "max": null, 115 | "min": "0", 116 | "show": true 117 | }, 118 | { 119 | "format": "short", 120 | 
"logBase": 1, 121 | "max": null, 122 | "min": null, 123 | "show": true 124 | } 125 | ] 126 | }, 127 | { 128 | "aliasColors": {}, 129 | "bars": false, 130 | "dashLength": 10, 131 | "dashes": false, 132 | "datasource": "Prometheus", 133 | "editable": true, 134 | "error": false, 135 | "fill": 0, 136 | "grid": {}, 137 | "gridPos": { 138 | "h": 5, 139 | "w": 6, 140 | "x": 6, 141 | "y": 0 142 | }, 143 | "id": 14, 144 | "legend": { 145 | "avg": false, 146 | "current": false, 147 | "max": false, 148 | "min": false, 149 | "show": true, 150 | "total": false, 151 | "values": false 152 | }, 153 | "lines": true, 154 | "linewidth": 1, 155 | "links": [], 156 | "nullPointMode": "connected", 157 | "percentage": false, 158 | "pointradius": 5, 159 | "points": false, 160 | "renderer": "flot", 161 | "seriesOverrides": [], 162 | "spaceLength": 10, 163 | "stack": false, 164 | "steppedLine": false, 165 | "targets": [ 166 | { 167 | "expr": "topk(5, max(scrape_duration_seconds) by (job))", 168 | "format": "time_series", 169 | "interval": "", 170 | "intervalFactor": 2, 171 | "legendFormat": "{{job}}", 172 | "metric": "", 173 | "refId": "A", 174 | "step": 20 175 | } 176 | ], 177 | "thresholds": [], 178 | "timeFrom": null, 179 | "timeShift": null, 180 | "title": "Scrape Duration", 181 | "tooltip": { 182 | "shared": true, 183 | "sort": 0, 184 | "value_type": "cumulative" 185 | }, 186 | "type": "graph", 187 | "xaxis": { 188 | "buckets": null, 189 | "mode": "time", 190 | "name": null, 191 | "show": true, 192 | "values": [] 193 | }, 194 | "yaxes": [ 195 | { 196 | "format": "s", 197 | "logBase": 1, 198 | "max": null, 199 | "min": null, 200 | "show": true 201 | }, 202 | { 203 | "format": "short", 204 | "logBase": 1, 205 | "max": null, 206 | "min": null, 207 | "show": true 208 | } 209 | ] 210 | }, 211 | { 212 | "aliasColors": {}, 213 | "bars": false, 214 | "dashLength": 10, 215 | "dashes": false, 216 | "datasource": "Prometheus", 217 | "description": "", 218 | "fill": 0, 219 | "gridPos": { 220 | "h": 
5, 221 | "w": 6, 222 | "x": 12, 223 | "y": 0 224 | }, 225 | "id": 16, 226 | "legend": { 227 | "avg": false, 228 | "current": false, 229 | "max": false, 230 | "min": false, 231 | "show": true, 232 | "total": false, 233 | "values": false 234 | }, 235 | "lines": true, 236 | "linewidth": 1, 237 | "links": [], 238 | "nullPointMode": "null", 239 | "percentage": false, 240 | "pointradius": 5, 241 | "points": false, 242 | "renderer": "flot", 243 | "seriesOverrides": [], 244 | "spaceLength": 10, 245 | "stack": false, 246 | "steppedLine": false, 247 | "targets": [ 248 | { 249 | "expr": "sum(process_resident_memory_bytes{job=\"prometheus\"})", 250 | "format": "time_series", 251 | "hide": false, 252 | "interval": "", 253 | "intervalFactor": 2, 254 | "legendFormat": "p8s process resident memory", 255 | "refId": "D", 256 | "step": 20 257 | }, 258 | { 259 | "expr": "process_virtual_memory_bytes{job=\"prometheus\"}", 260 | "format": "time_series", 261 | "hide": false, 262 | "intervalFactor": 2, 263 | "legendFormat": "virtual memory", 264 | "refId": "C", 265 | "step": 20 266 | } 267 | ], 268 | "thresholds": [], 269 | "timeFrom": null, 270 | "timeShift": null, 271 | "title": "Memory Profile", 272 | "tooltip": { 273 | "shared": true, 274 | "sort": 2, 275 | "value_type": "individual" 276 | }, 277 | "transparent": false, 278 | "type": "graph", 279 | "xaxis": { 280 | "buckets": null, 281 | "mode": "time", 282 | "name": null, 283 | "show": true, 284 | "values": [] 285 | }, 286 | "yaxes": [ 287 | { 288 | "format": "bytes", 289 | "label": "", 290 | "logBase": 1, 291 | "max": null, 292 | "min": "0", 293 | "show": true 294 | }, 295 | { 296 | "format": "short", 297 | "label": null, 298 | "logBase": 1, 299 | "max": null, 300 | "min": null, 301 | "show": true 302 | } 303 | ] 304 | }, 305 | { 306 | "cacheTimeout": null, 307 | "colorBackground": false, 308 | "colorValue": true, 309 | "colors": [ 310 | "rgba(50, 172, 45, 0.97)", 311 | "rgba(237, 129, 40, 0.89)", 312 | "rgba(245, 54, 54, 0.9)" 313 
| ], 314 | "datasource": "Prometheus", 315 | "format": "none", 316 | "gauge": { 317 | "maxValue": 100, 318 | "minValue": 0, 319 | "show": false, 320 | "thresholdLabels": false, 321 | "thresholdMarkers": true 322 | }, 323 | "gridPos": { 324 | "h": 5, 325 | "w": 6, 326 | "x": 18, 327 | "y": 0 328 | }, 329 | "id": 37, 330 | "interval": null, 331 | "links": [], 332 | "mappingType": 1, 333 | "mappingTypes": [ 334 | { 335 | "name": "value to text", 336 | "value": 1 337 | }, 338 | { 339 | "name": "range to text", 340 | "value": 2 341 | } 342 | ], 343 | "maxDataPoints": 100, 344 | "nullPointMode": "connected", 345 | "nullText": null, 346 | "postfix": "", 347 | "postfixFontSize": "50%", 348 | "prefix": "", 349 | "prefixFontSize": "50%", 350 | "rangeMaps": [ 351 | { 352 | "from": "null", 353 | "text": "N/A", 354 | "to": "null" 355 | } 356 | ], 357 | "sparkline": { 358 | "fillColor": "rgba(31, 118, 189, 0.18)", 359 | "full": false, 360 | "lineColor": "rgb(31, 120, 193)", 361 | "show": false 362 | }, 363 | "tableColumn": "", 364 | "targets": [ 365 | { 366 | "expr": "prometheus_tsdb_wal_corruptions_total{job=\"prometheus\"}", 367 | "format": "time_series", 368 | "intervalFactor": 2, 369 | "legendFormat": "", 370 | "refId": "A", 371 | "step": 60 372 | } 373 | ], 374 | "thresholds": "0.1,1", 375 | "title": "WAL Corruptions", 376 | "type": "singlestat", 377 | "valueFontSize": "200%", 378 | "valueMaps": [ 379 | { 380 | "op": "=", 381 | "text": "None", 382 | "value": "0" 383 | } 384 | ], 385 | "valueName": "max" 386 | }, 387 | { 388 | "aliasColors": {}, 389 | "bars": false, 390 | "dashLength": 10, 391 | "dashes": false, 392 | "datasource": "Prometheus", 393 | "fill": 0, 394 | "gridPos": { 395 | "h": 5, 396 | "w": 6, 397 | "x": 0, 398 | "y": 5 399 | }, 400 | "id": 29, 401 | "legend": { 402 | "avg": false, 403 | "current": false, 404 | "max": false, 405 | "min": false, 406 | "show": true, 407 | "total": false, 408 | "values": false 409 | }, 410 | "lines": true, 411 | "linewidth": 1, 
412 | "links": [], 413 | "nullPointMode": "null", 414 | "percentage": false, 415 | "pointradius": 5, 416 | "points": false, 417 | "renderer": "flot", 418 | "seriesOverrides": [], 419 | "spaceLength": 10, 420 | "stack": false, 421 | "steppedLine": false, 422 | "targets": [ 423 | { 424 | "expr": "sum(prometheus_tsdb_head_active_appenders{job=\"prometheus\"})", 425 | "format": "time_series", 426 | "interval": "", 427 | "intervalFactor": 2, 428 | "legendFormat": "active_appenders", 429 | "metric": "", 430 | "refId": "A", 431 | "step": 20 432 | }, 433 | { 434 | "expr": "sum(process_open_fds{job=\"prometheus\"})", 435 | "format": "time_series", 436 | "interval": "", 437 | "intervalFactor": 2, 438 | "legendFormat": "open_fds", 439 | "refId": "B", 440 | "step": 20 441 | } 442 | ], 443 | "thresholds": [], 444 | "timeFrom": null, 445 | "timeShift": null, 446 | "title": "Active Appenders", 447 | "tooltip": { 448 | "shared": true, 449 | "sort": 0, 450 | "value_type": "individual" 451 | }, 452 | "type": "graph", 453 | "xaxis": { 454 | "buckets": null, 455 | "mode": "time", 456 | "name": null, 457 | "show": true, 458 | "values": [] 459 | }, 460 | "yaxes": [ 461 | { 462 | "format": "short", 463 | "label": null, 464 | "logBase": 1, 465 | "max": null, 466 | "min": null, 467 | "show": true 468 | }, 469 | { 470 | "format": "short", 471 | "label": null, 472 | "logBase": 1, 473 | "max": null, 474 | "min": null, 475 | "show": false 476 | } 477 | ] 478 | }, 479 | { 480 | "aliasColors": { 481 | "prometheus": "#F9BA8F", 482 | "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" 483 | }, 484 | "bars": false, 485 | "dashLength": 10, 486 | "dashes": false, 487 | "datasource": "Prometheus", 488 | "editable": true, 489 | "error": false, 490 | "fill": 0, 491 | "grid": {}, 492 | "gridPos": { 493 | "h": 5, 494 | "w": 6, 495 | "x": 6, 496 | "y": 5 497 | }, 498 | "id": 2, 499 | "legend": { 500 | "avg": false, 501 | "current": false, 502 | "max": false, 503 | "min": false, 
504 | "show": true, 505 | "total": false, 506 | "values": false 507 | }, 508 | "lines": true, 509 | "linewidth": 1, 510 | "links": [], 511 | "nullPointMode": "connected", 512 | "percentage": false, 513 | "pointradius": 5, 514 | "points": false, 515 | "renderer": "flot", 516 | "seriesOverrides": [], 517 | "spaceLength": 10, 518 | "stack": false, 519 | "steppedLine": false, 520 | "targets": [ 521 | { 522 | "expr": "prometheus_tsdb_blocks_loaded{job=\"prometheus\"}", 523 | "format": "time_series", 524 | "intervalFactor": 2, 525 | "legendFormat": "blocks", 526 | "refId": "A", 527 | "step": 20 528 | } 529 | ], 530 | "thresholds": [], 531 | "timeFrom": null, 532 | "timeShift": null, 533 | "title": "Blocks Loaded", 534 | "tooltip": { 535 | "shared": true, 536 | "sort": 0, 537 | "value_type": "cumulative" 538 | }, 539 | "type": "graph", 540 | "xaxis": { 541 | "buckets": null, 542 | "mode": "time", 543 | "name": null, 544 | "show": true, 545 | "values": [] 546 | }, 547 | "yaxes": [ 548 | { 549 | "format": "short", 550 | "logBase": 1, 551 | "max": null, 552 | "min": null, 553 | "show": true 554 | }, 555 | { 556 | "format": "short", 557 | "logBase": 1, 558 | "max": null, 559 | "min": null, 560 | "show": true 561 | } 562 | ] 563 | }, 564 | { 565 | "aliasColors": {}, 566 | "bars": false, 567 | "dashLength": 10, 568 | "dashes": false, 569 | "datasource": "Prometheus", 570 | "decimals": null, 571 | "description": "", 572 | "fill": 0, 573 | "gridPos": { 574 | "h": 5, 575 | "w": 6, 576 | "x": 12, 577 | "y": 5 578 | }, 579 | "id": 33, 580 | "legend": { 581 | "avg": false, 582 | "current": false, 583 | "max": false, 584 | "min": false, 585 | "show": true, 586 | "total": false, 587 | "values": false 588 | }, 589 | "lines": true, 590 | "linewidth": 1, 591 | "links": [], 592 | "nullPointMode": "connected", 593 | "percentage": false, 594 | "pointradius": 5, 595 | "points": false, 596 | "renderer": "flot", 597 | "seriesOverrides": [], 598 | "spaceLength": 10, 599 | "stack": false, 600 | 
"steppedLine": false, 601 | "targets": [ 602 | { 603 | "expr": "prometheus_tsdb_head_chunks{job=\"prometheus\"}", 604 | "format": "time_series", 605 | "interval": "", 606 | "intervalFactor": 2, 607 | "legendFormat": "chunks", 608 | "refId": "A", 609 | "step": 20 610 | } 611 | ], 612 | "thresholds": [], 613 | "timeFrom": null, 614 | "timeShift": null, 615 | "title": "Head Chunks", 616 | "tooltip": { 617 | "shared": true, 618 | "sort": 0, 619 | "value_type": "individual" 620 | }, 621 | "type": "graph", 622 | "xaxis": { 623 | "buckets": null, 624 | "mode": "time", 625 | "name": null, 626 | "show": true, 627 | "values": [] 628 | }, 629 | "yaxes": [ 630 | { 631 | "format": "short", 632 | "label": null, 633 | "logBase": 1, 634 | "max": null, 635 | "min": null, 636 | "show": true 637 | }, 638 | { 639 | "format": "bytes", 640 | "label": "", 641 | "logBase": 1, 642 | "max": null, 643 | "min": null, 644 | "show": false 645 | } 646 | ] 647 | }, 648 | { 649 | "aliasColors": {}, 650 | "bars": false, 651 | "dashLength": 10, 652 | "dashes": false, 653 | "datasource": "Prometheus", 654 | "fill": 1, 655 | "gridPos": { 656 | "h": 5, 657 | "w": 6, 658 | "x": 18, 659 | "y": 5 660 | }, 661 | "id": 36, 662 | "legend": { 663 | "avg": false, 664 | "current": false, 665 | "max": false, 666 | "min": false, 667 | "show": true, 668 | "total": false, 669 | "values": false 670 | }, 671 | "lines": true, 672 | "linewidth": 1, 673 | "links": [], 674 | "nullPointMode": "null", 675 | "percentage": false, 676 | "pointradius": 5, 677 | "points": false, 678 | "renderer": "flot", 679 | "seriesOverrides": [ 680 | { 681 | "alias": "duration-p99", 682 | "yaxis": 2 683 | } 684 | ], 685 | "spaceLength": 10, 686 | "stack": false, 687 | "steppedLine": false, 688 | "targets": [ 689 | { 690 | "expr": "prometheus_tsdb_head_gc_duration_seconds{job=\"prometheus\",quantile=\"0.99\"}", 691 | "format": "time_series", 692 | "intervalFactor": 2, 693 | "legendFormat": "duration-p99", 694 | "refId": "A", 695 | "step": 20 
696 | }, 697 | { 698 | "expr": "irate(prometheus_tsdb_head_gc_duration_seconds_count{job=\"prometheus\"}[5m])", 699 | "format": "time_series", 700 | "intervalFactor": 2, 701 | "legendFormat": "collections", 702 | "refId": "B", 703 | "step": 20 704 | } 705 | ], 706 | "thresholds": [], 707 | "timeFrom": null, 708 | "timeShift": null, 709 | "title": "Head Block GC Activity", 710 | "tooltip": { 711 | "shared": true, 712 | "sort": 0, 713 | "value_type": "individual" 714 | }, 715 | "type": "graph", 716 | "xaxis": { 717 | "buckets": null, 718 | "mode": "time", 719 | "name": null, 720 | "show": true, 721 | "values": [] 722 | }, 723 | "yaxes": [ 724 | { 725 | "format": "short", 726 | "label": null, 727 | "logBase": 1, 728 | "max": null, 729 | "min": "0", 730 | "show": true 731 | }, 732 | { 733 | "format": "s", 734 | "label": null, 735 | "logBase": 1, 736 | "max": null, 737 | "min": "0", 738 | "show": true 739 | } 740 | ] 741 | }, 742 | { 743 | "aliasColors": {}, 744 | "bars": false, 745 | "dashLength": 10, 746 | "dashes": false, 747 | "datasource": "Prometheus", 748 | "decimals": null, 749 | "description": "", 750 | "fill": 0, 751 | "gridPos": { 752 | "h": 5, 753 | "w": 8, 754 | "x": 0, 755 | "y": 10 756 | }, 757 | "id": 20, 758 | "legend": { 759 | "avg": false, 760 | "current": false, 761 | "max": false, 762 | "min": false, 763 | "show": true, 764 | "total": false, 765 | "values": false 766 | }, 767 | "lines": true, 768 | "linewidth": 1, 769 | "links": [], 770 | "nullPointMode": "connected", 771 | "percentage": false, 772 | "pointradius": 5, 773 | "points": false, 774 | "renderer": "flot", 775 | "seriesOverrides": [ 776 | { 777 | "alias": "duration-p99", 778 | "yaxis": 2 779 | } 780 | ], 781 | "spaceLength": 10, 782 | "stack": false, 783 | "steppedLine": false, 784 | "targets": [ 785 | { 786 | "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_bucket{job=\"prometheus\"}[5m])) by (le))", 787 | "format": "time_series", 788 | "hide": false, 789 | 
"interval": "", 790 | "intervalFactor": 2, 791 | "legendFormat": "duration-p99", 792 | "refId": "A", 793 | "step": 20 794 | }, 795 | { 796 | "expr": "irate(prometheus_tsdb_compactions_total{job=\"prometheus\"}[5m])", 797 | "format": "time_series", 798 | "intervalFactor": 2, 799 | "legendFormat": "compactions", 800 | "refId": "B", 801 | "step": 20 802 | }, 803 | { 804 | "expr": "irate(prometheus_tsdb_compactions_failed_total{job=\"prometheus\"}[5m])", 805 | "format": "time_series", 806 | "intervalFactor": 2, 807 | "legendFormat": "failed", 808 | "refId": "C", 809 | "step": 20 810 | }, 811 | { 812 | "expr": "irate(prometheus_tsdb_compactions_triggered_total{job=\"prometheus\"}[5m])", 813 | "format": "time_series", 814 | "intervalFactor": 2, 815 | "legendFormat": "triggered", 816 | "refId": "D", 817 | "step": 20 818 | } 819 | ], 820 | "thresholds": [], 821 | "timeFrom": null, 822 | "timeShift": null, 823 | "title": "Compaction Activity", 824 | "tooltip": { 825 | "shared": true, 826 | "sort": 0, 827 | "value_type": "individual" 828 | }, 829 | "type": "graph", 830 | "xaxis": { 831 | "buckets": null, 832 | "mode": "time", 833 | "name": null, 834 | "show": true, 835 | "values": [] 836 | }, 837 | "yaxes": [ 838 | { 839 | "format": "short", 840 | "label": null, 841 | "logBase": 1, 842 | "max": null, 843 | "min": "0", 844 | "show": true 845 | }, 846 | { 847 | "format": "s", 848 | "label": "", 849 | "logBase": 1, 850 | "max": null, 851 | "min": "0", 852 | "show": true 853 | } 854 | ] 855 | }, 856 | { 857 | "aliasColors": {}, 858 | "bars": false, 859 | "dashLength": 10, 860 | "dashes": false, 861 | "datasource": "Prometheus", 862 | "fill": 1, 863 | "gridPos": { 864 | "h": 5, 865 | "w": 8, 866 | "x": 8, 867 | "y": 10 868 | }, 869 | "id": 32, 870 | "legend": { 871 | "avg": false, 872 | "current": false, 873 | "max": false, 874 | "min": false, 875 | "show": true, 876 | "total": false, 877 | "values": false 878 | }, 879 | "lines": true, 880 | "linewidth": 1, 881 | "links": [], 
882 | "nullPointMode": "null", 883 | "percentage": false, 884 | "pointradius": 5, 885 | "points": false, 886 | "renderer": "flot", 887 | "seriesOverrides": [], 888 | "spaceLength": 10, 889 | "stack": false, 890 | "steppedLine": false, 891 | "targets": [ 892 | { 893 | "expr": "rate(prometheus_tsdb_reloads_total{job=\"prometheus\"}[5m])", 894 | "format": "time_series", 895 | "intervalFactor": 2, 896 | "legendFormat": "reloads", 897 | "refId": "A", 898 | "step": 20 899 | }, 900 | { 901 | "expr": "rate(prometheus_tsdb_reloads_failures_total{job=\"prometheus\"}[5m])", 902 | "format": "time_series", 903 | "hide": false, 904 | "intervalFactor": 2, 905 | "legendFormat": "failures", 906 | "refId": "B", 907 | "step": 20 908 | } 909 | ], 910 | "thresholds": [], 911 | "timeFrom": null, 912 | "timeShift": null, 913 | "title": "Reload Count", 914 | "tooltip": { 915 | "shared": true, 916 | "sort": 0, 917 | "value_type": "individual" 918 | }, 919 | "type": "graph", 920 | "xaxis": { 921 | "buckets": null, 922 | "mode": "time", 923 | "name": null, 924 | "show": true, 925 | "values": [] 926 | }, 927 | "yaxes": [ 928 | { 929 | "format": "short", 930 | "label": null, 931 | "logBase": 1, 932 | "max": null, 933 | "min": null, 934 | "show": true 935 | }, 936 | { 937 | "format": "short", 938 | "label": null, 939 | "logBase": 1, 940 | "max": null, 941 | "min": null, 942 | "show": true 943 | } 944 | ] 945 | }, 946 | { 947 | "aliasColors": {}, 948 | "bars": false, 949 | "dashLength": 10, 950 | "dashes": false, 951 | "datasource": "Prometheus", 952 | "fill": 0, 953 | "gridPos": { 954 | "h": 5, 955 | "w": 8, 956 | "x": 16, 957 | "y": 10 958 | }, 959 | "id": 38, 960 | "legend": { 961 | "avg": false, 962 | "current": false, 963 | "max": false, 964 | "min": false, 965 | "show": true, 966 | "total": false, 967 | "values": false 968 | }, 969 | "lines": true, 970 | "linewidth": 1, 971 | "links": [], 972 | "nullPointMode": "null", 973 | "percentage": false, 974 | "pointradius": 5, 975 | "points": 
false, 976 | "renderer": "flot", 977 | "seriesOverrides": [], 978 | "spaceLength": 10, 979 | "stack": false, 980 | "steppedLine": false, 981 | "targets": [ 982 | { 983 | "expr": "prometheus_engine_query_duration_seconds{job=\"prometheus\", quantile=\"0.99\"}", 984 | "format": "time_series", 985 | "intervalFactor": 2, 986 | "legendFormat": "{{slice}}_p99", 987 | "refId": "A", 988 | "step": 20 989 | } 990 | ], 991 | "thresholds": [], 992 | "timeFrom": null, 993 | "timeShift": null, 994 | "title": "Query Durations", 995 | "tooltip": { 996 | "shared": true, 997 | "sort": 0, 998 | "value_type": "individual" 999 | }, 1000 | "type": "graph", 1001 | "xaxis": { 1002 | "buckets": null, 1003 | "mode": "time", 1004 | "name": null, 1005 | "show": true, 1006 | "values": [] 1007 | }, 1008 | "yaxes": [ 1009 | { 1010 | "format": "short", 1011 | "label": null, 1012 | "logBase": 1, 1013 | "max": null, 1014 | "min": null, 1015 | "show": true 1016 | }, 1017 | { 1018 | "format": "short", 1019 | "label": null, 1020 | "logBase": 1, 1021 | "max": null, 1022 | "min": null, 1023 | "show": true 1024 | } 1025 | ] 1026 | }, 1027 | { 1028 | "aliasColors": {}, 1029 | "bars": false, 1030 | "dashLength": 10, 1031 | "dashes": false, 1032 | "datasource": "Prometheus", 1033 | "decimals": null, 1034 | "editable": true, 1035 | "error": false, 1036 | "fill": 0, 1037 | "grid": {}, 1038 | "gridPos": { 1039 | "h": 7, 1040 | "w": 12, 1041 | "x": 0, 1042 | "y": 15 1043 | }, 1044 | "id": 35, 1045 | "legend": { 1046 | "alignAsTable": false, 1047 | "avg": false, 1048 | "current": false, 1049 | "hideEmpty": true, 1050 | "max": false, 1051 | "min": false, 1052 | "show": true, 1053 | "total": false, 1054 | "values": false 1055 | }, 1056 | "lines": true, 1057 | "linewidth": 1, 1058 | "links": [], 1059 | "nullPointMode": "connected", 1060 | "percentage": false, 1061 | "pointradius": 5, 1062 | "points": false, 1063 | "renderer": "flot", 1064 | "seriesOverrides": [], 1065 | "spaceLength": 10, 1066 | "stack": false, 
1067 | "steppedLine": false, 1068 | "targets": [ 1069 | { 1070 | "expr": "max(prometheus_rule_group_duration_seconds{job=\"prometheus\"}) by (quantile)", 1071 | "format": "time_series", 1072 | "interval": "", 1073 | "intervalFactor": 2, 1074 | "legendFormat": "{{quantile}}", 1075 | "refId": "A", 1076 | "step": 10 1077 | } 1078 | ], 1079 | "thresholds": [], 1080 | "timeFrom": null, 1081 | "timeShift": null, 1082 | "title": "Rule Group Eval Duration", 1083 | "tooltip": { 1084 | "shared": true, 1085 | "sort": 0, 1086 | "value_type": "cumulative" 1087 | }, 1088 | "type": "graph", 1089 | "xaxis": { 1090 | "buckets": null, 1091 | "mode": "time", 1092 | "name": null, 1093 | "show": true, 1094 | "values": [] 1095 | }, 1096 | "yaxes": [ 1097 | { 1098 | "format": "s", 1099 | "label": "", 1100 | "logBase": 1, 1101 | "max": null, 1102 | "min": null, 1103 | "show": true 1104 | }, 1105 | { 1106 | "format": "short", 1107 | "logBase": 1, 1108 | "max": null, 1109 | "min": null, 1110 | "show": true 1111 | } 1112 | ] 1113 | }, 1114 | { 1115 | "aliasColors": {}, 1116 | "bars": false, 1117 | "dashLength": 10, 1118 | "dashes": false, 1119 | "datasource": "Prometheus", 1120 | "fill": 1, 1121 | "gridPos": { 1122 | "h": 7, 1123 | "w": 12, 1124 | "x": 12, 1125 | "y": 15 1126 | }, 1127 | "id": 39, 1128 | "legend": { 1129 | "avg": false, 1130 | "current": false, 1131 | "max": false, 1132 | "min": false, 1133 | "show": true, 1134 | "total": false, 1135 | "values": false 1136 | }, 1137 | "lines": true, 1138 | "linewidth": 1, 1139 | "links": [], 1140 | "nullPointMode": "null", 1141 | "percentage": false, 1142 | "pointradius": 5, 1143 | "points": false, 1144 | "renderer": "flot", 1145 | "seriesOverrides": [], 1146 | "spaceLength": 10, 1147 | "stack": true, 1148 | "steppedLine": false, 1149 | "targets": [ 1150 | { 1151 | "expr": "rate(prometheus_rule_group_iterations_missed_total{job=\"prometheus\"}[5m])", 1152 | "format": "time_series", 1153 | "intervalFactor": 2, 1154 | "legendFormat": "missed", 
1155 | "refId": "B", 1156 | "step": 10 1157 | }, 1158 | { 1159 | "expr": "rate(prometheus_rule_group_iterations_total{job=\"prometheus\"}[5m])", 1160 | "format": "time_series", 1161 | "intervalFactor": 2, 1162 | "legendFormat": "iterations", 1163 | "refId": "A", 1164 | "step": 10 1165 | } 1166 | ], 1167 | "thresholds": [], 1168 | "timeFrom": null, 1169 | "timeShift": null, 1170 | "title": "Rule Group Eval Activity", 1171 | "tooltip": { 1172 | "shared": true, 1173 | "sort": 0, 1174 | "value_type": "individual" 1175 | }, 1176 | "type": "graph", 1177 | "xaxis": { 1178 | "buckets": null, 1179 | "mode": "time", 1180 | "name": null, 1181 | "show": true, 1182 | "values": [] 1183 | }, 1184 | "yaxes": [ 1185 | { 1186 | "format": "short", 1187 | "label": null, 1188 | "logBase": 1, 1189 | "max": null, 1190 | "min": null, 1191 | "show": true 1192 | }, 1193 | { 1194 | "format": "short", 1195 | "label": null, 1196 | "logBase": 1, 1197 | "max": null, 1198 | "min": null, 1199 | "show": true 1200 | } 1201 | ] 1202 | } 1203 | ], 1204 | "refresh": "1m", 1205 | "revision": "1.0", 1206 | "schemaVersion": 16, 1207 | "style": "dark", 1208 | "tags": [ 1209 | "prometheus" 1210 | ], 1211 | "templating": { 1212 | "list": [] 1213 | }, 1214 | "time": { 1215 | "from": "now-1h", 1216 | "to": "now" 1217 | }, 1218 | "timepicker": { 1219 | "now": true, 1220 | "refresh_intervals": [ 1221 | "5s", 1222 | "10s", 1223 | "30s", 1224 | "1m", 1225 | "5m", 1226 | "15m", 1227 | "30m", 1228 | "1h", 1229 | "2h", 1230 | "1d" 1231 | ], 1232 | "time_options": [ 1233 | "5m", 1234 | "15m", 1235 | "1h", 1236 | "6h", 1237 | "12h", 1238 | "24h", 1239 | "2d", 1240 | "7d", 1241 | "30d" 1242 | ] 1243 | }, 1244 | "timezone": "browser", 1245 | "title": "Prometheus 2.0 Stats", 1246 | "uid": "mGFfYSRiz", 1247 | "version": 1 1248 | } 1249 | -------------------------------------------------------------------------------- /swarmprom/grafana/dashboards/swarmprom-services-dash.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "$$hashKey": "object:429", 6 | "builtIn": 1, 7 | "datasource": "-- Grafana --", 8 | "enable": true, 9 | "hide": true, 10 | "iconColor": "rgba(0, 211, 255, 1)", 11 | "name": "Annotations & Alerts", 12 | "type": "dashboard" 13 | } 14 | ] 15 | }, 16 | "description": "Docker Swarm stacks and services metrics", 17 | "editable": true, 18 | "gnetId": null, 19 | "graphTooltip": 0, 20 | "iteration": 1520585594614, 21 | "links": [], 22 | "panels": [ 23 | { 24 | "cacheTimeout": null, 25 | "colorBackground": false, 26 | "colorValue": false, 27 | "colors": [ 28 | "rgba(245, 54, 54, 0.9)", 29 | "rgba(237, 129, 40, 0.89)", 30 | "rgba(50, 172, 45, 0.97)" 31 | ], 32 | "datasource": null, 33 | "decimals": 0, 34 | "format": "none", 35 | "gauge": { 36 | "maxValue": 100, 37 | "minValue": 0, 38 | "show": false, 39 | "thresholdLabels": false, 40 | "thresholdMarkers": true 41 | }, 42 | "gridPos": { 43 | "h": 4, 44 | "w": 6, 45 | "x": 0, 46 | "y": 0 47 | }, 48 | "hideTimeOverride": true, 49 | "id": 1, 50 | "interval": null, 51 | "links": [], 52 | "mappingType": 1, 53 | "mappingTypes": [ 54 | { 55 | "name": "value to text", 56 | "value": 1 57 | }, 58 | { 59 | "name": "range to text", 60 | "value": 2 61 | } 62 | ], 63 | "maxDataPoints": 100, 64 | "nullPointMode": "connected", 65 | "nullText": null, 66 | "postfix": "", 67 | "postfixFontSize": "50%", 68 | "prefix": "", 69 | "prefixFontSize": "50%", 70 | "rangeMaps": [ 71 | { 72 | "from": "null", 73 | "text": "N/A", 74 | "to": "null" 75 | } 76 | ], 77 | "sparkline": { 78 | "fillColor": "rgba(31, 118, 189, 0.18)", 79 | "full": false, 80 | "lineColor": "rgb(31, 120, 193)", 81 | "show": false 82 | }, 83 | "tableColumn": "", 84 | "targets": [ 85 | { 86 | "expr": "count(count(container_tasks_state{container_label_com_docker_swarm_node_id =~\"$node_id\"}) by (container_label_com_docker_swarm_node_id))", 87 | "format": 
"time_series", 88 | "intervalFactor": 2, 89 | "legendFormat": "", 90 | "refId": "A", 91 | "step": 2 92 | } 93 | ], 94 | "thresholds": "", 95 | "timeFrom": "1m", 96 | "title": "Nodes", 97 | "type": "singlestat", 98 | "valueFontSize": "80%", 99 | "valueMaps": [ 100 | { 101 | "op": "=", 102 | "text": "N/A", 103 | "value": "null" 104 | } 105 | ], 106 | "valueName": "avg" 107 | }, 108 | { 109 | "cacheTimeout": null, 110 | "colorBackground": false, 111 | "colorValue": false, 112 | "colors": [ 113 | "rgba(245, 54, 54, 0.9)", 114 | "rgba(237, 129, 40, 0.89)", 115 | "rgba(50, 172, 45, 0.97)" 116 | ], 117 | "datasource": null, 118 | "decimals": 0, 119 | "format": "none", 120 | "gauge": { 121 | "maxValue": 100, 122 | "minValue": 0, 123 | "show": false, 124 | "thresholdLabels": false, 125 | "thresholdMarkers": true 126 | }, 127 | "gridPos": { 128 | "h": 4, 129 | "w": 6, 130 | "x": 6, 131 | "y": 0 132 | }, 133 | "hideTimeOverride": true, 134 | "id": 21, 135 | "interval": null, 136 | "links": [], 137 | "mappingType": 1, 138 | "mappingTypes": [ 139 | { 140 | "name": "value to text", 141 | "value": 1 142 | }, 143 | { 144 | "name": "range to text", 145 | "value": 2 146 | } 147 | ], 148 | "maxDataPoints": 100, 149 | "nullPointMode": "connected", 150 | "nullText": null, 151 | "postfix": "", 152 | "postfixFontSize": "50%", 153 | "prefix": "", 154 | "prefixFontSize": "50%", 155 | "rangeMaps": [ 156 | { 157 | "from": "null", 158 | "text": "N/A", 159 | "to": "null" 160 | } 161 | ], 162 | "sparkline": { 163 | "fillColor": "rgba(31, 118, 189, 0.18)", 164 | "full": false, 165 | "lineColor": "rgb(31, 120, 193)", 166 | "show": false 167 | }, 168 | "tableColumn": "", 169 | "targets": [ 170 | { 171 | "expr": "count(count(container_tasks_state{container_label_com_docker_stack_namespace=~\".+\", container_label_com_docker_swarm_node_id=~\"$node_id\"}) by (container_label_com_docker_stack_namespace))", 172 | "format": "time_series", 173 | "intervalFactor": 2, 174 | "legendFormat": "", 175 | 
"refId": "A", 176 | "step": 2 177 | } 178 | ], 179 | "thresholds": "", 180 | "timeFrom": "1m", 181 | "title": "Stacks", 182 | "type": "singlestat", 183 | "valueFontSize": "80%", 184 | "valueMaps": [ 185 | { 186 | "op": "=", 187 | "text": "N/A", 188 | "value": "null" 189 | } 190 | ], 191 | "valueName": "avg" 192 | }, 193 | { 194 | "cacheTimeout": null, 195 | "colorBackground": false, 196 | "colorValue": false, 197 | "colors": [ 198 | "rgba(245, 54, 54, 0.9)", 199 | "rgba(237, 129, 40, 0.89)", 200 | "rgba(50, 172, 45, 0.97)" 201 | ], 202 | "datasource": null, 203 | "decimals": 0, 204 | "format": "none", 205 | "gauge": { 206 | "maxValue": 100, 207 | "minValue": 0, 208 | "show": false, 209 | "thresholdLabels": false, 210 | "thresholdMarkers": true 211 | }, 212 | "gridPos": { 213 | "h": 4, 214 | "w": 6, 215 | "x": 12, 216 | "y": 0 217 | }, 218 | "hideTimeOverride": true, 219 | "id": 20, 220 | "interval": null, 221 | "links": [], 222 | "mappingType": 1, 223 | "mappingTypes": [ 224 | { 225 | "name": "value to text", 226 | "value": 1 227 | }, 228 | { 229 | "name": "range to text", 230 | "value": 2 231 | } 232 | ], 233 | "maxDataPoints": 100, 234 | "nullPointMode": "connected", 235 | "nullText": null, 236 | "postfix": "", 237 | "postfixFontSize": "50%", 238 | "prefix": "", 239 | "prefixFontSize": "50%", 240 | "rangeMaps": [ 241 | { 242 | "from": "null", 243 | "text": "N/A", 244 | "to": "null" 245 | } 246 | ], 247 | "sparkline": { 248 | "fillColor": "rgba(31, 118, 189, 0.18)", 249 | "full": false, 250 | "lineColor": "rgb(31, 120, 193)", 251 | "show": false 252 | }, 253 | "tableColumn": "", 254 | "targets": [ 255 | { 256 | "expr": "count(count(container_tasks_state{container_label_com_docker_swarm_service_name=~\".+\", container_label_com_docker_swarm_node_id=~\"$node_id\"}) by (container_label_com_docker_swarm_service_name))", 257 | "format": "time_series", 258 | "intervalFactor": 2, 259 | "refId": "A", 260 | "step": 2 261 | } 262 | ], 263 | "thresholds": "", 264 | 
"timeFrom": "1m", 265 | "timeShift": null, 266 | "title": "Services", 267 | "type": "singlestat", 268 | "valueFontSize": "80%", 269 | "valueMaps": [ 270 | { 271 | "op": "=", 272 | "text": "N/A", 273 | "value": "null" 274 | } 275 | ], 276 | "valueName": "avg" 277 | }, 278 | { 279 | "cacheTimeout": null, 280 | "colorBackground": false, 281 | "colorValue": false, 282 | "colors": [ 283 | "rgba(245, 54, 54, 0.9)", 284 | "rgba(237, 129, 40, 0.89)", 285 | "rgba(50, 172, 45, 0.97)" 286 | ], 287 | "datasource": null, 288 | "decimals": 0, 289 | "format": "none", 290 | "gauge": { 291 | "maxValue": 100, 292 | "minValue": 0, 293 | "show": false, 294 | "thresholdLabels": false, 295 | "thresholdMarkers": true 296 | }, 297 | "gridPos": { 298 | "h": 4, 299 | "w": 6, 300 | "x": 18, 301 | "y": 0 302 | }, 303 | "hideTimeOverride": true, 304 | "id": 7, 305 | "interval": null, 306 | "links": [], 307 | "mappingType": 1, 308 | "mappingTypes": [ 309 | { 310 | "name": "value to text", 311 | "value": 1 312 | }, 313 | { 314 | "name": "range to text", 315 | "value": 2 316 | } 317 | ], 318 | "maxDataPoints": 100, 319 | "nullPointMode": "connected", 320 | "nullText": null, 321 | "postfix": "", 322 | "postfixFontSize": "50%", 323 | "prefix": "", 324 | "prefixFontSize": "50%", 325 | "rangeMaps": [ 326 | { 327 | "from": "null", 328 | "text": "N/A", 329 | "to": "null" 330 | } 331 | ], 332 | "sparkline": { 333 | "fillColor": "rgba(31, 118, 189, 0.18)", 334 | "full": false, 335 | "lineColor": "rgb(31, 120, 193)", 336 | "show": false 337 | }, 338 | "tableColumn": "", 339 | "targets": [ 340 | { 341 | "expr": "count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) ", 342 | "format": "time_series", 343 | "intervalFactor": 2, 344 | "refId": "A", 345 | "step": 2 346 | } 347 | ], 348 | "thresholds": "", 349 | "timeFrom": "1m", 350 | "title": "Containers", 351 | "type": "singlestat", 352 | "valueFontSize": "80%", 353 | "valueMaps": [ 354 | { 355 | "op": "=", 356 | "text": 
"N/A", 357 | "value": "null" 358 | } 359 | ], 360 | "valueName": "avg" 361 | }, 362 | { 363 | "aliasColors": {}, 364 | "bars": true, 365 | "dashLength": 10, 366 | "dashes": false, 367 | "datasource": null, 368 | "decimals": 0, 369 | "fill": 5, 370 | "gridPos": { 371 | "h": 7, 372 | "w": 12, 373 | "x": 0, 374 | "y": 4 375 | }, 376 | "id": 12, 377 | "legend": { 378 | "alignAsTable": true, 379 | "avg": false, 380 | "current": true, 381 | "hideEmpty": true, 382 | "hideZero": true, 383 | "max": false, 384 | "min": false, 385 | "rightSide": true, 386 | "show": true, 387 | "sort": "current", 388 | "sortDesc": true, 389 | "total": false, 390 | "values": true 391 | }, 392 | "lines": false, 393 | "linewidth": 1, 394 | "links": [], 395 | "nullPointMode": "null", 396 | "percentage": false, 397 | "pointradius": 5, 398 | "points": false, 399 | "renderer": "flot", 400 | "seriesOverrides": [], 401 | "spaceLength": 10, 402 | "stack": true, 403 | "steppedLine": false, 404 | "targets": [ 405 | { 406 | "expr": "sum(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) by (container_label_com_docker_swarm_service_name)", 407 | "format": "time_series", 408 | "intervalFactor": 10, 409 | "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", 410 | "refId": "A", 411 | "step": 10 412 | } 413 | ], 414 | "thresholds": [], 415 | "timeFrom": null, 416 | "timeShift": null, 417 | "title": "Service Tasks", 418 | "tooltip": { 419 | "shared": true, 420 | "sort": 2, 421 | "value_type": "individual" 422 | }, 423 | "type": "graph", 424 | "xaxis": { 425 | "buckets": null, 426 | "mode": "time", 427 | "name": null, 428 | "show": true, 429 | "values": [] 430 | }, 431 | "yaxes": [ 432 | { 433 | "format": "short", 434 | "label": null, 435 | "logBase": 1, 436 | "max": null, 437 | "min": null, 438 | "show": true 439 | }, 440 | { 441 | "format": "short", 442 | "label": null, 443 | "logBase": 1, 444 | "max": null, 445 | "min": null, 446 | "show": true 447 | } 448 
| ] 449 | }, 450 | { 451 | "aliasColors": {}, 452 | "bars": false, 453 | "dashLength": 10, 454 | "dashes": false, 455 | "datasource": null, 456 | "decimals": 0, 457 | "fill": 1, 458 | "gridPos": { 459 | "h": 7, 460 | "w": 12, 461 | "x": 12, 462 | "y": 4 463 | }, 464 | "id": 32, 465 | "legend": { 466 | "alignAsTable": true, 467 | "avg": false, 468 | "current": true, 469 | "hideEmpty": true, 470 | "hideZero": true, 471 | "max": false, 472 | "min": false, 473 | "rightSide": true, 474 | "show": false, 475 | "sort": "current", 476 | "sortDesc": true, 477 | "total": false, 478 | "values": true 479 | }, 480 | "lines": true, 481 | "linewidth": 1, 482 | "links": [], 483 | "nullPointMode": "null", 484 | "percentage": false, 485 | "pointradius": 5, 486 | "points": false, 487 | "renderer": "flot", 488 | "seriesOverrides": [], 489 | "spaceLength": 10, 490 | "stack": false, 491 | "steppedLine": false, 492 | "targets": [ 493 | { 494 | "expr": "sum(increase(engine_daemon_health_checks_total[$interval]) * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) ", 495 | "format": "time_series", 496 | "intervalFactor": 10, 497 | "legendFormat": "checks", 498 | "refId": "A", 499 | "step": 10 500 | }, 501 | { 502 | "expr": "sum(increase(engine_daemon_health_checks_failed_total[$interval]) * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) ", 503 | "format": "time_series", 504 | "intervalFactor": 10, 505 | "legendFormat": "failed", 506 | "refId": "B", 507 | "step": 10 508 | } 509 | ], 510 | "thresholds": [], 511 | "timeFrom": null, 512 | "timeShift": null, 513 | "title": "Health Checks", 514 | "tooltip": { 515 | "shared": true, 516 | "sort": 2, 517 | "value_type": "individual" 518 | }, 519 | "type": "graph", 520 | "xaxis": { 521 | "buckets": null, 522 | "mode": "time", 523 | "name": null, 524 | "show": true, 525 | "values": [] 526 | }, 527 | "yaxes": [ 528 | { 529 | "format": "short", 530 | "label": null, 531 | "logBase": 1, 532 | "max": null, 
533 | "min": null, 534 | "show": true 535 | }, 536 | { 537 | "format": "short", 538 | "label": null, 539 | "logBase": 1, 540 | "max": null, 541 | "min": null, 542 | "show": true 543 | } 544 | ] 545 | }, 546 | { 547 | "aliasColors": {}, 548 | "bars": false, 549 | "dashLength": 10, 550 | "dashes": false, 551 | "datasource": null, 552 | "decimals": 2, 553 | "fill": 1, 554 | "gridPos": { 555 | "h": 7, 556 | "w": 20, 557 | "x": 0, 558 | "y": 11 559 | }, 560 | "id": 22, 561 | "legend": { 562 | "alignAsTable": true, 563 | "avg": true, 564 | "current": false, 565 | "hideEmpty": true, 566 | "hideZero": true, 567 | "max": true, 568 | "min": true, 569 | "rightSide": true, 570 | "show": true, 571 | "sort": "avg", 572 | "sortDesc": true, 573 | "total": false, 574 | "values": true 575 | }, 576 | "lines": true, 577 | "linewidth": 1, 578 | "links": [], 579 | "nullPointMode": "null", 580 | "percentage": false, 581 | "pointradius": 5, 582 | "points": false, 583 | "renderer": "flot", 584 | "seriesOverrides": [], 585 | "spaceLength": 10, 586 | "stack": true, 587 | "steppedLine": false, 588 | "targets": [ 589 | { 590 | "expr": "sum(irate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[1m])) by (container_label_com_docker_swarm_service_name) * 100 ", 591 | "format": "time_series", 592 | "intervalFactor": 2, 593 | "legendFormat": "{{container_label_com_docker_swarm_service_name}}", 594 | "refId": "A", 595 | "step": 2 596 | } 597 | ], 598 | "thresholds": [], 599 | "timeFrom": null, 600 | "timeShift": null, 601 | "title": "CPU usage by Service", 602 | "tooltip": { 603 | "shared": true, 604 | "sort": 2, 605 | "value_type": "individual" 606 | }, 607 | "type": "graph", 608 | "xaxis": { 609 | "buckets": null, 610 | "mode": "time", 611 | "name": null, 612 | "show": true, 613 | "values": [] 614 | }, 615 | "yaxes": [ 616 | { 617 | "format": "percent", 618 | "label": null, 619 | "logBase": 1, 620 | "max": null, 621 | "min": null, 622 | 
"show": true 623 | }, 624 | { 625 | "format": "short", 626 | "label": null, 627 | "logBase": 1, 628 | "max": null, 629 | "min": null, 630 | "show": false 631 | } 632 | ] 633 | }, 634 | { 635 | "cacheTimeout": null, 636 | "colorBackground": false, 637 | "colorValue": false, 638 | "colors": [ 639 | "rgba(245, 54, 54, 0.9)", 640 | "rgba(237, 129, 40, 0.89)", 641 | "rgba(50, 172, 45, 0.97)" 642 | ], 643 | "datasource": null, 644 | "decimals": null, 645 | "format": "percent", 646 | "gauge": { 647 | "maxValue": 100, 648 | "minValue": 0, 649 | "show": true, 650 | "thresholdLabels": false, 651 | "thresholdMarkers": true 652 | }, 653 | "gridPos": { 654 | "h": 7, 655 | "w": 4, 656 | "x": 20, 657 | "y": 11 658 | }, 659 | "hideTimeOverride": true, 660 | "id": 11, 661 | "interval": null, 662 | "links": [], 663 | "mappingType": 1, 664 | "mappingTypes": [ 665 | { 666 | "name": "value to text", 667 | "value": 1 668 | }, 669 | { 670 | "name": "range to text", 671 | "value": 2 672 | } 673 | ], 674 | "maxDataPoints": 100, 675 | "nullPointMode": "connected", 676 | "nullText": null, 677 | "postfix": "", 678 | "postfixFontSize": "50%", 679 | "prefix": "", 680 | "prefixFontSize": "50%", 681 | "rangeMaps": [ 682 | { 683 | "from": "null", 684 | "text": "N/A", 685 | "to": "null" 686 | } 687 | ], 688 | "sparkline": { 689 | "fillColor": "rgba(31, 118, 189, 0.18)", 690 | "full": false, 691 | "lineColor": "rgb(31, 120, 193)", 692 | "show": false 693 | }, 694 | "tableColumn": "", 695 | "targets": [ 696 | { 697 | "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) ", 698 | "format": "time_series", 699 | "intervalFactor": 2, 700 | "legendFormat": "", 701 | "refId": "A", 702 | "step": 2 703 | } 704 | ], 705 | "thresholds": "10,25,100", 706 | "timeFrom": "1m", 707 | "timeShift": null, 708 | 
"title": "CPU Idle", 709 | "type": "singlestat", 710 | "valueFontSize": "80%", 711 | "valueMaps": [ 712 | { 713 | "op": "=", 714 | "text": "N/A", 715 | "value": "null" 716 | } 717 | ], 718 | "valueName": "avg" 719 | }, 720 | { 721 | "aliasColors": {}, 722 | "bars": false, 723 | "dashLength": 10, 724 | "dashes": false, 725 | "datasource": null, 726 | "decimals": 2, 727 | "fill": 1, 728 | "gridPos": { 729 | "h": 7, 730 | "w": 24, 731 | "x": 0, 732 | "y": 18 733 | }, 734 | "id": 33, 735 | "legend": { 736 | "alignAsTable": true, 737 | "avg": true, 738 | "current": false, 739 | "hideEmpty": true, 740 | "hideZero": true, 741 | "max": false, 742 | "min": false, 743 | "rightSide": true, 744 | "show": true, 745 | "sort": "avg", 746 | "sortDesc": true, 747 | "total": false, 748 | "values": true 749 | }, 750 | "lines": true, 751 | "linewidth": 1, 752 | "links": [], 753 | "nullPointMode": "null as zero", 754 | "percentage": false, 755 | "pointradius": 5, 756 | "points": false, 757 | "renderer": "flot", 758 | "seriesOverrides": [], 759 | "spaceLength": 10, 760 | "stack": false, 761 | "steppedLine": false, 762 | "targets": [ 763 | { 764 | "expr": "topk(10, sum(irate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[$interval])) by (name)) * 100 ", 765 | "format": "time_series", 766 | "intervalFactor": 2, 767 | "legendFormat": "{{name}}", 768 | "refId": "A", 769 | "step": 2 770 | } 771 | ], 772 | "thresholds": [], 773 | "timeFrom": null, 774 | "timeShift": null, 775 | "title": "CPU usage by Container (top 10)", 776 | "tooltip": { 777 | "shared": true, 778 | "sort": 2, 779 | "value_type": "individual" 780 | }, 781 | "type": "graph", 782 | "xaxis": { 783 | "buckets": null, 784 | "mode": "time", 785 | "name": null, 786 | "show": true, 787 | "values": [] 788 | }, 789 | "yaxes": [ 790 | { 791 | "format": "percent", 792 | "label": null, 793 | "logBase": 1, 794 | "max": null, 795 | "min": null, 796 | "show": true 797 | }, 798 | 
{ 799 | "format": "short", 800 | "label": null, 801 | "logBase": 1, 802 | "max": null, 803 | "min": null, 804 | "show": false 805 | } 806 | ] 807 | }, 808 | { 809 | "aliasColors": {}, 810 | "bars": false, 811 | "dashLength": 10, 812 | "dashes": false, 813 | "datasource": null, 814 | "fill": 1, 815 | "gridPos": { 816 | "h": 7, 817 | "w": 20, 818 | "x": 0, 819 | "y": 25 820 | }, 821 | "id": 24, 822 | "legend": { 823 | "alignAsTable": true, 824 | "avg": true, 825 | "current": false, 826 | "max": true, 827 | "min": true, 828 | "rightSide": true, 829 | "show": true, 830 | "sort": "avg", 831 | "sortDesc": true, 832 | "total": false, 833 | "values": true 834 | }, 835 | "lines": true, 836 | "linewidth": 1, 837 | "links": [], 838 | "nullPointMode": "null", 839 | "percentage": false, 840 | "pointradius": 5, 841 | "points": false, 842 | "renderer": "flot", 843 | "seriesOverrides": [], 844 | "spaceLength": 10, 845 | "stack": false, 846 | "steppedLine": false, 847 | "targets": [ 848 | { 849 | "expr": "sum(container_memory_usage_bytes{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}) by (container_label_com_docker_swarm_service_name) ", 850 | "format": "time_series", 851 | "intervalFactor": 2, 852 | "legendFormat": "Used {{container_label_com_docker_swarm_service_name}}", 853 | "refId": "A", 854 | "step": 2 855 | }, 856 | { 857 | "expr": "sum(container_memory_cache{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}) by (container_label_com_docker_swarm_service_name) ", 858 | "format": "time_series", 859 | "intervalFactor": 2, 860 | "legendFormat": "Cached {{container_label_com_docker_swarm_service_name}}", 861 | "refId": "B", 862 | "step": 2 863 | } 864 | ], 865 | "thresholds": [], 866 | "timeFrom": null, 867 | "timeShift": null, 868 | "title": "Memory usage by Service", 869 | "tooltip": { 870 | "shared": true, 871 | "sort": 0, 872 | "value_type": "individual" 873 | }, 874 | "type": "graph", 875 | "xaxis": { 876 | "buckets": 
null, 877 | "mode": "time", 878 | "name": null, 879 | "show": true, 880 | "values": [] 881 | }, 882 | "yaxes": [ 883 | { 884 | "format": "decbytes", 885 | "label": null, 886 | "logBase": 1, 887 | "max": null, 888 | "min": null, 889 | "show": true 890 | }, 891 | { 892 | "format": "short", 893 | "label": null, 894 | "logBase": 1, 895 | "max": null, 896 | "min": null, 897 | "show": true 898 | } 899 | ] 900 | }, 901 | { 902 | "cacheTimeout": null, 903 | "colorBackground": false, 904 | "colorValue": false, 905 | "colors": [ 906 | "rgba(245, 54, 54, 0.9)", 907 | "rgba(237, 129, 40, 0.89)", 908 | "rgba(50, 172, 45, 0.97)" 909 | ], 910 | "datasource": null, 911 | "format": "percent", 912 | "gauge": { 913 | "maxValue": 100, 914 | "minValue": 0, 915 | "show": true, 916 | "thresholdLabels": false, 917 | "thresholdMarkers": true 918 | }, 919 | "gridPos": { 920 | "h": 7, 921 | "w": 4, 922 | "x": 20, 923 | "y": 25 924 | }, 925 | "id": 8, 926 | "interval": null, 927 | "links": [], 928 | "mappingType": 1, 929 | "mappingTypes": [ 930 | { 931 | "name": "value to text", 932 | "value": 1 933 | }, 934 | { 935 | "name": "range to text", 936 | "value": 2 937 | } 938 | ], 939 | "maxDataPoints": 100, 940 | "nullPointMode": "connected", 941 | "nullText": null, 942 | "postfix": "", 943 | "postfixFontSize": "50%", 944 | "prefix": "", 945 | "prefixFontSize": "50%", 946 | "rangeMaps": [ 947 | { 948 | "from": "null", 949 | "text": "N/A", 950 | "to": "null" 951 | } 952 | ], 953 | "sparkline": { 954 | "fillColor": "rgba(31, 118, 189, 0.18)", 955 | "full": false, 956 | "lineColor": "rgb(31, 120, 193)", 957 | "show": false 958 | }, 959 | "tableColumn": "", 960 | "targets": [ 961 | { 962 | "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 963 | "format": "time_series", 964 | "intervalFactor": 2, 965 | 
"legendFormat": "", 966 | "refId": "A", 967 | "step": 20 968 | } 969 | ], 970 | "thresholds": "10,25,100", 971 | "title": "Available Memory", 972 | "type": "singlestat", 973 | "valueFontSize": "80%", 974 | "valueMaps": [ 975 | { 976 | "op": "=", 977 | "text": "N/A", 978 | "value": "null" 979 | } 980 | ], 981 | "valueName": "avg" 982 | }, 983 | { 984 | "aliasColors": {}, 985 | "bars": false, 986 | "dashLength": 10, 987 | "dashes": false, 988 | "datasource": null, 989 | "fill": 1, 990 | "gridPos": { 991 | "h": 7, 992 | "w": 24, 993 | "x": 0, 994 | "y": 32 995 | }, 996 | "id": 34, 997 | "legend": { 998 | "alignAsTable": true, 999 | "avg": true, 1000 | "current": false, 1001 | "hideEmpty": false, 1002 | "hideZero": false, 1003 | "max": false, 1004 | "min": false, 1005 | "rightSide": true, 1006 | "show": true, 1007 | "sort": "avg", 1008 | "sortDesc": true, 1009 | "total": false, 1010 | "values": true 1011 | }, 1012 | "lines": true, 1013 | "linewidth": 1, 1014 | "links": [], 1015 | "nullPointMode": "null", 1016 | "percentage": false, 1017 | "pointradius": 5, 1018 | "points": false, 1019 | "renderer": "flot", 1020 | "seriesOverrides": [], 1021 | "spaceLength": 10, 1022 | "stack": false, 1023 | "steppedLine": false, 1024 | "targets": [ 1025 | { 1026 | "expr": "topk(10, avg_over_time(container_memory_usage_bytes{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[$interval]))", 1027 | "format": "time_series", 1028 | "intervalFactor": 2, 1029 | "legendFormat": "{{name}}", 1030 | "refId": "A", 1031 | "step": 2 1032 | } 1033 | ], 1034 | "thresholds": [], 1035 | "timeFrom": null, 1036 | "timeShift": null, 1037 | "title": "Memory usage by Container (top 10)", 1038 | "tooltip": { 1039 | "shared": true, 1040 | "sort": 2, 1041 | "value_type": "individual" 1042 | }, 1043 | "type": "graph", 1044 | "xaxis": { 1045 | "buckets": null, 1046 | "mode": "time", 1047 | "name": null, 1048 | "show": true, 1049 | "values": [] 1050 | }, 1051 | "yaxes": [ 1052 | { 1053 | 
"format": "decbytes", 1054 | "label": null, 1055 | "logBase": 1, 1056 | "max": null, 1057 | "min": null, 1058 | "show": true 1059 | }, 1060 | { 1061 | "format": "short", 1062 | "label": null, 1063 | "logBase": 1, 1064 | "max": null, 1065 | "min": null, 1066 | "show": false 1067 | } 1068 | ] 1069 | }, 1070 | { 1071 | "aliasColors": {}, 1072 | "bars": false, 1073 | "dashLength": 10, 1074 | "dashes": false, 1075 | "datasource": null, 1076 | "fill": 1, 1077 | "gridPos": { 1078 | "h": 7, 1079 | "w": 24, 1080 | "x": 0, 1081 | "y": 39 1082 | }, 1083 | "id": 17, 1084 | "legend": { 1085 | "alignAsTable": true, 1086 | "avg": true, 1087 | "current": false, 1088 | "max": true, 1089 | "min": true, 1090 | "rightSide": true, 1091 | "show": true, 1092 | "sort": "avg", 1093 | "sortDesc": true, 1094 | "total": false, 1095 | "values": true 1096 | }, 1097 | "lines": true, 1098 | "linewidth": 1, 1099 | "links": [], 1100 | "nullPointMode": "null", 1101 | "percentage": false, 1102 | "pointradius": 5, 1103 | "points": false, 1104 | "renderer": "flot", 1105 | "seriesOverrides": [], 1106 | "spaceLength": 10, 1107 | "stack": false, 1108 | "steppedLine": false, 1109 | "targets": [ 1110 | { 1111 | "expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval])) by (container_label_com_docker_swarm_service_name)", 1112 | "format": "time_series", 1113 | "intervalFactor": 2, 1114 | "legendFormat": "{{container_label_com_docker_swarm_service_name}}", 1115 | "refId": "A", 1116 | "step": 2 1117 | } 1118 | ], 1119 | "thresholds": [], 1120 | "timeFrom": null, 1121 | "timeShift": null, 1122 | "title": "Network received by Service", 1123 | "tooltip": { 1124 | "shared": true, 1125 | "sort": 0, 1126 | "value_type": "individual" 1127 | }, 1128 | "type": "graph", 1129 | "xaxis": { 1130 | "buckets": null, 1131 | "mode": "time", 1132 | "name": null, 1133 | "show": true, 1134 | "values": [] 1135 | }, 1136 | "yaxes": [ 1137 | { 1138 | "format": "Bps", 
1139 | "label": null, 1140 | "logBase": 1, 1141 | "max": null, 1142 | "min": null, 1143 | "show": true 1144 | }, 1145 | { 1146 | "format": "short", 1147 | "label": null, 1148 | "logBase": 1, 1149 | "max": null, 1150 | "min": null, 1151 | "show": true 1152 | } 1153 | ] 1154 | }, 1155 | { 1156 | "aliasColors": {}, 1157 | "bars": false, 1158 | "dashLength": 10, 1159 | "dashes": false, 1160 | "datasource": null, 1161 | "fill": 1, 1162 | "gridPos": { 1163 | "h": 7, 1164 | "w": 24, 1165 | "x": 0, 1166 | "y": 46 1167 | }, 1168 | "id": 25, 1169 | "legend": { 1170 | "alignAsTable": true, 1171 | "avg": true, 1172 | "current": false, 1173 | "max": true, 1174 | "min": true, 1175 | "rightSide": true, 1176 | "show": true, 1177 | "sort": "avg", 1178 | "sortDesc": true, 1179 | "total": false, 1180 | "values": true 1181 | }, 1182 | "lines": true, 1183 | "linewidth": 1, 1184 | "links": [], 1185 | "nullPointMode": "null", 1186 | "percentage": false, 1187 | "pointradius": 5, 1188 | "points": false, 1189 | "renderer": "flot", 1190 | "seriesOverrides": [], 1191 | "spaceLength": 10, 1192 | "stack": false, 1193 | "steppedLine": false, 1194 | "targets": [ 1195 | { 1196 | "expr": "sum(rate(container_network_transmit_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval])) by (container_label_com_docker_swarm_service_name)", 1197 | "format": "time_series", 1198 | "intervalFactor": 2, 1199 | "legendFormat": "{{container_label_com_docker_swarm_service_name}}", 1200 | "metric": "", 1201 | "refId": "B", 1202 | "step": 2 1203 | } 1204 | ], 1205 | "thresholds": [], 1206 | "timeFrom": null, 1207 | "timeShift": null, 1208 | "title": "Network transmitted by Service", 1209 | "tooltip": { 1210 | "shared": true, 1211 | "sort": 0, 1212 | "value_type": "individual" 1213 | }, 1214 | "type": "graph", 1215 | "xaxis": { 1216 | "buckets": null, 1217 | "mode": "time", 1218 | "name": null, 1219 | "show": true, 1220 | "values": [] 1221 | }, 1222 | "yaxes": [ 1223 | { 1224 | "format": 
"Bps", 1225 | "label": null, 1226 | "logBase": 1, 1227 | "max": null, 1228 | "min": null, 1229 | "show": true 1230 | }, 1231 | { 1232 | "format": "short", 1233 | "label": null, 1234 | "logBase": 1, 1235 | "max": null, 1236 | "min": null, 1237 | "show": true 1238 | } 1239 | ] 1240 | }, 1241 | { 1242 | "aliasColors": {}, 1243 | "bars": false, 1244 | "dashLength": 10, 1245 | "dashes": false, 1246 | "datasource": null, 1247 | "fill": 1, 1248 | "gridPos": { 1249 | "h": 7, 1250 | "w": 10, 1251 | "x": 0, 1252 | "y": 53 1253 | }, 1254 | "id": 31, 1255 | "legend": { 1256 | "avg": true, 1257 | "current": false, 1258 | "max": false, 1259 | "min": false, 1260 | "show": true, 1261 | "total": false, 1262 | "values": true 1263 | }, 1264 | "lines": true, 1265 | "linewidth": 1, 1266 | "links": [], 1267 | "nullPointMode": "null", 1268 | "percentage": false, 1269 | "pointradius": 5, 1270 | "points": false, 1271 | "renderer": "flot", 1272 | "seriesOverrides": [], 1273 | "spaceLength": 10, 1274 | "stack": false, 1275 | "steppedLine": false, 1276 | "targets": [ 1277 | { 1278 | "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", 1279 | "format": "time_series", 1280 | "intervalFactor": 2, 1281 | "legendFormat": "Received", 1282 | "refId": "A", 1283 | "step": 4 1284 | }, 1285 | { 1286 | "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", 1287 | "format": "time_series", 1288 | "intervalFactor": 2, 1289 | "legendFormat": "Transmitted", 1290 | "refId": "B", 1291 | "step": 4 1292 | } 1293 | ], 1294 | "thresholds": [], 1295 | "timeFrom": null, 1296 | "timeShift": null, 1297 | "title": "Cluster Network Traffic", 1298 | "tooltip": { 1299 | "shared": true, 1300 | "sort": 0, 1301 | "value_type": "individual" 1302 | }, 1303 | "type": "graph", 1304 | "xaxis": { 1305 | "buckets": null, 1306 | "mode": "time", 1307 | "name": null, 1308 | "show": true, 1309 | "values": [] 1310 | }, 1311 | "yaxes": [ 1312 | { 1313 | "format": 
"Bps", 1314 | "label": null, 1315 | "logBase": 1, 1316 | "max": null, 1317 | "min": null, 1318 | "show": true 1319 | }, 1320 | { 1321 | "format": "short", 1322 | "label": null, 1323 | "logBase": 1, 1324 | "max": null, 1325 | "min": null, 1326 | "show": true 1327 | } 1328 | ] 1329 | }, 1330 | { 1331 | "aliasColors": {}, 1332 | "bars": false, 1333 | "dashLength": 10, 1334 | "dashes": false, 1335 | "datasource": null, 1336 | "fill": 1, 1337 | "gridPos": { 1338 | "h": 7, 1339 | "w": 10, 1340 | "x": 10, 1341 | "y": 53 1342 | }, 1343 | "id": 26, 1344 | "legend": { 1345 | "alignAsTable": false, 1346 | "avg": true, 1347 | "current": false, 1348 | "max": true, 1349 | "min": true, 1350 | "rightSide": false, 1351 | "show": true, 1352 | "total": false, 1353 | "values": true 1354 | }, 1355 | "lines": true, 1356 | "linewidth": 1, 1357 | "links": [], 1358 | "nullPointMode": "null", 1359 | "percentage": false, 1360 | "pointradius": 5, 1361 | "points": false, 1362 | "renderer": "flot", 1363 | "seriesOverrides": [], 1364 | "spaceLength": 10, 1365 | "stack": false, 1366 | "steppedLine": false, 1367 | "targets": [ 1368 | { 1369 | "expr": "sum(irate(container_fs_reads_total[$interval]) )", 1370 | "format": "time_series", 1371 | "intervalFactor": 2, 1372 | "legendFormat": "Reads", 1373 | "refId": "A", 1374 | "step": 4 1375 | }, 1376 | { 1377 | "expr": "sum(irate(container_fs_writes_total[$interval])) ", 1378 | "format": "time_series", 1379 | "intervalFactor": 2, 1380 | "legendFormat": "Writes ", 1381 | "refId": "B", 1382 | "step": 4 1383 | } 1384 | ], 1385 | "thresholds": [], 1386 | "timeFrom": null, 1387 | "timeShift": null, 1388 | "title": "Cluster IOPS", 1389 | "tooltip": { 1390 | "shared": true, 1391 | "sort": 0, 1392 | "value_type": "individual" 1393 | }, 1394 | "type": "graph", 1395 | "xaxis": { 1396 | "buckets": null, 1397 | "mode": "time", 1398 | "name": null, 1399 | "show": true, 1400 | "values": [] 1401 | }, 1402 | "yaxes": [ 1403 | { 1404 | "format": "short", 1405 | "label": 
null, 1406 | "logBase": 1, 1407 | "max": null, 1408 | "min": null, 1409 | "show": true 1410 | }, 1411 | { 1412 | "format": "short", 1413 | "label": null, 1414 | "logBase": 1, 1415 | "max": null, 1416 | "min": null, 1417 | "show": true 1418 | } 1419 | ] 1420 | }, 1421 | { 1422 | "cacheTimeout": null, 1423 | "colorBackground": false, 1424 | "colorValue": false, 1425 | "colors": [ 1426 | "rgba(245, 54, 54, 0.9)", 1427 | "rgba(237, 129, 40, 0.89)", 1428 | "rgba(50, 172, 45, 0.97)" 1429 | ], 1430 | "datasource": null, 1431 | "format": "percent", 1432 | "gauge": { 1433 | "maxValue": 100, 1434 | "minValue": 0, 1435 | "show": true, 1436 | "thresholdLabels": false, 1437 | "thresholdMarkers": true 1438 | }, 1439 | "gridPos": { 1440 | "h": 7, 1441 | "w": 4, 1442 | "x": 20, 1443 | "y": 53 1444 | }, 1445 | "id": 27, 1446 | "interval": null, 1447 | "links": [], 1448 | "mappingType": 1, 1449 | "mappingTypes": [ 1450 | { 1451 | "name": "value to text", 1452 | "value": 1 1453 | }, 1454 | { 1455 | "name": "range to text", 1456 | "value": 2 1457 | } 1458 | ], 1459 | "maxDataPoints": 100, 1460 | "nullPointMode": "connected", 1461 | "nullText": null, 1462 | "postfix": "", 1463 | "postfixFontSize": "50%", 1464 | "prefix": "", 1465 | "prefixFontSize": "50%", 1466 | "rangeMaps": [ 1467 | { 1468 | "from": "null", 1469 | "text": "N/A", 1470 | "to": "null" 1471 | } 1472 | ], 1473 | "sparkline": { 1474 | "fillColor": "rgba(31, 118, 189, 0.18)", 1475 | "full": false, 1476 | "lineColor": "rgb(31, 120, 193)", 1477 | "show": false 1478 | }, 1479 | "tableColumn": "", 1480 | "targets": [ 1481 | { 1482 | "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/rootfs\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", 1483 | "format": "time_series", 1484 | "intervalFactor": 2, 1485 | "legendFormat": "", 1486 | "refId": "A", 1487 | 
"step": 20 1488 | } 1489 | ], 1490 | "thresholds": "10,25,100", 1491 | "title": "Available Disk Space", 1492 | "type": "singlestat", 1493 | "valueFontSize": "80%", 1494 | "valueMaps": [ 1495 | { 1496 | "op": "=", 1497 | "text": "N/A", 1498 | "value": "null" 1499 | } 1500 | ], 1501 | "valueName": "avg" 1502 | }, 1503 | { 1504 | "aliasColors": {}, 1505 | "bars": false, 1506 | "dashLength": 10, 1507 | "dashes": false, 1508 | "datasource": null, 1509 | "decimals": 0, 1510 | "fill": 1, 1511 | "gridPos": { 1512 | "h": 7, 1513 | "w": 12, 1514 | "x": 0, 1515 | "y": 60 1516 | }, 1517 | "id": 29, 1518 | "legend": { 1519 | "alignAsTable": true, 1520 | "avg": false, 1521 | "current": true, 1522 | "hideEmpty": true, 1523 | "hideZero": true, 1524 | "max": false, 1525 | "min": false, 1526 | "rightSide": true, 1527 | "show": true, 1528 | "sort": "current", 1529 | "sortDesc": true, 1530 | "total": false, 1531 | "values": true 1532 | }, 1533 | "lines": true, 1534 | "linewidth": 1, 1535 | "links": [], 1536 | "nullPointMode": "null", 1537 | "percentage": false, 1538 | "pointradius": 5, 1539 | "points": false, 1540 | "renderer": "flot", 1541 | "seriesOverrides": [], 1542 | "spaceLength": 10, 1543 | "stack": false, 1544 | "steppedLine": false, 1545 | "targets": [ 1546 | { 1547 | "expr": "sum(engine_daemon_container_actions_seconds_count * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) by (action)", 1548 | "format": "time_series", 1549 | "intervalFactor": 10, 1550 | "legendFormat": "{{action }}", 1551 | "refId": "A", 1552 | "step": 10 1553 | } 1554 | ], 1555 | "thresholds": [], 1556 | "timeFrom": null, 1557 | "timeShift": null, 1558 | "title": "Docker Daemon Container Actions", 1559 | "tooltip": { 1560 | "shared": true, 1561 | "sort": 2, 1562 | "value_type": "individual" 1563 | }, 1564 | "type": "graph", 1565 | "xaxis": { 1566 | "buckets": null, 1567 | "mode": "time", 1568 | "name": null, 1569 | "show": true, 1570 | "values": [] 1571 | }, 1572 | "yaxes": [ 1573 
| { 1574 | "format": "short", 1575 | "label": null, 1576 | "logBase": 1, 1577 | "max": null, 1578 | "min": null, 1579 | "show": true 1580 | }, 1581 | { 1582 | "format": "short", 1583 | "label": null, 1584 | "logBase": 1, 1585 | "max": null, 1586 | "min": null, 1587 | "show": true 1588 | } 1589 | ] 1590 | }, 1591 | { 1592 | "aliasColors": {}, 1593 | "bars": false, 1594 | "dashLength": 10, 1595 | "dashes": false, 1596 | "datasource": null, 1597 | "decimals": 0, 1598 | "fill": 1, 1599 | "gridPos": { 1600 | "h": 7, 1601 | "w": 12, 1602 | "x": 12, 1603 | "y": 60 1604 | }, 1605 | "id": 30, 1606 | "legend": { 1607 | "alignAsTable": true, 1608 | "avg": false, 1609 | "current": true, 1610 | "hideEmpty": true, 1611 | "hideZero": true, 1612 | "max": false, 1613 | "min": false, 1614 | "rightSide": true, 1615 | "show": true, 1616 | "sort": "current", 1617 | "sortDesc": true, 1618 | "total": false, 1619 | "values": true 1620 | }, 1621 | "lines": true, 1622 | "linewidth": 1, 1623 | "links": [], 1624 | "nullPointMode": "null", 1625 | "percentage": false, 1626 | "pointradius": 5, 1627 | "points": false, 1628 | "renderer": "flot", 1629 | "seriesOverrides": [], 1630 | "spaceLength": 10, 1631 | "stack": false, 1632 | "steppedLine": false, 1633 | "targets": [ 1634 | { 1635 | "expr": "sum(engine_daemon_network_actions_seconds_count * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) by (action)", 1636 | "format": "time_series", 1637 | "intervalFactor": 10, 1638 | "legendFormat": "{{action }}", 1639 | "refId": "A", 1640 | "step": 10 1641 | } 1642 | ], 1643 | "thresholds": [], 1644 | "timeFrom": null, 1645 | "timeShift": null, 1646 | "title": "Docker Daemon Network Actions", 1647 | "tooltip": { 1648 | "shared": true, 1649 | "sort": 2, 1650 | "value_type": "individual" 1651 | }, 1652 | "type": "graph", 1653 | "xaxis": { 1654 | "buckets": null, 1655 | "mode": "time", 1656 | "name": null, 1657 | "show": true, 1658 | "values": [] 1659 | }, 1660 | "yaxes": [ 1661 | { 
1662 | "format": "short", 1663 | "label": null, 1664 | "logBase": 1, 1665 | "max": null, 1666 | "min": null, 1667 | "show": true 1668 | }, 1669 | { 1670 | "format": "short", 1671 | "label": null, 1672 | "logBase": 1, 1673 | "max": null, 1674 | "min": null, 1675 | "show": true 1676 | } 1677 | ] 1678 | }, 1679 | { 1680 | "columns": [ 1681 | { 1682 | "text": "Avg", 1683 | "value": "avg" 1684 | } 1685 | ], 1686 | "datasource": null, 1687 | "fontSize": "100%", 1688 | "gridPos": { 1689 | "h": 7, 1690 | "w": 24, 1691 | "x": 0, 1692 | "y": 67 1693 | }, 1694 | "hideTimeOverride": true, 1695 | "id": 28, 1696 | "links": [], 1697 | "pageSize": null, 1698 | "repeat": null, 1699 | "scroll": true, 1700 | "showHeader": true, 1701 | "sort": { 1702 | "col": 0, 1703 | "desc": true 1704 | }, 1705 | "styles": [ 1706 | { 1707 | "alias": "Time", 1708 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1709 | "pattern": "Time", 1710 | "type": "hidden" 1711 | }, 1712 | { 1713 | "alias": "", 1714 | "colorMode": null, 1715 | "colors": [ 1716 | "rgba(245, 54, 54, 0.9)", 1717 | "rgba(237, 129, 40, 0.89)", 1718 | "rgba(50, 172, 45, 0.97)" 1719 | ], 1720 | "decimals": 2, 1721 | "pattern": "/.*/", 1722 | "thresholds": [], 1723 | "type": "number", 1724 | "unit": "short" 1725 | } 1726 | ], 1727 | "targets": [ 1728 | { 1729 | "expr": "sum(engine_daemon_engine_info * on(instance) group_left(node_id) swarm_node_info) by (kernel, os, graphdriver, version, node_id)", 1730 | "format": "table", 1731 | "instant": true, 1732 | "intervalFactor": 2, 1733 | "legendFormat": "", 1734 | "refId": "A", 1735 | "step": 2 1736 | } 1737 | ], 1738 | "timeFrom": "1s", 1739 | "title": "Docker Engine Info", 1740 | "transform": "timeseries_to_rows", 1741 | "type": "table" 1742 | } 1743 | ], 1744 | "refresh": "30s", 1745 | "schemaVersion": 16, 1746 | "style": "dark", 1747 | "tags": [ 1748 | "swarmprom" 1749 | ], 1750 | "templating": { 1751 | "list": [ 1752 | { 1753 | "allValue": ".+", 1754 | "current": { 1755 | "text": "All", 1756 | 
"value": "$__all" 1757 | }, 1758 | "datasource": "Prometheus", 1759 | "hide": 0, 1760 | "includeAll": true, 1761 | "label": "Swarm Node", 1762 | "multi": false, 1763 | "name": "node_id", 1764 | "options": [], 1765 | "query": "node_meta", 1766 | "refresh": 2, 1767 | "regex": "/node_id=\"([^\"]+)\"/", 1768 | "sort": 0, 1769 | "tagValuesQuery": "label_values({node_id=\"$tag\"},node_name)", 1770 | "tags": [ 1771 | "ofdocker", 1772 | "ofmon" 1773 | ], 1774 | "tagsQuery": "label_values(node_meta, node_name)", 1775 | "type": "query", 1776 | "useTags": true 1777 | }, 1778 | { 1779 | "auto": true, 1780 | "auto_count": 30, 1781 | "auto_min": "30s", 1782 | "current": { 1783 | "text": "auto", 1784 | "value": "$__auto_interval_interval" 1785 | }, 1786 | "hide": 0, 1787 | "label": "Interval", 1788 | "name": "interval", 1789 | "options": [ 1790 | { 1791 | "selected": true, 1792 | "text": "auto", 1793 | "value": "$__auto_interval_interval" 1794 | }, 1795 | { 1796 | "selected": false, 1797 | "text": "1m", 1798 | "value": "1m" 1799 | }, 1800 | { 1801 | "selected": false, 1802 | "text": "10m", 1803 | "value": "10m" 1804 | }, 1805 | { 1806 | "selected": false, 1807 | "text": "30m", 1808 | "value": "30m" 1809 | }, 1810 | { 1811 | "selected": false, 1812 | "text": "1h", 1813 | "value": "1h" 1814 | }, 1815 | { 1816 | "selected": false, 1817 | "text": "6h", 1818 | "value": "6h" 1819 | }, 1820 | { 1821 | "selected": false, 1822 | "text": "12h", 1823 | "value": "12h" 1824 | }, 1825 | { 1826 | "selected": false, 1827 | "text": "1d", 1828 | "value": "1d" 1829 | }, 1830 | { 1831 | "selected": false, 1832 | "text": "7d", 1833 | "value": "7d" 1834 | }, 1835 | { 1836 | "selected": false, 1837 | "text": "14d", 1838 | "value": "14d" 1839 | }, 1840 | { 1841 | "selected": false, 1842 | "text": "30d", 1843 | "value": "30d" 1844 | } 1845 | ], 1846 | "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 1847 | "refresh": 2, 1848 | "type": "interval" 1849 | } 1850 | ] 1851 | }, 1852 | "time": { 1853 | "from": 
"now-15m", 1854 | "to": "now" 1855 | }, 1856 | "timepicker": { 1857 | "refresh_intervals": [ 1858 | "5s", 1859 | "10s", 1860 | "30s", 1861 | "1m", 1862 | "5m", 1863 | "15m", 1864 | "30m", 1865 | "1h", 1866 | "2h", 1867 | "1d" 1868 | ], 1869 | "time_options": [ 1870 | "5m", 1871 | "15m", 1872 | "1h", 1873 | "6h", 1874 | "12h", 1875 | "24h", 1876 | "2d", 1877 | "7d", 1878 | "30d" 1879 | ] 1880 | }, 1881 | "timezone": "", 1882 | "title": "Docker Swarm Services", 1883 | "uid": "zr_baSRmk", 1884 | "version": 1 1885 | } 1886 | -------------------------------------------------------------------------------- /swarmprom/grafana/datasources/prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | deleteDatasources: 4 | - name: Prometheus 5 | 6 | datasources: 7 | - name: Prometheus 8 | type: prometheus 9 | access: proxy 10 | url: http://prometheus:9090 11 | isDefault: true 12 | version: 1 13 | editable: true 14 | -------------------------------------------------------------------------------- /swarmprom/grafana/screens/alertmanager-slack-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/windvalley/docker-swarm-infras/b17d991a548046ed8e5aa7b690c1fe9a6067dea3/swarmprom/grafana/screens/alertmanager-slack-v2.png -------------------------------------------------------------------------------- /swarmprom/grafana/screens/swarmprom-nodes-dash-v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/windvalley/docker-swarm-infras/b17d991a548046ed8e5aa7b690c1fe9a6067dea3/swarmprom/grafana/screens/swarmprom-nodes-dash-v3.png -------------------------------------------------------------------------------- /swarmprom/grafana/screens/swarmprom-prometheus-dash-v3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/windvalley/docker-swarm-infras/b17d991a548046ed8e5aa7b690c1fe9a6067dea3/swarmprom/grafana/screens/swarmprom-prometheus-dash-v3.png -------------------------------------------------------------------------------- /swarmprom/grafana/screens/swarmprom-services-dash-v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/windvalley/docker-swarm-infras/b17d991a548046ed8e5aa7b690c1fe9a6067dea3/swarmprom/grafana/screens/swarmprom-services-dash-v3.png -------------------------------------------------------------------------------- /swarmprom/grafana/screens/unsee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/windvalley/docker-swarm-infras/b17d991a548046ed8e5aa7b690c1fe9a6067dea3/swarmprom/grafana/screens/unsee.png -------------------------------------------------------------------------------- /swarmprom/grafana/screens/weave-scope-hosts-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/windvalley/docker-swarm-infras/b17d991a548046ed8e5aa7b690c1fe9a6067dea3/swarmprom/grafana/screens/weave-scope-hosts-v2.png -------------------------------------------------------------------------------- /swarmprom/grafana/screens/weave-scope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/windvalley/docker-swarm-infras/b17d991a548046ed8e5aa7b690c1fe9a6067dea3/swarmprom/grafana/screens/weave-scope.png -------------------------------------------------------------------------------- /swarmprom/grafana/swarmprom_dashboards.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'default' 5 | orgId: 1 6 | folder: '' 7 | type: file 8 | disableDeletion: false 9 | editable: true 10 | options: 11 | path: 
/etc/grafana/dashboards 12 | -------------------------------------------------------------------------------- /swarmprom/node-exporter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM prom/node-exporter:v0.16.0 2 | 3 | ENV NODE_ID=none 4 | 5 | USER root 6 | 7 | COPY conf /etc/node-exporter/ 8 | 9 | ENTRYPOINT [ "/etc/node-exporter/docker-entrypoint.sh" ] 10 | CMD [ "/bin/node_exporter" ] 11 | -------------------------------------------------------------------------------- /swarmprom/node-exporter/conf/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | NODE_NAME=$(cat /etc/nodename) 4 | echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > /etc/node-exporter/node-meta.prom 5 | 6 | set -- /bin/node_exporter "$@" 7 | 8 | exec "$@" 9 | -------------------------------------------------------------------------------- /swarmprom/prometheus/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM prom/prometheus:v2.5.0 2 | # https://hub.docker.com/r/prom/prometheus/tags/ 3 | 4 | ENV WEAVE_TOKEN=none 5 | 6 | COPY conf /etc/prometheus/ 7 | 8 | ENTRYPOINT [ "/etc/prometheus/docker-entrypoint.sh" ] 9 | CMD [ "--config.file=/etc/prometheus/prometheus.yml", \ 10 | "--storage.tsdb.path=/prometheus" ] 11 | -------------------------------------------------------------------------------- /swarmprom/prometheus/conf/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | cat /etc/prometheus/prometheus.yml > /tmp/prometheus.yml 4 | # shellcheck disable=SC2002 5 | cat /etc/prometheus/weave-cortex.yml | 6 | sed "s@#password: #@password: '$WEAVE_TOKEN'@g" > /tmp/weave-cortex.yml 7 | 8 | #JOBS=mongo-exporter:9111 redis-exporter:9112 9 | 10 | if [ ${JOBS+x} ]; then 11 | 12 | for 
job in $JOBS 13 | do 14 | echo "adding job $job" 15 | 16 | SERVICE=$(echo "$job" | cut -d":" -f1) 17 | PORT=$(echo "$job" | cut -d":" -f2) 18 | 19 | cat >>/tmp/prometheus.yml <>/tmp/weave-cortex.yml <# 5 | 6 | global: 7 | scrape_interval: 15s 8 | evaluation_interval: 15s 9 | 10 | external_labels: 11 | monitor: 'promswarm' 12 | 13 | scrape_configs: 14 | - job_name: 'prometheus' 15 | static_configs: 16 | - targets: ['localhost:9090'] 17 | 18 | - job_name: 'dockerd-exporter' 19 | dns_sd_configs: 20 | - names: 21 | - 'tasks.dockerd-exporter' 22 | type: 'A' 23 | port: 9323 24 | 25 | - job_name: 'cadvisor' 26 | dns_sd_configs: 27 | - names: 28 | - 'tasks.cadvisor' 29 | type: 'A' 30 | port: 8080 31 | 32 | - job_name: 'node-exporter' 33 | dns_sd_configs: 34 | - names: 35 | - 'tasks.node-exporter' 36 | type: 'A' 37 | port: 9100 38 | -------------------------------------------------------------------------------- /swarmprom/prometheus/rules/swarm_node.rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: /1/store/projects/vagrant/docker-swarm-vagrant/apps/swarmprom/prometheus/rules/swarm_node.rules.yml 3 | rules: 4 | - alert: node_cpu_usage 5 | expr: 100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[1m]) * ON(instance) GROUP_LEFT(node_name) 6 | node_meta * 100) BY (node_name)) > 50 7 | for: 1m 8 | labels: 9 | severity: warning 10 | annotations: 11 | description: Swarm node {{ $labels.node_name }} CPU usage is at {{ humanize 12 | $value}}%. 13 | summary: CPU alert for Swarm node '{{ $labels.node_name }}' 14 | - alert: node_memory_usage 15 | expr: sum(((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) 16 | * ON(instance) GROUP_LEFT(node_name) node_meta * 100) BY (node_name) > 80 17 | for: 1m 18 | labels: 19 | severity: warning 20 | annotations: 21 | description: Swarm node {{ $labels.node_name }} memory usage is at {{ humanize 22 | $value}}%. 
23 | summary: Memory alert for Swarm node '{{ $labels.node_name }}' 24 | - alert: node_disk_usage 25 | expr: ((node_filesystem_size_bytes{mountpoint="/rootfs"} - node_filesystem_free_bytes{mountpoint="/rootfs"}) 26 | * 100 / node_filesystem_size_bytes{mountpoint="/rootfs"}) * ON(instance) GROUP_LEFT(node_name) 27 | node_meta > 85 28 | for: 1m 29 | labels: 30 | severity: warning 31 | annotations: 32 | description: Swarm node {{ $labels.node_name }} disk usage is at {{ humanize 33 | $value}}%. 34 | summary: Disk alert for Swarm node '{{ $labels.node_name }}' 35 | - alert: node_disk_fill_rate_6h 36 | expr: predict_linear(node_filesystem_free_bytes{mountpoint="/rootfs"}[1h], 6 * 3600) * ON(instance) 37 | GROUP_LEFT(node_name) node_meta < 0 38 | for: 1h 39 | labels: 40 | severity: critical 41 | annotations: 42 | description: Swarm node {{ $labels.node_name }} disk is going to fill up in 43 | 6h. 44 | summary: Disk fill alert for Swarm node '{{ $labels.node_name }}' 45 | -------------------------------------------------------------------------------- /swarmprom/prometheus/rules/swarm_task.rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: /1/store/projects/vagrant/docker-swarm-vagrant/apps/swarmprom/prometheus/rules/swarm_task.rules.yml 3 | rules: 4 | - alert: task_high_cpu_usage_50 5 | expr: sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~".+"}[1m])) 6 | BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) 7 | * 100 > 50 8 | for: 1m 9 | annotations: 10 | description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ 11 | $labels.container_label_com_docker_swarm_node_id }}'' CPU usage is at {{ humanize 12 | $value}}%.' 
13 | summary: CPU alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name 14 | }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' 15 | - alert: task_high_memory_usage_1g 16 | expr: sum(container_memory_rss{container_label_com_docker_swarm_task_name=~".+"}) 17 | BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) > 1e+09 18 | for: 1m 19 | annotations: 20 | description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ 21 | $labels.container_label_com_docker_swarm_node_id }}'' memory usage is {{ humanize 22 | $value}}.' 23 | summary: Memory alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name 24 | }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' 25 | -------------------------------------------------------------------------------- /traefik.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | services: 4 | 5 | traefik: 6 | image: traefik:v2.2 7 | # `mode: host` lets Traefik retrieve the real IP of the client; refer to: 8 | # https://dockerswarm.rocks/traefik/ 9 | ports: 10 | - target: 80 11 | published: 80 12 | mode: host 13 | - target: 443 14 | published: 443 15 | mode: host 16 | deploy: 17 | replicas: ${TRAEFIK_REPLICAS:-3} 18 | placement: 19 | # Traefik can only be deployed on manager nodes in Swarm mode; refer to: 20 | # https://docs.traefik.io/providers/docker/#docker-api-access_1 21 | constraints: 22 | - node.role == manager 23 | preferences: 24 | - spread: node.id 25 | labels: 26 | - "traefik.enable=true" 27 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 28 | - "traefik.http.routers.api.rule=Host(`traefik.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 29 | - "traefik.http.routers.api.service=api@internal" 30 | - "traefik.http.routers.api.entryPoints=web, websecure" 31 | - "traefik.http.routers.api.tls=true" 32 | - "traefik.http.routers.api.middlewares=IpWhiteList, auth" 33 | - 
"traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 34 | - "traefik.http.middlewares.auth.basicauth.users=${USERNAME?Variable USERNAME not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set}" 35 | # Dummy service for Swarm port detection. The port can be any valid integer value. 36 | - "traefik.http.services.dummy-svc.loadbalancer.server.port=9999" 37 | volumes: 38 | - /var/run/docker.sock:/var/run/docker.sock 39 | - traefik-log:/var/log/traefik/ 40 | command: 41 | --log=true 42 | --log.level=INFO 43 | --accesslog=true 44 | --accesslog.filepath=/var/log/traefik/access.log 45 | --accesslog.fields.headers.defaultmode=keep 46 | --api=true 47 | --api.dashboard=true 48 | --providers.docker=true 49 | --providers.docker.swarmMode=true 50 | --providers.docker.exposedbydefault=false 51 | --entrypoints.web.address=:80 52 | --entrypoints.websecure.address=:443 53 | --providers.consul=true 54 | --providers.consul.endpoints="http://consul-leader:8500" 55 | --providers.consul.rootkey=traefik 56 | --providers.consul.tls.insecureSkipVerify=true 57 | --entrypoints.web.http.redirections.entryPoint.to=websecure 58 | #--providers.docker.constraints="Label(`traefik.enable`,`true`)" 59 | networks: 60 | - default 61 | - ${TRAEFIK_NETWORK} 62 | depends_on: 63 | - consul-leader 64 | 65 | consul-leader: 66 | image: consul 67 | command: agent -server -client=0.0.0.0 -bootstrap -ui 68 | volumes: 69 | - consul-data-leader:/consul/data 70 | environment: 71 | - CONSUL_BIND_INTERFACE=eth0 72 | - 'CONSUL_LOCAL_CONFIG={"leave_on_terminate": true}' 73 | networks: 74 | - default 75 | - ${TRAEFIK_NETWORK} 76 | deploy: 77 | # consul-leader needs to be constrained to a fixed node; otherwise the Consul cluster fails to elect a leader when the stack is redeployed. 
78 | placement: 79 | constraints: 80 | - node.labels.${STACK_NAME}.consul-data-leader == true 81 | labels: 82 | - "traefik.enable=true" 83 | - "traefik.docker.network=${TRAEFIK_NETWORK}" 84 | - "traefik.http.routers.consul-leader.rule=Host(`consul.${UI_DOMAIN?Variable UI_DOMAIN not set}`)" 85 | - "traefik.http.services.consul-leader.loadbalancer.server.port=8500" 86 | - "traefik.http.routers.consul-leader.entryPoints=web, websecure" 87 | - "traefik.http.routers.consul-leader.tls=true" 88 | - "traefik.http.routers.consul-leader.middlewares=IpWhiteList, auth" 89 | - "traefik.http.middlewares.IpWhiteList.ipwhitelist.sourcerange=${COMMON_IP_WHITELIST}" 90 | - "traefik.http.middlewares.auth.basicauth.users=${USERNAME?Variable USERNAME not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set}" 91 | 92 | consul-replica: 93 | image: consul 94 | command: agent -server -client=0.0.0.0 -retry-join="consul-leader" 95 | volumes: 96 | - consul-data-replica:/consul/data 97 | environment: 98 | - CONSUL_BIND_INTERFACE=eth0 99 | - 'CONSUL_LOCAL_CONFIG={"leave_on_terminate": true}' 100 | networks: 101 | - default 102 | - ${TRAEFIK_NETWORK} 103 | deploy: 104 | replicas: ${CONSUL_REPLICAS:-3} 105 | placement: 106 | preferences: 107 | - spread: node.id 108 | 109 | volumes: 110 | consul-data-leader: 111 | consul-data-replica: 112 | traefik-log: 113 | 114 | networks: 115 | traefik-public: 116 | external: true 117 | --------------------------------------------------------------------------------