├── README.md ├── alertmanager └── configs │ └── alertmanager.yml ├── alt_versions ├── docker-compose_http.yml ├── docker-compose_http_with_dashboards.yml ├── docker-compose_http_with_dbs.yml ├── docker-compose_no_traefik.yml └── docker-compose_with_dashboards.yml ├── deploy.sh ├── docker-compose.localhost.yml ├── docker-compose.yml ├── grafana ├── .placeholder └── provisioning │ ├── dashboards │ ├── BlackboxExporter.json │ ├── DockerAndSystemMonitoring.json │ ├── DockerMonitoring.json │ ├── DockerPrometheusMonitoring.json │ ├── MysqlOverview.json │ ├── NodeMonitoring.json │ ├── TraefikRealTime.json │ └── ds_prometheus.yml │ └── datasources │ └── prometheus.yml ├── htpasswd ├── node-exporter ├── Dockerfile └── conf │ └── docker-entrypoint.sh └── prometheus ├── configs ├── prometheus-localhost.yml ├── prometheus.yml └── prometheus_with_mysql.yml └── rules └── alert.rules /README.md: -------------------------------------------------------------------------------- 1 | # monitoring-cpang 2 | 3 | cAdvisor, Prometheus, Alertmanager, Node-Exporter, Grafana 4 | 5 | ## How to use this 6 | 7 | **Note**: The dependent service [Traefik](https://github.com/bekkerstacks/traefik#usage) is required. If you don't use Traefik, see the [deployment without Traefik](#alternative-versions). 8 | 9 | Get the sources: 10 | 11 | ``` 12 | $ git clone https://github.com/bekkerstacks/monitoring-cpang 13 | $ cd monitoring-cpang 14 | ``` 15 | 16 | #### Dependency: Traefik 17 | 18 | Get Traefik; by default it uses SSL. 19 | 20 | See all [configuration options here](https://github.com/bekkerstacks/traefik#configuration): 21 | 22 | - requires your domain name as the env var `DOMAIN=""` (defaults to `localhost`) 23 | 24 | ``` 25 | $ wget -O docker-compose.traefik.yml https://raw.githubusercontent.com/bekkerstacks/traefik/master/docker-compose.yml 26 | ``` 27 | 28 | #### Authentication 29 | 30 | Services that do not come with their own authentication are protected by basic auth; the default credentials are 
(`admin/admin`). 31 | 32 | To override: 33 | 34 | ``` 35 | $ htpasswd -c htpasswd <username> 36 | ``` 37 | 38 | #### Deploy Traefik and CPANG 39 | 40 | This will deploy Traefik and a fresh CPANG stack: 41 | 42 | ``` 43 | $ export DOMAIN="localhost" # services will use traefik.localhost in this case 44 | $ docker stack deploy -c docker-compose.traefik.yml proxy 45 | $ docker stack deploy -c docker-compose.yml mon 46 | ``` 47 | 48 | ### Alternative Versions: 49 | 50 | A stack with HTTP endpoints: 51 | 52 | ``` 53 | # traefik needs to be running 54 | $ docker stack deploy -c alt_versions/docker-compose_http.yml http_mon 55 | ``` 56 | 57 | A stack without Traefik: 58 | 59 | ``` 60 | $ docker stack deploy -c alt_versions/docker-compose_no_traefik.yml notraefik_mon 61 | ``` 62 | 63 | If you want a stack with prepopulated dashboards: 64 | 65 | ``` 66 | # traefik needs to be running 67 | $ docker stack deploy -c alt_versions/docker-compose_with_dashboards.yml dash_mon 68 | ``` 69 | 70 | CPANG Stack with MySQL, MySQL Exporter and Dashboards: 71 | 72 | ``` 73 | # traefik needs to be running 74 | $ docker stack deploy -c alt_versions/docker-compose_http_with_dbs.yml dbs_mon 75 | ``` 76 | 77 | ## Endpoints: 78 | 79 | - Grafana: `grafana.${DOMAIN}` 80 | - Alertmanager: `alertmanager.${DOMAIN}` 81 | - Prometheus: `prometheus.${DOMAIN}` 82 | - Traefik: `traefik.${DOMAIN}` 83 | 84 | ## Configuration 85 | 86 | Look at the [wiki](https://github.com/bekkerstacks/monitoring-cpang/wiki) for tutorials on how to add datasources and dashboards. 87 | 88 | ## Extra Dashboard Links: 89 | 90 | Traefik: 91 | 92 | - [4475 - Traefik per Backend](https://grafana.com/dashboards/4475) 93 | - [6293 - Traefik Real Time](https://grafana.com/dashboards/6293) 94 | 95 | Docker: 96 | 97 | - [609 - Docker Swarm and Container Overview](https://grafana.com/dashboards/609) 98 | -------------------------------------------------------------------------------- /alertmanager/configs/alertmanager.yml: 
-------------------------------------------------------------------------------- 1 | route: 2 | receiver: 'slack' 3 | 4 | receivers: 5 | - name: 'slack' 6 | # slack_configs: 7 | # - send_resolved: true 8 | # username: '' 9 | # channel: '#' 10 | # api_url: '' 11 | -------------------------------------------------------------------------------- /alt_versions/docker-compose_http.yml: -------------------------------------------------------------------------------- 1 | version: '3.5' 2 | 3 | services: 4 | cadvisor: 5 | image: google/cadvisor 6 | volumes: 7 | - /:/rootfs:ro 8 | - /var/run:/var/run:rw 9 | - /sys:/sys:ro 10 | - /var/lib/docker:/var/lib/docker:ro 11 | networks: 12 | - private 13 | - public 14 | deploy: 15 | mode: global 16 | labels: 17 | - traefik.frontend.rule=Host:cadvisor.${DOMAIN:-localhost} 18 | - traefik.enable=true 19 | - traefik.port=8080 20 | - traefik.tags=public 21 | - traefik.backend=cadvisor 22 | - traefik.docker.network=public 23 | - traefik.redirectorservice.frontend.entryPoints=http 24 | 25 | node-exporter: 26 | image: basi/node-exporter 27 | volumes: 28 | - /proc:/host/proc:ro 29 | - /sys:/host/sys:ro 30 | - /:/rootfs:ro 31 | - /tmp/hostname:/etc/host_hostname 32 | hostname: "{{.Node.ID}}" 33 | environment: 34 | - HOST_HOSTNAME=/etc/host_hostname 35 | command: 36 | - '--path.procfs=/host/proc' 37 | - '--path.sysfs=/host/sys' 38 | - '--collector.textfile.directory=/etc/node-exporter/' 39 | - '--collector.filesystem.ignored-mount-points' 40 | - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" 41 | networks: 42 | - private 43 | deploy: 44 | mode: global 45 | 46 | docker-exporter: 47 | image: basi/socat:${DOCKER_EXPORTER_VERSION:-v0.1.0} 48 | networks: 49 | - private 50 | deploy: 51 | mode: global 52 | resources: 53 | limits: 54 | cpus: '0.05' 55 | memory: 6M 56 | reservations: 57 | cpus: '0.05' 58 | memory: 4M 59 | 60 | blackbox-exporter: 61 | 
image: prom/blackbox-exporter:v0.12.0 62 | networks: 63 | - private 64 | 65 | alertmanager: 66 | image: prom/alertmanager 67 | configs: 68 | - source: alertmanager_config 69 | target: /etc/alertmanager/config.yml 70 | - source: auth_htpasswd 71 | target: /etc/htpasswd 72 | command: 73 | - '--config.file=/etc/alertmanager/config.yml' 74 | - '--storage.path=/alertmanager' 75 | volumes: 76 | - alertmanager:/alertmanager 77 | networks: 78 | - private 79 | - public 80 | deploy: 81 | replicas: 1 82 | placement: 83 | constraints: 84 | - node.role==manager 85 | labels: 86 | - traefik.frontend.rule=Host:alertmanager.${DOMAIN:-localhost} 87 | - traefik.enable=true 88 | - traefik.port=9093 89 | - traefik.tags=public 90 | - traefik.backend=alertmanager 91 | - traefik.docker.network=public 92 | - traefik.frontend.auth.basic.usersFile=/etc/htpasswd 93 | - traefik.redirectorservice.frontend.entryPoints=http 94 | 95 | prometheus: 96 | image: prom/prometheus 97 | user: root 98 | volumes: 99 | - promconfig:/etc/prometheus 100 | - promdata:/prometheus 101 | configs: 102 | - source: prometheus_config 103 | target: /etc/prometheus/prometheus.yml 104 | - source: prometheus_rules 105 | target: /etc/prometheus/alert.rules 106 | - source: auth_htpasswd 107 | target: /etc/htpasswd 108 | command: 109 | - '--config.file=/etc/prometheus/prometheus.yml' 110 | - '--storage.tsdb.path=/prometheus' 111 | - '--web.console.libraries=/etc/prometheus/console_libraries' 112 | - '--web.console.templates=/etc/prometheus/consoles' 113 | networks: 114 | - private 115 | - public 116 | deploy: 117 | placement: 118 | constraints: 119 | - node.role==manager 120 | labels: 121 | - traefik.frontend.rule=Host:prometheus.${DOMAIN:-localhost} 122 | - traefik.frontend.auth.basic.usersFile=/etc/htpasswd 123 | - traefik.enable=true 124 | - traefik.port=9090 125 | - traefik.tags=public 126 | - traefik.backend=prometheus 127 | - traefik.docker.network=public 128 | - traefik.redirectorservice.frontend.entryPoints=http 129 
| 130 | grafana: 131 | image: grafana/grafana:6.2.1 132 | volumes: 133 | - grafanadata:/var/lib/grafana 134 | environment: 135 | - GF_SERVER_ROOT_URL=http://grafana.${DOMAIN:-localhost} 136 | - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin} 137 | - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} 138 | - GF_USERS_ALLOW_SIGN_UP=false 139 | - GF_INSTALL_PLUGINS=grafana-piechart-panel 140 | - PROMETHEUS_ENDPOINT=http://prometheus:9090 141 | networks: 142 | - private 143 | - public 144 | deploy: 145 | replicas: 1 146 | placement: 147 | constraints: 148 | - node.role==manager 149 | labels: 150 | - traefik.frontend.rule=Host:grafana.${DOMAIN:-localhost} 151 | - traefik.enable=true 152 | - traefik.port=3000 153 | - traefik.tags=public 154 | - traefik.backend=grafana 155 | - traefik.docker.network=public 156 | - traefik.redirectorservice.frontend.entryPoints=http 157 | 158 | networks: 159 | private: 160 | name: private 161 | driver: overlay 162 | public: 163 | name: public 164 | external: true 165 | 166 | volumes: 167 | promconfig: {} 168 | promdata: {} 169 | grafanadata: {} 170 | alertmanager: {} 171 | 172 | configs: 173 | alertmanager_config: 174 | file: ../alertmanager/configs/alertmanager.yml 175 | prometheus_config: 176 | file: ../prometheus/configs/prometheus.yml 177 | prometheus_rules: 178 | file: ../prometheus/rules/alert.rules 179 | auth_htpasswd: 180 | file: ../htpasswd 181 | -------------------------------------------------------------------------------- /alt_versions/docker-compose_http_with_dashboards.yml: -------------------------------------------------------------------------------- 1 | version: '3.5' 2 | 3 | services: 4 | cadvisor: 5 | image: google/cadvisor 6 | volumes: 7 | - /:/rootfs:ro 8 | - /var/run:/var/run:rw 9 | - /sys:/sys:ro 10 | - /var/lib/docker:/var/lib/docker:ro 11 | networks: 12 | - private 13 | - public 14 | deploy: 15 | mode: global 16 | resources: 17 | limits: 18 | memory: 128M 19 | 20 | node-exporter: 21 | image: prom/node-exporter 
22 | volumes: 23 | - /proc:/host/proc:ro 24 | - /sys:/host/sys:ro 25 | - /:/rootfs:ro 26 | environment: 27 | - NODE_ID={{.Node.ID}} 28 | command: 29 | - '--path.procfs=/host/proc' 30 | - '--path.sysfs=/host/sys' 31 | - --collector.filesystem.ignored-mount-points 32 | - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" 33 | networks: 34 | - private 35 | deploy: 36 | mode: global 37 | resources: 38 | limits: 39 | memory: 128M 40 | 41 | blackbox-exporter: 42 | image: prom/blackbox-exporter:v0.12.0 43 | networks: 44 | - private 45 | deploy: 46 | mode: replicated 47 | replicas: 1 48 | resources: 49 | limits: 50 | memory: 64M 51 | 52 | alertmanager: 53 | image: prom/alertmanager:v0.17.0 54 | configs: 55 | - source: alertmanager_config 56 | target: /etc/alertmanager/config.yml 57 | command: 58 | - '--config.file=/etc/alertmanager/config.yml' 59 | - '--storage.path=/alertmanager' 60 | volumes: 61 | - alertmanager:/alertmanager 62 | networks: 63 | - private 64 | - public 65 | deploy: 66 | replicas: 1 67 | placement: 68 | constraints: 69 | - node.role==manager 70 | resources: 71 | limits: 72 | memory: 100M 73 | labels: 74 | - traefik.frontend.rule=Host:alertmanager.${DOMAIN:-localhost} 75 | - traefik.enable=true 76 | - traefik.port=9093 77 | - traefik.tags=public 78 | - traefik.backend=alertmanager 79 | - traefik.docker.network=public 80 | - traefik.webservice.frontend.entryPoints=http 81 | 82 | prometheus: 83 | image: prom/prometheus:v2.9.2 84 | user: root 85 | volumes: 86 | - promconfig:/etc/prometheus 87 | - promdata:/prometheus 88 | configs: 89 | - source: prometheus_config 90 | target: /etc/prometheus/prometheus.yml 91 | - source: prometheus_rules 92 | target: /etc/prometheus/alert.rules 93 | command: 94 | - '--config.file=/etc/prometheus/prometheus.yml' 95 | - '--storage.tsdb.path=/prometheus' 96 | - '--web.console.libraries=/etc/prometheus/console_libraries' 97 | - 
'--web.console.templates=/etc/prometheus/consoles' 98 | networks: 99 | - private 100 | - public 101 | deploy: 102 | placement: 103 | constraints: 104 | - node.role==manager 105 | resources: 106 | limits: 107 | memory: 500M 108 | labels: 109 | - traefik.frontend.rule=Host:prometheus.${DOMAIN:-localhost} 110 | - traefik.enable=true 111 | - traefik.port=9090 112 | - traefik.tags=public 113 | - traefik.backend=prometheus 114 | - traefik.docker.network=public 115 | - traefik.webservice.frontend.entryPoints=http 116 | 117 | grafana: 118 | image: grafana/grafana:6.2.1 119 | volumes: 120 | - grafanadata:/var/lib/grafana 121 | - grafana_provisioning:/etc/grafana/provisioning 122 | configs: 123 | - source: grafana_config_datasource 124 | target: /etc/grafana/provisioning/datasources/prometheus.yml 125 | - source: grafana_dashboard_prometheus 126 | target: /etc/grafana/provisioning/dashboards/ds_prometheus.yml 127 | - source: grafana_dashboard_docker 128 | target: /etc/grafana/provisioning/dashboards/DockerMonitoring.json 129 | - source: grafana_dashboard_nodes 130 | target: /etc/grafana/provisioning/dashboards/NodeMonitoring.json 131 | - source: grafana_dashboard_blackbox 132 | target: /etc/grafana/provisioning/dashboards/BlackboxExporter.json 133 | environment: 134 | - GF_SERVER_ROOT_URL=http://grafana.${DOMAIN:-localhost} 135 | - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin} 136 | - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} 137 | - GF_USERS_ALLOW_SIGN_UP=false 138 | - PROMETHEUS_ENDPOINT=http://prometheus:9090 139 | networks: 140 | - private 141 | - public 142 | deploy: 143 | replicas: 1 144 | placement: 145 | constraints: 146 | - node.role==manager 147 | labels: 148 | - traefik.frontend.rule=Host:grafana.${DOMAIN:-localhost} 149 | - traefik.enable=true 150 | - traefik.port=3000 151 | - traefik.tags=public 152 | - traefik.backend=grafana 153 | - traefik.docker.network=public 154 | - traefik.webservice.frontend.entryPoints=http 155 | 156 | networks: 157 | 
private: 158 | name: private 159 | driver: overlay 160 | public: 161 | name: public 162 | external: true 163 | 164 | volumes: 165 | promconfig: {} 166 | promdata: {} 167 | grafanadata: {} 168 | alertmanager: {} 169 | grafana_provisioning: {} 170 | 171 | configs: 172 | alertmanager_config: 173 | file: ../alertmanager/configs/alertmanager.yml 174 | prometheus_config: 175 | file: ../prometheus/configs/prometheus.yml 176 | prometheus_rules: 177 | file: ../prometheus/rules/alert.rules 178 | grafana_config_datasource: 179 | file: ../grafana/provisioning/datasources/prometheus.yml 180 | grafana_dashboard_prometheus: 181 | file: ../grafana/provisioning/dashboards/ds_prometheus.yml 182 | grafana_dashboard_docker: 183 | file: ../grafana/provisioning/dashboards/DockerMonitoring.json 184 | grafana_dashboard_nodes: 185 | file: ../grafana/provisioning/dashboards/NodeMonitoring.json 186 | grafana_dashboard_blackbox: 187 | file: ../grafana/provisioning/dashboards/BlackboxExporter.json 188 | -------------------------------------------------------------------------------- /alt_versions/docker-compose_http_with_dbs.yml: -------------------------------------------------------------------------------- 1 | version: '3.5' 2 | 3 | services: 4 | cadvisor: 5 | image: google/cadvisor 6 | volumes: 7 | - /:/rootfs:ro 8 | - /var/run:/var/run:rw 9 | - /sys:/sys:ro 10 | - /var/lib/docker:/var/lib/docker:ro 11 | networks: 12 | - private 13 | - public 14 | deploy: 15 | mode: global 16 | resources: 17 | limits: 18 | memory: 128M 19 | 20 | node-exporter: 21 | image: prom/node-exporter 22 | volumes: 23 | - /proc:/host/proc:ro 24 | - /sys:/host/sys:ro 25 | - /:/rootfs:ro 26 | - /etc/hostname:/etc/host_hostname 27 | ports: 28 | - 9100:9100 29 | environment: 30 | - HOST_HOSTNAME=/etc/host_hostname 31 | command: 32 | - '--path.procfs=/host/proc' 33 | - '--path.sysfs=/host/sys' 34 | - '--collector.filesystem.ignored-mount-points' 35 | - 
"^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" 36 | - '--collector.textfile.directory=/etc/node-exporter/' 37 | networks: 38 | - private 39 | deploy: 40 | mode: global 41 | resources: 42 | limits: 43 | memory: 128M 44 | 45 | blackbox-exporter: 46 | image: prom/blackbox-exporter:v0.12.0 47 | networks: 48 | - private 49 | deploy: 50 | mode: replicated 51 | replicas: 1 52 | resources: 53 | limits: 54 | memory: 64M 55 | # NOTE: mysql-exporter is declared once, after the mysql service further down in this file. A mapping key may appear only once (strict YAML parsers reject duplicates, lenient ones keep only the last), so this earlier copy is disabled. 56 | # mysql-exporter: 57 | # image: prom/mysqld-exporter 58 | # environment: 59 | # - DATA_SOURCE_NAME=root:password@(mysql:3306)/ 60 | # networks: 61 | # - private 62 | # deploy: 63 | # mode: replicated 64 | # resources: 65 | # limits: 66 | # memory: 128M 67 | 68 | alertmanager: 69 | image: prom/alertmanager 70 | configs: 71 | - source: alertmanager_config 72 | target: /etc/alertmanager/config.yml 73 | command: 74 | - '--config.file=/etc/alertmanager/config.yml' 75 | - '--storage.path=/alertmanager' 76 | volumes: 77 | - alertmanager:/alertmanager 78 | networks: 79 | - private 80 | - public 81 | deploy: 82 | replicas: 1 83 | placement: 84 | constraints: 85 | - node.role==manager 86 | resources: 87 | limits: 88 | memory: 100M 89 | labels: 90 | - traefik.frontend.rule=Host:alertmanager.${DOMAIN:-localhost} 91 | - traefik.enable=true 92 | - traefik.port=9093 93 | - traefik.tags=public 94 | - traefik.backend=alertmanager 95 | - traefik.docker.network=public 96 | - traefik.redirectorservice.frontend.entryPoints=http 97 | 98 | prometheus: 99 | image: prom/prometheus 100 | user: root 101 | volumes: 102 | - promconfig:/etc/prometheus 103 | - promdata:/prometheus 104 | configs: 105 | - source: prometheus_config 106 | target: /etc/prometheus/prometheus.yml 107 | - source: prometheus_rules 108 | target: /etc/prometheus/alert.rules 109 | command: 110 | - '--config.file=/etc/prometheus/prometheus.yml' 111 | - '--storage.tsdb.path=/prometheus' 112 | - 
'--web.console.libraries=/etc/prometheus/console_libraries' 113 | - '--web.console.templates=/etc/prometheus/consoles' 114 | networks: 115 | - private 116 | - public 117 | deploy: 118 | placement: 119 | constraints: 120 | - node.role==manager 121 | resources: 122 | limits: 123 | memory: 500M 124 | labels: 125 | - traefik.frontend.rule=Host:prometheus.${DOMAIN:-localhost} 126 | - traefik.enable=true 127 | - traefik.port=9090 128 | - traefik.tags=public 129 | - traefik.backend=prometheus 130 | - traefik.docker.network=public 131 | - traefik.redirectorservice.frontend.entryPoints=http 132 | 133 | grafana: 134 | image: grafana/grafana 135 | volumes: 136 | - grafanadata:/var/lib/grafana 137 | - grafana_provisioning:/etc/grafana/provisioning 138 | configs: 139 | - source: grafana_config_datasource 140 | target: /etc/grafana/provisioning/datasources/prometheus.yml 141 | - source: grafana_dashboard_prometheus 142 | target: /etc/grafana/provisioning/dashboards/ds_prometheus.yml 143 | - source: grafana_dashboard_docker 144 | target: /etc/grafana/provisioning/dashboards/DockerMonitoring.json 145 | - source: grafana_dashboard_dockerprometheus 146 | target: /etc/grafana/provisioning/dashboards/DockerPrometheusMonitoring.json 147 | - source: grafana_dashboard_blackbox 148 | target: /etc/grafana/provisioning/dashboards/BlackboxExporter.json 149 | - source: grafana_dashboard_mysql 150 | target: /etc/grafana/provisioning/dashboards/MysqlOverview.json 151 | environment: 152 | - GF_SERVER_ROOT_URL=http://grafana.${DOMAIN:-localhost} 153 | - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin} 154 | - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} 155 | - GF_USERS_ALLOW_SIGN_UP=false 156 | - PROMETHEUS_ENDPOINT=http://prometheus:9090 157 | networks: 158 | - private 159 | - public 160 | deploy: 161 | replicas: 1 162 | placement: 163 | constraints: 164 | - node.role==manager 165 | resources: 166 | limits: 167 | memory: 384M 168 | labels: 169 | - 
traefik.frontend.rule=Host:grafana.${DOMAIN:-localhost} 170 | - traefik.enable=true 171 | - traefik.port=3000 172 | - traefik.tags=public 173 | - traefik.backend=grafana 174 | - traefik.docker.network=public 175 | - traefik.redirectorservice.frontend.entryPoints=http 176 | 177 | mysql: 178 | image: mysql 179 | command: --default-authentication-plugin=mysql_native_password 180 | networks: 181 | - private 182 | environment: 183 | - MYSQL_ROOT_PASSWORD=password 184 | healthcheck: 185 | test: "mysql --user=root --password=password --execute \"SHOW DATABASES;\"" 186 | interval: 15s 187 | timeout: 1s 188 | retries: 5 189 | deploy: 190 | mode: replicated 191 | resources: 192 | limits: 193 | memory: 128M 194 | 195 | mysql-exporter: 196 | image: prom/mysqld-exporter 197 | ports: 198 | - 9104:9104 199 | environment: 200 | - DATA_SOURCE_NAME=root:password@(mysql:3306)/ 201 | networks: 202 | - private 203 | deploy: 204 | mode: replicated 205 | resources: 206 | limits: 207 | memory: 128M 208 | 209 | networks: 210 | private: 211 | name: private 212 | driver: overlay 213 | public: 214 | name: public 215 | external: true 216 | 217 | volumes: 218 | promconfig: {} 219 | promdata: {} 220 | grafanadata: {} 221 | alertmanager: {} 222 | grafana_provisioning: {} 223 | 224 | configs: 225 | alertmanager_config: 226 | file: ../alertmanager/configs/alertmanager.yml 227 | prometheus_config: 228 | file: ../prometheus/configs/prometheus_with_mysql.yml 229 | prometheus_rules: 230 | file: ../prometheus/rules/alert.rules 231 | grafana_config_datasource: 232 | file: ../grafana/provisioning/datasources/prometheus.yml 233 | grafana_dashboard_prometheus: 234 | file: ../grafana/provisioning/dashboards/ds_prometheus.yml 235 | grafana_dashboard_docker: 236 | file: ../grafana/provisioning/dashboards/DockerMonitoring.json 237 | grafana_dashboard_dockerprometheus: 238 | file: ../grafana/provisioning/dashboards/DockerPrometheusMonitoring.json 239 | grafana_dashboard_blackbox: 240 | file: 
../grafana/provisioning/dashboards/BlackboxExporter.json 241 | grafana_dashboard_mysql: 242 | file: ../grafana/provisioning/dashboards/MysqlOverview.json 243 | -------------------------------------------------------------------------------- /alt_versions/docker-compose_no_traefik.yml: -------------------------------------------------------------------------------- 1 | version: '3.5' 2 | 3 | services: 4 | cadvisor: 5 | image: google/cadvisor 6 | volumes: 7 | - /:/rootfs:ro 8 | - /var/run:/var/run:rw 9 | - /sys:/sys:ro 10 | - /var/lib/docker:/var/lib/docker:ro 11 | networks: 12 | - private 13 | ports: 14 | - 8080:8080 15 | deploy: 16 | mode: global 17 | 18 | node-exporter: 19 | image: prom/node-exporter 20 | volumes: 21 | - /proc:/host/proc:ro 22 | - /sys:/host/sys:ro 23 | - /:/rootfs:ro 24 | - /etc/hostname:/etc/host_hostname 25 | ports: 26 | - 9100:9100 27 | environment: 28 | - HOST_HOSTNAME=/etc/host_hostname 29 | command: 30 | - '--path.procfs=/host/proc' 31 | - '--path.sysfs=/host/sys' 32 | - '--collector.filesystem.ignored-mount-points' 33 | - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" 34 | - '--collector.textfile.directory=/etc/node-exporter/' 35 | networks: 36 | - private 37 | deploy: 38 | mode: global 39 | 40 | blackbox-exporter: 41 | image: prom/blackbox-exporter:v0.12.0 42 | networks: 43 | - private 44 | 45 | alertmanager: 46 | image: prom/alertmanager 47 | configs: 48 | - source: alertmanager_config 49 | target: /etc/alertmanager/config.yml 50 | command: 51 | - '--config.file=/etc/alertmanager/config.yml' 52 | - '--storage.path=/alertmanager' 53 | volumes: 54 | - alertmanager:/alertmanager 55 | networks: 56 | - private 57 | ports: 58 | - 9093:9093 59 | deploy: 60 | replicas: 1 61 | placement: 62 | constraints: 63 | - node.role==manager 64 | 65 | prometheus: 66 | image: prom/prometheus 67 | user: root 68 | volumes: 69 | - promconfig:/etc/prometheus 70 
| - promdata:/prometheus 71 | configs: 72 | - source: prometheus_config 73 | target: /etc/prometheus/prometheus.yml 74 | - source: prometheus_rules 75 | target: /etc/prometheus/alert.rules 76 | command: 77 | - '--config.file=/etc/prometheus/prometheus.yml' 78 | - '--storage.tsdb.path=/prometheus' 79 | - '--web.console.libraries=/etc/prometheus/console_libraries' 80 | - '--web.console.templates=/etc/prometheus/consoles' 81 | networks: 82 | - private 83 | ports: 84 | - 9090:9090 85 | deploy: 86 | placement: 87 | constraints: 88 | - node.role==manager 89 | 90 | grafana: 91 | image: grafana/grafana 92 | volumes: 93 | - grafanadata:/var/lib/grafana 94 | environment: 95 | - GF_SERVER_ROOT_URL=http://grafana.${DOMAIN:-localhost} 96 | - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin} 97 | - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} 98 | - GF_USERS_ALLOW_SIGN_UP=false 99 | - PROMETHEUS_ENDPOINT=http://prometheus:9090 100 | networks: 101 | - private 102 | ports: 103 | - 3000:3000 104 | deploy: 105 | replicas: 1 106 | placement: 107 | constraints: 108 | - node.role==manager 109 | 110 | networks: 111 | private: 112 | name: private 113 | driver: overlay 114 | 115 | volumes: 116 | promconfig: {} 117 | promdata: {} 118 | grafanadata: {} 119 | alertmanager: {} 120 | 121 | configs: 122 | alertmanager_config: 123 | file: ../alertmanager/configs/alertmanager.yml 124 | prometheus_config: 125 | file: ../prometheus/configs/prometheus.yml 126 | prometheus_rules: 127 | file: ../prometheus/rules/alert.rules 128 | -------------------------------------------------------------------------------- /alt_versions/docker-compose_with_dashboards.yml: -------------------------------------------------------------------------------- 1 | version: '3.5' 2 | 3 | services: 4 | cadvisor: 5 | image: google/cadvisor 6 | volumes: 7 | - /:/rootfs:ro 8 | - /var/run:/var/run:rw 9 | - /sys:/sys:ro 10 | - /var/lib/docker:/var/lib/docker:ro 11 | networks: 12 | - private 13 | - public 14 | deploy: 15 | 
mode: global 16 | resources: 17 | limits: 18 | memory: 128M 19 | 20 | node-exporter: 21 | image: prom/node-exporter 22 | volumes: 23 | - /proc:/host/proc:ro 24 | - /sys:/host/sys:ro 25 | - /:/rootfs:ro 26 | - /etc/hostname:/etc/host_hostname 27 | ports: 28 | - 9100:9100 29 | environment: 30 | - HOST_HOSTNAME=/etc/host_hostname 31 | command: 32 | - '--path.procfs=/host/proc' 33 | - '--path.sysfs=/host/sys' 34 | - '--collector.filesystem.ignored-mount-points' 35 | - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" 36 | - '--collector.textfile.directory=/etc/node-exporter/' 37 | networks: 38 | - private 39 | deploy: 40 | mode: global 41 | resources: 42 | limits: 43 | memory: 128M 44 | 45 | blackbox-exporter: 46 | image: prom/blackbox-exporter:v0.12.0 47 | networks: 48 | - private 49 | deploy: 50 | mode: replicated 51 | replicas: 1 52 | resources: 53 | limits: 54 | memory: 64M 55 | 56 | alertmanager: 57 | image: prom/alertmanager 58 | configs: 59 | - source: alertmanager_config 60 | target: /etc/alertmanager/config.yml 61 | command: 62 | - '--config.file=/etc/alertmanager/config.yml' 63 | - '--storage.path=/alertmanager' 64 | volumes: 65 | - alertmanager:/alertmanager 66 | networks: 67 | - private 68 | - public 69 | deploy: 70 | replicas: 1 71 | placement: 72 | constraints: 73 | - node.role==manager 74 | resources: 75 | limits: 76 | memory: 100M 77 | labels: 78 | - traefik.frontend.rule=Host:alertmanager.${DOMAIN:-localhost} 79 | - traefik.enable=true 80 | - traefik.port=9093 81 | - traefik.tags=public 82 | - traefik.backend=alertmanager 83 | - traefik.docker.network=public 84 | - traefik.redirectorservice.frontend.entryPoints=http 85 | - traefik.redirectorservice.frontend.redirect.entryPoint=https 86 | - traefik.webservice.frontend.entryPoints=https 87 | 88 | prometheus: 89 | image: prom/prometheus 90 | user: root 91 | volumes: 92 | - promconfig:/etc/prometheus 93 | - 
promdata:/prometheus 94 | configs: 95 | - source: prometheus_config 96 | target: /etc/prometheus/prometheus.yml 97 | - source: prometheus_rules 98 | target: /etc/prometheus/alert.rules 99 | command: 100 | - '--config.file=/etc/prometheus/prometheus.yml' 101 | - '--storage.tsdb.path=/prometheus' 102 | - '--web.console.libraries=/etc/prometheus/console_libraries' 103 | - '--web.console.templates=/etc/prometheus/consoles' 104 | networks: 105 | - private 106 | - public 107 | deploy: 108 | placement: 109 | constraints: 110 | - node.role==manager 111 | resources: 112 | limits: 113 | memory: 500M 114 | labels: 115 | - traefik.frontend.rule=Host:prometheus.${DOMAIN:-localhost} 116 | - traefik.enable=true 117 | - traefik.port=9090 118 | - traefik.tags=public 119 | - traefik.backend=prometheus 120 | - traefik.docker.network=public 121 | - traefik.redirectorservice.frontend.entryPoints=http 122 | - traefik.redirectorservice.frontend.redirect.entryPoint=https 123 | - traefik.webservice.frontend.entryPoints=https 124 | 125 | grafana: 126 | image: grafana/grafana 127 | volumes: 128 | - grafanadata:/var/lib/grafana 129 | - grafana_provisioning:/etc/grafana/provisioning 130 | configs: 131 | - source: grafana_config_datasource 132 | target: /etc/grafana/provisioning/datasources/prometheus.yml 133 | - source: grafana_dashboard_prometheus 134 | target: /etc/grafana/provisioning/dashboards/ds_prometheus.yml 135 | - source: grafana_dashboard_docker 136 | target: /etc/grafana/provisioning/dashboards/DockerMonitoring.json 137 | - source: grafana_dashboard_dockerprometheus 138 | target: /etc/grafana/provisioning/dashboards/DockerPrometheusMonitoring.json 139 | - source: grafana_dashboard_blackbox 140 | target: /etc/grafana/provisioning/dashboards/BlackboxExporter.json 141 | environment: 142 | - GF_SERVER_ROOT_URL=http://grafana.${DOMAIN:-localhost} 143 | - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin} 144 | - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} 145 | - 
GF_USERS_ALLOW_SIGN_UP=false 146 | - PROMETHEUS_ENDPOINT=http://prometheus:9090 147 | networks: 148 | - private 149 | - public 150 | deploy: 151 | replicas: 1 152 | placement: 153 | constraints: 154 | - node.role==manager 155 | labels: 156 | - traefik.frontend.rule=Host:grafana.${DOMAIN:-localhost} 157 | - traefik.enable=true 158 | - traefik.port=3000 159 | - traefik.tags=public 160 | - traefik.backend=grafana 161 | - traefik.docker.network=public 162 | - traefik.redirectorservice.frontend.entryPoints=http 163 | - traefik.redirectorservice.frontend.redirect.entryPoint=https 164 | - traefik.webservice.frontend.entryPoints=https 165 | 166 | networks: 167 | private: 168 | name: private 169 | driver: overlay 170 | public: 171 | name: public 172 | external: true 173 | 174 | volumes: 175 | promconfig: {} 176 | promdata: {} 177 | grafanadata: {} 178 | alertmanager: {} 179 | grafana_provisioning: {} 180 | 181 | configs: 182 | alertmanager_config: 183 | file: ../alertmanager/configs/alertmanager.yml 184 | prometheus_config: 185 | file: ../prometheus/configs/prometheus.yml 186 | prometheus_rules: 187 | file: ../prometheus/rules/alert.rules 188 | grafana_config_datasource: 189 | file: ../grafana/provisioning/datasources/prometheus.yml 190 | grafana_dashboard_prometheus: 191 | file: ../grafana/provisioning/dashboards/ds_prometheus.yml 192 | grafana_dashboard_docker: 193 | file: ../grafana/provisioning/dashboards/DockerMonitoring.json 194 | grafana_dashboard_dockerprometheus: 195 | file: ../grafana/provisioning/dashboards/DockerPrometheusMonitoring.json 196 | grafana_dashboard_blackbox: 197 | file: ../grafana/provisioning/dashboards/BlackboxExporter.json 198 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | MODE=${BSTACKS_MODE:-https} 2 | FEATURE=${BSTACKS_FEATURE:-default} 3 | # Deploy the HTTP stack with dashboards only when BSTACKS_MODE=http and BSTACKS_FEATURE=dashboards; any other combination (including both unset) deploys the default HTTPS stack. The defaulted, quoted MODE/FEATURE are compared so unset variables cannot break the test. 4 | if [ "$MODE" = "http" ] && [ "$FEATURE" = "dashboards"
] 5 | then 6 | echo "deploying monitoring stack in http mode with dashboards pre-configured" 7 | docker stack deploy -c alt_versions/docker-compose_http_with_dashboards.yml mon 8 | echo "Stack is available at:" 9 | echo "- http://grafana.${DOMAIN:-localhost}" 10 | echo "- http://prometheus.${DOMAIN:-localhost}" 11 | echo "- http://alertmanager.${DOMAIN:-localhost}" 12 | else 13 | echo "deploying monitoring stack in https mode" 14 | docker stack deploy -c docker-compose.yml mon 15 | fi 16 | -------------------------------------------------------------------------------- /docker-compose.localhost.yml: -------------------------------------------------------------------------------- 1 | version: '3.5' 2 | 3 | services: 4 | cadvisor: 5 | image: google/cadvisor 6 | volumes: 7 | - /:/rootfs:ro 8 | - /var/run:/var/run:rw 9 | - /sys:/sys:ro 10 | - /var/lib/docker:/var/lib/docker:ro 11 | networks: 12 | - private 13 | ports: 14 | - 8080:8080 15 | 16 | node-exporter: 17 | image: prom/node-exporter 18 | volumes: 19 | - /proc:/host/proc:ro 20 | - /sys:/host/sys:ro 21 | - /:/rootfs:ro 22 | ports: 23 | - 9100:9100 24 | command: 25 | - '--path.procfs=/host/proc' 26 | - '--path.sysfs=/host/sys' 27 | - '--collector.filesystem.ignored-mount-points' 28 | - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" 29 | networks: 30 | - private 31 | 32 | blackbox-exporter: 33 | image: prom/blackbox-exporter:v0.12.0 34 | networks: 35 | - private 36 | 37 | alertmanager: 38 | image: prom/alertmanager 39 | command: 40 | - '--config.file=/etc/alertmanager/config.yml' 41 | - '--storage.path=/alertmanager' 42 | volumes: 43 | - ./alertmanager/configs/alertmanager.yml:/etc/alertmanager/config.yml 44 | - alertmanager:/alertmanager 45 | networks: 46 | - private 47 | ports: 48 | - 9093:9093 49 | 50 | prometheus: 51 | image: prom/prometheus 52 | user: root 53 | volumes: 54 | - ./prometheus/configs/prometheus-localhost.yml:/etc/prometheus/prometheus.yml 
55 | - ./prometheus/rules/alert.rules:/etc/prometheus/alert.rules 56 | - promdata:/prometheus 57 | command: 58 | - '--config.file=/etc/prometheus/prometheus.yml' 59 | - '--storage.tsdb.path=/prometheus' 60 | - '--web.console.libraries=/etc/prometheus/console_libraries' 61 | - '--web.console.templates=/etc/prometheus/consoles' 62 | networks: 63 | - private 64 | ports: 65 | - 9090:9090 66 | 67 | grafana: 68 | image: grafana/grafana 69 | volumes: 70 | - grafanadata:/var/lib/grafana 71 | environment: 72 | - GF_SERVER_ROOT_URL=http://grafana.${DOMAIN:-localdns.xyz} 73 | - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin} 74 | - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} 75 | - GF_USERS_ALLOW_SIGN_UP=false 76 | - PROMETHEUS_ENDPOINT=http://prometheus:9090 77 | networks: 78 | - private 79 | ports: 80 | - 3000:3000 81 | 82 | networks: 83 | private: {} 84 | 85 | volumes: 86 | promdata: {} 87 | grafanadata: {} 88 | alertmanager: {} 89 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.5' 2 | 3 | services: 4 | cadvisor: 5 | image: google/cadvisor 6 | volumes: 7 | - /:/rootfs:ro 8 | - /var/run:/var/run:rw 9 | - /sys:/sys:ro 10 | - /var/lib/docker:/var/lib/docker:ro 11 | networks: 12 | - private 13 | - public 14 | deploy: 15 | mode: global 16 | resources: 17 | limits: 18 | memory: 128M 19 | 20 | node-exporter: 21 | image: ruanbekker/node-exporter:v0.18.0 22 | volumes: 23 | - /proc:/host/proc:ro 24 | - /sys:/host/sys:ro 25 | - /:/rootfs:ro 26 | - /etc/hostname:/etc/host_hostname 27 | - /etc/hostname:/etc/nodename 28 | ports: 29 | - 9100:9100 30 | environment: 31 | - HOST_HOSTNAME=/etc/host_hostname 32 | command: 33 | - '--path.procfs=/host/proc' 34 | - '--path.sysfs=/host/sys' 35 | - '--collector.filesystem.ignored-mount-points' 36 | - 
"^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" 37 | - '--collector.textfile.directory=/etc/node-exporter/' 38 | networks: 39 | - private 40 | deploy: 41 | mode: global 42 | resources: 43 | limits: 44 | memory: 128M 45 | 46 | blackbox-exporter: 47 | image: prom/blackbox-exporter:v0.12.0 48 | networks: 49 | - private 50 | deploy: 51 | mode: replicated 52 | replicas: 1 53 | resources: 54 | limits: 55 | memory: 64M 56 | 57 | alertmanager: 58 | image: prom/alertmanager:v0.17.0 59 | configs: 60 | - source: alertmanager_config 61 | target: /etc/alertmanager/config.yml 62 | - source: basicauth_htpasswd 63 | target: /etc/htpasswd 64 | command: 65 | - '--config.file=/etc/alertmanager/config.yml' 66 | - '--storage.path=/alertmanager' 67 | volumes: 68 | - alertmanager:/alertmanager 69 | networks: 70 | - private 71 | - public 72 | deploy: 73 | replicas: 1 74 | placement: 75 | constraints: 76 | - node.role==manager 77 | resources: 78 | limits: 79 | memory: 100M 80 | labels: 81 | - traefik.frontend.rule=Host:alertmanager.${DOMAIN:-localhost} 82 | - traefik.frontend.auth.basic.usersFile=/etc/htpasswd 83 | - traefik.enable=true 84 | - traefik.port=9093 85 | - traefik.tags=public 86 | - traefik.backend=alertmanager 87 | - traefik.docker.network=public 88 | - traefik.redirectorservice.frontend.entryPoints=http 89 | - traefik.redirectorservice.frontend.redirect.entryPoint=https 90 | - traefik.webservice.frontend.entryPoints=https 91 | 92 | prometheus: 93 | image: prom/prometheus:v2.9.2 94 | user: root 95 | volumes: 96 | - promconfig:/etc/prometheus 97 | - promdata:/prometheus 98 | configs: 99 | - source: prometheus_config 100 | target: /etc/prometheus/prometheus.yml 101 | - source: prometheus_rules 102 | target: /etc/prometheus/alert.rules 103 | command: 104 | - '--config.file=/etc/prometheus/prometheus.yml' 105 | - '--storage.tsdb.path=/prometheus' 106 | - 
'--web.console.libraries=/etc/prometheus/console_libraries' 107 | - '--web.console.templates=/etc/prometheus/consoles' 108 | networks: 109 | - private 110 | - public 111 | deploy: 112 | placement: 113 | constraints: 114 | - node.role==manager 115 | resources: 116 | limits: 117 | memory: 500M 118 | labels: 119 | - traefik.frontend.rule=Host:prometheus.${DOMAIN:-localhost} 120 | - traefik.enable=true 121 | - traefik.port=9090 122 | - traefik.tags=public 123 | - traefik.backend=prometheus 124 | - traefik.docker.network=public 125 | - traefik.redirectorservice.frontend.entryPoints=http 126 | - traefik.redirectorservice.frontend.redirect.entryPoint=https 127 | - traefik.webservice.frontend.entryPoints=https 128 | 129 | pushgateway: 130 | image: prom/pushgateway:v0.8.0 131 | networks: 132 | - private 133 | - public 134 | configs: 135 | - source: basicauth_htpasswd 136 | target: /etc/htpasswd 137 | deploy: 138 | replicas: 1 139 | placement: 140 | constraints: 141 | - node.role==manager 142 | resources: 143 | limits: 144 | memory: 64M 145 | labels: 146 | - traefik.frontend.rule=Host:pushgateway.${DOMAIN:-localhost} 147 | - traefik.frontend.auth.basic.usersFile=/etc/htpasswd 148 | - traefik.enable=true 149 | - traefik.port=9091 150 | - traefik.tags=public 151 | - traefik.backend=pushgateway 152 | - traefik.docker.network=public 153 | - traefik.redirectorservice.frontend.entryPoints=http 154 | - traefik.redirectorservice.frontend.redirect.entryPoint=https 155 | - traefik.webservice.frontend.entryPoints=https 156 | 157 | grafana: 158 | image: grafana/grafana:6.2.1 159 | volumes: 160 | - grafanadata:/var/lib/grafana 161 | environment: 162 | - GF_SERVER_ROOT_URL=http://grafana.${DOMAIN:-localhost} 163 | - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin} 164 | - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} 165 | - GF_USERS_ALLOW_SIGN_UP=false 166 | - GF_INSTALL_PLUGINS=grafana-piechart-panel 167 | - PROMETHEUS_ENDPOINT=http://prometheus:9090 168 | networks: 169 | - private 
170 | - public 171 | deploy: 172 | replicas: 1 173 | placement: 174 | constraints: 175 | - node.role==manager 176 | resources: 177 | limits: 178 | memory: 384M 179 | labels: 180 | - traefik.frontend.rule=Host:grafana.${DOMAIN:-localhost} 181 | - traefik.enable=true 182 | - traefik.port=3000 183 | - traefik.tags=public 184 | - traefik.backend=grafana 185 | - traefik.docker.network=public 186 | - traefik.redirectorservice.frontend.entryPoints=http 187 | - traefik.redirectorservice.frontend.redirect.entryPoint=https 188 | - traefik.webservice.frontend.entryPoints=https 189 | 190 | networks: 191 | private: 192 | name: private 193 | driver: overlay 194 | public: 195 | name: public 196 | external: true 197 | 198 | volumes: 199 | promconfig: {} 200 | promdata: {} 201 | grafanadata: {} 202 | alertmanager: {} 203 | 204 | configs: 205 | alertmanager_config: 206 | file: ./alertmanager/configs/alertmanager.yml 207 | prometheus_config: 208 | file: ./prometheus/configs/prometheus.yml 209 | prometheus_rules: 210 | file: ./prometheus/rules/alert.rules 211 | basicauth_htpasswd: 212 | file: ./htpasswd 213 | -------------------------------------------------------------------------------- /grafana/.placeholder: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/BlackboxExporter.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "Prometheus", 5 | "label": "signcl-prometheus", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "5.2.2" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "5.0.0" 24 | }, 25 | { 26 | "type": "datasource", 
27 | "id": "prometheus", 28 | "name": "Prometheus", 29 | "version": "5.0.0" 30 | }, 31 | { 32 | "type": "panel", 33 | "id": "singlestat", 34 | "name": "Singlestat", 35 | "version": "5.0.0" 36 | } 37 | ], 38 | "annotations": { 39 | "list": [ 40 | { 41 | "builtIn": 1, 42 | "datasource": "-- Grafana --", 43 | "enable": true, 44 | "hide": true, 45 | "iconColor": "rgba(0, 211, 255, 1)", 46 | "name": "Annotations & Alerts", 47 | "type": "dashboard" 48 | } 49 | ] 50 | }, 51 | "description": "Prometheus Blackbox Exporter Overview", 52 | "editable": true, 53 | "gnetId": 7587, 54 | "graphTooltip": 0, 55 | "id": null, 56 | "iteration": 1534695504413, 57 | "links": [], 58 | "panels": [ 59 | { 60 | "aliasColors": {}, 61 | "bars": false, 62 | "dashLength": 10, 63 | "dashes": false, 64 | "datasource": "Prometheus", 65 | "fill": 1, 66 | "gridPos": { 67 | "h": 8, 68 | "w": 24, 69 | "x": 0, 70 | "y": 0 71 | }, 72 | "id": 138, 73 | "legend": { 74 | "avg": false, 75 | "current": false, 76 | "max": false, 77 | "min": false, 78 | "show": true, 79 | "total": false, 80 | "values": false 81 | }, 82 | "lines": true, 83 | "linewidth": 1, 84 | "links": [], 85 | "nullPointMode": "null", 86 | "percentage": false, 87 | "pointradius": 5, 88 | "points": false, 89 | "renderer": "flot", 90 | "seriesOverrides": [], 91 | "spaceLength": 10, 92 | "stack": false, 93 | "steppedLine": false, 94 | "targets": [ 95 | { 96 | "expr": "probe_duration_seconds{instance=~\"$target\"}", 97 | "format": "time_series", 98 | "interval": "$interval", 99 | "intervalFactor": 1, 100 | "legendFormat": "{{ instance }}", 101 | "refId": "A" 102 | } 103 | ], 104 | "thresholds": [], 105 | "timeFrom": null, 106 | "timeShift": null, 107 | "title": "Global Probe Duration", 108 | "tooltip": { 109 | "shared": true, 110 | "sort": 1, 111 | "value_type": "individual" 112 | }, 113 | "type": "graph", 114 | "xaxis": { 115 | "buckets": null, 116 | "mode": "time", 117 | "name": null, 118 | "show": true, 119 | "values": [] 120 | }, 121 | 
"yaxes": [ 122 | { 123 | "format": "s", 124 | "label": null, 125 | "logBase": 1, 126 | "max": null, 127 | "min": null, 128 | "show": true 129 | }, 130 | { 131 | "format": "short", 132 | "label": null, 133 | "logBase": 1, 134 | "max": null, 135 | "min": null, 136 | "show": true 137 | } 138 | ], 139 | "yaxis": { 140 | "align": false, 141 | "alignLevel": null 142 | } 143 | }, 144 | { 145 | "collapsed": false, 146 | "gridPos": { 147 | "h": 1, 148 | "w": 24, 149 | "x": 0, 150 | "y": 8 151 | }, 152 | "id": 15, 153 | "panels": [], 154 | "repeat": "target", 155 | "title": "$target status", 156 | "type": "row" 157 | }, 158 | { 159 | "cacheTimeout": null, 160 | "colorBackground": true, 161 | "colorValue": false, 162 | "colors": [ 163 | "#d44a3a", 164 | "rgba(237, 129, 40, 0.89)", 165 | "#299c46" 166 | ], 167 | "datasource": "Prometheus", 168 | "format": "none", 169 | "gauge": { 170 | "maxValue": 100, 171 | "minValue": 0, 172 | "show": false, 173 | "thresholdLabels": false, 174 | "thresholdMarkers": true 175 | }, 176 | "gridPos": { 177 | "h": 2, 178 | "w": 4, 179 | "x": 0, 180 | "y": 9 181 | }, 182 | "id": 2, 183 | "interval": null, 184 | "links": [], 185 | "mappingType": 1, 186 | "mappingTypes": [ 187 | { 188 | "name": "value to text", 189 | "value": 1 190 | }, 191 | { 192 | "name": "range to text", 193 | "value": 2 194 | } 195 | ], 196 | "maxDataPoints": 100, 197 | "minSpan": 3, 198 | "nullPointMode": "connected", 199 | "nullText": null, 200 | "postfix": "", 201 | "postfixFontSize": "50%", 202 | "prefix": "", 203 | "prefixFontSize": "50%", 204 | "rangeMaps": [ 205 | { 206 | "from": "null", 207 | "text": "N/A", 208 | "to": "null" 209 | } 210 | ], 211 | "repeat": null, 212 | "repeatDirection": "v", 213 | "sparkline": { 214 | "fillColor": "rgba(31, 118, 189, 0.18)", 215 | "full": false, 216 | "lineColor": "rgb(31, 120, 193)", 217 | "show": false 218 | }, 219 | "tableColumn": "", 220 | "targets": [ 221 | { 222 | "expr": "probe_success{instance=~\"$target\"}", 223 | "format": 
"time_series", 224 | "interval": "$interval", 225 | "intervalFactor": 1, 226 | "refId": "A" 227 | } 228 | ], 229 | "thresholds": "1,1", 230 | "title": "Status", 231 | "type": "singlestat", 232 | "valueFontSize": "80%", 233 | "valueMaps": [ 234 | { 235 | "op": "=", 236 | "text": "N/A", 237 | "value": "null" 238 | }, 239 | { 240 | "op": "=", 241 | "text": "UP", 242 | "value": "1" 243 | }, 244 | { 245 | "op": "=", 246 | "text": "DOWN", 247 | "value": "0" 248 | } 249 | ], 250 | "valueName": "current" 251 | }, 252 | { 253 | "aliasColors": {}, 254 | "bars": false, 255 | "dashLength": 10, 256 | "dashes": false, 257 | "datasource": "Prometheus", 258 | "fill": 1, 259 | "gridPos": { 260 | "h": 6, 261 | "w": 10, 262 | "x": 4, 263 | "y": 9 264 | }, 265 | "id": 25, 266 | "legend": { 267 | "avg": false, 268 | "current": false, 269 | "max": false, 270 | "min": false, 271 | "show": true, 272 | "total": false, 273 | "values": false 274 | }, 275 | "lines": true, 276 | "linewidth": 1, 277 | "links": [], 278 | "nullPointMode": "null", 279 | "percentage": false, 280 | "pointradius": 5, 281 | "points": false, 282 | "renderer": "flot", 283 | "seriesOverrides": [], 284 | "spaceLength": 10, 285 | "stack": false, 286 | "steppedLine": false, 287 | "targets": [ 288 | { 289 | "expr": "probe_http_duration_seconds{instance=~\"$target\"}", 290 | "format": "time_series", 291 | "interval": "$interval", 292 | "intervalFactor": 1, 293 | "legendFormat": "{{ phase }}", 294 | "refId": "B" 295 | } 296 | ], 297 | "thresholds": [], 298 | "timeFrom": null, 299 | "timeShift": null, 300 | "title": "HTTP Duration", 301 | "tooltip": { 302 | "shared": true, 303 | "sort": 0, 304 | "value_type": "individual" 305 | }, 306 | "type": "graph", 307 | "xaxis": { 308 | "buckets": null, 309 | "mode": "time", 310 | "name": null, 311 | "show": true, 312 | "values": [] 313 | }, 314 | "yaxes": [ 315 | { 316 | "format": "s", 317 | "label": null, 318 | "logBase": 1, 319 | "max": null, 320 | "min": null, 321 | "show": true 322 | 
}, 323 | { 324 | "format": "short", 325 | "label": null, 326 | "logBase": 1, 327 | "max": null, 328 | "min": null, 329 | "show": true 330 | } 331 | ], 332 | "yaxis": { 333 | "align": false, 334 | "alignLevel": null 335 | } 336 | }, 337 | { 338 | "aliasColors": {}, 339 | "bars": false, 340 | "dashLength": 10, 341 | "dashes": false, 342 | "datasource": "Prometheus", 343 | "fill": 1, 344 | "gridPos": { 345 | "h": 6, 346 | "w": 10, 347 | "x": 14, 348 | "y": 9 349 | }, 350 | "id": 17, 351 | "legend": { 352 | "avg": false, 353 | "current": false, 354 | "max": false, 355 | "min": false, 356 | "show": true, 357 | "total": false, 358 | "values": false 359 | }, 360 | "lines": true, 361 | "linewidth": 1, 362 | "links": [], 363 | "nullPointMode": "null", 364 | "percentage": false, 365 | "pointradius": 5, 366 | "points": false, 367 | "renderer": "flot", 368 | "repeat": null, 369 | "seriesOverrides": [], 370 | "spaceLength": 10, 371 | "stack": false, 372 | "steppedLine": false, 373 | "targets": [ 374 | { 375 | "expr": "probe_duration_seconds{instance=~\"$target\"}", 376 | "format": "time_series", 377 | "interval": "$interval", 378 | "intervalFactor": 1, 379 | "legendFormat": "seconds", 380 | "refId": "A" 381 | } 382 | ], 383 | "thresholds": [], 384 | "timeFrom": null, 385 | "timeShift": null, 386 | "title": "Probe Duration", 387 | "tooltip": { 388 | "shared": true, 389 | "sort": 0, 390 | "value_type": "individual" 391 | }, 392 | "type": "graph", 393 | "xaxis": { 394 | "buckets": null, 395 | "mode": "time", 396 | "name": null, 397 | "show": true, 398 | "values": [] 399 | }, 400 | "yaxes": [ 401 | { 402 | "format": "s", 403 | "label": null, 404 | "logBase": 1, 405 | "max": null, 406 | "min": null, 407 | "show": true 408 | }, 409 | { 410 | "format": "short", 411 | "label": null, 412 | "logBase": 1, 413 | "max": null, 414 | "min": null, 415 | "show": true 416 | } 417 | ], 418 | "yaxis": { 419 | "align": false, 420 | "alignLevel": null 421 | } 422 | }, 423 | { 424 | "cacheTimeout": 
null, 425 | "colorBackground": false, 426 | "colorValue": false, 427 | "colors": [ 428 | "#299c46", 429 | "rgba(237, 129, 40, 0.89)", 430 | "#d44a3a" 431 | ], 432 | "datasource": "Prometheus", 433 | "decimals": 0, 434 | "format": "none", 435 | "gauge": { 436 | "maxValue": 100, 437 | "minValue": 0, 438 | "show": false, 439 | "thresholdLabels": false, 440 | "thresholdMarkers": true 441 | }, 442 | "gridPos": { 443 | "h": 2, 444 | "w": 4, 445 | "x": 0, 446 | "y": 11 447 | }, 448 | "id": 20, 449 | "interval": null, 450 | "links": [], 451 | "mappingType": 1, 452 | "mappingTypes": [ 453 | { 454 | "name": "value to text", 455 | "value": 1 456 | }, 457 | { 458 | "name": "range to text", 459 | "value": 2 460 | } 461 | ], 462 | "maxDataPoints": 100, 463 | "minSpan": 3, 464 | "nullPointMode": "connected", 465 | "nullText": null, 466 | "postfix": "", 467 | "postfixFontSize": "50%", 468 | "prefix": "", 469 | "prefixFontSize": "50%", 470 | "rangeMaps": [ 471 | { 472 | "from": "null", 473 | "text": "N/A", 474 | "to": "null" 475 | } 476 | ], 477 | "repeat": null, 478 | "repeatDirection": "h", 479 | "sparkline": { 480 | "fillColor": "rgba(31, 118, 189, 0.18)", 481 | "full": false, 482 | "lineColor": "rgb(31, 120, 193)", 483 | "show": false 484 | }, 485 | "tableColumn": "", 486 | "targets": [ 487 | { 488 | "expr": "probe_http_status_code{instance=~\"$target\"}", 489 | "format": "time_series", 490 | "interval": "$interval", 491 | "intervalFactor": 1, 492 | "refId": "A" 493 | } 494 | ], 495 | "thresholds": "201, 399", 496 | "title": "HTTP Status Code", 497 | "transparent": false, 498 | "type": "singlestat", 499 | "valueFontSize": "80%", 500 | "valueMaps": [ 501 | { 502 | "op": "=", 503 | "text": "N/A", 504 | "value": "null" 505 | }, 506 | { 507 | "op": "=", 508 | "text": "YES", 509 | "value": "1" 510 | }, 511 | { 512 | "op": "=", 513 | "text": "N/A", 514 | "value": "0" 515 | } 516 | ], 517 | "valueName": "current" 518 | }, 519 | { 520 | "cacheTimeout": null, 521 | "colorBackground": 
false, 522 | "colorValue": false, 523 | "colors": [ 524 | "#299c46", 525 | "rgba(237, 129, 40, 0.89)", 526 | "#d44a3a" 527 | ], 528 | "datasource": "Prometheus", 529 | "format": "none", 530 | "gauge": { 531 | "maxValue": 100, 532 | "minValue": 0, 533 | "show": false, 534 | "thresholdLabels": false, 535 | "thresholdMarkers": true 536 | }, 537 | "gridPos": { 538 | "h": 2, 539 | "w": 4, 540 | "x": 0, 541 | "y": 13 542 | }, 543 | "id": 27, 544 | "interval": null, 545 | "links": [], 546 | "mappingType": 1, 547 | "mappingTypes": [ 548 | { 549 | "name": "value to text", 550 | "value": 1 551 | }, 552 | { 553 | "name": "range to text", 554 | "value": 2 555 | } 556 | ], 557 | "maxDataPoints": 100, 558 | "nullPointMode": "connected", 559 | "nullText": null, 560 | "postfix": "", 561 | "postfixFontSize": "50%", 562 | "prefix": "", 563 | "prefixFontSize": "50%", 564 | "rangeMaps": [ 565 | { 566 | "from": "null", 567 | "text": "N/A", 568 | "to": "null" 569 | } 570 | ], 571 | "sparkline": { 572 | "fillColor": "rgba(31, 118, 189, 0.18)", 573 | "full": false, 574 | "lineColor": "rgb(31, 120, 193)", 575 | "show": false 576 | }, 577 | "tableColumn": "", 578 | "targets": [ 579 | { 580 | "expr": "probe_http_version{instance=~\"$target\"}", 581 | "format": "time_series", 582 | "intervalFactor": 1, 583 | "refId": "A" 584 | } 585 | ], 586 | "thresholds": "", 587 | "title": "HTTP Version", 588 | "type": "singlestat", 589 | "valueFontSize": "80%", 590 | "valueMaps": [ 591 | { 592 | "op": "=", 593 | "text": "N/A", 594 | "value": "null" 595 | } 596 | ], 597 | "valueName": "current" 598 | }, 599 | { 600 | "cacheTimeout": null, 601 | "colorBackground": false, 602 | "colorValue": true, 603 | "colors": [ 604 | "#d44a3a", 605 | "rgba(237, 129, 40, 0.89)", 606 | "#299c46" 607 | ], 608 | "datasource": "Prometheus", 609 | "format": "none", 610 | "gauge": { 611 | "maxValue": 100, 612 | "minValue": 0, 613 | "show": false, 614 | "thresholdLabels": false, 615 | "thresholdMarkers": true 616 | }, 617 | 
"gridPos": { 618 | "h": 2, 619 | "w": 4, 620 | "x": 0, 621 | "y": 15 622 | }, 623 | "id": 18, 624 | "interval": null, 625 | "links": [], 626 | "mappingType": 1, 627 | "mappingTypes": [ 628 | { 629 | "name": "value to text", 630 | "value": 1 631 | }, 632 | { 633 | "name": "range to text", 634 | "value": 2 635 | } 636 | ], 637 | "maxDataPoints": 100, 638 | "minSpan": 3, 639 | "nullPointMode": "connected", 640 | "nullText": null, 641 | "postfix": "", 642 | "postfixFontSize": "50%", 643 | "prefix": "", 644 | "prefixFontSize": "50%", 645 | "rangeMaps": [ 646 | { 647 | "from": "null", 648 | "text": "N/A", 649 | "to": "null" 650 | } 651 | ], 652 | "repeat": null, 653 | "repeatDirection": "v", 654 | "sparkline": { 655 | "fillColor": "rgba(31, 118, 189, 0.18)", 656 | "full": false, 657 | "lineColor": "rgb(31, 120, 193)", 658 | "show": false 659 | }, 660 | "tableColumn": "", 661 | "targets": [ 662 | { 663 | "expr": "probe_http_ssl{instance=~\"$target\"}", 664 | "format": "time_series", 665 | "interval": "$interval", 666 | "intervalFactor": 1, 667 | "refId": "A" 668 | } 669 | ], 670 | "thresholds": "0, 1", 671 | "title": "SSL", 672 | "type": "singlestat", 673 | "valueFontSize": "80%", 674 | "valueMaps": [ 675 | { 676 | "op": "=", 677 | "text": "N/A", 678 | "value": "null" 679 | }, 680 | { 681 | "op": "=", 682 | "text": "YES", 683 | "value": "1" 684 | }, 685 | { 686 | "op": "=", 687 | "text": "NO", 688 | "value": "0" 689 | } 690 | ], 691 | "valueName": "current" 692 | }, 693 | { 694 | "cacheTimeout": null, 695 | "colorBackground": false, 696 | "colorValue": true, 697 | "colors": [ 698 | "#d44a3a", 699 | "rgba(237, 129, 40, 0.89)", 700 | "#299c46" 701 | ], 702 | "datasource": "Prometheus", 703 | "decimals": 2, 704 | "format": "dtdurations", 705 | "gauge": { 706 | "maxValue": 100, 707 | "minValue": 0, 708 | "show": false, 709 | "thresholdLabels": false, 710 | "thresholdMarkers": true 711 | }, 712 | "gridPos": { 713 | "h": 2, 714 | "w": 10, 715 | "x": 4, 716 | "y": 15 717 | }, 
718 | "id": 19, 719 | "interval": null, 720 | "links": [], 721 | "mappingType": 1, 722 | "mappingTypes": [ 723 | { 724 | "name": "value to text", 725 | "value": 1 726 | }, 727 | { 728 | "name": "range to text", 729 | "value": 2 730 | } 731 | ], 732 | "maxDataPoints": 100, 733 | "minSpan": 3, 734 | "nullPointMode": "connected", 735 | "nullText": null, 736 | "postfix": "", 737 | "postfixFontSize": "50%", 738 | "prefix": "", 739 | "prefixFontSize": "50%", 740 | "rangeMaps": [ 741 | { 742 | "from": "null", 743 | "text": "N/A", 744 | "to": "null" 745 | } 746 | ], 747 | "repeat": null, 748 | "repeatDirection": "h", 749 | "sparkline": { 750 | "fillColor": "rgba(31, 118, 189, 0.18)", 751 | "full": false, 752 | "lineColor": "rgb(31, 120, 193)", 753 | "show": false 754 | }, 755 | "tableColumn": "", 756 | "targets": [ 757 | { 758 | "expr": "probe_ssl_earliest_cert_expiry{instance=~\"$target\"} - time()", 759 | "format": "time_series", 760 | "interval": "$interval", 761 | "intervalFactor": 1, 762 | "refId": "A" 763 | } 764 | ], 765 | "thresholds": "0,1209600", 766 | "timeFrom": null, 767 | "title": "SSL Expiry", 768 | "transparent": false, 769 | "type": "singlestat", 770 | "valueFontSize": "80%", 771 | "valueMaps": [ 772 | { 773 | "op": "=", 774 | "text": "N/A", 775 | "value": "null" 776 | }, 777 | { 778 | "op": "=", 779 | "text": "YES", 780 | "value": "1" 781 | }, 782 | { 783 | "op": "=", 784 | "text": "NO", 785 | "value": "0" 786 | } 787 | ], 788 | "valueName": "current" 789 | }, 790 | { 791 | "cacheTimeout": null, 792 | "colorBackground": false, 793 | "colorValue": false, 794 | "colors": [ 795 | "#299c46", 796 | "rgba(237, 129, 40, 0.89)", 797 | "#d44a3a" 798 | ], 799 | "datasource": "Prometheus", 800 | "format": "s", 801 | "gauge": { 802 | "maxValue": 100, 803 | "minValue": 0, 804 | "show": false, 805 | "thresholdLabels": false, 806 | "thresholdMarkers": true 807 | }, 808 | "gridPos": { 809 | "h": 2, 810 | "w": 5, 811 | "x": 14, 812 | "y": 15 813 | }, 814 | "id": 23, 815 | 
"interval": null, 816 | "links": [], 817 | "mappingType": 1, 818 | "mappingTypes": [ 819 | { 820 | "name": "value to text", 821 | "value": 1 822 | }, 823 | { 824 | "name": "range to text", 825 | "value": 2 826 | } 827 | ], 828 | "maxDataPoints": 100, 829 | "nullPointMode": "connected", 830 | "nullText": null, 831 | "postfix": "", 832 | "postfixFontSize": "50%", 833 | "prefix": "", 834 | "prefixFontSize": "50%", 835 | "rangeMaps": [ 836 | { 837 | "from": "null", 838 | "text": "N/A", 839 | "to": "null" 840 | } 841 | ], 842 | "repeat": null, 843 | "sparkline": { 844 | "fillColor": "rgba(31, 118, 189, 0.18)", 845 | "full": false, 846 | "lineColor": "rgb(31, 120, 193)", 847 | "show": false 848 | }, 849 | "tableColumn": "", 850 | "targets": [ 851 | { 852 | "expr": "avg(probe_duration_seconds{instance=~\"$target\"})", 853 | "format": "time_series", 854 | "interval": "$interval", 855 | "intervalFactor": 1, 856 | "refId": "A" 857 | } 858 | ], 859 | "thresholds": "", 860 | "title": "Average Probe Duration", 861 | "type": "singlestat", 862 | "valueFontSize": "80%", 863 | "valueMaps": [ 864 | { 865 | "op": "=", 866 | "text": "N/A", 867 | "value": "null" 868 | } 869 | ], 870 | "valueName": "current" 871 | }, 872 | { 873 | "cacheTimeout": null, 874 | "colorBackground": false, 875 | "colorValue": false, 876 | "colors": [ 877 | "#299c46", 878 | "rgba(237, 129, 40, 0.89)", 879 | "#d44a3a" 880 | ], 881 | "datasource": "Prometheus", 882 | "format": "s", 883 | "gauge": { 884 | "maxValue": 100, 885 | "minValue": 0, 886 | "show": false, 887 | "thresholdLabels": false, 888 | "thresholdMarkers": true 889 | }, 890 | "gridPos": { 891 | "h": 2, 892 | "w": 5, 893 | "x": 19, 894 | "y": 15 895 | }, 896 | "id": 24, 897 | "interval": null, 898 | "links": [], 899 | "mappingType": 1, 900 | "mappingTypes": [ 901 | { 902 | "name": "value to text", 903 | "value": 1 904 | }, 905 | { 906 | "name": "range to text", 907 | "value": 2 908 | } 909 | ], 910 | "maxDataPoints": 100, 911 | "nullPointMode": 
"connected", 912 | "nullText": null, 913 | "postfix": "", 914 | "postfixFontSize": "50%", 915 | "prefix": "", 916 | "prefixFontSize": "50%", 917 | "rangeMaps": [ 918 | { 919 | "from": "null", 920 | "text": "N/A", 921 | "to": "null" 922 | } 923 | ], 924 | "repeat": null, 925 | "repeatDirection": "h", 926 | "sparkline": { 927 | "fillColor": "rgba(31, 118, 189, 0.18)", 928 | "full": false, 929 | "lineColor": "rgb(31, 120, 193)", 930 | "show": false 931 | }, 932 | "tableColumn": "", 933 | "targets": [ 934 | { 935 | "expr": "avg(probe_dns_lookup_time_seconds{instance=~\"$target\"})", 936 | "format": "time_series", 937 | "interval": "$interval", 938 | "intervalFactor": 1, 939 | "refId": "A" 940 | } 941 | ], 942 | "thresholds": "", 943 | "title": "Average DNS Lookup", 944 | "type": "singlestat", 945 | "valueFontSize": "80%", 946 | "valueMaps": [ 947 | { 948 | "op": "=", 949 | "text": "N/A", 950 | "value": "null" 951 | } 952 | ], 953 | "valueName": "current" 954 | } 955 | ], 956 | "refresh": "10s", 957 | "schemaVersion": 16, 958 | "style": "dark", 959 | "tags": [ 960 | "blackbox", 961 | "prometheus" 962 | ], 963 | "templating": { 964 | "list": [ 965 | { 966 | "auto": true, 967 | "auto_count": 10, 968 | "auto_min": "10s", 969 | "current": { 970 | "text": "10s", 971 | "value": "10s" 972 | }, 973 | "hide": 0, 974 | "label": "Interval", 975 | "name": "interval", 976 | "options": [ 977 | { 978 | "selected": false, 979 | "text": "auto", 980 | "value": "$__auto_interval_interval" 981 | }, 982 | { 983 | "selected": false, 984 | "text": "5s", 985 | "value": "5s" 986 | }, 987 | { 988 | "selected": true, 989 | "text": "10s", 990 | "value": "10s" 991 | }, 992 | { 993 | "selected": false, 994 | "text": "30s", 995 | "value": "30s" 996 | }, 997 | { 998 | "selected": false, 999 | "text": "1m", 1000 | "value": "1m" 1001 | }, 1002 | { 1003 | "selected": false, 1004 | "text": "10m", 1005 | "value": "10m" 1006 | }, 1007 | { 1008 | "selected": false, 1009 | "text": "30m", 1010 | "value": "30m" 
1011 | }, 1012 | { 1013 | "selected": false, 1014 | "text": "1h", 1015 | "value": "1h" 1016 | }, 1017 | { 1018 | "selected": false, 1019 | "text": "6h", 1020 | "value": "6h" 1021 | }, 1022 | { 1023 | "selected": false, 1024 | "text": "12h", 1025 | "value": "12h" 1026 | }, 1027 | { 1028 | "selected": false, 1029 | "text": "1d", 1030 | "value": "1d" 1031 | }, 1032 | { 1033 | "selected": false, 1034 | "text": "7d", 1035 | "value": "7d" 1036 | }, 1037 | { 1038 | "selected": false, 1039 | "text": "14d", 1040 | "value": "14d" 1041 | }, 1042 | { 1043 | "selected": false, 1044 | "text": "30d", 1045 | "value": "30d" 1046 | } 1047 | ], 1048 | "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 1049 | "refresh": 2, 1050 | "type": "interval" 1051 | }, 1052 | { 1053 | "allValue": null, 1054 | "current": {}, 1055 | "datasource": "Prometheus", 1056 | "hide": 0, 1057 | "includeAll": true, 1058 | "label": null, 1059 | "multi": true, 1060 | "name": "target", 1061 | "options": [], 1062 | "query": "label_values(probe_success, instance)", 1063 | "refresh": 1, 1064 | "regex": "", 1065 | "sort": 0, 1066 | "tagValuesQuery": "", 1067 | "tags": [], 1068 | "tagsQuery": "", 1069 | "type": "query", 1070 | "useTags": false 1071 | } 1072 | ] 1073 | }, 1074 | "time": { 1075 | "from": "now-1h", 1076 | "to": "now" 1077 | }, 1078 | "timepicker": { 1079 | "refresh_intervals": [ 1080 | "5s", 1081 | "10s", 1082 | "30s", 1083 | "1m", 1084 | "5m", 1085 | "15m", 1086 | "30m", 1087 | "1h", 1088 | "2h", 1089 | "1d" 1090 | ], 1091 | "time_options": [ 1092 | "5m", 1093 | "15m", 1094 | "1h", 1095 | "6h", 1096 | "12h", 1097 | "24h", 1098 | "2d", 1099 | "7d", 1100 | "30d" 1101 | ] 1102 | }, 1103 | "timezone": "", 1104 | "title": "Prometheus Blackbox Exporter", 1105 | "uid": "xtkCtBkiz", 1106 | "version": 2 1107 | } 1108 | -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/DockerPrometheusMonitoring.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "description": "Docker Monitoring Template", 16 | "editable": true, 17 | "gnetId": 179, 18 | "graphTooltip": 1, 19 | "iteration": 1556748725233, 20 | "links": [], 21 | "panels": [ 22 | { 23 | "collapsed": false, 24 | "gridPos": { 25 | "h": 1, 26 | "w": 24, 27 | "x": 0, 28 | "y": 0 29 | }, 30 | "id": 17, 31 | "panels": [], 32 | "title": "Host Info", 33 | "type": "row" 34 | }, 35 | { 36 | "cacheTimeout": null, 37 | "colorBackground": false, 38 | "colorValue": false, 39 | "colors": [ 40 | "#299c46", 41 | "rgba(237, 129, 40, 0.89)", 42 | "#d44a3a" 43 | ], 44 | "datasource": "Prometheus", 45 | "format": "s", 46 | "gauge": { 47 | "maxValue": 100, 48 | "minValue": 0, 49 | "show": false, 50 | "thresholdLabels": false, 51 | "thresholdMarkers": true 52 | }, 53 | "gridPos": { 54 | "h": 7, 55 | "w": 6, 56 | "x": 0, 57 | "y": 1 58 | }, 59 | "id": 15, 60 | "interval": null, 61 | "links": [], 62 | "mappingType": 1, 63 | "mappingTypes": [ 64 | { 65 | "name": "value to text", 66 | "value": 1 67 | }, 68 | { 69 | "name": "range to text", 70 | "value": 2 71 | } 72 | ], 73 | "maxDataPoints": 100, 74 | "nullPointMode": "connected", 75 | "nullText": null, 76 | "postfix": "", 77 | "postfixFontSize": "50%", 78 | "prefix": "", 79 | "prefixFontSize": "50%", 80 | "rangeMaps": [ 81 | { 82 | "from": "null", 83 | "text": "N/A", 84 | "to": "null" 85 | } 86 | ], 87 | "sparkline": { 88 | "fillColor": "rgba(31, 118, 189, 0.18)", 89 | "full": false, 90 | "lineColor": "rgb(31, 120, 193)", 91 | "show": false 92 | }, 93 | "tableColumn": "", 94 | "targets": [ 95 | { 96 | "expr": "time() - process_start_time_seconds{job=\"prometheus\"}", 97 | "format": 
"time_series", 98 | "intervalFactor": 1, 99 | "refId": "A" 100 | } 101 | ], 102 | "thresholds": "", 103 | "title": "Uptime", 104 | "type": "singlestat", 105 | "valueFontSize": "80%", 106 | "valueMaps": [ 107 | { 108 | "op": "=", 109 | "text": "N/A", 110 | "value": "null" 111 | } 112 | ], 113 | "valueName": "avg" 114 | }, 115 | { 116 | "cacheTimeout": null, 117 | "colorBackground": true, 118 | "colorValue": false, 119 | "colors": [ 120 | "#299c46", 121 | "rgba(237, 129, 40, 0.89)", 122 | "#d44a3a" 123 | ], 124 | "datasource": "Prometheus", 125 | "format": "none", 126 | "gauge": { 127 | "maxValue": 100, 128 | "minValue": 0, 129 | "show": false, 130 | "thresholdLabels": false, 131 | "thresholdMarkers": true 132 | }, 133 | "gridPos": { 134 | "h": 7, 135 | "w": 5, 136 | "x": 6, 137 | "y": 1 138 | }, 139 | "id": 13, 140 | "interval": null, 141 | "links": [], 142 | "mappingType": 1, 143 | "mappingTypes": [ 144 | { 145 | "name": "value to text", 146 | "value": 1 147 | }, 148 | { 149 | "name": "range to text", 150 | "value": 2 151 | } 152 | ], 153 | "maxDataPoints": 100, 154 | "nullPointMode": "connected", 155 | "nullText": null, 156 | "postfix": "", 157 | "postfixFontSize": "50%", 158 | "prefix": "", 159 | "prefixFontSize": "50%", 160 | "rangeMaps": [ 161 | { 162 | "from": "null", 163 | "text": "N/A", 164 | "to": "null" 165 | } 166 | ], 167 | "sparkline": { 168 | "fillColor": "rgba(31, 118, 189, 0.18)", 169 | "full": false, 170 | "lineColor": "rgb(31, 120, 193)", 171 | "show": false 172 | }, 173 | "tableColumn": "", 174 | "targets": [ 175 | { 176 | "expr": "sum(ALERTS)", 177 | "format": "time_series", 178 | "intervalFactor": 1, 179 | "refId": "A" 180 | } 181 | ], 182 | "thresholds": "0,1", 183 | "title": "Alerts", 184 | "type": "singlestat", 185 | "valueFontSize": "80%", 186 | "valueMaps": [ 187 | { 188 | "op": "=", 189 | "text": "N/A", 190 | "value": "0" 191 | } 192 | ], 193 | "valueName": "avg" 194 | }, 195 | { 196 | "cacheTimeout": null, 197 | "colorBackground": true, 
198 | "colorValue": false, 199 | "colors": [ 200 | "#d44a3a", 201 | "rgba(237, 129, 40, 0.89)", 202 | "#299c46" 203 | ], 204 | "datasource": "Prometheus", 205 | "format": "none", 206 | "gauge": { 207 | "maxValue": 100, 208 | "minValue": 0, 209 | "show": false, 210 | "thresholdLabels": false, 211 | "thresholdMarkers": true 212 | }, 213 | "gridPos": { 214 | "h": 7, 215 | "w": 4, 216 | "x": 11, 217 | "y": 1 218 | }, 219 | "id": 11, 220 | "interval": null, 221 | "links": [], 222 | "mappingType": 1, 223 | "mappingTypes": [ 224 | { 225 | "name": "value to text", 226 | "value": 1 227 | }, 228 | { 229 | "name": "range to text", 230 | "value": 2 231 | } 232 | ], 233 | "maxDataPoints": 100, 234 | "nullPointMode": "connected", 235 | "nullText": null, 236 | "postfix": "", 237 | "postfixFontSize": "50%", 238 | "prefix": "", 239 | "prefixFontSize": "50%", 240 | "rangeMaps": [ 241 | { 242 | "from": "null", 243 | "text": "N/A", 244 | "to": "null" 245 | } 246 | ], 247 | "sparkline": { 248 | "fillColor": "rgba(31, 118, 189, 0.18)", 249 | "full": false, 250 | "lineColor": "rgb(31, 120, 193)", 251 | "show": false 252 | }, 253 | "tableColumn": "", 254 | "targets": [ 255 | { 256 | "expr": "sum(up)", 257 | "format": "time_series", 258 | "intervalFactor": 1, 259 | "refId": "A" 260 | } 261 | ], 262 | "thresholds": "0,1", 263 | "title": "Targets Online", 264 | "type": "singlestat", 265 | "valueFontSize": "80%", 266 | "valueMaps": [ 267 | { 268 | "op": "=", 269 | "text": "N/A", 270 | "value": "null" 271 | } 272 | ], 273 | "valueName": "avg" 274 | }, 275 | { 276 | "cacheTimeout": null, 277 | "colorBackground": false, 278 | "colorValue": false, 279 | "colors": [ 280 | "#d44a3a", 281 | "rgba(237, 129, 40, 0.89)", 282 | "#299c46" 283 | ], 284 | "datasource": "Prometheus", 285 | "format": "none", 286 | "gauge": { 287 | "maxValue": 100, 288 | "minValue": 0, 289 | "show": false, 290 | "thresholdLabels": false, 291 | "thresholdMarkers": true 292 | }, 293 | "gridPos": { 294 | "h": 7, 295 | "w": 4, 
296 | "x": 15, 297 | "y": 1 298 | }, 299 | "id": 31, 300 | "interval": null, 301 | "links": [], 302 | "mappingType": 1, 303 | "mappingTypes": [ 304 | { 305 | "name": "value to text", 306 | "value": 1 307 | }, 308 | { 309 | "name": "range to text", 310 | "value": 2 311 | } 312 | ], 313 | "maxDataPoints": 100, 314 | "nullPointMode": "connected", 315 | "nullText": null, 316 | "postfix": "", 317 | "postfixFontSize": "50%", 318 | "prefix": "", 319 | "prefixFontSize": "50%", 320 | "rangeMaps": [ 321 | { 322 | "from": "null", 323 | "text": "N/A", 324 | "to": "null" 325 | } 326 | ], 327 | "sparkline": { 328 | "fillColor": "rgba(31, 118, 189, 0.18)", 329 | "full": false, 330 | "lineColor": "rgb(31, 120, 193)", 331 | "show": true 332 | }, 333 | "tableColumn": "", 334 | "targets": [ 335 | { 336 | "expr": "count(rate(container_last_seen{job=\"cadvisor\", name!=\"\"}[5m]))", 337 | "format": "time_series", 338 | "intervalFactor": 1, 339 | "refId": "A" 340 | } 341 | ], 342 | "thresholds": "0,1", 343 | "title": "Running Containers", 344 | "type": "singlestat", 345 | "valueFontSize": "80%", 346 | "valueMaps": [ 347 | { 348 | "op": "=", 349 | "text": "N/A", 350 | "value": "null" 351 | } 352 | ], 353 | "valueName": "avg" 354 | }, 355 | { 356 | "cacheTimeout": null, 357 | "colorBackground": false, 358 | "colorValue": false, 359 | "colors": [ 360 | "rgba(50, 172, 45, 0.97)", 361 | "rgba(237, 129, 40, 0.89)", 362 | "rgba(245, 54, 54, 0.9)" 363 | ], 364 | "datasource": "Prometheus", 365 | "editable": true, 366 | "error": false, 367 | "format": "percent", 368 | "gauge": { 369 | "maxValue": 100, 370 | "minValue": 0, 371 | "show": true, 372 | "thresholdLabels": false, 373 | "thresholdMarkers": true 374 | }, 375 | "gridPos": { 376 | "h": 6, 377 | "w": 6, 378 | "x": 0, 379 | "y": 8 380 | }, 381 | "id": 4, 382 | "interval": null, 383 | "isNew": true, 384 | "links": [], 385 | "mappingType": 1, 386 | "mappingTypes": [ 387 | { 388 | "name": "value to text", 389 | "value": 1 390 | }, 391 | { 392 | 
"name": "range to text", 393 | "value": 2 394 | } 395 | ], 396 | "maxDataPoints": 100, 397 | "nullPointMode": "connected", 398 | "nullText": null, 399 | "postfix": "", 400 | "postfixFontSize": "50%", 401 | "prefix": "", 402 | "prefixFontSize": "50%", 403 | "rangeMaps": [ 404 | { 405 | "from": "null", 406 | "text": "N/A", 407 | "to": "null" 408 | } 409 | ], 410 | "sparkline": { 411 | "fillColor": "rgba(31, 118, 189, 0.18)", 412 | "full": false, 413 | "lineColor": "rgb(31, 120, 193)", 414 | "show": false 415 | }, 416 | "tableColumn": "", 417 | "targets": [ 418 | { 419 | "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes +node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", 420 | "format": "time_series", 421 | "interval": "10s", 422 | "intervalFactor": 1, 423 | "refId": "A", 424 | "step": 10 425 | } 426 | ], 427 | "thresholds": "65, 90", 428 | "title": "Memory usage", 429 | "type": "singlestat", 430 | "valueFontSize": "80%", 431 | "valueMaps": [ 432 | { 433 | "op": "=", 434 | "text": "N/A", 435 | "value": "null" 436 | } 437 | ], 438 | "valueName": "current" 439 | }, 440 | { 441 | "cacheTimeout": null, 442 | "colorBackground": false, 443 | "colorValue": false, 444 | "colors": [ 445 | "rgba(50, 172, 45, 0.97)", 446 | "rgba(237, 129, 40, 0.89)", 447 | "rgba(245, 54, 54, 0.9)" 448 | ], 449 | "datasource": "Prometheus", 450 | "decimals": 2, 451 | "editable": true, 452 | "error": false, 453 | "format": "percent", 454 | "gauge": { 455 | "maxValue": 100, 456 | "minValue": 0, 457 | "show": true, 458 | "thresholdLabels": false, 459 | "thresholdMarkers": true 460 | }, 461 | "gridPos": { 462 | "h": 6, 463 | "w": 6, 464 | "x": 6, 465 | "y": 8 466 | }, 467 | "id": 6, 468 | "interval": null, 469 | "isNew": true, 470 | "links": [], 471 | "mappingType": 1, 472 | "mappingTypes": [ 473 | { 474 | "name": "value to text", 475 | "value": 1 476 | }, 477 | { 478 | "name": "range to text", 479 | "value": 2 480 | } 481 | ], 
482 | "maxDataPoints": 100, 483 | "nullPointMode": "connected", 484 | "nullText": null, 485 | "postfix": "", 486 | "postfixFontSize": "50%", 487 | "prefix": "", 488 | "prefixFontSize": "50%", 489 | "rangeMaps": [ 490 | { 491 | "from": "null", 492 | "text": "N/A", 493 | "to": "null" 494 | } 495 | ], 496 | "sparkline": { 497 | "fillColor": "rgba(31, 118, 189, 0.18)", 498 | "full": false, 499 | "lineColor": "rgb(31, 120, 193)", 500 | "show": false 501 | }, 502 | "tableColumn": "", 503 | "targets": [ 504 | { 505 | "expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total[1m] ) )) / count(node_cpu_seconds_total{mode=\"system\"}) * 100", 506 | "format": "time_series", 507 | "interval": "1m", 508 | "intervalFactor": 1, 509 | "legendFormat": "", 510 | "refId": "A", 511 | "step": 10 512 | } 513 | ], 514 | "thresholds": "65, 90", 515 | "title": "CPU usage", 516 | "type": "singlestat", 517 | "valueFontSize": "80%", 518 | "valueMaps": [ 519 | { 520 | "op": "=", 521 | "text": "N/A", 522 | "value": "null" 523 | } 524 | ], 525 | "valueName": "current" 526 | }, 527 | { 528 | "cacheTimeout": null, 529 | "colorBackground": false, 530 | "colorValue": false, 531 | "colors": [ 532 | "rgba(50, 172, 45, 0.97)", 533 | "rgba(237, 129, 40, 0.89)", 534 | "rgba(245, 54, 54, 0.9)" 535 | ], 536 | "datasource": "Prometheus", 537 | "decimals": 2, 538 | "editable": true, 539 | "error": false, 540 | "format": "percent", 541 | "gauge": { 542 | "maxValue": 100, 543 | "minValue": 0, 544 | "show": true, 545 | "thresholdLabels": false, 546 | "thresholdMarkers": true 547 | }, 548 | "gridPos": { 549 | "h": 6, 550 | "w": 7, 551 | "x": 12, 552 | "y": 8 553 | }, 554 | "id": 7, 555 | "interval": null, 556 | "isNew": true, 557 | "links": [], 558 | "mappingType": 1, 559 | "mappingTypes": [ 560 | { 561 | "name": "value to text", 562 | "value": 1 563 | }, 564 | { 565 | "name": "range to text", 566 | "value": 2 567 | } 568 | ], 569 | "maxDataPoints": 100, 570 | "nullPointMode": "connected", 571 
| "nullText": null, 572 | "postfix": "", 573 | "postfixFontSize": "50%", 574 | "prefix": "", 575 | "prefixFontSize": "50%", 576 | "rangeMaps": [ 577 | { 578 | "from": "null", 579 | "text": "N/A", 580 | "to": "null" 581 | } 582 | ], 583 | "sparkline": { 584 | "fillColor": "rgba(31, 118, 189, 0.18)", 585 | "full": false, 586 | "lineColor": "rgb(31, 120, 193)", 587 | "show": false 588 | }, 589 | "tableColumn": "", 590 | "targets": [ 591 | { 592 | "expr": "sum (container_fs_limit_bytes - container_fs_usage_bytes) / sum(container_fs_limit_bytes)", 593 | "interval": "10s", 594 | "intervalFactor": 1, 595 | "metric": "", 596 | "refId": "A", 597 | "step": 10 598 | } 599 | ], 600 | "thresholds": "65, 90", 601 | "title": "Filesystem usage", 602 | "type": "singlestat", 603 | "valueFontSize": "80%", 604 | "valueMaps": [ 605 | { 606 | "op": "=", 607 | "text": "N/A", 608 | "value": "null" 609 | } 610 | ], 611 | "valueName": "current" 612 | }, 613 | { 614 | "aliasColors": { 615 | "RECEIVE": "#ea6460", 616 | "SENT": "#1f78c1", 617 | "TRANSMIT": "#1f78c1" 618 | }, 619 | "bars": false, 620 | "dashLength": 10, 621 | "dashes": false, 622 | "datasource": "Prometheus", 623 | "fill": 4, 624 | "gridPos": { 625 | "h": 9, 626 | "w": 6, 627 | "x": 0, 628 | "y": 14 629 | }, 630 | "id": 25, 631 | "legend": { 632 | "avg": false, 633 | "current": false, 634 | "max": false, 635 | "min": false, 636 | "show": true, 637 | "total": false, 638 | "values": false 639 | }, 640 | "lines": true, 641 | "linewidth": 1, 642 | "links": [], 643 | "nullPointMode": "null", 644 | "percentage": false, 645 | "pointradius": 5, 646 | "points": false, 647 | "renderer": "flot", 648 | "seriesOverrides": [], 649 | "spaceLength": 10, 650 | "stack": false, 651 | "steppedLine": false, 652 | "targets": [ 653 | { 654 | "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", 655 | "format": "time_series", 656 | "interval": "2m", 657 | "intervalFactor": 2, 658 | "legendFormat": "RECEIVE", 659 | 
"refId": "A" 660 | }, 661 | { 662 | "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", 663 | "format": "time_series", 664 | "interval": "2m", 665 | "intervalFactor": 2, 666 | "legendFormat": "TRANSMIT", 667 | "refId": "B" 668 | } 669 | ], 670 | "thresholds": [], 671 | "timeFrom": null, 672 | "timeRegions": [], 673 | "timeShift": null, 674 | "title": "Node Network Traffic", 675 | "tooltip": { 676 | "shared": true, 677 | "sort": 0, 678 | "value_type": "cumulative" 679 | }, 680 | "type": "graph", 681 | "xaxis": { 682 | "buckets": null, 683 | "mode": "time", 684 | "name": null, 685 | "show": true, 686 | "values": [] 687 | }, 688 | "yaxes": [ 689 | { 690 | "format": "Bps", 691 | "label": null, 692 | "logBase": 1, 693 | "max": null, 694 | "min": null, 695 | "show": true 696 | }, 697 | { 698 | "format": "s", 699 | "label": null, 700 | "logBase": 1, 701 | "max": null, 702 | "min": null, 703 | "show": true 704 | } 705 | ], 706 | "yaxis": { 707 | "align": false, 708 | "alignLevel": null 709 | } 710 | }, 711 | { 712 | "aliasColors": { 713 | "Available Memory": "#508642", 714 | "Used Memory": "#bf1b00" 715 | }, 716 | "bars": false, 717 | "dashLength": 10, 718 | "dashes": false, 719 | "datasource": "Prometheus", 720 | "fill": 3, 721 | "gridPos": { 722 | "h": 9, 723 | "w": 6, 724 | "x": 6, 725 | "y": 14 726 | }, 727 | "id": 27, 728 | "legend": { 729 | "avg": false, 730 | "current": false, 731 | "max": false, 732 | "min": false, 733 | "show": true, 734 | "total": false, 735 | "values": false 736 | }, 737 | "lines": true, 738 | "linewidth": 1, 739 | "links": [], 740 | "nullPointMode": "null", 741 | "percentage": false, 742 | "pointradius": 5, 743 | "points": false, 744 | "renderer": "flot", 745 | "seriesOverrides": [], 746 | "spaceLength": 10, 747 | "stack": true, 748 | "steppedLine": false, 749 | "targets": [ 750 | { 751 | "expr": "sum(node_memory_MemTotal_bytes) - sum(node_memory_MemAvailable_bytes)", 752 | "format": "time_series", 753 | 
"interval": "2m", 754 | "intervalFactor": 2, 755 | "legendFormat": "Used Memory", 756 | "refId": "B" 757 | }, 758 | { 759 | "expr": "sum(node_memory_MemAvailable_bytes)", 760 | "format": "time_series", 761 | "interval": "2m", 762 | "intervalFactor": 2, 763 | "legendFormat": "Available Memory", 764 | "refId": "A" 765 | } 766 | ], 767 | "thresholds": [], 768 | "timeFrom": null, 769 | "timeRegions": [], 770 | "timeShift": null, 771 | "title": "Node Memory", 772 | "tooltip": { 773 | "shared": true, 774 | "sort": 0, 775 | "value_type": "individual" 776 | }, 777 | "type": "graph", 778 | "xaxis": { 779 | "buckets": null, 780 | "mode": "time", 781 | "name": null, 782 | "show": true, 783 | "values": [] 784 | }, 785 | "yaxes": [ 786 | { 787 | "format": "decbytes", 788 | "label": null, 789 | "logBase": 1, 790 | "max": null, 791 | "min": null, 792 | "show": true 793 | }, 794 | { 795 | "format": "s", 796 | "label": null, 797 | "logBase": 1, 798 | "max": null, 799 | "min": null, 800 | "show": true 801 | } 802 | ], 803 | "yaxis": { 804 | "align": false, 805 | "alignLevel": null 806 | } 807 | }, 808 | { 809 | "aliasColors": { 810 | "Available Memory": "#508642", 811 | "Free Storage": "#447ebc", 812 | "Total Storage Available": "#508642", 813 | "Used Memory": "#bf1b00", 814 | "Used Storage": "#bf1b00" 815 | }, 816 | "bars": false, 817 | "dashLength": 10, 818 | "dashes": false, 819 | "datasource": "Prometheus", 820 | "fill": 3, 821 | "gridPos": { 822 | "h": 9, 823 | "w": 7, 824 | "x": 12, 825 | "y": 14 826 | }, 827 | "id": 28, 828 | "legend": { 829 | "avg": false, 830 | "current": false, 831 | "max": false, 832 | "min": false, 833 | "show": true, 834 | "total": false, 835 | "values": false 836 | }, 837 | "lines": true, 838 | "linewidth": 1, 839 | "links": [], 840 | "nullPointMode": "null", 841 | "percentage": false, 842 | "pointradius": 5, 843 | "points": false, 844 | "renderer": "flot", 845 | "seriesOverrides": [], 846 | "spaceLength": 10, 847 | "stack": true, 848 | "steppedLine": 
false, 849 | "targets": [ 850 | { 851 | "expr": "sum(node_filesystem_free_bytes {job=\"node-exporter\", instance=~\".*9100\", device=~\"/dev/.*\", mountpoint!=\"/var/lib/docker/aufs\"}) ", 852 | "format": "time_series", 853 | "interval": "2m", 854 | "intervalFactor": 2, 855 | "legendFormat": "Free Storage", 856 | "refId": "A" 857 | } 858 | ], 859 | "thresholds": [], 860 | "timeFrom": null, 861 | "timeRegions": [], 862 | "timeShift": null, 863 | "title": "Filesystem Available", 864 | "tooltip": { 865 | "shared": true, 866 | "sort": 0, 867 | "value_type": "individual" 868 | }, 869 | "type": "graph", 870 | "xaxis": { 871 | "buckets": null, 872 | "mode": "time", 873 | "name": null, 874 | "show": true, 875 | "values": [] 876 | }, 877 | "yaxes": [ 878 | { 879 | "decimals": null, 880 | "format": "decbytes", 881 | "label": null, 882 | "logBase": 1, 883 | "max": null, 884 | "min": null, 885 | "show": true 886 | }, 887 | { 888 | "format": "s", 889 | "label": null, 890 | "logBase": 1, 891 | "max": null, 892 | "min": null, 893 | "show": true 894 | } 895 | ], 896 | "yaxis": { 897 | "align": false, 898 | "alignLevel": null 899 | } 900 | }, 901 | { 902 | "collapsed": false, 903 | "gridPos": { 904 | "h": 1, 905 | "w": 24, 906 | "x": 0, 907 | "y": 23 908 | }, 909 | "id": 19, 910 | "panels": [], 911 | "repeat": null, 912 | "title": "Container Performance", 913 | "type": "row" 914 | }, 915 | { 916 | "aliasColors": {}, 917 | "bars": false, 918 | "dashLength": 10, 919 | "dashes": false, 920 | "datasource": "Prometheus", 921 | "decimals": 3, 922 | "editable": true, 923 | "error": false, 924 | "fill": 0, 925 | "grid": {}, 926 | "gridPos": { 927 | "h": 10, 928 | "w": 6, 929 | "x": 0, 930 | "y": 24 931 | }, 932 | "id": 3, 933 | "isNew": true, 934 | "legend": { 935 | "alignAsTable": true, 936 | "avg": true, 937 | "current": true, 938 | "max": false, 939 | "min": false, 940 | "rightSide": false, 941 | "show": true, 942 | "sort": "current", 943 | "sortDesc": true, 944 | "total": false, 945 | 
"values": true 946 | }, 947 | "lines": true, 948 | "linewidth": 2, 949 | "links": [], 950 | "nullPointMode": "connected", 951 | "percentage": false, 952 | "pointradius": 5, 953 | "points": false, 954 | "renderer": "flot", 955 | "seriesOverrides": [], 956 | "spaceLength": 10, 957 | "stack": false, 958 | "steppedLine": false, 959 | "targets": [ 960 | { 961 | "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m])) by (id,name)", 962 | "format": "time_series", 963 | "interval": "10s", 964 | "intervalFactor": 1, 965 | "legendFormat": "{{ name }}", 966 | "metric": "container_cpu_user_seconds_total", 967 | "refId": "A", 968 | "step": 10 969 | } 970 | ], 971 | "thresholds": [], 972 | "timeFrom": null, 973 | "timeRegions": [], 974 | "timeShift": null, 975 | "title": "Container CPU usage", 976 | "tooltip": { 977 | "msResolution": true, 978 | "shared": true, 979 | "sort": 0, 980 | "value_type": "cumulative" 981 | }, 982 | "type": "graph", 983 | "xaxis": { 984 | "buckets": null, 985 | "mode": "time", 986 | "name": null, 987 | "show": true, 988 | "values": [] 989 | }, 990 | "yaxes": [ 991 | { 992 | "format": "percentunit", 993 | "label": null, 994 | "logBase": 1, 995 | "max": null, 996 | "min": null, 997 | "show": true 998 | }, 999 | { 1000 | "format": "short", 1001 | "label": null, 1002 | "logBase": 1, 1003 | "max": null, 1004 | "min": null, 1005 | "show": true 1006 | } 1007 | ], 1008 | "yaxis": { 1009 | "align": false, 1010 | "alignLevel": null 1011 | } 1012 | }, 1013 | { 1014 | "aliasColors": {}, 1015 | "bars": false, 1016 | "dashLength": 10, 1017 | "dashes": false, 1018 | "datasource": "Prometheus", 1019 | "decimals": 2, 1020 | "editable": true, 1021 | "error": false, 1022 | "fill": 0, 1023 | "grid": {}, 1024 | "gridPos": { 1025 | "h": 10, 1026 | "w": 6, 1027 | "x": 6, 1028 | "y": 24 1029 | }, 1030 | "id": 2, 1031 | "isNew": true, 1032 | "legend": { 1033 | "alignAsTable": true, 1034 | "avg": true, 1035 | "current": true, 1036 | "max": false, 1037 | "min": 
false, 1038 | "rightSide": false, 1039 | "show": true, 1040 | "sort": "current", 1041 | "sortDesc": true, 1042 | "total": false, 1043 | "values": true 1044 | }, 1045 | "lines": true, 1046 | "linewidth": 2, 1047 | "links": [], 1048 | "nullPointMode": "connected", 1049 | "percentage": false, 1050 | "pointradius": 5, 1051 | "points": false, 1052 | "renderer": "flot", 1053 | "seriesOverrides": [], 1054 | "spaceLength": 10, 1055 | "stack": false, 1056 | "steppedLine": false, 1057 | "targets": [ 1058 | { 1059 | "expr": "container_memory_max_usage_bytes{image!=\"\"}", 1060 | "format": "time_series", 1061 | "interval": "10s", 1062 | "intervalFactor": 1, 1063 | "legendFormat": "{{ name }}", 1064 | "metric": "container_memory_usage:sort_desc", 1065 | "refId": "A", 1066 | "step": 10 1067 | } 1068 | ], 1069 | "thresholds": [], 1070 | "timeFrom": null, 1071 | "timeRegions": [], 1072 | "timeShift": null, 1073 | "title": "Container Memory Usage", 1074 | "tooltip": { 1075 | "msResolution": false, 1076 | "shared": true, 1077 | "sort": 0, 1078 | "value_type": "cumulative" 1079 | }, 1080 | "type": "graph", 1081 | "xaxis": { 1082 | "buckets": null, 1083 | "mode": "time", 1084 | "name": null, 1085 | "show": true, 1086 | "values": [] 1087 | }, 1088 | "yaxes": [ 1089 | { 1090 | "format": "bytes", 1091 | "label": null, 1092 | "logBase": 1, 1093 | "max": null, 1094 | "min": null, 1095 | "show": true 1096 | }, 1097 | { 1098 | "format": "short", 1099 | "label": null, 1100 | "logBase": 1, 1101 | "max": null, 1102 | "min": null, 1103 | "show": true 1104 | } 1105 | ], 1106 | "yaxis": { 1107 | "align": false, 1108 | "alignLevel": null 1109 | } 1110 | }, 1111 | { 1112 | "columns": [], 1113 | "datasource": "Prometheus", 1114 | "fontSize": "100%", 1115 | "gridPos": { 1116 | "h": 13, 1117 | "w": 10, 1118 | "x": 12, 1119 | "y": 24 1120 | }, 1121 | "id": 23, 1122 | "links": [], 1123 | "pageSize": null, 1124 | "scroll": true, 1125 | "showHeader": true, 1126 | "sort": { 1127 | "col": 0, 1128 | "desc": 
true 1129 | }, 1130 | "styles": [ 1131 | { 1132 | "alias": "Time", 1133 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1134 | "pattern": "Time", 1135 | "type": "date" 1136 | }, 1137 | { 1138 | "alias": "", 1139 | "colorMode": null, 1140 | "colors": [ 1141 | "rgba(245, 54, 54, 0.9)", 1142 | "rgba(237, 129, 40, 0.89)", 1143 | "rgba(50, 172, 45, 0.97)" 1144 | ], 1145 | "decimals": 2, 1146 | "pattern": "/.*/", 1147 | "thresholds": [], 1148 | "type": "number", 1149 | "unit": "short" 1150 | } 1151 | ], 1152 | "targets": [ 1153 | { 1154 | "expr": "ALERTS", 1155 | "format": "table", 1156 | "intervalFactor": 1, 1157 | "refId": "A" 1158 | } 1159 | ], 1160 | "title": "Alerts", 1161 | "transform": "table", 1162 | "type": "table" 1163 | }, 1164 | { 1165 | "aliasColors": {}, 1166 | "bars": false, 1167 | "dashLength": 10, 1168 | "dashes": false, 1169 | "datasource": "Prometheus", 1170 | "decimals": 2, 1171 | "editable": true, 1172 | "error": false, 1173 | "fill": 0, 1174 | "grid": {}, 1175 | "gridPos": { 1176 | "h": 14, 1177 | "w": 6, 1178 | "x": 0, 1179 | "y": 34 1180 | }, 1181 | "id": 8, 1182 | "isNew": true, 1183 | "legend": { 1184 | "alignAsTable": true, 1185 | "avg": true, 1186 | "current": true, 1187 | "max": false, 1188 | "min": false, 1189 | "rightSide": false, 1190 | "show": true, 1191 | "sort": "current", 1192 | "sortDesc": true, 1193 | "total": false, 1194 | "values": true 1195 | }, 1196 | "lines": true, 1197 | "linewidth": 2, 1198 | "links": [], 1199 | "nullPointMode": "connected", 1200 | "percentage": false, 1201 | "pointradius": 5, 1202 | "points": false, 1203 | "renderer": "flot", 1204 | "seriesOverrides": [], 1205 | "spaceLength": 10, 1206 | "stack": false, 1207 | "steppedLine": false, 1208 | "targets": [ 1209 | { 1210 | "expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) ))", 1211 | "interval": "10s", 1212 | "intervalFactor": 1, 1213 | "legendFormat": "{{ name }}", 1214 | "metric": "container_network_receive_bytes_total", 1215 
| "refId": "A", 1216 | "step": 10 1217 | } 1218 | ], 1219 | "thresholds": [], 1220 | "timeFrom": null, 1221 | "timeRegions": [], 1222 | "timeShift": null, 1223 | "title": "Container Network Input", 1224 | "tooltip": { 1225 | "msResolution": false, 1226 | "shared": true, 1227 | "sort": 0, 1228 | "value_type": "cumulative" 1229 | }, 1230 | "type": "graph", 1231 | "xaxis": { 1232 | "buckets": null, 1233 | "mode": "time", 1234 | "name": null, 1235 | "show": true, 1236 | "values": [] 1237 | }, 1238 | "yaxes": [ 1239 | { 1240 | "format": "bytes", 1241 | "label": null, 1242 | "logBase": 1, 1243 | "max": null, 1244 | "min": null, 1245 | "show": true 1246 | }, 1247 | { 1248 | "format": "short", 1249 | "label": null, 1250 | "logBase": 1, 1251 | "max": null, 1252 | "min": null, 1253 | "show": true 1254 | } 1255 | ], 1256 | "yaxis": { 1257 | "align": false, 1258 | "alignLevel": null 1259 | } 1260 | }, 1261 | { 1262 | "aliasColors": {}, 1263 | "bars": false, 1264 | "dashLength": 10, 1265 | "dashes": false, 1266 | "datasource": "Prometheus", 1267 | "decimals": 2, 1268 | "editable": true, 1269 | "error": false, 1270 | "fill": 0, 1271 | "grid": {}, 1272 | "gridPos": { 1273 | "h": 14, 1274 | "w": 6, 1275 | "x": 6, 1276 | "y": 34 1277 | }, 1278 | "id": 9, 1279 | "isNew": true, 1280 | "legend": { 1281 | "alignAsTable": true, 1282 | "avg": true, 1283 | "current": true, 1284 | "max": false, 1285 | "min": false, 1286 | "rightSide": false, 1287 | "show": true, 1288 | "sort": "current", 1289 | "sortDesc": true, 1290 | "total": false, 1291 | "values": true 1292 | }, 1293 | "lines": true, 1294 | "linewidth": 2, 1295 | "links": [], 1296 | "nullPointMode": "connected", 1297 | "percentage": false, 1298 | "pointradius": 5, 1299 | "points": false, 1300 | "renderer": "flot", 1301 | "seriesOverrides": [], 1302 | "spaceLength": 10, 1303 | "stack": false, 1304 | "steppedLine": false, 1305 | "targets": [ 1306 | { 1307 | "expr": "sort_desc(sum by (name) 
(rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) ))", 1308 | "format": "time_series", 1309 | "intervalFactor": 2, 1310 | "legendFormat": "{{ name }}", 1311 | "metric": "container_network_transmit_bytes_total", 1312 | "refId": "B", 1313 | "step": 4 1314 | } 1315 | ], 1316 | "thresholds": [], 1317 | "timeFrom": null, 1318 | "timeRegions": [], 1319 | "timeShift": null, 1320 | "title": "Container Network Output", 1321 | "tooltip": { 1322 | "msResolution": false, 1323 | "shared": true, 1324 | "sort": 0, 1325 | "value_type": "cumulative" 1326 | }, 1327 | "type": "graph", 1328 | "xaxis": { 1329 | "buckets": null, 1330 | "mode": "time", 1331 | "name": null, 1332 | "show": true, 1333 | "values": [] 1334 | }, 1335 | "yaxes": [ 1336 | { 1337 | "format": "bytes", 1338 | "label": null, 1339 | "logBase": 1, 1340 | "max": null, 1341 | "min": null, 1342 | "show": true 1343 | }, 1344 | { 1345 | "format": "short", 1346 | "label": null, 1347 | "logBase": 1, 1348 | "max": null, 1349 | "min": null, 1350 | "show": false 1351 | } 1352 | ], 1353 | "yaxis": { 1354 | "align": false, 1355 | "alignLevel": null 1356 | } 1357 | }, 1358 | { 1359 | "columns": [], 1360 | "datasource": "Prometheus", 1361 | "fontSize": "100%", 1362 | "gridPos": { 1363 | "h": 10, 1364 | "w": 10, 1365 | "x": 12, 1366 | "y": 37 1367 | }, 1368 | "id": 30, 1369 | "links": [], 1370 | "pageSize": 10, 1371 | "scroll": true, 1372 | "showHeader": true, 1373 | "sort": { 1374 | "col": 0, 1375 | "desc": true 1376 | }, 1377 | "styles": [ 1378 | { 1379 | "alias": "Time", 1380 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1381 | "link": false, 1382 | "linkUrl": "", 1383 | "pattern": "Time", 1384 | "type": "date" 1385 | }, 1386 | { 1387 | "alias": "", 1388 | "colorMode": null, 1389 | "colors": [ 1390 | "rgba(245, 54, 54, 0.9)", 1391 | "rgba(237, 129, 40, 0.89)", 1392 | "rgba(50, 172, 45, 0.97)" 1393 | ], 1394 | "decimals": 2, 1395 | "pattern": "/.*/", 1396 | "thresholds": [], 1397 | "type": "number", 1398 | "unit": "short" 
1399 | } 1400 | ], 1401 | "targets": [ 1402 | { 1403 | "expr": "cadvisor_version_info", 1404 | "format": "table", 1405 | "instant": false, 1406 | "interval": "15m", 1407 | "intervalFactor": 2, 1408 | "legendFormat": "cAdvisor Version: {{cadvisorVersion}}", 1409 | "refId": "A" 1410 | }, 1411 | { 1412 | "expr": "prometheus_build_info", 1413 | "format": "table", 1414 | "interval": "15m", 1415 | "intervalFactor": 2, 1416 | "legendFormat": "Prometheus Version: {{version}}", 1417 | "refId": "B" 1418 | }, 1419 | { 1420 | "expr": "node_exporter_build_info", 1421 | "format": "table", 1422 | "interval": "15m", 1423 | "intervalFactor": 2, 1424 | "legendFormat": "Node-Exporter Version: {{version}}", 1425 | "refId": "C" 1426 | } 1427 | ], 1428 | "title": "Running Versions", 1429 | "transform": "table", 1430 | "type": "table" 1431 | } 1432 | ], 1433 | "refresh": "10s", 1434 | "schemaVersion": 16, 1435 | "style": "dark", 1436 | "tags": [ 1437 | "docker", 1438 | "prometheus", 1439 | "node-exporter", 1440 | "cadvisor" 1441 | ], 1442 | "templating": { 1443 | "list": [ 1444 | { 1445 | "auto": false, 1446 | "auto_count": 30, 1447 | "auto_min": "10s", 1448 | "current": { 1449 | "text": "1m", 1450 | "value": "1m" 1451 | }, 1452 | "hide": 0, 1453 | "label": "interval", 1454 | "name": "interval", 1455 | "options": [ 1456 | { 1457 | "selected": true, 1458 | "text": "1m", 1459 | "value": "1m" 1460 | }, 1461 | { 1462 | "selected": false, 1463 | "text": "10m", 1464 | "value": "10m" 1465 | }, 1466 | { 1467 | "selected": false, 1468 | "text": "30m", 1469 | "value": "30m" 1470 | }, 1471 | { 1472 | "selected": false, 1473 | "text": "1h", 1474 | "value": "1h" 1475 | }, 1476 | { 1477 | "selected": false, 1478 | "text": "6h", 1479 | "value": "6h" 1480 | }, 1481 | { 1482 | "selected": false, 1483 | "text": "12h", 1484 | "value": "12h" 1485 | }, 1486 | { 1487 | "selected": false, 1488 | "text": "1d", 1489 | "value": "1d" 1490 | }, 1491 | { 1492 | "selected": false, 1493 | "text": "7d", 1494 | 
"value": "7d" 1495 | }, 1496 | { 1497 | "selected": false, 1498 | "text": "14d", 1499 | "value": "14d" 1500 | }, 1501 | { 1502 | "selected": false, 1503 | "text": "30d", 1504 | "value": "30d" 1505 | } 1506 | ], 1507 | "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 1508 | "refresh": 2, 1509 | "skipUrlSync": false, 1510 | "type": "interval" 1511 | } 1512 | ] 1513 | }, 1514 | "time": { 1515 | "from": "now-1h", 1516 | "to": "now" 1517 | }, 1518 | "timepicker": { 1519 | "refresh_intervals": [ 1520 | "5s", 1521 | "10s", 1522 | "30s", 1523 | "1m", 1524 | "5m", 1525 | "15m", 1526 | "30m", 1527 | "1h", 1528 | "2h", 1529 | "1d" 1530 | ], 1531 | "time_options": [ 1532 | "5m", 1533 | "15m", 1534 | "1h", 1535 | "6h", 1536 | "12h", 1537 | "24h", 1538 | "2d", 1539 | "7d", 1540 | "30d" 1541 | ] 1542 | }, 1543 | "timezone": "browser", 1544 | "title": "Docker Prometheus Monitoring", 1545 | "uid": "64nrElFmk", 1546 | "version": 2 1547 | } 1548 | -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/NodeMonitoring.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "description": "Docker Swarm nodes metrics", 16 | "editable": true, 17 | "gnetId": 7461, 18 | "graphTooltip": 0, 19 | "id": 7, 20 | "iteration": 1561850634511, 21 | "links": [], 22 | "panels": [ 23 | { 24 | "cacheTimeout": null, 25 | "colorBackground": false, 26 | "colorValue": false, 27 | "colors": [ 28 | "rgba(245, 54, 54, 0.9)", 29 | "rgba(237, 129, 40, 0.89)", 30 | "rgba(50, 172, 45, 0.97)" 31 | ], 32 | "datasource": "Prometheus", 33 | "decimals": 1, 34 | "format": "s", 35 | "gauge": { 36 | "maxValue": 100, 37 | "minValue": 0, 38 | "show": false, 39 | 
"thresholdLabels": false, 40 | "thresholdMarkers": true 41 | }, 42 | "gridPos": { 43 | "h": 4, 44 | "w": 6, 45 | "x": 0, 46 | "y": 0 47 | }, 48 | "hideTimeOverride": true, 49 | "id": 2, 50 | "interval": null, 51 | "links": [], 52 | "mappingType": 1, 53 | "mappingTypes": [ 54 | { 55 | "name": "value to text", 56 | "value": 1 57 | }, 58 | { 59 | "name": "range to text", 60 | "value": 2 61 | } 62 | ], 63 | "maxDataPoints": 100, 64 | "nullPointMode": "connected", 65 | "nullText": null, 66 | "options": {}, 67 | "postfix": "", 68 | "postfixFontSize": "50%", 69 | "prefix": "", 70 | "prefixFontSize": "50%", 71 | "rangeMaps": [ 72 | { 73 | "from": "null", 74 | "text": "N/A", 75 | "to": "null" 76 | } 77 | ], 78 | "sparkline": { 79 | "fillColor": "rgba(31, 118, 189, 0.18)", 80 | "full": false, 81 | "lineColor": "rgb(31, 120, 193)", 82 | "show": false 83 | }, 84 | "tableColumn": "", 85 | "targets": [ 86 | { 87 | "expr": "time() - node_boot_time_seconds{instance=~\"$node_id\"}", 88 | "format": "time_series", 89 | "instant": false, 90 | "intervalFactor": 2, 91 | "legendFormat": "", 92 | "refId": "A", 93 | "step": 2 94 | } 95 | ], 96 | "thresholds": "", 97 | "timeFrom": "1m", 98 | "timeShift": null, 99 | "title": "Uptime", 100 | "type": "singlestat", 101 | "valueFontSize": "80%", 102 | "valueMaps": [ 103 | { 104 | "op": "=", 105 | "text": "N/A", 106 | "value": "null" 107 | } 108 | ], 109 | "valueName": "avg" 110 | }, 111 | { 112 | "cacheTimeout": null, 113 | "colorBackground": false, 114 | "colorValue": false, 115 | "colors": [ 116 | "rgba(245, 54, 54, 0.9)", 117 | "rgba(237, 129, 40, 0.89)", 118 | "rgba(50, 172, 45, 0.97)" 119 | ], 120 | "datasource": "Prometheus", 121 | "decimals": 0, 122 | "format": "none", 123 | "gauge": { 124 | "maxValue": 100, 125 | "minValue": 0, 126 | "show": false, 127 | "thresholdLabels": false, 128 | "thresholdMarkers": true 129 | }, 130 | "gridPos": { 131 | "h": 4, 132 | "w": 6, 133 | "x": 6, 134 | "y": 0 135 | }, 136 | "id": 1, 137 | "interval": 
null, 138 | "links": [], 139 | "mappingType": 1, 140 | "mappingTypes": [ 141 | { 142 | "name": "value to text", 143 | "value": 1 144 | }, 145 | { 146 | "name": "range to text", 147 | "value": 2 148 | } 149 | ], 150 | "maxDataPoints": 100, 151 | "nullPointMode": "connected", 152 | "nullText": null, 153 | "options": {}, 154 | "postfix": "", 155 | "postfixFontSize": "50%", 156 | "prefix": "", 157 | "prefixFontSize": "50%", 158 | "rangeMaps": [ 159 | { 160 | "from": "null", 161 | "text": "N/A", 162 | "to": "null" 163 | } 164 | ], 165 | "sparkline": { 166 | "fillColor": "rgba(31, 118, 189, 0.18)", 167 | "full": false, 168 | "lineColor": "rgb(31, 120, 193)", 169 | "show": false 170 | }, 171 | "tableColumn": "", 172 | "targets": [ 173 | { 174 | "expr": "count(node_load1 * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"})", 175 | "format": "time_series", 176 | "intervalFactor": 2, 177 | "legendFormat": "", 178 | "refId": "A", 179 | "step": 20 180 | } 181 | ], 182 | "thresholds": "", 183 | "title": "Nodes", 184 | "type": "singlestat", 185 | "valueFontSize": "80%", 186 | "valueMaps": [ 187 | { 188 | "op": "=", 189 | "text": "N/A", 190 | "value": "null" 191 | } 192 | ], 193 | "valueName": "avg" 194 | }, 195 | { 196 | "cacheTimeout": null, 197 | "colorBackground": false, 198 | "colorValue": false, 199 | "colors": [ 200 | "rgba(245, 54, 54, 0.9)", 201 | "rgba(237, 129, 40, 0.89)", 202 | "rgba(50, 172, 45, 0.97)" 203 | ], 204 | "datasource": "Prometheus", 205 | "decimals": 0, 206 | "format": "short", 207 | "gauge": { 208 | "maxValue": 100, 209 | "minValue": 0, 210 | "show": false, 211 | "thresholdLabels": false, 212 | "thresholdMarkers": true 213 | }, 214 | "gridPos": { 215 | "h": 4, 216 | "w": 6, 217 | "x": 12, 218 | "y": 0 219 | }, 220 | "hideTimeOverride": true, 221 | "id": 4, 222 | "interval": null, 223 | "links": [], 224 | "mappingType": 1, 225 | "mappingTypes": [ 226 | { 227 | "name": "value to text", 228 | "value": 1 229 | }, 230 | { 231 | "name": 
"range to text", 232 | "value": 2 233 | } 234 | ], 235 | "maxDataPoints": 100, 236 | "nullPointMode": "connected", 237 | "nullText": null, 238 | "options": {}, 239 | "postfix": "", 240 | "postfixFontSize": "50%", 241 | "prefix": "", 242 | "prefixFontSize": "50%", 243 | "rangeMaps": [ 244 | { 245 | "from": "null", 246 | "text": "N/A", 247 | "to": "null" 248 | } 249 | ], 250 | "sparkline": { 251 | "fillColor": "rgba(31, 118, 189, 0.18)", 252 | "full": false, 253 | "lineColor": "rgb(31, 120, 193)", 254 | "show": false 255 | }, 256 | "tableColumn": "", 257 | "targets": [ 258 | { 259 | "expr": "count(node_cpu_seconds_total{mode=\"idle\"} * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"})", 260 | "format": "time_series", 261 | "intervalFactor": 2, 262 | "legendFormat": "", 263 | "refId": "A", 264 | "step": 2 265 | } 266 | ], 267 | "thresholds": "", 268 | "timeFrom": "1m", 269 | "timeShift": null, 270 | "title": "CPUs", 271 | "type": "singlestat", 272 | "valueFontSize": "80%", 273 | "valueMaps": [ 274 | { 275 | "op": "=", 276 | "text": "N/A", 277 | "value": "null" 278 | } 279 | ], 280 | "valueName": "avg" 281 | }, 282 | { 283 | "cacheTimeout": null, 284 | "colorBackground": false, 285 | "colorValue": false, 286 | "colors": [ 287 | "rgba(245, 54, 54, 0.9)", 288 | "rgba(237, 129, 40, 0.89)", 289 | "rgba(50, 172, 45, 0.97)" 290 | ], 291 | "datasource": "Prometheus", 292 | "decimals": null, 293 | "format": "percent", 294 | "gauge": { 295 | "maxValue": 100, 296 | "minValue": 0, 297 | "show": true, 298 | "thresholdLabels": false, 299 | "thresholdMarkers": true 300 | }, 301 | "gridPos": { 302 | "h": 4, 303 | "w": 6, 304 | "x": 18, 305 | "y": 0 306 | }, 307 | "hideTimeOverride": true, 308 | "id": 11, 309 | "interval": null, 310 | "links": [], 311 | "mappingType": 1, 312 | "mappingTypes": [ 313 | { 314 | "name": "value to text", 315 | "value": 1 316 | }, 317 | { 318 | "name": "range to text", 319 | "value": 2 320 | } 321 | ], 322 | "maxDataPoints": 100, 323 | 
"nullPointMode": "connected", 324 | "nullText": null, 325 | "options": {}, 326 | "postfix": "", 327 | "postfixFontSize": "50%", 328 | "prefix": "", 329 | "prefixFontSize": "50%", 330 | "rangeMaps": [ 331 | { 332 | "from": "null", 333 | "text": "N/A", 334 | "to": "null" 335 | } 336 | ], 337 | "sparkline": { 338 | "fillColor": "rgba(31, 118, 189, 0.18)", 339 | "full": false, 340 | "lineColor": "rgb(31, 120, 193)", 341 | "show": false 342 | }, 343 | "tableColumn": "", 344 | "targets": [ 345 | { 346 | "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"}) ", 347 | "format": "time_series", 348 | "intervalFactor": 2, 349 | "legendFormat": "", 350 | "refId": "A", 351 | "step": 2 352 | } 353 | ], 354 | "thresholds": "10,25,100", 355 | "timeFrom": "1m", 356 | "timeShift": null, 357 | "title": "CPU Idle", 358 | "type": "singlestat", 359 | "valueFontSize": "80%", 360 | "valueMaps": [ 361 | { 362 | "op": "=", 363 | "text": "N/A", 364 | "value": "null" 365 | } 366 | ], 367 | "valueName": "avg" 368 | }, 369 | { 370 | "aliasColors": {}, 371 | "bars": false, 372 | "dashLength": 10, 373 | "dashes": false, 374 | "datasource": "Prometheus", 375 | "decimals": 2, 376 | "fill": 1, 377 | "gridPos": { 378 | "h": 7, 379 | "w": 12, 380 | "x": 0, 381 | "y": 4 382 | }, 383 | "id": 13, 384 | "legend": { 385 | "alignAsTable": true, 386 | "avg": true, 387 | "current": true, 388 | "hideEmpty": false, 389 | "hideZero": false, 390 | "max": true, 391 | "min": true, 392 | "rightSide": true, 393 | "show": false, 394 | "total": false, 395 | "values": true 396 | }, 397 | "lines": true, 398 | "linewidth": 1, 399 | "links": [], 400 | "nullPointMode": "null", 401 | "options": {}, 402 | "percentage": false, 403 | "pointradius": 5, 404 | "points": false, 405 | "renderer": "flot", 406 | 
"seriesOverrides": [], 407 | "spaceLength": 10, 408 | "stack": false, 409 | "steppedLine": false, 410 | "targets": [ 411 | { 412 | "expr": "node_load5 * on(instance) group_left(node_name) node_load5{instance=~\"$node_id\"}", 413 | "format": "time_series", 414 | "intervalFactor": 2, 415 | "legendFormat": "load5 {{node_name}}", 416 | "refId": "A", 417 | "step": 2 418 | } 419 | ], 420 | "thresholds": [], 421 | "timeFrom": null, 422 | "timeRegions": [], 423 | "timeShift": null, 424 | "title": "System Load by Node", 425 | "tooltip": { 426 | "shared": true, 427 | "sort": 2, 428 | "value_type": "individual" 429 | }, 430 | "type": "graph", 431 | "xaxis": { 432 | "buckets": null, 433 | "mode": "time", 434 | "name": null, 435 | "show": true, 436 | "values": [] 437 | }, 438 | "yaxes": [ 439 | { 440 | "format": "short", 441 | "label": null, 442 | "logBase": 1, 443 | "max": null, 444 | "min": null, 445 | "show": true 446 | }, 447 | { 448 | "format": "short", 449 | "label": null, 450 | "logBase": 1, 451 | "max": null, 452 | "min": null, 453 | "show": true 454 | } 455 | ], 456 | "yaxis": { 457 | "align": false, 458 | "alignLevel": null 459 | } 460 | }, 461 | { 462 | "aliasColors": {}, 463 | "bars": false, 464 | "dashLength": 10, 465 | "dashes": false, 466 | "datasource": "Prometheus", 467 | "decimals": 2, 468 | "fill": 1, 469 | "gridPos": { 470 | "h": 7, 471 | "w": 12, 472 | "x": 12, 473 | "y": 4 474 | }, 475 | "id": 14, 476 | "legend": { 477 | "alignAsTable": true, 478 | "avg": true, 479 | "current": true, 480 | "hideEmpty": true, 481 | "hideZero": true, 482 | "max": true, 483 | "min": true, 484 | "rightSide": true, 485 | "show": false, 486 | "total": false, 487 | "values": true 488 | }, 489 | "lines": true, 490 | "linewidth": 1, 491 | "links": [], 492 | "nullPointMode": "null as zero", 493 | "options": {}, 494 | "percentage": false, 495 | "pointradius": 5, 496 | "points": false, 497 | "renderer": "flot", 498 | "seriesOverrides": [], 499 | "spaceLength": 10, 500 | "stack": 
false, 501 | "steppedLine": false, 502 | "targets": [ 503 | { 504 | "expr": "100 - (avg(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_arp_entries{instance=~\"$node_id\"} * 100) by (node_name))", 505 | "format": "time_series", 506 | "intervalFactor": 2, 507 | "legendFormat": "{{node_name}}", 508 | "refId": "A", 509 | "step": 2 510 | } 511 | ], 512 | "thresholds": [], 513 | "timeFrom": null, 514 | "timeRegions": [], 515 | "timeShift": null, 516 | "title": "CPU Usage by Node", 517 | "tooltip": { 518 | "shared": true, 519 | "sort": 2, 520 | "value_type": "individual" 521 | }, 522 | "type": "graph", 523 | "xaxis": { 524 | "buckets": null, 525 | "mode": "time", 526 | "name": null, 527 | "show": true, 528 | "values": [] 529 | }, 530 | "yaxes": [ 531 | { 532 | "format": "percent", 533 | "label": null, 534 | "logBase": 1, 535 | "max": "100", 536 | "min": null, 537 | "show": true 538 | }, 539 | { 540 | "format": "short", 541 | "label": null, 542 | "logBase": 1, 543 | "max": null, 544 | "min": null, 545 | "show": true 546 | } 547 | ], 548 | "yaxis": { 549 | "align": false, 550 | "alignLevel": null 551 | } 552 | }, 553 | { 554 | "cacheTimeout": null, 555 | "colorBackground": false, 556 | "colorValue": false, 557 | "colors": [ 558 | "rgba(245, 54, 54, 0.9)", 559 | "rgba(237, 129, 40, 0.89)", 560 | "rgba(50, 172, 45, 0.97)" 561 | ], 562 | "datasource": "Prometheus", 563 | "decimals": 1, 564 | "format": "decbytes", 565 | "gauge": { 566 | "maxValue": 100, 567 | "minValue": 0, 568 | "show": false, 569 | "thresholdLabels": false, 570 | "thresholdMarkers": true 571 | }, 572 | "gridPos": { 573 | "h": 4, 574 | "w": 6, 575 | "x": 0, 576 | "y": 11 577 | }, 578 | "hideTimeOverride": true, 579 | "id": 3, 580 | "interval": null, 581 | "links": [], 582 | "mappingType": 1, 583 | "mappingTypes": [ 584 | { 585 | "name": "value to text", 586 | "value": 1 587 | }, 588 | { 589 | "name": "range to text", 590 | "value": 2 591 | } 592 | ], 593 | 
"maxDataPoints": 100, 594 | "nullPointMode": "connected", 595 | "nullText": null, 596 | "options": {}, 597 | "postfix": "", 598 | "postfixFontSize": "50%", 599 | "prefix": "", 600 | "prefixFontSize": "50%", 601 | "rangeMaps": [ 602 | { 603 | "from": "null", 604 | "text": "N/A", 605 | "to": "null" 606 | } 607 | ], 608 | "sparkline": { 609 | "fillColor": "rgba(31, 118, 189, 0.18)", 610 | "full": false, 611 | "lineColor": "rgb(31, 120, 193)", 612 | "show": false 613 | }, 614 | "tableColumn": "", 615 | "targets": [ 616 | { 617 | "expr": "sum(node_memory_MemTotal_bytes * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"})", 618 | "format": "time_series", 619 | "intervalFactor": 2, 620 | "legendFormat": "", 621 | "refId": "A", 622 | "step": 20 623 | } 624 | ], 625 | "thresholds": "", 626 | "timeFrom": null, 627 | "timeShift": null, 628 | "title": "Total Memory", 629 | "type": "singlestat", 630 | "valueFontSize": "80%", 631 | "valueMaps": [ 632 | { 633 | "op": "=", 634 | "text": "N/A", 635 | "value": "null" 636 | } 637 | ], 638 | "valueName": "avg" 639 | }, 640 | { 641 | "cacheTimeout": null, 642 | "colorBackground": false, 643 | "colorValue": false, 644 | "colors": [ 645 | "rgba(245, 54, 54, 0.9)", 646 | "rgba(237, 129, 40, 0.89)", 647 | "rgba(50, 172, 45, 0.97)" 648 | ], 649 | "datasource": "Prometheus", 650 | "format": "percent", 651 | "gauge": { 652 | "maxValue": 100, 653 | "minValue": 0, 654 | "show": true, 655 | "thresholdLabels": false, 656 | "thresholdMarkers": true 657 | }, 658 | "gridPos": { 659 | "h": 4, 660 | "w": 6, 661 | "x": 6, 662 | "y": 11 663 | }, 664 | "id": 8, 665 | "interval": null, 666 | "links": [], 667 | "mappingType": 1, 668 | "mappingTypes": [ 669 | { 670 | "name": "value to text", 671 | "value": 1 672 | }, 673 | { 674 | "name": "range to text", 675 | "value": 2 676 | } 677 | ], 678 | "maxDataPoints": 100, 679 | "nullPointMode": "connected", 680 | "nullText": null, 681 | "options": {}, 682 | "postfix": "", 683 | 
"postfixFontSize": "50%", 684 | "prefix": "", 685 | "prefixFontSize": "50%", 686 | "rangeMaps": [ 687 | { 688 | "from": "null", 689 | "text": "N/A", 690 | "to": "null" 691 | } 692 | ], 693 | "sparkline": { 694 | "fillColor": "rgba(31, 118, 189, 0.18)", 695 | "full": false, 696 | "lineColor": "rgb(31, 120, 193)", 697 | "show": false 698 | }, 699 | "tableColumn": "", 700 | "targets": [ 701 | { 702 | "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"} * 100) / count(node_load1 * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"})", 703 | "format": "time_series", 704 | "intervalFactor": 2, 705 | "legendFormat": "", 706 | "refId": "A", 707 | "step": 20 708 | } 709 | ], 710 | "thresholds": "10,25,100", 711 | "title": "Available Memory", 712 | "type": "singlestat", 713 | "valueFontSize": "80%", 714 | "valueMaps": [ 715 | { 716 | "op": "=", 717 | "text": "N/A", 718 | "value": "null" 719 | } 720 | ], 721 | "valueName": "avg" 722 | }, 723 | { 724 | "cacheTimeout": null, 725 | "colorBackground": false, 726 | "colorValue": false, 727 | "colors": [ 728 | "rgba(245, 54, 54, 0.9)", 729 | "rgba(237, 129, 40, 0.89)", 730 | "rgba(50, 172, 45, 0.97)" 731 | ], 732 | "datasource": "Prometheus", 733 | "decimals": 1, 734 | "format": "decbytes", 735 | "gauge": { 736 | "maxValue": 100, 737 | "minValue": 0, 738 | "show": false, 739 | "thresholdLabels": false, 740 | "thresholdMarkers": true 741 | }, 742 | "gridPos": { 743 | "h": 4, 744 | "w": 6, 745 | "x": 12, 746 | "y": 11 747 | }, 748 | "hideTimeOverride": true, 749 | "id": 9, 750 | "interval": null, 751 | "links": [], 752 | "mappingType": 1, 753 | "mappingTypes": [ 754 | { 755 | "name": "value to text", 756 | "value": 1 757 | }, 758 | { 759 | "name": "range to text", 760 | "value": 2 761 | } 762 | ], 763 | "maxDataPoints": 100, 764 | "nullPointMode": "connected", 765 | "nullText": null, 766 | "options": {}, 767 | "postfix": 
"", 768 | "postfixFontSize": "50%", 769 | "prefix": "", 770 | "prefixFontSize": "50%", 771 | "rangeMaps": [ 772 | { 773 | "from": "null", 774 | "text": "N/A", 775 | "to": "null" 776 | } 777 | ], 778 | "sparkline": { 779 | "fillColor": "rgba(31, 118, 189, 0.18)", 780 | "full": false, 781 | "lineColor": "rgb(31, 120, 193)", 782 | "show": false 783 | }, 784 | "tableColumn": "", 785 | "targets": [ 786 | { 787 | "expr": "sum(node_filesystem_size_bytes{mountpoint=\"/\"} * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"})", 788 | "format": "time_series", 789 | "intervalFactor": 2, 790 | "legendFormat": "", 791 | "refId": "A", 792 | "step": 20 793 | } 794 | ], 795 | "thresholds": "", 796 | "timeFrom": null, 797 | "timeShift": null, 798 | "title": "Total Disk Space", 799 | "type": "singlestat", 800 | "valueFontSize": "80%", 801 | "valueMaps": [ 802 | { 803 | "op": "=", 804 | "text": "N/A", 805 | "value": "null" 806 | } 807 | ], 808 | "valueName": "avg" 809 | }, 810 | { 811 | "cacheTimeout": null, 812 | "colorBackground": false, 813 | "colorValue": false, 814 | "colors": [ 815 | "rgba(245, 54, 54, 0.9)", 816 | "rgba(237, 129, 40, 0.89)", 817 | "rgba(50, 172, 45, 0.97)" 818 | ], 819 | "datasource": "Prometheus", 820 | "format": "percent", 821 | "gauge": { 822 | "maxValue": 100, 823 | "minValue": 0, 824 | "show": true, 825 | "thresholdLabels": false, 826 | "thresholdMarkers": true 827 | }, 828 | "gridPos": { 829 | "h": 4, 830 | "w": 6, 831 | "x": 18, 832 | "y": 11 833 | }, 834 | "id": 10, 835 | "interval": null, 836 | "links": [], 837 | "mappingType": 1, 838 | "mappingTypes": [ 839 | { 840 | "name": "value to text", 841 | "value": 1 842 | }, 843 | { 844 | "name": "range to text", 845 | "value": 2 846 | } 847 | ], 848 | "maxDataPoints": 100, 849 | "nullPointMode": "connected", 850 | "nullText": null, 851 | "options": {}, 852 | "postfix": "", 853 | "postfixFontSize": "50%", 854 | "prefix": "", 855 | "prefixFontSize": "50%", 856 | "rangeMaps": [ 857 | { 858 | 
"from": "null", 859 | "text": "N/A", 860 | "to": "null" 861 | } 862 | ], 863 | "sparkline": { 864 | "fillColor": "rgba(31, 118, 189, 0.18)", 865 | "full": false, 866 | "lineColor": "rgb(31, 120, 193)", 867 | "show": false 868 | }, 869 | "tableColumn": "", 870 | "targets": [ 871 | { 872 | "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"}) * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"} * 100) / count(node_load1 * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"})", 873 | "format": "time_series", 874 | "intervalFactor": 2, 875 | "legendFormat": "", 876 | "refId": "A", 877 | "step": 20 878 | } 879 | ], 880 | "thresholds": "10,25,100", 881 | "title": "Available Disk Space", 882 | "type": "singlestat", 883 | "valueFontSize": "80%", 884 | "valueMaps": [ 885 | { 886 | "op": "=", 887 | "text": "N/A", 888 | "value": "null" 889 | } 890 | ], 891 | "valueName": "avg" 892 | }, 893 | { 894 | "aliasColors": {}, 895 | "bars": false, 896 | "dashLength": 10, 897 | "dashes": false, 898 | "datasource": "Prometheus", 899 | "fill": 1, 900 | "gridPos": { 901 | "h": 7, 902 | "w": 24, 903 | "x": 0, 904 | "y": 15 905 | }, 906 | "id": 15, 907 | "legend": { 908 | "alignAsTable": true, 909 | "avg": true, 910 | "current": false, 911 | "max": true, 912 | "min": true, 913 | "rightSide": true, 914 | "show": true, 915 | "total": false, 916 | "values": true 917 | }, 918 | "lines": true, 919 | "linewidth": 1, 920 | "links": [], 921 | "nullPointMode": "null", 922 | "options": {}, 923 | "percentage": false, 924 | "pointradius": 5, 925 | "points": false, 926 | "renderer": "flot", 927 | "seriesOverrides": [], 928 | "spaceLength": 10, 929 | "stack": true, 930 | "steppedLine": false, 931 | "targets": [ 932 | { 933 | "expr": "sum((node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Cached_bytes - node_memory_Buffers_bytes - node_memory_Slab_bytes) * on(instance) group_left(node_name) 
node_load1{instance=~\"$node_id\"}) by (node_name)", 934 | "format": "time_series", 935 | "intervalFactor": 2, 936 | "legendFormat": "Used {{node_name}}", 937 | "refId": "A", 938 | "step": 2 939 | }, 940 | { 941 | "expr": "sum(node_memory_Cached_bytes * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"}) by (node_name)", 942 | "format": "time_series", 943 | "intervalFactor": 2, 944 | "legendFormat": "Cached {{node_name}}", 945 | "refId": "B", 946 | "step": 2 947 | } 948 | ], 949 | "thresholds": [], 950 | "timeFrom": null, 951 | "timeRegions": [], 952 | "timeShift": null, 953 | "title": "Memory usage by Node", 954 | "tooltip": { 955 | "shared": true, 956 | "sort": 0, 957 | "value_type": "individual" 958 | }, 959 | "type": "graph", 960 | "xaxis": { 961 | "buckets": null, 962 | "mode": "time", 963 | "name": null, 964 | "show": true, 965 | "values": [] 966 | }, 967 | "yaxes": [ 968 | { 969 | "format": "decbytes", 970 | "label": null, 971 | "logBase": 1, 972 | "max": null, 973 | "min": null, 974 | "show": true 975 | }, 976 | { 977 | "format": "short", 978 | "label": null, 979 | "logBase": 1, 980 | "max": null, 981 | "min": null, 982 | "show": true 983 | } 984 | ], 985 | "yaxis": { 986 | "align": false, 987 | "alignLevel": null 988 | } 989 | }, 990 | { 991 | "aliasColors": {}, 992 | "bars": false, 993 | "dashLength": 10, 994 | "dashes": false, 995 | "datasource": "Prometheus", 996 | "decimals": 2, 997 | "fill": 1, 998 | "gridPos": { 999 | "h": 7, 1000 | "w": 24, 1001 | "x": 0, 1002 | "y": 22 1003 | }, 1004 | "id": 16, 1005 | "legend": { 1006 | "alignAsTable": true, 1007 | "avg": true, 1008 | "current": false, 1009 | "max": true, 1010 | "min": true, 1011 | "rightSide": true, 1012 | "show": true, 1013 | "total": false, 1014 | "values": true 1015 | }, 1016 | "lines": true, 1017 | "linewidth": 1, 1018 | "links": [], 1019 | "nullPointMode": "null as zero", 1020 | "options": {}, 1021 | "percentage": false, 1022 | "pointradius": 5, 1023 | "points": false, 1024 
| "renderer": "flot", 1025 | "seriesOverrides": [], 1026 | "spaceLength": 10, 1027 | "stack": false, 1028 | "steppedLine": false, 1029 | "targets": [ 1030 | { 1031 | "expr": "sum(irate(node_disk_read_bytes_total[$interval]) * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"}) by (node_name)", 1032 | "format": "time_series", 1033 | "intervalFactor": 2, 1034 | "legendFormat": "Read {{node_name}}", 1035 | "refId": "A", 1036 | "step": 2 1037 | }, 1038 | { 1039 | "expr": "sum(irate(node_disk_written_bytes_total[$interval]) * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"}) by (node_name)", 1040 | "format": "time_series", 1041 | "intervalFactor": 2, 1042 | "legendFormat": "Written {{node_name}}", 1043 | "refId": "B", 1044 | "step": 2 1045 | } 1046 | ], 1047 | "thresholds": [], 1048 | "timeFrom": null, 1049 | "timeRegions": [], 1050 | "timeShift": null, 1051 | "title": "Disk I/O by Node", 1052 | "tooltip": { 1053 | "shared": true, 1054 | "sort": 0, 1055 | "value_type": "individual" 1056 | }, 1057 | "type": "graph", 1058 | "xaxis": { 1059 | "buckets": null, 1060 | "mode": "time", 1061 | "name": null, 1062 | "show": true, 1063 | "values": [] 1064 | }, 1065 | "yaxes": [ 1066 | { 1067 | "format": "Bps", 1068 | "label": null, 1069 | "logBase": 1, 1070 | "max": null, 1071 | "min": null, 1072 | "show": true 1073 | }, 1074 | { 1075 | "format": "short", 1076 | "label": null, 1077 | "logBase": 1, 1078 | "max": null, 1079 | "min": null, 1080 | "show": true 1081 | } 1082 | ], 1083 | "yaxis": { 1084 | "align": false, 1085 | "alignLevel": null 1086 | } 1087 | }, 1088 | { 1089 | "aliasColors": {}, 1090 | "bars": false, 1091 | "dashLength": 10, 1092 | "dashes": false, 1093 | "datasource": "Prometheus", 1094 | "decimals": 2, 1095 | "fill": 1, 1096 | "gridPos": { 1097 | "h": 7, 1098 | "w": 12, 1099 | "x": 0, 1100 | "y": 29 1101 | }, 1102 | "id": 18, 1103 | "legend": { 1104 | "alignAsTable": true, 1105 | "avg": true, 1106 | "current": true, 1107 | 
"max": true, 1108 | "min": true, 1109 | "rightSide": true, 1110 | "show": false, 1111 | "total": false, 1112 | "values": true 1113 | }, 1114 | "lines": true, 1115 | "linewidth": 1, 1116 | "links": [], 1117 | "nullPointMode": "null as zero", 1118 | "options": {}, 1119 | "percentage": false, 1120 | "pointradius": 5, 1121 | "points": false, 1122 | "renderer": "flot", 1123 | "seriesOverrides": [], 1124 | "spaceLength": 10, 1125 | "stack": false, 1126 | "steppedLine": false, 1127 | "targets": [ 1128 | { 1129 | "expr": "sum(irate(node_disk_reads_completed_total[$interval]) * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"}) by (node_name)", 1130 | "format": "time_series", 1131 | "intervalFactor": 2, 1132 | "legendFormat": "Reads {{node_name}}", 1133 | "refId": "A", 1134 | "step": 2 1135 | }, 1136 | { 1137 | "expr": "sum(irate(node_disk_writes_completed_total[$interval]) * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"}) by (node_name)", 1138 | "format": "time_series", 1139 | "intervalFactor": 2, 1140 | "legendFormat": "Writes {{node_name}}", 1141 | "refId": "B", 1142 | "step": 2 1143 | } 1144 | ], 1145 | "thresholds": [], 1146 | "timeFrom": null, 1147 | "timeRegions": [], 1148 | "timeShift": null, 1149 | "title": "IOPS by Node", 1150 | "tooltip": { 1151 | "shared": true, 1152 | "sort": 0, 1153 | "value_type": "individual" 1154 | }, 1155 | "type": "graph", 1156 | "xaxis": { 1157 | "buckets": null, 1158 | "mode": "time", 1159 | "name": null, 1160 | "show": true, 1161 | "values": [] 1162 | }, 1163 | "yaxes": [ 1164 | { 1165 | "format": "short", 1166 | "label": null, 1167 | "logBase": 1, 1168 | "max": null, 1169 | "min": null, 1170 | "show": true 1171 | }, 1172 | { 1173 | "format": "short", 1174 | "label": null, 1175 | "logBase": 1, 1176 | "max": null, 1177 | "min": null, 1178 | "show": true 1179 | } 1180 | ], 1181 | "yaxis": { 1182 | "align": false, 1183 | "alignLevel": null 1184 | } 1185 | }, 1186 | { 1187 | "aliasColors": {}, 
1188 | "bars": false, 1189 | "dashLength": 10, 1190 | "dashes": false, 1191 | "datasource": "Prometheus", 1192 | "decimals": 2, 1193 | "fill": 1, 1194 | "gridPos": { 1195 | "h": 7, 1196 | "w": 12, 1197 | "x": 12, 1198 | "y": 29 1199 | }, 1200 | "id": 19, 1201 | "legend": { 1202 | "alignAsTable": true, 1203 | "avg": true, 1204 | "current": true, 1205 | "hideEmpty": true, 1206 | "hideZero": true, 1207 | "max": true, 1208 | "min": true, 1209 | "rightSide": true, 1210 | "show": false, 1211 | "total": false, 1212 | "values": true 1213 | }, 1214 | "lines": true, 1215 | "linewidth": 1, 1216 | "links": [], 1217 | "nullPointMode": "null as zero", 1218 | "options": {}, 1219 | "percentage": false, 1220 | "pointradius": 5, 1221 | "points": false, 1222 | "renderer": "flot", 1223 | "seriesOverrides": [], 1224 | "spaceLength": 10, 1225 | "stack": false, 1226 | "steppedLine": false, 1227 | "targets": [ 1228 | { 1229 | "expr": "(avg(irate(node_cpu_seconds_total{mode=\"iowait\"}[$interval]) * on(instance) group_left(node_name) node_load1{instance=~\"$node_id\"} * 100) by (node_name))", 1230 | "format": "time_series", 1231 | "intervalFactor": 2, 1232 | "legendFormat": "{{node_name}}", 1233 | "refId": "A", 1234 | "step": 2 1235 | } 1236 | ], 1237 | "thresholds": [], 1238 | "timeFrom": null, 1239 | "timeRegions": [], 1240 | "timeShift": null, 1241 | "title": "CPU IO Wait by Node", 1242 | "tooltip": { 1243 | "shared": true, 1244 | "sort": 2, 1245 | "value_type": "individual" 1246 | }, 1247 | "type": "graph", 1248 | "xaxis": { 1249 | "buckets": null, 1250 | "mode": "time", 1251 | "name": null, 1252 | "show": true, 1253 | "values": [] 1254 | }, 1255 | "yaxes": [ 1256 | { 1257 | "format": "percent", 1258 | "label": null, 1259 | "logBase": 1, 1260 | "max": null, 1261 | "min": null, 1262 | "show": true 1263 | }, 1264 | { 1265 | "format": "short", 1266 | "label": null, 1267 | "logBase": 1, 1268 | "max": null, 1269 | "min": null, 1270 | "show": true 1271 | } 1272 | ], 1273 | "yaxis": { 1274 | 
"align": false, 1275 | "alignLevel": null 1276 | } 1277 | }, 1278 | { 1279 | "aliasColors": {}, 1280 | "bars": false, 1281 | "dashLength": 10, 1282 | "dashes": false, 1283 | "datasource": "Prometheus", 1284 | "decimals": 0, 1285 | "fill": 3, 1286 | "gridPos": { 1287 | "h": 6, 1288 | "w": 24, 1289 | "x": 0, 1290 | "y": 36 1291 | }, 1292 | "id": 12, 1293 | "legend": { 1294 | "alignAsTable": true, 1295 | "avg": false, 1296 | "current": true, 1297 | "hideEmpty": true, 1298 | "hideZero": true, 1299 | "max": false, 1300 | "min": false, 1301 | "rightSide": true, 1302 | "show": true, 1303 | "sort": "current", 1304 | "sortDesc": true, 1305 | "total": false, 1306 | "values": true 1307 | }, 1308 | "lines": true, 1309 | "linewidth": 1, 1310 | "links": [], 1311 | "nullPointMode": "null", 1312 | "options": {}, 1313 | "percentage": false, 1314 | "pointradius": 5, 1315 | "points": false, 1316 | "renderer": "flot", 1317 | "seriesOverrides": [], 1318 | "spaceLength": 10, 1319 | "stack": true, 1320 | "steppedLine": false, 1321 | "targets": [ 1322 | { 1323 | "expr": "sum(rate(container_last_seen[5m]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_load1{instance=~\"${node_id}\"}) by (container_label_com_docker_swarm_service_name)", 1324 | "format": "time_series", 1325 | "hide": false, 1326 | "intervalFactor": 10, 1327 | "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", 1328 | "refId": "A", 1329 | "step": 10 1330 | } 1331 | ], 1332 | "thresholds": [], 1333 | "timeFrom": null, 1334 | "timeRegions": [], 1335 | "timeShift": null, 1336 | "title": "Running Containers by Service", 1337 | "tooltip": { 1338 | "shared": true, 1339 | "sort": 2, 1340 | "value_type": "individual" 1341 | }, 1342 | "type": "graph", 1343 | "xaxis": { 1344 | "buckets": null, 1345 | "mode": "time", 1346 | "name": null, 1347 | "show": true, 1348 | "values": [] 1349 | }, 1350 | "yaxes": [ 1351 | { 1352 | "format": "short", 1353 | "label": null, 1354 | "logBase": 1, 1355 | 
"max": null, 1356 | "min": null, 1357 | "show": true 1358 | }, 1359 | { 1360 | "format": "short", 1361 | "label": null, 1362 | "logBase": 1, 1363 | "max": null, 1364 | "min": null, 1365 | "show": true 1366 | } 1367 | ], 1368 | "yaxis": { 1369 | "align": false, 1370 | "alignLevel": null 1371 | } 1372 | }, 1373 | { 1374 | "aliasColors": {}, 1375 | "bars": false, 1376 | "dashLength": 10, 1377 | "dashes": false, 1378 | "datasource": "Prometheus", 1379 | "fill": 1, 1380 | "gridPos": { 1381 | "h": 7, 1382 | "w": 24, 1383 | "x": 0, 1384 | "y": 42 1385 | }, 1386 | "id": 17, 1387 | "legend": { 1388 | "alignAsTable": true, 1389 | "avg": true, 1390 | "current": false, 1391 | "max": true, 1392 | "min": true, 1393 | "rightSide": true, 1394 | "show": true, 1395 | "total": false, 1396 | "values": true 1397 | }, 1398 | "lines": true, 1399 | "linewidth": 1, 1400 | "links": [], 1401 | "nullPointMode": "null", 1402 | "options": {}, 1403 | "percentage": false, 1404 | "pointradius": 5, 1405 | "points": false, 1406 | "renderer": "flot", 1407 | "seriesOverrides": [], 1408 | "spaceLength": 10, 1409 | "stack": false, 1410 | "steppedLine": false, 1411 | "targets": [ 1412 | { 1413 | "expr": "sum(rate(container_network_receive_bytes_total[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_load1{instance=~\"${node_id}\"}) by (node_name)", 1414 | "format": "time_series", 1415 | "intervalFactor": 2, 1416 | "legendFormat": "IN {{node_name}}", 1417 | "refId": "A", 1418 | "step": 2 1419 | }, 1420 | { 1421 | "expr": "- sum(rate(container_network_transmit_bytes_total[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_load1{instance=~\"${node_id}\"}) by (node_name)", 1422 | "format": "time_series", 1423 | "hide": false, 1424 | "intervalFactor": 2, 1425 | "legendFormat": "OUT {{node_name}}", 1426 | "metric": "", 1427 | "refId": "B", 1428 | "step": 2 1429 | } 1430 | ], 1431 | "thresholds": [], 1432 | "timeFrom": null, 1433 | 
"timeRegions": [], 1434 | "timeShift": null, 1435 | "title": "Containers Network Traffic by Node", 1436 | "tooltip": { 1437 | "shared": true, 1438 | "sort": 0, 1439 | "value_type": "individual" 1440 | }, 1441 | "type": "graph", 1442 | "xaxis": { 1443 | "buckets": null, 1444 | "mode": "time", 1445 | "name": null, 1446 | "show": true, 1447 | "values": [] 1448 | }, 1449 | "yaxes": [ 1450 | { 1451 | "format": "Bps", 1452 | "label": null, 1453 | "logBase": 1, 1454 | "max": null, 1455 | "min": null, 1456 | "show": true 1457 | }, 1458 | { 1459 | "format": "short", 1460 | "label": null, 1461 | "logBase": 1, 1462 | "max": null, 1463 | "min": null, 1464 | "show": true 1465 | } 1466 | ], 1467 | "yaxis": { 1468 | "align": false, 1469 | "alignLevel": null 1470 | } 1471 | }, 1472 | { 1473 | "cacheTimeout": null, 1474 | "colorBackground": false, 1475 | "colorValue": false, 1476 | "colors": [ 1477 | "rgba(245, 54, 54, 0.9)", 1478 | "rgba(237, 129, 40, 0.89)", 1479 | "rgba(50, 172, 45, 0.97)" 1480 | ], 1481 | "datasource": "Prometheus", 1482 | "format": "none", 1483 | "gauge": { 1484 | "maxValue": 100, 1485 | "minValue": 0, 1486 | "show": false, 1487 | "thresholdLabels": false, 1488 | "thresholdMarkers": true 1489 | }, 1490 | "gridPos": { 1491 | "h": 7, 1492 | "w": 6, 1493 | "x": 0, 1494 | "y": 49 1495 | }, 1496 | "id": 7, 1497 | "interval": null, 1498 | "links": [], 1499 | "mappingType": 1, 1500 | "mappingTypes": [ 1501 | { 1502 | "name": "value to text", 1503 | "value": 1 1504 | }, 1505 | { 1506 | "name": "range to text", 1507 | "value": 2 1508 | } 1509 | ], 1510 | "maxDataPoints": 100, 1511 | "nullPointMode": "connected", 1512 | "nullText": null, 1513 | "options": {}, 1514 | "postfix": "", 1515 | "postfixFontSize": "50%", 1516 | "prefix": "", 1517 | "prefixFontSize": "50%", 1518 | "rangeMaps": [ 1519 | { 1520 | "from": "null", 1521 | "text": "N/A", 1522 | "to": "null" 1523 | } 1524 | ], 1525 | "sparkline": { 1526 | "fillColor": "rgba(31, 118, 189, 0.18)", 1527 | "full": false, 
1528 | "lineColor": "rgb(31, 120, 193)", 1529 | "show": true 1530 | }, 1531 | "tableColumn": "", 1532 | "targets": [ 1533 | { 1534 | "expr": "count(rate(container_last_seen[5m]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_load1{instance=~\"$node_id\"})", 1535 | "format": "time_series", 1536 | "intervalFactor": 2, 1537 | "refId": "A", 1538 | "step": 20 1539 | } 1540 | ], 1541 | "thresholds": "", 1542 | "title": "Total Containers", 1543 | "type": "singlestat", 1544 | "valueFontSize": "80%", 1545 | "valueMaps": [ 1546 | { 1547 | "op": "=", 1548 | "text": "N/A", 1549 | "value": "null" 1550 | } 1551 | ], 1552 | "valueName": "avg" 1553 | }, 1554 | { 1555 | "cacheTimeout": null, 1556 | "colorBackground": false, 1557 | "colorValue": false, 1558 | "colors": [ 1559 | "rgba(245, 54, 54, 0.9)", 1560 | "rgba(237, 129, 40, 0.89)", 1561 | "rgba(50, 172, 45, 0.97)" 1562 | ], 1563 | "datasource": "Prometheus", 1564 | "format": "none", 1565 | "gauge": { 1566 | "maxValue": 100, 1567 | "minValue": 0, 1568 | "show": false, 1569 | "thresholdLabels": false, 1570 | "thresholdMarkers": true 1571 | }, 1572 | "gridPos": { 1573 | "h": 7, 1574 | "w": 6, 1575 | "x": 18, 1576 | "y": 49 1577 | }, 1578 | "id": 21, 1579 | "interval": null, 1580 | "links": [], 1581 | "mappingType": 1, 1582 | "mappingTypes": [ 1583 | { 1584 | "name": "value to text", 1585 | "value": 1 1586 | }, 1587 | { 1588 | "name": "range to text", 1589 | "value": 2 1590 | } 1591 | ], 1592 | "maxDataPoints": 100, 1593 | "nullPointMode": "connected", 1594 | "nullText": null, 1595 | "options": {}, 1596 | "postfix": "", 1597 | "postfixFontSize": "50%", 1598 | "prefix": "", 1599 | "prefixFontSize": "50%", 1600 | "rangeMaps": [ 1601 | { 1602 | "from": "null", 1603 | "text": "N/A", 1604 | "to": "null" 1605 | } 1606 | ], 1607 | "sparkline": { 1608 | "fillColor": "rgba(31, 118, 189, 0.18)", 1609 | "full": false, 1610 | "lineColor": "rgb(31, 120, 193)", 1611 | "show": true 1612 | }, 1613 | "tableColumn": "", 
1614 | "targets": [ 1615 | { 1616 | "expr": "count(container_last_seen{container_label_com_docker_swarm_node_id!=\"\"})", 1617 | "format": "time_series", 1618 | "intervalFactor": 2, 1619 | "refId": "A", 1620 | "step": 20 1621 | } 1622 | ], 1623 | "thresholds": "", 1624 | "title": "Running Containers", 1625 | "type": "singlestat", 1626 | "valueFontSize": "80%", 1627 | "valueMaps": [ 1628 | { 1629 | "op": "=", 1630 | "text": "N/A", 1631 | "value": "null" 1632 | } 1633 | ], 1634 | "valueName": "avg" 1635 | }, 1636 | { 1637 | "columns": [], 1638 | "datasource": "Prometheus", 1639 | "fontSize": "100%", 1640 | "gridPos": { 1641 | "h": 7, 1642 | "w": 24, 1643 | "x": 0, 1644 | "y": 56 1645 | }, 1646 | "hideTimeOverride": true, 1647 | "id": 20, 1648 | "links": [], 1649 | "options": {}, 1650 | "pageSize": null, 1651 | "scroll": true, 1652 | "showHeader": true, 1653 | "sort": { 1654 | "col": 0, 1655 | "desc": true 1656 | }, 1657 | "styles": [ 1658 | { 1659 | "alias": "Time", 1660 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1661 | "pattern": "Time", 1662 | "type": "date" 1663 | }, 1664 | { 1665 | "alias": "", 1666 | "colorMode": null, 1667 | "colors": [ 1668 | "rgba(245, 54, 54, 0.9)", 1669 | "rgba(237, 129, 40, 0.89)", 1670 | "rgba(50, 172, 45, 0.97)" 1671 | ], 1672 | "decimals": 2, 1673 | "pattern": "/.*/", 1674 | "thresholds": [], 1675 | "type": "number", 1676 | "unit": "short" 1677 | } 1678 | ], 1679 | "targets": [ 1680 | { 1681 | "expr": "sum(node_load1) by (node_id, node_name, instance)", 1682 | "format": "table", 1683 | "instant": false, 1684 | "intervalFactor": 2, 1685 | "refId": "A", 1686 | "step": 2 1687 | } 1688 | ], 1689 | "timeFrom": "1s", 1690 | "title": "Cluster members", 1691 | "transform": "table", 1692 | "type": "table" 1693 | } 1694 | ], 1695 | "refresh": "30s", 1696 | "schemaVersion": 18, 1697 | "style": "dark", 1698 | "tags": [ 1699 | "swarm", 1700 | "nodes" 1701 | ], 1702 | "templating": { 1703 | "list": [ 1704 | { 1705 | "allValue": ".+", 1706 | "current": { 
1707 | "text": "All", 1708 | "value": "$__all" 1709 | }, 1710 | "datasource": "Prometheus", 1711 | "definition": "label_values(node_arp_entries,instance)", 1712 | "hide": 0, 1713 | "includeAll": true, 1714 | "label": "Swarm Node", 1715 | "multi": false, 1716 | "name": "node_id", 1717 | "options": [], 1718 | "query": "label_values(node_arp_entries,instance)", 1719 | "refresh": 1, 1720 | "regex": "", 1721 | "skipUrlSync": false, 1722 | "sort": 0, 1723 | "tagValuesQuery": "label_values({node_id=\"$tag\"},node_name)", 1724 | "tags": [], 1725 | "tagsQuery": "label_values(node_meta, node_name)", 1726 | "type": "query", 1727 | "useTags": false 1728 | }, 1729 | { 1730 | "auto": true, 1731 | "auto_count": 30, 1732 | "auto_min": "30s", 1733 | "current": { 1734 | "text": "auto", 1735 | "value": "$__auto_interval_interval" 1736 | }, 1737 | "hide": 0, 1738 | "label": "Interval", 1739 | "name": "interval", 1740 | "options": [ 1741 | { 1742 | "selected": true, 1743 | "text": "auto", 1744 | "value": "$__auto_interval_interval" 1745 | }, 1746 | { 1747 | "selected": false, 1748 | "text": "1m", 1749 | "value": "1m" 1750 | }, 1751 | { 1752 | "selected": false, 1753 | "text": "10m", 1754 | "value": "10m" 1755 | }, 1756 | { 1757 | "selected": false, 1758 | "text": "30m", 1759 | "value": "30m" 1760 | }, 1761 | { 1762 | "selected": false, 1763 | "text": "1h", 1764 | "value": "1h" 1765 | }, 1766 | { 1767 | "selected": false, 1768 | "text": "6h", 1769 | "value": "6h" 1770 | }, 1771 | { 1772 | "selected": false, 1773 | "text": "12h", 1774 | "value": "12h" 1775 | }, 1776 | { 1777 | "selected": false, 1778 | "text": "1d", 1779 | "value": "1d" 1780 | }, 1781 | { 1782 | "selected": false, 1783 | "text": "7d", 1784 | "value": "7d" 1785 | }, 1786 | { 1787 | "selected": false, 1788 | "text": "14d", 1789 | "value": "14d" 1790 | }, 1791 | { 1792 | "selected": false, 1793 | "text": "30d", 1794 | "value": "30d" 1795 | } 1796 | ], 1797 | "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 1798 | "refresh": 
2, 1799 | "skipUrlSync": false, 1800 | "type": "interval" 1801 | } 1802 | ] 1803 | }, 1804 | "time": { 1805 | "from": "now-15m", 1806 | "to": "now" 1807 | }, 1808 | "timepicker": { 1809 | "refresh_intervals": [ 1810 | "5s", 1811 | "10s", 1812 | "30s", 1813 | "1m", 1814 | "5m", 1815 | "15m", 1816 | "30m", 1817 | "1h", 1818 | "2h", 1819 | "1d" 1820 | ], 1821 | "time_options": [ 1822 | "5m", 1823 | "15m", 1824 | "1h", 1825 | "6h", 1826 | "12h", 1827 | "24h", 1828 | "2d", 1829 | "7d", 1830 | "30d" 1831 | ] 1832 | }, 1833 | "timezone": "", 1834 | "title": "Docker Swarm Nodes Dashboard", 1835 | "uid": "BPlb-Sgik", 1836 | "version": 7 1837 | } 1838 | -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/TraefikRealTime.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "description": "Visualize Traefik Metrics", 16 | "editable": true, 17 | "gnetId": 2870, 18 | "graphTooltip": 0, 19 | "id": 4, 20 | "links": [], 21 | "panels": [ 22 | { 23 | "cacheTimeout": null, 24 | "colorBackground": false, 25 | "colorValue": false, 26 | "colors": [ 27 | "rgba(50, 172, 45, 0.97)", 28 | "rgba(237, 129, 40, 0.89)", 29 | "rgba(245, 54, 54, 0.9)" 30 | ], 31 | "datasource": "Prometheus", 32 | "format": "none", 33 | "gauge": { 34 | "maxValue": 100, 35 | "minValue": 0, 36 | "show": false, 37 | "thresholdLabels": false, 38 | "thresholdMarkers": true 39 | }, 40 | "gridPos": { 41 | "h": 6, 42 | "w": 8, 43 | "x": 0, 44 | "y": 0 45 | }, 46 | "id": 10, 47 | "interval": null, 48 | "links": [], 49 | "mappingType": 1, 50 | "mappingTypes": [ 51 | { 52 | "name": "value to text", 53 | "value": 1 54 | }, 55 | { 56 | "name": "range to text", 57 | 
"value": 2 58 | } 59 | ], 60 | "maxDataPoints": 100, 61 | "nullPointMode": "connected", 62 | "nullText": null, 63 | "options": {}, 64 | "postfix": "", 65 | "postfixFontSize": "50%", 66 | "prefix": "", 67 | "prefixFontSize": "50%", 68 | "rangeMaps": [ 69 | { 70 | "from": "null", 71 | "text": "N/A", 72 | "to": "null" 73 | } 74 | ], 75 | "sparkline": { 76 | "fillColor": "rgba(31, 118, 189, 0.18)", 77 | "full": false, 78 | "lineColor": "rgb(31, 120, 193)", 79 | "show": false 80 | }, 81 | "tableColumn": "", 82 | "targets": [ 83 | { 84 | "expr": "sum(traefik_backend_request_duration_seconds_sum) / sum(traefik_backend_requests_total) * 1000", 85 | "format": "time_series", 86 | "intervalFactor": 2, 87 | "legendFormat": "", 88 | "refId": "A", 89 | "step": 60 90 | } 91 | ], 92 | "thresholds": "10,14", 93 | "title": "Average Backend Response Time (ms)", 94 | "type": "singlestat", 95 | "valueFontSize": "80%", 96 | "valueMaps": [ 97 | { 98 | "op": "=", 99 | "text": "N/A", 100 | "value": "null" 101 | } 102 | ], 103 | "valueName": "avg" 104 | }, 105 | { 106 | "cacheTimeout": null, 107 | "colorBackground": false, 108 | "colorValue": false, 109 | "colors": [ 110 | "rgba(245, 54, 54, 0.9)", 111 | "rgba(237, 129, 40, 0.89)", 112 | "rgba(50, 172, 45, 0.97)" 113 | ], 114 | "datasource": "Prometheus", 115 | "decimals": 0, 116 | "format": "s", 117 | "gauge": { 118 | "maxValue": 100, 119 | "minValue": 0, 120 | "show": false, 121 | "thresholdLabels": false, 122 | "thresholdMarkers": true 123 | }, 124 | "gridPos": { 125 | "h": 6, 126 | "w": 8, 127 | "x": 8, 128 | "y": 0 129 | }, 130 | "id": 3, 131 | "interval": null, 132 | "links": [], 133 | "mappingType": 1, 134 | "mappingTypes": [ 135 | { 136 | "name": "value to text", 137 | "value": 1 138 | }, 139 | { 140 | "name": "range to text", 141 | "value": 2 142 | } 143 | ], 144 | "maxDataPoints": 100, 145 | "nullPointMode": "connected", 146 | "nullText": null, 147 | "options": {}, 148 | "postfix": "", 149 | "postfixFontSize": "50%", 150 | 
"prefix": "", 151 | "prefixFontSize": "50%", 152 | "rangeMaps": [ 153 | { 154 | "from": "null", 155 | "text": "N/A", 156 | "to": "null" 157 | } 158 | ], 159 | "sparkline": { 160 | "fillColor": "rgba(31, 118, 189, 0.18)", 161 | "full": false, 162 | "lineColor": "rgb(31, 120, 193)", 163 | "show": false 164 | }, 165 | "tableColumn": "", 166 | "targets": [ 167 | { 168 | "expr": "time() - process_start_time_seconds{job=\"traefik\"}", 169 | "format": "time_series", 170 | "intervalFactor": 2, 171 | "legendFormat": "", 172 | "refId": "A", 173 | "step": 60 174 | } 175 | ], 176 | "thresholds": "", 177 | "title": "Uptime", 178 | "type": "singlestat", 179 | "valueFontSize": "80%", 180 | "valueMaps": [ 181 | { 182 | "op": "=", 183 | "text": "N/A", 184 | "value": "null" 185 | } 186 | ], 187 | "valueName": "current" 188 | }, 189 | { 190 | "cacheTimeout": null, 191 | "colorBackground": false, 192 | "colorValue": true, 193 | "colors": [ 194 | "rgba(50, 172, 45, 0.97)", 195 | "rgba(26, 206, 22, 0.89)", 196 | "rgba(245, 54, 54, 0.9)" 197 | ], 198 | "datasource": "Prometheus", 199 | "format": "none", 200 | "gauge": { 201 | "maxValue": 100, 202 | "minValue": 0, 203 | "show": false, 204 | "thresholdLabels": false, 205 | "thresholdMarkers": true 206 | }, 207 | "gridPos": { 208 | "h": 6, 209 | "w": 8, 210 | "x": 16, 211 | "y": 0 212 | }, 213 | "id": 9, 214 | "interval": null, 215 | "links": [], 216 | "mappingType": 1, 217 | "mappingTypes": [ 218 | { 219 | "name": "value to text", 220 | "value": 1 221 | }, 222 | { 223 | "name": "range to text", 224 | "value": 2 225 | } 226 | ], 227 | "maxDataPoints": 100, 228 | "nullPointMode": "connected", 229 | "nullText": null, 230 | "options": {}, 231 | "postfix": "", 232 | "postfixFontSize": "50%", 233 | "prefix": "", 234 | "prefixFontSize": "50%", 235 | "rangeMaps": [ 236 | { 237 | "from": "null", 238 | "text": "N/A", 239 | "to": "null" 240 | } 241 | ], 242 | "sparkline": { 243 | "fillColor": "rgba(31, 118, 189, 0.18)", 244 | "full": false, 245 | 
"lineColor": "rgb(31, 120, 193)", 246 | "show": false 247 | }, 248 | "tableColumn": "", 249 | "targets": [ 250 | { 251 | "expr": "sum(rate(traefik_backend_requests_total{code=\"404\",method=\"GET\",protocol=\"http\"}[5m])) * 1000", 252 | "format": "time_series", 253 | "intervalFactor": 2, 254 | "legendFormat": "", 255 | "metric": "traefik_requests_total", 256 | "refId": "A", 257 | "step": 60 258 | } 259 | ], 260 | "thresholds": "0,1", 261 | "title": "404 Error Count last 5 Minutes", 262 | "type": "singlestat", 263 | "valueFontSize": "80%", 264 | "valueMaps": [ 265 | { 266 | "op": "=", 267 | "text": "N/A", 268 | "value": "null" 269 | } 270 | ], 271 | "valueName": "max" 272 | }, 273 | { 274 | "aliasColors": {}, 275 | "bars": false, 276 | "dashLength": 10, 277 | "dashes": false, 278 | "datasource": "Prometheus", 279 | "fill": 1, 280 | "gridPos": { 281 | "h": 6, 282 | "w": 24, 283 | "x": 0, 284 | "y": 6 285 | }, 286 | "id": 1, 287 | "legend": { 288 | "alignAsTable": true, 289 | "avg": true, 290 | "current": true, 291 | "max": true, 292 | "min": false, 293 | "rightSide": true, 294 | "show": true, 295 | "sideWidth": 300, 296 | "total": false, 297 | "values": true 298 | }, 299 | "lines": true, 300 | "linewidth": 1, 301 | "links": [], 302 | "nullPointMode": "null", 303 | "options": {}, 304 | "percentage": false, 305 | "pointradius": 5, 306 | "points": false, 307 | "renderer": "flot", 308 | "seriesOverrides": [], 309 | "spaceLength": 10, 310 | "stack": false, 311 | "steppedLine": false, 312 | "targets": [ 313 | { 314 | "expr": "sum(traefik_backend_requests_total{protocol=\"http\"})", 315 | "format": "time_series", 316 | "interval": "", 317 | "intervalFactor": 2, 318 | "legendFormat": "{{http}}", 319 | "metric": "", 320 | "refId": "A", 321 | "step": 20 322 | }, 323 | { 324 | "expr": "sum(traefik_backend_requests_total{protocol=\"https\"})", 325 | "format": "time_series", 326 | "interval": "", 327 | "intervalFactor": 2, 328 | "legendFormat": "{{https}}", 329 | "refId": "B", 
330 | "step": 20 331 | } 332 | ], 333 | "thresholds": [], 334 | "timeFrom": null, 335 | "timeRegions": [], 336 | "timeShift": null, 337 | "title": "Total requests", 338 | "tooltip": { 339 | "shared": true, 340 | "sort": 0, 341 | "value_type": "individual" 342 | }, 343 | "type": "graph", 344 | "xaxis": { 345 | "buckets": null, 346 | "mode": "time", 347 | "name": null, 348 | "show": true, 349 | "values": [] 350 | }, 351 | "yaxes": [ 352 | { 353 | "format": "none", 354 | "label": "Count", 355 | "logBase": 1, 356 | "max": null, 357 | "min": null, 358 | "show": true 359 | }, 360 | { 361 | "format": "short", 362 | "label": null, 363 | "logBase": 1, 364 | "max": null, 365 | "min": null, 366 | "show": false 367 | } 368 | ], 369 | "yaxis": { 370 | "align": false, 371 | "alignLevel": null 372 | } 373 | }, 374 | { 375 | "aliasColors": {}, 376 | "bars": false, 377 | "dashLength": 10, 378 | "dashes": false, 379 | "datasource": "Prometheus", 380 | "fill": 1, 381 | "gridPos": { 382 | "h": 7, 383 | "w": 24, 384 | "x": 0, 385 | "y": 12 386 | }, 387 | "id": 8, 388 | "legend": { 389 | "alignAsTable": true, 390 | "avg": true, 391 | "current": true, 392 | "max": true, 393 | "min": false, 394 | "rightSide": true, 395 | "show": true, 396 | "sideWidth": 300, 397 | "total": false, 398 | "values": true 399 | }, 400 | "lines": true, 401 | "linewidth": 1, 402 | "links": [], 403 | "nullPointMode": "null", 404 | "options": {}, 405 | "percentage": false, 406 | "pointradius": 5, 407 | "points": false, 408 | "renderer": "flot", 409 | "seriesOverrides": [], 410 | "spaceLength": 10, 411 | "stack": false, 412 | "steppedLine": false, 413 | "targets": [ 414 | { 415 | "expr": "traefik_backend_requests_total{protocol!~\"https\"}", 416 | "format": "time_series", 417 | "intervalFactor": 2, 418 | "legendFormat": "{{protocol}} {{method}} {{code}}", 419 | "refId": "A", 420 | "step": 20 421 | } 422 | ], 423 | "thresholds": [], 424 | "timeFrom": null, 425 | "timeRegions": [], 426 | "timeShift": null, 427 | 
"title": "Requests by Request Code", 428 | "tooltip": { 429 | "shared": true, 430 | "sort": 0, 431 | "value_type": "individual" 432 | }, 433 | "type": "graph", 434 | "xaxis": { 435 | "buckets": null, 436 | "mode": "time", 437 | "name": null, 438 | "show": true, 439 | "values": [] 440 | }, 441 | "yaxes": [ 442 | { 443 | "format": "short", 444 | "label": null, 445 | "logBase": 1, 446 | "max": null, 447 | "min": null, 448 | "show": true 449 | }, 450 | { 451 | "format": "short", 452 | "label": null, 453 | "logBase": 1, 454 | "max": null, 455 | "min": null, 456 | "show": true 457 | } 458 | ], 459 | "yaxis": { 460 | "align": false, 461 | "alignLevel": null 462 | } 463 | }, 464 | { 465 | "aliasColors": {}, 466 | "bars": false, 467 | "dashLength": 10, 468 | "dashes": false, 469 | "datasource": "Prometheus", 470 | "decimals": 0, 471 | "fill": 1, 472 | "gridPos": { 473 | "h": 7, 474 | "w": 24, 475 | "x": 0, 476 | "y": 19 477 | }, 478 | "id": 5, 479 | "legend": { 480 | "alignAsTable": true, 481 | "avg": true, 482 | "current": true, 483 | "max": true, 484 | "min": false, 485 | "rightSide": true, 486 | "show": true, 487 | "sideWidth": 300, 488 | "total": false, 489 | "values": true 490 | }, 491 | "lines": true, 492 | "linewidth": 1, 493 | "links": [], 494 | "nullPointMode": "null", 495 | "options": {}, 496 | "percentage": false, 497 | "pointradius": 5, 498 | "points": false, 499 | "renderer": "flot", 500 | "seriesOverrides": [], 501 | "spaceLength": 10, 502 | "stack": false, 503 | "steppedLine": false, 504 | "targets": [ 505 | { 506 | "expr": "rate(traefik_backend_requests_total{protocol=~\"http|https\",code=\"200\"}[5m])", 507 | "format": "time_series", 508 | "intervalFactor": 2, 509 | "legendFormat": "{{service}} {{method}} {{code}}", 510 | "refId": "A", 511 | "step": 10 512 | } 513 | ], 514 | "thresholds": [], 515 | "timeFrom": null, 516 | "timeRegions": [], 517 | "timeShift": null, 518 | "title": "Successful Status Code Count (5min)", 519 | "tooltip": { 520 | "shared": 
true, 521 | "sort": 0, 522 | "value_type": "individual" 523 | }, 524 | "type": "graph", 525 | "xaxis": { 526 | "buckets": null, 527 | "mode": "time", 528 | "name": null, 529 | "show": true, 530 | "values": [] 531 | }, 532 | "yaxes": [ 533 | { 534 | "format": "short", 535 | "label": null, 536 | "logBase": 1, 537 | "max": null, 538 | "min": null, 539 | "show": true 540 | }, 541 | { 542 | "format": "short", 543 | "label": null, 544 | "logBase": 1, 545 | "max": null, 546 | "min": null, 547 | "show": false 548 | } 549 | ], 550 | "yaxis": { 551 | "align": false, 552 | "alignLevel": null 553 | } 554 | }, 555 | { 556 | "aliasColors": {}, 557 | "bars": false, 558 | "dashLength": 10, 559 | "dashes": false, 560 | "datasource": "Prometheus", 561 | "fill": 1, 562 | "gridPos": { 563 | "h": 7, 564 | "w": 24, 565 | "x": 0, 566 | "y": 26 567 | }, 568 | "id": 6, 569 | "legend": { 570 | "alignAsTable": true, 571 | "avg": true, 572 | "current": true, 573 | "max": true, 574 | "min": false, 575 | "rightSide": true, 576 | "show": true, 577 | "sideWidth": 300, 578 | "total": false, 579 | "values": true 580 | }, 581 | "lines": true, 582 | "linewidth": 1, 583 | "links": [], 584 | "nullPointMode": "null", 585 | "options": {}, 586 | "percentage": false, 587 | "pointradius": 5, 588 | "points": false, 589 | "renderer": "flot", 590 | "seriesOverrides": [], 591 | "spaceLength": 10, 592 | "stack": false, 593 | "steppedLine": false, 594 | "targets": [ 595 | { 596 | "expr": "sum(rate(traefik_backend_requests_total[5m]))", 597 | "format": "time_series", 598 | "interval": "", 599 | "intervalFactor": 2, 600 | "legendFormat": "{{requests}}", 601 | "refId": "A", 602 | "step": 20 603 | } 604 | ], 605 | "thresholds": [], 606 | "timeFrom": null, 607 | "timeRegions": [], 608 | "timeShift": null, 609 | "title": "Requests in last 5 minutes", 610 | "tooltip": { 611 | "shared": true, 612 | "sort": 0, 613 | "value_type": "individual" 614 | }, 615 | "type": "graph", 616 | "xaxis": { 617 | "buckets": null, 618 | 
"mode": "time", 619 | "name": null, 620 | "show": true, 621 | "values": [] 622 | }, 623 | "yaxes": [ 624 | { 625 | "format": "short", 626 | "label": null, 627 | "logBase": 1, 628 | "max": null, 629 | "min": null, 630 | "show": true 631 | }, 632 | { 633 | "format": "short", 634 | "label": null, 635 | "logBase": 1, 636 | "max": null, 637 | "min": null, 638 | "show": true 639 | } 640 | ], 641 | "yaxis": { 642 | "align": false, 643 | "alignLevel": null 644 | } 645 | }, 646 | { 647 | "aliasColors": {}, 648 | "bars": false, 649 | "dashLength": 10, 650 | "dashes": false, 651 | "datasource": "Prometheus", 652 | "decimals": 0, 653 | "fill": 1, 654 | "gridPos": { 655 | "h": 7, 656 | "w": 24, 657 | "x": 0, 658 | "y": 33 659 | }, 660 | "id": 4, 661 | "legend": { 662 | "alignAsTable": true, 663 | "avg": true, 664 | "current": true, 665 | "max": true, 666 | "min": false, 667 | "rightSide": true, 668 | "show": true, 669 | "sideWidth": 300, 670 | "total": false, 671 | "values": true 672 | }, 673 | "lines": true, 674 | "linewidth": 1, 675 | "links": [], 676 | "nullPointMode": "null", 677 | "options": {}, 678 | "percentage": false, 679 | "pointradius": 5, 680 | "points": false, 681 | "renderer": "flot", 682 | "seriesOverrides": [], 683 | "spaceLength": 10, 684 | "stack": false, 685 | "steppedLine": false, 686 | "targets": [ 687 | { 688 | "expr": "rate(traefik_backend_requests_total{protocol=~\"http|https\",code!=\"200\"}[5m])", 689 | "format": "time_series", 690 | "intervalFactor": 2, 691 | "legendFormat": "{{service}} {{method}} {{code}}", 692 | "refId": "A", 693 | "step": 10 694 | } 695 | ], 696 | "thresholds": [], 697 | "timeFrom": null, 698 | "timeRegions": [], 699 | "timeShift": null, 700 | "title": "Bad Status Code Count (5m)", 701 | "tooltip": { 702 | "shared": true, 703 | "sort": 0, 704 | "value_type": "individual" 705 | }, 706 | "type": "graph", 707 | "xaxis": { 708 | "buckets": null, 709 | "mode": "time", 710 | "name": null, 711 | "show": true, 712 | "values": [] 713 | }, 
714 | "yaxes": [ 715 | { 716 | "format": "short", 717 | "label": null, 718 | "logBase": 1, 719 | "max": null, 720 | "min": null, 721 | "show": true 722 | }, 723 | { 724 | "format": "short", 725 | "label": null, 726 | "logBase": 1, 727 | "max": null, 728 | "min": null, 729 | "show": false 730 | } 731 | ], 732 | "yaxis": { 733 | "align": false, 734 | "alignLevel": null 735 | } 736 | } 737 | ], 738 | "refresh": "30s", 739 | "schemaVersion": 18, 740 | "style": "dark", 741 | "tags": [], 742 | "templating": { 743 | "list": [] 744 | }, 745 | "time": { 746 | "from": "now-1h", 747 | "to": "now" 748 | }, 749 | "timepicker": { 750 | "refresh_intervals": [ 751 | "5s", 752 | "10s", 753 | "30s", 754 | "1m", 755 | "5m", 756 | "15m", 757 | "30m", 758 | "1h", 759 | "2h", 760 | "1d" 761 | ], 762 | "time_options": [ 763 | "5m", 764 | "15m", 765 | "1h", 766 | "6h", 767 | "12h", 768 | "24h", 769 | "2d", 770 | "7d", 771 | "30d" 772 | ] 773 | }, 774 | "timezone": "browser", 775 | "title": "Traefik Realtime Metrics", 776 | "uid": "7x9kF4dZk", 777 | "version": 2 778 | } 779 | -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/ds_prometheus.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'Prometheus' 5 | orgId: 1 6 | folder: '' 7 | type: file 8 | disableDeletion: false 9 | editable: true 10 | options: 11 | path: /etc/grafana/provisioning/dashboards 12 | -------------------------------------------------------------------------------- /grafana/provisioning/datasources/prometheus.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | deleteDatasources: 4 | - name: Prometheus 5 | orgId: 1 6 | 7 | datasources: 8 | - name: Prometheus 9 | type: prometheus 10 | access: proxy 11 | url: http://prometheus:9090 12 | isDefault: true 13 | editable: true 14 | 
--------------------------------------------------------------------------------
/htpasswd:
--------------------------------------------------------------------------------
1 | admin:$apr1$rBmp5IHe$cnOKeXBuzpZEifaapouq/.
2 | 
--------------------------------------------------------------------------------
/node-exporter/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM prom/node-exporter:v0.18.0
2 | # thanks: https://github.com/stefanprodan/swarmprom
3 | 
4 | # Fallback node id; the entrypoint script writes it into the node_meta metric.
5 | ENV NODE_ID=none
6 | # NOTE(review): presumably root is required so the entrypoint can write
7 | # /etc/node-exporter/node-meta.prom - confirm before dropping privileges.
8 | USER root
9 | # Ships conf/docker-entrypoint.sh as /etc/node-exporter/docker-entrypoint.sh.
10 | COPY conf /etc/node-exporter/
11 | 
12 | ENTRYPOINT [ "/etc/node-exporter/docker-entrypoint.sh" ]
13 | # Default command. The entrypoint runs a full command as-is and only prepends
14 | # the exporter binary when it receives no arguments or bare flags.
15 | CMD [ "/bin/node_exporter" ]
16 | 
--------------------------------------------------------------------------------
/node-exporter/conf/docker-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh -e
2 | # Entrypoint for the node-exporter image.
3 | #
4 | # Writes a constant node_meta metric (value 1), labelled with the node id and
5 | # node name, to /etc/node-exporter/node-meta.prom and then execs the command.
6 | # NOTE(review): the .prom file is presumably consumed by node_exporter's
7 | # textfile collector, which requires the service definition to pass
8 | # --collector.textfile.directory=/etc/node-exporter/ - confirm.
9 | 
10 | # /etc/nodename must exist inside the container (presumably bind-mounted from
11 | # the host - confirm). Because of `-e`, a missing file aborts startup here.
12 | NODE_NAME=$(cat /etc/nodename)
13 | echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > /etc/node-exporter/node-meta.prom
14 | 
15 | # Prepend the exporter binary only when no command or only flags were given.
16 | # Prepending unconditionally turned the Dockerfile's default
17 | # CMD [ "/bin/node_exporter" ] into `/bin/node_exporter /bin/node_exporter`.
18 | if [ "$#" -eq 0 ] || [ "${1#-}" != "$1" ]; then
19 |   set -- /bin/node_exporter "$@"
20 | fi
21 | exec "$@"
22 | 
--------------------------------------------------------------------------------
/prometheus/configs/prometheus-localhost.yml:
--------------------------------------------------------------------------------
1 | # Prometheus configuration that scrapes fixed, single-address targets
2 | # (static_configs) instead of using DNS service discovery.
3 | # NOTE(review): presumably paired with docker-compose.localhost.yml - confirm.
4 | global:
5 |   # Defaults; individual jobs below may override scrape_interval.
6 |   scrape_interval: 15s
7 |   evaluation_interval: 15s
8 | 
9 |   external_labels:
10 |     monitor: 'bekkerstacks'
11 | 
12 | rule_files:
13 |   - 'alert.rules'
14 | 
15 | alerting:
16 |   alertmanagers:
17 |   - scheme: http
18 |     static_configs:
19 |     - targets:
20 |       - "alertmanager:9093"
21 | 
22 | scrape_configs:
23 |   - job_name: 'prometheus'
24 |     scrape_interval: 5s
25 |     static_configs:
26 |       - targets: ['localhost:9090']
27 | 
28 |   - job_name: 'cadvisor'
29 |     static_configs:
30 |       - targets: ['cadvisor:8080']
31 | 
32 |   - job_name: 'node-exporter'
33 |     static_configs:
34 |       - targets: ['node-exporter:9100']
35 | 
36 |   # Blackbox probing: the URLs below are probe targets, not scrape addresses.
37 |   - job_name: 'blackbox'
38 |     metrics_path: /probe
39 |     params:
40 |       module: [http_2xx] # Look for an HTTP 2xx response.
41 |     static_configs:
42 |       - targets:
43 |         - https://status.cloud.google.com
44 |         - https://www.githubstatus.com
45 |         - https://status.aws.amazon.com
46 |         - https://azure.microsoft.com/en-us/status
47 |     relabel_configs:
48 |       # Hand the URL to the exporter as the `target` query parameter ...
49 |       - source_labels: [__address__]
50 |         target_label: __param_target
51 |       # ... keep the URL as the `instance` label on the resulting series ...
52 |       - source_labels: [__param_target]
53 |         target_label: instance
54 |       # ... and point the actual scrape at the blackbox exporter.
55 |       - target_label: __address__
56 |         replacement: blackbox-exporter:9115
57 | 
--------------------------------------------------------------------------------
/prometheus/configs/prometheus.yml:
--------------------------------------------------------------------------------
1 | # Prometheus configuration that discovers cadvisor, node-exporter and traefik
2 | # through DNS A-record lookups of `tasks.<service>`.
3 | # NOTE(review): `tasks.<service>` is presumably the Docker Swarm name that
4 | # resolves to one address per running task - confirm against the stack file.
5 | global:
6 |   # Defaults; the jobs below that set scrape_interval override this value.
7 |   scrape_interval: 15s
8 |   evaluation_interval: 15s
9 | 
10 |   external_labels:
11 |     monitor: 'bekkerstacks'
12 | 
13 | rule_files:
14 |   - 'alert.rules'
15 | 
16 | alerting:
17 |   alertmanagers:
18 |   - scheme: http
19 |     static_configs:
20 |     - targets:
21 |       - "alertmanager:9093"
22 | 
23 | scrape_configs:
24 |   - job_name: 'prometheus'
25 |     scrape_interval: 5s
26 |     static_configs:
27 |       - targets: ['localhost:9090']
28 | 
29 |   - job_name: 'cadvisor'
30 |     scrape_interval: 5s
31 |     dns_sd_configs:
32 |     - names:
33 |       - 'tasks.cadvisor'
34 |       type: 'A'
35 |       port: 8080
36 | 
37 |   - job_name: 'node-exporter'
38 |     scrape_interval: 5s
39 |     dns_sd_configs:
40 |     - names:
41 |       - 'tasks.node-exporter'
42 |       type: 'A'
43 |       port: 9100
44 | 
45 |   - job_name: 'traefik'
46 |     scrape_interval: 5s
47 |     dns_sd_configs:
48 |     - names:
49 |       - 'tasks.traefik'
50 |       type: 'A'
51 |       port: 8080
52 | 
53 |   # Blackbox probing: the URLs below are probe targets, not scrape addresses.
54 |   - job_name: 'blackbox'
55 |     metrics_path: /probe
56 |     params:
57 |       module: [http_2xx] # Look for an HTTP 2xx response.
58 |     static_configs:
59 |       - targets:
60 |         - https://status.cloud.google.com
61 |         - https://www.githubstatus.com
62 |         - https://status.aws.amazon.com
63 |         - https://azure.microsoft.com/en-us/status
64 |     relabel_configs:
65 |       # Hand the URL to the exporter as the `target` query parameter ...
66 |       - source_labels: [__address__]
67 |         target_label: __param_target
68 |       # ... keep the URL as the `instance` label on the resulting series ...
69 |       - source_labels: [__param_target]
70 |         target_label: instance
71 |       # ... and point the actual scrape at the blackbox exporter.
72 |       - target_label: __address__
73 |         replacement: blackbox-exporter:9115
74 | 
75 | 
--------------------------------------------------------------------------------
/prometheus/configs/prometheus_with_mysql.yml:
--------------------------------------------------------------------------------
1 | # Prometheus configuration that discovers cadvisor, node-exporter and
2 | # mysql-exporter through DNS A-record lookups of `tasks.<service>`.
3 | # It defines a 'mysql-exporter' job and has no 'traefik' job.
4 | # NOTE(review): `tasks.<service>` is presumably the Docker Swarm name that
5 | # resolves to one address per running task - confirm against the stack file.
6 | global:
7 |   # Defaults; the jobs below that set scrape_interval override this value.
8 |   scrape_interval: 15s
9 |   evaluation_interval: 15s
10 | 
11 |   external_labels:
12 |     monitor: 'bekkerstacks'
13 | 
14 | rule_files:
15 |   - 'alert.rules'
16 | 
17 | alerting:
18 |   alertmanagers:
19 |   - scheme: http
20 |     static_configs:
21 |     - targets:
22 |       - "alertmanager:9093"
23 | 
24 | scrape_configs:
25 |   - job_name: 'prometheus'
26 |     scrape_interval: 5s
27 |     static_configs:
28 |       - targets: ['localhost:9090']
29 | 
30 |   - job_name: 'cadvisor'
31 |     scrape_interval: 5s
32 |     dns_sd_configs:
33 |     - names:
34 |       - 'tasks.cadvisor'
35 |       type: 'A'
36 |       port: 8080
37 | 
38 |   - job_name: 'node-exporter'
39 |     scrape_interval: 5s
40 |     dns_sd_configs:
41 |     - names:
42 |       - 'tasks.node-exporter'
43 |       type: 'A'
44 |       port: 9100
45 | 
46 |   - job_name: 'mysql-exporter'
47 |     scrape_interval: 5s
48 |     dns_sd_configs:
49 |     - names:
50 |       - 'tasks.mysql-exporter'
51 |       type: 'A'
52 |       port: 9104
53 | 
54 |   # Blackbox probing: the URLs below are probe targets, not scrape addresses.
55 |   - job_name: 'blackbox'
56 |     metrics_path: /probe
57 |     params:
58 |       module: [http_2xx] # Look for an HTTP 2xx response.
59 |     static_configs:
60 |       - targets:
61 |         - https://status.cloud.google.com
62 |         - https://www.githubstatus.com
63 |         - https://status.aws.amazon.com
64 |         - https://azure.microsoft.com/en-us/status
65 |     relabel_configs:
66 |       # Hand the URL to the exporter as the `target` query parameter ...
67 |       - source_labels: [__address__]
68 |         target_label: __param_target
69 |       # ... keep the URL as the `instance` label on the resulting series ...
70 |       - source_labels: [__param_target]
71 |         target_label: instance
72 |       # ... and point the actual scrape at the blackbox exporter.
73 |       - target_label: __address__
74 |         replacement: blackbox-exporter:9115
75 | 
76 | 
--------------------------------------------------------------------------------
/prometheus/rules/alert.rules:
--------------------------------------------------------------------------------
1 | # Alerting rules loaded through `rule_files` in the Prometheus configs.
2 | # Every rule must hold for 2 minutes before firing and is labelled
3 | # `severity: page`.
4 | groups:
5 | - name: node-alerts
6 |   rules:
7 | 
8 |   # Fires for any scrape target whose `up` series is 0.
9 |   - alert: service_down
10 |     expr: up == 0
11 |     for: 2m
12 |     labels:
13 |       severity: page
14 |     annotations:
15 |       summary: "Instance {{ $labels.instance }} down"
16 |       description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
17 | 
18 |   # NOTE(review): node_load1 is compared against an absolute 0.5 and is not
19 |   # normalised by CPU count, so this will likely page on lightly loaded
20 |   # multi-core hosts - confirm the threshold is intentional.
21 |   - alert: high_load
22 |     expr: node_load1 > 0.5
23 |     for: 2m
24 |     labels:
25 |       severity: page
26 |     annotations:
27 |       summary: "Instance {{ $labels.instance }} under high load"
28 |       description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
29 | 
30 |   # The metric is divided by 1024 twice, so the threshold is 512 MiB if the
31 |   # metric is in bytes, as its name indicates.
32 |   - alert: low_memory
33 |     expr: node_memory_MemAvailable_bytes/1024/1024 < 512
34 |     for: 2m
35 |     labels:
36 |       severity: page
37 |     annotations:
38 |       summary: "Instance {{ $labels.instance }} has low memory"
39 |       description: "{{ $labels.instance }} of job {{ $labels.job }} has low memory."
40 | 
--------------------------------------------------------------------------------