├── Monitoring_Logging_Stack.png ├── alertfiles ├── elastrules.error.yaml ├── alertrules.task └── alertrules.nodes ├── configs ├── alertmanagerconfig.yml ├── prometheus.v1.8.2.yml ├── prometheus.yml ├── elastalert_supervisord.conf └── elastalertconfig.yaml ├── CHANGELOG.md ├── composefiles ├── docker-compose-logging.yml └── docker-compose-monitoring.yml └── README.md /Monitoring_Logging_Stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robinong79/docker-swarm-monitoring/HEAD/Monitoring_Logging_Stack.png -------------------------------------------------------------------------------- /alertfiles/elastrules.error.yaml: -------------------------------------------------------------------------------- 1 | # (Required) 2 | # Rule name, must be unique 3 | name: Log_Errors 4 | 5 | # (Required) 6 | # Type of alert. 7 | # the frequency rule type alerts when num_events events occur with timeframe time 8 | type: any 9 | 10 | # (Required) 11 | # Index to search, wildcard supported 12 | index: logstash-* 13 | 14 | filter: 15 | - query: 16 | query_string: 17 | query: "message:*error*" 18 | 19 | include: 20 | - tag 21 | - message 22 | 23 | # (Required) 24 | # The alert is used when a match is found 25 | alert: 26 | - "slack" 27 | slack_webhook_url: 'https://hooks.slack.com/services/' 28 | slack_username_override: 'Elast-Alert' 29 | slack_channel_override: '#' -------------------------------------------------------------------------------- /configs/alertmanagerconfig.yml: -------------------------------------------------------------------------------- 1 | 2 | global: 3 | resolve_timeout: 5m 4 | 5 | route: 6 | receiver: 'slack' 7 | repeat_interval: 15m 8 | group_interval: 5m 9 | group_wait: 1m 10 | routes: 11 | - receiver: 'msteams' 12 | match: 13 | alert: msteams 14 | 15 | receivers: 16 | - name: 'slack' 17 | slack_configs: 18 | - send_resolved: true 19 | api_url: 'https://hooks.slack.com/services/' 20 | 
username: 'Prometheus - Alerter' 21 | channel: '#' 22 | text: '{{ .CommonAnnotations.summary }} ---> {{ .CommonAnnotations.description }}' 23 | 24 | - name: "msteams" 25 | webhook_configs: 26 | - url: 'http://prom2teams:8089' 27 | send_resolved: false -------------------------------------------------------------------------------- /configs/prometheus.v1.8.2.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 5s 3 | evaluation_interval: 15s 4 | external_labels: 5 | monitor: 'Prometheus-Monitor' 6 | 7 | rule_files: 8 | - '/etc/prometheus-rules/alertrules.nodes' 9 | - '/etc/prometheus-rules/alertrules.task' 10 | 11 | scrape_configs: 12 | - job_name: 'cadvisor' 13 | dns_sd_configs: 14 | - names: 15 | - 'tasks.cadvisor' 16 | type: 'A' 17 | port: 8080 18 | 19 | - job_name: 'node-exporter' 20 | dns_sd_configs: 21 | - names: 22 | - 'tasks.node-exporter' 23 | type: 'A' 24 | port: 9100 25 | 26 | - job_name: 'alertmanager' 27 | dns_sd_configs: 28 | - names: 29 | - 'tasks.alertmanager' 30 | type: 'A' 31 | port: 9093 -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2018-03-02 4 | ### Added 5 | - ElasticSearch Curator to clean up indices 6 | - Few redo's to README.md 7 | 8 | ## 2018-03-01 9 | ### Added 10 | - Tagged master branch with v1.0 (Updated to Prom 2.x.x) 11 | - Tagged master branch with v1.1 (Added prom2teams) 12 | - Merged branch prom2teams to Master 13 | - Deleted old pre Prom 2.x.x. files 14 | 15 | ## 2018-02-24 16 | ### Added 17 | - Adjustments to compose files where needed. 
This to work with Prometheus 2.x.x 18 | - Added PROM2TEAMS in the stack so alerts can also be sent there through a separate route 19 | - Updated alert files with new Prometheus 2.x.x format 20 | - Fix in composefile on AlertManager 21 | - Several updates to README regarding creating directories 22 | -------------------------------------------------------------------------------- /configs/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 5s 3 | evaluation_interval: 15s 4 | external_labels: 5 | monitor: 'Prometheus-Monitor' 6 | 7 | rule_files: 8 | - '/etc/prometheus-rules/alertrules.nodes' 9 | - '/etc/prometheus-rules/alertrules.task' 10 | 11 | scrape_configs: 12 | - job_name: 'cadvisor' 13 | dns_sd_configs: 14 | - names: 15 | - 'tasks.cadvisor' 16 | type: 'A' 17 | port: 8080 18 | 19 | - job_name: 'node-exporter' 20 | dns_sd_configs: 21 | - names: 22 | - 'tasks.node-exporter' 23 | type: 'A' 24 | port: 9100 25 | 26 | - job_name: 'alertmanager' 27 | dns_sd_configs: 28 | - names: 29 | - 'tasks.alertmanager' 30 | type: 'A' 31 | port: 9093 32 | 33 | alerting: 34 | alertmanagers: 35 | - scheme: http 36 | static_configs: 37 | - targets: 38 | - "alertmanager:9093" -------------------------------------------------------------------------------- /configs/elastalert_supervisord.conf: -------------------------------------------------------------------------------- 1 | [unix_http_server] 2 | file=/var/run/elastalert_supervisor.sock 3 | 4 | [supervisord] 5 | logfile=/var/log/elastalert_supervisord.log 6 | logfile_maxbytes=1MB 7 | logfile_backups=2 8 | loglevel=debug 9 | nodaemon=false 10 | directory=%(here)s 11 | 12 | [rpcinterface:supervisor] 13 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 14 | 15 | [supervisorctl] 16 | serverurl=unix:///var/run/elastalert_supervisor.sock 17 | 18 | [program:elastalert] 19 | # running globally 20 | command = elastalert --config 
/opt/config/elastalertconfig.yaml --verbose 21 | 22 | # (alternative) using virtualenv 23 | # command=/path/to/venv/bin/elastalert --config /path/to/config.yaml --verbose 24 | process_name=elastalert 25 | autorestart=true 26 | startsecs=15 27 | stopsignal=INT 28 | stopasgroup=true 29 | killasgroup=true 30 | stderr_logfile=/var/log/elastalert_stderr.log 31 | stderr_logfile_maxbytes=5MB -------------------------------------------------------------------------------- /configs/elastalertconfig.yaml: -------------------------------------------------------------------------------- 1 | # This is the folder that contains the rule yaml files 2 | # Any .yaml file will be loaded as a rule 3 | rules_folder: /opt/rules 4 | 5 | # How often ElastAlert will query Elasticsearch 6 | # The unit can be anything from weeks to seconds 7 | run_every: 8 | seconds: 30 9 | 10 | # ElastAlert will buffer results from the most recent 11 | # period of time, in case some log sources are not in real time 12 | buffer_time: 13 | minutes: 15 14 | 15 | # The Elasticsearch hostname for metadata writeback 16 | # Note that every rule can have its own Elasticsearch host 17 | es_host: elasticsearch 18 | 19 | # The Elasticsearch port 20 | es_port: 9200 21 | 22 | # The index on es_host which is used for metadata storage 23 | # This can be a unmapped index, but it is recommended that you run 24 | # elastalert-create-index to set a mapping 25 | writeback_index: elastalert_status 26 | 27 | # If an alert fails for some reason, ElastAlert will retry 28 | # sending the alert until this time period has elapsed 29 | alert_time_limit: 30 | days: 1 -------------------------------------------------------------------------------- /alertfiles/alertrules.task: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: nodes 3 | rules: 4 | 5 | - alert: high_cpu_usage_on_container 6 | expr: 
sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~".+"}[1m])) by (container_label_com_docker_swarm_task_name,instance) * 100 > 10 7 | for: 5m 8 | annotations: 9 | summary: "HIGH CPU USAGE WARNING: TASK {{ $labels.container_label_com_docker_swarm_task_name }} on {{ $labels.instance }}" 10 | description: "{{ $labels.container_label_com_docker_swarm_task_name }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%." 11 | 12 | - alert: container_eating_memory 13 | expr: sum(container_memory_usage_bytes{container_label_com_docker_swarm_task_name=~".+"}) by (container_label_com_docker_swarm_task_name,instance,container_label_com_docker_swarm_service_name) > 2800000000 14 | for: 5m 15 | annotations: 16 | summary: "HIGH MEMORY USAGE WARNING: TASK {{ $labels.container_label_com_docker_swarm_task_name }} on {{ $labels.instance }}" 17 | description: "{{ $labels.container_label_com_docker_swarm_service_name }} is eating up a LOT of memory. Memory consumption of {{ $labels.container_label_com_docker_swarm_service_name }} is at {{ humanize $value}}." -------------------------------------------------------------------------------- /alertfiles/alertrules.nodes: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: nodes 3 | rules: 4 | 5 | - alert: high_cpu_usage_on_node 6 | expr: sum(rate(process_cpu_seconds_total[5m])) by (instance) * 100 > 70 7 | for: 5m 8 | annotations: 9 | summary: "HIGH CPU USAGE WARNING ON {{ $labels.instance }}" 10 | description: "{{ $labels.instance }} ({{ $labels.host }}) is using a LOT of CPU. CPU usage is {{ humanize $value}}%." 11 | 12 | - alert: high_memory_usage_on_node 13 | expr: ((node_memory_MemTotal-node_memory_MemAvailable)/node_memory_MemTotal)*100 > 80 14 | for: 5m 15 | annotations: 16 | summary: "HIGH MEMORY USAGE WARNING TASK ON {{ $labels.host }}" 17 | description: "{{ $labels.instance }} ({{ $labels.host }}) is using a LOT of MEMORY. 
MEMORY usage is over {{ humanize $value}}%." 18 | 19 | - alert: high_la_usage_on_node 20 | expr: node_load5 > 5 21 | for: 5m 22 | annotations: 23 | summary: "HIGH LOAD AVERAGE WARNING ON {{ $labels.instance }}" 24 | description: "{{ $labels.instance }} ({{ $labels.host }}) has a high load average. CPU usage is {{ humanize $value}}%." 25 | 26 | - alert: monitoring_service_down 27 | expr: up == 0 28 | for: 5m 29 | annotations: 30 | summary: "MONITORING SERVICE DOWN WARNING: NODE {{ $labels.host }}" 31 | description: "The monitoring service '{{ $labels.job }}' is down." 32 | 33 | - alert: node_running_out_of_disk_space 34 | expr: (node_filesystem_size{fstype="aufs", mountpoint="/"} - node_filesystem_free{fstype="aufs", mountpoint="/"}) * 100/ node_filesystem_size{fstype="aufs", mountpoint="/"} > 80 35 | for: 5m 36 | annotations: 37 | summary: "LOW DISK SPACE WARNING: NODE {{ $labels.host }}" 38 | description: "More than 80% of disk used. Disk usage {{ humanize $value }} GB." 39 | 40 | - alert: msteamstestrobin 41 | expr: node_procs_blocked{instance="10.0.0.55:9100",job="node-exporter"}==0 42 | for: 10s 43 | labels: 44 | alert: msteams 45 | annotations: 46 | summary: "test robin msteams docker" 47 | description: "jaja lekkah" 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /composefiles/docker-compose-logging.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | 5 | elasticsearch: 6 | image: elasticsearch 7 | networks: 8 | - logging 9 | volumes: 10 | - /var/dockerdata/elasticsearch/data:/usr/share/elasticsearch/data 11 | labels: 12 | com.docker.stack.namespace: "logging" 13 | com.docker.service.name: "elasticsearch" 14 | deploy: 15 | mode: replicated 16 | update_config: 17 | parallelism: 1 18 | delay: 60s 19 | restart_policy: 20 | condition: none 21 | max_attempts: 5 22 | labels: 23 | com.docker.stack.namespace: "logging" 24 | 
com.docker.service.name: "elasticsearch" 25 | 26 | curator: 27 | image: robinong79/docker-elasticsearch-curator:v1.0 28 | networks: 29 | - logging 30 | environment: 31 | - ELASTICSEARCH_HOST=elasticsearch 32 | - ELASTICSEARCH_PORT=9200 33 | - INTERVAL_IN_HOURS=12 34 | volumes: 35 | - /var/dockerdata/curator:/var/curator 36 | labels: 37 | com.docker.stack.namespace: "logging" 38 | com.docker.service.name: "curator" 39 | deploy: 40 | mode: replicated 41 | update_config: 42 | parallelism: 1 43 | delay: 60s 44 | restart_policy: 45 | condition: none 46 | max_attempts: 5 47 | labels: 48 | com.docker.stack.namespace: "logging" 49 | com.docker.service.name: "curator" 50 | 51 | logstash: 52 | image: logstash:latest 53 | depends_on: 54 | - elasticsearch 55 | ports: 56 | - "12201:12201/udp" 57 | - "8080:8082" 58 | networks: 59 | - logging 60 | command: -e "input { gelf {} } output { elasticsearch { hosts => ['elasticsearch']} stdout {} }" 61 | labels: 62 | com.docker.stack.namespace: "logging" 63 | com.docker.service.name: "logstash" 64 | deploy: 65 | mode: replicated 66 | update_config: 67 | parallelism: 1 68 | delay: 60s 69 | restart_policy: 70 | condition: none 71 | max_attempts: 5 72 | labels: 73 | com.docker.stack.namespace: "logging" 74 | com.docker.service.name: "logstash" 75 | 76 | kibana: 77 | image: kibana 78 | depends_on: 79 | - elasticsearch 80 | ports: 81 | - "5601:5601" 82 | networks: 83 | - logging 84 | environment: 85 | - ELASTICSEARCH_URL=http://elasticsearch:9200 86 | labels: 87 | com.docker.stack.namespace: "logging" 88 | com.docker.service.name: "kibana" 89 | deploy: 90 | mode: replicated 91 | update_config: 92 | parallelism: 1 93 | delay: 60s 94 | restart_policy: 95 | condition: none 96 | max_attempts: 5 97 | labels: 98 | com.docker.stack.namespace: "logging" 99 | com.docker.service.name: "kibana" 100 | 101 | networks: 102 | logging: 103 | external: true 104 | 105 | -------------------------------------------------------------------------------- 
/README.md: -------------------------------------------------------------------------------- 1 | # docker-swarm-monitoring 2 | Monitoring Docker Swarm with Prometheus and ELK stack. 3 | 4 | This repository describes and publishes our setup of monitoring a Docker Swarm with the help of the ELK repository and Prometheus with it's scrapers. 5 | 6 | ## Important note regarding Prometheus 2.x.x 7 | As of version 2.x.x. of Prometheus they changed the user to nobody. This means that if you use a persistent data directory is has to be chmod'ed to 777. 8 | In this example this will be the directory /var/dockerdata/prometheus/data. 9 | 10 | ## Prerequisites 11 | 12 | - Ubuntu (16.04 or higher) or RHEL host(s) or CentOS7 hosts 13 | - Docker v1.13.1 (minimum) (Latest full run on 17.12.0-ce) 14 | - Experimental Mode must be set to true (to be able to use "docker deploy" with compose v3 files) 15 | - Must run in Swarm Mode 16 | - 2 overlay networks ("monitoring" and "logging") 17 | 18 | ## Used components 19 | 20 | We have split up the monitoring into 2 basic parts: 21 | 22 | #### Monitoring Stack 23 | 24 | | Service | Purpose | 25 | | ------ | ----- | 26 | | [Prometheus](https://hub.docker.com/r/prom/prometheus/) | Central Metric Collecting | 27 | | [CAdvisor](https://hub.docker.com/r/google/cadvisor/) | Collecting Container information | 28 | | [Node-Exporter](https://hub.docker.com/r/basi/node-exporter/) | Collecting Hardware and OS information | 29 | | [AlertManager](https://hub.docker.com/r/prom/alertmanager/) | Sending out alerts raised from Prometheus | 30 | | [Prom2Teams](https://hub.docker.com/r/robinong79/prom2teams/) | Alerter that sends alerts to MSTeams (currently DEV tag till proven) | 31 | | [Grafana](https://hub.docker.com/r/grafana/grafana/) | Dashboard on top of Prometheus | 32 | 33 | #### Logging Stack 34 | 35 | | Service | Purpose | 36 | | ------ | ----- | 37 | | [ElasticSearch](https://hub.docker.com/_/elasticsearch/) | Central storage for Logdata | 38 | | 
[LogStash](https://hub.docker.com/_/logstash/) | Log formatter and processing pipeline | 39 | | [ElastAlert](https://hub.docker.com/r/ivankrizsan/elastalert/) | Sending out alerts raised on Logs | 40 | | [Curator](https://github.com/elastic/curator/) | Cleaning up Indices in Elasticsearch | 41 | | [Kibana](https://hub.docker.com/_/kibana/) | Dashboard on top of Elasticsearch | 42 | 43 | ## Schema of the stacks *Needs Updating* 44 | ![stackflow](/Monitoring_Logging_Stack.png "Monitoring Logging Stack") 45 | 46 | ## Preparation 47 | 48 | #### Directories 49 | 50 | Create the following directories: 51 | - /var/dockerdata/prometheus 52 | - /var/dockerdata/elasticsearch/data 53 | - /var/dockerdata/elastalert/logs 54 | - /var/dockerdata/alertmanager/data 55 | - /var/dockerdata/grafana 56 | - /var/dockerdata/curator 57 | 58 | #### Misc 59 | 60 | Host setting for ElasticSearch (Look [here](https://www.elastic.co/guide/en/elasticsearch/reference/5.0/vm-max-map-count.html) for more information) 61 | ``` 62 | $ sysctl -w vm.max_map_count=262144 63 | ``` 64 | 65 | Changing mod of persistent data directory (Only for Prometheus 2.x.x) 66 | 67 | 68 | ``` 69 | $ chmod 777 /var/dockerdata/prometheus/data 70 | ``` 71 | 72 | #### Docker 73 | 74 | ``` 75 | $ docker swarm init 76 | $ docker network create -d overlay monitoring 77 | $ docker network create -d overlay logging 78 | ``` 79 | 80 | #### Compose files 81 | 82 | Make sure to look at the compose files for the volume mappings. 83 | In this example everything is mapped to /var/dockerdata/{servicename}/{directories}. Adjust this to your own liking or create the same structure as used in this example. 84 | 85 | #### Config Files 86 | 87 | | Config file | Needs to be in | Remarks | 88 | | ----- | ----- | ----- | 89 | | alertmanagerconfig.yml | /var/dockerdata/alertmanager/ | The alerts go through Slack/MS Teams. 
Use your Slack Key and Slack channel name and MSTeams Webhook URL for it to work | 90 | | elastalert_supervisord.conf | /var/dockerdata/elastalert/config | - | 91 | | elastalertconfig.yaml | /var/dockerdata/elastalert/config | - | 92 | | prometheus.yml | /var/dockerdata/prometheus | - | 93 | 94 | #### Alert Files 95 | 96 | | Alert file | Needs to be in | Remarks | 97 | | ----- | ----- | ----- | 98 | | alertrules.nodes | /var/dockerdata/prometheus/rules | - | 99 | | alertrules.task | /var/dockerdata/prometheus/rules | - | 100 | | elastrules.error.yaml| /var/dockerdata/elastalert/rules | The alerts go through Slack. Use your Slack Key and channel name for it to work | 101 | 102 | #### Misc files 103 | | File | Needs to be in | Remarks | 104 | | ----- | ----- | ----- | 105 | | action.yml | /var/dockerdata/curator | Action file for Curator | 106 | 107 | 108 | ## Installation 109 | 110 | #### Logging Stack 111 | 112 | ``` 113 | $ docker deploy --compose-file docker-compose-logging.yml logging 114 | ``` 115 | 116 | #### Monitoring Stack 117 | 118 | ``` 119 | $ docker deploy --compose-file docker-compose-monitoring.yml monitoring 120 | ``` 121 | 122 | #### Container/Service logging to Logstash 123 | 124 | In order to get the logs from the services/containers to Logstash you need to start them with a different logdriver. 125 | 126 | Compose file: 127 | 128 | ``` 129 | logging: 130 | driver: gelf 131 | options: 132 | gelf-address: "udp://127.0.0.1:12201" 133 | tag: "" 134 | ``` 135 | 136 | Run command: 137 | 138 | ``` 139 | $ docker run \ 140 | --log-driver=gelf \ 141 | --log-opt gelf-address=udp://127.0.0.1:12201 \ 142 | --log-opt tag="" \ 143 | .... 144 | .... 145 | ``` 146 | 147 | ## Credits and License 148 | 149 | Basilio Vera's repo's (https://hub.docker.com/u/basi/) have been used for information. This got me a long way with building up a monitoring stack. 
150 | Also using his version of Node-Exporter and some alert files so we have access to HOST_NAME and some startup alerts. 151 | He made a really nice Grafana Dashboard too which we used as a base. You can check it out here (https://grafana.net/dashboards/609). 152 | 153 | The files are free to use and you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation. 154 | -------------------------------------------------------------------------------- /composefiles/docker-compose-monitoring.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | 5 | prometheus: 6 | image: prom/prometheus:v2.1.0 7 | ports: 8 | - "9090:9090" 9 | networks: 10 | - monitoring 11 | volumes: 12 | - /var/dockerdata/prometheus/data:/prometheus 13 | - /var/dockerdata/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml 14 | - /var/dockerdata/prometheus/rules:/etc/prometheus-rules 15 | command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.console.libraries=/etc/prometheus/console_libraries --web.console.templates=/etc/prometheus/consoles" 16 | labels: 17 | com.docker.stack.namespace: "monitoring" 18 | com.docker.service.name: "prometheus" 19 | deploy: 20 | mode: replicated 21 | replicas: 1 22 | update_config: 23 | parallelism: 1 24 | delay: 60s 25 | restart_policy: 26 | condition: on-failure 27 | max_attempts: 5 28 | labels: 29 | com.docker.stack.namespace: "monitoring" 30 | com.docker.service.name: "prometheus" 31 | 32 | cadvisor: 33 | image: google/cadvisor:latest 34 | ports: 35 | - "8081:8080" 36 | networks: 37 | - monitoring 38 | volumes: 39 | - /:/rootfs:ro 40 | - /var/run:/var/run:rw 41 | - /sys:/sys:ro 42 | - /var/lib/docker/:/var/lib/docker:ro 43 | labels: 44 | com.docker.stack.namespace: "monitoring" 45 | com.docker.service.name: "cadvisor" 46 | deploy: 47 | mode: global 48 | update_config: 49 | 
parallelism: 1 50 | delay: 60s 51 | restart_policy: 52 | condition: on-failure 53 | max_attempts: 5 54 | labels: 55 | com.docker.stack.namespace: "monitoring" 56 | com.docker.service.name: "cadvisor" 57 | 58 | node-exporter: 59 | image: basi/node-exporter:v1.15.0 60 | ports: 61 | - "9100:9100" 62 | networks: 63 | - monitoring 64 | environment: 65 | - HOST_HOSTNAME=/etc/hostname 66 | volumes: 67 | - /proc:/host/proc 68 | - /sys:/host/sys 69 | - /:/rootfs 70 | - /etc/hostname:/etc/hostname 71 | command: [--path.procfs=/host/proc,--path.sysfs=/host/sys,--collector.filesystem.ignored-mount-points="^/(sys|proc|dev|host|etc)($$|/)",--collector.textfile.directory=/etc/node-exporter/] 72 | labels: 73 | com.docker.stack.namespace: "monitoring" 74 | com.docker.service.name: "node-exporter" 75 | deploy: 76 | mode: global 77 | resources: 78 | limits: 79 | cpus: '0.10' 80 | memory: 32M 81 | update_config: 82 | parallelism: 1 83 | delay: 60s 84 | restart_policy: 85 | condition: on-failure 86 | max_attempts: 5 87 | labels: 88 | com.docker.stack.namespace: "monitoring" 89 | com.docker.service.name: "node-exporter" 90 | 91 | alertmanager: 92 | image: prom/alertmanager 93 | ports: 94 | - "9093:9093" 95 | networks: 96 | - monitoring 97 | volumes: 98 | - /var/dockerdata/alertmanager/alertmanagerconfig.yml:/etc/alertmanager/alertmanagerconfig.yml 99 | - /var/dockerdata/alertmanager/data:/etc/alertmanager/data 100 | command: [--config.file=/etc/alertmanager/alertmanagerconfig.yml,--storage.path=/etc/alertmanager/data] 101 | labels: 102 | com.docker.stack.namespace: "monitoring" 103 | com.docker.service.name: "alertmanager" 104 | deploy: 105 | mode: replicated 106 | replicas: 1 107 | update_config: 108 | parallelism: 1 109 | delay: 60s 110 | restart_policy: 111 | condition: on-failure 112 | max_attempts: 5 113 | labels: 114 | com.docker.stack.namespace: "monitoring" 115 | com.docker.service.name: "alertmanager" 116 | 117 | elastalert: 118 | image: ivankrizsan/elastalert:0.1.8 119 | 
ports: 120 | - "3030:3030" 121 | networks: 122 | - logging 123 | environment: 124 | 125 | - CONFIG_DIR=/opt/config 126 | - LOG_DIR=/opt/logs 127 | - ELASTALERT_CONFIG=/opt/config/elastalertconfig.yaml 128 | - ELASTICSEARCH_PORT=9200 129 | - ELASTICSEARCH_HOST=elasticsearch 130 | - ELASTALERT_SUPERVISOR_CONF=/opt/config/elastalert_supervisord.conf 131 | volumes: 132 | - /var/dockerdata/elastalert/config:/opt/config 133 | - /var/dockerdata/elastalert/rules:/opt/rules 134 | - /var/dockerdata/elastalert/logs:/opt/logs 135 | labels: 136 | com.docker.stack.namespace: "monitoring" 137 | com.docker.service.name: "elastalert" 138 | deploy: 139 | mode: replicated 140 | replicas: 1 141 | update_config: 142 | parallelism: 1 143 | delay: 60s 144 | restart_policy: 145 | condition: on-failure 146 | max_attempts: 5 147 | labels: 148 | com.docker.stack.namespace: "monitoring" 149 | com.docker.service.name: "elastalert" 150 | 151 | grafana: 152 | image: grafana/grafana 153 | ports: 154 | - "3000:3000" 155 | networks: 156 | - monitoring 157 | - logging 158 | volumes: 159 | - /var/dockerdata/grafana:/var/lib/grafana 160 | command: -e "GF_SERVER_ROOT_URL=http://grafana.local.com GF_SECURITY_ADMIN_PASSWORD=admin PROMETHEUS_ENDPOINT=http://prometheus:9090 ELASTICSEARCH_ENDPOINT=http://elasticsearch:9200" 161 | labels: 162 | com.docker.stack.namespace: "monitoring" 163 | com.docker.service.name: "grafana" 164 | deploy: 165 | mode: replicated 166 | replicas: 1 167 | update_config: 168 | parallelism: 1 169 | delay: 60s 170 | restart_policy: 171 | condition: on-failure 172 | max_attempts: 5 173 | labels: 174 | com.docker.stack.namespace: "monitoring" 175 | com.docker.service.name: "grafana" 176 | 177 | prom2teams: 178 | image: robinong79/prom2teams:dev 179 | ports: 180 | - "8089:8089" 181 | 182 | 183 | networks: 184 | - monitoring 185 | environment: 186 | - PROM2TEAMS_HOST=0.0.0.0 187 | - PROM2TEAMS_PORT=8089 188 | - 
PROM2TEAMS_LOGLEVEL=INFO 189 | - PROM2TEAMS_CONNECTOR= 190 | labels: 191 | com.docker.stack.namespace: "monitoring" 192 | com.docker.service.name: "prom2teams" 193 | deploy: 194 | mode: replicated 195 | replicas: 1 196 | placement: 197 | constraints: [node.platform.OS == linux] 198 | update_config: 199 | parallelism: 1 200 | delay: 60s 201 | restart_policy: 202 | condition: on-failure 203 | max_attempts: 5 204 | labels: 205 | com.docker.stack.namespace: "monitoring" 206 | com.docker.service.name: "prom2teams" 207 | 208 | 209 | networks: 210 | logging: 211 | external: true 212 | monitoring: 213 | external: true 214 | --------------------------------------------------------------------------------