├── Monitoring_Logging_Stack.png ├── alertfiles ├── elastrules.error.yaml ├── alertrules.task └── alertrules.nodes ├── configs ├── alertmanagerconfig.yml ├── prometheus.v1.8.2.yml ├── prometheus.yml ├── elastalert_supervisord.conf └── elastalertconfig.yaml ├── CHANGELOG.md ├── composefiles ├── docker-compose-logging.yml └── docker-compose-monitoring.yml └── README.md /Monitoring_Logging_Stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robinong79/docker-swarm-monitoring/HEAD/Monitoring_Logging_Stack.png -------------------------------------------------------------------------------- /alertfiles/elastrules.error.yaml: -------------------------------------------------------------------------------- 1 | # (Required) 2 | # Rule name, must be unique 3 | name: Log_Errors 4 | 5 | # (Required) 6 | # Type of alert. 7 | # the frequency rule type alerts when num_events events occur with timeframe time 8 | type: any 9 | 10 | # (Required) 11 | # Index to search, wildcard supported 12 | index: logstash-* 13 | 14 | filter: 15 | - query: 16 | query_string: 17 | query: "message:*error*" 18 | 19 | include: 20 | - tag 21 | - message 22 | 23 | # (Required) 24 | # The alert is used when a match is found 25 | alert: 26 | - "slack" 27 | slack_webhook_url: 'https://hooks.slack.com/services/' 28 | slack_username_override: 'Elast-Alert' 29 | slack_channel_override: '#' -------------------------------------------------------------------------------- /configs/alertmanagerconfig.yml: -------------------------------------------------------------------------------- 1 | 2 | global: 3 | resolve_timeout: 5m 4 | 5 | route: 6 | receiver: 'slack' 7 | repeat_interval: 15m 8 | group_interval: 5m 9 | group_wait: 1m 10 | routes: 11 | - receiver: 'msteams' 12 | match: 13 | alert: msteams 14 | 15 | receivers: 16 | - name: 'slack' 17 | slack_configs: 18 | - send_resolved: true 19 | api_url: 'https://hooks.slack.com/services/' 20 | 
username: 'Prometheus - Alerter' 21 | channel: '#' 22 | text: '{{ .CommonAnnotations.summary }} ---> {{ .CommonAnnotations.description }}' 23 | 24 | - name: "msteams" 25 | webhook_configs: 26 | - url: 'http://prom2teams:8089' 27 | send_resolved: false -------------------------------------------------------------------------------- /configs/prometheus.v1.8.2.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 5s 3 | evaluation_interval: 15s 4 | external_labels: 5 | monitor: 'Prometheus-Monitor' 6 | 7 | rule_files: 8 | - '/etc/prometheus-rules/alertrules.nodes' 9 | - '/etc/prometheus-rules/alertrules.task' 10 | 11 | scrape_configs: 12 | - job_name: 'cadvisor' 13 | dns_sd_configs: 14 | - names: 15 | - 'tasks.cadvisor' 16 | type: 'A' 17 | port: 8080 18 | 19 | - job_name: 'node-exporter' 20 | dns_sd_configs: 21 | - names: 22 | - 'tasks.node-exporter' 23 | type: 'A' 24 | port: 9100 25 | 26 | - job_name: 'alertmanager' 27 | dns_sd_configs: 28 | - names: 29 | - 'tasks.alertmanager' 30 | type: 'A' 31 | port: 9093 -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2018-03-02 4 | ### Added 5 | - ElasticSearch Curator to clean up indices 6 | - Few redo's to README.md 7 | 8 | ## 2018-03-01 9 | ### Added 10 | - Tagged master branch with v1.0 (Updated to Prom 2.x.x) 11 | - Tagged master branch with v1.1 (Added prom2teams) 12 | - Merged branch prom2teams to Master 13 | - Deleted old pre Prom 2.x.x. files 14 | 15 | ## 2018-02-24 16 | ### Added 17 | - Adjustments to compose files where needed. 
This to work with Prometheus 2.x.x 18 | - Added PROM2TEAMS in the stack so alerts can also be sent there through a separate route 19 | - Updated alert files with new Prometheus 2.x.x format 20 | - Fix in composefile on AlertManager 21 | - Several updates to README regarding creating directories 22 | -------------------------------------------------------------------------------- /configs/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 5s 3 | evaluation_interval: 15s 4 | external_labels: 5 | monitor: 'Prometheus-Monitor' 6 | 7 | rule_files: 8 | - '/etc/prometheus-rules/alertrules.nodes' 9 | - '/etc/prometheus-rules/alertrules.task' 10 | 11 | scrape_configs: 12 | - job_name: 'cadvisor' 13 | dns_sd_configs: 14 | - names: 15 | - 'tasks.cadvisor' 16 | type: 'A' 17 | port: 8080 18 | 19 | - job_name: 'node-exporter' 20 | dns_sd_configs: 21 | - names: 22 | - 'tasks.node-exporter' 23 | type: 'A' 24 | port: 9100 25 | 26 | - job_name: 'alertmanager' 27 | dns_sd_configs: 28 | - names: 29 | - 'tasks.alertmanager' 30 | type: 'A' 31 | port: 9093 32 | 33 | alerting: 34 | alertmanagers: 35 | - scheme: http 36 | static_configs: 37 | - targets: 38 | - "alertmanager:9093" -------------------------------------------------------------------------------- /configs/elastalert_supervisord.conf: -------------------------------------------------------------------------------- 1 | [unix_http_server] 2 | file=/var/run/elastalert_supervisor.sock 3 | 4 | [supervisord] 5 | logfile=/var/log/elastalert_supervisord.log 6 | logfile_maxbytes=1MB 7 | logfile_backups=2 8 | loglevel=debug 9 | nodaemon=false 10 | directory=%(here)s 11 | 12 | [rpcinterface:supervisor] 13 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 14 | 15 | [supervisorctl] 16 | serverurl=unix:///var/run/elastalert_supervisor.sock 17 | 18 | [program:elastalert] 19 | # running globally 20 | command = elastalert --config 
/opt/config/elastalertconfig.yaml --verbose 21 | 22 | # (alternative) using virtualenv 23 | # command=/path/to/venv/bin/elastalert --config /path/to/config.yaml --verbose 24 | process_name=elastalert 25 | autorestart=true 26 | startsecs=15 27 | stopsignal=INT 28 | stopasgroup=true 29 | killasgroup=true 30 | stderr_logfile=/var/log/elastalert_stderr.log 31 | stderr_logfile_maxbytes=5MB -------------------------------------------------------------------------------- /configs/elastalertconfig.yaml: -------------------------------------------------------------------------------- 1 | # This is the folder that contains the rule yaml files 2 | # Any .yaml file will be loaded as a rule 3 | rules_folder: /opt/rules 4 | 5 | # How often ElastAlert will query Elasticsearch 6 | # The unit can be anything from weeks to seconds 7 | run_every: 8 | seconds: 30 9 | 10 | # ElastAlert will buffer results from the most recent 11 | # period of time, in case some log sources are not in real time 12 | buffer_time: 13 | minutes: 15 14 | 15 | # The Elasticsearch hostname for metadata writeback 16 | # Note that every rule can have its own Elasticsearch host 17 | es_host: elasticsearch 18 | 19 | # The Elasticsearch port 20 | es_port: 9200 21 | 22 | # The index on es_host which is used for metadata storage 23 | # This can be a unmapped index, but it is recommended that you run 24 | # elastalert-create-index to set a mapping 25 | writeback_index: elastalert_status 26 | 27 | # If an alert fails for some reason, ElastAlert will retry 28 | # sending the alert until this time period has elapsed 29 | alert_time_limit: 30 | days: 1 -------------------------------------------------------------------------------- /alertfiles/alertrules.task: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: nodes 3 | rules: 4 | 5 | - alert: high_cpu_usage_on_container 6 | expr: 
sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~".+"}[1m])) by (container_label_com_docker_swarm_task_name,instance) * 100 > 10 7 | for: 5m 8 | annotations: 9 | summary: "HIGH CPU USAGE WARNING: TASK {{ $labels.container_label_com_docker_swarm_task_name }} on {{ $labels.instance }}" 10 | description: "{{ $labels.container_label_com_docker_swarm_task_name }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%." 11 | 12 | - alert: container_eating_memory 13 | expr: sum(container_memory_usage_bytes{container_label_com_docker_swarm_task_name=~".+"}) by (container_label_com_docker_swarm_task_name,instance,container_label_com_docker_swarm_service_name) > 2800000000 14 | for: 5m 15 | annotations: 16 | summary: "HIGH MEMORY USAGE WARNING: TASK {{ $labels.container_label_com_docker_swarm_task_name }} on {{ $labels.instance }}" 17 | description: "{{ $labels.container_label_com_docker_swarm_service_name }} is eating up a LOT of memory. Memory consumption of {{ $labels.container_label_com_docker_swarm_service_name }} is at {{ humanize $value}}." -------------------------------------------------------------------------------- /alertfiles/alertrules.nodes: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: nodes 3 | rules: 4 | 5 | - alert: high_cpu_usage_on_node 6 | expr: sum(rate(process_cpu_seconds_total[5m])) by (instance) * 100 > 70 7 | for: 5m 8 | annotations: 9 | summary: "HIGH CPU USAGE WARNING ON {{ $labels.instance }}" 10 | description: "{{ $labels.instance }} ({{ $labels.host }}) is using a LOT of CPU. CPU usage is {{ humanize $value}}%." 11 | 12 | - alert: high_memory_usage_on_node 13 | expr: ((node_memory_MemTotal-node_memory_MemAvailable)/node_memory_MemTotal)*100 > 80 14 | for: 5m 15 | annotations: 16 | summary: "HIGH MEMORY USAGE WARNING TASK ON {{ $labels.host }}" 17 | description: "{{ $labels.instance }} ({{ $labels.host }}) is using a LOT of MEMORY. 
MEMORY usage is over {{ humanize $value}}%." 18 | 19 | - alert: high_la_usage_on_node 20 | expr: node_load5 > 5 21 | for: 5m 22 | annotations: 23 | summary: "HIGH LOAD AVERAGE WARNING ON {{ $labels.instance }}" 24 | description: "{{ $labels.instance }} ({{ $labels.host }}) has a high load average. CPU usage is {{ humanize $value}}%." 25 | 26 | - alert: monitoring_service_down 27 | expr: up == 0 28 | for: 5m 29 | annotations: 30 | summary: "MONITORING SERVICE DOWN WARNING: NODE {{ $labels.host }}" 31 | description: "The monitoring service '{{ $labels.job }}' is down." 32 | 33 | - alert: node_running_out_of_disk_space 34 | expr: (node_filesystem_size{fstype="aufs", mountpoint="/"} - node_filesystem_free{fstype="aufs", mountpoint="/"}) * 100/ node_filesystem_size{fstype="aufs", mountpoint="/"} > 80 35 | for: 5m 36 | annotations: 37 | summary: "LOW DISK SPACE WARNING: NODE {{ $labels.host }}" 38 | description: "More than 80% of disk used. Disk usage {{ humanize $value }} GB." 39 | 40 | - alert: msteamstestrobin 41 | expr: node_procs_blocked{instance="10.0.0.55:9100",job="node-exporter"}==0 42 | for: 10s 43 | labels: 44 | alert: msteams 45 | annotations: 46 | summary: "test robin msteams docker" 47 | description: "jaja lekkah" 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /composefiles/docker-compose-logging.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | 5 | elasticsearch: 6 | image: elasticsearch 7 | networks: 8 | - logging 9 | volumes: 10 | - /var/dockerdata/elasticsearch/data:/usr/share/elasticsearch/data 11 | labels: 12 | com.docker.stack.namespace: "logging" 13 | com.docker.service.name: "elasticsearch" 14 | deploy: 15 | mode: replicated 16 | update_config: 17 | parallelism: 1 18 | delay: 60s 19 | restart_policy: 20 | condition: none 21 | max_attempts: 5 22 | labels: 23 | com.docker.stack.namespace: "logging" 24 | 
com.docker.service.name: "elasticsearch" 25 | 26 | curator: 27 | image: robinong79/docker-elasticsearch-curator:v1.0 28 | networks: 29 | - logging 30 | environment: 31 | - ELASTICSEARCH_HOST=elasticsearch 32 | - ELASTICSEARCH_PORT=9200 33 | - INTERVAL_IN_HOURS=12 34 | volumes: 35 | - /var/dockerdata/curator:/var/curator 36 | labels: 37 | com.docker.stack.namespace: "logging" 38 | com.docker.service.name: "curator" 39 | deploy: 40 | mode: replicated 41 | update_config: 42 | parallelism: 1 43 | delay: 60s 44 | restart_policy: 45 | condition: none 46 | max_attempts: 5 47 | labels: 48 | com.docker.stack.namespace: "logging" 49 | com.docker.service.name: "curator" 50 | 51 | logstash: 52 | image: logstash:latest 53 | depends_on: 54 | - elasticsearch 55 | ports: 56 | - "12201:12201/udp" 57 | - "8080:8082" 58 | networks: 59 | - logging 60 | command: -e "input { gelf {} } output { elasticsearch { hosts => ['elasticsearch']} stdout {} }" 61 | labels: 62 | com.docker.stack.namespace: "logging" 63 | com.docker.service.name: "logstash" 64 | deploy: 65 | mode: replicated 66 | update_config: 67 | parallelism: 1 68 | delay: 60s 69 | restart_policy: 70 | condition: none 71 | max_attempts: 5 72 | labels: 73 | com.docker.stack.namespace: "logging" 74 | com.docker.service.name: "logstash" 75 | 76 | kibana: 77 | image: kibana 78 | depends_on: 79 | - elasticsearch 80 | ports: 81 | - "5601:5601" 82 | networks: 83 | - logging 84 | environment: 85 | - ELASTICSEARCH_URL=http://elasticsearch:9200 86 | labels: 87 | com.docker.stack.namespace: "logging" 88 | com.docker.service.name: "kibana" 89 | deploy: 90 | mode: replicated 91 | update_config: 92 | parallelism: 1 93 | delay: 60s 94 | restart_policy: 95 | condition: none 96 | max_attempts: 5 97 | labels: 98 | com.docker.stack.namespace: "logging" 99 | com.docker.service.name: "kibana" 100 | 101 | networks: 102 | logging: 103 | external: true 104 | 105 | -------------------------------------------------------------------------------- 
/README.md: -------------------------------------------------------------------------------- 1 | # docker-swarm-monitoring 2 | Monitoring Docker Swarm with Prometheus and ELK stack. 3 | 4 | This repository describes and publishes our setup of monitoring a Docker Swarm with the help of the ELK repository and Prometheus with it's scrapers. 5 | 6 | ## Important note regarding Prometheus 2.x.x 7 | As of version 2.x.x. of Prometheus they changed the user to nobody. This means that if you use a persistent data directory is has to be chmod'ed to 777. 8 | In this example this will be the directory /var/dockerdata/prometheus/data. 9 | 10 | ## Prerequisites 11 | 12 | - Ubuntu (16.04 or higher) or RHEL host(s) or CentOS7 hosts 13 | - Docker v1.13.1 (minimum) (Latest full run on 17.12.0-ce) 14 | - Experimental Mode must be set to true (to be able to use "docker deploy" with compose v3 files) 15 | - Must run in Swarm Mode 16 | - 2 overlay networks ("monitoring" and "logging") 17 | 18 | ## Used components 19 | 20 | We have split up the monitoring into 2 basic parts: 21 | 22 | #### Monitoring Stack 23 | 24 | | Service | Purpose | 25 | | ------ | ----- | 26 | | [Prometheus](https://hub.docker.com/r/prom/prometheus/) | Central Metric Collecting | 27 | | [CAdvisor](https://hub.docker.com/r/google/cadvisor/) | Collecting Container information | 28 | | [Node-Exporter](https://hub.docker.com/r/basi/node-exporter/) | Collecting Hardware and OS information | 29 | | [AlertManager](https://hub.docker.com/r/prom/alertmanager/) | Sending out alerts raised from Prometheus | 30 | | [Prom2Teams](https://hub.docker.com/r/robinong79/prom2teams/) | Alerter that sends alerts to MSTeams (currently DEV tag till proven) | 31 | | [Grafana](https://hub.docker.com/r/grafana/grafana/) | Dashboard on top of Prometheus | 32 | 33 | #### Logging Stack 34 | 35 | | Service | Purpose | 36 | | ------ | ----- | 37 | | [ElasticSearch](https://hub.docker.com/_/elasticsearch/) | Central storage for Logdata | 38 | | 
[LogStash](https://hub.docker.com/_/logstash/) | Log formatter and processing pipeline | 39 | | [ElastAlert](https://hub.docker.com/r/ivankrizsan/elastalert/) | Sending out alerts raised on Logs | 40 | | [Curator](https://github.com/elastic/curator/) | Cleaning up Indices in Elasticsearch | 41 | | [Kibana](https://hub.docker.com/_/kibana/) | Dashboard on top of Elasticsearch | 42 | 43 | ## Schema of the stacks *Needs Updating* 44 | ![stackflow](/Monitoring_Logging_Stack.png "Monitoring Logging Stack") 45 | 46 | ## Preparation 47 | 48 | #### Directories 49 | 50 | Create the following directories: 51 | - /var/dockerdata/prometheus 52 | - /var/dockerdata/elasticsearch/data 53 | - /var/dockerdata/elastalert/logs 54 | - /var/dockerdata/alertmanager/data 55 | - /var/dockerdata/grafana 56 | - /var/dockerdata/curator 57 | 58 | #### Misc 59 | 60 | Host setting for ElasticSearch (Look [here](https://www.elastic.co/guide/en/elasticsearch/reference/5.0/vm-max-map-count.html) for more information) 61 | ``` 62 | $ sysctl -w vm.max_map_count=262144 63 | ``` 64 | 65 | Changing mod of persistent data directory (Only for Prometheus 2.x.x) 66 | 67 | 68 | ``` 69 | $ chmod 777 /var/dockerdata/prometheus/data 70 | ``` 71 | 72 | #### Docker 73 | 74 | ``` 75 | $ docker swarm init 76 | $ docker network create -d overlay monitoring 77 | $ docker network create -d overlay logging 78 | ``` 79 | 80 | #### Compose files 81 | 82 | Make sure to look at the compose files for the volume mappings. 83 | In this example everything is mapped to /var/dockerdata/{servicename}/{directories}. Adjust this to your own liking or create the same structure as used in this example. 84 | 85 | #### Config Files 86 | 87 | | Config file | Needs to be in | Remarks | 88 | | ----- | ----- | ----- | 89 | | alertmanagerconfig.yml | /var/dockerdata/alertmanager/ | The alerts go through Slack/MS Teams. 
Use your Slack Key and Slack channel name and MSTeams Webhook URL for it to work | 90 | | elastalert_supervisord.conf | /var/dockerdata/elastalert/config | - | 91 | | elastalertconfig.yaml | /var/dockerdata/elastalert/config | - | 92 | | prometheus.yml | /var/dockerdata/prometheus | - | 93 | 94 | #### Alert Files 95 | 96 | | Alert file | Needs to be in | Remarks | 97 | | ----- | ----- | ----- | 98 | | alertrules.nodes | /var/dockerdata/prometheus/rules | - | 99 | | alertrules.task | /var/dockerdata/prometheus/rules | - | 100 | | elastrules.error.yaml| /var/dockerdata/elastalert/rules | The alerts go through Slack. Use your Slack Key and channel name for it to work | 101 | 102 | #### Misc files 103 | | File | Needs to be in | Remarks | 104 | | ----- | ----- | ----- | 105 | | action.yml | /var/dockerdata/curator | Action file for Curator | 106 | 107 | 108 | ## Installation 109 | 110 | #### Logging Stack 111 | 112 | ``` 113 | $ docker deploy --compose-file docker-compose-logging.yml logging 114 | ``` 115 | 116 | #### Monitoring Stack 117 | 118 | ``` 119 | $ docker deploy --compose-file docker-compose-monitoring.yml monitoring 120 | ``` 121 | 122 | #### Container/Service logging to Logstash 123 | 124 | In order to get the logs from the services/containers to Logstash you need to start them with a different logdriver. 125 | 126 | Compose file: 127 | 128 | ``` 129 | logging: 130 | driver: gelf 131 | options: 132 | gelf-address: "udp://127.0.0.1:12201" 133 | tag: "" 134 | ``` 135 | 136 | Run command: 137 | 138 | ``` 139 | $ docker run \ 140 | --log-driver=gelf \ 141 | --log-opt gelf-address=udp://127.0.0.1:12201 \ 142 | --log-opt tag="" \ 143 | .... 144 | .... 145 | ``` 146 | 147 | ## Credits and License 148 | 149 | Basilio Vera's repo's (https://hub.docker.com/u/basi/) have been used for information. This got me a long way with building up a monitoring stack. 
150 | Also using his version of Node-Exporter and some alert files so we have access to HOST_NAME and some startup alerts. 151 | He made a really nice Grafana Dashboard too which we used as a base. You can check it out here (https://grafana.net/dashboards/609). 152 | 153 | The files are free to use and you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation. 154 | -------------------------------------------------------------------------------- /composefiles/docker-compose-monitoring.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | 5 | prometheus: 6 | image: prom/prometheus:v2.1.0 7 | ports: 8 | - "9090:9090" 9 | networks: 10 | - monitoring 11 | volumes: 12 | - /var/dockerdata/prometheus/data:/prometheus 13 | - /var/dockerdata/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml 14 | - /var/dockerdata/prometheus/rules:/etc/prometheus-rules 15 | command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.console.libraries=/etc/prometheus/console_libraries --web.console.templates=/etc/prometheus/consoles" 16 | labels: 17 | com.docker.stack.namespace: "monitoring" 18 | com.docker.service.name: "prometheus" 19 | deploy: 20 | mode: replicated 21 | replicas: 1 22 | update_config: 23 | parallelism: 1 24 | delay: 60s 25 | restart_policy: 26 | condition: on-failure 27 | max_attempts: 5 28 | labels: 29 | com.docker.stack.namespace: "monitoring" 30 | com.docker.service.name: "prometheus" 31 | 32 | cadvisor: 33 | image: google/cadvisor:latest 34 | ports: 35 | - "8081:8080" 36 | networks: 37 | - monitoring 38 | volumes: 39 | - /:/rootfs:ro 40 | - /var/run:/var/run:rw 41 | - /sys:/sys:ro 42 | - /var/lib/docker/:/var/lib/docker:ro 43 | labels: 44 | com.docker.stack.namespace: "monitoring" 45 | com.docker.service.name: "cadvisor" 46 | deploy: 47 | mode: global 48 | update_config: 49 | 
parallelism: 1 50 | delay: 60s 51 | restart_policy: 52 | condition: on-failure 53 | max_attempts: 5 54 | labels: 55 | com.docker.stack.namespace: "monitoring" 56 | com.docker.service.name: "cadvisor" 57 | 58 | node-exporter: 59 | image: basi/node-exporter:v1.15.0 60 | ports: 61 | - "9100:9100" 62 | networks: 63 | - monitoring 64 | environment: 65 | - HOST_HOSTNAME=/etc/hostname 66 | volumes: 67 | - /proc:/host/proc 68 | - /sys:/host/sys 69 | - /:/rootfs 70 | - /etc/hostname:/etc/hostname 71 | command: [--path.procfs=/host/proc,--path.sysfs=/host/sys,--collector.filesystem.ignored-mount-points="^/(sys|proc|dev|host|etc)($$|/)",--collector.textfile.directory=/etc/node-exporter/] 72 | labels: 73 | com.docker.stack.namespace: "monitoring" 74 | com.docker.service.name: "node-exporter" 75 | deploy: 76 | mode: global 77 | resources: 78 | limits: 79 | cpus: '0.10' 80 | memory: 32M 81 | update_config: 82 | parallelism: 1 83 | delay: 60s 84 | restart_policy: 85 | condition: on-failure 86 | max_attempts: 5 87 | labels: 88 | com.docker.stack.namespace: "monitoring" 89 | com.docker.service.name: "node-exporter" 90 | 91 | alertmanager: 92 | image: prom/alertmanager 93 | ports: 94 | - "9093:9093" 95 | networks: 96 | - monitoring 97 | volumes: 98 | - /var/dockerdata/alertmanager/alertmanagerconfig.yml:/etc/alertmanager/alertmanagerconfig.yml 99 | - /var/dockerdata/alertmanager/data:/etc/alertmanager/data 100 | command: [--config.file=/etc/alertmanager/alertmanagerconfig.yml,--storage.path=/etc/alertmanager/data] 101 | labels: 102 | com.docker.stack.namespace: "monitoring" 103 | com.docker.service.name: "alertmanager" 104 | deploy: 105 | mode: replicated 106 | replicas: 1 107 | update_config: 108 | parallelism: 1 109 | delay: 60s 110 | restart_policy: 111 | condition: on-failure 112 | max_attempts: 5 113 | labels: 114 | com.docker.stack.namespace: "monitoring" 115 | com.docker.service.name: "alertmanager" 116 | 117 | elastalert: 118 | image: ivankrizsan/elastalert:0.1.8 119 | 
ports: 120 | - "3030:3030" 121 | networks: 122 | - logging 123 | environment: 124 | 125 | - CONFIG_DIR=/opt/config 126 | - LOG_DIR=/opt/logs 127 | - ELASTALERT_CONFIG=/opt/config/elastalertconfig.yaml 128 | - ELASTICSEARCH_PORT=9200 129 | - ELASTICSEARCH_HOST=elasticsearch 130 | - ELASTALERT_SUPERVISOR_CONF=/opt/config/elastalert_supervisord.conf 131 | volumes: 132 | - /var/dockerdata/elastalert/config:/opt/config 133 | - /var/dockerdata/elastalert/rules:/opt/rules 134 | - /var/dockerdata/elastalert/logs:/opt/logs 135 | labels: 136 | com.docker.stack.namespace: "monitoring" 137 | com.docker.service.name: "elastalert" 138 | deploy: 139 | mode: replicated 140 | replicas: 1 141 | update_config: 142 | parallelism: 1 143 | delay: 60s 144 | restart_policy: 145 | condition: on-failure 146 | max_attempts: 5 147 | labels: 148 | com.docker.stack.namespace: "monitoring" 149 | com.docker.service.name: "elastalert" 150 | 151 | grafana: 152 | image: grafana/grafana 153 | ports: 154 | - "3000:3000" 155 | networks: 156 | - monitoring 157 | - logging 158 | volumes: 159 | - /var/dockerdata/grafana:/var/lib/grafana 160 | command: -e "GF_SERVER_ROOT_URL=http://grafana.local.com GF_SECURITY_ADMIN_PASSWORD=admin PROMETHEUS_ENDPOINT=http://prometheus:9090 ELASTICSEARCH_ENDPOINT=http://elasticsearch:9200" 161 | labels: 162 | com.docker.stack.namespace: "monitoring" 163 | com.docker.service.name: "grafana" 164 | deploy: 165 | mode: replicated 166 | replicas: 1 167 | update_config: 168 | parallelism: 1 169 | delay: 60s 170 | restart_policy: 171 | condition: on-failure 172 | max_attempts: 5 173 | labels: 174 | com.docker.stack.namespace: "monitoring" 175 | com.docker.service.name: "grafana" 176 | 177 | prom2teams: 178 | image: robinong79/prom2teams:dev 179 | ports: 180 | - "8089:8089" 181 | 182 | 183 | networks: 184 | - monitoring 185 | environment: 186 | - PROM2TEAMS_HOST=0.0.0.0 187 | - PROM2TEAMS_PORT=8089 188 | - 
PROM2TEAMS_LOGLEVEL=INFO 189 | - PROM2TEAMS_CONNECTOR= 190 | labels: 191 | com.docker.stack.namespace: "monitoring" 192 | com.docker.service.name: "prom2teams" 193 | deploy: 194 | mode: replicated 195 | replicas: 1 196 | placement: 197 | constraints: [node.platform.OS == linux] 198 | update_config: 199 | parallelism: 1 200 | delay: 60s 201 | restart_policy: 202 | condition: on-failure 203 | max_attempts: 5 204 | labels: 205 | com.docker.stack.namespace: "monitoring" 206 | com.docker.service.name: "prom2teams" 207 | 208 | 209 | networks: 210 | logging: 211 | external: true 212 | monitoring: 213 | external: true 214 | --------------------------------------------------------------------------------