├── .gitignore
├── README.md
├── docker-swarm-sd
├── README.md
├── demo.sh
├── prom-config.yml
└── scrape-data.txt
├── jvm-alerting
├── .gitignore
├── README.md
├── alert-template.tmpl
├── alertmanager-config.default.yml
├── demo.sh
├── jmx-exporter-config.yml
├── jmx_prometheus_javaagent-0.3.1.jar
├── prom-alert-rules.yml
└── prom-config.yml.tmpl
├── jvm-monitoring
├── .gitignore
├── README.md
├── demo.sh
├── jmx-exporter-config.yml
├── jmx_prometheus_javaagent-0.3.1.jar
└── prom-config.yml.tmpl
└── tomcat8
├── .gitignore
├── README.md
├── alert-template.tmpl
├── alertmanager-config.default.yml
├── demo.sh
├── jmx-exporter-config.yml
├── jmx_prometheus_javaagent-0.3.1.jar
├── prom-alert-rules.yml
└── prom-config.yml.tmpl
/.gitignore:
--------------------------------------------------------------------------------
1 | prom-data
2 | grafana-data
3 | alertmanager-data
4 |
5 | .idea
6 | *.iml
7 |
8 | .classpath
9 | .project
10 | .settings
11 |
12 | target
13 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # prometheus-learn
2 |
3 | 学习Prometheus
4 |
5 | * [监控JVM的例子](jvm-monitoring)
 6 | * [JVM告警的例子](jvm-alerting)
7 | * [监控Tomcat的例子](tomcat8)
8 | * [监控Docker Swarm Overlay网络中的容器](docker-swarm-sd)
--------------------------------------------------------------------------------
/docker-swarm-sd/README.md:
--------------------------------------------------------------------------------
1 | # 监控Docker swarm overlay网络中的container
2 |
 3 | * 对应文章: [Prometheus监控Docker Swarm Overlay网络中的容器](https://chanjarster.github.io/post/p8s-scrape-container-in-docker-swarm-overlay-network/)
4 | * Prometheus rules: None
5 | * Alertmanager config: None
6 |
7 |
8 | ```bash
9 | # docker run所有容器
10 | ./demo.sh run
11 |
12 | # docker stop所有容器
13 | ./demo.sh stop
14 |
15 | # docker start所有容器
16 | ./demo.sh start
17 |
18 | # docker restart所有容器
19 | ./demo.sh restart
20 |
21 | # 清理prom-data
22 | ./demo.sh clear-data
23 |
24 | # docker rm所有容器
25 | ./demo.sh clear-container
26 | ```
--------------------------------------------------------------------------------
/docker-swarm-sd/demo.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Demo driver for the docker-swarm-sd example: runs a Prometheus container
# and mock scrape targets (a 3-replica Swarm service plus one standalone
# container) on the pre-existing "test-overlay" network.
CMD_NAME=$(basename "$0")
COMMAND=$1

# Pull and start Prometheus, mounting this directory as its config dir
# and ./prom-data as its TSDB storage dir.
function run-prom
{
  docker pull prom/prometheus

  mkdir -p prom-data

  docker run -d \
    --name=prometheus \
    --network test-overlay \
    -p 9090:9090 \
    -v "$(pwd)":/prometheus-config \
    -v "$(pwd)/prom-data":/prometheus \
    prom/prometheus --config.file=/prometheus-config/prom-config.yml

}

# Start the mock scrape targets: Swarm service "mock" (3 replicas) and the
# container "standalone-mock", both serving ./scrape-data.txt.
function run-mocks
{
  docker pull chanjarster/prometheus-mock-data:latest

  docker service create \
    --name mock \
    --replicas 3 \
    --network test-overlay \
    --limit-memory 96M \
    --mount type=bind,src="$(pwd)/scrape-data.txt",dst=/home/java-app/etc/scrape-data.txt \
    chanjarster/prometheus-mock-data:latest

  docker run -d \
    -v "$(pwd)/scrape-data.txt":/home/java-app/etc/scrape-data.txt \
    --network test-overlay \
    --name standalone-mock \
    chanjarster/prometheus-mock-data:latest
}


# Create and start everything (mocks first so Prometheus can scrape them).
function run
{
  echo 'Run all containers'

  run-mocks
  run-prom

  echo 'Open browser: http://localhost:9090 for Prometheus'

}

# Start previously created containers; scale the mock service back up.
function start
{
  echo 'Scale service mock to 3'
  docker service scale mock=3

  echo 'Start standalone-mock'
  docker start standalone-mock

  echo 'Start prometheus'
  docker start prometheus
}

# Stop all containers; scale the mock service down to zero.
function stop
{
  echo 'Stop prometheus'
  docker stop prometheus

  echo 'Stop standalone-mock'
  docker stop standalone-mock

  echo 'Scale service mock to 0'
  docker service scale mock=0
}

# Stop then start everything (mock tasks are recreated via scale 0 -> 3).
function restart
{

  echo 'Restart all containers'

  docker stop prometheus
  docker service scale mock=0
  docker service scale mock=3
  docker restart standalone-mock
  docker start prometheus
}

# Wipe Prometheus' on-disk data (glob stays outside the quotes on purpose).
function clear-data
{
  echo "Clear all containers' data"
  rm -rf "$(pwd)"/prom-data/*
}

# Stop and remove every container and the mock service.
function clear-container
{
  echo 'Clear all containers'
  stop
  docker service rm mock
  docker rm prometheus standalone-mock
}

# Ask Prometheus (PID 1 inside its container) to reload its configuration.
function reload-config
{
  echo 'Reload Prometheus config'
  docker exec -t prometheus kill -SIGHUP 1
}

function usage
{
  echo "Usage: ${CMD_NAME} run|start|stop|restart|reload-config|clear-data|clear-container"
}


case "$COMMAND" in
  'run')
    run
    ;;
  'start')
    start
    ;;
  'stop')
    stop
    ;;
  'restart')
    restart
    ;;
  'reload-config')
    reload-config
    ;;
  'clear-data')
    clear-data
    ;;
  'clear-container')
    clear-container
    ;;
  *)
    usage
    exit 1
    ;;
esac

exit 0
--------------------------------------------------------------------------------
/docker-swarm-sd/prom-config.yml:
--------------------------------------------------------------------------------
1 | scrape_configs:
2 | - job_name: 'swarm-service'
3 | scrape_interval: 30s
4 | dns_sd_configs:
5 | - names:
6 | - tasks.mock
7 | - standalone-mock
8 | type: A
9 | port: 8080
10 | relabel_configs:
11 | - source_labels: ['__meta_dns_name']
12 | target_label: 'service'
--------------------------------------------------------------------------------
/docker-swarm-sd/scrape-data.txt:
--------------------------------------------------------------------------------
1 | # HELP x mock metric
2 | # TYPE x gauge
3 | x 1
4 | ---
5 | # HELP x mock metric
6 | # TYPE x gauge
7 | x 2
8 | ---
9 | # HELP x mock metric
10 | # TYPE x gauge
11 | x 3
12 | ---
13 | # HELP x mock metric
14 | # TYPE x gauge
15 | x 4
--------------------------------------------------------------------------------
/jvm-alerting/.gitignore:
--------------------------------------------------------------------------------
1 | prom-config.yml
2 | alertmanager-config.yml
--------------------------------------------------------------------------------
/jvm-alerting/README.md:
--------------------------------------------------------------------------------
1 | # Prometheus + Alertmanager告警JVM异常情况
2 |
3 | * 对应文章: https://chanjarster.github.io/post/prom-alert-jvm/
4 | * jmx-exporter config: [jmx-exporter-config.yml](jmx-exporter-config.yml)
5 | * Prometheus rules: [prom-alert-rules.yml](prom-alert-rules.yml)
6 | * Alertmanager config: [alertmanager-config.yml](alertmanager-config.default.yml)
7 | * Grafana dashboard: [JVM dashboard](https://grafana.com/dashboards/8563)
8 |
9 | 用法:
10 |
11 | 复制`alertmanager-config.default.yml`文件到文件名`alertmanager-config.yml`。设置smtp相关配置,以及下面`user-a`的邮箱。
12 |
13 | **邮箱发送失败问题**
14 |
15 | 中国的企业/个人邮箱几乎都不支持TLS(见这个[issue][issue]),因此请用gmail邮箱。
16 |
17 | ```bash
18 | # docker run所有容器
19 | ./demo.sh run
20 |
21 | # docker stop所有容器
22 | ./demo.sh stop
23 |
24 | # docker start所有容器
25 | ./demo.sh start
26 |
27 | # docker restart所有容器
28 | ./demo.sh restart
29 |
30 | # 清理prom-data和grafana-data
31 | ./demo.sh clear-data
32 |
33 | # docker rm所有容器
34 | ./demo.sh clear-container
35 | ```
36 |
37 | [issue]: https://github.com/prometheus/alertmanager/issues/980#issuecomment-328088587
38 |
--------------------------------------------------------------------------------
/jvm-alerting/alert-template.tmpl:
--------------------------------------------------------------------------------
1 | {{ define "email.default.html" }}
 2 | Summary
 3 |
4 | {{ .CommonAnnotations.summary }}
5 |
6 | Description
7 |
8 | {{ .CommonAnnotations.description }}
9 | {{ end}}
--------------------------------------------------------------------------------
/jvm-alerting/alertmanager-config.default.yml:
--------------------------------------------------------------------------------
1 | global:
2 | smtp_smarthost: ''
3 | smtp_from: ''
4 | smtp_auth_username: ''
5 | smtp_auth_password: ''
6 |
7 | # The directory from which notification templates are read.
8 | templates:
9 | - '/alertmanager-config/*.tmpl'
10 |
11 | # The root route on which each incoming alert enters.
12 | route:
13 | # The labels by which incoming alerts are grouped together. For example,
14 | # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
15 | # be batched into a single group.
16 | group_by: ['alertname', 'instance']
17 |
18 | # When a new group of alerts is created by an incoming alert, wait at
19 | # least 'group_wait' to send the initial notification.
20 | # This way ensures that you get multiple alerts for the same group that start
21 | # firing shortly after another are batched together on the first
22 | # notification.
23 | group_wait: 30s
24 |
25 | # When the first notification was sent, wait 'group_interval' to send a batch
26 | # of new alerts that started firing for that group.
27 | group_interval: 5m
28 |
29 | # If an alert has successfully been sent, wait 'repeat_interval' to
30 | # resend them.
31 | repeat_interval: 3h
32 |
33 | # A default receiver
34 | receiver: "user-a"
35 |
36 | # Inhibition rules allow to mute a set of alerts given that another alert is
37 | # firing.
38 | # We use this to mute any warning-level notifications if the same alert is
39 | # already critical.
40 | inhibit_rules:
41 | - source_match:
42 | severity: 'red'
43 | target_match_re:
44 | severity: ^(blue|yellow|orange)$
45 | # Apply inhibition if the alertname and instance is the same.
46 | equal: ['alertname', 'instance']
47 | - source_match:
48 | severity: 'orange'
49 | target_match_re:
50 | severity: ^(blue|yellow)$
51 | # Apply inhibition if the alertname and instance is the same.
52 | equal: ['alertname', 'instance']
53 | - source_match:
54 | severity: 'yellow'
55 | target_match_re:
56 | severity: ^(blue)$
57 | # Apply inhibition if the alertname and instance is the same.
58 | equal: ['alertname', 'instance']
59 |
60 | receivers:
61 | - name: 'user-a'
62 | email_configs:
63 | - to: 'user-a@domain.com'
--------------------------------------------------------------------------------
/jvm-alerting/demo.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Demo driver for the jvm-alerting example: three instrumented Tomcats,
# Prometheus, Alertmanager and Grafana as local docker containers.
CMD_NAME=$(basename "$0")
COMMAND=$1

# Render prom-config.yml from its template (substituting this host's name)
# and start Prometheus with this directory mounted as its config dir.
function run-prom
{
  docker pull prom/prometheus

  mkdir -p prom-data

  HOSTNAME=$(hostname) envsubst < prom-config.yml.tmpl > prom-config.yml

  docker run -d \
    --name=prometheus \
    -p 9090:9090 \
    -v "$(pwd)":/prometheus-config \
    -v "$(pwd)/prom-data":/prometheus \
    prom/prometheus --config.file=/prometheus-config/prom-config.yml

}

# Start three Tomcat containers, each instrumented with the jmx-exporter
# java agent on in-container port 6060 (published as 6060/6061/6062).
function run-tomcats
{
  docker pull tomcat:8.5-alpine

  docker run -d \
    --name tomcat-1 \
    -v "$(pwd)":/jmx-exporter \
    -e CATALINA_OPTS="-Xms32m -Xmx32m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \
    -p 6060:6060 \
    -p 8080:8080 \
    tomcat:8.5-alpine

  docker run -d \
    --name tomcat-2 \
    -v "$(pwd)":/jmx-exporter \
    -e CATALINA_OPTS="-Xms32m -Xmx32m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \
    -p 6061:6060 \
    -p 8081:8080 \
    tomcat:8.5-alpine

  docker run -d \
    --name tomcat-3 \
    -v "$(pwd)":/jmx-exporter \
    -e CATALINA_OPTS="-Xms32m -Xmx32m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \
    -p 6062:6060 \
    -p 8082:8080 \
    tomcat:8.5-alpine

}

# Start Grafana, persisting its state under ./grafana-data.
function run-grafana
{
  docker pull grafana/grafana

  mkdir -p grafana-data

  docker run -d \
    --name=grafana \
    -v "$(pwd)/grafana-data":/var/lib/grafana \
    -p 3000:3000 \
    grafana/grafana

}

# Start Alertmanager; expects alertmanager-config.yml to exist (copy it
# from alertmanager-config.default.yml first, as described in the README).
function run-alertmanager
{
  docker pull prom/alertmanager:master

  mkdir -p alertmanager-data

  docker run -d \
    --name=alertmanager \
    -v "$(pwd)":/alertmanager-config \
    -v "$(pwd)/alertmanager-data":/etc/alertmanager/data \
    -p 9093:9093 \
    prom/alertmanager:master --config.file=/alertmanager-config/alertmanager-config.yml
}

# Create and start all containers, then print the relevant URLs.
function run
{
  echo 'Run all containers'

  run-tomcats
  run-alertmanager
  run-prom
  run-grafana

  echo ''
  echo 'Open browser: http://localhost:8080 for tomcat-1'
  echo 'Open browser: http://localhost:6060 for tomcat-1 metrics'
  echo ''
  echo 'Open browser: http://localhost:8081 for tomcat-2'
  echo 'Open browser: http://localhost:6061 for tomcat-2 metrics'
  echo ''
  echo 'Open browser: http://localhost:8082 for tomcat-3'
  echo 'Open browser: http://localhost:6062 for tomcat-3 metrics'
  echo ''
  echo 'Open browser: http://localhost:9090 for Prometheus'
  echo ''
  echo 'Open browser: http://localhost:3000 for Grafana (default username/password: admin/admin)'
  echo ''
  echo 'Open browser: http://localhost:9093 for Alertmanager'

}

function start
{
  echo 'Start all containers'
  docker start prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3
}

function stop
{
  echo 'Stop all containers'
  docker stop prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3
}

function restart
{
  echo 'Restart all containers'
  docker restart prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3
}

# Wipe the persisted Prometheus/Grafana/Alertmanager data.
# (The old echo claimed a restart; this function only deletes data.)
function clear-data
{
  echo "Clear all containers' data"
  rm -rf "$(pwd)"/prom-data/* "$(pwd)"/grafana-data/* "$(pwd)"/alertmanager-data/*
}

function clear-container
{
  echo 'Clear all containers'
  stop
  docker rm prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3
}

# Signal Prometheus and Alertmanager (PID 1 in their containers) to reload
# their configuration without a restart.
function reload-config
{
  echo 'Reload Prometheus config'
  docker exec -t prometheus kill -SIGHUP 1

  echo 'Reload Alertmanager config'
  docker exec -t alertmanager kill -SIGHUP 1
}


function usage
{
  echo "Usage: ${CMD_NAME} run|start|stop|restart|reload-config|clear-data|clear-container"
}


case "$COMMAND" in
  'run')
    run
    ;;
  'start')
    start
    ;;
  'stop')
    stop
    ;;
  'restart')
    restart
    ;;
  'reload-config')
    reload-config
    ;;
  'clear-data')
    clear-data
    ;;
  'clear-container')
    clear-container
    ;;
  *)
    usage
    exit 1
    ;;
esac

exit 0
--------------------------------------------------------------------------------
/jvm-alerting/jmx-exporter-config.yml:
--------------------------------------------------------------------------------
1 | ---
2 | lowercaseOutputLabelNames: true
3 | lowercaseOutputName: true
4 | whitelistObjectNames: ["java.lang:type=OperatingSystem"]
5 | blacklistObjectNames: []
6 | rules:
7 | - pattern: 'java.lang<>(committed_virtual_memory|free_physical_memory|free_swap_space|total_physical_memory|total_swap_space)_size:'
8 | name: os_$1_bytes
9 | type: GAUGE
10 | attrNameSnakeCase: true
11 | - pattern: 'java.lang<>((?!process_cpu_time)\w+):'
12 | name: os_$1
13 | type: GAUGE
14 | attrNameSnakeCase: true
15 |
--------------------------------------------------------------------------------
/jvm-alerting/jmx_prometheus_javaagent-0.3.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chanjarster/prometheus-learn/23e9a7b30373a093cd1b44c70c593a7227340fc5/jvm-alerting/jmx_prometheus_javaagent-0.3.1.jar
--------------------------------------------------------------------------------
/jvm-alerting/prom-alert-rules.yml:
--------------------------------------------------------------------------------
 1 | # severity按严重程度由高到低:red、orange、yellow、blue
2 | groups:
3 | - name: jvm-alerting
4 | rules:
5 |
6 | # down了超过30秒
7 | - alert: instance-down
8 | expr: up == 0
9 | for: 30s
10 | labels:
11 | severity: yellow
12 | annotations:
13 | summary: "Instance {{ $labels.instance }} down"
14 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds."
15 |
16 | # down了超过1分钟
17 | - alert: instance-down
18 | expr: up == 0
19 | for: 1m
20 | labels:
21 | severity: orange
22 | annotations:
23 | summary: "Instance {{ $labels.instance }} down"
24 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
25 |
26 | # down了超过5分钟
27 | - alert: instance-down
28 | expr: up == 0
29 | for: 5m
30 | labels:
31 | severity: red
32 | annotations:
33 | summary: "Instance {{ $labels.instance }} down"
34 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
35 |
36 | # 堆空间使用超过50%
37 | - alert: heap-usage-too-much
38 | expr: jvm_memory_bytes_used{job="java", area="heap"} / jvm_memory_bytes_max * 100 > 50
39 | for: 1m
40 | labels:
41 | severity: yellow
42 | annotations:
43 | summary: "JVM Instance {{ $labels.instance }} memory usage > 50%"
44 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [heap usage > 50%] for more than 1 minutes. current usage ({{ $value }}%)"
45 |
46 | # 堆空间使用超过80%
47 | - alert: heap-usage-too-much
48 | expr: jvm_memory_bytes_used{job="java", area="heap"} / jvm_memory_bytes_max * 100 > 80
49 | for: 1m
50 | labels:
51 | severity: orange
52 | annotations:
53 | summary: "JVM Instance {{ $labels.instance }} memory usage > 80%"
54 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [heap usage > 80%] for more than 1 minutes. current usage ({{ $value }}%)"
55 |
56 | # 堆空间使用超过90%
57 | - alert: heap-usage-too-much
58 | expr: jvm_memory_bytes_used{job="java", area="heap"} / jvm_memory_bytes_max * 100 > 90
59 | for: 1m
60 | labels:
61 | severity: red
62 | annotations:
63 | summary: "JVM Instance {{ $labels.instance }} memory usage > 90%"
64 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [heap usage > 90%] for more than 1 minutes. current usage ({{ $value }}%)"
65 |
66 | # 在5分钟里,Old GC花费时间超过30%
67 | - alert: old-gc-time-too-much
68 | expr: increase(jvm_gc_collection_seconds_sum{gc="PS MarkSweep"}[5m]) > 5 * 60 * 0.3
69 | for: 5m
70 | labels:
71 | severity: yellow
72 | annotations:
73 | summary: "JVM Instance {{ $labels.instance }} Old GC time > 30% running time"
74 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [Old GC time > 30% running time] for more than 5 minutes. current seconds ({{ $value }}%)"
75 |
76 | # 在5分钟里,Old GC花费时间超过50%
77 | - alert: old-gc-time-too-much
78 | expr: increase(jvm_gc_collection_seconds_sum{gc="PS MarkSweep"}[5m]) > 5 * 60 * 0.5
79 | for: 5m
80 | labels:
81 | severity: orange
82 | annotations:
83 | summary: "JVM Instance {{ $labels.instance }} Old GC time > 50% running time"
84 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [Old GC time > 50% running time] for more than 5 minutes. current seconds ({{ $value }}%)"
85 |
86 | # 在5分钟里,Old GC花费时间超过80%
87 | - alert: old-gc-time-too-much
88 | expr: increase(jvm_gc_collection_seconds_sum{gc="PS MarkSweep"}[5m]) > 5 * 60 * 0.8
89 | for: 5m
90 | labels:
91 | severity: red
92 | annotations:
93 | summary: "JVM Instance {{ $labels.instance }} Old GC time > 80% running time"
94 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [Old GC time > 80% running time] for more than 5 minutes. current seconds ({{ $value }}%)"
95 |
--------------------------------------------------------------------------------
/jvm-alerting/prom-config.yml.tmpl:
--------------------------------------------------------------------------------
1 | scrape_configs:
2 | - job_name: 'prometheus'
3 | static_configs:
4 | - targets:
5 | - 'localhost:9090'
6 |
7 | - job_name: 'java'
8 | scrape_interval: 30s
9 | static_configs:
10 | - targets:
11 | - '$HOSTNAME:6060'
12 | - '$HOSTNAME:6061'
13 | - '$HOSTNAME:6062'
14 |
15 | alerting:
16 | alertmanagers:
17 | - static_configs:
18 | - targets:
19 | - '$HOSTNAME:9093'
20 |
21 | rule_files:
22 | - '/prometheus-config/prom-alert-rules.yml'
--------------------------------------------------------------------------------
/jvm-monitoring/.gitignore:
--------------------------------------------------------------------------------
1 | prom-config.yml
--------------------------------------------------------------------------------
/jvm-monitoring/README.md:
--------------------------------------------------------------------------------
1 | # Prometheus监控JVM
2 |
3 | * 对应文章: https://chanjarster.github.io/post/prom-grafana-jvm/
4 | * jmx-exporter config: [jmx-exporter-config.yml](jmx-exporter-config.yml)
5 | * Prometheus rules: None
6 | * Alertmanager config: None
7 | * Grafana dashboard: [JVM dashboard](https://grafana.com/dashboards/8563)
8 |
9 |
10 | ```bash
11 | # docker run所有容器
12 | ./demo.sh run
13 |
14 | # docker stop所有容器
15 | ./demo.sh stop
16 |
17 | # docker start所有容器
18 | ./demo.sh start
19 |
20 | # docker restart所有容器
21 | ./demo.sh restart
22 |
23 | # 清理prom-data和grafana-data
24 | ./demo.sh clear-data
25 |
26 | # docker rm所有容器
27 | ./demo.sh clear-container
28 | ```
--------------------------------------------------------------------------------
/jvm-monitoring/demo.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Demo driver for the jvm-monitoring example: three instrumented Tomcats,
# Prometheus and Grafana as local docker containers.
CMD_NAME=$(basename "$0")
COMMAND=$1

# Render prom-config.yml from its template (substituting this host's name)
# and start Prometheus with this directory mounted as its config dir.
function run-prom
{
  docker pull prom/prometheus

  mkdir -p prom-data

  HOSTNAME=$(hostname) envsubst < prom-config.yml.tmpl > prom-config.yml

  docker run -d \
    --name=prometheus \
    -p 9090:9090 \
    -v "$(pwd)":/prometheus-config \
    -v "$(pwd)/prom-data":/prometheus \
    prom/prometheus --config.file=/prometheus-config/prom-config.yml

}

# Start three Tomcat containers, each instrumented with the jmx-exporter
# java agent on in-container port 6060 (published as 6060/6061/6062).
function run-tomcats
{
  docker pull tomcat:8.5-alpine

  docker run -d \
    --name tomcat-1 \
    -v "$(pwd)":/jmx-exporter \
    -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \
    -p 6060:6060 \
    -p 8080:8080 \
    tomcat:8.5-alpine

  docker run -d \
    --name tomcat-2 \
    -v "$(pwd)":/jmx-exporter \
    -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \
    -p 6061:6060 \
    -p 8081:8080 \
    tomcat:8.5-alpine

  docker run -d \
    --name tomcat-3 \
    -v "$(pwd)":/jmx-exporter \
    -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \
    -p 6062:6060 \
    -p 8082:8080 \
    tomcat:8.5-alpine

}

# Start Grafana, persisting its state under ./grafana-data.
function run-grafana
{
  docker pull grafana/grafana

  mkdir -p grafana-data

  docker run -d \
    --name=grafana \
    -v "$(pwd)/grafana-data":/var/lib/grafana \
    -p 3000:3000 \
    grafana/grafana

}

# Create and start all containers, then print the relevant URLs.
function run
{
  echo 'Run all containers'

  run-tomcats
  run-prom
  run-grafana

  echo ''
  echo 'Open browser: http://localhost:8080 for tomcat-1'
  echo 'Open browser: http://localhost:6060 for tomcat-1 metrics'
  echo ''
  echo 'Open browser: http://localhost:8081 for tomcat-2'
  echo 'Open browser: http://localhost:6061 for tomcat-2 metrics'
  echo ''
  echo 'Open browser: http://localhost:8082 for tomcat-3'
  echo 'Open browser: http://localhost:6062 for tomcat-3 metrics'
  echo ''
  echo 'Open browser: http://localhost:9090 for Prometheus'
  echo ''
  echo 'Open browser: http://localhost:3000 for Grafana (default username/password: admin/admin)'

}

function start
{
  echo 'Start all containers'
  docker start prometheus grafana tomcat-1 tomcat-2 tomcat-3
}

function stop
{
  echo 'Stop all containers'
  docker stop prometheus grafana tomcat-1 tomcat-2 tomcat-3
}

function restart
{
  echo 'Restart all containers'
  docker restart prometheus grafana tomcat-1 tomcat-2 tomcat-3
}

# Wipe the persisted Prometheus/Grafana data.
# (The old echo claimed a restart; this function only deletes data.)
function clear-data
{
  echo "Clear all containers' data"
  rm -rf "$(pwd)"/prom-data/* "$(pwd)"/grafana-data/*
}

function clear-container
{
  echo 'Clear all containers'
  stop
  docker rm prometheus grafana tomcat-1 tomcat-2 tomcat-3
}

# Signal Prometheus (PID 1 inside its container) to reload its config.
function reload-config
{
  echo 'Reload Prometheus config'
  docker exec -t prometheus kill -SIGHUP 1
}

function usage
{
  echo "Usage: ${CMD_NAME} run|start|stop|restart|reload-config|clear-data|clear-container"
}


case "$COMMAND" in
  'run')
    run
    ;;
  'start')
    start
    ;;
  'stop')
    stop
    ;;
  'restart')
    restart
    ;;
  'reload-config')
    reload-config
    ;;
  'clear-data')
    clear-data
    ;;
  'clear-container')
    clear-container
    ;;
  *)
    usage
    exit 1
    ;;
esac

exit 0
--------------------------------------------------------------------------------
/jvm-monitoring/jmx-exporter-config.yml:
--------------------------------------------------------------------------------
1 | ---
2 | lowercaseOutputLabelNames: true
3 | lowercaseOutputName: true
4 | whitelistObjectNames: ["java.lang:type=OperatingSystem"]
5 | #blacklistObjectNames: ["*:*"]
6 | rules:
7 | # - pattern: 'java.lang<>(committed_virtual_memory|free_physical_memory|free_swap_space|total_physical_memory|total_swap_space)_size:'
8 | # name: os_$1_bytes
9 | # type: GAUGE
10 | # attrNameSnakeCase: true
11 | - pattern: 'java.lang<>((?!process_cpu_time)\w+):'
12 | name: os_$1
13 | type: GAUGE
14 | attrNameSnakeCase: true
15 |
--------------------------------------------------------------------------------
/jvm-monitoring/jmx_prometheus_javaagent-0.3.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chanjarster/prometheus-learn/23e9a7b30373a093cd1b44c70c593a7227340fc5/jvm-monitoring/jmx_prometheus_javaagent-0.3.1.jar
--------------------------------------------------------------------------------
/jvm-monitoring/prom-config.yml.tmpl:
--------------------------------------------------------------------------------
1 | scrape_configs:
2 | - job_name: 'prometheus'
3 | static_configs:
4 | - targets:
5 | - 'localhost:9090'
6 |
7 | - job_name: 'java'
8 | scrape_interval: 30s
9 | static_configs:
10 | - targets:
11 | - '$HOSTNAME:6060'
12 | - '$HOSTNAME:6061'
13 | - '$HOSTNAME:6062'
--------------------------------------------------------------------------------
/tomcat8/.gitignore:
--------------------------------------------------------------------------------
1 | prom-config.yml
2 | alertmanager-config.yml
--------------------------------------------------------------------------------
/tomcat8/README.md:
--------------------------------------------------------------------------------
1 | # Prometheus + Alertmanager告警Tomcat异常情况
2 |
3 | * 对应文章: 无
4 | * jmx-exporter config: [jmx-exporter-config.yml](jmx-exporter-config.yml)
5 | * Prometheus rules: [prom-alert-rules.yml](prom-alert-rules.yml)
6 | * Alertmanager config: [alertmanager-config.yml](alertmanager-config.default.yml)
7 | * Grafana dashboard: [Tomcat dashboard](https://grafana.com/dashboards/8704)
8 |
9 | 用法:
10 |
11 | 复制`alertmanager-config.default.yml`文件到文件名`alertmanager-config.yml`。设置smtp相关配置,以及下面`user-a`的邮箱。
12 |
13 | **邮箱发送失败问题**
14 |
15 | 中国的企业/个人邮箱几乎都不支持TLS(见这个[issue][issue]),因此请用gmail邮箱。
16 |
17 | ```bash
18 | # docker run所有容器
19 | ./demo.sh run
20 |
21 | # docker stop所有容器
22 | ./demo.sh stop
23 |
24 | # docker start所有容器
25 | ./demo.sh start
26 |
27 | # docker restart所有容器
28 | ./demo.sh restart
29 |
30 | # 清理prom-data和grafana-data
31 | ./demo.sh clear-data
32 |
33 | # docker rm所有容器
34 | ./demo.sh clear-container
35 | ```
36 |
37 | [issue]: https://github.com/prometheus/alertmanager/issues/980#issuecomment-328088587
38 |
--------------------------------------------------------------------------------
/tomcat8/alert-template.tmpl:
--------------------------------------------------------------------------------
1 | {{ define "email.default.html" }}
2 | Summary
3 |
4 | {{ .CommonAnnotations.summary }}
5 |
6 | Description
7 |
8 | {{ .CommonAnnotations.description }}
9 | {{ end}}
--------------------------------------------------------------------------------
/tomcat8/alertmanager-config.default.yml:
--------------------------------------------------------------------------------
1 | global:
2 | smtp_smarthost: ''
3 | smtp_from: ''
4 | smtp_auth_username: ''
5 | smtp_auth_password: ''
6 |
7 | # The directory from which notification templates are read.
8 | templates:
9 | - '/alertmanager-config/*.tmpl'
10 |
11 | # The root route on which each incoming alert enters.
12 | route:
13 | # The labels by which incoming alerts are grouped together. For example,
14 | # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
15 | # be batched into a single group.
16 | group_by: ['alertname', 'instance']
17 |
18 | # When a new group of alerts is created by an incoming alert, wait at
19 | # least 'group_wait' to send the initial notification.
20 | # This way ensures that you get multiple alerts for the same group that start
21 | # firing shortly after another are batched together on the first
22 | # notification.
23 | group_wait: 30s
24 |
25 | # When the first notification was sent, wait 'group_interval' to send a batch
26 | # of new alerts that started firing for that group.
27 | group_interval: 5m
28 |
29 | # If an alert has successfully been sent, wait 'repeat_interval' to
30 | # resend them.
31 | repeat_interval: 3h
32 |
33 | # A default receiver
34 | receiver: "user-a"
35 |
36 | # Inhibition rules allow to mute a set of alerts given that another alert is
37 | # firing.
38 | # We use this to mute any warning-level notifications if the same alert is
39 | # already critical.
40 | inhibit_rules:
41 | - source_match:
42 | severity: 'red'
43 | target_match_re:
44 | severity: ^(blue|yellow|orange)$
45 | # Apply inhibition if the alertname and instance is the same.
46 | equal: ['alertname', 'instance']
47 | - source_match:
48 | severity: 'orange'
49 | target_match_re:
50 | severity: ^(blue|yellow)$
51 | # Apply inhibition if the alertname and instance is the same.
52 | equal: ['alertname', 'instance']
53 | - source_match:
54 | severity: 'yellow'
55 | target_match_re:
56 | severity: ^(blue)$
57 | # Apply inhibition if the alertname and instance is the same.
58 | equal: ['alertname', 'instance']
59 |
60 | receivers:
61 | - name: 'user-a'
62 | email_configs:
63 | - to: 'user-a@domain.com'
--------------------------------------------------------------------------------
/tomcat8/demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | CMD_NAME=`basename $0`
3 | COMMAND=$1
4 |
5 | function run-prom
6 | {
7 | docker pull prom/prometheus
8 |
9 | mkdir -p prom-data
10 |
11 | HOSTNAME=$(hostname) envsubst < prom-config.yml.tmpl > prom-config.yml
12 |
13 | docker run -d \
14 | --name=prometheus \
15 | -p 9090:9090 \
16 | -v $(pwd):/prometheus-config \
17 | -v $(pwd)/prom-data:/prometheus \
18 | prom/prometheus --config.file=/prometheus-config/prom-config.yml
19 |
20 | }
21 |
# Launch three Tomcat 8.5 containers, each instrumented with the
# jmx_prometheus_javaagent serving metrics on container port 6060
# (published as 6060/6061/6062 on the host).
function run-tomcats
{
  docker pull tomcat:8.5-alpine

  # tomcat-1 additionally opens unauthenticated JMX remote access on
  # port 1100 (RMI pinned to the same port, hostname pinned to localhost)
  # so the MBeans can be browsed with jconsole/jvisualvm.
  docker run -d \
    --name tomcat-1 \
    -v $(pwd):/jmx-exporter \
    -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1100 -Dcom.sun.management.jmxremote.rmi.port=1100 -Djava.rmi.server.hostname=localhost" \
    -p 6060:6060 \
    -p 8080:8080 \
    -p 1100:1100 \
    tomcat:8.5-alpine

  # tomcat-2 and tomcat-3 only expose the exporter endpoint, no JMX remote.
  docker run -d \
    --name tomcat-2 \
    -v $(pwd):/jmx-exporter \
    -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \
    -p 6061:6060 \
    -p 8081:8080 \
    tomcat:8.5-alpine

  docker run -d \
    --name tomcat-3 \
    -v $(pwd):/jmx-exporter \
    -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \
    -p 6062:6060 \
    -p 8082:8080 \
    tomcat:8.5-alpine
}
52 |
# Launch Grafana with a host-mounted data directory so dashboards persist.
function run-grafana
{
  docker pull grafana/grafana
  mkdir -p grafana-data

  docker run -d \
    --name=grafana \
    -v $(pwd)/grafana-data:/var/lib/grafana \
    -p 3000:3000 \
    grafana/grafana
}
66 |
# Launch Alertmanager with persistent notification-log/silence data.
# NOTE(review): expects an alertmanager-config.yml next to this script —
# presumably derived from alertmanager-config.default.yml; confirm.
function run-alertmanager
{
  docker pull prom/alertmanager:master
  mkdir -p alertmanager-data

  docker run -d \
    --name=alertmanager \
    -v $(pwd):/alertmanager-config \
    -v $(pwd)/alertmanager-data:/etc/alertmanager/data \
    -p 9093:9093 \
    prom/alertmanager:master --config.file=/alertmanager-config/alertmanager-config.yml
}
80 |
# Create and start every container of the demo, then print the entry URLs.
function run
{
  echo 'Run all containers'

  # Monitored tomcats first, then the monitoring stack around them.
  run-tomcats
  run-alertmanager
  run-prom
  run-grafana

  echo ''
  echo 'Open browser: http://localhost:8080 for tomcat-1'
  echo 'Open browser: http://localhost:6060 for tomcat-1 metrics'
  echo ''
  echo 'Open browser: http://localhost:8081 for tomcat-2'
  echo 'Open browser: http://localhost:6061 for tomcat-2 metrics'
  echo ''
  echo 'Open browser: http://localhost:8082 for tomcat-3'
  echo 'Open browser: http://localhost:6062 for tomcat-3 metrics'
  echo ''
  echo 'Open browser: http://localhost:9090 for Prometheus'
  echo ''
  echo 'Open browser: http://localhost:3000 for Grafana (default username/password: admin/admin)'
  echo ''
  echo 'Open browser: http://localhost:9093 for Alertmanager'
}
107 |
# Every container this demo manages (unquoted on purpose: word-splitting
# passes them as separate arguments to docker).
ALL_CONTAINERS='prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3'

# Start previously created (stopped) containers.
function start
{
  echo 'Start all containers'
  docker start $ALL_CONTAINERS
}

# Stop all running containers without removing them.
function stop
{
  echo 'Stop all containers'
  docker stop $ALL_CONTAINERS
}

# Stop-then-start all containers in one step.
function restart
{
  echo 'Restart all containers'
  docker restart $ALL_CONTAINERS
}
125 |
# Wipe the persisted Prometheus/Grafana/Alertmanager data and bring the
# containers back up with empty state.
function clear-data
{
  echo "Clear all containers' data and restart"
  # Stop the containers first: the original deleted the data directories
  # while Prometheus/Grafana/Alertmanager were still running, which can
  # corrupt the TSDB/DB files — and it never restarted anything despite
  # what the message promises.
  stop
  rm -rf $(pwd)/prom-data/* $(pwd)/grafana-data/* $(pwd)/alertmanager-data/*
  start
}
131 |
# Stop and permanently remove every demo container (data dirs are kept).
function clear-container
{
  echo 'Clear all containers'
  stop
  docker rm prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3
}
138 |
# Signal Prometheus and Alertmanager (each runs as PID 1 inside its
# container) to re-read their configuration without restarting.
function reload-config
{
  echo 'Reload Prometheus config'
  docker exec -t prometheus kill -SIGHUP 1

  echo 'Reload Alertmanager config'
  docker exec -t alertmanager kill -SIGHUP 1
}
147 |
148 |
# Print the accepted sub-commands and exit codes are left to the caller.
function usage
{
  echo "Usage: ${CMD_NAME} run|start|stop|restart|reload-config|clear-data|clear-container"
}
153 |
154 |
# Dispatch the first CLI argument to the matching function; anything
# unrecognized prints usage and exits non-zero.
case $COMMAND in
  run)             run ;;
  start)           start ;;
  stop)            stop ;;
  restart)         restart ;;
  reload-config)   reload-config ;;
  clear-data)      clear-data ;;
  clear-container) clear-container ;;
  *)
    usage
    exit 1
    ;;
esac

exit 0
--------------------------------------------------------------------------------
/tomcat8/jmx-exporter-config.yml:
--------------------------------------------------------------------------------
1 | ---
2 | lowercaseOutputLabelNames: true
3 | lowercaseOutputName: true
4 | whitelistObjectNames: ["java.lang:type=OperatingSystem", "Catalina:*"]
5 | blacklistObjectNames: []
6 | rules:
7 | - pattern: 'java.lang<type=OperatingSystem><>(committed_virtual_memory|free_physical_memory|free_swap_space|total_physical_memory|total_swap_space)_size:'
8 | name: os_$1_bytes
9 | type: GAUGE
10 | attrNameSnakeCase: true
11 | - pattern: 'java.lang<type=OperatingSystem><>((?!process_cpu_time)\w+):'
12 | name: os_$1
13 | type: GAUGE
14 | attrNameSnakeCase: true
15 | - pattern: 'Catalina<type=GlobalRequestProcessor, name=\"(\w+-\w+)-(\d+)\"><>(\w+):'
16 | name: tomcat_$3_total
17 | labels:
18 | port: "$2"
19 | protocol: "$1"
20 | help: Tomcat global $3
21 | type: COUNTER
22 | - pattern: 'Catalina<type=Server><>serverInfo: (.+)'
23 | name: tomcat_serverinfo
24 | value: 1
25 | labels:
26 | serverInfo: "$1"
27 | type: COUNTER
28 | - pattern: 'Catalina<j2eeType=Servlet, WebModule=//([-a-zA-Z0-9+&@#/%?=~_|!:.,;]*[-a-zA-Z0-9+&@#/%=~_|]), name=([-a-zA-Z0-9+/$%~_-|!.]*), J2EEApplication=none, J2EEServer=none><>(requestCount|processingTime|errorCount):'
29 | name: tomcat_servlet_$3_total
30 | labels:
31 | module: "$1"
32 | servlet: "$2"
33 | help: Tomcat servlet $3 total
34 | type: COUNTER
35 | - pattern: 'Catalina<type=ThreadPool, name="(\w+-\w+)-(\d+)"><>(currentThreadCount|currentThreadsBusy|keepAliveCount|connectionCount|acceptCount|acceptorThreadCount|pollerThreadCount|maxThreads|minSpareThreads):'
36 | name: tomcat_threadpool_$3
37 | labels:
38 | port: "$2"
39 | protocol: "$1"
40 | help: Tomcat threadpool $3
41 | type: GAUGE
42 | - pattern: 'Catalina<type=Manager, host=([-a-zA-Z0-9+&@#/%?=~_|!:.,;]*[-a-zA-Z0-9+&@#/%=~_|]), context=([-a-zA-Z0-9+/$%~_-|!.]*)><>(processingTime|sessionCounter|rejectedSessions|expiredSessions):'
43 | name: tomcat_session_$3_total
44 | labels:
45 | context: "$2"
46 | host: "$1"
47 | help: Tomcat session $3 total
48 | type: COUNTER
--------------------------------------------------------------------------------
/tomcat8/jmx_prometheus_javaagent-0.3.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chanjarster/prometheus-learn/23e9a7b30373a093cd1b44c70c593a7227340fc5/tomcat8/jmx_prometheus_javaagent-0.3.1.jar
--------------------------------------------------------------------------------
/tomcat8/prom-alert-rules.yml:
--------------------------------------------------------------------------------
1 | # severity按严重程度由高到低:red、orange、yellow、blue
2 | groups:
3 | - name: jvm-alerting
4 | rules:
5 |
6 | # error count超过request count的 > 30%
7 | - alert: error-too-much
8 | expr: idelta(tomcat_servlet_errorcount_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 0.3
9 | for: 5m
10 | labels:
11 | severity: yellow
12 | annotations:
13 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] error over 30%"
14 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [error / request > 30%] for more than 5 minutes."
15 |
16 | # error count超过request count的 > 50%
17 | - alert: error-too-much
18 | expr: idelta(tomcat_servlet_errorcount_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 0.5
19 | for: 5m
20 | labels:
21 | severity: orange
22 | annotations:
23 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] error over 50%"
24 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [error / request > 50%] for more than 5 minutes."
25 |
26 | # error count超过request count的 > 80%
27 | - alert: error-too-much
28 | expr: idelta(tomcat_servlet_errorcount_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 0.8
29 | for: 5m
30 | labels:
31 | severity: red
32 | annotations:
33 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] error over 80%"
34 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [error / request > 80%] for more than 5 minutes."
35 |
36 | # 平均处理请求时间超过 3s
37 | - alert: processing-time-too-slow
38 | expr: idelta(tomcat_servlet_processingtime_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 3 * 1000
39 | for: 5m
40 | labels:
41 | severity: yellow
42 | annotations:
43 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] processing time over 3 seconds"
44 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [avg processing time > 3 seconds] for more than 5 minutes. Current value ({{ $value }})"
45 |
46 | # 平均处理请求时间超过 5s
47 | - alert: processing-time-too-slow
48 | expr: idelta(tomcat_servlet_processingtime_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 5 * 1000
49 | for: 5m
50 | labels:
51 | severity: orange
52 | annotations:
53 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] processing time over 5 seconds"
54 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [avg processing time > 5 seconds] for more than 5 minutes. Current value ({{ $value }})"
55 |
56 | # 平均处理请求时间超过 10s
57 | - alert: processing-time-too-slow
58 | expr: idelta(tomcat_servlet_processingtime_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 10 * 1000
59 | for: 5m
60 | labels:
61 | severity: red
62 | annotations:
63 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] processing time over 10 seconds"
64 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [avg processing time > 10 seconds] for more than 5 minutes. Current value ({{ $value }})"
65 |
--------------------------------------------------------------------------------
/tomcat8/prom-config.yml.tmpl:
--------------------------------------------------------------------------------
1 | scrape_configs:
2 | - job_name: 'prometheus'
3 | static_configs:
4 | - targets:
5 | - 'localhost:9090'
6 |
7 | - job_name: 'java'
8 | scrape_interval: 30s
9 | static_configs:
10 | - targets:
11 | - '$HOSTNAME:6060'
12 | - '$HOSTNAME:6061'
13 | - '$HOSTNAME:6062'
14 |
15 | alerting:
16 | alertmanagers:
17 | - static_configs:
18 | - targets:
19 | - '$HOSTNAME:9093'
20 |
21 | rule_files:
22 | - '/prometheus-config/prom-alert-rules.yml'
--------------------------------------------------------------------------------