├── .gitignore ├── README.md ├── docker-swarm-sd ├── README.md ├── demo.sh ├── prom-config.yml └── scrape-data.txt ├── jvm-alerting ├── .gitignore ├── README.md ├── alert-template.tmpl ├── alertmanager-config.default.yml ├── demo.sh ├── jmx-exporter-config.yml ├── jmx_prometheus_javaagent-0.3.1.jar ├── prom-alert-rules.yml └── prom-config.yml.tmpl ├── jvm-monitoring ├── .gitignore ├── README.md ├── demo.sh ├── jmx-exporter-config.yml ├── jmx_prometheus_javaagent-0.3.1.jar └── prom-config.yml.tmpl └── tomcat8 ├── .gitignore ├── README.md ├── alert-template.tmpl ├── alertmanager-config.default.yml ├── demo.sh ├── jmx-exporter-config.yml ├── jmx_prometheus_javaagent-0.3.1.jar ├── prom-alert-rules.yml └── prom-config.yml.tmpl /.gitignore: -------------------------------------------------------------------------------- 1 | prom-data 2 | grafana-data 3 | alertmanager-data 4 | 5 | .idea 6 | *.iml 7 | 8 | .classpath 9 | .project 10 | .settings 11 | 12 | target 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # prometheus-learn 2 | 3 | 学习Prometheus 4 | 5 | * [监控JVM的例子](jvm-monitoring) 6 | * [JVM告警的例子](jvm-alerting) 7 | * [监控Tomcat的例子](tomcat8) 8 | * [监控Docker Swarm Overlay网络中的容器](docker-swarm-sd) -------------------------------------------------------------------------------- /docker-swarm-sd/README.md: -------------------------------------------------------------------------------- 1 | # 监控Docker swarm overlay网络中的container 2 | 3 | * 对应文章: [Prometheus监控Docker Swarm Overlay网络中的容器](https://chanjarster.github.io/post/p8s-scrape-container-in-docker-swarm-overlay-network/) 4 | * Prometheus rules: None 5 | * Alertmanager config: None 6 | 7 | 8 | ```bash 9 | # docker run所有容器 10 | ./demo.sh run 11 | 12 | # docker stop所有容器 13 | ./demo.sh stop 14 | 15 | # docker start所有容器 16 | ./demo.sh start 17 | 18 | # docker restart所有容器 19 | ./demo.sh restart 20 | 
21 | # 清理prom-data 22 | ./demo.sh clear-data 23 | 24 | # docker rm所有容器 25 | ./demo.sh clear-container 26 | ``` -------------------------------------------------------------------------------- /docker-swarm-sd/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CMD_NAME=`basename $0` 3 | COMMAND=$1 4 | 5 | function run-prom 6 | { 7 | docker pull prom/prometheus 8 | 9 | mkdir -p prom-data 10 | 11 | docker run -d \ 12 | --name=prometheus \ 13 | --network test-overlay \ 14 | -p 9090:9090 \ 15 | -v $(pwd):/prometheus-config \ 16 | -v $(pwd)/prom-data:/prometheus \ 17 | prom/prometheus --config.file=/prometheus-config/prom-config.yml 18 | 19 | } 20 | 21 | function run-mocks 22 | { 23 | docker pull chanjarster/prometheus-mock-data:latest 24 | 25 | docker service create \ 26 | --name mock \ 27 | --replicas 3 \ 28 | --network test-overlay \ 29 | --limit-memory 96M \ 30 | --mount type=bind,src=$(pwd)/scrape-data.txt,dst=/home/java-app/etc/scrape-data.txt \ 31 | chanjarster/prometheus-mock-data:latest 32 | 33 | docker run -d \ 34 | -v $(pwd)/scrape-data.txt:/home/java-app/etc/scrape-data.txt \ 35 | --network test-overlay \ 36 | --name standalone-mock \ 37 | chanjarster/prometheus-mock-data:latest 38 | } 39 | 40 | 41 | function run 42 | { 43 | echo 'Run all containers' 44 | 45 | run-mocks 46 | run-prom 47 | 48 | echo 'Open browser: http://localhost:9090 for Prometheus' 49 | 50 | } 51 | 52 | function start 53 | { 54 | echo 'Scale service mock to 3' 55 | docker service scale mock=3 56 | 57 | echo 'Start standalone-mock' 58 | docker start standalone-mock 59 | 60 | echo 'Start prometheus' 61 | docker start prometheus 62 | } 63 | 64 | function stop 65 | { 66 | echo 'Stop prometheus' 67 | docker stop prometheus 68 | 69 | echo 'Stop standalone-mock' 70 | docker stop standalone-mock 71 | 72 | echo 'Scale service mock to 0' 73 | docker service scale mock=0 74 | } 75 | 76 | function restart 77 | { 78 | 79 | echo 'Restart all 
containers' 80 | 81 | docker stop prometheus 82 | docker service scale mock=0 83 | docker service scale mock=3 84 | docker restart standalone-mock 85 | docker start prometheus 86 | } 87 | 88 | function clear-data 89 | { 90 | echo "Clear all containers' data" 91 | rm -rf $(pwd)/prom-data/* 92 | } 93 | 94 | function clear-container 95 | { 96 | echo 'Clear all containers' 97 | stop 98 | docker service rm mock 99 | docker rm prometheus standalone-mock 100 | } 101 | 102 | function reload-config 103 | { 104 | echo 'Reload Prometheus config' 105 | docker exec -t prometheus kill -SIGHUP 1 106 | } 107 | 108 | function usage 109 | { 110 | echo "Usage: ${CMD_NAME} run|start|stop|restart|reload-config|clear-data|clear-container" 111 | } 112 | 113 | 114 | case $COMMAND in 115 | 'run') 116 | run 117 | ;; 118 | 'start') 119 | start 120 | ;; 121 | 'stop') 122 | stop 123 | ;; 124 | 'restart') 125 | restart 126 | ;; 127 | 'reload-config') 128 | reload-config 129 | ;; 130 | 'clear-data') 131 | clear-data 132 | ;; 133 | 'clear-container') 134 | clear-container 135 | ;; 136 | *) 137 | usage 138 | exit 1 139 | ;; 140 | esac 141 | 142 | exit 0 -------------------------------------------------------------------------------- /docker-swarm-sd/prom-config.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: 'swarm-service' 3 | scrape_interval: 30s 4 | dns_sd_configs: 5 | - names: 6 | - tasks.mock 7 | - standalone-mock 8 | type: A 9 | port: 8080 10 | relabel_configs: 11 | - source_labels: ['__meta_dns_name'] 12 | target_label: 'service' -------------------------------------------------------------------------------- /docker-swarm-sd/scrape-data.txt: -------------------------------------------------------------------------------- 1 | # HELP x mock metric 2 | # TYPE x gauge 3 | x 1 4 | --- 5 | # HELP x mock metric 6 | # TYPE x gauge 7 | x 2 8 | --- 9 | # HELP x mock metric 10 | # TYPE x gauge 11 | x 3 12 | --- 13 | # HELP x 
mock metric 14 | # TYPE x gauge 15 | x 4 -------------------------------------------------------------------------------- /jvm-alerting/.gitignore: -------------------------------------------------------------------------------- 1 | prom-config.yml 2 | alertmanager-config.yml -------------------------------------------------------------------------------- /jvm-alerting/README.md: -------------------------------------------------------------------------------- 1 | # Prometheus + Alertmanager告警JVM异常情况 2 | 3 | * 对应文章: https://chanjarster.github.io/post/prom-alert-jvm/ 4 | * jmx-exporter config: [jmx-exporter-config.yml](jmx-exporter-config.yml) 5 | * Prometheus rules: [prom-alert-rules.yml](prom-alert-rules.yml) 6 | * Alertmanager config: [alertmanager-config.yml](alertmanager-config.default.yml) 7 | * Grafana dashboard: [JVM dashboard](https://grafana.com/dashboards/8563) 8 | 9 | 用法: 10 | 11 | 复制`alertmanager-config.default.yml`文件到文件名`alertmanager-config.yml`。设置smtp相关配置,以及下面`user-a`的邮箱。 12 | 13 | **邮箱发送失败问题** 14 | 15 | 中国的企业/个人邮箱几乎都不支持TLS(见这个[issue][issue]),因此请用gmail邮箱。 16 | 17 | ```bash 18 | # docker run所有容器 19 | ./demo.sh run 20 | 21 | # docker stop所有容器 22 | ./demo.sh stop 23 | 24 | # docker start所有容器 25 | ./demo.sh start 26 | 27 | # docker restart所有容器 28 | ./demo.sh restart 29 | 30 | # 清理prom-data和grafana-data 31 | ./demo.sh clear-data 32 | 33 | # docker rm所有容器 34 | ./demo.sh clear-container 35 | ``` 36 | 37 | [issue]: https://github.com/prometheus/alertmanager/issues/980#issuecomment-328088587 38 | -------------------------------------------------------------------------------- /jvm-alerting/alert-template.tmpl: -------------------------------------------------------------------------------- 1 | {{ define "email.default.html" }} 2 |

Summary

3 | 4 |

{{ .CommonAnnotations.summary }}

5 | 6 |

Description

7 | 8 |

{{ .CommonAnnotations.description }}

9 | {{ end}} -------------------------------------------------------------------------------- /jvm-alerting/alertmanager-config.default.yml: -------------------------------------------------------------------------------- 1 | global: 2 | smtp_smarthost: '' 3 | smtp_from: '' 4 | smtp_auth_username: '' 5 | smtp_auth_password: '' 6 | 7 | # The directory from which notification templates are read. 8 | templates: 9 | - '/alertmanager-config/*.tmpl' 10 | 11 | # The root route on which each incoming alert enters. 12 | route: 13 | # The labels by which incoming alerts are grouped together. For example, 14 | # multiple alerts coming in for cluster=A and alertname=LatencyHigh would 15 | # be batched into a single group. 16 | group_by: ['alertname', 'instance'] 17 | 18 | # When a new group of alerts is created by an incoming alert, wait at 19 | # least 'group_wait' to send the initial notification. 20 | # This way ensures that you get multiple alerts for the same group that start 21 | # firing shortly after another are batched together on the first 22 | # notification. 23 | group_wait: 30s 24 | 25 | # When the first notification was sent, wait 'group_interval' to send a batch 26 | # of new alerts that started firing for that group. 27 | group_interval: 5m 28 | 29 | # If an alert has successfully been sent, wait 'repeat_interval' to 30 | # resend them. 31 | repeat_interval: 3h 32 | 33 | # A default receiver 34 | receiver: "user-a" 35 | 36 | # Inhibition rules allow to mute a set of alerts given that another alert is 37 | # firing. 38 | # We use this to mute any warning-level notifications if the same alert is 39 | # already critical. 40 | inhibit_rules: 41 | - source_match: 42 | severity: 'red' 43 | target_match_re: 44 | severity: ^(blue|yellow|orange)$ 45 | # Apply inhibition if the alertname and instance is the same. 
46 | equal: ['alertname', 'instance'] 47 | - source_match: 48 | severity: 'orange' 49 | target_match_re: 50 | severity: ^(blue|yellow)$ 51 | # Apply inhibition if the alertname and instance is the same. 52 | equal: ['alertname', 'instance'] 53 | - source_match: 54 | severity: 'yellow' 55 | target_match_re: 56 | severity: ^(blue)$ 57 | # Apply inhibition if the alertname and instance is the same. 58 | equal: ['alertname', 'instance'] 59 | 60 | receivers: 61 | - name: 'user-a' 62 | email_configs: 63 | - to: 'user-a@domain.com' -------------------------------------------------------------------------------- /jvm-alerting/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CMD_NAME=`basename $0` 3 | COMMAND=$1 4 | 5 | function run-prom 6 | { 7 | docker pull prom/prometheus 8 | 9 | mkdir -p prom-data 10 | 11 | HOSTNAME=$(hostname) envsubst < prom-config.yml.tmpl > prom-config.yml 12 | 13 | docker run -d \ 14 | --name=prometheus \ 15 | -p 9090:9090 \ 16 | -v $(pwd):/prometheus-config \ 17 | -v $(pwd)/prom-data:/prometheus \ 18 | prom/prometheus --config.file=/prometheus-config/prom-config.yml 19 | 20 | } 21 | 22 | function run-tomcats 23 | { 24 | docker pull tomcat:8.5-alpine 25 | 26 | docker run -d \ 27 | --name tomcat-1 \ 28 | -v $(pwd):/jmx-exporter \ 29 | -e CATALINA_OPTS="-Xms32m -Xmx32m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \ 30 | -p 6060:6060 \ 31 | -p 8080:8080 \ 32 | tomcat:8.5-alpine 33 | 34 | docker run -d \ 35 | --name tomcat-2 \ 36 | -v $(pwd):/jmx-exporter \ 37 | -e CATALINA_OPTS="-Xms32m -Xmx32m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \ 38 | -p 6061:6060 \ 39 | -p 8081:8080 \ 40 | tomcat:8.5-alpine 41 | 42 | docker run -d \ 43 | --name tomcat-3 \ 44 | -v $(pwd):/jmx-exporter \ 45 | -e CATALINA_OPTS="-Xms32m -Xmx32m 
-javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \ 46 | -p 6062:6060 \ 47 | -p 8082:8080 \ 48 | tomcat:8.5-alpine 49 | 50 | } 51 | 52 | function run-grafana 53 | { 54 | docker pull grafana/grafana 55 | 56 | mkdir -p grafana-data 57 | 58 | docker run -d \ 59 | --name=grafana \ 60 | -v $(pwd)/grafana-data:/var/lib/grafana \ 61 | -p 3000:3000 \ 62 | grafana/grafana 63 | 64 | } 65 | 66 | function run-alertmanager 67 | { 68 | docker pull prom/alertmanager:master 69 | 70 | mkdir -p alertmanager-data 71 | 72 | docker run -d \ 73 | --name=alertmanager \ 74 | -v $(pwd):/alertmanager-config \ 75 | -v $(pwd)/alertmanager-data:/etc/alertmanager/data \ 76 | -p 9093:9093 \ 77 | prom/alertmanager:master --config.file=/alertmanager-config/alertmanager-config.yml 78 | } 79 | 80 | function run 81 | { 82 | echo 'Run all containers' 83 | 84 | run-tomcats 85 | run-alertmanager 86 | run-prom 87 | run-grafana 88 | 89 | echo '' 90 | echo 'Open browser: http://localhost:8080 for tomcat-1' 91 | echo 'Open browser: http://localhost:6060 for tomcat-1 metrics' 92 | echo '' 93 | echo 'Open browser: http://localhost:8081 for tomcat-2' 94 | echo 'Open browser: http://localhost:6061 for tomcat-2 metrics' 95 | echo '' 96 | echo 'Open browser: http://localhost:8082 for tomcat-3' 97 | echo 'Open browser: http://localhost:6062 for tomcat-3 metrics' 98 | echo '' 99 | echo 'Open browser: http://localhost:9090 for Prometheus' 100 | echo '' 101 | echo 'Open browser: http://localhost:3000 for Grafana (default username/password: admin/admin)' 102 | echo '' 103 | echo 'Open browser: http://localhost:9093 for Alertmanager' 104 | 105 | } 106 | 107 | function start 108 | { 109 | echo 'Start all containers' 110 | docker start prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3 111 | } 112 | 113 | function stop 114 | { 115 | echo 'Stop all containers' 116 | docker stop prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3 117 | } 118 | 119 | 
function restart 120 | { 121 | echo 'Restart all containers' 122 | docker restart prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3 123 | } 124 | 125 | function clear-data 126 | { 127 | echo "Clear all containers' data and restart" 128 | rm -rf $(pwd)/prom-data/* $(pwd)/grafana-data/* $(pwd)/alertmanager-data/* 129 | } 130 | 131 | function clear-container 132 | { 133 | echo 'Clear all containers' 134 | stop 135 | docker rm prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3 136 | } 137 | 138 | function reload-config 139 | { 140 | echo 'Reload Prometheus config' 141 | docker exec -t prometheus kill -SIGHUP 1 142 | 143 | echo 'Reload Alertmanager config' 144 | docker exec -t alertmanager kill -SIGHUP 1 145 | } 146 | 147 | 148 | function usage 149 | { 150 | echo "Usage: ${CMD_NAME} run|start|stop|restart|reload-config|clear-data|clear-container" 151 | } 152 | 153 | 154 | case $COMMAND in 155 | 'run') 156 | run 157 | ;; 158 | 'start') 159 | start 160 | ;; 161 | 'stop') 162 | stop 163 | ;; 164 | 'restart') 165 | restart 166 | ;; 167 | 'reload-config') 168 | reload-config 169 | ;; 170 | 'clear-data') 171 | clear-data 172 | ;; 173 | 'clear-container') 174 | clear-container 175 | ;; 176 | *) 177 | usage 178 | exit 1 179 | ;; 180 | esac 181 | 182 | exit 0 -------------------------------------------------------------------------------- /jvm-alerting/jmx-exporter-config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputLabelNames: true 3 | lowercaseOutputName: true 4 | whitelistObjectNames: ["java.lang:type=OperatingSystem"] 5 | blacklistObjectNames: [] 6 | rules: 7 | - pattern: 'java.lang<>(committed_virtual_memory|free_physical_memory|free_swap_space|total_physical_memory|total_swap_space)_size:' 8 | name: os_$1_bytes 9 | type: GAUGE 10 | attrNameSnakeCase: true 11 | - pattern: 'java.lang<>((?!process_cpu_time)\w+):' 12 | name: os_$1 13 | type: GAUGE 14 | attrNameSnakeCase: true 15 | 
-------------------------------------------------------------------------------- /jvm-alerting/jmx_prometheus_javaagent-0.3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chanjarster/prometheus-learn/23e9a7b30373a093cd1b44c70c593a7227340fc5/jvm-alerting/jmx_prometheus_javaagent-0.3.1.jar -------------------------------------------------------------------------------- /jvm-alerting/prom-alert-rules.yml: -------------------------------------------------------------------------------- 1 | # severity按严重程度由高到低:red、orange、yello、blue 2 | groups: 3 | - name: jvm-alerting 4 | rules: 5 | 6 | # down了超过30秒 7 | - alert: instance-down 8 | expr: up == 0 9 | for: 30s 10 | labels: 11 | severity: yellow 12 | annotations: 13 | summary: "Instance {{ $labels.instance }} down" 14 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds." 15 | 16 | # down了超过1分钟 17 | - alert: instance-down 18 | expr: up == 0 19 | for: 1m 20 | labels: 21 | severity: orange 22 | annotations: 23 | summary: "Instance {{ $labels.instance }} down" 24 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes." 25 | 26 | # down了超过5分钟 27 | - alert: instance-down 28 | expr: up == 0 29 | for: 5m 30 | labels: 31 | severity: red 32 | annotations: 33 | summary: "Instance {{ $labels.instance }} down" 34 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes." 35 | 36 | # 堆空间使用超过50% 37 | - alert: heap-usage-too-much 38 | expr: jvm_memory_bytes_used{job="java", area="heap"} / jvm_memory_bytes_max * 100 > 50 39 | for: 1m 40 | labels: 41 | severity: yellow 42 | annotations: 43 | summary: "JVM Instance {{ $labels.instance }} memory usage > 50%" 44 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [heap usage > 50%] for more than 1 minutes. 
current usage ({{ $value }}%)" 45 | 46 | # 堆空间使用超过80% 47 | - alert: heap-usage-too-much 48 | expr: jvm_memory_bytes_used{job="java", area="heap"} / jvm_memory_bytes_max * 100 > 80 49 | for: 1m 50 | labels: 51 | severity: orange 52 | annotations: 53 | summary: "JVM Instance {{ $labels.instance }} memory usage > 80%" 54 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [heap usage > 80%] for more than 1 minutes. current usage ({{ $value }}%)" 55 | 56 | # 堆空间使用超过90% 57 | - alert: heap-usage-too-much 58 | expr: jvm_memory_bytes_used{job="java", area="heap"} / jvm_memory_bytes_max * 100 > 90 59 | for: 1m 60 | labels: 61 | severity: red 62 | annotations: 63 | summary: "JVM Instance {{ $labels.instance }} memory usage > 90%" 64 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [heap usage > 90%] for more than 1 minutes. current usage ({{ $value }}%)" 65 | 66 | # 在5分钟里,Old GC花费时间超过30% 67 | - alert: old-gc-time-too-much 68 | expr: increase(jvm_gc_collection_seconds_sum{gc="PS MarkSweep"}[5m]) > 5 * 60 * 0.3 69 | for: 5m 70 | labels: 71 | severity: yellow 72 | annotations: 73 | summary: "JVM Instance {{ $labels.instance }} Old GC time > 30% running time" 74 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [Old GC time > 30% running time] for more than 5 minutes. current seconds ({{ $value }}%)" 75 | 76 | # 在5分钟里,Old GC花费时间超过50% 77 | - alert: old-gc-time-too-much 78 | expr: increase(jvm_gc_collection_seconds_sum{gc="PS MarkSweep"}[5m]) > 5 * 60 * 0.5 79 | for: 5m 80 | labels: 81 | severity: orange 82 | annotations: 83 | summary: "JVM Instance {{ $labels.instance }} Old GC time > 50% running time" 84 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [Old GC time > 50% running time] for more than 5 minutes. 
current seconds ({{ $value }}%)" 85 | 86 | # 在5分钟里,Old GC花费时间超过80% 87 | - alert: old-gc-time-too-much 88 | expr: increase(jvm_gc_collection_seconds_sum{gc="PS MarkSweep"}[5m]) > 5 * 60 * 0.8 89 | for: 5m 90 | labels: 91 | severity: red 92 | annotations: 93 | summary: "JVM Instance {{ $labels.instance }} Old GC time > 80% running time" 94 | description: "{{ $labels.instance }} of job {{ $labels.job }} has been in status [Old GC time > 80% running time] for more than 5 minutes. current seconds ({{ $value }}%)" 95 | -------------------------------------------------------------------------------- /jvm-alerting/prom-config.yml.tmpl: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: 'prometheus' 3 | static_configs: 4 | - targets: 5 | - 'localhost:9090' 6 | 7 | - job_name: 'java' 8 | scrape_interval: 30s 9 | static_configs: 10 | - targets: 11 | - '$HOSTNAME:6060' 12 | - '$HOSTNAME:6061' 13 | - '$HOSTNAME:6062' 14 | 15 | alerting: 16 | alertmanagers: 17 | - static_configs: 18 | - targets: 19 | - '$HOSTNAME:9093' 20 | 21 | rule_files: 22 | - '/prometheus-config/prom-alert-rules.yml' -------------------------------------------------------------------------------- /jvm-monitoring/.gitignore: -------------------------------------------------------------------------------- 1 | prom-config.yml -------------------------------------------------------------------------------- /jvm-monitoring/README.md: -------------------------------------------------------------------------------- 1 | # Prometheus监控JVM 2 | 3 | * 对应文章: https://chanjarster.github.io/post/prom-grafana-jvm/ 4 | * jmx-exporter config: [jmx-exporter-config.yml](jmx-exporter-config.yml) 5 | * Prometheus rules: None 6 | * Alertmanager config: None 7 | * Grafana dashboard: [JVM dashboard](https://grafana.com/dashboards/8563) 8 | 9 | 10 | ```bash 11 | # docker run所有容器 12 | ./demo.sh run 13 | 14 | # docker stop所有容器 15 | ./demo.sh stop 16 | 17 | # docker 
start所有容器 18 | ./demo.sh start 19 | 20 | # docker restart所有容器 21 | ./demo.sh restart 22 | 23 | # 清理prom-data和grafana-data 24 | ./demo.sh clear-data 25 | 26 | # docker rm所有容器 27 | ./demo.sh clear-container 28 | ``` -------------------------------------------------------------------------------- /jvm-monitoring/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CMD_NAME=`basename $0` 3 | COMMAND=$1 4 | 5 | function run-prom 6 | { 7 | docker pull prom/prometheus 8 | 9 | mkdir -p prom-data 10 | 11 | HOSTNAME=$(hostname) envsubst < prom-config.yml.tmpl > prom-config.yml 12 | 13 | docker run -d \ 14 | --name=prometheus \ 15 | -p 9090:9090 \ 16 | -v $(pwd):/prometheus-config \ 17 | -v $(pwd)/prom-data:/prometheus \ 18 | prom/prometheus --config.file=/prometheus-config/prom-config.yml 19 | 20 | } 21 | 22 | function run-tomcats 23 | { 24 | docker pull tomcat:8.5-alpine 25 | 26 | docker run -d \ 27 | --name tomcat-1 \ 28 | -v $(pwd):/jmx-exporter \ 29 | -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \ 30 | -p 6060:6060 \ 31 | -p 8080:8080 \ 32 | tomcat:8.5-alpine 33 | 34 | docker run -d \ 35 | --name tomcat-2 \ 36 | -v $(pwd):/jmx-exporter \ 37 | -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \ 38 | -p 6061:6060 \ 39 | -p 8081:8080 \ 40 | tomcat:8.5-alpine 41 | 42 | docker run -d \ 43 | --name tomcat-3 \ 44 | -v $(pwd):/jmx-exporter \ 45 | -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \ 46 | -p 6062:6060 \ 47 | -p 8082:8080 \ 48 | tomcat:8.5-alpine 49 | 50 | } 51 | 52 | function run-grafana 53 | { 54 | docker pull grafana/grafana 55 | 56 | mkdir -p grafana-data 57 | 58 | docker run -d \ 59 | --name=grafana \ 60 | -v 
$(pwd)/grafana-data:/var/lib/grafana \ 61 | -p 3000:3000 \ 62 | grafana/grafana 63 | 64 | } 65 | 66 | function run 67 | { 68 | echo 'Run all containers' 69 | 70 | run-tomcats 71 | run-prom 72 | run-grafana 73 | 74 | echo '' 75 | echo 'Open browser: http://localhost:8080 for tomcat-1' 76 | echo 'Open browser: http://localhost:6060 for tomcat-1 metrics' 77 | echo '' 78 | echo 'Open browser: http://localhost:8081 for tomcat-2' 79 | echo 'Open browser: http://localhost:6061 for tomcat-2 metrics' 80 | echo '' 81 | echo 'Open browser: http://localhost:8082 for tomcat-3' 82 | echo 'Open browser: http://localhost:6062 for tomcat-3 metrics' 83 | echo '' 84 | echo 'Open browser: http://localhost:9090 for Prometheus' 85 | echo '' 86 | echo 'Open browser: http://localhost:3000 for Grafana (default username/password: admin/admin)' 87 | 88 | } 89 | 90 | function start 91 | { 92 | echo 'Start all containers' 93 | docker start prometheus grafana tomcat-1 tomcat-2 tomcat-3 94 | } 95 | 96 | function stop 97 | { 98 | echo 'Stop all containers' 99 | docker stop prometheus grafana tomcat-1 tomcat-2 tomcat-3 100 | } 101 | 102 | function restart 103 | { 104 | echo 'Restart all containers' 105 | docker restart prometheus grafana tomcat-1 tomcat-2 tomcat-3 106 | } 107 | 108 | function clear-data 109 | { 110 | echo "Clear all containers' data and restart" 111 | rm -rf $(pwd)/prom-data/* $(pwd)/grafana-data/* 112 | } 113 | 114 | function clear-container 115 | { 116 | echo 'Clear all containers' 117 | stop 118 | docker rm prometheus grafana tomcat-1 tomcat-2 tomcat-3 119 | } 120 | 121 | function reload-config 122 | { 123 | echo 'Reload Prometheus config' 124 | docker exec -t prometheus kill -SIGHUP 1 125 | } 126 | 127 | function usage 128 | { 129 | echo "Usage: ${CMD_NAME} run|start|stop|restart|reload-config|clear-data|clear-container" 130 | } 131 | 132 | 133 | case $COMMAND in 134 | 'run') 135 | run 136 | ;; 137 | 'start') 138 | start 139 | ;; 140 | 'stop') 141 | stop 142 | ;; 143 | 
'restart') 144 | restart 145 | ;; 146 | 'reload-config') 147 | reload-config 148 | ;; 149 | 'clear-data') 150 | clear-data 151 | ;; 152 | 'clear-container') 153 | clear-container 154 | ;; 155 | *) 156 | usage 157 | exit 1 158 | ;; 159 | esac 160 | 161 | exit 0 -------------------------------------------------------------------------------- /jvm-monitoring/jmx-exporter-config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputLabelNames: true 3 | lowercaseOutputName: true 4 | whitelistObjectNames: ["java.lang:type=OperatingSystem"] 5 | #blacklistObjectNames: ["*:*"] 6 | rules: 7 | # - pattern: 'java.lang<>(committed_virtual_memory|free_physical_memory|free_swap_space|total_physical_memory|total_swap_space)_size:' 8 | # name: os_$1_bytes 9 | # type: GAUGE 10 | # attrNameSnakeCase: true 11 | - pattern: 'java.lang<>((?!process_cpu_time)\w+):' 12 | name: os_$1 13 | type: GAUGE 14 | attrNameSnakeCase: true 15 | -------------------------------------------------------------------------------- /jvm-monitoring/jmx_prometheus_javaagent-0.3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chanjarster/prometheus-learn/23e9a7b30373a093cd1b44c70c593a7227340fc5/jvm-monitoring/jmx_prometheus_javaagent-0.3.1.jar -------------------------------------------------------------------------------- /jvm-monitoring/prom-config.yml.tmpl: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: 'prometheus' 3 | static_configs: 4 | - targets: 5 | - 'localhost:9090' 6 | 7 | - job_name: 'java' 8 | scrape_interval: 30s 9 | static_configs: 10 | - targets: 11 | - '$HOSTNAME:6060' 12 | - '$HOSTNAME:6061' 13 | - '$HOSTNAME:6062' -------------------------------------------------------------------------------- /tomcat8/.gitignore: -------------------------------------------------------------------------------- 
1 | prom-config.yml 2 | alertmanager-config.yml -------------------------------------------------------------------------------- /tomcat8/README.md: -------------------------------------------------------------------------------- 1 | # Prometheus + Alertmanager告警Tomcat异常情况 2 | 3 | * 对应文章: 无 4 | * jmx-exporter config: [jmx-exporter-config.yml](jmx-exporter-config.yml) 5 | * Prometheus rules: [prom-alert-rules.yml](prom-alert-rules.yml) 6 | * Alertmanager config: [alertmanager-config.yml](alertmanager-config.default.yml) 7 | * Grafana dashboard: [Tomcat dashboard](https://grafana.com/dashboards/8704) 8 | 9 | 用法: 10 | 11 | 复制`alertmanager-config.default.yml`文件到文件名`alertmanager-config.yml`。设置smtp相关配置,以及下面`user-a`的邮箱。 12 | 13 | **邮箱发送失败问题** 14 | 15 | 中国的企业/个人邮箱几乎都不支持TLS(见这个[issue][issue]),因此请用gmail邮箱。 16 | 17 | ```bash 18 | # docker run所有容器 19 | ./demo.sh run 20 | 21 | # docker stop所有容器 22 | ./demo.sh stop 23 | 24 | # docker start所有容器 25 | ./demo.sh start 26 | 27 | # docker restart所有容器 28 | ./demo.sh restart 29 | 30 | # 清理prom-data和grafana-data 31 | ./demo.sh clear-data 32 | 33 | # docker rm所有容器 34 | ./demo.sh clear-container 35 | ``` 36 | 37 | [issue]: https://github.com/prometheus/alertmanager/issues/980#issuecomment-328088587 38 | -------------------------------------------------------------------------------- /tomcat8/alert-template.tmpl: -------------------------------------------------------------------------------- 1 | {{ define "email.default.html" }} 2 |

Summary

3 | 4 |

{{ .CommonAnnotations.summary }}

5 | 6 |

Description

7 | 8 |

{{ .CommonAnnotations.description }}

9 | {{ end}} -------------------------------------------------------------------------------- /tomcat8/alertmanager-config.default.yml: -------------------------------------------------------------------------------- 1 | global: 2 | smtp_smarthost: '' 3 | smtp_from: '' 4 | smtp_auth_username: '' 5 | smtp_auth_password: '' 6 | 7 | # The directory from which notification templates are read. 8 | templates: 9 | - '/alertmanager-config/*.tmpl' 10 | 11 | # The root route on which each incoming alert enters. 12 | route: 13 | # The labels by which incoming alerts are grouped together. For example, 14 | # multiple alerts coming in for cluster=A and alertname=LatencyHigh would 15 | # be batched into a single group. 16 | group_by: ['alertname', 'instance'] 17 | 18 | # When a new group of alerts is created by an incoming alert, wait at 19 | # least 'group_wait' to send the initial notification. 20 | # This way ensures that you get multiple alerts for the same group that start 21 | # firing shortly after another are batched together on the first 22 | # notification. 23 | group_wait: 30s 24 | 25 | # When the first notification was sent, wait 'group_interval' to send a batch 26 | # of new alerts that started firing for that group. 27 | group_interval: 5m 28 | 29 | # If an alert has successfully been sent, wait 'repeat_interval' to 30 | # resend them. 31 | repeat_interval: 3h 32 | 33 | # A default receiver 34 | receiver: "user-a" 35 | 36 | # Inhibition rules allow to mute a set of alerts given that another alert is 37 | # firing. 38 | # We use this to mute any warning-level notifications if the same alert is 39 | # already critical. 40 | inhibit_rules: 41 | - source_match: 42 | severity: 'red' 43 | target_match_re: 44 | severity: ^(blue|yellow|orange)$ 45 | # Apply inhibition if the alertname and instance is the same. 
46 | equal: ['alertname', 'instance'] 47 | - source_match: 48 | severity: 'orange' 49 | target_match_re: 50 | severity: ^(blue|yellow)$ 51 | # Apply inhibition if the alertname and instance is the same. 52 | equal: ['alertname', 'instance'] 53 | - source_match: 54 | severity: 'yellow' 55 | target_match_re: 56 | severity: ^(blue)$ 57 | # Apply inhibition if the alertname and instance is the same. 58 | equal: ['alertname', 'instance'] 59 | 60 | receivers: 61 | - name: 'user-a' 62 | email_configs: 63 | - to: 'user-a@domain.com' -------------------------------------------------------------------------------- /tomcat8/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CMD_NAME=`basename $0` 3 | COMMAND=$1 4 | 5 | function run-prom 6 | { 7 | docker pull prom/prometheus 8 | 9 | mkdir -p prom-data 10 | 11 | HOSTNAME=$(hostname) envsubst < prom-config.yml.tmpl > prom-config.yml 12 | 13 | docker run -d \ 14 | --name=prometheus \ 15 | -p 9090:9090 \ 16 | -v $(pwd):/prometheus-config \ 17 | -v $(pwd)/prom-data:/prometheus \ 18 | prom/prometheus --config.file=/prometheus-config/prom-config.yml 19 | 20 | } 21 | 22 | function run-tomcats 23 | { 24 | docker pull tomcat:8.5-alpine 25 | 26 | docker run -d \ 27 | --name tomcat-1 \ 28 | -v $(pwd):/jmx-exporter \ 29 | -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1100 -Dcom.sun.management.jmxremote.rmi.port=1100 -Djava.rmi.server.hostname=localhost" \ 30 | -p 6060:6060 \ 31 | -p 8080:8080 \ 32 | -p 1100:1100 \ 33 | tomcat:8.5-alpine 34 | 35 | docker run -d \ 36 | --name tomcat-2 \ 37 | -v $(pwd):/jmx-exporter \ 38 | -e CATALINA_OPTS="-Xms64m -Xmx64m 
-javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \ 39 | -p 6061:6060 \ 40 | -p 8081:8080 \ 41 | tomcat:8.5-alpine 42 | 43 | docker run -d \ 44 | --name tomcat-3 \ 45 | -v $(pwd):/jmx-exporter \ 46 | -e CATALINA_OPTS="-Xms64m -Xmx64m -javaagent:/jmx-exporter/jmx_prometheus_javaagent-0.3.1.jar=6060:/jmx-exporter/jmx-exporter-config.yml" \ 47 | -p 6062:6060 \ 48 | -p 8082:8080 \ 49 | tomcat:8.5-alpine 50 | 51 | } 52 | 53 | function run-grafana 54 | { 55 | docker pull grafana/grafana 56 | 57 | mkdir -p grafana-data 58 | 59 | docker run -d \ 60 | --name=grafana \ 61 | -v $(pwd)/grafana-data:/var/lib/grafana \ 62 | -p 3000:3000 \ 63 | grafana/grafana 64 | 65 | } 66 | 67 | function run-alertmanager 68 | { 69 | docker pull prom/alertmanager:master 70 | 71 | mkdir -p alertmanager-data 72 | 73 | docker run -d \ 74 | --name=alertmanager \ 75 | -v $(pwd):/alertmanager-config \ 76 | -v $(pwd)/alertmanager-data:/etc/alertmanager/data \ 77 | -p 9093:9093 \ 78 | prom/alertmanager:master --config.file=/alertmanager-config/alertmanager-config.yml 79 | } 80 | 81 | function run 82 | { 83 | echo 'Run all containers' 84 | 85 | run-tomcats 86 | run-alertmanager 87 | run-prom 88 | run-grafana 89 | 90 | echo '' 91 | echo 'Open browser: http://localhost:8080 for tomcat-1' 92 | echo 'Open browser: http://localhost:6060 for tomcat-1 metrics' 93 | echo '' 94 | echo 'Open browser: http://localhost:8081 for tomcat-2' 95 | echo 'Open browser: http://localhost:6061 for tomcat-2 metrics' 96 | echo '' 97 | echo 'Open browser: http://localhost:8082 for tomcat-3' 98 | echo 'Open browser: http://localhost:6062 for tomcat-3 metrics' 99 | echo '' 100 | echo 'Open browser: http://localhost:9090 for Prometheus' 101 | echo '' 102 | echo 'Open browser: http://localhost:3000 for Grafana (default username/password: admin/admin)' 103 | echo '' 104 | echo 'Open browser: http://localhost:9093 for Alertmanager' 105 | 106 | } 107 | 108 | function start 109 | { 
110 | echo 'Start all containers' 111 | docker start prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3 112 | } 113 | 114 | function stop 115 | { 116 | echo 'Stop all containers' 117 | docker stop prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3 118 | } 119 | 120 | function restart 121 | { 122 | echo 'Restart all containers' 123 | docker restart prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3 124 | } 125 | 126 | function clear-data 127 | { 128 | echo "Clear all containers' data and restart" 129 | rm -rf $(pwd)/prom-data/* $(pwd)/grafana-data/* $(pwd)/alertmanager-data/* 130 | } 131 | 132 | function clear-container 133 | { 134 | echo 'Clear all containers' 135 | stop 136 | docker rm prometheus grafana alertmanager tomcat-1 tomcat-2 tomcat-3 137 | } 138 | 139 | function reload-config 140 | { 141 | echo 'Reload Prometheus config' 142 | docker exec -t prometheus kill -SIGHUP 1 143 | 144 | echo 'Reload Alertmanager config' 145 | docker exec -t alertmanager kill -SIGHUP 1 146 | } 147 | 148 | 149 | function usage 150 | { 151 | echo "Usage: ${CMD_NAME} run|start|stop|restart|reload-config|clear-data|clear-container" 152 | } 153 | 154 | 155 | case $COMMAND in 156 | 'run') 157 | run 158 | ;; 159 | 'start') 160 | start 161 | ;; 162 | 'stop') 163 | stop 164 | ;; 165 | 'restart') 166 | restart 167 | ;; 168 | 'reload-config') 169 | reload-config 170 | ;; 171 | 'clear-data') 172 | clear-data 173 | ;; 174 | 'clear-container') 175 | clear-container 176 | ;; 177 | *) 178 | usage 179 | exit 1 180 | ;; 181 | esac 182 | 183 | exit 0 -------------------------------------------------------------------------------- /tomcat8/jmx-exporter-config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputLabelNames: true 3 | lowercaseOutputName: true 4 | whitelistObjectNames: ["java.lang:type=OperatingSystem", "Catalina:*"] 5 | blacklistObjectNames: [] 6 | rules: 7 | - pattern: 
'java.lang<>(committed_virtual_memory|free_physical_memory|free_swap_space|total_physical_memory|total_swap_space)_size:' 8 | name: os_$1_bytes 9 | type: GAUGE 10 | attrNameSnakeCase: true 11 | - pattern: 'java.lang<>((?!process_cpu_time)\w+):' 12 | name: os_$1 13 | type: GAUGE 14 | attrNameSnakeCase: true 15 | - pattern: 'Catalina<>(\w+):' 16 | name: tomcat_$3_total 17 | labels: 18 | port: "$2" 19 | protocol: "$1" 20 | help: Tomcat global $3 21 | type: COUNTER 22 | - pattern: 'Catalina<>serverInfo: (.+)' 23 | name: tomcat_serverinfo 24 | value: 1 25 | labels: 26 | serverInfo: "$1" 27 | type: COUNTER 28 | - pattern: 'Catalina<>(requestCount|processingTime|errorCount):' 29 | name: tomcat_servlet_$3_total 30 | labels: 31 | module: "$1" 32 | servlet: "$2" 33 | help: Tomcat servlet $3 total 34 | type: COUNTER 35 | - pattern: 'Catalina<>(currentThreadCount|currentThreadsBusy|keepAliveCount|connectionCount|acceptCount|acceptorThreadCount|pollerThreadCount|maxThreads|minSpareThreads):' 36 | name: tomcat_threadpool_$3 37 | labels: 38 | port: "$2" 39 | protocol: "$1" 40 | help: Tomcat threadpool $3 41 | type: GAUGE 42 | - pattern: 'Catalina<>(processingTime|sessionCounter|rejectedSessions|expiredSessions):' 43 | name: tomcat_session_$3_total 44 | labels: 45 | context: "$2" 46 | host: "$1" 47 | help: Tomcat session $3 total 48 | type: COUNTER -------------------------------------------------------------------------------- /tomcat8/jmx_prometheus_javaagent-0.3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chanjarster/prometheus-learn/23e9a7b30373a093cd1b44c70c593a7227340fc5/tomcat8/jmx_prometheus_javaagent-0.3.1.jar -------------------------------------------------------------------------------- /tomcat8/prom-alert-rules.yml: -------------------------------------------------------------------------------- 1 | # severity按严重程度由高到低:red、orange、yello、blue 2 | groups: 3 | - name: jvm-alerting 4 | rules: 5 | 
6 | # error count超过request count的 > 30% 7 | - alert: error-too-much 8 | expr: idelta(tomcat_servlet_errorcount_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 0.3 9 | for: 5m 10 | labels: 11 | severity: yellow 12 | annotations: 13 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] error over 30%" 14 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [error / request > 30%] for more than 5 minutes." 15 | 16 | # error count超过request count的 > 50% 17 | - alert: error-too-much 18 | expr: idelta(tomcat_servlet_errorcount_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 0.5 19 | for: 5m 20 | labels: 21 | severity: orange 22 | annotations: 23 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] error over 50%" 24 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [error / request > 50%] for more than 5 minutes." 25 | 26 | # error count超过request count的 > 80% 27 | - alert: error-too-much 28 | expr: idelta(tomcat_servlet_errorcount_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 0.8 29 | for: 5m 30 | labels: 31 | severity: red 32 | annotations: 33 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] error over 80%" 34 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [error / request > 80%] for more than 5 minutes." 
35 | 36 | # 平均处理请求时间超过 3s 37 | - alert: processing-time-too-slow 38 | expr: idelta(tomcat_servlet_processingtime_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 3 * 1000 39 | for: 5m 40 | labels: 41 | severity: yellow 42 | annotations: 43 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] processing time over 3 seconds" 44 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [avg processing time > 3 seconds] for more than 5 minutes. Current value ({{ $value }})" 45 | 46 | # 平均处理请求时间超过 5s 47 | - alert: processing-time-too-slow 48 | expr: idelta(tomcat_servlet_processingtime_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 5 * 1000 49 | for: 5m 50 | labels: 51 | severity: orange 52 | annotations: 53 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] processing time over 5 seconds" 54 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [avg processing time > 5 seconds] for more than 5 minutes. Current value ({{ $value }})" 55 | 56 | # 平均处理请求时间超过 10s 57 | - alert: processing-time-too-slow 58 | expr: idelta(tomcat_servlet_processingtime_total[2m]) / idelta(tomcat_servlet_requestcount_total[2m]) > 10 * 1000 59 | for: 5m 60 | labels: 61 | severity: red 62 | annotations: 63 | summary: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] processing time over 10 seconds" 64 | description: "Tomcat [{{ $labels.instance }}] Module [{{ $labels.module }}] Servlet [{{ $labels.servlet }}] of job {{ $labels.job }} has been in status [avg processing time > 10 seconds] for more than 5 minutes. 
Current value ({{ $value }})" 65 | -------------------------------------------------------------------------------- /tomcat8/prom-config.yml.tmpl: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: 'prometheus' 3 | static_configs: 4 | - targets: 5 | - 'localhost:9090' 6 | 7 | - job_name: 'java' 8 | scrape_interval: 30s 9 | static_configs: 10 | - targets: 11 | - '$HOSTNAME:6060' 12 | - '$HOSTNAME:6061' 13 | - '$HOSTNAME:6062' 14 | 15 | alerting: 16 | alertmanagers: 17 | - static_configs: 18 | - targets: 19 | - '$HOSTNAME:9093' 20 | 21 | rule_files: 22 | - '/prometheus-config/prom-alert-rules.yml' --------------------------------------------------------------------------------