├── .github └── FUNDING.yml ├── README.md ├── alertmanager ├── Dockerfile └── alertmanager.yml ├── docker-compose.yml ├── misc └── dashboard.json └── prometheus ├── Dockerfile ├── alert.rules └── prometheus.yml /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [finestructure, sighmon] 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prometheus & Grafana on a Raspberry Pi 2 | Using docker-compose to run Prometheus & Grafana on a Raspberry Pi 3 | 4 | ## Install Docker & docker-compose 5 | 6 | Instructions from [dev.to](https://dev.to/rohansawant/installing-docker-and-docker-compose-on-the-raspberry-pi-in-5-simple-steps-3mgl) 7 | 8 | * Install docker `curl -sSL https://get.docker.com | sh` 9 | * Add permissions for user pi to run docker commands `sudo usermod -aG docker pi` 10 | * Install docker-compose `sudo pip3 install docker-compose` 11 | 12 | ## Run Prometheus & Grafana 13 | 14 | * Build and run `docker-compose up --build` 15 | * Or run in the background `docker-compose up -d` 16 | * Prometheus should now be running at `http://raspberrypi.local:9090` 17 | * Grafana should now be running at `http://raspberrypi.local:3000` 18 | 19 | ## Using a 32-bit Raspberry Pi 20 | 21 | I've had [a problem similar to this one](https://github.com/prometheus/prometheus/issues/7483) with my 32-bit Raspberry Pi failing during compaction when set up with a retention time of 1 year and scraping every 10 seconds. 22 | 23 | Pinning Prometheus to `1.7.2`, which doesn't use `mmap`, seems to have fixed it. 
To do that, your `docker-compose.yml` might look like this: 24 | 25 | ```yaml 26 | # docker-compose.yml 27 | version: '3' 28 | services: 29 | prometheus: 30 | container_name: prometheus 31 | # For a 32-bit Raspberry Pi 32 | image: prom/prometheus:v1.7.2 33 | build: ./prometheus 34 | volumes: 35 | - prometheus_data:/prometheus 36 | command: 37 | - '--config.file=/etc/prometheus/prometheus.yml' 38 | - '--web.console.libraries=/etc/prometheus/console_libraries' 39 | - '--web.console.templates=/etc/prometheus/consoles' 40 | - '--storage.tsdb.retention.time=1y' 41 | - '--web.enable-lifecycle' 42 | ports: 43 | - "9090:9090" 44 | restart: on-failure 45 | ``` 46 | 47 | ## Prometheus.yml for multiple targets 48 | 49 | If you want to scrape from multiple targets, the `prometheus.yml` could look something like this: 50 | 51 | ```yaml 52 | # prometheus.yml 53 | global: 54 | scrape_interval: 5s 55 | external_labels: 56 | monitor: 'my-monitor' 57 | scrape_configs: 58 | - job_name: 'prometheus' 59 | static_configs: 60 | - targets: ['localhost:9090'] 61 | - job_name: 'node-exporter' 62 | static_configs: 63 | - targets: ['node-exporter:9100'] 64 | - job_name: 'environment' 65 | static_configs: 66 | - targets: ['10.1.1.2:8000'] 67 | labels: 68 | group: 'environment' 69 | location: 'Melbourne' 70 | - targets: ['11.1.1.2:8000'] 71 | labels: 72 | group: 'environment' 73 | location: 'Adelaide' 74 | alerting: 75 | alertmanagers: 76 | - scheme: http 77 | static_configs: 78 | - targets: ['alertmanager:9093'] 79 | #rule_files: 80 | # - 'alert.rules' 81 | ``` 82 | 83 | ## To change the data retention time 84 | 85 | The Prometheus part of your `docker-compose.yml` should add `--storage.tsdb.retention.time=1y` (where `1y` is 1 year), and should look something like this: 86 | 87 | ```yaml 88 | # docker-compose.yml 89 | version: '3' 90 | services: 91 | prometheus: 92 | container_name: prometheus 93 | build: ./prometheus 94 | volumes: 95 | - prometheus_data:/prometheus 96 | command: 97 | - 
'--config.file=/etc/prometheus/prometheus.yml' 98 | - '--web.console.libraries=/etc/prometheus/console_libraries' 99 | - '--web.console.templates=/etc/prometheus/consoles' 100 | - '--storage.tsdb.retention.time=1y' 101 | - '--web.enable-lifecycle' 102 | ports: 103 | - "9090:9090" 104 | restart: on-failure 105 | ``` 106 | 107 | ## To export your data 108 | 109 | Use the [Snapshot API](https://prometheus.io/docs/prometheus/2.1/querying/api/#snapshot). 110 | 111 | * Enable it by passing the flag when running Prometheus `--web.enable-admin-api` 112 | * Curl the Snapshot API: `curl -XPOST http://raspberrypi.local:9090/api/v1/admin/tsdb/snapshot` 113 | 114 | ### Credit 115 | Thanks to [finestructure](https://github.com/finestructure/blogpost-prometheus) for the base. 116 | 117 | -------------------------------------------------------------------------------- /alertmanager/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM prom/alertmanager-linux-armv7 2 | COPY ./alertmanager.yml /alertmanager.yml 3 | -------------------------------------------------------------------------------- /alertmanager/alertmanager.yml: -------------------------------------------------------------------------------- 1 | # alertmanager.yml 2 | route: 3 | receiver: 'slack' 4 | receivers: 5 | - name: 'slack' 6 | slack_configs: 7 | - send_resolved: true 8 | username: 'Prometheus' 9 | channel: '#random' 10 | api_url: 'https://hooks.slack.com/services///' 11 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # docker-compose.yml 2 | version: '3' 3 | services: 4 | prometheus: 5 | container_name: prometheus 6 | # If running on a 32-bit Raspberry Pi, use: 7 | # image: prom/prometheus:v1.7.2 8 | build: ./prometheus 9 | volumes: 10 | - prometheus_data:/prometheus 11 | command: 12 | - 
'--config.file=/etc/prometheus/prometheus.yml' 13 | - '--web.console.libraries=/etc/prometheus/console_libraries' 14 | - '--web.console.templates=/etc/prometheus/consoles' 15 | - '--storage.tsdb.retention.time=1y' 16 | - '--web.enable-lifecycle' 17 | ports: 18 | - "9090:9090" 19 | restart: on-failure 20 | node-exporter: 21 | container_name: node-exporter 22 | image: prom/node-exporter-linux-armv7 23 | ports: 24 | - "9100:9100" 25 | restart: on-failure 26 | grafana: 27 | container_name: grafana 28 | image: grafana/grafana:latest 29 | environment: 30 | - GF_SECURITY_ADMIN_PASSWORD=password 31 | - GF_AUTH_ANONYMOUS_ENABLED=true 32 | - GF_AUTH_DISABLE_LOGIN_FORM=false 33 | - GF_ORG_NAME=Main Org. 34 | - GF_ORG_ROLE=viewer 35 | volumes: 36 | - grafana_data:/var/lib/grafana 37 | depends_on: 38 | - prometheus 39 | ports: 40 | - "3000:3000" 41 | restart: on-failure 42 | alertmanager: 43 | container_name: alertmanager 44 | build: ./alertmanager 45 | command: 46 | - '--config.file=/alertmanager.yml' 47 | ports: 48 | - "9093:9093" 49 | restart: on-failure 50 | volumes: 51 | prometheus_data: {} 52 | grafana_data: {} 53 | dhparam_cache: {} 54 | -------------------------------------------------------------------------------- /misc/dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 1, 3 | "title": "System Monitoring", 4 | "originalTitle": "System Monitoring", 5 | "tags": [], 6 | "style": "dark", 7 | "timezone": "browser", 8 | "editable": true, 9 | "hideControls": true, 10 | "sharedCrosshair": false, 11 | "rows": [ 12 | { 13 | "collapse": false, 14 | "editable": true, 15 | "height": "250px", 16 | "panels": [ 17 | { 18 | "aliasColors": {}, 19 | "bars": false, 20 | "datasource": "My Monitor", 21 | "decimals": null, 22 | "editable": true, 23 | "error": false, 24 | "fill": 0, 25 | "grid": { 26 | "threshold1": 2500000000, 27 | "threshold1Color": "rgba(216, 200, 27, 0.27)", 28 | "threshold2": 1000000000, 29 | 
"threshold2Color": "rgba(234, 112, 112, 0.22)" 30 | }, 31 | "id": 1, 32 | "isNew": true, 33 | "legend": { 34 | "alignAsTable": false, 35 | "avg": false, 36 | "current": false, 37 | "max": false, 38 | "min": false, 39 | "rightSide": false, 40 | "show": true, 41 | "sideWidth": null, 42 | "total": false, 43 | "values": false 44 | }, 45 | "lines": true, 46 | "linewidth": 2, 47 | "links": [], 48 | "nullPointMode": "connected", 49 | "percentage": false, 50 | "pointradius": 5, 51 | "points": false, 52 | "renderer": "flot", 53 | "seriesOverrides": [], 54 | "span": 6, 55 | "stack": false, 56 | "steppedLine": false, 57 | "targets": [ 58 | { 59 | "expr": "node_filesystem_avail{mountpoint=\"/\"}", 60 | "interval": "", 61 | "intervalFactor": 2, 62 | "legendFormat": "{{job}}", 63 | "metric": "", 64 | "refId": "A", 65 | "step": 40 66 | } 67 | ], 68 | "timeFrom": null, 69 | "timeShift": null, 70 | "title": "Free Storage", 71 | "tooltip": { 72 | "msResolution": false, 73 | "shared": true, 74 | "value_type": "cumulative" 75 | }, 76 | "type": "graph", 77 | "xaxis": { 78 | "show": true 79 | }, 80 | "yaxes": [ 81 | { 82 | "format": "bytes", 83 | "label": "", 84 | "logBase": 1, 85 | "max": null, 86 | "min": 0, 87 | "show": true 88 | }, 89 | { 90 | "format": "short", 91 | "label": "", 92 | "logBase": 1, 93 | "max": null, 94 | "min": null, 95 | "show": true 96 | } 97 | ] 98 | }, 99 | { 100 | "aliasColors": {}, 101 | "bars": false, 102 | "datasource": "My Monitor", 103 | "editable": true, 104 | "error": false, 105 | "fill": 0, 106 | "grid": { 107 | "threshold1": 20, 108 | "threshold1Color": "rgba(216, 200, 27, 0.27)", 109 | "threshold2": 10, 110 | "threshold2Color": "rgba(234, 112, 112, 0.22)" 111 | }, 112 | "id": 4, 113 | "isNew": true, 114 | "legend": { 115 | "alignAsTable": false, 116 | "avg": false, 117 | "current": false, 118 | "max": false, 119 | "min": false, 120 | "rightSide": false, 121 | "show": true, 122 | "total": false, 123 | "values": false 124 | }, 125 | "lines": true, 126 | 
"linewidth": 2, 127 | "links": [], 128 | "nullPointMode": "connected", 129 | "percentage": false, 130 | "pointradius": 5, 131 | "points": false, 132 | "renderer": "flot", 133 | "seriesOverrides": [], 134 | "span": 6, 135 | "stack": false, 136 | "steppedLine": false, 137 | "targets": [ 138 | { 139 | "expr": "(node_memory_MemFree + node_memory_SwapFree)/(node_memory_MemTotal + node_memory_SwapTotal)*100", 140 | "intervalFactor": 2, 141 | "legendFormat": "{{job}}", 142 | "refId": "A", 143 | "step": 40 144 | } 145 | ], 146 | "timeFrom": null, 147 | "timeShift": null, 148 | "title": "Free Memory (RAM + Swap)", 149 | "tooltip": { 150 | "msResolution": true, 151 | "shared": true, 152 | "value_type": "cumulative" 153 | }, 154 | "type": "graph", 155 | "xaxis": { 156 | "show": true 157 | }, 158 | "yaxes": [ 159 | { 160 | "format": "percent", 161 | "label": "", 162 | "logBase": 1, 163 | "max": 100, 164 | "min": 0, 165 | "show": true 166 | }, 167 | { 168 | "format": "short", 169 | "label": null, 170 | "logBase": 1, 171 | "max": null, 172 | "min": null, 173 | "show": true 174 | } 175 | ] 176 | } 177 | ], 178 | "title": "Row" 179 | }, 180 | { 181 | "collapse": false, 182 | "editable": true, 183 | "height": "250px", 184 | "panels": [ 185 | { 186 | "aliasColors": {}, 187 | "bars": false, 188 | "datasource": "My Monitor", 189 | "editable": true, 190 | "error": false, 191 | "fill": 0, 192 | "grid": { 193 | "threshold1": null, 194 | "threshold1Color": "rgba(216, 200, 27, 0.27)", 195 | "threshold2": null, 196 | "threshold2Color": "rgba(234, 112, 112, 0.22)" 197 | }, 198 | "id": 2, 199 | "isNew": true, 200 | "legend": { 201 | "alignAsTable": false, 202 | "avg": false, 203 | "current": false, 204 | "max": false, 205 | "min": false, 206 | "rightSide": false, 207 | "show": true, 208 | "total": false, 209 | "values": false 210 | }, 211 | "lines": true, 212 | "linewidth": 2, 213 | "links": [], 214 | "nullPointMode": "connected", 215 | "percentage": false, 216 | "pointradius": 5, 217 | 
"points": false, 218 | "renderer": "flot", 219 | "seriesOverrides": [], 220 | "span": 6, 221 | "stack": false, 222 | "steppedLine": false, 223 | "targets": [ 224 | { 225 | "expr": "node_memory_MemFree", 226 | "intervalFactor": 2, 227 | "legendFormat": "{{job}}", 228 | "metric": "node_memory_MemFree", 229 | "refId": "A", 230 | "step": 40 231 | } 232 | ], 233 | "timeFrom": null, 234 | "timeShift": null, 235 | "title": "Free RAM", 236 | "tooltip": { 237 | "msResolution": false, 238 | "shared": true, 239 | "value_type": "cumulative" 240 | }, 241 | "type": "graph", 242 | "xaxis": { 243 | "show": true 244 | }, 245 | "yaxes": [ 246 | { 247 | "format": "bytes", 248 | "label": "", 249 | "logBase": 1, 250 | "max": null, 251 | "min": null, 252 | "show": true 253 | }, 254 | { 255 | "format": "short", 256 | "label": null, 257 | "logBase": 1, 258 | "max": null, 259 | "min": null, 260 | "show": true 261 | } 262 | ] 263 | }, 264 | { 265 | "aliasColors": {}, 266 | "bars": false, 267 | "datasource": "My Monitor", 268 | "editable": true, 269 | "error": false, 270 | "fill": 0, 271 | "grid": { 272 | "threshold1": 500000000, 273 | "threshold1Color": "rgba(216, 200, 27, 0.27)", 274 | "threshold2": 250000000, 275 | "threshold2Color": "rgba(234, 112, 112, 0.22)" 276 | }, 277 | "id": 3, 278 | "isNew": true, 279 | "legend": { 280 | "alignAsTable": false, 281 | "avg": false, 282 | "current": false, 283 | "max": false, 284 | "min": false, 285 | "rightSide": false, 286 | "show": true, 287 | "total": false, 288 | "values": false 289 | }, 290 | "lines": true, 291 | "linewidth": 2, 292 | "links": [], 293 | "nullPointMode": "connected", 294 | "percentage": false, 295 | "pointradius": 5, 296 | "points": false, 297 | "renderer": "flot", 298 | "seriesOverrides": [], 299 | "span": 6, 300 | "stack": false, 301 | "steppedLine": false, 302 | "targets": [ 303 | { 304 | "expr": "node_memory_SwapFree", 305 | "intervalFactor": 2, 306 | "legendFormat": "{{job}}", 307 | "refId": "A", 308 | "step": 40 309 | } 
310 | ], 311 | "timeFrom": null, 312 | "timeShift": null, 313 | "title": "Free Swap", 314 | "tooltip": { 315 | "msResolution": true, 316 | "shared": true, 317 | "value_type": "cumulative" 318 | }, 319 | "type": "graph", 320 | "xaxis": { 321 | "show": true 322 | }, 323 | "yaxes": [ 324 | { 325 | "format": "bytes", 326 | "label": null, 327 | "logBase": 1, 328 | "max": null, 329 | "min": 0, 330 | "show": true 331 | }, 332 | { 333 | "format": "short", 334 | "label": null, 335 | "logBase": 1, 336 | "max": null, 337 | "min": null, 338 | "show": true 339 | } 340 | ] 341 | } 342 | ], 343 | "title": "New row" 344 | }, 345 | { 346 | "collapse": false, 347 | "editable": true, 348 | "height": "250px", 349 | "panels": [ 350 | { 351 | "aliasColors": {}, 352 | "bars": false, 353 | "datasource": "My Monitor", 354 | "editable": true, 355 | "error": false, 356 | "fill": 1, 357 | "grid": { 358 | "threshold1": null, 359 | "threshold1Color": "rgba(216, 200, 27, 0.27)", 360 | "threshold2": null, 361 | "threshold2Color": "rgba(234, 112, 112, 0.22)" 362 | }, 363 | "id": 6, 364 | "isNew": true, 365 | "legend": { 366 | "avg": false, 367 | "current": false, 368 | "max": false, 369 | "min": false, 370 | "show": true, 371 | "total": false, 372 | "values": false 373 | }, 374 | "lines": true, 375 | "linewidth": 2, 376 | "links": [], 377 | "nullPointMode": "connected", 378 | "percentage": false, 379 | "pointradius": 5, 380 | "points": false, 381 | "renderer": "flot", 382 | "seriesOverrides": [], 383 | "span": 12, 384 | "stack": false, 385 | "steppedLine": false, 386 | "targets": [ 387 | { 388 | "expr": "process_virtual_memory_bytes", 389 | "interval": "", 390 | "intervalFactor": 2, 391 | "legendFormat": "{{job}}", 392 | "metric": "process_virtual_memory_bytes", 393 | "refId": "A", 394 | "step": 20 395 | } 396 | ], 397 | "timeFrom": null, 398 | "timeShift": null, 399 | "title": "Virtual Mem Size", 400 | "tooltip": { 401 | "msResolution": false, 402 | "shared": true, 403 | "value_type": 
"cumulative" 404 | }, 405 | "type": "graph", 406 | "xaxis": { 407 | "show": true 408 | }, 409 | "yaxes": [ 410 | { 411 | "format": "bytes", 412 | "label": null, 413 | "logBase": 1, 414 | "max": null, 415 | "min": null, 416 | "show": true 417 | }, 418 | { 419 | "format": "short", 420 | "label": null, 421 | "logBase": 1, 422 | "max": null, 423 | "min": null, 424 | "show": true 425 | } 426 | ] 427 | } 428 | ], 429 | "title": "New row" 430 | }, 431 | { 432 | "collapse": false, 433 | "editable": true, 434 | "height": "250px", 435 | "panels": [ 436 | { 437 | "aliasColors": {}, 438 | "bars": false, 439 | "datasource": "My Monitor", 440 | "decimals": null, 441 | "editable": true, 442 | "error": false, 443 | "fill": 1, 444 | "grid": { 445 | "threshold1": 1, 446 | "threshold1Color": "rgba(216, 200, 27, 0.27)", 447 | "threshold2": 2, 448 | "threshold2Color": "rgba(234, 112, 112, 0.22)" 449 | }, 450 | "id": 5, 451 | "isNew": true, 452 | "legend": { 453 | "alignAsTable": false, 454 | "avg": false, 455 | "current": false, 456 | "max": false, 457 | "min": false, 458 | "rightSide": false, 459 | "show": true, 460 | "sideWidth": null, 461 | "total": false, 462 | "values": false 463 | }, 464 | "lines": true, 465 | "linewidth": 2, 466 | "links": [], 467 | "nullPointMode": "connected", 468 | "percentage": false, 469 | "pointradius": 5, 470 | "points": false, 471 | "renderer": "flot", 472 | "seriesOverrides": [], 473 | "span": 8, 474 | "stack": false, 475 | "steppedLine": false, 476 | "targets": [ 477 | { 478 | "expr": "node_load1", 479 | "interval": "", 480 | "intervalFactor": 1, 481 | "legendFormat": "{{job}}", 482 | "metric": "node_load1", 483 | "refId": "A", 484 | "step": 20 485 | } 486 | ], 487 | "timeFrom": null, 488 | "timeShift": null, 489 | "title": "CPU Load", 490 | "tooltip": { 491 | "msResolution": false, 492 | "shared": true, 493 | "value_type": "cumulative" 494 | }, 495 | "type": "graph", 496 | "xaxis": { 497 | "show": true 498 | }, 499 | "yaxes": [ 500 | { 501 | "format": 
"short", 502 | "label": null, 503 | "logBase": 1, 504 | "max": null, 505 | "min": null, 506 | "show": true 507 | }, 508 | { 509 | "format": "short", 510 | "label": null, 511 | "logBase": 1, 512 | "max": null, 513 | "min": null, 514 | "show": true 515 | } 516 | ] 517 | } 518 | ], 519 | "title": "New row" 520 | }, 521 | { 522 | "collapse": false, 523 | "editable": true, 524 | "height": 229, 525 | "panels": [ 526 | { 527 | "aliasColors": { 528 | "high_load": "#E24D42" 529 | }, 530 | "bars": false, 531 | "datasource": "My Monitor", 532 | "editable": true, 533 | "error": false, 534 | "fill": 2, 535 | "grid": { 536 | "threshold1": null, 537 | "threshold1Color": "rgba(216, 200, 27, 0.27)", 538 | "threshold2": null, 539 | "threshold2Color": "rgba(234, 112, 112, 0.22)" 540 | }, 541 | "id": 8, 542 | "isNew": true, 543 | "legend": { 544 | "alignAsTable": false, 545 | "avg": false, 546 | "current": false, 547 | "max": false, 548 | "min": false, 549 | "rightSide": false, 550 | "show": false, 551 | "total": false, 552 | "values": false 553 | }, 554 | "lines": true, 555 | "linewidth": 2, 556 | "links": [], 557 | "nullPointMode": "connected", 558 | "percentage": false, 559 | "pointradius": 1, 560 | "points": false, 561 | "renderer": "flot", 562 | "seriesOverrides": [], 563 | "span": 8, 564 | "stack": false, 565 | "steppedLine": true, 566 | "targets": [ 567 | { 568 | "expr": "ALERTS{alertname=\"high_load\", alertstate=\"firing\"}", 569 | "interval": "", 570 | "intervalFactor": 1, 571 | "legendFormat": "{{alertname}}", 572 | "metric": "", 573 | "refId": "A", 574 | "step": 20 575 | } 576 | ], 577 | "timeFrom": null, 578 | "timeShift": null, 579 | "title": "High Load Alert", 580 | "tooltip": { 581 | "msResolution": true, 582 | "shared": true, 583 | "value_type": "cumulative" 584 | }, 585 | "type": "graph", 586 | "xaxis": { 587 | "show": true 588 | }, 589 | "yaxes": [ 590 | { 591 | "format": "short", 592 | "label": null, 593 | "logBase": 1, 594 | "max": null, 595 | "min": null, 596 | 
"show": true 597 | }, 598 | { 599 | "format": "short", 600 | "label": null, 601 | "logBase": 1, 602 | "max": null, 603 | "min": null, 604 | "show": true 605 | } 606 | ] 607 | }, 608 | { 609 | "cacheTimeout": null, 610 | "colorBackground": true, 611 | "colorValue": false, 612 | "colors": [ 613 | "rgba(50, 172, 45, 0.97)", 614 | "rgba(237, 129, 40, 0.89)", 615 | "rgba(245, 54, 54, 0.9)" 616 | ], 617 | "datasource": "My Monitor", 618 | "decimals": null, 619 | "editable": true, 620 | "error": false, 621 | "format": "none", 622 | "gauge": { 623 | "maxValue": 100, 624 | "minValue": 0, 625 | "show": false, 626 | "thresholdLabels": false, 627 | "thresholdMarkers": true 628 | }, 629 | "id": 7, 630 | "interval": null, 631 | "isNew": true, 632 | "links": [], 633 | "maxDataPoints": 100, 634 | "nullPointMode": "connected", 635 | "nullText": null, 636 | "postfix": "", 637 | "postfixFontSize": "50%", 638 | "prefix": "", 639 | "prefixFontSize": "50%", 640 | "span": 4, 641 | "sparkline": { 642 | "fillColor": "rgba(31, 118, 189, 0.18)", 643 | "full": false, 644 | "lineColor": "rgb(31, 120, 193)", 645 | "show": false 646 | }, 647 | "targets": [ 648 | { 649 | "expr": "ALERTS{alertname=\"high_load\",alertstate=\"firing\"}", 650 | "interval": "", 651 | "intervalFactor": 1, 652 | "legendFormat": "", 653 | "metric": "ALERTS", 654 | "refId": "A", 655 | "step": 120 656 | } 657 | ], 658 | "thresholds": "0.6,0.9", 659 | "title": "Load Status", 660 | "type": "singlestat", 661 | "valueFontSize": "80%", 662 | "valueMaps": [ 663 | { 664 | "op": "=", 665 | "text": "HIGH", 666 | "value": "1" 667 | }, 668 | { 669 | "op": "=", 670 | "text": "OK", 671 | "value": "0" 672 | } 673 | ], 674 | "valueName": "current" 675 | } 676 | ], 677 | "title": "New row" 678 | } 679 | ], 680 | "time": { 681 | "from": "now-3h", 682 | "to": "now" 683 | }, 684 | "timepicker": { 685 | "refresh_intervals": [ 686 | "5s", 687 | "10s", 688 | "30s", 689 | "1m", 690 | "5m", 691 | "15m", 692 | "30m", 693 | "1h", 694 | "2h", 695 | 
"1d" 696 | ], 697 | "time_options": [ 698 | "5m", 699 | "15m", 700 | "1h", 701 | "6h", 702 | "12h", 703 | "24h", 704 | "2d", 705 | "7d", 706 | "30d" 707 | ] 708 | }, 709 | "templating": { 710 | "list": [] 711 | }, 712 | "annotations": { 713 | "list": [] 714 | }, 715 | "refresh": "10s", 716 | "schemaVersion": 12, 717 | "version": 21, 718 | "links": [] 719 | } -------------------------------------------------------------------------------- /prometheus/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM prom/prometheus 2 | COPY ./alert.rules /etc/prometheus/alert.rules 3 | COPY ./prometheus.yml /etc/prometheus/prometheus.yml 4 | -------------------------------------------------------------------------------- /prometheus/alert.rules: -------------------------------------------------------------------------------- 1 | ALERT service_down 2 | IF up == 0 3 | LABELS { severity = "critical" } 4 | ANNOTATIONS { 5 | summary = "Instance {{ $labels.instance }} down", 6 | description = "{{ $labels.instance }} of job {{ $labels.job }} is down.", 7 | } 8 | 9 | ALERT high_load 10 | IF node_load1 > 0.5 11 | ANNOTATIONS { 12 | summary = "Instance {{ $labels.instance }} under high load", 13 | description = "{{ $labels.instance }} of job {{ $labels.job }} is under high load.", 14 | } 15 | -------------------------------------------------------------------------------- /prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | # prometheus.yml 2 | global: 3 | scrape_interval: 5s 4 | external_labels: 5 | monitor: 'my-monitor' 6 | scrape_configs: 7 | - job_name: 'prometheus' 8 | static_configs: 9 | - targets: ['localhost:9090'] 10 | - job_name: 'node-exporter' 11 | static_configs: 12 | - targets: ['node-exporter:9100'] 13 | alerting: 14 | alertmanagers: 15 | - scheme: http 16 | static_configs: 17 | - targets: ['alertmanager:9093'] 18 | #rule_files: 19 | # - 'alert.rules' 20 | 
--------------------------------------------------------------------------------