├── 2 ├── 2-2-rules.yml ├── 2-3-alertmanager.yml └── 2-1-prometheus.yml ├── 3 ├── 3-2-prometheus.yml ├── 3-8-example.py ├── 3-1-example.py ├── 3-12-unitesting.py ├── 3-3-example.py ├── 3-10-example.py ├── 3-11-example.py ├── 3-9-example.py ├── 3-7-example.py ├── 3-6-example.py ├── 3-5-example.py └── 3-4-example.py ├── 4 ├── 4-3-config.py ├── 4-11-prometheus.yml ├── 4-13-graphite-bridge.py ├── 4-14-parse.py ├── 4-1-wsgi.py ├── 4-2-twisted.py ├── 4-12-pushgateway.py ├── 4-7-java-httpserver │ ├── src │ │ └── main │ │ │ └── java │ │ │ └── io │ │ │ └── robustperception │ │ │ └── book_examples │ │ │ └── java_httpserver │ │ │ └── Example.java │ └── pom.xml ├── 4-4-app.py ├── 4-6-example.go └── 4-9-java-servlet │ ├── src │ └── main │ │ └── java │ │ └── io │ │ └── robustperception │ │ └── book_examples │ │ └── java_servlet │ │ └── Example.java │ └── pom.xml ├── 5 └── 5-1-example.py ├── 7 └── 7-2-crontab ├── 8 ├── 8-5-prometheus.yml ├── 8-7-prometheus.yml ├── 8-1-prometheus.yml ├── 8-3-prometheus.yml ├── 8-8-prometheus.yml ├── 8-16-prometheus.yml ├── 8-25-prometheus.yml ├── 8-9-prometheus.yml ├── 8-20-prometheus.yml ├── 8-11-prometheus.yml ├── 8-12-prometheus.yml ├── 8-21-prometheus.yml ├── 8-23-prometheus.yml ├── 8-15-prometheus.yml ├── 8-17-prometheus.yml ├── 8-13-prometheus.yml ├── 8-14-prometheus.yml ├── 8-10-prometheus.yml ├── 8-24-prometheus.yml ├── 8-4-filesd.json ├── 8-22-prometheus.yml ├── 8-19-prometheus.yml └── 8-18-prometheus.yml ├── 9 ├── 9-1-prometheus.yml ├── 9-5-prometheus.yml ├── 9-6-prometheus.yml ├── 9-10-prometheus.yml ├── 9-9-prometheus.yml ├── 9-8-prometheus.yml ├── 9-7-prometheus.yml ├── kube-state-metrics.yml └── prometheus-deployment.yml ├── 10 ├── 10-7-prometheus.yml ├── 10-2-prometheus.yml ├── 10-5-prometheus.yml ├── 10-3-haproxy.cfg ├── 10-10-prometheus.yml ├── 10-9-prometheus.yml └── 10-6-grok.yml ├── 11 └── 11-2-prometheus.yml ├── 12 ├── 12-3-consul_metrics.py └── 12-2-consul_metrics.go ├── 17 ├── 17-2-rules.yml └── 17-1-prometheus.yml ├── 19 
├── 19-1-webhook_receiver.py └── combined-alertmanager.yml ├── README.md └── LICENSE /8/8-5-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: file 3 | file_sd_configs: 4 | - files: 5 | - '*.json' 6 | -------------------------------------------------------------------------------- /8/8-7-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: consul 3 | consul_sd_configs: 4 | - server: 'localhost:8500' 5 | -------------------------------------------------------------------------------- /2/2-2-rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: example 3 | rules: 4 | - alert: InstanceDown 5 | expr: up == 0 6 | for: 1m 7 | -------------------------------------------------------------------------------- /8/8-1-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: prometheus 3 | static_configs: 4 | - targets: 5 | - localhost:9090 6 | -------------------------------------------------------------------------------- /4/4-3-config.py: -------------------------------------------------------------------------------- 1 | from prometheus_client import multiprocess 2 | 3 | def child_exit(server, worker): 4 | multiprocess.mark_process_dead(worker.pid) 5 | -------------------------------------------------------------------------------- /4/4-11-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: pushgateway 3 | honor_labels: true 4 | static_configs: 5 | - targets: 6 | - localhost:9091 7 | -------------------------------------------------------------------------------- /10/10-7-prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 
10s 3 | scrape_configs: 4 | - job_name: grok 5 | static_configs: 6 | - targets: 7 | - localhost:9144 8 | -------------------------------------------------------------------------------- /3/3-2-prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 10s 3 | scrape_configs: 4 | - job_name: example 5 | static_configs: 6 | - targets: 7 | - localhost:8000 8 | -------------------------------------------------------------------------------- /8/8-3-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: node 3 | static_configs: 4 | - targets: 5 | - host1:9100 6 | - targets: 7 | - host2:9100 8 | -------------------------------------------------------------------------------- /10/10-2-prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 10s 3 | scrape_configs: 4 | - job_name: consul 5 | static_configs: 6 | - targets: 7 | - localhost:9107 8 | -------------------------------------------------------------------------------- /10/10-5-prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 10s 3 | scrape_configs: 4 | - job_name: haproxy 5 | static_configs: 6 | - targets: 7 | - localhost:9101 8 | -------------------------------------------------------------------------------- /8/8-8-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: ec2 3 | ec2_sd_configs: 4 | - region: 5 | access_key: 6 | secret_key: 7 | -------------------------------------------------------------------------------- /11/11-2-prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 10s 3 | scrape_configs: 4 | - job_name: application_name 5 | static_configs: 6 | - 
import time
from prometheus_client.bridge.graphite import GraphiteBridge

# Host/port pair of the Graphite endpoint the bridge connects to.
GRAPHITE_ADDRESS = ('graphite.your.org', 2003)
# Argument handed to GraphiteBridge.start(); see the client library docs
# for its exact meaning (push interval).
PUSH_INTERVAL = 10

bridge = GraphiteBridge(GRAPHITE_ADDRESS)
bridge.start(PUSH_INTERVAL)

# Keep the main thread alive; the script has nothing else to do.
while True:
    time.sleep(1)
-------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: file 3 | file_sd_configs: 4 | - files: 5 | - '*.json' 6 | relabel_configs: 7 | - source_labels: [team] 8 | regex: infra 9 | action: keep 10 | -------------------------------------------------------------------------------- /7/7-2-crontab: -------------------------------------------------------------------------------- 1 | TEXTFILE=/path/to/textfile/directory 2 | 3 | # This must all be on one line 4 | */5 * * * * root (echo -n 'shadow_entries '; grep -c . /etc/shadow) > $TEXTFILE/shadow.prom.$$ && mv $TEXTFILE/shadow.prom.$$ $TEXTFILE/shadow.prom 5 | -------------------------------------------------------------------------------- /8/8-20-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: node 3 | consul_sd_configs: 4 | - server: 'localhost:8500' 5 | relabel_configs: 6 | - source_labels: [__meta_consul_tags] 7 | regex: '.*,prod,.*' 8 | action: keep 9 | -------------------------------------------------------------------------------- /8/8-11-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: file 3 | file_sd_configs: 4 | - files: 5 | - '*.json' 6 | relabel_configs: 7 | - source_labels: [team] 8 | regex: infra|monitoring 9 | action: keep 10 | -------------------------------------------------------------------------------- /9/9-5-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: 'kubelet' 3 | kubernetes_sd_configs: 4 | - role: node 5 | scheme: https 6 | tls_config: 7 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 8 | insecure_skip_verify: true 9 | -------------------------------------------------------------------------------- /8/8-12-prometheus.yml: 
from prometheus_client.parser import text_string_to_metric_families

# Text-format exposition containing a single sample.
EXPOSITION = u"counter_total 1.0\n"
# Positional template: the first three fields of each sample are printed.
LINE_TEMPLATE = "Name: {0} Labels: {1} Value: {2}"

# Walk every metric family in the parsed text and print each of its samples.
for metric_family in text_string_to_metric_families(EXPOSITION):
    for entry in metric_family.samples:
        print(LINE_TEMPLATE.format(*entry))
-------------------------------------------------------------------------------- /17/17-2-rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: example 3 | rules: 4 | - record: job:process_cpu_seconds:rate5m 5 | expr: sum without(instance)(rate(process_cpu_seconds_total[5m])) 6 | - record: job:process_open_fds:max 7 | expr: max without(instance)(process_open_fds) 8 | -------------------------------------------------------------------------------- /8/8-15-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: file 3 | file_sd_configs: 4 | - files: 5 | - '*.json' 6 | relabel_configs: 7 | - source_labels: [] 8 | regex: '(.*)' 9 | replacement: '${1}' 10 | target_label: team 11 | action: replace 12 | -------------------------------------------------------------------------------- /8/8-17-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: node 3 | consul_sd_configs: 4 | - server: 'localhost:8500' 5 | relabel_configs: 6 | - source_labels: [__meta_consul_address] 7 | regex: '(.*)' 8 | replacement: '${1}:9100' 9 | target_label: __address__ 10 | -------------------------------------------------------------------------------- /8/8-13-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: file 3 | file_sd_configs: 4 | - files: 5 | - '*.json' 6 | relabel_configs: 7 | - source_labels: [team] 8 | regex: monitoring 9 | replacement: monitor 10 | target_label: team 11 | action: replace 12 | -------------------------------------------------------------------------------- /8/8-14-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: file 3 | file_sd_configs: 4 | - files: 5 | - '*.json' 6 | relabel_configs: 7 | - 
# Scrape config for targets discovered from local JSON file_sd files.
#
# NOTE(review): the two relabel rules below are applied one after the other,
# and each `keep` retains only targets whose `team` label matches its regex.
# A target cannot have team=infra and team=monitoring at the same time, so
# chaining them this way appears to leave no targets. To keep either team,
# a single rule with `regex: infra|monitoring` would be needed - confirm
# whether this file is meant as a "what not to do" illustration.
scrape_configs:
 - job_name: file
   file_sd_configs:
    - files:
       - '*.json'
   relabel_configs:
    # First filter: only team=infra survives.
    - source_labels: [team]
      regex: infra
      action: keep
    # Second filter: only team=monitoring survives (of what is left).
    - source_labels: [team]
      regex: monitoring
      action: keep
import time
from prometheus_client import start_http_server
from prometheus_client import Gauge

# Gauge whose value is obtained by calling a function rather than being set
# explicitly; here the function is the wall-clock time.
TIME = Gauge('time_seconds',
             'The current time.')
TIME.set_function(time.time)


def _idle_forever():
    """Block the main thread so the metrics endpoint keeps serving."""
    while True:
        time.sleep(1)


if __name__ == "__main__":
    start_http_server(8000)
    _idle_forever()
-------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: example 3 | consul_sd_configs: 4 | - server: 'localhost:8500' 5 | scrape_timeout: 5s 6 | metrics_path: /admin/metrics 7 | params: 8 | foo: [bar] 9 | scheme: https 10 | tls_config: 11 | insecure_skip_verify: true 12 | basic_auth: 13 | username: brian 14 | password: hunter2 15 | -------------------------------------------------------------------------------- /8/8-19-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: ec2 3 | ec2_sd_configs: 4 | - region: 5 | access_key: 6 | secret_key: 7 | relabel_configs: 8 | - source_labels: [__meta_ec2_tag_service] 9 | target_label: job 10 | - regex: __meta_ec2_public_tag_monitor_(.*) 11 | replacement: '${1}' 12 | action: labelmap 13 | -------------------------------------------------------------------------------- /8/8-18-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: consul 3 | consul_sd_configs: 4 | - server: 'localhost:8500' 5 | relabel_configs: 6 | - source_labels: [__meta_consul_address] 7 | regex: '(.*)' 8 | replacement: '${1}:9100' 9 | target_label: __address__ 10 | - source_labels: [__meta_consul_node] 11 | regex: '(.*)' 12 | replacement: '${1}:9100' 13 | target_label: instance 14 | -------------------------------------------------------------------------------- /2/2-1-prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 10s 3 | evaluation_interval: 10s 4 | rule_files: 5 | - rules.yml 6 | alerting: 7 | alertmanagers: 8 | - static_configs: 9 | - targets: 10 | - localhost:9093 11 | scrape_configs: 12 | - job_name: prometheus 13 | static_configs: 14 | - targets: 15 | - localhost:9090 16 | - job_name: node 17 | static_configs: 18 | - targets: 19 | - localhost:9100 20 | 
def my_app(environ, start_fn):
    """WSGI application: delegate ``/metrics`` to ``metrics_app``, greet otherwise.

    Args:
        environ: WSGI environment mapping for the current request.
        start_fn: WSGI ``start_response`` callable.

    Returns:
        The response body as an iterable of byte strings.
    """
    # PEP 3333 lets a server omit PATH_INFO when it would be empty, so look it
    # up with a default instead of indexing (which would raise KeyError).
    if environ.get('PATH_INFO', '') == '/metrics':
        return metrics_app(environ, start_fn)
    start_fn('200 OK', [])
    return [b'Hello World']
relabel_configs: 2 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] 3 | regex: (.+) 4 | target_label: __scheme__ 5 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] 6 | regex: (.+) 7 | target_label: __metrics_path__ 8 | - source_labels: 9 | - __address__ 10 | - __meta_kubernetes_service_annotation_prometheus_io_port 11 | regex: ([^:]+)(:\d+)?;(\d+) 12 | replacement: ${1}:${3} 13 | target_label: __address__ 14 | -------------------------------------------------------------------------------- /10/10-10-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: node 3 | metrics_path: /probe 4 | params: 5 | module: [ssh_banner] 6 | consul_sd_configs: 7 | - server: 'localhost:8500' 8 | relabel_configs: 9 | - source_labels: [__meta_consul_address] 10 | regex: '(.*)' 11 | replacement: '${1}:22' 12 | target_label: __param_target 13 | - source_labels: [__param_target] 14 | target_label: instance 15 | - target_label: __address__ 16 | replacement: 127.0.0.1:9115 17 | -------------------------------------------------------------------------------- /9/9-8-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: 'k8services' 3 | kubernetes_sd_configs: 4 | - role: endpoints 5 | relabel_configs: 6 | - source_labels: 7 | - __meta_kubernetes_namespace 8 | - __meta_kubernetes_service_name 9 | regex: default;kubernetes 10 | action: drop 11 | - source_labels: 12 | - __meta_kubernetes_namespace 13 | regex: default 14 | action: keep 15 | - source_labels: [__meta_kubernetes_service_name] 16 | target_label: job 17 | -------------------------------------------------------------------------------- /10/10-9-prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: blackbox 3 | metrics_path: /probe 4 | params: 5 | 
class LogHandler(BaseHTTPRequestHandler):
    """HTTP handler that prints every entry of the ``alerts`` list it is POSTed."""

    def do_POST(self):
        """Parse the JSON request body, print its alerts, then reply.

        The body is validated *before* a status is sent: a missing or invalid
        ``Content-Length``, undecodable/invalid JSON, or a payload without an
        iterable ``alerts`` member is answered with 400 instead of raising out
        of the handler after a 200 has already been written.
        """
        try:
            length = int(self.headers.get('Content-Length', ''))
            if length < 0:
                # read(-1) would block until the peer closes the connection.
                raise ValueError('negative Content-Length')
            data = json.loads(self.rfile.read(length).decode('utf-8'))
            alerts = list(data["alerts"])
        except (TypeError, ValueError, KeyError):
            # ValueError also covers JSONDecodeError and UnicodeDecodeError.
            self.send_response(400)
            self.end_headers()
            return
        for alert in alerts:
            print(alert)
        self.send_response(200)
        self.end_headers()
http.server 2 | from prometheus_client import start_http_server 3 | from prometheus_client import Counter 4 | 5 | REQUESTS = Counter('hello_worlds_total', 6 | 'Hello Worlds requested.') 7 | 8 | class MyHandler(http.server.BaseHTTPRequestHandler): 9 | def do_GET(self): 10 | REQUESTS.inc() 11 | self.send_response(200) 12 | self.end_headers() 13 | self.wfile.write(b"Hello World") 14 | 15 | if __name__ == "__main__": 16 | start_http_server(8000) 17 | server = http.server.HTTPServer(('localhost', 8001), MyHandler) 18 | server.serve_forever() 19 | -------------------------------------------------------------------------------- /4/4-2-twisted.py: -------------------------------------------------------------------------------- 1 | from prometheus_client import make_wsgi_app 2 | from twisted.web.server import Site 3 | from twisted.web.wsgi import WSGIResource 4 | from twisted.web.resource import Resource 5 | from twisted.internet import reactor 6 | 7 | metrics_resource = WSGIResource( 8 | reactor, reactor.getThreadPool(), make_wsgi_app()) 9 | 10 | class HelloWorld(Resource): 11 | isLeaf = False 12 | def render_GET(self, request): 13 | return b"Hello World" 14 | 15 | root = HelloWorld() 16 | root.putChild(b'metrics', metrics_resource) 17 | 18 | reactor.listenTCP(8000, Site(root)) 19 | reactor.run() 20 | -------------------------------------------------------------------------------- /3/3-10-example.py: -------------------------------------------------------------------------------- 1 | import http.server 2 | from prometheus_client import start_http_server 3 | from prometheus_client import Summary 4 | 5 | LATENCY = Summary('hello_world_latency_seconds', 6 | 'Time for a request Hello World.') 7 | 8 | class MyHandler(http.server.BaseHTTPRequestHandler): 9 | @LATENCY.time() 10 | def do_GET(self): 11 | self.send_response(200) 12 | self.end_headers() 13 | self.wfile.write(b"Hello World") 14 | 15 | if __name__ == "__main__": 16 | start_http_server(8000) 17 | server = 
http.server.HTTPServer(('localhost', 8001), MyHandler) 18 | server.serve_forever() 19 | -------------------------------------------------------------------------------- /3/3-11-example.py: -------------------------------------------------------------------------------- 1 | import http.server 2 | from prometheus_client import start_http_server 3 | from prometheus_client import Histogram 4 | 5 | LATENCY = Histogram('hello_world_latency_seconds', 6 | 'Time for a request Hello World.') 7 | 8 | class MyHandler(http.server.BaseHTTPRequestHandler): 9 | @LATENCY.time() 10 | def do_GET(self): 11 | self.send_response(200) 12 | self.end_headers() 13 | self.wfile.write(b"Hello World") 14 | 15 | if __name__ == "__main__": 16 | start_http_server(8000) 17 | server = http.server.HTTPServer(('localhost', 8001), MyHandler) 18 | server.serve_forever() 19 | -------------------------------------------------------------------------------- /5/5-1-example.py: -------------------------------------------------------------------------------- 1 | import http.server 2 | from prometheus_client import start_http_server, Counter 3 | 4 | REQUESTS = Counter('hello_worlds_total', 5 | 'Hello Worlds requested.', 6 | labelnames=['path']) 7 | 8 | class MyHandler(http.server.BaseHTTPRequestHandler): 9 | def do_GET(self): 10 | REQUESTS.labels(self.path).inc() 11 | self.send_response(200) 12 | self.end_headers() 13 | self.wfile.write(b"Hello World") 14 | 15 | if __name__ == "__main__": 16 | start_http_server(8000) 17 | server = http.server.HTTPServer(('localhost', 8001), MyHandler) 18 | server.serve_forever() 19 | -------------------------------------------------------------------------------- /4/4-12-pushgateway.py: -------------------------------------------------------------------------------- 1 | from prometheus_client import CollectorRegistry, Gauge, pushadd_to_gateway 2 | 3 | registry = CollectorRegistry() 4 | duration = Gauge('my_job_duration_seconds', 5 | 'Duration of my batch job in seconds', 
# Grok exporter configuration: read a log file, match each line against a
# grok pattern, and expose a counter and a histogram derived from the matches.
global:
    config_version: 2
input:
    type: file
    path: example.log
    readall: true  # Use false in production
grok:
    # Custom pattern definitions referenced by the `match` expressions below.
    additional_patterns:
    - 'METHOD [A-Z]+'
    - 'PATH [^ ]+'
    - 'NUMBER [0-9.]+'
metrics:
    # One increment per matching line, labelled with the captured `path`.
    - type: counter
      name: log_http_requests_total
      help: HTTP requests
      match: '%{METHOD} %{PATH:path} %{NUMBER:latency}'
      labels:
          path: '{{.path}}'
    # Observes the captured `latency` field of each matching line.
    # NOTE(review): this is a histogram but its name ends in `_total`, a
    # suffix Prometheus naming conventions associate with counters. Renaming
    # would change the exposed series names, so confirm before changing.
    - type: histogram
      name: log_http_request_latency_seconds_total
      help: HTTP request latency
      match: '%{METHOD} %{PATH:path} %{NUMBER:latency}'
      value: '{{.latency}}'
server:
    # Port the exporter listens on.
    port: 9144
@IN_PROGRESS.track_inprogress()
def app(environ, start_fn):
    """WSGI application that counts requests and serves multiprocess metrics.

    Every call increments ``REQUESTS``. A request for ``/metrics`` is answered
    from a fresh ``CollectorRegistry`` populated by ``MultiProcessCollector``;
    any other path receives a plain greeting.

    Args:
        environ: WSGI environment mapping for the current request.
        start_fn: WSGI ``start_response`` callable.

    Returns:
        The response body as an iterable of byte strings.
    """
    REQUESTS.inc()
    # PEP 3333 lets a server omit PATH_INFO when it would be empty, so look it
    # up with a default instead of indexing (which would raise KeyError).
    if environ.get('PATH_INFO', '') == '/metrics':
        # A new registry is built for each scrape and handed to the collector.
        registry = CollectorRegistry()
        multiprocess.MultiProcessCollector(registry)
        metrics_app = make_wsgi_app(registry)
        return metrics_app(environ, start_fn)
    start_fn('200 OK', [])
    return [b'Hello World']
"github.com/prometheus/client_golang/prometheus/promauto" 9 | "github.com/prometheus/client_golang/prometheus/promhttp" 10 | ) 11 | 12 | var ( 13 | requests = promauto.NewCounter( 14 | prometheus.CounterOpts{ 15 | Name: "hello_worlds_total", 16 | Help: "Hello Worlds requested.", 17 | }) 18 | ) 19 | 20 | func handler(w http.ResponseWriter, r *http.Request) { 21 | requests.Inc() 22 | w.Write([]byte("Hello World")) 23 | } 24 | 25 | func main() { 26 | http.HandleFunc("/", handler) 27 | http.Handle("/metrics", promhttp.Handler()) 28 | log.Fatal(http.ListenAndServe(":8000", nil)) 29 | } 30 | -------------------------------------------------------------------------------- /3/3-7-example.py: -------------------------------------------------------------------------------- 1 | import http.server 2 | from prometheus_client import start_http_server 3 | from prometheus_client import Gauge 4 | 5 | INPROGRESS = Gauge('hello_worlds_inprogress', 6 | 'Number of Hello Worlds in progress.') 7 | LAST = Gauge('hello_world_last_time_seconds', 8 | 'The last time a Hello World was served.') 9 | 10 | class MyHandler(http.server.BaseHTTPRequestHandler): 11 | @INPROGRESS.track_inprogress() 12 | def do_GET(self): 13 | self.send_response(200) 14 | self.end_headers() 15 | self.wfile.write(b"Hello World") 16 | LAST.set_to_current_time() 17 | 18 | if __name__ == "__main__": 19 | start_http_server(8000) 20 | server = http.server.HTTPServer(('localhost', 8001), MyHandler) 21 | server.serve_forever() 22 | -------------------------------------------------------------------------------- /3/3-6-example.py: -------------------------------------------------------------------------------- 1 | import http.server 2 | import time 3 | from prometheus_client import start_http_server 4 | from prometheus_client import Gauge 5 | 6 | INPROGRESS = Gauge('hello_worlds_inprogress', 7 | 'Number of Hello Worlds in progress.') 8 | LAST = Gauge('hello_world_last_time_seconds', 9 | 'The last time a Hello World was 
served.') 10 | 11 | class MyHandler(http.server.BaseHTTPRequestHandler): 12 | def do_GET(self): 13 | INPROGRESS.inc() 14 | self.send_response(200) 15 | self.end_headers() 16 | self.wfile.write(b"Hello World") 17 | LAST.set(time.time()) 18 | INPROGRESS.dec() 19 | 20 | if __name__ == "__main__": 21 | start_http_server(8000) 22 | server = http.server.HTTPServer(('localhost', 8001), MyHandler) 23 | server.serve_forever() 24 | -------------------------------------------------------------------------------- /9/kube-state-metrics.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1beta2 3 | kind: Deployment 4 | metadata: 5 | name: kube-state-metrics 6 | spec: 7 | selector: 8 | matchLabels: 9 | app: kube-state-metrics 10 | replicas: 1 11 | template: 12 | metadata: 13 | labels: 14 | app: kube-state-metrics 15 | spec: 16 | serviceAccountName: prometheus 17 | containers: 18 | - name: kube-state-metrics 19 | image: quay.io/coreos/kube-state-metrics:v1.2.0 20 | ports: 21 | - containerPort: 8080 22 | name: default 23 | --- 24 | kind: Service 25 | apiVersion: v1 26 | metadata: 27 | name: kube-state-metrics 28 | spec: 29 | selector: 30 | app: kube-state-metrics 31 | type: LoadBalancer 32 | ports: 33 | - protocol: TCP 34 | port: 8080 35 | targetPort: 8080 36 | -------------------------------------------------------------------------------- /3/3-5-example.py: -------------------------------------------------------------------------------- 1 | import http.server 2 | import random 3 | from prometheus_client import start_http_server 4 | from prometheus_client import Counter 5 | 6 | REQUESTS = Counter('hello_worlds_total', 7 | 'Hello Worlds requested.') 8 | SALES = Counter('hello_world_sales_euro_total', 9 | 'Euros made serving Hello World.') 10 | 11 | class MyHandler(http.server.BaseHTTPRequestHandler): 12 | def do_GET(self): 13 | REQUESTS.inc() 14 | euros = random.random() 15 | SALES.inc(euros) 16 | 
self.send_response(200) 17 | self.end_headers() 18 | self.wfile.write("Hello World for {} euros.".format(euros).encode()) 19 | 20 | if __name__ == "__main__": 21 | start_http_server(8000) 22 | server = http.server.HTTPServer(('localhost', 8001), MyHandler) 23 | server.serve_forever() 24 | -------------------------------------------------------------------------------- /3/3-4-example.py: -------------------------------------------------------------------------------- 1 | import http.server 2 | import random 3 | from prometheus_client import start_http_server 4 | from prometheus_client import Counter 5 | 6 | REQUESTS = Counter('hello_worlds_total', 7 | 'Hello Worlds requested.') 8 | EXCEPTIONS = Counter('hello_world_exceptions_total', 9 | 'Exceptions serving Hello World.') 10 | 11 | class MyHandler(http.server.BaseHTTPRequestHandler): 12 | def do_GET(self): 13 | REQUESTS.inc() 14 | with EXCEPTIONS.count_exceptions(): 15 | if random.random() < 0.2: 16 | raise Exception 17 | self.send_response(200) 18 | self.end_headers() 19 | self.wfile.write(b"Hello World") 20 | 21 | if __name__ == "__main__": 22 | start_http_server(8000) 23 | server = http.server.HTTPServer(('localhost', 8001), MyHandler) 24 | server.serve_forever() 25 | -------------------------------------------------------------------------------- /12/12-3-consul_metrics.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import time 4 | from urllib.request import urlopen 5 | 6 | from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily 7 | from prometheus_client.core import SummaryMetricFamily, REGISTRY 8 | from prometheus_client import start_http_server 9 | 10 | 11 | def sanitise_name(s): 12 | return re.sub(r"[^a-zA-Z0-9:_]", "_", s) 13 | 14 | class ConsulCollector(object): 15 | def collect(self): 16 | out = urlopen("http://localhost:8500/v1/agent/metrics").read() 17 | metrics = json.loads(out.decode("utf-8")) 18 | 19 | for g in 
metrics["Gauges"]: 20 | yield GaugeMetricFamily(sanitise_name(g["Name"]), 21 | "Consul metric " + g["Name"], g["Value"]) 22 | 23 | for c in metrics["Counters"]: 24 | yield CounterMetricFamily(sanitise_name(c["Name"]) + "_total", 25 | "Consul metric " + c["Name"], c["Count"]) 26 | 27 | for s in metrics["Samples"]: 28 | yield SummaryMetricFamily(sanitise_name(s["Name"]) + "_seconds", 29 | "Consul metric " + s["Name"], 30 | count_value=c["Count"], sum_value=s["Sum"] / 1000) 31 | 32 | if __name__ == '__main__': 33 | REGISTRY.register(ConsulCollector()) 34 | start_http_server(8000) 35 | while True: 36 | time.sleep(1) 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /4/4-9-java-servlet/src/main/java/io/robustperception/book_examples/java_servlet/Example.java: -------------------------------------------------------------------------------- 1 | import io.prometheus.client.Counter; 2 | import io.prometheus.client.exporter.MetricsServlet; 3 | import io.prometheus.client.hotspot.DefaultExports; 4 | import javax.servlet.http.HttpServlet; 5 | import javax.servlet.http.HttpServletRequest; 6 | import javax.servlet.http.HttpServletResponse; 7 | import javax.servlet.ServletException; 8 | import org.eclipse.jetty.server.Server; 9 | import org.eclipse.jetty.servlet.ServletContextHandler; 10 | import org.eclipse.jetty.servlet.ServletHolder; 11 | import java.io.IOException; 12 | 13 | 14 | public class Example { 15 | static class ExampleServlet extends HttpServlet { 16 | private static final Counter requests = Counter.build() 17 | .name("hello_worlds_total") 18 | .help("Hello Worlds requested.").register(); 19 | 20 | @Override 21 | protected void doGet(final HttpServletRequest req, 22 | final HttpServletResponse resp) 23 | throws ServletException, IOException { 24 | requests.inc(); 25 | resp.getWriter().println("Hello World"); 26 | } 27 | } 28 | 29 | public static void main(String[] args) throws Exception { 30 | 
DefaultExports.initialize(); 31 | 32 | Server server = new Server(8000); 33 | ServletContextHandler context = new ServletContextHandler(); 34 | context.setContextPath("/"); 35 | server.setHandler(context); 36 | context.addServlet(new ServletHolder(new ExampleServlet()), "/"); 37 | context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics"); 38 | 39 | server.start(); 40 | server.join(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /4/4-7-java-httpserver/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | io.robustperception.book_examples 5 | java_httpserver 6 | jar 7 | 1.0-SNAPSHOT 8 | http://maven.apache.org 9 | 10 | 11 | io.prometheus 12 | simpleclient 13 | 0.3.0 14 | 15 | 16 | io.prometheus 17 | simpleclient_hotspot 18 | 0.3.0 19 | 20 | 21 | io.prometheus 22 | simpleclient_httpserver 23 | 0.3.0 24 | 25 | 26 | 27 | 28 | 29 | 30 | org.apache.maven.plugins 31 | maven-compiler-plugin 32 | 3.1 33 | 34 | 1.5 35 | 1.5 36 | 37 | 38 | 39 | 40 | maven-assembly-plugin 41 | 42 | 43 | 44 | Example 45 | 46 | 47 | 48 | jar-with-dependencies 49 | 50 | 51 | 52 | 53 | make-assembly 54 | package 55 | 56 | single 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /4/4-9-java-servlet/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | io.robustperception.book_examples 5 | java_servlet 6 | jar 7 | 1.0-SNAPSHOT 8 | http://maven.apache.org 9 | 10 | 11 | io.prometheus 12 | simpleclient 13 | 0.3.0 14 | 15 | 16 | io.prometheus 17 | simpleclient_hotspot 18 | 0.3.0 19 | 20 | 21 | io.prometheus 22 | simpleclient_servlet 23 | 0.3.0 24 | 25 | 26 | org.eclipse.jetty 27 | jetty-servlet 28 | 8.2.0.v20160908 29 | 30 | 31 | 32 | 33 | 34 | 35 | org.apache.maven.plugins 36 | maven-compiler-plugin 37 | 3.1 38 | 39 | 1.5 40 | 1.5 41 | 42 | 43 | 44 | 45 | 
maven-assembly-plugin 46 | 47 | 48 | 49 | Example 50 | 51 | 52 | 53 | jar-with-dependencies 54 | 55 | 56 | 57 | 58 | make-assembly 59 | package 60 | 61 | single 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /12/12-2-consul_metrics.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "net/http" 6 | "regexp" 7 | 8 | "github.com/hashicorp/consul/api" 9 | "github.com/prometheus/client_golang/prometheus" 10 | "github.com/prometheus/client_golang/prometheus/promhttp" 11 | ) 12 | 13 | var ( 14 | up = prometheus.NewDesc( 15 | "consul_up", 16 | "Was talking to Consul successful.", 17 | nil, nil, 18 | ) 19 | invalidChars = regexp.MustCompile("[^a-zA-Z0-9:_]") 20 | ) 21 | 22 | type ConsulCollector struct { 23 | } 24 | 25 | // Implements prometheus.Collector. 26 | func (c ConsulCollector) Describe(ch chan<- *prometheus.Desc) { 27 | ch <- up 28 | } 29 | 30 | // Implements prometheus.Collector. 
31 | func (c ConsulCollector) Collect(ch chan<- prometheus.Metric) { 32 | consul, err := api.NewClient(api.DefaultConfig()) 33 | if err != nil { 34 | ch <- prometheus.MustNewConstMetric(up, prometheus.GaugeValue, 0) 35 | return 36 | } 37 | 38 | metrics, err := consul.Agent().Metrics() 39 | if err != nil { 40 | ch <- prometheus.MustNewConstMetric(up, prometheus.GaugeValue, 0) 41 | return 42 | } 43 | ch <- prometheus.MustNewConstMetric(up, prometheus.GaugeValue, 1) 44 | 45 | for _, g := range metrics.Gauges { 46 | name := invalidChars.ReplaceAllLiteralString(g.Name, "_") 47 | desc := prometheus.NewDesc(name, "Consul metric "+g.Name, nil, nil) 48 | ch <- prometheus.MustNewConstMetric( 49 | desc, prometheus.GaugeValue, float64(g.Value)) 50 | } 51 | 52 | for _, c := range metrics.Counters { 53 | name := invalidChars.ReplaceAllLiteralString(c.Name, "_") 54 | desc := prometheus.NewDesc(name+"_total", "Consul metric "+c.Name, nil, nil) 55 | ch <- prometheus.MustNewConstMetric( 56 | desc, prometheus.CounterValue, float64(c.Count)) 57 | } 58 | 59 | for _, s := range metrics.Samples { 60 | // All samples are times in milliseconds, we convert them to seconds below. 
61 | name := invalidChars.ReplaceAllLiteralString(s.Name, "_") + "_seconds" 62 | countDesc := prometheus.NewDesc( 63 | name+"_count", "Consul metric "+s.Name, nil, nil) 64 | ch <- prometheus.MustNewConstMetric( 65 | countDesc, prometheus.CounterValue, float64(s.Count)) 66 | sumDesc := prometheus.NewDesc( 67 | name+"_sum", "Consul metric "+s.Name, nil, nil) 68 | ch <- prometheus.MustNewConstMetric( 69 | sumDesc, prometheus.CounterValue, s.Sum/1000) 70 | } 71 | } 72 | 73 | func main() { 74 | c := ConsulCollector{} 75 | prometheus.MustRegister(c) 76 | http.Handle("/metrics", promhttp.Handler()) 77 | log.Fatal(http.ListenAndServe(":8000", nil)) 78 | } 79 | -------------------------------------------------------------------------------- /19/combined-alertmanager.yml: -------------------------------------------------------------------------------- 1 | global: 2 | opsgenie_api_key: XXXXXXXX 3 | hipchat_auth_token: XXXXXXXX 4 | smtp_smarthost: 'localhost:25' 5 | smtp_from: 'youraddress@example.org' 6 | 7 | 8 | route: 9 | receiver: fallback-pager 10 | routes: 11 | # Log all alerts. 12 | - receiver: log-alerts 13 | continue: true 14 | 15 | # Frontend team. 16 | - match: 17 | team: frontend 18 | group_by: [region, env] 19 | group_interval: 10m 20 | receiver: frontend-pager 21 | routes: 22 | - match: 23 | severity: page 24 | receiver: frontend-pager 25 | group_wait: 1m 26 | - match: 27 | severity: ticket 28 | group_by: [region, env, alertname] 29 | receiver: frontend-ticket 30 | repeat_interval: 1d 31 | group_interval: 1d 32 | 33 | # Backend team. 
34 | - match: 35 | team: backend 36 | receiver: backend-pager 37 | routes: 38 | - match: 39 | severity: page 40 | env: dev 41 | receiver: backend-ticket 42 | - match: 43 | severity: page 44 | receiver: backend-pager 45 | - match: 46 | severity: ticket 47 | receiver: backend-ticket 48 | 49 | 50 | inhibit_rules: 51 | - source_match: 52 | severity: 'page-regionfail' 53 | target_match: 54 | severity: 'page' 55 | equal: ['region'] 56 | 57 | 58 | receivers: 59 | - name: fallback-pager 60 | pagerduty_configs: 61 | - service_key: XXXXXXXX 62 | - name: log-alerts 63 | webhook_configs: 64 | - url: http://localhost:1234/log 65 | 66 | - name: frontend-pager 67 | pagerduty_configs: 68 | - service_key: XXXXXXXX 69 | slack_configs: 70 | - api_url: https://hooks.slack.com/services/XXXXXXXX 71 | channel: '#pages' 72 | title: 'Alerts in {{ .GroupLabels.region }} {{ .GroupLabels.env }}!' 73 | text: > 74 | {{ .Alerts | len }} alerts: 75 | {{ range .Alerts }} 76 | {{ range .Labels.SortedPairs }}{{ .Name }}={{ .Value }} {{ end }} 77 | {{ if eq .Annotations.wiki "" -}} 78 | Wiki: http://wiki.mycompany/{{ .Labels.alertname }} 79 | {{- else -}} 80 | Wiki: http://wiki.mycompany/{{ .Annotations.wiki }} 81 | {{- end }} 82 | {{ if ne .Annotations.dashboard "" -}} 83 | Dashboard: {{ .Annotations.dashboard }}&region={{ .Labels.region }} 84 | {{- end }} 85 | 86 | {{ end }} 87 | - name: frontend-ticket 88 | email_configs: 89 | - to: 'frontendtickets@example.org' 90 | 91 | - name: backend-pager 92 | opsgenie_configs: 93 | - teams: backendTeam # This is a comma separated list. 
94 | hipchat_configs: 95 | - room_id: XXX 96 | - room_id: YYY 97 | - name: backend-ticket 98 | email_configs: 99 | - to: 'backendtickets@example.org' 100 | -------------------------------------------------------------------------------- /9/prometheus-deployment.yml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus 5 | rules: 6 | - apiGroups: [""] 7 | resources: 8 | - nodes 9 | - services 10 | - endpoints 11 | - pods 12 | verbs: ["get", "list", "watch"] 13 | - apiGroups: 14 | - extensions 15 | resources: 16 | - ingresses 17 | verbs: ["get", "list", "watch"] 18 | --- 19 | apiVersion: v1 20 | kind: ServiceAccount 21 | metadata: 22 | name: prometheus 23 | namespace: default 24 | --- 25 | apiVersion: rbac.authorization.k8s.io/v1beta1 26 | kind: ClusterRoleBinding 27 | metadata: 28 | name: prometheus 29 | roleRef: 30 | apiGroup: rbac.authorization.k8s.io 31 | kind: ClusterRole 32 | name: prometheus 33 | subjects: 34 | - kind: ServiceAccount 35 | name: prometheus 36 | namespace: default 37 | --- 38 | apiVersion: v1 39 | data: 40 | prometheus.yml: | 41 | global: 42 | scrape_interval: 10s 43 | scrape_configs: 44 | - job_name: 'kubelet' 45 | kubernetes_sd_configs: 46 | - role: node 47 | scheme: https 48 | tls_config: 49 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 50 | insecure_skip_verify: true # Unfortunately required with Minikube. 51 | - job_name: 'cadvisor' 52 | kubernetes_sd_configs: 53 | - role: node 54 | scheme: https 55 | tls_config: 56 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 57 | insecure_skip_verify: true # Unfortunately required with Minikube. 
58 | metrics_path: /metrics/cadvisor 59 | - job_name: 'k8apiserver' 60 | kubernetes_sd_configs: 61 | - role: endpoints 62 | scheme: https 63 | tls_config: 64 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 65 | insecure_skip_verify: true # Unfortunately required with Minikube. 66 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 67 | relabel_configs: 68 | - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 69 | action: keep 70 | regex: default;kubernetes;https 71 | - job_name: 'k8services' 72 | kubernetes_sd_configs: 73 | - role: endpoints 74 | relabel_configs: 75 | - source_labels: 76 | - __meta_kubernetes_namespace 77 | - __meta_kubernetes_service_name 78 | action: drop 79 | regex: default;kubernetes 80 | - source_labels: 81 | - __meta_kubernetes_namespace 82 | regex: default 83 | action: keep 84 | - source_labels: [__meta_kubernetes_service_name] 85 | target_label: job 86 | - job_name: 'k8pods' 87 | kubernetes_sd_configs: 88 | - role: pod 89 | relabel_configs: 90 | - source_labels: [__meta_kubernetes_pod_container_port_name] 91 | regex: metrics 92 | action: keep 93 | - source_labels: [__meta_kubernetes_pod_container_name] 94 | target_label: job 95 | kind: ConfigMap 96 | metadata: 97 | name: prometheus-config 98 | --- 99 | apiVersion: apps/v1beta2 100 | kind: Deployment 101 | metadata: 102 | name: prometheus 103 | spec: 104 | selector: 105 | matchLabels: 106 | app: prometheus 107 | replicas: 1 108 | template: 109 | metadata: 110 | labels: 111 | app: prometheus 112 | spec: 113 | serviceAccountName: prometheus 114 | containers: 115 | - name: prometheus 116 | image: prom/prometheus:v2.1.0 117 | ports: 118 | - containerPort: 9090 119 | name: default 120 | volumeMounts: 121 | - name: config-volume 122 | mountPath: /etc/prometheus 123 | volumes: 124 | - name: config-volume 125 | configMap: 126 | name: prometheus-config 127 | --- 128 | kind: Service 129 | apiVersion: 
v1 130 | metadata: 131 | name: prometheus 132 | spec: 133 | selector: 134 | app: prometheus 135 | type: LoadBalancer 136 | ports: 137 | - protocol: TCP 138 | port: 9090 139 | targetPort: 9090 140 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------