├── .DS_Store ├── README.md ├── alertmanager ├── README.md ├── alert_rule_1.yaml ├── alert_rule_2.yaml ├── alertmanager.yaml ├── apply_change.sh ├── helm_nginx_install.sh └── trigger_nginx_alert.sh ├── head-first-kubernetes ├── README.md ├── delete-random-pod-in-deploy.sh ├── manifests │ ├── deployment.yaml │ ├── pod.yaml │ └── service.yaml └── request.sh ├── install ├── README.md ├── alias.sh ├── create_expose_service.sh ├── delete_expose_service.sh ├── eks_cluster.yml ├── get_exposed_links.sh ├── get_links.sh ├── kube-prometheus │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── build.sh │ ├── docs │ │ ├── GKE-cadvisor-support.md │ │ ├── developing-prometheus-rules-and-grafana-dashboards.md │ │ ├── exposing-prometheus-alertmanager-grafana-ingress.md │ │ ├── kube-prometheus-on-kubeadm.md │ │ ├── monitoring-external-etcd.md │ │ └── monitoring-other-namespaces.md │ ├── example.jsonnet │ ├── examples │ │ ├── additional-namespaces.jsonnet │ │ ├── alertmanager-config-external.jsonnet │ │ ├── alertmanager-config.jsonnet │ │ ├── alertmanager-config.yaml │ │ ├── auth │ │ ├── basic-auth │ │ │ ├── secrets.yaml │ │ │ └── service-monitor.yaml │ │ ├── etcd-client-ca.crt │ │ ├── etcd-client.crt │ │ ├── etcd-client.key │ │ ├── etcd-skip-verify.jsonnet │ │ ├── etcd.jsonnet │ │ ├── example-app │ │ │ ├── example-app.yaml │ │ │ ├── prometheus-frontend-alertmanager-discovery-role-binding.yaml │ │ │ ├── prometheus-frontend-alertmanager-discovery-role.yaml │ │ │ ├── prometheus-frontend-role-binding.yaml │ │ │ ├── prometheus-frontend-role.yaml │ │ │ ├── prometheus-frontend-service-account.yaml │ │ │ ├── prometheus-frontend-svc.yaml │ │ │ ├── prometheus-frontend.yaml │ │ │ └── servicemonitor-frontend.yaml │ │ ├── example-grafana-dashboard.json │ │ ├── existingrule.json │ │ ├── existingrule.yaml │ │ ├── grafana-additional-jsonnet-dashboard-example.jsonnet │ │ ├── grafana-additional-rendered-dashboard-example.jsonnet │ │ ├── ingress.jsonnet │ │ ├── internal-registry.jsonnet │ │ ├── jsonnet-build-snippet │ │ │ └── build-snippet.jsonnet │ │ ├── jsonnet-snippets │ │ │ ├── bootkube.jsonnet │ │ │ ├── kops-coredns.jsonnet │ │ │ ├── kops.jsonnet │ │ │ ├── kube-aws.jsonnet │ │ │ ├── kubeadm.jsonnet │ │ │ ├── kubespray.jsonnet │ │ │ └── node-ports.jsonnet │ │ ├── ksonnet-example.jsonnet │ │ ├── kustomize.jsonnet │ │ ├── minikube.jsonnet │ │ ├── prometheus-additional-alert-rule-example.jsonnet │ │ ├── prometheus-additional-recording-rule-example.jsonnet │ │ ├── prometheus-additional-rendered-rule-example.jsonnet │ │ ├── prometheus-name-override.jsonnet │ │ └── prometheus-pvc.jsonnet │ ├── experimental │ │ ├── custom-metrics-api │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml │ │ │ ├── custom-metrics-apiservice.yaml │ │ │ ├── custom-metrics-cluster-role.yaml │ │ │ ├── custom-metrics-configmap.yaml │ │ │ ├── deploy.sh │ │ │ ├── hpa-custom-metrics-cluster-role-binding.yaml │ │ │ ├── sample-app.yaml │ │ │ └── teardown.sh │ │ └── metrics-server │ │ │ ├── auth-delegator.yaml │ │ │ ├── auth-reader.yaml │ │ │ ├── metrics-apiservice.yaml │ │ │ ├── metrics-server-cluster-role-binding.yaml │ │ │ ├── metrics-server-cluster-role.yaml │ │ │ ├── metrics-server-deployment.yaml │ │ │ ├── metrics-server-service-account.yaml │ │ │ └── metrics-server-service.yaml │ ├── grafana-image │ │ ├── Dockerfile │ │ ├── Makefile │ │ └── config.ini │ ├── hack │ │ └── example-service-monitoring │ │ │ ├── deploy │ │ │ └── teardown │ ├── jsonnet │ │ └── kube-prometheus │ │ │ ├── 
.gitignore │ │ │ ├── alertmanager │ │ │ └── alertmanager.libsonnet │ │ │ ├── alerts │ │ │ ├── alertmanager.libsonnet │ │ │ ├── alerts.libsonnet │ │ │ ├── general.libsonnet │ │ │ ├── node.libsonnet │ │ │ ├── prometheus-operator.libsonnet │ │ │ ├── prometheus.libsonnet │ │ │ └── tests.yaml │ │ │ ├── jsonnetfile.json │ │ │ ├── kube-prometheus-anti-affinity.libsonnet │ │ │ ├── kube-prometheus-bootkube.libsonnet │ │ │ ├── kube-prometheus-config-mixins.libsonnet │ │ │ ├── kube-prometheus-insecure-kubelet.libsonnet │ │ │ ├── kube-prometheus-kops-coredns.libsonnet │ │ │ ├── kube-prometheus-kops.libsonnet │ │ │ ├── kube-prometheus-ksonnet.libsonnet │ │ │ ├── kube-prometheus-kube-aws.libsonnet │ │ │ ├── kube-prometheus-kubeadm.libsonnet │ │ │ ├── kube-prometheus-kubespray.libsonnet │ │ │ ├── kube-prometheus-managed-cluster.libsonnet │ │ │ ├── kube-prometheus-node-ports.libsonnet │ │ │ ├── kube-prometheus-static-etcd.libsonnet │ │ │ ├── kube-prometheus-thanos.libsonnet │ │ │ ├── kube-prometheus.libsonnet │ │ │ ├── kube-state-metrics │ │ │ └── kube-state-metrics.libsonnet │ │ │ ├── lib │ │ │ ├── image.libsonnet │ │ │ └── lib.libsonnet │ │ │ ├── node-exporter │ │ │ └── node-exporter.libsonnet │ │ │ ├── prometheus-adapter │ │ │ └── prometheus-adapter.libsonnet │ │ │ ├── prometheus │ │ │ └── prometheus.libsonnet │ │ │ └── rules │ │ │ ├── node-rules.libsonnet │ │ │ └── rules.libsonnet │ ├── jsonnetfile.json │ ├── jsonnetfile.lock.json │ ├── kustomization.yaml │ ├── manifests │ │ ├── 00namespace-namespace.yaml │ │ ├── 0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml │ │ ├── 0prometheus-operator-0prometheusCustomResourceDefinition.yaml │ │ ├── 0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml │ │ ├── 0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml │ │ ├── 0prometheus-operator-clusterRole.yaml │ │ ├── 0prometheus-operator-clusterRoleBinding.yaml │ │ ├── 0prometheus-operator-deployment.yaml │ │ ├── 0prometheus-operator-service.yaml │ │ ├── 0prometheus-operator-serviceAccount.yaml │ │ ├── 0prometheus-operator-serviceMonitor.yaml │ │ ├── alertmanager-alertmanager.yaml │ │ ├── alertmanager-secret.yaml │ │ ├── alertmanager-service.yaml │ │ ├── alertmanager-serviceAccount.yaml │ │ ├── alertmanager-serviceMonitor.yaml │ │ ├── grafana-dashboardDatasources.yaml │ │ ├── grafana-dashboardDefinitions.yaml │ │ ├── grafana-dashboardSources.yaml │ │ ├── grafana-deployment.yaml │ │ ├── grafana-service.yaml │ │ ├── grafana-serviceAccount.yaml │ │ ├── grafana-serviceMonitor.yaml │ │ ├── kube-state-metrics-clusterRole.yaml │ │ ├── kube-state-metrics-clusterRoleBinding.yaml │ │ ├── kube-state-metrics-deployment.yaml │ │ ├── kube-state-metrics-role.yaml │ │ ├── kube-state-metrics-roleBinding.yaml │ │ ├── kube-state-metrics-service.yaml │ │ ├── kube-state-metrics-serviceAccount.yaml │ │ ├── kube-state-metrics-serviceMonitor.yaml │ │ ├── node-exporter-clusterRole.yaml │ │ ├── node-exporter-clusterRoleBinding.yaml │ │ ├── node-exporter-daemonset.yaml │ │ ├── node-exporter-service.yaml │ │ ├── node-exporter-serviceAccount.yaml │ │ ├── node-exporter-serviceMonitor.yaml │ │ ├── prometheus-adapter-apiService.yaml │ │ ├── prometheus-adapter-clusterRole.yaml │ │ ├── prometheus-adapter-clusterRoleBinding.yaml │ │ ├── prometheus-adapter-clusterRoleBindingDelegator.yaml │ │ ├── prometheus-adapter-clusterRoleServerResources.yaml │ │ ├── prometheus-adapter-configMap.yaml │ │ ├── prometheus-adapter-deployment.yaml │ │ ├── prometheus-adapter-roleBindingAuthReader.yaml │ │ ├── 
prometheus-adapter-service.yaml │ │ ├── prometheus-adapter-serviceAccount.yaml │ │ ├── prometheus-clusterRole.yaml │ │ ├── prometheus-clusterRoleBinding.yaml │ │ ├── prometheus-prometheus.yaml │ │ ├── prometheus-roleBindingConfig.yaml │ │ ├── prometheus-roleBindingSpecificNamespaces.yaml │ │ ├── prometheus-roleConfig.yaml │ │ ├── prometheus-roleSpecificNamespaces.yaml │ │ ├── prometheus-rules.yaml │ │ ├── prometheus-service.yaml │ │ ├── prometheus-serviceAccount.yaml │ │ ├── prometheus-serviceMonitor.yaml │ │ ├── prometheus-serviceMonitorApiserver.yaml │ │ └── prometheus-serviceMonitorKubelet.yaml │ ├── sync-to-internal-registry.jsonnet │ ├── test.sh │ └── tests │ │ └── e2e │ │ ├── main_test.go │ │ ├── prometheus_client.go │ │ └── travis-e2e.sh ├── step1.sh ├── step2.sh ├── step3.sh ├── step4.sh └── uninstall.sh ├── prometheus-operator ├── .DS_Store ├── README.md ├── hello_app_service_monitor │ ├── .DS_Store │ └── manifests │ │ ├── prometheus.yaml │ │ └── serviceMonitor-hello.yaml └── kubecost │ └── install.sh ├── prometheus-overview └── README.md └── service-discovery ├── .DS_Store ├── README.md ├── generate_yaml.sh ├── manifests ├── additional-scrape-configs.yaml ├── prometheus-additional-scrape-configs.yaml └── prometheus.yaml └── scrape_configs └── additional-scrape-configs.yaml /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Taipei-HUG/Prometheus-workshop/ae4c2c8e7666c408532c7618e537742aee8c88f6/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prometheus-workshop 2 | 3 | This repository contains [Prometheus workshop][1] code, settings, and deployment scripts. 4 | 5 | The presentation slides are [here][2] 6 | 7 | [1]: https://devops.kktix.cc/events/prometheus-workshop "Workshop link" 8 | [2]: https://docs.google.com/presentation/d/1nbqa-mDEFM3OM-BlT6D7en5sS44wlHyECH_QAxOS46Q 9 | 10 | 11 | ## Warm Up 12 | 13 | ### CH 0 14 | - [Installation](install/README.md) 15 | 16 | ### CH 1 17 | - [Head First Kubernetes](head-first-kubernetes/README.md) 18 | 19 | ### CH 2 20 | - [Prometheus Overview](prometheus-overview/README.md) 21 | 22 | ## Prometheus Ecosystem 23 | 24 | ### CH 3 25 | - [Service Discovery](service-discovery/README.md) 26 | 27 | ### CH 4 28 | - Grafana & Pushgateway 29 | 30 | ### CH 5 31 | - [Alertmanager](alertmanager/README.md) 32 | 33 | ### CH 6 34 | - [Prometheus Operator](prometheus-operator/README.md) 35 | -------------------------------------------------------------------------------- /alertmanager/README.md: -------------------------------------------------------------------------------- 1 | # Alerting & Practical Cases 2 | 3 | Code for setting up Alertmanager.
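A rough order for working through this chapter (a sketch only — the file names are the ones in this directory, and it assumes the kube-prometheus stack from the install chapter is already running in the `monitoring` namespace):

```bash
kubectl -n monitoring apply -f alert_rule_1.yaml   # load a trivial always-firing PrometheusRule
./apply_change.sh                                  # recreate the alertmanager-main secret from alertmanager.yaml
./helm_nginx_install.sh                            # install nginx-ingress with metrics and a ServiceMonitor
kubectl -n monitoring apply -f alert_rule_2.yaml   # load the NGINX request-count rule
./trigger_nginx_alert.sh                           # send 1000 requests to make the NGINX alert fire
```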
4 | -------------------------------------------------------------------------------- /alertmanager/alert_rule_1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-example-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: yourname.rules 12 | rules: 13 | - alert: YournameAlert 14 | expr: vector(1) 15 | -------------------------------------------------------------------------------- /alertmanager/alert_rule_2.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-example-rules2 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: nginx_rule 12 | rules: 13 | - alert: NGINXAlert 14 | expr: nginx_ingress_controller_nginx_process_requests_total > 1000 15 | annotations: 16 | description: '{{ $labels.instance }} of job {{ $labels.job }} has more than 1000 requests.' 17 | summary: 'Instance {{ $labels.job }} requests alert' 18 | -------------------------------------------------------------------------------- /alertmanager/alertmanager.yaml: -------------------------------------------------------------------------------- 1 | "global": 2 | "resolve_timeout": "5m" 3 | "receivers": 4 | - "name": "slack_alert1" 5 | "slack_configs": 6 | - "api_url": "https://hooks.slack.com/services/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" 7 | "channel": "#alert_1" 8 | - "name": "slack_alert2" 9 | "slack_configs": 10 | - "api_url": "https://hooks.slack.com/services/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" 11 | "channel": "#alert_2" 12 | "title": "{{ range .Alerts }}{{ .Annotations.summary }}\n{{ end }}" 13 | "text": "{{ range .Alerts }}{{ .Annotations.description }}\n{{ end }}" 14 | "route": 15 | "receiver": "slack_alert1" 16 | "group_interval": "1m" 17 | "group_wait": "30s" 18 | "repeat_interval": "3m" 19 | "group_by": 20 | - "job" 21 | "routes": 22 | - "match_re": 23 | "alertname": "^NGINX.*" 24 | "receiver": "slack_alert2" 25 | -------------------------------------------------------------------------------- /alertmanager/apply_change.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | kubectl -n monitoring delete secret alertmanager-main 3 | sleep 10 4 | kubectl -n monitoring create secret generic alertmanager-main --from-file=alertmanager.yaml 5 | -------------------------------------------------------------------------------- /alertmanager/helm_nginx_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo 'Installing nginx and nginx exporter with helm' 3 | helm install --name nginx \ 4 | --set controller.stats.enabled=True \ 5 | --set controller.metrics.enabled=True \ 6 | --set controller.metrics.serviceMonitor.enabled=True \ 7 | --set controller.metrics.serviceMonitor.namespace=monitoring \ 8 | stable/nginx-ingress 9 | echo 'Nginx installed.'
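# Optional sanity check (a sketch; assumes the Prometheus Operator CRDs are installed):
# the chart should have created a ServiceMonitor in the monitoring namespace, which is
# what lets Prometheus discover the nginx controller metrics.
kubectl -n monitoring get servicemonitor | grep -i nginx || echo 'ServiceMonitor for nginx not found yet'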
10 | -------------------------------------------------------------------------------- /alertmanager/trigger_nginx_alert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | nginx_url=http://$(kubectl get svc nginx-nginx-ingress-controller -o jsonpath="{.status.loadBalancer.ingress[0].hostname}") 3 | 4 | echo 'curl nginx ELB 1000 times ...' 5 | for i in {1..1000} 6 | do 7 | curl -s ${nginx_url} > /dev/null 8 | done 9 | 10 | echo 'Done' 11 | -------------------------------------------------------------------------------- /head-first-kubernetes/README.md: -------------------------------------------------------------------------------- 1 | # Head First Kubernetes 2 | 3 | 4 | ### Demo Selector with kubectl 5 | - `$ kubectl get pod -n kube-system --show-labels` 6 | - `$ kubectl get pod -n kube-system --show-labels -l k8s-app=kube-dns` 7 | - `$ kubectl get pod -n kube-system --show-labels -l k8s-app=kube-dns -o wide` 8 | 9 | 10 | -------------------------------------------------------------------------------- /head-first-kubernetes/delete-random-pod-in-deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | Count=$(kubectl get po -l app=hello -o go-template='{{len .items}}') 4 | Needle=$(( $RANDOM % $Count )) 5 | IFS='@' read -ra PodNames <<< "$(kubectl get po -l app=hello -o go-template='{{range .items}}{{ .metadata.name }}@{{ end }}')" 6 | PodNameForDelete=${PodNames[$Needle]} 7 | kubectl delete po $PodNameForDelete 8 | kubectl get po -------------------------------------------------------------------------------- /head-first-kubernetes/manifests/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: hello 5 | spec: 6 | replicas: 3 7 | template: 8 | metadata: 9 | labels: 10 | app: hello 11 | spec: 12 | containers: 13 | - name: app 14 | image: quay.io/owensengoku/prometheus-example-app:v0.1.0 15 | ports: 16 | - containerPort: 8080 17 | -------------------------------------------------------------------------------- /head-first-kubernetes/manifests/pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: busybox 5 | spec: 6 | containers: 7 | - name: busybox 8 | image: quay.io/owensengoku/busybox-curl:v0.1.0 9 | command: 10 | - "busybox" 11 | - "sh" 12 | - "-c" 13 | - "sleep 10000" -------------------------------------------------------------------------------- /head-first-kubernetes/manifests/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: hello 5 | labels: 6 | app: hello 7 | spec: 8 | ports: 9 | - name: http 10 | port: 8080 11 | targetPort: 8080 12 | selector: 13 | app: hello 14 | -------------------------------------------------------------------------------- /head-first-kubernetes/request.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | UrlPath=$1 3 | kubectl exec busybox -- curl --silent hello.default.svc:8080/$UrlPath 4 | -------------------------------------------------------------------------------- /install/README.md: -------------------------------------------------------------------------------- 1 | # Initial setting 2 | 3 | Setup AWS Cloud9, AWS EKS by using [eksctl][1] 4 | 5 | [1]: 
https://github.com/weaveworks/eksctl "github of eksctl" 6 | -------------------------------------------------------------------------------- /install/alias.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | alias k='kubectl' 4 | alias kd='kubectl delete' 5 | alias ka='kubectl apply' 6 | alias km='kubectl -n monitoring' 7 | alias kma='kubectl -n monitoring apply' 8 | alias kmd='kubectl -n monitoring delete' 9 | -------------------------------------------------------------------------------- /install/create_expose_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | kubectl expose service grafana --port=80 --target-port=3000 --name=grafana-80 --type=LoadBalancer -n monitoring 3 | kubectl expose service prometheus-k8s --port=80 --target-port=9090 --name=prometheus-k8s-80 --type=LoadBalancer -n monitoring 4 | kubectl expose service my-pushgateway --port=80 --target-port=9091 --name=my-pushgateway-80 --type=LoadBalancer -n monitoring 5 | kubectl expose service alertmanager-main --port=80 --target-port=9093 --name=alertmanager-main-80 --type=LoadBalancer -n monitoring 6 | -------------------------------------------------------------------------------- /install/delete_expose_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | kubectl delete service grafana-80 -n monitoring 4 | kubectl delete service prometheus-k8s-80 -n monitoring 5 | kubectl delete service my-pushgateway-80 -n monitoring 6 | kubectl delete service alertmanager-main-80 -n monitoring 7 | -------------------------------------------------------------------------------- /install/eks_cluster.yml: -------------------------------------------------------------------------------- 1 | apiVersion: eksctl.io/v1alpha5 2 | kind: ClusterConfig 3 | 4 | metadata: 5 | name: workshop 6 | region: us-west-2 7 | 8 | nodeGroups: 9 | - name: ng0 10 | instanceType: t3.large 11 | desiredCapacity: 2 12 | -------------------------------------------------------------------------------- /install/get_exposed_links.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "Grafana" 4 | echo "http://"$(kubectl get svc grafana-80 -n monitoring -o jsonpath="{.status.loadBalancer.ingress[0].hostname}")"" 5 | 6 | echo "Prometheus" 7 | echo "http://"$(kubectl get svc prometheus-k8s-80 -n monitoring -o jsonpath="{.status.loadBalancer.ingress[0].hostname}")"" 8 | 9 | echo "Pushgateway" 10 | echo "http://"$(kubectl get svc my-pushgateway-80 -n monitoring -o jsonpath="{.status.loadBalancer.ingress[0].hostname}")"" 11 | 12 | echo "Alertmanager" 13 | echo "http://"$(kubectl get svc alertmanager-main-80 -n monitoring -o jsonpath="{.status.loadBalancer.ingress[0].hostname}")"" 14 | -------------------------------------------------------------------------------- /install/get_links.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "Grafana" 4 | echo "http://"$(kubectl get svc grafana -n monitoring -o jsonpath="{.status.loadBalancer.ingress[0].hostname}")":3000" 5 | 6 | echo "Prometheus" 7 | echo "http://"$(kubectl get svc prometheus-k8s -n monitoring -o jsonpath="{.status.loadBalancer.ingress[0].hostname}")":9090" 8 | 9 | echo "Pushgateway" 10 | echo "http://"$(kubectl get svc my-pushgateway -n monitoring -o jsonpath="{.status.loadBalancer.ingress[0].hostname}")":9091" 11 | 12 | echo 
"Alertmanager" 13 | echo "http://"$(kubectl get svc alertmanager-main -n monitoring -o jsonpath="{.status.loadBalancer.ingress[0].hostname}")":9093" 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/.gitignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | minikube-manifests/ 3 | vendor/ 4 | ./auth 5 | -------------------------------------------------------------------------------- /install/kube-prometheus/Makefile: -------------------------------------------------------------------------------- 1 | JSONNET_FMT := jsonnet fmt -n 2 --max-blank-lines 2 --string-style s --comment-style s 2 | 3 | JB_BINARY:=$(GOPATH)/bin/jb 4 | EMBEDMD_BINARY:=$(GOPATH)/bin/embedmd 5 | 6 | all: generate fmt test 7 | 8 | ../../hack/jsonnet-docker-image: ../../scripts/jsonnet/Dockerfile 9 | # Create empty target file, for the sole purpose of recording when this target 10 | # was last executed via the last-modification timestamp on the file. See 11 | # https://www.gnu.org/software/make/manual/make.html#Empty-Targets 12 | docker build -f - -t po-jsonnet . < ../../scripts/jsonnet/Dockerfile 13 | touch $@ 14 | 15 | generate-in-docker: ../../hack/jsonnet-docker-image 16 | @echo ">> Compiling assets and generating Kubernetes manifests" 17 | docker run \ 18 | --rm \ 19 | -u=$(shell id -u $(USER)):$(shell id -g $(USER)) \ 20 | -v $(shell dirname $(dir $(abspath $(dir $$PWD)))):/go/src/github.com/coreos/prometheus-operator/ \ 21 | -v $(shell go env GOCACHE):/.cache/go-build \ 22 | --workdir /go/src/github.com/coreos/prometheus-operator/contrib/kube-prometheus \ 23 | po-jsonnet make generate 24 | 25 | generate: manifests **.md 26 | 27 | **.md: $(EMBEDMD_BINARY) $(shell find examples) build.sh example.jsonnet 28 | $(EMBEDMD_BINARY) -w `find . -name "*.md" | grep -v vendor` 29 | 30 | manifests: vendor example.jsonnet build.sh 31 | rm -rf manifests 32 | ./build.sh ./examples/kustomize.jsonnet 33 | 34 | vendor: $(JB_BINARY) jsonnetfile.json jsonnetfile.lock.json 35 | rm -rf vendor 36 | $(JB_BINARY) install 37 | 38 | fmt: 39 | find . -name 'vendor' -prune -o -name '*.libsonnet' -o -name '*.jsonnet' -print | \ 40 | xargs -n 1 -- $(JSONNET_FMT) -i 41 | 42 | test: $(JB_BINARY) 43 | $(JB_BINARY) install 44 | ./test.sh 45 | 46 | test-e2e: 47 | go test -timeout 55m -v ./tests/e2e -count=1 48 | 49 | test-in-docker: ../../hack/jsonnet-docker-image 50 | @echo ">> Compiling assets and generating Kubernetes manifests" 51 | docker run \ 52 | --rm \ 53 | -u=$(shell id -u $(USER)):$(shell id -g $(USER)) \ 54 | -v $(shell dirname $(dir $(abspath $(dir $$PWD)))):/go/src/github.com/coreos/prometheus-operator/ \ 55 | -v $(shell go env GOCACHE):/.cache/go-build \ 56 | --workdir /go/src/github.com/coreos/prometheus-operator/contrib/kube-prometheus \ 57 | po-jsonnet make test 58 | 59 | $(JB_BINARY): 60 | go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb 61 | 62 | $(EMBEDMD_BINARY): 63 | go get github.com/campoy/embedmd 64 | 65 | .PHONY: generate generate-in-docker test test-in-docker fmt 66 | -------------------------------------------------------------------------------- /install/kube-prometheus/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files. 
4 | 5 | set -e 6 | set -x 7 | # only exit with zero if all commands of the pipeline exit successfully 8 | set -o pipefail 9 | 10 | # Make sure to start with a clean 'manifests' dir 11 | rm -rf manifests 12 | mkdir manifests 13 | 14 | # optional, but we would like to generate yaml, not json 15 | jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {} 16 | 17 | -------------------------------------------------------------------------------- /install/kube-prometheus/docs/GKE-cadvisor-support.md: -------------------------------------------------------------------------------- 1 | # Kubelet / cAdvisor special configuration updates for GKE 2 | 3 | Prior to GKE 1.11, the kubelet does not support token 4 | authentication. Until it does, Prometheus must use HTTP (not HTTPS) 5 | for scraping. 6 | 7 | You can configure this behavior through kube-prometheus with: 8 | ``` 9 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + 10 | (import 'kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet') + 11 | { 12 | _config+:: { 13 | # ... config here 14 | } 15 | }; 16 | ``` 17 | 18 | Or, you can patch and re-apply your existing manifests with: 19 | 20 | On linux: 21 | 22 | ``` 23 | sed -i -e 's/https/http/g' manifests/prometheus-serviceMonitorKubelet.yaml 24 | ``` 25 | 26 | On MacOs: 27 | 28 | ``` 29 | sed -i '' -e 's/https/http/g' manifests/prometheus-serviceMonitorKubelet.yaml 30 | ``` 31 | 32 | After you have modified the yaml file please run 33 | 34 | ``` 35 | kubectl apply -f manifests/prometheus-serviceMonitorKubelet.yaml 36 | ``` 37 | -------------------------------------------------------------------------------- /install/kube-prometheus/docs/exposing-prometheus-alertmanager-grafana-ingress.md: -------------------------------------------------------------------------------- 1 | # Exposing Prometheus, Alertmanager and Grafana UIs via Ingress 2 | 3 | In order to access the web interfaces via the Internet [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) is a popular option. This guide explains, how Kubernetes Ingress can be setup, in order to expose the Prometheus, Alertmanager and Grafana UIs, that are included in the [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) project. 4 | 5 | Note: before continuing, it is recommended to first get familiar with the [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) stack by itself. 6 | 7 | ## Prerequisites 8 | 9 | Apart from a running Kubernetes cluster with a running [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) stack, a Kubernetes Ingress controller must be installed and functional. This guide was tested with the [nginx-ingress-controller](https://github.com/kubernetes/ingress-nginx). If you wish to reproduce the exact result in as depicted in this guide we recommend using the nginx-ingress-controller. 10 | 11 | ## Setting up Ingress 12 | 13 | The setup of Ingress objects is the same for Prometheus, Alertmanager and Grafana. Therefore this guides demonstrates it in detail for Prometheus as it can easily be adapted for the other applications. 14 | 15 | As monitoring data may contain sensitive data, this guide describes how to setup Ingress with basic auth as an example of minimal security. 
Of course this should be adapted to the preferred authentication mean of any particular organization, but we feel it is important to at least provide an example with a minimum of security. 16 | 17 | In order to setup basic auth, a secret with the `htpasswd` formatted file needs to be created. To do this, first install the [`htpasswd`](https://httpd.apache.org/docs/2.4/programs/htpasswd.html) tool. 18 | 19 | To create the `htpasswd` formatted file called `auth` run: 20 | 21 | ``` 22 | htpasswd -c auth 23 | ``` 24 | 25 | In order to use this a secret needs to be created containing the name of the `htpasswd`, and with annotations on the Ingress object basic auth can be configured. 26 | 27 | Also, the applications provide external links to themselves in alerts and various places. When an ingress is used in front of the applications these links need to be based on the external URL's. This can be configured for each application in jsonnet. 28 | 29 | ```jsonnet 30 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 31 | local secret = k.core.v1.secret; 32 | local ingress = k.extensions.v1beta1.ingress; 33 | local ingressTls = ingress.mixin.spec.tlsType; 34 | local ingressRule = ingress.mixin.spec.rulesType; 35 | local httpIngressPath = ingressRule.mixin.http.pathsType; 36 | 37 | local kp = 38 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 39 | { 40 | _config+:: { 41 | namespace: 'monitoring', 42 | }, 43 | prometheus+:: { 44 | prometheus+: { 45 | spec+: { 46 | externalUrl: 'http://prometheus.example.com', 47 | }, 48 | }, 49 | }, 50 | ingress+:: { 51 | 'prometheus-k8s': 52 | ingress.new() + 53 | ingress.mixin.metadata.withName($.prometheus.prometheus.metadata.name) + 54 | ingress.mixin.metadata.withNamespace($.prometheus.prometheus.metadata.namespace) + 55 | ingress.mixin.metadata.withAnnotations({ 56 | 'nginx.ingress.kubernetes.io/auth-type': 'basic', 57 | 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', 58 | 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', 59 | }) + 60 | ingress.mixin.spec.withRules( 61 | ingressRule.new() + 62 | ingressRule.withHost('prometheus.example.com') + 63 | ingressRule.mixin.http.withPaths( 64 | httpIngressPath.new() + 65 | httpIngressPath.mixin.backend.withServiceName($.prometheus.service.metadata.name) + 66 | httpIngressPath.mixin.backend.withServicePort('web') 67 | ), 68 | ), 69 | }, 70 | } + { 71 | ingress+:: { 72 | 'basic-auth-secret': 73 | secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) + 74 | secret.mixin.metadata.withNamespace($._config.namespace), 75 | }, 76 | }; 77 | 78 | k.core.v1.list.new([ 79 | kp.ingress['prometheus-k8s'], 80 | kp.ingress['basic-auth-secret'], 81 | ]) 82 | ``` 83 | 84 | In order to expose Alertmanager and Grafana, simply create additional fields containing an ingress object, but simply pointing at the `alertmanager` or `grafana` instead of the `prometheus-k8s` Service. Make sure to also use the correct port respectively, for Alertmanager it is also `web`, for Grafana it is `http`. Be sure to also specify the appropriate external URL. 
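If you prefer not to manage the basic auth secret through jsonnet, it can also be created imperatively. A minimal sketch, assuming the `auth` file produced by the `htpasswd` step above and the `monitoring` namespace used throughout this guide (`myuser` is just a placeholder):

```bash
htpasswd -c auth myuser
kubectl -n monitoring create secret generic basic-auth --from-file=auth
```

The secret name (`basic-auth`) must match the `nginx.ingress.kubernetes.io/auth-secret` annotation used on the Ingress objects.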
85 | 86 | In order to render the ingress objects similarly to the other objects, use the pattern demonstrated in the [main readme](../README.md#usage): 87 | 88 | ``` 89 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 90 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 91 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 92 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 93 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 94 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 95 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + 96 | { ['ingress-' + name]: kp.ingress[name] for name in std.objectFields(kp.ingress) } 97 | ``` 98 | 99 | Note that, compared to the original, only the last line was added; the rest is identical. 100 | 101 | See [ingress.jsonnet](../examples/ingress.jsonnet) for an example implementation. 102 | -------------------------------------------------------------------------------- /install/kube-prometheus/docs/monitoring-external-etcd.md: -------------------------------------------------------------------------------- 1 | # Monitoring external etcd 2 | This guide will help you monitor an external etcd cluster, i.e. one that is not hosted inside Kubernetes. 3 | This is often the case with Kubernetes setups. This approach has been tested with kube-aws, but the same principles apply to other tools. 4 | 5 | Note that [etcd.jsonnet](../examples/etcd.jsonnet) & [kube-prometheus-static-etcd.libsonnet](../jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) (which are described by a section of the [Readme](../README.md#static-etcd-configuration)) do the following: 6 | * Put the three etcd TLS client files (CA & cert & key) into a secret in the namespace, and have Prometheus Operator load the secret. 7 | * Create the following (to expose etcd metrics - port 2379): a Service, Endpoints, & ServiceMonitor. 8 | 9 | # Step 1: Open the port 10 | 11 | You now need to allow the nodes Prometheus is running on to talk to etcd on port 2379 (if 2379 is the port used by etcd to expose the metrics). 12 | 13 | If using kube-aws, you will need to edit the etcd security group inbound rules, specifying the security group of your Kubernetes node (worker) as the source. 14 | 15 | ## kube-aws and EIP or ENI inconsistency 16 | With kube-aws, each etcd node has two IP addresses: 17 | 18 | * EC2 instance IP 19 | * EIP or ENI (depending on the method chosen in your cluster.yaml) 20 | 21 | For some reason, some etcd nodes answer on :2379/metrics on the instance IP (eth0), others on the EIP/ENI address (eth1). See issue https://github.com/kubernetes-incubator/kube-aws/issues/923 22 | It would of course be much better if we could hit the EIP/ENI all the time, as they don't change even if the underlying EC2 instance goes down. 23 | If the instance IP (eth0) is specified in the Prometheus Operator ServiceMonitor and the EC2 instance goes down, one would have to update the ServiceMonitor. 24 | 25 | Another idea would be to use the DNS entries of etcd, but those are not currently supported for Endpoints objects in Kubernetes.
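Before moving on, it can help to check reachability from inside the cluster. A rough sketch (`10.0.0.10` is a placeholder for one of your etcd node IPs; if etcd enforces client-certificate authentication the request will be rejected, but any TLS-level error still shows the port is reachable, whereas a timeout means the security group is still blocking it):

```bash
kubectl -n monitoring run etcd-probe --rm -it --restart=Never \
  --image=curlimages/curl -- curl -sSk --max-time 5 https://10.0.0.10:2379/metrics
```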
26 | 27 | # Step 2: Verify 28 | 29 | Go to the Prometheus UI on :9090/config and check that you have an etcd job entry: 30 | ``` 31 | - job_name: monitoring/etcd-k8s/0 32 | scrape_interval: 30s 33 | scrape_timeout: 10s 34 | ... 35 | ``` 36 | 37 | On the :9090/targets page: 38 | * You should see "etcd" with the UP state. If not, check the Error column for more information. 39 | * If no "etcd" targets are shown on this page at all, Prometheus isn't attempting to scrape it. 40 | 41 | # Step 3: Grafana dashboard 42 | 43 | ## Find a dashboard you like 44 | 45 | Try to load this dashboard: 46 | https://grafana.com/dashboards/3070 47 | 48 | ## Save the dashboard in the configmap 49 | 50 | As documented in [Developing Alerts and Dashboards](developing-prometheus-rules-and-grafana-dashboards.md), the Grafana instances are stateless. The dashboards are automatically re-loaded from the ConfigMap. 51 | So if you load a dashboard through the Grafana UI, it won't be kept unless it is saved in the ConfigMap. 52 | 53 | Read [the document](developing-prometheus-rules-and-grafana-dashboards.md), but in summary: 54 | 55 | ### Copy your dashboard: 56 | Once you are happy with the dashboard, export it and move it to `prometheus-operator/contrib/kube-prometheus/assets/grafana/` (with a name ending in "-dashboard.json"). 57 | 58 | ### Regenerate the Grafana dashboard manifest: 59 | `hack/scripts/generate-dashboards-configmap.sh > manifests/grafana/grafana-dashboards.yaml` 60 | 61 | ### Reload the manifest in Kubernetes: 62 | `kubectl -n monitoring replace -f manifests/grafana/grafana-dashboards.yaml` 63 | 64 | After a few minutes your dashboard will be available permanently to all Grafana instances. 65 | -------------------------------------------------------------------------------- /install/kube-prometheus/docs/monitoring-other-namespaces.md: -------------------------------------------------------------------------------- 1 | # Monitoring other Kubernetes Namespaces 2 | This guide will help you monitor applications in other Namespaces. By default the RBAC rules are only enabled for the `default` and `kube-system` Namespaces during installation. 3 | 4 | # Setup 5 | You have to give the list of the Namespaces that you want to be able to monitor. 6 | This is done in the variable `prometheus.roleSpecificNamespaces`. You usually set this in your `.jsonnet` file when building the manifests.
7 | 8 | Example to create the needed `Role` and `RoleBinding` for the Namespace `foo`: 9 | ``` 10 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { 11 | _config+:: { 12 | namespace: 'monitoring', 13 | 14 | prometheus+:: { 15 | namespaces: ["default", "kube-system", "foo"], 16 | }, 17 | }, 18 | }; 19 | 20 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 21 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 22 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 23 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 24 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 25 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 26 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 27 | 28 | ``` 29 | -------------------------------------------------------------------------------- /install/kube-prometheus/example.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = 2 | (import 'kube-prometheus/kube-prometheus.libsonnet') + { 3 | _config+:: { 4 | namespace: 'monitoring', 5 | }, 6 | }; 7 | 8 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 9 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 10 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 11 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 12 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 13 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 14 | { ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + 15 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 16 | 17 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/additional-namespaces.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { 2 | _config+:: { 3 | namespace: 'monitoring', 4 | 5 | prometheus+:: { 6 | namespaces+: ['my-namespace', 'my-second-namespace'], 7 | }, 8 | }, 9 | }; 10 | 11 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 12 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 13 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 14 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 15 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 16 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 17 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 18 |
-------------------------------------------------------------------------------- /install/kube-prometheus/examples/alertmanager-config-external.jsonnet: -------------------------------------------------------------------------------- 1 | ((import 'kube-prometheus/kube-prometheus.libsonnet') + { 2 | _config+:: { 3 | alertmanager+: { 4 | config: importstr 'alertmanager-config.yaml', 5 | }, 6 | }, 7 | }).alertmanager.secret 8 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/alertmanager-config.jsonnet: -------------------------------------------------------------------------------- 1 | ((import 'kube-prometheus/kube-prometheus.libsonnet') + { 2 | _config+:: { 3 | alertmanager+: { 4 | config: ||| 5 | global: 6 | resolve_timeout: 10m 7 | route: 8 | group_by: ['job'] 9 | group_wait: 30s 10 | group_interval: 5m 11 | repeat_interval: 12h 12 | receiver: 'null' 13 | routes: 14 | - match: 15 | alertname: Watchdog 16 | receiver: 'null' 17 | receivers: 18 | - name: 'null' 19 | |||, 20 | }, 21 | }, 22 | }).alertmanager.secret 23 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/alertmanager-config.yaml: -------------------------------------------------------------------------------- 1 | # external alertmanager yaml 2 | global: 3 | resolve_timeout: 10m 4 | route: 5 | group_by: ['job'] 6 | group_wait: 30s 7 | group_interval: 5m 8 | repeat_interval: 12h 9 | receiver: 'null' 10 | routes: 11 | - match: 12 | alertname: Watchdog 13 | receiver: 'null' 14 | receivers: 15 | - name: 'null' 16 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/auth: -------------------------------------------------------------------------------- 1 | # This file should not ever be used, it's just a mock. 2 | dontusethis:$apr1$heg6VIp7$1PSzJ/Z6fYboQ5pYrbgSy. 
3 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/basic-auth/secrets.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: basic-auth 5 | data: 6 | password: dG9vcg== # toor 7 | user: YWRtaW4= # admin 8 | type: Opaque -------------------------------------------------------------------------------- /install/kube-prometheus/examples/basic-auth/service-monitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-apps: basic-auth-example 6 | name: basic-auth-example 7 | spec: 8 | endpoints: 9 | - basicAuth: 10 | password: 11 | name: basic-auth 12 | key: password 13 | username: 14 | name: basic-auth 15 | key: user 16 | port: metrics 17 | namespaceSelector: 18 | matchNames: 19 | - logging 20 | selector: 21 | matchLabels: 22 | app: myapp -------------------------------------------------------------------------------- /install/kube-prometheus/examples/etcd-client-ca.crt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Taipei-HUG/Prometheus-workshop/ae4c2c8e7666c408532c7618e537742aee8c88f6/install/kube-prometheus/examples/etcd-client-ca.crt -------------------------------------------------------------------------------- /install/kube-prometheus/examples/etcd-client.crt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Taipei-HUG/Prometheus-workshop/ae4c2c8e7666c408532c7618e537742aee8c88f6/install/kube-prometheus/examples/etcd-client.crt -------------------------------------------------------------------------------- /install/kube-prometheus/examples/etcd-client.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Taipei-HUG/Prometheus-workshop/ae4c2c8e7666c408532c7618e537742aee8c88f6/install/kube-prometheus/examples/etcd-client.key -------------------------------------------------------------------------------- /install/kube-prometheus/examples/etcd-skip-verify.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') + { 3 | _config+:: { 4 | namespace: 'monitoring', 5 | 6 | etcd+:: { 7 | ips: ['127.0.0.1'], 8 | clientCA: importstr 'etcd-client-ca.crt', 9 | clientKey: importstr 'etcd-client.key', 10 | clientCert: importstr 'etcd-client.crt', 11 | insecureSkipVerify: true, 12 | }, 13 | }, 14 | }; 15 | 16 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 17 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 18 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 19 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 20 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 21 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 22 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 23 | 
-------------------------------------------------------------------------------- /install/kube-prometheus/examples/etcd.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') + { 3 | _config+:: { 4 | namespace: 'monitoring', 5 | 6 | // Reference info: https://github.com/coreos/prometheus-operator/blob/master/contrib/kube-prometheus/README.md#static-etcd-configuration 7 | etcd+:: { 8 | // Configure this to be the IP(s) to scrape - i.e. your etcd node(s) (use commas to separate multiple values). 9 | ips: ['127.0.0.1'], 10 | 11 | // Reference info: 12 | // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitorspec (has endpoints) 13 | // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#endpoint (has tlsConfig) 14 | // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig (has: caFile, certFile, keyFile, serverName, & insecureSkipVerify) 15 | 16 | // Set these three variables to the fully qualified directory path on your work machine to the certificate files that are valid to scrape etcd metrics with (check the apiserver container). 17 | // Most likely these certificates are generated somewhere in an infrastructure repository, so using the jsonnet `importstr` function can 18 | // be useful here. (Kube-aws stores these three files inside the credential folder.) 19 | // All the sensitive information on the certificates will end up in a Kubernetes Secret. 20 | clientCA: importstr 'etcd-client-ca.crt', 21 | clientKey: importstr 'etcd-client.key', 22 | clientCert: importstr 'etcd-client.crt', 23 | 24 | // Note that you should specify a value EITHER for 'serverName' OR for 'insecureSkipVerify'. (Don't specify a value for both of them, and don't specify a value for neither of them.) 25 | // * Specifying serverName: Ideally you should provide a valid value for serverName (and then insecureSkipVerify should be left as false - so that serverName gets used). 26 | // * Specifying insecureSkipVerify: insecureSkipVerify is only to be used (i.e. set to true) if you cannot (based on how your etcd certificates were created) use a Subject Alternative Name. 27 | // * If you specify a value: 28 | // ** for both of these variables: When 'insecureSkipVerify: true' is specified, then also specifying a value for serverName won't hurt anything but it will be ignored. 29 | // ** for neither of these variables: then you'll get authentication errors on the prom '/targets' page with your etcd targets. 30 | 31 | // A valid name (DNS or Subject Alternative Name) that the client (i.e. prometheus) will use to verify the etcd TLS certificate. 32 | // * Note that doing `nslookup etcd.kube-system.svc.cluster.local` (on a pod in a K8s cluster where kube-prometheus has been installed) shows that kube-prometheus sets up this hostname. 33 | // * `openssl x509 -noout -text -in etcd-client.pem` will print the Subject Alternative Names. 34 | serverName: 'etcd.kube-system.svc.cluster.local', 35 | 36 | // When insecureSkipVerify isn't specified, the default value is "false". 37 | //insecureSkipVerify: true, 38 | 39 | // In case you have generated the etcd certificate with kube-aws: 40 | // * If you only have one etcd node, you can use the value from 'etcd.internalDomainName' (specified in your kube-aws cluster.yaml) as the value for 'serverName'. 
41 | // * But if you have multiple etcd nodes, you will need to use 'insecureSkipVerify: true' (if using default certificate generators method), as the valid certificate domain 42 | // will be different for each etcd node. (kube-aws default certificates are not valid against the IP - they were created for the DNS.) 43 | }, 44 | }, 45 | }; 46 | 47 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 48 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 49 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 50 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 51 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 52 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 53 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 54 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/example-app.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: example-app 5 | labels: 6 | tier: frontend 7 | namespace: default 8 | spec: 9 | selector: 10 | app: example-app 11 | ports: 12 | - name: web 13 | protocol: TCP 14 | port: 8080 15 | targetPort: web 16 | --- 17 | apiVersion: extensions/v1beta1 18 | kind: Deployment 19 | metadata: 20 | name: example-app 21 | namespace: default 22 | spec: 23 | replicas: 4 24 | template: 25 | metadata: 26 | labels: 27 | app: example-app 28 | version: 1.1.3 29 | spec: 30 | containers: 31 | - name: example-app 32 | image: quay.io/fabxc/prometheus_demo_service 33 | ports: 34 | - name: web 35 | containerPort: 8080 36 | protocol: TCP 37 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/prometheus-frontend-alertmanager-discovery-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: RoleBinding 3 | metadata: 4 | name: prometheus-frontend 5 | namespace: monitoring 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: alertmanager-discovery 10 | subjects: 11 | - kind: ServiceAccount 12 | name: prometheus-frontend 13 | namespace: default 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/prometheus-frontend-alertmanager-discovery-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: Role 3 | metadata: 4 | name: alertmanager-discovery 5 | namespace: monitoring 6 | rules: 7 | - apiGroups: [""] 8 | resources: 9 | - services 10 | - endpoints 11 | - pods 12 | verbs: ["list", "watch"] 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/prometheus-frontend-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: RoleBinding 3 | metadata: 4 | name: prometheus-frontend 5 | namespace: default 6 | roleRef: 7 | 
apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: prometheus-frontend 10 | subjects: 11 | - kind: ServiceAccount 12 | name: prometheus-frontend 13 | namespace: default 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/prometheus-frontend-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: Role 3 | metadata: 4 | name: prometheus-frontend 5 | namespace: default 6 | rules: 7 | - apiGroups: [""] 8 | resources: 9 | - nodes 10 | - services 11 | - endpoints 12 | - pods 13 | verbs: ["get", "list", "watch"] 14 | - apiGroups: [""] 15 | resources: 16 | - configmaps 17 | verbs: ["get"] 18 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/prometheus-frontend-service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: prometheus-frontend 5 | namespace: default 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/prometheus-frontend-svc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: prometheus-frontend 5 | namespace: default 6 | spec: 7 | type: NodePort 8 | ports: 9 | - name: web 10 | nodePort: 30100 11 | port: 9090 12 | protocol: TCP 13 | targetPort: web 14 | selector: 15 | prometheus: frontend 16 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/prometheus-frontend.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: Prometheus 3 | metadata: 4 | name: frontend 5 | namespace: default 6 | labels: 7 | prometheus: frontend 8 | spec: 9 | serviceAccountName: prometheus-frontend 10 | version: v1.7.1 11 | serviceMonitorSelector: 12 | matchLabels: 13 | tier: frontend 14 | resources: 15 | requests: 16 | # 2Gi is default, but won't schedule if you don't have a node with >2Gi 17 | # memory. Modify based on your target and time-series count for 18 | # production use. This value is mainly meant for demonstration/testing 19 | # purposes. 
20 | memory: 400Mi 21 | alerting: 22 | alertmanagers: 23 | - namespace: monitoring 24 | name: alertmanager-main 25 | port: web 26 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-app/servicemonitor-frontend.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: frontend 5 | namespace: default 6 | labels: 7 | tier: frontend 8 | spec: 9 | selector: 10 | matchLabels: 11 | tier: frontend 12 | targetLabels: 13 | - tier 14 | endpoints: 15 | - port: web 16 | interval: 10s 17 | namespaceSelector: 18 | matchNames: 19 | - default 20 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/example-grafana-dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | 5 | ] 6 | }, 7 | "editable": false, 8 | "gnetid": null, 9 | "graphtooltip": 0, 10 | "hidecontrols": false, 11 | "id": null, 12 | "links": [ 13 | 14 | ], 15 | "refresh": "", 16 | "rows": [ 17 | { 18 | "collapse": false, 19 | "collapsed": false, 20 | "height": "250px", 21 | "panels": [ 22 | { 23 | "aliascolors": { 24 | 25 | }, 26 | "bars": false, 27 | "dashlength": 10, 28 | "dashes": false, 29 | "datasource": "$datasource", 30 | "fill": 1, 31 | "gridpos": { 32 | 33 | }, 34 | "id": 2, 35 | "legend": { 36 | "alignastable": false, 37 | "avg": false, 38 | "current": false, 39 | "max": false, 40 | "min": false, 41 | "rightside": false, 42 | "show": true, 43 | "total": false, 44 | "values": false 45 | }, 46 | "lines": true, 47 | "linewidth": 1, 48 | "nullpointmode": "null", 49 | "percentage": false, 50 | "pointradius": 5, 51 | "points": false, 52 | "renderer": "flot", 53 | "repeat": null, 54 | "seriesoverrides": [ 55 | 56 | ], 57 | "spacelength": 10, 58 | "span": 6, 59 | "stack": false, 60 | "steppedline": false, 61 | "targets": [ 62 | { 63 | "expr": "vector(1)", 64 | "format": "time_series", 65 | "intervalfactor": 2, 66 | "legendformat": "", 67 | "refid": "a" 68 | } 69 | ], 70 | "thresholds": [ 71 | 72 | ], 73 | "timefrom": null, 74 | "timeshift": null, 75 | "title": "my panel", 76 | "tooltip": { 77 | "shared": true, 78 | "sort": 0, 79 | "value_type": "individual" 80 | }, 81 | "type": "graph", 82 | "xaxis": { 83 | "buckets": null, 84 | "mode": "time", 85 | "name": null, 86 | "show": true, 87 | "values": [ 88 | 89 | ] 90 | }, 91 | "yaxes": [ 92 | { 93 | "format": "short", 94 | "label": null, 95 | "logbase": 1, 96 | "max": null, 97 | "min": null, 98 | "show": true 99 | }, 100 | { 101 | "format": "short", 102 | "label": null, 103 | "logbase": 1, 104 | "max": null, 105 | "min": null, 106 | "show": true 107 | } 108 | ] 109 | } 110 | ], 111 | "repeat": null, 112 | "repeatiteration": null, 113 | "repeatrowid": null, 114 | "showtitle": false, 115 | "title": "dashboard row", 116 | "titlesize": "h6", 117 | "type": "row" 118 | } 119 | ], 120 | "schemaversion": 14, 121 | "style": "dark", 122 | "tags": [ 123 | 124 | ], 125 | "templating": { 126 | "list": [ 127 | { 128 | "current": { 129 | "text": "prometheus", 130 | "value": "prometheus" 131 | }, 132 | "hide": 0, 133 | "label": null, 134 | "name": "datasource", 135 | "options": [ 136 | 137 | ], 138 | "query": "prometheus", 139 | "refresh": 1, 140 | "regex": "", 141 | "type": "datasource" 142 | } 143 | ] 144 | }, 145 | "time": { 146 | "from": "now-6h", 147 | "to": "now" 148 | 
}, 149 | "timepicker": { 150 | "refresh_intervals": [ 151 | "5s", 152 | "10s", 153 | "30s", 154 | "1m", 155 | "5m", 156 | "15m", 157 | "30m", 158 | "1h", 159 | "2h", 160 | "1d" 161 | ], 162 | "time_options": [ 163 | "5m", 164 | "15m", 165 | "1h", 166 | "6h", 167 | "12h", 168 | "24h", 169 | "2d", 170 | "7d", 171 | "30d" 172 | ] 173 | }, 174 | "timezone": "browser", 175 | "title": "my dashboard", 176 | "version": 0 177 | } 178 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/existingrule.json: -------------------------------------------------------------------------------- 1 | {"groups":[{"name":"example-group","rules":[{"alert":"Watchdog","annotations":{"description":"This is a Watchdog meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]} -------------------------------------------------------------------------------- /install/kube-prometheus/examples/existingrule.yaml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: example-group 3 | rules: 4 | - alert: Watchdog 5 | expr: vector(1) 6 | labels: 7 | severity: "none" 8 | annotations: 9 | description: This is a Watchdog meant to ensure that the entire alerting pipeline is functional. 10 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/grafana-additional-jsonnet-dashboard-example.jsonnet: -------------------------------------------------------------------------------- 1 | local grafana = import 'grafonnet/grafana.libsonnet'; 2 | local dashboard = grafana.dashboard; 3 | local row = grafana.row; 4 | local prometheus = grafana.prometheus; 5 | local template = grafana.template; 6 | local graphPanel = grafana.graphPanel; 7 | 8 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { 9 | _config+:: { 10 | namespace: 'monitoring', 11 | }, 12 | grafanaDashboards+:: { 13 | 'my-dashboard.json': 14 | dashboard.new('My Dashboard') 15 | .addTemplate( 16 | { 17 | current: { 18 | text: 'Prometheus', 19 | value: 'Prometheus', 20 | }, 21 | hide: 0, 22 | label: null, 23 | name: 'datasource', 24 | options: [], 25 | query: 'prometheus', 26 | refresh: 1, 27 | regex: '', 28 | type: 'datasource', 29 | }, 30 | ) 31 | .addRow( 32 | row.new() 33 | .addPanel(graphPanel.new('My Panel', span=6, datasource='$datasource') 34 | .addTarget(prometheus.target('vector(1)'))) 35 | ), 36 | }, 37 | }; 38 | 39 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 40 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 41 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 42 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 43 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 44 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 45 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 46 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/grafana-additional-rendered-dashboard-example.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = (import 
'kube-prometheus/kube-prometheus.libsonnet') + { 2 | _config+:: { 3 | namespace: 'monitoring', 4 | }, 5 | grafanaDashboards+:: { 6 | 'my-dashboard.json': (import 'example-grafana-dashboard.json'), 7 | }, 8 | }; 9 | 10 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 11 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 12 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 13 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 14 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 15 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 16 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 17 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/ingress.jsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local secret = k.core.v1.secret; 3 | local ingress = k.extensions.v1beta1.ingress; 4 | local ingressTls = ingress.mixin.spec.tlsType; 5 | local ingressRule = ingress.mixin.spec.rulesType; 6 | local httpIngressPath = ingressRule.mixin.http.pathsType; 7 | 8 | local kp = 9 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 10 | { 11 | _config+:: { 12 | namespace: 'monitoring', 13 | }, 14 | // Configure External URL's per application 15 | alertmanager+:: { 16 | alertmanager+: { 17 | spec+: { 18 | externalUrl: 'http://alertmanager.example.com', 19 | }, 20 | }, 21 | }, 22 | grafana+:: { 23 | config+: { 24 | sections+: { 25 | server+: { 26 | root_url: 'http://grafana.example.com/', 27 | }, 28 | }, 29 | }, 30 | }, 31 | prometheus+:: { 32 | prometheus+: { 33 | spec+: { 34 | externalUrl: 'http://prometheus.example.com', 35 | }, 36 | }, 37 | }, 38 | // Create ingress objects per application 39 | ingress+:: { 40 | 'alertmanager-main': 41 | ingress.new() + 42 | ingress.mixin.metadata.withName('alertmanager-main') + 43 | ingress.mixin.metadata.withNamespace($._config.namespace) + 44 | ingress.mixin.metadata.withAnnotations({ 45 | 'nginx.ingress.kubernetes.io/auth-type': 'basic', 46 | 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', 47 | 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', 48 | }) + 49 | ingress.mixin.spec.withRules( 50 | ingressRule.new() + 51 | ingressRule.withHost('alertmanager.example.com') + 52 | ingressRule.mixin.http.withPaths( 53 | httpIngressPath.new() + 54 | httpIngressPath.mixin.backend.withServiceName('alertmanager-main') + 55 | httpIngressPath.mixin.backend.withServicePort('web') 56 | ), 57 | ), 58 | grafana: 59 | ingress.new() + 60 | ingress.mixin.metadata.withName('grafana') + 61 | ingress.mixin.metadata.withNamespace($._config.namespace) + 62 | ingress.mixin.metadata.withAnnotations({ 63 | 'nginx.ingress.kubernetes.io/auth-type': 'basic', 64 | 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', 65 | 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', 66 | }) + 67 | ingress.mixin.spec.withRules( 68 | ingressRule.new() + 69 | ingressRule.withHost('grafana.example.com') + 70 | ingressRule.mixin.http.withPaths( 71 | httpIngressPath.new() + 72 | httpIngressPath.mixin.backend.withServiceName('grafana') + 
73 | httpIngressPath.mixin.backend.withServicePort('http') 74 | ), 75 | ), 76 | 'prometheus-k8s': 77 | ingress.new() + 78 | ingress.mixin.metadata.withName('prometheus-k8s') + 79 | ingress.mixin.metadata.withNamespace($._config.namespace) + 80 | ingress.mixin.metadata.withAnnotations({ 81 | 'nginx.ingress.kubernetes.io/auth-type': 'basic', 82 | 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', 83 | 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', 84 | }) + 85 | ingress.mixin.spec.withRules( 86 | ingressRule.new() + 87 | ingressRule.withHost('prometheus.example.com') + 88 | ingressRule.mixin.http.withPaths( 89 | httpIngressPath.new() + 90 | httpIngressPath.mixin.backend.withServiceName('prometheus-k8s') + 91 | httpIngressPath.mixin.backend.withServicePort('web') 92 | ), 93 | ), 94 | }, 95 | } + { 96 | // Create basic auth secret - replace 'auth' file with your own 97 | ingress+:: { 98 | 'basic-auth-secret': 99 | secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) + 100 | secret.mixin.metadata.withNamespace($._config.namespace), 101 | }, 102 | }; 103 | 104 | { [name + '-ingress']: kp.ingress[name] for name in std.objectFields(kp.ingress) } 105 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/internal-registry.jsonnet: -------------------------------------------------------------------------------- 1 | local mixin = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet'; 2 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { 3 | _config+:: { 4 | namespace: 'monitoring', 5 | }, 6 | } + mixin.withImageRepository('internal-registry.com/organization'); 7 | 8 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 9 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 10 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 11 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 12 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 13 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 14 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 15 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/jsonnet-build-snippet/build-snippet.jsonnet: -------------------------------------------------------------------------------- 1 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 2 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 3 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 4 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 5 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 6 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 7 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 8 | -------------------------------------------------------------------------------- 
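The build snippet above is not compiled on its own; it is the tail of a complete jsonnet file (such as `example.jsonnet`) that first defines `kp`. As a rough sketch of how such a file is typically rendered into manifests, assuming the conventional kube-prometheus layout where jsonnet-bundler puts dependencies under `vendor/` and compiled output goes to `manifests/` (tool names and paths here are illustrative, not prescribed by the snippet itself):

```sh
# One-time: fetch the jsonnet dependencies declared in jsonnetfile.json into vendor/
jb install

# Render each top-level field of example.jsonnet into a file under manifests/
mkdir -p manifests
jsonnet -J vendor -m manifests example.jsonnet

# The rendered files are JSON; give them an extension kubectl recognises, then apply
for f in manifests/*; do mv -- "$f" "$f.json"; done
kubectl apply -f manifests/
```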
/install/kube-prometheus/examples/jsonnet-snippets/bootkube.jsonnet: -------------------------------------------------------------------------------- 1 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-bootkube.libsonnet') 3 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/jsonnet-snippets/kops-coredns.jsonnet: -------------------------------------------------------------------------------- 1 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-kops.libsonnet') + 3 | (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet') 4 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/jsonnet-snippets/kops.jsonnet: -------------------------------------------------------------------------------- 1 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-kops.libsonnet') 3 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/jsonnet-snippets/kube-aws.jsonnet: -------------------------------------------------------------------------------- 1 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-kube-aws.libsonnet') 3 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/jsonnet-snippets/kubeadm.jsonnet: -------------------------------------------------------------------------------- 1 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') 3 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/jsonnet-snippets/kubespray.jsonnet: -------------------------------------------------------------------------------- 1 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-kubespray.libsonnet') 3 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/jsonnet-snippets/node-ports.jsonnet: -------------------------------------------------------------------------------- 1 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 2 | (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') 3 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/ksonnet-example.jsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local daemonset = k.apps.v1beta2.daemonSet; 3 | 4 | ((import 'kube-prometheus/kube-prometheus.libsonnet') + { 5 | nodeExporter+: { 6 | daemonset+: 7 | daemonset.mixin.metadata.withNamespace('my-custom-namespace'), 8 | }, 9 | }).nodeExporter.daemonset 10 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/kustomize.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = 2 | (import 'kube-prometheus/kube-prometheus.libsonnet') + { 3 | _config+:: { 4 | namespace: 'monitoring', 5 | }, 6 | }; 7 | 8 | local manifests = 9 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 10 | { 
['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 11 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 12 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 13 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 14 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 15 | { ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + 16 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }; 17 | 18 | local kustomizationResourceFile(name) = './manifests/' + name + '.yaml'; 19 | local kustomization = { 20 | apiVersion: 'kustomize.config.k8s.io/v1beta1', 21 | kind: 'Kustomization', 22 | resources: std.map(kustomizationResourceFile, std.objectFields(manifests)), 23 | }; 24 | 25 | manifests { 26 | '../kustomization': kustomization, 27 | } 28 | 29 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/minikube.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = 2 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 3 | (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') + 4 | // Note that NodePort type services is likely not a good idea for your production use case, it is only used for demonstration purposes here. 5 | (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') + 6 | { 7 | _config+:: { 8 | namespace: 'monitoring', 9 | alertmanager+:: { 10 | config: importstr 'alertmanager-config.yaml', 11 | }, 12 | grafana+:: { 13 | config: { // http://docs.grafana.org/installation/configuration/ 14 | sections: { 15 | // Do not require grafana users to login/authenticate 16 | 'auth.anonymous': { enabled: true }, 17 | }, 18 | }, 19 | }, 20 | }, 21 | 22 | // For simplicity, each of the following values for 'externalUrl': 23 | // * assume that `minikube ip` prints "192.168.99.100" 24 | // * hard-code the NodePort for each app 25 | prometheus+:: { 26 | prometheus+: { 27 | // Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec 28 | spec+: { 29 | // An e.g. of the purpose of this is so the "Source" links on http:///#/alerts are valid. 30 | externalUrl: 'http://192.168.99.100:30900', 31 | 32 | // Reference info: "external_labels" on https://prometheus.io/docs/prometheus/latest/configuration/configuration/ 33 | externalLabels: { 34 | // This 'cluster' label will be included on every firing prometheus alert. (This is more useful 35 | // when running multiple clusters in a shared environment (e.g. AWS) with other users.) 
36 | cluster: 'minikube-', 37 | }, 38 | }, 39 | }, 40 | }, 41 | alertmanager+:: { 42 | alertmanager+: { 43 | // Reference info: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec 44 | spec+: { 45 | externalUrl: 'http://192.168.99.100:30903', 46 | 47 | logLevel: 'debug', // So firing alerts show up in log 48 | }, 49 | }, 50 | }, 51 | }; 52 | 53 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 54 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 55 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 56 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 57 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 58 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 59 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 60 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/prometheus-additional-alert-rule-example.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { 2 | _config+:: { 3 | namespace: 'monitoring', 4 | }, 5 | prometheusAlerts+:: { 6 | groups+: [ 7 | { 8 | name: 'example-group', 9 | rules: [ 10 | { 11 | alert: 'Watchdog', 12 | expr: 'vector(1)', 13 | labels: { 14 | severity: 'none', 15 | }, 16 | annotations: { 17 | description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.', 18 | }, 19 | }, 20 | ], 21 | }, 22 | ], 23 | }, 24 | }; 25 | 26 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 27 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 28 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 29 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 30 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 31 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 32 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 33 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/prometheus-additional-recording-rule-example.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { 2 | _config+:: { 3 | namespace: 'monitoring', 4 | }, 5 | prometheusRules+:: { 6 | groups+: [ 7 | { 8 | name: 'example-group', 9 | rules: [ 10 | { 11 | record: 'some_recording_rule_name', 12 | expr: 'vector(1)', 13 | }, 14 | ], 15 | }, 16 | ], 17 | }, 18 | }; 19 | 20 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 21 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 22 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in 
std.objectFields(kp.nodeExporter) } + 23 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 24 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 25 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 26 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 27 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/prometheus-additional-rendered-rule-example.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { 2 | prometheusAlerts+:: (import 'existingrule.json'), 3 | }; 4 | 5 | { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + 6 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 7 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 8 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 9 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 10 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 11 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 12 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/prometheus-name-override.jsonnet: -------------------------------------------------------------------------------- 1 | ((import 'kube-prometheus/kube-prometheus.libsonnet') + { 2 | prometheus+: { 3 | prometheus+: { 4 | metadata+: { 5 | name: 'my-name', 6 | }, 7 | }, 8 | }, 9 | }).prometheus.prometheus 10 | -------------------------------------------------------------------------------- /install/kube-prometheus/examples/prometheus-pvc.jsonnet: -------------------------------------------------------------------------------- 1 | // Reference info: documentation for https://github.com/ksonnet/ksonnet-lib can be found at http://g.bryan.dev.hepti.center 2 | // 3 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; // https://github.com/ksonnet/ksonnet-lib/blob/master/ksonnet.beta.3/k.libsonnet - imports k8s.libsonnet 4 | // * https://github.com/ksonnet/ksonnet-lib/blob/master/ksonnet.beta.3/k8s.libsonnet defines things such as "persistentVolumeClaim:: {" 5 | // 6 | local pvc = k.core.v1.persistentVolumeClaim; // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.11/#persistentvolumeclaim-v1-core (defines variable named 'spec' of type 'PersistentVolumeClaimSpec') 7 | 8 | local kp = 9 | (import 'kube-prometheus/kube-prometheus.libsonnet') + 10 | (import 'kube-prometheus/kube-prometheus-bootkube.libsonnet') + 11 | { 12 | _config+:: { 13 | namespace: 'monitoring', 14 | }, 15 | 16 | prometheus+:: { 17 | prometheus+: { 18 | spec+: { // https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec 19 | // If a value isn't specified for 'retention', then by default the '--storage.tsdb.retention=24h' arg will be passed to prometheus by prometheus-operator. 
20 | // The possible values for a prometheus are: 21 | // * https://github.com/prometheus/common/blob/c7de230/model/time.go#L178 specifies "^([0-9]+)(y|w|d|h|m|s|ms)$" (years weeks days hours minutes seconds milliseconds) 22 | retention: '30d', 23 | 24 | // Reference info: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md 25 | // By default (if the following 'storage.volumeClaimTemplate' isn't created), prometheus will be created with an EmptyDir for the 'prometheus-k8s-db' volume (for the prom tsdb). 26 | // This 'storage.volumeClaimTemplate' causes the following to be automatically created (via dynamic provisioning) for each prometheus pod: 27 | // * PersistentVolumeClaim (and a corresponding PersistentVolume) 28 | // * the actual volume (per the StorageClassName specified below) 29 | storage: { // https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#storagespec 30 | volumeClaimTemplate: // (same link as above where the 'pvc' variable is defined) 31 | pvc.new() + // http://g.bryan.dev.hepti.center/core/v1/persistentVolumeClaim/#core.v1.persistentVolumeClaim.new 32 | 33 | pvc.mixin.spec.withAccessModes('ReadWriteOnce') + 34 | 35 | // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.11/#resourcerequirements-v1-core (defines 'requests'), 36 | // and https://kubernetes.io/docs/concepts/policy/resource-quotas/#storage-resource-quota (defines 'requests.storage') 37 | pvc.mixin.spec.resources.withRequests({ storage: '100Gi' }) + 38 | 39 | // A StorageClass of the following name (which can be seen via `kubectl get storageclass` from a node in the given K8s cluster) must exist prior to kube-prometheus being deployed. 40 | pvc.mixin.spec.withStorageClassName('ssd'), 41 | 42 | // The following 'selector' is only needed if you're using manual storage provisioning (https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md#manual-storage-provisioning). 
43 | // And note that this is not supported/allowed by AWS - uncommenting the following 'selector' line (when deploying kube-prometheus to a K8s cluster in AWS) will cause the pvc to be stuck in the Pending status and have the following error: 44 | // * 'Failed to provision volume with StorageClass "ssd": claim.Spec.Selector is not supported for dynamic provisioning on AWS' 45 | //pvc.mixin.spec.selector.withMatchLabels({}), 46 | }, // storage 47 | }, // spec 48 | }, // prometheus 49 | }, // prometheus 50 | 51 | }; 52 | 53 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 54 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 55 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 56 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 57 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 58 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 59 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/.gitignore: -------------------------------------------------------------------------------- 1 | apiserver-key.pem 2 | apiserver.csr 3 | apiserver.pem 4 | metrics-ca-config.json 5 | metrics-ca.crt 6 | metrics-ca.key 7 | cm-adapter-serving-certs.yaml 8 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/README.md: -------------------------------------------------------------------------------- 1 | # Custom Metrics API 2 | 3 | The custom metrics API allows the HPA v2 to scale based on arbitrary metrics. 4 | 5 | This directory contains an example deployment which extends the Prometheus Adapter, deployed with kube-prometheus, to serve the [Custom Metrics API](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/instrumentation/custom-metrics-api.md) by talking to Prometheus running inside the cluster. 6 | 7 | Make sure you have the Prometheus Adapter up and running in the `monitoring` namespace. 8 | 9 | You can deploy everything in the `monitoring` namespace using `./deploy.sh`. 10 | 11 | When you're done, you can tear everything down using the `./teardown.sh` script. 12 | 13 | ### Sample App 14 | 15 | Additionally, this directory contains a sample app that uses the [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) to scale the Deployment's Pod replicas up and down as needed. 16 | Deploy this app by running `kubectl apply -f sample-app.yaml`. 17 | Make the app accessible on your system, for example by using `kubectl port-forward svc/sample-app 8080`. Next you need to put some load on its HTTP endpoints. 18 | 19 | A tool like [hey](https://github.com/rakyll/hey) is helpful for doing so: `hey -c 20 -n 100000000 http://localhost:8080/metrics` 20 | 21 | There is even more detailed information on this sample app at [luxas/kubeadm-workshop](https://github.com/luxas/kubeadm-workshop#deploying-the-prometheus-operator-for-monitoring-services-in-the-cluster). 
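Once the adapter and the sample app are deployed, one way to sanity-check the setup is to query the custom metrics API directly and watch the HPA react. This is only a sketch: it assumes the sample app runs in the `default` namespace (as in `sample-app.yaml`), that the adapter exposes the `http_requests` metric the HPA consumes, and that `jq` is installed for readable output.

```sh
# List the custom metrics the adapter currently serves
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq .

# Read the per-pod http_requests metric backing the sample app's HPA
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/default/pods/*/http_requests" | jq .

# Watch the HPA scale the Deployment while hey generates load
kubectl get hpa sample-app --watch
```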
22 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: custom-metrics-server-resources 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: custom-metrics-server-resources 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-adapter 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/custom-metrics-apiservice.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiregistration.k8s.io/v1beta1 2 | kind: APIService 3 | metadata: 4 | name: v1beta1.custom.metrics.k8s.io 5 | spec: 6 | service: 7 | name: prometheus-adapter 8 | namespace: monitoring 9 | group: custom.metrics.k8s.io 10 | version: v1beta1 11 | insecureSkipTLSVerify: true 12 | groupPriorityMinimum: 100 13 | versionPriority: 100 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/custom-metrics-cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: ClusterRole 3 | metadata: 4 | name: custom-metrics-server-resources 5 | rules: 6 | - apiGroups: 7 | - custom.metrics.k8s.io 8 | resources: ["*"] 9 | verbs: ["*"] 10 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/custom-metrics-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: adapter-config 5 | namespace: monitoring 6 | data: 7 | config.yaml: | 8 | rules: 9 | - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' 10 | seriesFilters: [] 11 | resources: 12 | overrides: 13 | namespace: 14 | resource: namespace 15 | pod_name: 16 | resource: pod 17 | name: 18 | matches: ^container_(.*)_seconds_total$ 19 | as: "" 20 | metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[1m])) by (<<.GroupBy>>) 21 | - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' 22 | seriesFilters: 23 | - isNot: ^container_.*_seconds_total$ 24 | resources: 25 | overrides: 26 | namespace: 27 | resource: namespace 28 | pod_name: 29 | resource: pod 30 | name: 31 | matches: ^container_(.*)_total$ 32 | as: "" 33 | metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[1m])) by (<<.GroupBy>>) 34 | - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' 35 | seriesFilters: 36 | - isNot: ^container_.*_total$ 37 | resources: 38 | overrides: 39 | namespace: 40 | resource: namespace 41 | pod_name: 42 | resource: pod 43 | name: 44 | matches: ^container_(.*)$ 45 | as: "" 46 | metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}) by (<<.GroupBy>>) 47 | - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' 48 | seriesFilters: 49 | - isNot: .*_total$ 50 | resources: 51 | template: <<.Resource>> 52 | name: 53 | matches: "" 54 
| as: "" 55 | metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) 56 | - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' 57 | seriesFilters: 58 | - isNot: .*_seconds_total 59 | resources: 60 | template: <<.Resource>> 61 | name: 62 | matches: ^(.*)_total$ 63 | as: "" 64 | metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>) 65 | - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' 66 | seriesFilters: [] 67 | resources: 68 | template: <<.Resource>> 69 | name: 70 | matches: ^(.*)_seconds_total$ 71 | as: "" 72 | metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>) 73 | resourceRules: 74 | cpu: 75 | containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>) 76 | nodeQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, id='/'}[1m])) by (<<.GroupBy>>) 77 | resources: 78 | overrides: 79 | node: 80 | resource: node 81 | namespace: 82 | resource: namespace 83 | pod_name: 84 | resource: pod 85 | containerLabel: container_name 86 | memory: 87 | containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>}) by (<<.GroupBy>>) 88 | nodeQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,id='/'}) by (<<.GroupBy>>) 89 | resources: 90 | overrides: 91 | node: 92 | resource: node 93 | namespace: 94 | resource: namespace 95 | pod_name: 96 | resource: pod 97 | containerLabel: container_name 98 | window: 1m 99 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | kubectl apply -n monitoring -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml 4 | kubectl apply -n monitoring -f custom-metrics-apiservice.yaml 5 | kubectl apply -n monitoring -f custom-metrics-cluster-role.yaml 6 | kubectl apply -n monitoring -f custom-metrics-configmap.yaml 7 | kubectl apply -n monitoring -f hpa-custom-metrics-cluster-role-binding.yaml 8 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/hpa-custom-metrics-cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: hpa-controller-custom-metrics 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: custom-metrics-server-resources 9 | subjects: 10 | - kind: ServiceAccount 11 | name: horizontal-pod-autoscaler 12 | namespace: kube-system 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/sample-app.yaml: -------------------------------------------------------------------------------- 1 | kind: ServiceMonitor 2 | apiVersion: monitoring.coreos.com/v1 3 | metadata: 4 | name: sample-app 5 | labels: 6 | app: sample-app 7 | spec: 8 | selector: 9 | matchLabels: 10 | app: sample-app 11 | endpoints: 12 | - port: http 13 | interval: 5s 14 | --- 15 | apiVersion: v1 16 | kind: Service 17 | metadata: 18 | name: sample-app 19 | labels: 20 | app: sample-app 21 | spec: 22 | ports: 23 | - name: http 24 | port: 8080 25 | targetPort: 8080 26 | selector: 27 | app: sample-app 28 | --- 29 | apiVersion: apps/v1 30 | kind: Deployment 31 | metadata: 32 | name: 
sample-app 33 | labels: 34 | app: sample-app 35 | spec: 36 | replicas: 1 37 | selector: 38 | matchLabels: 39 | app: sample-app 40 | template: 41 | metadata: 42 | labels: 43 | app: sample-app 44 | spec: 45 | containers: 46 | - image: luxas/autoscale-demo:v0.1.2 47 | name: metrics-provider 48 | ports: 49 | - name: http 50 | containerPort: 8080 51 | --- 52 | kind: HorizontalPodAutoscaler 53 | apiVersion: autoscaling/v2beta1 54 | metadata: 55 | name: sample-app 56 | spec: 57 | scaleTargetRef: 58 | apiVersion: apps/v1 59 | kind: Deployment 60 | name: sample-app 61 | minReplicas: 1 62 | maxReplicas: 10 63 | metrics: 64 | - type: Pods 65 | pods: 66 | metricName: http_requests 67 | targetAverageValue: 500m 68 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/custom-metrics-api/teardown.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | kubectl delete -n monitoring -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml 4 | kubectl delete -n monitoring -f custom-metrics-apiservice.yaml 5 | kubectl delete -n monitoring -f custom-metrics-cluster-role.yaml 6 | kubectl delete -n monitoring -f custom-metrics-configmap.yaml 7 | kubectl delete -n monitoring -f hpa-custom-metrics-cluster-role-binding.yaml 8 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/metrics-server/auth-delegator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: metrics-server:system:auth-delegator 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: system:auth-delegator 9 | subjects: 10 | - kind: ServiceAccount 11 | name: metrics-server 12 | namespace: kube-system 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/metrics-server/auth-reader.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: RoleBinding 3 | metadata: 4 | name: metrics-server-auth-reader 5 | namespace: kube-system 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: extension-apiserver-authentication-reader 10 | subjects: 11 | - kind: ServiceAccount 12 | name: metrics-server 13 | namespace: kube-system 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/metrics-server/metrics-apiservice.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiregistration.k8s.io/v1beta1 2 | kind: APIService 3 | metadata: 4 | name: v1beta1.metrics.k8s.io 5 | spec: 6 | service: 7 | name: metrics-server 8 | namespace: kube-system 9 | group: metrics.k8s.io 10 | version: v1beta1 11 | insecureSkipTLSVerify: true 12 | groupPriorityMinimum: 100 13 | versionPriority: 100 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/metrics-server/metrics-server-cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: system:metrics-server 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 
7 | kind: ClusterRole 8 | name: system:metrics-server 9 | subjects: 10 | - kind: ServiceAccount 11 | name: metrics-server 12 | namespace: kube-system 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/metrics-server/metrics-server-cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: system:metrics-server 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - pods 10 | - nodes 11 | - nodes/stats 12 | - namespaces 13 | verbs: 14 | - get 15 | - list 16 | - watch 17 | - apiGroups: 18 | - "extensions" 19 | resources: 20 | - deployments 21 | verbs: 22 | - get 23 | - list 24 | - watch 25 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/metrics-server/metrics-server-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: metrics-server 5 | namespace: kube-system 6 | labels: 7 | k8s-app: metrics-server 8 | spec: 9 | selector: 10 | matchLabels: 11 | k8s-app: metrics-server 12 | template: 13 | metadata: 14 | name: metrics-server 15 | labels: 16 | k8s-app: metrics-server 17 | spec: 18 | serviceAccountName: metrics-server 19 | containers: 20 | - name: metrics-server 21 | image: gcr.io/google_containers/metrics-server-amd64:v0.2.0 22 | imagePullPolicy: Always 23 | command: 24 | - /metrics-server 25 | - --source=kubernetes.summary_api:'' 26 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/metrics-server/metrics-server-service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: metrics-server 5 | namespace: kube-system 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/experimental/metrics-server/metrics-server-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: metrics-server 5 | namespace: kube-system 6 | labels: 7 | kubernetes.io/name: "Metrics-server" 8 | spec: 9 | selector: 10 | k8s-app: metrics-server 11 | ports: 12 | - port: 443 13 | protocol: TCP 14 | targetPort: 443 15 | -------------------------------------------------------------------------------- /install/kube-prometheus/grafana-image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:9.3-slim 2 | 3 | ARG GRAFANA_VERSION 4 | 5 | RUN apt-get update && apt-get install -qq -y wget tar sqlite && \ 6 | wget -O /tmp/grafana.tar.gz https://s3-us-west-2.amazonaws.com/grafana-releases/release/grafana-$GRAFANA_VERSION.linux-x64.tar.gz && \ 7 | tar -zxvf /tmp/grafana.tar.gz -C /tmp && mv /tmp/grafana-$GRAFANA_VERSION /grafana && \ 8 | rm -rf /tmp/grafana.tar.gz 9 | 10 | ADD config.ini /grafana/conf/config.ini 11 | 12 | USER nobody 13 | EXPOSE 3000 14 | VOLUME [ "/data" ] 15 | WORKDIR /grafana 16 | ENTRYPOINT [ "/grafana/bin/grafana-server" ] 17 | CMD [ "-config=/grafana/conf/config.ini" ] 18 | -------------------------------------------------------------------------------- /install/kube-prometheus/grafana-image/Makefile: 
-------------------------------------------------------------------------------- 1 | VERSION=5.0.3 2 | IMAGE_TAG=$(VERSION) 3 | 4 | container: 5 | docker build --build-arg GRAFANA_VERSION=$(VERSION) -t quay.io/coreos/monitoring-grafana:$(IMAGE_TAG) . 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/grafana-image/config.ini: -------------------------------------------------------------------------------- 1 | [database] 2 | path = /data/grafana.db 3 | 4 | [paths] 5 | data = /data 6 | logs = /data/log 7 | plugins = /data/plugins 8 | 9 | [session] 10 | provider = memory 11 | 12 | [auth.basic] 13 | enabled = false 14 | 15 | [auth.anonymous] 16 | enabled = true 17 | -------------------------------------------------------------------------------- /install/kube-prometheus/hack/example-service-monitoring/deploy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # exit immediately when a command fails 3 | set -e 4 | # only exit with zero if all commands of the pipeline exit successfully 5 | set -o pipefail 6 | # error on unset variables 7 | set -u 8 | 9 | kubectl apply -f examples/example-app 10 | -------------------------------------------------------------------------------- /install/kube-prometheus/hack/example-service-monitoring/teardown: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # exit immediately when a command fails 3 | set -e 4 | # only exit with zero if all commands of the pipeline exit successfully 5 | set -o pipefail 6 | # error on unset variables 7 | set -u 8 | 9 | kubectl delete -f examples/example-app 10 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/.gitignore: -------------------------------------------------------------------------------- 1 | jsonnetfile.lock.json 2 | vendor/ 3 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/alertmanager/alertmanager.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | 3 | { 4 | _config+:: { 5 | namespace: 'default', 6 | 7 | versions+:: { 8 | alertmanager: 'v0.16.1', 9 | }, 10 | 11 | imageRepos+:: { 12 | alertmanager: 'quay.io/prometheus/alertmanager', 13 | }, 14 | 15 | alertmanager+:: { 16 | name: $._config.alertmanager.name, 17 | config: { 18 | global: { 19 | resolve_timeout: '5m', 20 | }, 21 | route: { 22 | group_by: ['job'], 23 | group_wait: '30s', 24 | group_interval: '5m', 25 | repeat_interval: '12h', 26 | receiver: 'null', 27 | routes: [ 28 | { 29 | receiver: 'null', 30 | match: { 31 | alertname: 'Watchdog', 32 | }, 33 | }, 34 | ], 35 | }, 36 | receivers: [ 37 | { 38 | name: 'null', 39 | }, 40 | ], 41 | }, 42 | replicas: 3, 43 | }, 44 | }, 45 | 46 | alertmanager+:: { 47 | secret: 48 | local secret = k.core.v1.secret; 49 | 50 | if std.type($._config.alertmanager.config) == 'object' then 51 | secret.new('alertmanager-' + $._config.alertmanager.name, { 'alertmanager.yaml': std.base64(std.manifestYamlDoc($._config.alertmanager.config)) }) + 52 | secret.mixin.metadata.withNamespace($._config.namespace) 53 | else 54 | secret.new('alertmanager-' + $._config.alertmanager.name, { 'alertmanager.yaml': std.base64($._config.alertmanager.config) }) + 55 | 
secret.mixin.metadata.withNamespace($._config.namespace), 56 | 57 | serviceAccount: 58 | local serviceAccount = k.core.v1.serviceAccount; 59 | 60 | serviceAccount.new('alertmanager-' + $._config.alertmanager.name) + 61 | serviceAccount.mixin.metadata.withNamespace($._config.namespace), 62 | 63 | service: 64 | local service = k.core.v1.service; 65 | local servicePort = k.core.v1.service.mixin.spec.portsType; 66 | 67 | local alertmanagerPort = servicePort.newNamed('web', 9093, 'web'); 68 | 69 | service.new('alertmanager-' + $._config.alertmanager.name, { app: 'alertmanager', alertmanager: $._config.alertmanager.name }, alertmanagerPort) + 70 | service.mixin.metadata.withNamespace($._config.namespace) + 71 | service.mixin.metadata.withLabels({ alertmanager: $._config.alertmanager.name }), 72 | 73 | serviceMonitor: 74 | { 75 | apiVersion: 'monitoring.coreos.com/v1', 76 | kind: 'ServiceMonitor', 77 | metadata: { 78 | name: 'alertmanager', 79 | namespace: $._config.namespace, 80 | labels: { 81 | 'k8s-app': 'alertmanager', 82 | }, 83 | }, 84 | spec: { 85 | selector: { 86 | matchLabels: { 87 | alertmanager: $._config.alertmanager.name, 88 | }, 89 | }, 90 | endpoints: [ 91 | { 92 | port: 'web', 93 | interval: '30s', 94 | }, 95 | ], 96 | }, 97 | }, 98 | 99 | alertmanager: 100 | { 101 | apiVersion: 'monitoring.coreos.com/v1', 102 | kind: 'Alertmanager', 103 | metadata: { 104 | name: $._config.alertmanager.name, 105 | namespace: $._config.namespace, 106 | labels: { 107 | alertmanager: $._config.alertmanager.name, 108 | }, 109 | }, 110 | spec: { 111 | replicas: $._config.alertmanager.replicas, 112 | version: $._config.versions.alertmanager, 113 | baseImage: $._config.imageRepos.alertmanager, 114 | nodeSelector: { 'beta.kubernetes.io/os': 'linux' }, 115 | serviceAccountName: 'alertmanager-' + $._config.alertmanager.name, 116 | securityContext: { 117 | runAsUser: 1000, 118 | runAsNonRoot: true, 119 | fsGroup: 2000, 120 | }, 121 | }, 122 | }, 123 | }, 124 | } 125 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | prometheusAlerts+:: { 3 | groups+: [ 4 | { 5 | name: 'alertmanager.rules', 6 | rules: [ 7 | { 8 | alert: 'AlertmanagerConfigInconsistent', 9 | annotations: { 10 | message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.', 11 | }, 12 | expr: ||| 13 | count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1 14 | ||| % $._config, 15 | 'for': '5m', 16 | labels: { 17 | severity: 'critical', 18 | }, 19 | }, 20 | { 21 | alert: 'AlertmanagerFailedReload', 22 | annotations: { 23 | message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.", 24 | }, 25 | expr: ||| 26 | alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0 27 | ||| % $._config, 28 | 'for': '10m', 29 | labels: { 30 | severity: 'warning', 31 | }, 32 | }, 33 | { 34 | alert:'AlertmanagerMembersInconsistent', 35 | annotations:{ 36 | message: 'Alertmanager has not found all other members of the cluster.', 37 | }, 38 | expr: ||| 39 | alertmanager_cluster_members{%(alertmanagerSelector)s} 40 | != on (service) 
GROUP_LEFT() 41 | count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s}) 42 | ||| % $._config, 43 | 'for': '5m', 44 | labels: { 45 | severity: 'critical', 46 | }, 47 | }, 48 | ], 49 | }, 50 | ], 51 | }, 52 | } 53 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/alerts/alerts.libsonnet: -------------------------------------------------------------------------------- 1 | (import 'alertmanager.libsonnet') + 2 | (import 'general.libsonnet') + 3 | (import 'node.libsonnet') + 4 | (import 'prometheus.libsonnet') + 5 | (import 'prometheus-operator.libsonnet') 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/alerts/general.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | prometheusAlerts+:: { 3 | groups+: [ 4 | { 5 | name: 'general.rules', 6 | rules: [ 7 | { 8 | alert: 'TargetDown', 9 | annotations: { 10 | message: '{{ $value }}% of the {{ $labels.job }} targets are down.', 11 | }, 12 | expr: '100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10', 13 | 'for': '10m', 14 | labels: { 15 | severity: 'warning', 16 | }, 17 | }, 18 | { 19 | alert: 'Watchdog', 20 | annotations: { 21 | message: ||| 22 | This is an alert meant to ensure that the entire alerting pipeline is functional. 23 | This alert is always firing, therefore it should always be firing in Alertmanager 24 | and always fire against a receiver. There are integrations with various notification 25 | mechanisms that send a notification when this alert is not firing. For example the 26 | "DeadMansSnitch" integration in PagerDuty. 27 | |||, 28 | }, 29 | expr: 'vector(1)', 30 | labels: { 31 | severity: 'none', 32 | }, 33 | }, 34 | ], 35 | }, 36 | ], 37 | }, 38 | } 39 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/alerts/node.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | prometheusAlerts+:: { 3 | groups+: [ 4 | { 5 | name: 'kube-prometheus-node-alerting.rules', 6 | rules: [ 7 | { 8 | alert: 'NodeDiskRunningFull', 9 | annotations: { 10 | message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 24 hours.', 11 | }, 12 | expr: ||| 13 | (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0) 14 | ||| % $._config, 15 | 'for': '30m', 16 | labels: { 17 | severity: 'warning', 18 | }, 19 | }, 20 | { 21 | alert: 'NodeDiskRunningFull', 22 | annotations: { 23 | message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 2 hours.', 24 | }, 25 | expr: ||| 26 | (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0) 27 | ||| % $._config, 28 | 'for': '10m', 29 | labels: { 30 | severity: 'critical', 31 | }, 32 | }, 33 | ], 34 | }, 35 | ], 36 | }, 37 | } 38 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | prometheusAlerts+:: { 3 | groups+: [ 4 | { 5 | name: 'prometheus-operator', 6 | rules: [ 7 | { 8 | alert: 'PrometheusOperatorReconcileErrors', 9 | 
expr: ||| 10 | rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 11 | ||| % $._config, 12 | labels: { 13 | severity: 'warning', 14 | }, 15 | annotations: { 16 | message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.', 17 | }, 18 | 'for': '10m', 19 | }, 20 | { 21 | alert: 'PrometheusOperatorNodeLookupErrors', 22 | expr: ||| 23 | rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 24 | ||| % $._config, 25 | labels: { 26 | severity: 'warning', 27 | }, 28 | annotations: { 29 | message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.', 30 | }, 31 | 'for': '10m', 32 | }, 33 | ], 34 | }, 35 | ], 36 | }, 37 | } 38 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/alerts/prometheus.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | prometheusAlerts+:: { 3 | groups+: [ 4 | { 5 | name: 'prometheus.rules', 6 | rules: [ 7 | { 8 | alert: 'PrometheusConfigReloadFailed', 9 | annotations: { 10 | description: "Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}", 11 | summary: "Reloading Prometheus' configuration failed", 12 | }, 13 | expr: ||| 14 | prometheus_config_last_reload_successful{%(prometheusSelector)s} == 0 15 | ||| % $._config, 16 | 'for': '10m', 17 | labels: { 18 | severity: 'warning', 19 | }, 20 | }, 21 | { 22 | alert: 'PrometheusNotificationQueueRunningFull', 23 | annotations: { 24 | description: "Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ $labels.pod}}", 25 | summary: "Prometheus' alert notification queue is running full", 26 | }, 27 | expr: ||| 28 | predict_linear(prometheus_notifications_queue_length{%(prometheusSelector)s}[5m], 60 * 30) > prometheus_notifications_queue_capacity{%(prometheusSelector)s} 29 | ||| % $._config, 30 | 'for': '10m', 31 | labels: { 32 | severity: 'warning', 33 | }, 34 | }, 35 | { 36 | alert: 'PrometheusErrorSendingAlerts', 37 | annotations: { 38 | description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}', 39 | summary: 'Errors while sending alert from Prometheus', 40 | }, 41 | expr: ||| 42 | rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.01 43 | ||| % $._config, 44 | 'for': '10m', 45 | labels: { 46 | severity: 'warning', 47 | }, 48 | }, 49 | { 50 | alert: 'PrometheusErrorSendingAlerts', 51 | annotations: { 52 | description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}', 53 | summary: 'Errors while sending alerts from Prometheus', 54 | }, 55 | expr: ||| 56 | rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.03 57 | ||| % $._config, 58 | 'for': '10m', 59 | labels: { 60 | severity: 'critical', 61 | }, 62 | }, 63 | { 64 | alert: 'PrometheusNotConnectedToAlertmanagers', 65 | annotations: { 66 | description: 'Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected to any Alertmanagers', 67 | summary: 'Prometheus is not connected to any Alertmanagers', 68 | }, 69 | expr: ||| 70 | 
prometheus_notifications_alertmanagers_discovered{%(prometheusSelector)s} < 1 71 | ||| % $._config, 72 | 'for': '10m', 73 | labels: { 74 | severity: 'warning', 75 | }, 76 | }, 77 | { 78 | alert: 'PrometheusTSDBReloadsFailing', 79 | annotations: { 80 | description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} reload failures over the last four hours.', 81 | summary: 'Prometheus has issues reloading data blocks from disk', 82 | }, 83 | expr: ||| 84 | increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0 85 | ||| % $._config, 86 | 'for': '12h', 87 | labels: { 88 | severity: 'warning', 89 | }, 90 | }, 91 | { 92 | alert: 'PrometheusTSDBCompactionsFailing', 93 | annotations: { 94 | description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} compaction failures over the last four hours.', 95 | summary: 'Prometheus has issues compacting sample blocks', 96 | }, 97 | expr: ||| 98 | increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0 99 | ||| % $._config, 100 | 'for': '12h', 101 | labels: { 102 | severity: 'warning', 103 | }, 104 | }, 105 | { 106 | alert: 'PrometheusTSDBWALCorruptions', 107 | annotations: { 108 | description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead log (WAL).', 109 | summary: 'Prometheus write-ahead log is corrupted', 110 | }, 111 | expr: ||| 112 | prometheus_tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0 113 | ||| % $._config, 114 | 'for': '4h', 115 | labels: { 116 | severity: 'warning', 117 | }, 118 | }, 119 | { 120 | alert: 'PrometheusNotIngestingSamples', 121 | annotations: { 122 | description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples.", 123 | summary: "Prometheus isn't ingesting samples", 124 | }, 125 | expr: ||| 126 | rate(prometheus_tsdb_head_samples_appended_total{%(prometheusSelector)s}[5m]) <= 0 127 | ||| % $._config, 128 | 'for': '10m', 129 | labels: { 130 | severity: 'warning', 131 | }, 132 | }, 133 | { 134 | alert: 'PrometheusTargetScrapesDuplicate', 135 | annotations: { 136 | description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected due to duplicate timestamps but different values', 137 | summary: 'Prometheus has many samples rejected', 138 | }, 139 | expr: ||| 140 | increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{%(prometheusSelector)s}[5m]) > 0 141 | ||| % $._config, 142 | 'for': '10m', 143 | labels: { 144 | severity: 'warning', 145 | }, 146 | }, 147 | ], 148 | }, 149 | ], 150 | }, 151 | } 152 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/alerts/tests.yaml: -------------------------------------------------------------------------------- 1 | # TODO(metalmatze): This file is temporarily saved here for later reference 2 | # until we find out how to integrate the tests into our jsonnet stack. 
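# Illustrative usage, not part of the original file: the cases below follow
# Prometheus' rule unit-test format, so once the alert groups are rendered out
# to the plain rules.yaml referenced under rule_files they can be exercised
# with promtool, which ships with Prometheus:
#
#   promtool test rules tests.yaml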
3 | 4 | rule_files: 5 | - rules.yaml 6 | 7 | evaluation_interval: 1m 8 | 9 | tests: 10 | - interval: 1m 11 | input_series: 12 | - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}' 13 | values: '3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0' 14 | - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}' 15 | values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3' 16 | - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}' 17 | values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3' 18 | alert_rule_test: 19 | - eval_time: 5m 20 | alertname: AlertmanagerMembersInconsistent 21 | - eval_time: 11m 22 | alertname: AlertmanagerMembersInconsistent 23 | exp_alerts: 24 | - exp_labels: 25 | service: 'alertmanager-main' 26 | severity: critical 27 | job: 'alertmanager-main' 28 | instance: 10.10.10.0 29 | namespace: monitoring 30 | pod: alertmanager-main-0 31 | exp_annotations: 32 | message: 'Alertmanager has not found all other members of the cluster.' 33 | - eval_time: 17m 34 | alertname: AlertmanagerMembersInconsistent 35 | exp_alerts: 36 | - exp_labels: 37 | service: 'alertmanager-main' 38 | severity: critical 39 | job: 'alertmanager-main' 40 | instance: 10.10.10.0 41 | namespace: monitoring 42 | pod: alertmanager-main-0 43 | exp_annotations: 44 | message: 'Alertmanager has not found all other members of the cluster.' 45 | - eval_time: 23m 46 | alertname: AlertmanagerMembersInconsistent 47 | exp_alerts: 48 | - exp_labels: 49 | service: 'alertmanager-main' 50 | severity: critical 51 | job: 'alertmanager-main' 52 | instance: 10.10.10.0 53 | namespace: monitoring 54 | pod: alertmanager-main-0 55 | exp_annotations: 56 | message: 'Alertmanager has not found all other members of the cluster.' 57 | - interval: 1m 58 | input_series: 59 | - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}' 60 | values: '3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1' 61 | - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}' 62 | values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2' 63 | - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}' 64 | values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2' 65 | alert_rule_test: 66 | - eval_time: 5m 67 | alertname: AlertmanagerMembersInconsistent 68 | - eval_time: 11m 69 | alertname: AlertmanagerMembersInconsistent 70 | exp_alerts: 71 | - exp_labels: 72 | service: 'alertmanager-main' 73 | severity: critical 74 | job: 'alertmanager-main' 75 | instance: 10.10.10.0 76 | namespace: monitoring 77 | pod: alertmanager-main-0 78 | exp_annotations: 79 | message: 'Alertmanager has not found all other members of the cluster.' 80 | - exp_labels: 81 | service: 'alertmanager-main' 82 | severity: critical 83 | job: 'alertmanager-main' 84 | instance: 10.10.10.1 85 | namespace: monitoring 86 | pod: alertmanager-main-1 87 | exp_annotations: 88 | message: 'Alertmanager has not found all other members of the cluster.' 
89 | - exp_labels: 90 | service: 'alertmanager-main' 91 | severity: critical 92 | job: 'alertmanager-main' 93 | instance: 10.10.10.2 94 | namespace: monitoring 95 | pod: alertmanager-main-2 96 | exp_annotations: 97 | message: 'Alertmanager has not found all other members of the cluster.' 98 | - eval_time: 17m 99 | alertname: AlertmanagerMembersInconsistent 100 | exp_alerts: 101 | - exp_labels: 102 | service: 'alertmanager-main' 103 | severity: critical 104 | job: 'alertmanager-main' 105 | instance: 10.10.10.0 106 | namespace: monitoring 107 | pod: alertmanager-main-0 108 | exp_annotations: 109 | message: 'Alertmanager has not found all other members of the cluster.' 110 | - exp_labels: 111 | service: 'alertmanager-main' 112 | severity: critical 113 | job: 'alertmanager-main' 114 | instance: 10.10.10.1 115 | namespace: monitoring 116 | pod: alertmanager-main-1 117 | exp_annotations: 118 | message: 'Alertmanager has not found all other members of the cluster.' 119 | - exp_labels: 120 | service: 'alertmanager-main' 121 | severity: critical 122 | job: 'alertmanager-main' 123 | instance: 10.10.10.2 124 | namespace: monitoring 125 | pod: alertmanager-main-2 126 | exp_annotations: 127 | message: 'Alertmanager has not found all other members of the cluster.' 128 | - eval_time: 23m 129 | alertname: AlertmanagerMembersInconsistent 130 | exp_alerts: 131 | - exp_labels: 132 | service: 'alertmanager-main' 133 | severity: critical 134 | job: 'alertmanager-main' 135 | instance: 10.10.10.0 136 | namespace: monitoring 137 | pod: alertmanager-main-0 138 | exp_annotations: 139 | message: 'Alertmanager has not found all other members of the cluster.' 140 | - exp_labels: 141 | service: 'alertmanager-main' 142 | severity: critical 143 | job: 'alertmanager-main' 144 | instance: 10.10.10.1 145 | namespace: monitoring 146 | pod: alertmanager-main-1 147 | exp_annotations: 148 | message: 'Alertmanager has not found all other members of the cluster.' 149 | - exp_labels: 150 | service: 'alertmanager-main' 151 | severity: critical 152 | job: 'alertmanager-main' 153 | instance: 10.10.10.2 154 | namespace: monitoring 155 | pod: alertmanager-main-2 156 | exp_annotations: 157 | message: 'Alertmanager has not found all other members of the cluster.' 
158 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/jsonnetfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | { 4 | "name": "ksonnet", 5 | "source": { 6 | "git": { 7 | "remote": "https://github.com/ksonnet/ksonnet-lib", 8 | "subdir": "" 9 | } 10 | }, 11 | "version": "master" 12 | }, 13 | { 14 | "name": "kubernetes-mixin", 15 | "source": { 16 | "git": { 17 | "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", 18 | "subdir": "" 19 | } 20 | }, 21 | "version": "master" 22 | }, 23 | { 24 | "name": "grafana", 25 | "source": { 26 | "git": { 27 | "remote": "https://github.com/brancz/kubernetes-grafana", 28 | "subdir": "grafana" 29 | } 30 | }, 31 | "version": "master" 32 | }, 33 | { 34 | "name": "prometheus-operator", 35 | "source": { 36 | "git": { 37 | "remote": "https://github.com/coreos/prometheus-operator", 38 | "subdir": "jsonnet/prometheus-operator" 39 | } 40 | }, 41 | "version": "v0.29.0" 42 | }, 43 | { 44 | "name": "etcd-mixin", 45 | "source": { 46 | "git": { 47 | "remote": "https://github.com/coreos/etcd", 48 | "subdir": "Documentation/etcd-mixin" 49 | } 50 | }, 51 | "version": "master" 52 | } 53 | ] 54 | } 55 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-anti-affinity.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local statefulSet = k.apps.v1beta2.statefulSet; 3 | local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType; 4 | local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType; 5 | 6 | { 7 | local antiaffinity(key, values) = { 8 | affinity: { 9 | podAntiAffinity: { 10 | preferredDuringSchedulingIgnoredDuringExecution: [ 11 | affinity.new() + 12 | affinity.withWeight(100) + 13 | affinity.mixin.podAffinityTerm.withNamespaces($._config.namespace) + 14 | affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') + 15 | affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([ 16 | matchExpression.new() + 17 | matchExpression.withKey(key) + 18 | matchExpression.withOperator('In') + 19 | matchExpression.withValues(values), 20 | ]), 21 | ], 22 | }, 23 | }, 24 | }, 25 | 26 | alertmanager+:: { 27 | alertmanager+: { 28 | spec+: 29 | antiaffinity('alertmanager', [$._config.alertmanager.name]), 30 | }, 31 | }, 32 | 33 | prometheus+: { 34 | prometheus+: { 35 | spec+: 36 | antiaffinity('prometheus', [$._config.prometheus.name]), 37 | }, 38 | }, 39 | } 40 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-bootkube.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local service = k.core.v1.service; 3 | local servicePort = k.core.v1.service.mixin.spec.portsType; 4 | 5 | { 6 | prometheus+:: { 7 | kubeControllerManagerPrometheusDiscoveryService: 8 | service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + 9 | service.mixin.metadata.withNamespace('kube-system') + 10 | 
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + 11 | service.mixin.spec.withClusterIp('None'), 12 | kubeSchedulerPrometheusDiscoveryService: 13 | service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + 14 | service.mixin.metadata.withNamespace('kube-system') + 15 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + 16 | service.mixin.spec.withClusterIp('None'), 17 | kubeDnsPrometheusDiscoveryService: 18 | service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('http-metrics-skydns', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) + 19 | service.mixin.metadata.withNamespace('kube-system') + 20 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + 21 | service.mixin.spec.withClusterIp('None'), 22 | }, 23 | } 24 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-config-mixins.libsonnet: -------------------------------------------------------------------------------- 1 | local l = import 'lib/lib.libsonnet'; 2 | 3 | // withImageRepository is a mixin that replaces all images prefixes by repository. eg. 4 | // quay.io/coreos/addon-resizer -> $repository/addon-resizer 5 | // grafana/grafana -> grafana $repository/grafana 6 | local withImageRepository(repository) = { 7 | local oldRepos = super._config.imageRepos, 8 | local substituteRepository(image, repository) = 9 | if repository == null then image else repository + '/' + l.imageName(image), 10 | _config+:: { 11 | imageRepos:: { 12 | [field]: substituteRepository(oldRepos[field], repository), 13 | for field in std.objectFields(oldRepos) 14 | } 15 | }, 16 | }; 17 | 18 | { 19 | withImageRepository:: withImageRepository, 20 | } 21 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | prometheus+:: { 3 | serviceMonitorKubelet+: 4 | { 5 | spec+: { 6 | endpoints: [ 7 | { 8 | port: 'http-metrics', 9 | scheme: 'http', 10 | interval: '30s', 11 | bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', 12 | }, 13 | { 14 | port: 'http-metrics', 15 | scheme: 'http', 16 | path: '/metrics/cadvisor', 17 | interval: '30s', 18 | honorLabels: true, 19 | bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', 20 | }, 21 | ], 22 | }, 23 | }, 24 | }, 25 | } 26 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-kops-coredns.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local service = k.core.v1.service; 3 | local servicePort = k.core.v1.service.mixin.spec.portsType; 4 | 5 | { 6 | prometheus+:: { 7 | kubeDnsPrometheusDiscoveryService: 8 | service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 9153, 9153)]) + 9 | service.mixin.metadata.withNamespace('kube-system') + 10 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + 11 | service.mixin.spec.withClusterIp('None'), 12 | }, 13 | } 14 | -------------------------------------------------------------------------------- 
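The libsonnet files in this directory are add-on mixins rather than standalone configurations. As an illustrative sketch only (modelled on the composition pattern used in kube-prometheus-ksonnet.libsonnet below, with import paths assumed to resolve against a jsonnet-bundler vendor directory), a build file layers them onto the base library like this:

local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-kops.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') +
  { _config+:: { namespace: 'monitoring' } };

// Render per-component manifests; the remaining component groups are composed the same way.
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) }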
/install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-kops.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local service = k.core.v1.service; 3 | local servicePort = k.core.v1.service.mixin.spec.portsType; 4 | 5 | { 6 | prometheus+:: { 7 | kubeControllerManagerPrometheusDiscoveryService: 8 | service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + 9 | service.mixin.metadata.withNamespace('kube-system') + 10 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + 11 | service.mixin.spec.withClusterIp('None'), 12 | kubeSchedulerPrometheusDiscoveryService: 13 | service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + 14 | service.mixin.metadata.withNamespace('kube-system') + 15 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + 16 | service.mixin.spec.withClusterIp('None'), 17 | kubeDnsPrometheusDiscoveryService: 18 | service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) + 19 | service.mixin.metadata.withNamespace('kube-system') + 20 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + 21 | service.mixin.spec.withClusterIp('None'), 22 | }, 23 | } 24 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-ksonnet.libsonnet: -------------------------------------------------------------------------------- 1 | local kp = (import 'kube-prometheus/kube-prometheus.libsonnet'); 2 | 3 | { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + 4 | { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + 5 | { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + 6 | { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + 7 | { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + 8 | { ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } 9 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-kube-aws.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local service = k.core.v1.service; 3 | local servicePort = k.core.v1.service.mixin.spec.portsType; 4 | 5 | { 6 | prometheus+: { 7 | kubeControllerManagerPrometheusDiscoveryService: 8 | service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + 9 | service.mixin.metadata.withNamespace('kube-system') + 10 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + 11 | service.mixin.spec.withClusterIp('None'), 12 | kubeSchedulerPrometheusDiscoveryService: 13 | service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + 14 | 
service.mixin.metadata.withNamespace('kube-system') + 15 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + 16 | service.mixin.spec.withClusterIp('None'), 17 | }, 18 | } 19 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-kubeadm.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local service = k.core.v1.service; 3 | local servicePort = k.core.v1.service.mixin.spec.portsType; 4 | 5 | { 6 | prometheus+: { 7 | kubeControllerManagerPrometheusDiscoveryService: 8 | service.new('kube-controller-manager-prometheus-discovery', { component: 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + 9 | service.mixin.metadata.withNamespace('kube-system') + 10 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + 11 | service.mixin.spec.withClusterIp('None'), 12 | kubeSchedulerPrometheusDiscoveryService: 13 | service.new('kube-scheduler-prometheus-discovery', { component: 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + 14 | service.mixin.metadata.withNamespace('kube-system') + 15 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + 16 | service.mixin.spec.withClusterIp('None'), 17 | }, 18 | } 19 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-kubespray.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local service = k.core.v1.service; 3 | local servicePort = k.core.v1.service.mixin.spec.portsType; 4 | 5 | { 6 | prometheus+: { 7 | kubeControllerManagerPrometheusDiscoveryService: 8 | service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) + 9 | service.mixin.metadata.withNamespace('kube-system') + 10 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + 11 | service.mixin.spec.withClusterIp('None'), 12 | kubeSchedulerPrometheusDiscoveryService: 13 | service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) + 14 | service.mixin.metadata.withNamespace('kube-system') + 15 | service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + 16 | service.mixin.spec.withClusterIp('None'), 17 | }, 18 | } 19 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-managed-cluster.libsonnet: -------------------------------------------------------------------------------- 1 | // On managed Kubernetes clusters some of the control plane components are not exposed to customers. 2 | // Disable scrape jobs and service monitors for these components by overwriting 'kube-prometheus.libsonnet' defaults 3 | // Note this doesn't disable generation of associated alerting rules but the rules don't trigger 4 | 5 | { 6 | _config+:: { 7 | // This snippet walks the original object (super.jobs, set as temp var j) and creates a replacement jobs object 8 | // excluding any members of the set specified (eg: controller and scheduler). 
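// Illustrative example (not part of the upstream file): if the inherited jobs object
// were { Kubelet: 'job="kubelet"', KubeScheduler: 'job="kube-scheduler"' }, the
// comprehension below would keep only { Kubelet: 'job="kubelet"' }, since
// std.setMember reports which keys are in the excluded set and the `if !` drops them.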
9 | local j = super.jobs, 10 | jobs: { 11 | [k]: j[k] 12 | for k in std.objectFields(j) 13 | if !std.setMember(k, ['KubeControllerManager', 'KubeScheduler']) 14 | }, 15 | }, 16 | 17 | // Same as above but for ServiceMonitors 18 | local p = super.prometheus, 19 | prometheus: { 20 | [q]: p[q] 21 | for q in std.objectFields(p) 22 | if !std.setMember(q, ['serviceMonitorKubeControllerManager', 'serviceMonitorKubeScheduler']) 23 | }, 24 | 25 | // TODO: disable generation of alerting rules 26 | // manifests/prometheus-rules.yaml:52: - name: kube-scheduler.rules 27 | 28 | } 29 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-node-ports.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local service = k.core.v1.service; 3 | local servicePort = k.core.v1.service.mixin.spec.portsType; 4 | 5 | { 6 | prometheus+: { 7 | service+: 8 | service.mixin.spec.withPorts(servicePort.newNamed('web', 9090, 'web') + servicePort.withNodePort(30900)) + 9 | service.mixin.spec.withType('NodePort'), 10 | }, 11 | alertmanager+: { 12 | service+: 13 | service.mixin.spec.withPorts(servicePort.newNamed('web', 9093, 'web') + servicePort.withNodePort(30903)) + 14 | service.mixin.spec.withType('NodePort'), 15 | }, 16 | grafana+: { 17 | service+: 18 | service.mixin.spec.withPorts(servicePort.newNamed('http', 3000, 'http') + servicePort.withNodePort(30902)) + 19 | service.mixin.spec.withType('NodePort'), 20 | }, 21 | } 22 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | 3 | (import 'etcd-mixin/mixin.libsonnet') + { 4 | _config+:: { 5 | etcd: { 6 | ips: [], 7 | clientCA: null, 8 | clientKey: null, 9 | clientCert: null, 10 | serverName: null, 11 | insecureSkipVerify: null, 12 | }, 13 | }, 14 | prometheus+:: { 15 | serviceEtcd: 16 | local service = k.core.v1.service; 17 | local servicePort = k.core.v1.service.mixin.spec.portsType; 18 | 19 | local etcdServicePort = servicePort.newNamed('metrics', 2379, 2379); 20 | 21 | service.new('etcd', null, etcdServicePort) + 22 | service.mixin.metadata.withNamespace('kube-system') + 23 | service.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) + 24 | service.mixin.spec.withClusterIp('None'), 25 | endpointsEtcd: 26 | local endpoints = k.core.v1.endpoints; 27 | local endpointSubset = endpoints.subsetsType; 28 | local endpointPort = endpointSubset.portsType; 29 | 30 | local etcdPort = endpointPort.new() + 31 | endpointPort.withName('metrics') + 32 | endpointPort.withPort(2379) + 33 | endpointPort.withProtocol('TCP'); 34 | 35 | local subset = endpointSubset.new() + 36 | endpointSubset.withAddresses([ 37 | { ip: etcdIP } 38 | for etcdIP in $._config.etcd.ips 39 | ]) + 40 | endpointSubset.withPorts(etcdPort); 41 | 42 | endpoints.new() + 43 | endpoints.mixin.metadata.withName('etcd') + 44 | endpoints.mixin.metadata.withNamespace('kube-system') + 45 | endpoints.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) + 46 | endpoints.withSubsets(subset), 47 | serviceMonitorEtcd: 48 | { 49 | apiVersion: 'monitoring.coreos.com/v1', 50 | kind: 'ServiceMonitor', 51 | metadata: { 52 | name: 'etcd', 53 | namespace: 'kube-system', 54 | labels: { 55 |
'k8s-app': 'etcd', 56 | }, 57 | }, 58 | spec: { 59 | jobLabel: 'k8s-app', 60 | endpoints: [ 61 | { 62 | port: 'metrics', 63 | interval: '30s', 64 | scheme: 'https', 65 | // Prometheus Operator (and Prometheus) allow us to specify a tlsConfig. This is required as most likely your etcd metrics end points is secure. 66 | tlsConfig: { 67 | caFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client-ca.crt', 68 | keyFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.key', 69 | certFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.crt', 70 | [if $._config.etcd.serverName != null then 'serverName']: $._config.etcd.serverName, 71 | [if $._config.etcd.insecureSkipVerify != null then 'insecureSkipVerify']: $._config.etcd.insecureSkipVerify, 72 | }, 73 | }, 74 | ], 75 | selector: { 76 | matchLabels: { 77 | 'k8s-app': 'etcd', 78 | }, 79 | }, 80 | }, 81 | }, 82 | secretEtcdCerts: 83 | // Prometheus Operator allows us to mount secrets in the pod. By loading the secrets as files, they can be made available inside the Prometheus pod. 84 | local secret = k.core.v1.secret; 85 | secret.new('kube-etcd-client-certs', { 86 | 'etcd-client-ca.crt': std.base64($._config.etcd.clientCA), 87 | 'etcd-client.key': std.base64($._config.etcd.clientKey), 88 | 'etcd-client.crt': std.base64($._config.etcd.clientCert), 89 | }) + 90 | secret.mixin.metadata.withNamespace($._config.namespace), 91 | prometheus+: 92 | { 93 | // Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec 94 | spec+: { 95 | secrets+: [$.prometheus.secretEtcdCerts.metadata.name], 96 | }, 97 | }, 98 | }, 99 | } 100 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus.libsonnet: -------------------------------------------------------------------------------- 1 | local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; 2 | local configMapList = k.core.v1.configMapList; 3 | 4 | (import 'grafana/grafana.libsonnet') + 5 | (import 'kube-state-metrics/kube-state-metrics.libsonnet') + 6 | (import 'node-exporter/node-exporter.libsonnet') + 7 | (import 'alertmanager/alertmanager.libsonnet') + 8 | (import 'prometheus-operator/prometheus-operator.libsonnet') + 9 | (import 'prometheus/prometheus.libsonnet') + 10 | (import 'prometheus-adapter/prometheus-adapter.libsonnet') + 11 | (import 'kubernetes-mixin/mixin.libsonnet') + 12 | (import 'alerts/alerts.libsonnet') + 13 | (import 'rules/rules.libsonnet') + { 14 | kubePrometheus+:: { 15 | namespace: k.core.v1.namespace.new($._config.namespace), 16 | }, 17 | grafana+:: { 18 | dashboardDefinitions: configMapList.new(super.dashboardDefinitions), 19 | serviceMonitor: { 20 | apiVersion: 'monitoring.coreos.com/v1', 21 | kind: 'ServiceMonitor', 22 | metadata: { 23 | name: 'grafana', 24 | namespace: $._config.namespace, 25 | }, 26 | spec: { 27 | selector: { 28 | matchLabels: { 29 | app: 'grafana', 30 | }, 31 | }, 32 | endpoints: [ 33 | { 34 | port: 'http', 35 | interval: '15s', 36 | }, 37 | ], 38 | }, 39 | }, 40 | }, 41 | } + { 42 | _config+:: { 43 | namespace: 'default', 44 | 45 | versions+:: { 46 | grafana: '6.0.1', 47 | }, 48 | 49 | tlsCipherSuites: [ 50 | 'TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721 51 | 'TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721 52 | 53 | // 'TLS_RSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566 54 | // 
'TLS_RSA_WITH_3DES_EDE_CBC_SHA', // insecure: https://access.redhat.com/articles/2548661 55 | // 'TLS_RSA_WITH_AES_128_CBC_SHA', // disabled by h2 56 | // 'TLS_RSA_WITH_AES_256_CBC_SHA', // disabled by h2 57 | 'TLS_RSA_WITH_AES_128_CBC_SHA256', 58 | // 'TLS_RSA_WITH_AES_128_GCM_SHA256', // disabled by h2 59 | // 'TLS_RSA_WITH_AES_256_GCM_SHA384', // disabled by h2 60 | // 'TLS_ECDHE_ECDSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566 61 | // 'TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA',// disabled by h2 62 | // 'TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA',// disabled by h2 63 | // 'TLS_ECDHE_RSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566 64 | // 'TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA', // insecure: https://access.redhat.com/articles/2548661 65 | // 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA', // disabled by h2 66 | // 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA', // disabled by h2 67 | 'TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256', 68 | 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256', 69 | 70 | // disabled by h2 means: https://github.com/golang/net/blob/e514e69ffb8bc3c76a71ae40de0118d794855992/http2/ciphers.go 71 | 72 | // 'TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384', // TODO: Might not work with h2 73 | // 'TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384', // TODO: Might not work with h2 74 | // 'TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305', // TODO: Might not work with h2 75 | // 'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305', // TODO: Might not work with h2 76 | ], 77 | 78 | cadvisorSelector: 'job="kubelet"', 79 | kubeletSelector: 'job="kubelet"', 80 | kubeStateMetricsSelector: 'job="kube-state-metrics"', 81 | nodeExporterSelector: 'job="node-exporter"', 82 | notKubeDnsSelector: 'job!="kube-dns"', 83 | kubeSchedulerSelector: 'job="kube-scheduler"', 84 | kubeControllerManagerSelector: 'job="kube-controller-manager"', 85 | kubeApiserverSelector: 'job="apiserver"', 86 | coreDNSSelector: 'job="kube-dns"', 87 | podLabel: 'pod', 88 | 89 | alertmanagerSelector: 'job="alertmanager-main",namespace="' + $._config.namespace + '"', 90 | prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"', 91 | prometheusOperatorSelector: 'job="prometheus-operator",namespace="' + $._config.namespace + '"', 92 | 93 | jobs: { 94 | Kubelet: $._config.kubeletSelector, 95 | KubeScheduler: $._config.kubeSchedulerSelector, 96 | KubeControllerManager: $._config.kubeControllerManagerSelector, 97 | KubeAPI: $._config.kubeApiserverSelector, 98 | KubeStateMetrics: $._config.kubeStateMetricsSelector, 99 | NodeExporter: $._config.nodeExporterSelector, 100 | Alertmanager: $._config.alertmanagerSelector, 101 | Prometheus: $._config.prometheusSelector, 102 | PrometheusOperator: $._config.prometheusOperatorSelector, 103 | CoreDNS: $._config.coreDNSSelector, 104 | }, 105 | 106 | prometheus+:: { 107 | rules: $.prometheusRules + $.prometheusAlerts, 108 | }, 109 | 110 | grafana+:: { 111 | dashboards: $.grafanaDashboards, 112 | }, 113 | }, 114 | } 115 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/lib/image.libsonnet: -------------------------------------------------------------------------------- 1 | // imageName extracts the image name from a fully qualified image string. eg. 
2 | // quay.io/coreos/addon-resizer -> addon-resizer 3 | // grafana/grafana -> grafana 4 | local imageName(image) = 5 | local parts = std.split(image, '/'); 6 | local len = std.length(parts); 7 | if len == 3 then 8 | # registry.com/org/image 9 | parts[2] 10 | else if len == 2 then 11 | # org/image 12 | parts[1] 13 | else if len == 1 then 14 | # image, ie. busybox 15 | parts[0] 16 | else 17 | error 'unknown image format: ' + image; 18 | 19 | { 20 | imageName:: imageName, 21 | } 22 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/lib/lib.libsonnet: -------------------------------------------------------------------------------- 1 | (import 'image.libsonnet') 2 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/rules/node-rules.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | prometheusRules+:: { 3 | groups+: [ 4 | { 5 | name: 'kube-prometheus-node-recording.rules', 6 | rules: [ 7 | { 8 | expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)', 9 | record: 'instance:node_cpu:rate:sum', 10 | }, 11 | { 12 | expr: 'sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)', 13 | record: 'instance:node_filesystem_usage:sum', 14 | }, 15 | { 16 | expr: 'sum(rate(node_network_receive_bytes_total[3m])) BY (instance)', 17 | record: 'instance:node_network_receive_bytes:rate:sum', 18 | }, 19 | { 20 | expr: 'sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)', 21 | record: 'instance:node_network_transmit_bytes:rate:sum', 22 | }, 23 | { 24 | expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)', 25 | record: 'instance:node_cpu:ratio', 26 | }, 27 | { 28 | expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))', 29 | record: 'cluster:node_cpu:sum_rate5m', 30 | }, 31 | { 32 | expr: 'cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))', 33 | record: 'cluster:node_cpu:ratio', 34 | }, 35 | ], 36 | }, 37 | ], 38 | }, 39 | } 40 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnet/kube-prometheus/rules/rules.libsonnet: -------------------------------------------------------------------------------- 1 | (import 'node-rules.libsonnet') 2 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnetfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | { 4 | "name": "kube-prometheus", 5 | "source": { 6 | "git": { 7 | "remote": "../../", 8 | "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" 9 | } 10 | }, 11 | "version": "." 
12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /install/kube-prometheus/jsonnetfile.lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | { 4 | "name": "kube-prometheus", 5 | "source": { 6 | "git": { 7 | "remote": "../../", 8 | "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" 9 | } 10 | }, 11 | "version": "3623fd0dfc7be15ab2fbe648217f238f614b2d1f" 12 | }, 13 | { 14 | "name": "ksonnet", 15 | "source": { 16 | "git": { 17 | "remote": "https://github.com/ksonnet/ksonnet-lib", 18 | "subdir": "" 19 | } 20 | }, 21 | "version": "d03da231d6c8bd74437b74a1e9e8b966f13dffa2" 22 | }, 23 | { 24 | "name": "kubernetes-mixin", 25 | "source": { 26 | "git": { 27 | "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", 28 | "subdir": "" 29 | } 30 | }, 31 | "version": "0669b548b8bc981f2676e7ec70c8f4a05fa39aa7" 32 | }, 33 | { 34 | "name": "grafonnet", 35 | "source": { 36 | "git": { 37 | "remote": "https://github.com/grafana/grafonnet-lib", 38 | "subdir": "grafonnet" 39 | } 40 | }, 41 | "version": "d270f529db9eb750425a173188c534ab92532f47" 42 | }, 43 | { 44 | "name": "grafana-builder", 45 | "source": { 46 | "git": { 47 | "remote": "https://github.com/kausalco/public", 48 | "subdir": "grafana-builder" 49 | } 50 | }, 51 | "version": "2c635c3310c6e61720871ac94d6d2572e37b83f7" 52 | }, 53 | { 54 | "name": "grafana", 55 | "source": { 56 | "git": { 57 | "remote": "https://github.com/brancz/kubernetes-grafana", 58 | "subdir": "grafana" 59 | } 60 | }, 61 | "version": "de2ec3f0f9115da2d47dc6b86af9b402e2bf146d" 62 | }, 63 | { 64 | "name": "prometheus-operator", 65 | "source": { 66 | "git": { 67 | "remote": "https://github.com/coreos/prometheus-operator", 68 | "subdir": "jsonnet/prometheus-operator" 69 | } 70 | }, 71 | "version": "7a25bf6b6bb2347dacb235659b73bc210117acc7" 72 | }, 73 | { 74 | "name": "etcd-mixin", 75 | "source": { 76 | "git": { 77 | "remote": "https://github.com/coreos/etcd", 78 | "subdir": "Documentation/etcd-mixin" 79 | } 80 | }, 81 | "version": "a621d807f061e1dd635033a8d6bc261461429e27" 82 | } 83 | ] 84 | } 85 | -------------------------------------------------------------------------------- /install/kube-prometheus/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ./manifests/00namespace-namespace.yaml 5 | - ./manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml 6 | - ./manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml 7 | - ./manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml 8 | - ./manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml 9 | - ./manifests/0prometheus-operator-clusterRole.yaml 10 | - ./manifests/0prometheus-operator-clusterRoleBinding.yaml 11 | - ./manifests/0prometheus-operator-deployment.yaml 12 | - ./manifests/0prometheus-operator-service.yaml 13 | - ./manifests/0prometheus-operator-serviceAccount.yaml 14 | - ./manifests/0prometheus-operator-serviceMonitor.yaml 15 | - ./manifests/alertmanager-alertmanager.yaml 16 | - ./manifests/alertmanager-secret.yaml 17 | - ./manifests/alertmanager-service.yaml 18 | - ./manifests/alertmanager-serviceAccount.yaml 19 | - ./manifests/alertmanager-serviceMonitor.yaml 20 | - ./manifests/grafana-dashboardDatasources.yaml 21 | - ./manifests/grafana-dashboardDefinitions.yaml 
22 | - ./manifests/grafana-dashboardSources.yaml 23 | - ./manifests/grafana-deployment.yaml 24 | - ./manifests/grafana-service.yaml 25 | - ./manifests/grafana-serviceAccount.yaml 26 | - ./manifests/grafana-serviceMonitor.yaml 27 | - ./manifests/kube-state-metrics-clusterRole.yaml 28 | - ./manifests/kube-state-metrics-clusterRoleBinding.yaml 29 | - ./manifests/kube-state-metrics-deployment.yaml 30 | - ./manifests/kube-state-metrics-role.yaml 31 | - ./manifests/kube-state-metrics-roleBinding.yaml 32 | - ./manifests/kube-state-metrics-service.yaml 33 | - ./manifests/kube-state-metrics-serviceAccount.yaml 34 | - ./manifests/kube-state-metrics-serviceMonitor.yaml 35 | - ./manifests/node-exporter-clusterRole.yaml 36 | - ./manifests/node-exporter-clusterRoleBinding.yaml 37 | - ./manifests/node-exporter-daemonset.yaml 38 | - ./manifests/node-exporter-service.yaml 39 | - ./manifests/node-exporter-serviceAccount.yaml 40 | - ./manifests/node-exporter-serviceMonitor.yaml 41 | - ./manifests/prometheus-adapter-apiService.yaml 42 | - ./manifests/prometheus-adapter-clusterRole.yaml 43 | - ./manifests/prometheus-adapter-clusterRoleBinding.yaml 44 | - ./manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml 45 | - ./manifests/prometheus-adapter-clusterRoleServerResources.yaml 46 | - ./manifests/prometheus-adapter-configMap.yaml 47 | - ./manifests/prometheus-adapter-deployment.yaml 48 | - ./manifests/prometheus-adapter-roleBindingAuthReader.yaml 49 | - ./manifests/prometheus-adapter-service.yaml 50 | - ./manifests/prometheus-adapter-serviceAccount.yaml 51 | - ./manifests/prometheus-clusterRole.yaml 52 | - ./manifests/prometheus-clusterRoleBinding.yaml 53 | - ./manifests/prometheus-prometheus.yaml 54 | - ./manifests/prometheus-roleBindingConfig.yaml 55 | - ./manifests/prometheus-roleBindingSpecificNamespaces.yaml 56 | - ./manifests/prometheus-roleConfig.yaml 57 | - ./manifests/prometheus-roleSpecificNamespaces.yaml 58 | - ./manifests/prometheus-rules.yaml 59 | - ./manifests/prometheus-service.yaml 60 | - ./manifests/prometheus-serviceAccount.yaml 61 | - ./manifests/prometheus-serviceMonitor.yaml 62 | - ./manifests/prometheus-serviceMonitorApiserver.yaml 63 | - ./manifests/prometheus-serviceMonitorCoreDNS.yaml 64 | - ./manifests/prometheus-serviceMonitorKubeControllerManager.yaml 65 | - ./manifests/prometheus-serviceMonitorKubeScheduler.yaml 66 | - ./manifests/prometheus-serviceMonitorKubelet.yaml 67 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/00namespace-namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: monitoring 5 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/0prometheus-operator-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus-operator 5 | rules: 6 | - apiGroups: 7 | - apiextensions.k8s.io 8 | resources: 9 | - customresourcedefinitions 10 | verbs: 11 | - '*' 12 | - apiGroups: 13 | - monitoring.coreos.com 14 | resources: 15 | - alertmanagers 16 | - prometheuses 17 | - prometheuses/finalizers 18 | - alertmanagers/finalizers 19 | - servicemonitors 20 | - prometheusrules 21 | verbs: 22 | - '*' 23 | - apiGroups: 24 | - apps 25 | resources: 26 | - statefulsets 27 | verbs: 28 | - '*' 29 | - apiGroups: 30 | 
- "" 31 | resources: 32 | - configmaps 33 | - secrets 34 | verbs: 35 | - '*' 36 | - apiGroups: 37 | - "" 38 | resources: 39 | - pods 40 | verbs: 41 | - list 42 | - delete 43 | - apiGroups: 44 | - "" 45 | resources: 46 | - services 47 | - services/finalizers 48 | - endpoints 49 | verbs: 50 | - get 51 | - create 52 | - update 53 | - delete 54 | - apiGroups: 55 | - "" 56 | resources: 57 | - nodes 58 | verbs: 59 | - list 60 | - watch 61 | - apiGroups: 62 | - "" 63 | resources: 64 | - namespaces 65 | verbs: 66 | - get 67 | - list 68 | - watch 69 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/0prometheus-operator-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: prometheus-operator 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: prometheus-operator 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-operator 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/0prometheus-operator-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | k8s-app: prometheus-operator 6 | name: prometheus-operator 7 | namespace: monitoring 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | k8s-app: prometheus-operator 13 | template: 14 | metadata: 15 | labels: 16 | k8s-app: prometheus-operator 17 | spec: 18 | containers: 19 | - args: 20 | - --kubelet-service=kube-system/kubelet 21 | - --logtostderr=true 22 | - --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1 23 | - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.29.0 24 | image: quay.io/coreos/prometheus-operator:v0.29.0 25 | name: prometheus-operator 26 | ports: 27 | - containerPort: 8080 28 | name: http 29 | resources: 30 | limits: 31 | cpu: 200m 32 | memory: 200Mi 33 | requests: 34 | cpu: 100m 35 | memory: 100Mi 36 | securityContext: 37 | allowPrivilegeEscalation: false 38 | readOnlyRootFilesystem: true 39 | nodeSelector: 40 | beta.kubernetes.io/os: linux 41 | securityContext: 42 | runAsNonRoot: true 43 | runAsUser: 65534 44 | serviceAccountName: prometheus-operator 45 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/0prometheus-operator-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | k8s-app: prometheus-operator 6 | name: prometheus-operator 7 | namespace: monitoring 8 | spec: 9 | clusterIP: None 10 | ports: 11 | - name: http 12 | port: 8080 13 | targetPort: http 14 | selector: 15 | k8s-app: prometheus-operator 16 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/0prometheus-operator-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: prometheus-operator 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/0prometheus-operator-serviceMonitor.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: prometheus-operator 6 | name: prometheus-operator 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - honorLabels: true 11 | port: http 12 | selector: 13 | matchLabels: 14 | k8s-app: prometheus-operator 15 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/alertmanager-alertmanager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: Alertmanager 3 | metadata: 4 | labels: 5 | alertmanager: main 6 | name: main 7 | namespace: monitoring 8 | spec: 9 | baseImage: quay.io/prometheus/alertmanager 10 | nodeSelector: 11 | beta.kubernetes.io/os: linux 12 | replicas: 3 13 | securityContext: 14 | fsGroup: 2000 15 | runAsNonRoot: true 16 | runAsUser: 1000 17 | serviceAccountName: alertmanager-main 18 | version: v0.16.1 19 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/alertmanager-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOgotICJuYW1lIjogInNsYWNrX2FsZXJ0MSIKICAic2xhY2tfY29uZmlncyI6CiAgLSAiYXBpX3VybCI6ICJodHRwczovL2hvb2tzLnNsYWNrLmNvbS9zZXJ2aWNlcy9USFNCM0ozSzYvQkhUSEgxR01EL2NoMWZsTXhCMERCZURBNk9CNzJzd2FRQSIKICAgICJjaGFubmVsIjogIiNhbGVydF8xIgotICJuYW1lIjogInNsYWNrX2FsZXJ0MiIKICAic2xhY2tfY29uZmlncyI6CiAgLSAiYXBpX3VybCI6ICJodHRwczovL2hvb2tzLnNsYWNrLmNvbS9zZXJ2aWNlcy9USFNCM0ozSzYvQkhUSEgxR01EL2NoMWZsTXhCMERCZURBNk9CNzJzd2FRQSIKICAgICJjaGFubmVsIjogIiNhbGVydF8yIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjFtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAic2xhY2tfYWxlcnQxIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiM20iCiAgInJvdXRlcyI6IAogIC0gIm1hdGNoIjogCiAgICAgICJhbGVydG5hbWUiOiAiV2F0Y2hkb2ciCiAgICAicmVjZWl2ZXIiOiAic2xhY2tfYWxlcnQyIgo= 4 | kind: Secret 5 | metadata: 6 | name: alertmanager-main 7 | namespace: monitoring 8 | type: Opaque 9 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/alertmanager-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | alertmanager: main 6 | name: alertmanager-main 7 | namespace: monitoring 8 | spec: 9 | ports: 10 | - name: web 11 | port: 9093 12 | targetPort: web 13 | selector: 14 | alertmanager: main 15 | app: alertmanager 16 | type: LoadBalancer 17 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/alertmanager-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: alertmanager-main 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/alertmanager-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: alertmanager 6 | name: alertmanager 7 | namespace: monitoring 8 | spec: 9 | 
endpoints: 10 | - interval: 30s 11 | port: web 12 | selector: 13 | matchLabels: 14 | alertmanager: main 15 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/grafana-dashboardDatasources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | prometheus.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0= 4 | kind: Secret 5 | metadata: 6 | name: grafana-datasources 7 | namespace: monitoring 8 | type: Opaque 9 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/grafana-dashboardSources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | dashboards.yaml: |- 4 | { 5 | "apiVersion": 1, 6 | "providers": [ 7 | { 8 | "folder": "", 9 | "name": "0", 10 | "options": { 11 | "path": "/grafana-dashboard-definitions/0" 12 | }, 13 | "orgId": 1, 14 | "type": "file" 15 | } 16 | ] 17 | } 18 | kind: ConfigMap 19 | metadata: 20 | name: grafana-dashboards 21 | namespace: monitoring 22 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/grafana-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: grafana 6 | name: grafana 7 | namespace: monitoring 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: grafana 13 | template: 14 | metadata: 15 | labels: 16 | app: grafana 17 | spec: 18 | containers: 19 | - image: grafana/grafana:6.0.1 20 | name: grafana 21 | ports: 22 | - containerPort: 3000 23 | name: http 24 | readinessProbe: 25 | httpGet: 26 | path: /api/health 27 | port: http 28 | resources: 29 | limits: 30 | cpu: 200m 31 | memory: 200Mi 32 | requests: 33 | cpu: 100m 34 | memory: 100Mi 35 | volumeMounts: 36 | - mountPath: /var/lib/grafana 37 | name: grafana-storage 38 | readOnly: false 39 | - mountPath: /etc/grafana/provisioning/datasources 40 | name: grafana-datasources 41 | readOnly: false 42 | - mountPath: /etc/grafana/provisioning/dashboards 43 | name: grafana-dashboards 44 | readOnly: false 45 | - mountPath: /grafana-dashboard-definitions/0/k8s-cluster-rsrc-use 46 | name: grafana-dashboard-k8s-cluster-rsrc-use 47 | readOnly: false 48 | - mountPath: /grafana-dashboard-definitions/0/k8s-node-rsrc-use 49 | name: grafana-dashboard-k8s-node-rsrc-use 50 | readOnly: false 51 | - mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster 52 | name: grafana-dashboard-k8s-resources-cluster 53 | readOnly: false 54 | - mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace 55 | name: grafana-dashboard-k8s-resources-namespace 56 | readOnly: false 57 | - mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod 58 | name: grafana-dashboard-k8s-resources-pod 59 | readOnly: false 60 | - mountPath: /grafana-dashboard-definitions/0/nodes 61 | name: grafana-dashboard-nodes 62 | readOnly: false 63 | - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage 
64 | name: grafana-dashboard-persistentvolumesusage 65 | readOnly: false 66 | - mountPath: /grafana-dashboard-definitions/0/pods 67 | name: grafana-dashboard-pods 68 | readOnly: false 69 | - mountPath: /grafana-dashboard-definitions/0/statefulset 70 | name: grafana-dashboard-statefulset 71 | readOnly: false 72 | nodeSelector: 73 | beta.kubernetes.io/os: linux 74 | securityContext: 75 | runAsNonRoot: true 76 | runAsUser: 65534 77 | serviceAccountName: grafana 78 | volumes: 79 | - emptyDir: {} 80 | name: grafana-storage 81 | - name: grafana-datasources 82 | secret: 83 | secretName: grafana-datasources 84 | - configMap: 85 | name: grafana-dashboards 86 | name: grafana-dashboards 87 | - configMap: 88 | name: grafana-dashboard-k8s-cluster-rsrc-use 89 | name: grafana-dashboard-k8s-cluster-rsrc-use 90 | - configMap: 91 | name: grafana-dashboard-k8s-node-rsrc-use 92 | name: grafana-dashboard-k8s-node-rsrc-use 93 | - configMap: 94 | name: grafana-dashboard-k8s-resources-cluster 95 | name: grafana-dashboard-k8s-resources-cluster 96 | - configMap: 97 | name: grafana-dashboard-k8s-resources-namespace 98 | name: grafana-dashboard-k8s-resources-namespace 99 | - configMap: 100 | name: grafana-dashboard-k8s-resources-pod 101 | name: grafana-dashboard-k8s-resources-pod 102 | - configMap: 103 | name: grafana-dashboard-nodes 104 | name: grafana-dashboard-nodes 105 | - configMap: 106 | name: grafana-dashboard-persistentvolumesusage 107 | name: grafana-dashboard-persistentvolumesusage 108 | - configMap: 109 | name: grafana-dashboard-pods 110 | name: grafana-dashboard-pods 111 | - configMap: 112 | name: grafana-dashboard-statefulset 113 | name: grafana-dashboard-statefulset 114 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/grafana-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | app: grafana 6 | name: grafana 7 | namespace: monitoring 8 | spec: 9 | ports: 10 | - name: http 11 | port: 3000 12 | targetPort: http 13 | selector: 14 | app: grafana 15 | type: LoadBalancer 16 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/grafana-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: grafana 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/grafana-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: grafana 5 | namespace: monitoring 6 | spec: 7 | endpoints: 8 | - interval: 15s 9 | port: http 10 | selector: 11 | matchLabels: 12 | app: grafana 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/kube-state-metrics-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: kube-state-metrics 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - configmaps 10 | - secrets 11 | - nodes 12 | - pods 13 | - services 14 | - resourcequotas 15 | - replicationcontrollers 16 | - limitranges 17 | - persistentvolumeclaims 18 | - 
persistentvolumes 19 | - namespaces 20 | - endpoints 21 | verbs: 22 | - list 23 | - watch 24 | - apiGroups: 25 | - extensions 26 | resources: 27 | - daemonsets 28 | - deployments 29 | - replicasets 30 | verbs: 31 | - list 32 | - watch 33 | - apiGroups: 34 | - apps 35 | resources: 36 | - statefulsets 37 | - daemonsets 38 | - deployments 39 | - replicasets 40 | verbs: 41 | - list 42 | - watch 43 | - apiGroups: 44 | - batch 45 | resources: 46 | - cronjobs 47 | - jobs 48 | verbs: 49 | - list 50 | - watch 51 | - apiGroups: 52 | - autoscaling 53 | resources: 54 | - horizontalpodautoscalers 55 | verbs: 56 | - list 57 | - watch 58 | - apiGroups: 59 | - authentication.k8s.io 60 | resources: 61 | - tokenreviews 62 | verbs: 63 | - create 64 | - apiGroups: 65 | - authorization.k8s.io 66 | resources: 67 | - subjectaccessreviews 68 | verbs: 69 | - create 70 | - apiGroups: 71 | - policy 72 | resources: 73 | - poddisruptionbudgets 74 | verbs: 75 | - list 76 | - watch 77 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/kube-state-metrics-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: kube-state-metrics 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: kube-state-metrics 9 | subjects: 10 | - kind: ServiceAccount 11 | name: kube-state-metrics 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/kube-state-metrics-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: kube-state-metrics 6 | name: kube-state-metrics 7 | namespace: monitoring 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: kube-state-metrics 13 | template: 14 | metadata: 15 | labels: 16 | app: kube-state-metrics 17 | spec: 18 | containers: 19 | - args: 20 | - --logtostderr 21 | - --secure-listen-address=:8443 22 | - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 23 | - --upstream=http://127.0.0.1:8081/ 24 | image: quay.io/coreos/kube-rbac-proxy:v0.4.1 25 | name: kube-rbac-proxy-main 26 | ports: 27 | - containerPort: 8443 28 | name: https-main 29 | resources: 30 | limits: 31 | cpu: 20m 32 | memory: 40Mi 33 | requests: 34 | cpu: 10m 35 | memory: 20Mi 36 | - args: 37 | - --logtostderr 38 | - --secure-listen-address=:9443 39 | - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 40 | - --upstream=http://127.0.0.1:8082/ 41 | image: quay.io/coreos/kube-rbac-proxy:v0.4.1 42 | name: kube-rbac-proxy-self 43 | ports: 44 | - containerPort: 9443 45 | name: https-self 46 | resources: 47 | limits: 48 | cpu: 20m 49 | memory: 40Mi 50 | requests: 51 | cpu: 10m 52 | memory: 20Mi 53 | - args: 54 | - --host=127.0.0.1 55 | - --port=8081 56 | - --telemetry-host=127.0.0.1 57 | - --telemetry-port=8082 58 | image: quay.io/coreos/kube-state-metrics:v1.5.0 59 | name: kube-state-metrics 60 | resources: 61 | limits: 62 | cpu: 100m 63 | memory: 150Mi 64 | requests: 
65 | cpu: 100m 66 | memory: 150Mi 67 | - command: 68 | - /pod_nanny 69 | - --container=kube-state-metrics 70 | - --cpu=100m 71 | - --extra-cpu=2m 72 | - --memory=150Mi 73 | - --extra-memory=30Mi 74 | - --threshold=5 75 | - --deployment=kube-state-metrics 76 | env: 77 | - name: MY_POD_NAME 78 | valueFrom: 79 | fieldRef: 80 | apiVersion: v1 81 | fieldPath: metadata.name 82 | - name: MY_POD_NAMESPACE 83 | valueFrom: 84 | fieldRef: 85 | apiVersion: v1 86 | fieldPath: metadata.namespace 87 | image: k8s.gcr.io/addon-resizer:1.8.4 88 | name: addon-resizer 89 | resources: 90 | limits: 91 | cpu: 50m 92 | memory: 30Mi 93 | requests: 94 | cpu: 10m 95 | memory: 30Mi 96 | nodeSelector: 97 | beta.kubernetes.io/os: linux 98 | securityContext: 99 | runAsNonRoot: true 100 | runAsUser: 65534 101 | serviceAccountName: kube-state-metrics 102 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/kube-state-metrics-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role 3 | metadata: 4 | name: kube-state-metrics 5 | namespace: monitoring 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - pods 11 | verbs: 12 | - get 13 | - apiGroups: 14 | - extensions 15 | resourceNames: 16 | - kube-state-metrics 17 | resources: 18 | - deployments 19 | verbs: 20 | - get 21 | - update 22 | - apiGroups: 23 | - apps 24 | resourceNames: 25 | - kube-state-metrics 26 | resources: 27 | - deployments 28 | verbs: 29 | - get 30 | - update 31 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/kube-state-metrics-roleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: kube-state-metrics 5 | namespace: monitoring 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: kube-state-metrics 10 | subjects: 11 | - kind: ServiceAccount 12 | name: kube-state-metrics 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/kube-state-metrics-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | k8s-app: kube-state-metrics 6 | name: kube-state-metrics 7 | namespace: monitoring 8 | spec: 9 | clusterIP: None 10 | ports: 11 | - name: https-main 12 | port: 8443 13 | targetPort: https-main 14 | - name: https-self 15 | port: 9443 16 | targetPort: https-self 17 | selector: 18 | app: kube-state-metrics 19 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/kube-state-metrics-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: kube-state-metrics 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/kube-state-metrics-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: kube-state-metrics 6 | name: kube-state-metrics 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - 
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | honorLabels: true 12 | interval: 30s 13 | port: https-main 14 | scheme: https 15 | scrapeTimeout: 30s 16 | tlsConfig: 17 | insecureSkipVerify: true 18 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 19 | interval: 30s 20 | port: https-self 21 | scheme: https 22 | tlsConfig: 23 | insecureSkipVerify: true 24 | jobLabel: k8s-app 25 | selector: 26 | matchLabels: 27 | k8s-app: kube-state-metrics 28 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/node-exporter-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: node-exporter 5 | rules: 6 | - apiGroups: 7 | - authentication.k8s.io 8 | resources: 9 | - tokenreviews 10 | verbs: 11 | - create 12 | - apiGroups: 13 | - authorization.k8s.io 14 | resources: 15 | - subjectaccessreviews 16 | verbs: 17 | - create 18 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/node-exporter-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: node-exporter 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: node-exporter 9 | subjects: 10 | - kind: ServiceAccount 11 | name: node-exporter 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/node-exporter-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: DaemonSet 3 | metadata: 4 | labels: 5 | app: node-exporter 6 | name: node-exporter 7 | namespace: monitoring 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: node-exporter 12 | template: 13 | metadata: 14 | labels: 15 | app: node-exporter 16 | spec: 17 | containers: 18 | - args: 19 | - --web.listen-address=127.0.0.1:9100 20 | - --path.procfs=/host/proc 21 | - --path.sysfs=/host/sys 22 | - --path.rootfs=/host/root 23 | - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/) 24 | - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$ 25 | image: quay.io/prometheus/node-exporter:v0.17.0 26 | name: node-exporter 27 | resources: 28 | limits: 29 | cpu: 250m 30 | memory: 180Mi 31 | requests: 32 | cpu: 102m 33 | memory: 180Mi 34 | volumeMounts: 35 | - mountPath: /host/proc 36 | name: proc 37 | readOnly: false 38 | - mountPath: /host/sys 39 | name: sys 40 | readOnly: false 41 | - mountPath: /host/root 42 | mountPropagation: HostToContainer 43 | name: root 44 | readOnly: true 45 | - args: 46 | - --logtostderr 47 | - --secure-listen-address=$(IP):9100 48 | - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 49 | - --upstream=http://127.0.0.1:9100/ 50 | env: 51 | - name: IP 52 | valueFrom: 53 | fieldRef: 54 | fieldPath: status.podIP 55 | image: quay.io/coreos/kube-rbac-proxy:v0.4.1 56 | name: kube-rbac-proxy 57 | ports: 58 | - 
containerPort: 9100 59 | hostPort: 9100 60 | name: https 61 | resources: 62 | limits: 63 | cpu: 20m 64 | memory: 40Mi 65 | requests: 66 | cpu: 10m 67 | memory: 20Mi 68 | hostNetwork: true 69 | hostPID: true 70 | nodeSelector: 71 | beta.kubernetes.io/os: linux 72 | securityContext: 73 | runAsNonRoot: true 74 | runAsUser: 65534 75 | serviceAccountName: node-exporter 76 | tolerations: 77 | - effect: NoExecute 78 | operator: Exists 79 | - effect: NoSchedule 80 | operator: Exists 81 | volumes: 82 | - hostPath: 83 | path: /proc 84 | name: proc 85 | - hostPath: 86 | path: /sys 87 | name: sys 88 | - hostPath: 89 | path: / 90 | name: root 91 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/node-exporter-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | k8s-app: node-exporter 6 | name: node-exporter 7 | namespace: monitoring 8 | spec: 9 | clusterIP: None 10 | ports: 11 | - name: https 12 | port: 9100 13 | targetPort: https 14 | selector: 15 | app: node-exporter 16 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/node-exporter-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: node-exporter 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/node-exporter-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: node-exporter 6 | name: node-exporter 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | interval: 30s 12 | port: https 13 | scheme: https 14 | tlsConfig: 15 | insecureSkipVerify: true 16 | jobLabel: k8s-app 17 | selector: 18 | matchLabels: 19 | k8s-app: node-exporter 20 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-apiService.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiregistration.k8s.io/v1 2 | kind: APIService 3 | metadata: 4 | name: v1beta1.metrics.k8s.io 5 | spec: 6 | group: metrics.k8s.io 7 | groupPriorityMinimum: 100 8 | insecureSkipTLSVerify: true 9 | service: 10 | name: prometheus-adapter 11 | namespace: monitoring 12 | version: v1beta1 13 | versionPriority: 100 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus-adapter 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - nodes 10 | - namespaces 11 | - pods 12 | - services 13 | verbs: 14 | - get 15 | - list 16 | - watch 17 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: 
ClusterRoleBinding 3 | metadata: 4 | name: prometheus-adapter 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: prometheus-adapter 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-adapter 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: resource-metrics:system:auth-delegator 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: system:auth-delegator 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-adapter 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-clusterRoleServerResources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: resource-metrics-server-resources 5 | rules: 6 | - apiGroups: 7 | - metrics.k8s.io 8 | resources: 9 | - '*' 10 | verbs: 11 | - '*' 12 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-configMap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | config.yaml: | 4 | resourceRules: 5 | cpu: 6 | containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}[1m])) by (<<.GroupBy>>) 7 | nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) 8 | resources: 9 | overrides: 10 | node: 11 | resource: node 12 | namespace: 13 | resource: namespace 14 | pod_name: 15 | resource: pod 16 | containerLabel: container_name 17 | memory: 18 | containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}) by (<<.GroupBy>>) 19 | nodeQuery: sum(node:node_memory_bytes_total:sum{<<.LabelMatchers>>} - node:node_memory_bytes_available:sum{<<.LabelMatchers>>}) by (<<.GroupBy>>) 20 | resources: 21 | overrides: 22 | node: 23 | resource: node 24 | namespace: 25 | resource: namespace 26 | pod_name: 27 | resource: pod 28 | containerLabel: container_name 29 | window: 1m 30 | kind: ConfigMap 31 | metadata: 32 | name: adapter-config 33 | namespace: monitoring 34 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | name: prometheus-adapter 5 | namespace: monitoring 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | name: prometheus-adapter 11 | strategy: 12 | rollingUpdate: 13 | maxSurge: 1 14 | maxUnavailable: 0 15 | template: 16 | metadata: 17 | labels: 18 | name: prometheus-adapter 19 | spec: 20 | containers: 21 | - args: 22 | - --cert-dir=/var/run/serving-cert 23 | - --config=/etc/adapter/config.yaml 24 | - --logtostderr=true 25 | - --metrics-relist-interval=1m 26 | - 
--prometheus-url=http://prometheus-k8s.monitoring.svc:9090/ 27 | - --secure-port=6443 28 | image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.4.1 29 | name: prometheus-adapter 30 | ports: 31 | - containerPort: 6443 32 | volumeMounts: 33 | - mountPath: /tmp 34 | name: tmpfs 35 | readOnly: false 36 | - mountPath: /var/run/serving-cert 37 | name: volume-serving-cert 38 | readOnly: false 39 | - mountPath: /etc/adapter 40 | name: config 41 | readOnly: false 42 | nodeSelector: 43 | beta.kubernetes.io/os: linux 44 | serviceAccountName: prometheus-adapter 45 | volumes: 46 | - emptyDir: {} 47 | name: tmpfs 48 | - emptyDir: {} 49 | name: volume-serving-cert 50 | - configMap: 51 | name: adapter-config 52 | name: config 53 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-roleBindingAuthReader.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: resource-metrics-auth-reader 5 | namespace: kube-system 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: extension-apiserver-authentication-reader 10 | subjects: 11 | - kind: ServiceAccount 12 | name: prometheus-adapter 13 | namespace: monitoring 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | name: prometheus-adapter 6 | name: prometheus-adapter 7 | namespace: monitoring 8 | spec: 9 | ports: 10 | - name: https 11 | port: 443 12 | targetPort: 6443 13 | selector: 14 | name: prometheus-adapter 15 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-adapter-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: prometheus-adapter 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus-k8s 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - nodes/metrics 10 | verbs: 11 | - get 12 | - nonResourceURLs: 13 | - /metrics 14 | verbs: 15 | - get 16 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: prometheus-k8s 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: prometheus-k8s 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-k8s 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: Prometheus 3 | metadata: 4 | labels: 
5 | prometheus: k8s 6 | name: k8s 7 | namespace: monitoring 8 | spec: 9 | alerting: 10 | alertmanagers: 11 | - name: alertmanager-main 12 | namespace: monitoring 13 | port: web 14 | baseImage: quay.io/prometheus/prometheus 15 | nodeSelector: 16 | beta.kubernetes.io/os: linux 17 | replicas: 1 18 | resources: 19 | requests: 20 | memory: 400Mi 21 | ruleSelector: 22 | matchLabels: 23 | prometheus: k8s 24 | role: alert-rules 25 | securityContext: 26 | fsGroup: 2000 27 | runAsNonRoot: true 28 | runAsUser: 1000 29 | serviceAccountName: prometheus-k8s 30 | serviceMonitorNamespaceSelector: {} 31 | serviceMonitorSelector: {} 32 | version: v2.7.2 33 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-roleBindingConfig.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: prometheus-k8s-config 5 | namespace: monitoring 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: prometheus-k8s-config 10 | subjects: 11 | - kind: ServiceAccount 12 | name: prometheus-k8s 13 | namespace: monitoring 14 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-roleBindingSpecificNamespaces.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | items: 3 | - apiVersion: rbac.authorization.k8s.io/v1 4 | kind: RoleBinding 5 | metadata: 6 | name: prometheus-k8s 7 | namespace: default 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: Role 11 | name: prometheus-k8s 12 | subjects: 13 | - kind: ServiceAccount 14 | name: prometheus-k8s 15 | namespace: monitoring 16 | - apiVersion: rbac.authorization.k8s.io/v1 17 | kind: RoleBinding 18 | metadata: 19 | name: prometheus-k8s 20 | namespace: kube-system 21 | roleRef: 22 | apiGroup: rbac.authorization.k8s.io 23 | kind: Role 24 | name: prometheus-k8s 25 | subjects: 26 | - kind: ServiceAccount 27 | name: prometheus-k8s 28 | namespace: monitoring 29 | - apiVersion: rbac.authorization.k8s.io/v1 30 | kind: RoleBinding 31 | metadata: 32 | name: prometheus-k8s 33 | namespace: monitoring 34 | roleRef: 35 | apiGroup: rbac.authorization.k8s.io 36 | kind: Role 37 | name: prometheus-k8s 38 | subjects: 39 | - kind: ServiceAccount 40 | name: prometheus-k8s 41 | namespace: monitoring 42 | kind: RoleBindingList 43 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-roleConfig.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role 3 | metadata: 4 | name: prometheus-k8s-config 5 | namespace: monitoring 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | verbs: 12 | - get 13 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-roleSpecificNamespaces.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | items: 3 | - apiVersion: rbac.authorization.k8s.io/v1 4 | kind: Role 5 | metadata: 6 | name: prometheus-k8s 7 | namespace: default 8 | rules: 9 | - apiGroups: 10 | - "" 11 | resources: 12 | - services 13 | - endpoints 14 | - pods 15 | verbs: 16 | - get 17 | - list 18 | - 
watch 19 | - apiVersion: rbac.authorization.k8s.io/v1 20 | kind: Role 21 | metadata: 22 | name: prometheus-k8s 23 | namespace: kube-system 24 | rules: 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - services 29 | - endpoints 30 | - pods 31 | verbs: 32 | - get 33 | - list 34 | - watch 35 | - apiVersion: rbac.authorization.k8s.io/v1 36 | kind: Role 37 | metadata: 38 | name: prometheus-k8s 39 | namespace: monitoring 40 | rules: 41 | - apiGroups: 42 | - "" 43 | resources: 44 | - services 45 | - endpoints 46 | - pods 47 | verbs: 48 | - get 49 | - list 50 | - watch 51 | kind: RoleList 52 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | name: prometheus-k8s 7 | namespace: monitoring 8 | spec: 9 | ports: 10 | - name: web 11 | port: 9090 12 | targetPort: web 13 | selector: 14 | app: prometheus 15 | prometheus: k8s 16 | type: LoadBalancer 17 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: prometheus-k8s 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: prometheus 6 | name: prometheus 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - interval: 30s 11 | port: web 12 | selector: 13 | matchLabels: 14 | prometheus: k8s 15 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-serviceMonitorApiserver.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: apiserver 6 | name: kube-apiserver 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | interval: 30s 12 | metricRelabelings: 13 | - action: drop 14 | regex: etcd_(debugging|disk|request|server).* 15 | sourceLabels: 16 | - __name__ 17 | - action: drop 18 | regex: apiserver_admission_controller_admission_latencies_seconds_.* 19 | sourceLabels: 20 | - __name__ 21 | - action: drop 22 | regex: apiserver_admission_step_admission_latencies_seconds_.* 23 | sourceLabels: 24 | - __name__ 25 | port: https 26 | scheme: https 27 | tlsConfig: 28 | caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 29 | serverName: kubernetes 30 | jobLabel: component 31 | namespaceSelector: 32 | matchNames: 33 | - default 34 | selector: 35 | matchLabels: 36 | component: apiserver 37 | provider: kubernetes 38 | -------------------------------------------------------------------------------- /install/kube-prometheus/manifests/prometheus-serviceMonitorKubelet.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: kubelet 6 | name: kubelet 
7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | honorLabels: true 12 | interval: 30s 13 | port: https-metrics 14 | scheme: https 15 | tlsConfig: 16 | insecureSkipVerify: true 17 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 18 | honorLabels: true 19 | interval: 30s 20 | metricRelabelings: 21 | - action: drop 22 | regex: container_([a-z_]+); 23 | sourceLabels: 24 | - __name__ 25 | - image 26 | - action: drop 27 | regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) 28 | sourceLabels: 29 | - __name__ 30 | path: /metrics/cadvisor 31 | port: https-metrics 32 | scheme: https 33 | tlsConfig: 34 | insecureSkipVerify: true 35 | jobLabel: k8s-app 36 | namespaceSelector: 37 | matchNames: 38 | - kube-system 39 | selector: 40 | matchLabels: 41 | k8s-app: kubelet 42 | -------------------------------------------------------------------------------- /install/kube-prometheus/sync-to-internal-registry.jsonnet: -------------------------------------------------------------------------------- 1 | local kp = import 'kube-prometheus/kube-prometheus.libsonnet'; 2 | local l = import 'kube-prometheus/lib/lib.libsonnet'; 3 | local config = kp._config; 4 | 5 | local makeImages(config) = [ 6 | { 7 | name: config.imageRepos[image], 8 | tag: config.versions[image], 9 | } 10 | for image in std.objectFields(config.imageRepos) 11 | ]; 12 | 13 | local upstreamImage(image) = '%s:%s' % [image.name, image.tag]; 14 | local downstreamImage(registry, image) = '%s/%s:%s' % [registry, l.imageName(image.name), image.tag]; 15 | 16 | local pullPush(image, newRegistry) = [ 17 | 'docker pull %s' % upstreamImage(image), 18 | 'docker tag %s %s' % [upstreamImage(image), downstreamImage(newRegistry, image)], 19 | 'docker push %s' % downstreamImage(newRegistry, image), 20 | ]; 21 | 22 | local images = makeImages(config); 23 | 24 | local output(repository) = std.flattenArrays([ 25 | pullPush(image, repository) 26 | for image in images 27 | ]); 28 | 29 | function(repository='my-registry.com/repository') 30 | std.join('\n', output(repository)) 31 | -------------------------------------------------------------------------------- /install/kube-prometheus/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | # only exit with zero if all commands of the pipeline exit successfully 4 | set -o pipefail 5 | 6 | 7 | for i in examples/jsonnet-snippets/*.jsonnet; do 8 | [ -f "$i" ] || break 9 | echo "Testing: ${i}" 10 | echo "" 11 | fileContent=$(<"$i") 12 | snippet="local kp = $fileContent; 13 | 14 | $(<examples/jsonnet-build-snippet/build-snippet.jsonnet)" 15 | echo "${snippet}" > "test.jsonnet" 16 | echo "\`\`\`" 17 | echo "${snippet}" 18 | echo "\`\`\`" 19 | echo "" 20 | jsonnet -J vendor "test.jsonnet" > /dev/null 21 | rm -rf "test.jsonnet" 22 | done 23 | 24 | for i in examples/*.jsonnet; do 25 | [ -f "$i" ] || break 26 | echo "Testing: ${i}" 27 | echo "" 28 | echo "\`\`\`" 29 | cat "${i}" 30 | echo "\`\`\`" 31 | echo "" 32 | jsonnet -J vendor "${i}" > /dev/null 33 | done 34 | -------------------------------------------------------------------------------- /install/kube-prometheus/tests/e2e/main_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The prometheus-operator Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package e2e 16 | 17 | import ( 18 | "log" 19 | "os" 20 | "testing" 21 | "time" 22 | 23 | "github.com/pkg/errors" 24 | "k8s.io/apimachinery/pkg/util/wait" 25 | "k8s.io/client-go/kubernetes" 26 | "k8s.io/client-go/tools/clientcmd" 27 | ) 28 | 29 | var promClient *prometheusClient 30 | 31 | func TestMain(m *testing.M) { 32 | os.Exit(testMain(m)) 33 | } 34 | 35 | // testMain circumvents the issue, that one can not call `defer` in TestMain, as 36 | // `os.Exit` does not honor `defer` statements. For more details see: 37 | // http://blog.englund.nu/golang,/testing/2017/03/12/using-defer-in-testmain.html 38 | func testMain(m *testing.M) int { 39 | kubeConfigPath, ok := os.LookupEnv("KUBECONFIG") 40 | if !ok { 41 | log.Fatal("failed to retrieve KUBECONFIG env var") 42 | } 43 | 44 | config, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath) 45 | if err != nil { 46 | log.Fatal(err) 47 | } 48 | 49 | kubeClient, err := kubernetes.NewForConfig(config) 50 | if err != nil { 51 | log.Fatal(errors.Wrap(err, "creating kubeClient failed")) 52 | } 53 | 54 | promClient = newPrometheusClient(kubeClient) 55 | 56 | return m.Run() 57 | } 58 | 59 | func TestQueryPrometheus(t *testing.T) { 60 | t.Parallel() 61 | queries := []struct { 62 | query string 63 | expectN int 64 | }{ 65 | { 66 | // query: `up{job="node-exporter"} == 1`, 67 | // expectN: 1, 68 | // }, { 69 | // query: `up{job="kubelet"} == 1`, 70 | // expectN: 1, 71 | // }, { 72 | query: `up{job="apiserver"} == 1`, 73 | expectN: 1, 74 | // }, { 75 | // query: `up{job="kube-state-metrics"} == 1`, 76 | // expectN: 1, 77 | }, { 78 | query: `up{job="prometheus-k8s"} == 1`, 79 | expectN: 1, 80 | }, { 81 | query: `up{job="prometheus-operator"} == 1`, 82 | expectN: 1, 83 | }, { 84 | query: `up{job="alertmanager-main"} == 1`, 85 | expectN: 2, 86 | }, 87 | } 88 | 89 | // Wait for pod to respond at queries at all. Then start verifying their results. 90 | err := wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) { 91 | _, err := promClient.query("up") 92 | return err == nil, nil 93 | }) 94 | if err != nil { 95 | t.Fatal(errors.Wrap(err, "wait for prometheus-k8s")) 96 | } 97 | 98 | err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) { 99 | defer t.Log("---------------------------\n") 100 | 101 | for _, q := range queries { 102 | n, err := promClient.query(q.query) 103 | if err != nil { 104 | return false, err 105 | } 106 | if n < q.expectN { 107 | // Don't return an error as targets may only become visible after a while. 
108 | t.Logf("expected at least %d results for %q but got %d", q.expectN, q.query, n) 109 | return false, nil 110 | } 111 | t.Logf("query %q succeeded", q.query) 112 | } 113 | return true, nil 114 | }) 115 | if err != nil { 116 | t.Fatal(err) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /install/kube-prometheus/tests/e2e/prometheus_client.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The prometheus-operator Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package e2e 16 | 17 | import ( 18 | "k8s.io/client-go/kubernetes" 19 | 20 | "github.com/Jeffail/gabs" 21 | ) 22 | 23 | type prometheusClient struct { 24 | kubeClient kubernetes.Interface 25 | } 26 | 27 | func newPrometheusClient(kubeClient kubernetes.Interface) *prometheusClient { 28 | return &prometheusClient{kubeClient} 29 | } 30 | 31 | // Query makes a request against the Prometheus /api/v1/query endpoint. 32 | func (c *prometheusClient) query(query string) (int, error) { 33 | req := c.kubeClient.CoreV1().RESTClient().Get(). 34 | Namespace("monitoring"). 35 | Resource("pods"). 36 | SubResource("proxy"). 37 | Name("prometheus-k8s-0:9090"). 38 | Suffix("/api/v1/query").Param("query", query) 39 | 40 | b, err := req.DoRaw() 41 | if err != nil { 42 | return 0, err 43 | } 44 | 45 | res, err := gabs.ParseJSON(b) 46 | if err != nil { 47 | return 0, err 48 | } 49 | 50 | n, err := res.ArrayCountP("data.result") 51 | return n, err 52 | } 53 | -------------------------------------------------------------------------------- /install/kube-prometheus/tests/e2e/travis-e2e.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # exit immediately when a command fails 3 | set -e 4 | # only exit with zero if all commands of the pipeline exit successfully 5 | set -o pipefail 6 | # error on unset variables 7 | set -u 8 | # print each command before executing it 9 | set -x 10 | 11 | SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}") 12 | 13 | "${SCRIPT_DIR}"/../../../../scripts/create-minikube.sh 14 | 15 | ( 16 | cd "${SCRIPT_DIR}"/../.. || exit 17 | kubectl apply -f manifests 18 | KUBECONFIG=~/.kube/config make test-e2e 19 | ) 20 | 21 | "${SCRIPT_DIR}"/../../../../scripts/delete-minikube.sh 22 | -------------------------------------------------------------------------------- /install/step1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir ~/bin 4 | 5 | echo "Get binary of kubectl, eksctl, awm-oam-authenticator ..." 
6 | wget https://amazon-eks.s3-us-west-2.amazonaws.com/1.11.5/2018-12-06/bin/linux/amd64/kubectl && chmod +x kubectl && mv kubectl ~/bin/ 7 | 8 | wget https://amazon-eks.s3-us-west-2.amazonaws.com/1.11.5/2018-12-06/bin/linux/amd64/aws-iam-authenticator && chmod +x aws-iam-authenticator && mv aws-iam-authenticator ~/bin/ 9 | 10 | curl --silent --location "https://github.com/weaveworks/eksctl/releases/download/latest_release/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp 11 | 12 | sudo mv /tmp/eksctl /usr/local/bin 13 | 14 | echo 'Done.' 15 | -------------------------------------------------------------------------------- /install/step2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cluster_name=workshop${RANDOM} 3 | 4 | # Create an EKS cluster using eksctl 5 | echo "Creating EKS cluster and node group with two t3.large instances ..." 6 | sed -i "s/workshop/${cluster_name}/" eks_cluster.yml 7 | eksctl create cluster -f eks_cluster.yml 8 | 9 | # Update KUBECONFIG so that kubectl can reach the new cluster 10 | aws eks update-kubeconfig --name ${cluster_name} 11 | 12 | # Test that the Kubernetes cluster works 13 | kubectl get all 14 | 15 | echo 'Done setting up EKS.' 16 | -------------------------------------------------------------------------------- /install/step3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install the helm client on this instance 4 | echo 'Installing helm on this instance and into Kubernetes ...' 5 | curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get | bash 6 | 7 | # Install tiller for helm and set the tiller role to cluster-admin 8 | kubectl apply -f https://gist.githubusercontent.com/pahud/14e6cc08f3a7e65cd9b0e8bed454a901/raw/954d71614dda911c4f7960f0d18687fa1ea093fa/helm-sa-rolebinding.yaml 9 | 10 | # Initialize helm in the Kubernetes cluster 11 | helm init --service-account tiller --upgrade 12 | 13 | echo 'Done helm installation.' 14 | -------------------------------------------------------------------------------- /install/step4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install kube-prometheus into the Kubernetes cluster (applied twice so the CRDs are registered before the custom resources) 4 | echo "Installing kube-prometheus ..." 5 | kubectl apply -f kube-prometheus/manifests/ 6 | sleep 5 7 | kubectl apply -f kube-prometheus/manifests/ 8 | 9 | # Install pushgateway via Helm 10 | echo "Installing push-gateway ..." 11 | helm install --name prom-pushgateway stable/prometheus-pushgateway --set serviceMonitor.enabled=true --namespace=monitoring 12 | 13 | # Expose pushgateway through a LoadBalancer service 14 | kubectl expose deployment -n monitoring prom-pushgateway-prometheus-pushgateway --port=9091 --target-port=9091 --name=my-pushgateway --type=LoadBalancer 15 | 16 | # Check that all pods are in the Running state 17 | echo 'kubectl get pod -n=monitoring' 18 | kubectl get pod -n=monitoring 19 | 20 | echo 'Done kube-prometheus installation.'
21 | -------------------------------------------------------------------------------- /install/uninstall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Remove kube-prometheus 4 | kubectl delete -f kube-prometheus/manifests/ 5 | 6 | # Remove pushgateway 7 | helm delete prom-pushgateway 8 | 9 | # Remove EKS cluster 10 | eksctl delete cluster -f eks_cluster.yml 11 | -------------------------------------------------------------------------------- /prometheus-operator/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Taipei-HUG/Prometheus-workshop/ae4c2c8e7666c408532c7618e537742aee8c88f6/prometheus-operator/.DS_Store -------------------------------------------------------------------------------- /prometheus-operator/README.md: -------------------------------------------------------------------------------- 1 | # Prometheus Practice -------------------------------------------------------------------------------- /prometheus-operator/hello_app_service_monitor/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Taipei-HUG/Prometheus-workshop/ae4c2c8e7666c408532c7618e537742aee8c88f6/prometheus-operator/hello_app_service_monitor/.DS_Store -------------------------------------------------------------------------------- /prometheus-operator/hello_app_service_monitor/manifests/prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: Prometheus 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | name: k8s 7 | namespace: monitoring 8 | spec: 9 | alerting: 10 | alertmanagers: 11 | - name: alertmanager-main 12 | namespace: monitoring 13 | port: web 14 | baseImage: quay.io/prometheus/prometheus 15 | nodeSelector: 16 | beta.kubernetes.io/os: linux 17 | replicas: 1 18 | resources: 19 | requests: 20 | memory: 400Mi 21 | ruleSelector: 22 | matchLabels: 23 | prometheus: k8s 24 | role: alert-rules 25 | securityContext: 26 | fsGroup: 2000 27 | runAsNonRoot: true 28 | runAsUser: 1000 29 | serviceAccountName: prometheus-k8s 30 | serviceMonitorNamespaceSelector: {} 31 | serviceMonitorSelector: {} 32 | version: v2.7.2 33 | -------------------------------------------------------------------------------- /prometheus-operator/hello_app_service_monitor/manifests/serviceMonitor-hello.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | app: hello 6 | name: hello 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | interval: 15s 12 | targetPort: 8080 13 | jobLabel: app 14 | namespaceSelector: 15 | matchNames: 16 | - default 17 | selector: 18 | matchLabels: 19 | app: hello 20 | -------------------------------------------------------------------------------- /prometheus-operator/kubecost/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | kubectl create clusterrolebinding cluster-self-admin-binding --clusterrole=cluster-admin --serviceaccount=kube-system:default 3 | helm repo add kubecost https://kubecost.github.io/cost-analyzer/ 4 | helm install kubecost/cost-analyzer --namespace kubecost --name kubecost --set 
kubecostToken="d3JhaXRoYWxsZW5AZ21haWwuY29txm343yadf98" 5 | kubectl patch service kubecost-cost-analyzer --namespace kubecost -p '{"spec":{"type":"LoadBalancer"}}' 6 | -------------------------------------------------------------------------------- /prometheus-overview/README.md: -------------------------------------------------------------------------------- 1 | # Prometheus Overview 2 | 3 | Basic setting to initial prometheus by [prometheus operator][1] and [kube-prometheus][2] 4 | 5 | 6 | [1]: https://github.com/coreos/prometheus-operator 7 | [2]: https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus 8 | -------------------------------------------------------------------------------- /service-discovery/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Taipei-HUG/Prometheus-workshop/ae4c2c8e7666c408532c7618e537742aee8c88f6/service-discovery/.DS_Store -------------------------------------------------------------------------------- /service-discovery/README.md: -------------------------------------------------------------------------------- 1 | # Service Discovery 2 | -------------------------------------------------------------------------------- /service-discovery/generate_yaml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | kubectl create secret generic prometheus-additional-scrape-configs -n monitoring --from-file=scrape_configs/additional-scrape-configs.yaml --dry-run -o yaml > manifests/prometheus-additional-scrape-configs.yaml -------------------------------------------------------------------------------- /service-discovery/manifests/additional-scrape-configs.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | additional-scrape-configs.yaml: 
LSBqb2JfbmFtZTogY29yZWRucwogIHNjcmFwZV9pbnRlcnZhbDogMTVzCiAgc2NyYXBlX3RpbWVvdXQ6IDEwcwogIG1ldHJpY3NfcGF0aDogL21ldHJpY3MKICBzY2hlbWU6IGh0dHAKICBrdWJlcm5ldGVzX3NkX2NvbmZpZ3M6CiAgLSByb2xlOiBlbmRwb2ludHMKICAgIG5hbWVzcGFjZXM6CiAgICAgIG5hbWVzOgogICAgICAtIGt1YmUtc3lzdGVtCiAgYmVhcmVyX3Rva2VuX2ZpbGU6IC92YXIvcnVuL3NlY3JldHMva3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC90b2tlbgogIHJlbGFiZWxfY29uZmlnczoKICAtIHNvdXJjZV9sYWJlbHM6IFtfX21ldGFfa3ViZXJuZXRlc19zZXJ2aWNlX2xhYmVsX2s4c19hcHBdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiBrdWJlLWRucwogICAgcmVwbGFjZW1lbnQ6ICQxCiAgICBhY3Rpb246IGtlZXAKICAtIHNvdXJjZV9sYWJlbHM6IFtfX21ldGFfa3ViZXJuZXRlc19wb2RfY29udGFpbmVyX3BvcnRfbnVtYmVyXQogICAgc2VwYXJhdG9yOiA7CiAgICByZWdleDogIjkxNTMiCiAgICByZXBsYWNlbWVudDogJDEKICAgIGFjdGlvbjoga2VlcAogIC0gc291cmNlX2xhYmVsczogW19fbWV0YV9rdWJlcm5ldGVzX2VuZHBvaW50X2FkZHJlc3NfdGFyZ2V0X2tpbmQsIF9fbWV0YV9rdWJlcm5ldGVzX2VuZHBvaW50X2FkZHJlc3NfdGFyZ2V0X25hbWVdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiBOb2RlOyguKikKICAgIHRhcmdldF9sYWJlbDogbm9kZQogICAgcmVwbGFjZW1lbnQ6ICR7MX0KICAgIGFjdGlvbjogcmVwbGFjZQogIC0gc291cmNlX2xhYmVsczogW19fbWV0YV9rdWJlcm5ldGVzX2VuZHBvaW50X2FkZHJlc3NfdGFyZ2V0X2tpbmQsIF9fbWV0YV9rdWJlcm5ldGVzX2VuZHBvaW50X2FkZHJlc3NfdGFyZ2V0X25hbWVdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiBQb2Q7KC4qKQogICAgdGFyZ2V0X2xhYmVsOiBwb2QKICAgIHJlcGxhY2VtZW50OiAkezF9CiAgICBhY3Rpb246IHJlcGxhY2UKICAtIHNvdXJjZV9sYWJlbHM6IFtfX21ldGFfa3ViZXJuZXRlc19uYW1lc3BhY2VdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAoLiopCiAgICB0YXJnZXRfbGFiZWw6IG5hbWVzcGFjZQogICAgcmVwbGFjZW1lbnQ6ICQxCiAgICBhY3Rpb246IHJlcGxhY2UKICAtIHNvdXJjZV9sYWJlbHM6IFtfX21ldGFfa3ViZXJuZXRlc19zZXJ2aWNlX25hbWVdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAoLiopCiAgICB0YXJnZXRfbGFiZWw6IHNlcnZpY2UKICAgIHJlcGxhY2VtZW50OiAkMQogICAgYWN0aW9uOiByZXBsYWNlCiAgLSBzb3VyY2VfbGFiZWxzOiBbX19tZXRhX2t1YmVybmV0ZXNfcG9kX25hbWVdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAoLiopCiAgICB0YXJnZXRfbGFiZWw6IHBvZAogICAgcmVwbGFjZW1lbnQ6ICQxCiAgICBhY3Rpb246IHJlcGxhY2UKICAtIHNvdXJjZV9sYWJlbHM6IFtfX21ldGFfa3ViZXJuZXRlc19zZXJ2aWNlX25hbWVdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAoLiopCiAgICB0YXJnZXRfbGFiZWw6IHNlcnZpY2UKICAgIHJlcGxhY2VtZW50OiAkMQogICAgYWN0aW9uOiByZXBsYWNlCiAgLSBzb3VyY2VfbGFiZWxzOiBbX19tZXRhX2t1YmVybmV0ZXNfcG9kX25hbWVdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAoLiopCiAgICB0YXJnZXRfbGFiZWw6IHBvZAogICAgcmVwbGFjZW1lbnQ6ICQxCiAgICBhY3Rpb246IHJlcGxhY2UKICAtIHNvdXJjZV9sYWJlbHM6IFtfX21ldGFfa3ViZXJuZXRlc19zZXJ2aWNlX25hbWVdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAoLiopCiAgICB0YXJnZXRfbGFiZWw6IGpvYgogICAgcmVwbGFjZW1lbnQ6ICR7MX0KICAgIGFjdGlvbjogcmVwbGFjZQogIC0gc291cmNlX2xhYmVsczogW19fbWV0YV9rdWJlcm5ldGVzX3NlcnZpY2VfbGFiZWxfazhzX2FwcF0KICAgIHNlcGFyYXRvcjogOwogICAgcmVnZXg6ICguKykKICAgIHRhcmdldF9sYWJlbDogam9iCiAgICByZXBsYWNlbWVudDogJHsxfQogICAgYWN0aW9uOiByZXBsYWNlCiAgLSBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAoLiopCiAgICB0YXJnZXRfbGFiZWw6IGVuZHBvaW50CiAgICByZXBsYWNlbWVudDogIjkxNTMiCiAgICBhY3Rpb246IHJlcGxhY2U= 4 | kind: Secret 5 | metadata: 6 | creationTimestamp: null 7 | name: prometheus-additional-scrape-configs 8 | namespace: monitoring 9 | -------------------------------------------------------------------------------- /service-discovery/manifests/prometheus-additional-scrape-configs.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | additional-scrape-configs.yaml: 
LSBqb2JfbmFtZTogaGVsbG8KICBzY3JhcGVfaW50ZXJ2YWw6IDE1cwogIHNjcmFwZV90aW1lb3V0OiAxMHMKICBtZXRyaWNzX3BhdGg6IC9tZXRyaWNzCiAgc2NoZW1lOiBodHRwCiAga3ViZXJuZXRlc19zZF9jb25maWdzOgogIC0gcm9sZTogZW5kcG9pbnRzCiAgICBuYW1lc3BhY2VzOgogICAgICBuYW1lczoKICAgICAgLSBkZWZhdWx0CiAgYmVhcmVyX3Rva2VuX2ZpbGU6IC92YXIvcnVuL3NlY3JldHMva3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC90b2tlbgogIHJlbGFiZWxfY29uZmlnczoKICAtIHNvdXJjZV9sYWJlbHM6IFtfX21ldGFfa3ViZXJuZXRlc19zZXJ2aWNlX2xhYmVsX2FwcF0KICAgIHNlcGFyYXRvcjogOwogICAgcmVnZXg6IGhlbGxvCiAgICByZXBsYWNlbWVudDogJDEKICAgIGFjdGlvbjoga2VlcAogIC0gc291cmNlX2xhYmVsczogW19fbWV0YV9rdWJlcm5ldGVzX3BvZF9jb250YWluZXJfcG9ydF9udW1iZXJdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAiODA4MCIKICAgIHJlcGxhY2VtZW50OiAkMQogICAgYWN0aW9uOiBrZWVwCiAgLSBzb3VyY2VfbGFiZWxzOiBbX19tZXRhX2t1YmVybmV0ZXNfZW5kcG9pbnRfYWRkcmVzc190YXJnZXRfa2luZCwgX19tZXRhX2t1YmVybmV0ZXNfZW5kcG9pbnRfYWRkcmVzc190YXJnZXRfbmFtZV0KICAgIHNlcGFyYXRvcjogOwogICAgcmVnZXg6IE5vZGU7KC4qKQogICAgdGFyZ2V0X2xhYmVsOiBub2RlCiAgICByZXBsYWNlbWVudDogJHsxfQogICAgYWN0aW9uOiByZXBsYWNlCiAgLSBzb3VyY2VfbGFiZWxzOiBbX19tZXRhX2t1YmVybmV0ZXNfZW5kcG9pbnRfYWRkcmVzc190YXJnZXRfa2luZCwgX19tZXRhX2t1YmVybmV0ZXNfZW5kcG9pbnRfYWRkcmVzc190YXJnZXRfbmFtZV0KICAgIHNlcGFyYXRvcjogOwogICAgcmVnZXg6IFBvZDsoLiopCiAgICB0YXJnZXRfbGFiZWw6IHBvZAogICAgcmVwbGFjZW1lbnQ6ICR7MX0KICAgIGFjdGlvbjogcmVwbGFjZQogIC0gc291cmNlX2xhYmVsczogW19fbWV0YV9rdWJlcm5ldGVzX25hbWVzcGFjZV0KICAgIHNlcGFyYXRvcjogOwogICAgcmVnZXg6ICguKikKICAgIHRhcmdldF9sYWJlbDogbmFtZXNwYWNlCiAgICByZXBsYWNlbWVudDogJDEKICAgIGFjdGlvbjogcmVwbGFjZQogIC0gc291cmNlX2xhYmVsczogW19fbWV0YV9rdWJlcm5ldGVzX3NlcnZpY2VfbmFtZV0KICAgIHNlcGFyYXRvcjogOwogICAgcmVnZXg6ICguKikKICAgIHRhcmdldF9sYWJlbDogc2VydmljZQogICAgcmVwbGFjZW1lbnQ6ICQxCiAgICBhY3Rpb246IHJlcGxhY2UKICAtIHNvdXJjZV9sYWJlbHM6IFtfX21ldGFfa3ViZXJuZXRlc19wb2RfbmFtZV0KICAgIHNlcGFyYXRvcjogOwogICAgcmVnZXg6ICguKikKICAgIHRhcmdldF9sYWJlbDogcG9kCiAgICByZXBsYWNlbWVudDogJDEKICAgIGFjdGlvbjogcmVwbGFjZQogIC0gc291cmNlX2xhYmVsczogW19fbWV0YV9rdWJlcm5ldGVzX3NlcnZpY2VfbmFtZV0KICAgIHNlcGFyYXRvcjogOwogICAgcmVnZXg6ICguKikKICAgIHRhcmdldF9sYWJlbDogam9iCiAgICByZXBsYWNlbWVudDogJHsxfQogICAgYWN0aW9uOiByZXBsYWNlCiAgLSBzb3VyY2VfbGFiZWxzOiBbX19tZXRhX2t1YmVybmV0ZXNfc2VydmljZV9sYWJlbF9oZWxsb19hcHBdCiAgICBzZXBhcmF0b3I6IDsKICAgIHJlZ2V4OiAoLispCiAgICB0YXJnZXRfbGFiZWw6IGpvYgogICAgcmVwbGFjZW1lbnQ6ICR7MX0KICAgIGFjdGlvbjogcmVwbGFjZQogIC0gc2VwYXJhdG9yOiA7CiAgICByZWdleDogKC4qKQogICAgdGFyZ2V0X2xhYmVsOiBlbmRwb2ludAogICAgcmVwbGFjZW1lbnQ6ICI4MDgwIgogICAgYWN0aW9uOiByZXBsYWNlCg== 4 | kind: Secret 5 | metadata: 6 | creationTimestamp: null 7 | name: prometheus-additional-scrape-configs 8 | namespace: monitoring 9 | -------------------------------------------------------------------------------- /service-discovery/manifests/prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: Prometheus 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | name: k8s 7 | namespace: monitoring 8 | spec: 9 | additionalScrapeConfigs: 10 | key: additional-scrape-configs.yaml 11 | name: prometheus-additional-scrape-configs 12 | alerting: 13 | alertmanagers: 14 | - name: alertmanager-main 15 | namespace: monitoring 16 | port: web 17 | baseImage: quay.io/prometheus/prometheus 18 | nodeSelector: 19 | beta.kubernetes.io/os: linux 20 | replicas: 1 21 | resources: 22 | requests: 23 | memory: 400Mi 24 | ruleSelector: 25 | matchLabels: 26 | prometheus: k8s 27 | role: alert-rules 28 | securityContext: 29 | fsGroup: 2000 30 | runAsNonRoot: true 31 | runAsUser: 1000 32 | serviceAccountName: 
prometheus-k8s 33 | serviceMonitorNamespaceSelector: {} 34 | serviceMonitorSelector: {} 35 | version: v2.7.2 36 | -------------------------------------------------------------------------------- /service-discovery/scrape_configs/additional-scrape-configs.yaml: -------------------------------------------------------------------------------- 1 | - job_name: hello 2 | scrape_interval: 15s 3 | scrape_timeout: 10s 4 | metrics_path: /metrics 5 | scheme: http 6 | kubernetes_sd_configs: 7 | - role: endpoints 8 | namespaces: 9 | names: 10 | - default 11 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 12 | relabel_configs: 13 | - source_labels: [__meta_kubernetes_service_label_app] 14 | separator: ; 15 | regex: hello 16 | replacement: $1 17 | action: keep 18 | - source_labels: [__meta_kubernetes_pod_container_port_number] 19 | separator: ; 20 | regex: "8080" 21 | replacement: $1 22 | action: keep 23 | - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] 24 | separator: ; 25 | regex: Node;(.*) 26 | target_label: node 27 | replacement: ${1} 28 | action: replace 29 | - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name] 30 | separator: ; 31 | regex: Pod;(.*) 32 | target_label: pod 33 | replacement: ${1} 34 | action: replace 35 | - source_labels: [__meta_kubernetes_namespace] 36 | separator: ; 37 | regex: (.*) 38 | target_label: namespace 39 | replacement: $1 40 | action: replace 41 | - source_labels: [__meta_kubernetes_service_name] 42 | separator: ; 43 | regex: (.*) 44 | target_label: service 45 | replacement: $1 46 | action: replace 47 | - source_labels: [__meta_kubernetes_pod_name] 48 | separator: ; 49 | regex: (.*) 50 | target_label: pod 51 | replacement: $1 52 | action: replace 53 | - source_labels: [__meta_kubernetes_service_name] 54 | separator: ; 55 | regex: (.*) 56 | target_label: job 57 | replacement: ${1} 58 | action: replace 59 | - source_labels: [__meta_kubernetes_service_label_hello_app] 60 | separator: ; 61 | regex: (.+) 62 | target_label: job 63 | replacement: ${1} 64 | action: replace 65 | - separator: ; 66 | regex: (.*) 67 | target_label: endpoint 68 | replacement: "8080" 69 | action: replace 70 | --------------------------------------------------------------------------------
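A note on how the service-discovery files above fit together: the data field in manifests/prometheus-additional-scrape-configs.yaml is simply the base64 encoding of scrape_configs/additional-scrape-configs.yaml, and the Prometheus custom resource loads it through spec.additionalScrapeConfigs. A minimal sketch for regenerating and sanity-checking that secret (assuming generate_yaml.sh has been run from the service-discovery directory and the manifests applied to the monitoring namespace):

    # Regenerate the secret manifest from the plain-text scrape config
    # (same command used by generate_yaml.sh).
    kubectl create secret generic prometheus-additional-scrape-configs -n monitoring \
      --from-file=scrape_configs/additional-scrape-configs.yaml \
      --dry-run -o yaml > manifests/prometheus-additional-scrape-configs.yaml

    # Decode the secret that is live in the cluster and confirm it matches
    # the plain-text scrape config file.
    kubectl -n monitoring get secret prometheus-additional-scrape-configs \
      -o jsonpath='{.data.additional-scrape-configs\.yaml}' | base64 --decode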