├── data_flow.png ├── sockshop ├── clean.sh ├── weaveworks.sh └── weaveworks-service.yaml ├── simple ├── clean.sh ├── simple.sh ├── echo-client.yaml └── echo-server.yaml ├── token-gcloud.sh ├── .gitignore ├── ambassador ├── ambassador-tlscontext.yaml ├── clean.sh ├── ambassador-service.yaml ├── qotm.yaml ├── ambassador.sh ├── ambassador-consul-connector.yaml ├── ambassador-rbac.yaml └── ambassador-rbac-prometheus.yaml ├── kube-dashboard └── dashboard.sh ├── token-minikube.sh ├── .tmuxp.yaml ├── consul ├── consul.sh ├── clean.sh ├── intentions.sh └── values.yaml ├── grafana ├── datasources.yaml ├── grafana.sh └── consul-cluster-health-dashboard.yaml ├── tiller └── helm-init.sh ├── prometheus ├── prometheus.sh └── default-values.yaml └── README.md /data_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tradel/cc-kube-sockshop/HEAD/data_flow.png -------------------------------------------------------------------------------- /sockshop/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | kubectl delete -f weaveworks-service.yaml 4 | helm delete --purge sockshop 5 | -------------------------------------------------------------------------------- /simple/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | kubectl delete -f echo-server.yaml 4 | kubectl delete -f echo-client.yaml 5 | 6 | -------------------------------------------------------------------------------- /token-gcloud.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gcloud config config-helper --format=json | jq -r .credential.access_token | pbcopy 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | 3 | _attic/ 4 | 
.DS_Store 5 | 6 | *.key 7 | *.csr 8 | *.crt 9 | *.pem 10 | ssl.conf 11 | ca_config.json 12 | nohup.out 13 | -------------------------------------------------------------------------------- /ambassador/ambassador-tlscontext.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: ambassador/v1 2 | kind: TLSContext 3 | name: ambassador-consul 4 | hosts: [] 5 | secret: ambassador-consul -------------------------------------------------------------------------------- /simple/simple.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Deploy a simple HTTP echo service 4 | kubectl apply -f echo-server.yaml 5 | 6 | # Deploy a client for the echo service 7 | kubectl apply -f echo-client.yaml 8 | -------------------------------------------------------------------------------- /ambassador/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | kubectl delete -f ambassador-consul-connector.yaml 4 | kubectl delete -f ambassador-service.yaml 5 | kubectl delete -f ambassador-rbac.yaml 6 | 7 | kubectl delete -f qotm.yaml 8 | -------------------------------------------------------------------------------- /kube-dashboard/dashboard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v1.10.1/src/deploy/recommended/kubernetes-dashboard.yaml 4 | 5 | kubectl proxy & 6 | open http://localhost:8001/api/v1/namespaces/kube-system/services/https:kubernetes-dashboard:/proxy/ 7 | -------------------------------------------------------------------------------- /token-minikube.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | token=$(kubectl -n kube-system get secrets -o json | jq -r '.items[] | 
select(.metadata.annotations."kubernetes.io/service-account.name"=="tiller") | .data.token') 4 | echo $token | base64 -D | pbcopy 5 | 6 | echo 'The auth token for the Kubernetes dashboard is now on the clipboard.' 7 | -------------------------------------------------------------------------------- /.tmuxp.yaml: -------------------------------------------------------------------------------- 1 | session_name: kube 2 | windows: 3 | - window_name: kube 4 | layout: main-vertical 5 | panes: 6 | - shell_command: kubectl config current-context 7 | focus: true 8 | - kubectl port-forward service/consul-ui 8500:80 9 | - kubectl -n monitoring port-forward service/grafana 3000:80 10 | - kubectl port-forward service/front-end 30001:80 11 | -------------------------------------------------------------------------------- /consul/consul.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Installing Consul from Helm chart repo..." 4 | helm repo add consul https://consul-helm-charts.storage.googleapis.com 5 | helm install --wait --name=consul consul/consul -f ./values.yaml 6 | 7 | nohup kubectl port-forward service/consul-ui 8500:80 --pod-running-timeout=1m & 8 | 9 | echo "" 10 | echo -n "Your Consul UI is at: http://localhost:8500" 11 | 12 | open http://localhost:8500 13 | -------------------------------------------------------------------------------- /grafana/datasources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: prom-grafana-datasource 5 | namespace: monitoring 6 | labels: 7 | grafana_datasource: "1" 8 | data: 9 | datasource.yaml: |- 10 | apiVersion: 1 11 | 12 | deleteDatasources: 13 | - name: Prometheus 14 | 15 | datasources: 16 | - name: Prometheus 17 | type: prometheus 18 | access: proxy 19 | url: http://prometheus-server:80 20 | isDefault: true 21 | 
-------------------------------------------------------------------------------- /consul/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | helm delete --purge consul 4 | 5 | kubectl delete daemonset consul 6 | kubectl delete statefulset consul-server 7 | kubectl delete deployment consul-connect-injector-webhook-deployment 8 | kubectl delete deployment consul-sync-catalog 9 | kubectl delete job consul-license 10 | kubectl delete persistentvolumeclaim data-consul-server-0 11 | kubectl delete persistentvolumeclaim data-consul-server-1 12 | kubectl delete persistentvolumeclaim data-consul-server-2 13 | -------------------------------------------------------------------------------- /tiller/helm-init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create an account for Tiller and grant it permissions. 4 | # NOTE: I took the lazy way and granted it "cluster-admin" role. 5 | # You should probably not do this in a real cluster. 6 | kubectl create serviceaccount --namespace kube-system tiller 7 | kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller 8 | 9 | # Let Helm deploy and configure the Tiller service. 10 | helm init --service-account tiller 11 | -------------------------------------------------------------------------------- /sockshop/weaveworks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Installing Sock Shop from Helm chart repo..." 
4 | helm repo add consul https://consul-helm-charts.storage.googleapis.com 5 | helm install --wait --name=sockshop consul/microservices-demo \ 6 | --set loadtest.enabled=true \ 7 | --set loadtest.replicas=2 8 | kubectl apply -f weaveworks-service.yaml 9 | 10 | echo "" 11 | echo "Your sock shop URL is: http://localhost:30001" 12 | 13 | nohup kubectl port-forward service/front-end 30001:80 & 14 | open http://localhost:30001 15 | -------------------------------------------------------------------------------- /prometheus/prometheus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | kubectl create ns monitoring 4 | 5 | # Deploys Prometheus server, node-exporter, and kube-state-metrics 6 | helm install --name prometheus --namespace monitoring stable/prometheus \ 7 | --set server.enableAdminApi=true \ 8 | --set server.persistentVolume.size=20Gi \ 9 | --set server.global.scrape_interval=10s \ 10 | --set server.global.scrape_timeout=5s 11 | 12 | kubectl -n monitoring port-forward service/prometheus-server 9090:80 & 13 | 14 | echo "" 15 | echo -n "Your Prometheus UI is at: http://localhost:9090" 16 | 17 | open http://localhost:9090 18 | 19 | -------------------------------------------------------------------------------- /simple/echo-client.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: http-echo-client 5 | spec: 6 | replicas: 1 7 | strategy: 8 | type: RollingUpdate 9 | template: 10 | metadata: 11 | labels: 12 | app: http-echo 13 | role: client 14 | annotations: 15 | "consul.hashicorp.com/connect-inject": "true" 16 | "consul.hashicorp.com/connect-service-upstreams": "http-echo:1234" 17 | spec: 18 | containers: 19 | - name: http-echo-client 20 | image: tutum/curl:latest 21 | command: [ "/bin/sh", "-c", "--" ] 22 | args: [ "while true; do sleep 30; done;" ] 23 | - name: openssl 24 | image: 
securefab/openssl:latest 25 | command: [ "/bin/sh", "-c", "--" ] 26 | args: [ "while true; do sleep 30; done;" ] -------------------------------------------------------------------------------- /sockshop/weaveworks-service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: front-end-nodeport 6 | annotations: 7 | getambassador.io/config: | 8 | --- 9 | apiVersion: ambassador/v1 10 | kind: Mapping 11 | name: front-end_mapping 12 | prefix: /socks/ 13 | service: front-end-proxy 14 | resolver: consul-dc1 15 | tls: ambassador-consul 16 | load_balancer: 17 | policy: round_robin 18 | --- 19 | apiVersion: ambassador/v1 20 | kind: Mapping 21 | name: fallback_mapping 22 | prefix: / 23 | service: front-end-proxy 24 | resolver: consul-dc1 25 | tls: ambassador-consul 26 | load_balancer: 27 | policy: round_robin 28 | spec: 29 | type: NodePort 30 | selector: 31 | name: front-end 32 | ports: 33 | - port: 80 34 | name: http 35 | -------------------------------------------------------------------------------- /ambassador/ambassador-service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: ambassador 6 | annotations: 7 | getambassador.io/config: | 8 | --- 9 | apiVersion: ambassador/v1 10 | kind: ConsulResolver 11 | name: consul-dc1 12 | address: consul-server.default.svc.cluster.local:8500 13 | datacenter: dc1 14 | --- 15 | apiVersion: ambassador/v1 16 | kind: Module 17 | name: tls 18 | config: 19 | server: 20 | enabled: True 21 | secret: sockshop-letsencrypt-cert 22 | spec: 23 | type: LoadBalancer 24 | externalTrafficPolicy: Local 25 | ports: 26 | - port: 80 27 | targetPort: 8080 28 | protocol: TCP 29 | name: http 30 | - port: 443 31 | targetPort: 8443 32 | protocol: TCP 33 | name: https 34 | selector: 35 | service: ambassador 36 | 
-------------------------------------------------------------------------------- /grafana/grafana.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | helm install --name grafana --namespace monitoring stable/grafana \ 4 | --set sidecar.datasources.enabled=true \ 5 | --set sidecar.dashboards.enabled=true \ 6 | --set sidecar.dashboards.searchNamespace=ALL \ 7 | --set sidecar.datasources.label=grafana_datasource \ 8 | --set sidecar.dashboards.label=grafana_dashboard \ 9 | --set sidecar.datasources.searchNamespace=ALL 10 | 11 | kubectl -n monitoring apply -f datasources.yaml 12 | kubectl -n monitoring apply -f consul-cluster-health-dashboard.yaml 13 | 14 | kubectl -n monitoring port-forward service/grafana 3000:80 & 15 | 16 | admin_password=$(kubectl -n monitoring get secret grafana -o jsonpath="{.data.admin-password}" | base64 --decode) 17 | 18 | echo "" 19 | echo "Grafana is available at: http://localhost:3000" 20 | echo "Your admin password is: ${admin_password}" 21 | 22 | open http://localhost:3000 23 | -------------------------------------------------------------------------------- /consul/intentions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export CONSUL_HTTP_ADDR=http://localhost:8500 4 | 5 | # Default rule: deny all 6 | consul intention create -replace -deny '*' '*' 7 | 8 | # Allow traffic to simple http echo service 9 | consul intention create -replace -allow '*' http-echo 10 | 11 | # Allow traffic to Ambassador test service 12 | consul intention create -replace -allow '*' qotm 13 | 14 | # Allow traffic between sock shop components 15 | consul intention create -replace -allow load-test front-end 16 | consul intention create -replace -allow front-end carts 17 | consul intention create -replace -allow front-end orders 18 | consul intention create -replace -allow front-end catalogue 19 | consul intention create -replace -allow front-end user 
20 | consul intention create -replace -allow carts carts-db 21 | consul intention create -replace -allow orders orders-db 22 | consul intention create -replace -allow catalogue catalogue-db 23 | consul intention create -replace -allow user user-db 24 | consul intention create -replace -allow queue-master rabbitmq 25 | 26 | # Allow Ambassador to talk to anything 27 | consul intention create -replace -allow ambassador '*' 28 | -------------------------------------------------------------------------------- /consul/values.yaml: -------------------------------------------------------------------------------- 1 | # Sample values.yaml to set up a 3-node Consul Enterprise cluster with 100GB of 2 | # storage and automatic Connect injection for annotated pods in the "my-app" 3 | # namespace. 4 | # 5 | # To use this, you will first need to create a secret that contains your 6 | # enterprise license key, like this: 7 | # 8 | # apiVersion: v1 9 | # kind: Secret 10 | # metadata: 11 | # name: consul-license 12 | # data: 13 | # key: 01MV4UU43BK5HGYYTOJZWFQMTMNNEWU33JJZKEK.... 
14 | 15 | global: 16 | enabled: true 17 | domain: consul 18 | image: "hashicorp/consul-enterprise:1.4.4-ent" 19 | datacenter: dc1 20 | gossipEncryption: 21 | enabled: true 22 | secretName: "consul-gossip-key" 23 | secretKey: "key" 24 | 25 | server: 26 | enabled: true 27 | replicas: 3 28 | bootstrapExpect: 3 29 | enterpriseLicense: 30 | secretName: "consul-ent-license" 31 | secretKey: "key" 32 | storage: 20Gi 33 | connect: true 34 | 35 | client: 36 | enabled: true 37 | grpc: true 38 | 39 | dns: 40 | enabled: true 41 | 42 | ui: 43 | enabled: true 44 | service: 45 | enabled: true 46 | type: NodePort 47 | 48 | connectInject: 49 | enabled: true 50 | default: false 51 | 52 | syncCatalog: 53 | enabled: false 54 | -------------------------------------------------------------------------------- /ambassador/qotm.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: Deployment 4 | metadata: 5 | name: qotm-mtls 6 | spec: 7 | replicas: 1 8 | strategy: 9 | type: RollingUpdate 10 | template: 11 | metadata: 12 | labels: 13 | app: qotm 14 | annotations: 15 | "consul.hashicorp.com/connect-inject": "true" 16 | spec: 17 | containers: 18 | - name: qotm 19 | image: datawire/qotm:1.7 20 | ports: 21 | - name: http-api 22 | containerPort: 5000 23 | readinessProbe: 24 | httpGet: 25 | path: /health 26 | port: 5000 27 | initialDelaySeconds: 30 28 | periodSeconds: 3 29 | resources: 30 | limits: 31 | cpu: "0.1" 32 | memory: 100Mi 33 | --- 34 | apiVersion: v1 35 | kind: Service 36 | metadata: 37 | name: qotm-consul-mtls 38 | annotations: 39 | getambassador.io/config: | 40 | --- 41 | apiVersion: ambassador/v1 42 | kind: Mapping 43 | name: consul_qotm_tls_mapping 44 | prefix: /qotm/ 45 | service: qotm-proxy 46 | resolver: consul-dc1 47 | tls: ambassador-consul 48 | load_balancer: 49 | policy: round_robin 50 | spec: 51 | selector: 52 | app: qotm 53 | ports: 54 | - name: http 55 | port: 80 56 | 
-------------------------------------------------------------------------------- /ambassador/ambassador.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # NOTE: the docker-registry secret named "quayio-datawire" is no longer 4 | # needed to install Ambassador with Connect integration. 5 | 6 | # Needed for GKE 7 | kubectl create clusterrolebinding my-cluster-admin-binding --clusterrole=cluster-admin --user=$(gcloud info --format="value(config.account)") 8 | 9 | # Deploy the Ambassador proxy and its Consul connector 10 | kubectl apply -f ambassador-rbac.yaml 11 | kubectl apply -f ambassador-service.yaml 12 | kubectl apply -f ambassador-consul-connector.yaml 13 | 14 | # Deploy the "Quote of the Moment" service for testing 15 | kubectl apply -f qotm.yaml 16 | 17 | echo "" 18 | echo "Waiting for Ambassador pod to start..." 19 | while [[ $( kubectl get pods -l service=ambassador -o jsonpath='{.items[0].status.phase}' ) != "Running" ]] 20 | do 21 | sleep 5 22 | done 23 | 24 | echo "Waiting for Ambassador Connect pod to start..." 25 | while [[ $( kubectl get pods -l app=ambassador,component=consul-connect -o jsonpath='{.items[0].status.phase}' ) != "Running" ]] 26 | do 27 | sleep 5 28 | done 29 | 30 | echo "Waiting for Ambassador load balancer to become ready..." 
31 | ip=$( kubectl get service ambassador -o jsonpath='{.status.loadBalancer.ingress[0].ip}' ) 32 | while [[ "$ip" == "" ]] 33 | do 34 | sleep 5 35 | ip=$( kubectl get service ambassador -o jsonpath='{.status.loadBalancer.ingress[0].ip}' ) 36 | done 37 | 38 | echo "" 39 | echo "Ambassador proxy is running at: https://${ip}" 40 | -------------------------------------------------------------------------------- /simple/echo-server.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: Deployment 4 | metadata: 5 | name: http-echo 6 | spec: 7 | replicas: 1 8 | strategy: 9 | type: RollingUpdate 10 | template: 11 | metadata: 12 | labels: 13 | app: http-echo 14 | role: server 15 | annotations: 16 | "consul.hashicorp.com/connect-inject": "true" 17 | spec: 18 | containers: 19 | - name: http-echo 20 | image: hashicorp/http-echo:latest 21 | args: 22 | - -text="hello world" 23 | - -listen=:8080 24 | ports: 25 | - name: http 26 | containerPort: 8080 27 | readinessProbe: 28 | httpGet: 29 | path: /health 30 | port: 8080 31 | initialDelaySeconds: 30 32 | periodSeconds: 3 33 | resources: 34 | limits: 35 | cpu: "0.1" 36 | memory: 100Mi 37 | --- 38 | apiVersion: v1 39 | kind: Service 40 | metadata: 41 | name: http-echo 42 | annotations: 43 | getambassador.io/config: | 44 | --- 45 | apiVersion: ambassador/v1 46 | kind: Mapping 47 | name: http-echo_mapping 48 | prefix: /echo/ 49 | service: http-echo-proxy 50 | resolver: consul-dc1 51 | tls: ambassador-consul 52 | load_balancer: 53 | policy: round_robin 54 | spec: 55 | type: NodePort 56 | selector: 57 | app: http-echo 58 | ports: 59 | - port: 80 60 | name: http 61 | -------------------------------------------------------------------------------- /ambassador/ambassador-consul-connector.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1beta1 3 | kind: ClusterRole 4 | 
metadata: 5 | name: ambassador-consul-connect 6 | rules: 7 | - apiGroups: [""] 8 | resources: 9 | - secrets 10 | verbs: ["get", "list", "create", "delete", "patch"] 11 | --- 12 | apiVersion: v1 13 | kind: ServiceAccount 14 | metadata: 15 | name: ambassador-consul-connect 16 | --- 17 | apiVersion: rbac.authorization.k8s.io/v1 18 | kind: ClusterRoleBinding 19 | metadata: 20 | name: ambassador-consul-connect 21 | roleRef: 22 | apiGroup: rbac.authorization.k8s.io 23 | kind: ClusterRole 24 | name: ambassador-consul-connect 25 | subjects: 26 | - kind: ServiceAccount 27 | name: ambassador-consul-connect 28 | namespace: default 29 | --- 30 | apiVersion: v1 31 | kind: Service 32 | metadata: 33 | name: ambassador-consul-connector 34 | annotations: 35 | getambassador.io/config: | 36 | --- 37 | apiVersion: ambassador/v1 38 | kind: TLSContext 39 | name: ambassador-consul 40 | hosts: [] 41 | secret: ambassador-consul-connect 42 | spec: 43 | ports: 44 | - name: ambassador-consul-connector 45 | port: 80 46 | selector: 47 | component: consul-connect 48 | --- 49 | apiVersion: apps/v1 50 | kind: Deployment 51 | metadata: 52 | name: ambassador-consul-connect-integration 53 | labels: 54 | app: ambassador 55 | component: consul-connect 56 | spec: 57 | replicas: 1 58 | selector: 59 | matchLabels: 60 | app: ambassador 61 | component: consul-connect 62 | strategy: 63 | type: RollingUpdate 64 | template: 65 | metadata: 66 | labels: 67 | app: ambassador 68 | component: consul-connect 69 | annotations: 70 | "consul.hashicorp.com/connect-inject": "false" 71 | spec: 72 | serviceAccountName: ambassador-consul-connect 73 | terminationGracePeriodSeconds: 0 74 | containers: 75 | - name: consul-connect-integration 76 | image: quay.io/datawire/ambassador_pro:consul_connect_integration-0.4.0 77 | imagePullPolicy: Always 78 | resources: 79 | limits: 80 | cpu: 200m 81 | memory: 200Mi 82 | requests: 83 | cpu: 100m 84 | memory: 50Mi 85 | env: 86 | # Consul runs as a DaemonSet on each Node therefore we 
need to talk to the Host machine. 87 | # See: https://www.consul.io/docs/platform/k8s/run.html#architecture 88 | - name: _CONSUL_HOST 89 | valueFrom: 90 | fieldRef: 91 | fieldPath: status.hostIP 92 | -------------------------------------------------------------------------------- /ambassador/ambassador-rbac.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | service: ambassador-admin 7 | name: ambassador-admin 8 | spec: 9 | type: NodePort 10 | ports: 11 | - name: ambassador-admin 12 | port: 8877 13 | targetPort: 8877 14 | selector: 15 | service: ambassador 16 | --- 17 | apiVersion: rbac.authorization.k8s.io/v1beta1 18 | kind: ClusterRole 19 | metadata: 20 | name: ambassador 21 | rules: 22 | - apiGroups: [""] 23 | resources: 24 | - namespaces 25 | - services 26 | - secrets 27 | - endpoints 28 | verbs: ["get", "list", "watch"] 29 | --- 30 | apiVersion: v1 31 | kind: ServiceAccount 32 | metadata: 33 | name: ambassador 34 | --- 35 | apiVersion: rbac.authorization.k8s.io/v1beta1 36 | kind: ClusterRoleBinding 37 | metadata: 38 | name: ambassador 39 | roleRef: 40 | apiGroup: rbac.authorization.k8s.io 41 | kind: ClusterRole 42 | name: ambassador 43 | subjects: 44 | - kind: ServiceAccount 45 | name: ambassador 46 | namespace: default 47 | --- 48 | apiVersion: extensions/v1beta1 49 | kind: Deployment 50 | metadata: 51 | name: ambassador 52 | spec: 53 | replicas: 3 54 | template: 55 | metadata: 56 | annotations: 57 | sidecar.istio.io/inject: "false" 58 | "consul.hashicorp.com/connect-inject": "false" 59 | labels: 60 | service: ambassador 61 | spec: 62 | affinity: 63 | podAntiAffinity: 64 | preferredDuringSchedulingIgnoredDuringExecution: 65 | - weight: 100 66 | podAffinityTerm: 67 | labelSelector: 68 | matchLabels: 69 | service: ambassador 70 | topologyKey: kubernetes.io/hostname 71 | serviceAccountName: ambassador 72 | containers: 73 | - name: ambassador 74 | image: 
quay.io/datawire/ambassador:0.60.0 75 | resources: 76 | limits: 77 | cpu: 1 78 | memory: 400Mi 79 | requests: 80 | cpu: 200m 81 | memory: 100Mi 82 | env: 83 | - name: AMBASSADOR_NAMESPACE 84 | valueFrom: 85 | fieldRef: 86 | fieldPath: metadata.namespace 87 | ports: 88 | - name: http 89 | containerPort: 8080 90 | - name: https 91 | containerPort: 8443 92 | - name: admin 93 | containerPort: 8877 94 | livenessProbe: 95 | httpGet: 96 | path: /ambassador/v0/check_alive 97 | port: 8877 98 | initialDelaySeconds: 30 99 | periodSeconds: 3 100 | readinessProbe: 101 | httpGet: 102 | path: /ambassador/v0/check_ready 103 | port: 8877 104 | initialDelaySeconds: 30 105 | periodSeconds: 3 106 | restartPolicy: Always 107 | -------------------------------------------------------------------------------- /ambassador/ambassador-rbac-prometheus.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | service: ambassador-admin 7 | name: ambassador-admin 8 | spec: 9 | type: NodePort 10 | ports: 11 | - name: ambassador-admin 12 | port: 8877 13 | targetPort: 8877 14 | selector: 15 | service: ambassador 16 | --- 17 | apiVersion: rbac.authorization.k8s.io/v1beta1 18 | kind: ClusterRole 19 | metadata: 20 | name: ambassador 21 | rules: 22 | - apiGroups: [""] 23 | resources: 24 | - services 25 | verbs: ["get", "list", "watch"] 26 | - apiGroups: [""] 27 | resources: 28 | - configmaps 29 | verbs: ["create", "update", "patch", "get", "list", "watch"] 30 | - apiGroups: [""] 31 | resources: 32 | - secrets 33 | verbs: ["get", "list", "watch"] 34 | --- 35 | apiVersion: v1 36 | kind: ServiceAccount 37 | metadata: 38 | name: ambassador 39 | --- 40 | apiVersion: rbac.authorization.k8s.io/v1beta1 41 | kind: ClusterRoleBinding 42 | metadata: 43 | name: ambassador 44 | roleRef: 45 | apiGroup: rbac.authorization.k8s.io 46 | kind: ClusterRole 47 | name: ambassador 48 | subjects: 49 | - kind: ServiceAccount 
50 | name: ambassador 51 | namespace: default 52 | --- 53 | apiVersion: v1 54 | kind: ConfigMap 55 | metadata: 56 | name: ambassador-statsd-mapping-config 57 | data: 58 | exporterConfiguration: | 59 | --- 60 | # see https://lyft.github.io/envoy/docs/configuration/cluster_manager/cluster_stats.html#config-cluster-manager-cluster-stats 61 | mappings: 62 | ### BEGIN General 63 | - match: envoy.cluster.*.upstream_cx_total 64 | name: envoy_upstream_cx_total 65 | labels: 66 | cluster: "$1" 67 | - match: envoy.cluster.*.upstream_cx_active 68 | name: envoy_upstream_cx_active 69 | labels: 70 | cluster: "$1" 71 | - match: envoy.cluster.*.upstream_connect_fail 72 | name: envoy_upstream_connect_fail 73 | labels: 74 | cluster: "$1" 75 | - match: envoy.cluster.*.membership_healthy 76 | name: envoy_membership_healthy 77 | labels: 78 | cluster: "$1" 79 | - match: envoy.cluster.*.membership_change 80 | name: envoy_membership_change 81 | labels: 82 | cluster: "$1" 83 | - match: envoy.cluster.*.membership_total 84 | name: envoy_membership_total 85 | labels: 86 | cluster: "$1" 87 | ### END General 88 | ### BEGIN Health check statistics 89 | - match: envoy.cluster.*.health_check.attempt 90 | name: envoy_health_check_attempt 91 | labels: 92 | cluster: "$1" 93 | - match: envoy.cluster.*.health_check.success 94 | name: envoy_health_check_success 95 | labels: 96 | cluster: "$1" 97 | - match: envoy.cluster.*.health_check.failure 98 | name: envoy_health_check_failure 99 | labels: 100 | cluster: "$1" 101 | - match: envoy.cluster.*.health_check.passive_failure 102 | name: envoy_health_check_passive_failure 103 | labels: 104 | cluster: "$1" 105 | - match: envoy.cluster.*.health_check.network_failure 106 | name: envoy_health_check_network_failure 107 | labels: 108 | cluster: "$1" 109 | - match: envoy.cluster.*.health_check.verify_cluster 110 | name: envoy_health_check_verify_cluster 111 | labels: 112 | cluster: "$1" 113 | - match: envoy.cluster.*.health_check.healthy 114 | name: 
envoy_health_check_healthy 115 | labels: 116 | cluster: "$1" 117 | ### END Health check statistics 118 | ### BEGIN Dynamic HTTP statistics 119 | - match: envoy.cluster.*.upstream_rq_time 120 | name: envoy_upstream_rq_time 121 | labels: 122 | cluster: "$1" 123 | - match: envoy.cluster.*.canary.upstream_rq_time 124 | name: envoy_canary_upstream_rq_time 125 | labels: 126 | cluster: "$1" 127 | - match: envoy.cluster.*.internal.upstream_rq_time 128 | name: envoy_canary_internal_upstream_rq_time 129 | labels: 130 | cluster: "$1" 131 | - match: envoy.cluster.*.external.upstream_rq_time 132 | name: envoy_external_upstream_rq_time 133 | labels: 134 | cluster: "$1" 135 | ### END Dynamic HTTP statistics 136 | ### BEGIN listener 137 | - match: envoy.listener.*.downstream_cx_length_ms 138 | name: envoy_listener_downstream_cx_length_ms 139 | labels: 140 | listener: $1 141 | - match: envoy.listener.*.downstream_cx_active 142 | name: envoy_listener_downstream_cx_active 143 | labels: 144 | listener: $1 145 | - match: envoy.listener.*.downstream_cx_proxy_proto_error 146 | name: envoy_listener_downstream_cx_proxy_proto_error 147 | labels: 148 | listener: $1 149 | - match: envoy.listener.*.downstream_cx_destroy 150 | name: envoy_listener_downstream_cx_destroy 151 | labels: 152 | listener: $1 153 | - match: envoy.listener.*.downstream_cx_total 154 | name: envoy_listener_downstream_cx_total 155 | labels: 156 | listener: $1 157 | - match: envoy.listener.*.*.*.*.downstream_cx_length_ms 158 | name: envoy_listener_downstream_cx_length_ms 159 | labels: 160 | listener: "$1.$2.$3.$4" 161 | - match: envoy.listener.*.*.*.*.downstream_cx_active 162 | name: envoy_listener_downstream_cx_active 163 | labels: 164 | listener: "$1.$2.$3.$4" 165 | - match: envoy.listener.*.*.*.*.downstream_cx_proxy_proto_error 166 | name: envoy_listener_downstream_cx_proxy_proto_error 167 | labels: 168 | listener: "$1.$2.$3.$4" 169 | - match: envoy.listener.*.*.*.*.downstream_cx_destroy 170 | name: 
envoy_listener_downstream_cx_destroy 171 | labels: 172 | listener: "$1.$2.$3.$4" 173 | - match: envoy.listener.*.*.*.*.downstream_cx_total 174 | name: envoy_listener_downstream_cx_total 175 | labels: 176 | listener: "$1.$2.$3.$4" 177 | ### END listener 178 | --- 179 | apiVersion: extensions/v1beta1 180 | kind: Deployment 181 | metadata: 182 | name: ambassador 183 | spec: 184 | replicas: 1 185 | template: 186 | metadata: 187 | annotations: 188 | sidecar.istio.io/inject: "false" 189 | "consul.hashicorp.com/connect-inject": "false" 190 | "prometheus.io/scrape": "true" 191 | "prometheus.io/port": "9102" 192 | labels: 193 | service: ambassador 194 | spec: 195 | serviceAccountName: ambassador 196 | volumes: 197 | - name: stats-exporter-mapping-config 198 | configMap: 199 | name: ambassador-statsd-mapping-config 200 | items: 201 | - key: exporterConfiguration 202 | path: mapping-config.yaml 203 | containers: 204 | - name: ambassador 205 | image: quay.io/datawire/ambassador:0.51.2 206 | resources: 207 | limits: 208 | cpu: 1 209 | memory: 400Mi 210 | requests: 211 | cpu: 200m 212 | memory: 100Mi 213 | env: 214 | - name: STATSD_ENABLED 215 | value: "true" 216 | - name: STATSD_HOST 217 | value: "localhost" 218 | - name: AMBASSADOR_NAMESPACE 219 | valueFrom: 220 | fieldRef: 221 | fieldPath: metadata.namespace 222 | ports: 223 | - name: http 224 | containerPort: 80 225 | - name: https 226 | containerPort: 443 227 | - name: admin 228 | containerPort: 8877 229 | livenessProbe: 230 | httpGet: 231 | path: /ambassador/v0/check_alive 232 | port: 8877 233 | initialDelaySeconds: 30 234 | periodSeconds: 3 235 | readinessProbe: 236 | httpGet: 237 | path: /ambassador/v0/check_ready 238 | port: 8877 239 | initialDelaySeconds: 30 240 | periodSeconds: 3 241 | - name: ambassador-statsd 242 | image: prom/statsd-exporter:v0.7.0 243 | ports: 244 | - name: metrics 245 | containerPort: 9102 246 | - name: listener 247 | containerPort: 8125 248 | args: ["--statsd.listen-udp=:8125", 
"--statsd.mapping-config=/statsd-exporter/mapping-config.yaml"] 249 | volumeMounts: 250 | - name: stats-exporter-mapping-config 251 | mountPath: /statsd-exporter/ 252 | readOnly: true 253 | restartPolicy: Always 254 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Ambassador/Consul Connect E2E Kubernetes Demo 3 | ============================================= 4 | 5 | ## Purpose 6 | 7 | This repo contains the configuration files and scripts to set up a Kubernetes 8 | cluster with everything you need to demo or explore Consul Connect. 9 | Specifically, it demonstrates the following features: 10 | 11 | * [Helm chart][helm-blog] to deploy Consul agents and servers with Gossip 12 | encryption, the Consul Connect injector, and catalog sync between 13 | Consul and Kubernetes. 14 | 15 | * Automatic injection of [Consul Connect sidecars][sidecars] into pods with 16 | a simple annotation. 17 | 18 | * An instance of the [HTTP echo service][echo] and its client, so you can 19 | test Connect functionality. 20 | 21 | * An instance of the DataWire [QoTM service][qotm], also for testing. 22 | 23 | * Working instance of the [Weaveworks Sock Shop][sockshop] microservice demo, 24 | with Consul Connect mediating all connections between services. 25 | 26 | * DataWire [Ambassador][] as the L7 gateway, routing requests from the 27 | Internet to the Connect proxies with full mutual TLS and using Consul 28 | for service discovery. 29 | 30 | --- 31 | 32 | ## Prerequisites 33 | 34 | You should also have a Kubernetes cluster up and running already, with the 35 | `kubectl` utility configured properly to talk to it, and a recent release of 36 | [Helm][]. On a Mac with Homebrew, you can simply type: 37 | 38 | brew install kubernetes-helm kubernetes-cli 39 | 40 | Create a Kubernetes secret with the contents of your Consul Enterprise license 41 | key. 
42 | 43 | kubectl create secret generic consul-ent-license --from-file=bef1b5c5-4290-a854-a34b-af1651d5d41b.hclic 44 | 45 | --- 46 | 47 | ## Setup 48 | 49 | #### Helm and Tiller 50 | 51 | Run `tiller/helm-init.sh` to create a service account and install the Tiller 52 | service. 53 | 54 | #### Kubernetes Dashboard (optional) 55 | 56 | If you prefer using the dashboard to the CLI, you can run 57 | `kube-dashboard/dashboard.sh` to install the dashboard and start a local 58 | proxy. You can then log into the dashboard at http://localhost:8001/api/v1/namespaces/kube-system/services/https:kubernetes-dashboard:/proxy/. 59 | 60 | If the dashboard asks you to log in, there are helper scripts in `token-*.sh` 61 | that will copy an auth token onto the clipboard. 62 | 63 | #### Consul servers and clients 64 | 65 | The `consul/consul.sh` script uses the Consul Helm chart to deploy Consul 66 | servers and client agents. The Helm chart does all of the following: 67 | 68 | * Deploy 3 Consul server pods, each with 20GB storage (currently version 69 | 1.4.2 OSS). 70 | 71 | * Deploy agent pods to each host in the cluster. 72 | 73 | * Deploy the `consul-k8s` pod for service catalog sync and automatic 74 | Consul Connect injection. 75 | 76 | The chart can be customized in various ways by editing `consul/values.yaml`, 77 | for example: 78 | 79 | * If you are testing with minikube, you should change `server.replicas` and 80 | `server.bootstrapExpect` to 1. 81 | 82 | * You can use a specific version of [Consul][consul-tags], 83 | [Envoy][envoy-tags], or [`consul-k8s`][k8s-tags] by changing the 84 | `image` fields. 85 | 86 | * You can expose the Consul UI outside the cluster by changing 87 | `ui.service.type` from `NodePort` to `LoadBalancer` if you'd prefer not 88 | to use the `kubectl` port forwarder. 89 | 90 | If you would rather deploy enterprise binaries instead of OSS, make the 91 | following changes: 92 | 93 | * Create a secret in Kubernetes that contains your license key. 
See the 94 | comments at the top of `values.yaml` for an example. 95 | 96 | * Add the `-ent` tag to the end of the tag specified in `global.image`, 97 | e.g. `"consul:1.4.2-ent"`. 98 | 99 | * Set the name of your secret in `server.enterpriseLicense.secretName` 100 | and `server.enterpriseLicense.secretKey`. Use the commented entries 101 | in `values.yaml` as your guide. 102 | 103 | If anything goes wrong with the deployment (e.g. due to syntax errors in your 104 | `values.yaml`), you can use the `consul/clean.sh` script to clean everything 105 | out and try again. 106 | 107 | #### Simple HTTP echo service 108 | 109 | Some of the Consul docs use an HTTP "echo" service and client to demonstrate 110 | various concepts. Those can be deployed with `simple/simple.sh`. 111 | 112 | To test the connection between client and server, see the "Testing and Demos" 113 | section below. 114 | 115 | #### Load intentions into Consul Connect 116 | 117 | The `consul/intentions.sh` script creates a default set of intentions for 118 | Connect to enable the demos to run: 119 | 120 | * Ambassador may talk to anything. 121 | 122 | * The `carts`, `orders`, `catalogue`, and `user` services from the Sock Shop 123 | demo can talk to their own databases (`carts-db`, `orders-db`, etc). 124 | 125 | * The Sock Shop `front-end` web server can talk to the `carts`, `orders`, 126 | `catalogue`, and `user` services. 127 | 128 | * The HTTP echo client may talk to the echo server. 129 | 130 | * All other traffic is denied. 131 | 132 | #### Sock Shop demo 133 | 134 | Run the `sockshop/weaveworks.sh` script to deploy a version of the Sock Shop 135 | demo that is customized to use Consul Connect. Very few changes were actually 136 | necessary -- this shows how easy it is to adapt your own applications to 137 | Connect! 
138 | 139 | The changes made were as follows: 140 | 141 | * In the Helm chart, annotate the pods to mark them for [sidecar 142 | injection][cartdb] and declare the [upstream dependencies][carts]. 143 | 144 | * For each downstream service, add either an [environment variable][user] 145 | or a [command-line option][catalogue] to tell them to look on localhost 146 | for their upstreams. 147 | 148 | The only service that required actual code changes was the `front-end`, 149 | because it had the upstream service names hard-coded. I took advantage of the 150 | environment variables created by Connect, as you can see [here][frontend]. 151 | If you remove the Connect injection, the front end will revert to its old 152 | behavior. 153 | 154 | #### Ambassador 155 | 156 | Finally, deploy the Ambassador proxy by running `ambassador/ambassador.sh`. 157 | This will install Ambassador itself as well as the Consul Connector, which 158 | looks up the Consul mTLS certificates and provides them to the main 159 | Ambassador service. 160 | 161 | --- 162 | 163 | ## Testing and Demos 164 | 165 | #### Finding the IP address of Ambassador 166 | 167 | As an L7 gateway, Ambassador exposes a public IP address. You'll need to know 168 | that address to run any of the tests below. Use the command `kubectl describe 169 | service ambassador` and look for the "LoadBalancer Ingress". That is the 170 | public-facing IP address of your Ambassador service. When you see 171 | `AMBASSADOR_IP` in the examples below, replace it with that IP address. 172 | 173 | #### Simple HTTP echo service 174 | 175 | The HTTP echo client pod has been injected with a proxy to connect to 176 | the echo server. 
You can verify this by inspecting the pod and looking at 177 | the annotations: 178 | 179 | consul.hashicorp.com/connect-inject=true 180 | consul.hashicorp.com/connect-inject-status=injected 181 | consul.hashicorp.com/connect-service=http-echo-client 182 | consul.hashicorp.com/connect-service-upstreams=http-echo:1234 183 | 184 | You'll also see that the pod contains an extra container named 185 | `consul-connect-envoy-sidecar`. This is the proxy that carries connections 186 | to the upstream service. 187 | 188 | You can verify that the connection to the upstream service works by running 189 | a `curl` command inside the client container: 190 | 191 | $ kubectl exec -it http-echo-client curl localhost:1234 192 | "hello world" 193 | 194 | Try changing the intention from "allow" to "deny" and the `curl` command 195 | stops working immediately: 196 | 197 | $ consul intention create -replace -deny '*' http-echo 198 | $ kubectl exec -it http-echo-client curl localhost:1234 199 | curl: (52) Empty reply from server 200 | command terminated with exit code 52 201 | 202 | And of course you can allow traffic again via: 203 | 204 | $ consul intention create -replace -allow '*' http-echo 205 | 206 | You can also invoke the service from Ambassador: 207 | 208 | $ curl http://AMBASSADOR_IP/echo/ 209 | 210 | #### Ambassador QoTM service 211 | 212 | Ambassador provides a "Quote of the Moment" service. You can test it 213 | by opening http://AMBASSADOR_IP/qotm/. 214 | 215 | #### Sock Shop demo 216 | 217 | Finally, the big enchilada! Visit http://AMBASSADOR_IP/socks/ to test it out. 
218 | The traffic flow to serve the page looks like this: 219 | 220 | ![traffic flow](data_flow.png) 221 | 222 | --- 223 | 224 | ## Future enhancements 225 | 226 | - [ ] Monitoring with Prometheus 227 | - [ ] Outward-facing SSL/TLS with Ambassador 228 | - [ ] ACL bootstrapping 229 | 230 | [sidecars]: https://www.consul.io/docs/platform/k8s/connect.html 231 | [sockshop]: https://microservices-demo.github.io/ 232 | [helm]: https://helm.sh/ 233 | [helm-blog]: https://kubernetes.io/blog/2016/10/helm-charts-making-it-simple-to-package-and-deploy-apps-on-kubernetes/ 234 | [ambassador]: https://www.getambassador.io/ 235 | [connector]: https://www.getambassador.io/user-guide/consul-connect-ambassador/ 236 | [qotm]: https://github.com/datawire/qotm 237 | [echo]: https://github.com/hashicorp/http-echo 238 | [proxy]: http://localhost:8001/api/v1/namespaces/kube-system/services/https:kubernetes-dashboard:/proxy/#!/overview?namespace=default 239 | [consul-tags]: https://hub.docker.com/_/consul?tab=tags 240 | [k8s-tags]: https://hub.docker.com/r/hashicorp/consul-k8s/tags 241 | [envoy-tags]: https://hub.docker.com/r/envoyproxy/envoy-alpine/tags 242 | [me]: mailto:todd@hashicorp.com 243 | [frontend]: https://github.com/tradel/front-end/blob/9c32e77828993c4571ac2219843a999e6e4e12cf/api/endpoints.js#L18-L35 244 | [cartdb]: https://github.com/tradel/microservices-demo/blob/2bc270d61c993f8a1ae3c8a492cae504b7c3ade5/deploy/kubernetes/helm-chart/templates/cart-db-dep.yaml#L14-L15 245 | [carts]: https://github.com/tradel/microservices-demo/blob/2bc270d61c993f8a1ae3c8a492cae504b7c3ade5/deploy/kubernetes/helm-chart/templates/carts-dep.yaml#L14-L16 246 | [catalogue]: https://github.com/tradel/microservices-demo/blob/2bc270d61c993f8a1ae3c8a492cae504b7c3ade5/deploy/kubernetes/helm-chart/templates/catalogue-dep.yaml#L24 247 | [user]: https://github.com/tradel/microservices-demo/blob/2bc270d61c993f8a1ae3c8a492cae504b7c3ade5/deploy/kubernetes/helm-chart/templates/user-dep.yaml#L31-L32 248 | 
-------------------------------------------------------------------------------- /grafana/consul-cluster-health-dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | namespace: monitoring 5 | name: consul-cluster-health-dashboard 6 | labels: 7 | grafana_dashboard: "1" 8 | data: 9 | consul-cluster-health-dashboard.json: |- 10 | { 11 | "__inputs": [], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "5.4.3" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "5.0.0" 24 | }, 25 | { 26 | "type": "panel", 27 | "id": "singlestat", 28 | "name": "Singlestat", 29 | "version": "5.0.0" 30 | } 31 | ], 32 | "annotations": { 33 | "list": [ 34 | { 35 | "builtIn": 1, 36 | "datasource": "-- Grafana --", 37 | "enable": true, 38 | "hide": true, 39 | "iconColor": "rgba(0, 211, 255, 1)", 40 | "name": "Annotations & Alerts", 41 | "type": "dashboard" 42 | } 43 | ] 44 | }, 45 | "editable": true, 46 | "gnetId": null, 47 | "graphTooltip": 0, 48 | "id": null, 49 | "links": [], 50 | "panels": [ 51 | { 52 | "cacheTimeout": null, 53 | "colorBackground": false, 54 | "colorValue": true, 55 | "colors": [ 56 | "#299c46", 57 | "rgba(237, 129, 40, 0.89)", 58 | "#d44a3a" 59 | ], 60 | "format": "none", 61 | "gauge": { 62 | "maxValue": 1000, 63 | "minValue": 0, 64 | "show": true, 65 | "thresholdLabels": false, 66 | "thresholdMarkers": true 67 | }, 68 | "gridPos": { 69 | "h": 6, 70 | "w": 4, 71 | "x": 0, 72 | "y": 0 73 | }, 74 | "id": 2, 75 | "interval": null, 76 | "links": [], 77 | "mappingType": 1, 78 | "mappingTypes": [ 79 | { 80 | "name": "value to text", 81 | "value": 1 82 | }, 83 | { 84 | "name": "range to text", 85 | "value": 2 86 | } 87 | ], 88 | "maxDataPoints": 100, 89 | "nullPointMode": "connected", 90 | "nullText": null, 91 | "postfix": " ms", 92 | "postfixFontSize": "50%", 93 | "prefix": 
"", 94 | "prefixFontSize": "50%", 95 | "rangeMaps": [ 96 | { 97 | "from": "null", 98 | "text": "N/A", 99 | "to": "null" 100 | } 101 | ], 102 | "sparkline": { 103 | "fillColor": "rgba(31, 118, 189, 0.18)", 104 | "full": false, 105 | "lineColor": "rgb(31, 120, 193)", 106 | "show": false 107 | }, 108 | "tableColumn": "", 109 | "targets": [ 110 | { 111 | "expr": "max(consul_raft_leader_lastContact) ", 112 | "format": "time_series", 113 | "intervalFactor": 1, 114 | "legendFormat": "", 115 | "refId": "A" 116 | } 117 | ], 118 | "thresholds": "200,500", 119 | "title": "Leader Last Contact", 120 | "type": "singlestat", 121 | "valueFontSize": "80%", 122 | "valueMaps": [ 123 | { 124 | "op": "=", 125 | "text": "N/A", 126 | "value": "null" 127 | } 128 | ], 129 | "valueName": "avg" 130 | }, 131 | { 132 | "aliasColors": {}, 133 | "bars": false, 134 | "dashLength": 10, 135 | "dashes": false, 136 | "fill": 1, 137 | "gridPos": { 138 | "h": 6, 139 | "w": 5, 140 | "x": 4, 141 | "y": 0 142 | }, 143 | "id": 16, 144 | "legend": { 145 | "avg": false, 146 | "current": false, 147 | "max": false, 148 | "min": false, 149 | "show": false, 150 | "total": false, 151 | "values": false 152 | }, 153 | "lines": true, 154 | "linewidth": 1, 155 | "links": [], 156 | "nullPointMode": "null", 157 | "percentage": false, 158 | "pointradius": 5, 159 | "points": false, 160 | "renderer": "flot", 161 | "seriesOverrides": [], 162 | "spaceLength": 10, 163 | "stack": false, 164 | "steppedLine": false, 165 | "targets": [ 166 | { 167 | "expr": "avg by (kubernetes_node) (irate(node_cpu_seconds_total{mode=\"user\"}[1m])) * 100", 168 | "format": "time_series", 169 | "hide": false, 170 | "interval": "", 171 | "intervalFactor": 1, 172 | "legendFormat": "{{kubernetes_node}}", 173 | "refId": "B" 174 | } 175 | ], 176 | "thresholds": [], 177 | "timeFrom": null, 178 | "timeRegions": [], 179 | "timeShift": null, 180 | "title": "CPU Usage Per Node", 181 | "tooltip": { 182 | "shared": true, 183 | "sort": 0, 184 | "value_type": 
"individual" 185 | }, 186 | "type": "graph", 187 | "xaxis": { 188 | "buckets": null, 189 | "mode": "time", 190 | "name": null, 191 | "show": true, 192 | "values": [] 193 | }, 194 | "yaxes": [ 195 | { 196 | "decimals": 0, 197 | "format": "percent", 198 | "label": "", 199 | "logBase": 1, 200 | "max": null, 201 | "min": null, 202 | "show": true 203 | }, 204 | { 205 | "format": "short", 206 | "label": null, 207 | "logBase": 1, 208 | "max": null, 209 | "min": null, 210 | "show": true 211 | } 212 | ], 213 | "yaxis": { 214 | "align": false, 215 | "alignLevel": null 216 | } 217 | }, 218 | { 219 | "aliasColors": {}, 220 | "bars": false, 221 | "dashLength": 10, 222 | "dashes": false, 223 | "fill": 1, 224 | "gridPos": { 225 | "h": 6, 226 | "w": 5, 227 | "x": 9, 228 | "y": 0 229 | }, 230 | "id": 18, 231 | "legend": { 232 | "avg": false, 233 | "current": false, 234 | "max": false, 235 | "min": false, 236 | "show": false, 237 | "total": false, 238 | "values": false 239 | }, 240 | "lines": true, 241 | "linewidth": 1, 242 | "links": [], 243 | "nullPointMode": "null", 244 | "percentage": false, 245 | "pointradius": 5, 246 | "points": false, 247 | "renderer": "flot", 248 | "seriesOverrides": [], 249 | "spaceLength": 10, 250 | "stack": false, 251 | "steppedLine": false, 252 | "targets": [ 253 | { 254 | "expr": "sum(increase(node_disk_read_bytes_total[1m])) by (kubernetes_node)", 255 | "format": "time_series", 256 | "interval": "", 257 | "intervalFactor": 1, 258 | "legendFormat": "{{kubernetes_node}}:read", 259 | "refId": "A" 260 | }, 261 | { 262 | "expr": "sum(increase(node_disk_written_bytes_total[1m])) by (kubernetes_node)", 263 | "format": "time_series", 264 | "hide": false, 265 | "interval": "", 266 | "intervalFactor": 1, 267 | "legendFormat": "{{kubernetes_node}}:write", 268 | "refId": "B" 269 | } 270 | ], 271 | "thresholds": [], 272 | "timeFrom": null, 273 | "timeRegions": [], 274 | "timeShift": null, 275 | "title": "Disk I/O Per Node", 276 | "tooltip": { 277 | "shared": true, 
278 | "sort": 0, 279 | "value_type": "individual" 280 | }, 281 | "type": "graph", 282 | "xaxis": { 283 | "buckets": null, 284 | "mode": "time", 285 | "name": null, 286 | "show": true, 287 | "values": [] 288 | }, 289 | "yaxes": [ 290 | { 291 | "decimals": 0, 292 | "format": "bytes", 293 | "label": null, 294 | "logBase": 1, 295 | "max": null, 296 | "min": null, 297 | "show": true 298 | }, 299 | { 300 | "format": "short", 301 | "label": null, 302 | "logBase": 1, 303 | "max": null, 304 | "min": null, 305 | "show": true 306 | } 307 | ], 308 | "yaxis": { 309 | "align": false, 310 | "alignLevel": null 311 | } 312 | }, 313 | { 314 | "aliasColors": {}, 315 | "bars": false, 316 | "dashLength": 10, 317 | "dashes": false, 318 | "fill": 1, 319 | "gridPos": { 320 | "h": 6, 321 | "w": 5, 322 | "x": 14, 323 | "y": 0 324 | }, 325 | "id": 14, 326 | "legend": { 327 | "avg": false, 328 | "current": false, 329 | "max": false, 330 | "min": false, 331 | "show": false, 332 | "total": false, 333 | "values": false 334 | }, 335 | "lines": true, 336 | "linewidth": 1, 337 | "links": [], 338 | "nullPointMode": "null", 339 | "percentage": false, 340 | "pointradius": 5, 341 | "points": false, 342 | "renderer": "flot", 343 | "seriesOverrides": [], 344 | "spaceLength": 10, 345 | "stack": false, 346 | "steppedLine": false, 347 | "targets": [ 348 | { 349 | "expr": "sum(increase(node_network_receive_bytes_total[1m])) by (kubernetes_node)", 350 | "format": "time_series", 351 | "interval": "", 352 | "intervalFactor": 1, 353 | "legendFormat": "{{kubernetes_node}}:receive", 354 | "refId": "A" 355 | }, 356 | { 357 | "expr": "sum(increase(node_network_transmit_bytes_total[1m])) by (kubernetes_node)", 358 | "format": "time_series", 359 | "interval": "", 360 | "intervalFactor": 1, 361 | "legendFormat": "{{kubernetes_node}}:transmit", 362 | "refId": "B" 363 | } 364 | ], 365 | "thresholds": [], 366 | "timeFrom": null, 367 | "timeRegions": [], 368 | "timeShift": null, 369 | "title": "Network Traffic Per Node", 
370 | "tooltip": { 371 | "shared": true, 372 | "sort": 0, 373 | "value_type": "individual" 374 | }, 375 | "type": "graph", 376 | "xaxis": { 377 | "buckets": null, 378 | "mode": "time", 379 | "name": null, 380 | "show": true, 381 | "values": [] 382 | }, 383 | "yaxes": [ 384 | { 385 | "decimals": 0, 386 | "format": "bytes", 387 | "label": null, 388 | "logBase": 1, 389 | "max": null, 390 | "min": null, 391 | "show": true 392 | }, 393 | { 394 | "format": "short", 395 | "label": null, 396 | "logBase": 1, 397 | "max": null, 398 | "min": null, 399 | "show": true 400 | } 401 | ], 402 | "yaxis": { 403 | "align": false, 404 | "alignLevel": null 405 | } 406 | }, 407 | { 408 | "aliasColors": {}, 409 | "bars": false, 410 | "dashLength": 10, 411 | "dashes": false, 412 | "fill": 1, 413 | "gridPos": { 414 | "h": 7, 415 | "w": 7, 416 | "x": 0, 417 | "y": 6 418 | }, 419 | "id": 22, 420 | "legend": { 421 | "avg": false, 422 | "current": false, 423 | "max": false, 424 | "min": false, 425 | "show": true, 426 | "total": false, 427 | "values": false 428 | }, 429 | "lines": true, 430 | "linewidth": 1, 431 | "links": [], 432 | "nullPointMode": "null", 433 | "percentage": false, 434 | "pointradius": 5, 435 | "points": false, 436 | "renderer": "flot", 437 | "seriesOverrides": [], 438 | "spaceLength": 10, 439 | "stack": false, 440 | "steppedLine": false, 441 | "targets": [ 442 | { 443 | "expr": "increase(consul_client_rpc{app=\"consul\",component=\"client\"}[1m])", 444 | "format": "time_series", 445 | "interval": "", 446 | "intervalFactor": 1, 447 | "legendFormat": "{{kubernetes_pod_name}}", 448 | "refId": "A" 449 | } 450 | ], 451 | "thresholds": [], 452 | "timeFrom": null, 453 | "timeRegions": [], 454 | "timeShift": null, 455 | "title": "Client RPC Requests Per Minute", 456 | "tooltip": { 457 | "shared": true, 458 | "sort": 0, 459 | "value_type": "individual" 460 | }, 461 | "type": "graph", 462 | "xaxis": { 463 | "buckets": null, 464 | "mode": "time", 465 | "name": null, 466 | "show": true, 
467 | "values": [] 468 | }, 469 | "yaxes": [ 470 | { 471 | "format": "short", 472 | "label": null, 473 | "logBase": 1, 474 | "max": null, 475 | "min": null, 476 | "show": true 477 | }, 478 | { 479 | "format": "short", 480 | "label": null, 481 | "logBase": 1, 482 | "max": null, 483 | "min": null, 484 | "show": true 485 | } 486 | ], 487 | "yaxis": { 488 | "align": false, 489 | "alignLevel": null 490 | } 491 | }, 492 | { 493 | "aliasColors": {}, 494 | "bars": false, 495 | "dashLength": 10, 496 | "dashes": false, 497 | "fill": 1, 498 | "gridPos": { 499 | "h": 7, 500 | "w": 7, 501 | "x": 7, 502 | "y": 6 503 | }, 504 | "id": 4, 505 | "legend": { 506 | "alignAsTable": false, 507 | "avg": false, 508 | "current": false, 509 | "max": false, 510 | "min": false, 511 | "show": true, 512 | "total": false, 513 | "values": false 514 | }, 515 | "lines": true, 516 | "linewidth": 1, 517 | "links": [], 518 | "nullPointMode": "null", 519 | "percentage": false, 520 | "pointradius": 5, 521 | "points": false, 522 | "renderer": "flot", 523 | "seriesOverrides": [], 524 | "spaceLength": 10, 525 | "stack": false, 526 | "steppedLine": false, 527 | "targets": [ 528 | { 529 | "expr": "increase(consul_rpc_request{app=\"consul\",component=\"server\"}[1m])", 530 | "format": "time_series", 531 | "intervalFactor": 1, 532 | "legendFormat": "{{kubernetes_pod_name}}", 533 | "refId": "A" 534 | } 535 | ], 536 | "thresholds": [], 537 | "timeFrom": null, 538 | "timeRegions": [], 539 | "timeShift": null, 540 | "title": "Server RPC Requests Per Minute", 541 | "tooltip": { 542 | "shared": true, 543 | "sort": 0, 544 | "value_type": "individual" 545 | }, 546 | "type": "graph", 547 | "xaxis": { 548 | "buckets": null, 549 | "mode": "time", 550 | "name": null, 551 | "show": true, 552 | "values": [] 553 | }, 554 | "yaxes": [ 555 | { 556 | "format": "short", 557 | "label": "Count", 558 | "logBase": 1, 559 | "max": null, 560 | "min": null, 561 | "show": true 562 | }, 563 | { 564 | "format": "short", 565 | "label": 
null, 566 | "logBase": 1, 567 | "max": null, 568 | "min": null, 569 | "show": true 570 | } 571 | ], 572 | "yaxis": { 573 | "align": false, 574 | "alignLevel": null 575 | } 576 | }, 577 | { 578 | "aliasColors": {}, 579 | "bars": false, 580 | "dashLength": 10, 581 | "dashes": false, 582 | "fill": 1, 583 | "gridPos": { 584 | "h": 7, 585 | "w": 5, 586 | "x": 14, 587 | "y": 6 588 | }, 589 | "id": 12, 590 | "legend": { 591 | "avg": false, 592 | "current": false, 593 | "max": false, 594 | "min": false, 595 | "show": false, 596 | "total": false, 597 | "values": false 598 | }, 599 | "lines": true, 600 | "linewidth": 1, 601 | "links": [], 602 | "nullPointMode": "null", 603 | "percentage": false, 604 | "pointradius": 5, 605 | "points": false, 606 | "renderer": "flot", 607 | "seriesOverrides": [ 608 | { 609 | "alias": "/^sys-/", 610 | "dashes": true, 611 | "fill": 0 612 | } 613 | ], 614 | "spaceLength": 10, 615 | "stack": false, 616 | "steppedLine": false, 617 | "targets": [ 618 | { 619 | "expr": "go_memstats_alloc_bytes{app=\"consul\",component=\"server\"}", 620 | "format": "time_series", 621 | "interval": "", 622 | "intervalFactor": 1, 623 | "legendFormat": "{{kubernetes_pod_name}}:alloc", 624 | "refId": "A" 625 | }, 626 | { 627 | "expr": "go_memstats_sys_bytes{app=\"consul\",component=\"server\"}", 628 | "format": "time_series", 629 | "intervalFactor": 1, 630 | "legendFormat": "{{kubernetes_pod_name}}:sys", 631 | "refId": "B" 632 | } 633 | ], 634 | "thresholds": [], 635 | "timeFrom": null, 636 | "timeRegions": [], 637 | "timeShift": null, 638 | "title": "Allocated Memory", 639 | "tooltip": { 640 | "shared": true, 641 | "sort": 0, 642 | "value_type": "individual" 643 | }, 644 | "type": "graph", 645 | "xaxis": { 646 | "buckets": null, 647 | "mode": "time", 648 | "name": null, 649 | "show": true, 650 | "values": [] 651 | }, 652 | "yaxes": [ 653 | { 654 | "decimals": 0, 655 | "format": "bytes", 656 | "label": null, 657 | "logBase": 1, 658 | "max": null, 659 | "min": null, 660 | 
"show": true 661 | }, 662 | { 663 | "format": "short", 664 | "label": null, 665 | "logBase": 1, 666 | "max": null, 667 | "min": null, 668 | "show": true 669 | } 670 | ], 671 | "yaxis": { 672 | "align": false, 673 | "alignLevel": null 674 | } 675 | }, 676 | { 677 | "aliasColors": {}, 678 | "bars": true, 679 | "dashLength": 10, 680 | "dashes": false, 681 | "fill": 1, 682 | "gridPos": { 683 | "h": 7, 684 | "w": 7, 685 | "x": 0, 686 | "y": 13 687 | }, 688 | "id": 8, 689 | "legend": { 690 | "avg": false, 691 | "current": false, 692 | "max": false, 693 | "min": false, 694 | "show": true, 695 | "total": false, 696 | "values": false 697 | }, 698 | "lines": false, 699 | "linewidth": 1, 700 | "links": [], 701 | "nullPointMode": "null", 702 | "percentage": false, 703 | "pointradius": 5, 704 | "points": false, 705 | "renderer": "flot", 706 | "seriesOverrides": [ 707 | { 708 | "alias": "time-99th-percentile", 709 | "bars": false, 710 | "lines": true, 711 | "yaxis": 2 712 | } 713 | ], 714 | "spaceLength": 10, 715 | "stack": false, 716 | "steppedLine": false, 717 | "targets": [ 718 | { 719 | "expr": "sum(increase(consul_raft_apply[1m]))", 720 | "format": "time_series", 721 | "instant": false, 722 | "interval": "", 723 | "intervalFactor": 1, 724 | "legendFormat": "count", 725 | "refId": "A" 726 | }, 727 | { 728 | "expr": "max(consul_raft_commitTime{quantile=\"0.99\"}) ", 729 | "format": "time_series", 730 | "interval": "", 731 | "intervalFactor": 1, 732 | "legendFormat": "time-99th-percentile", 733 | "refId": "B" 734 | } 735 | ], 736 | "thresholds": [], 737 | "timeFrom": null, 738 | "timeRegions": [], 739 | "timeShift": null, 740 | "title": "Raft Transactions", 741 | "tooltip": { 742 | "shared": true, 743 | "sort": 0, 744 | "value_type": "individual" 745 | }, 746 | "type": "graph", 747 | "xaxis": { 748 | "buckets": null, 749 | "mode": "time", 750 | "name": null, 751 | "show": true, 752 | "values": [] 753 | }, 754 | "yaxes": [ 755 | { 756 | "format": "short", 757 | "label": 
"Count", 758 | "logBase": 1, 759 | "max": null, 760 | "min": null, 761 | "show": true 762 | }, 763 | { 764 | "format": "ms", 765 | "label": "", 766 | "logBase": 1, 767 | "max": null, 768 | "min": null, 769 | "show": true 770 | } 771 | ], 772 | "yaxis": { 773 | "align": false, 774 | "alignLevel": null 775 | } 776 | }, 777 | { 778 | "aliasColors": {}, 779 | "bars": false, 780 | "dashLength": 10, 781 | "dashes": false, 782 | "fill": 1, 783 | "gridPos": { 784 | "h": 7, 785 | "w": 7, 786 | "x": 7, 787 | "y": 13 788 | }, 789 | "id": 6, 790 | "legend": { 791 | "avg": false, 792 | "current": false, 793 | "max": false, 794 | "min": false, 795 | "show": true, 796 | "total": false, 797 | "values": false 798 | }, 799 | "lines": true, 800 | "linewidth": 1, 801 | "links": [], 802 | "nullPointMode": "null", 803 | "percentage": false, 804 | "pointradius": 5, 805 | "points": false, 806 | "renderer": "flot", 807 | "seriesOverrides": [], 808 | "spaceLength": 10, 809 | "stack": false, 810 | "steppedLine": false, 811 | "targets": [ 812 | { 813 | "expr": "consul_raft_commitTime{app=\"consul\",quantile=\"0.99\"}", 814 | "format": "time_series", 815 | "intervalFactor": 1, 816 | "legendFormat": "{{kubernetes_pod_name}}", 817 | "refId": "A" 818 | } 819 | ], 820 | "thresholds": [], 821 | "timeFrom": null, 822 | "timeRegions": [], 823 | "timeShift": null, 824 | "title": "Raft Commit Time", 825 | "tooltip": { 826 | "shared": true, 827 | "sort": 0, 828 | "value_type": "individual" 829 | }, 830 | "type": "graph", 831 | "xaxis": { 832 | "buckets": null, 833 | "mode": "time", 834 | "name": null, 835 | "show": true, 836 | "values": [] 837 | }, 838 | "yaxes": [ 839 | { 840 | "decimals": 0, 841 | "format": "ms", 842 | "label": "", 843 | "logBase": 1, 844 | "max": null, 845 | "min": null, 846 | "show": true 847 | }, 848 | { 849 | "format": "short", 850 | "label": null, 851 | "logBase": 1, 852 | "max": null, 853 | "min": null, 854 | "show": true 855 | } 856 | ], 857 | "yaxis": { 858 | "align": false, 
859 | "alignLevel": null 860 | } 861 | }, 862 | { 863 | "aliasColors": {}, 864 | "bars": false, 865 | "dashLength": 10, 866 | "dashes": false, 867 | "fill": 1, 868 | "gridPos": { 869 | "h": 7, 870 | "w": 5, 871 | "x": 14, 872 | "y": 13 873 | }, 874 | "id": 10, 875 | "legend": { 876 | "avg": false, 877 | "current": false, 878 | "max": false, 879 | "min": false, 880 | "show": true, 881 | "total": false, 882 | "values": false 883 | }, 884 | "lines": true, 885 | "linewidth": 1, 886 | "links": [], 887 | "nullPointMode": "null", 888 | "percentage": false, 889 | "pointradius": 5, 890 | "points": false, 891 | "renderer": "flot", 892 | "seriesOverrides": [], 893 | "spaceLength": 10, 894 | "stack": false, 895 | "steppedLine": false, 896 | "targets": [ 897 | { 898 | "expr": "go_gc_duration_seconds{app=\"consul\",component=\"server\",quantile=\"1\"}", 899 | "format": "time_series", 900 | "interval": "", 901 | "intervalFactor": 1, 902 | "legendFormat": "{{kubernetes_pod_name}}", 903 | "refId": "A" 904 | } 905 | ], 906 | "thresholds": [], 907 | "timeFrom": null, 908 | "timeRegions": [], 909 | "timeShift": null, 910 | "title": "GC Time", 911 | "tooltip": { 912 | "shared": true, 913 | "sort": 0, 914 | "value_type": "individual" 915 | }, 916 | "type": "graph", 917 | "xaxis": { 918 | "buckets": null, 919 | "mode": "time", 920 | "name": null, 921 | "show": true, 922 | "values": [] 923 | }, 924 | "yaxes": [ 925 | { 926 | "decimals": 3, 927 | "format": "s", 928 | "label": "", 929 | "logBase": 1, 930 | "max": null, 931 | "min": null, 932 | "show": true 933 | }, 934 | { 935 | "format": "short", 936 | "label": null, 937 | "logBase": 1, 938 | "max": null, 939 | "min": null, 940 | "show": true 941 | } 942 | ], 943 | "yaxis": { 944 | "align": false, 945 | "alignLevel": null 946 | } 947 | }, 948 | { 949 | "aliasColors": {}, 950 | "bars": false, 951 | "dashLength": 10, 952 | "dashes": false, 953 | "fill": 1, 954 | "gridPos": { 955 | "h": 6, 956 | "w": 7, 957 | "x": 0, 958 | "y": 20 959 | }, 
960 | "id": 24, 961 | "legend": { 962 | "avg": false, 963 | "current": false, 964 | "max": false, 965 | "min": false, 966 | "show": true, 967 | "total": false, 968 | "values": false 969 | }, 970 | "lines": true, 971 | "linewidth": 1, 972 | "links": [], 973 | "nullPointMode": "null", 974 | "percentage": false, 975 | "pointradius": 5, 976 | "points": false, 977 | "renderer": "flot", 978 | "seriesOverrides": [], 979 | "spaceLength": 10, 980 | "stack": false, 981 | "steppedLine": false, 982 | "targets": [ 983 | { 984 | "expr": "increase(consul_kvs_apply_count[1m])", 985 | "format": "time_series", 986 | "interval": "", 987 | "intervalFactor": 1, 988 | "legendFormat": "count", 989 | "refId": "A" 990 | } 991 | ], 992 | "thresholds": [], 993 | "timeFrom": null, 994 | "timeRegions": [], 995 | "timeShift": null, 996 | "title": "KV Write Load", 997 | "tooltip": { 998 | "shared": true, 999 | "sort": 0, 1000 | "value_type": "individual" 1001 | }, 1002 | "type": "graph", 1003 | "xaxis": { 1004 | "buckets": null, 1005 | "mode": "time", 1006 | "name": null, 1007 | "show": true, 1008 | "values": [] 1009 | }, 1010 | "yaxes": [ 1011 | { 1012 | "format": "short", 1013 | "label": null, 1014 | "logBase": 1, 1015 | "max": null, 1016 | "min": null, 1017 | "show": true 1018 | }, 1019 | { 1020 | "format": "short", 1021 | "label": null, 1022 | "logBase": 1, 1023 | "max": null, 1024 | "min": null, 1025 | "show": true 1026 | } 1027 | ], 1028 | "yaxis": { 1029 | "align": false, 1030 | "alignLevel": null 1031 | } 1032 | }, 1033 | { 1034 | "aliasColors": {}, 1035 | "bars": false, 1036 | "dashLength": 10, 1037 | "dashes": false, 1038 | "fill": 1, 1039 | "gridPos": { 1040 | "h": 6, 1041 | "w": 7, 1042 | "x": 7, 1043 | "y": 20 1044 | }, 1045 | "id": 25, 1046 | "legend": { 1047 | "avg": false, 1048 | "current": false, 1049 | "max": false, 1050 | "min": false, 1051 | "show": true, 1052 | "total": false, 1053 | "values": false 1054 | }, 1055 | "lines": true, 1056 | "linewidth": 1, 1057 | "links": [], 
1058 | "nullPointMode": "null", 1059 | "percentage": false, 1060 | "pointradius": 5, 1061 | "points": false, 1062 | "renderer": "flot", 1063 | "seriesOverrides": [], 1064 | "spaceLength": 10, 1065 | "stack": false, 1066 | "steppedLine": false, 1067 | "targets": [ 1068 | { 1069 | "expr": "consul_kvs_apply{quantile=\"0.99\"}", 1070 | "format": "time_series", 1071 | "interval": "", 1072 | "intervalFactor": 1, 1073 | "legendFormat": "time", 1074 | "refId": "A" 1075 | } 1076 | ], 1077 | "thresholds": [], 1078 | "timeFrom": null, 1079 | "timeRegions": [], 1080 | "timeShift": null, 1081 | "title": "KVS Apply Time", 1082 | "tooltip": { 1083 | "shared": true, 1084 | "sort": 0, 1085 | "value_type": "individual" 1086 | }, 1087 | "type": "graph", 1088 | "xaxis": { 1089 | "buckets": null, 1090 | "mode": "time", 1091 | "name": null, 1092 | "show": true, 1093 | "values": [] 1094 | }, 1095 | "yaxes": [ 1096 | { 1097 | "decimals": 0, 1098 | "format": "ms", 1099 | "label": "", 1100 | "logBase": 1, 1101 | "max": null, 1102 | "min": null, 1103 | "show": true 1104 | }, 1105 | { 1106 | "format": "short", 1107 | "label": null, 1108 | "logBase": 1, 1109 | "max": null, 1110 | "min": null, 1111 | "show": true 1112 | } 1113 | ], 1114 | "yaxis": { 1115 | "align": false, 1116 | "alignLevel": null 1117 | } 1118 | }, 1119 | { 1120 | "cacheTimeout": null, 1121 | "colorBackground": true, 1122 | "colorValue": false, 1123 | "colors": [ 1124 | "#299c46", 1125 | "rgba(237, 129, 40, 0.89)", 1126 | "#d44a3a" 1127 | ], 1128 | "format": "none", 1129 | "gauge": { 1130 | "maxValue": 100, 1131 | "minValue": 0, 1132 | "show": false, 1133 | "thresholdLabels": false, 1134 | "thresholdMarkers": true 1135 | }, 1136 | "gridPos": { 1137 | "h": 6, 1138 | "w": 5, 1139 | "x": 14, 1140 | "y": 20 1141 | }, 1142 | "id": 20, 1143 | "interval": null, 1144 | "links": [], 1145 | "mappingType": 1, 1146 | "mappingTypes": [ 1147 | { 1148 | "name": "value to text", 1149 | "value": 1 1150 | }, 1151 | { 1152 | "name": "range to 
text", 1153 | "value": 2 1154 | } 1155 | ], 1156 | "maxDataPoints": 100, 1157 | "nullPointMode": "connected", 1158 | "nullText": null, 1159 | "postfix": "", 1160 | "postfixFontSize": "50%", 1161 | "prefix": "", 1162 | "prefixFontSize": "50%", 1163 | "rangeMaps": [ 1164 | { 1165 | "from": "null", 1166 | "text": "N/A", 1167 | "to": "null" 1168 | } 1169 | ], 1170 | "sparkline": { 1171 | "fillColor": "rgba(31, 118, 189, 0.18)", 1172 | "full": false, 1173 | "lineColor": "rgb(0, 0, 0)", 1174 | "show": true 1175 | }, 1176 | "tableColumn": "", 1177 | "targets": [ 1178 | { 1179 | "expr": "increase(consul_raft_state_leader[1h])", 1180 | "format": "time_series", 1181 | "intervalFactor": 1, 1182 | "legendFormat": "", 1183 | "refId": "A" 1184 | } 1185 | ], 1186 | "thresholds": "1,5", 1187 | "title": "Leader Elections", 1188 | "type": "singlestat", 1189 | "valueFontSize": "80%", 1190 | "valueMaps": [ 1191 | { 1192 | "op": "=", 1193 | "text": "N/A", 1194 | "value": "null" 1195 | }, 1196 | { 1197 | "op": "=", 1198 | "text": "None :-)", 1199 | "value": "0" 1200 | } 1201 | ], 1202 | "valueName": "total" 1203 | } 1204 | ], 1205 | "refresh": false, 1206 | "schemaVersion": 16, 1207 | "style": "dark", 1208 | "tags": [], 1209 | "templating": { 1210 | "list": [] 1211 | }, 1212 | "time": { 1213 | "from": "now-1h", 1214 | "to": "now" 1215 | }, 1216 | "timepicker": { 1217 | "refresh_intervals": [ 1218 | "5s", 1219 | "10s", 1220 | "30s", 1221 | "1m", 1222 | "5m", 1223 | "15m", 1224 | "30m", 1225 | "1h", 1226 | "2h", 1227 | "1d" 1228 | ], 1229 | "time_options": [ 1230 | "5m", 1231 | "15m", 1232 | "1h", 1233 | "6h", 1234 | "12h", 1235 | "24h", 1236 | "2d", 1237 | "7d", 1238 | "30d" 1239 | ] 1240 | }, 1241 | "timezone": "", 1242 | "title": "Consul Cluster Health", 1243 | "uid": "-ijohiuik", 1244 | "version": 1 1245 | } -------------------------------------------------------------------------------- /prometheus/default-values.yaml: 
-------------------------------------------------------------------------------- 1 | rbac: 2 | create: true 3 | 4 | imagePullSecrets: 5 | # - name: "image-pull-secret" 6 | 7 | ## Define serviceAccount names for components. Defaults to component's fully qualified name. 8 | ## 9 | serviceAccounts: 10 | alertmanager: 11 | create: true 12 | name: 13 | kubeStateMetrics: 14 | create: true 15 | name: 16 | nodeExporter: 17 | create: true 18 | name: 19 | pushgateway: 20 | create: true 21 | name: 22 | server: 23 | create: true 24 | name: 25 | 26 | alertmanager: 27 | ## If false, alertmanager will not be installed 28 | ## 29 | enabled: true 30 | 31 | ## alertmanager container name 32 | ## 33 | name: alertmanager 34 | 35 | ## alertmanager container image 36 | ## 37 | image: 38 | repository: prom/alertmanager 39 | tag: v0.15.3 40 | pullPolicy: IfNotPresent 41 | 42 | ## alertmanager priorityClassName 43 | ## 44 | priorityClassName: "" 45 | 46 | ## Additional alertmanager container arguments 47 | ## 48 | extraArgs: {} 49 | 50 | ## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug 51 | ## so that the various internal URLs are still able to access as they are in the default case. 
52 | ## (Optional) 53 | prefixURL: "" 54 | 55 | ## External URL which can access alertmanager 56 | ## Maybe same with Ingress host name 57 | baseURL: "/" 58 | 59 | ## Additional alertmanager container environment variable 60 | ## For instance to add a http_proxy 61 | ## 62 | extraEnv: {} 63 | 64 | ## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.alertmanager.configMapOverrideName}} 65 | ## Defining configMapOverrideName will cause templates/alertmanager-configmap.yaml 66 | ## to NOT generate a ConfigMap resource 67 | ## 68 | configMapOverrideName: "" 69 | 70 | ## The name of a secret in the same kubernetes namespace which contains the Alertmanager config 71 | ## Defining configFromSecret will cause templates/alertmanager-configmap.yaml 72 | ## to NOT generate a ConfigMap resource 73 | ## 74 | configFromSecret: "" 75 | 76 | ## The configuration file name to be loaded to alertmanager 77 | ## Must match the key within configuration loaded from ConfigMap/Secret 78 | ## 79 | configFileName: alertmanager.yml 80 | 81 | ingress: 82 | ## If true, alertmanager Ingress will be created 83 | ## 84 | enabled: false 85 | 86 | ## alertmanager Ingress annotations 87 | ## 88 | annotations: {} 89 | # kubernetes.io/ingress.class: nginx 90 | # kubernetes.io/tls-acme: 'true' 91 | 92 | ## alertmanager Ingress additional labels 93 | ## 94 | extraLabels: {} 95 | 96 | ## alertmanager Ingress hostnames with optional path 97 | ## Must be provided if Ingress is enabled 98 | ## 99 | hosts: [] 100 | # - alertmanager.domain.com 101 | # - domain.com/alertmanager 102 | 103 | ## alertmanager Ingress TLS configuration 104 | ## Secrets must be manually created in the namespace 105 | ## 106 | tls: [] 107 | # - secretName: prometheus-alerts-tls 108 | # hosts: 109 | # - alertmanager.domain.com 110 | 111 | ## Alertmanager Deployment Strategy type 112 | # strategy: 113 | # type: Recreate 114 | 115 | ## Node tolerations for alertmanager scheduling to nodes with taints 116 | ## Ref: 
https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ 117 | ## 118 | tolerations: [] 119 | # - key: "key" 120 | # operator: "Equal|Exists" 121 | # value: "value" 122 | # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" 123 | 124 | ## Node labels for alertmanager pod assignment 125 | ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ 126 | ## 127 | nodeSelector: {} 128 | 129 | ## Pod affinity 130 | ## 131 | affinity: {} 132 | 133 | ## Use an alternate scheduler, e.g. "stork". 134 | ## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ 135 | ## 136 | # schedulerName: 137 | 138 | persistentVolume: 139 | ## If true, alertmanager will create/use a Persistent Volume Claim 140 | ## If false, use emptyDir 141 | ## 142 | enabled: true 143 | 144 | ## alertmanager data Persistent Volume access modes 145 | ## Must match those of existing PV or dynamic provisioner 146 | ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ 147 | ## 148 | accessModes: 149 | - ReadWriteOnce 150 | 151 | ## alertmanager data Persistent Volume Claim annotations 152 | ## 153 | annotations: {} 154 | 155 | ## alertmanager data Persistent Volume existing claim name 156 | ## Requires alertmanager.persistentVolume.enabled: true 157 | ## If defined, PVC must be created manually before volume will be bound 158 | existingClaim: "" 159 | 160 | ## alertmanager data Persistent Volume mount root path 161 | ## 162 | mountPath: /data 163 | 164 | ## alertmanager data Persistent Volume size 165 | ## 166 | size: 2Gi 167 | 168 | ## alertmanager data Persistent Volume Storage Class 169 | ## If defined, storageClassName: 170 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 171 | ## If undefined (the default) or set to null, no storageClassName spec is 172 | ## set, choosing the default provisioner. 
(gp2 on AWS, standard on 173 | ## GKE, AWS & OpenStack) 174 | ## 175 | # storageClass: "-" 176 | 177 | ## Subdirectory of alertmanager data Persistent Volume to mount 178 | ## Useful if the volume's root directory is not empty 179 | ## 180 | subPath: "" 181 | 182 | ## Annotations to be added to alertmanager pods 183 | ## 184 | podAnnotations: {} 185 | 186 | ## Use a StatefulSet if replicaCount needs to be greater than 1 (see below) 187 | ## 188 | replicaCount: 1 189 | 190 | statefulSet: 191 | ## If true, use a statefulset instead of a deployment for pod management. 192 | ## This allows to scale replicas to more than 1 pod 193 | ## 194 | enabled: false 195 | 196 | podManagementPolicy: OrderedReady 197 | 198 | ## Alertmanager headless service to use for the statefulset 199 | ## 200 | headless: 201 | annotations: {} 202 | labels: {} 203 | 204 | ## Enabling peer mesh service end points for enabling the HA alert manager 205 | ## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md 206 | # enableMeshPeer : true 207 | 208 | servicePort: 80 209 | 210 | ## alertmanager resource requests and limits 211 | ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ 212 | ## 213 | resources: {} 214 | # limits: 215 | # cpu: 10m 216 | # memory: 32Mi 217 | # requests: 218 | # cpu: 10m 219 | # memory: 32Mi 220 | 221 | ## Security context to be added to alertmanager pods 222 | ## 223 | securityContext: {} 224 | 225 | service: 226 | annotations: {} 227 | labels: {} 228 | clusterIP: "" 229 | 230 | ## Enabling peer mesh service end points for enabling the HA alert manager 231 | ## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md 232 | # enableMeshPeer : true 233 | 234 | ## List of IP addresses at which the alertmanager service is available 235 | ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips 236 | ## 237 | externalIPs: [] 238 | 239 | loadBalancerIP: "" 240 | loadBalancerSourceRanges: [] 241 | servicePort: 80 242 | # 
nodePort: 30000 243 | type: ClusterIP 244 | 245 | ## Monitors ConfigMap changes and POSTs to a URL 246 | ## Ref: https://github.com/jimmidyson/configmap-reload 247 | ## 248 | configmapReload: 249 | ## configmap-reload container name 250 | ## 251 | name: configmap-reload 252 | 253 | ## configmap-reload container image 254 | ## 255 | image: 256 | repository: jimmidyson/configmap-reload 257 | tag: v0.2.2 258 | pullPolicy: IfNotPresent 259 | 260 | ## Additional configmap-reload container arguments 261 | ## 262 | extraArgs: {} 263 | ## Additional configmap-reload volume directories 264 | ## 265 | extraVolumeDirs: [] 266 | 267 | 268 | ## Additional configmap-reload mounts 269 | ## 270 | extraConfigmapMounts: [] 271 | # - name: prometheus-alerts 272 | # mountPath: /etc/alerts.d 273 | # subPath: "" 274 | # configMap: prometheus-alerts 275 | # readOnly: true 276 | 277 | 278 | ## configmap-reload resource requests and limits 279 | ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ 280 | ## 281 | resources: {} 282 | 283 | initChownData: 284 | ## If false, data ownership will not be reset at startup 285 | ## This allows the prometheus-server to be run with an arbitrary user 286 | ## 287 | enabled: true 288 | 289 | ## initChownData container name 290 | ## 291 | name: init-chown-data 292 | 293 | ## initChownData container image 294 | ## 295 | image: 296 | repository: busybox 297 | tag: latest 298 | pullPolicy: IfNotPresent 299 | 300 | ## initChownData resource requests and limits 301 | ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ 302 | ## 303 | resources: {} 304 | 305 | kubeStateMetrics: 306 | ## If false, kube-state-metrics will not be installed 307 | ## 308 | enabled: true 309 | 310 | ## kube-state-metrics container name 311 | ## 312 | name: kube-state-metrics 313 | 314 | ## kube-state-metrics container image 315 | ## 316 | image: 317 | repository: quay.io/coreos/kube-state-metrics 318 | tag: v1.5.0 319 | pullPolicy: IfNotPresent 320 | 321 | ## 
kube-state-metrics priorityClassName 322 | ## 323 | priorityClassName: "" 324 | 325 | ## kube-state-metrics container arguments 326 | ## 327 | args: {} 328 | 329 | ## Node tolerations for kube-state-metrics scheduling to nodes with taints 330 | ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ 331 | ## 332 | tolerations: [] 333 | # - key: "key" 334 | # operator: "Equal|Exists" 335 | # value: "value" 336 | # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" 337 | 338 | ## Node labels for kube-state-metrics pod assignment 339 | ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ 340 | ## 341 | nodeSelector: {} 342 | 343 | ## Annotations to be added to kube-state-metrics pods 344 | ## 345 | podAnnotations: {} 346 | 347 | pod: 348 | labels: {} 349 | 350 | replicaCount: 1 351 | 352 | ## kube-state-metrics resource requests and limits 353 | ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ 354 | ## 355 | resources: {} 356 | # limits: 357 | # cpu: 10m 358 | # memory: 16Mi 359 | # requests: 360 | # cpu: 10m 361 | # memory: 16Mi 362 | 363 | ## Security context to be added to kube-state-metrics pods 364 | ## 365 | securityContext: {} 366 | 367 | service: 368 | annotations: 369 | prometheus.io/scrape: "true" 370 | labels: {} 371 | 372 | # Exposed as a headless service: 373 | # https://kubernetes.io/docs/concepts/services-networking/service/#headless-services 374 | clusterIP: None 375 | 376 | ## List of IP addresses at which the kube-state-metrics service is available 377 | ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips 378 | ## 379 | externalIPs: [] 380 | 381 | loadBalancerIP: "" 382 | loadBalancerSourceRanges: [] 383 | servicePort: 80 384 | type: ClusterIP 385 | 386 | nodeExporter: 387 | ## If false, node-exporter will not be installed 388 | ## 389 | enabled: true 390 | 391 | ## If true, node-exporter pods share the host network namespace 392 | ## 393 | hostNetwork: true 394 | 395 | ## If true, 
node-exporter pods share the host PID namespace 396 | ## 397 | hostPID: true 398 | 399 | ## node-exporter container name 400 | ## 401 | name: node-exporter 402 | 403 | ## node-exporter container image 404 | ## 405 | image: 406 | repository: prom/node-exporter 407 | tag: v0.17.0 408 | pullPolicy: IfNotPresent 409 | 410 | ## Specify if a Pod Security Policy for node-exporter must be created 411 | ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/ 412 | ## 413 | podSecurityPolicy: 414 | enabled: False 415 | annotations: {} 416 | ## Specify pod annotations 417 | ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor 418 | ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp 419 | ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl 420 | ## 421 | # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' 422 | # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' 423 | # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default' 424 | 425 | ## node-exporter priorityClassName 426 | ## 427 | priorityClassName: "" 428 | 429 | ## Custom Update Strategy 430 | ## 431 | updateStrategy: 432 | type: RollingUpdate 433 | 434 | ## Additional node-exporter container arguments 435 | ## 436 | extraArgs: {} 437 | 438 | ## Additional node-exporter hostPath mounts 439 | ## 440 | extraHostPathMounts: [] 441 | # - name: textfile-dir 442 | # mountPath: /srv/txt_collector 443 | # hostPath: /var/lib/node-exporter 444 | # readOnly: true 445 | 446 | extraConfigmapMounts: [] 447 | # - name: certs-configmap 448 | # mountPath: /prometheus 449 | # configMap: certs-configmap 450 | # readOnly: true 451 | 452 | ## Node tolerations for node-exporter scheduling to nodes with taints 453 | ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ 454 | ## 455 | tolerations: [] 456 | # - key: "key" 457 | # operator: "Equal|Exists" 458 | # value: 
"value" 459 | # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" 460 | 461 | ## Node labels for node-exporter pod assignment 462 | ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ 463 | ## 464 | nodeSelector: {} 465 | 466 | ## Annotations to be added to node-exporter pods 467 | ## 468 | podAnnotations: {} 469 | 470 | ## Labels to be added to node-exporter pods 471 | ## 472 | pod: 473 | labels: {} 474 | 475 | ## node-exporter resource limits & requests 476 | ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ 477 | ## 478 | resources: {} 479 | # limits: 480 | # cpu: 200m 481 | # memory: 50Mi 482 | # requests: 483 | # cpu: 100m 484 | # memory: 30Mi 485 | 486 | ## Security context to be added to node-exporter pods 487 | ## 488 | securityContext: {} 489 | # runAsUser: 0 490 | 491 | service: 492 | annotations: 493 | prometheus.io/scrape: "true" 494 | labels: {} 495 | 496 | # Exposed as a headless service: 497 | # https://kubernetes.io/docs/concepts/services-networking/service/#headless-services 498 | clusterIP: None 499 | 500 | ## List of IP addresses at which the node-exporter service is available 501 | ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips 502 | ## 503 | externalIPs: [] 504 | 505 | hostPort: 9100 506 | loadBalancerIP: "" 507 | loadBalancerSourceRanges: [] 508 | servicePort: 9100 509 | type: ClusterIP 510 | 511 | server: 512 | ## Prometheus server container name 513 | ## 514 | name: server 515 | sidecarContainers: 516 | 517 | ## Prometheus server container image 518 | ## 519 | image: 520 | repository: prom/prometheus 521 | tag: v2.7.1 522 | pullPolicy: IfNotPresent 523 | 524 | ## prometheus server priorityClassName 525 | ## 526 | priorityClassName: "" 527 | 528 | ## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug 529 | ## so that the various internal URLs are still able to access as they are in the default case. 
530 | ## (Optional) 531 | prefixURL: "" 532 | 533 | ## External URL which can access alertmanager 534 | ## Maybe same with Ingress host name 535 | baseURL: "" 536 | 537 | ## Additional server container environment variables 538 | ## 539 | ## You specify this manually like you would a raw deployment manifest. 540 | ## This means you can bind in environment variables from secrets. 541 | ## 542 | ## e.g. static environment variable: 543 | ## - name: DEMO_GREETING 544 | ## value: "Hello from the environment" 545 | ## 546 | ## e.g. secret environment variable: 547 | ## - name: USERNAME 548 | ## valueFrom: 549 | ## secretKeyRef: 550 | ## name: mysecret 551 | ## key: username 552 | env: {} 553 | 554 | ## This flag controls access to the administrative HTTP API which includes functionality such as deleting time 555 | ## series. This is disabled by default. 556 | enableAdminApi: false 557 | 558 | ## Path to a configuration file on prometheus server container FS 559 | configPath: /etc/config/prometheus.yml 560 | 561 | global: 562 | ## How frequently to scrape targets by default 563 | ## 564 | scrape_interval: 1m 565 | ## How long until a scrape request times out 566 | ## 567 | scrape_timeout: 10s 568 | ## How frequently to evaluate rules 569 | ## 570 | evaluation_interval: 1m 571 | 572 | ## Additional Prometheus server container arguments 573 | ## 574 | extraArgs: {} 575 | 576 | ## Additional Prometheus server Volume mounts 577 | ## 578 | extraVolumeMounts: [] 579 | 580 | ## Additional Prometheus server Volumes 581 | ## 582 | extraVolumes: [] 583 | 584 | ## Additional Prometheus server hostPath mounts 585 | ## 586 | extraHostPathMounts: [] 587 | # - name: certs-dir 588 | # mountPath: /etc/kubernetes/certs 589 | # subPath: "" 590 | # hostPath: /etc/kubernetes/certs 591 | # readOnly: true 592 | 593 | extraConfigmapMounts: [] 594 | # - name: certs-configmap 595 | # mountPath: /prometheus 596 | # subPath: "" 597 | # configMap: certs-configmap 598 | # readOnly: true 599 | 600 | 
## Additional Prometheus server Secret mounts 601 | # Defines additional mounts with secrets. Secrets must be manually created in the namespace. 602 | extraSecretMounts: [] 603 | # - name: secret-files 604 | # mountPath: /etc/secrets 605 | # subPath: "" 606 | # secretName: prom-secret-files 607 | # readOnly: true 608 | 609 | ## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.server.configMapOverrideName}} 610 | ## Defining configMapOverrideName will cause templates/server-configmap.yaml 611 | ## to NOT generate a ConfigMap resource 612 | ## 613 | configMapOverrideName: "" 614 | 615 | ingress: 616 | ## If true, Prometheus server Ingress will be created 617 | ## 618 | enabled: false 619 | 620 | ## Prometheus server Ingress annotations 621 | ## 622 | annotations: {} 623 | # kubernetes.io/ingress.class: nginx 624 | # kubernetes.io/tls-acme: 'true' 625 | 626 | ## Prometheus server Ingress additional labels 627 | ## 628 | extraLabels: {} 629 | 630 | ## Prometheus server Ingress hostnames with optional path 631 | ## Must be provided if Ingress is enabled 632 | ## 633 | hosts: [] 634 | # - prometheus.domain.com 635 | # - domain.com/prometheus 636 | 637 | ## Prometheus server Ingress TLS configuration 638 | ## Secrets must be manually created in the namespace 639 | ## 640 | tls: [] 641 | # - secretName: prometheus-server-tls 642 | # hosts: 643 | # - prometheus.domain.com 644 | 645 | ## Server Deployment Strategy type 646 | # strategy: 647 | # type: Recreate 648 | 649 | ## Node tolerations for server scheduling to nodes with taints 650 | ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ 651 | ## 652 | tolerations: [] 653 | # - key: "key" 654 | # operator: "Equal|Exists" 655 | # value: "value" 656 | # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" 657 | 658 | ## Node labels for Prometheus server pod assignment 659 | ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ 660 | ## 661 | nodeSelector: {} 662 | 663 | ## 
Pod affinity 664 | ## 665 | affinity: {} 666 | 667 | ## Use an alternate scheduler, e.g. "stork". 668 | ## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ 669 | ## 670 | # schedulerName: 671 | 672 | persistentVolume: 673 | ## If true, Prometheus server will create/use a Persistent Volume Claim 674 | ## If false, use emptyDir 675 | ## 676 | enabled: true 677 | 678 | ## Prometheus server data Persistent Volume access modes 679 | ## Must match those of existing PV or dynamic provisioner 680 | ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ 681 | ## 682 | accessModes: 683 | - ReadWriteOnce 684 | 685 | ## Prometheus server data Persistent Volume annotations 686 | ## 687 | annotations: {} 688 | 689 | ## Prometheus server data Persistent Volume existing claim name 690 | ## Requires server.persistentVolume.enabled: true 691 | ## If defined, PVC must be created manually before volume will be bound 692 | existingClaim: "" 693 | 694 | ## Prometheus server data Persistent Volume mount root path 695 | ## 696 | mountPath: /data 697 | 698 | ## Prometheus server data Persistent Volume size 699 | ## 700 | size: 8Gi 701 | 702 | ## Prometheus server data Persistent Volume Storage Class 703 | ## If defined, storageClassName: 704 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 705 | ## If undefined (the default) or set to null, no storageClassName spec is 706 | ## set, choosing the default provisioner. 
(gp2 on AWS, standard on 707 | ## GKE, AWS & OpenStack) 708 | ## 709 | # storageClass: "-" 710 | 711 | ## Subdirectory of Prometheus server data Persistent Volume to mount 712 | ## Useful if the volume's root directory is not empty 713 | ## 714 | subPath: "" 715 | 716 | ## Annotations to be added to Prometheus server pods 717 | ## 718 | podAnnotations: {} 719 | # iam.amazonaws.com/role: prometheus 720 | 721 | ## Use a StatefulSet if replicaCount needs to be greater than 1 (see below) 722 | ## 723 | replicaCount: 1 724 | 725 | statefulSet: 726 | ## If true, use a statefulset instead of a deployment for pod management. 727 | ## This allows to scale replicas to more than 1 pod 728 | ## 729 | enabled: false 730 | 731 | annotations: {} 732 | podManagementPolicy: OrderedReady 733 | 734 | ## Alertmanager headless service to use for the statefulset 735 | ## 736 | headless: 737 | annotations: {} 738 | labels: {} 739 | servicePort: 80 740 | 741 | ## Prometheus server resource requests and limits 742 | ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ 743 | ## 744 | resources: {} 745 | # limits: 746 | # cpu: 500m 747 | # memory: 512Mi 748 | # requests: 749 | # cpu: 500m 750 | # memory: 512Mi 751 | 752 | ## Security context to be added to server pods 753 | ## 754 | securityContext: {} 755 | 756 | service: 757 | annotations: {} 758 | labels: {} 759 | clusterIP: "" 760 | 761 | ## List of IP addresses at which the Prometheus server service is available 762 | ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips 763 | ## 764 | externalIPs: [] 765 | 766 | loadBalancerIP: "" 767 | loadBalancerSourceRanges: [] 768 | servicePort: 80 769 | type: ClusterIP 770 | 771 | ## Prometheus server pod termination grace period 772 | ## 773 | terminationGracePeriodSeconds: 300 774 | 775 | ## Prometheus data retention period (i.e 360h) 776 | ## 777 | retention: "" 778 | 779 | pushgateway: 780 | ## If false, pushgateway will not be installed 781 | ## 782 | enabled: true 
783 | 784 | ## pushgateway container name 785 | ## 786 | name: pushgateway 787 | 788 | ## pushgateway container image 789 | ## 790 | image: 791 | repository: prom/pushgateway 792 | tag: v0.6.0 793 | pullPolicy: IfNotPresent 794 | 795 | ## pushgateway priorityClassName 796 | ## 797 | priorityClassName: "" 798 | 799 | ## Additional pushgateway container arguments 800 | ## 801 | ## for example: persistence.file: /data/pushgateway.data 802 | extraArgs: {} 803 | 804 | ingress: 805 | ## If true, pushgateway Ingress will be created 806 | ## 807 | enabled: false 808 | 809 | ## pushgateway Ingress annotations 810 | ## 811 | annotations: {} 812 | # kubernetes.io/ingress.class: nginx 813 | # kubernetes.io/tls-acme: 'true' 814 | 815 | ## pushgateway Ingress hostnames with optional path 816 | ## Must be provided if Ingress is enabled 817 | ## 818 | hosts: [] 819 | # - pushgateway.domain.com 820 | # - domain.com/pushgateway 821 | 822 | ## pushgateway Ingress TLS configuration 823 | ## Secrets must be manually created in the namespace 824 | ## 825 | tls: [] 826 | # - secretName: prometheus-alerts-tls 827 | # hosts: 828 | # - pushgateway.domain.com 829 | 830 | ## Node tolerations for pushgateway scheduling to nodes with taints 831 | ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ 832 | ## 833 | tolerations: [] 834 | # - key: "key" 835 | # operator: "Equal|Exists" 836 | # value: "value" 837 | # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" 838 | 839 | ## Node labels for pushgateway pod assignment 840 | ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ 841 | ## 842 | nodeSelector: {} 843 | 844 | ## Annotations to be added to pushgateway pods 845 | ## 846 | podAnnotations: {} 847 | 848 | replicaCount: 1 849 | 850 | ## pushgateway resource requests and limits 851 | ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ 852 | ## 853 | resources: {} 854 | # limits: 855 | # cpu: 10m 856 | # memory: 32Mi 857 | # requests: 858 | # 
cpu: 10m 859 | # memory: 32Mi 860 | 861 | ## Security context to be added to push-gateway pods 862 | ## 863 | securityContext: {} 864 | 865 | service: 866 | annotations: 867 | prometheus.io/probe: pushgateway 868 | labels: {} 869 | clusterIP: "" 870 | 871 | ## List of IP addresses at which the pushgateway service is available 872 | ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips 873 | ## 874 | externalIPs: [] 875 | 876 | loadBalancerIP: "" 877 | loadBalancerSourceRanges: [] 878 | servicePort: 9091 879 | type: ClusterIP 880 | 881 | persistentVolume: 882 | ## If true, pushgateway will create/use a Persistent Volume Claim 883 | ## If false, use emptyDir 884 | ## 885 | enabled: false 886 | 887 | ## pushgateway data Persistent Volume access modes 888 | ## Must match those of existing PV or dynamic provisioner 889 | ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ 890 | ## 891 | accessModes: 892 | - ReadWriteOnce 893 | 894 | ## pushgateway data Persistent Volume Claim annotations 895 | ## 896 | annotations: {} 897 | 898 | ## pushgateway data Persistent Volume existing claim name 899 | ## Requires pushgateway.persistentVolume.enabled: true 900 | ## If defined, PVC must be created manually before volume will be bound 901 | existingClaim: "" 902 | 903 | ## pushgateway data Persistent Volume mount root path 904 | ## 905 | mountPath: /data 906 | 907 | ## pushgateway data Persistent Volume size 908 | ## 909 | size: 2Gi 910 | 911 | ## alertmanager data Persistent Volume Storage Class 912 | ## If defined, storageClassName: 913 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 914 | ## If undefined (the default) or set to null, no storageClassName spec is 915 | ## set, choosing the default provisioner. 
(gp2 on AWS, standard on 916 | ## GKE, AWS & OpenStack) 917 | ## 918 | # storageClass: "-" 919 | 920 | ## Subdirectory of alertmanager data Persistent Volume to mount 921 | ## Useful if the volume's root directory is not empty 922 | ## 923 | subPath: "" 924 | 925 | 926 | ## alertmanager ConfigMap entries 927 | ## 928 | alertmanagerFiles: 929 | alertmanager.yml: 930 | global: {} 931 | # slack_api_url: '' 932 | 933 | receivers: 934 | - name: default-receiver 935 | # slack_configs: 936 | # - channel: '@you' 937 | # send_resolved: true 938 | 939 | route: 940 | group_wait: 10s 941 | group_interval: 5m 942 | receiver: default-receiver 943 | repeat_interval: 3h 944 | 945 | ## Prometheus server ConfigMap entries 946 | ## 947 | serverFiles: 948 | 949 | ## Alerts configuration 950 | ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/ 951 | alerts: {} 952 | # groups: 953 | # - name: Instances 954 | # rules: 955 | # - alert: InstanceDown 956 | # expr: up == 0 957 | # for: 5m 958 | # labels: 959 | # severity: page 960 | # annotations: 961 | # description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.' 962 | # summary: 'Instance {{ $labels.instance }} down' 963 | 964 | rules: {} 965 | 966 | prometheus.yml: 967 | rule_files: 968 | - /etc/config/rules 969 | - /etc/config/alerts 970 | 971 | scrape_configs: 972 | - job_name: prometheus 973 | static_configs: 974 | - targets: 975 | - localhost:9090 976 | 977 | # A scrape configuration for running Prometheus on a Kubernetes cluster. 978 | # This uses separate scrape configs for cluster components (i.e. API server, node) 979 | # and services to allow each to use different authentication configs. 980 | # 981 | # Kubernetes labels will be added as Prometheus labels on metrics via the 982 | # `labelmap` relabeling action. 983 | 984 | # Scrape config for API servers. 
985 | # 986 | # Kubernetes exposes API servers as endpoints to the default/kubernetes 987 | # service so this uses `endpoints` role and uses relabelling to only keep 988 | # the endpoints associated with the default/kubernetes service using the 989 | # default named port `https`. This works for single API server deployments as 990 | # well as HA API server deployments. 991 | - job_name: 'kubernetes-apiservers' 992 | 993 | kubernetes_sd_configs: 994 | - role: endpoints 995 | 996 | # Default to scraping over https. If required, just disable this or change to 997 | # `http`. 998 | scheme: https 999 | 1000 | # This TLS & bearer token file config is used to connect to the actual scrape 1001 | # endpoints for cluster components. This is separate to discovery auth 1002 | # configuration because discovery & scraping are two separate concerns in 1003 | # Prometheus. The discovery auth config is automatic if Prometheus runs inside 1004 | # the cluster. Otherwise, more config options have to be provided within the 1005 | # `<kubernetes_sd_config>`. 1006 | tls_config: 1007 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 1008 | # If your node certificates are self-signed or use a different CA to the 1009 | # master CA, then disable certificate verification below. Note that 1010 | # certificate verification is an integral part of a secure infrastructure 1011 | # so this should only be disabled in a controlled environment. You can 1012 | # disable certificate verification by uncommenting the line below. 1013 | # 1014 | insecure_skip_verify: true 1015 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 1016 | 1017 | # Keep only the default/kubernetes service endpoints for the https port. This 1018 | # will add targets for each API server which Kubernetes adds an endpoint to 1019 | # the default/kubernetes service. 
1020 | relabel_configs: 1021 | - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 1022 | action: keep 1023 | regex: default;kubernetes;https 1024 | 1025 | - job_name: 'kubernetes-nodes' 1026 | 1027 | # Default to scraping over https. If required, just disable this or change to 1028 | # `http`. 1029 | scheme: https 1030 | 1031 | # This TLS & bearer token file config is used to connect to the actual scrape 1032 | # endpoints for cluster components. This is separate to discovery auth 1033 | # configuration because discovery & scraping are two separate concerns in 1034 | # Prometheus. The discovery auth config is automatic if Prometheus runs inside 1035 | # the cluster. Otherwise, more config options have to be provided within the 1036 | # `<kubernetes_sd_config>`. 1037 | tls_config: 1038 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 1039 | # If your node certificates are self-signed or use a different CA to the 1040 | # master CA, then disable certificate verification below. Note that 1041 | # certificate verification is an integral part of a secure infrastructure 1042 | # so this should only be disabled in a controlled environment. You can 1043 | # disable certificate verification with the `insecure_skip_verify` line below. 1044 | # 1045 | insecure_skip_verify: true 1046 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 1047 | 1048 | kubernetes_sd_configs: 1049 | - role: node 1050 | 1051 | relabel_configs: 1052 | - action: labelmap 1053 | regex: __meta_kubernetes_node_label_(.+) 1054 | - target_label: __address__ 1055 | replacement: kubernetes.default.svc:443 1056 | - source_labels: [__meta_kubernetes_node_name] 1057 | regex: (.+) 1058 | target_label: __metrics_path__ 1059 | replacement: /api/v1/nodes/$1/proxy/metrics 1060 | 1061 | 1062 | - job_name: 'kubernetes-nodes-cadvisor' 1063 | 1064 | # Default to scraping over https. If required, just disable this or change to 1065 | # `http`. 
1066 | scheme: https 1067 | 1068 | # This TLS & bearer token file config is used to connect to the actual scrape 1069 | # endpoints for cluster components. This is separate to discovery auth 1070 | # configuration because discovery & scraping are two separate concerns in 1071 | # Prometheus. The discovery auth config is automatic if Prometheus runs inside 1072 | # the cluster. Otherwise, more config options have to be provided within the 1073 | # `<kubernetes_sd_config>`. 1074 | tls_config: 1075 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 1076 | # If your node certificates are self-signed or use a different CA to the 1077 | # master CA, then disable certificate verification below. Note that 1078 | # certificate verification is an integral part of a secure infrastructure 1079 | # so this should only be disabled in a controlled environment. You can 1080 | # disable certificate verification with the `insecure_skip_verify` line below. 1081 | # 1082 | insecure_skip_verify: true 1083 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 1084 | 1085 | kubernetes_sd_configs: 1086 | - role: node 1087 | 1088 | # This configuration will work only on kubelet 1.7.3+ 1089 | # As the scrape endpoints for cAdvisor have changed 1090 | # if you are using an older version you need to change the replacement to 1091 | # replacement: /api/v1/nodes/$1:4194/proxy/metrics 1092 | # more info here https://github.com/coreos/prometheus-operator/issues/633 1093 | relabel_configs: 1094 | - action: labelmap 1095 | regex: __meta_kubernetes_node_label_(.+) 1096 | - target_label: __address__ 1097 | replacement: kubernetes.default.svc:443 1098 | - source_labels: [__meta_kubernetes_node_name] 1099 | regex: (.+) 1100 | target_label: __metrics_path__ 1101 | replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor 1102 | 1103 | # Scrape config for service endpoints. 
1104 | # 1105 | # The relabeling allows the actual service scrape endpoint to be configured 1106 | # via the following annotations: 1107 | # 1108 | # * `prometheus.io/scrape`: Only scrape services that have a value of `true` 1109 | # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need 1110 | # to set this to `https` & most likely set the `tls_config` of the scrape config. 1111 | # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. 1112 | # * `prometheus.io/port`: If the metrics are exposed on a different port to the 1113 | # service then set this appropriately. 1114 | - job_name: 'kubernetes-service-endpoints' 1115 | 1116 | kubernetes_sd_configs: 1117 | - role: endpoints 1118 | 1119 | relabel_configs: 1120 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] 1121 | action: keep 1122 | regex: true 1123 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] 1124 | action: replace 1125 | target_label: __scheme__ 1126 | regex: (https?) 
1127 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] 1128 | action: replace 1129 | target_label: __metrics_path__ 1130 | regex: (.+) 1131 | - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] 1132 | action: replace 1133 | target_label: __address__ 1134 | regex: ([^:]+)(?::\d+)?;(\d+) 1135 | replacement: $1:$2 1136 | - action: labelmap 1137 | regex: __meta_kubernetes_service_label_(.+) 1138 | - source_labels: [__meta_kubernetes_namespace] 1139 | action: replace 1140 | target_label: kubernetes_namespace 1141 | - source_labels: [__meta_kubernetes_service_name] 1142 | action: replace 1143 | target_label: kubernetes_name 1144 | - source_labels: [__meta_kubernetes_pod_node_name] 1145 | action: replace 1146 | target_label: kubernetes_node 1147 | 1148 | - job_name: 'prometheus-pushgateway' 1149 | honor_labels: true 1150 | 1151 | kubernetes_sd_configs: 1152 | - role: service 1153 | 1154 | relabel_configs: 1155 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] 1156 | action: keep 1157 | regex: pushgateway 1158 | 1159 | # Example scrape config for probing services via the Blackbox Exporter. 
1160 | # 1161 | # The relabeling allows the actual service scrape endpoint to be configured 1162 | # via the following annotations: 1163 | # 1164 | # * `prometheus.io/probe`: Only probe services that have a value of `true` 1165 | - job_name: 'kubernetes-services' 1166 | 1167 | metrics_path: /probe 1168 | params: 1169 | module: [http_2xx] 1170 | 1171 | kubernetes_sd_configs: 1172 | - role: service 1173 | 1174 | relabel_configs: 1175 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] 1176 | action: keep 1177 | regex: true 1178 | - source_labels: [__address__] 1179 | target_label: __param_target 1180 | - target_label: __address__ 1181 | replacement: blackbox 1182 | - source_labels: [__param_target] 1183 | target_label: instance 1184 | - action: labelmap 1185 | regex: __meta_kubernetes_service_label_(.+) 1186 | - source_labels: [__meta_kubernetes_namespace] 1187 | target_label: kubernetes_namespace 1188 | - source_labels: [__meta_kubernetes_service_name] 1189 | target_label: kubernetes_name 1190 | 1191 | # Example scrape config for pods 1192 | # 1193 | # The relabeling allows the actual pod scrape endpoint to be configured via the 1194 | # following annotations: 1195 | # 1196 | # * `prometheus.io/scrape`: Only scrape pods that have a value of `true` 1197 | # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. 1198 | # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`. 
1199 | - job_name: 'kubernetes-pods' 1200 | 1201 | kubernetes_sd_configs: 1202 | - role: pod 1203 | 1204 | relabel_configs: 1205 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] 1206 | action: keep 1207 | regex: true 1208 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] 1209 | action: replace 1210 | target_label: __metrics_path__ 1211 | regex: (.+) 1212 | - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] 1213 | action: replace 1214 | regex: ([^:]+)(?::\d+)?;(\d+) 1215 | replacement: $1:$2 1216 | target_label: __address__ 1217 | - action: labelmap 1218 | regex: __meta_kubernetes_pod_label_(.+) 1219 | - source_labels: [__meta_kubernetes_namespace] 1220 | action: replace 1221 | target_label: kubernetes_namespace 1222 | - source_labels: [__meta_kubernetes_pod_name] 1223 | action: replace 1224 | target_label: kubernetes_pod_name 1225 | 1226 | # adds additional scrape configs to prometheus.yml 1227 | # must be a string so you have to add a | after extraScrapeConfigs: 1228 | # example adds prometheus-blackbox-exporter scrape config 1229 | extraScrapeConfigs: 1230 | # - job_name: 'prometheus-blackbox-exporter' 1231 | # metrics_path: /probe 1232 | # params: 1233 | # module: [http_2xx] 1234 | # static_configs: 1235 | # - targets: 1236 | # - https://example.com 1237 | # relabel_configs: 1238 | # - source_labels: [__address__] 1239 | # target_label: __param_target 1240 | # - source_labels: [__param_target] 1241 | # target_label: instance 1242 | # - target_label: __address__ 1243 | # replacement: prometheus-blackbox-exporter:9115 1244 | 1245 | networkPolicy: 1246 | ## Enable creation of NetworkPolicy resources. 1247 | ## 1248 | enabled: false --------------------------------------------------------------------------------