├── .gitignore ├── .gitmodules ├── README.md ├── archetypes └── default.md ├── config.toml ├── content ├── _index.md ├── introduction │ ├── _index.md │ ├── kubernetes-workspace │ │ └── _index.md │ └── using-this-site │ │ └── _index.md ├── monitoring-kubernetes │ ├── _index.md │ └── metrics │ │ ├── _index.md │ │ ├── kube-state-metrics │ │ ├── _index.md │ │ └── images │ │ │ └── kube-statefulset-created.png │ │ ├── kubelet-cadvisor │ │ ├── _index.md │ │ └── images │ │ │ └── kubelet.png │ │ └── node-exporter │ │ └── _index.md └── prometheus │ ├── _index.md │ ├── configuring-prometheus │ ├── _index.md │ └── using-service-monitors │ │ ├── _index.md │ │ └── images │ │ ├── graph.png │ │ └── targets.png │ ├── deploying-prometheus │ ├── _index.md │ ├── access-prometheus │ │ ├── _index.md │ │ └── images │ │ │ └── prometheus.png │ ├── deploying-prometheus-operator │ │ └── _index.md │ └── launch-prometheus-instance │ │ └── _index.md │ ├── using-thanos │ ├── _index.md │ ├── high-availability │ │ ├── _index.md │ │ ├── images │ │ │ ├── multiple-prometheus-with-service.png │ │ │ ├── multiple-prometheus-with-thanos.png │ │ │ ├── multiple-prometheus.png │ │ │ ├── thanos-graph.png │ │ │ └── thanos-stores.png │ │ └── static │ │ │ └── prometheus-with-sidecar.yaml │ ├── images │ │ └── thanos.png │ └── long-term-storage │ │ ├── _index.md │ │ ├── images │ │ ├── long-term-storage.png │ │ ├── thanos-query-with-store.png │ │ └── thanos-sidecar-upload.png │ │ └── static │ │ ├── thanos-with-components.yaml │ │ └── thanos-with-object-config.yaml │ └── what-is-prometheus │ ├── _index.md │ └── images │ └── logo.png ├── layouts └── partials │ ├── custom-footer.html │ ├── logo.html │ └── menu-footer.html ├── netlify.toml └── static ├── css └── theme-mine.css └── images └── favicon.png /.gitignore: -------------------------------------------------------------------------------- 1 | public/ 2 | .DS_Store -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "themes/hugo-theme-learn"] 2 | path = themes/hugo-theme-learn 3 | url = https://github.com/matcornic/hugo-theme-learn.git 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # observability-for-kubernetes 2 | 3 | This is the open source repository for the website [observability.thomasriley.co.uk](https://observability.thomasriley.co.uk) where I have been documenting my learnings from monitoring Kubernetes. 4 | 5 | This is still a work in progress and I will be improving and adding more content to this website over time. 6 | 7 | If you find this website useful please star and share it! 8 | 9 | ## Contributing 10 | 11 | If you spot a mistake please raise an Issue in this repository or if you would like to make a contribution a Pull Request is welcome! 
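## Running Locally

If you would like to preview the site locally, clone the repository together with its theme submodule and use Hugo's built-in development server. This is only a rough sketch and assumes you already have [Hugo](https://gohugo.io/) installed:

```shell
# Clone the site together with the hugo-theme-learn submodule
git clone --recurse-submodules https://github.com/thomasriley/observability-for-kubernetes.git
cd observability-for-kubernetes

# Serve the site locally (by default on http://localhost:1313)
hugo server
```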
-------------------------------------------------------------------------------- /archetypes/default.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "{{ replace .Name "-" " " | title }}" 3 | date: {{ .Date }} 4 | draft: true 5 | --- 6 | 7 | -------------------------------------------------------------------------------- /config.toml: -------------------------------------------------------------------------------- 1 | baseURL = "https://observability.thomasriley.co.uk/" 2 | languageCode = "en-gb" 3 | title = "Observability for Kubernetes" 4 | theme = "hugo-theme-learn" 5 | 6 | [params] 7 | editURL = "https://github.com/thomasriley/observability-for-kubernetes/edit/master/content/" 8 | author = "Thomas Riley" 9 | description = "Learn Monitoring & Observability for Kubernetes" 10 | # Shows a checkmark for visited pages on the menu 11 | showVisitedLinks = true 12 | # Disable search function. It will hide search bar 13 | disableSearch = false 14 | # Javascript and CSS cache are automatically busted when new version of site is generated. 15 | # Set this to true to disable this behaviour (some proxies don't handle well this optimization) 16 | disableAssetsBusting = false 17 | # Set this to true to disable copy-to-clipboard button for inline code. 18 | disableInlineCopyToClipBoard = false 19 | # A title for shortcuts in menu is set by default. Set this to true to disable it. 20 | disableShortcutsTitle = false 21 | # When using mulitlingual website, disable the switch language button. 22 | disableLanguageSwitchingButton = true 23 | # Hide breadcrumbs in the header and only show the current page title 24 | disableBreadcrumb = false 25 | # Hide Next and Previous page buttons normally displayed full height beside content 26 | disableNextPrev = false 27 | # Order sections in menu by "weight" or "title". Default to "weight" 28 | ordersectionsby = "weight" 29 | # Change default color scheme with a variant one. Can be "red", "blue", "green". 30 | themeVariant = "green" 31 | 32 | [outputs] 33 | home = [ "HTML", "RSS", "JSON"] -------------------------------------------------------------------------------- /content/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Monitoring & Observability for Kubernetes" 3 | --- 4 | 5 | # Monitoring & Observability for Kubernetes 6 | 7 | Hello, my name is [Tom Riley](https://thomasriley.co.uk). I am a Cloud & Platform Engineer who is passionate about Monitoring & Observability. I have created this website as a way of sharing my learnings from monitoring Kubernetes. 8 | 9 | Please use the navigation on the left hand side to start learning about Monitoring & Observability for Kubernetes. 10 | 11 | I suggest reading the [**Introduction**](https://observability.thomasriley.co.uk/introduction/) to learn about this website and prerequisites before progressing to learn about monitoring Kubernetes. 12 | 13 | So far this website has chapters on: 14 | 15 | * Deploying [**Prometheus**](https://observability.thomasriley.co.uk/prometheus/) to Kuberneres with Prometheus Operator and scaling to add long term storage of metrics using Thanos 16 | * [**Monitoring Kubernetes**](https://observability.thomasriley.co.uk/monitoring-kubernetes/) with Prometheus 17 | 18 | I will be adding more content over time and also improving and updating existing chapters. 
19 | -------------------------------------------------------------------------------- /content/introduction/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Introduction" 3 | date = 2019-07-03T17:16:32+01:00 4 | weight = 10 5 | chapter = true 6 | pre = "1. " 7 | +++ 8 | 9 | ### Chapter 1 10 | 11 | # Introduction 12 | 13 | ### What is this site? 14 | 15 | * I wanted a place to store all my notes and learnings about Monitoring & Observability while working with Kubernetes and containerisation. 16 | * Instead of maintaining a private set of notes, I created this site so I can give back to the community. 17 | * If you spot a mistake please create an [issue](https://github.com/thomasriley/observability-for-kubernetes/issues) or if you wish to propose a change, please raise a [pull request](https://github.com/thomasriley/observability-for-kubernetes/pulls). 18 | * I hope you find this useful! If you do, please [star](https://github.com/thomasriley/observability-for-kubernetes/stargazers) it on GitHub and share it! 19 | 20 | ### Who am I? 21 | 22 | * My name is Tom Riley and I am a Cloud & Platform Engineer who is passionate about Monitoring & Observability. 23 | * Please see my website [thomasriley.co.uk](https://thomasriley.co.uk) to learn more or if you wish to get in touch. 24 | * You can [contact me](mailto:contact@thomasriley.co.uk) via email. 25 | -------------------------------------------------------------------------------- /content/introduction/kubernetes-workspace/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Kubernetes Workspace" 3 | date: 2019-07-03T17:23:43+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | ## Kubernetes 9 | 10 | These tutorials assume you have access to a Kubernetes environment with full cluster admin privileges and have a basic understanding of Kubernetes. 11 | 12 | If you need to provision a Kubernetes environment, there are a few options I would suggest: 13 | 14 | * Create a [free Google Cloud account](https://cloud.google.com/free/) with $300 in credit and use [Kubernetes Engine](https://cloud.google.com/kubernetes-engine/). Kubernetes Engine is not a free service but you can use the free credit to pay for the service. 15 | * Create a [DigitalOcean](https://digitalocean.com/) account and use their [Managed Kubernetes Service](https://www.digitalocean.com/products/kubernetes/). This is not a free service and there is no free credit easily available. 16 | * Use [minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/) or [KIND](https://github.com/kubernetes-sigs/kind) for running Kubernetes locally on your machine. 17 | 18 | ## Other Requirements 19 | 20 | Install the **Helm client** on your laptop and **initialise Helm within Kubernetes** with cluster admin privileges. You can follow the [Helm documentation](https://helm.sh/docs/using_helm/) on how to do this. 21 | 22 | Lastly, install [**Kubectl**](https://kubernetes.io/docs/tasks/tools/install-kubectl/) if you have not already so you can interact with Kubernetes. 23 | -------------------------------------------------------------------------------- /content/introduction/using-this-site/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using This Site" 3 | date: 2019-07-03T17:23:26+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | I have structured this site as a series of hands-on tutorials. 
Unless otherwise stated, this is not a guide to building production-ready systems. It is, however, a set of simple tutorials to help you on your journey of learning more about monitoring & observability for Kubernetes. 9 | 10 | You can use the arrow on the right-hand side of the screen to navigate through to the next part of the tutorial. The left-hand navigation will show you your progress. 11 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Monitoring Kubernetes" 3 | date = 2019-07-03T22:46:02+01:00 4 | weight = 30 5 | chapter = true 6 | pre = "3. " 7 | +++ 8 | 9 | ### Chapter 3 10 | 11 | # Monitoring Kubernetes 12 | 13 | This chapter covers specific details on monitoring Kubernetes. 14 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Metrics" 3 | date: 2019-07-03T22:57:19+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | Once Prometheus is up and running in a Kubernetes cluster, you can start collecting metrics from the different components of Kubernetes. If you do not yet have Prometheus running in Kubernetes please refer back to the [**Prometheus**](https://observability.thomasriley.co.uk/prometheus/) Chapter first. 9 | 10 | Within the Prometheus ecosystem there is a concept of creating applications that interrogate a service and expose a Prometheus-formatted metrics endpoint that can then be scraped by Prometheus. These applications are known as [**Prometheus Exporters**](https://prometheus.io/docs/instrumenting/exporters/). 11 | 12 | In this section we will look at some of the Prometheus Exporters that are available for collecting metrics for monitoring Kubernetes: 13 | 14 | * kube-state-metrics 15 | * Node Exporter 16 | * Kubelet & cAdvisor 17 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/kube-state-metrics/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Kube State Metrics" 3 | date: 2019-07-04T11:03:44+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | ## Overview 9 | 10 | The [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) application extracts metrics from Kubernetes about the state of the different types of objects in Kubernetes, such as Pods, Deployments, StatefulSets, etc. 11 | 12 | The project's own description sums it up well: 13 | 14 | ```text 15 | kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. (See examples in the Metrics section below.) It is not focused on the health of the individual Kubernetes components, but rather on the health of the various objects inside, such as deployments, nodes and pods. 16 | 17 | kube-state-metrics is about generating metrics from Kubernetes API objects without modification. This ensures that features provided by kube-state-metrics have the same grade of stability as the Kubernetes API objects themselves. In turn, this means that kube-state-metrics in certain situations may not show the exact same values as kubectl, as kubectl applies certain heuristics to display comprehensible messages. 
kube-state-metrics exposes raw data unmodified from the Kubernetes API, this way users have all the data they require and perform heuristics as they see fit. 18 | 19 | The metrics are exported on the HTTP endpoint /metrics on the listening port (default 80). They are served as plaintext. They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. You can also open /metrics in a browser to see the raw metrics. 20 | ``` 21 | 22 | ## Deployment 23 | 24 | To use kube-state-metrics we need to deploy a single replica of kube-state-metrics as a Pod in the target Kubernetes cluster. 25 | 26 | Create a file called **kube-state-metrics.yaml** and add the following: 27 | 28 | ```yaml 29 | --- 30 | apiVersion: v1 31 | kind: ServiceAccount 32 | metadata: 33 | labels: 34 | app: kube-state-metrics 35 | name: kube-state-metrics 36 | namespace: prometheus 37 | --- 38 | apiVersion: rbac.authorization.k8s.io/v1 39 | kind: ClusterRole 40 | metadata: 41 | name: kube-state-metrics 42 | rules: 43 | - apiGroups: [""] 44 | resources: 45 | - configmaps 46 | - secrets 47 | - nodes 48 | - pods 49 | - services 50 | - resourcequotas 51 | - replicationcontrollers 52 | - limitranges 53 | - persistentvolumeclaims 54 | - persistentvolumes 55 | - namespaces 56 | - endpoints 57 | verbs: ["list", "watch"] 58 | - apiGroups: ["extensions"] 59 | resources: 60 | - daemonsets 61 | - deployments 62 | - replicasets 63 | - ingresses 64 | verbs: ["list", "watch"] 65 | - apiGroups: ["apps"] 66 | resources: 67 | - daemonsets 68 | - deployments 69 | - replicasets 70 | - statefulsets 71 | verbs: ["list", "watch"] 72 | - apiGroups: ["batch"] 73 | resources: 74 | - cronjobs 75 | - jobs 76 | verbs: ["list", "watch"] 77 | - apiGroups: ["autoscaling"] 78 | resources: 79 | - horizontalpodautoscalers 80 | verbs: ["list", "watch"] 81 | - apiGroups: ["policy"] 82 | resources: 83 | - poddisruptionbudgets 84 | verbs: ["list", "watch"] 85 | - apiGroups: ["certificates.k8s.io"] 86 | resources: 87 | - certificatesigningrequests 88 | verbs: ["list", "watch"] 89 | - apiGroups: ["storage.k8s.io"] 90 | resources: 91 | - storageclasses 92 | verbs: ["list", "watch"] 93 | - apiGroups: ["autoscaling.k8s.io"] 94 | resources: 95 | - verticalpodautoscalers 96 | verbs: ["list", "watch"] 97 | --- 98 | apiVersion: rbac.authorization.k8s.io/v1 99 | kind: ClusterRoleBinding 100 | metadata: 101 | labels: 102 | app: kube-state-metrics 103 | name: kube-state-metrics 104 | roleRef: 105 | apiGroup: rbac.authorization.k8s.io 106 | kind: ClusterRole 107 | name: kube-state-metrics 108 | subjects: 109 | - kind: ServiceAccount 110 | name: kube-state-metrics 111 | namespace: prometheus 112 | --- 113 | apiVersion: extensions/v1beta1 114 | kind: Deployment 115 | metadata: 116 | labels: 117 | app: kube-state-metrics 118 | name: kube-state-metrics 119 | namespace: prometheus 120 | spec: 121 | replicas: 1 122 | selector: 123 | matchLabels: 124 | app: kube-state-metrics 125 | strategy: 126 | rollingUpdate: 127 | maxSurge: 1 128 | maxUnavailable: 0 129 | type: RollingUpdate 130 | template: 131 | metadata: 132 | labels: 133 | app: kube-state-metrics 134 | spec: 135 | containers: 136 | - image: gcr.io/google_containers/kube-state-metrics:v1.6.0 137 | imagePullPolicy: IfNotPresent 138 | livenessProbe: 139 | failureThreshold: 3 140 | httpGet: 141 | path: / 142 | port: 8080 143 | scheme: HTTP 144 | initialDelaySeconds: 30 145 | periodSeconds: 10 146 | successThreshold: 1 147 | timeoutSeconds: 30 148 | name: 
kube-state-metrics 149 | ports: 150 | - containerPort: 8080 151 | protocol: TCP 152 | readinessProbe: 153 | failureThreshold: 3 154 | httpGet: 155 | path: / 156 | port: 8080 157 | scheme: HTTP 158 | initialDelaySeconds: 30 159 | periodSeconds: 10 160 | successThreshold: 1 161 | timeoutSeconds: 5 162 | resources: 163 | limits: 164 | cpu: 500m 165 | memory: 768Mi 166 | requests: 167 | cpu: 250m 168 | memory: 768Mi 169 | restartPolicy: Always 170 | serviceAccount: kube-state-metrics 171 | serviceAccountName: kube-state-metrics 172 | --- 173 | apiVersion: v1 174 | kind: Service 175 | metadata: 176 | labels: 177 | app: kube-state-metrics 178 | name: kube-state-metrics 179 | namespace: prometheus 180 | spec: 181 | ports: 182 | - name: kube-state-metrics 183 | port: 80 184 | protocol: TCP 185 | targetPort: 8080 186 | selector: 187 | app: kube-state-metrics 188 | type: ClusterIP 189 | --- 190 | apiVersion: monitoring.coreos.com/v1 191 | kind: ServiceMonitor 192 | metadata: 193 | labels: 194 | app: kube-state-metrics 195 | serviceMonitorSelector: prometheus 196 | name: kube-state-metrics 197 | namespace: prometheus 198 | spec: 199 | endpoints: 200 | - honorLabels: true 201 | interval: 30s 202 | path: /metrics 203 | targetPort: 8080 204 | jobLabel: kube-state-metrics 205 | namespaceSelector: 206 | matchNames: 207 | - prometheus 208 | selector: 209 | matchLabels: 210 | app: kube-state-metrics 211 | ``` 212 | 213 | There is a lot going on in the YAML code above. But to summarise, we are creating a Cluster Role called **kube-state-metrics** that includes all the required RBAC permissions for the service to operate successfully. We then bind the Cluster Role to a Service Account that will be used by the Pod that is created. Speaking of that Pod, we create a Deployment object to actually deploy kube-state-metrics to the **prometheus** namespace, configuring it to use the Service Account we created. Lastly we create a ClusterIP Service and a ServiceMonitor within the **prometheus** namespace so that Prometheus can scrape the metrics that are exposed by kube-state-metrics. 214 | 215 | Go ahead and install kube-state-metrics into your Kubernetes cluster by executing `kubectl apply -f kube-state-metrics.yaml`. 216 | 217 | You can then use `kubectl get pods --namespace prometheus` to see the **kube-state-metrics** Pod being created by Kubernetes. After a brief moment you can then check the configured Targets in Prometheus and you will see that **kube-state-metrics** is now being successfully scraped. 218 | 219 | ## Useful Metrics 220 | 221 | The [documenation for kube-state-metrics](https://github.com/kubernetes/kube-state-metrics/tree/master/docs) provides a wealth of useful information on the metrics that are exposed by the service. In a later section, we will look at using these metrics to build powerful dashboards for visualising the health of a Kubernetes cluster. 222 | 223 | However before we move on lets take a look at how we can make use of kube-state-metrics. 224 | 225 | Lets say we wanted to see all the **StatefulSets** deployed in the Kubernetes cluster we can use the `kube_statefulset_created` metric. If we query Prometheus for this metrics, it will return a metric for each StatefulSet in the cluster and include Prometheus Labels that provide us with metadata about the StatefulSet. 
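If you prefer the command line to the Prometheus UI, the same query can be issued against the Prometheus HTTP API. This is just a quick sketch and assumes the `kubectl port-forward` to Prometheus on **localhost:9090** (described in the Prometheus chapter) is still running:

```shell
# Ask Prometheus for the kube_statefulset_created metric via the HTTP API
curl -s 'http://localhost:9090/api/v1/query' --data-urlencode 'query=kube_statefulset_created'
```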
226 | 227 | ![Kube StatefulSet Created](/monitoring-kubernetes/metrics/kube-state-metrics/images/kube-statefulset-created.png?classes=shadow&width=55pc) 228 | 229 | Above we can see that a single metric was returned, which is as follows: 230 | 231 | **kube_statefulset_created{endpoint="8080",instance="10.8.4.8:8080",job="kube-state-metrics",namespace="prometheus",pod="kube-state-metrics-6f75b8b674-tspg8",service="kube-state-metrics",statefulset="prometheus-prometheus"} 1562167613** 232 | 233 | The metric shows that the StatefulSet named **prometheus-prometheus** in the namespace **prometheus** was created on 3rd July 2019 at 15:26 UTC. The time comes from the value **1562167613** which is a UNIX timestamp. 234 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/kube-state-metrics/images/kube-statefulset-created.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/monitoring-kubernetes/metrics/kube-state-metrics/images/kube-statefulset-created.png -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/kubelet-cadvisor/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Kubelet & cAdvisor" 3 | date: 2019-07-04T17:54:52+01:00 4 | weight: 30 5 | draft: false 6 | --- 7 | 8 | ## Overview 9 | 10 | Kubelet is a service that runs on each worker node in a Kubernetes cluster and is responsible for managing the Pods and containers on a machine. cAdvisor is a container resource usage and performance analysis tool, open sourced by Google. For monitoring Kubernetes with Prometheus we care about Kubelet and cAdvisor because we can scrape metrics from these services regarding container resource usage. 11 | 12 | ## Deployment 13 | 14 | We do not need to deploy a Prometheus Exporter to scrape metrics from Kubelet and cAdvisor as they expose metrics endpoints out of the box, therefore we only need to configure Prometheus to start scraping metrics using a ServiceMonitor. 15 | 16 | Create a file called **kubelet.yaml** and add the following: 17 | 18 | ```yaml 19 | apiVersion: monitoring.coreos.com/v1 20 | kind: ServiceMonitor 21 | metadata: 22 | labels: 23 | app: kubelet 24 | serviceMonitorSelector: prometheus 25 | name: kubelet 26 | namespace: prometheus 27 | spec: 28 | endpoints: 29 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 30 | honorLabels: true 31 | interval: 30s 32 | port: http-metrics 33 | scheme: http 34 | tlsConfig: 35 | insecureSkipVerify: true 36 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 37 | honorLabels: true 38 | interval: 30s 39 | path: /metrics/cadvisor 40 | port: http-metrics 41 | scheme: http 42 | tlsConfig: 43 | insecureSkipVerify: true 44 | jobLabel: kubelet 45 | namespaceSelector: 46 | matchNames: 47 | - kube-system 48 | selector: 49 | matchLabels: 50 | k8s-app: kubelet 51 | ``` 52 | 53 | The ServiceMonitor defined in the YAML above will scrape the **/metrics** and **/metrics/cadvisor** endpoints on Kubelet via the **kubelet** Service in the **kube-system** namespace. It is worth noting that the configuration of Kubelet may differ depending on your Kubernetes environment. The example above is tested to work on GCP Kubernetes Engine. 
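Before applying the ServiceMonitor it is worth confirming that your cluster actually exposes a **kubelet** Service in **kube-system** carrying the **k8s-app: kubelet** label that the selector above relies on, since the Service name and labels can vary between environments. A quick check, nothing more:

```shell
# Look for a kubelet Service carrying the k8s-app=kubelet label
kubectl get service --namespace kube-system -l k8s-app=kubelet

# The Endpoints object behind it should list one address per worker node
kubectl get endpoints kubelet --namespace kube-system
```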
54 | 55 | Go ahead and install the ServiceMonitor into your Kubernetes cluster by executing `kubectl apply -f kubelet.yaml`. 56 | 57 | After a few moments, you will see two Targets being scraped by Prometheus. 58 | 59 | ![Kubelet Targets](/monitoring-kubernetes/metrics/kubelet-cadvisor/images/kubelet.png?classes=shadow&width=55pc) 60 | 61 | ## Useful Metrics 62 | 63 | There are many useful metrics exposed by Kubelet for container resource usage. 64 | 65 | Let's start by looking at CPU usage metrics: 66 | 67 | * `container_cpu_system_seconds_total` - Usage of system CPU time 68 | * `container_cpu_user_seconds_total` - Usage of user CPU time 69 | * `container_cpu_usage_seconds_total` - Total CPU usage time (system + user) 70 | 71 | There are a number of metrics available for memory, but to best track the actual memory usage of a Pod use the `container_memory_working_set_bytes` metric. 72 | 73 | These metrics include the labels **pod_name** and **namespace** to help you identify the resource usage of specific Pods per Kubernetes Namespace. 74 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/kubelet-cadvisor/images/kubelet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/monitoring-kubernetes/metrics/kubelet-cadvisor/images/kubelet.png -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/node-exporter/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Node Exporter" 3 | date: 2019-07-04T16:21:58+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | ## Overview 9 | 10 | The Node Exporter is a Prometheus Exporter developed by the Prometheus project. It is not specific to Kubernetes and is designed to expose hardware and OS metrics from *NIX based kernels. The project can be found [here](https://github.com/prometheus/node_exporter) on GitHub. 11 | 12 | We will look at using the Node Exporter to expose metrics for each node running in a Kubernetes cluster. 13 | 14 | ## Deployment 15 | 16 | The Node Exporter needs to run on each node in the Kubernetes cluster, therefore we will use a DaemonSet to achieve this. 
17 | 18 | Create a file called **node-exporter.yaml** and add the following: 19 | 20 | ```yaml 21 | --- 22 | apiVersion: extensions/v1beta1 23 | kind: DaemonSet 24 | metadata: 25 | labels: 26 | app: node-exporter 27 | name: node-exporter 28 | namespace: prometheus 29 | spec: 30 | selector: 31 | matchLabels: 32 | app: node-exporter 33 | template: 34 | metadata: 35 | annotations: 36 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 37 | labels: 38 | app: node-exporter 39 | spec: 40 | containers: 41 | - args: 42 | - --web.listen-address=0.0.0.0:9100 43 | - --path.procfs=/host/proc 44 | - --path.sysfs=/host/sys 45 | image: quay.io/prometheus/node-exporter:v0.18.1 46 | imagePullPolicy: IfNotPresent 47 | name: node-exporter 48 | ports: 49 | - containerPort: 9100 50 | hostPort: 9100 51 | name: metrics 52 | protocol: TCP 53 | resources: 54 | limits: 55 | cpu: 200m 56 | memory: 50Mi 57 | requests: 58 | cpu: 100m 59 | memory: 30Mi 60 | volumeMounts: 61 | - mountPath: /host/proc 62 | name: proc 63 | readOnly: true 64 | - mountPath: /host/sys 65 | name: sys 66 | readOnly: true 67 | hostNetwork: true 68 | hostPID: true 69 | restartPolicy: Always 70 | tolerations: 71 | - effect: NoSchedule 72 | operator: Exists 73 | - effect: NoExecute 74 | operator: Exists 75 | volumes: 76 | - hostPath: 77 | path: /proc 78 | type: "" 79 | name: proc 80 | - hostPath: 81 | path: /sys 82 | type: "" 83 | name: sys 84 | --- 85 | apiVersion: v1 86 | kind: Service 87 | metadata: 88 | labels: 89 | app: node-exporter 90 | name: node-exporter 91 | namespace: prometheus 92 | spec: 93 | ports: 94 | - name: node-exporter 95 | port: 9100 96 | protocol: TCP 97 | targetPort: 9100 98 | selector: 99 | app: node-exporter 100 | sessionAffinity: None 101 | type: ClusterIP 102 | --- 103 | apiVersion: monitoring.coreos.com/v1 104 | kind: ServiceMonitor 105 | metadata: 106 | labels: 107 | app: node-exporter 108 | serviceMonitorSelector: prometheus 109 | name: node-exporter 110 | namespace: prometheus 111 | spec: 112 | endpoints: 113 | - honorLabels: true 114 | interval: 30s 115 | path: /metrics 116 | targetPort: 9100 117 | jobLabel: node-exporter 118 | namespaceSelector: 119 | matchNames: 120 | - prometheus 121 | selector: 122 | matchLabels: 123 | app: node-exporter 124 | ``` 125 | 126 | The above YAML will create a DaemonSet that launches the Node Exporter on each node in the Kubernetes cluster. It includes a Kubernetes Service and ServiceMonitor to scrape metrics from all instances of Node Exporter. 127 | 128 | Go ahead and install Node Exporter into your Kubernetes cluster by executing `kubectl apply -f node-exporter.yaml`. 129 | 130 | You can then use `kubectl get pods --namespace prometheus` to see the **node-exporter** Pod(s) being created by Kubernetes. After a brief moment you can then check the configured Targets in Prometheus and you will see that **node-exporter** is now being successfully scraped. 131 | 132 | ## Useful Metrics 133 | 134 | Node Exporter has numerous collectors designed to gather OS and hardware metrics from various sources on a node. 
If you check the log output from a Node Exporter Pod using `kubectl logs` you can see the collectors that are active: 135 | 136 | ```shell 137 | $kubectl logs node-exporter-c8cwp --namespace prometheus 138 | time="2019-07-04T15:47:47Z" level=info msg="Enabled collectors:" source="node_exporter.go:97" 139 | time="2019-07-04T15:47:47Z" level=info msg=" - arp" source="node_exporter.go:104" 140 | time="2019-07-04T15:47:47Z" level=info msg=" - bcache" source="node_exporter.go:104" 141 | time="2019-07-04T15:47:47Z" level=info msg=" - bonding" source="node_exporter.go:104" 142 | time="2019-07-04T15:47:47Z" level=info msg=" - conntrack" source="node_exporter.go:104" 143 | time="2019-07-04T15:47:47Z" level=info msg=" - cpu" source="node_exporter.go:104" 144 | time="2019-07-04T15:47:47Z" level=info msg=" - cpufreq" source="node_exporter.go:104" 145 | time="2019-07-04T15:47:47Z" level=info msg=" - diskstats" source="node_exporter.go:104" 146 | time="2019-07-04T15:47:47Z" level=info msg=" - edac" source="node_exporter.go:104" 147 | time="2019-07-04T15:47:47Z" level=info msg=" - entropy" source="node_exporter.go:104" 148 | time="2019-07-04T15:47:47Z" level=info msg=" - filefd" source="node_exporter.go:104" 149 | time="2019-07-04T15:47:47Z" level=info msg=" - filesystem" source="node_exporter.go:104" 150 | time="2019-07-04T15:47:47Z" level=info msg=" - hwmon" source="node_exporter.go:104" 151 | time="2019-07-04T15:47:47Z" level=info msg=" - infiniband" source="node_exporter.go:104" 152 | time="2019-07-04T15:47:47Z" level=info msg=" - ipvs" source="node_exporter.go:104" 153 | time="2019-07-04T15:47:47Z" level=info msg=" - loadavg" source="node_exporter.go:104" 154 | time="2019-07-04T15:47:47Z" level=info msg=" - mdadm" source="node_exporter.go:104" 155 | time="2019-07-04T15:47:47Z" level=info msg=" - meminfo" source="node_exporter.go:104" 156 | time="2019-07-04T15:47:47Z" level=info msg=" - netclass" source="node_exporter.go:104" 157 | time="2019-07-04T15:47:47Z" level=info msg=" - netdev" source="node_exporter.go:104" 158 | time="2019-07-04T15:47:47Z" level=info msg=" - netstat" source="node_exporter.go:104" 159 | time="2019-07-04T15:47:47Z" level=info msg=" - nfs" source="node_exporter.go:104" 160 | time="2019-07-04T15:47:47Z" level=info msg=" - nfsd" source="node_exporter.go:104" 161 | time="2019-07-04T15:47:47Z" level=info msg=" - pressure" source="node_exporter.go:104" 162 | time="2019-07-04T15:47:47Z" level=info msg=" - sockstat" source="node_exporter.go:104" 163 | time="2019-07-04T15:47:47Z" level=info msg=" - stat" source="node_exporter.go:104" 164 | time="2019-07-04T15:47:47Z" level=info msg=" - textfile" source="node_exporter.go:104" 165 | time="2019-07-04T15:47:47Z" level=info msg=" - time" source="node_exporter.go:104" 166 | time="2019-07-04T15:47:47Z" level=info msg=" - timex" source="node_exporter.go:104" 167 | time="2019-07-04T15:47:47Z" level=info msg=" - uname" source="node_exporter.go:104" 168 | time="2019-07-04T15:47:47Z" level=info msg=" - vmstat" source="node_exporter.go:104" 169 | time="2019-07-04T15:47:47Z" level=info msg=" - xfs" source="node_exporter.go:104" 170 | time="2019-07-04T15:47:47Z" level=info msg=" - zfs" source="node_exporter.go:104" 171 | ``` 172 | 173 | If you refer back to the Node Exporter [documenation](https://github.com/prometheus/node_exporter#collectors) you can see the method that each of these collectors uses to acquire metrics. For example, the **arp** collector exposes the metrics available in **/proc/net/arp** on Linux. 
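If you want to see the raw output of a particular collector for yourself, you can port-forward to one of the Node Exporter Pods and fetch its metrics endpoint directly. A small example, reusing the Pod name from the log output above (your Pod name will differ):

```shell
# In one terminal, forward a local port to a node-exporter Pod
kubectl port-forward node-exporter-c8cwp 9100:9100 --namespace prometheus

# In another terminal, fetch the metrics endpoint and filter for the arp collector
curl -s http://localhost:9100/metrics | grep ^node_arp
```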
174 | 175 | In Prometheus, you will see that the majority of metrics exposed by the Node Exporter are prefixed with **node__**. For example, the arp collector described above exposes a metric called **node_arp_entries** that contains the number of ARP entries in the ARP table for each network interface on a node. 176 | -------------------------------------------------------------------------------- /content/prometheus/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Prometheus" 3 | date = 2019-07-02T11:59:25+01:00 4 | weight = 20 5 | chapter = true 6 | pre = "2. " 7 | +++ 8 | 9 | ### Chapter 2 10 | 11 | # Metrics Monitoring with Prometheus 12 | 13 | These workshops cover the use of Prometheus as a metrics oriented monitoring platform for Kubernetes. -------------------------------------------------------------------------------- /content/prometheus/configuring-prometheus/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Configuring Prometheus" 3 | date: 2019-07-03T12:58:43+01:00 4 | weight: 30 5 | draft: false 6 | --- 7 | 8 | Now that you have deployed an instance of Prometheus, lets look at how to configure it to monitor a service. 9 | 10 | In Prometheus if you select **Status > Configuration** (or click directly [here](http://localhost:9090/config)) you will see that out of the box it only has the configuration below: 11 | 12 | ``` 13 | global: 14 | scrape_interval: 1m 15 | scrape_timeout: 10s 16 | evaluation_interval: 1m 17 | ``` 18 | 19 | The Prometheus Operator does a whole lot more than simply just deploy Prometheus. It is also a very powerful tool for automating the configuration of Prometheus within Kubernetes. In the next section we will look at how we can use it to configure Prometheus. 20 | -------------------------------------------------------------------------------- /content/prometheus/configuring-prometheus/using-service-monitors/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using Service Monitors" 3 | date: 2019-07-03T13:45:50+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | Prometheus uses a pull based model for collecting metrics from applications and services. This means the applications and services must expose a HTTP(S) endpoint containing Prometheus formatted metrics. Prometheus will then, as per its configuration, periodically scrape metrics from these HTTP(S) endpoints. 9 | 10 | The Prometheus Operator includes a Custom Resource Definition that allows the definition of the ServiceMonitor. The ServiceMonitor is used to define an application you wish to scrape metrics from within Kubernetes, the controller will action the ServiceMonitors we define and automatically build the required Prometheus configuration. 11 | 12 | Within the ServiceMonitor we specify the Kubernetes Labels that the Operator can use to identify the Kubernetes Service which in turn then identifies the Pods, that we wish to monitor. Lets look at how we can use Prometheus to scrape metrics from its own inbuilt metrics endpoint. 13 | 14 | Using **kubectl describe**, we can view the Labels on the **prometheus-operated** service that the Prometheus Operator previously created. 
If you wish to see this execute `kubectl describe service prometheus-operated --namespace prometheus` in your terminal or see the example below: 15 | 16 | ```shell 17 | $kubectl describe service prometheus-operated --namespace prometheus 18 | Name: prometheus-operated 19 | Namespace: prometheus 20 | Labels: operated-prometheus=true 21 | Annotations: 22 | Selector: app=prometheus 23 | Type: ClusterIP 24 | IP: None 25 | Port: web 9090/TCP 26 | TargetPort: web/TCP 27 | Endpoints: 10.8.3.7:9090 28 | Session Affinity: None 29 | Events: 30 | ``` 31 | Now we know this Kubernetes Service has the Label **operated-prometheus=true** we can create a ServiceMonitor to target this Service. Create a file called **servicemonitor.yaml** and include the following: 32 | 33 | ```yaml 34 | apiVersion: monitoring.coreos.com/v1 35 | kind: ServiceMonitor 36 | metadata: 37 | labels: 38 | serviceMonitorSelector: prometheus 39 | name: prometheus 40 | namespace: prometheus 41 | spec: 42 | endpoints: 43 | - interval: 30s 44 | targetPort: 9090 45 | path: /metrics 46 | namespaceSelector: 47 | matchNames: 48 | - prometheus 49 | selector: 50 | matchLabels: 51 | operated-prometheus: "true" 52 | ``` 53 | 54 | This Kubernetes Resource uses the **monitoring.coreos.com/v1** API Version that was installed into Kubernetes by the Prometheus Operator, as explained previously. It uses the **namespaceSelector** to specify the Kubernetes Namespace in which we wish to locate the Service, in this example above we are selecting within the **prometheus** namespace. It then uses the **selector** to specify that it must match the Label **operated-prometheus** being set as **"true"**. 55 | 56 | Under the **endpoints** key we must specify one or more scrape targets for the target service. In this example it will scrape each Pod it selects on TCP port **9090** on the URL **/metrics** every **30 seconds**. 57 | 58 | Now apply this YAML to the cluster by executing `kubectl apply -f servicemonitor.yaml`. You can then validate this has been created by execute `kubectl get servicemonitor --namespace prometheus`: 59 | 60 | ```shell 61 | $kubectl get servicemonitor 62 | NAME AGE 63 | prometheus 1m 64 | ``` 65 | 66 | Before Prometheus Operator will automatically update the running Prometheus instance configuration to set it to scrape metrics from itself, there is one more thing we must do. On the **ServiceMonitor** we defined a label on the resource called **serviceMonitorSelector**, as shown below: 67 | 68 | ```yaml 69 | metadata: 70 | labels: 71 | serviceMonitorSelector: prometheus 72 | ``` 73 | 74 | You now need to update the Prometheus Resource configuration to instruct the Prometheus Operator to configure the Prometheus instance using all **ServiceMonitors** that have the **serviceMonitorSelector** Label set as **prometheus**. 
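Before doing that, you can double check which ServiceMonitors actually carry this label, and therefore what the Operator will match, with a simple label selector query:

```shell
# List ServiceMonitors in the prometheus namespace carrying the selector label
kubectl get servicemonitors --namespace prometheus -l serviceMonitorSelector=prometheus
```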
75 | 76 | Update the previous YAML file you created called **prometheus.yaml** and add the **serviceMonitorSelector** key to the Prometheus resource: 77 | 78 | ```yaml 79 | serviceMonitorSelector: 80 | matchLabels: 81 | serviceMonitorSelector: prometheus 82 | ``` 83 | 84 | The updated Prometheus resource should look similar to the example below: 85 | 86 | ```yaml 87 | apiVersion: monitoring.coreos.com/v1 88 | kind: Prometheus 89 | metadata: 90 | name: prometheus 91 | namespace: prometheus 92 | spec: 93 | baseImage: quay.io/prometheus/prometheus 94 | logLevel: info 95 | podMetadata: 96 | annotations: 97 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 98 | labels: 99 | app: prometheus 100 | replicas: 1 101 | resources: 102 | limits: 103 | cpu: 1 104 | memory: 2Gi 105 | requests: 106 | cpu: 1 107 | memory: 2Gi 108 | retention: 12h 109 | serviceAccountName: prometheus-service-account 110 | serviceMonitorSelector: 111 | matchLabels: 112 | serviceMonitorSelector: prometheus 113 | storage: 114 | volumeClaimTemplate: 115 | apiVersion: v1 116 | kind: PersistentVolumeClaim 117 | metadata: 118 | name: prometheus-pvc 119 | spec: 120 | accessModes: 121 | - ReadWriteOnce 122 | resources: 123 | requests: 124 | storage: 10Gi 125 | version: v2.10.0 126 | ``` 127 | 128 | Now apply this change to the Kubernetes cluster by running `kubectl apply -f prometheus.yaml`. 129 | 130 | After a few moment the Prometheus Operator will automatically update the Prometheus instance you created with the Target configuration to scrape the Prometheus metrics endpoint on the Pod. After a minute or two, check the [Prometheus Configuration](http://localhost:9090/config) again, you will see the scrape config appear under the **scrape_configs** key. 131 | 132 | In the Prometheus UI if you select **Status > Targets** (or go [here](http://localhost:9090/targets)) you will see details of the target Prometheus has identified, which is the single instance of Prometheus you launched: 133 | 134 | ![Prometheus Targets](/prometheus/configuring-prometheus/using-service-monitors/images/targets.png?classes=shadow&width=55pc) 135 | 136 | If you now select **Graph** at the top, the **Expression** search box will now auto-complete when you start typing. Go ahead and type 'prometheus' and you will see some metric names appear. If you select one and click **Execute** it will query for that metric. Here is an example for **prometheus_build_info**: 137 | 138 | ![Prometheus Graph](/prometheus/configuring-prometheus/using-service-monitors/images/graph.png?classes=shadow&width=55pc) 139 | 140 | You have now successfully configured Prometheus using the ServiceMonitor. Going forward when adding more services to Kubernetes that require Prometheus monitoring, the ServiceMonitor can be used to configure Prometheus as has been demonstrated. 
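A quick way to confirm the new target stays healthy without the UI is the **up** metric, which Prometheus records as **1** for every target it is scraping successfully, or the targets endpoint of the HTTP API. A small example, assuming the port-forward to **localhost:9090** is still active:

```shell
# List the active targets and their health via the Prometheus HTTP API
curl -s http://localhost:9090/api/v1/targets

# Or query the 'up' metric, which is 1 for each target scraped successfully
curl -s 'http://localhost:9090/api/v1/query' --data-urlencode 'query=up'
```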
141 | -------------------------------------------------------------------------------- /content/prometheus/configuring-prometheus/using-service-monitors/images/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/configuring-prometheus/using-service-monitors/images/graph.png -------------------------------------------------------------------------------- /content/prometheus/configuring-prometheus/using-service-monitors/images/targets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/configuring-prometheus/using-service-monitors/images/targets.png -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Deploying Prometheus" 3 | date: 2019-07-02T22:09:56+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | There are a number of ways you can deploy Prometheus to Kubernetes: 9 | 10 | * [Prometheus Operator](https://github.com/coreos/prometheus-operator.git) 11 | * [kube-prometheus](https://github.com/coreos/kube-prometheus) 12 | * [Community Helm Chart](https://github.com/prometheus-community/helm-charts) 13 | 14 | ### Three Options 15 | 16 | Let's look at these three options available for deploying Prometheus to Kubernetes. 17 | 18 | #### Prometheus Operator 19 | 20 | This is a Kubernetes Operator that provides several Custom Resource Definitions (CRDs) that will allow us to define and configure instances of Prometheus via Kubernetes resources. The Operator contains all the logic for managing the deployment and automated configuration of Prometheus based on the YAML configuration the user deploys to Kubernetes. 21 | 22 | #### kube-prometheus 23 | 24 | This project acts as a jsonnet library for deploying Prometheus Operator and an entire Prometheus monitoring stack. 25 | 26 | #### Community Helm Chart 27 | 28 | This is similar to the kube-prometheus project, however the deployment is done via Helm. This is a community-driven chart in the stable Helm chart repository. 29 | 30 | ### Next 31 | 32 | In the subsequent workshops we will deploy the Prometheus Operator using the community Helm chart, however we will disable the default bundled Prometheus instance configuration that is provided so that we can go through the process of using the Prometheus Operator step by step. 33 | -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/access-prometheus/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Access Prometheus" 3 | date: 2019-07-03T12:29:51+01:00 4 | weight: 30 5 | draft: false 6 | --- 7 | 8 | Now that you have deployed an instance of Prometheus, let's actually look at using it! 9 | 10 | Typically you might use an [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/), such as the [Nginx-Ingress](https://github.com/kubernetes/ingress-nginx), for exposing services such as the Prometheus UI to your users outside of a Kubernetes cluster. 
However, as this guide is not going into the specific details of building a production-ready Kubernetes environment, we will simply use Kubectl port forwarding to access the Prometheus service. 11 | 12 | Lets port forward our local environment to the Prometheus instance running in Kubernetes. To do this execute `kubectl port-forward service/prometheus-operated 9090:9090 --namespace prometheus` in your terminal. The **service** called **prometheus-operated** is created by the Operator for accessing the Prometheus instance you created. 13 | 14 | If you wish to see Kubernetes Services in the **prometheus** namespace, then execute `kubectl get services --namespace prometheus` in your terminal. 15 | 16 | You will now be able to access Prometheus in your web browser at [http://localhost:9090](http://localhost:9090). 17 | 18 | ![Prometheus](/prometheus/deploying-prometheus/access-prometheus/images/prometheus.png?classes=shadow&width=55pc) -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/access-prometheus/images/prometheus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/deploying-prometheus/access-prometheus/images/prometheus.png -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/deploying-prometheus-operator/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Deploying Prometheus Operator with Helm" 3 | date: 2019-07-02T12:08:47+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | First we will use the community maintained [Helm chart](https://github.com/helm/charts/tree/master/stable/prometheus-operator) for deploying Prometheus Operator to Kubernetes. By default, the Helm chart will also deploy and configure an instance of Prometheus however to begin with lets deploy a standalone instance of the Operator. 9 | 10 | Lets modify the default behavior of the Helm chart. Create a file called **values.yaml** containing the following: 11 | 12 | ```yaml 13 | defaultRules: 14 | create: false 15 | alertmanager: 16 | enabled: false 17 | grafana: 18 | enabled: false 19 | kubeApiServer: 20 | enabled: false 21 | kubelet: 22 | enabled: false 23 | kubeControllerManager: 24 | enabled: false 25 | coreDns: 26 | enabled: false 27 | kubeEtcd: 28 | enabled: false 29 | kubeScheduler: 30 | enabled: false 31 | kubeStateMetrics: 32 | enabled: false 33 | nodeExporter: 34 | enabled: false 35 | prometheus: 36 | enabled: false 37 | ``` 38 | 39 | Then install the Prometheus Operator via Helm using the **helm upgrade** command as shown below: 40 | 41 | ```shell 42 | helm upgrade --install prometheus-operator stable/prometheus-operator --namespace prometheus --values values.yaml 43 | ``` 44 | 45 | When this executes, Helm will display all of the resources it has successfully created in Kubernetes: 46 | 47 | ```shell 48 | $ helm upgrade --install prometheus-operator stable/prometheus-operator --namespace prometheus --values values.yaml 49 | 50 | Release "prometheus-operator" does not exist. Installing it now. 
51 | NAME: prometheus-operator 52 | LAST DEPLOYED: Tue Jun 25 22:06:52 2019 53 | NAMESPACE: prometheus 54 | STATUS: DEPLOYED 55 | 56 | RESOURCES: 57 | ==> v1/ClusterRole 58 | NAME AGE 59 | prometheus-operator-operator 1s 60 | prometheus-operator-operator-psp 1s 61 | 62 | ==> v1/ClusterRoleBinding 63 | NAME AGE 64 | prometheus-operator-operator 1s 65 | prometheus-operator-operator-psp 1s 66 | 67 | ==> v1/Deployment 68 | NAME READY UP-TO-DATE AVAILABLE AGE 69 | prometheus-operator-operator 0/1 1 0 1s 70 | 71 | ==> v1/Pod(related) 72 | NAME READY STATUS RESTARTS AGE 73 | prometheus-operator-operator-694f88774b-q4r64 0/1 ContainerCreating 0 1s 74 | 75 | ==> v1/Service 76 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 77 | prometheus-operator-operator ClusterIP 10.11.250.245 8080/TCP 1s 78 | 79 | ==> v1/ServiceAccount 80 | NAME SECRETS AGE 81 | prometheus-operator-operator 1 1s 82 | 83 | ==> v1/ServiceMonitor 84 | NAME AGE 85 | prometheus-operator-operator 1s 86 | 87 | ==> v1beta1/PodSecurityPolicy 88 | NAME PRIV CAPS SELINUX RUNASUSER FSGROUP SUPGROUP READONLYROOTFS VOLUMES 89 | prometheus-operator-operator false RunAsAny RunAsAny MustRunAs MustRunAs false configMap,emptyDir,projected,secret,downwardAPI,persistentVolumeClaim 90 | 91 | 92 | NOTES: 93 | The Prometheus Operator has been installed. Check its status by running: 94 | kubectl --namespace prometheus get pods -l "release=prometheus-operator" 95 | 96 | Visit https://github.com/coreos/prometheus-operator for instructions on how 97 | to create & configure Alertmanager and Prometheus instances using the Operator. 98 | 99 | ``` 100 | 101 | Above you can see that Helm has deployed the **stable/prometheus-operator** Helm chart under the release name **prometheus-operator** into the Kubernetes namespace **prometheus** using the Helm values we created above in values.yaml. 102 | 103 | If you then use Kubectl to list the Pods in the **prometheus** namespace you will see the Prometheus Operator is now installed: 104 | 105 | ```shell 106 | $ kubectl get pods -n prometheus 107 | NAME READY STATUS RESTARTS AGE 108 | prometheus-operator-operator-694f88774b-q4r64 1/1 Running 0 6m47s 109 | ``` 110 | -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/launch-prometheus-instance/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Launch Prometheus Instance" 3 | date: 2019-07-03T10:35:40+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | ### Launching Prometheus 9 | 10 | Now that we have deployed Prometheus Operator we can use it to launch an instance of Prometheus. 11 | 12 | When we deployed the Operator, one of the tasks it performed when it first launched was to is install a number Custom Resource Definitions (CRDs) into Kubernetes. Out of the box, Kubernetes ships with many powerful Controllers such as the Deployment or Statefulset. CRDs provide a method of building completely bespoke Controllers that provide logic to a specific function. In this case, the CRDs installed by Prometheus Operator provide a means for launching and configuring Prometheus within Kubernetes. 
13 | 14 | If you run the `kubectl get customresourcedefinitions` command in your terminal you will see four CRDs provided by the Operator: 15 | 16 | ```shell 17 | $kubectl get customresourcedefinitions 18 | NAME CREATED AT 19 | alertmanagers.monitoring.coreos.com 2019-07-02T13:13:21Z 20 | prometheuses.monitoring.coreos.com 2019-07-02T13:13:21Z 21 | prometheusrules.monitoring.coreos.com 2019-07-02T13:13:21Z 22 | servicemonitors.monitoring.coreos.com 2019-07-02T13:13:21Z 23 | ``` 24 | 25 | To begin with we will be making use of the **prometheuses.monitoring.coreos.com** Custom Resource Definition. 26 | 27 | Create a file called **prometheus.yaml** and add the following: 28 | 29 | ```yaml 30 | apiVersion: monitoring.coreos.com/v1 31 | kind: Prometheus 32 | metadata: 33 | name: prometheus 34 | namespace: prometheus 35 | spec: 36 | baseImage: quay.io/prometheus/prometheus 37 | logLevel: info 38 | podMetadata: 39 | annotations: 40 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 41 | labels: 42 | app: prometheus 43 | replicas: 1 44 | resources: 45 | limits: 46 | cpu: 1 47 | memory: 2Gi 48 | requests: 49 | cpu: 1 50 | memory: 2Gi 51 | retention: 12h 52 | serviceAccountName: prometheus-service-account 53 | storage: 54 | volumeClaimTemplate: 55 | apiVersion: v1 56 | kind: PersistentVolumeClaim 57 | metadata: 58 | name: prometheus-pvc 59 | spec: 60 | accessModes: 61 | - ReadWriteOnce 62 | resources: 63 | requests: 64 | storage: 10Gi 65 | version: v2.10.0 66 | --- 67 | apiVersion: v1 68 | kind: ServiceAccount 69 | metadata: 70 | name: "prometheus-service-account" 71 | namespace: "prometheus" 72 | --- 73 | apiVersion: rbac.authorization.k8s.io/v1 74 | kind: ClusterRole 75 | metadata: 76 | name: "prometheus-cluster-role" 77 | rules: 78 | - apiGroups: 79 | - "" 80 | resources: 81 | - nodes 82 | - services 83 | - endpoints 84 | - pods 85 | verbs: 86 | - get 87 | - list 88 | - watch 89 | - apiGroups: 90 | - "" 91 | resources: 92 | - nodes/metrics 93 | verbs: 94 | - get 95 | - nonResourceURLs: 96 | - "/metrics" 97 | verbs: 98 | - get 99 | --- 100 | apiVersion: rbac.authorization.k8s.io/v1 101 | kind: ClusterRoleBinding 102 | metadata: 103 | name: "prometheus-cluster-role-binding" 104 | roleRef: 105 | apiGroup: rbac.authorization.k8s.io 106 | kind: ClusterRole 107 | name: "prometheus-cluster-role" 108 | subjects: 109 | - kind: ServiceAccount 110 | name: "prometheus-service-account" 111 | namespace: prometheus 112 | ``` 113 | 114 | Now apply this YAML to your Kubernetes cluster using `kubectl apply -f prometheus.yaml`. Kubectl will show that it has successfully created the configuration, as shown below: 115 | 116 | ```shell 117 | $kubectl apply -f prometheus.yaml 118 | prometheus.monitoring.coreos.com/prometheus created 119 | serviceaccount/prometheus-service-account created 120 | clusterrole.rbac.authorization.k8s.io/prometheus-cluster-role created 121 | clusterrolebinding.rbac.authorization.k8s.io/prometheus-cluster-role-binding created 122 | ``` 123 | 124 | Success! 
If you now list Pods in the **prometheus** namespace using `kubectl get pods --namespace prometheus` you will see an instance of Prometheus running alongside the Prometheus Operator: 125 | 126 | ```shell 127 | $kubectl get pods 128 | NAME READY STATUS RESTARTS AGE 129 | prometheus-operator-operator-86bc4d5568-shs69 1/1 Running 0 10m 130 | prometheus-prometheus-0 3/3 Running 0 1m 131 | ``` 132 | 133 | If you now check for the custom Prometheus resource that you have just installed into the cluster using `kubectl get prometheus --namespace prometheus` you will see the single result named **prometheus**: 134 | 135 | ```shell 136 | $kubectl get prometheus --namespace prometheus 137 | NAME AGE 138 | prometheus 6m 139 | ``` 140 | 141 | The Prometheus Operator acts as a Controller for the Custom Resources. When you deployed the **Prometheus** resource the Operator created the Prometheus instance, that you just identified when getting a list Pods in the **prometheus** namespace. 142 | 143 | ### What does this mean? 144 | 145 | Lets now take a look at the **prometheus.yaml** file we applied to Kubernetes and see what each section means. 146 | 147 | ```yaml 148 | apiVersion: monitoring.coreos.com/v1 149 | kind: Prometheus 150 | metadata: 151 | name: prometheus 152 | namespace: prometheus 153 | ``` 154 | 155 | Here we define that we wish to create an object called **prometheus** that is of the type **Prometheus** as defined by the Kind and that this Kind is part of the API Version **monitoring.coreos.com/v1**, that was previously installed into Kubernetes by the Prometheus Operator as a Custom Resource Definition. This object will be created in the **prometheus** namespace. 156 | 157 | Everything then under the **spec** of the YAML file defines what the instance of Prometheus should look like. 158 | 159 | ```yaml 160 | replicas: 1 161 | resources: 162 | limits: 163 | cpu: 1 164 | memory: 2Gi 165 | requests: 166 | cpu: 1 167 | memory: 2Gi 168 | ``` 169 | 170 | Here we define the Resource limits (CPU & Memory) that each Prometheus Pod will be granted within Kubernetes. We also specify the number of instances that we require by setting **replicas**, in this example we have just the 1 instance. 171 | 172 | ```yaml 173 | baseImage: quay.io/prometheus/prometheus 174 | version: v2.10.0 175 | ``` 176 | 177 | Setting the **baseImage** defines the actual Prometheus Docker image to be used. This will actually be defaulted to the Docker image that is released by the Prometheus project however we included it as an example. The **version** field sets the version of Prometheus you wish to use. You can see available versions on the [GitHub project](https://github.com/prometheus/prometheus/releases). 178 | 179 | ```yaml 180 | storage: 181 | volumeClaimTemplate: 182 | apiVersion: v1 183 | kind: PersistentVolumeClaim 184 | metadata: 185 | name: prometheus-pvc 186 | spec: 187 | accessModes: 188 | - ReadWriteOnce 189 | resources: 190 | requests: 191 | storage: 10Gi 192 | ``` 193 | 194 | This block defines the storage that will be used by Prometheus. By default the Operator will create Prometheus Pods that use local storage only by using an [emptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir). If you wish to retain the state of Prometheus and therefore the metrics that it stores when re-launching Prometheus, such as during a version upgrade, then you need to use persistent storage. The **PersistentVolumeClaim (PVC)** defines the specification of the storage to be used by Prometheus. 
In this example we are creating a persistent disk that is 10Gi for each instance of Prometheus that is created. 195 | 196 | If you execute `kubectl get persistentvolumeclaim --namespace prometheus` in your terminal you will see the PVC that has been created and bound to a Persistent Volume for the single instance of Prometheus that you have created: 197 | 198 | ```shell 199 | $kubectl get persistentvolumeclaim --namespace prometheus 200 | NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE 201 | prometheus-pvc-prometheus-prometheus-0 Bound pvc-c85b2a3b-9d7a-11e9-9e3c-42010a840079 10Gi RWO standard 21m 202 | ``` 203 | The **ServiceAccount**, **ClusterRole** and **ClusterRoleBinding** provide the Prometheus Pod with the permissions it requires to access the Kubernetes API as part of its service discovery process. 204 | 205 | Lastly, let's look at some of the Prometheus-specific configuration in the **prometheus.yaml** file: 206 | 207 | ```yaml 208 | logLevel: info 209 | retention: 12h 210 | ``` 211 | 212 | Here we define that Prometheus should retain 12 hours of metrics and that it should log using the **info** log level. 213 | 214 | The Prometheus Operator GitHub project provides a [full set](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md) of API documentation that defines the fields that can be set on the CRDs that it provides. You can see the specification for the **Prometheus** **Kind** [here](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec). -------------------------------------------------------------------------------- /content/prometheus/using-thanos/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using Thanos" 3 | date: 2019-07-04T18:45:04+01:00 4 | weight: 40 5 | draft: false 6 | --- 7 | 8 | ![Thanos](/prometheus/using-thanos/images/thanos.png?classes=shadow&width=25pc) 9 | 10 | The [Thanos Project](https://github.com/improbable-eng/thanos) turns Prometheus into a highly available metrics platform with unlimited metrics storage. 11 | 12 | The three key features of Thanos are as follows: 13 | 14 | * Global query view of all metrics from as many Prometheus instances as you require. 15 | * Long term storage of metrics. 16 | * High availability of Prometheus. 17 | 18 | In this section we will look at how to deploy Thanos alongside Prometheus. 19 | -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "High Availability" 3 | date: 2019-07-04T20:27:14+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | Out of the box, Prometheus does not have any concept of high availability or redundancy. Prometheus itself may be a mature and reliable product, but nothing is foolproof: you should always plan for *when* a Kubernetes worker node fails, not *if* it fails, and we must therefore be able to tolerate a Prometheus Pod restarting from time to time. Before we look at Thanos, let's see how we could tackle this problem with just Kubernetes & Prometheus. 9 | 10 | ## High Availability with Kubernetes 11 | 12 | Earlier in this chapter we used the Prometheus Operator to launch a single instance of Prometheus within Kubernetes.
To avoid the scenario of metrics being unavailable, either permanently or for a short duration of time, we can run a second instance of Prometheus. Each instance of Prometheus will run independently of the other, however each still has the same configuration as set by the Prometheus Operator. Essentially, two copies of target metrics will be scraped, as shown below: 13 | 14 | ![Two Prometheus Instances](/prometheus/using-thanos/high-availability/images/multiple-prometheus.png?classes=shadow&width=30pc) 15 | 16 | Now let's update the previous **prometheus.yaml** file to support this new architecture. We need to change the **replicas** from 1 to 2 and also add a **podAntiAffinity** so that both instances of Prometheus run on different Kubernetes worker nodes, ensuring we truly benefit from having the additional redundant instance of Prometheus. 17 | 18 | ```yaml 19 | apiVersion: monitoring.coreos.com/v1 20 | kind: Prometheus 21 | metadata: 22 | name: prometheus 23 | namespace: prometheus 24 | spec: 25 | affinity: 26 | podAntiAffinity: 27 | preferredDuringSchedulingIgnoredDuringExecution: 28 | - weight: 100 29 | podAffinityTerm: 30 | labelSelector: 31 | matchExpressions: 32 | - key: app 33 | operator: In 34 | values: 35 | - prometheus 36 | topologyKey: kubernetes.io/hostname 37 | baseImage: quay.io/prometheus/prometheus 38 | logLevel: info 39 | podMetadata: 40 | annotations: 41 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 42 | labels: 43 | app: prometheus 44 | replicas: 2 45 | resources: 46 | limits: 47 | cpu: 1 48 | memory: 2Gi 49 | requests: 50 | cpu: 1 51 | memory: 2Gi 52 | retention: 12h 53 | serviceAccountName: prometheus-service-account 54 | serviceMonitorSelector: 55 | matchLabels: 56 | serviceMonitorSelector: prometheus 57 | storage: 58 | volumeClaimTemplate: 59 | apiVersion: v1 60 | kind: PersistentVolumeClaim 61 | metadata: 62 | name: prometheus-pvc 63 | spec: 64 | accessModes: 65 | - ReadWriteOnce 66 | resources: 67 | requests: 68 | storage: 10Gi 69 | version: v2.10.0 70 | --- 71 | apiVersion: v1 72 | kind: ServiceAccount 73 | metadata: 74 | name: "prometheus-service-account" 75 | namespace: "prometheus" 76 | --- 77 | apiVersion: rbac.authorization.k8s.io/v1 78 | kind: ClusterRole 79 | metadata: 80 | name: "prometheus-cluster-role" 81 | rules: 82 | - apiGroups: 83 | - "" 84 | resources: 85 | - nodes 86 | - services 87 | - endpoints 88 | - pods 89 | verbs: 90 | - get 91 | - list 92 | - watch 93 | - apiGroups: 94 | - "" 95 | resources: 96 | - nodes/metrics 97 | verbs: 98 | - get 99 | - nonResourceURLs: 100 | - "/metrics" 101 | verbs: 102 | - get 103 | --- 104 | apiVersion: rbac.authorization.k8s.io/v1 105 | kind: ClusterRoleBinding 106 | metadata: 107 | name: "prometheus-cluster-role-binding" 108 | roleRef: 109 | apiGroup: rbac.authorization.k8s.io 110 | kind: ClusterRole 111 | name: "prometheus-cluster-role" 112 | subjects: 113 | - kind: ServiceAccount 114 | name: "prometheus-service-account" 115 | namespace: prometheus 116 | ``` 117 | 118 | Let's apply this updated **prometheus.yaml** to Kubernetes by running `kubectl apply -f prometheus.yaml`.
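Since all of these objects already exist from the earlier deployment, kubectl will report the changed Prometheus resource as updated rather than created this time around. The exact wording can vary between kubectl versions, but the output should look roughly like the following:

```shell
$kubectl apply -f prometheus.yaml
prometheus.monitoring.coreos.com/prometheus configured
serviceaccount/prometheus-service-account unchanged
clusterrole.rbac.authorization.k8s.io/prometheus-cluster-role unchanged
clusterrolebinding.rbac.authorization.k8s.io/prometheus-cluster-role-binding unchanged
```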
119 | 120 | A moment or two after applying this, check the running Pods in the **prometheus** namespace by running `kubectl get pods --namespace prometheus`: 121 | 122 | ```shell 123 | $kubectl get pods --namespace prometheus 124 | NAME READY STATUS RESTARTS AGE 125 | prometheus-operator-operator-86bc4d5568-7k6tp 1/1 Running 0 23h 126 | prometheus-prometheus-0 3/3 Running 0 2d 127 | prometheus-prometheus-1 3/3 Running 0 1m10s 128 | ``` 129 | 130 | Now let's put the reliability of this to the test: 131 | 132 | * Reconnect to Prometheus by executing `kubectl port-forward service/prometheus-operated 9090:9090 --namespace prometheus` and then access the UI at [http://localhost:9090](http://localhost:9090) 133 | * Restart one of the instances of Prometheus by running `kubectl delete pod prometheus-prometheus-0 --namespace prometheus` 134 | * Immediately check the Prometheus UI in your web browser; you will see that it is still available! 135 | 136 | This is great, however there is one thing we need to think about. If we have two instances of Prometheus with two copies of the same metrics, which should we use? 137 | 138 | Your Prometheus deployment uses a Kubernetes Service and in the previous example you used Kubectl port-forwarding, connecting to the Kubernetes Service directly, and therefore taking advantage of Kubernetes internal load balancing functionality. You have essentially implemented the illustration below: 139 | 140 | ![Two Prometheus Instances with Kubernetes Service](/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-service.png?classes=shadow&width=30pc) 141 | 142 | So when you connect to Prometheus via the Kubernetes Service the request will be serviced by one of the running Prometheus instances. However, when you make subsequent requests there is no guarantee that the request will be serviced by the same instance. Why is this an issue? The two instances of Prometheus that are running are independent of each other, and while they do have the same scrape configuration there is no guarantee that they will scrape the targets at exactly the same time, so the time series metrics that they each collect may have different values. 143 | 144 | What this all means is, each time you connect to Prometheus via the load balanced Kubernetes Service, you may see some oddness with metrics changing. When visualizing the metrics over time with dashboarding tools such as Grafana, this leads to a really poor experience for users, as each time you reload the same graph it may appear differently in the same time period. This is where Thanos can help! 145 | 146 | ## High Availability with Thanos 147 | 148 | At a high level, HA for Prometheus with Thanos works as detailed below: 149 | 150 | * First a sidecar is deployed alongside the Prometheus container and interacts with Prometheus. A sidecar is an additional container within the Kubernetes Pod running alongside other containers. 151 | * Next, an additional service called Thanos Query is deployed and configured to be aware of all instances of the Thanos Sidecar. 152 | * Thanos Query communicates with the Thanos Sidecar via [gRPC](https://grpc.io/) and de-duplicates metrics across all instances of Prometheus when executing a query. Thanos Query presents users with a Prometheus-like user interface and also exposes the Prometheus API.
153 | 154 | The diagram below shows this: 155 | 156 | ![Two Prometheus Instances with Thanos Sidecar](/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-thanos.png?classes=shadow&width=30pc) 157 | 158 | Now let's look at implementing this! 159 | 160 | The Prometheus Operator supports the configuration of the Thanos Sidecar via the Prometheus CRD, so you simply need to update the existing deployment. You then also need to deploy the Thanos Query service and configure it to federate the instances of Prometheus that are running. You will use a Kubernetes Service as the service discovery mechanism that allows Thanos Query to identify the Prometheus instances; to do this you also need to add an additional Kubernetes Pod Label to the Prometheus Pods so that you can select them with the label selector on the Kubernetes Service. 161 | 162 | Finally, you must also set an external label for the Prometheus instances to use. The external label is required by Thanos and is used to label all metrics that originate from a particular Prometheus deployment, so that the source of each metric can be identified. 163 | 164 | Update the Prometheus resource adding the **thanos** configuration, the new service discovery label, and also configure an external label for the Prometheus instances: 165 | 166 | ```yaml 167 | spec: 168 | podMetadata: 169 | labels: 170 | thanos-store-api: "true" 171 | thanos: 172 | version: v0.4.0 173 | resources: 174 | limits: 175 | cpu: 500m 176 | memory: 500Mi 177 | requests: 178 | cpu: 100m 179 | memory: 500Mi 180 | externalLabels: 181 | cluster_environment: workshop 182 | ``` 183 | 184 | Then define a Kubernetes Deployment for Thanos Query and the Kubernetes Service for the purposes of service discovery by adding the below to **prometheus.yaml** also: 185 | 186 | ```yaml 187 | --- 188 | apiVersion: apps/v1 189 | kind: Deployment 190 | metadata: 191 | name: thanos-query 192 | namespace: prometheus 193 | labels: 194 | app: thanos-query 195 | spec: 196 | replicas: 1 197 | selector: 198 | matchLabels: 199 | app: thanos-query 200 | template: 201 | metadata: 202 | labels: 203 | app: thanos-query 204 | spec: 205 | containers: 206 | - name: thanos-query 207 | image: improbable/thanos:v0.5.0 208 | resources: 209 | limits: 210 | cpu: 500m 211 | memory: 500Mi 212 | requests: 213 | cpu: 100m 214 | memory: 500Mi 215 | args: 216 | - "query" 217 | - "--log.level=debug" 218 | - "--query.replica-label=prometheus_replica" 219 | - "--store.sd-dns-resolver=miekgdns" 220 | - "--store=dnssrv+_grpc._tcp.thanos-store-api.prometheus.svc.cluster.local" 221 | ports: 222 | - name: http 223 | containerPort: 10902 224 | - name: grpc 225 | containerPort: 10901 226 | - name: cluster 227 | containerPort: 10900 228 | --- 229 | apiVersion: v1 230 | kind: Service 231 | metadata: 232 | name: "thanos-store-api" 233 | namespace: prometheus 234 | spec: 235 | type: ClusterIP 236 | clusterIP: None 237 | ports: 238 | - name: grpc 239 | port: 10901 240 | targetPort: grpc 241 | selector: 242 | thanos-store-api: "true" 243 | ``` 244 | 245 | The updated **prometheus.yaml** file should be similar to the example [here](/prometheus/using-thanos/high-availability/static/prometheus-with-sidecar.yaml). 246 | 247 | Now apply this to Kubernetes by executing `kubectl apply -f prometheus.yaml`.
You will see two things occur: the new Thanos Query service will be deployed as a Pod, and the existing Prometheus Pods will be restarted since the new Pod Label and Sidecar need to be added: 248 | 249 | ```shell 250 | $kubectl get pods --namespace prometheus 251 | NAME READY STATUS RESTARTS AGE 252 | prometheus-operator-operator-86bc4d5568-94cpf 1/1 Running 0 18m 253 | prometheus-prometheus-0 0/4 ContainerCreating 0 1s 254 | prometheus-prometheus-1 0/4 Pending 0 1s 255 | thanos-query-58bcc6dcbb-67rn4 0/1 ContainerCreating 0 4s 256 | 257 | $kubectl get pods --namespace prometheus 258 | NAME READY STATUS RESTARTS AGE 259 | prometheus-operator-operator-86bc4d5568-94cpf 1/1 Running 0 19m 260 | prometheus-prometheus-0 4/4 Running 0 1m 261 | prometheus-prometheus-1 4/4 Running 0 1m 262 | thanos-query-58bcc6dcbb-67rn4 1/1 Running 0 1m 263 | ``` 264 | 265 | You can see that Pods **prometheus-prometheus-0** and **prometheus-prometheus-1** now show **4/4** on the container readiness. Previously this only showed 3 containers, but following this change there are now 4 containers in a Prometheus Pod due to the additional Thanos Sidecar. 266 | 267 | Now connect to Thanos Query using port forwarding by executing a Kubectl command. You will need to substitute the correct name of your Thanos Query Pod. For example: `kubectl port-forward pod/thanos-query-58bcc6dcbb-67rn4 10902:10902 --namespace prometheus`. 268 | 269 | When accessing [http://localhost:10902](http://localhost:10902) in your web browser you will see that the Thanos Query UI is awfully similar to the Prometheus UI. That is no accident: Thanos is actually based on the same codebase as Prometheus. 270 | 271 | ![Thanos Query](/prometheus/using-thanos/high-availability/images/thanos-graph.png?classes=shadow&width=30pc) 272 | 273 | When running a Prometheus query in Thanos you can see a checkbox named **deduplication**. If you experiment with running Prometheus queries with this option enabled and disabled, you will see how Thanos deduplicates the metrics from the available Prometheus instances when querying. 274 | 275 | If you select the **Stores** option in the menu at the top, Thanos Query has an interface for showing the Thanos Store API endpoints it is currently federating. When you check this, you will see the two Prometheus instances that are running, as shown below: 276 | 277 | ![Thanos Stores](/prometheus/using-thanos/high-availability/images/thanos-stores.png?classes=shadow&width=30pc) 278 | 279 | ## Conclusion 280 | 281 | Success! In this tutorial you have successfully implemented Prometheus running with high availability using Thanos.
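As an optional extra check, you can exercise the same deduplication behaviour from the command line. Thanos Query serves the familiar Prometheus HTTP API and, assuming your version of Thanos supports the `dedup` query parameter, you can compare the deduplicated and raw results with the port-forward from the previous step still running, for example:

```shell
# deduplicated result: replica labels are merged away
$curl -s 'http://localhost:10902/api/v1/query?query=up&dedup=true'

# raw result: one series per target per Prometheus replica
$curl -s 'http://localhost:10902/api/v1/query?query=up&dedup=false'
```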
282 | -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-service.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-service.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-thanos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-thanos.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/multiple-prometheus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/multiple-prometheus.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/thanos-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/thanos-graph.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/thanos-stores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/thanos-stores.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/static/prometheus-with-sidecar.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: Prometheus 4 | metadata: 5 | name: prometheus 6 | namespace: prometheus 7 | spec: 8 | affinity: 9 | podAntiAffinity: 10 | preferredDuringSchedulingIgnoredDuringExecution: 11 | - weight: 100 12 | podAffinityTerm: 13 | labelSelector: 14 | matchExpressions: 15 | - key: app 16 | operator: In 17 | values: 18 | - prometheus 19 | topologyKey: kubernetes.io/hostname 20 | baseImage: quay.io/prometheus/prometheus 21 | logLevel: info 22 | podMetadata: 23 | annotations: 24 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 25 | labels: 26 | app: prometheus 27 | thanos-store-api: "true" 28 | replicas: 2 29 | thanos: 30 | version: v0.4.0 31 | resources: 32 | limits: 33 | cpu: 500m 34 | memory: 500Mi 35 | requests: 36 | cpu: 100m 37 | memory: 500Mi 38 | resources: 39 | limits: 40 | cpu: 1 41 | memory: 2Gi 42 | requests: 43 | cpu: 1 44 | memory: 2Gi 45 | retention: 12h 46 | serviceAccountName: prometheus-service-account 47 | serviceMonitorSelector: 48 | matchLabels: 49 | serviceMonitorSelector: 
prometheus 50 | externalLabels: 51 | cluster_environment: workshop 52 | storage: 53 | volumeClaimTemplate: 54 | apiVersion: v1 55 | kind: PersistentVolumeClaim 56 | metadata: 57 | name: prometheus-pvc 58 | spec: 59 | accessModes: 60 | - ReadWriteOnce 61 | resources: 62 | requests: 63 | storage: 10Gi 64 | version: v2.10.0 65 | --- 66 | apiVersion: v1 67 | kind: ServiceAccount 68 | metadata: 69 | name: "prometheus-service-account" 70 | namespace: "prometheus" 71 | --- 72 | apiVersion: rbac.authorization.k8s.io/v1 73 | kind: ClusterRole 74 | metadata: 75 | name: "prometheus-cluster-role" 76 | rules: 77 | - apiGroups: 78 | - "" 79 | resources: 80 | - nodes 81 | - services 82 | - endpoints 83 | - pods 84 | verbs: 85 | - get 86 | - list 87 | - watch 88 | - apiGroups: 89 | - "" 90 | resources: 91 | - nodes/metrics 92 | verbs: 93 | - get 94 | - nonResourceURLs: 95 | - "/metrics" 96 | verbs: 97 | - get 98 | --- 99 | apiVersion: rbac.authorization.k8s.io/v1 100 | kind: ClusterRoleBinding 101 | metadata: 102 | name: "prometheus-cluster-role-binding" 103 | roleRef: 104 | apiGroup: rbac.authorization.k8s.io 105 | kind: ClusterRole 106 | name: "prometheus-cluster-role" 107 | subjects: 108 | - kind: ServiceAccount 109 | name: "prometheus-service-account" 110 | namespace: prometheus 111 | --- 112 | apiVersion: apps/v1 113 | kind: Deployment 114 | metadata: 115 | name: thanos-query 116 | namespace: prometheus 117 | labels: 118 | app: thanos-query 119 | spec: 120 | replicas: 1 121 | selector: 122 | matchLabels: 123 | app: thanos-query 124 | template: 125 | metadata: 126 | labels: 127 | app: thanos-query 128 | spec: 129 | containers: 130 | - name: thanos-query 131 | image: improbable/thanos:v0.5.0 132 | resources: 133 | limits: 134 | cpu: 500m 135 | memory: 500Mi 136 | requests: 137 | cpu: 100m 138 | memory: 500Mi 139 | args: 140 | - "query" 141 | - "--log.level=debug" 142 | - "--query.replica-label=prometheus_replica" 143 | - "--store.sd-dns-resolver=miekgdns" 144 | - "--store=dnssrv+_grpc._tcp.thanos-store-api.prometheus.svc.cluster.local" 145 | ports: 146 | - name: http 147 | containerPort: 10902 148 | - name: grpc 149 | containerPort: 10901 150 | - name: cluster 151 | containerPort: 10900 152 | --- 153 | apiVersion: v1 154 | kind: Service 155 | metadata: 156 | name: "thanos-store-api" 157 | namespace: prometheus 158 | spec: 159 | type: ClusterIP 160 | clusterIP: None 161 | ports: 162 | - name: grpc 163 | port: 10901 164 | targetPort: grpc 165 | selector: 166 | thanos-store-api: "true" -------------------------------------------------------------------------------- /content/prometheus/using-thanos/images/thanos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/images/thanos.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Long Term Storage" 3 | date: 2019-07-04T20:26:38+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | ## Long Term Storage 9 | 10 | Now that you have enabled high availability with Prometheus using Thanos you can look at the next killer feature of Thanos, long term storage of metrics! 11 | 12 | To enable this, you first need to create a bucket on an object store such as AWS S3 or GCP Storage. 
Next you enable the Thanos Sidecar to upload metrics to the object store. Lastly, you need to deploy another Thanos component called Store, which acts as an API for the metrics available in the object store and is queried by the existing Thanos Query instance when a user executes a Prometheus query. The diagram below shows this using a GCP Storage bucket. 13 | 14 | ![Thanos With Long Term Storage](/prometheus/using-thanos/long-term-storage/images/long-term-storage.png?classes=shadow&width=40pc) 15 | 16 | From the perspective of users, they are completely unaware that when they execute a Prometheus query, Thanos is querying for metrics from both the Prometheus instances and the object storage. 17 | 18 | How does this work? Prometheus stores metrics in **blocks**. Initially it holds the current block in memory; periodically, typically every 2 hours, it writes the in-memory block out to the filesystem. As the Thanos Sidecar in the Prometheus Pod shares the same filesystem as the Prometheus container, it can see the new block that Prometheus writes to the filesystem. Once it sees a new block on disk, the Thanos Sidecar uploads the block to the object storage as per its configuration. This is shown below in the diagram. 19 | 20 | ![Thanos Sidecar Upload](/prometheus/using-thanos/long-term-storage/images/thanos-sidecar-upload.png?classes=shadow&width=40pc) 21 | 22 | Let's now implement this. For this example I created a Google Cloud Storage bucket called **observability-for-kubernetes-thanos-demo** and provisioned a Service Account with 'Storage Admin' permissions so that Thanos can read and write to the storage bucket. In the cloud provider of your choice, create a storage bucket and a set of credentials with permission to access the bucket. 23 | 24 | You now need to create a Thanos Object Store configuration file that provides the bucket configuration and credentials to Thanos. The structure of this file differs per cloud provider; see the [Thanos Documentation](https://github.com/improbable-eng/thanos/blob/master/docs/storage.md) for the different options, but for this example let's proceed using Google Cloud Platform. 25 | 26 | You will create a Kubernetes Secret that contains the GCP Service Account and Bucket Name. The example below shows the expected structure, however the GCP Service Account is not valid for obvious security reasons! 27 | 28 | ```yaml 29 | type: GCS 30 | config: 31 | bucket: "observability-for-kubernetes-thanos-demo" 32 | service_account: |- 33 | { 34 | "type": "service_account", 35 | "project_id": "kubernetes-cloud-lab", 36 | "private_key_id": "", 37 | "private_key": "-----BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY-----\n", 38 | "client_email": "observability-for-kubernetes@kubernetes-cloud-lab.iam.gserviceaccount.com", 39 | "client_id": "", 40 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 41 | "token_uri": "https://oauth2.googleapis.com/token", 42 | "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", 43 | "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/observability-for-kubernetes%40kubernetes-cloud-lab.iam.gserviceaccount.com" 44 | } 45 | ``` 46 | 47 | You should take the valid version of the example above and base64 encode it. You can use `base64` in your terminal for doing this.
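For example, assuming you have saved the object store configuration above to a file called **thanos-storage-config.yaml** (the file name here is just an illustration), the encoding could look like this. On Linux the `-w0` flag prevents the output being wrapped across multiple lines; the macOS `base64` command does not wrap by default, so the flag can be omitted there:

```shell
# produce a single-line base64 encoding of the Thanos object store config
$base64 -w0 thanos-storage-config.yaml
```

If you prefer not to handle the base64 string by hand, `kubectl create secret generic thanos-config --from-file=thanos.config=thanos-storage-config.yaml --namespace prometheus` should achieve the same end result, but the rest of this section sticks with writing the Secret manifest out explicitly.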
Then create a file called **thanos-object-secret.yaml** and include the Kubernetes Secret, as shown below: 48 | 49 | ```yaml 50 | --- 51 | apiVersion: v1 52 | kind: Secret 53 | metadata: 54 | name: thanos-config 55 | namespace: prometheus 56 | data: 57 | thanos.config: dHlwZTogR0NTCmNvbmZpZzoKICBidWNrZXQ6ICJvYnNlcnZhYmlsaXR5LWZvci1rdWJlcm5ldGVzLXRoYW5vcy1kZW1vIgogIHNlcnZpY2VfYWNjb3VudDogfC0KICB7CiAgICAidHlwZSI6ICJzZXJ2aWNlX2FjY291bnQiLAogICAgInByb2plY3RfaWQiOiAia3ViZXJuZXRlcy1jbG91ZC1sYWIiLAogICAgInByaXZhdGVfa2V5X2lkIjogIiIsCiAgICAicHJpdmF0ZV9rZXkiOiAiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5cbi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS1cbiIsCiAgICAiY2xpZW50X2VtYWlsIjogIm9ic2VydmFiaWxpdHktZm9yLWt1YmVybmV0ZXNAa3ViZXJuZXRlcy1jbG91ZC1sYWIuaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLAogICAgImNsaWVudF9pZCI6ICIiLAogICAgImF1dGhfdXJpIjogImh0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbS9vL29hdXRoMi9hdXRoIiwKICAgICJ0b2tlbl91cmkiOiAiaHR0cHM6Ly9vYXV0aDIuZ29vZ2xlYXBpcy5jb20vdG9rZW4iLAogICAgImF1dGhfcHJvdmlkZXJfeDUwOV9jZXJ0X3VybCI6ICJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLAogICAgImNsaWVudF94NTA5X2NlcnRfdXJsIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL3JvYm90L3YxL21ldGFkYXRhL3g1MDkvb2JzZXJ2YWJpbGl0eS1mb3Ita3ViZXJuZXRlcyU0MGt1YmVybmV0ZXMtY2xvdWQtbGFiLmlhbS5nc2VydmljZWFjY291bnQuY29tIgogIH0= 58 | ``` 59 | 60 | The value of **thanos.config** should be set to the base64 string that was just generated. 61 | 62 | Apply **thanos-object-secret.yaml** to Kubernetes by executing `kubectl apply -f thanos-object-secret.yaml`: 63 | 64 | ```shell 65 | $kubectl apply -f thanos-object-secret.yaml 66 | secret/thanos-config created 67 | ``` 68 | 69 | With the object store configuration deployed to Kubernetes, next update the Prometheus resource, adding the **objectStorageConfig** key so that the Prometheus Operator configures the Thanos Sidecar to use it.
70 | 71 | The Prometheus resource should look similar to the below with this added: 72 | 73 | ```yaml 74 | --- 75 | apiVersion: monitoring.coreos.com/v1 76 | kind: Prometheus 77 | metadata: 78 | name: prometheus 79 | namespace: prometheus 80 | spec: 81 | affinity: 82 | podAntiAffinity: 83 | preferredDuringSchedulingIgnoredDuringExecution: 84 | - weight: 100 85 | podAffinityTerm: 86 | labelSelector: 87 | matchExpressions: 88 | - key: app 89 | operator: In 90 | values: 91 | - prometheus 92 | topologyKey: kubernetes.io/hostname 93 | baseImage: quay.io/prometheus/prometheus 94 | logLevel: info 95 | podMetadata: 96 | annotations: 97 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 98 | labels: 99 | app: prometheus 100 | thanos-store-api: "true" 101 | replicas: 2 102 | thanos: 103 | version: v0.4.0 104 | resources: 105 | limits: 106 | cpu: 500m 107 | memory: 500Mi 108 | requests: 109 | cpu: 100m 110 | memory: 500Mi 111 | objectStorageConfig: 112 | key: thanos.config 113 | name: thanos-config 114 | resources: 115 | limits: 116 | cpu: 1 117 | memory: 2Gi 118 | requests: 119 | cpu: 1 120 | memory: 2Gi 121 | retention: 12h 122 | serviceAccountName: prometheus-service-account 123 | serviceMonitorSelector: 124 | matchLabels: 125 | serviceMonitorSelector: prometheus 126 | externalLabels: 127 | cluster_environment: workshop 128 | storage: 129 | volumeClaimTemplate: 130 | apiVersion: v1 131 | kind: PersistentVolumeClaim 132 | metadata: 133 | name: prometheus-pvc 134 | spec: 135 | accessModes: 136 | - ReadWriteOnce 137 | resources: 138 | requests: 139 | storage: 10Gi 140 | version: v2.10.0 141 | ``` 142 | 143 | Edit the **prometheus.yaml** file you created previously to reflect the changes above. Under the **objectStorageConfig** key set the **name** to be the name of the Kubernetes Secret you just created and the **key** to be the name of the secret key you used, which in this case is **thanos.config**. 144 | 145 | Apply the updated **prometheus.yaml** to Kubernetes by running `kubectl apply -f prometheus.yaml`. To see an example of the full YAML file that reflects the changes described above see [here](/prometheus/using-thanos/long-term-storage/static/thanos-with-object-config.yaml). 
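Applying this change will cause the Prometheus Operator to roll the Prometheus Pods so that the new Thanos Sidecar configuration is picked up. If you want to watch the rollout happen, you can stream the Pod status changes with the watch flag, for example:

```shell
$kubectl get pods --namespace prometheus -w
```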
146 | 147 | Once the Prometheus Pods have restarted with the new configuration, use `kubectl logs` to view the logs from the **thanos-sidecar** container in one of the Prometheus Pods, as shown below: 148 | 149 | ```shell 150 | $kubectl logs prometheus-prometheus-0 thanos-sidecar --namespace prometheus 151 | level=info ts=2019-07-05T20:48:04.403642465Z caller=flags.go:87 msg="gossip is disabled" 152 | level=info ts=2019-07-05T20:48:04.403893193Z caller=main.go:257 component=sidecar msg="disabled TLS, key and cert must be set to enable" 153 | level=info ts=2019-07-05T20:48:04.403923795Z caller=factory.go:39 msg="loading bucket configuration" 154 | level=info ts=2019-07-05T20:48:04.404486705Z caller=sidecar.go:319 msg="starting sidecar" peer="no gossip" 155 | level=info ts=2019-07-05T20:48:04.404636222Z caller=main.go:309 msg="Listening for metrics" address=0.0.0.0:10902 156 | level=info ts=2019-07-05T20:48:04.404674065Z caller=reloader.go:154 component=reloader msg="started watching config file and non-recursively rule dirs for changes" cfg= out= dirs= 157 | level=info ts=2019-07-05T20:48:04.40474036Z caller=sidecar.go:260 component=sidecar msg="Listening for StoreAPI gRPC" address=[10.8.2.14]:10901 158 | level=info ts=2019-07-05T20:48:06.414276291Z caller=sidecar.go:176 msg="successfully loaded prometheus external labels" external_labels="{cluster_environment=\"workshop\",prometheus=\"prometheus/prometheus\",prometheus_replica=\"prometheus-prometheus-0\"}" 159 | level=info ts=2019-07-05T20:48:34.441923855Z caller=shipper.go:350 msg="upload new block" id=01DF1NSPGCHYM8T82R8BKAX4HP 160 | level=info ts=2019-07-05T20:48:35.12152462Z caller=shipper.go:350 msg="upload new block" id=01DF1R3VHXY2NA2ZH4W4TVM33H 161 | level=info ts=2019-07-05T21:00:04.457140171Z caller=shipper.go:350 msg="upload new block" id=01DF1YZJT0GNZ2G0YWJWJ3NE25 162 | ``` 163 | 164 | If your instances of Prometheus have been running long enough (at least 2 hours) there should be a block on the filesystem ready to be uploaded. If there is, you will see it upload the block to the storage bucket as shown above. See the log entry **"upload new block"** where it begins the upload. If you check the bucket in the cloud provider console you will see that the blocks are now present. 165 | 166 | ## Thanos Components 167 | 168 | Now that the Thanos Sidecar is uploading blocks to the object store, we need to deploy two additional components: Thanos Store and Thanos Compact. 169 | 170 | ### Thanos Store 171 | 172 | Thanos Store acts as an API for querying Prometheus metrics stored in the object store.
173 | 174 | Update **prometheus.yaml** to also include the following: 175 | 176 | ```yaml 177 | --- 178 | apiVersion: apps/v1 179 | kind: StatefulSet 180 | metadata: 181 | name: thanos-store 182 | namespace: prometheus 183 | labels: 184 | app: thanos-store 185 | thanos-store-api: "true" 186 | spec: 187 | replicas: 1 188 | serviceName: thanos-store 189 | selector: 190 | matchLabels: 191 | app: thanos-store 192 | thanos-store-api: "true" 193 | template: 194 | metadata: 195 | labels: 196 | app: thanos-store 197 | thanos-store-api: "true" 198 | spec: 199 | containers: 200 | - name: thanos-store 201 | image: improbable/thanos:v0.5.0 202 | resources: 203 | limits: 204 | cpu: 1 205 | memory: 1Gi 206 | requests: 207 | cpu: 500m 208 | memory: 1Gi 209 | args: 210 | - "store" 211 | - "--data-dir=/prometheus/cache" 212 | - "--objstore.config-file=/config/thanos.config" 213 | - "--log.level=info" 214 | - "--index-cache-size=256MB" 215 | - "--chunk-pool-size=256MB" 216 | - "--store.grpc.series-max-concurrency=30" 217 | ports: 218 | - name: http 219 | containerPort: 10902 220 | - name: grpc 221 | containerPort: 10901 222 | - name: cluster 223 | containerPort: 10900 224 | volumeMounts: 225 | - mountPath: /prometheus 226 | name: thanos-store-storage 227 | - mountPath: /config/ 228 | name: thanos-config 229 | volumes: 230 | - name: thanos-config 231 | secret: 232 | secretName: thanos-config 233 | volumeClaimTemplates: 234 | - metadata: 235 | name: thanos-store-storage 236 | spec: 237 | accessModes: [ "ReadWriteOnce" ] 238 | resources: 239 | requests: 240 | storage: 10Gi 241 | ``` 242 | 243 | Apply this to Kubernetes by running `kubectl apply -f prometheus.yaml`. 244 | 245 | Kubernetes will launch a single Thanos Store Pod as per the configuration above. If you check the available **Stores** in the Thanos Query UI, you will now see Thanos Store listed in addition to the two Thanos Sidecars running alongside Prometheus. Now when querying via Thanos Query, the query will execute across both the Prometheus instances and also the metrics stored in the object store! 246 | 247 | ![Thanos Query Stores](/prometheus/using-thanos/long-term-storage/images/thanos-query-with-store.png?classes=shadow&width=40pc) 248 | 249 | ### Thanos Compact 250 | 251 | Thanos Compact is the final Thanos component you need to deploy. 252 | 253 | Compact performs three main tasks: 254 | 255 | * It executes a Prometheus compaction job on the blocks in the object store. Typically Prometheus would execute this for blocks locally on the filesystem but the process is disabled by Prometheus Operator when using Thanos. 256 | * It also executes a down-sampling process on metrics in the object store. Prometheus stores metrics with a resolution of 1 minute. However, if you were to execute a query over a period of months or years on a Prometheus environment using Thanos with long term storage, the number of data-points returned would be excessive! Therefore, Compact performs the down-sampling job adding a 5 minute and 1 hour sample in addition to the 1 minute sample. It does this by creating a new block and discarding the original once down-sampling is completed. When executing queries, Thanos will automatically select the most appropriate sample to return. 257 | * Lastly, it is also possible to set retention periods for the 1 minute, 5 minute and 1 hour samples. Compact will apply these retentions if they are set. 258 | 259 | Now lets deploy Thanos Compact. 
260 | 261 | Update **prometheus.yaml** to also include the following: 262 | 263 | ```yaml 264 | --- 265 | apiVersion: apps/v1 266 | kind: StatefulSet 267 | metadata: 268 | name: thanos-compact 269 | namespace: prometheus 270 | labels: 271 | app: thanos-compact 272 | spec: 273 | replicas: 1 274 | serviceName: thanos-compact 275 | selector: 276 | matchLabels: 277 | app: thanos-compact 278 | template: 279 | metadata: 280 | labels: 281 | app: thanos-compact 282 | spec: 283 | containers: 284 | - name: thanos-compact 285 | image: improbable/thanos:v0.5.0 286 | resources: 287 | limits: 288 | cpu: 1 289 | memory: 1Gi 290 | requests: 291 | cpu: 500m 292 | memory: 1Gi 293 | args: 294 | - "compact" 295 | - "--data-dir=/prometheus/compact" 296 | - "--objstore.config-file=/config/thanos.config" 297 | - "--log.level=info" 298 | - "--retention.resolution-raw=2d" 299 | - "--retention.resolution-5m=5d" 300 | - "--retention.resolution-1h=10d" 301 | - "--consistency-delay=15m" 302 | - "--wait" 303 | ports: 304 | - name: http 305 | containerPort: 10902 306 | - name: grpc 307 | containerPort: 10901 308 | - name: cluster 309 | containerPort: 10900 310 | volumeMounts: 311 | - mountPath: /prometheus 312 | name: thanos-compact-storage 313 | - mountPath: /config/ 314 | name: thanos-config 315 | volumes: 316 | - name: thanos-config 317 | secret: 318 | secretName: thanos-config 319 | volumeClaimTemplates: 320 | - metadata: 321 | name: thanos-compact-storage 322 | spec: 323 | accessModes: [ "ReadWriteOnce" ] 324 | resources: 325 | requests: 326 | storage: 10Gi 327 | ``` 328 | 329 | Apply this to Kubernetes by running `kubectl apply -f prometheus.yaml`. 330 | 331 | Kubernetes will launch a single Thanos Compact Pod which will, once running, start performing the actions described above on blocks stored in the object store. 332 | 333 | To see an example of the full YAML file that reflects the changes described above with Thanos Store and Compact see [here](/prometheus/using-thanos/long-term-storage/static/thanos-with-components.yaml). 
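As a final check, you can tail the logs of the Thanos Compact Pod to confirm it is picking up blocks from the object store. With a StatefulSet the first replica is named after the StatefulSet itself, so given the manifest above the Pod should be called **thanos-compact-0**; the exact log lines you see will depend on how many blocks have been uploaded and down-sampled so far:

```shell
$kubectl logs thanos-compact-0 --namespace prometheus
```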
334 | -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/images/long-term-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/long-term-storage/images/long-term-storage.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/images/thanos-query-with-store.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/long-term-storage/images/thanos-query-with-store.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/images/thanos-sidecar-upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/long-term-storage/images/thanos-sidecar-upload.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/static/thanos-with-components.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: thanos-config 6 | namespace: prometheus 7 | data: 8 | thanos.config: dHlwZTogR0NTCmNvbmZpZzoKICBidWNrZXQ6ICJvYnNlcnZhYmlsaXR5LWZvci1rdWJlcm5ldGVzLXRoYW5vcy1kZW1vIgogIHNlcnZpY2VfYWNjb3VudDogfC0KICB7CiAgICAidHlwZSI6ICJzZXJ2aWNlX2FjY291bnQiLAogICAgInByb2plY3RfaWQiOiAia3ViZXJuZXRlcy1jbG91ZC1sYWIiLAogICAgInByaXZhdGVfa2V5X2lkIjogIiIsCiAgICAicHJpdmF0ZV9rZXkiOiAiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5cbi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS1cbiIsCiAgICAiY2xpZW50X2VtYWlsIjogIm9ic2VydmFiaWxpdHktZm9yLWt1YmVybmV0ZXNAa3ViZXJuZXRlcy1jbG91ZC1sYWIuaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLAogICAgImNsaWVudF9pZCI6ICIiLAogICAgImF1dGhfdXJpIjogImh0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbS9vL29hdXRoMi9hdXRoIiwKICAgICJ0b2tlbl91cmkiOiAiaHR0cHM6Ly9vYXV0aDIuZ29vZ2xlYXBpcy5jb20vdG9rZW4iLAogICAgImF1dGhfcHJvdmlkZXJfeDUwOV9jZXJ0X3VybCI6ICJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLAogICAgImNsaWVudF94NTA5X2NlcnRfdXJsIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL3JvYm90L3YxL21ldGFkYXRhL3g1MDkvb2JzZXJ2YWJpbGl0eS1mb3Ita3ViZXJuZXRlcyU0MGt1YmVybmV0ZXMtY2xvdWQtbGFiLmlhbS5nc2VydmljZWFjY291bnQuY29tIgogIH0= 9 | --- 10 | apiVersion: monitoring.coreos.com/v1 11 | kind: Prometheus 12 | metadata: 13 | name: prometheus 14 | namespace: prometheus 15 | spec: 16 | affinity: 17 | podAntiAffinity: 18 | preferredDuringSchedulingIgnoredDuringExecution: 19 | - podAffinityTerm: 20 | labelSelector: 21 | matchLabels: 22 | app: prometheus 23 | prometheus: cluster 24 | topologyKey: kubernetes.io/hostname 25 | weight: 100 26 | baseImage: quay.io/prometheus/prometheus 27 | logLevel: info 28 | podMetadata: 29 | annotations: 30 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 31 | labels: 32 | app: prometheus 33 | thanos-store-api: "true" 34 | replicas: 2 35 | thanos: 36 | version: v0.4.0 37 | resources: 38 | limits: 39 | cpu: 500m 40 | memory: 500Mi 41 | requests: 42 | cpu: 100m 43 | memory: 500Mi 44 | objectStorageConfig: 45 | 
key: thanos.config 46 | name: thanos-config 47 | resources: 48 | limits: 49 | cpu: 1 50 | memory: 2Gi 51 | requests: 52 | cpu: 1 53 | memory: 2Gi 54 | retention: 12h 55 | serviceAccountName: prometheus-service-account 56 | serviceMonitorSelector: 57 | matchLabels: 58 | serviceMonitorSelector: prometheus 59 | externalLabels: 60 | cluster_environment: workshop 61 | storage: 62 | volumeClaimTemplate: 63 | apiVersion: v1 64 | kind: PersistentVolumeClaim 65 | metadata: 66 | name: prometheus-pvc 67 | spec: 68 | accessModes: 69 | - ReadWriteOnce 70 | resources: 71 | requests: 72 | storage: 10Gi 73 | version: v2.10.0 74 | --- 75 | apiVersion: v1 76 | kind: ServiceAccount 77 | metadata: 78 | name: "prometheus-service-account" 79 | namespace: "prometheus" 80 | --- 81 | apiVersion: rbac.authorization.k8s.io/v1 82 | kind: ClusterRole 83 | metadata: 84 | name: "prometheus-cluster-role" 85 | rules: 86 | - apiGroups: 87 | - "" 88 | resources: 89 | - nodes 90 | - services 91 | - endpoints 92 | - pods 93 | verbs: 94 | - get 95 | - list 96 | - watch 97 | - apiGroups: 98 | - "" 99 | resources: 100 | - nodes/metrics 101 | verbs: 102 | - get 103 | - nonResourceURLs: 104 | - "/metrics" 105 | verbs: 106 | - get 107 | --- 108 | apiVersion: rbac.authorization.k8s.io/v1 109 | kind: ClusterRoleBinding 110 | metadata: 111 | name: "prometheus-cluster-role-binding" 112 | roleRef: 113 | apiGroup: rbac.authorization.k8s.io 114 | kind: ClusterRole 115 | name: "prometheus-cluster-role" 116 | subjects: 117 | - kind: ServiceAccount 118 | name: "prometheus-service-account" 119 | namespace: prometheus 120 | --- 121 | apiVersion: apps/v1 122 | kind: Deployment 123 | metadata: 124 | name: thanos-query 125 | namespace: prometheus 126 | labels: 127 | app: thanos-query 128 | spec: 129 | replicas: 1 130 | selector: 131 | matchLabels: 132 | app: thanos-query 133 | template: 134 | metadata: 135 | labels: 136 | app: thanos-query 137 | spec: 138 | containers: 139 | - name: thanos-query 140 | image: improbable/thanos:v0.5.0 141 | resources: 142 | limits: 143 | cpu: 500m 144 | memory: 500Mi 145 | requests: 146 | cpu: 100m 147 | memory: 500Mi 148 | args: 149 | - "query" 150 | - "--log.level=debug" 151 | - "--query.replica-label=prometheus_replica" 152 | - "--store.sd-dns-resolver=miekgdns" 153 | - "--store=dnssrv+_grpc._tcp.thanos-store-api.prometheus.svc.cluster.local" 154 | ports: 155 | - name: http 156 | containerPort: 10902 157 | - name: grpc 158 | containerPort: 10901 159 | - name: cluster 160 | containerPort: 10900 161 | --- 162 | apiVersion: v1 163 | kind: Service 164 | metadata: 165 | name: "thanos-store-api" 166 | namespace: prometheus 167 | spec: 168 | type: ClusterIP 169 | clusterIP: None 170 | ports: 171 | - name: grpc 172 | port: 10901 173 | targetPort: grpc 174 | selector: 175 | thanos-store-api: "true" 176 | --- 177 | apiVersion: apps/v1 178 | kind: StatefulSet 179 | metadata: 180 | name: thanos-store 181 | namespace: prometheus 182 | labels: 183 | app: thanos-store 184 | thanos-store-api: "true" 185 | spec: 186 | replicas: 1 187 | serviceName: thanos-store 188 | selector: 189 | matchLabels: 190 | app: thanos-store 191 | thanos-store-api: "true" 192 | template: 193 | metadata: 194 | labels: 195 | app: thanos-store 196 | thanos-store-api: "true" 197 | spec: 198 | containers: 199 | - name: thanos-store 200 | image: improbable/thanos:v0.5.0 201 | resources: 202 | limits: 203 | cpu: 1 204 | memory: 1Gi 205 | requests: 206 | cpu: 500m 207 | memory: 1Gi 208 | args: 209 | - "store" 210 | - "--data-dir=/prometheus/cache" 211 | - 
"--objstore.config-file=/config/thanos.config" 212 | - "--log.level=info" 213 | - "--index-cache-size=256MB" 214 | - "--chunk-pool-size=256MB" 215 | - "--store.grpc.series-max-concurrency=30" 216 | ports: 217 | - name: http 218 | containerPort: 10902 219 | - name: grpc 220 | containerPort: 10901 221 | - name: cluster 222 | containerPort: 10900 223 | volumeMounts: 224 | - mountPath: /prometheus 225 | name: thanos-store-storage 226 | - mountPath: /config/ 227 | name: thanos-config 228 | volumes: 229 | - name: thanos-config 230 | secret: 231 | secretName: thanos-config 232 | volumeClaimTemplates: 233 | - metadata: 234 | name: thanos-store-storage 235 | spec: 236 | accessModes: [ "ReadWriteOnce" ] 237 | resources: 238 | requests: 239 | storage: 10Gi 240 | --- 241 | apiVersion: apps/v1 242 | kind: StatefulSet 243 | metadata: 244 | name: thanos-compact 245 | namespace: prometheus 246 | labels: 247 | app: thanos-compact 248 | spec: 249 | replicas: 1 250 | serviceName: thanos-compact 251 | selector: 252 | matchLabels: 253 | app: thanos-compact 254 | template: 255 | metadata: 256 | labels: 257 | app: thanos-compact 258 | spec: 259 | containers: 260 | - name: thanos-compact 261 | image: improbable/thanos:v0.5.0 262 | resources: 263 | limits: 264 | cpu: 1 265 | memory: 1Gi 266 | requests: 267 | cpu: 500m 268 | memory: 1Gi 269 | args: 270 | - "compact" 271 | - "--data-dir=/prometheus/compact" 272 | - "--objstore.config-file=/config/thanos.config" 273 | - "--log.level=info" 274 | - "--retention.resolution-raw=2d" 275 | - "--retention.resolution-5m=5d" 276 | - "--retention.resolution-1h=10d" 277 | - "--consistency-delay=15m" 278 | - "--wait" 279 | ports: 280 | - name: http 281 | containerPort: 10902 282 | - name: grpc 283 | containerPort: 10901 284 | - name: cluster 285 | containerPort: 10900 286 | volumeMounts: 287 | - mountPath: /prometheus 288 | name: thanos-compact-storage 289 | - mountPath: /config/ 290 | name: thanos-config 291 | volumes: 292 | - name: thanos-config 293 | secret: 294 | secretName: thanos-config 295 | volumeClaimTemplates: 296 | - metadata: 297 | name: thanos-compact-storage 298 | spec: 299 | accessModes: [ "ReadWriteOnce" ] 300 | resources: 301 | requests: 302 | storage: 10Gi 303 | -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/static/thanos-with-object-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: thanos-config 6 | namespace: prometheus 7 | data: 8 | thanos.config: 
dHlwZTogR0NTCmNvbmZpZzoKICBidWNrZXQ6ICJvYnNlcnZhYmlsaXR5LWZvci1rdWJlcm5ldGVzLXRoYW5vcy1kZW1vIgogIHNlcnZpY2VfYWNjb3VudDogfC0KICB7CiAgICAidHlwZSI6ICJzZXJ2aWNlX2FjY291bnQiLAogICAgInByb2plY3RfaWQiOiAia3ViZXJuZXRlcy1jbG91ZC1sYWIiLAogICAgInByaXZhdGVfa2V5X2lkIjogIiIsCiAgICAicHJpdmF0ZV9rZXkiOiAiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5cbi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS1cbiIsCiAgICAiY2xpZW50X2VtYWlsIjogIm9ic2VydmFiaWxpdHktZm9yLWt1YmVybmV0ZXNAa3ViZXJuZXRlcy1jbG91ZC1sYWIuaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLAogICAgImNsaWVudF9pZCI6ICIiLAogICAgImF1dGhfdXJpIjogImh0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbS9vL29hdXRoMi9hdXRoIiwKICAgICJ0b2tlbl91cmkiOiAiaHR0cHM6Ly9vYXV0aDIuZ29vZ2xlYXBpcy5jb20vdG9rZW4iLAogICAgImF1dGhfcHJvdmlkZXJfeDUwOV9jZXJ0X3VybCI6ICJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLAogICAgImNsaWVudF94NTA5X2NlcnRfdXJsIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL3JvYm90L3YxL21ldGFkYXRhL3g1MDkvb2JzZXJ2YWJpbGl0eS1mb3Ita3ViZXJuZXRlcyU0MGt1YmVybmV0ZXMtY2xvdWQtbGFiLmlhbS5nc2VydmljZWFjY291bnQuY29tIgogIH0= 9 | --- 10 | apiVersion: monitoring.coreos.com/v1 11 | kind: Prometheus 12 | metadata: 13 | name: prometheus 14 | namespace: prometheus 15 | spec: 16 | affinity: 17 | podAntiAffinity: 18 | preferredDuringSchedulingIgnoredDuringExecution: 19 | - weight: 100 20 | podAffinityTerm: 21 | labelSelector: 22 | matchExpressions: 23 | - key: app 24 | operator: In 25 | values: 26 | - prometheus 27 | topologyKey: kubernetes.io/hostname 28 | baseImage: quay.io/prometheus/prometheus 29 | logLevel: info 30 | podMetadata: 31 | annotations: 32 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 33 | labels: 34 | app: prometheus 35 | thanos-store-api: "true" 36 | replicas: 2 37 | thanos: 38 | version: v0.4.0 39 | resources: 40 | limits: 41 | cpu: 500m 42 | memory: 500Mi 43 | requests: 44 | cpu: 100m 45 | memory: 500Mi 46 | objectStorageConfig: 47 | key: thanos.config 48 | name: thanos-config 49 | resources: 50 | limits: 51 | cpu: 1 52 | memory: 2Gi 53 | requests: 54 | cpu: 1 55 | memory: 2Gi 56 | retention: 12h 57 | serviceAccountName: prometheus-service-account 58 | serviceMonitorSelector: 59 | matchLabels: 60 | serviceMonitorSelector: prometheus 61 | externalLabels: 62 | cluster_environment: workshop 63 | storage: 64 | volumeClaimTemplate: 65 | apiVersion: v1 66 | kind: PersistentVolumeClaim 67 | metadata: 68 | name: prometheus-pvc 69 | spec: 70 | accessModes: 71 | - ReadWriteOnce 72 | resources: 73 | requests: 74 | storage: 10Gi 75 | version: v2.10.0 76 | --- 77 | apiVersion: v1 78 | kind: ServiceAccount 79 | metadata: 80 | name: "prometheus-service-account" 81 | namespace: "prometheus" 82 | --- 83 | apiVersion: rbac.authorization.k8s.io/v1 84 | kind: ClusterRole 85 | metadata: 86 | name: "prometheus-cluster-role" 87 | rules: 88 | - apiGroups: 89 | - "" 90 | resources: 91 | - nodes 92 | - services 93 | - endpoints 94 | - pods 95 | verbs: 96 | - get 97 | - list 98 | - watch 99 | - apiGroups: 100 | - "" 101 | resources: 102 | - nodes/metrics 103 | verbs: 104 | - get 105 | - nonResourceURLs: 106 | - "/metrics" 107 | verbs: 108 | - get 109 | --- 110 | apiVersion: rbac.authorization.k8s.io/v1 111 | kind: ClusterRoleBinding 112 | metadata: 113 | name: "prometheus-cluster-role-binding" 114 | roleRef: 115 | apiGroup: rbac.authorization.k8s.io 116 | kind: ClusterRole 117 | name: "prometheus-cluster-role" 118 | subjects: 119 | - kind: ServiceAccount 120 | name: "prometheus-service-account" 121 | namespace: prometheus 122 | --- 123 | apiVersion: apps/v1 124 | kind: Deployment 125 | metadata: 126 | name: thanos-query 127 | 
namespace: prometheus 128 | labels: 129 | app: thanos-query 130 | spec: 131 | replicas: 1 132 | selector: 133 | matchLabels: 134 | app: thanos-query 135 | template: 136 | metadata: 137 | labels: 138 | app: thanos-query 139 | spec: 140 | containers: 141 | - name: thanos-query 142 | image: improbable/thanos:v0.5.0 143 | resources: 144 | limits: 145 | cpu: 500m 146 | memory: 500Mi 147 | requests: 148 | cpu: 100m 149 | memory: 500Mi 150 | args: 151 | - "query" 152 | - "--log.level=debug" 153 | - "--query.replica-label=prometheus_replica" 154 | - "--store.sd-dns-resolver=miekgdns" 155 | - "--store=dnssrv+_grpc._tcp.thanos-store-api.prometheus.svc.cluster.local" 156 | ports: 157 | - name: http 158 | containerPort: 10902 159 | - name: grpc 160 | containerPort: 10901 161 | - name: cluster 162 | containerPort: 10900 163 | --- 164 | apiVersion: v1 165 | kind: Service 166 | metadata: 167 | name: "thanos-store-api" 168 | namespace: prometheus 169 | spec: 170 | type: ClusterIP 171 | clusterIP: None 172 | ports: 173 | - name: grpc 174 | port: 10901 175 | targetPort: grpc 176 | selector: 177 | thanos-store-api: "true" -------------------------------------------------------------------------------- /content/prometheus/what-is-prometheus/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "What Is Prometheus?" 3 | date: 2019-07-02T15:50:17+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | ![Prometheus](/prometheus/what-is-prometheus/images/logo.png?classes=shadow&width=40pc) 9 | 10 | Prometheus is an open-source metrics-oriented monitoring and alerting tool. The project was first created by SoundCloud in 2012. In 2016 the project joined the Cloud Native Computing Foundation (CNCF). In 2018, its CNCF maturity status changed from incubation to graduated. Prometheus was only the second CNCF project to graduate, after Kubernetes. 11 | 12 | Prometheus has quickly become the de facto open-source monitoring tool for Kubernetes and is widely used and supported in the Cloud Native industry. 13 | 14 | As described on the [Prometheus.io](https://prometheus.io) website, the main features of Prometheus are: 15 | 16 | * a multi-dimensional data model with time series data identified by metric name and key/value pairs 17 | * PromQL, a flexible query language to leverage this dimensionality 18 | * no reliance on distributed storage; single server nodes are autonomous 19 | * time series collection happens via a pull model over HTTP 20 | * pushing time series is supported via an intermediary gateway 21 | * targets are discovered via service discovery or static configuration 22 | * multiple modes of graphing and dashboarding support 23 | 24 | For a more detailed introduction to Prometheus, the [introduction on the Prometheus documentation website](https://prometheus.io/docs/introduction) is excellent. 25 | 26 | The rest of this chapter details how to deploy Prometheus to Kubernetes.
27 | -------------------------------------------------------------------------------- /content/prometheus/what-is-prometheus/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/what-is-prometheus/images/logo.png -------------------------------------------------------------------------------- /layouts/partials/custom-footer.html: -------------------------------------------------------------------------------- 1 | 11 | -------------------------------------------------------------------------------- /layouts/partials/logo.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /layouts/partials/menu-footer.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | Download 4 | 5 | 6 | Star 7 | 8 | 9 | Fork 10 | 11 |

Built with from Grav and Hugo

12 |
13 | 14 | -------------------------------------------------------------------------------- /netlify.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | publish = "public" 3 | command = "hugo --gc --minify" 4 | 5 | [context.production.environment] 6 | HUGO_VERSION = "0.84.3" 7 | HUGO_ENV = "production" 8 | HUGO_ENABLEGITINFO = "true" 9 | 10 | [context.split1] 11 | command = "hugo --gc --minify --enableGitInfo" 12 | 13 | [context.split1.environment] 14 | HUGO_VERSION = "0.84.3" 15 | HUGO_ENV = "production" 16 | 17 | [context.deploy-preview] 18 | command = "hugo --gc --minify --buildFuture -b $DEPLOY_PRIME_URL" 19 | 20 | [context.deploy-preview.environment] 21 | HUGO_VERSION = "0.84.3" 22 | 23 | [context.branch-deploy] 24 | command = "hugo --gc --minify -b $DEPLOY_PRIME_URL" 25 | 26 | [context.branch-deploy.environment] 27 | HUGO_VERSION = "0.84.3" 28 | 29 | [context.next.environment] 30 | HUGO_ENABLEGITINFO = "true" -------------------------------------------------------------------------------- /static/css/theme-mine.css: -------------------------------------------------------------------------------- 1 | 2 | :root{ 3 | 4 | --MAIN-TEXT-color:#323232; /* Color of text by default */ 5 | --MAIN-TITLES-TEXT-color: #5e5e5e; /* Color of titles h2-h3-h4-h5 */ 6 | --MAIN-LINK-color:#1C90F3; /* Color of links */ 7 | --MAIN-LINK-HOVER-color:#167ad0; /* Color of hovered links */ 8 | --MAIN-ANCHOR-color: #1C90F3; /* color of anchors on titles */ 9 | 10 | --MENU-HEADER-BG-color:#1C90F3; /* Background color of menu header */ 11 | --MENU-HEADER-BORDER-color:#33a1ff; /*Color of menu header border */ 12 | 13 | --MENU-SEARCH-BG-color:#167ad0; /* Search field background color (by default borders + icons) */ 14 | --MENU-SEARCH-BOX-color: #33a1ff; /* Override search field border color */ 15 | --MENU-SEARCH-BOX-ICONS-color: #a1d2fd; /* Override search field icons color */ 16 | 17 | --MENU-SECTIONS-ACTIVE-BG-color:#20272b; /* Background color of the active section and its childs */ 18 | --MENU-SECTIONS-BG-color:#252c31; /* Background color of other sections */ 19 | --MENU-SECTIONS-LINK-color: #ccc; /* Color of links in menu */ 20 | --MENU-SECTIONS-LINK-HOVER-color: #e6e6e6; /* Color of links in menu, when hovered */ 21 | --MENU-SECTION-ACTIVE-CATEGORY-color: #777; /* Color of active category text */ 22 | --MENU-SECTION-ACTIVE-CATEGORY-BG-color: #fff; /* Color of background for the active category (only) */ 23 | 24 | --MENU-VISITED-color: #33a1ff; /* Color of 'page visited' icons in menu */ 25 | --MENU-SECTION-HR-color: #20272b; /* Color of
separator in menu */ 26 | 27 | } 28 | 29 | body { 30 | color: var(--MAIN-TEXT-color) !important; 31 | } 32 | 33 | textarea:focus, input[type="email"]:focus, input[type="number"]:focus, input[type="password"]:focus, input[type="search"]:focus, input[type="tel"]:focus, input[type="text"]:focus, input[type="url"]:focus, input[type="color"]:focus, input[type="date"]:focus, input[type="datetime"]:focus, input[type="datetime-local"]:focus, input[type="month"]:focus, input[type="time"]:focus, input[type="week"]:focus, select[multiple=multiple]:focus { 34 | border-color: none; 35 | box-shadow: none; 36 | } 37 | 38 | h2, h3, h4, h5 { 39 | color: var(--MAIN-TITLES-TEXT-color) !important; 40 | } 41 | 42 | a { 43 | color: var(--MAIN-LINK-color); 44 | } 45 | 46 | .anchor { 47 | color: var(--MAIN-ANCHOR-color); 48 | } 49 | 50 | a:hover { 51 | color: var(--MAIN-LINK-HOVER-color); 52 | } 53 | 54 | #sidebar ul li.visited > a .read-icon { 55 | color: var(--MENU-VISITED-color); 56 | } 57 | 58 | #body a.highlight:after { 59 | display: block; 60 | content: ""; 61 | height: 1px; 62 | width: 0%; 63 | -webkit-transition: width 0.5s ease; 64 | -moz-transition: width 0.5s ease; 65 | -ms-transition: width 0.5s ease; 66 | transition: width 0.5s ease; 67 | background-color: var(--MAIN-LINK-HOVER-color); 68 | } 69 | #sidebar { 70 | background-color: var(--MENU-SECTIONS-BG-color); 71 | } 72 | #sidebar #header-wrapper { 73 | background: var(--MENU-HEADER-BG-color); 74 | color: var(--MENU-SEARCH-BOX-color); 75 | border-color: var(--MENU-HEADER-BORDER-color); 76 | } 77 | #sidebar .searchbox { 78 | border-color: var(--MENU-SEARCH-BOX-color); 79 | background: var(--MENU-SEARCH-BG-color); 80 | } 81 | #sidebar ul.topics > li.parent, #sidebar ul.topics > li.active { 82 | background: var(--MENU-SECTIONS-ACTIVE-BG-color); 83 | } 84 | #sidebar .searchbox * { 85 | color: var(--MENU-SEARCH-BOX-ICONS-color); 86 | } 87 | 88 | #sidebar a { 89 | color: var(--MENU-SECTIONS-LINK-color); 90 | } 91 | 92 | #sidebar a:hover { 93 | color: var(--MENU-SECTIONS-LINK-HOVER-color); 94 | } 95 | 96 | #sidebar ul li.active > a { 97 | background: var(--MENU-SECTION-ACTIVE-CATEGORY-BG-color); 98 | color: var(--MENU-SECTION-ACTIVE-CATEGORY-color) !important; 99 | } 100 | 101 | #sidebar hr { 102 | border-color: var(--MENU-SECTION-HR-color); 103 | } 104 | -------------------------------------------------------------------------------- /static/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/static/images/favicon.png --------------------------------------------------------------------------------