├── .gitignore ├── .gitmodules ├── README.md ├── archetypes └── default.md ├── config.toml ├── content ├── _index.md ├── introduction │ ├── _index.md │ ├── kubernetes-workspace │ │ └── _index.md │ └── using-this-site │ │ └── _index.md ├── monitoring-kubernetes │ ├── _index.md │ └── metrics │ │ ├── _index.md │ │ ├── kube-state-metrics │ │ ├── _index.md │ │ └── images │ │ │ └── kube-statefulset-created.png │ │ ├── kubelet-cadvisor │ │ ├── _index.md │ │ └── images │ │ │ └── kubelet.png │ │ └── node-exporter │ │ └── _index.md └── prometheus │ ├── _index.md │ ├── configuring-prometheus │ ├── _index.md │ └── using-service-monitors │ │ ├── _index.md │ │ └── images │ │ ├── graph.png │ │ └── targets.png │ ├── deploying-prometheus │ ├── _index.md │ ├── access-prometheus │ │ ├── _index.md │ │ └── images │ │ │ └── prometheus.png │ ├── deploying-prometheus-operator │ │ └── _index.md │ └── launch-prometheus-instance │ │ └── _index.md │ ├── using-thanos │ ├── _index.md │ ├── high-availability │ │ ├── _index.md │ │ ├── images │ │ │ ├── multiple-prometheus-with-service.png │ │ │ ├── multiple-prometheus-with-thanos.png │ │ │ ├── multiple-prometheus.png │ │ │ ├── thanos-graph.png │ │ │ └── thanos-stores.png │ │ └── static │ │ │ └── prometheus-with-sidecar.yaml │ ├── images │ │ └── thanos.png │ └── long-term-storage │ │ ├── _index.md │ │ ├── images │ │ ├── long-term-storage.png │ │ ├── thanos-query-with-store.png │ │ └── thanos-sidecar-upload.png │ │ └── static │ │ ├── thanos-with-components.yaml │ │ └── thanos-with-object-config.yaml │ └── what-is-prometheus │ ├── _index.md │ └── images │ └── logo.png ├── layouts └── partials │ ├── custom-footer.html │ ├── logo.html │ └── menu-footer.html ├── netlify.toml └── static ├── css └── theme-mine.css └── images └── favicon.png /.gitignore: -------------------------------------------------------------------------------- 1 | public/ 2 | .DS_Store -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "themes/hugo-theme-learn"] 2 | path = themes/hugo-theme-learn 3 | url = https://github.com/matcornic/hugo-theme-learn.git 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # observability-for-kubernetes 2 | 3 | This is the open source repository for the website [observability.thomasriley.co.uk](https://observability.thomasriley.co.uk) where I have been documenting my learnings from monitoring Kubernetes. 4 | 5 | This is still a work in progress and I will be improving and adding more content to this website over time. 6 | 7 | If you find this website useful please star and share it! 8 | 9 | ## Contributing 10 | 11 | If you spot a mistake please raise an Issue in this repository or if you would like to make a contribution a Pull Request is welcome! 
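## Running Locally

If you would like to preview the site locally, clone the repository together with its theme submodule and use Hugo's built-in development server. This is only a rough sketch and assumes you already have [Hugo](https://gohugo.io/) installed:

```shell
# Clone the site together with the hugo-theme-learn submodule
git clone --recurse-submodules https://github.com/thomasriley/observability-for-kubernetes.git
cd observability-for-kubernetes

# Serve the site locally (by default on http://localhost:1313)
hugo server
```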
-------------------------------------------------------------------------------- /archetypes/default.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "{{ replace .Name "-" " " | title }}" 3 | date: {{ .Date }} 4 | draft: true 5 | --- 6 | 7 | -------------------------------------------------------------------------------- /config.toml: -------------------------------------------------------------------------------- 1 | baseURL = "https://observability.thomasriley.co.uk/" 2 | languageCode = "en-gb" 3 | title = "Observability for Kubernetes" 4 | theme = "hugo-theme-learn" 5 | 6 | [params] 7 | editURL = "https://github.com/thomasriley/observability-for-kubernetes/edit/master/content/" 8 | author = "Thomas Riley" 9 | description = "Learn Monitoring & Observability for Kubernetes" 10 | # Shows a checkmark for visited pages on the menu 11 | showVisitedLinks = true 12 | # Disable search function. It will hide search bar 13 | disableSearch = false 14 | # Javascript and CSS cache are automatically busted when new version of site is generated. 15 | # Set this to true to disable this behaviour (some proxies don't handle well this optimization) 16 | disableAssetsBusting = false 17 | # Set this to true to disable copy-to-clipboard button for inline code. 18 | disableInlineCopyToClipBoard = false 19 | # A title for shortcuts in menu is set by default. Set this to true to disable it. 20 | disableShortcutsTitle = false 21 | # When using mulitlingual website, disable the switch language button. 22 | disableLanguageSwitchingButton = true 23 | # Hide breadcrumbs in the header and only show the current page title 24 | disableBreadcrumb = false 25 | # Hide Next and Previous page buttons normally displayed full height beside content 26 | disableNextPrev = false 27 | # Order sections in menu by "weight" or "title". Default to "weight" 28 | ordersectionsby = "weight" 29 | # Change default color scheme with a variant one. Can be "red", "blue", "green". 30 | themeVariant = "green" 31 | 32 | [outputs] 33 | home = [ "HTML", "RSS", "JSON"] -------------------------------------------------------------------------------- /content/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Monitoring & Observability for Kubernetes" 3 | --- 4 | 5 | # Monitoring & Observability for Kubernetes 6 | 7 | Hello, my name is [Tom Riley](https://thomasriley.co.uk). I am a Cloud & Platform Engineer who is passionate about Monitoring & Observability. I have created this website as a way of sharing my learnings from monitoring Kubernetes. 8 | 9 | Please use the navigation on the left hand side to start learning about Monitoring & Observability for Kubernetes. 10 | 11 | I suggest reading the [**Introduction**](https://observability.thomasriley.co.uk/introduction/) to learn about this website and prerequisites before progressing to learn about monitoring Kubernetes. 12 | 13 | So far this website has chapters on: 14 | 15 | * Deploying [**Prometheus**](https://observability.thomasriley.co.uk/prometheus/) to Kuberneres with Prometheus Operator and scaling to add long term storage of metrics using Thanos 16 | * [**Monitoring Kubernetes**](https://observability.thomasriley.co.uk/monitoring-kubernetes/) with Prometheus 17 | 18 | I will be adding more content over time and also improving and updating existing chapters. 
19 | -------------------------------------------------------------------------------- /content/introduction/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Introduction" 3 | date = 2019-07-03T17:16:32+01:00 4 | weight = 10 5 | chapter = true 6 | pre = "1. " 7 | +++ 8 | 9 | ### Chapter 1 10 | 11 | # Introduction 12 | 13 | ### What is this site? 14 | 15 | * I wanted a place to store all my notes and learnings about Monitoring & Observability while working with Kubernetes and containerisation. 16 | * Instead of maintaining a private set of notes, I created this site so I can give back to the community. 17 | * If you spot a mistake please create an [issue](https://github.com/thomasriley/observability-for-kubernetes/issues) or if you wish to propose a change, please raise a [pull request](https://github.com/thomasriley/observability-for-kubernetes/pulls). 18 | * I hope you find this useful! If you do, please [star](https://github.com/thomasriley/observability-for-kubernetes/stargazers) it on GitHub and share it! 19 | 20 | ### Who am I? 21 | 22 | * My name is Tom Riley and I am a Cloud & Platform Engineer who is passionate about Monitoring & Observability. 23 | * Please see my website [thomasriley.co.uk](https://thomasriley.co.uk) to learn more or if you wish to get in touch. 24 | * You can [contact me](mailto:contact@thomasriley.co.uk) via email. 25 | -------------------------------------------------------------------------------- /content/introduction/kubernetes-workspace/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Kubernetes Workspace" 3 | date: 2019-07-03T17:23:43+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | ## Kubernetes 9 | 10 | These tutorials assume you have access to a Kubernetes environment with full cluster admin privileges and have a basic understanding of Kubernetes. 11 | 12 | If you need to provision a Kubernetes environment, there are a few options I would suggest: 13 | 14 | * Create a [free Google Cloud account](https://cloud.google.com/free/) with $300 in credit and use [Kubernetes Engine](https://cloud.google.com/kubernetes-engine/). Kubernetes Engine is not a free service but you can use the free credit to pay for the service. 15 | * Create a [DigitalOcean](https://digitalocean.com/) account and use their [Managed Kubernetes Service](https://www.digitalocean.com/products/kubernetes/). This is not a free service and there is no free credit easily available. 16 | * Use [minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/) or [KIND](https://github.com/kubernetes-sigs/kind) for running Kubernetes locally on your machine. 17 | 18 | ## Other Requirements 19 | 20 | Install the **Helm client** on your laptop and **initialise Helm within Kubernetes** with cluster admin privileges. You can follow the [Helm documentation](https://helm.sh/docs/using_helm/) on how to do this. 21 | 22 | Lastly, install [**Kubectl**](https://kubernetes.io/docs/tasks/tools/install-kubectl/) if you have not already so you can interact with Kubernetes. 23 | -------------------------------------------------------------------------------- /content/introduction/using-this-site/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using This Site" 3 | date: 2019-07-03T17:23:26+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | I have structured this site as a series of hands-on tutorials. 
Unless otherwise stated, this is not a guide to building production-ready systems. It is, however, a set of simple tutorials to help you on your journey of learning more about monitoring & observability for Kubernetes. 9 | 10 | You can use the arrow on the right-hand side of the screen to navigate through to the next part of the tutorial. The left-hand navigation will show you your progress. 11 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Monitoring Kubernetes" 3 | date = 2019-07-03T22:46:02+01:00 4 | weight = 30 5 | chapter = true 6 | pre = "3. " 7 | +++ 8 | 9 | ### Chapter 3 10 | 11 | # Monitoring Kubernetes 12 | 13 | This chapter covers specific details on monitoring Kubernetes. 14 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Metrics" 3 | date: 2019-07-03T22:57:19+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | Once Prometheus is up and running in a Kubernetes cluster, you can start collecting metrics from the different components of Kubernetes. If you do not yet have Prometheus running in Kubernetes please refer back to the [**Prometheus**](https://observability.thomasriley.co.uk/prometheus/) Chapter first. 9 | 10 | Within the Prometheus ecosystem there is a concept of creating applications that interrogate a service and expose a Prometheus-formatted metrics endpoint that can then be scraped by Prometheus. These applications are known as [**Prometheus Exporters**](https://prometheus.io/docs/instrumenting/exporters/). 11 | 12 | In this section we will look at some of the Prometheus Exporters that are available for collecting metrics for monitoring Kubernetes: 13 | 14 | * kube-state-metrics 15 | * Node Exporter 16 | * Kubelet & cAdvisor 17 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/kube-state-metrics/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Kube State Metrics" 3 | date: 2019-07-04T11:03:44+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | ## Overview 9 | 10 | The [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) application extracts metrics from Kubernetes about the state of the different types of objects in Kubernetes, such as Pods, Deployments, StatefulSets, etc. 11 | 12 | The project's own description sums it up well: 13 | 14 | ```text 15 | kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. (See examples in the Metrics section below.) It is not focused on the health of the individual Kubernetes components, but rather on the health of the various objects inside, such as deployments, nodes and pods. 16 | 17 | kube-state-metrics is about generating metrics from Kubernetes API objects without modification. This ensures that features provided by kube-state-metrics have the same grade of stability as the Kubernetes API objects themselves. In turn, this means that kube-state-metrics in certain situations may not show the exact same values as kubectl, as kubectl applies certain heuristics to display comprehensible messages. 
kube-state-metrics exposes raw data unmodified from the Kubernetes API, this way users have all the data they require and perform heuristics as they see fit. 18 | 19 | The metrics are exported on the HTTP endpoint /metrics on the listening port (default 80). They are served as plaintext. They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. You can also open /metrics in a browser to see the raw metrics. 20 | ``` 21 | 22 | ## Deployment 23 | 24 | To use kube-state-metrics we need to deploy a single replica of kube-state-metrics as a Pod in the target Kubernetes cluster. 25 | 26 | Create a file called **kube-state-metrics.yaml** and add the following: 27 | 28 | ```yaml 29 | --- 30 | apiVersion: v1 31 | kind: ServiceAccount 32 | metadata: 33 | labels: 34 | app: kube-state-metrics 35 | name: kube-state-metrics 36 | namespace: prometheus 37 | --- 38 | apiVersion: rbac.authorization.k8s.io/v1 39 | kind: ClusterRole 40 | metadata: 41 | name: kube-state-metrics 42 | rules: 43 | - apiGroups: [""] 44 | resources: 45 | - configmaps 46 | - secrets 47 | - nodes 48 | - pods 49 | - services 50 | - resourcequotas 51 | - replicationcontrollers 52 | - limitranges 53 | - persistentvolumeclaims 54 | - persistentvolumes 55 | - namespaces 56 | - endpoints 57 | verbs: ["list", "watch"] 58 | - apiGroups: ["extensions"] 59 | resources: 60 | - daemonsets 61 | - deployments 62 | - replicasets 63 | - ingresses 64 | verbs: ["list", "watch"] 65 | - apiGroups: ["apps"] 66 | resources: 67 | - daemonsets 68 | - deployments 69 | - replicasets 70 | - statefulsets 71 | verbs: ["list", "watch"] 72 | - apiGroups: ["batch"] 73 | resources: 74 | - cronjobs 75 | - jobs 76 | verbs: ["list", "watch"] 77 | - apiGroups: ["autoscaling"] 78 | resources: 79 | - horizontalpodautoscalers 80 | verbs: ["list", "watch"] 81 | - apiGroups: ["policy"] 82 | resources: 83 | - poddisruptionbudgets 84 | verbs: ["list", "watch"] 85 | - apiGroups: ["certificates.k8s.io"] 86 | resources: 87 | - certificatesigningrequests 88 | verbs: ["list", "watch"] 89 | - apiGroups: ["storage.k8s.io"] 90 | resources: 91 | - storageclasses 92 | verbs: ["list", "watch"] 93 | - apiGroups: ["autoscaling.k8s.io"] 94 | resources: 95 | - verticalpodautoscalers 96 | verbs: ["list", "watch"] 97 | --- 98 | apiVersion: rbac.authorization.k8s.io/v1 99 | kind: ClusterRoleBinding 100 | metadata: 101 | labels: 102 | app: kube-state-metrics 103 | name: kube-state-metrics 104 | roleRef: 105 | apiGroup: rbac.authorization.k8s.io 106 | kind: ClusterRole 107 | name: kube-state-metrics 108 | subjects: 109 | - kind: ServiceAccount 110 | name: kube-state-metrics 111 | namespace: prometheus 112 | --- 113 | apiVersion: extensions/v1beta1 114 | kind: Deployment 115 | metadata: 116 | labels: 117 | app: kube-state-metrics 118 | name: kube-state-metrics 119 | namespace: prometheus 120 | spec: 121 | replicas: 1 122 | selector: 123 | matchLabels: 124 | app: kube-state-metrics 125 | strategy: 126 | rollingUpdate: 127 | maxSurge: 1 128 | maxUnavailable: 0 129 | type: RollingUpdate 130 | template: 131 | metadata: 132 | labels: 133 | app: kube-state-metrics 134 | spec: 135 | containers: 136 | - image: gcr.io/google_containers/kube-state-metrics:v1.6.0 137 | imagePullPolicy: IfNotPresent 138 | livenessProbe: 139 | failureThreshold: 3 140 | httpGet: 141 | path: / 142 | port: 8080 143 | scheme: HTTP 144 | initialDelaySeconds: 30 145 | periodSeconds: 10 146 | successThreshold: 1 147 | timeoutSeconds: 30 148 | name: 
kube-state-metrics 149 | ports: 150 | - containerPort: 8080 151 | protocol: TCP 152 | readinessProbe: 153 | failureThreshold: 3 154 | httpGet: 155 | path: / 156 | port: 8080 157 | scheme: HTTP 158 | initialDelaySeconds: 30 159 | periodSeconds: 10 160 | successThreshold: 1 161 | timeoutSeconds: 5 162 | resources: 163 | limits: 164 | cpu: 500m 165 | memory: 768Mi 166 | requests: 167 | cpu: 250m 168 | memory: 768Mi 169 | restartPolicy: Always 170 | serviceAccount: kube-state-metrics 171 | serviceAccountName: kube-state-metrics 172 | --- 173 | apiVersion: v1 174 | kind: Service 175 | metadata: 176 | labels: 177 | app: kube-state-metrics 178 | name: kube-state-metrics 179 | namespace: prometheus 180 | spec: 181 | ports: 182 | - name: kube-state-metrics 183 | port: 80 184 | protocol: TCP 185 | targetPort: 8080 186 | selector: 187 | app: kube-state-metrics 188 | type: ClusterIP 189 | --- 190 | apiVersion: monitoring.coreos.com/v1 191 | kind: ServiceMonitor 192 | metadata: 193 | labels: 194 | app: kube-state-metrics 195 | serviceMonitorSelector: prometheus 196 | name: kube-state-metrics 197 | namespace: prometheus 198 | spec: 199 | endpoints: 200 | - honorLabels: true 201 | interval: 30s 202 | path: /metrics 203 | targetPort: 8080 204 | jobLabel: kube-state-metrics 205 | namespaceSelector: 206 | matchNames: 207 | - prometheus 208 | selector: 209 | matchLabels: 210 | app: kube-state-metrics 211 | ``` 212 | 213 | There is a lot going on in the YAML code above. But to summarise, we are creating a Cluster Role called **kube-state-metrics** that includes all the required RBAC permissions for the service to operate successfully. We then bind the Cluster Role to a Service Account that will be used by the Pod that is created. Speaking of that Pod, we create a Deployment object to actually deploy kube-state-metrics to the **prometheus** namespace, configuring it to use the Service Account we created. Lastly we create a ClusterIP Service and a ServiceMonitor within the **prometheus** namespace so that Prometheus can scrape the metrics that are exposed by kube-state-metrics. 214 | 215 | Go ahead and install kube-state-metrics into your Kubernetes cluster by executing `kubectl apply -f kube-state-metrics.yaml`. 216 | 217 | You can then use `kubectl get pods --namespace prometheus` to see the **kube-state-metrics** Pod being created by Kubernetes. After a brief moment you can then check the configured Targets in Prometheus and you will see that **kube-state-metrics** is now being successfully scraped. 218 | 219 | ## Useful Metrics 220 | 221 | The [documenation for kube-state-metrics](https://github.com/kubernetes/kube-state-metrics/tree/master/docs) provides a wealth of useful information on the metrics that are exposed by the service. In a later section, we will look at using these metrics to build powerful dashboards for visualising the health of a Kubernetes cluster. 222 | 223 | However before we move on lets take a look at how we can make use of kube-state-metrics. 224 | 225 | Lets say we wanted to see all the **StatefulSets** deployed in the Kubernetes cluster we can use the `kube_statefulset_created` metric. If we query Prometheus for this metrics, it will return a metric for each StatefulSet in the cluster and include Prometheus Labels that provide us with metadata about the StatefulSet. 
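If you prefer the command line to the Prometheus UI, the same query can be issued against the Prometheus HTTP API. This is just a quick sketch and assumes the `kubectl port-forward` to Prometheus on **localhost:9090** (described in the Prometheus chapter) is still running:

```shell
# Ask Prometheus for the kube_statefulset_created metric via the HTTP API
curl -s 'http://localhost:9090/api/v1/query' --data-urlencode 'query=kube_statefulset_created'
```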
226 | 227 | ![Kube StatefulSet Created](/monitoring-kubernetes/metrics/kube-state-metrics/images/kube-statefulset-created.png?classes=shadow&width=55pc) 228 | 229 | Above we can see that a single metric was returned, which is as follows: 230 | 231 | **kube_statefulset_created{endpoint="8080",instance="10.8.4.8:8080",job="kube-state-metrics",namespace="prometheus",pod="kube-state-metrics-6f75b8b674-tspg8",service="kube-state-metrics",statefulset="prometheus-prometheus"} 1562167613** 232 | 233 | The metric shows that the StatefulSet named **prometheus-prometheus** in the namespace **prometheus** was created on 3rd July 2019 at 15:26 UTC. The time comes from the value **1562167613** which is a UNIX timestamp. 234 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/kube-state-metrics/images/kube-statefulset-created.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/monitoring-kubernetes/metrics/kube-state-metrics/images/kube-statefulset-created.png -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/kubelet-cadvisor/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Kubelet & cAdvisor" 3 | date: 2019-07-04T17:54:52+01:00 4 | weight: 30 5 | draft: false 6 | --- 7 | 8 | ## Overview 9 | 10 | Kubelet is a service that runs on each worker node in a Kubernetes cluster and is responsible for managing the Pods and containers on a machine. cAdvisor is a container resource usage and performance analysis tool, open sourced by Google. For monitoring Kubernetes with Prometheus we care about Kubelet and cAdvisor because we can scrape metrics from these services regarding container resource usage. 11 | 12 | ## Deployment 13 | 14 | We do not need to deploy a Prometheus Exporter to scrape metrics from Kubelet and cAdvisor as they expose metrics endpoints out of the box, therefore we only need to configure Prometheus to start scraping metrics using a ServiceMonitor. 15 | 16 | Create a file called **kubelet.yaml** and add the following: 17 | 18 | ```yaml 19 | apiVersion: monitoring.coreos.com/v1 20 | kind: ServiceMonitor 21 | metadata: 22 | labels: 23 | app: kubelet 24 | serviceMonitorSelector: prometheus 25 | name: kubelet 26 | namespace: prometheus 27 | spec: 28 | endpoints: 29 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 30 | honorLabels: true 31 | interval: 30s 32 | port: http-metrics 33 | scheme: http 34 | tlsConfig: 35 | insecureSkipVerify: true 36 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 37 | honorLabels: true 38 | interval: 30s 39 | path: /metrics/cadvisor 40 | port: http-metrics 41 | scheme: http 42 | tlsConfig: 43 | insecureSkipVerify: true 44 | jobLabel: kubelet 45 | namespaceSelector: 46 | matchNames: 47 | - kube-system 48 | selector: 49 | matchLabels: 50 | k8s-app: kubelet 51 | ``` 52 | 53 | The ServiceMonitor defined in the YAML above will scrape the **/metrics** and **/metrics/cadvisor** endpoints on Kubelet via the **kubelet** Service in the **kube-system** namespace. It is worth noting that the configuration of Kubelet may differ depending on your Kubernetes environment. The example above is tested to work on GCP Kubernetes Engine. 
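Before applying the ServiceMonitor it is worth confirming that your cluster actually exposes a **kubelet** Service in **kube-system** carrying the **k8s-app: kubelet** label that the selector above relies on, since the Service name and labels can vary between environments. A quick check, nothing more:

```shell
# Look for a kubelet Service carrying the k8s-app=kubelet label
kubectl get service --namespace kube-system -l k8s-app=kubelet

# The Endpoints object behind it should list one address per worker node
kubectl get endpoints kubelet --namespace kube-system
```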
54 | 55 | Go ahead and install the ServiceMonitor into your Kubernetes cluster by executing `kubectl apply -f kubelet.yaml`. 56 | 57 | After a few moments, you will see two Targets being scraped by Prometheus. 58 | 59 | ![Kubelet Targets](/monitoring-kubernetes/metrics/kubelet-cadvisor/images/kubelet.png?classes=shadow&width=55pc) 60 | 61 | ## Useful Metrics 62 | 63 | There are many useful metrics exposed by Kubelet for container resource usage. 64 | 65 | Let's start by looking at CPU usage metrics: 66 | 67 | * `container_cpu_system_seconds_total` - Usage of system CPU time 68 | * `container_cpu_user_seconds_total` - Usage of user CPU time 69 | * `container_cpu_usage_seconds_total` - Total CPU usage time (system + user) 70 | 71 | There are a number of metrics available for memory, but to best track the actual memory usage of a Pod use the `container_memory_working_set_bytes` metric. 72 | 73 | These metrics include the labels **pod_name** and **namespace** to help you identify the resource usage of specific Pods per Kubernetes Namespace. 74 | -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/kubelet-cadvisor/images/kubelet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/monitoring-kubernetes/metrics/kubelet-cadvisor/images/kubelet.png -------------------------------------------------------------------------------- /content/monitoring-kubernetes/metrics/node-exporter/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Node Exporter" 3 | date: 2019-07-04T16:21:58+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | ## Overview 9 | 10 | The Node Exporter is a Prometheus Exporter developed by the Prometheus project. It is not specific to Kubernetes and is designed to expose hardware and OS metrics from *NIX based kernels. The project can be found [here](https://github.com/prometheus/node_exporter) on GitHub. 11 | 12 | We will look at using the Node Exporter to expose metrics for each node running in a Kubernetes cluster. 13 | 14 | ## Deployment 15 | 16 | The Node Exporter needs to run on each node in the Kubernetes cluster, therefore we will use a DaemonSet to achieve this. 
17 | 18 | Create a file called **node-exporter.yaml** and add the following: 19 | 20 | ```yaml 21 | --- 22 | apiVersion: extensions/v1beta1 23 | kind: DaemonSet 24 | metadata: 25 | labels: 26 | app: node-exporter 27 | name: node-exporter 28 | namespace: prometheus 29 | spec: 30 | selector: 31 | matchLabels: 32 | app: node-exporter 33 | template: 34 | metadata: 35 | annotations: 36 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 37 | labels: 38 | app: node-exporter 39 | spec: 40 | containers: 41 | - args: 42 | - --web.listen-address=0.0.0.0:9100 43 | - --path.procfs=/host/proc 44 | - --path.sysfs=/host/sys 45 | image: quay.io/prometheus/node-exporter:v0.18.1 46 | imagePullPolicy: IfNotPresent 47 | name: node-exporter 48 | ports: 49 | - containerPort: 9100 50 | hostPort: 9100 51 | name: metrics 52 | protocol: TCP 53 | resources: 54 | limits: 55 | cpu: 200m 56 | memory: 50Mi 57 | requests: 58 | cpu: 100m 59 | memory: 30Mi 60 | volumeMounts: 61 | - mountPath: /host/proc 62 | name: proc 63 | readOnly: true 64 | - mountPath: /host/sys 65 | name: sys 66 | readOnly: true 67 | hostNetwork: true 68 | hostPID: true 69 | restartPolicy: Always 70 | tolerations: 71 | - effect: NoSchedule 72 | operator: Exists 73 | - effect: NoExecute 74 | operator: Exists 75 | volumes: 76 | - hostPath: 77 | path: /proc 78 | type: "" 79 | name: proc 80 | - hostPath: 81 | path: /sys 82 | type: "" 83 | name: sys 84 | --- 85 | apiVersion: v1 86 | kind: Service 87 | metadata: 88 | labels: 89 | app: node-exporter 90 | name: node-exporter 91 | namespace: prometheus 92 | spec: 93 | ports: 94 | - name: node-exporter 95 | port: 9100 96 | protocol: TCP 97 | targetPort: 9100 98 | selector: 99 | app: node-exporter 100 | sessionAffinity: None 101 | type: ClusterIP 102 | --- 103 | apiVersion: monitoring.coreos.com/v1 104 | kind: ServiceMonitor 105 | metadata: 106 | labels: 107 | app: node-exporter 108 | serviceMonitorSelector: prometheus 109 | name: node-exporter 110 | namespace: prometheus 111 | spec: 112 | endpoints: 113 | - honorLabels: true 114 | interval: 30s 115 | path: /metrics 116 | targetPort: 9100 117 | jobLabel: node-exporter 118 | namespaceSelector: 119 | matchNames: 120 | - prometheus 121 | selector: 122 | matchLabels: 123 | app: node-exporter 124 | ``` 125 | 126 | The above YAML will create a DaemonSet that launches the Node Exporter on each node in the Kubernetes cluster. It includes a Kubernetes Service and ServiceMonitor to scrape metrics from all instances of Node Exporter. 127 | 128 | Go ahead and install Node Exporter into your Kubernetes cluster by executing `kubectl apply -f node-exporter.yaml`. 129 | 130 | You can then use `kubectl get pods --namespace prometheus` to see the **node-exporter** Pod(s) being created by Kubernetes. After a brief moment you can then check the configured Targets in Prometheus and you will see that **node-exporter** is now being successfully scraped. 131 | 132 | ## Useful Metrics 133 | 134 | Node Exporter has numerous collectors designed to gather OS and hardware metrics from various sources on a node. 
If you check the log output from a Node Exporter Pod using `kubectl logs` you can see the collectors that are active: 135 | 136 | ```shell 137 | $kubectl logs node-exporter-c8cwp --namespace prometheus 138 | time="2019-07-04T15:47:47Z" level=info msg="Enabled collectors:" source="node_exporter.go:97" 139 | time="2019-07-04T15:47:47Z" level=info msg=" - arp" source="node_exporter.go:104" 140 | time="2019-07-04T15:47:47Z" level=info msg=" - bcache" source="node_exporter.go:104" 141 | time="2019-07-04T15:47:47Z" level=info msg=" - bonding" source="node_exporter.go:104" 142 | time="2019-07-04T15:47:47Z" level=info msg=" - conntrack" source="node_exporter.go:104" 143 | time="2019-07-04T15:47:47Z" level=info msg=" - cpu" source="node_exporter.go:104" 144 | time="2019-07-04T15:47:47Z" level=info msg=" - cpufreq" source="node_exporter.go:104" 145 | time="2019-07-04T15:47:47Z" level=info msg=" - diskstats" source="node_exporter.go:104" 146 | time="2019-07-04T15:47:47Z" level=info msg=" - edac" source="node_exporter.go:104" 147 | time="2019-07-04T15:47:47Z" level=info msg=" - entropy" source="node_exporter.go:104" 148 | time="2019-07-04T15:47:47Z" level=info msg=" - filefd" source="node_exporter.go:104" 149 | time="2019-07-04T15:47:47Z" level=info msg=" - filesystem" source="node_exporter.go:104" 150 | time="2019-07-04T15:47:47Z" level=info msg=" - hwmon" source="node_exporter.go:104" 151 | time="2019-07-04T15:47:47Z" level=info msg=" - infiniband" source="node_exporter.go:104" 152 | time="2019-07-04T15:47:47Z" level=info msg=" - ipvs" source="node_exporter.go:104" 153 | time="2019-07-04T15:47:47Z" level=info msg=" - loadavg" source="node_exporter.go:104" 154 | time="2019-07-04T15:47:47Z" level=info msg=" - mdadm" source="node_exporter.go:104" 155 | time="2019-07-04T15:47:47Z" level=info msg=" - meminfo" source="node_exporter.go:104" 156 | time="2019-07-04T15:47:47Z" level=info msg=" - netclass" source="node_exporter.go:104" 157 | time="2019-07-04T15:47:47Z" level=info msg=" - netdev" source="node_exporter.go:104" 158 | time="2019-07-04T15:47:47Z" level=info msg=" - netstat" source="node_exporter.go:104" 159 | time="2019-07-04T15:47:47Z" level=info msg=" - nfs" source="node_exporter.go:104" 160 | time="2019-07-04T15:47:47Z" level=info msg=" - nfsd" source="node_exporter.go:104" 161 | time="2019-07-04T15:47:47Z" level=info msg=" - pressure" source="node_exporter.go:104" 162 | time="2019-07-04T15:47:47Z" level=info msg=" - sockstat" source="node_exporter.go:104" 163 | time="2019-07-04T15:47:47Z" level=info msg=" - stat" source="node_exporter.go:104" 164 | time="2019-07-04T15:47:47Z" level=info msg=" - textfile" source="node_exporter.go:104" 165 | time="2019-07-04T15:47:47Z" level=info msg=" - time" source="node_exporter.go:104" 166 | time="2019-07-04T15:47:47Z" level=info msg=" - timex" source="node_exporter.go:104" 167 | time="2019-07-04T15:47:47Z" level=info msg=" - uname" source="node_exporter.go:104" 168 | time="2019-07-04T15:47:47Z" level=info msg=" - vmstat" source="node_exporter.go:104" 169 | time="2019-07-04T15:47:47Z" level=info msg=" - xfs" source="node_exporter.go:104" 170 | time="2019-07-04T15:47:47Z" level=info msg=" - zfs" source="node_exporter.go:104" 171 | ``` 172 | 173 | If you refer back to the Node Exporter [documenation](https://github.com/prometheus/node_exporter#collectors) you can see the method that each of these collectors uses to acquire metrics. For example, the **arp** collector exposes the metrics available in **/proc/net/arp** on Linux. 
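If you want to see the raw output of a particular collector for yourself, you can port-forward to one of the Node Exporter Pods and fetch its metrics endpoint directly. A small example, reusing the Pod name from the log output above (your Pod name will differ):

```shell
# In one terminal, forward a local port to a node-exporter Pod
kubectl port-forward node-exporter-c8cwp 9100:9100 --namespace prometheus

# In another terminal, fetch the metrics endpoint and filter for the arp collector
curl -s http://localhost:9100/metrics | grep ^node_arp
```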
174 | 175 | In Prometheus, you will see that the majority of metrics exposed by the Node Exporter are prefixed with **node__**. For example, the arp collector described above exposes a metric called **node_arp_entries** that contains the number of ARP entries in the ARP table for each network interface on a node. 176 | -------------------------------------------------------------------------------- /content/prometheus/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Prometheus" 3 | date = 2019-07-02T11:59:25+01:00 4 | weight = 20 5 | chapter = true 6 | pre = "2. " 7 | +++ 8 | 9 | ### Chapter 2 10 | 11 | # Metrics Monitoring with Prometheus 12 | 13 | These workshops cover the use of Prometheus as a metrics oriented monitoring platform for Kubernetes. -------------------------------------------------------------------------------- /content/prometheus/configuring-prometheus/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Configuring Prometheus" 3 | date: 2019-07-03T12:58:43+01:00 4 | weight: 30 5 | draft: false 6 | --- 7 | 8 | Now that you have deployed an instance of Prometheus, lets look at how to configure it to monitor a service. 9 | 10 | In Prometheus if you select **Status > Configuration** (or click directly [here](http://localhost:9090/config)) you will see that out of the box it only has the configuration below: 11 | 12 | ``` 13 | global: 14 | scrape_interval: 1m 15 | scrape_timeout: 10s 16 | evaluation_interval: 1m 17 | ``` 18 | 19 | The Prometheus Operator does a whole lot more than simply just deploy Prometheus. It is also a very powerful tool for automating the configuration of Prometheus within Kubernetes. In the next section we will look at how we can use it to configure Prometheus. 20 | -------------------------------------------------------------------------------- /content/prometheus/configuring-prometheus/using-service-monitors/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using Service Monitors" 3 | date: 2019-07-03T13:45:50+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | Prometheus uses a pull based model for collecting metrics from applications and services. This means the applications and services must expose a HTTP(S) endpoint containing Prometheus formatted metrics. Prometheus will then, as per its configuration, periodically scrape metrics from these HTTP(S) endpoints. 9 | 10 | The Prometheus Operator includes a Custom Resource Definition that allows the definition of the ServiceMonitor. The ServiceMonitor is used to define an application you wish to scrape metrics from within Kubernetes, the controller will action the ServiceMonitors we define and automatically build the required Prometheus configuration. 11 | 12 | Within the ServiceMonitor we specify the Kubernetes Labels that the Operator can use to identify the Kubernetes Service which in turn then identifies the Pods, that we wish to monitor. Lets look at how we can use Prometheus to scrape metrics from its own inbuilt metrics endpoint. 13 | 14 | Using **kubectl describe**, we can view the Labels on the **prometheus-operated** service that the Prometheus Operator previously created. 
If you wish to see this execute `kubectl describe service prometheus-operated --namespace prometheus` in your terminal or see the example below: 15 | 16 | ```shell 17 | $kubectl describe service prometheus-operated --namespace prometheus 18 | Name: prometheus-operated 19 | Namespace: prometheus 20 | Labels: operated-prometheus=true 21 | Annotations: 22 | Selector: app=prometheus 23 | Type: ClusterIP 24 | IP: None 25 | Port: web 9090/TCP 26 | TargetPort: web/TCP 27 | Endpoints: 10.8.3.7:9090 28 | Session Affinity: None 29 | Events: 30 | ``` 31 | Now we know this Kubernetes Service has the Label **operated-prometheus=true** we can create a ServiceMonitor to target this Service. Create a file called **servicemonitor.yaml** and include the following: 32 | 33 | ```yaml 34 | apiVersion: monitoring.coreos.com/v1 35 | kind: ServiceMonitor 36 | metadata: 37 | labels: 38 | serviceMonitorSelector: prometheus 39 | name: prometheus 40 | namespace: prometheus 41 | spec: 42 | endpoints: 43 | - interval: 30s 44 | targetPort: 9090 45 | path: /metrics 46 | namespaceSelector: 47 | matchNames: 48 | - prometheus 49 | selector: 50 | matchLabels: 51 | operated-prometheus: "true" 52 | ``` 53 | 54 | This Kubernetes Resource uses the **monitoring.coreos.com/v1** API Version that was installed into Kubernetes by the Prometheus Operator, as explained previously. It uses the **namespaceSelector** to specify the Kubernetes Namespace in which we wish to locate the Service, in this example above we are selecting within the **prometheus** namespace. It then uses the **selector** to specify that it must match the Label **operated-prometheus** being set as **"true"**. 55 | 56 | Under the **endpoints** key we must specify one or more scrape targets for the target service. In this example it will scrape each Pod it selects on TCP port **9090** on the URL **/metrics** every **30 seconds**. 57 | 58 | Now apply this YAML to the cluster by executing `kubectl apply -f servicemonitor.yaml`. You can then validate this has been created by execute `kubectl get servicemonitor --namespace prometheus`: 59 | 60 | ```shell 61 | $kubectl get servicemonitor 62 | NAME AGE 63 | prometheus 1m 64 | ``` 65 | 66 | Before Prometheus Operator will automatically update the running Prometheus instance configuration to set it to scrape metrics from itself, there is one more thing we must do. On the **ServiceMonitor** we defined a label on the resource called **serviceMonitorSelector**, as shown below: 67 | 68 | ```yaml 69 | metadata: 70 | labels: 71 | serviceMonitorSelector: prometheus 72 | ``` 73 | 74 | You now need to update the Prometheus Resource configuration to instruct the Prometheus Operator to configure the Prometheus instance using all **ServiceMonitors** that have the **serviceMonitorSelector** Label set as **prometheus**. 
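Before doing that, you can double check which ServiceMonitors actually carry this label, and therefore what the Operator will match, with a simple label selector query:

```shell
# List ServiceMonitors in the prometheus namespace carrying the selector label
kubectl get servicemonitors --namespace prometheus -l serviceMonitorSelector=prometheus
```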
75 | 76 | Update the previous YAML file you created called **prometheus.yaml** and add the **serviceMonitorSelector** key to the Prometheus resource: 77 | 78 | ```yaml 79 | serviceMonitorSelector: 80 | matchLabels: 81 | serviceMonitorSelector: prometheus 82 | ``` 83 | 84 | The updated Prometheus resource should look similar to the example below: 85 | 86 | ```yaml 87 | apiVersion: monitoring.coreos.com/v1 88 | kind: Prometheus 89 | metadata: 90 | name: prometheus 91 | namespace: prometheus 92 | spec: 93 | baseImage: quay.io/prometheus/prometheus 94 | logLevel: info 95 | podMetadata: 96 | annotations: 97 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 98 | labels: 99 | app: prometheus 100 | replicas: 1 101 | resources: 102 | limits: 103 | cpu: 1 104 | memory: 2Gi 105 | requests: 106 | cpu: 1 107 | memory: 2Gi 108 | retention: 12h 109 | serviceAccountName: prometheus-service-account 110 | serviceMonitorSelector: 111 | matchLabels: 112 | serviceMonitorSelector: prometheus 113 | storage: 114 | volumeClaimTemplate: 115 | apiVersion: v1 116 | kind: PersistentVolumeClaim 117 | metadata: 118 | name: prometheus-pvc 119 | spec: 120 | accessModes: 121 | - ReadWriteOnce 122 | resources: 123 | requests: 124 | storage: 10Gi 125 | version: v2.10.0 126 | ``` 127 | 128 | Now apply this change to the Kubernetes cluster by running `kubectl apply -f prometheus.yaml`. 129 | 130 | After a few moment the Prometheus Operator will automatically update the Prometheus instance you created with the Target configuration to scrape the Prometheus metrics endpoint on the Pod. After a minute or two, check the [Prometheus Configuration](http://localhost:9090/config) again, you will see the scrape config appear under the **scrape_configs** key. 131 | 132 | In the Prometheus UI if you select **Status > Targets** (or go [here](http://localhost:9090/targets)) you will see details of the target Prometheus has identified, which is the single instance of Prometheus you launched: 133 | 134 | ![Prometheus Targets](/prometheus/configuring-prometheus/using-service-monitors/images/targets.png?classes=shadow&width=55pc) 135 | 136 | If you now select **Graph** at the top, the **Expression** search box will now auto-complete when you start typing. Go ahead and type 'prometheus' and you will see some metric names appear. If you select one and click **Execute** it will query for that metric. Here is an example for **prometheus_build_info**: 137 | 138 | ![Prometheus Graph](/prometheus/configuring-prometheus/using-service-monitors/images/graph.png?classes=shadow&width=55pc) 139 | 140 | You have now successfully configured Prometheus using the ServiceMonitor. Going forward when adding more services to Kubernetes that require Prometheus monitoring, the ServiceMonitor can be used to configure Prometheus as has been demonstrated. 
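A quick way to confirm the new target stays healthy without the UI is the **up** metric, which Prometheus records as **1** for every target it is scraping successfully, or the targets endpoint of the HTTP API. A small example, assuming the port-forward to **localhost:9090** is still active:

```shell
# List the active targets and their health via the Prometheus HTTP API
curl -s http://localhost:9090/api/v1/targets

# Or query the 'up' metric, which is 1 for each target scraped successfully
curl -s 'http://localhost:9090/api/v1/query' --data-urlencode 'query=up'
```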
141 | -------------------------------------------------------------------------------- /content/prometheus/configuring-prometheus/using-service-monitors/images/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/configuring-prometheus/using-service-monitors/images/graph.png -------------------------------------------------------------------------------- /content/prometheus/configuring-prometheus/using-service-monitors/images/targets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/configuring-prometheus/using-service-monitors/images/targets.png -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Deploying Prometheus" 3 | date: 2019-07-02T22:09:56+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | There are a number of ways you can deploy Prometheus to Kubernetes: 9 | 10 | * [Prometheus Operator](https://github.com/coreos/prometheus-operator.git) 11 | * [kube-prometheus](https://github.com/coreos/kube-prometheus) 12 | * [Community Helm Chart](https://github.com/prometheus-community/helm-charts) 13 | 14 | ### Three Options 15 | 16 | Let's look at these three options available for deploying Prometheus to Kubernetes. 17 | 18 | #### Prometheus Operator 19 | 20 | This is a Kubernetes Operator that provides several Custom Resource Definitions (CRDs) that will allow us to define and configure instances of Prometheus via Kubernetes resources. The Operator contains all the logic for managing the deployment and automated configuration of Prometheus based on the YAML configuration the user deploys to Kubernetes. 21 | 22 | #### kube-prometheus 23 | 24 | This project acts as a jsonnet library for deploying Prometheus Operator and an entire Prometheus monitoring stack. 25 | 26 | #### Community Helm Chart 27 | 28 | This is similar to the kube-prometheus project, however the deployment is done via Helm. This is a community-driven chart in the stable Helm chart repository. 29 | 30 | ### Next 31 | 32 | In the subsequent workshops we will deploy the Prometheus Operator using the community Helm chart, however we will disable the default bundled Prometheus instance configuration that is provided so that we can go through the process of using the Prometheus Operator step by step. 33 | -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/access-prometheus/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Access Prometheus" 3 | date: 2019-07-03T12:29:51+01:00 4 | weight: 30 5 | draft: false 6 | --- 7 | 8 | Now that you have deployed an instance of Prometheus, let's actually look at using it! 9 | 10 | Typically you might use an [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/), such as the [Nginx-Ingress](https://github.com/kubernetes/ingress-nginx), for exposing services such as the Prometheus UI to your users outside of a Kubernetes cluster. 
However, as this guide is not going into the specific details of building a production-ready Kubernetes environment, we will simply use Kubectl port forwarding to access the Prometheus service. 11 | 12 | Lets port forward our local environment to the Prometheus instance running in Kubernetes. To do this execute `kubectl port-forward service/prometheus-operated 9090:9090 --namespace prometheus` in your terminal. The **service** called **prometheus-operated** is created by the Operator for accessing the Prometheus instance you created. 13 | 14 | If you wish to see Kubernetes Services in the **prometheus** namespace, then execute `kubectl get services --namespace prometheus` in your terminal. 15 | 16 | You will now be able to access Prometheus in your web browser at [http://localhost:9090](http://localhost:9090). 17 | 18 | ![Prometheus](/prometheus/deploying-prometheus/access-prometheus/images/prometheus.png?classes=shadow&width=55pc) -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/access-prometheus/images/prometheus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/deploying-prometheus/access-prometheus/images/prometheus.png -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/deploying-prometheus-operator/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Deploying Prometheus Operator with Helm" 3 | date: 2019-07-02T12:08:47+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | First we will use the community maintained [Helm chart](https://github.com/helm/charts/tree/master/stable/prometheus-operator) for deploying Prometheus Operator to Kubernetes. By default, the Helm chart will also deploy and configure an instance of Prometheus however to begin with lets deploy a standalone instance of the Operator. 9 | 10 | Lets modify the default behavior of the Helm chart. Create a file called **values.yaml** containing the following: 11 | 12 | ```yaml 13 | defaultRules: 14 | create: false 15 | alertmanager: 16 | enabled: false 17 | grafana: 18 | enabled: false 19 | kubeApiServer: 20 | enabled: false 21 | kubelet: 22 | enabled: false 23 | kubeControllerManager: 24 | enabled: false 25 | coreDns: 26 | enabled: false 27 | kubeEtcd: 28 | enabled: false 29 | kubeScheduler: 30 | enabled: false 31 | kubeStateMetrics: 32 | enabled: false 33 | nodeExporter: 34 | enabled: false 35 | prometheus: 36 | enabled: false 37 | ``` 38 | 39 | Then install the Prometheus Operator via Helm using the **helm upgrade** command as shown below: 40 | 41 | ```shell 42 | helm upgrade --install prometheus-operator stable/prometheus-operator --namespace prometheus --values values.yaml 43 | ``` 44 | 45 | When this executes, Helm will display all of the resources it has successfully created in Kubernetes: 46 | 47 | ```shell 48 | $ helm upgrade --install prometheus-operator stable/prometheus-operator --namespace prometheus --values values.yaml 49 | 50 | Release "prometheus-operator" does not exist. Installing it now. 
51 | NAME: prometheus-operator 52 | LAST DEPLOYED: Tue Jun 25 22:06:52 2019 53 | NAMESPACE: prometheus 54 | STATUS: DEPLOYED 55 | 56 | RESOURCES: 57 | ==> v1/ClusterRole 58 | NAME AGE 59 | prometheus-operator-operator 1s 60 | prometheus-operator-operator-psp 1s 61 | 62 | ==> v1/ClusterRoleBinding 63 | NAME AGE 64 | prometheus-operator-operator 1s 65 | prometheus-operator-operator-psp 1s 66 | 67 | ==> v1/Deployment 68 | NAME READY UP-TO-DATE AVAILABLE AGE 69 | prometheus-operator-operator 0/1 1 0 1s 70 | 71 | ==> v1/Pod(related) 72 | NAME READY STATUS RESTARTS AGE 73 | prometheus-operator-operator-694f88774b-q4r64 0/1 ContainerCreating 0 1s 74 | 75 | ==> v1/Service 76 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 77 | prometheus-operator-operator ClusterIP 10.11.250.245 8080/TCP 1s 78 | 79 | ==> v1/ServiceAccount 80 | NAME SECRETS AGE 81 | prometheus-operator-operator 1 1s 82 | 83 | ==> v1/ServiceMonitor 84 | NAME AGE 85 | prometheus-operator-operator 1s 86 | 87 | ==> v1beta1/PodSecurityPolicy 88 | NAME PRIV CAPS SELINUX RUNASUSER FSGROUP SUPGROUP READONLYROOTFS VOLUMES 89 | prometheus-operator-operator false RunAsAny RunAsAny MustRunAs MustRunAs false configMap,emptyDir,projected,secret,downwardAPI,persistentVolumeClaim 90 | 91 | 92 | NOTES: 93 | The Prometheus Operator has been installed. Check its status by running: 94 | kubectl --namespace prometheus get pods -l "release=prometheus-operator" 95 | 96 | Visit https://github.com/coreos/prometheus-operator for instructions on how 97 | to create & configure Alertmanager and Prometheus instances using the Operator. 98 | 99 | ``` 100 | 101 | Above you can see that Helm has deployed the **stable/prometheus-operator** Helm chart under the release name **prometheus-operator** into the Kubernetes namespace **prometheus** using the Helm values we created above in values.yaml. 102 | 103 | If you then use Kubectl to list the Pods in the **prometheus** namespace you will see the Prometheus Operator is now installed: 104 | 105 | ```shell 106 | $ kubectl get pods -n prometheus 107 | NAME READY STATUS RESTARTS AGE 108 | prometheus-operator-operator-694f88774b-q4r64 1/1 Running 0 6m47s 109 | ``` 110 | -------------------------------------------------------------------------------- /content/prometheus/deploying-prometheus/launch-prometheus-instance/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Launch Prometheus Instance" 3 | date: 2019-07-03T10:35:40+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | ### Launching Prometheus 9 | 10 | Now that we have deployed Prometheus Operator we can use it to launch an instance of Prometheus. 11 | 12 | When we deployed the Operator, one of the tasks it performed when it first launched was to is install a number Custom Resource Definitions (CRDs) into Kubernetes. Out of the box, Kubernetes ships with many powerful Controllers such as the Deployment or Statefulset. CRDs provide a method of building completely bespoke Controllers that provide logic to a specific function. In this case, the CRDs installed by Prometheus Operator provide a means for launching and configuring Prometheus within Kubernetes. 
13 | 14 | If you run the `kubectl get customresourcedefinitions` command in your terminal you will see four CRDs provided by the Operator: 15 | 16 | ```shell 17 | $kubectl get customresourcedefinitions 18 | NAME CREATED AT 19 | alertmanagers.monitoring.coreos.com 2019-07-02T13:13:21Z 20 | prometheuses.monitoring.coreos.com 2019-07-02T13:13:21Z 21 | prometheusrules.monitoring.coreos.com 2019-07-02T13:13:21Z 22 | servicemonitors.monitoring.coreos.com 2019-07-02T13:13:21Z 23 | ``` 24 | 25 | To begin with we will be making use of the **prometheuses.monitoring.coreos.com** Custom Resource Definition. 26 | 27 | Create a file called **prometheus.yaml** and add the following: 28 | 29 | ```yaml 30 | apiVersion: monitoring.coreos.com/v1 31 | kind: Prometheus 32 | metadata: 33 | name: prometheus 34 | namespace: prometheus 35 | spec: 36 | baseImage: quay.io/prometheus/prometheus 37 | logLevel: info 38 | podMetadata: 39 | annotations: 40 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 41 | labels: 42 | app: prometheus 43 | replicas: 1 44 | resources: 45 | limits: 46 | cpu: 1 47 | memory: 2Gi 48 | requests: 49 | cpu: 1 50 | memory: 2Gi 51 | retention: 12h 52 | serviceAccountName: prometheus-service-account 53 | storage: 54 | volumeClaimTemplate: 55 | apiVersion: v1 56 | kind: PersistentVolumeClaim 57 | metadata: 58 | name: prometheus-pvc 59 | spec: 60 | accessModes: 61 | - ReadWriteOnce 62 | resources: 63 | requests: 64 | storage: 10Gi 65 | version: v2.10.0 66 | --- 67 | apiVersion: v1 68 | kind: ServiceAccount 69 | metadata: 70 | name: "prometheus-service-account" 71 | namespace: "prometheus" 72 | --- 73 | apiVersion: rbac.authorization.k8s.io/v1 74 | kind: ClusterRole 75 | metadata: 76 | name: "prometheus-cluster-role" 77 | rules: 78 | - apiGroups: 79 | - "" 80 | resources: 81 | - nodes 82 | - services 83 | - endpoints 84 | - pods 85 | verbs: 86 | - get 87 | - list 88 | - watch 89 | - apiGroups: 90 | - "" 91 | resources: 92 | - nodes/metrics 93 | verbs: 94 | - get 95 | - nonResourceURLs: 96 | - "/metrics" 97 | verbs: 98 | - get 99 | --- 100 | apiVersion: rbac.authorization.k8s.io/v1 101 | kind: ClusterRoleBinding 102 | metadata: 103 | name: "prometheus-cluster-role-binding" 104 | roleRef: 105 | apiGroup: rbac.authorization.k8s.io 106 | kind: ClusterRole 107 | name: "prometheus-cluster-role" 108 | subjects: 109 | - kind: ServiceAccount 110 | name: "prometheus-service-account" 111 | namespace: prometheus 112 | ``` 113 | 114 | Now apply this YAML to your Kubernetes cluster using `kubectl apply -f prometheus.yaml`. Kubectl will show that it has successfully created the configuration, as shown below: 115 | 116 | ```shell 117 | $kubectl apply -f prometheus.yaml 118 | prometheus.monitoring.coreos.com/prometheus created 119 | serviceaccount/prometheus-service-account created 120 | clusterrole.rbac.authorization.k8s.io/prometheus-cluster-role created 121 | clusterrolebinding.rbac.authorization.k8s.io/prometheus-cluster-role-binding created 122 | ``` 123 | 124 | Success! 
If you now list Pods in the **prometheus** namespace using `kubectl get pods --namespace prometheus` you will see an instance of Prometheus running alongside the Prometheus Operator: 125 | 126 | ```shell 127 | $kubectl get pods 128 | NAME READY STATUS RESTARTS AGE 129 | prometheus-operator-operator-86bc4d5568-shs69 1/1 Running 0 10m 130 | prometheus-prometheus-0 3/3 Running 0 1m 131 | ``` 132 | 133 | If you now check for the custom Prometheus resource that you have just installed into the cluster using `kubectl get prometheus --namespace prometheus` you will see the single result named **prometheus**: 134 | 135 | ```shell 136 | $kubectl get prometheus --namespace prometheus 137 | NAME AGE 138 | prometheus 6m 139 | ``` 140 | 141 | The Prometheus Operator acts as a Controller for the Custom Resources. When you deployed the **Prometheus** resource the Operator created the Prometheus instance, that you just identified when getting a list Pods in the **prometheus** namespace. 142 | 143 | ### What does this mean? 144 | 145 | Lets now take a look at the **prometheus.yaml** file we applied to Kubernetes and see what each section means. 146 | 147 | ```yaml 148 | apiVersion: monitoring.coreos.com/v1 149 | kind: Prometheus 150 | metadata: 151 | name: prometheus 152 | namespace: prometheus 153 | ``` 154 | 155 | Here we define that we wish to create an object called **prometheus** that is of the type **Prometheus** as defined by the Kind and that this Kind is part of the API Version **monitoring.coreos.com/v1**, that was previously installed into Kubernetes by the Prometheus Operator as a Custom Resource Definition. This object will be created in the **prometheus** namespace. 156 | 157 | Everything then under the **spec** of the YAML file defines what the instance of Prometheus should look like. 158 | 159 | ```yaml 160 | replicas: 1 161 | resources: 162 | limits: 163 | cpu: 1 164 | memory: 2Gi 165 | requests: 166 | cpu: 1 167 | memory: 2Gi 168 | ``` 169 | 170 | Here we define the Resource limits (CPU & Memory) that each Prometheus Pod will be granted within Kubernetes. We also specify the number of instances that we require by setting **replicas**, in this example we have just the 1 instance. 171 | 172 | ```yaml 173 | baseImage: quay.io/prometheus/prometheus 174 | version: v2.10.0 175 | ``` 176 | 177 | Setting the **baseImage** defines the actual Prometheus Docker image to be used. This will actually be defaulted to the Docker image that is released by the Prometheus project however we included it as an example. The **version** field sets the version of Prometheus you wish to use. You can see available versions on the [GitHub project](https://github.com/prometheus/prometheus/releases). 178 | 179 | ```yaml 180 | storage: 181 | volumeClaimTemplate: 182 | apiVersion: v1 183 | kind: PersistentVolumeClaim 184 | metadata: 185 | name: prometheus-pvc 186 | spec: 187 | accessModes: 188 | - ReadWriteOnce 189 | resources: 190 | requests: 191 | storage: 10Gi 192 | ``` 193 | 194 | This block defines the storage that will be used by Prometheus. By default the Operator will create Prometheus Pods that use local storage only by using an [emptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir). If you wish to retain the state of Prometheus and therefore the metrics that it stores when re-launching Prometheus, such as during a version upgrade, then you need to use persistent storage. The **PersistentVolumeClaim (PVC)** defines the specification of the storage to be used by Prometheus. 
In this example we are creating a persistent disk that is 10Gi for each instance of Prometheus that is created. 195 | 196 | If you execute `kubectl get persistentvolumeclaim --namespace prometheus` in your terminal you will see the PVC that has been created and bound to a Persistent Volume for the single instance of Prometheus that you have created: 197 | 198 | ```shell 199 | $kubectl get persistentvolumeclaim --namespace prometheus 200 | NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE 201 | prometheus-pvc-prometheus-prometheus-0 Bound pvc-c85b2a3b-9d7a-11e9-9e3c-42010a840079 10Gi RWO standard 21m 202 | ``` 203 | The **ServiceAccount**, **ClusterRole** and **ClusterRoleBinding** provide the Prometheus Pod with the permissions it requires to access the Kubernetes API as part of its service discovery process. 204 | 205 | Lastly, let's look at some of the Prometheus-specific configuration in the **prometheus.yaml** file: 206 | 207 | ```yaml 208 | logLevel: info 209 | retention: 12h 210 | ``` 211 | 212 | Here we define that Prometheus should retain 12 hours of metrics and that it should log using the **info** log level. 213 | 214 | The Prometheus Operator GitHub project provides a [full set](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md) of API documentation that defines the fields that can be set on the CRDs that it provides. You can see the specification for the **Prometheus** **Kind** [here](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec). -------------------------------------------------------------------------------- /content/prometheus/using-thanos/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using Thanos" 3 | date: 2019-07-04T18:45:04+01:00 4 | weight: 40 5 | draft: false 6 | --- 7 | 8 | ![Thanos](/prometheus/using-thanos/images/thanos.png?classes=shadow&width=25pc) 9 | 10 | The [Thanos Project](https://github.com/improbable-eng/thanos) turns Prometheus into a highly available metrics platform with unlimited metrics storage. 11 | 12 | The three key features of Thanos are as follows: 13 | 14 | * Global query view of all metrics from as many Prometheus instances as you require. 15 | * Long term storage of metrics. 16 | * High availability of Prometheus. 17 | 18 | In this section we will look at how to deploy Thanos alongside Prometheus. 19 | -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "High Availability" 3 | date: 2019-07-04T20:27:14+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | Out of the box, Prometheus does not have any concept of high availability or redundancy. Prometheus itself may be a mature and reliable product, but nothing is foolproof: you should always plan for *when* a Kubernetes worker node fails, not *if* it fails, and we must therefore be able to tolerate a Prometheus Pod restarting from time to time. Before we look at Thanos, let's see how we could tackle this problem with just Kubernetes & Prometheus. 9 | 10 | ## High Availability with Kubernetes 11 | 12 | Earlier in this chapter we used the Prometheus Operator to launch a single instance of Prometheus within Kubernetes.
To avoid the scenario of metrics being unavailable, either permanently or for a short duration of time, we can run a second instance of Prometheus. Each instance of Prometheus will run independently of the other, however each still has the same configuration as set by the Prometheus Operator. Essentially, two copies of target metrics will be scraped, as shown below: 13 | 14 | ![Two Prometheus Instances](/prometheus/using-thanos/high-availability/images/multiple-prometheus.png?classes=shadow&width=30pc) 15 | 16 | Now let's update the previous **prometheus.yaml** file to support this new architecture. We need to change the **replicas** from 1 to 2 and also add a **podAntiAffinity** so that both instances of Prometheus run on different Kubernetes worker nodes, ensuring we truly benefit from having the additional redundant instance of Prometheus. 17 | 18 | ```yaml 19 | apiVersion: monitoring.coreos.com/v1 20 | kind: Prometheus 21 | metadata: 22 | name: prometheus 23 | namespace: prometheus 24 | spec: 25 | affinity: 26 | podAntiAffinity: 27 | preferredDuringSchedulingIgnoredDuringExecution: 28 | - weight: 100 29 | podAffinityTerm: 30 | labelSelector: 31 | matchExpressions: 32 | - key: app 33 | operator: In 34 | values: 35 | - prometheus 36 | topologyKey: kubernetes.io/hostname 37 | baseImage: quay.io/prometheus/prometheus 38 | logLevel: info 39 | podMetadata: 40 | annotations: 41 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 42 | labels: 43 | app: prometheus 44 | replicas: 2 45 | resources: 46 | limits: 47 | cpu: 1 48 | memory: 2Gi 49 | requests: 50 | cpu: 1 51 | memory: 2Gi 52 | retention: 12h 53 | serviceAccountName: prometheus-service-account 54 | serviceMonitorSelector: 55 | matchLabels: 56 | serviceMonitorSelector: prometheus 57 | storage: 58 | volumeClaimTemplate: 59 | apiVersion: v1 60 | kind: PersistentVolumeClaim 61 | metadata: 62 | name: prometheus-pvc 63 | spec: 64 | accessModes: 65 | - ReadWriteOnce 66 | resources: 67 | requests: 68 | storage: 10Gi 69 | version: v2.10.0 70 | --- 71 | apiVersion: v1 72 | kind: ServiceAccount 73 | metadata: 74 | name: "prometheus-service-account" 75 | namespace: "prometheus" 76 | --- 77 | apiVersion: rbac.authorization.k8s.io/v1 78 | kind: ClusterRole 79 | metadata: 80 | name: "prometheus-cluster-role" 81 | rules: 82 | - apiGroups: 83 | - "" 84 | resources: 85 | - nodes 86 | - services 87 | - endpoints 88 | - pods 89 | verbs: 90 | - get 91 | - list 92 | - watch 93 | - apiGroups: 94 | - "" 95 | resources: 96 | - nodes/metrics 97 | verbs: 98 | - get 99 | - nonResourceURLs: 100 | - "/metrics" 101 | verbs: 102 | - get 103 | --- 104 | apiVersion: rbac.authorization.k8s.io/v1 105 | kind: ClusterRoleBinding 106 | metadata: 107 | name: "prometheus-cluster-role-binding" 108 | roleRef: 109 | apiGroup: rbac.authorization.k8s.io 110 | kind: ClusterRole 111 | name: "prometheus-cluster-role" 112 | subjects: 113 | - kind: ServiceAccount 114 | name: "prometheus-service-account" 115 | namespace: prometheus 116 | ``` 117 | 118 | Let's apply this updated **prometheus.yaml** to Kubernetes by running `kubectl apply -f prometheus.yaml`.
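Since all of these objects already exist from the earlier deployment, kubectl will report the changed Prometheus resource as updated rather than created this time around. The exact wording can vary between kubectl versions, but the output should look roughly like the following:

```shell
$kubectl apply -f prometheus.yaml
prometheus.monitoring.coreos.com/prometheus configured
serviceaccount/prometheus-service-account unchanged
clusterrole.rbac.authorization.k8s.io/prometheus-cluster-role unchanged
clusterrolebinding.rbac.authorization.k8s.io/prometheus-cluster-role-binding unchanged
```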
119 | 120 | A moment or two after applying this, check the running Pods in the **prometheus** namespace by running `kubectl get pods --namespace prometheus`: 121 | 122 | ```shell 123 | $kubectl get pods --namespace prometheus 124 | NAME READY STATUS RESTARTS AGE 125 | prometheus-operator-operator-86bc4d5568-7k6tp 1/1 Running 0 23h 126 | prometheus-prometheus-0 3/3 Running 0 2d 127 | prometheus-prometheus-1 3/3 Running 0 1m10s 128 | ``` 129 | 130 | Now let's put the reliability of this to the test: 131 | 132 | * Reconnect to Prometheus by executing `kubectl port-forward service/prometheus-operated 9090:9090 --namespace prometheus` and then access the UI at [http://localhost:9090](http://localhost:9090) 133 | * Restart one of the instances of Prometheus by running `kubectl delete pod prometheus-prometheus-0 --namespace prometheus` 134 | * Immediately check the Prometheus UI in your web browser; you will see that it is still available! 135 | 136 | This is great, however there is one thing we need to think about. If we have two instances of Prometheus with two copies of the same metrics, which should we use? 137 | 138 | Your Prometheus deployment uses a Kubernetes Service and in the previous example you used Kubectl port-forwarding, connecting to the Kubernetes Service directly, and therefore taking advantage of Kubernetes internal load balancing functionality. You have essentially implemented the illustration below: 139 | 140 | ![Two Prometheus Instances with Kubernetes Service](/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-service.png?classes=shadow&width=30pc) 141 | 142 | So when you connect to Prometheus via the Kubernetes Service the request will be serviced by one of the running Prometheus instances. However, when you make subsequent requests there is no guarantee that the request will be serviced by the same instance. Why is this an issue? The two instances of Prometheus that are running are independent of each other, and while they do have the same scrape configuration there is no guarantee that they will scrape the targets at exactly the same time, so the time series metrics that they each collect may have different values. 143 | 144 | What this all means is, each time you connect to Prometheus via the load balanced Kubernetes Service, you may see some oddness with metrics changing. When visualizing the metrics over time with dashboarding tools such as Grafana, this leads to a really poor experience for users, as each time you reload the same graph it may appear differently in the same time period. This is where Thanos can help! 145 | 146 | ## High Availability with Thanos 147 | 148 | At a high level, HA for Prometheus with Thanos works as detailed below: 149 | 150 | * First a sidecar is deployed alongside the Prometheus container and interacts with Prometheus. A sidecar is an additional container within the Kubernetes Pod running alongside other containers. 151 | * Next, an additional service called Thanos Query is deployed and configured to be aware of all instances of the Thanos Sidecar. 152 | * Thanos Query communicates with the Thanos Sidecar via [gRPC](https://grpc.io/) and de-duplicates metrics across all instances of Prometheus when executing a query. Thanos Query presents users with a Prometheus-like user interface and also exposes the Prometheus API.
153 | 154 | The diagram below shows this: 155 | 156 | ![Two Prometheus Instances with Thanos Sidecar](/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-thanos.png?classes=shadow&width=30pc) 157 | 158 | Now let's look at implementing this! 159 | 160 | The Prometheus Operator supports the configuration of the Thanos Sidecar via the Prometheus CRD, so you simply need to update the existing deployment. You then also need to deploy the Thanos Query service and configure it to federate the instances of Prometheus that are running. You will use a Kubernetes Service as the service discovery mechanism that allows Thanos Query to identify the Prometheus instances; to do this you also need to add an additional Kubernetes Pod Label to the Prometheus Pods so that you can select them with the label selector on the Kubernetes Service. 161 | 162 | Finally, you must also set an external label for the Prometheus instances to use. The external label is required by Thanos and is used to label all metrics that originate from a particular Prometheus deployment, so that the source of each metric can be identified. 163 | 164 | Update the Prometheus resource adding the **thanos** configuration, the new service discovery label, and also configure an external label for the Prometheus instances: 165 | 166 | ```yaml 167 | spec: 168 | podMetadata: 169 | labels: 170 | thanos-store-api: "true" 171 | thanos: 172 | version: v0.4.0 173 | resources: 174 | limits: 175 | cpu: 500m 176 | memory: 500Mi 177 | requests: 178 | cpu: 100m 179 | memory: 500Mi 180 | externalLabels: 181 | cluster_environment: workshop 182 | ``` 183 | 184 | Then define a Kubernetes Deployment for Thanos Query and the Kubernetes Service for the purposes of service discovery by adding the below to **prometheus.yaml** also: 185 | 186 | ```yaml 187 | --- 188 | apiVersion: apps/v1 189 | kind: Deployment 190 | metadata: 191 | name: thanos-query 192 | namespace: prometheus 193 | labels: 194 | app: thanos-query 195 | spec: 196 | replicas: 1 197 | selector: 198 | matchLabels: 199 | app: thanos-query 200 | template: 201 | metadata: 202 | labels: 203 | app: thanos-query 204 | spec: 205 | containers: 206 | - name: thanos-query 207 | image: improbable/thanos:v0.5.0 208 | resources: 209 | limits: 210 | cpu: 500m 211 | memory: 500Mi 212 | requests: 213 | cpu: 100m 214 | memory: 500Mi 215 | args: 216 | - "query" 217 | - "--log.level=debug" 218 | - "--query.replica-label=prometheus_replica" 219 | - "--store.sd-dns-resolver=miekgdns" 220 | - "--store=dnssrv+_grpc._tcp.thanos-store-api.prometheus.svc.cluster.local" 221 | ports: 222 | - name: http 223 | containerPort: 10902 224 | - name: grpc 225 | containerPort: 10901 226 | - name: cluster 227 | containerPort: 10900 228 | --- 229 | apiVersion: v1 230 | kind: Service 231 | metadata: 232 | name: "thanos-store-api" 233 | namespace: prometheus 234 | spec: 235 | type: ClusterIP 236 | clusterIP: None 237 | ports: 238 | - name: grpc 239 | port: 10901 240 | targetPort: grpc 241 | selector: 242 | thanos-store-api: "true" 243 | ``` 244 | 245 | The updated **prometheus.yaml** file should be similar to the example [here](/prometheus/using-thanos/high-availability/static/prometheus-with-sidecar.yaml). 246 | 247 | Now apply this to Kubernetes by executing `kubectl apply -f prometheus.yaml`.
You will see two things occur: the new Thanos Query service will be deployed as a Pod, and the existing Prometheus Pods will be restarted since the new Pod Label and Sidecar need to be added: 248 | 249 | ```shell 250 | $kubectl get pods --namespace prometheus 251 | NAME READY STATUS RESTARTS AGE 252 | prometheus-operator-operator-86bc4d5568-94cpf 1/1 Running 0 18m 253 | prometheus-prometheus-0 0/4 ContainerCreating 0 1s 254 | prometheus-prometheus-1 0/4 Pending 0 1s 255 | thanos-query-58bcc6dcbb-67rn4 0/1 ContainerCreating 0 4s 256 | 257 | $kubectl get pods --namespace prometheus 258 | NAME READY STATUS RESTARTS AGE 259 | prometheus-operator-operator-86bc4d5568-94cpf 1/1 Running 0 19m 260 | prometheus-prometheus-0 4/4 Running 0 1m 261 | prometheus-prometheus-1 4/4 Running 0 1m 262 | thanos-query-58bcc6dcbb-67rn4 1/1 Running 0 1m 263 | ``` 264 | 265 | You can see that Pods **prometheus-prometheus-0** and **prometheus-prometheus-1** now show **4/4** on the container readiness. Previously this only showed 3 containers, but following this change there are now 4 containers in a Prometheus Pod due to the additional Thanos Sidecar. 266 | 267 | Now connect to Thanos Query using port forwarding by executing a Kubectl command. You will need to substitute the correct name of your Thanos Query Pod. For example: `kubectl port-forward pod/thanos-query-58bcc6dcbb-67rn4 10902:10902 --namespace prometheus`. 268 | 269 | When accessing [http://localhost:10902](http://localhost:10902) in your web browser you will see that the Thanos Query UI is awfully similar to the Prometheus UI. That is no accident: Thanos is actually based on the same codebase as Prometheus. 270 | 271 | ![Thanos Query](/prometheus/using-thanos/high-availability/images/thanos-graph.png?classes=shadow&width=30pc) 272 | 273 | When running a Prometheus query in Thanos you can see a checkbox named **deduplication**. If you experiment with running Prometheus queries with this option enabled and disabled, you will see how Thanos deduplicates the metrics from the available Prometheus instances when querying. 274 | 275 | If you select the **Stores** option in the menu at the top, Thanos Query has an interface for showing the Thanos Store API endpoints it is currently federating. When you check this, you will see the two Prometheus instances that are running, as shown below: 276 | 277 | ![Thanos Stores](/prometheus/using-thanos/high-availability/images/thanos-stores.png?classes=shadow&width=30pc) 278 | 279 | ## Conclusion 280 | 281 | Success! In this tutorial you have successfully implemented Prometheus running with high availability using Thanos.
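As an optional extra check, you can exercise the same deduplication behaviour from the command line. Thanos Query serves the familiar Prometheus HTTP API and, assuming your version of Thanos supports the `dedup` query parameter, you can compare the deduplicated and raw results with the port-forward from the previous step still running, for example:

```shell
# deduplicated result: replica labels are merged away
$curl -s 'http://localhost:10902/api/v1/query?query=up&dedup=true'

# raw result: one series per target per Prometheus replica
$curl -s 'http://localhost:10902/api/v1/query?query=up&dedup=false'
```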
282 | -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-service.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-service.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-thanos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/multiple-prometheus-with-thanos.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/multiple-prometheus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/multiple-prometheus.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/thanos-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/thanos-graph.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/images/thanos-stores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/high-availability/images/thanos-stores.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/high-availability/static/prometheus-with-sidecar.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: Prometheus 4 | metadata: 5 | name: prometheus 6 | namespace: prometheus 7 | spec: 8 | affinity: 9 | podAntiAffinity: 10 | preferredDuringSchedulingIgnoredDuringExecution: 11 | - weight: 100 12 | podAffinityTerm: 13 | labelSelector: 14 | matchExpressions: 15 | - key: app 16 | operator: In 17 | values: 18 | - prometheus 19 | topologyKey: kubernetes.io/hostname 20 | baseImage: quay.io/prometheus/prometheus 21 | logLevel: info 22 | podMetadata: 23 | annotations: 24 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 25 | labels: 26 | app: prometheus 27 | thanos-store-api: "true" 28 | replicas: 2 29 | thanos: 30 | version: v0.4.0 31 | resources: 32 | limits: 33 | cpu: 500m 34 | memory: 500Mi 35 | requests: 36 | cpu: 100m 37 | memory: 500Mi 38 | resources: 39 | limits: 40 | cpu: 1 41 | memory: 2Gi 42 | requests: 43 | cpu: 1 44 | memory: 2Gi 45 | retention: 12h 46 | serviceAccountName: prometheus-service-account 47 | serviceMonitorSelector: 48 | matchLabels: 49 | serviceMonitorSelector: 
prometheus 50 | externalLabels: 51 | cluster_environment: workshop 52 | storage: 53 | volumeClaimTemplate: 54 | apiVersion: v1 55 | kind: PersistentVolumeClaim 56 | metadata: 57 | name: prometheus-pvc 58 | spec: 59 | accessModes: 60 | - ReadWriteOnce 61 | resources: 62 | requests: 63 | storage: 10Gi 64 | version: v2.10.0 65 | --- 66 | apiVersion: v1 67 | kind: ServiceAccount 68 | metadata: 69 | name: "prometheus-service-account" 70 | namespace: "prometheus" 71 | --- 72 | apiVersion: rbac.authorization.k8s.io/v1 73 | kind: ClusterRole 74 | metadata: 75 | name: "prometheus-cluster-role" 76 | rules: 77 | - apiGroups: 78 | - "" 79 | resources: 80 | - nodes 81 | - services 82 | - endpoints 83 | - pods 84 | verbs: 85 | - get 86 | - list 87 | - watch 88 | - apiGroups: 89 | - "" 90 | resources: 91 | - nodes/metrics 92 | verbs: 93 | - get 94 | - nonResourceURLs: 95 | - "/metrics" 96 | verbs: 97 | - get 98 | --- 99 | apiVersion: rbac.authorization.k8s.io/v1 100 | kind: ClusterRoleBinding 101 | metadata: 102 | name: "prometheus-cluster-role-binding" 103 | roleRef: 104 | apiGroup: rbac.authorization.k8s.io 105 | kind: ClusterRole 106 | name: "prometheus-cluster-role" 107 | subjects: 108 | - kind: ServiceAccount 109 | name: "prometheus-service-account" 110 | namespace: prometheus 111 | --- 112 | apiVersion: apps/v1 113 | kind: Deployment 114 | metadata: 115 | name: thanos-query 116 | namespace: prometheus 117 | labels: 118 | app: thanos-query 119 | spec: 120 | replicas: 1 121 | selector: 122 | matchLabels: 123 | app: thanos-query 124 | template: 125 | metadata: 126 | labels: 127 | app: thanos-query 128 | spec: 129 | containers: 130 | - name: thanos-query 131 | image: improbable/thanos:v0.5.0 132 | resources: 133 | limits: 134 | cpu: 500m 135 | memory: 500Mi 136 | requests: 137 | cpu: 100m 138 | memory: 500Mi 139 | args: 140 | - "query" 141 | - "--log.level=debug" 142 | - "--query.replica-label=prometheus_replica" 143 | - "--store.sd-dns-resolver=miekgdns" 144 | - "--store=dnssrv+_grpc._tcp.thanos-store-api.prometheus.svc.cluster.local" 145 | ports: 146 | - name: http 147 | containerPort: 10902 148 | - name: grpc 149 | containerPort: 10901 150 | - name: cluster 151 | containerPort: 10900 152 | --- 153 | apiVersion: v1 154 | kind: Service 155 | metadata: 156 | name: "thanos-store-api" 157 | namespace: prometheus 158 | spec: 159 | type: ClusterIP 160 | clusterIP: None 161 | ports: 162 | - name: grpc 163 | port: 10901 164 | targetPort: grpc 165 | selector: 166 | thanos-store-api: "true" -------------------------------------------------------------------------------- /content/prometheus/using-thanos/images/thanos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/images/thanos.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Long Term Storage" 3 | date: 2019-07-04T20:26:38+01:00 4 | weight: 20 5 | draft: false 6 | --- 7 | 8 | ## Long Term Storage 9 | 10 | Now that you have enabled high availability with Prometheus using Thanos you can look at the next killer feature of Thanos, long term storage of metrics! 11 | 12 | To enable this, you first need to create a bucket on an object store such as AWS S3 or GCP Storage. 
Next you enable the Thanos Sidecar to upload metrics to the object store. Lastly, you need to deploy another Thanos component called Store, which acts as an API for the metrics available in the object store and is queried by the existing Thanos Query instance when a user executes a Prometheus query. The diagram below shows this using a GCP Storage bucket. 13 | 14 | ![Thanos With Long Term Storage](/prometheus/using-thanos/long-term-storage/images/long-term-storage.png?classes=shadow&width=40pc) 15 | 16 | From the perspective of users, they are completely unaware that when they execute a Prometheus query, Thanos is querying for metrics from both the Prometheus instances and the object storage. 17 | 18 | How does this work? Prometheus stores metrics in **blocks**. Initially it holds the current block in memory; periodically, typically every 2 hours, it writes the in-memory block out to the filesystem. As the Thanos Sidecar in the Prometheus Pod shares the same filesystem as the Prometheus container, it can see the new block that Prometheus writes to the filesystem. Once it sees a new block on disk, the Thanos Sidecar uploads the block to the object storage as per its configuration. This is shown below in the diagram. 19 | 20 | ![Thanos Sidecar Upload](/prometheus/using-thanos/long-term-storage/images/thanos-sidecar-upload.png?classes=shadow&width=40pc) 21 | 22 | Let's now implement this. For this example I created a Google Cloud Storage bucket called **observability-for-kubernetes-thanos-demo** and provisioned a Service Account with 'Storage Admin' permissions so that Thanos can read and write to the storage bucket. In the cloud provider of your choice, create a storage bucket and a set of credentials with permission to access the bucket. 23 | 24 | You now need to create a Thanos Object Store configuration file that provides the bucket configuration and credentials to Thanos. The structure of this file differs per cloud provider; see the [Thanos Documentation](https://github.com/improbable-eng/thanos/blob/master/docs/storage.md) for the different options, but for this example let's proceed using Google Cloud Platform. 25 | 26 | You will create a Kubernetes Secret that contains the GCP Service Account and Bucket Name. The example below shows the expected structure, however the GCP Service Account is not valid for obvious security reasons! 27 | 28 | ```yaml 29 | type: GCS 30 | config: 31 | bucket: "observability-for-kubernetes-thanos-demo" 32 | service_account: |- 33 | { 34 | "type": "service_account", 35 | "project_id": "kubernetes-cloud-lab", 36 | "private_key_id": "", 37 | "private_key": "-----BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY-----\n", 38 | "client_email": "observability-for-kubernetes@kubernetes-cloud-lab.iam.gserviceaccount.com", 39 | "client_id": "", 40 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 41 | "token_uri": "https://oauth2.googleapis.com/token", 42 | "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", 43 | "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/observability-for-kubernetes%40kubernetes-cloud-lab.iam.gserviceaccount.com" 44 | } 45 | ``` 46 | 47 | You should take the valid version of the example above and base64 encode it. You can use `base64` in your terminal for doing this.
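For example, assuming you have saved the object store configuration above to a file called **thanos-storage-config.yaml** (the file name here is just an illustration), the encoding could look like this. On Linux the `-w0` flag prevents the output being wrapped across multiple lines; the macOS `base64` command does not wrap by default, so the flag can be omitted there:

```shell
# produce a single-line base64 encoding of the Thanos object store config
$base64 -w0 thanos-storage-config.yaml
```

If you prefer not to handle the base64 string by hand, `kubectl create secret generic thanos-config --from-file=thanos.config=thanos-storage-config.yaml --namespace prometheus` should achieve the same end result, but the rest of this section sticks with writing the Secret manifest out explicitly.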
Then create a file called **thanos-object-secret.yaml** and include the Kubernetes Secret, as shown below: 48 | 49 | ```yaml 50 | --- 51 | apiVersion: v1 52 | kind: Secret 53 | metadata: 54 | name: thanos-config 55 | namespace: prometheus 56 | data: 57 | thanos.config: dHlwZTogR0NTCmNvbmZpZzoKICBidWNrZXQ6ICJvYnNlcnZhYmlsaXR5LWZvci1rdWJlcm5ldGVzLXRoYW5vcy1kZW1vIgogIHNlcnZpY2VfYWNjb3VudDogfC0KICB7CiAgICAidHlwZSI6ICJzZXJ2aWNlX2FjY291bnQiLAogICAgInByb2plY3RfaWQiOiAia3ViZXJuZXRlcy1jbG91ZC1sYWIiLAogICAgInByaXZhdGVfa2V5X2lkIjogIiIsCiAgICAicHJpdmF0ZV9rZXkiOiAiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5cbi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS1cbiIsCiAgICAiY2xpZW50X2VtYWlsIjogIm9ic2VydmFiaWxpdHktZm9yLWt1YmVybmV0ZXNAa3ViZXJuZXRlcy1jbG91ZC1sYWIuaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLAogICAgImNsaWVudF9pZCI6ICIiLAogICAgImF1dGhfdXJpIjogImh0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbS9vL29hdXRoMi9hdXRoIiwKICAgICJ0b2tlbl91cmkiOiAiaHR0cHM6Ly9vYXV0aDIuZ29vZ2xlYXBpcy5jb20vdG9rZW4iLAogICAgImF1dGhfcHJvdmlkZXJfeDUwOV9jZXJ0X3VybCI6ICJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLAogICAgImNsaWVudF94NTA5X2NlcnRfdXJsIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL3JvYm90L3YxL21ldGFkYXRhL3g1MDkvb2JzZXJ2YWJpbGl0eS1mb3Ita3ViZXJuZXRlcyU0MGt1YmVybmV0ZXMtY2xvdWQtbGFiLmlhbS5nc2VydmljZWFjY291bnQuY29tIgogIH0= 58 | ``` 59 | 60 | The value of **thanos.config** should be set to the base64 string that was just generated. 61 | 62 | Apply **thanos-object-secret.yaml** to Kubernetes by executing `kubectl apply -f thanos-object-secret.yaml`: 63 | 64 | ```shell 65 | $kubectl apply -f thanos-object-secret.yaml 66 | secret/thanos-config created 67 | ``` 68 | 69 | With the object store configuration deployed to Kubernetes, next update the Prometheus resource, adding the **objectStorageConfig** key so that the Prometheus Operator configures the Thanos Sidecar to use it.
70 | 71 | The Prometheus resource should look similar to the below with this added: 72 | 73 | ```yaml 74 | --- 75 | apiVersion: monitoring.coreos.com/v1 76 | kind: Prometheus 77 | metadata: 78 | name: prometheus 79 | namespace: prometheus 80 | spec: 81 | affinity: 82 | podAntiAffinity: 83 | preferredDuringSchedulingIgnoredDuringExecution: 84 | - weight: 100 85 | podAffinityTerm: 86 | labelSelector: 87 | matchExpressions: 88 | - key: app 89 | operator: In 90 | values: 91 | - prometheus 92 | topologyKey: kubernetes.io/hostname 93 | baseImage: quay.io/prometheus/prometheus 94 | logLevel: info 95 | podMetadata: 96 | annotations: 97 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 98 | labels: 99 | app: prometheus 100 | thanos-store-api: "true" 101 | replicas: 2 102 | thanos: 103 | version: v0.4.0 104 | resources: 105 | limits: 106 | cpu: 500m 107 | memory: 500Mi 108 | requests: 109 | cpu: 100m 110 | memory: 500Mi 111 | objectStorageConfig: 112 | key: thanos.config 113 | name: thanos-config 114 | resources: 115 | limits: 116 | cpu: 1 117 | memory: 2Gi 118 | requests: 119 | cpu: 1 120 | memory: 2Gi 121 | retention: 12h 122 | serviceAccountName: prometheus-service-account 123 | serviceMonitorSelector: 124 | matchLabels: 125 | serviceMonitorSelector: prometheus 126 | externalLabels: 127 | cluster_environment: workshop 128 | storage: 129 | volumeClaimTemplate: 130 | apiVersion: v1 131 | kind: PersistentVolumeClaim 132 | metadata: 133 | name: prometheus-pvc 134 | spec: 135 | accessModes: 136 | - ReadWriteOnce 137 | resources: 138 | requests: 139 | storage: 10Gi 140 | version: v2.10.0 141 | ``` 142 | 143 | Edit the **prometheus.yaml** file you created previously to reflect the changes above. Under the **objectStorageConfig** key set the **name** to be the name of the Kubernetes Secret you just created and the **key** to be the name of the secret key you used, which in this case is **thanos.config**. 144 | 145 | Apply the updated **prometheus.yaml** to Kubernetes by running `kubectl apply -f prometheus.yaml`. To see an example of the full YAML file that reflects the changes described above see [here](/prometheus/using-thanos/long-term-storage/static/thanos-with-object-config.yaml). 
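Applying this change will cause the Prometheus Operator to roll the Prometheus Pods so that the new Thanos Sidecar configuration is picked up. If you want to watch the rollout happen, you can stream the Pod status changes with the watch flag, for example:

```shell
$kubectl get pods --namespace prometheus -w
```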
146 | 147 | Once the Prometheus Pods have restarted with the new configuration, use `kubectl logs` to view the logs from the **thanos-sidecar** container in one of the Prometheus Pods, as shown below: 148 | 149 | ```shell 150 | $kubectl logs prometheus-prometheus-0 thanos-sidecar --namespace prometheus 151 | level=info ts=2019-07-05T20:48:04.403642465Z caller=flags.go:87 msg="gossip is disabled" 152 | level=info ts=2019-07-05T20:48:04.403893193Z caller=main.go:257 component=sidecar msg="disabled TLS, key and cert must be set to enable" 153 | level=info ts=2019-07-05T20:48:04.403923795Z caller=factory.go:39 msg="loading bucket configuration" 154 | level=info ts=2019-07-05T20:48:04.404486705Z caller=sidecar.go:319 msg="starting sidecar" peer="no gossip" 155 | level=info ts=2019-07-05T20:48:04.404636222Z caller=main.go:309 msg="Listening for metrics" address=0.0.0.0:10902 156 | level=info ts=2019-07-05T20:48:04.404674065Z caller=reloader.go:154 component=reloader msg="started watching config file and non-recursively rule dirs for changes" cfg= out= dirs= 157 | level=info ts=2019-07-05T20:48:04.40474036Z caller=sidecar.go:260 component=sidecar msg="Listening for StoreAPI gRPC" address=[10.8.2.14]:10901 158 | level=info ts=2019-07-05T20:48:06.414276291Z caller=sidecar.go:176 msg="successfully loaded prometheus external labels" external_labels="{cluster_environment=\"workshop\",prometheus=\"prometheus/prometheus\",prometheus_replica=\"prometheus-prometheus-0\"}" 159 | level=info ts=2019-07-05T20:48:34.441923855Z caller=shipper.go:350 msg="upload new block" id=01DF1NSPGCHYM8T82R8BKAX4HP 160 | level=info ts=2019-07-05T20:48:35.12152462Z caller=shipper.go:350 msg="upload new block" id=01DF1R3VHXY2NA2ZH4W4TVM33H 161 | level=info ts=2019-07-05T21:00:04.457140171Z caller=shipper.go:350 msg="upload new block" id=01DF1YZJT0GNZ2G0YWJWJ3NE25 162 | ``` 163 | 164 | If your instances of Prometheus have been running long enough (at least 2 hours) there should be a block on the filesystem ready to be uploaded. If there is, you will see it upload the block to the storage bucket as shown above. See the log entry **"upload new block"** where it begins the upload. If you check the bucket in the cloud provider console you will see that the blocks are now present. 165 | 166 | ## Thanos Components 167 | 168 | Now that the Thanos Sidecar is uploading blocks to the object store, we need to deploy two additional components: Thanos Store and Thanos Compact. 169 | 170 | ### Thanos Store 171 | 172 | Thanos Store acts as an API for querying Prometheus metrics stored in the object store.
173 | 174 | Update **prometheus.yaml** to also include the following: 175 | 176 | ```yaml 177 | --- 178 | apiVersion: apps/v1 179 | kind: StatefulSet 180 | metadata: 181 | name: thanos-store 182 | namespace: prometheus 183 | labels: 184 | app: thanos-store 185 | thanos-store-api: "true" 186 | spec: 187 | replicas: 1 188 | serviceName: thanos-store 189 | selector: 190 | matchLabels: 191 | app: thanos-store 192 | thanos-store-api: "true" 193 | template: 194 | metadata: 195 | labels: 196 | app: thanos-store 197 | thanos-store-api: "true" 198 | spec: 199 | containers: 200 | - name: thanos-store 201 | image: improbable/thanos:v0.5.0 202 | resources: 203 | limits: 204 | cpu: 1 205 | memory: 1Gi 206 | requests: 207 | cpu: 500m 208 | memory: 1Gi 209 | args: 210 | - "store" 211 | - "--data-dir=/prometheus/cache" 212 | - "--objstore.config-file=/config/thanos.config" 213 | - "--log.level=info" 214 | - "--index-cache-size=256MB" 215 | - "--chunk-pool-size=256MB" 216 | - "--store.grpc.series-max-concurrency=30" 217 | ports: 218 | - name: http 219 | containerPort: 10902 220 | - name: grpc 221 | containerPort: 10901 222 | - name: cluster 223 | containerPort: 10900 224 | volumeMounts: 225 | - mountPath: /prometheus 226 | name: thanos-store-storage 227 | - mountPath: /config/ 228 | name: thanos-config 229 | volumes: 230 | - name: thanos-config 231 | secret: 232 | secretName: thanos-config 233 | volumeClaimTemplates: 234 | - metadata: 235 | name: thanos-store-storage 236 | spec: 237 | accessModes: [ "ReadWriteOnce" ] 238 | resources: 239 | requests: 240 | storage: 10Gi 241 | ``` 242 | 243 | Apply this to Kubernetes by running `kubectl apply -f prometheus.yaml`. 244 | 245 | Kubernetes will launch a single Thanos Store Pod as per the configuration above. If you check the available **Stores** in the Thanos Query UI, you will now see Thanos Store listed in addition to the two Thanos Sidecars running alongside Prometheus. Now when querying via Thanos Query, the query will execute across both the Prometheus instances and also the metrics stored in the object store! 246 | 247 | ![Thanos Query Stores](/prometheus/using-thanos/long-term-storage/images/thanos-query-with-store.png?classes=shadow&width=40pc) 248 | 249 | ### Thanos Compact 250 | 251 | Thanos Compact is the final Thanos component you need to deploy. 252 | 253 | Compact performs three main tasks: 254 | 255 | * It executes a Prometheus compaction job on the blocks in the object store. Typically Prometheus would execute this for blocks locally on the filesystem but the process is disabled by Prometheus Operator when using Thanos. 256 | * It also executes a down-sampling process on metrics in the object store. Prometheus stores metrics with a resolution of 1 minute. However, if you were to execute a query over a period of months or years on a Prometheus environment using Thanos with long term storage, the number of data-points returned would be excessive! Therefore, Compact performs the down-sampling job adding a 5 minute and 1 hour sample in addition to the 1 minute sample. It does this by creating a new block and discarding the original once down-sampling is completed. When executing queries, Thanos will automatically select the most appropriate sample to return. 257 | * Lastly, it is also possible to set retention periods for the 1 minute, 5 minute and 1 hour samples. Compact will apply these retentions if they are set. 258 | 259 | Now lets deploy Thanos Compact. 
260 | 261 | Update **prometheus.yaml** to also include the following: 262 | 263 | ```yaml 264 | --- 265 | apiVersion: apps/v1 266 | kind: StatefulSet 267 | metadata: 268 | name: thanos-compact 269 | namespace: prometheus 270 | labels: 271 | app: thanos-compact 272 | spec: 273 | replicas: 1 274 | serviceName: thanos-compact 275 | selector: 276 | matchLabels: 277 | app: thanos-compact 278 | template: 279 | metadata: 280 | labels: 281 | app: thanos-compact 282 | spec: 283 | containers: 284 | - name: thanos-compact 285 | image: improbable/thanos:v0.5.0 286 | resources: 287 | limits: 288 | cpu: 1 289 | memory: 1Gi 290 | requests: 291 | cpu: 500m 292 | memory: 1Gi 293 | args: 294 | - "compact" 295 | - "--data-dir=/prometheus/compact" 296 | - "--objstore.config-file=/config/thanos.config" 297 | - "--log.level=info" 298 | - "--retention.resolution-raw=2d" 299 | - "--retention.resolution-5m=5d" 300 | - "--retention.resolution-1h=10d" 301 | - "--consistency-delay=15m" 302 | - "--wait" 303 | ports: 304 | - name: http 305 | containerPort: 10902 306 | - name: grpc 307 | containerPort: 10901 308 | - name: cluster 309 | containerPort: 10900 310 | volumeMounts: 311 | - mountPath: /prometheus 312 | name: thanos-compact-storage 313 | - mountPath: /config/ 314 | name: thanos-config 315 | volumes: 316 | - name: thanos-config 317 | secret: 318 | secretName: thanos-config 319 | volumeClaimTemplates: 320 | - metadata: 321 | name: thanos-compact-storage 322 | spec: 323 | accessModes: [ "ReadWriteOnce" ] 324 | resources: 325 | requests: 326 | storage: 10Gi 327 | ``` 328 | 329 | Apply this to Kubernetes by running `kubectl apply -f prometheus.yaml`. 330 | 331 | Kubernetes will launch a single Thanos Compact Pod which will, once running, start performing the actions described above on blocks stored in the object store. 332 | 333 | To see an example of the full YAML file that reflects the changes described above with Thanos Store and Compact see [here](/prometheus/using-thanos/long-term-storage/static/thanos-with-components.yaml). 
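As a final check, you can tail the logs of the Thanos Compact Pod to confirm it is picking up blocks from the object store. With a StatefulSet the first replica is named after the StatefulSet itself, so given the manifest above the Pod should be called **thanos-compact-0**; the exact log lines you see will depend on how many blocks have been uploaded and down-sampled so far:

```shell
$kubectl logs thanos-compact-0 --namespace prometheus
```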
334 | -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/images/long-term-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/long-term-storage/images/long-term-storage.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/images/thanos-query-with-store.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/long-term-storage/images/thanos-query-with-store.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/images/thanos-sidecar-upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/using-thanos/long-term-storage/images/thanos-sidecar-upload.png -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/static/thanos-with-components.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: thanos-config 6 | namespace: prometheus 7 | data: 8 | thanos.config: dHlwZTogR0NTCmNvbmZpZzoKICBidWNrZXQ6ICJvYnNlcnZhYmlsaXR5LWZvci1rdWJlcm5ldGVzLXRoYW5vcy1kZW1vIgogIHNlcnZpY2VfYWNjb3VudDogfC0KICB7CiAgICAidHlwZSI6ICJzZXJ2aWNlX2FjY291bnQiLAogICAgInByb2plY3RfaWQiOiAia3ViZXJuZXRlcy1jbG91ZC1sYWIiLAogICAgInByaXZhdGVfa2V5X2lkIjogIiIsCiAgICAicHJpdmF0ZV9rZXkiOiAiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5cbi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS1cbiIsCiAgICAiY2xpZW50X2VtYWlsIjogIm9ic2VydmFiaWxpdHktZm9yLWt1YmVybmV0ZXNAa3ViZXJuZXRlcy1jbG91ZC1sYWIuaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLAogICAgImNsaWVudF9pZCI6ICIiLAogICAgImF1dGhfdXJpIjogImh0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbS9vL29hdXRoMi9hdXRoIiwKICAgICJ0b2tlbl91cmkiOiAiaHR0cHM6Ly9vYXV0aDIuZ29vZ2xlYXBpcy5jb20vdG9rZW4iLAogICAgImF1dGhfcHJvdmlkZXJfeDUwOV9jZXJ0X3VybCI6ICJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLAogICAgImNsaWVudF94NTA5X2NlcnRfdXJsIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL3JvYm90L3YxL21ldGFkYXRhL3g1MDkvb2JzZXJ2YWJpbGl0eS1mb3Ita3ViZXJuZXRlcyU0MGt1YmVybmV0ZXMtY2xvdWQtbGFiLmlhbS5nc2VydmljZWFjY291bnQuY29tIgogIH0= 9 | --- 10 | apiVersion: monitoring.coreos.com/v1 11 | kind: Prometheus 12 | metadata: 13 | name: prometheus 14 | namespace: prometheus 15 | spec: 16 | affinity: 17 | podAntiAffinity: 18 | preferredDuringSchedulingIgnoredDuringExecution: 19 | - podAffinityTerm: 20 | labelSelector: 21 | matchLabels: 22 | app: prometheus 23 | prometheus: cluster 24 | topologyKey: kubernetes.io/hostname 25 | weight: 100 26 | baseImage: quay.io/prometheus/prometheus 27 | logLevel: info 28 | podMetadata: 29 | annotations: 30 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 31 | labels: 32 | app: prometheus 33 | thanos-store-api: "true" 34 | replicas: 2 35 | thanos: 36 | version: v0.4.0 37 | resources: 38 | limits: 39 | cpu: 500m 40 | memory: 500Mi 41 | requests: 42 | cpu: 100m 43 | memory: 500Mi 44 | objectStorageConfig: 45 | 
key: thanos.config 46 | name: thanos-config 47 | resources: 48 | limits: 49 | cpu: 1 50 | memory: 2Gi 51 | requests: 52 | cpu: 1 53 | memory: 2Gi 54 | retention: 12h 55 | serviceAccountName: prometheus-service-account 56 | serviceMonitorSelector: 57 | matchLabels: 58 | serviceMonitorSelector: prometheus 59 | externalLabels: 60 | cluster_environment: workshop 61 | storage: 62 | volumeClaimTemplate: 63 | apiVersion: v1 64 | kind: PersistentVolumeClaim 65 | metadata: 66 | name: prometheus-pvc 67 | spec: 68 | accessModes: 69 | - ReadWriteOnce 70 | resources: 71 | requests: 72 | storage: 10Gi 73 | version: v2.10.0 74 | --- 75 | apiVersion: v1 76 | kind: ServiceAccount 77 | metadata: 78 | name: "prometheus-service-account" 79 | namespace: "prometheus" 80 | --- 81 | apiVersion: rbac.authorization.k8s.io/v1 82 | kind: ClusterRole 83 | metadata: 84 | name: "prometheus-cluster-role" 85 | rules: 86 | - apiGroups: 87 | - "" 88 | resources: 89 | - nodes 90 | - services 91 | - endpoints 92 | - pods 93 | verbs: 94 | - get 95 | - list 96 | - watch 97 | - apiGroups: 98 | - "" 99 | resources: 100 | - nodes/metrics 101 | verbs: 102 | - get 103 | - nonResourceURLs: 104 | - "/metrics" 105 | verbs: 106 | - get 107 | --- 108 | apiVersion: rbac.authorization.k8s.io/v1 109 | kind: ClusterRoleBinding 110 | metadata: 111 | name: "prometheus-cluster-role-binding" 112 | roleRef: 113 | apiGroup: rbac.authorization.k8s.io 114 | kind: ClusterRole 115 | name: "prometheus-cluster-role" 116 | subjects: 117 | - kind: ServiceAccount 118 | name: "prometheus-service-account" 119 | namespace: prometheus 120 | --- 121 | apiVersion: apps/v1 122 | kind: Deployment 123 | metadata: 124 | name: thanos-query 125 | namespace: prometheus 126 | labels: 127 | app: thanos-query 128 | spec: 129 | replicas: 1 130 | selector: 131 | matchLabels: 132 | app: thanos-query 133 | template: 134 | metadata: 135 | labels: 136 | app: thanos-query 137 | spec: 138 | containers: 139 | - name: thanos-query 140 | image: improbable/thanos:v0.5.0 141 | resources: 142 | limits: 143 | cpu: 500m 144 | memory: 500Mi 145 | requests: 146 | cpu: 100m 147 | memory: 500Mi 148 | args: 149 | - "query" 150 | - "--log.level=debug" 151 | - "--query.replica-label=prometheus_replica" 152 | - "--store.sd-dns-resolver=miekgdns" 153 | - "--store=dnssrv+_grpc._tcp.thanos-store-api.prometheus.svc.cluster.local" 154 | ports: 155 | - name: http 156 | containerPort: 10902 157 | - name: grpc 158 | containerPort: 10901 159 | - name: cluster 160 | containerPort: 10900 161 | --- 162 | apiVersion: v1 163 | kind: Service 164 | metadata: 165 | name: "thanos-store-api" 166 | namespace: prometheus 167 | spec: 168 | type: ClusterIP 169 | clusterIP: None 170 | ports: 171 | - name: grpc 172 | port: 10901 173 | targetPort: grpc 174 | selector: 175 | thanos-store-api: "true" 176 | --- 177 | apiVersion: apps/v1 178 | kind: StatefulSet 179 | metadata: 180 | name: thanos-store 181 | namespace: prometheus 182 | labels: 183 | app: thanos-store 184 | thanos-store-api: "true" 185 | spec: 186 | replicas: 1 187 | serviceName: thanos-store 188 | selector: 189 | matchLabels: 190 | app: thanos-store 191 | thanos-store-api: "true" 192 | template: 193 | metadata: 194 | labels: 195 | app: thanos-store 196 | thanos-store-api: "true" 197 | spec: 198 | containers: 199 | - name: thanos-store 200 | image: improbable/thanos:v0.5.0 201 | resources: 202 | limits: 203 | cpu: 1 204 | memory: 1Gi 205 | requests: 206 | cpu: 500m 207 | memory: 1Gi 208 | args: 209 | - "store" 210 | - "--data-dir=/prometheus/cache" 211 | - 
"--objstore.config-file=/config/thanos.config" 212 | - "--log.level=info" 213 | - "--index-cache-size=256MB" 214 | - "--chunk-pool-size=256MB" 215 | - "--store.grpc.series-max-concurrency=30" 216 | ports: 217 | - name: http 218 | containerPort: 10902 219 | - name: grpc 220 | containerPort: 10901 221 | - name: cluster 222 | containerPort: 10900 223 | volumeMounts: 224 | - mountPath: /prometheus 225 | name: thanos-store-storage 226 | - mountPath: /config/ 227 | name: thanos-config 228 | volumes: 229 | - name: thanos-config 230 | secret: 231 | secretName: thanos-config 232 | volumeClaimTemplates: 233 | - metadata: 234 | name: thanos-store-storage 235 | spec: 236 | accessModes: [ "ReadWriteOnce" ] 237 | resources: 238 | requests: 239 | storage: 10Gi 240 | --- 241 | apiVersion: apps/v1 242 | kind: StatefulSet 243 | metadata: 244 | name: thanos-compact 245 | namespace: prometheus 246 | labels: 247 | app: thanos-compact 248 | spec: 249 | replicas: 1 250 | serviceName: thanos-compact 251 | selector: 252 | matchLabels: 253 | app: thanos-compact 254 | template: 255 | metadata: 256 | labels: 257 | app: thanos-compact 258 | spec: 259 | containers: 260 | - name: thanos-compact 261 | image: improbable/thanos:v0.5.0 262 | resources: 263 | limits: 264 | cpu: 1 265 | memory: 1Gi 266 | requests: 267 | cpu: 500m 268 | memory: 1Gi 269 | args: 270 | - "compact" 271 | - "--data-dir=/prometheus/compact" 272 | - "--objstore.config-file=/config/thanos.config" 273 | - "--log.level=info" 274 | - "--retention.resolution-raw=2d" 275 | - "--retention.resolution-5m=5d" 276 | - "--retention.resolution-1h=10d" 277 | - "--consistency-delay=15m" 278 | - "--wait" 279 | ports: 280 | - name: http 281 | containerPort: 10902 282 | - name: grpc 283 | containerPort: 10901 284 | - name: cluster 285 | containerPort: 10900 286 | volumeMounts: 287 | - mountPath: /prometheus 288 | name: thanos-compact-storage 289 | - mountPath: /config/ 290 | name: thanos-config 291 | volumes: 292 | - name: thanos-config 293 | secret: 294 | secretName: thanos-config 295 | volumeClaimTemplates: 296 | - metadata: 297 | name: thanos-compact-storage 298 | spec: 299 | accessModes: [ "ReadWriteOnce" ] 300 | resources: 301 | requests: 302 | storage: 10Gi 303 | -------------------------------------------------------------------------------- /content/prometheus/using-thanos/long-term-storage/static/thanos-with-object-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: thanos-config 6 | namespace: prometheus 7 | data: 8 | thanos.config: 
dHlwZTogR0NTCmNvbmZpZzoKICBidWNrZXQ6ICJvYnNlcnZhYmlsaXR5LWZvci1rdWJlcm5ldGVzLXRoYW5vcy1kZW1vIgogIHNlcnZpY2VfYWNjb3VudDogfC0KICB7CiAgICAidHlwZSI6ICJzZXJ2aWNlX2FjY291bnQiLAogICAgInByb2plY3RfaWQiOiAia3ViZXJuZXRlcy1jbG91ZC1sYWIiLAogICAgInByaXZhdGVfa2V5X2lkIjogIiIsCiAgICAicHJpdmF0ZV9rZXkiOiAiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5cbi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS1cbiIsCiAgICAiY2xpZW50X2VtYWlsIjogIm9ic2VydmFiaWxpdHktZm9yLWt1YmVybmV0ZXNAa3ViZXJuZXRlcy1jbG91ZC1sYWIuaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLAogICAgImNsaWVudF9pZCI6ICIiLAogICAgImF1dGhfdXJpIjogImh0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbS9vL29hdXRoMi9hdXRoIiwKICAgICJ0b2tlbl91cmkiOiAiaHR0cHM6Ly9vYXV0aDIuZ29vZ2xlYXBpcy5jb20vdG9rZW4iLAogICAgImF1dGhfcHJvdmlkZXJfeDUwOV9jZXJ0X3VybCI6ICJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLAogICAgImNsaWVudF94NTA5X2NlcnRfdXJsIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL3JvYm90L3YxL21ldGFkYXRhL3g1MDkvb2JzZXJ2YWJpbGl0eS1mb3Ita3ViZXJuZXRlcyU0MGt1YmVybmV0ZXMtY2xvdWQtbGFiLmlhbS5nc2VydmljZWFjY291bnQuY29tIgogIH0= 9 | --- 10 | apiVersion: monitoring.coreos.com/v1 11 | kind: Prometheus 12 | metadata: 13 | name: prometheus 14 | namespace: prometheus 15 | spec: 16 | affinity: 17 | podAntiAffinity: 18 | preferredDuringSchedulingIgnoredDuringExecution: 19 | - weight: 100 20 | podAffinityTerm: 21 | labelSelector: 22 | matchExpressions: 23 | - key: app 24 | operator: In 25 | values: 26 | - prometheus 27 | topologyKey: kubernetes.io/hostname 28 | baseImage: quay.io/prometheus/prometheus 29 | logLevel: info 30 | podMetadata: 31 | annotations: 32 | cluster-autoscaler.kubernetes.io/safe-to-evict: "true" 33 | labels: 34 | app: prometheus 35 | thanos-store-api: "true" 36 | replicas: 2 37 | thanos: 38 | version: v0.4.0 39 | resources: 40 | limits: 41 | cpu: 500m 42 | memory: 500Mi 43 | requests: 44 | cpu: 100m 45 | memory: 500Mi 46 | objectStorageConfig: 47 | key: thanos.config 48 | name: thanos-config 49 | resources: 50 | limits: 51 | cpu: 1 52 | memory: 2Gi 53 | requests: 54 | cpu: 1 55 | memory: 2Gi 56 | retention: 12h 57 | serviceAccountName: prometheus-service-account 58 | serviceMonitorSelector: 59 | matchLabels: 60 | serviceMonitorSelector: prometheus 61 | externalLabels: 62 | cluster_environment: workshop 63 | storage: 64 | volumeClaimTemplate: 65 | apiVersion: v1 66 | kind: PersistentVolumeClaim 67 | metadata: 68 | name: prometheus-pvc 69 | spec: 70 | accessModes: 71 | - ReadWriteOnce 72 | resources: 73 | requests: 74 | storage: 10Gi 75 | version: v2.10.0 76 | --- 77 | apiVersion: v1 78 | kind: ServiceAccount 79 | metadata: 80 | name: "prometheus-service-account" 81 | namespace: "prometheus" 82 | --- 83 | apiVersion: rbac.authorization.k8s.io/v1 84 | kind: ClusterRole 85 | metadata: 86 | name: "prometheus-cluster-role" 87 | rules: 88 | - apiGroups: 89 | - "" 90 | resources: 91 | - nodes 92 | - services 93 | - endpoints 94 | - pods 95 | verbs: 96 | - get 97 | - list 98 | - watch 99 | - apiGroups: 100 | - "" 101 | resources: 102 | - nodes/metrics 103 | verbs: 104 | - get 105 | - nonResourceURLs: 106 | - "/metrics" 107 | verbs: 108 | - get 109 | --- 110 | apiVersion: rbac.authorization.k8s.io/v1 111 | kind: ClusterRoleBinding 112 | metadata: 113 | name: "prometheus-cluster-role-binding" 114 | roleRef: 115 | apiGroup: rbac.authorization.k8s.io 116 | kind: ClusterRole 117 | name: "prometheus-cluster-role" 118 | subjects: 119 | - kind: ServiceAccount 120 | name: "prometheus-service-account" 121 | namespace: prometheus 122 | --- 123 | apiVersion: apps/v1 124 | kind: Deployment 125 | metadata: 126 | name: thanos-query 127 | 
namespace: prometheus 128 | labels: 129 | app: thanos-query 130 | spec: 131 | replicas: 1 132 | selector: 133 | matchLabels: 134 | app: thanos-query 135 | template: 136 | metadata: 137 | labels: 138 | app: thanos-query 139 | spec: 140 | containers: 141 | - name: thanos-query 142 | image: improbable/thanos:v0.5.0 143 | resources: 144 | limits: 145 | cpu: 500m 146 | memory: 500Mi 147 | requests: 148 | cpu: 100m 149 | memory: 500Mi 150 | args: 151 | - "query" 152 | - "--log.level=debug" 153 | - "--query.replica-label=prometheus_replica" 154 | - "--store.sd-dns-resolver=miekgdns" 155 | - "--store=dnssrv+_grpc._tcp.thanos-store-api.prometheus.svc.cluster.local" 156 | ports: 157 | - name: http 158 | containerPort: 10902 159 | - name: grpc 160 | containerPort: 10901 161 | - name: cluster 162 | containerPort: 10900 163 | --- 164 | apiVersion: v1 165 | kind: Service 166 | metadata: 167 | name: "thanos-store-api" 168 | namespace: prometheus 169 | spec: 170 | type: ClusterIP 171 | clusterIP: None 172 | ports: 173 | - name: grpc 174 | port: 10901 175 | targetPort: grpc 176 | selector: 177 | thanos-store-api: "true" -------------------------------------------------------------------------------- /content/prometheus/what-is-prometheus/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "What Is Prometheus?" 3 | date: 2019-07-02T15:50:17+01:00 4 | weight: 10 5 | draft: false 6 | --- 7 | 8 | ![Prometheus](/prometheus/what-is-prometheus/images/logo.png?classes=shadow&width=40pc) 9 | 10 | Prometheus is an open-source metrics-oriented monitoring and alerting tool. The project was first created by SoundCloud in 2012. In 2016 the project joined the Cloud Native Computing Foundation (CNCF). In 2018, its CNCF maturity status changed from incubation to graduated. Prometheus was only the second CNCF project to graduate, after Kubernetes. 11 | 12 | Prometheus has quickly become the de facto open-source monitoring tool for Kubernetes and is widely used and supported in the Cloud Native industry. 13 | 14 | As described on the [Prometheus.io](https://prometheus.io) website, the main features of Prometheus are: 15 | 16 | * a multi-dimensional data model with time series data identified by metric name and key/value pairs 17 | * PromQL, a flexible query language to leverage this dimensionality 18 | * no reliance on distributed storage; single server nodes are autonomous 19 | * time series collection happens via a pull model over HTTP 20 | * pushing time series is supported via an intermediary gateway 21 | * targets are discovered via service discovery or static configuration 22 | * multiple modes of graphing and dashboarding support 23 | 24 | For a more detailed introduction to Prometheus, the [introduction on the Prometheus documentation website](https://prometheus.io/docs/introduction) is excellent. 25 | 26 | The rest of this chapter details how to deploy Prometheus to Kubernetes.
27 | -------------------------------------------------------------------------------- /content/prometheus/what-is-prometheus/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/content/prometheus/what-is-prometheus/images/logo.png -------------------------------------------------------------------------------- /layouts/partials/custom-footer.html: -------------------------------------------------------------------------------- 1 | 11 | -------------------------------------------------------------------------------- /layouts/partials/logo.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /layouts/partials/menu-footer.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | Download 4 | 5 | 6 | Star 7 | 8 | 9 | Fork 10 | 11 |

Built with from Grav and Hugo

12 |
13 | 14 | -------------------------------------------------------------------------------- /netlify.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | publish = "public" 3 | command = "hugo --gc --minify" 4 | 5 | [context.production.environment] 6 | HUGO_VERSION = "0.84.3" 7 | HUGO_ENV = "production" 8 | HUGO_ENABLEGITINFO = "true" 9 | 10 | [context.split1] 11 | command = "hugo --gc --minify --enableGitInfo" 12 | 13 | [context.split1.environment] 14 | HUGO_VERSION = "0.84.3" 15 | HUGO_ENV = "production" 16 | 17 | [context.deploy-preview] 18 | command = "hugo --gc --minify --buildFuture -b $DEPLOY_PRIME_URL" 19 | 20 | [context.deploy-preview.environment] 21 | HUGO_VERSION = "0.84.3" 22 | 23 | [context.branch-deploy] 24 | command = "hugo --gc --minify -b $DEPLOY_PRIME_URL" 25 | 26 | [context.branch-deploy.environment] 27 | HUGO_VERSION = "0.84.3" 28 | 29 | [context.next.environment] 30 | HUGO_ENABLEGITINFO = "true" -------------------------------------------------------------------------------- /static/css/theme-mine.css: -------------------------------------------------------------------------------- 1 | 2 | :root{ 3 | 4 | --MAIN-TEXT-color:#323232; /* Color of text by default */ 5 | --MAIN-TITLES-TEXT-color: #5e5e5e; /* Color of titles h2-h3-h4-h5 */ 6 | --MAIN-LINK-color:#1C90F3; /* Color of links */ 7 | --MAIN-LINK-HOVER-color:#167ad0; /* Color of hovered links */ 8 | --MAIN-ANCHOR-color: #1C90F3; /* color of anchors on titles */ 9 | 10 | --MENU-HEADER-BG-color:#1C90F3; /* Background color of menu header */ 11 | --MENU-HEADER-BORDER-color:#33a1ff; /*Color of menu header border */ 12 | 13 | --MENU-SEARCH-BG-color:#167ad0; /* Search field background color (by default borders + icons) */ 14 | --MENU-SEARCH-BOX-color: #33a1ff; /* Override search field border color */ 15 | --MENU-SEARCH-BOX-ICONS-color: #a1d2fd; /* Override search field icons color */ 16 | 17 | --MENU-SECTIONS-ACTIVE-BG-color:#20272b; /* Background color of the active section and its childs */ 18 | --MENU-SECTIONS-BG-color:#252c31; /* Background color of other sections */ 19 | --MENU-SECTIONS-LINK-color: #ccc; /* Color of links in menu */ 20 | --MENU-SECTIONS-LINK-HOVER-color: #e6e6e6; /* Color of links in menu, when hovered */ 21 | --MENU-SECTION-ACTIVE-CATEGORY-color: #777; /* Color of active category text */ 22 | --MENU-SECTION-ACTIVE-CATEGORY-BG-color: #fff; /* Color of background for the active category (only) */ 23 | 24 | --MENU-VISITED-color: #33a1ff; /* Color of 'page visited' icons in menu */ 25 | --MENU-SECTION-HR-color: #20272b; /* Color of
separator in menu */ 26 | 27 | } 28 | 29 | body { 30 | color: var(--MAIN-TEXT-color) !important; 31 | } 32 | 33 | textarea:focus, input[type="email"]:focus, input[type="number"]:focus, input[type="password"]:focus, input[type="search"]:focus, input[type="tel"]:focus, input[type="text"]:focus, input[type="url"]:focus, input[type="color"]:focus, input[type="date"]:focus, input[type="datetime"]:focus, input[type="datetime-local"]:focus, input[type="month"]:focus, input[type="time"]:focus, input[type="week"]:focus, select[multiple=multiple]:focus { 34 | border-color: none; 35 | box-shadow: none; 36 | } 37 | 38 | h2, h3, h4, h5 { 39 | color: var(--MAIN-TITLES-TEXT-color) !important; 40 | } 41 | 42 | a { 43 | color: var(--MAIN-LINK-color); 44 | } 45 | 46 | .anchor { 47 | color: var(--MAIN-ANCHOR-color); 48 | } 49 | 50 | a:hover { 51 | color: var(--MAIN-LINK-HOVER-color); 52 | } 53 | 54 | #sidebar ul li.visited > a .read-icon { 55 | color: var(--MENU-VISITED-color); 56 | } 57 | 58 | #body a.highlight:after { 59 | display: block; 60 | content: ""; 61 | height: 1px; 62 | width: 0%; 63 | -webkit-transition: width 0.5s ease; 64 | -moz-transition: width 0.5s ease; 65 | -ms-transition: width 0.5s ease; 66 | transition: width 0.5s ease; 67 | background-color: var(--MAIN-LINK-HOVER-color); 68 | } 69 | #sidebar { 70 | background-color: var(--MENU-SECTIONS-BG-color); 71 | } 72 | #sidebar #header-wrapper { 73 | background: var(--MENU-HEADER-BG-color); 74 | color: var(--MENU-SEARCH-BOX-color); 75 | border-color: var(--MENU-HEADER-BORDER-color); 76 | } 77 | #sidebar .searchbox { 78 | border-color: var(--MENU-SEARCH-BOX-color); 79 | background: var(--MENU-SEARCH-BG-color); 80 | } 81 | #sidebar ul.topics > li.parent, #sidebar ul.topics > li.active { 82 | background: var(--MENU-SECTIONS-ACTIVE-BG-color); 83 | } 84 | #sidebar .searchbox * { 85 | color: var(--MENU-SEARCH-BOX-ICONS-color); 86 | } 87 | 88 | #sidebar a { 89 | color: var(--MENU-SECTIONS-LINK-color); 90 | } 91 | 92 | #sidebar a:hover { 93 | color: var(--MENU-SECTIONS-LINK-HOVER-color); 94 | } 95 | 96 | #sidebar ul li.active > a { 97 | background: var(--MENU-SECTION-ACTIVE-CATEGORY-BG-color); 98 | color: var(--MENU-SECTION-ACTIVE-CATEGORY-color) !important; 99 | } 100 | 101 | #sidebar hr { 102 | border-color: var(--MENU-SECTION-HR-color); 103 | } 104 | -------------------------------------------------------------------------------- /static/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasriley/observability-for-kubernetes/3f7c3e88a3fa7dfea0efbb340a6fe90a31f4b227/static/images/favicon.png --------------------------------------------------------------------------------