├── .gitignore ├── README.md ├── i ├── grafana │ ├── dashboard_k8s_1.15.json │ └── dashboard_k8s_1.16.json ├── helm │ ├── config │ │ ├── grafana.yaml │ │ ├── loki-stack.yaml │ │ ├── metrics-server.yaml │ │ └── prometheus.yaml │ └── helmfile.yaml └── k8s │ └── deploy.yaml └── run └── grafana.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kubernetes Resource Monitoring by example 2 | 3 | Medium article: https://medium.com/@wuestkamp/k8s-monitor-pod-cpu-and-memory-usage-with-prometheus-28eec6d84729?source=friends_link&sk=b498011bceb730596ee93d56869a2f5c 4 | 5 | 6 | ## NOTICE K8s >= 1.16 7 | If you’re using Kubernetes >=1.16 you’ll have to use the label **pod** instead of **pod_name** and **container** instead of **container_name** in your Prometheus queries. 8 | 9 | https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.16.md#removed-metrics 10 | 11 | 12 | ## run yourself 13 | 14 | ### You need a cluster, like with Gcloud: 15 | ``` 16 | gcloud container clusters create resources --num-nodes 3 --zone europe-west3-b --machine-type n1-standard-2 --node-version=1.15 --cluster-version=1.15 17 | ``` 18 | 19 | ### Install helm repos: 20 | Helmfile is used for managing helm repos. 
21 | ``` 22 | kubectl create ns grafana 23 | kubectl create ns prometheus 24 | 25 | # if you need to install metrics-server, create the namespace and uncomment the code in i/helm/helmfile.yaml 26 | #kubectl create ns metrics-server 27 | 28 | cd i/helm 29 | helmfile diff 30 | helmfile sync 31 | ``` 32 | 33 | ### Install k8s test app: 34 | ``` 35 | kubectl apply -f i/k8s 36 | ``` 37 | 38 | 39 | ## monitor 40 | 41 | ### Run Grafana 42 | ``` 43 | ./run/grafana.sh 44 | ``` 45 | Then head to http://localhost:3000/login 46 | 47 | User: admin 48 | 49 | Password: printed in the terminal output. 50 | 51 | #### install Dashboard 52 | Go to http://localhost:3000/dashboard/import and import the dashboard from `i/grafana/dashboard_k8s_1.15.json` or `i/grafana/dashboard_k8s_1.16.json`. 53 | 54 | There are different dashboards because the metric label names have changed from k8s 1.16 on. 55 | 56 | 57 | ### Use resources 58 | ``` 59 | kubectl run curl --image=curlimages/curl --rm --restart=Never -it sh 60 | 61 | curl --data "millicores=400&durationSec=300" compute:8080/ConsumeCPU 62 | curl --data "megabytes=400&durationSec=300" compute:8080/ConsumeMem 63 | ``` 64 | 65 | 66 | # Prometheus queries K8s till 1.15 67 | 68 | ## cpu 69 | ``` 70 | # container usage 71 | rate(container_cpu_usage_seconds_total{pod=~"compute-.*", image!="", container_name!="POD"}[5m]) 72 | 73 | # container requests 74 | avg(kube_pod_container_resource_requests_cpu_cores{pod=~"compute-.*"}) 75 | 76 | # container limits 77 | avg(kube_pod_container_resource_limits_cpu_cores{pod=~"compute-.*"}) 78 | 79 | # throttling 80 | rate(container_cpu_cfs_throttled_seconds_total{pod=~"compute-.*", container_name!="POD", image!=""}[5m]) 81 | ``` 82 | 83 | ## memory 84 | ``` 85 | # container usage 86 | container_memory_working_set_bytes{pod_name=~"compute-.*", image!="", container_name!="POD"} 87 | 88 | # container requests 89 | avg(kube_pod_container_resource_requests_memory_bytes{pod=~"compute-.*"}) 90 | 91 | # container limits 92 | 
avg(kube_pod_container_resource_limits_memory_bytes{pod=~"compute-.*"}) 93 | ``` 94 | 95 | # Prometheus queries K8s from 1.16 96 | 97 | ## cpu 98 | ``` 99 | # container usage 100 | rate(container_cpu_usage_seconds_total{pod=~"compute-.*", image!="", container!="POD"}[5m]) 101 | 102 | # container requests 103 | avg(kube_pod_container_resource_requests_cpu_cores{pod=~"compute-.*"}) 104 | 105 | # container limits 106 | avg(kube_pod_container_resource_limits_cpu_cores{pod=~"compute-.*"}) 107 | 108 | # throttling 109 | rate(container_cpu_cfs_throttled_seconds_total{pod=~"compute-.*", container!="POD", image!=""}[5m]) 110 | ``` 111 | 112 | ## memory 113 | ``` 114 | # container usage 115 | container_memory_working_set_bytes{pod=~"compute-.*", image!="", container!="POD"} 116 | 117 | # container requests 118 | avg(kube_pod_container_resource_requests_memory_bytes{pod=~"compute-.*"}) 119 | 120 | # container limits 121 | avg(kube_pod_container_resource_limits_memory_bytes{pod=~"compute-.*"}) 122 | ``` 123 | -------------------------------------------------------------------------------- /i/grafana/dashboard_k8s_1.15.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_PROMETHEUS", 5 | "label": "Prometheus", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "6.5.2" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "prometheus", 28 | "name": "Prometheus", 29 | "version": "1.0.0" 30 | } 31 | ], 32 | "annotations": { 33 | "list": [ 34 | { 35 | "builtIn": 1, 36 | "datasource": "-- Grafana --", 37 | "enable": true, 38 | "hide": true, 39 | "iconColor": "rgba(0, 211, 255, 1)", 40 | "name": "Annotations & Alerts", 41 | 
"type": "dashboard" 42 | } 43 | ] 44 | }, 45 | "editable": true, 46 | "gnetId": null, 47 | "graphTooltip": 0, 48 | "id": null, 49 | "links": [], 50 | "panels": [ 51 | { 52 | "aliasColors": { 53 | "container limits": "yellow", 54 | "container requests": "green", 55 | "container usage": "blue", 56 | "throttling": "red", 57 | "{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",container=\"compute\",container_name=\"compute\",cpu=\"total\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61/04f0310038f5341c88206af01bbff9eb604365a5448432e3eab6ac2bbd3a96e1\",image=\"vish/stress@sha256:b6456a3df6db5e063e1783153627947484a3db387be99e49708c70a9a15e7177\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",name=\"k8s_compute_compute-67b4c4c44f-2c8gj_default_b65d5b3b-b991-434e-8e80-2815bac17b61_0\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "blue", 58 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",container=\"compute\",container_name=\"compute\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61/04f0310038f5341c88206af01bbff9eb604365a5448432e3eab6ac2bbd3a96e1\",image=\"vish/stress@sha256:b6456a3df6db5e063e1783153627947484a3db387be99e49708c70a9a15e7177\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",name=\"k8s_compute_compute-67b4c4c44f-2c8gj_default_b65d5b3b-b991-434e-8e80-2815bac17b61_0\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "super-light-red", 59 | "{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",cpu=\"total\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "super-light-blue", 60 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "red" 61 | }, 62 | "bars": false, 63 | "dashLength": 10, 64 | "dashes": false, 65 | "datasource": "${DS_PROMETHEUS}", 66 | "fill": 0, 67 | "fillGradient": 0, 68 | "gridPos": { 69 | "h": 9, 70 | "w": 24, 71 | "x": 0, 72 | "y": 0 73 | }, 74 | "hiddenSeries": false, 75 | "id": 2, 76 | "legend": { 77 | "avg": false, 78 | "current": false, 79 | "max": false, 80 | "min": false, 81 | "show": true, 82 | "total": false, 83 | "values": false 84 | }, 85 | "lines": true, 86 | "linewidth": 2, 87 | "nullPointMode": "null", 88 | "options": { 89 | "dataLinks": [] 90 | }, 91 | "percentage": false, 92 | "pointradius": 2, 93 | "points": false, 94 | "renderer": "flot", 95 | "seriesOverrides": [], 96 | "spaceLength": 10, 97 | "stack": false, 98 | "steppedLine": false, 99 | "targets": [ 100 | { 101 | "expr": "rate(container_cpu_usage_seconds_total{pod=~\"compute-.*\", image!=\"\", container_name!=\"POD\"}[5m])", 102 | "hide": false, 103 | "legendFormat": "container usage", 104 | "refId": "A" 105 | }, 106 | { 107 | "expr": "avg(kube_pod_container_resource_requests_cpu_cores{pod=~\"compute-.*\"})", 108 | "hide": false, 109 | "legendFormat": "container requests", 110 | "refId": "B" 111 | }, 112 | { 113 | "expr": 
"avg(kube_pod_container_resource_limits_cpu_cores{pod=~\"compute-.*\"})", 114 | "hide": false, 115 | "legendFormat": "container limits", 116 | "refId": "C" 117 | }, 118 | { 119 | "expr": "rate(container_cpu_cfs_throttled_seconds_total{pod=~\"compute-.*\", container_name!=\"POD\", image!=\"\"}[5m])", 120 | "hide": false, 121 | "legendFormat": "throttling", 122 | "refId": "D" 123 | } 124 | ], 125 | "thresholds": [], 126 | "timeFrom": null, 127 | "timeRegions": [], 128 | "timeShift": null, 129 | "title": "CPU pod", 130 | "tooltip": { 131 | "shared": true, 132 | "sort": 0, 133 | "value_type": "individual" 134 | }, 135 | "type": "graph", 136 | "xaxis": { 137 | "buckets": null, 138 | "mode": "time", 139 | "name": null, 140 | "show": true, 141 | "values": [] 142 | }, 143 | "yaxes": [ 144 | { 145 | "format": "short", 146 | "label": null, 147 | "logBase": 1, 148 | "max": null, 149 | "min": null, 150 | "show": true 151 | }, 152 | { 153 | "format": "short", 154 | "label": null, 155 | "logBase": 1, 156 | "max": null, 157 | "min": null, 158 | "show": true 159 | } 160 | ], 161 | "yaxis": { 162 | "align": false, 163 | "alignLevel": null 164 | } 165 | }, 166 | { 167 | "aliasColors": { 168 | "container limits": "yellow", 169 | "container requests": "green", 170 | "container usage": "blue", 171 | "throttling": "red", 172 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",container=\"compute\",container_name=\"compute\",cpu=\"total\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61/04f0310038f5341c88206af01bbff9eb604365a5448432e3eab6ac2bbd3a96e1\",image=\"vish/stress@sha256:b6456a3df6db5e063e1783153627947484a3db387be99e49708c70a9a15e7177\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",name=\"k8s_compute_compute-67b4c4c44f-2c8gj_default_b65d5b3b-b991-434e-8e80-2815bac17b61_0\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "blue", 173 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",container=\"compute\",container_name=\"compute\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61/04f0310038f5341c88206af01bbff9eb604365a5448432e3eab6ac2bbd3a96e1\",image=\"vish/stress@sha256:b6456a3df6db5e063e1783153627947484a3db387be99e49708c70a9a15e7177\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",name=\"k8s_compute_compute-67b4c4c44f-2c8gj_default_b65d5b3b-b991-434e-8e80-2815bac17b61_0\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "super-light-red", 174 | "{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",cpu=\"total\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "super-light-blue", 175 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "red" 176 | }, 177 | "bars": false, 178 | "dashLength": 10, 179 | "dashes": false, 180 | "datasource": "${DS_PROMETHEUS}", 181 | "fill": 0, 182 | "fillGradient": 0, 183 | "gridPos": { 184 | "h": 9, 185 | "w": 24, 186 | "x": 0, 187 | "y": 9 188 | }, 189 | "hiddenSeries": false, 190 | "id": 3, 191 | "legend": { 192 | "avg": false, 193 | "current": false, 194 | "max": false, 195 | "min": false, 196 | "show": true, 197 | "total": false, 198 | "values": false 199 | }, 200 | "lines": true, 201 | "linewidth": 2, 202 | "nullPointMode": "null", 203 | "options": { 204 | "dataLinks": [] 205 | }, 206 | "percentage": false, 207 | "pointradius": 2, 208 | "points": false, 209 | "renderer": "flot", 210 | "seriesOverrides": [], 211 | "spaceLength": 10, 212 | "stack": false, 213 | "steppedLine": false, 214 | "targets": [ 215 | { 216 | "expr": "container_memory_working_set_bytes{pod_name=~\"compute-.*\", image!=\"\", container_name!=\"POD\"}", 217 | "hide": false, 218 | "legendFormat": "container usage", 219 | "refId": "A" 220 | }, 221 | { 222 | "expr": "avg(kube_pod_container_resource_requests_memory_bytes{pod=~\"compute-.*\"})", 223 | "hide": false, 224 | "legendFormat": "container requests", 225 | "refId": "B" 226 | }, 227 | { 228 | "expr": 
"avg(kube_pod_container_resource_limits_memory_bytes{pod=~\"compute-.*\"})", 229 | "hide": false, 230 | "legendFormat": "container limits", 231 | "refId": "C" 232 | } 233 | ], 234 | "thresholds": [], 235 | "timeFrom": null, 236 | "timeRegions": [], 237 | "timeShift": null, 238 | "title": "Memory pod", 239 | "tooltip": { 240 | "shared": true, 241 | "sort": 0, 242 | "value_type": "individual" 243 | }, 244 | "type": "graph", 245 | "xaxis": { 246 | "buckets": null, 247 | "mode": "time", 248 | "name": null, 249 | "show": true, 250 | "values": [] 251 | }, 252 | "yaxes": [ 253 | { 254 | "format": "bytes", 255 | "label": null, 256 | "logBase": 1, 257 | "max": null, 258 | "min": null, 259 | "show": true 260 | }, 261 | { 262 | "format": "short", 263 | "label": null, 264 | "logBase": 1, 265 | "max": null, 266 | "min": null, 267 | "show": true 268 | } 269 | ], 270 | "yaxis": { 271 | "align": false, 272 | "alignLevel": null 273 | } 274 | } 275 | ], 276 | "refresh": "5s", 277 | "schemaVersion": 21, 278 | "style": "dark", 279 | "tags": [], 280 | "templating": { 281 | "list": [] 282 | }, 283 | "time": { 284 | "from": "now-5m", 285 | "to": "now" 286 | }, 287 | "timepicker": { 288 | "refresh_intervals": [ 289 | "5s", 290 | "10s", 291 | "30s", 292 | "1m", 293 | "5m", 294 | "15m", 295 | "30m", 296 | "1h", 297 | "2h", 298 | "1d" 299 | ] 300 | }, 301 | "timezone": "", 302 | "title": "CPU Mem", 303 | "uid": "2cXq0H8Zz", 304 | "version": 4 305 | } -------------------------------------------------------------------------------- /i/grafana/dashboard_k8s_1.16.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_PROMETHEUS", 5 | "label": "Prometheus", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "6.5.2" 18 | }, 19 | { 20 | 
"type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "prometheus", 28 | "name": "Prometheus", 29 | "version": "1.0.0" 30 | } 31 | ], 32 | "annotations": { 33 | "list": [ 34 | { 35 | "builtIn": 1, 36 | "datasource": "-- Grafana --", 37 | "enable": true, 38 | "hide": true, 39 | "iconColor": "rgba(0, 211, 255, 1)", 40 | "name": "Annotations & Alerts", 41 | "type": "dashboard" 42 | } 43 | ] 44 | }, 45 | "editable": true, 46 | "gnetId": null, 47 | "graphTooltip": 0, 48 | "id": null, 49 | "links": [], 50 | "panels": [ 51 | { 52 | "aliasColors": { 53 | "container limits": "yellow", 54 | "container requests": "green", 55 | "container usage": "blue", 56 | "throttling": "red", 57 | "{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",container=\"compute\",container_name=\"compute\",cpu=\"total\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61/04f0310038f5341c88206af01bbff9eb604365a5448432e3eab6ac2bbd3a96e1\",image=\"vish/stress@sha256:b6456a3df6db5e063e1783153627947484a3db387be99e49708c70a9a15e7177\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",name=\"k8s_compute_compute-67b4c4c44f-2c8gj_default_b65d5b3b-b991-434e-8e80-2815bac17b61_0\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "blue", 58 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",container=\"compute\",container_name=\"compute\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61/04f0310038f5341c88206af01bbff9eb604365a5448432e3eab6ac2bbd3a96e1\",image=\"vish/stress@sha256:b6456a3df6db5e063e1783153627947484a3db387be99e49708c70a9a15e7177\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",name=\"k8s_compute_compute-67b4c4c44f-2c8gj_default_b65d5b3b-b991-434e-8e80-2815bac17b61_0\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "super-light-red", 59 | "{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",cpu=\"total\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "super-light-blue", 60 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "red" 61 | }, 62 | "bars": false, 63 | "dashLength": 10, 64 | "dashes": false, 65 | "datasource": "${DS_PROMETHEUS}", 66 | "fill": 0, 67 | "fillGradient": 0, 68 | "gridPos": { 69 | "h": 9, 70 | "w": 24, 71 | "x": 0, 72 | "y": 0 73 | }, 74 | "hiddenSeries": false, 75 | "id": 2, 76 | "legend": { 77 | "avg": false, 78 | "current": false, 79 | "max": false, 80 | "min": false, 81 | "show": true, 82 | "total": false, 83 | "values": false 84 | }, 85 | "lines": true, 86 | "linewidth": 2, 87 | "nullPointMode": "null", 88 | "options": { 89 | "dataLinks": [] 90 | }, 91 | "percentage": false, 92 | "pointradius": 2, 93 | "points": false, 94 | "renderer": "flot", 95 | "seriesOverrides": [], 96 | "spaceLength": 10, 97 | "stack": false, 98 | "steppedLine": false, 99 | "targets": [ 100 | { 101 | "expr": "rate(container_cpu_usage_seconds_total{pod=~\"compute-.*\", image!=\"\", container!=\"POD\"}[5m])", 102 | "hide": false, 103 | "legendFormat": "container usage", 104 | "refId": "A" 105 | }, 106 | { 107 | "expr": "avg(kube_pod_container_resource_requests_cpu_cores{pod=~\"compute-.*\"})", 108 | "hide": false, 109 | "legendFormat": "container requests", 110 | "refId": "B" 111 | }, 112 | { 113 | "expr": 
"avg(kube_pod_container_resource_limits_cpu_cores{pod=~\"compute-.*\"})", 114 | "hide": false, 115 | "legendFormat": "container limits", 116 | "refId": "C" 117 | }, 118 | { 119 | "expr": "rate(container_cpu_cfs_throttled_seconds_total{pod=~\"compute-.*\", container!=\"POD\", image!=\"\"}[5m])", 120 | "hide": false, 121 | "legendFormat": "throttling", 122 | "refId": "D" 123 | } 124 | ], 125 | "thresholds": [], 126 | "timeFrom": null, 127 | "timeRegions": [], 128 | "timeShift": null, 129 | "title": "CPU pod", 130 | "tooltip": { 131 | "shared": true, 132 | "sort": 0, 133 | "value_type": "individual" 134 | }, 135 | "type": "graph", 136 | "xaxis": { 137 | "buckets": null, 138 | "mode": "time", 139 | "name": null, 140 | "show": true, 141 | "values": [] 142 | }, 143 | "yaxes": [ 144 | { 145 | "format": "short", 146 | "label": null, 147 | "logBase": 1, 148 | "max": null, 149 | "min": null, 150 | "show": true 151 | }, 152 | { 153 | "format": "short", 154 | "label": null, 155 | "logBase": 1, 156 | "max": null, 157 | "min": null, 158 | "show": true 159 | } 160 | ], 161 | "yaxis": { 162 | "align": false, 163 | "alignLevel": null 164 | } 165 | }, 166 | { 167 | "aliasColors": { 168 | "container limits": "yellow", 169 | "container requests": "green", 170 | "container usage": "blue", 171 | "throttling": "red", 172 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",container=\"compute\",container_name=\"compute\",cpu=\"total\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61/04f0310038f5341c88206af01bbff9eb604365a5448432e3eab6ac2bbd3a96e1\",image=\"vish/stress@sha256:b6456a3df6db5e063e1783153627947484a3db387be99e49708c70a9a15e7177\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",name=\"k8s_compute_compute-67b4c4c44f-2c8gj_default_b65d5b3b-b991-434e-8e80-2815bac17b61_0\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "blue", 173 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",container=\"compute\",container_name=\"compute\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61/04f0310038f5341c88206af01bbff9eb604365a5448432e3eab6ac2bbd3a96e1\",image=\"vish/stress@sha256:b6456a3df6db5e063e1783153627947484a3db387be99e49708c70a9a15e7177\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",name=\"k8s_compute_compute-67b4c4c44f-2c8gj_default_b65d5b3b-b991-434e-8e80-2815bac17b61_0\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "super-light-red", 174 | "{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",cpu=\"total\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "super-light-blue", 175 | 
"{beta_kubernetes_io_arch=\"amd64\",beta_kubernetes_io_fluentd_ds_ready=\"true\",beta_kubernetes_io_instance_type=\"n1-standard-2\",beta_kubernetes_io_os=\"linux\",cloud_google_com_gke_nodepool=\"default-pool\",cloud_google_com_gke_os_distribution=\"cos\",failure_domain_beta_kubernetes_io_region=\"europe-west3\",failure_domain_beta_kubernetes_io_zone=\"europe-west3-b\",id=\"/kubepods/burstable/podb65d5b3b-b991-434e-8e80-2815bac17b61\",instance=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",job=\"kubernetes-cadvisor\",kubernetes_io_arch=\"amd64\",kubernetes_io_hostname=\"gke-resources-test-default-pool-9c0bbcbe-4lgj\",kubernetes_io_os=\"linux\",namespace=\"default\",pod=\"compute-67b4c4c44f-2c8gj\",pod_name=\"compute-67b4c4c44f-2c8gj\"}": "red" 176 | }, 177 | "bars": false, 178 | "dashLength": 10, 179 | "dashes": false, 180 | "datasource": "${DS_PROMETHEUS}", 181 | "fill": 0, 182 | "fillGradient": 0, 183 | "gridPos": { 184 | "h": 9, 185 | "w": 24, 186 | "x": 0, 187 | "y": 9 188 | }, 189 | "hiddenSeries": false, 190 | "id": 3, 191 | "legend": { 192 | "avg": false, 193 | "current": false, 194 | "max": false, 195 | "min": false, 196 | "show": true, 197 | "total": false, 198 | "values": false 199 | }, 200 | "lines": true, 201 | "linewidth": 2, 202 | "nullPointMode": "null", 203 | "options": { 204 | "dataLinks": [] 205 | }, 206 | "percentage": false, 207 | "pointradius": 2, 208 | "points": false, 209 | "renderer": "flot", 210 | "seriesOverrides": [], 211 | "spaceLength": 10, 212 | "stack": false, 213 | "steppedLine": false, 214 | "targets": [ 215 | { 216 | "expr": "container_memory_working_set_bytes{pod=~\"compute-.*\", image!=\"\", container!=\"POD\"}", 217 | "hide": false, 218 | "legendFormat": "container usage", 219 | "refId": "A" 220 | }, 221 | { 222 | "expr": "avg(kube_pod_container_resource_requests_memory_bytes{pod=~\"compute-.*\"})", 223 | "hide": false, 224 | "legendFormat": "container requests", 225 | "refId": "B" 226 | }, 227 | { 228 | "expr": 
"avg(kube_pod_container_resource_limits_memory_bytes{pod=~\"compute-.*\"})", 229 | "hide": false, 230 | "legendFormat": "container limits", 231 | "refId": "C" 232 | } 233 | ], 234 | "thresholds": [], 235 | "timeFrom": null, 236 | "timeRegions": [], 237 | "timeShift": null, 238 | "title": "Memory pod", 239 | "tooltip": { 240 | "shared": true, 241 | "sort": 0, 242 | "value_type": "individual" 243 | }, 244 | "type": "graph", 245 | "xaxis": { 246 | "buckets": null, 247 | "mode": "time", 248 | "name": null, 249 | "show": true, 250 | "values": [] 251 | }, 252 | "yaxes": [ 253 | { 254 | "format": "bytes", 255 | "label": null, 256 | "logBase": 1, 257 | "max": null, 258 | "min": null, 259 | "show": true 260 | }, 261 | { 262 | "format": "short", 263 | "label": null, 264 | "logBase": 1, 265 | "max": null, 266 | "min": null, 267 | "show": true 268 | } 269 | ], 270 | "yaxis": { 271 | "align": false, 272 | "alignLevel": null 273 | } 274 | } 275 | ], 276 | "refresh": "5s", 277 | "schemaVersion": 21, 278 | "style": "dark", 279 | "tags": [], 280 | "templating": { 281 | "list": [] 282 | }, 283 | "time": { 284 | "from": "now-5m", 285 | "to": "now" 286 | }, 287 | "timepicker": { 288 | "refresh_intervals": [ 289 | "5s", 290 | "10s", 291 | "30s", 292 | "1m", 293 | "5m", 294 | "15m", 295 | "30m", 296 | "1h", 297 | "2h", 298 | "1d" 299 | ] 300 | }, 301 | "timezone": "", 302 | "title": "CPU Mem", 303 | "uid": "2cXq0H8Zz", 304 | "version": 2 305 | } -------------------------------------------------------------------------------- /i/helm/config/grafana.yaml: -------------------------------------------------------------------------------- 1 | persistence: 2 | enabled: true 3 | persistence: 4 | size: 1Gi 5 | persistence: 6 | storageClassName: retain 7 | resources: 8 | requests: 9 | memory: 100Mi 10 | cpu: 100m 11 | limits: 12 | memory: 300Mi 13 | cpu: 200m 14 | plugins: 15 | - grafana-image-renderer 16 | datasources: 17 | datasources.yaml: 18 | apiVersion: 1 19 | datasources: 20 | - name: 
Prometheus 21 | type: prometheus 22 | url: http://prometheus-server.prometheus 23 | access: proxy 24 | isDefault: true 25 | -------------------------------------------------------------------------------- /i/helm/config/loki-stack.yaml: -------------------------------------------------------------------------------- 1 | loki: 2 | resources: 3 | limits: 4 | memory: 300Mi 5 | cpu: 200m 6 | -------------------------------------------------------------------------------- /i/helm/config/metrics-server.yaml: -------------------------------------------------------------------------------- 1 | args: 2 | - --kubelet-preferred-address-types=InternalIP 3 | -------------------------------------------------------------------------------- /i/helm/config/prometheus.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | resources: 3 | requests: 4 | memory: 100Mi 5 | cpu: 100m 6 | limits: 7 | memory: 800Mi 8 | cpu: 500m 9 | 10 | global: 11 | scrape_interval: 10s 12 | 13 | serverFiles: 14 | prometheus.yml: 15 | scrape_configs: 16 | # Scrape config for API servers: discovers endpoints in the `default` namespace and keeps only the `kubernetes` service's `https` port. 17 | - job_name: 'kubernetes-apiservers' 18 | kubernetes_sd_configs: 19 | - role: endpoints 20 | namespaces: 21 | names: 22 | - default 23 | scheme: https 24 | tls_config: 25 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 26 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 27 | relabel_configs: 28 | - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 29 | action: keep 30 | regex: kubernetes;https 31 | 32 | # Scrape config for nodes (kubelet); the relabel rules below point each target at kubernetes.default.svc:443 and the per-node /proxy/metrics path. 33 | - job_name: 'kubernetes-nodes' 34 | scheme: https 35 | tls_config: 36 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 37 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 38 | kubernetes_sd_configs: 39 | - role: node 40 | relabel_configs: 41 | - action: labelmap 42 | regex: __meta_kubernetes_node_label_(.+) 43 | - 
target_label: __address__ 44 | replacement: kubernetes.default.svc:443 45 | - source_labels: [__meta_kubernetes_node_name] 46 | regex: (.+) 47 | target_label: __metrics_path__ 48 | replacement: /api/v1/nodes/${1}/proxy/metrics 49 | 50 | # Scrape config for Kubelet cAdvisor. 51 | # 52 | # This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics 53 | # (those whose names begin with 'container_') have been removed from the 54 | # Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to 55 | # retrieve those metrics. 56 | # 57 | # In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor 58 | # HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics" 59 | # in that case (and ensure cAdvisor's HTTP server hasn't been disabled with 60 | # the --cadvisor-port=0 Kubelet flag). 61 | # 62 | # This job is not necessary and should be removed in Kubernetes 1.6 and 63 | # earlier versions, or it will cause the metrics to be scraped twice. 64 | - job_name: 'kubernetes-cadvisor' 65 | scheme: https 66 | tls_config: 67 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 68 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 69 | kubernetes_sd_configs: 70 | - role: node 71 | relabel_configs: 72 | - action: labelmap 73 | regex: __meta_kubernetes_node_label_(.+) 74 | - target_label: __address__ 75 | replacement: kubernetes.default.svc:443 76 | - source_labels: [__meta_kubernetes_node_name] 77 | regex: (.+) 78 | target_label: __metrics_path__ 79 | replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor 80 | 81 | # Scrape config for service endpoints; only services whose prometheus.io/scrape annotation is "true" are kept.
82 | - job_name: 'kubernetes-service-endpoints' 83 | kubernetes_sd_configs: 84 | - role: endpoints 85 | relabel_configs: 86 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] 87 | action: keep 88 | regex: true 89 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] 90 | action: replace 91 | target_label: __scheme__ 92 | regex: (https?) 93 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] 94 | action: replace 95 | target_label: __metrics_path__ 96 | regex: (.+) 97 | - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] 98 | action: replace 99 | target_label: __address__ 100 | regex: ([^:]+)(?::\d+)?;(\d+) 101 | replacement: $1:$2 102 | - action: labelmap 103 | regex: __meta_kubernetes_service_label_(.+) 104 | - source_labels: [__meta_kubernetes_namespace] 105 | action: replace 106 | target_label: kubernetes_namespace 107 | - source_labels: [__meta_kubernetes_service_name] 108 | action: replace 109 | target_label: kubernetes_name 110 | 111 | - job_name: 'kubernetes-pods' 112 | kubernetes_sd_configs: 113 | - role: pod 114 | relabel_configs: # Pods carrying either Istio annotation tested below (sidecar status, or istio_mtls set to true) are dropped by this job; the istio-secure job scrapes them instead.
115 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] 116 | action: keep 117 | regex: true 118 | - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status] 119 | action: drop 120 | regex: (.+) 121 | - source_labels: [__meta_kubernetes_pod_annotation_istio_mtls] 122 | action: drop 123 | regex: (true) 124 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] 125 | action: replace 126 | target_label: __metrics_path__ 127 | regex: (.+) 128 | - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] 129 | action: replace 130 | regex: ([^:]+)(?::\d+)?;(\d+) 131 | replacement: $1:$2 132 | target_label: __address__ 133 | - action: labelmap 134 | regex: __meta_kubernetes_pod_label_(.+) 135 | - source_labels: [__meta_kubernetes_namespace] 136 | action: replace 137 | target_label: namespace 138 | - source_labels: [__meta_kubernetes_pod_name] 139 | action: replace 140 | target_label: pod_name 141 | - job_name: 'kubernetes-pods-istio-secure' 142 | scheme: https 143 | tls_config: 144 | ca_file: /etc/istio-certs/root-cert.pem 145 | cert_file: /etc/istio-certs/cert-chain.pem 146 | key_file: /etc/istio-certs/key.pem 147 | insecure_skip_verify: true # prometheus does not support secure naming. 148 | kubernetes_sd_configs: 149 | - role: pod 150 | relabel_configs: 151 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] 152 | action: keep 153 | regex: true 154 | # The sidecar status annotation is added by the sidecar injector, and 155 | # istio_workload_mtls_ability can be placed on a pod explicitly to indicate its ability to receive mTLS traffic. NOTE(review): the rule that follows reads the `istio_mtls` annotation, not `istio_workload_mtls_ability` - confirm which name is intended.
156 | - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status, __meta_kubernetes_pod_annotation_istio_mtls] 157 | action: keep 158 | regex: (([^;]+);([^;]*))|(([^;]*);(true)) 159 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] 160 | action: replace 161 | target_label: __metrics_path__ 162 | regex: (.+) 163 | - source_labels: [__address__] # Only keep address that is host:port 164 | action: keep # otherwise an extra target with ':443' is added for https scheme 165 | regex: ([^:]+):(\d+) 166 | - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] 167 | action: replace 168 | regex: ([^:]+)(?::\d+)?;(\d+) 169 | replacement: $1:$2 170 | target_label: __address__ 171 | - action: labelmap 172 | regex: __meta_kubernetes_pod_label_(.+) 173 | - source_labels: [__meta_kubernetes_namespace] 174 | action: replace 175 | target_label: namespace 176 | - source_labels: [__meta_kubernetes_pod_name] 177 | action: replace 178 | target_label: pod_name 179 | -------------------------------------------------------------------------------- /i/helm/helmfile.yaml: -------------------------------------------------------------------------------- 1 | helmDefaults: # defaults applied to every release listed under `releases` 2 | tillerless: true 3 | atomic: false 4 | verify: false 5 | wait: true 6 | timeout: 1200 7 | 8 | repositories: 9 | - name: stable 10 | url: https://charts.helm.sh/stable # archive of the deprecated stable charts; the former kubernetes-charts.storage.googleapis.com location was decommissioned 11 | - name: loki 12 | url: https://grafana.github.io/loki/charts # NOTE(review): no release below uses this repo, and the Loki charts appear to have moved to https://grafana.github.io/helm-charts - confirm before relying on it 13 | 14 | releases: 15 | - name: grafana 16 | chart: stable/grafana 17 | version: 4.3.2 18 | namespace: grafana 19 | values: 20 | - "./config/grafana.yaml" 21 | 22 | - name: prometheus 23 | chart: stable/prometheus 24 | version: 10.3.1 25 | namespace: prometheus 26 | values: 27 | - "./config/prometheus.yaml" 28 | 29 | #- name: metrics-server 30 | # chart: stable/metrics-server 31 | # version: 2.9.0 32 | # namespace: metrics-server 33 | # values: 34 | # - "./config/metrics-server.yaml" 35 | 
-------------------------------------------------------------------------------- /i/k8s/deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | run: compute 7 | name: compute 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | run: compute 13 | strategy: {} 14 | template: 15 | metadata: 16 | labels: 17 | run: compute 18 | spec: 19 | containers: 20 | - name: compute 21 | image: gcr.io/kubernetes-e2e-test-images/resource-consumer:1.5 # test workload; the README drives it through /ConsumeCPU and /ConsumeMem on port 8080 22 | resources: 23 | limits: 24 | cpu: "700m" 25 | memory: "500Mi" 26 | requests: 27 | cpu: "500m" 28 | memory: "250Mi" 29 | --- 30 | apiVersion: v1 31 | kind: Service 32 | metadata: 33 | creationTimestamp: null 34 | labels: 35 | run: compute 36 | name: compute # the README's curl commands address this service as compute:8080 37 | spec: 38 | ports: 39 | - port: 8080 40 | protocol: TCP 41 | targetPort: 8080 42 | selector: 43 | run: compute 44 | -------------------------------------------------------------------------------- /run/grafana.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "Password: " && kubectl get secret -n grafana grafana -o jsonpath="{.data.admin-password}" | base64 --decode # print the admin password read from the `grafana` secret in the `grafana` namespace 3 | echo # blank echo so the following output starts on a new line 4 | kubectl port-forward -n grafana service/grafana 3000:80 # forward local port 3000 to port 80 of the grafana service; runs in the foreground 5 | --------------------------------------------------------------------------------