├── README.md ├── prometheus-server ├── README.md ├── alertmanager │ ├── alertmanager-config-configmap.yaml │ ├── alertmanager-dep.yaml │ └── alertmanager-svc.yaml ├── prometheus-server-namespace.yaml └── prometheus-server │ ├── prometheus-server-config-configmap.yaml │ ├── prometheus-server-dep.yaml │ ├── prometheus-server-rbac.yaml │ ├── prometheus-server-rule-configmap.yaml │ └── prometheus-server-svc.yaml └── prometheus ├── README.md ├── kube-state-metrics ├── kube-state-metrics-dep.yaml ├── kube-state-metrics-rbac.yaml └── kube-state-metrics-svc.yaml ├── prometheus-namespace.yaml └── prometheus ├── prometheus-config-configmap.yaml ├── prometheus-dep.yaml ├── prometheus-rbac.yaml └── prometheus-svc.yaml /README.md: -------------------------------------------------------------------------------- 1 | # prometheus-monitor-kubernetes 2 | 如何使用prometheus来监控kubernetes集群 3 | 4 | # 架构 5 | ![架构图](http://static.dragonest.com/share/img/prometheus-monitor.png) 6 | 7 | # prometheus部署 8 | 请参考prometheus文件夹,根据README步骤apply 9 | 10 | # prometheus-server部署 11 | 请参考prometheus-server文件夹,根据README步骤apply 12 | 13 | # 参考文章 14 | [Prometheus监控k8s(一)——监控框架调研](https://www.servicemesher.com/blog/prometheus-monitor-k8s-1/) 15 | 16 | [Prometheus监控k8s(二)——监控部署](https://www.servicemesher.com/blog/prometheus-monitor-k8s-2/) 17 | 18 | [Prometheus监控k8s(三)——业务指标采集](https://www.servicemesher.com/blog/prometheus-monitor-k8s-3/) 19 | -------------------------------------------------------------------------------- /prometheus-server/README.md: -------------------------------------------------------------------------------- 1 | ## Prometheus + Kube-state-metrics + Grafana监控kubernetes集群说明 2 | 3 | ### 创建namespace 4 | * 创建namespace:kubectl apply -f ./prometheus-server-namespace.yaml 5 | 6 | ### 部署alertmanager 7 | 需要修改alertmanager/alertmanager-config-configmap.yaml,定义告警方式 8 | * 创建alertmanager-configmap:kubectl apply -f ./alertmanager/alertmanager-config-configmap.yaml 9 | * 创建alertmanager-dep: kubectl apply 
-f ./alertmanager/alertmanager-dep.yaml 10 | * 创建alertmanager-svc: kubectl apply -f ./alertmanager/alertmanager-svc.yaml 11 | 12 | ### 部署prometheus-server 13 | 需要修改prometheus-server/prometheus-server-config-configmap.yaml的scrape_configs,定义好prometheus的数据源 14 | * 创建rbac权限:kubectl apply -f ./prometheus-server/prometheus-server-rbac.yaml 15 | * 创建configmap作为prometheus-server配置文件:kubectl apply -f ./prometheus-server/prometheus-server-config-configmap.yaml 16 | * 创建configmap作为prometheus-server的rule文件:kubectl apply -f ./prometheus-server/prometheus-server-rule-configmap.yaml 17 | * 部署prometheus-server-dep:kubectl apply -f ./prometheus-server/prometheus-server-dep.yaml 18 | * 部署prometheus-server-svc:kubectl apply -f ./prometheus-server/prometheus-server-svc.yaml 19 | -------------------------------------------------------------------------------- /prometheus-server/alertmanager/alertmanager-config-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: alertmanager-config 5 | namespace: prometheus-server 6 | data: 7 | config.yml: | 8 | global: 9 | resolve_timeout: 5m 10 | route: 11 | receiver: send_msg_warning 12 | group_wait: 30s 13 | group_interval: 5m 14 | repeat_interval: 4h 15 | group_by: ['alertname', 'k8scluster', 'node', 'container', 'exported_job', 'daemonset'] 16 | routes: 17 | - receiver: send_msg_warning 18 | group_wait: 60s 19 | match: 20 | severity: warning 21 | 22 | receivers: 23 | - name: send_msg_warning 24 | webhook_configs: 25 | - url: 'http://msg.x.com/xxx/' 26 | send_resolved: true 27 | http_config: 28 | bearer_token: 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' -------------------------------------------------------------------------------- /prometheus-server/alertmanager/alertmanager-dep.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | name:
alertmanager-dep 5 | namespace: prometheus-server 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: alertmanager-dep 11 | template: 12 | metadata: 13 | labels: 14 | app: alertmanager-dep 15 | spec: 16 | containers: 17 | - image: prom/alertmanager:v0.15.2 18 | name: alertmanager 19 | args: 20 | - "--config.file=/etc/alertmanager/config.yml" 21 | - "--storage.path=/alertmanager" 22 | - "--data.retention=720h" 23 | ports: 24 | - containerPort: 9093 25 | protocol: TCP 26 | volumeMounts: 27 | - mountPath: "/alertmanager" 28 | name: data 29 | - mountPath: "/etc/alertmanager" 30 | name: config-volume 31 | resources: 32 | requests: 33 | cpu: 100m 34 | memory: 100Mi 35 | limits: 36 | cpu: 500m 37 | memory: 2500Mi 38 | volumes: 39 | - name: data 40 | emptyDir: {} 41 | - name: config-volume 42 | configMap: 43 | name: alertmanager-config -------------------------------------------------------------------------------- /prometheus-server/alertmanager/alertmanager-svc.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: alertmanager-svc 5 | namespace: prometheus-server 6 | spec: 7 | type: LoadBalancer 8 | ports: 9 | - port: 80 10 | targetPort: 9093 11 | selector: 12 | app: alertmanager-dep -------------------------------------------------------------------------------- /prometheus-server/prometheus-server-namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: prometheus-server -------------------------------------------------------------------------------- /prometheus-server/prometheus-server/prometheus-server-config-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: prometheus-server-config 5 | namespace: prometheus-server 6 | data: 7 | prometheus.yml: | 8 | 
global: 9 | scrape_interval: 30s 10 | scrape_timeout: 30s 11 | evaluation_interval: 30s 12 | 13 | alerting: 14 | alertmanagers: 15 | - static_configs: 16 | - targets: 17 | - alertmanager-svc.prometheus-server.svc.cluster.local:80 18 | scheme: http 19 | timeout: 10s 20 | 21 | rule_files: 22 | - "/etc/prometheus/rule/rule.yml" 23 | 24 | scrape_configs: 25 | - job_name: federate-xxxx 26 | honor_labels: true 27 | params: 28 | match[]: 29 | - '{job=~"kubernetes-.*"}' 30 | scrape_interval: 30s 31 | scrape_timeout: 30s 32 | metrics_path: /federate 33 | scheme: http 34 | static_configs: 35 | - targets: 36 | - x.x.x.x:30090 37 | labels: 38 | k8scluster: xxxx-k8s 39 | - job_name: federate-yyyy 40 | honor_labels: true 41 | params: 42 | match[]: 43 | - '{job=~"kubernetes-.*"}' 44 | scrape_interval: 30s 45 | scrape_timeout: 30s 46 | metrics_path: /federate 47 | scheme: http 48 | static_configs: 49 | - targets: 50 | - y.y.y.y:30090 51 | labels: 52 | k8scluster: yyyy-k8s 53 | -------------------------------------------------------------------------------- /prometheus-server/prometheus-server/prometheus-server-dep.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | name: prometheus-server-dep 5 | namespace: prometheus-server 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: prometheus-server-dep 11 | template: 12 | metadata: 13 | labels: 14 | app: prometheus-server-dep 15 | spec: 16 | containers: 17 | - image: prom/prometheus:v2.3.2 18 | name: prometheus-server 19 | command: 20 | - "/bin/prometheus" 21 | args: 22 | - "--config.file=/etc/prometheus/config/prometheus.yml" 23 | - "--storage.tsdb.path=/prometheus" 24 | - "--web.console.libraries=/usr/share/prometheus/console_libraries" 25 | - "--web.console.templates=/usr/share/prometheus/consoles" # must name the consoles directory itself, not its parent, mirroring the console_libraries flag above 26 | - "--storage.tsdb.retention=30d" 27 | - "--web.enable-lifecycle" 28 | ports: 29 | - containerPort: 9090 30 | protocol: TCP
31 | volumeMounts: 32 | - name: "data" 33 | mountPath: "/prometheus" 34 | - name: "server-config-volume" 35 | mountPath: "/etc/prometheus/config" 36 | - name: "rule-config-volume" 37 | mountPath: "/etc/prometheus/rule" 38 | resources: 39 | requests: 40 | cpu: 100m 41 | memory: 100Mi 42 | limits: 43 | cpu: 500m 44 | memory: 2500Mi 45 | serviceAccountName: prometheus-server 46 | volumes: 47 | - name: data 48 | emptyDir: {} 49 | - name: server-config-volume 50 | configMap: 51 | name: prometheus-server-config 52 | - name: rule-config-volume 53 | configMap: 54 | name: prometheus-server-rule-config -------------------------------------------------------------------------------- /prometheus-server/prometheus-server/prometheus-server-rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus-server 5 | rules: 6 | - apiGroups: [""] 7 | resources: 8 | - nodes 9 | - nodes/proxy 10 | - services 11 | - endpoints 12 | - pods 13 | verbs: ["get", "list", "watch"] 14 | - apiGroups: 15 | - extensions 16 | resources: 17 | - ingresses 18 | verbs: ["get", "list", "watch"] 19 | - nonResourceURLs: ["/metrics"] 20 | verbs: ["get"] 21 | --- 22 | apiVersion: v1 23 | kind: ServiceAccount 24 | metadata: 25 | name: prometheus-server 26 | namespace: prometheus-server 27 | --- 28 | apiVersion: rbac.authorization.k8s.io/v1beta1 29 | kind: ClusterRoleBinding 30 | metadata: 31 | name: prometheus-server 32 | roleRef: 33 | apiGroup: rbac.authorization.k8s.io 34 | kind: ClusterRole 35 | name: prometheus-server 36 | subjects: 37 | - kind: ServiceAccount 38 | name: prometheus-server 39 | namespace: prometheus-server -------------------------------------------------------------------------------- /prometheus-server/prometheus-server/prometheus-server-rule-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 
v1 2 | kind: ConfigMap 3 | metadata: 4 | name: prometheus-server-rule-config 5 | namespace: prometheus-server 6 | data: 7 | rule.yml: | 8 | groups: 9 | - name: kubernetes 10 | rules: 11 | - alert: PodDown 12 | expr: kube_pod_status_phase{phase="Unknown"} == 1 or kube_pod_status_phase{phase="Failed"} == 1 13 | for: 3m 14 | labels: 15 | severity: error 16 | service: prometheus_bot 17 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.kubernetes_namespace }}" 18 | annotations: 19 | summary: Pod Down 20 | k8scluster: "{{ $labels.k8scluster}}" 21 | namespace: "{{ $labels.kubernetes_namespace }}" 22 | pod: "{{ $labels.pod }}" 23 | container: "{{ $labels.container }}" 24 | 25 | - alert: PodRestart 26 | expr: changes(kube_pod_container_status_restarts_total{pod !~ "analyzer.*"}[10m]) > 0 27 | for: 3m 28 | labels: 29 | severity: error 30 | service: prometheus_bot 31 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.kubernetes_namespace }}" 32 | annotations: 33 | summary: Pod Restart 34 | k8scluster: "{{ $labels.k8scluster}}" 35 | namespace: "{{ $labels.kubernetes_namespace }}" 36 | pod: "{{ $labels.pod }}" 37 | container: "{{ $labels.container }}" 38 | 39 | - alert: PodTerminated 40 | expr: kube_pod_container_status_terminated_reason{reason!="Completed"} > 0 41 | for: 5m 42 | labels: 43 | severity: error 44 | service: prometheus_bot 45 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.kubernetes_namespace }}" 46 | annotations: 47 | summary: Pod Terminated 48 | k8scluster: "{{ $labels.k8scluster}}" 49 | namespace: "{{ $labels.kubernetes_namespace }}" 50 | pod: "{{ $labels.pod }}" 51 | container: "{{ $labels.container }}" 52 | reason: "{{ $labels.reason }}" 53 | 54 | - alert: NodeUnschedulable 55 | expr: kube_node_spec_unschedulable == 1 56 | for: 5m 57 | labels: 58 | severity: error 59 | service: prometheus_bot 60 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.kubernetes_namespace }}" 61 | annotations: 62 | summary: Node Unschedulable 63 | k8scluster: "{{ 
$labels.k8scluster}}" 64 | node: "{{ $labels.node }}" 65 | 66 | - alert: NodeStatusError 67 | expr: kube_node_status_condition{condition="Ready", status!="true"} == 1 68 | for: 5m 69 | labels: 70 | severity: error 71 | service: prometheus_bot 72 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.kubernetes_namespace }}" 73 | annotations: 74 | summary: Node Status Error 75 | k8scluster: "{{ $labels.k8scluster}}" 76 | node: "{{ $labels.node }}" 77 | 78 | - alert: DaemonsetUnavailable 79 | expr: kube_daemonset_status_number_unavailable > 0 80 | for: 5m 81 | labels: 82 | severity: error 83 | service: prometheus_bot 84 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.kubernetes_namespace }}" 85 | annotations: 86 | summary: Daemonset Unavailable 87 | k8scluster: "{{ $labels.k8scluster}}" 88 | namespace: "{{ $labels.kubernetes_namespace }}" 89 | daemonset: "{{ $labels.daemonset }}" 90 | 91 | - alert: JobFailed 92 | expr: kube_job_status_failed > 0 # the metric counts failed pods, so "== 1" would miss jobs with two or more failures 93 | for: 5m 94 | labels: 95 | severity: error 96 | service: prometheus_bot 97 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.kubernetes_namespace }}" 98 | annotations: 99 | summary: Job Failed 100 | k8scluster: "{{ $labels.k8scluster}}" 101 | namespace: "{{ $labels.kubernetes_namespace }}" 102 | job: "{{ $labels.exported_job }}" 103 | 104 | - alert: ClusterMemoryUsageRate 105 | expr: sum by(k8scluster) (container_memory_working_set_bytes{image!=""}) / sum by(k8scluster) (machine_memory_bytes) * 100 > 80 106 | for: 5m 107 | labels: 108 | severity: error 109 | service: prometheus_bot 110 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.namespace }}" 111 | annotations: 112 | summary: "K8s集群内存使用率大于80% current: {{ $value }}" 113 | k8scluster: "{{ $labels.k8scluster}}" 114 | 115 | - alert: ClusterCpuUsageRate 116 | expr: sum by (k8scluster) (rate(container_cpu_usage_seconds_total{image != ""}[5m])) / sum by (k8scluster) (machine_cpu_cores) * 100 > 60 117 | for: 5m 118 | labels: 119 | severity: error 120 |
service: prometheus_bot 121 | receiver_group: "{{ $labels.k8scluster}}_{{ $labels.namespace }}" 122 | annotations: 123 | summary: "K8s集群CPU使用率大于60% current: {{ $value }}" 124 | k8scluster: "{{ $labels.k8scluster}}" 125 | -------------------------------------------------------------------------------- /prometheus-server/prometheus-server/prometheus-server-svc.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: prometheus-server-svc 5 | namespace: prometheus-server 6 | spec: 7 | type: LoadBalancer 8 | ports: 9 | - port: 80 10 | targetPort: 9090 11 | selector: 12 | app: prometheus-server-dep -------------------------------------------------------------------------------- /prometheus/README.md: -------------------------------------------------------------------------------- 1 | ## 一键部署 2 | kubectl apply -f ./prometheus-namespace.yaml 3 | 4 | kubectl apply -f ./prometheus/prometheus-rbac.yaml 5 | 6 | kubectl apply -f ./prometheus/prometheus-config-configmap.yaml 7 | 8 | kubectl apply -f ./prometheus/prometheus-dep.yaml 9 | 10 | kubectl apply -f ./prometheus/prometheus-svc.yaml 11 | 12 | kubectl apply -f ./kube-state-metrics/kube-state-metrics-rbac.yaml 13 | 14 | kubectl apply -f ./kube-state-metrics/kube-state-metrics-dep.yaml 15 | 16 | kubectl apply -f ./kube-state-metrics/kube-state-metrics-svc.yaml 17 | 18 | kubectl get -n prometheus pod 19 | 20 | ## Prometheus + Kube-state-metrics监控kubernetes集群说明 21 | 22 | ### 创建namespace 23 | * 创建namespace:kubectl apply -f ./prometheus-namespace.yaml 24 | 25 | ### 部署prometheus 26 | * 创建rbac权限:kubectl apply -f ./prometheus/prometheus-rbac.yaml 27 | * 创建configmap作为prometheus配置文件:kubectl apply -f ./prometheus/prometheus-config-configmap.yaml 28 | * 部署prometheus-dep:kubectl apply -f ./prometheus/prometheus-dep.yaml 29 | * 部署prometheus-svc:kubectl apply -f ./prometheus/prometheus-svc.yaml 30 | 31 | > 
部署prometheus之后,就可以采集pod、container的cpu、mem、network等指标。但是不能监控daemonset、deployment等应用的状态,如果需要监控这些,还要部署 kube-state-metrics 32 | 33 | 34 | ### 部署kube-state-metrics 35 | * 创建rbac权限:kubectl apply -f ./kube-state-metrics/kube-state-metrics-rbac.yaml 36 | * 部署kube-state-metrics-dep:kubectl apply -f ./kube-state-metrics/kube-state-metrics-dep.yaml 37 | * 部署kube-state-metrics-svc:kubectl apply -f ./kube-state-metrics/kube-state-metrics-svc.yaml 38 | 39 | 40 | -------------------------------------------------------------------------------- /prometheus/kube-state-metrics/kube-state-metrics-dep.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | # Kubernetes versions after 1.9.0 should use apps/v1 3 | # Kubernetes versions before 1.8.0 should use apps/v1beta1 or extensions/v1beta1 4 | # addon-resizer描述:https://github.com/kubernetes/autoscaler/tree/master/addon-resizer 5 | kind: Deployment 6 | metadata: 7 | name: kube-state-metrics 8 | namespace: prometheus 9 | spec: 10 | selector: 11 | matchLabels: 12 | k8s-app: kube-state-metrics 13 | replicas: 1 14 | template: 15 | metadata: 16 | labels: 17 | k8s-app: kube-state-metrics 18 | spec: 19 | serviceAccountName: kube-state-metrics 20 | containers: 21 | - name: kube-state-metrics 22 | image: xianyuluo/kube-state-metrics:v1.3.1 23 | ports: 24 | - name: http-metrics 25 | containerPort: 8080 26 | - name: telemetry 27 | containerPort: 8081 28 | readinessProbe: 29 | httpGet: 30 | path: /healthz 31 | port: 8080 32 | initialDelaySeconds: 5 33 | timeoutSeconds: 5 34 | - name: addon-resizer 35 | image: xianyuluo/addon-resizer:1.7 36 | resources: 37 | limits: 38 | cpu: 100m 39 | memory: 30Mi 40 | requests: 41 | cpu: 100m 42 | memory: 30Mi 43 | env: 44 | - name: MY_POD_NAME 45 | valueFrom: 46 | fieldRef: 47 | fieldPath: metadata.name 48 | - name: MY_POD_NAMESPACE 49 | valueFrom: 50 | fieldRef: 51 | fieldPath: metadata.namespace 52 | command: 53 | - /pod_nanny 54 | -
--container=kube-state-metrics 55 | - --cpu=100m 56 | - --extra-cpu=1m 57 | - --memory=100Mi 58 | - --extra-memory=2Mi 59 | - --threshold=5 60 | - --deployment=kube-state-metrics 61 | -------------------------------------------------------------------------------- /prometheus/kube-state-metrics/kube-state-metrics-rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: kube-state-metrics 5 | namespace: prometheus 6 | --- 7 | 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | # kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 10 | kind: Role 11 | metadata: 12 | namespace: prometheus 13 | name: kube-state-metrics-resizer 14 | rules: 15 | - apiGroups: [""] 16 | resources: 17 | - pods 18 | verbs: ["get"] 19 | - apiGroups: ["extensions"] 20 | resources: 21 | - deployments 22 | resourceNames: ["kube-state-metrics"] 23 | verbs: ["get", "update"] 24 | --- 25 | 26 | apiVersion: rbac.authorization.k8s.io/v1 27 | # kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 28 | kind: ClusterRole 29 | metadata: 30 | name: kube-state-metrics 31 | rules: 32 | - apiGroups: [""] 33 | resources: 34 | - configmaps 35 | - secrets 36 | - nodes 37 | - pods 38 | - services 39 | - resourcequotas 40 | - replicationcontrollers 41 | - limitranges 42 | - persistentvolumeclaims 43 | - persistentvolumes 44 | - namespaces 45 | - endpoints 46 | verbs: ["list", "watch"] 47 | - apiGroups: ["extensions"] 48 | resources: 49 | - daemonsets 50 | - deployments 51 | - replicasets 52 | verbs: ["list", "watch"] 53 | - apiGroups: ["apps"] 54 | resources: 55 | - statefulsets 56 | verbs: ["list", "watch"] 57 | - apiGroups: ["batch"] 58 | resources: 59 | - cronjobs 60 | - jobs 61 | verbs: ["list", "watch"] 62 | - apiGroups: ["autoscaling"] 63 | resources: 64 | - horizontalpodautoscalers 65 | verbs: ["list", "watch"] 66 | --- 67 | 68 | apiVersion: 
rbac.authorization.k8s.io/v1 69 | # kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 70 | kind: RoleBinding 71 | metadata: 72 | name: kube-state-metrics 73 | namespace: prometheus 74 | roleRef: 75 | apiGroup: rbac.authorization.k8s.io 76 | kind: Role 77 | name: kube-state-metrics-resizer 78 | subjects: 79 | - kind: ServiceAccount 80 | name: kube-state-metrics 81 | namespace: prometheus 82 | --- 83 | 84 | apiVersion: rbac.authorization.k8s.io/v1 85 | # kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 86 | kind: ClusterRoleBinding 87 | metadata: 88 | name: kube-state-metrics 89 | roleRef: 90 | apiGroup: rbac.authorization.k8s.io 91 | kind: ClusterRole 92 | name: kube-state-metrics 93 | subjects: 94 | - kind: ServiceAccount 95 | name: kube-state-metrics 96 | namespace: prometheus -------------------------------------------------------------------------------- /prometheus/kube-state-metrics/kube-state-metrics-svc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: kube-state-metrics 5 | namespace: prometheus 6 | labels: 7 | k8s-app: kube-state-metrics 8 | annotations: 9 | prometheus.io/scrape: 'true' 10 | spec: 11 | ports: 12 | - name: http-metrics 13 | port: 8080 14 | targetPort: http-metrics 15 | protocol: TCP 16 | - name: telemetry 17 | port: 8081 18 | targetPort: telemetry 19 | protocol: TCP 20 | selector: 21 | k8s-app: kube-state-metrics -------------------------------------------------------------------------------- /prometheus/prometheus-namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: prometheus -------------------------------------------------------------------------------- /prometheus/prometheus/prometheus-config-configmap.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: prometheus-config 5 | namespace: prometheus 6 | data: 7 | prometheus.yml: | 8 | global: 9 | scrape_interval: 15s 10 | evaluation_interval: 15s 11 | scrape_configs: 12 | 13 | - job_name: 'kubernetes-apiservers' 14 | kubernetes_sd_configs: 15 | - role: endpoints 16 | scheme: https 17 | tls_config: 18 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 19 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 20 | relabel_configs: 21 | - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 22 | action: keep 23 | regex: default;kubernetes;https 24 | 25 | - job_name: 'kubernetes-nodes' 26 | kubernetes_sd_configs: 27 | - role: node 28 | scheme: https 29 | tls_config: 30 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 31 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 32 | relabel_configs: 33 | - action: labelmap 34 | regex: __meta_kubernetes_node_label_(.+) 35 | - target_label: __address__ 36 | replacement: kubernetes.default.svc:443 37 | - source_labels: [__meta_kubernetes_node_name] 38 | regex: (.+) 39 | target_label: __metrics_path__ 40 | replacement: /api/v1/nodes/${1}/proxy/metrics 41 | 42 | - job_name: 'kubernetes-cadvisor' 43 | kubernetes_sd_configs: 44 | - role: node 45 | scheme: https 46 | tls_config: 47 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 48 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 49 | relabel_configs: 50 | - action: labelmap 51 | regex: __meta_kubernetes_node_label_(.+) 52 | - target_label: __address__ 53 | replacement: kubernetes.default.svc:443 54 | - source_labels: [__meta_kubernetes_node_name] 55 | regex: (.+) 56 | target_label: __metrics_path__ 57 | replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor 58 | 59 | - 
job_name: 'kubernetes-service-endpoints' 60 | kubernetes_sd_configs: 61 | - role: endpoints 62 | relabel_configs: 63 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] 64 | action: keep 65 | regex: true 66 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] 67 | action: replace 68 | target_label: __scheme__ 69 | regex: (https?) 70 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] 71 | action: replace 72 | target_label: __metrics_path__ 73 | regex: (.+) 74 | - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] 75 | action: replace 76 | target_label: __address__ 77 | regex: ([^:]+)(?::\d+)?;(\d+) 78 | replacement: $1:$2 79 | - action: labelmap 80 | regex: __meta_kubernetes_service_label_(.+) 81 | - source_labels: [__meta_kubernetes_namespace] 82 | action: replace 83 | target_label: kubernetes_namespace 84 | - source_labels: [__meta_kubernetes_service_name] 85 | action: replace 86 | target_label: kubernetes_name 87 | 88 | - job_name: 'kubernetes-services' 89 | kubernetes_sd_configs: 90 | - role: service 91 | metrics_path: /probe 92 | params: 93 | module: [http_2xx] 94 | relabel_configs: 95 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] 96 | action: keep 97 | regex: true 98 | - source_labels: [__address__] 99 | target_label: __param_target 100 | - target_label: __address__ 101 | replacement: blackbox-exporter.example.com:9115 102 | - source_labels: [__param_target] 103 | target_label: instance 104 | - action: labelmap 105 | regex: __meta_kubernetes_service_label_(.+) 106 | - source_labels: [__meta_kubernetes_namespace] 107 | target_label: kubernetes_namespace 108 | - source_labels: [__meta_kubernetes_service_name] 109 | target_label: kubernetes_name 110 | 111 | - job_name: 'kubernetes-ingresses' 112 | kubernetes_sd_configs: 113 | - role: ingress 114 | relabel_configs: 115 | - source_labels: 
[__meta_kubernetes_ingress_annotation_prometheus_io_probe] 116 | action: keep 117 | regex: true 118 | - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path] 119 | regex: (.+);(.+);(.+) 120 | replacement: ${1}://${2}${3} 121 | target_label: __param_target 122 | - target_label: __address__ 123 | replacement: blackbox-exporter.example.com:9115 124 | - source_labels: [__param_target] 125 | target_label: instance 126 | - action: labelmap 127 | regex: __meta_kubernetes_ingress_label_(.+) 128 | - source_labels: [__meta_kubernetes_namespace] 129 | target_label: kubernetes_namespace 130 | - source_labels: [__meta_kubernetes_ingress_name] 131 | target_label: kubernetes_name 132 | 133 | - job_name: 'kubernetes-pods' 134 | kubernetes_sd_configs: 135 | - role: pod 136 | relabel_configs: 137 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] 138 | action: keep 139 | regex: true 140 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] 141 | action: replace 142 | target_label: __metrics_path__ 143 | regex: (.+) 144 | - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] 145 | action: replace 146 | regex: ([^:]+)(?::\d+)?;(\d+) 147 | replacement: $1:$2 148 | target_label: __address__ 149 | - action: labelmap 150 | regex: __meta_kubernetes_pod_label_(.+) 151 | - source_labels: [__meta_kubernetes_namespace] 152 | action: replace 153 | target_label: kubernetes_namespace 154 | - source_labels: [__meta_kubernetes_pod_name] 155 | action: replace 156 | target_label: kubernetes_pod_name 157 | 158 | # - job_name: 'traefik' 159 | # static_configs: 160 | # - targets: ['traefik-ingress-service.traefik.svc.cluster.local:8080'] 161 | -------------------------------------------------------------------------------- /prometheus/prometheus/prometheus-dep.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: 
Deployment 3 | metadata: 4 | name: prometheus-dep 5 | namespace: prometheus 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: prometheus-dep 11 | template: 12 | metadata: 13 | labels: 14 | app: prometheus-dep 15 | spec: 16 | containers: 17 | - image: prom/prometheus:v2.3.2 18 | name: prometheus 19 | command: 20 | - "/bin/prometheus" 21 | args: 22 | - "--config.file=/etc/prometheus/prometheus.yml" 23 | - "--storage.tsdb.path=/prometheus" 24 | - "--storage.tsdb.retention=1d" 25 | ports: 26 | - containerPort: 9090 27 | protocol: TCP 28 | volumeMounts: 29 | - mountPath: "/prometheus" 30 | name: data 31 | - mountPath: "/etc/prometheus" 32 | name: config-volume 33 | resources: 34 | requests: 35 | cpu: 100m 36 | memory: 100Mi 37 | limits: 38 | cpu: 500m 39 | memory: 2500Mi 40 | serviceAccountName: prometheus 41 | volumes: 42 | - name: data 43 | emptyDir: {} 44 | - name: config-volume 45 | configMap: 46 | name: prometheus-config -------------------------------------------------------------------------------- /prometheus/prometheus/prometheus-rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus 5 | rules: 6 | - apiGroups: [""] 7 | resources: 8 | - nodes 9 | - nodes/proxy 10 | - services 11 | - endpoints 12 | - pods 13 | verbs: ["get", "list", "watch"] 14 | - apiGroups: 15 | - extensions 16 | resources: 17 | - ingresses 18 | verbs: ["get", "list", "watch"] 19 | - nonResourceURLs: ["/metrics"] 20 | verbs: ["get"] 21 | --- 22 | apiVersion: v1 23 | kind: ServiceAccount 24 | metadata: 25 | name: prometheus 26 | namespace: prometheus 27 | --- 28 | apiVersion: rbac.authorization.k8s.io/v1beta1 29 | kind: ClusterRoleBinding 30 | metadata: 31 | name: prometheus 32 | roleRef: 33 | apiGroup: rbac.authorization.k8s.io 34 | kind: ClusterRole 35 | name: prometheus 36 | subjects: 37 | - kind: ServiceAccount 38 | name: prometheus 
39 | namespace: prometheus -------------------------------------------------------------------------------- /prometheus/prometheus/prometheus-svc.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: prometheus-svc 5 | namespace: prometheus 6 | spec: 7 | type: NodePort 8 | ports: 9 | - port: 9090 10 | targetPort: 9090 11 | nodePort: 30090 12 | selector: 13 | app: prometheus-dep --------------------------------------------------------------------------------