├── EFK
│   ├── README.md
│   ├── .DS_Store
│   └── log-pilot + elasticsearch + kibana
│       ├── traefik-kibana.yaml
│       ├── eureka.yaml
│       ├── kibana.yml
│       ├── log-pilot.yml
│       ├── README.md
│       └── elasticsearch.yml
├── .DS_Store
├── Harbor
│   └── README.md
├── prometheus
│   ├── README.md
│   ├── .DS_Store
│   ├── prometheus-ns.yaml
│   ├── prometheus-svc.yaml
│   ├── traefik-prometheus.yaml
│   ├── prometheus_rules
│   │   ├── prometheus-node_exporter-rules.yaml
│   │   ├── prometheus-altermanager-rules.yaml
│   │   ├── prometheus-etcd-rules.yaml
│   │   ├── prometheus-memory-rules.yaml
│   │   ├── prometheus-grafana-rules.yaml
│   │   ├── prometheus-deployment-rules.yaml
│   │   ├── prometheus-prometheus-rules.yaml
│   │   ├── prometheus-cpu-rules.yaml
│   │   ├── prometheus-k8s-pod-rules.yaml
│   │   ├── prometheus-disk-rules.yaml
│   │   └── prometheus-k8s-rules.yaml
│   ├── prometheus-rbac.yaml
│   ├── prometheus-deploy.yaml
│   └── prometheus-cm.yaml
├── elasticsearch
│   ├── README.md
│   ├── elasticsearch-storageclass.yaml
│   ├── elasticsearch-svc.yaml
│   └── elasticsearch-statefulset.yaml
├── images
│   ├── elk.png
│   ├── dash2.png
│   ├── grafana.png
│   ├── metrics.png
│   ├── dashboard1.png
│   └── install_tag.png
├── prometheus-operator
│   ├── .DS_Store
│   ├── 00namespace-namespace.yaml
│   ├── grafana-serviceAccount.yaml
│   ├── prometheus-serviceAccount.yaml
│   ├── alertmanager-serviceAccount.yaml
│   ├── node-exporter-serviceAccount.yaml
│   ├── 0prometheus-operator-serviceAccount.yaml
│   ├── kube-state-metrics-serviceAccount.yaml
│   ├── prometheus-adapter-serviceAccount.yaml
│   ├── prometheus-roleConfig.yaml
│   ├── grafana-service.yaml
│   ├── grafana-serviceMonitor.yaml
│   ├── prometheus-adapter-clusterRoleServerResources.yaml
│   ├── prometheus-clusterRole.yaml
│   ├── prometheus-adapter-clusterRole.yaml
│   ├── prometheus-adapter-service.yaml
│   ├── node-exporter-service.yaml
│   ├── prometheus-serviceMonitor.yaml
│   ├── alertmanager-serviceMonitor.yaml
│   ├── kube-state-metrics-roleBinding.yaml
│   ├── node-exporter-clusterRoleBinding.yaml
│   ├── prometheus-clusterRoleBinding.yaml
│   ├── prometheus-service.yaml
│   ├── 0prometheus-operator-service.yaml
│   ├── prometheus-kubeSchedulerService.yaml
│   ├── alertmanager-service.yaml
│   ├── kube-state-metrics-clusterRoleBinding.yaml
│   ├── prometheus-adapter-clusterRoleBinding.yaml
│   ├── 0prometheus-operator-clusterRoleBinding.yaml
│   ├── prometheus-adapter-apiService.yaml
│   ├── prometheus-roleBindingConfig.yaml
│   ├── 0prometheus-operator-serviceMonitor.yaml
│   ├── node-exporter-clusterRole.yaml
│   ├── prometheus-kube-controller-manager-service.yaml
│   ├── prometheus-adapter-clusterRoleBindingDelegator.yaml
│   ├── prometheus-adapter-roleBindingAuthReader.yaml
│   ├── kube-state-metrics-service.yaml
│   ├── prometheus-serviceMonitorKubeScheduler.yaml
│   ├── alertmanager-alertmanager.yaml
│   ├── prometheus-serviceMonitorCoreDNS.yaml
│   ├── prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml
│   ├── node-exporter-serviceMonitor.yaml
│   ├── alertmanager-secret.yaml
│   ├── grafana-dashboardSources.yaml
│   ├── kube-state-metrics-role.yaml
│   ├── grafana-dashboardDatasources.yaml
│   ├── prometheus_rules
│   │   ├── prometheus-node_exporter-rules.yaml
│   │   ├── prometheus-altermanager-rules.yaml
│   │   ├── prometheus-etcd-rules.yaml
│   │   ├── prometheus-memory-rules.yaml
│   │   ├── prometheus-grafana-rules.yaml
│   │   ├── prometheus-deployment-rules.yaml
│   │   ├── prometheus-prometheus-rules.yaml
│   │   ├── prometheus-cpu-rules.yaml
│   │   ├── prometheus-k8s-pod-rules.yaml
│   │   ├── prometheus-disk-rules.yaml
│   │   └── prometheus-k8s-rules.yaml
│   ├── prometheus-serviceMonitorKubeControllerManager.yaml
│   ├── kube-state-metrics-serviceMonitor.yaml
│   ├── prometheus-prometheus.yaml
│   ├── prometheus-roleSpecificNamespaces.yaml
│   ├── prometheus-serviceMonitorKubelet.yaml
│   ├── prometheus-serviceMonitorApiserver.yaml
│   ├── prometheus-roleBindingSpecificNamespaces.yaml
│   ├── 0prometheus-operator-clusterRole.yaml
│   ├── alertmanager-config.yaml
│   ├── 0prometheus-operator-deployment.yaml
│   ├── kube-state-metrics-clusterRole.yaml
│   ├── prometheus-adapter-configMap.yaml
│   ├── prometheus-adapter-deployment.yaml
│   ├── node-exporter-daemonset.yaml
│   ├── kube-state-metrics-deployment.yaml
│   ├── grafana-deployment.yaml
│   └── 0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
├── StorageClass
│   ├── nfs-client-class.yaml
│   ├── nginx-demo.yaml
│   ├── nfs-client.yaml
│   ├── nfs-client-sa.yaml
│   └── README.md
├── common
│   ├── encryption-config.yaml
│   ├── kube-scheduler.service
│   ├── kubelet-config.yaml
│   ├── flanneld.service
│   ├── kubelet.service
│   ├── etcd.service
│   ├── kube-controller-manager.service
│   ├── environment.sh
│   └── kube-apiserver.service
├── grafana
│   ├── traefik-grafana.yaml
│   ├── node-exporter-svc.yaml
│   ├── grafana-svc.yaml
│   ├── node-exporter-ds.yaml
│   ├── README.md
│   └── grafana-deploy.yaml
├── metrics-server
│   ├── README.md
│   ├── auth-delegator.yaml
│   ├── metrics-apiservice.yaml
│   ├── metrics-server-service.yaml
│   ├── auth-reader.yaml
│   ├── aggregated-metrics-reader.yaml
│   ├── resource-reader.yaml
│   └── metrics-server-deployment.yaml
├── jenkins
│   ├── README.md
│   ├── pv-pvc.yml
│   ├── rbc.yml
│   ├── Jenkins-slave.Dockerfile
│   └── Jenkins-deploy.yml
├── Monitoring item
│   ├── node-exporter
│   │   ├── node-exporter-svc.yaml
│   │   └── node-exporter-ds.yaml
│   └── prometheus-etcdService
│       ├── prometheus-etcdService.yaml
│       └── prometheus-serviceMonitorEtcd.yaml
├── prometheus_rules
│   ├── prometheus-mysql-rules.yaml
│   ├── prometheus-redis-rules.yaml
│   ├── prometheus-node_exporter-rules.yaml
│   ├── prometheus-altermanager-rules.yaml
│   ├── prometheus-etcd-rules.yaml
│   ├── prometheus-memory-rules.yaml
│   ├── prometheus-grafana-rules.yaml
│   ├── prometheus-deployment-rules.yaml
│   ├── prometheus-prometheus-rules.yaml
│   ├── prometheus-cpu-rules.yaml
│   ├── prometheus-k8s-pod-rules.yaml
│   ├── prometheus-disk-rules.yaml
│   └── prometheus-k8s-rules.yaml
├── traefik
│   ├── traefik-ui.yaml
│   ├── traefik-rbac.yaml
│   └── traefik-ds.yaml
├── kubernets-dashboard
│   ├── dashboard-admin_new.yaml
│   ├── README.md
│   └── kubernetes-dashboard.yaml
├── kubeadm
│   └── config.yaml
├── Calico
│   ├── rbac-kdd.yaml
│   └── calico.yaml
├── ingress
│   ├── README.md
│   └── mandatory.yaml
└── kafka
    └── kafka-cluster.yaml
/EFK/README.md:
--------------------------------------------------------------------------------
1 | Please look forward to updating
2 | =================
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/.DS_Store
--------------------------------------------------------------------------------
/Harbor/README.md:
--------------------------------------------------------------------------------
1 | Please look forward to updating
2 | =================
--------------------------------------------------------------------------------
/prometheus/README.md:
--------------------------------------------------------------------------------
1 | Please look forward to updating
2 | =================
--------------------------------------------------------------------------------
/elasticsearch/README.md:
--------------------------------------------------------------------------------
1 | elasticsearch
2 | =================
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/EFK/.DS_Store:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/EFK/.DS_Store -------------------------------------------------------------------------------- /images/elk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/images/elk.png -------------------------------------------------------------------------------- /images/dash2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/images/dash2.png -------------------------------------------------------------------------------- /images/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/images/grafana.png -------------------------------------------------------------------------------- /images/metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/images/metrics.png -------------------------------------------------------------------------------- /prometheus/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/prometheus/.DS_Store -------------------------------------------------------------------------------- /images/dashboard1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/images/dashboard1.png -------------------------------------------------------------------------------- /images/install_tag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/images/install_tag.png -------------------------------------------------------------------------------- /prometheus-operator/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cf1998/kubernetes/HEAD/prometheus-operator/.DS_Store -------------------------------------------------------------------------------- /prometheus/prometheus-ns.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: kube-prometheus 5 | -------------------------------------------------------------------------------- /prometheus-operator/00namespace-namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: monitoring 5 | -------------------------------------------------------------------------------- /prometheus-operator/grafana-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: grafana 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: prometheus-k8s 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- 
/elasticsearch/elasticsearch-storageclass.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: storage.k8s.io/v1 2 | kind: StorageClass 3 | metadata: 4 | name: es-data-db 5 | provisioner: fuseim.pri/ifs -------------------------------------------------------------------------------- /prometheus-operator/alertmanager-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: alertmanager-main 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /prometheus-operator/node-exporter-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: node-exporter 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /prometheus-operator/0prometheus-operator-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: prometheus-operator 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /prometheus-operator/kube-state-metrics-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: kube-state-metrics 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-serviceAccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: prometheus-adapter 5 | namespace: monitoring 6 | -------------------------------------------------------------------------------- /StorageClass/nfs-client-class.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: storage.k8s.io/v1 2 | kind: StorageClass 3 | metadata: 4 | name: es-data-db 5 | provisioner: fuseim.pri/ifs # or choose another name, must match deployment's env PROVISIONER_NAME' -------------------------------------------------------------------------------- /prometheus-operator/prometheus-roleConfig.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role 3 | metadata: 4 | name: prometheus-k8s-config 5 | namespace: monitoring 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | verbs: 12 | - get 13 | -------------------------------------------------------------------------------- /prometheus-operator/grafana-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | app: grafana 6 | name: grafana 7 | namespace: monitoring 8 | spec: 9 | ports: 10 | - name: http 11 | port: 3000 12 | targetPort: http 13 | selector: 14 | app: grafana 15 | -------------------------------------------------------------------------------- /prometheus-operator/grafana-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: grafana 5 | namespace: monitoring 
6 | spec: 7 | endpoints: 8 | - interval: 15s 9 | port: http 10 | selector: 11 | matchLabels: 12 | app: grafana 13 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-clusterRoleServerResources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: resource-metrics-server-resources 5 | rules: 6 | - apiGroups: 7 | - metrics.k8s.io 8 | resources: 9 | - '*' 10 | verbs: 11 | - '*' 12 | -------------------------------------------------------------------------------- /common/encryption-config.yaml: -------------------------------------------------------------------------------- 1 | kind: EncryptionConfig 2 | apiVersion: v1 3 | resources: 4 | - resources: 5 | - secrets 6 | providers: 7 | - aescbc: 8 | keys: 9 | - name: key1 10 | secret: Ru2T2ZzTZSe1hLkuE2qjYx3vIPDuphIpIxEEnG81oMg= 11 | - identity: {} 12 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus-k8s 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - nodes/metrics 10 | verbs: 11 | - get 12 | - nonResourceURLs: 13 | - /metrics 14 | verbs: 15 | - get 16 | -------------------------------------------------------------------------------- /prometheus/prometheus-svc.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | labels: 5 | app: prometheus 6 | name: prometheus 7 | namespace: kube-prometheus 8 | spec: 9 | type: NodePort 10 | ports: 11 | - port: 9090 12 | targetPort: 9090 13 | nodePort: 30003 14 | selector: 15 | app: prometheus 16 | -------------------------------------------------------------------------------- /grafana/traefik-grafana.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Ingress 3 | metadata: 4 | name: grafana 5 | namespace: kube-prometheus 6 | spec: 7 | rules: 8 | - host: grafana.baishuchao.com 9 | http: 10 | paths: 11 | - path: / 12 | backend: 13 | serviceName: grafana 14 | servicePort: 3000 -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus-adapter 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - nodes 10 | - namespaces 11 | - pods 12 | - services 13 | verbs: 14 | - get 15 | - list 16 | - watch 17 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | name: prometheus-adapter 6 | name: prometheus-adapter 7 | namespace: monitoring 8 | spec: 9 | ports: 10 | - name: https 11 | port: 443 12 | targetPort: 6443 13 | selector: 14 | name: prometheus-adapter 15 | -------------------------------------------------------------------------------- /metrics-server/README.md: 
--------------------------------------------------------------------------------
1 | # Deploying the metrics-server component
2 |
3 | ## Install metrics-server
4 |
5 | ```
6 | [root@ks-master k8s]# git clone https://github.com/baishuchao/kubernetes.git
7 | [root@ks-master k8s]# cd kubernetes/metrics-server
8 | [root@ks-master metrics-server]# kubectl apply -f .
9 | ```
10 |
11 | ## Verification
12 |
13 | ![](../images/metrics.png)
14 |
15 |
-------------------------------------------------------------------------------- /prometheus-operator/node-exporter-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | k8s-app: node-exporter 6 | name: node-exporter 7 | namespace: monitoring 8 | spec: 9 | clusterIP: None 10 | ports: 11 | - name: https 12 | port: 9100 13 | targetPort: https 14 | selector: 15 | app: node-exporter 16 |
-------------------------------------------------------------------------------- /prometheus-operator/prometheus-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: prometheus 6 | name: prometheus 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - interval: 30s 11 | port: web 12 | selector: 13 | matchLabels: 14 | prometheus: k8s 15 |
-------------------------------------------------------------------------------- /prometheus/traefik-prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Ingress 3 | metadata: 4 | name: prometheus 5 | namespace: kube-prometheus 6 | spec: 7 | rules: 8 | - host: prometheus.baishuchao.com 9 | http: 10 | paths: 11 | - path: / 12 | backend: 13 | serviceName: prometheus 14 | servicePort: 9090
-------------------------------------------------------------------------------- /EFK/ log-pilot + elasticsearch + kibana/traefik-kibana.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Ingress 3 | metadata: 4 | name: kibana 5 | namespace: kube-system 6 | spec: 7 | rules: 8 | - host: kibana.baishuchao.com 9 | http: 10 | paths: 11 | - path: / 12 | backend: 13 | serviceName: kibana 14 | servicePort: 80
-------------------------------------------------------------------------------- /elasticsearch/elasticsearch-svc.yaml: -------------------------------------------------------------------------------- 1 | 2 | kind: Service 3 | apiVersion: v1 4 | metadata: 5 | name: elasticsearch 6 | namespace: logging 7 | labels: 8 | app: elasticsearch 9 | spec: 10 | selector: 11 | app: elasticsearch 12 | clusterIP: None 13 | ports: 14 | - port: 9200 15 | name: rest 16 | - port: 9300 17 | name: inter-node
-------------------------------------------------------------------------------- /grafana/node-exporter-svc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | k8s-app: node-exporter 6 | name: node-exporter 7 | namespace: kube-prometheus 8 | spec: 9 | ports: 10 | - name: http 11 | port: 9100 12 | nodePort: 31672 13 | protocol: TCP 14 | type: NodePort 15 | selector: 16 | k8s-app: node-exporter 17 |
-------------------------------------------------------------------------------- /prometheus-operator/alertmanager-serviceMonitor.yaml:
-------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: alertmanager 6 | name: alertmanager 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - interval: 30s 11 | port: web 12 | selector: 13 | matchLabels: 14 | alertmanager: main 15 | -------------------------------------------------------------------------------- /prometheus-operator/kube-state-metrics-roleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: kube-state-metrics 5 | namespace: monitoring 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: kube-state-metrics 10 | subjects: 11 | - kind: ServiceAccount 12 | name: kube-state-metrics 13 | -------------------------------------------------------------------------------- /prometheus-operator/node-exporter-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: node-exporter 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: node-exporter 9 | subjects: 10 | - kind: ServiceAccount 11 | name: node-exporter 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: prometheus-k8s 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: prometheus-k8s 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-k8s 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | name: prometheus-k8s 7 | namespace: monitoring 8 | spec: 9 | ports: 10 | - name: web 11 | port: 9090 12 | targetPort: web 13 | selector: 14 | app: prometheus 15 | prometheus: k8s 16 | sessionAffinity: ClientIP 17 | -------------------------------------------------------------------------------- /prometheus-operator/0prometheus-operator-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | k8s-app: prometheus-operator 6 | name: prometheus-operator 7 | namespace: monitoring 8 | spec: 9 | clusterIP: None 10 | ports: 11 | - name: http 12 | port: 8080 13 | targetPort: http 14 | selector: 15 | k8s-app: prometheus-operator 16 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-kubeSchedulerService.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | namespace: kube-system 5 | name: kube-scheduler 6 | labels: 7 | k8s-app: kube-scheduler 8 | spec: 9 | selector: 10 | component: kube-scheduler 11 | ports: 12 | - name: http-metrics 13 | port: 10251 14 | targetPort: 10251 15 | protocol: TCP 16 | 
-------------------------------------------------------------------------------- /prometheus-operator/alertmanager-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | alertmanager: main 6 | name: alertmanager-main 7 | namespace: monitoring 8 | spec: 9 | ports: 10 | - name: web 11 | port: 9093 12 | targetPort: web 13 | selector: 14 | alertmanager: main 15 | app: alertmanager 16 | sessionAffinity: ClientIP 17 | -------------------------------------------------------------------------------- /metrics-server/auth-delegator.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1beta1 3 | kind: ClusterRoleBinding 4 | metadata: 5 | name: metrics-server:system:auth-delegator 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: ClusterRole 9 | name: system:auth-delegator 10 | subjects: 11 | - kind: ServiceAccount 12 | name: metrics-server 13 | namespace: kube-system 14 | -------------------------------------------------------------------------------- /metrics-server/metrics-apiservice.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apiregistration.k8s.io/v1beta1 3 | kind: APIService 4 | metadata: 5 | name: v1beta1.metrics.k8s.io 6 | spec: 7 | service: 8 | name: metrics-server 9 | namespace: kube-system 10 | group: metrics.k8s.io 11 | version: v1beta1 12 | insecureSkipTLSVerify: true 13 | groupPriorityMinimum: 100 14 | versionPriority: 100 15 | -------------------------------------------------------------------------------- /prometheus-operator/kube-state-metrics-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: kube-state-metrics 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: kube-state-metrics 9 | subjects: 10 | - kind: ServiceAccount 11 | name: kube-state-metrics 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: prometheus-adapter 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: prometheus-adapter 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-adapter 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /prometheus-operator/0prometheus-operator-clusterRoleBinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: prometheus-operator 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: prometheus-operator 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-operator 12 | namespace: monitoring 13 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-apiService.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiregistration.k8s.io/v1 2 | 
kind: APIService 3 | metadata: 4 | name: v1beta1.metrics.k8s.io 5 | spec: 6 | group: metrics.k8s.io 7 | groupPriorityMinimum: 100 8 | insecureSkipTLSVerify: true 9 | service: 10 | name: prometheus-adapter 11 | namespace: monitoring 12 | version: v1beta1 13 | versionPriority: 100 14 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-roleBindingConfig.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: prometheus-k8s-config 5 | namespace: monitoring 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: prometheus-k8s-config 10 | subjects: 11 | - kind: ServiceAccount 12 | name: prometheus-k8s 13 | namespace: monitoring 14 | -------------------------------------------------------------------------------- /grafana/grafana-svc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: grafana 5 | namespace: kube-prometheus 6 | labels: 7 | app: grafana 8 | component: core 9 | annotations: 10 | prometheus.io/scrape: 'true' 11 | spec: 12 | type: NodePort 13 | ports: 14 | - port: 3000 15 | nodePort: 30002 16 | selector: 17 | app: grafana 18 | component: core 19 | -------------------------------------------------------------------------------- /metrics-server/metrics-server-service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: metrics-server 6 | namespace: kube-system 7 | labels: 8 | kubernetes.io/name: "Metrics-server" 9 | kubernetes.io/cluster-service: "true" 10 | spec: 11 | selector: 12 | k8s-app: metrics-server 13 | ports: 14 | - port: 443 15 | protocol: TCP 16 | targetPort: 443 17 | -------------------------------------------------------------------------------- /prometheus-operator/0prometheus-operator-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: prometheus-operator 6 | name: prometheus-operator 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - honorLabels: true 11 | port: http 12 | selector: 13 | matchLabels: 14 | k8s-app: prometheus-operator 15 | -------------------------------------------------------------------------------- /prometheus-operator/node-exporter-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: node-exporter 5 | rules: 6 | - apiGroups: 7 | - authentication.k8s.io 8 | resources: 9 | - tokenreviews 10 | verbs: 11 | - create 12 | - apiGroups: 13 | - authorization.k8s.io 14 | resources: 15 | - subjectaccessreviews 16 | verbs: 17 | - create 18 | -------------------------------------------------------------------------------- /metrics-server/auth-reader.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1beta1 3 | kind: RoleBinding 4 | metadata: 5 | name: metrics-server-auth-reader 6 | namespace: kube-system 7 | roleRef: 8 | apiGroup: rbac.authorization.k8s.io 9 | kind: Role 10 | name: extension-apiserver-authentication-reader 11 | subjects: 12 | - kind: ServiceAccount 13 | name: 
metrics-server 14 | namespace: kube-system 15 |
-------------------------------------------------------------------------------- /prometheus-operator/prometheus-kube-controller-manager-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | namespace: kube-system 5 | name: kube-controller-manager 6 | labels: 7 | k8s-app: kube-controller-manager 8 | spec: 9 | selector: 10 | component: kube-controller-manager 11 | ports: 12 | - name: http-metrics 13 | port: 10252 14 | targetPort: 10252 15 | protocol: TCP 16 |
-------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-clusterRoleBindingDelegator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: resource-metrics:system:auth-delegator 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: system:auth-delegator 9 | subjects: 10 | - kind: ServiceAccount 11 | name: prometheus-adapter 12 | namespace: monitoring 13 |
-------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-roleBindingAuthReader.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: resource-metrics-auth-reader 5 | namespace: kube-system 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: extension-apiserver-authentication-reader 10 | subjects: 11 | - kind: ServiceAccount 12 | name: prometheus-adapter 13 | namespace: monitoring 14 |
--------------------------------------------------------------------------------
/jenkins/README.md:
--------------------------------------------------------------------------------
1 | # Jenkins Deployment
2 |
3 | ## Create the namespace
4 |
5 | * The PV is provisioned on NFS network storage
6 | * The container's /var/jenkins_home directory is mounted and persisted through a PVC
7 |
8 |
9 | ```
10 | root@ks-master:~# kubectl get namespaces ops
11 |
12 | ```
13 |
14 | ## Install Jenkins from the manifests
15 |
16 | ```
17 | root@ks-master:~# git clone https://github.com/baishuchao/kubernetes.git
18 | root@ks-master:~# cd kubernetes/jenkins/
19 | root@ks-master:~/kubernetes/jenkins# kubectl apply -f .
20 | ```
21 |
22 |
-------------------------------------------------------------------------------- /prometheus-operator/kube-state-metrics-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | k8s-app: kube-state-metrics 6 | name: kube-state-metrics 7 | namespace: monitoring 8 | spec: 9 | clusterIP: None 10 | ports: 11 | - name: https-main 12 | port: 8443 13 | targetPort: https-main 14 | - name: https-self 15 | port: 9443 16 | targetPort: https-self 17 | selector: 18 | app: kube-state-metrics 19 |
-------------------------------------------------------------------------------- /metrics-server/aggregated-metrics-reader.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: system:aggregated-metrics-reader 5 | labels: 6 | rbac.authorization.k8s.io/aggregate-to-view: "true" 7 | rbac.authorization.k8s.io/aggregate-to-edit: "true" 8 | rbac.authorization.k8s.io/aggregate-to-admin: "true" 9 | rules: 10 | - apiGroups: ["metrics.k8s.io"] 11 | resources: ["pods"] 12 | verbs: ["get", "list", "watch"] 13 |
-------------------------------------------------------------------------------- /prometheus-operator/prometheus-serviceMonitorKubeScheduler.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: kube-scheduler 6 | name: kube-scheduler 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - interval: 30s 11 | port: http-metrics 12 | jobLabel: k8s-app 13 | namespaceSelector: 14 | matchNames: 15 | - kube-system 16 | selector: 17 | matchLabels: 18 | k8s-app: kube-scheduler 19 |
-------------------------------------------------------------------------------- /prometheus-operator/alertmanager-alertmanager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: Alertmanager 3 | metadata: 4 | labels: 5 | alertmanager: main 6 | name: main 7 | namespace: monitoring 8 | spec: 9 | baseImage: quay.io/prometheus/alertmanager 10 | nodeSelector: 11 | beta.kubernetes.io/os: linux 12 | replicas: 3 13 | securityContext: 14 | fsGroup: 2000 15 | runAsNonRoot: true 16 | runAsUser: 1000 17 | serviceAccountName: alertmanager-main 18 | version: v0.17.0 19 |
-------------------------------------------------------------------------------- /grafana/node-exporter-ds.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: DaemonSet 3 | metadata: 4 | name: node-exporter 5 | namespace: kube-prometheus 6 | labels: 7 | k8s-app: node-exporter 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | k8s-app: node-exporter 13 | spec: 14 | containers: 15 | - image: prom/node-exporter:latest 16 | name: node-exporter 17 | ports: 18 | - containerPort: 9100 19 | protocol: TCP 20 | name: http 21 |
-------------------------------------------------------------------------------- /prometheus-operator/prometheus-serviceMonitorCoreDNS.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: coredns 6 | name: coredns 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile:
/var/run/secrets/kubernetes.io/serviceaccount/token 11 | interval: 15s 12 | port: metrics 13 | jobLabel: k8s-app 14 | namespaceSelector: 15 | matchNames: 16 | - kube-system 17 | selector: 18 | matchLabels: 19 | k8s-app: kube-dns 20 | -------------------------------------------------------------------------------- /Monitoring item/node-exporter/node-exporter-svc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | annotations: 5 | prometheus.io/scrape: 'true' 6 | name: prometheus-node-exporter 7 | namespace: kube-prometheus 8 | labels: 9 | app: node-exporter 10 | component: node-exporter 11 | spec: 12 | clusterIP: None 13 | ports: 14 | - name: prometheus-node-exporter 15 | port: 9100 16 | protocol: TCP 17 | selector: 18 | app: node-exporter 19 | component: node-exporter 20 | type: ClusterIP 21 | -------------------------------------------------------------------------------- /common/kube-scheduler.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Scheduler 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | WorkingDirectory=/data/k8s/k8s/kube-scheduler 7 | ExecStart=/opt/k8s/bin/kube-scheduler \ 8 | --master=10.10.11.21:5443 9 | --address=127.0.0.1 \ 10 | --kube-api-qps=100 \ 11 | --logtostderr=true \ 12 | --log-dir /var/log/kubernetes \ 13 | --v=4 14 | Restart=always 15 | RestartSec=5 16 | StartLimitInterval=0 17 | 18 | [Install] 19 | WantedBy=multi-user.target 20 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | labels: 5 | rbac.authorization.k8s.io/aggregate-to-admin: "true" 6 | rbac.authorization.k8s.io/aggregate-to-edit: "true" 7 | rbac.authorization.k8s.io/aggregate-to-view: "true" 8 | name: system:aggregated-metrics-reader 9 | rules: 10 | - apiGroups: 11 | - metrics.k8s.io 12 | resources: 13 | - pods 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | -------------------------------------------------------------------------------- /jenkins/pv-pvc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: jenkinspv 5 | spec: 6 | capacity: 7 | storage: 20Gi 8 | accessModes: 9 | - ReadWriteMany 10 | persistentVolumeReclaimPolicy: Delete 11 | nfs: 12 | server: 172.16.100.211 13 | path: /data/jenkins 14 | 15 | --- 16 | kind: PersistentVolumeClaim 17 | apiVersion: v1 18 | metadata: 19 | name: jenkinspvc 20 | namespace: ops 21 | spec: 22 | accessModes: 23 | - ReadWriteMany 24 | resources: 25 | requests: 26 | storage: 20Gi -------------------------------------------------------------------------------- /prometheus-operator/node-exporter-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: node-exporter 6 | name: node-exporter 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | interval: 30s 12 | port: https 13 | scheme: https 14 | tlsConfig: 15 | insecureSkipVerify: true 16 | jobLabel: 
k8s-app 17 | selector: 18 | matchLabels: 19 | k8s-app: node-exporter 20 | -------------------------------------------------------------------------------- /prometheus-operator/alertmanager-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOiAKLSAibmFtZSI6ICJudWxsIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjVtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAibnVsbCIKICAicmVwZWF0X2ludGVydmFsIjogIjEyaCIKICAicm91dGVzIjogCiAgLSAibWF0Y2giOiAKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg== 4 | kind: Secret 5 | metadata: 6 | name: alertmanager-main 7 | namespace: monitoring 8 | type: Opaque 9 | -------------------------------------------------------------------------------- /prometheus-operator/grafana-dashboardSources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | dashboards.yaml: |- 4 | { 5 | "apiVersion": 1, 6 | "providers": [ 7 | { 8 | "folder": "", 9 | "name": "0", 10 | "options": { 11 | "path": "/grafana-dashboard-definitions/0" 12 | }, 13 | "orgId": 1, 14 | "type": "file" 15 | } 16 | ] 17 | } 18 | kind: ConfigMap 19 | metadata: 20 | name: grafana-dashboards 21 | namespace: monitoring 22 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-mysql-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-mysql-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: MySQL状态监控 12 | rules: 13 | - alert: MySQL down 14 | annotations: 15 | detail: "{{ $labels.instance }} MySQL down (当前值: {{ $value }})" 16 | summary: "MySQL down!!!请管理员尽快排查" 17 | expr: mysql_up == 0 18 | for: 1m 19 | labels: 20 | severity: 严重 21 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-redis-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-redis-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: redis状态监控 12 | rules: 13 | - alert: redis is down 14 | annotations: 15 | detail: "{{ $labels.instance }} redis is down (当前值: {{ $value }})" 16 | summary: "redis is down!!!请管理员尽快排查" 17 | expr: redis_up == 0 18 | for: 1m 19 | labels: 20 | severity: 严重 21 | -------------------------------------------------------------------------------- /prometheus-operator/kube-state-metrics-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role 3 | metadata: 4 | name: kube-state-metrics 5 | namespace: monitoring 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - pods 11 | verbs: 12 | - get 13 | - apiGroups: 14 | - extensions 15 | resourceNames: 16 | - kube-state-metrics 17 | resources: 18 | - deployments 19 | verbs: 20 | - get 21 | - update 22 | - apiGroups: 23 | - apps 24 | resourceNames: 25 | - kube-state-metrics 26 | resources: 27 | - deployments 28 | verbs: 29 | - get 30 | - update 31 | 
-------------------------------------------------------------------------------- /prometheus-operator/grafana-dashboardDatasources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0= 4 | kind: Secret 5 | metadata: 6 | name: grafana-datasources 7 | namespace: monitoring 8 | type: Opaque 9 | -------------------------------------------------------------------------------- /traefik/traefik-ui.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: traefik-web-ui 5 | namespace: default 6 | spec: 7 | selector: 8 | k8s-app: traefik-ingress-lb 9 | ports: 10 | - port: 80 11 | targetPort: 8080 12 | --- 13 | apiVersion: extensions/v1beta1 14 | kind: Ingress 15 | metadata: 16 | name: traefik-web-ui 17 | namespace: default 18 | annotations: 19 | kubernetes.io/ingress.class: traefik 20 | spec: 21 | rules: 22 | - host: k8s-traefik.domain.com 23 | http: 24 | paths: 25 | - backend: 26 | serviceName: traefik-web-ui 27 | servicePort: 80 28 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-node_exporter-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-node-exporter-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: NodeExporterMonitoring 12 | rules: 13 | - alert: NodeExporterDown 14 | annotations: 15 | detail: "采集器NodeExporterDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "采集器NodeExporterDown" 17 | expr: | 18 | absent(up{job="node-exporter"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-node_exporter-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-node-exporter-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: NodeExporterMonitoring 12 | rules: 13 | - alert: NodeExporterDown 14 | annotations: 15 | detail: "采集器NodeExporterDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "采集器NodeExporterDown" 17 | expr: | 18 | absent(up{job="node-exporter"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- /Monitoring item/prometheus-etcdService/prometheus-etcdService.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: etcd-k8s 5 | namespace: kube-system 6 | labels: 7 | k8s-app: etcd 8 | spec: 9 | type: ClusterIP 10 | clusterIP: None 11 | ports: 12 | - name: port 13 | port: 2379 14 | protocol: TCP 15 | 16 | --- 17 | apiVersion: v1 18 | kind: Endpoints 19 | 
metadata: 20 | name: etcd-k8s 21 | namespace: kube-system 22 | labels: 23 | k8s-app: etcd 24 | subsets: 25 | - addresses: 26 | - ip: 192.168.100.135 27 | nodeName: etc-master 28 | ports: 29 | - name: port 30 | port: 2379 31 | protocol: TCP 32 | -------------------------------------------------------------------------------- /kubernets-dashboard/dashboard-admin_new.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRoleBinding 2 | apiVersion: rbac.authorization.k8s.io/v1beta1 3 | metadata: 4 | name: admin 5 | annotations: 6 | rbac.authorization.kubernetes.io/autoupdate: "true" 7 | roleRef: 8 | kind: ClusterRole 9 | name: cluster-admin 10 | apiGroup: rbac.authorization.k8s.io 11 | subjects: 12 | - kind: ServiceAccount 13 | name: admin 14 | namespace: kube-system 15 | --- 16 | apiVersion: v1 17 | kind: ServiceAccount 18 | metadata: 19 | name: admin 20 | namespace: kube-system 21 | labels: 22 | kubernetes.io/cluster-service: "true" 23 | addonmanager.kubernetes.io/mode: Reconcile 24 | -------------------------------------------------------------------------------- /common/kubelet-config.yaml: -------------------------------------------------------------------------------- 1 | kind: KubeletConfiguration 2 | apiVersion: kubelet.config.k8s.io/v1beta1 3 | authentication: 4 | anonymous: 5 | enabled: false 6 | webhook: 7 | enabled: true 8 | x509: 9 | clientCAFile: "/etc/kubernetes/cert/ca.pem" 10 | authorization: 11 | mode: Webhook 12 | clusterDomain: "cluster.local" 13 | clusterDNS: 14 | - "10.254.0.2" 15 | podCIDR: "" 16 | maxPods: 220 17 | serializeImagePulls: false 18 | hairpinMode: promiscuous-bridge 19 | cgroupDriver: cgroupfs 20 | runtimeRequestTimeout: "15m" 21 | rotateCertificates: true 22 | serverTLSBootstrap: true 23 | readOnlyPort: 0 24 | port: 10250 25 | address: "10.10.11.21" 26 | -------------------------------------------------------------------------------- /metrics-server/resource-reader.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: system:metrics-server 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - pods 11 | - nodes 12 | - nodes/stats 13 | verbs: 14 | - get 15 | - list 16 | - watch 17 | --- 18 | apiVersion: rbac.authorization.k8s.io/v1 19 | kind: ClusterRoleBinding 20 | metadata: 21 | name: system:metrics-server 22 | roleRef: 23 | apiGroup: rbac.authorization.k8s.io 24 | kind: ClusterRole 25 | name: system:metrics-server 26 | subjects: 27 | - kind: ServiceAccount 28 | name: metrics-server 29 | namespace: kube-system 30 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-node_exporter-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-node-exporter-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: NodeExporterMonitoring 12 | rules: 13 | - alert: NodeExporterDown 14 | annotations: 15 | detail: "采集器NodeExporterDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "采集器NodeExporterDown" 17 | expr: | 18 | absent(up{job="node-exporter"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- 
/prometheus_rules/prometheus-altermanager-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-altermanager-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: AlterManagerMonitoring 12 | rules: 13 | - alert: AlertmanagerDown 14 | annotations: 15 | detail: "AlertmanagerDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "AlertmanagerDown" 17 | expr: | 18 | absent(up{job="alertmanager-main",namespace="monitoring"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- /prometheus-operator/prometheus-serviceMonitorKubeControllerManager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: kube-controller-manager 6 | name: kube-controller-manager 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - interval: 30s 11 | metricRelabelings: 12 | - action: drop 13 | regex: etcd_(debugging|disk|request|server).* 14 | sourceLabels: 15 | - __name__ 16 | port: http-metrics 17 | jobLabel: k8s-app 18 | namespaceSelector: 19 | matchNames: 20 | - kube-system 21 | selector: 22 | matchLabels: 23 | k8s-app: kube-controller-manager 24 | -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-altermanager-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-altermanager-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: AlterManagerMonitoring 12 | rules: 13 | - alert: AlertmanagerDown 14 | annotations: 15 | detail: "AlertmanagerDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "AlertmanagerDown" 17 | expr: | 18 | absent(up{job="alertmanager-main",namespace="monitoring"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- /prometheus_rules/prometheus-etcd-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-etcd-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: EtcdMonitoring 12 | rules: 13 | - alert: EtcdDown 14 | annotations: 15 | detail: "{{$labels.instance}}: etcd down (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: etcd 出现异常,请管理员尽快排查" 17 | expr: | 18 | up{endpoint="port",job="etcd",namespace="kube-system",service="etcd-k8s"} == 0 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-altermanager-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-altermanager-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: AlterManagerMonitoring 12 | rules: 13 | - alert: AlertmanagerDown 14 | 
annotations: 15 | detail: "AlertmanagerDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "AlertmanagerDown" 17 | expr: | 18 | absent(up{job="alertmanager-main",namespace="monitoring"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-etcd-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-etcd-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: EtcdMonitoring 12 | rules: 13 | - alert: EtcdDown 14 | annotations: 15 | detail: "{{$labels.instance}}: etcd down (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: etcd 出现异常,请管理员尽快排查" 17 | expr: | 18 | up{endpoint="port",job="etcd",namespace="kube-system",service="etcd-k8s"} == 0 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-memory-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-memory-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机内存监控 12 | rules: 13 | - alert: Node内存使用率过高 14 | annotations: 15 | detail: "{{$labels.instance}}: 内存使用率高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: 内存使用率过高" 17 | expr: | 18 | (1 - ( avg by (instance) (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))))* 100 > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-etcd-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-etcd-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: EtcdMonitoring 12 | rules: 13 | - alert: EtcdDown 14 | annotations: 15 | detail: "{{$labels.instance}}: etcd down (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: etcd 出现异常,请管理员尽快排查" 17 | expr: | 18 | up{endpoint="port",job="etcd",namespace="kube-system",service="etcd-k8s"} == 0 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-memory-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-memory-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机内存监控 12 | rules: 13 | - alert: Node内存使用率过高 14 | annotations: 15 | detail: "{{$labels.instance}}: 内存使用率高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: 内存使用率过高" 17 | expr: | 18 | (1 - ( avg by (instance) (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))))* 100 > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-memory-rules.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-memory-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机内存监控 12 | rules: 13 | - alert: Node内存使用率过高 14 | annotations: 15 | detail: "{{$labels.instance}}: 内存使用率高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: 内存使用率过高" 17 | expr: | 18 | (1 - ( avg by (instance) (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))))* 100 > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-grafana-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-grafana-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubeGrafanaMonitoring 12 | rules: 13 | - alert: KubeGrafanaDown 14 | annotations: 15 | detail: "你的监控展示平台Down掉了,请确认信息,IP:{{$labels.instance}}, POD:{{$labels.pod}},(当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: Kubelet Down" 17 | expr: | 18 | up{endpoint="http",job="grafana",namespace="monitoring",service="grafana"} == 0 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-grafana-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-grafana-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubeGrafanaMonitoring 12 | rules: 13 | - alert: KubeGrafanaDown 14 | annotations: 15 | detail: "你的监控展示平台Down掉了,请确认信息,IP:{{$labels.instance}}, POD:{{$labels.pod}},(当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: Kubelet Down" 17 | expr: | 18 | up{endpoint="http",job="grafana",namespace="monitoring",service="grafana"} == 0 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-grafana-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-grafana-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubeGrafanaMonitoring 12 | rules: 13 | - alert: KubeGrafanaDown 14 | annotations: 15 | detail: "你的监控展示平台Down掉了,请确认信息,IP:{{$labels.instance}}, POD:{{$labels.pod}},(当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: Kubelet Down" 17 | expr: | 18 | up{endpoint="http",job="grafana",namespace="monitoring",service="grafana"} == 0 19 | for: 1m 20 | labels: 21 | severity: 警告 -------------------------------------------------------------------------------- /Monitoring item/prometheus-etcdService/prometheus-serviceMonitorEtcd.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: etcd-k8s 5 | namespace: monitoring 6 | labels: 7 | k8s-app: etcd-k8s 8 | spec: 9 
| jobLabel: k8s-app 10 | endpoints: 11 | - port: port 12 | interval: 30s 13 | scheme: https 14 | tlsConfig: 15 | caFile: /etc/prometheus/secrets/etcd-certs/ca.crt 16 | certFile: /etc/prometheus/secrets/etcd-certs/healthcheck-client.crt 17 | keyFile: /etc/prometheus/secrets/etcd-certs/healthcheck-client.key 18 | insecureSkipVerify: true 19 | selector: 20 | matchLabels: 21 | k8s-app: etcd 22 | namespaceSelector: 23 | matchNames: 24 | - kube-system 25 | -------------------------------------------------------------------------------- /kubernets-dashboard/README.md: -------------------------------------------------------------------------------- 1 | # Deploying the kubernetes dashboard component 2 | 3 | 4 | ## Install the kubernetes dashboard 5 | ```shell 6 | [root@ks-master k8s]# git clone https://github.com/baishuchao/kubernetes.git 7 | [root@ks-master k8s]# cd kubernetes/kubernets-dashboard/ 8 | [root@ks-master kubernets-dashboard]# kubectl apply -f kubernetes-dashboard.yaml 9 | [root@ks-master kubernets-dashboard]# kubectl apply -f dashboard-admin_new.yaml 10 | ``` 11 | 12 | ## Access the dashboard through a browser 13 | 14 | ![](../images/dashboard1.png) 15 | 16 | 17 | **Authenticate with a token** 18 | 19 | ``` 20 | [root@ks-master k8s]# kubectl describe secret `kubectl get secrets -n kube-system |grep admin | awk '{print $1}'` -n kube-system 21 | ``` 22 | 23 | 24 | 25 | > Note: this command prints the login token 26 | 27 | 28 | 29 | ![](../images/dash2.png) 30 | 31 | > The dashboard deployment is now complete; see the official documentation for dashboard usage details. 32 | -------------------------------------------------------------------------------- /prometheus-operator/kube-state-metrics-serviceMonitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: kube-state-metrics 6 | name: kube-state-metrics 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | honorLabels: true 12 | interval: 30s 13 | port: https-main 14 | scheme: https 15 | scrapeTimeout: 30s 16 | tlsConfig: 17 | insecureSkipVerify: true 18 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 19 | interval: 30s 20 | port: https-self 21 | scheme: https 22 | tlsConfig: 23 | insecureSkipVerify: true 24 | jobLabel: k8s-app 25 | selector: 26 | matchLabels: 27 | k8s-app: kube-state-metrics 28 | -------------------------------------------------------------------------------- /StorageClass/nginx-demo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: StatefulSet 3 | metadata: 4 | name: nfs-web 5 | spec: 6 | serviceName: "nginx" 7 | replicas: 3 8 | template: 9 | metadata: 10 | labels: 11 | app: nfs-web 12 | spec: 13 | terminationGracePeriodSeconds: 10 14 | containers: 15 | - name: nginx 16 | image: nginx:1.7.9 17 | ports: 18 | - containerPort: 80 19 | name: web 20 | volumeMounts: 21 | - name: www 22 | mountPath: /usr/share/nginx/html 23 | volumeClaimTemplates: 24 | - metadata: 25 | name: www 26 | annotations: 27 | volume.beta.kubernetes.io/storage-class: es-data-db 28 | spec: 29 | accessModes: [ "ReadWriteOnce" ] 30 | resources: 31 | requests: 32 | storage: 1Gi -------------------------------------------------------------------------------- /grafana/README.md: -------------------------------------------------------------------------------- 1 | Table of Contents 2 | ================= 3 | 4 | 5 | 6 | - [Contents](#contents) 7 | - [grafana+prometheus cluster-wide monitoring system](#grafanaprometheus-cluster-wide-monitoring-system) 8 | - [1. 
Deploy grafana+prometheus](#1-deploy-grafanaprometheus) 9 | - [2. Custom monitoring files](#2-custom-monitoring-files) 10 | - [3. Email alerting](#3-email-alerting) 11 | - [To be updated](#to-be-updated) 12 | 13 | 14 | 15 | # Contents 16 | ## grafana+prometheus cluster-wide monitoring system 17 | 18 | ### 1. Deploy grafana+prometheus 19 | 20 | 21 | ### 2. Custom monitoring files 22 | 23 | 24 | 25 | ### 3. Email alerting 26 | 27 | 28 | 29 | ### To be updated -------------------------------------------------------------------------------- /prometheus_rules/prometheus-deployment-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-deployment-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubeDeploymentMonitoring 12 | rules: 13 | - alert: KubeDeploymentError 14 | annotations: 15 | detail: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not 16 | matched the expected number of replicas for longer than an hour." 17 | summary: "Deployment在一小时内部署有异常,请管理员检查部署" 18 | expr: | 19 | kube_deployment_spec_replicas{job="kube-state-metrics"}!=kube_deployment_status_replicas_available{job="kube-state-metrics"} 20 | for: 1h 21 | labels: 22 | severity: 严重 23 | -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-deployment-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-deployment-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubeDeploymentMonitoring 12 | rules: 13 | - alert: KubeDeploymentError 14 | annotations: 15 | detail: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not 16 | matched the expected number of replicas for longer than an hour." 
17 | summary: "Deployment在一小时内部署有异常,请管理员检查部署" 18 | expr: | 19 | kube_deployment_spec_replicas{job="kube-state-metrics"}!=kube_deployment_status_replicas_available{job="kube-state-metrics"} 20 | for: 1h 21 | labels: 22 | severity: 严重 23 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: Prometheus 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | name: k8s 7 | namespace: monitoring 8 | spec: 9 | alerting: 10 | alertmanagers: 11 | - name: alertmanager-main 12 | namespace: monitoring 13 | port: web 14 | baseImage: quay.io/prometheus/prometheus 15 | nodeSelector: 16 | beta.kubernetes.io/os: linux 17 | replicas: 2 18 | resources: 19 | requests: 20 | memory: 400Mi 21 | ruleSelector: 22 | matchLabels: 23 | prometheus: k8s 24 | role: alert-rules 25 | securityContext: 26 | fsGroup: 2000 27 | runAsNonRoot: true 28 | runAsUser: 1000 29 | serviceAccountName: prometheus-k8s 30 | serviceMonitorNamespaceSelector: {} 31 | serviceMonitorSelector: {} 32 | version: v2.7.2 33 | -------------------------------------------------------------------------------- /common/flanneld.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Flanneld overlay address etcd agent 3 | After=network.target 4 | After=network-online.target 5 | Wants=network-online.target 6 | After=etcd.service 7 | Before=docker.service 8 | 9 | [Service] 10 | Type=notify 11 | ExecStart=/opt/k8s/bin/flanneld \ 12 | -etcd-cafile=/etc/kubernetes/cert/ca.pem \ 13 | -etcd-certfile=/etc/flanneld/cert/flanneld.pem \ 14 | -etcd-keyfile=/etc/flanneld/cert/flanneld-key.pem \ 15 | -etcd-endpoints=https://10.10.11.21:2379,https://10.10.11.20:2379,https://10.10.11.19:2379 \ 16 | -etcd-prefix=/kubernetes/network \ 17 | -iface=ens192 18 | ExecStartPost=/opt/k8s/bin/mk-docker-opts.sh -k DOCKER_NETWORK_OPTIONS -d /run/flannel/docker 19 | Restart=always 20 | RestartSec=5 21 | StartLimitInterval=0 22 | 23 | [Install] 24 | WantedBy=multi-user.target 25 | RequiredBy=docker.service 26 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-deployment-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-deployment-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubeDeploymentMonitoring 12 | rules: 13 | - alert: KubeDeploymentError 14 | annotations: 15 | detail: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not 16 | matched the expected number of replicas for longer than an hour." 
17 | summary: "Deployment在一小时内部署有异常,请管理员检查部署" 18 | expr: | 19 | kube_deployment_spec_replicas{job="kube-state-metrics"}!=kube_deployment_status_replicas_available{job="kube-state-metrics"} 20 | for: 1h 21 | labels: 22 | severity: 严重 23 | -------------------------------------------------------------------------------- /traefik/traefik-rbac.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1beta1 4 | metadata: 5 | name: traefik-ingress-controller 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - services 11 | - endpoints 12 | - secrets 13 | verbs: 14 | - get 15 | - list 16 | - watch 17 | - apiGroups: 18 | - extensions 19 | resources: 20 | - ingresses 21 | verbs: 22 | - get 23 | - list 24 | - watch 25 | --- 26 | kind: ClusterRoleBinding 27 | apiVersion: rbac.authorization.k8s.io/v1beta1 28 | metadata: 29 | name: traefik-ingress-controller 30 | roleRef: 31 | apiGroup: rbac.authorization.k8s.io 32 | kind: ClusterRole 33 | name: traefik-ingress-controller 34 | subjects: 35 | - kind: ServiceAccount 36 | name: traefik-ingress-controller 37 | namespace: default 38 | -------------------------------------------------------------------------------- /Monitoring item/node-exporter/node-exporter-ds.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: prometheus-node-exporter 5 | namespace: kube-prometheus 6 | labels: 7 | app: node-exporter 8 | component: node-exporter 9 | spec: 10 | selector: 11 | matchLabels: 12 | app: node-exporter 13 | component: node-exporter 14 | template: 15 | metadata: 16 | name: prometheus-node-exporter 17 | labels: 18 | app: node-exporter 19 | component: node-exporter 20 | spec: 21 | tolerations: 22 | - effect: NoSchedule 23 | key: node-role.kubernetes.io/master 24 | containers: 25 | - image: prom/node-exporter:v0.15.2 26 | name: prometheus-node-exporter 27 | ports: 28 | - name: prom-node-exp 29 | containerPort: 9100 30 | hostPort: 9100 31 | hostNetwork: true 32 | hostPID: true 33 | -------------------------------------------------------------------------------- /prometheus/prometheus-rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus 5 | rules: 6 | - apiGroups: [""] 7 | resources: 8 | - nodes 9 | - nodes/proxy 10 | - services 11 | - endpoints 12 | - pods 13 | verbs: ["get", "list", "watch"] 14 | - apiGroups: 15 | - extensions 16 | resources: 17 | - ingresses 18 | verbs: ["get", "list", "watch"] 19 | - nonResourceURLs: ["/metrics"] 20 | verbs: ["get"] 21 | --- 22 | apiVersion: v1 23 | kind: ServiceAccount 24 | metadata: 25 | name: prometheus 26 | namespace: kube-prometheus 27 | --- 28 | apiVersion: rbac.authorization.k8s.io/v1 29 | kind: ClusterRoleBinding 30 | metadata: 31 | name: prometheus 32 | roleRef: 33 | apiGroup: rbac.authorization.k8s.io 34 | kind: ClusterRole 35 | name: prometheus 36 | subjects: 37 | - kind: ServiceAccount 38 | name: prometheus 39 | namespace: kube-prometheus 40 | -------------------------------------------------------------------------------- /EFK/ log-pilot + elasticsearch + kibana/eureka.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: eureka-server 5 | spec: 6 | replicas: 1 7 
| selector: 8 | matchLabels: 9 | app: eureka-server 10 | template: 11 | metadata: 12 | labels: 13 | app: eureka-server 14 | spec: 15 | containers: 16 | - name: eureka-server 17 | image: registry.cn-hangzhou.aliyuncs.com/cloud-ts/eureka-server:690 18 | volumeMounts: 19 | - name: cloud-logs 20 | mountPath: /logs/cloud 21 | imagePullPolicy: Always 22 | env: 23 | - name: aliyun_logs_pod 24 | value: "/logs/cloud/*.log" 25 | ports: 26 | - containerPort: 8761 27 | imagePullSecrets: 28 | - name: registry-secret 29 | volumes: 30 | - name: cloud-logs 31 | emptyDir: {} 32 | 33 | -------------------------------------------------------------------------------- /StorageClass/nfs-client.yaml: -------------------------------------------------------------------------------- 1 | kind: Deployment 2 | apiVersion: extensions/v1beta1 3 | metadata: 4 | name: nfs-client-provisioner 5 | spec: 6 | replicas: 1 7 | strategy: 8 | type: Recreate 9 | template: 10 | metadata: 11 | labels: 12 | app: nfs-client-provisioner 13 | spec: 14 | serviceAccountName: nfs-client-provisioner 15 | containers: 16 | - name: nfs-client-provisioner 17 | image: quay.io/external_storage/nfs-client-provisioner:latest 18 | volumeMounts: 19 | - name: nfs-client-root 20 | mountPath: /persistentvolumes 21 | env: 22 | - name: PROVISIONER_NAME 23 | value: fuseim.pri/ifs 24 | - name: NFS_SERVER 25 | value: 172.16.100.211 26 | - name: NFS_PATH 27 | value: /data/k8s 28 | volumes: 29 | - name: nfs-client-root 30 | nfs: 31 | server: 172.16.100.211 32 | path: /data/k8s -------------------------------------------------------------------------------- /common/kubelet.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Kubelet 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=docker.service 5 | Requires=docker.service 6 | 7 | [Service] 8 | WorkingDirectory=/data/k8s/k8s/kubelet 9 | ExecStart=/opt/k8s/bin/kubelet \ 10 | --root-dir=/data/k8s/k8s/kubelet \ 11 | --bootstrap-kubeconfig=/etc/kubernetes/kubelet-bootstrap.kubeconfig \ 12 | --cert-dir=/etc/kubernetes/cert \ 13 | --kubeconfig=/etc/kubernetes/kubelet.kubeconfig \ 14 | --config=/etc/kubernetes/kubelet-config.yaml \ 15 | --hostname-override=ks-master \ 16 | --pod-infra-container-image=registry.cn-beijing.aliyuncs.com/k8s_images/pause-amd64:3.1 \ 17 | --allow-privileged=true \ 18 | --event-qps=0 \ 19 | --kube-api-qps=1000 \ 20 | --kube-api-burst=2000 \ 21 | --registry-qps=0 \ 22 | --image-pull-progress-deadline=30m \ 23 | --logtostderr=true \ 24 | --v=2 25 | Restart=always 26 | RestartSec=5 27 | StartLimitInterval=0 28 | 29 | [Install] 30 | WantedBy=multi-user.target 31 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-prometheus-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-prometheus-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: PrometheusMonitoring 12 | rules: 13 | - alert: PrometheusDown 14 | annotations: 15 | detail: "PrometheusDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "PrometheusDown" 17 | expr: | 18 | absent(up{job="prometheus-k8s",namespace="monitoring"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 22 | - name: PrometheusOperatorMonitoring 23 | rules: 24 | - alert: PrometheusOperatorDown 25 |
annotations: 26 | detail: "PrometheusOperatorDown,请管理员尽快处理,(当前值: {{ $value }})" 27 | summary: "PrometheusOperatorDown" 28 | expr: | 29 | absent(up{job="prometheus-operator",namespace="monitoring"} == 1) 30 | for: 1m 31 | labels: 32 | severity: 警告 -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-prometheus-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-prometheus-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: PrometheusMonitoring 12 | rules: 13 | - alert: PrometheusDown 14 | annotations: 15 | detail: "PrometheusDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "PrometheusDown" 17 | expr: | 18 | absent(up{job="prometheus-k8s",namespace="monitoring"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 22 | - name: PrometheusOperatorMonitoring 23 | rules: 24 | - alert: PrometheusOperatorDown 25 | annotations: 26 | detail: "PrometheusOperatorDown,请管理员尽快处理,(当前值: {{ $value }})" 27 | summary: "PrometheusOperatorDown" 28 | expr: | 29 | absent(up{job="prometheus-operator",namespace="monitoring"} == 1) 30 | for: 1m 31 | labels: 32 | severity: 警告 -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-prometheus-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-prometheus-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: PrometheusMonitoring 12 | rules: 13 | - alert: PrometheusDown 14 | annotations: 15 | detail: "PrometheusDown,请管理员尽快处理,(当前值: {{ $value }})" 16 | summary: "PrometheusDown" 17 | expr: | 18 | absent(up{job="prometheus-k8s",namespace="monitoring"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 警告 22 | - name: PrometheusOperatorMonitoring 23 | rules: 24 | - alert: PrometheusOperatorDown 25 | annotations: 26 | detail: "PrometheusOperatorDown,请管理员尽快处理,(当前值: {{ $value }})" 27 | summary: "PrometheusOperatorDown" 28 | expr: | 29 | absent(up{job="prometheus-operator",namespace="monitoring"} == 1) 30 | for: 1m 31 | labels: 32 | severity: 警告 -------------------------------------------------------------------------------- /prometheus_rules/prometheus-cpu-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-cpu-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机CPU利用率监控 12 | rules: 13 | - alert: Node主机CPU利用率过高 14 | annotations: 15 | detail: "{{$labels.instance}}: CPU利用率过高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: CPU利用率过高" 17 | expr: | 18 | 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[5m])) * 100) > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: Node主机CPULoad监控 23 | rules: 24 | - alert: Node主机CPULoad过高 25 | annotations: 26 | detail: "{{$labels.instance}}: 15分钟内CPU Load 过高,(当前值: {{ $value }})" 27 | summary: "{{$labels.instance}}: 15分钟内CPU Load 过高" 28 | expr: | 29 | (node_load15) > 2 #根据你的主机核心数来定 30 | for: 1m 31 | labels: 32 | severity: 严重 
-------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-cpu-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-cpu-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机CPU利用率监控 12 | rules: 13 | - alert: Node主机CPU利用率过高 14 | annotations: 15 | detail: "{{$labels.instance}}: CPU利用率过高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: CPU利用率过高" 17 | expr: | 18 | 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[5m])) * 100) > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: Node主机CPULoad监控 23 | rules: 24 | - alert: Node主机CPULoad过高 25 | annotations: 26 | detail: "{{$labels.instance}}: 15分钟内CPU Load 过高,(当前值: {{ $value }})" 27 | summary: "{{$labels.instance}}: 15分钟内CPU Load 过高" 28 | expr: | 29 | (node_load15) > 2 #根据你的主机核心数来定 30 | for: 1m 31 | labels: 32 | severity: 严重 -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-cpu-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-cpu-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机CPU利用率监控 12 | rules: 13 | - alert: Node主机CPU利用率过高 14 | annotations: 15 | detail: "{{$labels.instance}}: CPU利用率过高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: CPU利用率过高" 17 | expr: | 18 | 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[5m])) * 100) > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: Node主机CPULoad监控 23 | rules: 24 | - alert: Node主机CPULoad过高 25 | annotations: 26 | detail: "{{$labels.instance}}: 15分钟内CPU Load 过高,(当前值: {{ $value }})" 27 | summary: "{{$labels.instance}}: 15分钟内CPU Load 过高" 28 | expr: | 29 | (node_load15) > 2 #根据你的主机核心数来定 30 | for: 1m 31 | labels: 32 | severity: 严重 -------------------------------------------------------------------------------- /prometheus-operator/prometheus-roleSpecificNamespaces.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | items: 3 | - apiVersion: rbac.authorization.k8s.io/v1 4 | kind: Role 5 | metadata: 6 | name: prometheus-k8s 7 | namespace: default 8 | rules: 9 | - apiGroups: 10 | - "" 11 | resources: 12 | - services 13 | - endpoints 14 | - pods 15 | verbs: 16 | - get 17 | - list 18 | - watch 19 | - apiVersion: rbac.authorization.k8s.io/v1 20 | kind: Role 21 | metadata: 22 | name: prometheus-k8s 23 | namespace: kube-system 24 | rules: 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - services 29 | - endpoints 30 | - pods 31 | verbs: 32 | - get 33 | - list 34 | - watch 35 | - apiVersion: rbac.authorization.k8s.io/v1 36 | kind: Role 37 | metadata: 38 | name: prometheus-k8s 39 | namespace: monitoring 40 | rules: 41 | - apiGroups: 42 | - "" 43 | resources: 44 | - services 45 | - endpoints 46 | - pods 47 | verbs: 48 | - get 49 | - list 50 | - watch 51 | kind: RoleList 52 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-serviceMonitorKubelet.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: kubelet 6 | name: kubelet 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | honorLabels: true 12 | interval: 30s 13 | port: https-metrics 14 | scheme: https 15 | tlsConfig: 16 | insecureSkipVerify: true 17 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 18 | honorLabels: true 19 | interval: 30s 20 | metricRelabelings: 21 | - action: drop 22 | regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) 23 | sourceLabels: 24 | - __name__ 25 | path: /metrics/cadvisor 26 | port: https-metrics 27 | scheme: https 28 | tlsConfig: 29 | insecureSkipVerify: true 30 | jobLabel: k8s-app 31 | namespaceSelector: 32 | matchNames: 33 | - kube-system 34 | selector: 35 | matchLabels: 36 | k8s-app: kubelet 37 | -------------------------------------------------------------------------------- /grafana/grafana-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: grafana-core 5 | namespace: kube-prometheus 6 | labels: 7 | app: grafana 8 | component: core 9 | spec: 10 | replicas: 1 11 | template: 12 | metadata: 13 | labels: 14 | app: grafana 15 | component: core 16 | spec: 17 | containers: 18 | - image: grafana/grafana:5.0.0 19 | name: grafana-core 20 | imagePullPolicy: IfNotPresent 21 | resources: 22 | limits: 23 | cpu: 100m 24 | memory: 100Mi 25 | requests: 26 | cpu: 100m 27 | memory: 100Mi 28 | env: 29 | - name: GF_AUTH_BASIC_ENABLED 30 | value: "true" 31 | - name: GF_AUTH_ANONYMOUS_ENABLED 32 | value: "false" 33 | readinessProbe: 34 | httpGet: 35 | path: /login 36 | port: 3000 37 | volumeMounts: 38 | - name: grafana-persistent-storage 39 | mountPath: /var 40 | volumes: 41 | - name: grafana-persistent-storage 42 | emptyDir: {} 43 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-serviceMonitorApiserver.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | k8s-app: apiserver 6 | name: kube-apiserver 7 | namespace: monitoring 8 | spec: 9 | endpoints: 10 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 11 | interval: 30s 12 | metricRelabelings: 13 | - action: drop 14 | regex: etcd_(debugging|disk|request|server).* 15 | sourceLabels: 16 | - __name__ 17 | - action: drop 18 | regex: apiserver_admission_controller_admission_latencies_seconds_.* 19 | sourceLabels: 20 | - __name__ 21 | - action: drop 22 | regex: apiserver_admission_step_admission_latencies_seconds_.* 23 | sourceLabels: 24 | - __name__ 25 | port: https 26 | scheme: https 27 | tlsConfig: 28 | caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 29 | serverName: kubernetes 30 | jobLabel: component 31 | namespaceSelector: 32 | matchNames: 33 | - default 34 | selector: 35 | matchLabels: 36 | component: apiserver 37 | provider: kubernetes 38 | -------------------------------------------------------------------------------- /EFK/ log-pilot + elasticsearch + kibana/kibana.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: 
Service 4 | metadata: 5 | name: kibana 6 | namespace: kube-system 7 | labels: 8 | component: kibana 9 | spec: 10 | selector: 11 | component: kibana 12 | ports: 13 | - name: http 14 | port: 80 15 | targetPort: http 16 | type: NodePort 17 | --- 18 | apiVersion: apps/v1beta1 19 | kind: Deployment 20 | metadata: 21 | name: kibana 22 | namespace: kube-system 23 | labels: 24 | component: kibana 25 | spec: 26 | replicas: 1 27 | selector: 28 | matchLabels: 29 | component: kibana 30 | template: 31 | metadata: 32 | labels: 33 | component: kibana 34 | spec: 35 | containers: 36 | - name: kibana 37 | image: registry.cn-hangzhou.aliyuncs.com/acs-sample/kibana:5.5.1 38 | env: 39 | - name: CLUSTER_NAME 40 | value: docker-cluster 41 | - name: ELASTICSEARCH_URL 42 | value: http://elasticsearch-api:9200/ 43 | resources: 44 | limits: 45 | cpu: 1000m 46 | requests: 47 | cpu: 100m 48 | ports: 49 | - containerPort: 5601 50 | name: http 51 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-roleBindingSpecificNamespaces.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | items: 3 | - apiVersion: rbac.authorization.k8s.io/v1 4 | kind: RoleBinding 5 | metadata: 6 | name: prometheus-k8s 7 | namespace: default 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: Role 11 | name: prometheus-k8s 12 | subjects: 13 | - kind: ServiceAccount 14 | name: prometheus-k8s 15 | namespace: monitoring 16 | - apiVersion: rbac.authorization.k8s.io/v1 17 | kind: RoleBinding 18 | metadata: 19 | name: prometheus-k8s 20 | namespace: kube-system 21 | roleRef: 22 | apiGroup: rbac.authorization.k8s.io 23 | kind: Role 24 | name: prometheus-k8s 25 | subjects: 26 | - kind: ServiceAccount 27 | name: prometheus-k8s 28 | namespace: monitoring 29 | - apiVersion: rbac.authorization.k8s.io/v1 30 | kind: RoleBinding 31 | metadata: 32 | name: prometheus-k8s 33 | namespace: monitoring 34 | roleRef: 35 | apiGroup: rbac.authorization.k8s.io 36 | kind: Role 37 | name: prometheus-k8s 38 | subjects: 39 | - kind: ServiceAccount 40 | name: prometheus-k8s 41 | namespace: monitoring 42 | kind: RoleBindingList 43 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-k8s-pod-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-pod-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubePodCrashLooping 12 | rules: 13 | - alert: KubePodCrashLooping 14 | annotations: 15 | detail: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.' 16 | summary: "POD五分钟内发生重启次数" 17 | expr: | 18 | rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0 19 | for: 1h 20 | labels: 21 | severity: 严重 22 | - name: KubePodNotReadyMonitoring 23 | rules: 24 | - alert: KubePodNotReady 25 | annotations: 26 | detail: "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than an hour." 
27 | summary: "POD状态异常,或者没有准备,在一小时内" 28 | expr: | 29 | sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) > 0 30 | for: 1h 31 | labels: 32 | severity: 严重 33 | -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-k8s-pod-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-pod-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubePodCrashLooping 12 | rules: 13 | - alert: KubePodCrashLooping 14 | annotations: 15 | detail: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.' 16 | summary: "POD五分钟内发生重启次数" 17 | expr: | 18 | rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0 19 | for: 1h 20 | labels: 21 | severity: 严重 22 | - name: KubePodNotReadyMonitoring 23 | rules: 24 | - alert: KubePodNotReady 25 | annotations: 26 | detail: "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than an hour." 27 | summary: "POD状态异常,或者没有准备,在一小时内" 28 | expr: | 29 | sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) > 0 30 | for: 1h 31 | labels: 32 | severity: 严重 33 | -------------------------------------------------------------------------------- /StorageClass/nfs-client-sa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: nfs-client-provisioner 5 | 6 | --- 7 | kind: ClusterRole 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | metadata: 10 | name: nfs-client-provisioner-runner 11 | rules: 12 | - apiGroups: [""] 13 | resources: ["persistentvolumes"] 14 | verbs: ["get", "list", "watch", "create", "delete"] 15 | - apiGroups: [""] 16 | resources: ["persistentvolumeclaims"] 17 | verbs: ["get", "list", "watch", "update"] 18 | - apiGroups: ["storage.k8s.io"] 19 | resources: ["storageclasses"] 20 | verbs: ["get", "list", "watch"] 21 | - apiGroups: [""] 22 | resources: ["events"] 23 | verbs: ["list", "watch", "create", "update", "patch"] 24 | - apiGroups: [""] 25 | resources: ["endpoints"] 26 | verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] 27 | 28 | --- 29 | kind: ClusterRoleBinding 30 | apiVersion: rbac.authorization.k8s.io/v1 31 | metadata: 32 | name: run-nfs-client-provisioner 33 | subjects: 34 | - kind: ServiceAccount 35 | name: nfs-client-provisioner 36 | namespace: default 37 | roleRef: 38 | kind: ClusterRole 39 | name: nfs-client-provisioner-runner 40 | apiGroup: rbac.authorization.k8s.io -------------------------------------------------------------------------------- /metrics-server/metrics-server-deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: metrics-server 6 | namespace: kube-system 7 | --- 8 | apiVersion: extensions/v1beta1 9 | kind: Deployment 10 | metadata: 11 | name: metrics-server 12 | namespace: kube-system 13 | labels: 14 | k8s-app: metrics-server 15 | spec: 16 | selector: 17 | matchLabels: 18 | k8s-app: metrics-server 19 | template: 20 | metadata: 21 | name: metrics-server 22 | labels: 23 | k8s-app: metrics-server 24 | 
spec: 25 | serviceAccountName: metrics-server 26 | volumes: 27 | # mount in tmp so we can safely use from-scratch images and/or read-only containers 28 | - name: tmp-dir 29 | emptyDir: {} 30 | containers: 31 | - name: metrics-server 32 | #image: k8s.gcr.io/metrics-server-amd64:v0.3.2 33 | image: mirrorgooglecontainers/metrics-server-amd64:v0.3.2 34 | imagePullPolicy: IfNotPresent 35 | command: 36 | - /metrics-server 37 | - --kubelet-preferred-address-types=InternalIP 38 | - --kubelet-insecure-tls 39 | volumeMounts: 40 | - name: tmp-dir 41 | mountPath: /tmp 42 | 43 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-k8s-pod-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-pod-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: KubePodCrashLooping 12 | rules: 13 | - alert: KubePodCrashLooping 14 | annotations: 15 | detail: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.' 16 | summary: "POD五分钟内发生重启次数" 17 | expr: | 18 | rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0 19 | for: 1h 20 | labels: 21 | severity: 严重 22 | - name: KubePodNotReadyMonitoring 23 | rules: 24 | - alert: KubePodNotReady 25 | annotations: 26 | detail: "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than an hour." 27 | summary: "POD状态异常,或者没有准备,在一小时内" 28 | expr: | 29 | sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) > 0 30 | for: 1h 31 | labels: 32 | severity: 严重 33 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-disk-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-disk-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机数据盘监控 12 | rules: 13 | - alert: Node主机数据盘可用空间不足 14 | annotations: 15 | detail: "{{$labels.instance}}: 磁盘:{{$labels.mountpoint}}使用率高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: 磁盘使用率过高" 17 | expr: | 18 | ((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"} ) / node_filesystem_size_bytes{mountpoint="/"} * 100) > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: Node主机系统盘监控 23 | rules: 24 | - alert: Node主机系统盘可用空间不足 25 | annotations: 26 | detail: "{{$labels.instance}}: 磁盘:{{$labels.mountpoint}}使用率高于80% (当前值: {{ $value }})" 27 | summary: "{{$labels.instance}}: 磁盘使用率过高" 28 | expr: | 29 | ((node_filesystem_size_bytes{mountpoint="/boot"} - node_filesystem_free_bytes{mountpoint="/boot"} ) / node_filesystem_size_bytes{mountpoint="/boot"} * 100) > 80 30 | for: 1m 31 | labels: 32 | severity: 严重 -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-disk-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: 
prometheus-disk-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机数据盘监控 12 | rules: 13 | - alert: Node主机数据盘可用空间不足 14 | annotations: 15 | detail: "{{$labels.instance}}: 磁盘:{{$labels.mountpoint}}使用率高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: 磁盘使用率过高" 17 | expr: | 18 | ((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"} ) / node_filesystem_size_bytes{mountpoint="/"} * 100) > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: Node主机系统盘监控 23 | rules: 24 | - alert: Node主机系统盘可用空间不足 25 | annotations: 26 | detail: "{{$labels.instance}}: 磁盘:{{$labels.mountpoint}}使用率高于80% (当前值: {{ $value }})" 27 | summary: "{{$labels.instance}}: 磁盘使用率过高" 28 | expr: | 29 | ((node_filesystem_size_bytes{mountpoint="/boot"} - node_filesystem_free_bytes{mountpoint="/boot"} ) / node_filesystem_size_bytes{mountpoint="/boot"} * 100) > 80 30 | for: 1m 31 | labels: 32 | severity: 严重 -------------------------------------------------------------------------------- /prometheus-operator/0prometheus-operator-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: prometheus-operator 5 | rules: 6 | - apiGroups: 7 | - apiextensions.k8s.io 8 | resources: 9 | - customresourcedefinitions 10 | verbs: 11 | - '*' 12 | - apiGroups: 13 | - monitoring.coreos.com 14 | resources: 15 | - alertmanagers 16 | - prometheuses 17 | - prometheuses/finalizers 18 | - alertmanagers/finalizers 19 | - servicemonitors 20 | - prometheusrules 21 | verbs: 22 | - '*' 23 | - apiGroups: 24 | - apps 25 | resources: 26 | - statefulsets 27 | verbs: 28 | - '*' 29 | - apiGroups: 30 | - "" 31 | resources: 32 | - configmaps 33 | - secrets 34 | verbs: 35 | - '*' 36 | - apiGroups: 37 | - "" 38 | resources: 39 | - pods 40 | verbs: 41 | - list 42 | - delete 43 | - apiGroups: 44 | - "" 45 | resources: 46 | - services 47 | - services/finalizers 48 | - endpoints 49 | verbs: 50 | - get 51 | - create 52 | - update 53 | - delete 54 | - apiGroups: 55 | - "" 56 | resources: 57 | - nodes 58 | verbs: 59 | - list 60 | - watch 61 | - apiGroups: 62 | - "" 63 | resources: 64 | - namespaces 65 | verbs: 66 | - get 67 | - list 68 | - watch 69 | -------------------------------------------------------------------------------- /prometheus-operator/alertmanager-config.yaml: -------------------------------------------------------------------------------- 1 | global: 2 | # 在没有报警的情况下声明为已解决的时间 3 | resolve_timeout: 5m 4 | # 配置邮件发送信息 5 | smtp_smarthost: 'smtp.exmail.qq.com:465' 6 | smtp_from: 'baishuchao@gbc.mn' 7 | smtp_auth_username: 'baishuchao@XXX.mn' 8 | smtp_auth_password: 'Admin@1234' 9 | smtp_hello: '163.com' 10 | smtp_require_tls: false 11 | # 所有报警信息进入后的根路由,用来设置报警的分发策略 12 | route: 13 | # 这里的标签列表是接收到报警信息后的重新分组标签,例如,接收到的报警信息里面有许多具有 cluster=A 和 alertname=LatncyHigh 这样的标签的报警信息将会批量被聚合到一个分组里面 14 | group_by: ['alertname', 'cluster'] 15 | # 当一个新的报警分组被创建后,需要等待至少group_wait时间来初始化通知,这种方式可以确保您能有足够的时间为同一分组来获取多个警报,然后一起触发这个报警信息。 16 | group_wait: 30s 17 | 18 | # 当第一个报警发送后,等待'group_interval'时间来发送新的一组报警信息。 19 | group_interval: 5m 20 | 21 | # 如果一个报警信息已经发送成功了,等待'repeat_interval'时间来重新发送他们 22 | repeat_interval: 5m 23 | 24 | # 默认的receiver:如果一个报警没有被一个route匹配,则发送给默认的接收器 25 | receiver: default 26 | 27 | # 上面所有的属性都由所有子路由继承,并且可以在每个子路由上进行覆盖。 28 | routes: 29 | - receiver: email 30 | group_wait: 10s 31 | match: 32 | team: node 33 | receivers: 34 | - name: 'default' 35 | email_configs: 36 | - to: 
'baishuchao@yeah.net' 37 | send_resolved: true 38 | - name: 'email' 39 | email_configs: 40 | - to: 'baishuchao@yeah.net' 41 | send_resolved: true -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-disk-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-disk-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: Node主机数据盘监控 12 | rules: 13 | - alert: Node主机数据盘可用空间不足 14 | annotations: 15 | detail: "{{$labels.instance}}: 磁盘:{{$labels.mountpoint}}使用率高于75% (当前值: {{ $value }})" 16 | summary: "{{$labels.instance}}: 磁盘使用率过高" 17 | expr: | 18 | ((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"} ) / node_filesystem_size_bytes{mountpoint="/"} * 100) > 75 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: Node主机系统盘监控 23 | rules: 24 | - alert: Node主机系统盘可用空间不足 25 | annotations: 26 | detail: "{{$labels.instance}}: 磁盘:{{$labels.mountpoint}}使用率高于80% (当前值: {{ $value }})" 27 | summary: "{{$labels.instance}}: 磁盘使用率过高" 28 | expr: | 29 | ((node_filesystem_size_bytes{mountpoint="/boot"} - node_filesystem_free_bytes{mountpoint="/boot"} ) / node_filesystem_size_bytes{mountpoint="/boot"} * 100) > 80 30 | for: 1m 31 | labels: 32 | severity: 严重 -------------------------------------------------------------------------------- /jenkins/rbc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: jenkins2 5 | namespace: ops 6 | 7 | --- 8 | 9 | kind: ClusterRole 10 | apiVersion: rbac.authorization.k8s.io/v1beta1 11 | metadata: 12 | name: jenkins2 13 | rules: 14 | - apiGroups: ["extensions", "apps"] 15 | resources: ["deployments"] 16 | verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] 17 | - apiGroups: [""] 18 | resources: ["services"] 19 | verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] 20 | - apiGroups: [""] 21 | resources: ["pods"] 22 | verbs: ["create","delete","get","list","patch","update","watch"] 23 | - apiGroups: [""] 24 | resources: ["pods/exec"] 25 | verbs: ["create","delete","get","list","patch","update","watch"] 26 | - apiGroups: [""] 27 | resources: ["pods/log"] 28 | verbs: ["get","list","watch"] 29 | - apiGroups: [""] 30 | resources: ["secrets"] 31 | verbs: ["get"] 32 | 33 | --- 34 | apiVersion: rbac.authorization.k8s.io/v1beta1 35 | kind: ClusterRoleBinding 36 | metadata: 37 | name: jenkins2 38 | namespace: ops 39 | roleRef: 40 | apiGroup: rbac.authorization.k8s.io 41 | kind: ClusterRole 42 | name: jenkins2 43 | subjects: 44 | - kind: ServiceAccount 45 | name: jenkins2 46 | namespace: ops -------------------------------------------------------------------------------- /kubeadm/config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kubeproxy.config.k8s.io/v1alpha1 2 | kind: KubeProxyConfiguration 3 | mode: "ipvs" #使用IPVS模式,非iptables 4 | --- 5 | apiVersion: kubeadm.k8s.io/v1beta1 #v1beta1版本,非v1alpha版本,语法会有变化 6 | certificatesDir: /etc/kubernetes/pki 7 | clusterName: kubernetes 8 | controlPlaneEndpoint: 10.10.12.143:6443 #api server IP地址 9 | controllerManager: {} 10 | dns: 11 | type: CoreDNS #默认DNS:CoreDNS 12 | #imageRepository: k8s.gcr.io #官方镜像 13 | imageRepository: 
registry.cn-hangzhou.aliyuncs.com/google_containers #国内阿里镜像 14 | kind: ClusterConfiguration 15 | kubernetesVersion: v1.13.3 #K8S版本 16 | networking: 17 | dnsDomain: cluster.local 18 | serviceSubnet: 10.96.0.0/12 #SVC网络段 19 | podSubnet: 100.64.0.0/10 #POD网络段 20 | apiServer: 21 | certSANs: 22 | - 10.10.12.143 23 | - 10.10.12.142 24 | - 10.10.12.141 25 | extraArgs: 26 | etcd-cafile: /etc/etcd/ssl/ca.pem 27 | etcd-certfile: /etc/etcd/ssl/etcd.pem 28 | etcd-keyfile: /etc/etcd/ssl/etcd-key.pem 29 | etcd: #使用外接etcd高可用 30 | external: 31 | caFile: /etc/etcd/ssl/ca.pem 32 | certFile: /etc/etcd/ssl/etcd.pem 33 | keyFile: /etc/etcd/ssl/etcd-key.pem 34 | endpoints: 35 | - https://10.10.12.143:2379 36 | - https://10.10.12.142:2379 37 | - https://10.10.12.141:2379 38 | 39 | 40 | -------------------------------------------------------------------------------- /prometheus-operator/0prometheus-operator-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | k8s-app: prometheus-operator 6 | name: prometheus-operator 7 | namespace: monitoring 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | k8s-app: prometheus-operator 13 | template: 14 | metadata: 15 | labels: 16 | k8s-app: prometheus-operator 17 | spec: 18 | containers: 19 | - args: 20 | - --kubelet-service=kube-system/kubelet 21 | - --logtostderr=true 22 | - --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1 23 | - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.29.0 24 | image: quay.io/coreos/prometheus-operator:v0.29.0 25 | name: prometheus-operator 26 | ports: 27 | - containerPort: 8080 28 | name: http 29 | resources: 30 | limits: 31 | cpu: 200m 32 | memory: 200Mi 33 | requests: 34 | cpu: 100m 35 | memory: 100Mi 36 | securityContext: 37 | allowPrivilegeEscalation: false 38 | readOnlyRootFilesystem: true 39 | nodeSelector: 40 | beta.kubernetes.io/os: linux 41 | securityContext: 42 | runAsNonRoot: true 43 | runAsUser: 65534 44 | serviceAccountName: prometheus-operator 45 | -------------------------------------------------------------------------------- /prometheus-operator/kube-state-metrics-clusterRole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: kube-state-metrics 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - configmaps 10 | - secrets 11 | - nodes 12 | - pods 13 | - services 14 | - resourcequotas 15 | - replicationcontrollers 16 | - limitranges 17 | - persistentvolumeclaims 18 | - persistentvolumes 19 | - namespaces 20 | - endpoints 21 | verbs: 22 | - list 23 | - watch 24 | - apiGroups: 25 | - extensions 26 | resources: 27 | - daemonsets 28 | - deployments 29 | - replicasets 30 | verbs: 31 | - list 32 | - watch 33 | - apiGroups: 34 | - apps 35 | resources: 36 | - statefulsets 37 | - daemonsets 38 | - deployments 39 | - replicasets 40 | verbs: 41 | - list 42 | - watch 43 | - apiGroups: 44 | - batch 45 | resources: 46 | - cronjobs 47 | - jobs 48 | verbs: 49 | - list 50 | - watch 51 | - apiGroups: 52 | - autoscaling 53 | resources: 54 | - horizontalpodautoscalers 55 | verbs: 56 | - list 57 | - watch 58 | - apiGroups: 59 | - authentication.k8s.io 60 | resources: 61 | - tokenreviews 62 | verbs: 63 | - create 64 | - apiGroups: 65 | - authorization.k8s.io 66 | resources: 67 | - subjectaccessreviews 68 | verbs: 69 | - create 70 | - apiGroups: 71 | 
- policy 72 | resources: 73 | - poddisruptionbudgets 74 | verbs: 75 | - list 76 | - watch 77 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-configMap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | config.yaml: | 4 | resourceRules: 5 | cpu: 6 | containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}[1m])) by (<<.GroupBy>>) 7 | nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) 8 | resources: 9 | overrides: 10 | node: 11 | resource: node 12 | namespace: 13 | resource: namespace 14 | pod_name: 15 | resource: pod 16 | containerLabel: container_name 17 | memory: 18 | containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}) by (<<.GroupBy>>) 19 | nodeQuery: sum(node:node_memory_bytes_total:sum{<<.LabelMatchers>>} - node:node_memory_bytes_available:sum{<<.LabelMatchers>>}) by (<<.GroupBy>>) 20 | resources: 21 | overrides: 22 | node: 23 | resource: node 24 | namespace: 25 | resource: namespace 26 | pod_name: 27 | resource: pod 28 | containerLabel: container_name 29 | window: 1m 30 | kind: ConfigMap 31 | metadata: 32 | name: adapter-config 33 | namespace: monitoring 34 | -------------------------------------------------------------------------------- /prometheus/prometheus-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | name: prometheus-deployment 6 | name: prometheus 7 | namespace: kube-prometheus 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: prometheus 13 | template: 14 | metadata: 15 | labels: 16 | app: prometheus 17 | spec: 18 | containers: 19 | - image: prom/prometheus:v2.3.2 20 | name: prometheus 21 | command: 22 | - "/bin/prometheus" 23 | args: 24 | - "--config.file=/etc/prometheus/prometheus.yml" 25 | - "--storage.tsdb.path=/prometheus" 26 | - "--storage.tsdb.retention=24h" 27 | - "--web.enable-admin-api" # 控制对admin HTTP API的访问,其中包括删除时间序列等功能 28 | - "--web.enable-lifecycle" # 支持热更新,直接执行localhost:9090/-/reload立即生效 29 | ports: 30 | - containerPort: 9090 31 | protocol: TCP 32 | volumeMounts: 33 | - mountPath: "/prometheus" 34 | name: data 35 | - mountPath: "/etc/prometheus" 36 | name: config-volume 37 | resources: 38 | requests: 39 | cpu: 100m 40 | memory: 100Mi 41 | limits: 42 | cpu: 500m 43 | memory: 2500Mi 44 | serviceAccountName: prometheus 45 | volumes: 46 | - name: data 47 | emptyDir: {} 48 | - name: config-volume 49 | configMap: 50 | name: prometheus-config 51 | -------------------------------------------------------------------------------- /common/etcd.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Etcd Server 3 | After=network.target 4 | After=network-online.target 5 | Wants=network-online.target 6 | Documentation=https://github.com/coreos 7 | 8 | [Service] 9 | Type=notify 10 | WorkingDirectory=/data/k8s/etcd/data 11 | ExecStart=/opt/k8s/bin/etcd \ 12 | --data-dir=/data/k8s/etcd/data \ 13 | --wal-dir=/data/k8s/etcd/wal \ 14 | --name=ks-node2 \ 15 | --cert-file=/etc/etcd/cert/etcd.pem \ 16 | --key-file=/etc/etcd/cert/etcd-key.pem \ 17 | 
--trusted-ca-file=/etc/kubernetes/cert/ca.pem \ 18 | --peer-cert-file=/etc/etcd/cert/etcd.pem \ 19 | --peer-key-file=/etc/etcd/cert/etcd-key.pem \ 20 | --peer-trusted-ca-file=/etc/kubernetes/cert/ca.pem \ 21 | --peer-client-cert-auth \ 22 | --client-cert-auth \ 23 | --listen-peer-urls=https://10.10.11.19:2380 \ 24 | --initial-advertise-peer-urls=https://10.10.11.19:2380 \ 25 | --listen-client-urls=https://10.10.11.19:2379,http://127.0.0.1:2379 \ 26 | --advertise-client-urls=https://10.10.11.19:2379 \ 27 | --initial-cluster-token=etcd-cluster-0 \ 28 | --initial-cluster=ks-master=https://10.10.11.21:2380,ks-node1=https://10.10.11.20:2380,ks-node2=https://10.10.11.19:2380 \ 29 | --initial-cluster-state=new \ 30 | --auto-compaction-mode=periodic \ 31 | --auto-compaction-retention=1 \ 32 | --max-request-bytes=33554432 \ 33 | --quota-backend-bytes=6442450944 \ 34 | --heartbeat-interval=250 \ 35 | --election-timeout=2000 36 | Restart=on-failure 37 | RestartSec=5 38 | LimitNOFILE=65536 39 | 40 | [Install] 41 | WantedBy=multi-user.target 42 | -------------------------------------------------------------------------------- /common/kube-controller-manager.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Controller Manager 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | WorkingDirectory=/data/k8s/k8s/kube-controller-manager 7 | ExecStart=/opt/k8s/bin/kube-controller-manager \ 8 | --master=https://10.10.11.21:6443 \ 9 | --bind-address=127.0.0.1 \ 10 | --kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \ 11 | --authentication-kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \ 12 | --authorization-kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \ 13 | --service-cluster-ip-range=10.254.0.0/16 \ 14 | --cluster-name=kubernetes \ 15 | --cluster-signing-cert-file=/etc/kubernetes/cert/ca.pem \ 16 | --cluster-signing-key-file=/etc/kubernetes/cert/ca-key.pem \ 17 | --experimental-cluster-signing-duration=8760h \ 18 | --root-ca-file=/etc/kubernetes/cert/ca.pem \ 19 | --service-account-private-key-file=/etc/kubernetes/cert/ca-key.pem \ 20 | --leader-elect=true \ 21 | --controllers=*,bootstrapsigner,tokencleaner \ 22 | --horizontal-pod-autoscaler-use-rest-clients=true \ 23 | --horizontal-pod-autoscaler-sync-period=10s \ 24 | --tls-cert-file=/etc/kubernetes/cert/kube-controller-manager.pem \ 25 | --tls-private-key-file=/etc/kubernetes/cert/kube-controller-manager-key.pem \ 26 | --use-service-account-credentials=true \ 27 | --kube-api-qps=1000 \ 28 | --kube-api-burst=2000 \ 29 | --logtostderr=true \ 30 | --v=4 31 | Restart=on-failure 32 | RestartSec=5 33 | 34 | [Install] 35 | WantedBy=multi-user.target 36 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus-adapter-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | name: prometheus-adapter 5 | namespace: monitoring 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | name: prometheus-adapter 11 | strategy: 12 | rollingUpdate: 13 | maxSurge: 1 14 | maxUnavailable: 0 15 | template: 16 | metadata: 17 | labels: 18 | name: prometheus-adapter 19 | spec: 20 | containers: 21 | - args: 22 | - --cert-dir=/var/run/serving-cert 23 | - --config=/etc/adapter/config.yaml 24 | - --logtostderr=true 25 | - --metrics-relist-interval=1m 26 | - 
--prometheus-url=http://prometheus-k8s.monitoring.svc:9090/ 27 | - --secure-port=6443 28 | image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.4.1 29 | name: prometheus-adapter 30 | ports: 31 | - containerPort: 6443 32 | volumeMounts: 33 | - mountPath: /tmp 34 | name: tmpfs 35 | readOnly: false 36 | - mountPath: /var/run/serving-cert 37 | name: volume-serving-cert 38 | readOnly: false 39 | - mountPath: /etc/adapter 40 | name: config 41 | readOnly: false 42 | nodeSelector: 43 | beta.kubernetes.io/os: linux 44 | serviceAccountName: prometheus-adapter 45 | volumes: 46 | - emptyDir: {} 47 | name: tmpfs 48 | - emptyDir: {} 49 | name: volume-serving-cert 50 | - configMap: 51 | name: adapter-config 52 | name: config 53 | -------------------------------------------------------------------------------- /traefik/traefik-ds.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: traefik-ingress-controller 6 | namespace: default 7 | --- 8 | kind: DaemonSet 9 | apiVersion: extensions/v1beta1 10 | metadata: 11 | name: traefik-ingress-controller 12 | namespace: default 13 | labels: 14 | k8s-app: traefik-ingress-lb 15 | spec: 16 | template: 17 | metadata: 18 | labels: 19 | k8s-app: traefik-ingress-lb 20 | name: traefik-ingress-lb 21 | spec: 22 | serviceAccountName: traefik-ingress-controller 23 | terminationGracePeriodSeconds: 60 24 | hostNetwork: true 25 | restartPolicy: Always 26 | volumes: 27 | containers: 28 | - image: traefik 29 | name: traefik-ingress-lb 30 | volumeMounts: 31 | resources: 32 | limits: 33 | cpu: 200m 34 | memory: 30Mi 35 | requests: 36 | cpu: 100m 37 | memory: 20Mi 38 | ports: 39 | - name: http 40 | containerPort: 80 41 | hostPort: 80 42 | - name: admin 43 | containerPort: 8080 44 | securityContext: 45 | privileged: true 46 | args: 47 | - --api 48 | - --kubernetes 49 | - --logLevel=INFO 50 | - --configfile=/root/k8s-online/traefik/traefik.toml 51 | --- 52 | kind: Service 53 | apiVersion: v1 54 | metadata: 55 | name: traefik-ingress-service 56 | namespace: default 57 | spec: 58 | selector: 59 | k8s-app: traefik-ingress-lb 60 | ports: 61 | - protocol: TCP 62 | port: 80 63 | name: http 64 | - protocol: TCP 65 | port: 8080 66 | name: admin 67 | type: NodePort 68 | -------------------------------------------------------------------------------- /jenkins/Jenkins-slave.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:stretch 2 | 3 | ENV JAVA_HOME=/usr/local/newhope/java1.8 \ 4 | PATH=/usr/local/newhope/java1.8/bin:$PATH \ 5 | TIMEZONE=Asia/Shanghai \ 6 | LANG=zh_CN.UTF-8 7 | 8 | RUN echo "${TIMEZONE}" > /etc/timezone \ 9 | && echo "$LANG UTF-8" > /etc/locale.gen \ 10 | && apt-get update -q \ 11 | && ln -sf /usr/share/zoneinfo/${TIMEZONE} /etc/localtime \ 12 | && mkdir -p /usr/local/newhope/java1.8 \ 13 | && mkdir -p /home/jenkins/.jenkins \ 14 | && mkdir -p /home/jenkins/agent \ 15 | && mkdir -p /usr/share/jenkins \ 16 | && mkdir -p /root/.kube 17 | 18 | COPY java1.8 /usr/local/newhope/java1.8 19 | COPY kubectl /usr/local/bin/kubectl 20 | COPY jenkins-slave /usr/local/bin/jenkins-slave 21 | COPY slave.jar /usr/share/jenkins 22 | 23 | # java/字符集/DinD/svn/jnlp 24 | RUN mkdir /usr/java/jdk1.8.0_121/bin -p \ 25 | && ln -s /usr/local/newhope/java1.8 /usr/java/jdk1.8.0_121 \ 26 | && DEBIAN_FRONTEND=noninteractive apt-get install -yq curl apt-utils dialog locales apt-transport-https build-essential bzip2 ca-certificates sudo jq 
unzip zip gnupg2 software-properties-common \ 27 | && update-locale LANG=$LANG \ 28 | && locale-gen $LANG \ 29 | && DEBIAN_FRONTEND=noninteractive dpkg-reconfigure locales \ 30 | &&curl -fsSL https://download.docker.com/linux/$(. /etc/os-release; echo "$ID")/gpg |apt-key add - \ 31 | && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/$(. /etc/os-release; echo "$ID") $(lsb_release -cs) stable" \ 32 | && apt-get update -y \ 33 | && apt-get install -y docker-ce=17.09.1~ce-0~debian \ 34 | && sudo apt-get install -y subversion \ 35 | && groupadd -g 10000 jenkins \ 36 | && useradd -c "Jenkins user" -d $HOME -u 10000 -g 10000 -m jenkins \ 37 | && usermod -a -G docker jenkins \ 38 | && sed -i '/^root/a\jenkins ALL=(ALL:ALL) NOPASSWD:ALL' /etc/sudoers 39 | 40 | USER root 41 | 42 | WORKDIR /home/jenkins 43 | 44 | ENTRYPOINT ["jenkins-slave"] 45 | 46 | 47 | -------------------------------------------------------------------------------- /Calico/rbac-kdd.yaml: -------------------------------------------------------------------------------- 1 | # Calico Version v3.1.6 2 | # https://docs.projectcalico.org/v3.1/releases#v3.1.6 3 | kind: ClusterRole 4 | apiVersion: rbac.authorization.k8s.io/v1beta1 5 | metadata: 6 | name: calico-node 7 | rules: 8 | - apiGroups: [""] 9 | resources: 10 | - namespaces 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: [""] 16 | resources: 17 | - pods/status 18 | verbs: 19 | - update 20 | - apiGroups: [""] 21 | resources: 22 | - pods 23 | verbs: 24 | - get 25 | - list 26 | - watch 27 | - patch 28 | - apiGroups: [""] 29 | resources: 30 | - services 31 | verbs: 32 | - get 33 | - apiGroups: [""] 34 | resources: 35 | - endpoints 36 | verbs: 37 | - get 38 | - apiGroups: [""] 39 | resources: 40 | - nodes 41 | verbs: 42 | - get 43 | - list 44 | - update 45 | - watch 46 | - apiGroups: ["extensions"] 47 | resources: 48 | - networkpolicies 49 | verbs: 50 | - get 51 | - list 52 | - watch 53 | - apiGroups: ["networking.k8s.io"] 54 | resources: 55 | - networkpolicies 56 | verbs: 57 | - watch 58 | - list 59 | - apiGroups: ["crd.projectcalico.org"] 60 | resources: 61 | - globalfelixconfigs 62 | - felixconfigurations 63 | - bgppeers 64 | - globalbgpconfigs 65 | - bgpconfigurations 66 | - ippools 67 | - globalnetworkpolicies 68 | - globalnetworksets 69 | - networkpolicies 70 | - clusterinformations 71 | - hostendpoints 72 | verbs: 73 | - create 74 | - get 75 | - list 76 | - update 77 | - watch 78 | 79 | --- 80 | 81 | apiVersion: rbac.authorization.k8s.io/v1beta1 82 | kind: ClusterRoleBinding 83 | metadata: 84 | name: calico-node 85 | roleRef: 86 | apiGroup: rbac.authorization.k8s.io 87 | kind: ClusterRole 88 | name: calico-node 89 | subjects: 90 | - kind: ServiceAccount 91 | name: calico-node 92 | namespace: kube-system 93 | -------------------------------------------------------------------------------- /common/environment.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # 生成 EncryptionConfig 所需的加密 key 4 | export ENCRYPTION_KEY=$(head -c 32 /dev/urandom | base64) 5 | 6 | # 集群各机器 IP 数组 7 | export NODE_IPS=(10.10.11.21 10.10.11.20 10.10.11.19) 8 | 9 | #集群WOEKER机器 IP 数组 10 | 11 | export WORKER_IPS=(10.10.11.20 10.10.11.19) 12 | 13 | # 集群master IP 数组 14 | export MASTER_IPS=(10.10.11.21) 15 | 16 | # 集群master主机名 17 | export MASTER_NAMES=(ks-master) 18 | 19 | # 集群各 IP 对应的 主机名数组 20 | export NODE_NAMES=(ks-master ks-node1 ks-node2 ) 21 | 22 | # etcd 集群服务地址列表 23 | export 
ETCD_ENDPOINTS="https://10.10.11.21:2379,https://10.10.11.20:2379,https://10.10.11.19:2379" 24 | 25 | # etcd 集群间通信的 IP 和端口 26 | export ETCD_NODES="ks-master=https://10.10.11.21:2380,ks-node1=https://10.10.11.20:2380,ks-node2=https://10.10.11.19:2380" 27 | 28 | # kube-apiserver 的反向代理(kube-nginx)地址端口 29 | export KUBE_APISERVER="https://127.0.0.1:6443" 30 | 31 | # 节点间互联网络接口名称 32 | export IFACE="ens192" 33 | 34 | # etcd 数据目录 35 | export ETCD_DATA_DIR="/data/k8s/etcd/data" 36 | 37 | # etcd WAL 目录,建议是 SSD 磁盘分区,或者和 ETCD_DATA_DIR 不同的磁盘分区 38 | export ETCD_WAL_DIR="/data/k8s/etcd/wal" 39 | 40 | # k8s 各组件数据目录 41 | export K8S_DIR="/data/k8s/k8s" 42 | 43 | # docker 数据目录 44 | export DOCKER_DIR="/data/k8s/docker" 45 | 46 | ## 以下参数一般不需要修改 47 | 48 | # TLS Bootstrapping 使用的 Token,可以使用命令 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 49 | BOOTSTRAP_TOKEN="41f7e4ba8b7be874fcff18bf5cf41a7c" 50 | 51 | # 最好使用 当前未用的网段 来定义服务网段和 Pod 网段 52 | 53 | # 服务网段,部署前路由不可达,部署后集群内路由可达(kube-proxy 保证) 54 | SERVICE_CIDR="10.254.0.0/16" 55 | 56 | # Pod 网段,建议 /16 段地址,部署前路由不可达,部署后集群内路由可达(flanneld 保证) 57 | CLUSTER_CIDR="172.30.0.0/16" 58 | 59 | # 服务端口范围 (NodePort Range) 60 | export NODE_PORT_RANGE="30000-32767" 61 | 62 | # flanneld 网络配置前缀 63 | export FLANNEL_ETCD_PREFIX="/kubernetes/network" 64 | 65 | # kubernetes 服务 IP (一般是 SERVICE_CIDR 中第一个IP) 66 | export CLUSTER_KUBERNETES_SVC_IP="10.254.0.1" 67 | 68 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 69 | export CLUSTER_DNS_SVC_IP="10.254.0.2" 70 | 71 | # 集群 DNS 域名(末尾不带点号) 72 | export CLUSTER_DNS_DOMAIN="cluster.local" 73 | 74 | # 将二进制目录 /opt/k8s/bin 加到 PATH 中 75 | export PATH=/opt/k8s/bin:$PATH 76 | -------------------------------------------------------------------------------- /EFK/ log-pilot + elasticsearch + kibana/log-pilot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: DaemonSet 4 | metadata: 5 | name: log-pilot 6 | namespace: kube-system 7 | labels: 8 | k8s-app: log-pilot 9 | kubernetes.io/cluster-service: "true" 10 | spec: 11 | template: 12 | metadata: 13 | labels: 14 | k8s-app: log-es 15 | kubernetes.io/cluster-service: "true" 16 | version: v1.22 17 | spec: 18 | tolerations: 19 | - key: node-role.kubernetes.io/master 20 | effect: NoSchedule 21 | serviceAccountName: admin 22 | containers: 23 | - name: log-pilot 24 | image: registry.cn-hangzhou.aliyuncs.com/acs-sample/log-pilot:0.9-filebeat 25 | resources: 26 | limits: 27 | memory: 200Mi 28 | requests: 29 | cpu: 100m 30 | memory: 200Mi 31 | env: 32 | - name: "FILEBEAT_OUTPUT" 33 | value: "elasticsearch" 34 | - name: "ELASTICSEARCH_HOST" 35 | value: "elasticsearch-api" 36 | - name: "ELASTICSEARCH_PORT" 37 | value: "9200" 38 | - name: "ELASTICSEARCH_USER" 39 | value: "elastic" 40 | - name: "ELASTICSEARCH_PASSWORD" 41 | value: "changeme" 42 | volumeMounts: 43 | - name: sock 44 | mountPath: /var/run/docker.sock 45 | - name: root 46 | mountPath: /host 47 | readOnly: true 48 | - name: varlib 49 | mountPath: /var/lib/filebeat 50 | - name: varlog 51 | mountPath: /var/log/filebeat 52 | securityContext: 53 | capabilities: 54 | add: 55 | - SYS_ADMIN 56 | terminationGracePeriodSeconds: 30 57 | volumes: 58 | - name: sock 59 | hostPath: 60 | path: /var/run/docker.sock 61 | - name: root 62 | hostPath: 63 | path: / 64 | - name: varlib 65 | hostPath: 66 | path: /var/lib/filebeat 67 | type: DirectoryOrCreate 68 | - name: varlog 69 | hostPath: 70 | path: /var/log/filebeat 71 | type: DirectoryOrCreate 72 | 
-------------------------------------------------------------------------------- /common/kube-apiserver.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes API Server 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=network.target 5 | 6 | [Service] 7 | WorkingDirectory=/data/k8s/k8s/kube-apiserver 8 | ExecStart=/opt/k8s/bin/kube-apiserver \ 9 | --enable-admission-plugins=Initializers,NamespaceLifecycle,NodeRestriction,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota \ 10 | --anonymous-auth=false \ 11 | --experimental-encryption-provider-config=/etc/kubernetes/encryption-config.yaml \ 12 | --advertise-address=10.10.11.21 \ 13 | --bind-address=10.10.11.21 \ 14 | --insecure-port=0 \ 15 | --authorization-mode=Node,RBAC \ 16 | --runtime-config=api/all \ 17 | --enable-bootstrap-token-auth \ 18 | --service-cluster-ip-range=10.254.0.0/16 \ 19 | --service-node-port-range=30000-32767 \ 20 | --tls-cert-file=/etc/kubernetes/cert/kubernetes.pem \ 21 | --tls-private-key-file=/etc/kubernetes/cert/kubernetes-key.pem \ 22 | --client-ca-file=/etc/kubernetes/cert/ca.pem \ 23 | --kubelet-certificate-authority=/etc/kubernetes/cert/ca.pem \ 24 | --kubelet-client-certificate=/etc/kubernetes/cert/kubernetes.pem \ 25 | --kubelet-client-key=/etc/kubernetes/cert/kubernetes-key.pem \ 26 | --kubelet-https=true \ 27 | --service-account-key-file=/etc/kubernetes/cert/ca.pem \ 28 | --etcd-cafile=/etc/kubernetes/cert/ca.pem \ 29 | --etcd-certfile=/etc/kubernetes/cert/kubernetes.pem \ 30 | --etcd-keyfile=/etc/kubernetes/cert/kubernetes-key.pem \ 31 | --etcd-servers=https://10.10.11.21:2379,https://10.10.11.20:2379,https://10.10.11.19:2379 \ 32 | --enable-swagger-ui=true \ 33 | --allow-privileged=true \ 34 | --max-mutating-requests-inflight=2000 \ 35 | --max-requests-inflight=4000 \ 36 | --apiserver-count=3 \ 37 | --audit-log-maxage=30 \ 38 | --audit-log-maxbackup=3 \ 39 | --audit-log-maxsize=100 \ 40 | --audit-log-path=/data/k8s/k8s/kube-apiserver/audit.log \ 41 | --event-ttl=168h \ 42 | --logtostderr=true \ 43 | --alsologtostderr=true \ 44 | --logtostderr=false \ 45 | --log-dir=/var/log/kubernetes \ 46 | --v=2 47 | Restart=on-failure 48 | RestartSec=5 49 | Type=notify 50 | LimitNOFILE=65536 51 | 52 | [Install] 53 | WantedBy=multi-user.target 54 | -------------------------------------------------------------------------------- /prometheus_rules/prometheus-k8s-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: APIServerMonitoring 12 | rules: 13 | - alert: APIServerDown 14 | annotations: 15 | detail: "APIServer Down" 16 | summary: "APIServer has disappeared from Prometheus target discovery." 17 | expr: | 18 | absent(up{job="apiserver"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: KubeSchedulerMonitoring 23 | rules: 24 | - alert: KubeSchedulerDown 25 | annotations: 26 | detail: "KubeSchedulerDown" 27 | summary: "KubeScheduler has disappeared from Prometheus target discovery." 
28 | expr: | 29 | absent(up{job="kube-scheduler"} == 1) 30 | for: 1m 31 | labels: 32 | severity: 严重 33 | # - name: KubeControllerManagerMonitoring 34 | # rules: 35 | # - alert: KubeControllerManagerDown 36 | # annotations: 37 | # detail: "KubeControllerManagerDown" 38 | # summary: "KubeControllerManager has disappeared from Prometheus target discovery." 39 | # expr: | 40 | # absent(up{job="kube-controller-manager"} == 1) 41 | # for: 1m 42 | # labels: 43 | # severity: 严重 44 | - name: KubeletMonitorings 45 | rules: 46 | - alert: KubeletDown 47 | annotations: 48 | detail: "你有一台NodeName为:{{$labels.node}}, 实例IP端口:{{$labels.instance}},Kubelet Down (当前值: {{ $value }})" 49 | summary: "{{$labels.instance}}: Kubelet Down" 50 | expr: | 51 | up{endpoint="https-metrics",job="kubelet",namespace="kube-system",service="kubelet"} == 0 52 | for: 1m 53 | labels: 54 | severity: 严重 55 | - name: KubeNodeNotReady 56 | rules: 57 | - alert: KubeNodeDown 58 | annotations: 59 | detail: "{{ $labels.node }} has been unready for more than an hour." 60 | summary: "有一台Node状态是NoReady了,请管理员尽快检查!" 61 | expr: | 62 | kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 63 | for: 1h 64 | labels: 65 | severity: 严重 66 | -------------------------------------------------------------------------------- /prometheus/prometheus_rules/prometheus-k8s-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: APIServerMonitoring 12 | rules: 13 | - alert: APIServerDown 14 | annotations: 15 | detail: "APIServer Down" 16 | summary: "APIServer has disappeared from Prometheus target discovery." 17 | expr: | 18 | absent(up{job="apiserver"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: KubeSchedulerMonitoring 23 | rules: 24 | - alert: KubeSchedulerDown 25 | annotations: 26 | detail: "KubeSchedulerDown" 27 | summary: "KubeScheduler has disappeared from Prometheus target discovery." 28 | expr: | 29 | absent(up{job="kube-scheduler"} == 1) 30 | for: 1m 31 | labels: 32 | severity: 严重 33 | # - name: KubeControllerManagerMonitoring 34 | # rules: 35 | # - alert: KubeControllerManagerDown 36 | # annotations: 37 | # detail: "KubeControllerManagerDown" 38 | # summary: "KubeControllerManager has disappeared from Prometheus target discovery." 39 | # expr: | 40 | # absent(up{job="kube-controller-manager"} == 1) 41 | # for: 1m 42 | # labels: 43 | # severity: 严重 44 | - name: KubeletMonitorings 45 | rules: 46 | - alert: KubeletDown 47 | annotations: 48 | detail: "你有一台NodeName为:{{$labels.node}}, 实例IP端口:{{$labels.instance}},Kubelet Down (当前值: {{ $value }})" 49 | summary: "{{$labels.instance}}: Kubelet Down" 50 | expr: | 51 | up{endpoint="https-metrics",job="kubelet",namespace="kube-system",service="kubelet"} == 0 52 | for: 1m 53 | labels: 54 | severity: 严重 55 | - name: KubeNodeNotReady 56 | rules: 57 | - alert: KubeNodeDown 58 | annotations: 59 | detail: "{{ $labels.node }} has been unready for more than an hour." 60 | summary: "有一台Node状态是NoReady了,请管理员尽快检查!" 
61 | expr: | 62 | kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 63 | for: 1h 64 | labels: 65 | severity: 严重 66 | -------------------------------------------------------------------------------- /prometheus-operator/prometheus_rules/prometheus-k8s-rules.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: k8s 6 | role: alert-rules 7 | name: prometheus-k8s-rules 8 | namespace: monitoring 9 | spec: 10 | groups: 11 | - name: APIServerMonitoring 12 | rules: 13 | - alert: APIServerDown 14 | annotations: 15 | detail: "APIServer Down" 16 | summary: "APIServer has disappeared from Prometheus target discovery." 17 | expr: | 18 | absent(up{job="apiserver"} == 1) 19 | for: 1m 20 | labels: 21 | severity: 严重 22 | - name: KubeSchedulerMonitoring 23 | rules: 24 | - alert: KubeSchedulerDown 25 | annotations: 26 | detail: "KubeSchedulerDown" 27 | summary: "KubeScheduler has disappeared from Prometheus target discovery." 28 | expr: | 29 | absent(up{job="kube-scheduler"} == 1) 30 | for: 1m 31 | labels: 32 | severity: 严重 33 | # - name: KubeControllerManagerMonitoring 34 | # rules: 35 | # - alert: KubeControllerManagerDown 36 | # annotations: 37 | # detail: "KubeControllerManagerDown" 38 | # summary: "KubeControllerManager has disappeared from Prometheus target discovery." 39 | # expr: | 40 | # absent(up{job="kube-controller-manager"} == 1) 41 | # for: 1m 42 | # labels: 43 | # severity: 严重 44 | - name: KubeletMonitorings 45 | rules: 46 | - alert: KubeletDown 47 | annotations: 48 | detail: "你有一台NodeName为:{{$labels.node}}, 实例IP端口:{{$labels.instance}},Kubelet Down (当前值: {{ $value }})" 49 | summary: "{{$labels.instance}}: Kubelet Down" 50 | expr: | 51 | up{endpoint="https-metrics",job="kubelet",namespace="kube-system",service="kubelet"} == 0 52 | for: 1m 53 | labels: 54 | severity: 严重 55 | - name: KubeNodeNotReady 56 | rules: 57 | - alert: KubeNodeDown 58 | annotations: 59 | detail: "{{ $labels.node }} has been unready for more than an hour." 60 | summary: "有一台Node状态是NoReady了,请管理员尽快检查!" 
61 | expr: | 62 | kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 63 | for: 1h 64 | labels: 65 | severity: 严重 66 | -------------------------------------------------------------------------------- /jenkins/Jenkins-deploy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: Deployment 4 | metadata: 5 | name: jenkins2 6 | namespace: ops 7 | spec: 8 | template: 9 | metadata: 10 | labels: 11 | app: jenkins2 12 | spec: 13 | terminationGracePeriodSeconds: 10 14 | serviceAccount: jenkins2 15 | containers: 16 | - name: jenkins 17 | image: jenkins/jenkins:lts 18 | imagePullPolicy: IfNotPresent 19 | ports: 20 | - containerPort: 8080 21 | name: web 22 | protocol: TCP 23 | - containerPort: 50000 24 | name: agent 25 | protocol: TCP 26 | resources: 27 | limits: 28 | cpu: 1000m 29 | memory: 1Gi 30 | requests: 31 | cpu: 500m 32 | memory: 512Mi 33 | livenessProbe: 34 | httpGet: 35 | path: /login 36 | port: 8080 37 | initialDelaySeconds: 60 38 | timeoutSeconds: 5 39 | failureThreshold: 12 40 | readinessProbe: 41 | httpGet: 42 | path: /login 43 | port: 8080 44 | initialDelaySeconds: 60 45 | timeoutSeconds: 5 46 | failureThreshold: 12 47 | volumeMounts: 48 | - name: jenkinshome 49 | subPath: jenkins2 50 | mountPath: /var/jenkins_home 51 | env: 52 | - name: LIMITS_MEMORY 53 | valueFrom: 54 | resourceFieldRef: 55 | resource: limits.memory 56 | divisor: 1Mi 57 | - name: JAVA_OPTS 58 | value: -Xmx$(LIMITS_MEMORY)m -XshowSettings:vm -Dhudson.slaves.NodeProvisioner.initialDelay=0 -Dhudson.slaves.NodeProvisioner.MARGIN=50 -Dhudson.slaves.NodeProvisioner.MARGIN0=0.85 -Duser.timezone=Asia/Shanghai 59 | securityContext: 60 | fsGroup: 1000 61 | volumes: 62 | - name: jenkinshome 63 | persistentVolumeClaim: 64 | claimName: jenkinspvc 65 | 66 | --- 67 | apiVersion: v1 68 | kind: Service 69 | metadata: 70 | name: jenkins2 71 | namespace: ops 72 | labels: 73 | app: jenkins2 74 | spec: 75 | selector: 76 | app: jenkins2 77 | type: NodePort 78 | ports: 79 | - name: web 80 | port: 8080 81 | targetPort: web 82 | nodePort: 30002 83 | - name: agent 84 | port: 50000 85 | targetPort: agent -------------------------------------------------------------------------------- /ingress/README.md: -------------------------------------------------------------------------------- 1 | Table of Contents 2 | ================= 3 | 4 | 5 | 6 | 7 | - [目录](#目录) 8 | - [Ingress 组件](#ingress-组件) 9 | - [1. Ingress 简介](#1-ingress-简介) 10 | - [2. 部署 Ingress](#2-部署-ingress) 11 | - [3. 使用ingress发布项目](#3-使用ingress发布项目) 12 | - [4. 验证是否成功](#4--验证是否成功) 13 | - [5. 配置TLS ingress](#5-配置tls-ingress) 14 | - [待更新](#待更新) 15 | 16 | 17 | 18 | 19 | # 目录     20 | ## Ingress 组件 21 | 22 | ### 1. Ingress 简介 23 | 24 | > Kubernetes 提供了两种内建的云端负载均衡机制用于发布公共应用,一种是工作于传输层的Service资源,它实现的是“TCP负载均衡器”,另一种是Ingress资源,它实现的是“HTTP/HTTPS负载均衡器”。 25 | 26 | - TCP负载均衡器 27 | 无论是iptables还是ipvs模型的service资源都配置于Linux内核中的Netfilter之上进行四层调度,是一种类型更改为通用的调度器,支持调度HTTP,MySQL等应用服务。不过,也正是由于工作于传输层从而使得它无法做到类似卸载HTTPS中的SSL会话等一类操作,也不支持基于URL的请求调度机制,而且,Kubernets也不支持为此类负载均衡器配置任何类型的健康状态检查机制。 28 | 29 | - HTTP(s)负载均衡器 30 | HTTP(s) 负载均衡器是应用负载均衡机制的一种,支持根据环境做出更好的调度决策。与传输层调度相比,它提供了诸如可自定义URL映射和TLS卸载等功能,并支持多种类型的后端服务器健康状态检查机制。 31 | 32 | 33 | ### 2. 
部署 Ingress 34 | 35 | ```yaml 36 | [root@ks-master ~]# wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/mandatory.yaml 37 | [root@ks-master ~]# kubectl apply -f mandatory.yaml 38 | [root@ks-master ~]# kubectl get pods --all-namespaces |grep ingress 39 | ingress-nginx traefik-ingress-controller-gwcjk 1/1 Running 0 40 | ingress-nginx traefik-ingress-controller-rphgz 1/1 Running 0 41 | ingress-nginx traefik-ingress-controller-sd5l7 1/1 Running 0 42 | 43 | ``` 44 | > 注释:全部ks-master 上执行 45 | 46 | 47 | ### 3. 使用ingress发布项目 48 | 49 | ```yaml 50 | apiVersion: extensions/v1beta1 51 | kind: Ingress 52 | metadata: 53 | name: grafana 54 | namespace: default 55 | spec: 56 | rules: 57 | - host: grafana.baishuchao.com 58 | http: 59 | paths: 60 | - path: / 61 | backend: 62 | serviceName: grafana 63 | servicePort: 3000 64 | ``` 65 | 66 | 67 | ### 4. 验证是否成功 68 | 69 | 通过浏览器访问 `grafana.baishuchao.com` 70 | 71 | ![](../images/grafana.png) 72 | 73 | 74 | > 说明:如果能通过域名访问到,则访问成功 75 | 76 | ### 5. 配置TLS ingress 77 | 78 | ## 待更新 79 | -------------------------------------------------------------------------------- /elasticsearch/elasticsearch-statefulset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: StatefulSet 3 | metadata: 4 | name: es-cluster 5 | namespace: logging 6 | spec: 7 | serviceName: elasticsearch 8 | replicas: 3 9 | selector: 10 | matchLabels: 11 | app: elasticsearch 12 | template: 13 | metadata: 14 | labels: 15 | app: elasticsearch 16 | spec: 17 | containers: 18 | - name: elasticsearch 19 | image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.4.3 20 | resources: 21 | limits: 22 | cpu: 1000m 23 | requests: 24 | cpu: 100m 25 | ports: 26 | - containerPort: 9200 27 | name: rest 28 | protocol: TCP 29 | - containerPort: 9300 30 | name: inter-node 31 | protocol: TCP 32 | volumeMounts: 33 | - name: data 34 | mountPath: /usr/share/elasticsearch/data 35 | env: 36 | - name: cluster.name 37 | value: k8s-logs 38 | - name: node.name 39 | valueFrom: 40 | fieldRef: 41 | fieldPath: metadata.name 42 | - name: discovery.zen.ping.unicast.hosts 43 | value: "es-cluster-0.elasticsearch,es-cluster-1.elasticsearch,es-cluster-2.elasticsearch" 44 | - name: discovery.zen.minimum_master_nodes 45 | value: "2" 46 | - name: ES_JAVA_OPTS 47 | value: "-Xms512m -Xmx512m" 48 | initContainers: 49 | - name: fix-permissions 50 | image: busybox 51 | command: ["sh", "-c", "chown -R 1000:1000 /usr/share/elasticsearch/data"] 52 | securityContext: 53 | privileged: true 54 | volumeMounts: 55 | - name: data 56 | mountPath: /usr/share/elasticsearch/data 57 | - name: increase-vm-max-map 58 | image: busybox 59 | command: ["sysctl", "-w", "vm.max_map_count=262144"] 60 | securityContext: 61 | privileged: true 62 | - name: increase-fd-ulimit 63 | image: busybox 64 | command: ["sh", "-c", "ulimit -n 65536"] 65 | securityContext: 66 | privileged: true 67 | volumeClaimTemplates: 68 | - metadata: 69 | name: data 70 | labels: 71 | app: elasticsearch 72 | spec: 73 | accessModes: [ "ReadWriteOnce" ] 74 | storageClassName: es-data-db 75 | resources: 76 | requests: 77 | storage: 50Gi 78 | 79 | -------------------------------------------------------------------------------- /kafka/kafka-cluster.yaml: -------------------------------------------------------------------------------- 1 | kind: Deployment 2 | apiVersion: extensions/v1beta1 3 | metadata: 4 | name: kafka-deployment-1 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | name: kafka-service-1 
10 | template: 11 | metadata: 12 | labels: 13 | name: kafka-service-1 14 | app: kafka-service-1 15 | spec: 16 | containers: 17 | - name: kafka-1 18 | image: wurstmeister/kafka 19 | imagePullPolicy: IfNotPresent 20 | ports: 21 | - containerPort: 9092 22 | env: 23 | - name: KAFKA_ADVERTISED_PORT 24 | value: "9092" 25 | - name: KAFKA_ADVERTISED_HOST_NAME 26 | value: [kafka-service1的clusterIP] 27 | - name: KAFKA_ZOOKEEPER_CONNECT 28 | value: zoo1:2181,zoo2:2181,zoo3:2181 29 | - name: KAFKA_BROKER_ID 30 | value: "1" 31 | - name: KAFKA_CREATE_TOPICS 32 | value: mytopic:2:1 33 | --- 34 | kind: Deployment 35 | apiVersion: extensions/v1beta1 36 | metadata: 37 | name: kafka-deployment-2 38 | spec: 39 | replicas: 1 40 | selector: 41 | selector: 42 | matchLabels: 43 | name: kafka-service-2 44 | template: 45 | metadata: 46 | labels: 47 | name: kafka-service-2 48 | app: kafka-service-2 49 | spec: 50 | containers: 51 | - name: kafka-2 52 | image: wurstmeister/kafka 53 | imagePullPolicy: IfNotPresent 54 | ports: 55 | - containerPort: 9092 56 | env: 57 | - name: KAFKA_ADVERTISED_PORT 58 | value: "9092" 59 | - name: KAFKA_ADVERTISED_HOST_NAME 60 | value: [kafka-service2的clusterIP] 61 | - name: KAFKA_ZOOKEEPER_CONNECT 62 | value: zoo1:2181,zoo2:2181,zoo3:2181 63 | - name: KAFKA_BROKER_ID 64 | value: "2" 65 | --- 66 | kind: Deployment 67 | apiVersion: extensions/v1beta1 68 | metadata: 69 | name: kafka-deployment-3 70 | spec: 71 | replicas: 1 72 | selector: 73 | selector: 74 | matchLabels: 75 | name: kafka-service-3 76 | template: 77 | metadata: 78 | labels: 79 | name: kafka-service-3 80 | app: kafka-service-3 81 | spec: 82 | containers: 83 | - name: kafka-3 84 | image: wurstmeister/kafka 85 | imagePullPolicy: IfNotPresent 86 | ports: 87 | - containerPort: 9092 88 | env: 89 | - name: KAFKA_ADVERTISED_PORT 90 | value: "9092" 91 | - name: KAFKA_ADVERTISED_HOST_NAME 92 | value: [kafka-service3的clusterIP] 93 | - name: KAFKA_ZOOKEEPER_CONNECT 94 | value: zoo1:2181,zoo2:2181,zoo3:2181 95 | - name: KAFKA_BROKER_ID 96 | value: "3" -------------------------------------------------------------------------------- /StorageClass/README.md: -------------------------------------------------------------------------------- 1 | StorageClass 2 | ================= 3 | 4 | 如果我们后端存储使用NFS,那么我们就需要使用nfs-client自动配置程序(Provisioner),这个程序使用我们已经配置好的NFS服务器,来自动创建持久卷,也就是自动帮我们创建PV 5 | 6 | 7 | * 自动创建的 PV 以${namespace}-${pvcName}-${pvName}这样的命名格式创建在 NFS 服务器上的共享数据目录中 8 | 9 | * 而当这个 PV 被回收后会以archieved-${namespace}-${pvcName}-${pvName}这样的命名格式存在 NFS 服务器上。 10 | 11 | ## 创建NFS-Client 12 | 13 | 14 | ``` 15 | root@ks-master:~# git clone https://github.com/baishuchao/kubernetes.git 16 | Cloning into 'kubernetes'... 17 | remote: Enumerating objects: 137, done. 18 | remote: Counting objects: 100% (137/137), done. 19 | remote: Compressing objects: 100% (94/94), done. 20 | remote: Total 395 (delta 47), reused 128 (delta 41), pack-reused 258 21 | Receiving objects: 100% (395/395), 1.06 MiB | 192.00 KiB/s, done. 22 | Resolving deltas: 100% (140/140), done. 
23 | 24 | root@ks-master:~# cd kubernetes/StorageClass/ 25 | root@ks-master:~/kubernetes/StorageClass# ll 26 | total 24 27 | drwxr-xr-x 2 root root 4096 Jul 19 11:38 ./ 28 | drwxr-xr-x 20 root root 4096 Jul 19 11:38 ../ 29 | -rw-r--r-- 1 root root 185 Jul 19 11:38 nfs-client-class.yaml 30 | -rw-r--r-- 1 root root 1060 Jul 19 11:38 nfs-client-sa.yaml 31 | -rw-r--r-- 1 root root 847 Jul 19 11:38 nfs-client.yaml 32 | -rw-r--r-- 1 root root 33 Jul 19 11:38 README.md 33 | 34 | root@ks-master:~/kubernetes/StorageClass# kubectl apply -f . 35 | 36 | ``` 37 | 38 | 39 | ## 创建完成后查看下资源状态 40 | 41 | ``` 42 | root@ks-master:~/kubernetes/StorageClass# kubectl get pods 43 | NAME READY STATUS RESTARTS AGE 44 | nfs-client-provisioner-587469c7f9-csqg6 1/1 Running 0 19m 45 | 46 | root@ks-master:~/kubernetes/StorageClass# kubectl get sc 47 | NAME PROVISIONER AGE 48 | es-data-db fuseim.pri/ifs 18m 49 | 50 | ``` 51 | 52 | ## 创建demo测试 53 | 54 | ``` 55 | root@ks-master:~/kubernetes/StorageClass# kubectl apply -f nginx-demo.yaml 56 | statefulset.apps/nfs-web created 57 | 58 | ## 我们可以看到是不是也生成了3个 PVC 对象,名称由模板名称 name 加上 Pod 的名称组合而成,这3个 PVC 对象也都是 绑定状态了,很显然我们查看 PV 也可以看到对应的3个 PV 对象 59 | 60 | root@ks-master:/data/k8s# ll 61 | total 28 62 | drwxrwxrwx 7 root root 4096 Jul 19 11:45 ./ 63 | drwxr-xr-x 6 root root 4096 Jul 19 11:15 ../ 64 | drwxrwxrwx 2 root root 4096 Jul 19 11:45 default-www-nfs-web-0-pvc-04557632-303e-4fba-b6a9-11fdcaeca200/ 65 | drwxrwxrwx 2 root root 4096 Jul 19 11:45 default-www-nfs-web-1-pvc-f5285f03-2a76-4f9c-b54c-bb32cb25666d/ 66 | drwxrwxrwx 3 kaifa kaifa 4096 Jul 19 11:25 logging-data-es-cluster-0-pvc-fb2c586b-fe32-4145-b1f6-f7e4bbef843c/ 67 | drwxrwxrwx 3 kaifa kaifa 4096 Jul 19 11:30 logging-data-es-cluster-1-pvc-d89e9db2-730a-4b69-82a3-c60cbbdbae53/ 68 | drwxrwxrwx 3 kaifa kaifa 4096 Jul 19 11:30 logging-data-es-cluster-2-pvc-55f98f3b-90e2-4504-90cb-de96e6b01eb5/ 69 | root@ks-master:/data/k8s# 70 | 71 | ``` 72 | 73 | -------------------------------------------------------------------------------- /prometheus-operator/node-exporter-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: DaemonSet 3 | metadata: 4 | labels: 5 | app: node-exporter 6 | name: node-exporter 7 | namespace: monitoring 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: node-exporter 12 | template: 13 | metadata: 14 | labels: 15 | app: node-exporter 16 | spec: 17 | containers: 18 | - args: 19 | - --web.listen-address=127.0.0.1:9100 20 | - --path.procfs=/host/proc 21 | - --path.sysfs=/host/sys 22 | - --path.rootfs=/host/root 23 | - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/) 24 | - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$ 25 | image: quay.io/prometheus/node-exporter:v0.17.0 26 | name: node-exporter 27 | resources: 28 | limits: 29 | cpu: 250m 30 | memory: 180Mi 31 | requests: 32 | cpu: 102m 33 | memory: 180Mi 34 | volumeMounts: 35 | - mountPath: /host/proc 36 | name: proc 37 | readOnly: false 38 | - mountPath: /host/sys 39 | name: sys 40 | readOnly: false 41 | - mountPath: /host/root 42 | mountPropagation: HostToContainer 43 | name: root 44 | readOnly: true 45 | - args: 46 | - --logtostderr 47 | - --secure-listen-address=$(IP):9100 48 | - 
--tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 49 | - --upstream=http://127.0.0.1:9100/ 50 | env: 51 | - name: IP 52 | valueFrom: 53 | fieldRef: 54 | fieldPath: status.podIP 55 | image: quay.io/coreos/kube-rbac-proxy:v0.4.1 56 | name: kube-rbac-proxy 57 | ports: 58 | - containerPort: 9100 59 | hostPort: 9100 60 | name: https 61 | resources: 62 | limits: 63 | cpu: 20m 64 | memory: 60Mi 65 | requests: 66 | cpu: 10m 67 | memory: 20Mi 68 | hostNetwork: true 69 | hostPID: true 70 | nodeSelector: 71 | beta.kubernetes.io/os: linux 72 | securityContext: 73 | runAsNonRoot: true 74 | runAsUser: 65534 75 | serviceAccountName: node-exporter 76 | tolerations: 77 | - effect: NoExecute 78 | operator: Exists 79 | - effect: NoSchedule 80 | operator: Exists 81 | volumes: 82 | - hostPath: 83 | path: /proc 84 | name: proc 85 | - hostPath: 86 | path: /sys 87 | name: sys 88 | - hostPath: 89 | path: / 90 | name: root 91 | -------------------------------------------------------------------------------- /EFK/ log-pilot + elasticsearch + kibana/README.md: -------------------------------------------------------------------------------- 1 | # log-pilot + elasticsearch + kibana kubernetes 日志系统部署 2 | 3 | ### 需求 4 | 5 | > 开发者在面对 kubernetes 分布式集群下的日志需求时,常常会感到头疼,既有容器自身特性的原因,也有现有日志采集工具的桎梏,主要包括: 6 | 7 | **容器本身特性:** 8 | - 采集目标多:容器本身的特性导致采集目标多,需要采集容器内日志、容器 stdout。对于容器内部的文件日志采集,现在并没有一个很好的工具能够去动态发现采集。针对每种数据源都有对应的采集软件,但缺乏一站式的工具。 9 | 10 | - 弹性伸缩难:kubernetes 是分布式的集群,服务、环境的弹性伸缩对于日志采集带来了很大的困难,无法像传统虚拟机环境下那样,事先配置好日志的采集路径等信息,采集的动态性以及数据完整性是非常大的挑战。 11 | 12 | **现有日志工具的一些缺陷**: 13 | 14 | - 缺乏动态配置的能力。目前的采集工具都需要事先手动配置好日志采集方式和路径等信息,因为它无法能够自动感知到容器的生命周期变化或者动态漂移,所以它无法动态地去配置。 15 | 16 | - 日志采集重复或丢失的问题。因为现在的一些采集工具基本上是通过 tail 的方式来进行日志采集的,那么这里就可能存在两个方面的问题:一个是可能导致日志丢失,比如采集工具在重启的过程中,而应用依然在写日志,那么就有可能导致这个窗口期的日志丢失;而对于这种情况一般保守的做法就是,默认往前多采集 1M 日志或 2M 的日志,那么这就又会可能引起日志采集重复的问题。 17 | 18 | - 未明确标记日志源。因为一个应用可能有很多个容器,输出的应用日志也是一样的,那么当我们将所有应用日志收集到统一日志存储后端时,在搜索日志的时候,我们就无法明确这条日志具体是哪一个节点上的哪一个应用容器产生的。 19 | 20 | ### log-pilot 介绍 21 | 22 | > og-Pilot 是一个智能容器日志采集工具,它不仅能够高效便捷地将容器日志采集输出到多种存储日志后端,同时还能够动态地发现和采集容器内部的日志文件。 23 | 24 | > 针对前面提出的日志采集难题,log-pilot 通过声明式配置实现强大的容器事件管理,可同时获取容器标准输出和内部文件日志,解决了动态伸缩问题,此外,log-pilot 具有自动发现机制,CheckPoint 及句柄保持的机制,自动日志数据打标,有效应对动态配置、日志重复和丢失以及日志源标记等问题。 25 | 26 | 目前 log-pilot 在 Github 完全开源,项目地址是 https://github.com/AliyunContainerService/log-pilot 。您可以深入了解更多实现原理。 27 | 28 | 29 | 30 | ### 部署log-pilot + elasticsearch + kibana 31 | 32 | ```shell 33 | [root@ks-master k8s]# git clone https://github.com/baishuchao/kubernetes.git 34 | [root@ks-master k8s]# cd kubernetes/EFK/\ log-pilot\ +\ elasticsearch\ +\ kibana/ 35 | [root@ks-master log-pilot + elasticsearch + kibana]# kubectl apply -f elasticsearch.yml # 安装elasticsearch集群 36 | [root@ks-master log-pilot + elasticsearch + kibana]# kubectl apply -f kibana.yml 37 | [root@ks-master log-pilot + elasticsearch + kibana]# kubectl apply -f log-pilot.yml 38 | [root@ks-master log-pilot + elasticsearch + kibana]# kubectl apply -f traefik-kibana.yaml 39 | 40 | ``` 41 | 42 | 43 | 44 | 45 | 46 | ### 案例(收集spring-cloud日志) 47 | 48 | ```yaml 49 | apiVersion: extensions/v1beta1 50 | kind: Deployment 51 | metadata: 52 | name: eureka-server 53 | spec: 54 | replicas: 1 55 | selector: 56 | matchLabels: 57 | app: eureka-server 58 | template: 59 | metadata: 60 | labels: 61 | app: eureka-server 62 | spec: 63 | containers: 64 | - name: eureka-server 65 | image: 
registry.cn-hangzhou.aliyuncs.com/cloud-ts/eureka-server:690 66 | volumeMounts: 67 | - name: cloud-logs 68 | mountPath: /logs/cloud 69 | imagePullPolicy: Always 70 | env: 71 | - name: aliyun_logs_pod 72 | value: "/logs/cloud/*.log" # 容器日志路径 73 | ports: 74 | - containerPort: 8761 75 | imagePullSecrets: 76 | - name: registry-secret 77 | volumes: 78 | - name: cloud-logs 79 | emptyDir: {} 80 | ``` 81 | 82 | **部署eureka-server** 83 | ``` 84 | [root@ks-master log-pilot + elasticsearch + kibana]# kubectl apply -f eureka.yaml 85 | ``` 86 | 87 | 88 | ### 见证奇迹的时刻 89 | 90 | **访问url http://kibana.baishuchao.com** 91 | 92 | ![](../../images/elk.png) 93 | 94 | 95 | -------------------------------------------------------------------------------- /prometheus-operator/kube-state-metrics-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: kube-state-metrics 6 | name: kube-state-metrics 7 | namespace: monitoring 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: kube-state-metrics 13 | template: 14 | metadata: 15 | labels: 16 | app: kube-state-metrics 17 | spec: 18 | containers: 19 | - args: 20 | - --logtostderr 21 | - --secure-listen-address=:8443 22 | - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 23 | - --upstream=http://127.0.0.1:8081/ 24 | image: quay.io/coreos/kube-rbac-proxy:v0.4.1 25 | name: kube-rbac-proxy-main 26 | ports: 27 | - containerPort: 8443 28 | name: https-main 29 | resources: 30 | limits: 31 | cpu: 20m 32 | memory: 40Mi 33 | requests: 34 | cpu: 10m 35 | memory: 20Mi 36 | - args: 37 | - --logtostderr 38 | - --secure-listen-address=:9443 39 | - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 40 | - --upstream=http://127.0.0.1:8082/ 41 | image: quay.io/coreos/kube-rbac-proxy:v0.4.1 42 | name: kube-rbac-proxy-self 43 | ports: 44 | - containerPort: 9443 45 | name: https-self 46 | resources: 47 | limits: 48 | cpu: 20m 49 | memory: 40Mi 50 | requests: 51 | cpu: 10m 52 | memory: 20Mi 53 | - args: 54 | - --host=127.0.0.1 55 | - --port=8081 56 | - --telemetry-host=127.0.0.1 57 | - --telemetry-port=8082 58 | image: quay.io/coreos/kube-state-metrics:v1.5.0 59 | name: kube-state-metrics 60 | resources: 61 | limits: 62 | cpu: 100m 63 | memory: 150Mi 64 | requests: 65 | cpu: 100m 66 | memory: 150Mi 67 | - command: 68 | - /pod_nanny 69 | - --container=kube-state-metrics 70 | - --cpu=100m 71 | - --extra-cpu=2m 72 | - --memory=150Mi 73 | - --extra-memory=30Mi 74 | - --threshold=5 75 | - --deployment=kube-state-metrics 76 | env: 77 | - name: MY_POD_NAME 78 | valueFrom: 79 | fieldRef: 80 | apiVersion: v1 81 | fieldPath: metadata.name 82 | - name: MY_POD_NAMESPACE 83 | valueFrom: 84 | fieldRef: 85 | apiVersion: v1 86 | fieldPath: metadata.namespace 87 | image: k8s.gcr.io/addon-resizer:1.8.4 88 | name: addon-resizer 89 | resources: 90 | limits: 91 | cpu: 50m 92 | memory: 30Mi 93 | requests: 94 | cpu: 10m 95 | memory: 30Mi 96 | nodeSelector: 97 | beta.kubernetes.io/os: linux 98 | securityContext: 99 | runAsNonRoot: true 100 | runAsUser: 65534 101 | serviceAccountName: kube-state-metrics 102 | 
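
> Note: the `kube_node_status_condition` series used by the KubeNodeDown rule earlier in this repository is produced by this kube-state-metrics Deployment, so it is worth confirming the metric is actually being scraped before relying on that alert. A quick check, assuming `kubectl` access and the `prometheus-k8s` Service in the `monitoring` namespace created by the other prometheus-operator manifests here (the temporary port-forward is just one convenient way to reach the Prometheus HTTP API):

```shell
# Sketch: confirm Prometheus is scraping kube-state-metrics node-status data.
# Assumes the prometheus-k8s Service / monitoring namespace from this repository.
kubectl -n monitoring port-forward svc/prometheus-k8s 9090:9090 &
PF_PID=$!
sleep 3
curl -s 'http://127.0.0.1:9090/api/v1/query' \
  --data-urlencode 'query=kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"}'
kill "$PF_PID"   # stop the temporary port-forward started above
```
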
-------------------------------------------------------------------------------- /EFK/ log-pilot + elasticsearch + kibana/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: elasticsearch-api 6 | namespace: kube-system 7 | labels: 8 | name: elasticsearch 9 | spec: 10 | selector: 11 | app: es 12 | ports: 13 | - name: transport 14 | port: 9200 15 | protocol: TCP 16 | --- 17 | apiVersion: v1 18 | kind: Service 19 | metadata: 20 | name: elasticsearch-discovery 21 | namespace: kube-system 22 | labels: 23 | name: elasticsearch 24 | spec: 25 | selector: 26 | app: es 27 | ports: 28 | - name: transport 29 | port: 9300 30 | protocol: TCP 31 | --- 32 | apiVersion: apps/v1beta1 33 | kind: StatefulSet 34 | metadata: 35 | name: elasticsearch 36 | namespace: kube-system 37 | labels: 38 | kubernetes.io/cluster-service: "true" 39 | spec: 40 | replicas: 3 41 | serviceName: "elasticsearch-service" 42 | selector: 43 | matchLabels: 44 | app: es 45 | template: 46 | metadata: 47 | labels: 48 | app: es 49 | spec: 50 | tolerations: 51 | - effect: NoSchedule 52 | key: node-role.kubernetes.io/master 53 | serviceAccountName: admin 54 | initContainers: 55 | - name: init-sysctl 56 | image: busybox:1.27 57 | command: 58 | - sysctl 59 | - -w 60 | - vm.max_map_count=262144 61 | securityContext: 62 | privileged: true 63 | containers: 64 | - name: elasticsearch 65 | image: registry.cn-hangzhou.aliyuncs.com/cqz/elasticsearch:5.5.1 66 | ports: 67 | - containerPort: 9200 68 | protocol: TCP 69 | - containerPort: 9300 70 | protocol: TCP 71 | securityContext: 72 | capabilities: 73 | add: 74 | - IPC_LOCK 75 | - SYS_RESOURCE 76 | resources: 77 | limits: 78 | memory: 4000Mi 79 | requests: 80 | cpu: 100m 81 | memory: 2000Mi 82 | env: 83 | - name: "http.host" 84 | value: "0.0.0.0" 85 | - name: "network.host" 86 | value: "_eth0_" 87 | - name: "cluster.name" 88 | value: "docker-cluster" 89 | - name: "bootstrap.memory_lock" 90 | value: "false" 91 | - name: "discovery.zen.ping.unicast.hosts" 92 | value: "elasticsearch-discovery" 93 | - name: "discovery.zen.ping.unicast.hosts.resolve_timeout" 94 | value: "10s" 95 | - name: "discovery.zen.ping_timeout" 96 | value: "6s" 97 | - name: "discovery.zen.minimum_master_nodes" 98 | value: "2" 99 | - name: "discovery.zen.fd.ping_interval" 100 | value: "2s" 101 | - name: "discovery.zen.no_master_block" 102 | value: "write" 103 | - name: "gateway.expected_nodes" 104 | value: "2" 105 | - name: "gateway.expected_master_nodes" 106 | value: "1" 107 | - name: "transport.tcp.connect_timeout" 108 | value: "60s" 109 | - name: "ES_JAVA_OPTS" 110 | value: "-Xms2g -Xmx2g" 111 | livenessProbe: 112 | tcpSocket: 113 | port: transport 114 | initialDelaySeconds: 20 115 | periodSeconds: 10 116 | volumeMounts: 117 | - name: es-data 118 | mountPath: /data 119 | terminationGracePeriodSeconds: 30 120 | volumes: 121 | - name: es-data 122 | hostPath: 123 | path: /es-data 124 | -------------------------------------------------------------------------------- /prometheus-operator/grafana-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta2 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: grafana 6 | name: grafana 7 | namespace: monitoring 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: grafana 13 | template: 14 | metadata: 15 | labels: 16 | app: grafana 17 | spec: 18 | containers: 19 | - image: 
grafana/grafana:6.0.1 20 | name: grafana 21 | ports: 22 | - containerPort: 3000 23 | name: http 24 | readinessProbe: 25 | httpGet: 26 | path: /api/health 27 | port: http 28 | resources: 29 | limits: 30 | cpu: 200m 31 | memory: 200Mi 32 | requests: 33 | cpu: 100m 34 | memory: 100Mi 35 | volumeMounts: 36 | - mountPath: /var/lib/grafana 37 | name: grafana-storage 38 | readOnly: false 39 | - mountPath: /etc/grafana/provisioning/datasources 40 | name: grafana-datasources 41 | readOnly: false 42 | - mountPath: /etc/grafana/provisioning/dashboards 43 | name: grafana-dashboards 44 | readOnly: false 45 | - mountPath: /grafana-dashboard-definitions/0/k8s-cluster-rsrc-use 46 | name: grafana-dashboard-k8s-cluster-rsrc-use 47 | readOnly: false 48 | - mountPath: /grafana-dashboard-definitions/0/k8s-node-rsrc-use 49 | name: grafana-dashboard-k8s-node-rsrc-use 50 | readOnly: false 51 | - mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster 52 | name: grafana-dashboard-k8s-resources-cluster 53 | readOnly: false 54 | - mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace 55 | name: grafana-dashboard-k8s-resources-namespace 56 | readOnly: false 57 | - mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod 58 | name: grafana-dashboard-k8s-resources-pod 59 | readOnly: false 60 | - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workload 61 | name: grafana-dashboard-k8s-resources-workload 62 | readOnly: false 63 | - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace 64 | name: grafana-dashboard-k8s-resources-workloads-namespace 65 | readOnly: false 66 | - mountPath: /grafana-dashboard-definitions/0/nodes 67 | name: grafana-dashboard-nodes 68 | readOnly: false 69 | - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage 70 | name: grafana-dashboard-persistentvolumesusage 71 | readOnly: false 72 | - mountPath: /grafana-dashboard-definitions/0/pods 73 | name: grafana-dashboard-pods 74 | readOnly: false 75 | - mountPath: /grafana-dashboard-definitions/0/statefulset 76 | name: grafana-dashboard-statefulset 77 | readOnly: false 78 | nodeSelector: 79 | beta.kubernetes.io/os: linux 80 | securityContext: 81 | runAsNonRoot: true 82 | runAsUser: 65534 83 | serviceAccountName: grafana 84 | volumes: 85 | - emptyDir: {} 86 | name: grafana-storage 87 | - name: grafana-datasources 88 | secret: 89 | secretName: grafana-datasources 90 | - configMap: 91 | name: grafana-dashboards 92 | name: grafana-dashboards 93 | - configMap: 94 | name: grafana-dashboard-k8s-cluster-rsrc-use 95 | name: grafana-dashboard-k8s-cluster-rsrc-use 96 | - configMap: 97 | name: grafana-dashboard-k8s-node-rsrc-use 98 | name: grafana-dashboard-k8s-node-rsrc-use 99 | - configMap: 100 | name: grafana-dashboard-k8s-resources-cluster 101 | name: grafana-dashboard-k8s-resources-cluster 102 | - configMap: 103 | name: grafana-dashboard-k8s-resources-namespace 104 | name: grafana-dashboard-k8s-resources-namespace 105 | - configMap: 106 | name: grafana-dashboard-k8s-resources-pod 107 | name: grafana-dashboard-k8s-resources-pod 108 | - configMap: 109 | name: grafana-dashboard-k8s-resources-workload 110 | name: grafana-dashboard-k8s-resources-workload 111 | - configMap: 112 | name: grafana-dashboard-k8s-resources-workloads-namespace 113 | name: grafana-dashboard-k8s-resources-workloads-namespace 114 | - configMap: 115 | name: grafana-dashboard-nodes 116 | name: grafana-dashboard-nodes 117 | - configMap: 118 | name: grafana-dashboard-persistentvolumesusage 119 | name: 
grafana-dashboard-persistentvolumesusage 120 | - configMap: 121 | name: grafana-dashboard-pods 122 | name: grafana-dashboard-pods 123 | - configMap: 124 | name: grafana-dashboard-statefulset 125 | name: grafana-dashboard-statefulset 126 | -------------------------------------------------------------------------------- /kubernets-dashboard/kubernetes-dashboard.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The Kubernetes Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # ------------------- Dashboard Secret ------------------- # 16 | 17 | apiVersion: v1 18 | kind: Secret 19 | metadata: 20 | labels: 21 | k8s-app: kubernetes-dashboard 22 | name: kubernetes-dashboard-certs 23 | namespace: kube-system 24 | type: Opaque 25 | 26 | --- 27 | # ------------------- Dashboard Service Account ------------------- # 28 | 29 | apiVersion: v1 30 | kind: ServiceAccount 31 | metadata: 32 | labels: 33 | k8s-app: kubernetes-dashboard 34 | name: kubernetes-dashboard 35 | namespace: kube-system 36 | 37 | --- 38 | # ------------------- Dashboard Role & Role Binding ------------------- # 39 | 40 | kind: Role 41 | apiVersion: rbac.authorization.k8s.io/v1 42 | metadata: 43 | name: kubernetes-dashboard-minimal 44 | namespace: kube-system 45 | rules: 46 | # Allow Dashboard to create 'kubernetes-dashboard-key-holder' secret. 47 | - apiGroups: [""] 48 | resources: ["secrets"] 49 | verbs: ["create"] 50 | # Allow Dashboard to create 'kubernetes-dashboard-settings' config map. 51 | - apiGroups: [""] 52 | resources: ["configmaps"] 53 | verbs: ["create"] 54 | # Allow Dashboard to get, update and delete Dashboard exclusive secrets. 55 | - apiGroups: [""] 56 | resources: ["secrets"] 57 | resourceNames: ["kubernetes-dashboard-key-holder", "kubernetes-dashboard-certs"] 58 | verbs: ["get", "update", "delete"] 59 | # Allow Dashboard to get and update 'kubernetes-dashboard-settings' config map. 60 | - apiGroups: [""] 61 | resources: ["configmaps"] 62 | resourceNames: ["kubernetes-dashboard-settings"] 63 | verbs: ["get", "update"] 64 | # Allow Dashboard to get metrics from heapster. 
65 | - apiGroups: [""] 66 | resources: ["services"] 67 | resourceNames: ["heapster"] 68 | verbs: ["proxy"] 69 | - apiGroups: [""] 70 | resources: ["services/proxy"] 71 | resourceNames: ["heapster", "http:heapster:", "https:heapster:"] 72 | verbs: ["get"] 73 | 74 | --- 75 | apiVersion: rbac.authorization.k8s.io/v1 76 | kind: RoleBinding 77 | metadata: 78 | name: kubernetes-dashboard-minimal 79 | namespace: kube-system 80 | roleRef: 81 | apiGroup: rbac.authorization.k8s.io 82 | kind: Role 83 | name: kubernetes-dashboard-minimal 84 | subjects: 85 | - kind: ServiceAccount 86 | name: kubernetes-dashboard 87 | namespace: kube-system 88 | 89 | --- 90 | # ------------------- Dashboard Deployment ------------------- # 91 | 92 | kind: Deployment 93 | apiVersion: apps/v1 94 | metadata: 95 | labels: 96 | k8s-app: kubernetes-dashboard 97 | name: kubernetes-dashboard 98 | namespace: kube-system 99 | spec: 100 | replicas: 1 101 | revisionHistoryLimit: 10 102 | selector: 103 | matchLabels: 104 | k8s-app: kubernetes-dashboard 105 | template: 106 | metadata: 107 | labels: 108 | k8s-app: kubernetes-dashboard 109 | spec: 110 | containers: 111 | - name: kubernetes-dashboard 112 | image: registry.cn-hangzhou.aliyuncs.com/google_containers/kubernetes-dashboard-amd64:v1.10.0 113 | ports: 114 | - containerPort: 8443 115 | protocol: TCP 116 | args: 117 | - --auto-generate-certificates 118 | # Uncomment the following line to manually specify Kubernetes API server Host 119 | # If not specified, Dashboard will attempt to auto discover the API server and connect 120 | # to it. Uncomment only if the default does not work. 121 | # - --apiserver-host=http://my-address:port 122 | volumeMounts: 123 | - name: kubernetes-dashboard-certs 124 | mountPath: /certs 125 | # Create on-disk volume to store exec logs 126 | - mountPath: /tmp 127 | name: tmp-volume 128 | livenessProbe: 129 | httpGet: 130 | scheme: HTTPS 131 | path: / 132 | port: 8443 133 | initialDelaySeconds: 30 134 | timeoutSeconds: 30 135 | volumes: 136 | - name: kubernetes-dashboard-certs 137 | secret: 138 | secretName: kubernetes-dashboard-certs 139 | - name: tmp-volume 140 | emptyDir: {} 141 | serviceAccountName: kubernetes-dashboard 142 | # Comment the following tolerations if Dashboard must not be deployed on master 143 | tolerations: 144 | - key: node-role.kubernetes.io/master 145 | effect: NoSchedule 146 | 147 | --- 148 | # ------------------- Dashboard Service ------------------- # 149 | 150 | kind: Service 151 | apiVersion: v1 152 | metadata: 153 | labels: 154 | k8s-app: kubernetes-dashboard 155 | name: kubernetes-dashboard 156 | namespace: kube-system 157 | spec: 158 | type: NodePort 159 | ports: 160 | - port: 443 161 | targetPort: 8443 162 | nodePort: 30000 163 | selector: 164 | k8s-app: kubernetes-dashboard 165 | -------------------------------------------------------------------------------- /prometheus/prometheus-cm.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: prometheus-config 5 | namespace: kube-prometheus 6 | data: 7 | prometheus.yml: | 8 | global: 9 | scrape_interval: 15s 10 | evaluation_interval: 15s 11 | scrape_configs: 12 | 13 | - job_name: 'kubernetes-apiservers' 14 | kubernetes_sd_configs: 15 | - role: endpoints 16 | scheme: https 17 | tls_config: 18 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 19 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 20 | relabel_configs: 21 | - source_labels: 
[__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 22 | action: keep 23 | regex: default;kubernetes;https 24 | 25 | - job_name: 'kubernetes-nodes' 26 | kubernetes_sd_configs: 27 | - role: node 28 | scheme: https 29 | tls_config: 30 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 31 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 32 | relabel_configs: 33 | - action: labelmap 34 | regex: __meta_kubernetes_node_label_(.+) 35 | - target_label: __address__ 36 | replacement: kubernetes.default.svc:443 37 | - source_labels: [__meta_kubernetes_node_name] 38 | regex: (.+) 39 | target_label: __metrics_path__ 40 | replacement: /api/v1/nodes/${1}/proxy/metrics 41 | 42 | - job_name: 'kubernetes-cadvisor' 43 | kubernetes_sd_configs: 44 | - role: node 45 | scheme: https 46 | tls_config: 47 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 48 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 49 | relabel_configs: 50 | - action: labelmap 51 | regex: __meta_kubernetes_node_label_(.+) 52 | - target_label: __address__ 53 | replacement: kubernetes.default.svc:443 54 | - source_labels: [__meta_kubernetes_node_name] 55 | regex: (.+) 56 | target_label: __metrics_path__ 57 | replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor 58 | 59 | - job_name: 'kubernetes-service-business-endpoints' 60 | kubernetes_sd_configs: 61 | - role: endpoints 62 | metrics_path: /prometheus 63 | relabel_configs: 64 | - source_labels: [__meta_kubernetes_endpoints_name] 65 | action: keep 66 | regex: (container-console-service) 67 | 68 | - job_name: 'kubernetes-service-endpoints' 69 | kubernetes_sd_configs: 70 | - role: endpoints 71 | relabel_configs: 72 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] 73 | action: keep 74 | regex: true 75 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] 76 | action: replace 77 | target_label: __scheme__ 78 | regex: (https?) 
79 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] 80 | action: replace 81 | target_label: __metrics_path__ 82 | regex: (.+) 83 | - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] 84 | action: replace 85 | target_label: __address__ 86 | regex: ([^:]+)(?::\d+)?;(\d+) 87 | replacement: $1:$2 88 | - action: labelmap 89 | regex: __meta_kubernetes_service_label_(.+) 90 | - source_labels: [__meta_kubernetes_namespace] 91 | action: replace 92 | target_label: kubernetes_namespace 93 | - source_labels: [__meta_kubernetes_service_name] 94 | action: replace 95 | target_label: kubernetes_name 96 | 97 | - job_name: 'kubernetes-services' 98 | kubernetes_sd_configs: 99 | - role: service 100 | metrics_path: /probe 101 | params: 102 | module: [http_2xx] 103 | relabel_configs: 104 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] 105 | action: keep 106 | regex: true 107 | - source_labels: [__address__] 108 | target_label: __param_target 109 | - target_label: __address__ 110 | replacement: blackbox-exporter.example.com:9115 111 | - source_labels: [__param_target] 112 | target_label: instance 113 | - action: labelmap 114 | regex: __meta_kubernetes_service_label_(.+) 115 | - source_labels: [__meta_kubernetes_namespace] 116 | target_label: kubernetes_namespace 117 | - source_labels: [__meta_kubernetes_service_name] 118 | target_label: kubernetes_name 119 | 120 | - job_name: 'kubernetes-ingresses' 121 | kubernetes_sd_configs: 122 | - role: ingress 123 | relabel_configs: 124 | - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe] 125 | action: keep 126 | regex: true 127 | - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path] 128 | regex: (.+);(.+);(.+) 129 | replacement: ${1}://${2}${3} 130 | target_label: __param_target 131 | - target_label: __address__ 132 | replacement: blackbox-exporter.example.com:9115 133 | - source_labels: [__param_target] 134 | target_label: instance 135 | - action: labelmap 136 | regex: __meta_kubernetes_ingress_label_(.+) 137 | - source_labels: [__meta_kubernetes_namespace] 138 | target_label: kubernetes_namespace 139 | - source_labels: [__meta_kubernetes_ingress_name] 140 | target_label: kubernetes_name 141 | 142 | - job_name: 'kubernetes-pods' 143 | kubernetes_sd_configs: 144 | - role: pod 145 | relabel_configs: 146 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] 147 | action: keep 148 | regex: true 149 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] 150 | action: replace 151 | target_label: __metrics_path__ 152 | regex: (.+) 153 | - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] 154 | action: replace 155 | regex: ([^:]+)(?::\d+)?;(\d+) 156 | replacement: $1:$2 157 | target_label: __address__ 158 | - action: labelmap 159 | regex: __meta_kubernetes_pod_label_(.+) 160 | - source_labels: [__meta_kubernetes_namespace] 161 | action: replace 162 | target_label: kubernetes_namespace 163 | - source_labels: [__meta_kubernetes_pod_name] 164 | action: replace 165 | target_label: kubernetes_pod_name 166 | -------------------------------------------------------------------------------- /ingress/mandatory.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: ingress-nginx 5 | labels: 6 | app.kubernetes.io/name: ingress-nginx 7 | app.kubernetes.io/part-of: 
ingress-nginx 8 | 9 | --- 10 | 11 | kind: ConfigMap 12 | apiVersion: v1 13 | metadata: 14 | name: nginx-configuration 15 | namespace: ingress-nginx 16 | labels: 17 | app.kubernetes.io/name: ingress-nginx 18 | app.kubernetes.io/part-of: ingress-nginx 19 | 20 | --- 21 | kind: ConfigMap 22 | apiVersion: v1 23 | metadata: 24 | name: tcp-services 25 | namespace: ingress-nginx 26 | labels: 27 | app.kubernetes.io/name: ingress-nginx 28 | app.kubernetes.io/part-of: ingress-nginx 29 | 30 | --- 31 | kind: ConfigMap 32 | apiVersion: v1 33 | metadata: 34 | name: udp-services 35 | namespace: ingress-nginx 36 | labels: 37 | app.kubernetes.io/name: ingress-nginx 38 | app.kubernetes.io/part-of: ingress-nginx 39 | 40 | --- 41 | apiVersion: v1 42 | kind: ServiceAccount 43 | metadata: 44 | name: nginx-ingress-serviceaccount 45 | namespace: ingress-nginx 46 | labels: 47 | app.kubernetes.io/name: ingress-nginx 48 | app.kubernetes.io/part-of: ingress-nginx 49 | 50 | --- 51 | apiVersion: rbac.authorization.k8s.io/v1beta1 52 | kind: ClusterRole 53 | metadata: 54 | name: nginx-ingress-clusterrole 55 | labels: 56 | app.kubernetes.io/name: ingress-nginx 57 | app.kubernetes.io/part-of: ingress-nginx 58 | rules: 59 | - apiGroups: 60 | - "" 61 | resources: 62 | - configmaps 63 | - endpoints 64 | - nodes 65 | - pods 66 | - secrets 67 | verbs: 68 | - list 69 | - watch 70 | - apiGroups: 71 | - "" 72 | resources: 73 | - nodes 74 | verbs: 75 | - get 76 | - apiGroups: 77 | - "" 78 | resources: 79 | - services 80 | verbs: 81 | - get 82 | - list 83 | - watch 84 | - apiGroups: 85 | - "extensions" 86 | resources: 87 | - ingresses 88 | verbs: 89 | - get 90 | - list 91 | - watch 92 | - apiGroups: 93 | - "" 94 | resources: 95 | - events 96 | verbs: 97 | - create 98 | - patch 99 | - apiGroups: 100 | - "extensions" 101 | resources: 102 | - ingresses/status 103 | verbs: 104 | - update 105 | 106 | --- 107 | apiVersion: rbac.authorization.k8s.io/v1beta1 108 | kind: Role 109 | metadata: 110 | name: nginx-ingress-role 111 | namespace: ingress-nginx 112 | labels: 113 | app.kubernetes.io/name: ingress-nginx 114 | app.kubernetes.io/part-of: ingress-nginx 115 | rules: 116 | - apiGroups: 117 | - "" 118 | resources: 119 | - configmaps 120 | - pods 121 | - secrets 122 | - namespaces 123 | verbs: 124 | - get 125 | - apiGroups: 126 | - "" 127 | resources: 128 | - configmaps 129 | resourceNames: 130 | # Defaults to "<election-id>-<ingress-class>" 131 | # Here: "<ingress-controller-leader>-<nginx>" 132 | # This has to be adapted if you change either parameter 133 | # when launching the nginx-ingress-controller.
134 | - "ingress-controller-leader-nginx" 135 | verbs: 136 | - get 137 | - update 138 | - apiGroups: 139 | - "" 140 | resources: 141 | - configmaps 142 | verbs: 143 | - create 144 | - apiGroups: 145 | - "" 146 | resources: 147 | - endpoints 148 | verbs: 149 | - get 150 | 151 | --- 152 | apiVersion: rbac.authorization.k8s.io/v1beta1 153 | kind: RoleBinding 154 | metadata: 155 | name: nginx-ingress-role-nisa-binding 156 | namespace: ingress-nginx 157 | labels: 158 | app.kubernetes.io/name: ingress-nginx 159 | app.kubernetes.io/part-of: ingress-nginx 160 | roleRef: 161 | apiGroup: rbac.authorization.k8s.io 162 | kind: Role 163 | name: nginx-ingress-role 164 | subjects: 165 | - kind: ServiceAccount 166 | name: nginx-ingress-serviceaccount 167 | namespace: ingress-nginx 168 | 169 | --- 170 | apiVersion: rbac.authorization.k8s.io/v1beta1 171 | kind: ClusterRoleBinding 172 | metadata: 173 | name: nginx-ingress-clusterrole-nisa-binding 174 | labels: 175 | app.kubernetes.io/name: ingress-nginx 176 | app.kubernetes.io/part-of: ingress-nginx 177 | roleRef: 178 | apiGroup: rbac.authorization.k8s.io 179 | kind: ClusterRole 180 | name: nginx-ingress-clusterrole 181 | subjects: 182 | - kind: ServiceAccount 183 | name: nginx-ingress-serviceaccount 184 | namespace: ingress-nginx 185 | 186 | --- 187 | 188 | apiVersion: apps/v1 189 | kind: Deployment 190 | metadata: 191 | name: nginx-ingress-controller 192 | namespace: ingress-nginx 193 | labels: 194 | app.kubernetes.io/name: ingress-nginx 195 | app.kubernetes.io/part-of: ingress-nginx 196 | spec: 197 | replicas: 1 198 | selector: 199 | matchLabels: 200 | app.kubernetes.io/name: ingress-nginx 201 | app.kubernetes.io/part-of: ingress-nginx 202 | template: 203 | metadata: 204 | labels: 205 | app.kubernetes.io/name: ingress-nginx 206 | app.kubernetes.io/part-of: ingress-nginx 207 | annotations: 208 | prometheus.io/port: "10254" 209 | prometheus.io/scrape: "true" 210 | spec: 211 | serviceAccountName: nginx-ingress-serviceaccount 212 | containers: 213 | - name: nginx-ingress-controller 214 | image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.23.0 215 | args: 216 | - /nginx-ingress-controller 217 | - --configmap=$(POD_NAMESPACE)/nginx-configuration 218 | - --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services 219 | - --udp-services-configmap=$(POD_NAMESPACE)/udp-services 220 | - --publish-service=$(POD_NAMESPACE)/ingress-nginx 221 | - --annotations-prefix=nginx.ingress.kubernetes.io 222 | securityContext: 223 | allowPrivilegeEscalation: true 224 | capabilities: 225 | drop: 226 | - ALL 227 | add: 228 | - NET_BIND_SERVICE 229 | # www-data -> 33 230 | runAsUser: 33 231 | env: 232 | - name: POD_NAME 233 | valueFrom: 234 | fieldRef: 235 | fieldPath: metadata.name 236 | - name: POD_NAMESPACE 237 | valueFrom: 238 | fieldRef: 239 | fieldPath: metadata.namespace 240 | ports: 241 | - name: http 242 | containerPort: 80 243 | - name: https 244 | containerPort: 443 245 | livenessProbe: 246 | failureThreshold: 3 247 | httpGet: 248 | path: /healthz 249 | port: 10254 250 | scheme: HTTP 251 | initialDelaySeconds: 10 252 | periodSeconds: 10 253 | successThreshold: 1 254 | timeoutSeconds: 10 255 | readinessProbe: 256 | failureThreshold: 3 257 | httpGet: 258 | path: /healthz 259 | port: 10254 260 | scheme: HTTP 261 | periodSeconds: 10 262 | successThreshold: 1 263 | timeoutSeconds: 10 264 | 265 | --- 266 | -------------------------------------------------------------------------------- /Calico/calico.yaml: 
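The prometheus.io/scrape and prometheus.io/port annotations on the nginx-ingress-controller pod template above are exactly what the kubernetes-pods job in the Prometheus scrape configuration earlier in this document keys on, and the kubernetes-service-endpoints job applies the same convention to Service annotations. A minimal sketch of a Service that this scrape configuration would discover; the name, namespace, and port 9100 are illustrative assumptions, not resources defined in this repository:

apiVersion: v1
kind: Service
metadata:
  name: example-exporter          # hypothetical Service name
  namespace: default
  annotations:
    prometheus.io/scrape: "true"  # keep condition in the kubernetes-service-endpoints job
    prometheus.io/path: "/metrics"  # rewritten into __metrics_path__
    prometheus.io/port: "9100"    # rewritten into the target address port
spec:
  selector:
    app: example-exporter
  ports:
  - name: metrics
    port: 9100
    targetPort: 9100

Once the annotations are present, no change to the Prometheus configuration is needed; the endpoints behind the Service are picked up and scraped automatically.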
-------------------------------------------------------------------------------- 1 | # Calico Version v3.1.6 2 | # https://docs.projectcalico.org/v3.1/releases#v3.1.6 3 | # This manifest includes the following component versions: 4 | # calico/node:v3.1.6 5 | # calico/cni:v3.1.6 6 | 7 | # This ConfigMap is used to configure a self-hosted Calico installation. 8 | kind: ConfigMap 9 | apiVersion: v1 10 | metadata: 11 | name: calico-config 12 | namespace: kube-system 13 | data: 14 | # To enable Typha, set this to "calico-typha" *and* set a non-zero value for Typha replicas 15 | # below. We recommend using Typha if you have more than 50 nodes. Above 100 nodes it is 16 | # essential. 17 | typha_service_name: "none" 18 | 19 | # The CNI network configuration to install on each node. 20 | cni_network_config: |- 21 | { 22 | "name": "k8s-pod-network", 23 | "cniVersion": "0.3.0", 24 | "plugins": [ 25 | { 26 | "type": "calico", 27 | "log_level": "info", 28 | "datastore_type": "kubernetes", 29 | "nodename": "__KUBERNETES_NODE_NAME__", 30 | "mtu": 1500, 31 | "ipam": { 32 | "type": "host-local", 33 | "subnet": "usePodCidr" 34 | }, 35 | "policy": { 36 | "type": "k8s" 37 | }, 38 | "kubernetes": { 39 | "kubeconfig": "__KUBECONFIG_FILEPATH__" 40 | } 41 | }, 42 | { 43 | "type": "portmap", 44 | "snat": true, 45 | "capabilities": {"portMappings": true} 46 | } 47 | ] 48 | } 49 | 50 | --- 51 | 52 | # This manifest creates a Service, which will be backed by Calico's Typha daemon. 53 | # Typha sits in between Felix and the API server, reducing Calico's load on the API server. 54 | 55 | apiVersion: v1 56 | kind: Service 57 | metadata: 58 | name: calico-typha 59 | namespace: kube-system 60 | labels: 61 | k8s-app: calico-typha 62 | spec: 63 | ports: 64 | - port: 5473 65 | protocol: TCP 66 | targetPort: calico-typha 67 | name: calico-typha 68 | selector: 69 | k8s-app: calico-typha 70 | 71 | --- 72 | 73 | # This manifest creates a Deployment of Typha to back the above service. 74 | 75 | apiVersion: apps/v1beta1 76 | kind: Deployment 77 | metadata: 78 | name: calico-typha 79 | namespace: kube-system 80 | labels: 81 | k8s-app: calico-typha 82 | spec: 83 | # Number of Typha replicas. To enable Typha, set this to a non-zero value *and* set the 84 | # typha_service_name variable in the calico-config ConfigMap above. 85 | # 86 | # We recommend using Typha if you have more than 50 nodes. Above 100 nodes it is essential 87 | # (when using the Kubernetes datastore). Use one replica for every 100-200 nodes. In 88 | # production, we recommend running at least 3 replicas to reduce the impact of rolling upgrade. 89 | replicas: 0 90 | revisionHistoryLimit: 2 91 | template: 92 | metadata: 93 | labels: 94 | k8s-app: calico-typha 95 | annotations: 96 | # This, along with the CriticalAddonsOnly toleration below, marks the pod as a critical 97 | # add-on, ensuring it gets priority scheduling and that its resources are reserved 98 | # if it ever gets evicted. 99 | scheduler.alpha.kubernetes.io/critical-pod: '' 100 | spec: 101 | hostNetwork: true 102 | tolerations: 103 | # Mark the pod as a critical add-on for rescheduling. 104 | - key: CriticalAddonsOnly 105 | operator: Exists 106 | # Since Calico can't network a pod until Typha is up, we need to run Typha itself 107 | # as a host-networked pod. 
108 | serviceAccountName: calico-node 109 | containers: 110 | - image: quay.io/calico/typha:v3.1.6 111 | name: calico-typha 112 | ports: 113 | - containerPort: 5473 114 | name: calico-typha 115 | protocol: TCP 116 | env: 117 | # Enable "info" logging by default. Can be set to "debug" to increase verbosity. 118 | - name: TYPHA_LOGSEVERITYSCREEN 119 | value: "info" 120 | # Disable logging to file and syslog since those don't make sense in Kubernetes. 121 | - name: TYPHA_LOGFILEPATH 122 | value: "none" 123 | - name: TYPHA_LOGSEVERITYSYS 124 | value: "none" 125 | # Monitor the Kubernetes API to find the number of running instances and rebalance 126 | # connections. 127 | - name: TYPHA_CONNECTIONREBALANCINGMODE 128 | value: "kubernetes" 129 | - name: TYPHA_DATASTORETYPE 130 | value: "kubernetes" 131 | - name: TYPHA_HEALTHENABLED 132 | value: "true" 133 | # Uncomment these lines to enable prometheus metrics. Since Typha is host-networked, 134 | # this opens a port on the host, which may need to be secured. 135 | #- name: TYPHA_PROMETHEUSMETRICSENABLED 136 | # value: "true" 137 | #- name: TYPHA_PROMETHEUSMETRICSPORT 138 | # value: "9093" 139 | livenessProbe: 140 | httpGet: 141 | path: /liveness 142 | port: 9098 143 | periodSeconds: 30 144 | initialDelaySeconds: 30 145 | readinessProbe: 146 | httpGet: 147 | path: /readiness 148 | port: 9098 149 | periodSeconds: 10 150 | 151 | --- 152 | 153 | # This manifest installs the calico/node container, as well 154 | # as the Calico CNI plugins and network config on 155 | # each master and worker node in a Kubernetes cluster. 156 | kind: DaemonSet 157 | apiVersion: extensions/v1beta1 158 | metadata: 159 | name: calico-node 160 | namespace: kube-system 161 | labels: 162 | k8s-app: calico-node 163 | spec: 164 | selector: 165 | matchLabels: 166 | k8s-app: calico-node 167 | updateStrategy: 168 | type: RollingUpdate 169 | rollingUpdate: 170 | maxUnavailable: 1 171 | template: 172 | metadata: 173 | labels: 174 | k8s-app: calico-node 175 | annotations: 176 | # This, along with the CriticalAddonsOnly toleration below, 177 | # marks the pod as a critical add-on, ensuring it gets 178 | # priority scheduling and that its resources are reserved 179 | # if it ever gets evicted. 180 | scheduler.alpha.kubernetes.io/critical-pod: '' 181 | spec: 182 | hostNetwork: true 183 | tolerations: 184 | # Make sure calico/node gets scheduled on all nodes. 185 | - effect: NoSchedule 186 | operator: Exists 187 | # Mark the pod as a critical add-on for rescheduling. 188 | - key: CriticalAddonsOnly 189 | operator: Exists 190 | - effect: NoExecute 191 | operator: Exists 192 | serviceAccountName: calico-node 193 | # Minimize downtime during a rolling upgrade or deletion; tell Kubernetes to do a "force 194 | # deletion": https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods. 195 | terminationGracePeriodSeconds: 0 196 | containers: 197 | # Runs calico/node container on each Kubernetes node. This 198 | # container programs network policy and routes on each 199 | # host. 200 | - name: calico-node 201 | image: quay.io/calico/node:v3.1.6 202 | env: 203 | # Use Kubernetes API as the backing datastore. 204 | - name: DATASTORE_TYPE 205 | value: "kubernetes" 206 | # Enable felix info logging. 207 | - name: FELIX_LOGSEVERITYSCREEN 208 | value: "info" 209 | # Cluster type to identify the deployment type 210 | - name: CLUSTER_TYPE 211 | value: "k8s,bgp" 212 | # Disable file logging so `kubectl logs` works. 
213 | - name: CALICO_DISABLE_FILE_LOGGING 214 | value: "true" 215 | # Set Felix endpoint to host default action to ACCEPT. 216 | - name: FELIX_DEFAULTENDPOINTTOHOSTACTION 217 | value: "ACCEPT" 218 | # Disable IPV6 on Kubernetes. 219 | - name: FELIX_IPV6SUPPORT 220 | value: "false" 221 | # Set MTU for tunnel device used if ipip is enabled 222 | - name: FELIX_IPINIPMTU 223 | value: "1440" 224 | # Wait for the datastore. 225 | - name: WAIT_FOR_DATASTORE 226 | value: "true" 227 | # The default IPv4 pool to create on startup if none exists. Pod IPs will be 228 | # chosen from this range. Changing this value after installation will have 229 | # no effect. This should fall within `--cluster-cidr`. 230 | - name: CALICO_IPV4POOL_CIDR 231 | value: "192.168.0.0/16" 232 | # Enable IPIP 233 | - name: CALICO_IPV4POOL_IPIP 234 | value: "Always" 235 | # Enable IP-in-IP within Felix. 236 | - name: FELIX_IPINIPENABLED 237 | value: "true" 238 | # Typha support: controlled by the ConfigMap. 239 | - name: FELIX_TYPHAK8SSERVICENAME 240 | valueFrom: 241 | configMapKeyRef: 242 | name: calico-config 243 | key: typha_service_name 244 | # Set based on the k8s node name. 245 | - name: NODENAME 246 | valueFrom: 247 | fieldRef: 248 | fieldPath: spec.nodeName 249 | # Auto-detect the BGP IP address. 250 | - name: IP 251 | value: "autodetect" 252 | - name: FELIX_HEALTHENABLED 253 | value: "true" 254 | securityContext: 255 | privileged: true 256 | resources: 257 | requests: 258 | cpu: 250m 259 | livenessProbe: 260 | httpGet: 261 | path: /liveness 262 | port: 9099 263 | periodSeconds: 10 264 | initialDelaySeconds: 10 265 | failureThreshold: 6 266 | readinessProbe: 267 | httpGet: 268 | path: /readiness 269 | port: 9099 270 | periodSeconds: 10 271 | volumeMounts: 272 | - mountPath: /lib/modules 273 | name: lib-modules 274 | readOnly: true 275 | - mountPath: /var/run/calico 276 | name: var-run-calico 277 | readOnly: false 278 | - mountPath: /var/lib/calico 279 | name: var-lib-calico 280 | readOnly: false 281 | # This container installs the Calico CNI binaries 282 | # and CNI network config file on each node. 283 | - name: install-cni 284 | image: quay.io/calico/cni:v3.1.6 285 | command: ["/install-cni.sh"] 286 | env: 287 | # Name of the CNI config file to create. 288 | - name: CNI_CONF_NAME 289 | value: "10-calico.conflist" 290 | # The CNI network config to install on each node. 291 | - name: CNI_NETWORK_CONFIG 292 | valueFrom: 293 | configMapKeyRef: 294 | name: calico-config 295 | key: cni_network_config 296 | # Set the hostname based on the k8s node name. 297 | - name: KUBERNETES_NODE_NAME 298 | valueFrom: 299 | fieldRef: 300 | fieldPath: spec.nodeName 301 | volumeMounts: 302 | - mountPath: /host/opt/cni/bin 303 | name: cni-bin-dir 304 | - mountPath: /host/etc/cni/net.d 305 | name: cni-net-dir 306 | volumes: 307 | # Used by calico/node. 308 | - name: lib-modules 309 | hostPath: 310 | path: /lib/modules 311 | - name: var-run-calico 312 | hostPath: 313 | path: /var/run/calico 314 | - name: var-lib-calico 315 | hostPath: 316 | path: /var/lib/calico 317 | # Used to install CNI. 318 | - name: cni-bin-dir 319 | hostPath: 320 | path: /opt/cni/bin 321 | - name: cni-net-dir 322 | hostPath: 323 | path: /etc/cni/net.d 324 | 325 | # Create all the CustomResourceDefinitions needed for 326 | # Calico policy and networking mode. 
327 | --- 328 | 329 | apiVersion: apiextensions.k8s.io/v1beta1 330 | kind: CustomResourceDefinition 331 | metadata: 332 | name: felixconfigurations.crd.projectcalico.org 333 | spec: 334 | scope: Cluster 335 | group: crd.projectcalico.org 336 | version: v1 337 | names: 338 | kind: FelixConfiguration 339 | plural: felixconfigurations 340 | singular: felixconfiguration 341 | 342 | --- 343 | 344 | apiVersion: apiextensions.k8s.io/v1beta1 345 | kind: CustomResourceDefinition 346 | metadata: 347 | name: bgppeers.crd.projectcalico.org 348 | spec: 349 | scope: Cluster 350 | group: crd.projectcalico.org 351 | version: v1 352 | names: 353 | kind: BGPPeer 354 | plural: bgppeers 355 | singular: bgppeer 356 | 357 | --- 358 | 359 | apiVersion: apiextensions.k8s.io/v1beta1 360 | kind: CustomResourceDefinition 361 | metadata: 362 | name: bgpconfigurations.crd.projectcalico.org 363 | spec: 364 | scope: Cluster 365 | group: crd.projectcalico.org 366 | version: v1 367 | names: 368 | kind: BGPConfiguration 369 | plural: bgpconfigurations 370 | singular: bgpconfiguration 371 | 372 | --- 373 | 374 | apiVersion: apiextensions.k8s.io/v1beta1 375 | kind: CustomResourceDefinition 376 | metadata: 377 | name: ippools.crd.projectcalico.org 378 | spec: 379 | scope: Cluster 380 | group: crd.projectcalico.org 381 | version: v1 382 | names: 383 | kind: IPPool 384 | plural: ippools 385 | singular: ippool 386 | 387 | --- 388 | 389 | apiVersion: apiextensions.k8s.io/v1beta1 390 | kind: CustomResourceDefinition 391 | metadata: 392 | name: hostendpoints.crd.projectcalico.org 393 | spec: 394 | scope: Cluster 395 | group: crd.projectcalico.org 396 | version: v1 397 | names: 398 | kind: HostEndpoint 399 | plural: hostendpoints 400 | singular: hostendpoint 401 | 402 | --- 403 | 404 | apiVersion: apiextensions.k8s.io/v1beta1 405 | kind: CustomResourceDefinition 406 | metadata: 407 | name: clusterinformations.crd.projectcalico.org 408 | spec: 409 | scope: Cluster 410 | group: crd.projectcalico.org 411 | version: v1 412 | names: 413 | kind: ClusterInformation 414 | plural: clusterinformations 415 | singular: clusterinformation 416 | 417 | --- 418 | 419 | apiVersion: apiextensions.k8s.io/v1beta1 420 | kind: CustomResourceDefinition 421 | metadata: 422 | name: globalnetworkpolicies.crd.projectcalico.org 423 | spec: 424 | scope: Cluster 425 | group: crd.projectcalico.org 426 | version: v1 427 | names: 428 | kind: GlobalNetworkPolicy 429 | plural: globalnetworkpolicies 430 | singular: globalnetworkpolicy 431 | 432 | --- 433 | 434 | apiVersion: apiextensions.k8s.io/v1beta1 435 | kind: CustomResourceDefinition 436 | metadata: 437 | name: globalnetworksets.crd.projectcalico.org 438 | spec: 439 | scope: Cluster 440 | group: crd.projectcalico.org 441 | version: v1 442 | names: 443 | kind: GlobalNetworkSet 444 | plural: globalnetworksets 445 | singular: globalnetworkset 446 | 447 | --- 448 | 449 | apiVersion: apiextensions.k8s.io/v1beta1 450 | kind: CustomResourceDefinition 451 | metadata: 452 | name: networkpolicies.crd.projectcalico.org 453 | spec: 454 | scope: Namespaced 455 | group: crd.projectcalico.org 456 | version: v1 457 | names: 458 | kind: NetworkPolicy 459 | plural: networkpolicies 460 | singular: networkpolicy 461 | 462 | --- 463 | 464 | apiVersion: v1 465 | kind: ServiceAccount 466 | metadata: 467 | name: calico-node 468 | namespace: kube-system 469 | -------------------------------------------------------------------------------- /prometheus-operator/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml: 
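calico.yaml above registers Calico's own CustomResourceDefinitions (FelixConfiguration, BGPPeer, IPPool, and so on) so the Kubernetes API can serve as Calico's datastore. For orientation, a sketch of what the default IPPool looks like once calico/node has created it from CALICO_IPV4POOL_CIDR; this is illustrative only, you do not apply it yourself, and the field names follow the Calico v3.1 documentation rather than anything defined in this repository:

apiVersion: crd.projectcalico.org/v1
kind: IPPool
metadata:
  name: default-ipv4-ippool   # name assumed from Calico's default pool convention
spec:
  cidr: 192.168.0.0/16        # should fall within the cluster's --cluster-cidr
  ipipMode: Always            # mirrors CALICO_IPV4POOL_IPIP: "Always" above
  natOutgoing: true
  disabled: false

If your pod CIDR is not 192.168.0.0/16, change CALICO_IPV4POOL_CIDR before the first rollout; as the comment in the manifest notes, changing it after installation has no effect.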
-------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | creationTimestamp: null 5 | name: servicemonitors.monitoring.coreos.com 6 | spec: 7 | group: monitoring.coreos.com 8 | names: 9 | kind: ServiceMonitor 10 | plural: servicemonitors 11 | scope: Namespaced 12 | validation: 13 | openAPIV3Schema: 14 | properties: 15 | apiVersion: 16 | description: 'APIVersion defines the versioned schema of this representation 17 | of an object. Servers should convert recognized schemas to the latest 18 | internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' 19 | type: string 20 | kind: 21 | description: 'Kind is a string value representing the REST resource this 22 | object represents. Servers may infer this from the endpoint the client 23 | submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' 24 | type: string 25 | spec: 26 | description: ServiceMonitorSpec contains specification parameters for a 27 | ServiceMonitor. 28 | properties: 29 | endpoints: 30 | description: A list of endpoints allowed as part of this ServiceMonitor. 31 | items: 32 | description: Endpoint defines a scrapeable endpoint serving Prometheus 33 | metrics. 34 | properties: 35 | basicAuth: 36 | description: 'BasicAuth allows an endpoint to authenticate over 37 | basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' 38 | properties: 39 | password: 40 | description: SecretKeySelector selects a key of a Secret. 41 | properties: 42 | key: 43 | description: The key of the secret to select from. Must 44 | be a valid secret key. 45 | type: string 46 | name: 47 | description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' 48 | type: string 49 | optional: 50 | description: Specify whether the Secret or its key must 51 | be defined 52 | type: boolean 53 | required: 54 | - key 55 | username: 56 | description: SecretKeySelector selects a key of a Secret. 57 | properties: 58 | key: 59 | description: The key of the secret to select from. Must 60 | be a valid secret key. 61 | type: string 62 | name: 63 | description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' 64 | type: string 65 | optional: 66 | description: Specify whether the Secret or its key must 67 | be defined 68 | type: boolean 69 | required: 70 | - key 71 | bearerTokenFile: 72 | description: File to read bearer token for scraping targets. 73 | type: string 74 | honorLabels: 75 | description: HonorLabels chooses the metric's labels on collisions 76 | with target labels. 77 | type: boolean 78 | interval: 79 | description: Interval at which metrics should be scraped 80 | type: string 81 | metricRelabelings: 82 | description: MetricRelabelConfigs to apply to samples before ingestion. 83 | items: 84 | description: 'RelabelConfig allows dynamic rewriting of the 85 | label set, being applied to samples before ingestion. It defines 86 | `<metric_relabel_configs>`-section of Prometheus configuration. 87 | More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' 88 | properties: 89 | action: 90 | description: Action to perform based on regex matching.
91 | Default is 'replace' 92 | type: string 93 | modulus: 94 | description: Modulus to take of the hash of the source label 95 | values. 96 | format: int64 97 | type: integer 98 | regex: 99 | description: Regular expression against which the extracted 100 | value is matched. default is '(.*)' 101 | type: string 102 | replacement: 103 | description: Replacement value against which a regex replace 104 | is performed if the regular expression matches. Regex 105 | capture groups are available. Default is '$1' 106 | type: string 107 | separator: 108 | description: Separator placed between concatenated source 109 | label values. default is ';'. 110 | type: string 111 | sourceLabels: 112 | description: The source labels select values from existing 113 | labels. Their content is concatenated using the configured 114 | separator and matched against the configured regular expression 115 | for the replace, keep, and drop actions. 116 | items: 117 | type: string 118 | type: array 119 | targetLabel: 120 | description: Label to which the resulting value is written 121 | in a replace action. It is mandatory for replace actions. 122 | Regex capture groups are available. 123 | type: string 124 | type: array 125 | params: 126 | description: Optional HTTP URL parameters 127 | type: object 128 | path: 129 | description: HTTP path to scrape for metrics. 130 | type: string 131 | port: 132 | description: Name of the service port this endpoint refers to. 133 | Mutually exclusive with targetPort. 134 | type: string 135 | proxyUrl: 136 | description: ProxyURL eg http://proxyserver:2195 Directs scrapes 137 | to proxy through this endpoint. 138 | type: string 139 | relabelings: 140 | description: 'RelabelConfigs to apply to samples before ingestion. 141 | More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config' 142 | items: 143 | description: 'RelabelConfig allows dynamic rewriting of the 144 | label set, being applied to samples before ingestion. It defines 145 | `<metric_relabel_configs>`-section of Prometheus configuration. 146 | More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' 147 | properties: 148 | action: 149 | description: Action to perform based on regex matching. 150 | Default is 'replace' 151 | type: string 152 | modulus: 153 | description: Modulus to take of the hash of the source label 154 | values. 155 | format: int64 156 | type: integer 157 | regex: 158 | description: Regular expression against which the extracted 159 | value is matched. default is '(.*)' 160 | type: string 161 | replacement: 162 | description: Replacement value against which a regex replace 163 | is performed if the regular expression matches. Regex 164 | capture groups are available. Default is '$1' 165 | type: string 166 | separator: 167 | description: Separator placed between concatenated source 168 | label values. default is ';'. 169 | type: string 170 | sourceLabels: 171 | description: The source labels select values from existing 172 | labels. Their content is concatenated using the configured 173 | separator and matched against the configured regular expression 174 | for the replace, keep, and drop actions. 175 | items: 176 | type: string 177 | type: array 178 | targetLabel: 179 | description: Label to which the resulting value is written 180 | in a replace action. It is mandatory for replace actions. 181 | Regex capture groups are available. 182 | type: string 183 | type: array 184 | scheme: 185 | description: HTTP scheme to use for scraping.
186 | type: string 187 | scrapeTimeout: 188 | description: Timeout after which the scrape is ended 189 | type: string 190 | targetPort: 191 | anyOf: 192 | - type: string 193 | - type: integer 194 | tlsConfig: 195 | description: TLSConfig specifies TLS configuration parameters. 196 | properties: 197 | caFile: 198 | description: The CA cert to use for the targets. 199 | type: string 200 | certFile: 201 | description: The client cert file for the targets. 202 | type: string 203 | insecureSkipVerify: 204 | description: Disable target certificate validation. 205 | type: boolean 206 | keyFile: 207 | description: The client key file for the targets. 208 | type: string 209 | serverName: 210 | description: Used to verify the hostname for the targets. 211 | type: string 212 | type: array 213 | jobLabel: 214 | description: The label to use to retrieve the job name from. 215 | type: string 216 | namespaceSelector: 217 | description: NamespaceSelector is a selector for selecting either all 218 | namespaces or a list of namespaces. 219 | properties: 220 | any: 221 | description: Boolean describing whether all namespaces are selected 222 | in contrast to a list restricting them. 223 | type: boolean 224 | matchNames: 225 | description: List of namespace names. 226 | items: 227 | type: string 228 | type: array 229 | podTargetLabels: 230 | description: PodTargetLabels transfers labels on the Kubernetes Pod 231 | onto the target. 232 | items: 233 | type: string 234 | type: array 235 | sampleLimit: 236 | description: SampleLimit defines per-scrape limit on number of scraped 237 | samples that will be accepted. 238 | format: int64 239 | type: integer 240 | selector: 241 | description: A label selector is a label query over a set of resources. 242 | The result of matchLabels and matchExpressions are ANDed. An empty 243 | label selector matches all objects. A null label selector matches 244 | no objects. 245 | properties: 246 | matchExpressions: 247 | description: matchExpressions is a list of label selector requirements. 248 | The requirements are ANDed. 249 | items: 250 | description: A label selector requirement is a selector that contains 251 | values, a key, and an operator that relates the key and values. 252 | properties: 253 | key: 254 | description: key is the label key that the selector applies 255 | to. 256 | type: string 257 | operator: 258 | description: operator represents a key's relationship to a 259 | set of values. Valid operators are In, NotIn, Exists and 260 | DoesNotExist. 261 | type: string 262 | values: 263 | description: values is an array of string values. If the operator 264 | is In or NotIn, the values array must be non-empty. If the 265 | operator is Exists or DoesNotExist, the values array must 266 | be empty. This array is replaced during a strategic merge 267 | patch. 268 | items: 269 | type: string 270 | type: array 271 | required: 272 | - key 273 | - operator 274 | type: array 275 | matchLabels: 276 | description: matchLabels is a map of {key,value} pairs. A single 277 | {key,value} in the matchLabels map is equivalent to an element 278 | of matchExpressions, whose key field is "key", the operator is 279 | "In", and the values array contains only "value". The requirements 280 | are ANDed. 281 | type: object 282 | targetLabels: 283 | description: TargetLabels transfers labels on the Kubernetes Service 284 | onto the target. 
285 | items: 286 | type: string 287 | type: array 288 | required: 289 | - endpoints 290 | - selector 291 | version: v1 292 | --------------------------------------------------------------------------------
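The CRD above only describes the schema; endpoints and selector are the two required fields of a ServiceMonitor. A minimal sketch of an object that satisfies it, with a hypothetical app name, namespace, and port name that are not defined anywhere in this repository:

apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: example-app
  namespace: monitoring
  labels:
    k8s-app: example-app
spec:
  jobLabel: k8s-app        # take the Prometheus job name from this Service label
  selector:
    matchLabels:
      app: example-app     # select Services carrying this label
  namespaceSelector:
    matchNames:
    - default              # only look in the default namespace
  endpoints:
  - port: web              # name of the Service port (mutually exclusive with targetPort)
    interval: 30s
    path: /metrics

The Prometheus Operator watches ServiceMonitor objects like this one and renders them into scrape configuration for the Prometheus instances it manages.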