├── .gitignore ├── .goreleaser.yml ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── chart ├── .helmignore ├── Chart.yaml ├── templates │ ├── _helpers.tpl │ ├── account.yaml │ ├── daemonset.yaml │ ├── role.yaml │ └── rolebinding.yaml └── values.yaml ├── cmd └── main.go ├── go.mod ├── go.sum └── pkg ├── config └── flags.go ├── jsonpatch └── types.go └── loadwatcher ├── cpucount.go ├── event_type.go ├── evict_selection.go ├── evicter_evict.go ├── evicter_type.go ├── tainter_taint.go ├── tainter_type.go ├── watcher_run.go └── watcher_type.go /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea 2 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | before: 2 | hooks: 3 | - go vet ./... 4 | - go test -v ./... 5 | builds: 6 | - main: ./cmd/main.go 7 | binary: kubernetes-loadwatcher 8 | env: 9 | - CGO_ENABLED=0 10 | - GO111MODULE=on 11 | goos: 12 | - linux 13 | goarch: 14 | - amd64 15 | checksum: 16 | name_template: 'checksums.txt' 17 | snapshot: 18 | name_template: "{{ .Tag }}-next" 19 | changelog: 20 | sort: asc 21 | filters: 22 | exclude: 23 | - '^docs:' 24 | - '^test:' 25 | dockers: 26 | - image_templates: 27 | - quay.io/mittwald/kubernetes-loadwatcher:latest 28 | - quay.io/mittwald/kubernetes-loadwatcher:stable 29 | - quay.io/mittwald/kubernetes-loadwatcher:{{ .Tag }} 30 | binaries: 31 | - kubernetes-loadwatcher 32 | goos: linux 33 | goarch: amd64 34 | goarm: '' -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - '1.14' 5 | 6 | before_script: 7 | - sudo sysctl -w net.ipv4.ip_forward=1 8 | 9 | services: 10 | - docker 11 | 12 | env: 13 | global: 14 | - GO111MODULE=on 15 | - CGO_ENABLED=0 16 | - GO_VERSION=1.14 17 | - 
secure: a6FJSuOoyjy4SZMvBLobprLMtKReOAX8c528jCe/HapvWkd0b8muSNDqiDNuf9S4dxkBd7Vq93LblPCs8mCVnMDx3Isx/Z1n6SeNefXmWMDaRUR72G0Dlb6gUfDEwqej9dMOAoFHFqtzGHB6TexTBjyM3y5GGo0T5Kt1AcslMt/bWcAu460+meuPRoFKd5pLjz/kCx7a2FGm63Jt0Zi8uN8vqAkJb15kcZWBNHyJJL0YXFIeBNJsfrAeBeRX20bpRWKuGJDAZiKlnUVQolHaUxrg8ZvHBGbCKEJQqRe+ZpMNo9Im7Xm00pHCPcgcGLDg5+8xtjQPmanhpY9Aun5PAD9oPXNlQ7wEKkC7SiXOyfm5PaT71yaZfi+7Qe4HwbwoZMXDF+XvgFjGoLT5hVawKW32/bs6SfuLWcl2nKRtIDjHm2K8P6uzO7YUUEidF2CsjS9Rf7fHuoAHLov82xdO+iUTEmZ3mySdrY3WO3niPcuBMg0C4E4aYRbDpP/LMNl75odwA1i/FZwFsTTp9yho9A0nn/djNq3INUumac/D8N0lPbvRyy5Xh69MVXb9YovbV4iHi8D8OZLOBoe98DOKy/zpNy0hzesJWR3ytWOBuVhseu1CiHoVOrJsyaGE9hOZz3k9g/NYHOtw3UJ01Kuhm7rJCyWclaxqmRZkHOOjQq8= 18 | script: 19 | - bash <(wget -qO- https://git.io/goreleaser) --snapshot --skip-publish --rm-dist 20 | 21 | before_deploy: 22 | - if [[ -n "${DOCKER_LOGIN_USERNAME}" ]] && [[ -n "${DOCKER_LOGIN_PASSWORD}" ]] && [[ -n "${DOCKER_LOGIN_URL}" ]]; then docker login -u "${DOCKER_LOGIN_USERNAME}" -p "${DOCKER_LOGIN_PASSWORD}" "${DOCKER_LOGIN_URL}"; fi 23 | - export FULL_IMAGE="${DOCKER_LOGIN_URL}/mittwald/kubernetes-loadwatcher" 24 | deploy: 25 | - provider: script 26 | script: curl -sL https://git.io/goreleaser | bash -s -- --snapshot --skip-publish --rm-dist && docker push "${FULL_IMAGE}:latest" 27 | on: 28 | tags: false 29 | branch: master 30 | condition: $TRAVIS_OS_NAME = linux 31 | - provider: script 32 | script: curl -sL https://git.io/goreleaser | bash -s -- --rm-dist 33 | on: 34 | tags: true 35 | condition: $TRAVIS_OS_NAME = linux 36 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.12 AS builder 2 | 3 | COPY . 
/work 4 | WORKDIR /work 5 | RUN useradd loadwatcher 6 | 7 | FROM scratch 8 | 9 | LABEL MAINTAINER="Martin Helmich " 10 | COPY kubernetes-loadwatcher /usr/sbin/kubernetes-loadwatcher 11 | COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ 12 | COPY --from=builder /etc/passwd /etc/ 13 | 14 | USER loadwatcher 15 | 16 | ENTRYPOINT ["/usr/sbin/kubernetes-loadwatcher", "-logtostderr"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kubernetes Load Watcher 2 | 3 | [![Build Status](https://travis-ci.com/mittwald/kubernetes-loadwatcher.svg?branch=master)](https://travis-ci.com/mittwald/kubernetes-loadwatcher) 4 | [![Docker Repository on Quay](https://quay.io/repository/mittwald/kubernetes-loadwatcher/status "Docker Repository on Quay")](https://quay.io/repository/mittwald/kubernetes-loadwatcher) 5 | [![Maintainability](https://api.codeclimate.com/v1/badges/9b21aca83864d9202fd4/maintainability)](https://codeclimate.com/github/mittwald/kubernetes-loadwatcher/maintainability) 6 | 7 | Automatically taint and evict nodes with high CPU load. 8 | 9 | ## Synopsis 10 | 11 | By default, Kubernetes will not evict Pods from a node based on CPU usage, since CPU is considered a compressible resource. 
However, in some cases it might be desirable to actually evict some pods from a node with high CPU load (or at least to prevent Kubernetes from scheduling even more pods on a node that is already overloaded). 12 | 13 | This project contains a small Kubernetes controller that watches each node's CPU load; when a certain threshold is exceeded, the node will be tainted (so that no additional workloads are scheduled on an already-overloaded node) and finally the controller will start to evict Pods from the node. 14 | 15 | ## Installation 16 | 17 | This repository contains a Helm chart that can be used to install the controller; it needs to be run as a DaemonSet on every node. 18 | 19 | ```console 20 | > git clone https://github.com/mittwald/kubernetes-loadwatcher 21 | > helm upgrade \ 22 | --install \ 23 | --namespace kube-system \ 24 | loadwatcher \ 25 | ./kubernetes-loadwatcher/chart 26 | ``` 27 | 28 | ## How it works 29 | 30 | This controller can be started with two threshold flags: `-taint-threshold` and `-evict-threshold`. The controller will continuously monitor a node's CPU load. 31 | 32 | - If the CPU load (5min average) exceeds the _taint threshold_, the node will be tainted with a `loadwatcher.mittwald.systems/load-exceeded` taint with the `PreferNoSchedule` effect. This will instruct Kubernetes to not schedule any additional workloads on this node if at all possible. 33 | - If the CPU load (both 5min and 15min average) falls back below the _taint threshold_, the taint will be removed again. 34 | - If the CPU load (15 min average) exceeds the _eviction threshold_, the controller will pick a suitable Pod running on the node and evict it. 
However, the following types of Pods will _not_ be evicted: 35 | 36 | - Pods with the `Guaranteed` QoS class 37 | - Pods belonging to Stateful Sets 38 | - Pods belonging to Daemon Sets 39 | - Standalone pods not managed by any kind of controller 40 | - Pods running in the `kube-system` namespace or with a critical `priorityClassName` 41 | 42 | Among the remaining pods, pods with the `BestEffort` QoS class will be preferred for eviction. 43 | 44 | After a Pod was evicted, the next Pod will be evicted after a configurable _eviction backoff_ (controllable using the `evict-backoff` argument) if the load15 is still above the _eviction threshold_. 45 | -------------------------------------------------------------------------------- /chart/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for Kubernetes 4 | name: loadwatcher 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 
4 | */}} 5 | {{- define "chart.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "chart.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "chart.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | -------------------------------------------------------------------------------- /chart/templates/account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ include "chart.fullname" . }} 5 | labels: 6 | app.kubernetes.io/name: {{ include "chart.name" . }} 7 | helm.sh/chart: {{ include "chart.chart" . }} 8 | app.kubernetes.io/instance: {{ .Release.Name }} 9 | app.kubernetes.io/managed-by: {{ .Release.Service }} -------------------------------------------------------------------------------- /chart/templates/daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: {{ include "chart.fullname" . }} 5 | labels: 6 | app.kubernetes.io/name: {{ include "chart.name" . 
}} 7 | helm.sh/chart: {{ include "chart.chart" . }} 8 | app.kubernetes.io/instance: {{ .Release.Name }} 9 | app.kubernetes.io/managed-by: {{ .Release.Service }} 10 | spec: 11 | selector: 12 | matchLabels: 13 | app.kubernetes.io/name: {{ include "chart.name" . }} 14 | app.kubernetes.io/instance: {{ .Release.Name }} 15 | template: 16 | metadata: 17 | labels: 18 | app.kubernetes.io/name: {{ include "chart.name" . }} 19 | app.kubernetes.io/instance: {{ .Release.Name }} 20 | spec: 21 | serviceAccountName: {{ include "chart.fullname" . }} 22 | containers: 23 | - name: {{ .Chart.Name }} 24 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 25 | imagePullPolicy: {{ .Values.image.pullPolicy }} 26 | env: 27 | - name: NODE_NAME 28 | valueFrom: 29 | fieldRef: 30 | fieldPath: spec.nodeName 31 | args: 32 | - -node-name=$(NODE_NAME) 33 | {{- if .Values.taintThreshold }} 34 | - -taint-threshold={{ .Values.taintThreshold }} 35 | {{- end }} 36 | {{- if .Values.evictThreshold }} 37 | - -evict-threshold={{ .Values.evictThreshold }} 38 | {{- end }} 39 | - -evict-backoff={{ .Values.evictBackoff }} 40 | - -v=8 41 | resources: 42 | {{ toYaml .Values.resources | indent 12 }} 43 | {{- with .Values.nodeSelector }} 44 | nodeSelector: 45 | {{ toYaml . | indent 8 }} 46 | {{- end }} 47 | {{- with .Values.affinity }} 48 | affinity: 49 | {{ toYaml . | indent 8 }} 50 | {{- end }} 51 | tolerations: 52 | - key: loadwatcher.mittwald.systems/load-exceeded 53 | operator: Exists 54 | effect: NoSchedule 55 | {{- with .Values.tolerations }} 56 | {{ toYaml . | indent 8 }} 57 | {{- end }} 58 | -------------------------------------------------------------------------------- /chart/templates/role.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: {{ include "chart.fullname" . }} 5 | labels: 6 | app.kubernetes.io/name: {{ include "chart.name" . 
}} 7 | helm.sh/chart: {{ include "chart.chart" . }} 8 | app.kubernetes.io/instance: {{ .Release.Name }} 9 | app.kubernetes.io/managed-by: {{ .Release.Service }} 10 | rules: 11 | - apiGroups: [""] # "" indicates the core API group 12 | resources: ["nodes"] 13 | verbs: ["get", "watch", "list", "update", "patch"] 14 | - apiGroups: [""] 15 | resources: ["events"] 16 | verbs: ["create", "patch", "list", "get"] 17 | - apiGroups: [""] 18 | resources: ["pods/eviction"] 19 | verbs: ["create"] 20 | - apiGroups: [""] 21 | resources: ["pods"] 22 | verbs: ["list", "get", "watch"] 23 | -------------------------------------------------------------------------------- /chart/templates/rolebinding.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRoleBinding 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: {{ include "chart.fullname" . }} 5 | labels: 6 | app.kubernetes.io/name: {{ include "chart.name" . }} 7 | helm.sh/chart: {{ include "chart.chart" . }} 8 | app.kubernetes.io/instance: {{ .Release.Name }} 9 | app.kubernetes.io/managed-by: {{ .Release.Service }} 10 | subjects: 11 | - kind: ServiceAccount 12 | name: {{ include "chart.fullname" . }} 13 | namespace: {{ .Release.Namespace }} 14 | roleRef: 15 | kind: ClusterRole 16 | name: {{ include "chart.fullname" . }} 17 | apiGroup: rbac.authorization.k8s.io -------------------------------------------------------------------------------- /chart/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: quay.io/mittwald/kubernetes-loadwatcher 3 | tag: stable 4 | pullPolicy: IfNotPresent 5 | 6 | nameOverride: "" 7 | fullnameOverride: "" 8 | 9 | taintThreshold: ~ 10 | evictThreshold: ~ 11 | evictBackoff: 10m 12 | 13 | resources: {} 14 | # We usually recommend not to specify default resources and to leave this as a conscious 15 | # choice for the user. 
This also increases chances charts run on environments with little 16 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 17 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 18 | # limits: 19 | # cpu: 100m 20 | # memory: 128Mi 21 | # requests: 22 | # cpu: 100m 23 | # memory: 128Mi 24 | 25 | nodeSelector: {} 26 | 27 | tolerations: [] 28 | 29 | affinity: {} 30 | -------------------------------------------------------------------------------- /cmd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "github.com/mittwald/kubernetes-loadwatcher/pkg/config" 7 | "github.com/mittwald/kubernetes-loadwatcher/pkg/loadwatcher" 8 | "k8s.io/client-go/kubernetes" 9 | "k8s.io/client-go/rest" 10 | "k8s.io/client-go/tools/clientcmd" 11 | "k8s.io/klog" 12 | "os" 13 | "os/signal" 14 | "syscall" 15 | ) 16 |/* main parses the command-line flags, builds the Kubernetes client, the load watcher, the tainter and the evicter, and then processes watcher events until one of the watcher channels is closed. Every startup error results in a panic. */ 17 | func main() { 18 | var f config.StartupFlags 19 | 20 | klog.InitFlags(nil) 21 | 22 | flag.StringVar(&f.KubeConfig, "kubeconfig", "", "file path to kubeconfig") 23 | flag.IntVar(&f.TaintThreshold, "taint-threshold", 0, "load threshold value (set to 0 for automatic detection)") 24 | flag.IntVar(&f.EvictThreshold, "evict-threshold", 0, "load threshold value (set to 0 for automatic detection)") 25 | flag.StringVar(&f.EvictBackoff, "evict-backoff", "10m", "time to wait between evicting Pods") 26 | flag.StringVar(&f.NodeName, "node-name", "", "current node name") 27 | flag.Parse() 28 | 29 | if f.NodeName == "" { 30 | panic("-node-name not set") 31 | } 32 | 33 | cfg, err := loadKubernetesConfig(f) 34 | if err != nil { 35 | panic(err) 36 | } 37 | 38 | c, err := kubernetes.NewForConfig(cfg) 39 | if err != nil { 40 | panic(err) 41 | } 42 | 43 | w, err := loadwatcher.NewWatcher(f.TaintThreshold) 44 | if err != nil { 45 | panic(err) 46 | } 47 | 48 | t, err := loadwatcher.NewTainter(c, f.NodeName) 49 | if err != nil 
{ 50 | panic(err) 51 | } 52 | 53 | e, err := loadwatcher.NewEvicter(c, f.EvictThreshold, f.NodeName, f.EvictBackoff) 54 | if err != nil { 55 | panic(err) 56 | } 57 |/* Cancel ctx as soon as SIGTERM or SIGINT is received. */ 58 | sigChan := make(chan os.Signal, 1) 59 | signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT) 60 | 61 | ctx, cancelFn := context.WithCancel(context.Background()) 62 | 63 | go func() { 64 | s := <-sigChan 65 | 66 | klog.Infof("received signal %s", s) 67 | 68 | cancelFn() 69 | }() 70 | 71 | isTainted, err := t.IsNodeTainted(ctx) 72 | if err != nil { 73 | panic(err) 74 | } 75 |/* Hand the current taint state of the node to the watcher before it starts running. NOTE(review): presumably this keeps an already tainted node from being reported as newly exceeded; confirm in the watcher implementation. */ 76 | w.SetAsHigh(isTainted) 77 | 78 | exc, dec, errs := w.Run(ctx) 79 | for { 80 | select { 81 | case evt, ok := <-exc: 82 | if !ok { 83 | klog.Infof("exceedance channel closed; stopping") 84 | return 85 | } 86 | 87 | if evt.ChangedToHigh() { 88 | klog.Infof("load5 exceeded threshold, load5=%f load15=%f", evt.Load5, evt.Load15) 89 | 90 | if err := t.TaintNode(ctx, evt); err != nil { 91 | klog.Errorf("error while tainting node: %s", err.Error()) 92 | } 93 | } 94 |/* EvictPod is called for every exceedance event, not only when the state changed to high. */ 95 | if _, err := e.EvictPod(ctx, evt); err != nil { 96 | klog.Errorf("error while evicting pod: %s", err.Error()) 97 | } 98 | case evt, ok := <-dec: 99 | if !ok { 100 | klog.Infof("deceedance channel closed; stopping") 101 | return 102 | } 103 | 104 | klog.Infof("load15 deceeded threshold, load5=%f load15=%f", evt.Load5, evt.Load15) 105 | 106 | if err := t.UntaintNode(ctx, evt); err != nil { 107 | klog.Errorf("error while removing taint from node: %s", err.Error()) 108 | } 109 | case err, ok := <-errs: 110 | if !ok { 111 | return 112 | } 113 | 114 | if err != nil { 115 | klog.Errorf("error while polling for status updates: %s", err.Error()) 116 | } 117 | } 118 | } 119 | } 120 |/* loadKubernetesConfig returns the in-cluster configuration when no kubeconfig path was given and otherwise builds the configuration from the given kubeconfig file. */ 121 | func loadKubernetesConfig(f config.StartupFlags) (*rest.Config, error) { 122 | if f.KubeConfig == "" { 123 | return rest.InClusterConfig() 124 | } 125 | 126 | return clientcmd.BuildConfigFromFlags("", f.KubeConfig) 127 | } 128 | 
-------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mittwald/kubernetes-loadwatcher 2 | 3 | go 1.22.0 4 | 5 | require ( 6 | github.com/shirou/gopsutil v3.21.11+incompatible 7 | k8s.io/api v0.29.1 8 | k8s.io/apimachinery v0.29.1 9 | k8s.io/client-go v0.29.1 10 | k8s.io/klog v1.0.0 11 | ) 12 | 13 | require ( 14 | github.com/davecgh/go-spew v1.1.1 // indirect 15 | github.com/emicklei/go-restful/v3 v3.11.0 // indirect 16 | github.com/go-logr/logr v1.3.0 // indirect 17 | github.com/go-ole/go-ole v1.2.6 // indirect 18 | github.com/go-openapi/jsonpointer v0.19.6 // indirect 19 | github.com/go-openapi/jsonreference v0.20.2 // indirect 20 | github.com/go-openapi/swag v0.22.3 // indirect 21 | github.com/gogo/protobuf v1.3.2 // indirect 22 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 23 | github.com/golang/protobuf v1.5.3 // indirect 24 | github.com/google/gnostic-models v0.6.8 // indirect 25 | github.com/google/gofuzz v1.2.0 // indirect 26 | github.com/google/uuid v1.3.0 // indirect 27 | github.com/imdario/mergo v0.3.7 // indirect 28 | github.com/josharian/intern v1.0.0 // indirect 29 | github.com/json-iterator/go v1.1.12 // indirect 30 | github.com/mailru/easyjson v0.7.7 // indirect 31 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 32 | github.com/modern-go/reflect2 v1.0.2 // indirect 33 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 34 | github.com/spf13/pflag v1.0.5 // indirect 35 | github.com/tklauser/go-sysconf v0.3.13 // indirect 36 | github.com/tklauser/numcpus v0.7.0 // indirect 37 | github.com/yusufpapurcu/wmi v1.2.4 // indirect 38 | golang.org/x/net v0.19.0 // indirect 39 | golang.org/x/oauth2 v0.10.0 // indirect 40 | golang.org/x/sys v0.15.0 // indirect 41 | golang.org/x/term v0.15.0 // indirect 42 | golang.org/x/text v0.14.0 // 
indirect 43 | golang.org/x/time v0.3.0 // indirect 44 | google.golang.org/appengine v1.6.7 // indirect 45 | google.golang.org/protobuf v1.31.0 // indirect 46 | gopkg.in/inf.v0 v0.9.1 // indirect 47 | gopkg.in/yaml.v2 v2.4.0 // indirect 48 | gopkg.in/yaml.v3 v3.0.1 // indirect 49 | k8s.io/klog/v2 v2.110.1 // indirect 50 | k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect 51 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect 52 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 53 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect 54 | sigs.k8s.io/yaml v1.3.0 // indirect 55 | ) 56 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= 6 | github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= 7 | github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= 8 | github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= 9 | github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 10 | github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= 11 | github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= 12 | github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= 13 | github.com/go-openapi/jsonpointer v0.19.6/go.mod 
h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= 14 | github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= 15 | github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= 16 | github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= 17 | github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= 18 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= 19 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= 20 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 21 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 22 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= 23 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 24 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 25 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 26 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 27 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 28 | github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= 29 | github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= 30 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 31 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 32 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 33 | github.com/google/go-cmp v0.6.0/go.mod 
h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 34 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 35 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 36 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 37 | github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= 38 | github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= 39 | github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= 40 | github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 41 | github.com/imdario/mergo v0.3.7 h1:Y+UAYTZ7gDEuOfhxKWy+dvb5dRQ6rJjFSdX2HZY1/gI= 42 | github.com/imdario/mergo v0.3.7/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= 43 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 44 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 45 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 46 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 47 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 48 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 49 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 50 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 51 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 52 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 53 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 54 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 55 | github.com/kr/text v0.2.0/go.mod 
h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 56 | github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= 57 | github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 58 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 59 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 60 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 61 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 62 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 63 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 64 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 65 | github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4= 66 | github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o= 67 | github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg= 68 | github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= 69 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 70 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 71 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 72 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 73 | github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= 74 | github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= 75 | 
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 76 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 77 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 78 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 79 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 80 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 81 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 82 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 83 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 84 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 85 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 86 | github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4= 87 | github.com/tklauser/go-sysconf v0.3.13/go.mod h1:zwleP4Q4OehZHGn4CYZDipCgg9usW5IJePewFCGVEa0= 88 | github.com/tklauser/numcpus v0.7.0 h1:yjuerZP127QG9m5Zh/mSO4wqurYil27tHrqwRoRjpr4= 89 | github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDguyOZRUzAY= 90 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 91 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 92 | github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= 93 | github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= 94 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 95 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 96 | 
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 97 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 98 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 99 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 100 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 101 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 102 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 103 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 104 | golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= 105 | golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= 106 | golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8= 107 | golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI= 108 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 109 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 110 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 111 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 112 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 113 | golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 114 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 115 | golang.org/x/sys 
v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= 116 | golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 117 | golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= 118 | golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= 119 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 120 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 121 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 122 | golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= 123 | golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 124 | golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= 125 | golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 126 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 127 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 128 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 129 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 130 | golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= 131 | golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= 132 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 133 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 134 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 135 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 136 
| google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= 137 | google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= 138 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 139 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 140 | google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= 141 | google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 142 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 143 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 144 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 145 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 146 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 147 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 148 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 149 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 150 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 151 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 152 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 153 | k8s.io/api v0.29.1 h1:DAjwWX/9YT7NQD4INu49ROJuZAAAP/Ijki48GUPzxqw= 154 | k8s.io/api v0.29.1/go.mod h1:7Kl10vBRUXhnQQI8YR/R327zXC8eJ7887/+Ybta+RoQ= 155 | k8s.io/apimachinery v0.29.1 h1:KY4/E6km/wLBguvCZv8cKTeOwwOBqFNjwJIdMkMbbRc= 156 | k8s.io/apimachinery v0.29.1/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= 157 | k8s.io/client-go v0.29.1 h1:19B/+2NGEwnFLzt0uB5kNJnfTsbV8w6TgQRz9l7ti7A= 158 | 
k8s.io/client-go v0.29.1/go.mod h1:TDG/psL9hdet0TI9mGyHJSgRkW3H9JZk2dNEUS7bRks= 159 | k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= 160 | k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= 161 | k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= 162 | k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= 163 | k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= 164 | k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= 165 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= 166 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= 167 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= 168 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= 169 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= 170 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= 171 | sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= 172 | sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= 173 | -------------------------------------------------------------------------------- /pkg/config/flags.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type StartupFlags struct { 4 | KubeConfig string 5 | TaintThreshold int 6 | EvictThreshold int 7 | EvictBackoff string 8 | NodeName string 9 | } 10 | -------------------------------------------------------------------------------- /pkg/jsonpatch/types.go: -------------------------------------------------------------------------------- 1 | package jsonpatch 2 | 3 
| import "encoding/json" 4 | 5 | type PatchList []Patch 6 | 7 | type Patch struct { 8 | Op string `json:"op"` 9 | Path string `json:"path"` 10 | Value interface{} `json:"value,omitempty"` 11 | } 12 | 13 | func (p Patch) ToJSON() []byte { 14 | j, err := json.Marshal(&p) 15 | if err != nil { 16 | panic(err) 17 | } 18 | 19 | return j 20 | } 21 | 22 | func (p PatchList) ToJSON() []byte { 23 | j, err := json.Marshal(&p) 24 | if err != nil { 25 | panic(err) 26 | } 27 | 28 | return j 29 | } 30 | -------------------------------------------------------------------------------- /pkg/loadwatcher/cpucount.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | import "github.com/shirou/gopsutil/cpu" 4 | 5 | func determineCPUCount() (cpuCount int32, err error) { 6 | cpus, err := cpu.Info() 7 | if err != nil { 8 | return 0, err 9 | } 10 | 11 | for i := range cpus { 12 | cpuCount += cpus[i].Cores 13 | } 14 | 15 | return 16 | } 17 | -------------------------------------------------------------------------------- /pkg/loadwatcher/event_type.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | type LoadThresholdEvent struct { 4 | LoadThreshold float64 5 | Load5 float64 6 | Load15 float64 7 | 8 | IsHigh bool 9 | WasHigh bool 10 | } 11 | 12 | func (e *LoadThresholdEvent) ChangedToHigh() bool { 13 | return e.IsHigh && !e.WasHigh 14 | } 15 | 16 | func (e *LoadThresholdEvent) ChangedToLow() bool { 17 | return !e.IsHigh && e.WasHigh 18 | } 19 | -------------------------------------------------------------------------------- /pkg/loadwatcher/evict_selection.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | import ( 4 | "k8s.io/api/core/v1" 5 | "k8s.io/klog" 6 | "sort" 7 | ) 8 | 9 | type PodCandidateSet []PodCandidate 10 | 11 | func (s PodCandidateSet) Len() int { 12 | return len(s) 13 | } 14 | 15 | 
func (s PodCandidateSet) Less(i, j int) bool { 16 | return s[i].Score < s[j].Score 17 | } 18 | 19 | func (s PodCandidateSet) Swap(i, j int) { 20 | x := s[i] 21 | s[i] = s[j] 22 | s[j] = x 23 | } 24 | 25 | type PodCandidate struct { 26 | Pod *v1.Pod 27 | Score int 28 | } 29 | 30 | func PodCandidateSetFromPodList(l *v1.PodList) PodCandidateSet { 31 | s := make(PodCandidateSet, len(l.Items)) 32 | 33 | for i := range l.Items { 34 | s[i] = PodCandidate{ 35 | Pod: &l.Items[i], 36 | Score: 0, 37 | } 38 | } 39 | 40 | return s 41 | } 42 | 43 | func (s PodCandidateSet) scoreByQOSClass() { 44 | for i := range s { 45 | switch s[i].Pod.Status.QOSClass { 46 | case v1.PodQOSBestEffort: 47 | s[i].Score += 200 48 | case v1.PodQOSBurstable: 49 | s[i].Score += 100 50 | } 51 | } 52 | } 53 | 54 | func (s PodCandidateSet) scoreByOwnerType() { 55 | for i := range s { 56 | // do not evict Pods without owner; these will probably not be re-scheduled if evicted 57 | if len(s[i].Pod.OwnerReferences) == 0 { 58 | s[i].Score -= 1000 59 | } 60 | 61 | for j := range s[i].Pod.OwnerReferences { 62 | o := &s[i].Pod.OwnerReferences[j] 63 | 64 | switch o.Kind { 65 | case "ReplicaSet": 66 | s[i].Score += 100 67 | case "StatefulSet": 68 | s[i].Score -= 1000 69 | case "DaemonSet": 70 | s[i].Score -= 1000 71 | } 72 | } 73 | } 74 | } 75 | 76 | func (s PodCandidateSet) scoreByCriticality() { 77 | for i := range s { 78 | if s[i].Pod.Namespace == "kube-system" { 79 | s[i].Score -= 1000 80 | } 81 | 82 | switch s[i].Pod.Spec.PriorityClassName { 83 | case "system-cluster-critical": 84 | s[i].Score -= 1000 85 | case "system-node-critical": 86 | s[i].Score -= 1000 87 | } 88 | 89 | if _, ok := s[i].Pod.Annotations["scheduler.alpha.kubernetes.io/critical-pod"]; ok { 90 | s[i].Score -= 1000 91 | } 92 | } 93 | } 94 | 95 | func (s PodCandidateSet) SelectPodForEviction() *v1.Pod { 96 | s.scoreByQOSClass() 97 | s.scoreByOwnerType() 98 | s.scoreByCriticality() 99 | 100 | sort.Stable(sort.Reverse(s)) 101 | 102 | for i := 
range s { 103 | klog.Infof("eviction candidate: %s/%s (score of %d)", s[i].Pod.Namespace, s[i].Pod.Name, s[i].Score) 104 | } 105 | 106 | for i := range s { 107 | if s[i].Score < 0 { 108 | continue 109 | } 110 | 111 | klog.Infof("selected candidate: %s/%s (score of %d)", s[i].Pod.Namespace, s[i].Pod.Name, s[i].Score) 112 | return s[i].Pod 113 | } 114 | 115 | return nil 116 | } 117 | -------------------------------------------------------------------------------- /pkg/loadwatcher/evicter_evict.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | import ( 4 | "context" 5 | "k8s.io/api/core/v1" 6 | "k8s.io/api/policy/v1beta1" 7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | "k8s.io/apimachinery/pkg/fields" 9 | "k8s.io/klog" 10 | "time" 11 | ) 12 | 13 | // CanEvict determines if the evicter can now evict a Pod at this time, or if it 14 | // is still in its back-off period. 15 | func (e *Evicter) CanEvict() bool { 16 | if e.lastEviction.IsZero() { 17 | return true 18 | } 19 | 20 | return time.Now().Sub(e.lastEviction) > e.backoff 21 | } 22 | 23 | // EvictPod tries to pick a suitable Pod for eviction and evict it. 
24 | func (e *Evicter) EvictPod(ctx context.Context, evt LoadThresholdEvent) (bool, error) { 25 | if evt.Load15 < e.threshold { 26 | return false, nil 27 | } 28 | 29 | if !e.CanEvict() { 30 | klog.Infof("eviction threshold exceeded; still in back-off") 31 | return false, nil 32 | } 33 | 34 | klog.Infof("searching for pod to evict") 35 | 36 | fieldSelector := fields.OneTermEqualSelector("spec.nodeName", e.nodeName) 37 | 38 | podsOnNode, err := e.client.CoreV1().Pods("").List(ctx, metav1.ListOptions{ 39 | FieldSelector: fieldSelector.String(), 40 | }) 41 | 42 | if err != nil { 43 | return false, err 44 | } 45 | 46 | candidates := PodCandidateSetFromPodList(podsOnNode) 47 | podToEvict := candidates.SelectPodForEviction() 48 | 49 | if podToEvict == nil { 50 | e.recorder.Eventf(e.nodeRef, v1.EventTypeWarning, "NoPodToEvict", "wanted to evict Pod, but no suitable candidate found") 51 | return false, nil 52 | } 53 | 54 | eviction := v1beta1.Eviction{ 55 | ObjectMeta: metav1.ObjectMeta{ 56 | Name: podToEvict.ObjectMeta.Name, 57 | Namespace: podToEvict.ObjectMeta.Namespace, 58 | }, 59 | } 60 | 61 | klog.Infof("eviction: %+v", eviction) 62 | 63 | e.lastEviction = time.Now() 64 | 65 | e.recorder.Eventf(podToEvict, v1.EventTypeWarning, "EvictHighLoad", "evicting pod due to high load on node load15=%.2f threshold=%.2f", evt.Load15, evt.LoadThreshold) 66 | e.recorder.Eventf(e.nodeRef, v1.EventTypeWarning, "EvictHighLoad", "evicting pod due to high load on node load15=%.2f threshold=%.2f", evt.Load15, evt.LoadThreshold) 67 | 68 | err = e.client.CoreV1().Pods(podToEvict.Namespace).Evict(ctx, &eviction) 69 | return true, err 70 | } 71 | -------------------------------------------------------------------------------- /pkg/loadwatcher/evicter_type.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | import ( 4 | "k8s.io/api/core/v1" 5 | "k8s.io/apimachinery/pkg/types" 6 | "k8s.io/client-go/kubernetes" 7 | 
"k8s.io/client-go/kubernetes/scheme" 8 | typedv1 "k8s.io/client-go/kubernetes/typed/core/v1" 9 | "k8s.io/client-go/tools/record" 10 | "k8s.io/klog" 11 | "time" 12 | ) 13 | 14 | type Evicter struct { 15 | client kubernetes.Interface 16 | threshold float64 17 | nodeName string 18 | nodeRef *v1.ObjectReference 19 | recorder record.EventRecorder 20 | backoff time.Duration 21 | lastEviction time.Time 22 | } 23 | 24 | func NewEvicter(client kubernetes.Interface, threshold int, nodeName string, backoff string) (*Evicter, error) { 25 | if threshold == 0 { 26 | cpuCount, err := determineCPUCount() 27 | if err != nil { 28 | return nil, err 29 | } 30 | 31 | threshold = int(cpuCount) * 4 32 | } 33 | 34 | backoffDuration, err := time.ParseDuration(backoff) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | b := record.NewBroadcaster() 40 | b.StartLogging(klog.Infof) 41 | b.StartRecordingToSink(&typedv1.EventSinkImpl{ 42 | Interface: client.CoreV1().Events(""), 43 | }) 44 | 45 | r := b.NewRecorder(scheme.Scheme, v1.EventSource{Host: nodeName, Component: ComponentName + "/evicter"}) 46 | 47 | nodeRef := &v1.ObjectReference{ 48 | Kind: "Node", 49 | Name: nodeName, 50 | UID: types.UID(nodeName), 51 | Namespace: "", 52 | } 53 | 54 | return &Evicter{ 55 | client: client, 56 | threshold: float64(threshold), 57 | nodeName: nodeName, 58 | nodeRef: nodeRef, 59 | recorder: r, 60 | backoff: backoffDuration, 61 | }, nil 62 | } 63 | -------------------------------------------------------------------------------- /pkg/loadwatcher/tainter_taint.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/mittwald/kubernetes-loadwatcher/pkg/jsonpatch" 7 | "k8s.io/api/core/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/types" 10 | "k8s.io/klog" 11 | ) 12 | 13 | // IsNodeTainted tests is the current node is already tainted. 
This may happen 14 | // if the loadwatcher happens to restart (for whichever reason) AFTER it has 15 | // tainted the node and then terminates before it can remove the taint. 16 | func (t *Tainter) IsNodeTainted(ctx context.Context) (bool, error) { 17 | node, err := t.client.CoreV1().Nodes().Get(ctx, t.nodeName, metav1.GetOptions{}) 18 | if err != nil { 19 | return false, err 20 | } 21 | 22 | for i := range node.Spec.Taints { 23 | if node.Spec.Taints[i].Key == TaintKey { 24 | return true, nil 25 | } 26 | } 27 | 28 | return false, nil 29 | } 30 | 31 | // TaintNode taints the current node and attaches a respective Event object to 32 | // the node. 33 | func (t *Tainter) TaintNode(ctx context.Context, evt LoadThresholdEvent) error { 34 | node, err := t.client.CoreV1().Nodes().Get(ctx, t.nodeName, metav1.GetOptions{}) 35 | if err != nil { 36 | return err 37 | } 38 | 39 | nodeCopy := node.DeepCopy() 40 | 41 | if nodeCopy.Spec.Taints == nil { 42 | nodeCopy.Spec.Taints = make([]v1.Taint, 0, 1) 43 | } 44 | 45 | for i := range nodeCopy.Spec.Taints { 46 | if nodeCopy.Spec.Taints[i].Key == TaintKey { 47 | klog.Infof("wanted to taint node %s, but taint already exists", nodeCopy.Name) 48 | return nil 49 | } 50 | } 51 | 52 | nodeCopy.Spec.Taints = append(nodeCopy.Spec.Taints, v1.Taint{ 53 | Key: TaintKey, 54 | Value: "true", 55 | Effect: v1.TaintEffectPreferNoSchedule, 56 | }) 57 | 58 | _, err = t.client.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) 59 | 60 | t.recorder.Eventf(t.nodeRef, v1.EventTypeWarning, "LoadThresholdExceeded", "load5 on node was %.2f; exceeded threshold of %.2f. 
tainting node", evt.Load5, evt.LoadThreshold) 61 | 62 | if err != nil { 63 | t.recorder.Eventf(t.nodeRef, v1.EventTypeWarning, "NodePatchError", "could not patch node: %s", err.Error()) 64 | return err 65 | } 66 | 67 | return nil 68 | } 69 | 70 | // UntaintNode removes the taint from the node again 71 | func (t *Tainter) UntaintNode(ctx context.Context, evt LoadThresholdEvent) error { 72 | node, err := t.client.CoreV1().Nodes().Get(ctx, t.nodeName, metav1.GetOptions{}) 73 | if err != nil { 74 | return err 75 | } 76 | 77 | taintIndex := -1 78 | 79 | for i, t := range node.Spec.Taints { 80 | if t.Key == TaintKey { 81 | taintIndex = i 82 | break 83 | } 84 | } 85 | 86 | if taintIndex == -1 { 87 | klog.Infof("wanted to remove taint from node %s, but taint was already gone", node.Name) 88 | return nil 89 | } 90 | 91 | t.recorder.Eventf(t.nodeRef, v1.EventTypeNormal, "LoadThresholdDeceeded", "load15 on node was %.2f; deceeded threshold of %.2f. untainting node", evt.Load15, evt.LoadThreshold) 92 | 93 | _, err = t.client.CoreV1().Nodes().Patch(ctx, t.nodeName, types.JSONPatchType, jsonpatch.PatchList{{ 94 | Op: "test", 95 | Path: fmt.Sprintf("/spec/taints/%d/key", taintIndex), 96 | Value: TaintKey, 97 | }, { 98 | Op: "remove", 99 | Path: fmt.Sprintf("/spec/taints/%d", taintIndex), 100 | Value: "", 101 | }}.ToJSON(), metav1.PatchOptions{}) 102 | 103 | if err != nil { 104 | t.recorder.Eventf(t.nodeRef, v1.EventTypeWarning, "NodePatchError", "could not patch node: %s", err.Error()) 105 | return err 106 | } 107 | 108 | return nil 109 | } 110 | -------------------------------------------------------------------------------- /pkg/loadwatcher/tainter_type.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | import ( 4 | "k8s.io/api/core/v1" 5 | "k8s.io/apimachinery/pkg/types" 6 | "k8s.io/client-go/kubernetes" 7 | "k8s.io/client-go/kubernetes/scheme" 8 | typedv1 "k8s.io/client-go/kubernetes/typed/core/v1" 9 | 
"k8s.io/client-go/tools/record" 10 | "k8s.io/klog" 11 | ) 12 | 13 | const ComponentName = "loadwatcher" 14 | 15 | const TaintKey = "loadwatcher.mittwald.systems/load-exceeded" 16 | 17 | type Tainter struct { 18 | client kubernetes.Interface 19 | recorder record.EventRecorder 20 | nodeName string 21 | nodeRef *v1.ObjectReference 22 | } 23 | 24 | func NewTainter(c kubernetes.Interface, nodeName string) (*Tainter, error) { 25 | b := record.NewBroadcaster() 26 | b.StartLogging(klog.Infof) 27 | b.StartRecordingToSink(&typedv1.EventSinkImpl{ 28 | Interface: c.CoreV1().Events(""), 29 | }) 30 | 31 | r := b.NewRecorder(scheme.Scheme, v1.EventSource{Host: nodeName, Component: ComponentName + "/tainter"}) 32 | 33 | nodeRef := &v1.ObjectReference{ 34 | Kind: "Node", 35 | Name: nodeName, 36 | UID: types.UID(nodeName), 37 | Namespace: "", 38 | } 39 | 40 | return &Tainter{ 41 | client: c, 42 | recorder: r, 43 | nodeName: nodeName, 44 | nodeRef: nodeRef, 45 | }, nil 46 | } 47 | -------------------------------------------------------------------------------- /pkg/loadwatcher/watcher_run.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | import ( 4 | "context" 5 | "github.com/shirou/gopsutil/load" 6 | "k8s.io/klog" 7 | "time" 8 | ) 9 | 10 | func (w *Watcher) SetAsHigh(high bool) { 11 | w.isCurrentlyHigh = high 12 | } 13 | 14 | // Run contains the main loop of the load watcher. At a configurable interval, 15 | // it will query the nodes CPU load and emit "LoadThresholdEvents" when a 16 | // "high load" condition is encountered (or that condition passes again). 
17 | func (w *Watcher) Run(ctx context.Context) (<-chan LoadThresholdEvent, <-chan LoadThresholdEvent, <-chan error) { 18 | exceeded := make(chan LoadThresholdEvent) 19 | deceeded := make(chan LoadThresholdEvent) 20 | errs := make(chan error) 21 | ticker := time.Tick(w.TickerInterval) 22 | 23 | go func() { 24 | defer func() { 25 | close(exceeded) 26 | close(deceeded) 27 | close(errs) 28 | }() 29 | 30 | for { 31 | select { 32 | case <-ticker: 33 | loadStat, err := load.Avg() 34 | if err != nil { 35 | errs <- err 36 | } 37 | 38 | klog.Infof("current state: high_load=%t load5=%.2f load15=%.2f threshold=%.2f", w.isCurrentlyHigh, loadStat.Load5, loadStat.Load15, w.LoadThreshold) 39 | 40 | if loadStat.Load5 >= w.LoadThreshold { 41 | evt := LoadThresholdEvent{ 42 | Load5: loadStat.Load5, 43 | Load15: loadStat.Load15, 44 | LoadThreshold: w.LoadThreshold, 45 | WasHigh: w.isCurrentlyHigh, 46 | IsHigh: true, 47 | } 48 | w.isCurrentlyHigh = true 49 | exceeded <- evt 50 | } else if loadStat.Load5 < w.LoadThreshold && loadStat.Load15 < w.LoadThreshold && w.isCurrentlyHigh { 51 | evt := LoadThresholdEvent{ 52 | Load5: loadStat.Load5, 53 | Load15: loadStat.Load15, 54 | LoadThreshold: w.LoadThreshold, 55 | WasHigh: w.isCurrentlyHigh, 56 | IsHigh: false, 57 | } 58 | w.isCurrentlyHigh = false 59 | deceeded <- evt 60 | } 61 | case <-ctx.Done(): 62 | if err := ctx.Err(); err != nil { 63 | errs <- err 64 | } 65 | return 66 | } 67 | } 68 | }() 69 | 70 | return exceeded, deceeded, errs 71 | } 72 | -------------------------------------------------------------------------------- /pkg/loadwatcher/watcher_type.go: -------------------------------------------------------------------------------- 1 | package loadwatcher 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type Watcher struct { 8 | TickerInterval time.Duration 9 | LoadThreshold float64 10 | 11 | isCurrentlyHigh bool 12 | } 13 | 14 | func NewWatcher(loadThreshold int) (*Watcher, error) { 15 | if loadThreshold == 0 { 16 | cpuCount, err := 
determineCPUCount() 17 | if err != nil { 18 | return nil, err 19 | } 20 | 21 | loadThreshold = int(cpuCount) 22 | } 23 | 24 | return &Watcher{ 25 | LoadThreshold: float64(loadThreshold), 26 | TickerInterval: 15 * time.Second, 27 | }, nil 28 | } 29 | --------------------------------------------------------------------------------