├── .github └── workflows │ ├── code_verify.yaml │ └── workflows-approve.yaml ├── .gitignore ├── LICENSE ├── Makefile ├── Makefile.def ├── OWNERS ├── README.md ├── cmd └── descheduler │ ├── app │ ├── options │ │ └── options.go │ ├── server.go │ └── version.go │ └── descheduler.go ├── docs └── img │ └── descheduler_EN.svg ├── go.mod ├── go.sum ├── hack ├── lib │ ├── golang.sh │ ├── init.sh │ ├── install.sh │ └── util.sh └── verify-gofmt.sh ├── installer ├── dockerfile │ └── descheduler │ │ └── Dockerfile └── volcano-descheduler-development.yaml └── pkg ├── apis └── componentconfig │ ├── doc.go │ ├── register.go │ ├── types.go │ ├── v1alpha1 │ ├── defaults.go │ ├── doc.go │ ├── register.go │ ├── types.go │ ├── zz_generated.conversion.go │ ├── zz_generated.deepcopy.go │ └── zz_generated.defaults.go │ └── zz_generated.deepcopy.go ├── descheduler ├── descheduler.go ├── descheduler_test.go ├── leaderelection.go ├── policyconfig.go ├── policyconfig_test.go └── setupplugins.go └── framework ├── plugins └── loadaware │ ├── deepcopy_generated.go │ ├── defaults.go │ ├── defaults_test.go │ ├── load_aware.go │ ├── metrics │ ├── metrics_client.go │ ├── metrics_client_prometheus.go │ └── metrics_client_prometheus_adapt.go │ ├── nodeutilization.go │ ├── nodeutilization_test.go │ ├── register.go │ ├── types.go │ ├── validation.go │ └── validation_test.go └── profile ├── profile.go └── profile_test.go /.github/workflows/code_verify.yaml: -------------------------------------------------------------------------------- 1 | name: Code Verify 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | pull_request: 9 | 10 | jobs: 11 | verify: 12 | runs-on: ubuntu-24.04 13 | name: Verify codes, generated files 14 | timeout-minutes: 40 15 | env: 16 | GOPATH: /home/runner/work/${{ github.repository }} 17 | steps: 18 | - name: Install Go 19 | uses: actions/setup-go@v4 20 | with: 21 | go-version: 1.22.x 22 | 23 | - name: Checkout code 24 | uses: actions/checkout@v3 25 | with: 26 | 
fetch-depth: 0 27 | path: ./src/github.com/${{ github.repository }} 28 | 29 | - uses: actions/cache@v2 30 | with: 31 | path: ~/go/pkg/mod 32 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 33 | 34 | - name: Run verify test 35 | run: | 36 | make verify 37 | make image 38 | sudo make unit-test 39 | working-directory: ./src/github.com/${{ github.repository }} 40 | -------------------------------------------------------------------------------- /.github/workflows/workflows-approve.yaml: -------------------------------------------------------------------------------- 1 | name: Approve Workflows 2 | 3 | on: 4 | pull_request_target: 5 | types: 6 | - labeled 7 | - synchronize 8 | branches: 9 | - main 10 | - release-** 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | approve: 17 | name: Approve workflows if contains ok-to-test label 18 | if: contains(github.event.pull_request.labels.*.name, 'ok-to-test') 19 | runs-on: ubuntu-latest 20 | permissions: 21 | actions: write 22 | steps: 23 | - name: Execute workflows 24 | uses: actions/github-script@v7 25 | continue-on-error: true 26 | with: 27 | github-token: ${{ secrets.GITHUB_TOKEN }} 28 | script: | 29 | const result = await github.rest.actions.listWorkflowRunsForRepo({ 30 | owner: context.repo.owner, 31 | repo: context.repo.repo, 32 | event: "pull_request", 33 | status: "action_required", 34 | head_sha: context.payload.pull_request.head.sha, 35 | per_page: 100 36 | }); 37 | 38 | for (var run of result.data.workflow_runs) { 39 | await github.rest.actions.approveWorkflowRun({ 40 | owner: context.repo.owner, 41 | repo: context.repo.repo, 42 | run_id: run.id 43 | }); 44 | } 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # 
https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | go.work.sum 23 | 24 | # env file 25 | .env 26 | 27 | # IDE 28 | .idea 29 | .vscode 30 | 31 | # output 32 | _output 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Volcano Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | BIN_DIR=_output/bin 16 | RELEASE_DIR=_output/release 17 | REPO_PATH=volcano.sh/descheduler 18 | IMAGE_PREFIX=volcanosh 19 | CC ?= "gcc" 20 | SUPPORT_PLUGINS ?= "no" 21 | CRD_VERSION ?= v1 22 | BUILDX_OUTPUT_TYPE ?= "docker" 23 | 24 | # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) 25 | ifeq (,$(shell go env GOBIN)) 26 | GOBIN=$(shell go env GOPATH)/bin 27 | else 28 | GOBIN=$(shell go env GOBIN) 29 | endif 30 | 31 | OS=$(shell uname -s | tr '[:upper:]' '[:lower:]') 32 | 33 | # Get OS architecture 34 | OSARCH=$(shell uname -m) 35 | ifeq ($(OSARCH),x86_64) 36 | GOARCH?=amd64 37 | else ifeq ($(OSARCH),x64) 38 | GOARCH?=amd64 39 | else ifeq ($(OSARCH),aarch64) 40 | GOARCH?=arm64 41 | else ifeq ($(OSARCH),aarch64_be) 42 | GOARCH?=arm64 43 | else ifeq ($(OSARCH),armv8b) 44 | GOARCH?=arm64 45 | else ifeq ($(OSARCH),armv8l) 46 | GOARCH?=arm64 47 | else ifeq ($(OSARCH),i386) 48 | GOARCH?=x86 49 | else ifeq ($(OSARCH),i686) 50 | GOARCH?=x86 51 | else ifeq ($(OSARCH),arm) 52 | GOARCH?=arm 53 | else 54 | GOARCH?=$(OSARCH) 55 | endif 56 | 57 | # Run `make image DOCKER_PLATFORMS="linux/amd64,linux/arm64" BUILDX_OUTPUT_TYPE=registry IMAGE_PREFIX=[yourregistry]` to push multi-platform 58 | DOCKER_PLATFORMS ?= "linux/${GOARCH}" 59 | 60 | GOOS ?= linux 61 | 62 | include Makefile.def 63 | 64 | .EXPORT_ALL_VARIABLES: 65 | 66 | all: vc-descheduler 67 | 68 | init: 69 | mkdir -p ${BIN_DIR} 70 | mkdir -p ${RELEASE_DIR} 71 | 72 | vc-descheduler: init 73 | CC=${CC} CGO_ENABLED=0 go build -ldflags ${LD_FLAGS} -o ${BIN_DIR}/vc-descheduler ./cmd/descheduler 74 | 75 | image_bins: vc-descheduler 76 | 77 | image: 78 | for name in descheduler; do\ 79 | docker buildx build -t "${IMAGE_PREFIX}/vc-$$name:$(TAG)" . 
-f ./installer/dockerfile/$$name/Dockerfile --output=type=${BUILDX_OUTPUT_TYPE} --platform ${DOCKER_PLATFORMS} --build-arg APK_MIRROR=${APK_MIRROR} --build-arg OPEN_EULER_IMAGE_TAG=${OPEN_EULER_IMAGE_TAG}; \ 80 | done 81 | 82 | unit-test: 83 | go clean -testcache 84 | if [ ${OS} = 'darwin' ];then\ 85 | go list ./... | grep -v "/e2e" | GOOS=${OS} xargs go test;\ 86 | else\ 87 | go test -p 8 -race $$(find pkg cmd -type f -name '*_test.go' | sed -r 's|/[^/]+$$||' | sort | uniq | sed "s|^|volcano.sh/descheduler/|");\ 88 | fi; 89 | 90 | clean: 91 | rm -rf _output/ 92 | rm -f *.log 93 | 94 | verify: 95 | hack/verify-gofmt.sh 96 | 97 | mod-download-go: 98 | @-GOFLAGS="-mod=readonly" find -name go.mod -execdir go mod download \; 99 | # go mod tidy is needed with Golang 1.16+ as go mod download affects go.sum 100 | # https://github.com/golang/go/issues/43994 101 | # exclude docs folder 102 | @find . -path ./docs -prune -o -name go.mod -execdir go mod tidy \; 103 | -------------------------------------------------------------------------------- /Makefile.def: -------------------------------------------------------------------------------- 1 | 2 | # If tag not explicitly set in users default to the git sha. 
3 | TAG ?= $(shell git rev-parse --verify HEAD) 4 | GitSHA=`git rev-parse HEAD` 5 | Date=`date "+%Y-%m-%d %H:%M:%S"` 6 | RELEASE_VER=latest 7 | OPEN_EULER_IMAGE_TAG ?= 22.03-lts-sp2 8 | LD_FLAGS=" \ 9 | -X '${REPO_PATH}/pkg/version.GitSHA=${GitSHA}' \ 10 | -X '${REPO_PATH}/pkg/version.Built=${Date}' \ 11 | -X '${REPO_PATH}/pkg/version.Version=${RELEASE_VER}'" 12 | 13 | -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - william-wang 3 | - kevin-wangzefeng 4 | - Monokaix 5 | reviewers: 6 | - william-wang 7 | - kevin-wangzefeng 8 | - lowang-bh 9 | - hwdef 10 | - Monokaix 11 | - archlitchi 12 | 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | The Volcano descheduler is developed based on the upstream Kubernetes community [descheduler](https://github.com/kubernetes-sigs/descheduler.git) project. Its initial version is based on the v0.27.1 tag of the upstream Kubernetes community, and the main codes come from the upstream descheduler project. The project also follows the Apache 2.0 open source license and retains the original license statement in the source file. In addition, Volcano descheduler also clearly marked its changes to the upstream descheduler project. 4 | 5 | > This is an alpha version and code is subject to change. 6 | 7 | # Why Volcano descheduler? 8 | 9 | The upstream Kubernetes community's descheduler provides basic descheduling plugins and functions, but there are still the following problems that cannot meet the user's scenarios: 10 | 11 | - When deploying descheduler in the form of Deployment, cronTab cannot be set to execute rescheduling tasks regularly. The native descheduler supports multiple deployment types, such as Job, cronJob, and Deployment. 
Users need to deploy multiple types of workloads to meet the needs of different scenarios. 12 | 13 | - The descheduler makes descheduling decisions based on the resource requests of the Pod, without considering the actual load of the node, and there is a problem of inaccurate descheduling decisions. It is worth noting that descheduling is a relatively dangerous and destructive action. The timing and accuracy of rescheduling need to be strictly guaranteed to avoid unexpected behavior. 14 | 15 | - It is hard to perfectly cooperate with the scheduler. Scheduling and descheduling are two mutually coordinated processes. When descheduling and evicting Pods, it is necessary to perceive whether the cluster can accommodate the newly generated Pods to avoid meaningless rescheduling, which is crucial to ensure business stability. 16 | 17 | # Features 18 | 19 | Volcano descheduler provides the following enhanced capabilities while fully retaining the functions and compatible code framework of the upstream Kubernetes community descheduler: 20 | 21 | ## Descheduling via crontab or fixed interval 22 | 23 | Users can deploy the `Volcano descheduler` as a Deploment type workload instead of a cronJob. Then specify the command line parameters to run the descheduler according to cronTab expression or fixed interval. 24 | 25 | **cronTab scheduled task**: Specify the parameter `--descheduling-interval-cron-expression='0 0 * * *'`, which means to run descheduling once every morning. 26 | 27 | **Fixed interval**: Specify the parameter `--descheduling-interval=10m`, which means descheduling will be run every 10 minutes. 28 | 29 | And please notice that `--descheduling-interval` has a higher priority than `--descheduling-interval-cron-expression`, the descheduler's behavior is subject to the `--descheduling-interva` setting when both parameters are set. 
30 | 31 | ## Real Load Aware Descheduling 32 | 33 | In the process of kubernetes cluster governance, hotspots are often formed due to high CPU, memory and other utilization conditions, which not only affects the stable operation of Pods on the current node, but also leads to a surge in the chances of node failure. In order to cope with problems such as load imbalance of cluster nodes and dynamically balance the resource utilization rate among nodes, it is necessary to construct a cluster resource view based on the relevant monitoring metrics of nodes, so that in the cluster governance phase, through real-time monitoring, it can automatically intervene to migrate some Pods on the nodes with high resource utilization rate to the nodes with low utilization rate, when high resource utilization rate, node failure, and high number of Pods are observed. 34 | 35 | The native descheduler only supports load-aware scheduling based on Pod request, which evicts Pods on nodes with higher utilization rates, thus equalizing resource utilization among nodes and avoiding overheating of individual node. However, Pod request does not reflect the real resource utilization of the nodes, so Volcano implements descheduling based on the real load of the nodes, by querying the metrics exposed by nodes, more accurate descheduling is performed based on the real load of CPU and Memory. 36 | 37 | ![LoadAware-EN](docs/img/descheduler_EN.svg) 38 | 39 | The principle of LoadAware is shown in the figure above: 40 | 41 | - Appropriately utilized nodes: nodes with resource utilization greater than or equal to 30% and less than or equal to 80%. The load level range of this node is a reasonable range expected to be reached. 42 | 43 | - Over-utilized nodes: nodes with resource utilization higher than 80%. Hotspot nodes will evict some Pods and reduce the load level to no more than 80%. The descheduler will schedule the Pods on the hotspot nodes to the idle nodes. 
44 | 45 | - Under-utilized nodes: nodes with resource utilization lower than 30%. 46 | 47 | # Quick start 48 | 49 | ## Prepare 50 | 51 | Install [prometheue](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus) or [prometheus-adaptor](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-adapter), and [prometheus-node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter), The real load of the node is exposed to the `Volcano descheduler` through node-exporter and prometheus. 52 | 53 | Add the following automatic discovery and node label replacement rules for the node-exporter service in the `scrape_configs` configuration of prometheus. This step is very important, otherwise `Volcano descheduler` cannot get the real load metrics of the node. For more details about `scrape_configs`, please refer to [Configuration | Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). 54 | 55 | ```yaml 56 | scrape_configs: 57 | - job_name: 'kubernetes-service-endpoints' 58 | kubernetes_sd_configs: 59 | - role: endpoints 60 | relabel_configs: 61 | - source_labels: [__meta_kubernetes_pod_node_name] 62 | action: replace 63 | target_label: instance 64 | ``` 65 | 66 | ## Install Volcano descheduler 67 | 68 | ### Install via yaml 69 | 70 | ```shell 71 | # create ns first. 72 | kubectl create ns volcano-system 73 | # deploy descheduler yaml. 74 | kubectl apply -f https://raw.githubusercontent.com/volcano-sh/descheduler/main/installer/volcano-descheduler-development.yaml 75 | ``` 76 | 77 | ## Configurations 78 | 79 | The default descheduling configuration is in the `volcano-descheduler` configMap under the `volcano-system` namespace. You can update the descheduling configuration by modifying the data in the configMap. 
The plugins enabled by default are `LoadAware` and `DefaultEvictor`, which perform load-aware descheduling and eviction respectively. 80 | 81 | ```yaml 82 | apiVersion: "descheduler/v1alpha2" 83 | kind: "DeschedulerPolicy" 84 | profiles: 85 | - name: default 86 | pluginConfig: 87 | - args: 88 | ignorePvcPods: true 89 | nodeFit: true 90 | priorityThreshold: 91 | value: 10000 92 | name: DefaultEvictor 93 | - args: 94 | evictableNamespaces: 95 | exclude: 96 | - kube-system 97 | metrics: 98 | address: null 99 | type: null 100 | targetThresholds: 101 | cpu: 80 # Eviction will be triggered when the node CPU utilization exceeds 80% 102 | memory: 85 # Eviction will be triggered when the node memory utilization exceeds 85% 103 | thresholds: 104 | cpu: 30 # Pods can be scheduled to nodes whose CPU resource utilization is less than 30% 105 | memory: 30 # Pods can be scheduled to nodes whose memory resource utilization is less than 30%. 106 | name: LoadAware 107 | plugins: 108 | balance: 109 | enabled: 110 | - LoadAware 111 | ``` 112 | 113 | For the full configuration and parameter description of the `DefaultEvictor` plugin, please refer to: [DefaultEvictor Configuration](https://github.com/kubernetes-sigs/descheduler/tree/master#evictor-plugin-configuration-default-evictor). 114 | 115 | `LoadAware` plugin parameter description: 116 | 117 | | Name | type | Default Value | Description | 118 | | :-----------------: | :------------------: | :-----------: | :----------------------------------------------------------: | 119 | | nodeSelector | string | nil | Limiting the nodes which are processed | 120 | | evictableNamespaces | map(string:[]string) | nil | Exclude evicting pods under excluded namespaces | 121 | | nodeFit | bool | false | Set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. 
| 122 | | numberOfNodes | int | 0 | This parameter can be configured to activate the strategy only when the number of under utilized nodes are above the configured value. This could be helpful in large clusters where a few nodes could go under utilized frequently or for a short period of time. | 123 | | duration | string | 2m | The time range specified when querying the actual utilization metrics of nodes, only takes effect when `metrics.type` is configured as `prometheus`. | 124 | | metrics | map(string:string) | nil | **Required Field**
Contains two parameters:
type: The type of metrics source, only supports `prometheus` and `prometheus_adaptor`.
address: The service address of `prometheus`. | 125 | | targetThresholds | map(string:int) | nil | **Required Field**
Supported configuration keys are `cpu`, `memory`, and `pods`.
When the node resource utilization (for `cpu` or `memory`) exceeds the setting threshold, it will trigger Pods eviction on the node, with the unit being %.
When the number of Pods on the node exceeds the set threshold, it will trigger Pods eviction on the node, with the unit being number. | 126 | | thresholds | map(string:int) | nil | **Required Field**
The evicted Pods should be scheduled to nodes with utilization below the `thresholds`.
The threshold for the same resource type cannot exceed the threshold set in `targetThresholds`. | 127 | 128 | In addition to the above `LoadAware plugin` enhancements, `Volcano descheduler` also supports native descheduler functions and plugins. If you want to configure other native plugins, please refer to: [kubernetes-sigs/descheduler](https://github.com/kubernetes-sigs/descheduler/blob/master/docs/user-guide.md). 129 | 130 | # Best practices 131 | 132 | When the Pods on the node with relatively high resource utilization are evicted, we expect that the new created Pods should avoid being scheduled to the node with relatively high resource utilization again. Therefore, the `Volcano scheduler` also needs to enable the plugin `usage` based on real load awareness, for detailed description and configuration of `usage`, please refer to: [volcano usage plugin](https://github.com/volcano-sh/volcano/blob/master/docs/design/usage-based-scheduling.md). 133 | 134 | # Trouble shotting 135 | 136 | When the configuration parameter `metrics.type` of the LoadAware plugin is set to `prometheus`, `Volcano scheduler` queries the actual utilization of cpu and memory through the following `PromQL` statement. When the expected eviction behavior does not occur, you can query it manually through prometheus, check whether the node metrics are correctly exposed, and compare it with the log of `Volcano descheduler` to judge its actual behavior. 
137 | 138 | **cpu:** 139 | 140 | ```shell 141 | avg_over_time((1 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle",instance="$replace_with_your_node_name"}[30s])) * 1))[2m:30s]) 142 | ``` 143 | 144 | **memory:** 145 | 146 | ```shell 147 | avg_over_time(((1-node_memory_MemAvailable_bytes{instance="$replace_with_your_node_name"}/node_memory_MemTotal_bytes{instance="$replace_with_your_node_name"}))[2m:30s]) 148 | ``` 149 | 150 | # Development 151 | 152 | ## build binary 153 | 154 | ```shell 155 | make vc-descheduler 156 | ``` 157 | 158 | ## build image 159 | 160 | ```shell 161 | make image 162 | ``` 163 | 164 | # Release Guide 165 | 166 | The release cadence of the `descheduler` is not synchronized with that of [Volcano](https://github.com/volcano-sh/volcano). This is because the `descheduler` is a sub-repository under volcano-sh, and its code and feature changes are relatively minor. We will adapt to the upstream Kubernetes community's descheduler project as needed and release new versions accordingly. -------------------------------------------------------------------------------- /cmd/descheduler/app/options/options.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Copyright 2024 The Volcano Authors. 
17 | 18 | Modifications made by Volcano authors: 19 | - [2023]Add `DeschedulingIntervalCronExpression` flag 20 | */ 21 | 22 | // Package options provides the descheduler flags 23 | package options 24 | 25 | import ( 26 | "time" 27 | 28 | "github.com/spf13/pflag" 29 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 | apiserveroptions "k8s.io/apiserver/pkg/server/options" 31 | clientset "k8s.io/client-go/kubernetes" 32 | componentbaseconfig "k8s.io/component-base/config" 33 | componentbaseoptions "k8s.io/component-base/config/options" 34 | 35 | "volcano.sh/descheduler/pkg/apis/componentconfig" 36 | ) 37 | 38 | const ( 39 | DefaultDeschedulerPort = 10258 40 | ) 41 | 42 | // DeschedulerServer configuration 43 | type DeschedulerServer struct { 44 | componentconfig.DeschedulerConfiguration 45 | 46 | Client clientset.Interface 47 | EventClient clientset.Interface 48 | SecureServing *apiserveroptions.SecureServingOptionsWithLoopback 49 | DisableMetrics bool 50 | } 51 | 52 | // NewDeschedulerServer creates a new DeschedulerServer with default parameters 53 | func NewDeschedulerServer() (*DeschedulerServer, error) { 54 | cfg, err := newDefaultComponentConfig() 55 | if err != nil { 56 | return nil, err 57 | } 58 | 59 | secureServing := apiserveroptions.NewSecureServingOptions().WithLoopback() 60 | secureServing.BindPort = DefaultDeschedulerPort 61 | 62 | return &DeschedulerServer{ 63 | DeschedulerConfiguration: *cfg, 64 | SecureServing: secureServing, 65 | }, nil 66 | } 67 | 68 | func newDefaultComponentConfig() (*componentconfig.DeschedulerConfiguration, error) { 69 | versionedCfg := componentconfig.DeschedulerConfiguration{ 70 | LeaderElection: componentbaseconfig.LeaderElectionConfiguration{ 71 | LeaderElect: false, 72 | LeaseDuration: metav1.Duration{Duration: 137 * time.Second}, 73 | RenewDeadline: metav1.Duration{Duration: 107 * time.Second}, 74 | RetryPeriod: metav1.Duration{Duration: 26 * time.Second}, 75 | ResourceLock: "leases", 76 | ResourceName: "vc-descheduler", 
77 | ResourceNamespace: "volcano-system", 78 | }, 79 | } 80 | return &versionedCfg, nil 81 | } 82 | 83 | // AddFlags adds flags for a specific SchedulerServer to the specified FlagSet 84 | func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) { 85 | fs.StringVar(&rs.Logging.Format, "logging-format", "text", `Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log_dir, --log_file, --log_file_max_size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning.`) 86 | fs.DurationVar(&rs.DeschedulingInterval, "descheduling-interval", rs.DeschedulingInterval, "Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.") 87 | fs.StringVar(&rs.ClientConnection.Kubeconfig, "kubeconfig", rs.ClientConnection.Kubeconfig, "File with kube configuration. Deprecated, use client-connection-kubeconfig instead.") 88 | fs.StringVar(&rs.ClientConnection.Kubeconfig, "client-connection-kubeconfig", rs.ClientConnection.Kubeconfig, "File path to kube configuration for interacting with kubernetes apiserver.") 89 | fs.Float32Var(&rs.ClientConnection.QPS, "client-connection-qps", rs.ClientConnection.QPS, "QPS to use for interacting with kubernetes apiserver.") 90 | fs.Int32Var(&rs.ClientConnection.Burst, "client-connection-burst", rs.ClientConnection.Burst, "Burst to use for interacting with kubernetes apiserver.") 91 | fs.StringVar(&rs.PolicyConfigFile, "policy-config-file", rs.PolicyConfigFile, "File with descheduler policy configuration.") 92 | fs.BoolVar(&rs.DryRun, "dry-run", rs.DryRun, "Execute descheduler in dry run mode.") 93 | fs.BoolVar(&rs.DisableMetrics, "disable-metrics", rs.DisableMetrics, "Disables metrics. 
The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.") 94 | fs.StringVar(&rs.DeschedulingIntervalCronExpression, "descheduling-interval-cron-expression", rs.DeschedulingIntervalCronExpression, "Time interval between two consecutive descheduler executions. Cron expression is allowed to use to configure this parameter.") 95 | 96 | componentbaseoptions.BindLeaderElectionFlags(&rs.LeaderElection, fs) 97 | 98 | rs.SecureServing.AddFlags(fs) 99 | } 100 | -------------------------------------------------------------------------------- /cmd/descheduler/app/server.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Copyright 2024 The Volcano Authors. 17 | 18 | Modifications made by Volcano authors: 19 | - [2024]Rename package name to volcano.sh 20 | */ 21 | 22 | // Package app implements a Server object for running the descheduler. 
23 | package app 24 | 25 | import ( 26 | "context" 27 | "io" 28 | "os" 29 | "os/signal" 30 | "syscall" 31 | 32 | "github.com/spf13/cobra" 33 | apiserver "k8s.io/apiserver/pkg/server" 34 | "k8s.io/apiserver/pkg/server/healthz" 35 | "k8s.io/apiserver/pkg/server/mux" 36 | restclient "k8s.io/client-go/rest" 37 | registry "k8s.io/component-base/logs/api/v1" 38 | jsonLog "k8s.io/component-base/logs/json" 39 | _ "k8s.io/component-base/logs/json/register" 40 | "k8s.io/component-base/metrics/legacyregistry" 41 | "k8s.io/klog/v2" 42 | 43 | "volcano.sh/descheduler/cmd/descheduler/app/options" 44 | "volcano.sh/descheduler/pkg/descheduler" 45 | ) 46 | 47 | // NewDeschedulerCommand creates a *cobra.Command object with default parameters 48 | func NewDeschedulerCommand(out io.Writer) *cobra.Command { 49 | s, err := options.NewDeschedulerServer() 50 | if err != nil { 51 | klog.ErrorS(err, "unable to initialize server") 52 | } 53 | 54 | cmd := &cobra.Command{ 55 | Use: "descheduler", 56 | Short: "descheduler", 57 | Long: `The descheduler evicts pods which may be bound to less desired nodes`, 58 | Run: func(cmd *cobra.Command, args []string) { 59 | // LoopbackClientConfig is a config for a privileged loopback connection 60 | var LoopbackClientConfig *restclient.Config 61 | var SecureServing *apiserver.SecureServingInfo 62 | if err := s.SecureServing.ApplyTo(&SecureServing, &LoopbackClientConfig); err != nil { 63 | klog.ErrorS(err, "failed to apply secure server configuration") 64 | return 65 | } 66 | 67 | var factory registry.LogFormatFactory 68 | if s.Logging.Format == "json" { 69 | factory = jsonLog.Factory{} 70 | } 71 | 72 | if factory == nil { 73 | klog.ClearLogger() 74 | } else { 75 | log, loggerControl := factory.Create(registry.LoggingConfiguration{ 76 | Format: s.Logging.Format, 77 | Verbosity: s.Logging.Verbosity, 78 | }, registry.LoggingOptions{}) 79 | defer loggerControl.Flush() 80 | klog.SetLogger(log) 81 | } 82 | 83 | ctx, done := 
signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) 84 | 85 | pathRecorderMux := mux.NewPathRecorderMux("descheduler") 86 | if !s.DisableMetrics { 87 | pathRecorderMux.Handle("/metrics", legacyregistry.HandlerWithReset()) 88 | } 89 | 90 | healthz.InstallHandler(pathRecorderMux, healthz.NamedCheck("Descheduler", healthz.PingHealthz.Check)) 91 | 92 | stoppedCh, _, err := SecureServing.Serve(pathRecorderMux, 0, ctx.Done()) 93 | if err != nil { 94 | klog.Fatalf("failed to start secure server: %v", err) 95 | return 96 | } 97 | 98 | err = Run(ctx, s) 99 | if err != nil { 100 | klog.ErrorS(err, "descheduler server") 101 | } 102 | 103 | done() 104 | // wait for metrics server to close 105 | <-stoppedCh 106 | }, 107 | } 108 | cmd.SetOut(out) 109 | flags := cmd.Flags() 110 | s.AddFlags(flags) 111 | return cmd 112 | } 113 | 114 | func Run(ctx context.Context, rs *options.DeschedulerServer) error { 115 | return descheduler.Run(ctx, rs) 116 | } 117 | 118 | func SetupLogs() { 119 | klog.SetOutput(os.Stdout) 120 | klog.InitFlags(nil) 121 | } 122 | -------------------------------------------------------------------------------- /cmd/descheduler/app/version.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package app 18 | 19 | import ( 20 | "fmt" 21 | 22 | "github.com/spf13/cobra" 23 | "sigs.k8s.io/descheduler/pkg/version" 24 | ) 25 | 26 | func NewVersionCommand() *cobra.Command { 27 | versionCmd := &cobra.Command{ 28 | Use: "version", 29 | Short: "Version of descheduler", 30 | Long: `Prints the version of descheduler.`, 31 | Run: func(cmd *cobra.Command, args []string) { 32 | fmt.Printf("Descheduler version %+v\n", version.Get()) 33 | }, 34 | } 35 | return versionCmd 36 | } 37 | -------------------------------------------------------------------------------- /cmd/descheduler/descheduler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Copyright 2024 The Volcano Authors. 
17 | 18 | Modifications made by Volcano authors: 19 | - [2024]Register LoadAwareUtilizationPluginName plugin 20 | */ 21 | 22 | package main 23 | 24 | import ( 25 | "os" 26 | 27 | "k8s.io/component-base/cli" 28 | "sigs.k8s.io/descheduler/pkg/descheduler" 29 | "sigs.k8s.io/descheduler/pkg/framework/pluginregistry" 30 | 31 | "volcano.sh/descheduler/cmd/descheduler/app" 32 | "volcano.sh/descheduler/pkg/framework/plugins/loadaware" 33 | ) 34 | 35 | func init() { 36 | descheduler.SetupPlugins() 37 | pluginregistry.Register(loadaware.LoadAwareUtilizationPluginName, loadaware.NewLoadAwareUtilization, &loadaware.LoadAwareUtilization{}, &loadaware.LoadAwareUtilizationArgs{}, loadaware.ValidateLoadAwareUtilizationArgs, loadaware.SetDefaults_LoadAwareUtilizationArgs, pluginregistry.PluginRegistry) 38 | } 39 | 40 | func main() { 41 | out := os.Stdout 42 | cmd := app.NewDeschedulerCommand(out) 43 | cmd.AddCommand(app.NewVersionCommand()) 44 | 45 | code := cli.Run(cmd) 46 | os.Exit(code) 47 | } 48 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module volcano.sh/descheduler 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/google/go-cmp v0.5.9 7 | github.com/prometheus/client_golang v1.14.0 8 | github.com/prometheus/common v0.37.0 9 | github.com/robfig/cron/v3 v3.0.1 10 | github.com/spf13/cobra v1.6.0 11 | github.com/spf13/pflag v1.0.5 12 | k8s.io/api v0.27.1 13 | k8s.io/apimachinery v0.27.1 14 | k8s.io/apiserver v0.27.0 15 | k8s.io/client-go v0.27.1 16 | k8s.io/component-base v0.27.0 17 | k8s.io/klog/v2 v2.90.1 18 | k8s.io/metrics v0.27.1 19 | k8s.io/utils v0.0.0-20230313181309-38a27ef9d749 20 | sigs.k8s.io/descheduler v0.27.1 21 | ) 22 | 23 | require ( 24 | github.com/NYTimes/gziphandler v1.1.1 // indirect 25 | github.com/antlr/antlr4/runtime/Go/antlr v1.4.10 // indirect 26 | github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // 
indirect 27 | github.com/beorn7/perks v1.0.1 // indirect 28 | github.com/blang/semver/v4 v4.0.0 // indirect 29 | github.com/cenkalti/backoff/v4 v4.1.3 // indirect 30 | github.com/cespare/xxhash/v2 v2.1.2 // indirect 31 | github.com/coreos/go-semver v0.3.0 // indirect 32 | github.com/coreos/go-systemd/v22 v22.4.0 // indirect 33 | github.com/davecgh/go-spew v1.1.1 // indirect 34 | github.com/emicklei/go-restful/v3 v3.9.0 // indirect 35 | github.com/evanphx/json-patch v4.12.0+incompatible // indirect 36 | github.com/felixge/httpsnoop v1.0.3 // indirect 37 | github.com/fsnotify/fsnotify v1.6.0 // indirect 38 | github.com/go-logr/logr v1.2.3 // indirect 39 | github.com/go-logr/stdr v1.2.2 // indirect 40 | github.com/go-logr/zapr v1.2.3 // indirect 41 | github.com/go-openapi/jsonpointer v0.19.6 // indirect 42 | github.com/go-openapi/jsonreference v0.20.1 // indirect 43 | github.com/go-openapi/swag v0.22.3 // indirect 44 | github.com/gogo/protobuf v1.3.2 // indirect 45 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 46 | github.com/golang/protobuf v1.5.3 // indirect 47 | github.com/google/cel-go v0.12.6 // indirect 48 | github.com/google/gnostic v0.5.7-v3refs // indirect 49 | github.com/google/gofuzz v1.1.0 // indirect 50 | github.com/google/uuid v1.3.0 // indirect 51 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect 52 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect 53 | github.com/imdario/mergo v0.3.6 // indirect 54 | github.com/inconshreveable/mousetrap v1.0.1 // indirect 55 | github.com/josharian/intern v1.0.0 // indirect 56 | github.com/json-iterator/go v1.1.12 // indirect 57 | github.com/mailru/easyjson v0.7.7 // indirect 58 | github.com/matttproud/golang_protobuf_extensions v1.0.2 // indirect 59 | github.com/mitchellh/mapstructure v1.4.1 // indirect 60 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 61 | github.com/modern-go/reflect2 v1.0.2 // indirect 62 | 
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 63 | github.com/pkg/errors v0.9.1 // indirect 64 | github.com/prometheus/client_model v0.3.0 // indirect 65 | github.com/prometheus/procfs v0.8.0 // indirect 66 | github.com/stoewer/go-strcase v1.2.0 // indirect 67 | go.etcd.io/etcd/api/v3 v3.5.7 // indirect 68 | go.etcd.io/etcd/client/pkg/v3 v3.5.7 // indirect 69 | go.etcd.io/etcd/client/v3 v3.5.7 // indirect 70 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.35.0 // indirect 71 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.35.1 // indirect 72 | go.opentelemetry.io/otel v1.10.0 // indirect 73 | go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.10.0 // indirect 74 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.10.0 // indirect 75 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.10.0 // indirect 76 | go.opentelemetry.io/otel/metric v0.31.0 // indirect 77 | go.opentelemetry.io/otel/sdk v1.10.0 // indirect 78 | go.opentelemetry.io/otel/trace v1.10.0 // indirect 79 | go.opentelemetry.io/proto/otlp v0.19.0 // indirect 80 | go.uber.org/atomic v1.7.0 // indirect 81 | go.uber.org/multierr v1.6.0 // indirect 82 | go.uber.org/zap v1.19.0 // indirect 83 | golang.org/x/crypto v0.1.0 // indirect 84 | golang.org/x/net v0.8.0 // indirect 85 | golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b // indirect 86 | golang.org/x/sync v0.1.0 // indirect 87 | golang.org/x/sys v0.6.0 // indirect 88 | golang.org/x/term v0.6.0 // indirect 89 | golang.org/x/text v0.8.0 // indirect 90 | golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect 91 | google.golang.org/appengine v1.6.7 // indirect 92 | google.golang.org/genproto v0.0.0-20220502173005-c8bf987b8c21 // indirect 93 | google.golang.org/grpc v1.51.0 // indirect 94 | google.golang.org/protobuf v1.28.1 // indirect 95 | gopkg.in/inf.v0 v0.9.1 // indirect 96 | gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect 97 | 
gopkg.in/yaml.v2 v2.4.0 // indirect 98 | gopkg.in/yaml.v3 v3.0.1 // indirect 99 | k8s.io/component-helpers v0.27.0 // indirect 100 | k8s.io/kms v0.27.0 // indirect 101 | k8s.io/kube-openapi v0.0.0-20230308215209-15aac26d736a // indirect 102 | sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.1.1 // indirect 103 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 104 | sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect 105 | sigs.k8s.io/yaml v1.3.0 // indirect 106 | ) 107 | -------------------------------------------------------------------------------- /hack/lib/golang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 The Kubernetes Authors. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Compare version numbers 18 | kube::golang::version_gt() { 19 | return "$(printf '%s\n' "$@" | sort -V | head -n 1)" != "$1"; 20 | } 21 | 22 | # Ensure the go tool exists and is a viable version. 23 | kube::golang::verify_go_version() { 24 | if [[ -z "$(which go)" ]]; then 25 | kube::log::usage_from_stdin < /dev/null 38 | if [[ $? -ne 0 ]]; then 39 | echo -e "\033[31mERROR\033[0m: ${IMAGE_PREFIX}/vc-controller-manager:${TAG} does not exist" 40 | exit 1 41 | fi 42 | docker image inspect "${IMAGE_PREFIX}/vc-scheduler:${TAG}" > /dev/null 43 | if [[ $? 
-ne 0 ]]; then 44 | echo -e "\033[31mERROR\033[0m: ${IMAGE_PREFIX}/vc-scheduler:${TAG} does not exist" 45 | exit 1 46 | fi 47 | docker image inspect "${IMAGE_PREFIX}/vc-webhook-manager:${TAG}" > /dev/null 48 | if [[ $? -ne 0 ]]; then 49 | echo -e "\033[31mERROR\033[0m: ${IMAGE_PREFIX}/vc-webhook-manager:${TAG} does not exist" 50 | exit 1 51 | fi 52 | } 53 | 54 | # check if kubectl installed 55 | function check-prerequisites { 56 | echo "Checking prerequisites" 57 | which kubectl >/dev/null 2>&1 58 | if [[ $? -ne 0 ]]; then 59 | echo -e "\033[31mERROR\033[0m: kubectl not installed" 60 | exit 1 61 | else 62 | echo -n "Found kubectl, version: " && kubectl version --client 63 | fi 64 | } 65 | 66 | # check if kind installed 67 | function check-kind { 68 | echo "Checking kind" 69 | which kind >/dev/null 2>&1 70 | if [[ $? -ne 0 ]]; then 71 | echo "Installing kind ..." 72 | GOOS=${OS} go install sigs.k8s.io/kind@v0.24.0 73 | else 74 | echo -n "Found kind, version: " && kind version 75 | fi 76 | } 77 | 78 | # install helm if not installed 79 | function install-helm { 80 | echo "Checking helm" 81 | which helm >/dev/null 2>&1 82 | if [[ $? -ne 0 ]]; then 83 | echo "Installing helm via script" 84 | HELM_TEMP_DIR=$(mktemp -d) 85 | curl -fsSL -o ${HELM_TEMP_DIR}/get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 86 | chmod 700 ${HELM_TEMP_DIR}/get_helm.sh && ${HELM_TEMP_DIR}/get_helm.sh 87 | else 88 | echo -n "Found helm, version: " && helm version 89 | fi 90 | } 91 | 92 | function install-ginkgo-if-not-exist { 93 | echo "Checking ginkgo" 94 | which ginkgo >/dev/null 2>&1 95 | if [[ $? -ne 0 ]]; then 96 | echo "Installing ginkgo ..." 
97 | GOOS=${OS} go install github.com/onsi/ginkgo/v2/ginkgo 98 | else 99 | echo -n "Found ginkgo, version: " && ginkgo version 100 | fi 101 | } 102 | -------------------------------------------------------------------------------- /hack/lib/util.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 The Kubernetes Authors. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ----------------------------------------------------------------------------- 18 | # CHANGELOG 19 | # Volcano Authors: 20 | # Some functions derived from https://github.com/kubernetes/kubernetes/blob/v1.19.0-beta.2/hack/utils.sh 21 | # for update-vendor-licenses, verify-vendor-licenses 22 | 23 | 24 | # Example: kube::util::trap_add 'echo "in trap DEBUG"' DEBUG 25 | # See: http://stackoverflow.com/questions/3338030/multiple-bash-traps-for-the-same-signal 26 | kube::util::trap_add() { 27 | local trap_add_cmd 28 | trap_add_cmd=$1 29 | shift 30 | 31 | for trap_add_name in "$@"; do 32 | local existing_cmd 33 | local new_cmd 34 | 35 | # Grab the currently defined trap commands for this trap 36 | existing_cmd=$(trap -p "${trap_add_name}" | awk -F"'" '{print $2}') 37 | 38 | if [[ -z "${existing_cmd}" ]]; then 39 | new_cmd="${trap_add_cmd}" 40 | else 41 | new_cmd="${trap_add_cmd};${existing_cmd}" 42 | fi 43 | 44 | # Assign the test. 
Disable the shellcheck warning telling that trap 45 | # commands should be single quoted to avoid evaluating them at this 46 | # point instead evaluating them at run time. The logic of adding new 47 | # commands to a single trap requires them to be evaluated right away. 48 | # shellcheck disable=SC2064 49 | trap "${new_cmd}" "${trap_add_name}" 50 | done 51 | } 52 | 53 | # Opposite of kube::util::ensure-temp-dir() 54 | kube::util::cleanup-temp-dir() { 55 | rm -rf "${KUBE_TEMP}" 56 | } 57 | 58 | # Create a temp dir that'll be deleted at the end of this bash session. 59 | # 60 | # Vars set: 61 | # KUBE_TEMP 62 | kube::util::ensure-temp-dir() { 63 | if [[ -z ${KUBE_TEMP-} ]]; then 64 | KUBE_TEMP=$(mktemp -d 2>/dev/null || mktemp -d -t kubernetes.XXXXXX) 65 | kube::util::trap_add kube::util::cleanup-temp-dir EXIT 66 | fi 67 | } 68 | 69 | # outputs md5 hash of $1, works on macOS and Linux 70 | function kube::util::md5() { 71 | if which md5 >/dev/null 2>&1; then 72 | md5 -q "$1" 73 | else 74 | md5sum "$1" | awk '{ print $1 }' 75 | fi 76 | } 77 | -------------------------------------------------------------------------------- /hack/verify-gofmt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 The Kubernetes Authors. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | set -o errexit 18 | set -o nounset 19 | set -o pipefail 20 | 21 | KUBE_ROOT=$(dirname "${BASH_SOURCE}")/.. 22 | source "${KUBE_ROOT}/hack/lib/init.sh" 23 | 24 | kube::golang::verify_go_version 25 | 26 | cd "${KUBE_ROOT}" 27 | 28 | find_files() { 29 | find . -not \( \ 30 | \( \ 31 | -wholename './output' \ 32 | -o -wholename './_output' \ 33 | -o -wholename './_gopath' \ 34 | -o -wholename './release' \ 35 | -o -wholename './target' \ 36 | -o -wholename '*/third_party/*' \ 37 | -o -wholename '*/vendor/*' \ 38 | -o -wholename '*/contrib/*' \ 39 | -o -wholename './staging/src/k8s.io/client-go/*vendor/*' \ 40 | \) -prune \ 41 | \) -name '*.go' 42 | } 43 | # gofmt exits with non-zero exit code if it finds a problem unrelated to 44 | # formatting (e.g., a file does not parse correctly). Without "|| true" this 45 | # would have led to no useful error message from gofmt, because the script would 46 | # have failed before getting to the "echo" in the block below. 47 | GOFMT="gofmt -d -s" 48 | diff=$(find_files | xargs ${GOFMT} 2>&1) || true 49 | if [[ -n "${diff}" ]]; then 50 | echo "${diff}" 51 | exit 1 52 | fi 53 | -------------------------------------------------------------------------------- /installer/dockerfile/descheduler/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Volcano Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM golang:1.20.3 AS builder 16 | WORKDIR /go/src/volcano.sh/ 17 | COPY go.mod go.sum ./ 18 | RUN go mod download 19 | ADD . descheduler 20 | RUN cd descheduler && make vc-descheduler 21 | 22 | FROM alpine:latest 23 | COPY --from=builder /go/src/volcano.sh/descheduler/_output/bin/vc-descheduler /vc-descheduler 24 | ENTRYPOINT ["/vc-descheduler"] 25 | -------------------------------------------------------------------------------- /installer/volcano-descheduler-development.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: volcano-descheduler 5 | namespace: volcano-system 6 | 7 | --- 8 | apiVersion: v1 9 | kind: ConfigMap 10 | metadata: 11 | name: volcano-descheduler 12 | namespace: volcano-system 13 | data: 14 | policy.yaml: | 15 | apiVersion: "descheduler/v1alpha2" 16 | kind: "DeschedulerPolicy" 17 | profiles: 18 | - name: default 19 | pluginConfig: 20 | - args: 21 | ignorePvcPods: true 22 | nodeFit: true 23 | priorityThreshold: 24 | value: 10000 25 | name: DefaultEvictor 26 | - args: 27 | evictableNamespaces: 28 | exclude: 29 | - kube-system 30 | metrics: 31 | address: null 32 | type: null 33 | targetThresholds: 34 | cpu: 80 35 | memory: 85 36 | thresholds: 37 | cpu: 30 38 | memory: 30 39 | name: LoadAware 40 | plugins: 41 | balance: 42 | enabled: 43 | - LoadAware 44 | 45 | --- 46 | kind: ClusterRole 47 | apiVersion: rbac.authorization.k8s.io/v1 48 | metadata: 49 | name: volcano-descheduler 50 | rules: 51 | - apiGroups: ["events.k8s.io"] 52 | resources: ["events"] 53 | verbs: ["create", "update"] 54 | - apiGroups: [""] 55 | resources: ["nodes"] 56 | verbs: ["get", "watch", "list"] 57 | - apiGroups: [""] 58 | resources: ["namespaces"] 59 | verbs: ["get", "watch", "list"] 60 | - apiGroups: [""] 61 | resources: ["pods"] 62 | verbs: ["get", "watch", "list", "delete"] 63 | - apiGroups: [""] 64 | resources: ["pods/eviction"] 65 | verbs: ["create"] 66 | - 
apiGroups: ["scheduling.k8s.io"] 67 | resources: ["priorityclasses"] 68 | verbs: ["get", "watch", "list"] 69 | - apiGroups: ["metrics.k8s.io"] 70 | resources: ["pods"] 71 | verbs: ["get", "list", "watch"] 72 | 73 | --- 74 | apiVersion: rbac.authorization.k8s.io/v1 75 | kind: ClusterRoleBinding 76 | metadata: 77 | name: volcano-descheduler 78 | roleRef: 79 | apiGroup: rbac.authorization.k8s.io 80 | kind: ClusterRole 81 | name: volcano-descheduler 82 | subjects: 83 | - kind: ServiceAccount 84 | name: volcano-descheduler 85 | namespace: volcano-system 86 | 87 | --- 88 | kind: Deployment 89 | apiVersion: apps/v1 90 | metadata: 91 | name: volcano-descheduler 92 | namespace: volcano-system 93 | labels: 94 | app: descheduler 95 | k8s-app: descheduler 96 | spec: 97 | replicas: 1 98 | revisionHistoryLimit: 10 99 | selector: 100 | matchLabels: 101 | app: descheduler 102 | k8s-app: descheduler 103 | template: 104 | metadata: 105 | labels: 106 | app: descheduler 107 | k8s-app: descheduler 108 | spec: 109 | serviceAccountName: volcano-descheduler 110 | volumes: 111 | - name: policy-volume 112 | configMap: 113 | name: volcano-descheduler 114 | - name: log 115 | hostPath: 116 | path: /var/log/volcano/descheduler 117 | containers: 118 | - name: descheduler 119 | image: docker.io/volcanosh/vc-descheduler:latest 120 | command: ["sh", "-c"] 121 | args: 122 | - > 123 | /vc-descheduler --descheduling-interval-cron-expression='*/10 * * * *' 124 | --descheduling-interval=10m 125 | --policy-config-file=/policy-dir/policy.yaml 126 | --leader-elect=false 127 | --leader-elect-resource-namespace=volcano-system 128 | --v=3 1>>/var/log/volcano/descheduler/descheduler.log 2>&1 129 | imagePullPolicy: Always 130 | env: 131 | - name: POD_NAMESPACE 132 | valueFrom: 133 | fieldRef: 134 | fieldPath: metadata.namespace 135 | volumeMounts: 136 | - mountPath: /policy-dir 137 | name: policy-volume 138 | - name: log 139 | mountPath: /var/log/volcano/descheduler 140 | 
-------------------------------------------------------------------------------- /pkg/apis/componentconfig/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // +k8s:deepcopy-gen=package,register 18 | 19 | package componentconfig // import "sigs.k8s.io/descheduler/pkg/apis/componentconfig" 20 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package componentconfig 18 | 19 | import ( 20 | "k8s.io/apimachinery/pkg/runtime" 21 | "k8s.io/apimachinery/pkg/runtime/schema" 22 | ) 23 | 24 | var ( 25 | SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) 26 | AddToScheme = SchemeBuilder.AddToScheme 27 | ) 28 | 29 | // GroupName is the group name used in this package 30 | const GroupName = "deschedulercomponentconfig" 31 | 32 | // SchemeGroupVersion is group version used to register these objects 33 | var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal} 34 | 35 | // Kind takes an unqualified kind and returns a Group qualified GroupKind 36 | func Kind(kind string) schema.GroupKind { 37 | return SchemeGroupVersion.WithKind(kind).GroupKind() 38 | } 39 | 40 | // Resource takes an unqualified resource and returns a Group qualified GroupResource 41 | func Resource(resource string) schema.GroupResource { 42 | return SchemeGroupVersion.WithResource(resource).GroupResource() 43 | } 44 | 45 | func addKnownTypes(scheme *runtime.Scheme) error { 46 | scheme.AddKnownTypes(SchemeGroupVersion, 47 | &DeschedulerConfiguration{}, 48 | ) 49 | return nil 50 | } 51 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Copyright 2024 The Volcano Authors. 17 | 18 | Modifications made by Volcano authors: 19 | - [2024]Add `DeschedulingIntervalCronExpression` config 20 | */ 21 | 22 | package componentconfig 23 | 24 | import ( 25 | "time" 26 | 27 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | componentbaseconfig "k8s.io/component-base/config" 29 | registry "k8s.io/component-base/logs/api/v1" 30 | ) 31 | 32 | // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object 33 | 34 | type DeschedulerConfiguration struct { 35 | metav1.TypeMeta 36 | 37 | // Time interval for descheduler to run 38 | DeschedulingInterval time.Duration 39 | 40 | // Cron expression for running the descheduler 41 | DeschedulingIntervalCronExpression string 42 | 43 | // KubeconfigFile is path to kubeconfig file with authorization and master 44 | // location information. 45 | // Deprecated: Use clientConnection.kubeConfig instead. 46 | KubeconfigFile string 47 | 48 | // PolicyConfigFile is the filepath to the descheduler policy configuration. 49 | PolicyConfigFile string 50 | 51 | // Dry run 52 | DryRun bool 53 | 54 | // Node selectors 55 | NodeSelector string 56 | 57 | // MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node. 58 | MaxNoOfPodsToEvictPerNode int 59 | 60 | // EvictLocalStoragePods allows pods using local storage to be evicted. 61 | EvictLocalStoragePods bool 62 | 63 | // IgnorePVCPods sets whether PVC pods should be allowed to be evicted 64 | IgnorePVCPods bool 65 | 66 | // LeaderElection starts Deployment using leader election loop 67 | LeaderElection componentbaseconfig.LeaderElectionConfiguration 68 | 69 | // Logging specifies the options of logging. 70 | // Refer to [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/api/v1/options.go) for more information. 
71 | Logging registry.LoggingConfiguration 72 | 73 | // ClientConnection specifies the kubeconfig file and client connection settings to use when communicating with the apiserver. 74 | // Refer to [ClientConnection](https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/componentconfig#ClientConnectionConfiguration) for more information. 75 | ClientConnection componentbaseconfig.ClientConnectionConfiguration 76 | } 77 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/v1alpha1/defaults.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import "k8s.io/apimachinery/pkg/runtime" 20 | 21 | func addDefaultingFuncs(scheme *runtime.Scheme) error { 22 | return RegisterDefaults(scheme) 23 | } 24 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/v1alpha1/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // +k8s:deepcopy-gen=package,register 18 | // +k8s:conversion-gen=sigs.k8s.io/descheduler/pkg/apis/componentconfig 19 | // +k8s:defaulter-gen=TypeMeta 20 | 21 | // Package v1alpha1 is the v1alpha1 version of the descheduler's componentconfig API 22 | // +groupName=deschedulercomponentconfig 23 | 24 | package v1alpha1 // import "sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1" 25 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/v1alpha1/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | "k8s.io/apimachinery/pkg/runtime" 21 | "k8s.io/apimachinery/pkg/runtime/schema" 22 | ) 23 | 24 | var ( 25 | SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) 26 | localSchemeBuilder = &SchemeBuilder 27 | AddToScheme = SchemeBuilder.AddToScheme 28 | ) 29 | 30 | // GroupName is the group name used in this package 31 | const ( 32 | GroupName = "deschedulercomponentconfig" 33 | GroupVersion = "v1alpha1" 34 | ) 35 | 36 | // SchemeGroupVersion is group version used to register these objects 37 | var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion} 38 | 39 | // Kind takes an unqualified kind and returns a Group qualified GroupKind 40 | func Kind(kind string) schema.GroupKind { 41 | return SchemeGroupVersion.WithKind(kind).GroupKind() 42 | } 43 | 44 | // Resource takes an unqualified resource and returns a Group qualified GroupResource 45 | func Resource(resource string) schema.GroupResource { 46 | return SchemeGroupVersion.WithResource(resource).GroupResource() 47 | } 48 | 49 | func init() { 50 | // We only register manually written functions here. The registration of the 51 | // generated functions takes place in the generated files. The separation 52 | // makes the code compile even when the generated files are missing. 53 | localSchemeBuilder.Register(addKnownTypes, addDefaultingFuncs) 54 | } 55 | 56 | func addKnownTypes(scheme *runtime.Scheme) error { 57 | // TODO this will get cleaned up when the scheme types are fixed 58 | scheme.AddKnownTypes(SchemeGroupVersion, 59 | &DeschedulerConfiguration{}, 60 | ) 61 | return nil 62 | } 63 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/v1alpha1/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | "time" 21 | 22 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 | componentbaseconfig "k8s.io/component-base/config" 24 | registry "k8s.io/component-base/logs/api/v1" 25 | ) 26 | 27 | // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object 28 | 29 | type DeschedulerConfiguration struct { 30 | metav1.TypeMeta `json:",inline"` 31 | 32 | // Time interval for descheduler to run 33 | DeschedulingInterval time.Duration `json:"deschedulingInterval,omitempty"` 34 | 35 | // KubeconfigFile is path to kubeconfig file with authorization and master 36 | // location information. 37 | // Deprecated: Use clientConnection.kubeConfig instead. 38 | KubeconfigFile string `json:"kubeconfigFile"` 39 | 40 | // PolicyConfigFile is the filepath to the descheduler policy configuration. 41 | PolicyConfigFile string `json:"policyConfigFile,omitempty"` 42 | 43 | // Dry run 44 | DryRun bool `json:"dryRun,omitempty"` 45 | 46 | // Node selectors 47 | NodeSelector string `json:"nodeSelector,omitempty"` 48 | 49 | // MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node. 50 | MaxNoOfPodsToEvictPerNode int `json:"maxNoOfPodsToEvictPerNode,omitempty"` 51 | 52 | // EvictLocalStoragePods allows pods using local storage to be evicted. 
53 | EvictLocalStoragePods bool `json:"evictLocalStoragePods,omitempty"` 54 | 55 | // IgnorePVCPods sets whether PVC pods should be allowed to be evicted 56 | IgnorePVCPods bool `json:"ignorePvcPods,omitempty"` 57 | 58 | // LeaderElection starts Deployment using leader election loop 59 | LeaderElection componentbaseconfig.LeaderElectionConfiguration `json:"leaderElection,omitempty"` 60 | 61 | // Logging specifies the options of logging. 62 | // Refer to [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/api/v1/options.go) for more information. 63 | Logging registry.LoggingConfiguration `json:"logging,omitempty"` 64 | 65 | // ClientConnection specifies the kubeconfig file and client connection settings to use when communicating with the apiserver. 66 | // Refer to [ClientConnection](https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/componentconfig#ClientConnectionConfiguration) for more information. 67 | ClientConnection componentbaseconfig.ClientConnectionConfiguration `json:"clientConnection,omitempty"` 68 | } 69 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | // +build !ignore_autogenerated 3 | 4 | /* 5 | Copyright 2023 The Kubernetes Authors. 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 
18 | */ 19 | 20 | // Code generated by conversion-gen. DO NOT EDIT. 21 | 22 | package v1alpha1 23 | 24 | import ( 25 | time "time" 26 | 27 | conversion "k8s.io/apimachinery/pkg/conversion" 28 | runtime "k8s.io/apimachinery/pkg/runtime" 29 | componentconfig "sigs.k8s.io/descheduler/pkg/apis/componentconfig" 30 | ) 31 | 32 | func init() { 33 | localSchemeBuilder.Register(RegisterConversions) 34 | } 35 | 36 | // RegisterConversions adds conversion functions to the given scheme. 37 | // Public to allow building arbitrary schemes. 38 | func RegisterConversions(s *runtime.Scheme) error { 39 | if err := s.AddGeneratedConversionFunc((*DeschedulerConfiguration)(nil), (*componentconfig.DeschedulerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { 40 | return Convert_v1alpha1_DeschedulerConfiguration_To_componentconfig_DeschedulerConfiguration(a.(*DeschedulerConfiguration), b.(*componentconfig.DeschedulerConfiguration), scope) 41 | }); err != nil { 42 | return err 43 | } 44 | if err := s.AddGeneratedConversionFunc((*componentconfig.DeschedulerConfiguration)(nil), (*DeschedulerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { 45 | return Convert_componentconfig_DeschedulerConfiguration_To_v1alpha1_DeschedulerConfiguration(a.(*componentconfig.DeschedulerConfiguration), b.(*DeschedulerConfiguration), scope) 46 | }); err != nil { 47 | return err 48 | } 49 | return nil 50 | } 51 | 52 | func autoConvert_v1alpha1_DeschedulerConfiguration_To_componentconfig_DeschedulerConfiguration(in *DeschedulerConfiguration, out *componentconfig.DeschedulerConfiguration, s conversion.Scope) error { 53 | out.DeschedulingInterval = time.Duration(in.DeschedulingInterval) 54 | out.KubeconfigFile = in.KubeconfigFile 55 | out.PolicyConfigFile = in.PolicyConfigFile 56 | out.DryRun = in.DryRun 57 | out.NodeSelector = in.NodeSelector 58 | out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode 59 | out.EvictLocalStoragePods = in.EvictLocalStoragePods 
60 | out.IgnorePVCPods = in.IgnorePVCPods 61 | out.LeaderElection = in.LeaderElection 62 | out.Logging = in.Logging 63 | out.ClientConnection = in.ClientConnection 64 | return nil 65 | } 66 | 67 | // Convert_v1alpha1_DeschedulerConfiguration_To_componentconfig_DeschedulerConfiguration is an autogenerated conversion function. 68 | func Convert_v1alpha1_DeschedulerConfiguration_To_componentconfig_DeschedulerConfiguration(in *DeschedulerConfiguration, out *componentconfig.DeschedulerConfiguration, s conversion.Scope) error { 69 | return autoConvert_v1alpha1_DeschedulerConfiguration_To_componentconfig_DeschedulerConfiguration(in, out, s) 70 | } 71 | 72 | func autoConvert_componentconfig_DeschedulerConfiguration_To_v1alpha1_DeschedulerConfiguration(in *componentconfig.DeschedulerConfiguration, out *DeschedulerConfiguration, s conversion.Scope) error { 73 | out.DeschedulingInterval = time.Duration(in.DeschedulingInterval) 74 | out.KubeconfigFile = in.KubeconfigFile 75 | out.PolicyConfigFile = in.PolicyConfigFile 76 | out.DryRun = in.DryRun 77 | out.NodeSelector = in.NodeSelector 78 | out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode 79 | out.EvictLocalStoragePods = in.EvictLocalStoragePods 80 | out.IgnorePVCPods = in.IgnorePVCPods 81 | out.LeaderElection = in.LeaderElection 82 | out.Logging = in.Logging 83 | out.ClientConnection = in.ClientConnection 84 | return nil 85 | } 86 | 87 | // Convert_componentconfig_DeschedulerConfiguration_To_v1alpha1_DeschedulerConfiguration is an autogenerated conversion function. 
88 | func Convert_componentconfig_DeschedulerConfiguration_To_v1alpha1_DeschedulerConfiguration(in *componentconfig.DeschedulerConfiguration, out *DeschedulerConfiguration, s conversion.Scope) error { 89 | return autoConvert_componentconfig_DeschedulerConfiguration_To_v1alpha1_DeschedulerConfiguration(in, out, s) 90 | } 91 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | // +build !ignore_autogenerated 3 | 4 | /* 5 | Copyright 2023 The Kubernetes Authors. 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | */ 19 | 20 | // Code generated by deepcopy-gen. DO NOT EDIT. 21 | 22 | package v1alpha1 23 | 24 | import ( 25 | runtime "k8s.io/apimachinery/pkg/runtime" 26 | ) 27 | 28 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 29 | func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) { 30 | *out = *in 31 | out.TypeMeta = in.TypeMeta 32 | out.LeaderElection = in.LeaderElection 33 | in.Logging.DeepCopyInto(&out.Logging) 34 | out.ClientConnection = in.ClientConnection 35 | return 36 | } 37 | 38 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeschedulerConfiguration. 
39 | func (in *DeschedulerConfiguration) DeepCopy() *DeschedulerConfiguration { 40 | if in == nil { 41 | return nil 42 | } 43 | out := new(DeschedulerConfiguration) 44 | in.DeepCopyInto(out) 45 | return out 46 | } 47 | 48 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 49 | func (in *DeschedulerConfiguration) DeepCopyObject() runtime.Object { 50 | if c := in.DeepCopy(); c != nil { 51 | return c 52 | } 53 | return nil 54 | } 55 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/v1alpha1/zz_generated.defaults.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | // +build !ignore_autogenerated 3 | 4 | /* 5 | Copyright 2023 The Kubernetes Authors. 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | */ 19 | 20 | // Code generated by defaulter-gen. DO NOT EDIT. 21 | 22 | package v1alpha1 23 | 24 | import ( 25 | runtime "k8s.io/apimachinery/pkg/runtime" 26 | ) 27 | 28 | // RegisterDefaults adds defaulters functions to the given scheme. 29 | // Public to allow building arbitrary schemes. 30 | // All generated defaulters are covering - they call all nested defaulters. 
31 | func RegisterDefaults(scheme *runtime.Scheme) error { 32 | return nil 33 | } 34 | -------------------------------------------------------------------------------- /pkg/apis/componentconfig/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | // +build !ignore_autogenerated 3 | 4 | /* 5 | Copyright 2023 The Kubernetes Authors. 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | */ 19 | 20 | // Code generated by deepcopy-gen. DO NOT EDIT. 21 | 22 | package componentconfig 23 | 24 | import ( 25 | runtime "k8s.io/apimachinery/pkg/runtime" 26 | ) 27 | 28 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 29 | func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) { 30 | *out = *in 31 | out.TypeMeta = in.TypeMeta 32 | out.LeaderElection = in.LeaderElection 33 | in.Logging.DeepCopyInto(&out.Logging) 34 | out.ClientConnection = in.ClientConnection 35 | return 36 | } 37 | 38 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeschedulerConfiguration. 
39 | func (in *DeschedulerConfiguration) DeepCopy() *DeschedulerConfiguration { 40 | if in == nil { 41 | return nil 42 | } 43 | out := new(DeschedulerConfiguration) 44 | in.DeepCopyInto(out) 45 | return out 46 | } 47 | 48 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 49 | func (in *DeschedulerConfiguration) DeepCopyObject() runtime.Object { 50 | if c := in.DeepCopy(); c != nil { 51 | return c 52 | } 53 | return nil 54 | } 55 | -------------------------------------------------------------------------------- /pkg/descheduler/descheduler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Copyright 2024 The Volcano Authors. 
17 | 18 | Modifications made by Volcano authors: 19 | - [2024]Support crontab expression running descheduler 20 | - [2024]add LoadAware plugin for PreEvictionFilter extension point 21 | */ 22 | 23 | package descheduler 24 | 25 | import ( 26 | "context" 27 | "encoding/json" 28 | "fmt" 29 | "math" 30 | "os" 31 | "strconv" 32 | "strings" 33 | "time" 34 | 35 | "github.com/robfig/cron/v3" 36 | v1 "k8s.io/api/core/v1" 37 | policy "k8s.io/api/policy/v1" 38 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 39 | "k8s.io/apimachinery/pkg/labels" 40 | "k8s.io/apimachinery/pkg/runtime" 41 | "k8s.io/apimachinery/pkg/util/wait" 42 | "k8s.io/client-go/informers" 43 | clientset "k8s.io/client-go/kubernetes" 44 | fakeclientset "k8s.io/client-go/kubernetes/fake" 45 | listersv1 "k8s.io/client-go/listers/core/v1" 46 | schedulingv1 "k8s.io/client-go/listers/scheduling/v1" 47 | core "k8s.io/client-go/testing" 48 | "k8s.io/client-go/tools/events" 49 | componentbaseconfig "k8s.io/component-base/config" 50 | "k8s.io/klog/v2" 51 | "sigs.k8s.io/descheduler/metrics" 52 | "sigs.k8s.io/descheduler/pkg/api" 53 | "sigs.k8s.io/descheduler/pkg/descheduler/client" 54 | "sigs.k8s.io/descheduler/pkg/descheduler/evictions" 55 | eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" 56 | nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" 57 | podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" 58 | "sigs.k8s.io/descheduler/pkg/framework/pluginregistry" 59 | "sigs.k8s.io/descheduler/pkg/utils" 60 | "sigs.k8s.io/descheduler/pkg/version" 61 | 62 | "volcano.sh/descheduler/cmd/descheduler/app/options" 63 | "volcano.sh/descheduler/pkg/framework/plugins/loadaware" 64 | frameworkprofile "volcano.sh/descheduler/pkg/framework/profile" 65 | ) 66 | 67 | const podNameEnvKey string = "HOSTNAME" 68 | const podNamespaceEnvKey string = "POD_NAMESPACE" 69 | 70 | func Run(ctx context.Context, rs *options.DeschedulerServer) error { 71 | metrics.Register() 72 | 73 | clientConnection := rs.ClientConnection 74 
| if rs.KubeconfigFile != "" && clientConnection.Kubeconfig == "" { 75 | clientConnection.Kubeconfig = rs.KubeconfigFile 76 | } 77 | 78 | rsclient, eventClient, err := createClients(clientConnection) 79 | if err != nil { 80 | return err 81 | } 82 | rs.Client = rsclient 83 | rs.EventClient = eventClient 84 | 85 | deschedulerPolicy, err := LoadPolicyConfig(rs.PolicyConfigFile, rs.Client, pluginregistry.PluginRegistry) 86 | if err != nil { 87 | return err 88 | } 89 | if deschedulerPolicy == nil { 90 | return fmt.Errorf("deschedulerPolicy is nil") 91 | } 92 | deschedulerPolicyJson, err := json.MarshalIndent(deschedulerPolicy, "", " ") 93 | if err != nil { 94 | return fmt.Errorf("failed to marshal deschedulerPolicy: %v", err) 95 | } 96 | klog.V(5).InfoS("Successfully load descheduler policy", "deschedulerPolicy", string(deschedulerPolicyJson)) 97 | 98 | // Add k8s compatibility warnings to logs 99 | versionCompatibilityCheck(rs) 100 | 101 | evictionPolicyGroupVersion, err := eutils.SupportEviction(rs.Client) 102 | if err != nil || len(evictionPolicyGroupVersion) == 0 { 103 | return err 104 | } 105 | 106 | runFn := func() error { 107 | return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion) 108 | } 109 | 110 | if rs.DeschedulingInterval.Seconds() == 0 { 111 | _, err = cron.ParseStandard(rs.DeschedulingIntervalCronExpression) 112 | if rs.DeschedulingIntervalCronExpression == "" || err != nil { 113 | return fmt.Errorf("Both DeschedulingInterval and deschedulingIntervalCronExpression are not configured. At least one of these two parameters must be configured. ") 114 | } 115 | klog.V(1).InfoS("Run with deschedulingIntervalCronExpression", "intervalCronExpression", rs.DeschedulingIntervalCronExpression) 116 | } else { 117 | klog.V(1).InfoS("Run with deschedulingInterval", "interval", rs.DeschedulingInterval) 118 | } 119 | 120 | if rs.LeaderElection.LeaderElect && rs.DryRun { 121 | klog.V(1).InfoS("Warning: DryRun is set to True. 
You need to disable it to use Leader Election.")
	}

	if rs.LeaderElection.LeaderElect && !rs.DryRun {
		if err := NewLeaderElection(runFn, rsclient, &rs.LeaderElection, ctx); err != nil {
			return fmt.Errorf("leaderElection: %w", err)
		}
		return nil
	}

	return runFn()
}

// versionCompatibilityCheck warns when the descheduler's minor version and the
// Kubernetes API server's minor version are more than 3 releases apart.
// It only logs warnings; it never blocks startup.
func versionCompatibilityCheck(rs *options.DeschedulerServer) {
	serverVersion, serverErr := rs.Client.Discovery().ServerVersion()
	if serverErr != nil {
		klog.V(1).InfoS("Warning: Get Kubernetes server version fail")
		return
	}

	deschedulerMinorVersion := strings.Split(version.Get().Minor, ".")[0]
	deschedulerMinorVersionFloat, err := strconv.ParseFloat(deschedulerMinorVersion, 64)
	if err != nil {
		klog.Warning("Warning: Convert Descheduler minor version to float fail")
		// Fix: without a parsable descheduler version the comparison below
		// would run against a zero value and emit a misleading warning.
		return
	}

	// Server minor versions can carry a suffix (e.g. "28+") and then fail to
	// parse; bail out instead of comparing against zero.
	kubernetesMinorVersionFloat, err := strconv.ParseFloat(serverVersion.Minor, 64)
	if err != nil {
		klog.Warning("Warning: Convert Kubernetes server minor version to float fail")
		return
	}

	if math.Abs(deschedulerMinorVersionFloat-kubernetesMinorVersionFloat) > 3 {
		klog.Warningf("Warning: Descheduler minor version %v is not supported on your version of Kubernetes %v.%v. See compatibility docs for more info: https://github.com/kubernetes-sigs/descheduler#compatibility-matrix", deschedulerMinorVersion, serverVersion.Major, serverVersion.Minor)
	}
}

// cachedClient builds a fake clientset pre-populated with a snapshot of the
// cluster's pods, nodes, namespaces and priority classes taken from the given
// listers, so a dry run can simulate evictions without touching the real
// cluster. Eviction "create" calls against the returned client are translated
// into deletions on the fake tracker, which lets successive strategies in one
// cycle observe each other's (simulated) evictions.
func cachedClient(
	realClient clientset.Interface,
	podLister listersv1.PodLister,
	nodeLister listersv1.NodeLister,
	namespaceLister listersv1.NamespaceLister,
	priorityClassLister schedulingv1.PriorityClassLister,
) (clientset.Interface, error) {
	fakeClient := fakeclientset.NewSimpleClientset()
	// simulate a pod eviction by deleting a pod
	fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
		if action.GetSubresource() == "eviction" {
			createAct, matched := action.(core.CreateActionImpl)
			if !matched {
				return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
			}
			eviction, matched := createAct.Object.(*policy.Eviction)
			if !matched {
				return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
			}
			if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
				return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
			}
			return true, nil, nil
		}
		// fallback to the default reactor
		return false, nil, nil
	})

	klog.V(3).Infof("Pulling resources for the cached client from the cluster")
	pods, err := podLister.List(labels.Everything())
	if err != nil {
		return nil, fmt.Errorf("unable to list pods: %v", err)
	}

	for _, item := range pods {
		if _, err := fakeClient.CoreV1().Pods(item.Namespace).Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
			return nil, fmt.Errorf("unable to copy pod: %v", err)
		}
	}

	nodes, err := nodeLister.List(labels.Everything())
	if err != nil {
		return nil, fmt.Errorf("unable to list nodes: %v", err)
	}

	for _, item := range nodes {
		if _, err := fakeClient.CoreV1().Nodes().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
			return nil, fmt.Errorf("unable to copy node: %v", err)
		}
	}

	namespaces, err := namespaceLister.List(labels.Everything())
	if err != nil {
		return nil, fmt.Errorf("unable to list namespaces: %v", err)
	}

	for _, item := range namespaces {
		if _, err := fakeClient.CoreV1().Namespaces().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
			return nil, fmt.Errorf("unable to copy namespace: %v", err)
		}
	}

	priorityClasses, err := priorityClassLister.List(labels.Everything())
	if err != nil {
		return nil, fmt.Errorf("unable to list priorityclasses: %v", err)
	}

	for _, item := range priorityClasses {
		if _, err := fakeClient.SchedulingV1().PriorityClasses().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
			return nil, fmt.Errorf("unable to copy priorityclass: %v", err)
		}
	}

	return fakeClient, nil
}

// RunDeschedulerStrategies runs descheduling cycles until ctx is cancelled.
// Each cycle re-loads the policy config (so file changes take effect without a
// restart), collects ready nodes, and runs every profile's Deschedule and
// Balance extension points. Cycles repeat at rs.DeschedulingInterval, or on
// rs.DeschedulingIntervalCronExpression when the interval is zero.
func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string) error {
	sharedInformerFactory := informers.NewSharedInformerFactory(rs.Client, 0)
	podInformer := sharedInformerFactory.Core().V1().Pods().Informer()
	podLister := sharedInformerFactory.Core().V1().Pods().Lister()
	nodeLister := sharedInformerFactory.Core().V1().Nodes().Lister()
	namespaceLister := sharedInformerFactory.Core().V1().Namespaces().Lister()
	priorityClassLister := sharedInformerFactory.Scheduling().V1().PriorityClasses().Lister()

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
	if err != nil {
		return fmt.Errorf("build get pods assigned to node function error: %v", err)
	}

	sharedInformerFactory.Start(ctx.Done())
	sharedInformerFactory.WaitForCacheSync(ctx.Done())

	var nodeSelector string
	if deschedulerPolicy.NodeSelector != nil {
		nodeSelector = *deschedulerPolicy.NodeSelector
	}

	// In dry-run mode events go to a fake client so nothing is recorded for real.
	var eventClient clientset.Interface
	if rs.DryRun {
		eventClient = fakeclientset.NewSimpleClientset()
	} else {
		eventClient = rs.Client
	}

	eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, eventClient)
	defer eventBroadcaster.Shutdown()

	cycleSharedInformerFactory := sharedInformerFactory
	run := func() {
		// Re-load the policy each cycle so config-file updates are picked up.
		deschedulerPolicy, err = LoadPolicyConfig(rs.PolicyConfigFile, rs.Client, pluginregistry.PluginRegistry)
		if err != nil || deschedulerPolicy == nil {
			klog.ErrorS(err, "Failed to load policy config")
			eventDeschedulerParamErr(rs.Client, eventRecorder, err)
			return
		}

		deschedulerPolicyJson, err := json.MarshalIndent(deschedulerPolicy, "", " ")
		if err != nil {
			klog.ErrorS(err, "Failed to marshal descheduler policy")
			return
		}
		klog.V(5).InfoS("Successfully load descheduler policy", "deschedulerPolicy", string(deschedulerPolicyJson))

		// add LoadAware plugin for PreEvictionFilter extension point
		// When configuring the Loadaware plugin, users can implement the PreEvictionFilter extension point by default,
		// which allows consideration of the actual node utilization during eviction.
		for index, profile := range deschedulerPolicy.Profiles {
			for _, balancePlugin := range profile.Plugins.Balance.Enabled {
				if balancePlugin == loadaware.LoadAwareUtilizationPluginName {
					deschedulerPolicy.Profiles[index].Plugins.PreEvictionFilter.Enabled =
						append(deschedulerPolicy.Profiles[index].Plugins.PreEvictionFilter.Enabled, loadaware.LoadAwareUtilizationPluginName)
					break
				}
			}
		}

		loopStart := time.Now()
		defer metrics.DeschedulerLoopDuration.With(map[string]string{}).Observe(time.Since(loopStart).Seconds())
		nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeLister, nodeSelector)
		if err != nil {
			klog.V(1).InfoS("Unable to get ready nodes", "err", err)
			cancel()
			return
		}

		if len(nodes) <= 1 {
			klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
			cancel()
			return
		}

		var client clientset.Interface
		// When the dry mode is enable, collect all the relevant objects (mostly pods) under a fake client.
		// So when evicting pods while running multiple strategies in a row have the cummulative effect
		// as is when evicting pods for real.
		if rs.DryRun {
			klog.V(3).Infof("Building a cached client from the cluster for the dry run")
			// Create a new cache so we start from scratch without any leftovers
			fakeClient, err := cachedClient(rs.Client, podLister, nodeLister, namespaceLister, priorityClassLister)
			if err != nil {
				klog.Error(err)
				return
			}

			// create a new instance of the shared informer factory from the cached client
			fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
			// register the pod informer, otherwise it will not get running
			getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods().Informer())
			if err != nil {
				klog.Errorf("build get pods assigned to node function error: %v", err)
				return
			}

			fakeCtx, cncl := context.WithCancel(context.TODO())
			defer cncl()
			fakeSharedInformerFactory.Start(fakeCtx.Done())
			fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())

			client = fakeClient
			cycleSharedInformerFactory = fakeSharedInformerFactory
		} else {
			client = rs.Client
		}

		klog.V(3).Infof("Building a pod evictor")
		podEvictor := evictions.NewPodEvictor(
			client,
			evictionPolicyGroupVersion,
			rs.DryRun,
			deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
			deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
			nodes,
			!rs.DisableMetrics,
			eventRecorder,
		)

		for _, profile := range deschedulerPolicy.Profiles {
			currProfile, err := frameworkprofile.NewProfile(
				profile,
				pluginregistry.PluginRegistry,
				frameworkprofile.WithClientSet(client),
				frameworkprofile.WithSharedInformerFactory(cycleSharedInformerFactory),
				frameworkprofile.WithPodEvictor(podEvictor),
				frameworkprofile.WithGetPodsAssignedToNodeFnc(getPodsAssignedToNode),
			)
			if err != nil {
				klog.ErrorS(err, "unable to create a profile", "profile", profile.Name)
				continue
			}

			// First deschedule
			status := currProfile.RunDeschedulePlugins(ctx, nodes)
			if status != nil && status.Err != nil {
				klog.ErrorS(status.Err, "running deschedule extension point failed with error", "profile", profile.Name)
				continue
			}
			// Then balance
			status = currProfile.RunBalancePlugins(ctx, nodes)
			if status != nil && status.Err != nil {
				klog.ErrorS(status.Err, "running balance extension point failed with error", "profile", profile.Name)
				continue
			}
		}

		klog.V(1).InfoS("Number of evicted pods", "totalEvicted", podEvictor.TotalEvicted())
	}
	if rs.DeschedulingInterval.Seconds() != 0 {
		wait.NonSlidingUntil(run, rs.DeschedulingInterval, ctx.Done())
	} else {
		c := cron.New()
		// Fix: AddFunc returns an error for an invalid cron expression; it was
		// previously ignored, which would make c.Run() block with no scheduled job.
		if err := c.AddFunc(rs.DeschedulingIntervalCronExpression, run); err != nil {
			return fmt.Errorf("invalid descheduling cron expression %q: %v", rs.DeschedulingIntervalCronExpression, err)
		}
		c.Run()
	}
	return nil
}

// GetPluginConfig returns the PluginConfig with the given name together with
// its index in pluginConfigs. When no entry matches it returns (nil, 0);
// callers must nil-check the pointer before trusting the index, since 0 is
// also a valid position. Note the returned pointer addresses a copy of the
// slice element, not the slice entry itself.
func GetPluginConfig(pluginName string, pluginConfigs []api.PluginConfig) (*api.PluginConfig, int) {
	for idx, pluginConfig := range pluginConfigs {
		if pluginConfig.Name == pluginName {
			return &pluginConfig, idx
		}
	}
	return nil, 0
}

// createClients builds the main descheduler client and a separate client used
// for event recording from the same client-connection configuration.
func createClients(clientConnection componentbaseconfig.ClientConnectionConfiguration) (clientset.Interface, clientset.Interface, error) {
	kClient, err := client.CreateClient(clientConnection, "descheduler")
	if err != nil {
		return nil, nil, err
	}

	eventClient, err := client.CreateClient(clientConnection, "")
	if err != nil {
		return nil, nil, err
	}

	return kClient, eventClient, nil
}

// eventDeschedulerParamErr records a warning event for a policy/parameter
// error on the descheduler's own pod (identified via the pod name/namespace
// environment variables). On failure to fetch the pod it only logs.
func eventDeschedulerParamErr(client clientset.Interface, eventRecorder events.EventRecorder, errParam error) {
	podName := os.Getenv(podNameEnvKey)
	podNamespace := os.Getenv(podNamespaceEnvKey)
	pod, err := client.CoreV1().Pods(podNamespace).Get(context.TODO(), podName, metav1.GetOptions{})
	if err != nil {
		klog.Errorf("Want to event error %v, but get pod error %v", errParam, err)
		return
	}
	eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "Load Config Error", "Warning", "descheduler run err due to parameter error:%v", errParam)
}
-------------------------------------------------------------------------------- /pkg/descheduler/descheduler_test.go: --------------------------------------------------------------------------------
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Copyright 2024 The Volcano Authors.
17 | 18 | Modifications made by Volcano authors: 19 | - [2024]Rename package name to volcano.sh 20 | */ 21 | 22 | package descheduler 23 | 24 | import ( 25 | "context" 26 | "fmt" 27 | "os" 28 | "path" 29 | "testing" 30 | "time" 31 | 32 | v1 "k8s.io/api/core/v1" 33 | policy "k8s.io/api/policy/v1" 34 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 35 | "k8s.io/apimachinery/pkg/conversion" 36 | "k8s.io/apimachinery/pkg/runtime" 37 | fakeclientset "k8s.io/client-go/kubernetes/fake" 38 | core "k8s.io/client-go/testing" 39 | "sigs.k8s.io/descheduler/pkg/api" 40 | "sigs.k8s.io/descheduler/pkg/api/v1alpha1" 41 | "sigs.k8s.io/descheduler/pkg/framework/pluginregistry" 42 | "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" 43 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removeduplicates" 44 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatingnodetaints" 45 | "sigs.k8s.io/descheduler/test" 46 | 47 | "volcano.sh/descheduler/cmd/descheduler/app/options" 48 | ) 49 | 50 | // scope contains information about an ongoing conversion. 51 | type scope struct { 52 | converter *conversion.Converter 53 | meta *conversion.Meta 54 | } 55 | 56 | // Convert continues a conversion. 57 | func (s scope) Convert(src, dest interface{}) error { 58 | return s.converter.Convert(src, dest, s.meta) 59 | } 60 | 61 | // Meta returns the meta object that was originally passed to Convert. 
62 | func (s scope) Meta() *conversion.Meta { 63 | return s.meta 64 | } 65 | 66 | func TestTaintsUpdated(t *testing.T) { 67 | pluginregistry.PluginRegistry = pluginregistry.NewRegistry() 68 | pluginregistry.Register(removepodsviolatingnodetaints.PluginName, removepodsviolatingnodetaints.New, &removepodsviolatingnodetaints.RemovePodsViolatingNodeTaints{}, &removepodsviolatingnodetaints.RemovePodsViolatingNodeTaintsArgs{}, removepodsviolatingnodetaints.ValidateRemovePodsViolatingNodeTaintsArgs, removepodsviolatingnodetaints.SetDefaults_RemovePodsViolatingNodeTaintsArgs, pluginregistry.PluginRegistry) 69 | pluginregistry.Register(defaultevictor.PluginName, defaultevictor.New, &defaultevictor.DefaultEvictor{}, &defaultevictor.DefaultEvictorArgs{}, defaultevictor.ValidateDefaultEvictorArgs, defaultevictor.SetDefaults_DefaultEvictorArgs, pluginregistry.PluginRegistry) 70 | 71 | ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) 72 | defer cancel() 73 | 74 | n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) 75 | n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil) 76 | 77 | p1 := test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, nil) 78 | p1.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ 79 | {}, 80 | } 81 | 82 | client := fakeclientset.NewSimpleClientset(n1, n2, p1) 83 | eventClient := fakeclientset.NewSimpleClientset(n1, n2, p1) 84 | dp := &v1alpha1.DeschedulerPolicy{ 85 | Strategies: v1alpha1.StrategyList{ 86 | "RemovePodsViolatingNodeTaints": v1alpha1.DeschedulerStrategy{ 87 | Enabled: true, 88 | }, 89 | }, 90 | } 91 | 92 | rs, err := options.NewDeschedulerServer() 93 | if err != nil { 94 | t.Fatalf("Unable to initialize server: %v", err) 95 | } 96 | rs.Client = client 97 | rs.EventClient = eventClient 98 | rs.DeschedulingInterval = time.Second 99 | 100 | pods, err := client.CoreV1().Pods(p1.Namespace).List(ctx, metav1.ListOptions{}) 101 | if err != nil { 102 | t.Errorf("Unable to list pods: %v", err) 103 | } 104 | if 
len(pods.Items) < 1 { 105 | t.Errorf("The pod was evicted before a node was tained") 106 | } 107 | 108 | n1WithTaint := n1.DeepCopy() 109 | n1WithTaint.Spec.Taints = []v1.Taint{ 110 | { 111 | Key: "key", 112 | Value: "value", 113 | Effect: v1.TaintEffectNoSchedule, 114 | }, 115 | } 116 | 117 | if _, err := client.CoreV1().Nodes().Update(ctx, n1WithTaint, metav1.UpdateOptions{}); err != nil { 118 | t.Fatalf("Unable to update node: %v\n", err) 119 | } 120 | 121 | var evictedPods []string 122 | client.PrependReactor("create", "pods", podEvictionReactionFuc(&evictedPods)) 123 | 124 | internalDeschedulerPolicy := &api.DeschedulerPolicy{} 125 | scope := scope{} 126 | err = v1alpha1.V1alpha1ToInternal(dp, pluginregistry.PluginRegistry, internalDeschedulerPolicy, scope) 127 | if err != nil { 128 | t.Fatalf("Unable to convert v1alpha1 to v1alpha2: %v", err) 129 | } 130 | 131 | tmpDir := t.TempDir() 132 | filePath := path.Join(tmpDir, "descheduler-policyConfig.yaml") 133 | policyConfig := []byte(` 134 | apiVersion: "descheduler/v1alpha2" 135 | kind: "DeschedulerPolicy" 136 | profiles: 137 | - name: strategy-RemovePodsViolatingNodeTaints-profile 138 | pluginConfig: 139 | - name: "RemovePodsViolatingNodeTaints" 140 | - name: "DefaultEvictor" 141 | plugins: 142 | deschedule: 143 | enabled: 144 | - "RemovePodsViolatingNodeTaints" 145 | filter: 146 | enabled: 147 | - "DefaultEvictor" 148 | preEvictionFilter: 149 | enabled: 150 | - "DefaultEvictor" 151 | `) 152 | err = os.WriteFile(filePath, policyConfig, 0644) 153 | if err != nil { 154 | t.Fatalf("Failed to write policyConfig to file: %v", err) 155 | } 156 | rs.PolicyConfigFile = filePath 157 | 158 | if err := RunDeschedulerStrategies(ctx, rs, internalDeschedulerPolicy, "v1"); err != nil { 159 | t.Fatalf("Unable to run descheduler strategies: %v", err) 160 | } 161 | 162 | if len(evictedPods) == 0 { 163 | t.Fatalf("Unable to evict pod, node taint did not get propagated to descheduler strategies %v\n", err) 164 | } 165 | } 166 | 
167 | func TestDuplicate(t *testing.T) { 168 | pluginregistry.PluginRegistry = pluginregistry.NewRegistry() 169 | pluginregistry.Register(removeduplicates.PluginName, removeduplicates.New, &removeduplicates.RemoveDuplicates{}, &removeduplicates.RemoveDuplicatesArgs{}, removeduplicates.ValidateRemoveDuplicatesArgs, removeduplicates.SetDefaults_RemoveDuplicatesArgs, pluginregistry.PluginRegistry) 170 | pluginregistry.Register(defaultevictor.PluginName, defaultevictor.New, &defaultevictor.DefaultEvictor{}, &defaultevictor.DefaultEvictorArgs{}, defaultevictor.ValidateDefaultEvictorArgs, defaultevictor.SetDefaults_DefaultEvictorArgs, pluginregistry.PluginRegistry) 171 | 172 | ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) 173 | defer cancel() 174 | 175 | node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) 176 | node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil) 177 | 178 | p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) 179 | p1.Namespace = "dev" 180 | p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil) 181 | p2.Namespace = "dev" 182 | p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil) 183 | p3.Namespace = "dev" 184 | 185 | ownerRef1 := test.GetReplicaSetOwnerRefList() 186 | p1.ObjectMeta.OwnerReferences = ownerRef1 187 | p2.ObjectMeta.OwnerReferences = ownerRef1 188 | p3.ObjectMeta.OwnerReferences = ownerRef1 189 | 190 | client := fakeclientset.NewSimpleClientset(node1, node2, p1, p2, p3) 191 | eventClient := fakeclientset.NewSimpleClientset(node1, node2, p1, p2, p3) 192 | dp := &v1alpha1.DeschedulerPolicy{ 193 | Strategies: v1alpha1.StrategyList{ 194 | "RemoveDuplicates": v1alpha1.DeschedulerStrategy{ 195 | Enabled: true, 196 | }, 197 | }, 198 | } 199 | 200 | rs, err := options.NewDeschedulerServer() 201 | if err != nil { 202 | t.Fatalf("Unable to initialize server: %v", err) 203 | } 204 | rs.Client = client 205 | rs.EventClient = eventClient 206 | rs.DeschedulingInterval = time.Second 207 | 208 | pods, err := 
client.CoreV1().Pods(p1.Namespace).List(ctx, metav1.ListOptions{}) 209 | if err != nil { 210 | t.Errorf("Unable to list pods: %v", err) 211 | } 212 | 213 | if len(pods.Items) != 3 { 214 | t.Errorf("Pods number should be 3 before evict") 215 | } 216 | 217 | var evictedPods []string 218 | client.PrependReactor("create", "pods", podEvictionReactionFuc(&evictedPods)) 219 | 220 | internalDeschedulerPolicy := &api.DeschedulerPolicy{} 221 | scope := scope{} 222 | err = v1alpha1.V1alpha1ToInternal(dp, pluginregistry.PluginRegistry, internalDeschedulerPolicy, scope) 223 | if err != nil { 224 | t.Fatalf("Unable to convert v1alpha1 to v1alpha2: %v", err) 225 | } 226 | 227 | tmpDir := t.TempDir() 228 | filePath := path.Join(tmpDir, "descheduler-policyConfig.yaml") 229 | policyConfig := []byte(` 230 | apiVersion: "descheduler/v1alpha2" 231 | kind: "DeschedulerPolicy" 232 | profiles: 233 | - name: strategy-RemoveDuplicates-profile 234 | pluginConfig: 235 | - name: "RemoveDuplicates" 236 | - name: "DefaultEvictor" 237 | plugins: 238 | balance: 239 | enabled: 240 | - "RemoveDuplicates" 241 | filter: 242 | enabled: 243 | - "DefaultEvictor" 244 | preEvictionFilter: 245 | enabled: 246 | - "DefaultEvictor" 247 | `) 248 | err = os.WriteFile(filePath, policyConfig, 0644) 249 | if err != nil { 250 | t.Fatalf("Failed to write policyConfig to file: %v", err) 251 | } 252 | rs.PolicyConfigFile = filePath 253 | 254 | if err := RunDeschedulerStrategies(ctx, rs, internalDeschedulerPolicy, "v1"); err != nil { 255 | t.Fatalf("Unable to run descheduler strategies: %v", err) 256 | } 257 | 258 | if len(evictedPods) == 0 { 259 | t.Fatalf("Unable to evict pod, node taint did not get propagated to descheduler strategies %v\n", err) 260 | } 261 | } 262 | 263 | func TestRootCancel(t *testing.T) { 264 | ctx, cancel := context.WithCancel(context.Background()) 265 | n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) 266 | n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil) 267 | client := 
fakeclientset.NewSimpleClientset(n1, n2) 268 | eventClient := fakeclientset.NewSimpleClientset(n1, n2) 269 | dp := &api.DeschedulerPolicy{ 270 | Profiles: []api.DeschedulerProfile{}, // no strategies needed for this test 271 | } 272 | 273 | rs, err := options.NewDeschedulerServer() 274 | if err != nil { 275 | t.Fatalf("Unable to initialize server: %v", err) 276 | } 277 | rs.Client = client 278 | rs.EventClient = eventClient 279 | rs.DeschedulingInterval = 100 * time.Millisecond 280 | errChan := make(chan error, 1) 281 | defer close(errChan) 282 | 283 | go func() { 284 | err := RunDeschedulerStrategies(ctx, rs, dp, "v1") 285 | errChan <- err 286 | }() 287 | cancel() 288 | select { 289 | case err := <-errChan: 290 | if err != nil { 291 | t.Fatalf("Unable to run descheduler strategies: %v", err) 292 | } 293 | case <-time.After(1 * time.Second): 294 | t.Fatal("Root ctx should have canceled immediately") 295 | } 296 | } 297 | 298 | func TestRootCancelWithNoInterval(t *testing.T) { 299 | ctx, cancel := context.WithCancel(context.Background()) 300 | n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) 301 | n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil) 302 | client := fakeclientset.NewSimpleClientset(n1, n2) 303 | eventClient := fakeclientset.NewSimpleClientset(n1, n2) 304 | dp := &api.DeschedulerPolicy{ 305 | Profiles: []api.DeschedulerProfile{}, // no strategies needed for this test 306 | } 307 | 308 | rs, err := options.NewDeschedulerServer() 309 | if err != nil { 310 | t.Fatalf("Unable to initialize server: %v", err) 311 | } 312 | rs.Client = client 313 | rs.EventClient = eventClient 314 | rs.DeschedulingInterval = 1 315 | errChan := make(chan error, 1) 316 | defer close(errChan) 317 | 318 | go func() { 319 | err := RunDeschedulerStrategies(ctx, rs, dp, "v1") 320 | errChan <- err 321 | }() 322 | cancel() 323 | select { 324 | case err := <-errChan: 325 | if err != nil { 326 | t.Fatalf("Unable to run descheduler strategies: %v", err) 327 | } 328 | case 
<-time.After(1 * time.Second): 329 | t.Fatal("Root ctx should have canceled immediately") 330 | } 331 | } 332 | 333 | func podEvictionReactionFuc(evictedPods *[]string) func(action core.Action) (bool, runtime.Object, error) { 334 | return func(action core.Action) (bool, runtime.Object, error) { 335 | if action.GetSubresource() == "eviction" { 336 | createAct, matched := action.(core.CreateActionImpl) 337 | if !matched { 338 | return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl") 339 | } 340 | if eviction, matched := createAct.Object.(*policy.Eviction); matched { 341 | *evictedPods = append(*evictedPods, eviction.GetName()) 342 | } 343 | } 344 | return false, nil, nil // fallback to the default reactor 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /pkg/descheduler/leaderelection.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package descheduler 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "os" 23 | 24 | "k8s.io/apimachinery/pkg/util/uuid" 25 | clientset "k8s.io/client-go/kubernetes" 26 | "k8s.io/client-go/tools/leaderelection" 27 | "k8s.io/client-go/tools/leaderelection/resourcelock" 28 | componentbaseconfig "k8s.io/component-base/config" 29 | "k8s.io/klog/v2" 30 | ) 31 | 32 | // NewLeaderElection starts the leader election code loop 33 | func NewLeaderElection( 34 | run func() error, 35 | client clientset.Interface, 36 | LeaderElectionConfig *componentbaseconfig.LeaderElectionConfiguration, 37 | ctx context.Context, 38 | ) error { 39 | var id string 40 | 41 | if hostname, err := os.Hostname(); err != nil { 42 | // on errors, make sure we're unique 43 | id = string(uuid.NewUUID()) 44 | } else { 45 | // add a uniquifier so that two processes on the same host don't accidentally both become active 46 | id = hostname + "_" + string(uuid.NewUUID()) 47 | } 48 | 49 | klog.V(3).Infof("Assigned unique lease holder id: %s", id) 50 | 51 | if len(LeaderElectionConfig.ResourceNamespace) == 0 { 52 | return fmt.Errorf("namespace may not be empty") 53 | } 54 | 55 | if len(LeaderElectionConfig.ResourceName) == 0 { 56 | return fmt.Errorf("name may not be empty") 57 | } 58 | 59 | lock, err := resourcelock.New( 60 | LeaderElectionConfig.ResourceLock, 61 | LeaderElectionConfig.ResourceNamespace, 62 | LeaderElectionConfig.ResourceName, 63 | client.CoreV1(), 64 | client.CoordinationV1(), 65 | resourcelock.ResourceLockConfig{ 66 | Identity: id, 67 | }, 68 | ) 69 | if err != nil { 70 | return fmt.Errorf("unable to create leader election lock: %v", err) 71 | } 72 | 73 | leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ 74 | Lock: lock, 75 | ReleaseOnCancel: true, 76 | LeaseDuration: LeaderElectionConfig.LeaseDuration.Duration, 77 | RenewDeadline: LeaderElectionConfig.RenewDeadline.Duration, 78 | RetryPeriod: LeaderElectionConfig.RetryPeriod.Duration, 79 | Callbacks: 
leaderelection.LeaderCallbacks{ 80 | OnStartedLeading: func(ctx context.Context) { 81 | klog.V(1).InfoS("Started leading") 82 | err := run() 83 | if err != nil { 84 | klog.Error(err) 85 | } 86 | }, 87 | OnStoppedLeading: func() { 88 | klog.V(1).InfoS("Leader lost") 89 | }, 90 | OnNewLeader: func(identity string) { 91 | // Just got the lock 92 | if identity == id { 93 | return 94 | } 95 | klog.V(1).Infof("New leader elected: %v", identity) 96 | }, 97 | }, 98 | }) 99 | return nil 100 | } 101 | -------------------------------------------------------------------------------- /pkg/descheduler/policyconfig.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package descheduler 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "os" 23 | 24 | "k8s.io/apimachinery/pkg/runtime" 25 | utilerrors "k8s.io/apimachinery/pkg/util/errors" 26 | clientset "k8s.io/client-go/kubernetes" 27 | "k8s.io/klog/v2" 28 | 29 | "sigs.k8s.io/descheduler/pkg/api" 30 | "sigs.k8s.io/descheduler/pkg/api/v1alpha1" 31 | "sigs.k8s.io/descheduler/pkg/api/v1alpha2" 32 | "sigs.k8s.io/descheduler/pkg/descheduler/scheme" 33 | "sigs.k8s.io/descheduler/pkg/framework/pluginregistry" 34 | "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" 35 | "sigs.k8s.io/descheduler/pkg/utils" 36 | ) 37 | 38 | func LoadPolicyConfig(policyConfigFile string, client clientset.Interface, registry pluginregistry.Registry) (*api.DeschedulerPolicy, error) { 39 | if policyConfigFile == "" { 40 | klog.V(1).InfoS("Policy config file not specified") 41 | return nil, nil 42 | } 43 | 44 | policy, err := os.ReadFile(policyConfigFile) 45 | if err != nil { 46 | return nil, fmt.Errorf("failed to read policy config file %q: %+v", policyConfigFile, err) 47 | } 48 | 49 | return decode(policyConfigFile, policy, client, registry) 50 | } 51 | 52 | func decode(policyConfigFile string, policy []byte, client clientset.Interface, registry pluginregistry.Registry) (*api.DeschedulerPolicy, error) { 53 | internalPolicy := &api.DeschedulerPolicy{} 54 | var err error 55 | 56 | decoder := scheme.Codecs.UniversalDecoder(v1alpha1.SchemeGroupVersion, v1alpha2.SchemeGroupVersion, api.SchemeGroupVersion) 57 | if err := runtime.DecodeInto(decoder, policy, internalPolicy); err != nil { 58 | return nil, fmt.Errorf("failed decoding descheduler's policy config %q: %v", policyConfigFile, err) 59 | } 60 | 61 | err = validateDeschedulerConfiguration(*internalPolicy, registry) 62 | if err != nil { 63 | return nil, err 64 | } 65 | 66 | setDefaults(*internalPolicy, registry, client) 67 | 68 | return internalPolicy, nil 69 | } 70 | 71 | func setDefaults(in api.DeschedulerPolicy, registry 
pluginregistry.Registry, client clientset.Interface) *api.DeschedulerPolicy { 72 | for idx, profile := range in.Profiles { 73 | // If we need to set defaults coming from loadtime in each profile we do it here 74 | in.Profiles[idx] = setDefaultEvictor(profile, client) 75 | for _, pluginConfig := range profile.PluginConfigs { 76 | setDefaultsPluginConfig(&pluginConfig, registry) 77 | } 78 | } 79 | return &in 80 | } 81 | 82 | func setDefaultsPluginConfig(pluginConfig *api.PluginConfig, registry pluginregistry.Registry) { 83 | if _, ok := registry[pluginConfig.Name]; ok { 84 | pluginUtilities := registry[pluginConfig.Name] 85 | if pluginUtilities.PluginArgDefaulter != nil { 86 | pluginUtilities.PluginArgDefaulter(pluginConfig.Args) 87 | } 88 | } 89 | } 90 | 91 | func findPluginName(names []string, key string) bool { 92 | for _, name := range names { 93 | if name == key { 94 | return true 95 | } 96 | } 97 | return false 98 | } 99 | 100 | func setDefaultEvictor(profile api.DeschedulerProfile, client clientset.Interface) api.DeschedulerProfile { 101 | newPluginConfig := api.PluginConfig{ 102 | Name: defaultevictor.PluginName, 103 | Args: &defaultevictor.DefaultEvictorArgs{ 104 | EvictLocalStoragePods: false, 105 | EvictSystemCriticalPods: false, 106 | IgnorePvcPods: false, 107 | EvictFailedBarePods: false, 108 | }, 109 | } 110 | 111 | // Always enable DefaultEvictor plugin for filter/preEvictionFilter extension points 112 | if !findPluginName(profile.Plugins.Filter.Enabled, defaultevictor.PluginName) { 113 | profile.Plugins.Filter.Enabled = append([]string{defaultevictor.PluginName}, profile.Plugins.Filter.Enabled...) 114 | } 115 | 116 | if !findPluginName(profile.Plugins.PreEvictionFilter.Enabled, defaultevictor.PluginName) { 117 | profile.Plugins.PreEvictionFilter.Enabled = append([]string{defaultevictor.PluginName}, profile.Plugins.PreEvictionFilter.Enabled...) 
118 | } 119 | 120 | defaultevictorPluginConfig, idx := GetPluginConfig(defaultevictor.PluginName, profile.PluginConfigs) 121 | if defaultevictorPluginConfig == nil { 122 | profile.PluginConfigs = append([]api.PluginConfig{newPluginConfig}, profile.PluginConfigs...) 123 | defaultevictorPluginConfig = &newPluginConfig 124 | idx = 0 125 | } 126 | 127 | thresholdPriority, err := utils.GetPriorityValueFromPriorityThreshold(context.TODO(), client, defaultevictorPluginConfig.Args.(*defaultevictor.DefaultEvictorArgs).PriorityThreshold) 128 | if err != nil { 129 | klog.Error(err, "Failed to get threshold priority from args") 130 | } 131 | profile.PluginConfigs[idx].Args.(*defaultevictor.DefaultEvictorArgs).PriorityThreshold = &api.PriorityThreshold{} 132 | profile.PluginConfigs[idx].Args.(*defaultevictor.DefaultEvictorArgs).PriorityThreshold.Value = &thresholdPriority 133 | return profile 134 | } 135 | 136 | func validateDeschedulerConfiguration(in api.DeschedulerPolicy, registry pluginregistry.Registry) error { 137 | var errorsInProfiles []error 138 | for _, profile := range in.Profiles { 139 | for _, pluginConfig := range profile.PluginConfigs { 140 | if _, ok := registry[pluginConfig.Name]; !ok { 141 | errorsInProfiles = append(errorsInProfiles, fmt.Errorf("in profile %s: plugin %s in pluginConfig not registered", profile.Name, pluginConfig.Name)) 142 | continue 143 | } 144 | 145 | pluginUtilities := registry[pluginConfig.Name] 146 | if pluginUtilities.PluginArgValidator == nil { 147 | continue 148 | } 149 | if err := pluginUtilities.PluginArgValidator(pluginConfig.Args); err != nil { 150 | errorsInProfiles = append(errorsInProfiles, fmt.Errorf("in profile %s: %s", profile.Name, err.Error())) 151 | } 152 | } 153 | } 154 | return utilerrors.NewAggregate(errorsInProfiles) 155 | } 156 | -------------------------------------------------------------------------------- /pkg/descheduler/setupplugins.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package descheduler 18 | 19 | import ( 20 | "sigs.k8s.io/descheduler/pkg/framework/pluginregistry" 21 | "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" 22 | "sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization" 23 | "sigs.k8s.io/descheduler/pkg/framework/plugins/podlifetime" 24 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removeduplicates" 25 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removefailedpods" 26 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodshavingtoomanyrestarts" 27 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity" 28 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatingnodeaffinity" 29 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatingnodetaints" 30 | "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint" 31 | ) 32 | 33 | func SetupPlugins() { 34 | pluginregistry.PluginRegistry = pluginregistry.NewRegistry() 35 | RegisterDefaultPlugins(pluginregistry.PluginRegistry) 36 | } 37 | 38 | func RegisterDefaultPlugins(registry pluginregistry.Registry) { 39 | pluginregistry.Register(defaultevictor.PluginName, defaultevictor.New, &defaultevictor.DefaultEvictor{}, &defaultevictor.DefaultEvictorArgs{}, 
defaultevictor.ValidateDefaultEvictorArgs, defaultevictor.SetDefaults_DefaultEvictorArgs, registry) 40 | pluginregistry.Register(nodeutilization.LowNodeUtilizationPluginName, nodeutilization.NewLowNodeUtilization, &nodeutilization.LowNodeUtilization{}, &nodeutilization.LowNodeUtilizationArgs{}, nodeutilization.ValidateLowNodeUtilizationArgs, nodeutilization.SetDefaults_LowNodeUtilizationArgs, registry) 41 | pluginregistry.Register(nodeutilization.HighNodeUtilizationPluginName, nodeutilization.NewHighNodeUtilization, &nodeutilization.HighNodeUtilization{}, &nodeutilization.HighNodeUtilizationArgs{}, nodeutilization.ValidateHighNodeUtilizationArgs, nodeutilization.SetDefaults_HighNodeUtilizationArgs, registry) 42 | pluginregistry.Register(podlifetime.PluginName, podlifetime.New, &podlifetime.PodLifeTime{}, &podlifetime.PodLifeTimeArgs{}, podlifetime.ValidatePodLifeTimeArgs, podlifetime.SetDefaults_PodLifeTimeArgs, registry) 43 | pluginregistry.Register(removeduplicates.PluginName, removeduplicates.New, &removeduplicates.RemoveDuplicates{}, &removeduplicates.RemoveDuplicatesArgs{}, removeduplicates.ValidateRemoveDuplicatesArgs, removeduplicates.SetDefaults_RemoveDuplicatesArgs, registry) 44 | pluginregistry.Register(removefailedpods.PluginName, removefailedpods.New, &removefailedpods.RemoveFailedPods{}, &removefailedpods.RemoveFailedPodsArgs{}, removefailedpods.ValidateRemoveFailedPodsArgs, removefailedpods.SetDefaults_RemoveFailedPodsArgs, registry) 45 | pluginregistry.Register(removepodshavingtoomanyrestarts.PluginName, removepodshavingtoomanyrestarts.New, &removepodshavingtoomanyrestarts.RemovePodsHavingTooManyRestarts{}, &removepodshavingtoomanyrestarts.RemovePodsHavingTooManyRestartsArgs{}, removepodshavingtoomanyrestarts.ValidateRemovePodsHavingTooManyRestartsArgs, removepodshavingtoomanyrestarts.SetDefaults_RemovePodsHavingTooManyRestartsArgs, registry) 46 | pluginregistry.Register(removepodsviolatinginterpodantiaffinity.PluginName, 
removepodsviolatinginterpodantiaffinity.New, &removepodsviolatinginterpodantiaffinity.RemovePodsViolatingInterPodAntiAffinity{}, &removepodsviolatinginterpodantiaffinity.RemovePodsViolatingInterPodAntiAffinityArgs{}, removepodsviolatinginterpodantiaffinity.ValidateRemovePodsViolatingInterPodAntiAffinityArgs, removepodsviolatinginterpodantiaffinity.SetDefaults_RemovePodsViolatingInterPodAntiAffinityArgs, registry) 47 | pluginregistry.Register(removepodsviolatingnodeaffinity.PluginName, removepodsviolatingnodeaffinity.New, &removepodsviolatingnodeaffinity.RemovePodsViolatingNodeAffinity{}, &removepodsviolatingnodeaffinity.RemovePodsViolatingNodeAffinityArgs{}, removepodsviolatingnodeaffinity.ValidateRemovePodsViolatingNodeAffinityArgs, removepodsviolatingnodeaffinity.SetDefaults_RemovePodsViolatingNodeAffinityArgs, registry) 48 | pluginregistry.Register(removepodsviolatingnodetaints.PluginName, removepodsviolatingnodetaints.New, &removepodsviolatingnodetaints.RemovePodsViolatingNodeTaints{}, &removepodsviolatingnodetaints.RemovePodsViolatingNodeTaintsArgs{}, removepodsviolatingnodetaints.ValidateRemovePodsViolatingNodeTaintsArgs, removepodsviolatingnodetaints.SetDefaults_RemovePodsViolatingNodeTaintsArgs, registry) 49 | pluginregistry.Register(removepodsviolatingtopologyspreadconstraint.PluginName, removepodsviolatingtopologyspreadconstraint.New, &removepodsviolatingtopologyspreadconstraint.RemovePodsViolatingTopologySpreadConstraint{}, &removepodsviolatingtopologyspreadconstraint.RemovePodsViolatingTopologySpreadConstraintArgs{}, removepodsviolatingtopologyspreadconstraint.ValidateRemovePodsViolatingTopologySpreadConstraintArgs, removepodsviolatingtopologyspreadconstraint.SetDefaults_RemovePodsViolatingTopologySpreadConstraintArgs, registry) 50 | } 51 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/deepcopy_generated.go: 
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LoadAwareUtilizationArgs) DeepCopyInto(out *LoadAwareUtilizationArgs) {
	*out = *in
	out.TypeMeta = in.TypeMeta
	if in.Thresholds != nil {
		in, out := &in.Thresholds, &out.Thresholds
		*out = make(api.ResourceThresholds, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.TargetThresholds != nil {
		in, out := &in.TargetThresholds, &out.TargetThresholds
		*out = make(api.ResourceThresholds, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.EvictableNamespaces != nil {
		in, out := &in.EvictableNamespaces, &out.EvictableNamespaces
		*out = new(api.Namespaces)
		(*in).DeepCopyInto(*out)
	}
	// NOTE(review): shallow assignment — assumes MetricsConfiguration contains no
	// reference types (maps/slices/pointers); confirm against its definition.
	out.MetricsConfiguration = in.MetricsConfiguration
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LoadAwareUtilizationArgs.
func (in *LoadAwareUtilizationArgs) DeepCopy() *LoadAwareUtilizationArgs {
	if in == nil {
		return nil
	}
	out := new(LoadAwareUtilizationArgs)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *LoadAwareUtilizationArgs) DeepCopyObject() runtime.Object {
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package loadaware 18 | 19 | import ( 20 | "k8s.io/apimachinery/pkg/runtime" 21 | "k8s.io/klog/v2" 22 | ) 23 | 24 | func SetDefaults_LoadAwareUtilizationArgs(obj runtime.Object) { 25 | args, ok := obj.(*LoadAwareUtilizationArgs) 26 | if !ok { 27 | klog.Errorf("obj with type %T could not parse", obj) 28 | } 29 | if !args.UseDeviationThresholds { 30 | args.UseDeviationThresholds = false 31 | } 32 | if args.Thresholds == nil { 33 | args.Thresholds = nil 34 | } 35 | if args.TargetThresholds == nil { 36 | args.TargetThresholds = nil 37 | } 38 | if args.NumberOfNodes == 0 { 39 | args.NumberOfNodes = 0 40 | } 41 | if args.Duration == "" { 42 | args.Duration = "2m" 43 | } 44 | //defaultEvictor 45 | if args.NodeSelector == "" { 46 | args.NodeSelector = "" 47 | } 48 | if !args.EvictLocalStoragePods { 49 | args.EvictLocalStoragePods = false 50 | } 51 | if !args.EvictSystemCriticalPods { 52 | args.EvictSystemCriticalPods = false 53 | } 54 | if !args.IgnorePvcPods { 55 | args.IgnorePvcPods = false 56 | } 57 | if !args.EvictFailedBarePods { 58 | args.EvictFailedBarePods = false 59 | } 60 | if args.LabelSelector == nil { 61 | args.LabelSelector = nil 62 | } 63 | if args.PriorityThreshold == nil { 64 | args.PriorityThreshold = nil 65 | } 66 | if !args.NodeFit { 67 | args.NodeFit = false 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/defaults_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 
The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package loadaware 18 | 19 | import ( 20 | "testing" 21 | 22 | "github.com/google/go-cmp/cmp" 23 | v1 "k8s.io/api/core/v1" 24 | "k8s.io/apimachinery/pkg/runtime" 25 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 26 | "sigs.k8s.io/descheduler/pkg/api" 27 | ) 28 | 29 | func TestSetDefaults_LowNodeUtilizationArgs(t *testing.T) { 30 | tests := []struct { 31 | name string 32 | in runtime.Object 33 | want runtime.Object 34 | }{ 35 | { 36 | name: "LowNodeUtilizationArgs empty", 37 | in: &LoadAwareUtilizationArgs{}, 38 | want: &LoadAwareUtilizationArgs{ 39 | UseDeviationThresholds: false, 40 | Thresholds: nil, 41 | TargetThresholds: nil, 42 | NumberOfNodes: 0, 43 | }, 44 | }, 45 | { 46 | name: "LowNodeUtilizationArgs with value", 47 | in: &LoadAwareUtilizationArgs{ 48 | UseDeviationThresholds: true, 49 | Thresholds: api.ResourceThresholds{ 50 | v1.ResourceCPU: 20, 51 | v1.ResourceMemory: 120, 52 | }, 53 | TargetThresholds: api.ResourceThresholds{ 54 | v1.ResourceCPU: 80, 55 | v1.ResourceMemory: 80, 56 | }, 57 | NumberOfNodes: 10, 58 | }, 59 | want: &LoadAwareUtilizationArgs{ 60 | UseDeviationThresholds: true, 61 | Thresholds: api.ResourceThresholds{ 62 | v1.ResourceCPU: 20, 63 | v1.ResourceMemory: 120, 64 | }, 65 | TargetThresholds: api.ResourceThresholds{ 66 | v1.ResourceCPU: 80, 67 | v1.ResourceMemory: 80, 68 | }, 69 | NumberOfNodes: 10, 70 | }, 71 | }, 72 | } 73 | 
for _, tc := range tests { 74 | scheme := runtime.NewScheme() 75 | utilruntime.Must(AddToScheme(scheme)) 76 | t.Run(tc.name, func(t *testing.T) { 77 | scheme.Default(tc.in) 78 | if diff := cmp.Diff(tc.in, tc.want); diff != "" { 79 | t.Errorf("Got unexpected defaults (-want, +got):\n%s", diff) 80 | } 81 | }) 82 | } 83 | } 84 | 85 | func TestSetDefaults_HighNodeUtilizationArgs(t *testing.T) { 86 | tests := []struct { 87 | name string 88 | in runtime.Object 89 | want runtime.Object 90 | }{ 91 | { 92 | name: "HighNodeUtilizationArgs empty", 93 | in: &LoadAwareUtilizationArgs{}, 94 | want: &LoadAwareUtilizationArgs{ 95 | Thresholds: nil, 96 | NumberOfNodes: 0, 97 | }, 98 | }, 99 | { 100 | name: "HighNodeUtilizationArgs with value", 101 | in: &LoadAwareUtilizationArgs{ 102 | Thresholds: api.ResourceThresholds{ 103 | v1.ResourceCPU: 20, 104 | v1.ResourceMemory: 120, 105 | }, 106 | NumberOfNodes: 10, 107 | }, 108 | want: &LoadAwareUtilizationArgs{ 109 | Thresholds: api.ResourceThresholds{ 110 | v1.ResourceCPU: 20, 111 | v1.ResourceMemory: 120, 112 | }, 113 | NumberOfNodes: 10, 114 | }, 115 | }, 116 | } 117 | for _, tc := range tests { 118 | scheme := runtime.NewScheme() 119 | utilruntime.Must(AddToScheme(scheme)) 120 | t.Run(tc.name, func(t *testing.T) { 121 | scheme.Default(tc.in) 122 | if diff := cmp.Diff(tc.in, tc.want); diff != "" { 123 | t.Errorf("Got unexpected defaults (-want, +got):\n%s", diff) 124 | } 125 | }) 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/load_aware.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
// LoadAwareUtilizationPluginName is the name this plugin is registered under.
const LoadAwareUtilizationPluginName = "LoadAware"

// LoadAwareUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory actual resource usage are used
// to calculate nodes' utilization and not the Request.

type LoadAwareUtilization struct {
	handle    frameworktypes.Handle      // framework handle: evictor, client, informers, node indexer
	args      *LoadAwareUtilizationArgs  // plugin configuration (thresholds, node selector, ...)
	podFilter func(pod *v1.Pod) bool     // pod filter built from the evictor's Filter extension
	usages    []NodeUsage                // per-node usage snapshot, refreshed on each Balance run
}

var _ frameworktypes.BalancePlugin = &LoadAwareUtilization{}

// NewLoadAwareUtilization builds plugin from its arguments while passing a handle
func NewLoadAwareUtilization(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) {
	lowNodeUtilizationArgsArgs, ok := args.(*LoadAwareUtilizationArgs)
	if !ok {
		return nil, fmt.Errorf("want args to be of type LoadAwareUtilizationArgs, got %T", args)
	}

	podFilter, err := podutil.NewOptions().
		WithFilter(handle.Evictor().Filter).
		BuildFilterFunc()
	if err != nil {
		return nil, fmt.Errorf("error initializing pod filter function: %v", err)
	}
	usages := make([]NodeUsage, 0)

	return &LoadAwareUtilization{
		handle:    handle,
		args:      lowNodeUtilizationArgsArgs,
		podFilter: podFilter,
		usages:    usages,
	}, nil
}

// Name retrieves the plugin name
func (l *LoadAwareUtilization) Name() string {
	return LoadAwareUtilizationPluginName
}

// Balance extension point implementation for the plugin.
// It classifies nodes into under- and over-utilized sets against the configured
// thresholds and evicts pods from the over-utilized (source) nodes until each
// drops below its target threshold or the under-utilized nodes run out of room.
func (l *LoadAwareUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
	useDeviationThresholds := l.args.UseDeviationThresholds
	thresholds := l.args.Thresholds
	targetThresholds := l.args.TargetThresholds

	// NOTE(review): the writes below assume Thresholds/TargetThresholds maps are
	// non-nil — writing to a nil map panics; confirm defaulting/validation
	// guarantees both maps exist before Balance runs.
	// check if CPU/Mem are set, if not, set thresholds and targetThresholds to same value, so that the unseted resource(cpu or memory) will not affect the descheduler
	if _, ok := thresholds[v1.ResourcePods]; !ok {
		if useDeviationThresholds {
			thresholds[v1.ResourcePods] = MinResourcePercentage
			targetThresholds[v1.ResourcePods] = MinResourcePercentage
		} else {
			thresholds[v1.ResourcePods] = MaxResourcePercentage
			targetThresholds[v1.ResourcePods] = MaxResourcePercentage
		}
	}
	if _, ok := thresholds[v1.ResourceCPU]; !ok {
		if useDeviationThresholds {
			thresholds[v1.ResourceCPU] = MinResourcePercentage
			targetThresholds[v1.ResourceCPU] = MinResourcePercentage
		} else {
			thresholds[v1.ResourceCPU] = MaxResourcePercentage
			targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
		}
	}
	if _, ok := thresholds[v1.ResourceMemory]; !ok {
		if useDeviationThresholds {
			thresholds[v1.ResourceMemory] = MinResourcePercentage
			targetThresholds[v1.ResourceMemory] = MinResourcePercentage
		} else {
			thresholds[v1.ResourceMemory] = MaxResourcePercentage
			targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
		}
	}
	resourceNames := getResourceNames(thresholds)

	// Snapshot per-node usage; kept on the receiver so PreEvictionFilter can
	// consult it via checkNodeUsage.
	l.usages = l.getNodeUsage(nodes, resourceNames, l.handle.GetPodsAssignedToNodeFunc())
	lowNodes, sourceNodes := classifyNodes(
		l.usages,
		getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames, l.handle.GetPodsAssignedToNodeFunc(), useDeviationThresholds),
		// The node has to be schedulable (to be able to move workload there)
		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
			if nodeutil.IsNodeUnschedulable(node) {
				klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(node))
				return false
			}
			return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
		},
		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
			return isNodeAboveTargetUtilization(usage, threshold.highResourceThreshold)
		},
	)

	// log message for nodes with low utilization
	underutilizationCriteria := []interface{}{
		"CPU", thresholds[v1.ResourceCPU],
		"Mem", thresholds[v1.ResourceMemory],
		"Pods", thresholds[v1.ResourcePods],
	}
	for name := range thresholds {
		if !nodeutil.IsBasicResource(name) {
			underutilizationCriteria = append(underutilizationCriteria, string(name), int64(thresholds[name]))
		}
	}
	klog.V(1).InfoS("Criteria for a node under utilization", underutilizationCriteria...)
	klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))

	// log message for over utilized nodes
	overutilizationCriteria := []interface{}{
		"CPU", targetThresholds[v1.ResourceCPU],
		"Mem", targetThresholds[v1.ResourceMemory],
		"Pods", targetThresholds[v1.ResourcePods],
	}
	for name := range targetThresholds {
		if !nodeutil.IsBasicResource(name) {
			overutilizationCriteria = append(overutilizationCriteria, string(name), int64(targetThresholds[name]))
		}
	}
	klog.V(1).InfoS("Criteria for a node above target utilization", overutilizationCriteria...)
	klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))

	// Early exits: nothing to balance when there is no viable destination or source.
	if len(lowNodes) == 0 {
		klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
		return nil
	}

	if len(lowNodes) <= l.args.NumberOfNodes {
		klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", l.args.NumberOfNodes)
		return nil
	}

	if len(lowNodes) == len(nodes) {
		klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
		return nil
	}

	if len(sourceNodes) == 0 {
		klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
		return nil
	}

	// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
	continueEvictionCond := func(nodeInfo NodeInfo, desNodeAvailableRes map[v1.ResourceName]*resource.Quantity) bool {
		if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) {
			return false
		}
		for name := range desNodeAvailableRes {
			if desNodeAvailableRes[name].CmpInt64(0) < 1 {
				return false
			}
		}

		return true
	}

	// Sort the nodes by the usage in descending order
	sortNodesByUsage(sourceNodes, false)

	evictPodsFromSourceNodes(
		ctx,
		l.args.EvictableNamespaces,
		sourceNodes,
		lowNodes,
		l.handle.Evictor(),
		l.podFilter,
		resourceNames,
		continueEvictionCond)

	return nil
}

// PreEvictionFilter gates eviction on NodeFit: when enabled, a pod is only
// evictable if some other ready node can host it without exceeding the
// usage thresholds.
func (l *LoadAwareUtilization) PreEvictionFilter(pod *v1.Pod) bool {
	if l.args.NodeFit {
		nodes, err := nodeutil.ReadyNodes(context.TODO(), l.handle.ClientSet(), l.handle.SharedInformerFactory().Core().V1().Nodes().Lister(), l.args.NodeSelector)
		if err != nil {
			klog.ErrorS(err, "unable to list ready nodes", "pod", klog.KObj(pod))
			return false
		}
		if !l.NewPodFitsAnyOtherNode(l.handle.GetPodsAssignedToNodeFunc(), pod, nodes) {
			klog.InfoS("pod does not fit on any other node because of nodeSelector(s), Taint(s), nodes marked as unschedulable, or nodeusage is over the threshold", "pod", klog.KObj(pod))
			return false
		}
		return true
	}
	return true
}

// Filter accepts all pods; the real filtering happens in PreEvictionFilter.
func (l *LoadAwareUtilization) Filter(pod *v1.Pod) bool {
	return true
}

// NewPodFitsAnyOtherNode checks if the given pod will fit any of the given nodes, besides the node
// the pod is already running on. The predicates used to determine if the pod will fit can be found in the NodeFit function.
func (l *LoadAwareUtilization) NewPodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
	for _, node := range nodes {
		// Skip node pod is already on
		if node.Name == pod.Spec.NodeName {
			continue
		}

		// A node fits only when both the scheduling predicates (NodeFit) and the
		// usage-threshold check (checkNodeUsage) report no errors.
		var errors []error
		fitErrors := nodeutil.NodeFit(nodeIndexer, pod, node)
		errors = append(errors, fitErrors...)
		usageErrors := l.checkNodeUsage(pod, node)
		errors = append(errors, usageErrors...)
		if len(errors) == 0 {
			klog.V(1).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
			return true
		}
		klog.V(1).InfoS("Pod does not fit on node",
			"pod:", klog.KObj(pod), "node:", klog.KObj(node), "error:", utilerrors.NewAggregate(errors).Error())
	}

	return false
}

// Check whether the resources requested by the pod on the node exceed the threshold.
// Returns one error per over-used resource (CPU/memory) that the pod also requests;
// an empty result means the node's recorded usage does not block this pod.
func (l *LoadAwareUtilization) checkNodeUsage(pod *v1.Pod, node *v1.Node) []error {
	var errors []error

	// Look up the usage snapshot captured by the last Balance run.
	nodeUsage := NodeUsage{}
	for _, usage := range l.usages {
		if usage.node.Name == node.Name {
			nodeUsage = usage
			break
		}
	}

	if nodeUsage.overUseResources == nil {
		return errors
	}

	// Relationship between nodeUsage.overUseResources and pod resource request
	// NOTE(review): GetResourceRequest >= 0 looks always true (requests are
	// non-negative); possibly "> 0" was intended — confirm.
	if resourceutil.GetResourceRequest(pod, v1.ResourceCPU) >= 0 {
		for _, value := range *nodeUsage.overUseResources {
			if value == v1.ResourceCPU {
				errors = append(errors, fmt.Errorf("node's cpu usage is over the threshold, request cpu:%v, overUseResources:%v", resourceutil.GetResourceRequest(pod, v1.ResourceCPU), value))
			}
		}
	}
	if resourceutil.GetResourceRequest(pod, v1.ResourceMemory) >= 0 {
		for _, value := range *nodeUsage.overUseResources {
			if value == v1.ResourceMemory {
				errors = append(errors, fmt.Errorf("node's memory usage is over the threshold,request memory:%v, overUseResources:%v", resourceutil.GetResourceRequest(pod, v1.ResourceMemory), value))
			}
		}
	}

	return errors
}
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package source 18 | 19 | import ( 20 | "context" 21 | "errors" 22 | "fmt" 23 | 24 | v1 "k8s.io/api/core/v1" 25 | "k8s.io/apimachinery/pkg/api/resource" 26 | "k8s.io/apimachinery/pkg/types" 27 | ) 28 | 29 | const ( 30 | MetricsTypePrometheus = "prometheus" 31 | MetricsTypePrometheusAdaptor = "prometheus_adaptor" 32 | ) 33 | 34 | type Metrics struct { 35 | Type string `json:"type"` 36 | Address string `json:"address"` 37 | } 38 | 39 | type NodeMetrics struct { 40 | CPU float64 41 | Memory float64 42 | } 43 | 44 | type MetricsClient interface { 45 | NodesMetricsAvg(ctx context.Context, nodesMetrics map[string]*NodeMetrics, period string) error 46 | PodsMetricsAvg(ctx context.Context, pods []*v1.Pod, period string) (map[types.NamespacedName]map[v1.ResourceName]*resource.Quantity, error) 47 | } 48 | 49 | func NewMetricsClient(metricsConf Metrics) (MetricsClient, error) { 50 | metricsType := metricsConf.Type 51 | if len(metricsType) == 0 { 52 | return nil, errors.New("Metrics type is empty, the load-aware rescheduling function does not take effect.") 53 | } 54 | if metricsType == MetricsTypePrometheus { 55 | return NewPrometheusMetricsClient(metricsConf) 56 | } else if metricsType == MetricsTypePrometheusAdaptor { 57 | return NewCustomMetricsClient() 58 | } else { 59 | return nil, fmt.Errorf("Data cannot be collected from the %s monitoring system. 
"+ 60 | "The supported monitoring systems are %s and %s.", 61 | metricsType, MetricsTypePrometheus, MetricsTypePrometheusAdaptor) 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/metrics/metrics_client_prometheus.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package source 18 | 19 | import ( 20 | "context" 21 | "crypto/tls" 22 | "fmt" 23 | "net/http" 24 | "strconv" 25 | "strings" 26 | "time" 27 | 28 | "github.com/prometheus/client_golang/api" 29 | prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1" 30 | pmodel "github.com/prometheus/common/model" 31 | v1 "k8s.io/api/core/v1" 32 | "k8s.io/apimachinery/pkg/api/resource" 33 | "k8s.io/apimachinery/pkg/types" 34 | "k8s.io/klog/v2" 35 | ) 36 | 37 | const ( 38 | // promCPUUsageAvg record name of cpu average usage defined in prometheus rules 39 | promCPUUsageAvg = "cpu_usage_avg" 40 | // promMemUsageAvg record name of mem average usage defined in prometheus rules 41 | promMemUsageAvg = "mem_usage_avg" 42 | ) 43 | 44 | type PrometheusMetricsClient struct { 45 | address string 46 | conf Metrics 47 | } 48 | 49 | func NewPrometheusMetricsClient(metricsConf Metrics) (*PrometheusMetricsClient, error) { 50 | address := metricsConf.Address 51 | if len(address) == 0 { 52 | return nil, fmt.Errorf("Metrics address is empty, the load-aware rescheduling function does not take effect.") 53 | } 54 | return &PrometheusMetricsClient{address: address, conf: metricsConf}, nil 55 | } 56 | 57 | func (p *PrometheusMetricsClient) NodesMetricsAvg(ctx context.Context, nodesMetrics map[string]*NodeMetrics, period string) error { 58 | klog.V(5).Infof("Get node metrics from prometheus") 59 | for nodeName := range nodesMetrics { 60 | nodeMetrics, err := p.NodeMetricsAvg(ctx, nodeName, period) 61 | if err != nil { 62 | klog.Errorf("Failed to query the node(%s) metrios, error is: %v.", nodeName, err) 63 | return err 64 | } 65 | klog.V(5).Infof("Node(%s) usage metrics is %v", nodeName, nodeMetrics) 66 | nodesMetrics[nodeName] = &nodeMetrics 67 | } 68 | return nil 69 | } 70 | 71 | func (p *PrometheusMetricsClient) NodeMetricsAvg(ctx context.Context, nodeName string, period string) (NodeMetrics, error) { 72 | klog.V(4).Infof("Get node metrics from Prometheus: %s", p.address) 
73 | var client api.Client 74 | var err error 75 | transport := &http.Transport{ 76 | TLSClientConfig: &tls.Config{}, 77 | } 78 | client, err = api.NewClient(api.Config{ 79 | Address: p.address, 80 | RoundTripper: transport, 81 | }) 82 | if err != nil { 83 | return NodeMetrics{}, err 84 | } 85 | v1api := prometheusv1.NewAPI(client) 86 | nodeMetrics := NodeMetrics{} 87 | cpuQueryStr := fmt.Sprintf("avg_over_time((1 - (avg by (instance) (irate(node_cpu_seconds_total{mode=\"idle\",instance=\"%s\"}[30s])) * 1))[%s:30s])", nodeName, period) 88 | memQueryStr := fmt.Sprintf("avg_over_time(((1-node_memory_MemAvailable_bytes{instance=\"%s\"}/node_memory_MemTotal_bytes{instance=\"%s\"}))[%s:30s])", nodeName, nodeName, period) 89 | 90 | for _, metric := range []string{cpuQueryStr, memQueryStr} { 91 | res, warnings, err := v1api.Query(ctx, metric, time.Now()) 92 | if err != nil { 93 | klog.Errorf("Error querying Prometheus: %v", err) 94 | } 95 | if len(warnings) > 0 { 96 | klog.V(3).Infof("Warning querying Prometheus: %v", warnings) 97 | } 98 | if res == nil || res.String() == "" { 99 | klog.Warningf("Warning querying Prometheus: no data found for %s", metric) 100 | continue 101 | } 102 | // plugin.usage only need type pmodel.ValVector in Prometheus.rulues 103 | if res.Type() != pmodel.ValVector { 104 | continue 105 | } 106 | // only method res.String() can get data, dataType []pmodel.ValVector, eg: "{k1:v1, ...} => #[value] @#[timespace]\n {k2:v2, ...} => ..." 
107 | firstRowValVector := strings.Split(res.String(), "\n")[0] 108 | rowValues := strings.Split(strings.TrimSpace(firstRowValVector), "=>") 109 | value := strings.Split(strings.TrimSpace(rowValues[1]), " ") 110 | switch metric { 111 | case cpuQueryStr: 112 | cpuUsage, err := strconv.ParseFloat(value[0], 64) 113 | if err != nil { 114 | klog.Warning("Warning: Convert cpuUsage to float fail") 115 | } 116 | nodeMetrics.CPU = cpuUsage 117 | case memQueryStr: 118 | memUsage, err := strconv.ParseFloat(value[0], 64) 119 | if err != nil { 120 | klog.Warning("Warning: Convert memUsage to float fail") 121 | } 122 | nodeMetrics.Memory = memUsage 123 | } 124 | } 125 | return nodeMetrics, nil 126 | } 127 | 128 | func (p *PrometheusMetricsClient) PodsMetricsAvg(ctx context.Context, pods []*v1.Pod, period string) (map[types.NamespacedName]map[v1.ResourceName]*resource.Quantity, error) { 129 | var ret = make(map[types.NamespacedName]map[v1.ResourceName]*resource.Quantity) 130 | var client api.Client 131 | var err error 132 | transport := &http.Transport{ 133 | TLSClientConfig: &tls.Config{}, 134 | } 135 | client, err = api.NewClient(api.Config{ 136 | Address: p.address, 137 | RoundTripper: transport, 138 | }) 139 | if err != nil { 140 | return nil, err 141 | } 142 | v1api := prometheusv1.NewAPI(client) 143 | cpuQuery := "avg_over_time((( (irate(container_cpu_usage_seconds_total{pod=\"%s\",container=\"\",name=\"\" }[30s])) * 1))[%s:30s])" 144 | memQuery := "container_memory_usage_bytes{pod=\"%s\",container=\"\",name=\"\"}" 145 | var cpuQueryStr, memQueryStr string 146 | for _, pod := range pods { 147 | tmpMap := make(map[v1.ResourceName]*resource.Quantity) 148 | cpuQueryStr = fmt.Sprintf(cpuQuery, pod.Name, period) 149 | memQueryStr = fmt.Sprintf(memQuery, pod.Name) 150 | for _, metric := range []string{cpuQueryStr, memQueryStr} { 151 | res, warnings, err := v1api.Query(ctx, metric, time.Now()) 152 | if err != nil { 153 | klog.Errorf("Error querying Prometheus: %v", err) 154 | } 
155 | if len(warnings) > 0 { 156 | klog.V(3).Infof("Warning querying Prometheus: %v", warnings) 157 | } 158 | if res == nil || res.String() == "" { 159 | klog.Warningf("Warning querying Prometheus: no data found for %s", metric) 160 | continue 161 | } 162 | // plugin.usage only need type pmodel.ValVector in Prometheus.rulues 163 | if res.Type() != pmodel.ValVector { 164 | continue 165 | } 166 | // only method res.String() can get data, dataType []pmodel.ValVector, eg: "{k1:v1, ...} => #[value] @#[timespace]\n {k2:v2, ...} => ..." 167 | firstRowValVector := strings.Split(res.String(), "\n")[0] 168 | rowValues := strings.Split(strings.TrimSpace(firstRowValVector), "=>") 169 | value := strings.Split(strings.TrimSpace(rowValues[1]), " ") 170 | 171 | tmp := resource.MustParse(value[0]) 172 | switch metric { 173 | case cpuQueryStr: 174 | tmpMap[v1.ResourceCPU] = &tmp 175 | case memQueryStr: 176 | tmpMap[v1.ResourceMemory] = &tmp 177 | } 178 | } 179 | ret[types.NamespacedName{ 180 | Namespace: pod.Namespace, 181 | Name: pod.Name, 182 | }] = tmpMap 183 | 184 | } 185 | return ret, err 186 | } 187 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/metrics/metrics_client_prometheus_adapt.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package source 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | 23 | v1 "k8s.io/api/core/v1" 24 | "k8s.io/apimachinery/pkg/api/resource" 25 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 | "k8s.io/apimachinery/pkg/labels" 27 | "k8s.io/apimachinery/pkg/runtime/schema" 28 | "k8s.io/apimachinery/pkg/types" 29 | "k8s.io/client-go/discovery" 30 | cacheddiscovery "k8s.io/client-go/discovery/cached/memory" 31 | "k8s.io/client-go/rest" 32 | "k8s.io/client-go/restmapper" 33 | "k8s.io/klog/v2" 34 | "k8s.io/metrics/pkg/client/clientset/versioned" 35 | metricsv1beta1 "k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1" 36 | customclient "k8s.io/metrics/pkg/client/custom_metrics" 37 | ) 38 | 39 | const ( 40 | // CustomNodeCPUUsageAvg record name of cpu average usage defined in prometheus adapt rules 41 | CustomNodeCPUUsageAvg = "node_cpu_usage_avg" 42 | // CustomNodeMemUsageAvg record name of mem average usage defined in prometheus adapt rules 43 | CustomNodeMemUsageAvg = "node_memory_usage_avg" 44 | ) 45 | 46 | type KMetricsClient struct { 47 | CustomMetricsCli customclient.CustomMetricsClient 48 | MetricsCli metricsv1beta1.MetricsV1beta1Interface 49 | } 50 | 51 | var kMetricsClient *KMetricsClient 52 | 53 | func NewCustomMetricsClient() (*KMetricsClient, error) { 54 | if kMetricsClient != nil { 55 | return kMetricsClient, nil 56 | } 57 | klog.V(3).Infof("Create custom metrics client to get nodes and pods metrics") 58 | 59 | cfg, err := rest.InClusterConfig() 60 | if err != nil { 61 | return nil, fmt.Errorf("unable to build in cluster config: %v", err) 62 | } 63 | 64 | discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(cfg) 65 | cachedDiscoClient := cacheddiscovery.NewMemCacheClient(discoveryClient) 66 | restMapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscoClient) 67 | apiVersionsGetter := customclient.NewAvailableAPIsGetter(discoveryClient) 68 | customMetricsClient := customclient.NewForConfig(cfg, restMapper, 
apiVersionsGetter) 69 | 70 | metricsClient, err := versioned.NewForConfig(cfg) 71 | if err != nil { 72 | return nil, fmt.Errorf("New metrics client failed, error is : %v", err) 73 | } 74 | metricsClientV1beta1 := metricsClient.MetricsV1beta1() 75 | 76 | kMetricsClient = &KMetricsClient{ 77 | CustomMetricsCli: customMetricsClient, 78 | MetricsCli: metricsClientV1beta1, 79 | } 80 | 81 | return kMetricsClient, nil 82 | } 83 | 84 | func (km *KMetricsClient) NodesMetricsAvg(_ context.Context, nodesMetrics map[string]*NodeMetrics, _ string) error { 85 | klog.V(5).Infof("Get node metrics from custom metrics api") 86 | 87 | groupKind := schema.GroupKind{ 88 | Group: "", 89 | Kind: "Node", 90 | } 91 | for _, metricName := range []string{CustomNodeCPUUsageAvg, CustomNodeMemUsageAvg} { 92 | metricsValue, err := km.CustomMetricsCli.RootScopedMetrics().GetForObjects(groupKind, labels.NewSelector(), metricName, labels.NewSelector()) 93 | if err != nil { 94 | klog.Errorf("Failed to query the indicator %s, error is: %v.", metricName, err) 95 | return err 96 | } 97 | for _, metricValue := range metricsValue.Items { 98 | nodeName := metricValue.DescribedObject.Name 99 | if _, ok := nodesMetrics[nodeName]; !ok { 100 | klog.Warningf("The node %s information is obtained through the custom metrics API, but the volcano cache does not contain the node information.", nodeName) 101 | continue 102 | } 103 | klog.V(5).Infof("The current usage information of node %s is %v", nodeName, nodesMetrics[nodeName]) 104 | switch metricName { 105 | case CustomNodeCPUUsageAvg: 106 | nodesMetrics[nodeName].CPU = metricValue.Value.AsApproximateFloat64() 107 | case CustomNodeMemUsageAvg: 108 | nodesMetrics[nodeName].Memory = metricValue.Value.AsApproximateFloat64() 109 | default: 110 | klog.Errorf("Node supports %s and %s metrics, and %s indicates abnormal metrics.", CustomNodeCPUUsageAvg, CustomNodeMemUsageAvg, metricName) 111 | } 112 | klog.V(5).Infof("The updated usage information of node %s is %v.", 
nodeName, nodesMetrics[nodeName]) 113 | } 114 | } 115 | 116 | return nil 117 | } 118 | 119 | func (km *KMetricsClient) PodsMetricsAvg(ctx context.Context, pods []*v1.Pod, period string) (map[types.NamespacedName]map[v1.ResourceName]*resource.Quantity, error) { 120 | klog.V(5).Infof("Get pods metrics from metrics api") 121 | podsMetrics := make(map[types.NamespacedName]map[v1.ResourceName]*resource.Quantity, len(pods)) 122 | 123 | for _, pod := range pods { 124 | podMetrics, err := km.MetricsCli.PodMetricses(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}) 125 | if err != nil { 126 | klog.Warningf("Failed to query pod(%s/%s) metrics, error is: %v.", pod.Namespace, pod.Name, err) 127 | continue 128 | } 129 | podUsage := make(map[v1.ResourceName]*resource.Quantity) 130 | for _, containerUsage := range podMetrics.Containers { 131 | for _, resourceName := range []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory} { 132 | if _, ok := podUsage[resourceName]; !ok { 133 | podUsage[resourceName] = &resource.Quantity{} 134 | } 135 | podUsage[resourceName].Add(containerUsage.Usage[resourceName]) 136 | } 137 | } 138 | namespaceName := types.NamespacedName{ 139 | Namespace: pod.Namespace, 140 | Name: pod.Name, 141 | } 142 | podsMetrics[namespaceName] = podUsage 143 | klog.V(5).Infof("Pod(%s/%s) metrics is %v", pod.Namespace, pod.Name, podUsage) 144 | } 145 | return podsMetrics, nil 146 | } 147 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/nodeutilization.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
*/

package loadaware

import (
	"context"
	"fmt"
	"sort"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/klog/v2"
	"sigs.k8s.io/descheduler/pkg/api"
	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
	"sigs.k8s.io/descheduler/pkg/descheduler/node"
	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
	frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
	"sigs.k8s.io/descheduler/pkg/utils"

	source "volcano.sh/descheduler/pkg/framework/plugins/loadaware/metrics"
)

// NodeUsage stores a node's info, pods on it, thresholds and its resource usage
type NodeUsage struct {
	// node is the node this usage snapshot describes.
	node *v1.Node
	// usage maps a resource name to the measured amount in use on the node.
	usage map[v1.ResourceName]*resource.Quantity
	// allPods is every pod assigned to the node when the snapshot was taken.
	allPods []*v1.Pod
	// podMetrics maps namespace/name to the measured per-resource usage of
	// that pod on this node.
	podMetrics map[types.NamespacedName]map[v1.ResourceName]*resource.Quantity
	// overUseResources collects the resource names found above the high
	// threshold; a pointer so threshold checks can append to a shared slice.
	overUseResources *[]v1.ResourceName
}

// NodeThresholds holds the per-resource low and high watermarks for one node,
// expressed as absolute quantities derived from the node's capacity.
type NodeThresholds struct {
	lowResourceThreshold  map[v1.ResourceName]*resource.Quantity
	highResourceThreshold map[v1.ResourceName]*resource.Quantity
}

// NodeInfo pairs a node's usage snapshot with its computed thresholds.
type NodeInfo struct {
	NodeUsage
	thresholds NodeThresholds
}

// continueEvictionCond reports whether eviction from the given node may
// continue, given the remaining available resources on destination nodes.
type continueEvictionCond func(nodeInfo NodeInfo, desNodeAvailableRes map[v1.ResourceName]*resource.Quantity) bool

// NodePodsMap is a set of (node, pods) pairs
type NodePodsMap map[*v1.Node][]*v1.Pod
64 | const ( 65 | // MinResourcePercentage is the minimum value of a resource's percentage 66 | MinResourcePercentage = 0 67 | // MaxResourcePercentage is the maximum value of a resource's percentage 68 | MaxResourcePercentage = 100 69 | ) 70 | 71 | func normalizePercentage(percent api.Percentage) api.Percentage { 72 | if percent > MaxResourcePercentage { 73 | return MaxResourcePercentage 74 | } 75 | if percent < MinResourcePercentage { 76 | return MinResourcePercentage 77 | } 78 | return percent 79 | } 80 | 81 | func getNodeThresholds( 82 | nodes []*v1.Node, 83 | lowThreshold, highThreshold api.ResourceThresholds, 84 | resourceNames []v1.ResourceName, 85 | getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, 86 | useDeviationThresholds bool, 87 | ) map[string]NodeThresholds { 88 | nodeThresholdsMap := map[string]NodeThresholds{} 89 | 90 | averageResourceUsagePercent := api.ResourceThresholds{} 91 | if useDeviationThresholds { 92 | averageResourceUsagePercent = averageNodeBasicresources(nodes, getPodsAssignedToNode, resourceNames) 93 | } 94 | 95 | for _, node := range nodes { 96 | nodeCapacity := node.Status.Capacity 97 | if len(node.Status.Allocatable) > 0 { 98 | nodeCapacity = node.Status.Allocatable 99 | } 100 | 101 | nodeThresholdsMap[node.Name] = NodeThresholds{ 102 | lowResourceThreshold: map[v1.ResourceName]*resource.Quantity{}, 103 | highResourceThreshold: map[v1.ResourceName]*resource.Quantity{}, 104 | } 105 | 106 | for _, resourceName := range resourceNames { 107 | if useDeviationThresholds { 108 | capacity := nodeCapacity[resourceName] 109 | if lowThreshold[resourceName] == MinResourcePercentage { 110 | nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = &capacity 111 | nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = &capacity 112 | } else { 113 | nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, 
normalizePercentage(averageResourceUsagePercent[resourceName]-lowThreshold[resourceName])) 114 | nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, normalizePercentage(averageResourceUsagePercent[resourceName]+highThreshold[resourceName])) 115 | } 116 | } else { 117 | nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, lowThreshold[resourceName]) 118 | nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, highThreshold[resourceName]) 119 | } 120 | } 121 | 122 | } 123 | return nodeThresholdsMap 124 | } 125 | 126 | func (l *LoadAwareUtilization) getNodeUsage( 127 | nodes []*v1.Node, 128 | resourceNames []v1.ResourceName, 129 | getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, 130 | ) []NodeUsage { 131 | klog.V(5).Infof("start get node usage metrics") 132 | var nodeUsageList []NodeUsage 133 | nodesMetrics := make(map[string]*source.NodeMetrics) 134 | nodesInfo := make(map[string]*v1.Node) 135 | for _, node := range nodes { 136 | nodesMetrics[node.Name] = &source.NodeMetrics{} 137 | nodesInfo[node.Name] = node 138 | } 139 | 140 | client, err := source.NewMetricsClient(l.args.MetricsConfiguration) 141 | if err != nil { 142 | klog.Errorf("New metrics client failed, metrics config is %v, error is %v", l.args.MetricsConfiguration, err) 143 | return nodeUsageList 144 | } 145 | client.NodesMetricsAvg(context.TODO(), nodesMetrics, l.args.Duration) 146 | for nodeName, nodeMetrics := range nodesMetrics { 147 | pods, err := podutil.ListPodsOnANode(nodeName, getPodsAssignedToNode, nil) 148 | if err != nil { 149 | klog.V(2).Infof("Failed to obtain the pod information of the node(%s), error is %v", nodeName, err) 150 | continue 151 | } 152 | nodeUsage := getNodeUtilization(nodesInfo[nodeName], pods, nodeMetrics) 153 | podMetric, err := client.PodsMetricsAvg(context.TODO(), pods, l.args.Duration) 
154 | if err != nil { 155 | klog.V(2).Infof("Failed to get pod metrics average, error is %v", err) 156 | continue 157 | } 158 | overUseResources := make([]v1.ResourceName, 0) 159 | nodeUsageList = append(nodeUsageList, NodeUsage{ 160 | node: nodesInfo[nodeName], 161 | usage: nodeUsage, 162 | allPods: pods, 163 | podMetrics: podMetric, 164 | overUseResources: &overUseResources, 165 | }) 166 | } 167 | return nodeUsageList 168 | } 169 | 170 | func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceName, threshold api.Percentage) *resource.Quantity { 171 | defaultFormat := resource.DecimalSI 172 | if resourceName == v1.ResourceMemory { 173 | defaultFormat = resource.BinarySI 174 | } 175 | 176 | resourceCapacityFraction := func(resourceNodeCapacity int64) int64 { 177 | // A threshold is in percentages but in <0;100> interval. 178 | // Performing `threshold * 0.01` will convert <0;100> interval into <0;1>. 179 | // Multiplying it with capacity will give fraction of the capacity corresponding to the given resource threshold in Quantity units. 
180 | return int64(float64(threshold) * 0.01 * float64(resourceNodeCapacity)) 181 | } 182 | 183 | resourceCapacityQuantity := nodeCapacity.Name(resourceName, defaultFormat) 184 | 185 | if resourceName == v1.ResourceCPU { 186 | return resource.NewMilliQuantity(resourceCapacityFraction(resourceCapacityQuantity.MilliValue()), defaultFormat) 187 | } 188 | return resource.NewQuantity(resourceCapacityFraction(resourceCapacityQuantity.Value()), defaultFormat) 189 | } 190 | 191 | func resourceUsagePercentages(nodeUsage NodeUsage) map[v1.ResourceName]float64 { 192 | nodeCapacity := nodeUsage.node.Status.Capacity 193 | if len(nodeUsage.node.Status.Allocatable) > 0 { 194 | nodeCapacity = nodeUsage.node.Status.Allocatable 195 | } 196 | 197 | resourceUsagePercentage := map[v1.ResourceName]float64{} 198 | for resourceName, resourceUsage := range nodeUsage.usage { 199 | capacity := nodeCapacity[resourceName] 200 | if !capacity.IsZero() { 201 | resourceUsagePercentage[resourceName] = 100 * float64(resourceUsage.MilliValue()) / float64(capacity.MilliValue()) 202 | } 203 | } 204 | 205 | return resourceUsagePercentage 206 | } 207 | 208 | // classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between 209 | // low and high thresholds, it is simply ignored. 
210 | func classifyNodes( 211 | nodeUsages []NodeUsage, 212 | nodeThresholds map[string]NodeThresholds, 213 | lowThresholdFilter, highThresholdFilter func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool, 214 | ) ([]NodeInfo, []NodeInfo) { 215 | lowNodes, highNodes := []NodeInfo{}, []NodeInfo{} 216 | 217 | for _, nodeUsage := range nodeUsages { 218 | nodeInfo := NodeInfo{ 219 | NodeUsage: nodeUsage, 220 | thresholds: nodeThresholds[nodeUsage.node.Name], 221 | } 222 | if lowThresholdFilter(nodeUsage.node, nodeUsage, nodeThresholds[nodeUsage.node.Name]) { 223 | klog.InfoS("Node is underutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage)) 224 | lowNodes = append(lowNodes, nodeInfo) 225 | } else if highThresholdFilter(nodeUsage.node, nodeUsage, nodeThresholds[nodeUsage.node.Name]) { 226 | klog.InfoS("Node is overutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage)) 227 | highNodes = append(highNodes, nodeInfo) 228 | } else { 229 | klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage)) 230 | } 231 | } 232 | 233 | return lowNodes, highNodes 234 | } 235 | 236 | // evictPodsFromSourceNodes evicts pods based on priority, if all the pods on the node have priority, if not 237 | // evicts them based on QoS as fallback option. 238 | // TODO: @ravig Break this function into smaller functions. 
239 | func evictPodsFromSourceNodes( 240 | ctx context.Context, 241 | evictableNamespaces *api.Namespaces, 242 | sourceNodes, destinationNodes []NodeInfo, 243 | podEvictor frameworktypes.Evictor, 244 | podFilter func(pod *v1.Pod) bool, 245 | resourceNames []v1.ResourceName, 246 | continueEviction continueEvictionCond, 247 | ) { 248 | // upper bound on total number of pods/cpu/memory and optional extended resources to be moved 249 | desNodeAvailableRes := map[v1.ResourceName]*resource.Quantity{ 250 | v1.ResourcePods: {}, 251 | v1.ResourceCPU: {}, 252 | v1.ResourceMemory: {}, 253 | } 254 | 255 | taintsOfDestinationNodes := make(map[string][]v1.Taint, len(destinationNodes)) 256 | for _, node := range destinationNodes { 257 | taintsOfDestinationNodes[node.node.Name] = node.node.Spec.Taints 258 | 259 | for _, name := range resourceNames { 260 | if _, ok := desNodeAvailableRes[name]; !ok { 261 | desNodeAvailableRes[name] = resource.NewQuantity(0, resource.DecimalSI) 262 | } 263 | desNodeAvailableRes[name].Add(*node.thresholds.highResourceThreshold[name]) 264 | desNodeAvailableRes[name].Sub(*node.usage[name]) 265 | } 266 | } 267 | 268 | // log message in one line 269 | keysAndValues := []interface{}{ 270 | "CPU", desNodeAvailableRes[v1.ResourceCPU].MilliValue(), 271 | "Mem", desNodeAvailableRes[v1.ResourceMemory].Value(), 272 | "Pods", desNodeAvailableRes[v1.ResourcePods].Value(), 273 | } 274 | for name := range desNodeAvailableRes { 275 | if !node.IsBasicResource(name) { 276 | keysAndValues = append(keysAndValues, string(name), desNodeAvailableRes[name].Value()) 277 | } 278 | } 279 | klog.V(1).InfoS("Total capacity to be moved", keysAndValues...) 
280 | 281 | for _, node := range sourceNodes { 282 | klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage) 283 | 284 | nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter) 285 | klog.V(2).InfoS("Pods on node", "node", klog.KObj(node.node), "allPods", len(node.allPods), "nonRemovablePods", len(nonRemovablePods), "removablePods", len(removablePods)) 286 | 287 | if len(removablePods) == 0 { 288 | klog.V(1).InfoS("No removable pods on node, try next node", "current node is ", klog.KObj(node.node)) 289 | continue 290 | } 291 | 292 | klog.V(1).InfoS("Evicting pods based on used quantity") 293 | // sort the evictable Pods based on used quantity 294 | keyResource := v1.ResourceMemory 295 | for _, value := range *node.overUseResources { 296 | if value == v1.ResourceCPU { 297 | keyResource = v1.ResourceCPU 298 | break 299 | } 300 | } 301 | klog.V(3).Infof("Removeable pod sort by %s resources", keyResource) 302 | sort.Slice(removablePods, func(i, j int) bool { 303 | podLeft := types.NamespacedName{ 304 | Namespace: removablePods[i].Namespace, 305 | Name: removablePods[i].Name, 306 | } 307 | podRight := types.NamespacedName{ 308 | Namespace: removablePods[j].Namespace, 309 | Name: removablePods[j].Name, 310 | } 311 | resoureCPULeft := make(map[v1.ResourceName]*resource.Quantity) 312 | resouceCPURight := make(map[v1.ResourceName]*resource.Quantity) 313 | var exist bool 314 | if resoureCPULeft, exist = node.podMetrics[podLeft]; !exist { 315 | // The podmetrics table does not contain the usage of the current pod. 316 | // The pod is sorted at the end of the queue. 317 | return false 318 | } else if resouceCPURight, exist = node.podMetrics[podRight]; !exist { 319 | // The podmetrics table does not contain the podRight usage information. 320 | // The podLeft field is sorted before the podRight field. 
321 | return true 322 | } 323 | 324 | if quantityLeft, ok := resoureCPULeft[keyResource]; !ok { 325 | // The podmetrics table does not contain the usage of the current pod. 326 | // The pod is sorted at the end of the queue. 327 | return false 328 | } else if quantityRight, ok := resouceCPURight[keyResource]; !ok { 329 | // The podmetrics table does not contain the podRight usage information. 330 | // The podLeft field is sorted before the podRight field. 331 | return true 332 | } else { 333 | return quantityLeft.Cmp(*quantityRight) > 0 334 | } 335 | }) 336 | 337 | evictPods(ctx, evictableNamespaces, removablePods, node, desNodeAvailableRes, taintsOfDestinationNodes, podEvictor, continueEviction) 338 | } 339 | } 340 | 341 | func evictPods( 342 | ctx context.Context, 343 | evictableNamespaces *api.Namespaces, 344 | inputPods []*v1.Pod, 345 | nodeInfo NodeInfo, 346 | desNodeAvailableRes map[v1.ResourceName]*resource.Quantity, 347 | taintsOfLowNodes map[string][]v1.Taint, 348 | podEvictor frameworktypes.Evictor, 349 | continueEviction continueEvictionCond, 350 | ) { 351 | excludedNamespaces := sets.NewString([]string{}...) 352 | if evictableNamespaces != nil { 353 | excludedNamespaces.Insert(evictableNamespaces.Exclude...) 354 | } 355 | 356 | if continueEviction(nodeInfo, desNodeAvailableRes) { 357 | for _, pod := range inputPods { 358 | if !utils.PodToleratesTaints(pod, taintsOfLowNodes) { 359 | klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod)) 360 | continue 361 | } 362 | 363 | preEvictionFilterWithOptions, err := podutil.NewOptions(). 364 | WithFilter(podEvictor.PreEvictionFilter). 365 | WithoutNamespaces(excludedNamespaces). 
366 | BuildFilterFunc() 367 | if err != nil { 368 | klog.ErrorS(err, "could not build preEvictionFilter with namespace exclusion") 369 | continue 370 | } 371 | 372 | if preEvictionFilterWithOptions(pod) { 373 | if podEvictor.Evict(ctx, pod, evictions.EvictOptions{}) { 374 | klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod)) 375 | 376 | for resourceName := range desNodeAvailableRes { 377 | if resourceName == v1.ResourcePods { 378 | nodeInfo.usage[resourceName].Sub(*resource.NewQuantity(1, resource.DecimalSI)) 379 | desNodeAvailableRes[resourceName].Sub(*resource.NewQuantity(1, resource.DecimalSI)) 380 | } else { 381 | podNamespaceAndName := types.NamespacedName{ 382 | Namespace: pod.Namespace, 383 | Name: pod.Name, 384 | } 385 | if resourceMetrics, ok := nodeInfo.podMetrics[podNamespaceAndName]; !ok { 386 | klog.Warningf("The actual pod usage is not obtained from the cache information of the node. "+ 387 | "Therefore, the pod(%s) is not evicted.", podNamespaceAndName) 388 | continue 389 | } else if quantity, ok := resourceMetrics[resourceName]; !ok { 390 | klog.Warningf("The %v indicator of the pod is not obtained from the node information. The pod(%v) is not evicted.", resourceName, podNamespaceAndName) 391 | continue 392 | } else { 393 | nodeInfo.usage[resourceName].Sub(*quantity) 394 | desNodeAvailableRes[resourceName].Sub(*quantity) 395 | } 396 | } 397 | } 398 | 399 | keysAndValues := []interface{}{ 400 | "node", nodeInfo.node.Name, 401 | "CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(), 402 | "Mem", nodeInfo.usage[v1.ResourceMemory].Value(), 403 | "Pods", nodeInfo.usage[v1.ResourcePods].Value(), 404 | } 405 | for name := range desNodeAvailableRes { 406 | if !nodeutil.IsBasicResource(name) { 407 | keysAndValues = append(keysAndValues, string(name), desNodeAvailableRes[name].Value()) 408 | } 409 | } 410 | 411 | klog.V(3).InfoS("Updated node usage", keysAndValues...) 
412 | // check if pods can be still evicted 413 | if !continueEviction(nodeInfo, desNodeAvailableRes) { 414 | break 415 | } 416 | } 417 | } 418 | if podEvictor.NodeLimitExceeded(nodeInfo.node) { 419 | return 420 | } 421 | } 422 | } 423 | } 424 | 425 | // sortNodesByUsage sorts nodes based on usage according to the given plugin. 426 | func sortNodesByUsage(nodes []NodeInfo, ascending bool) { 427 | sort.Slice(nodes, func(i, j int) bool { 428 | ti := nodes[i].usage[v1.ResourceMemory].Value() + nodes[i].usage[v1.ResourceCPU].MilliValue() + nodes[i].usage[v1.ResourcePods].Value() 429 | tj := nodes[j].usage[v1.ResourceMemory].Value() + nodes[j].usage[v1.ResourceCPU].MilliValue() + nodes[j].usage[v1.ResourcePods].Value() 430 | 431 | // extended resources 432 | for name := range nodes[i].usage { 433 | if !nodeutil.IsBasicResource(name) { 434 | ti = ti + nodes[i].usage[name].Value() 435 | tj = tj + nodes[j].usage[name].Value() 436 | } 437 | } 438 | 439 | // Return ascending order for HighNodeUtilization plugin 440 | if ascending { 441 | return ti < tj 442 | } 443 | 444 | // Return descending order for LowNodeUtilization plugin 445 | return ti > tj 446 | }) 447 | } 448 | 449 | // isNodeAboveTargetUtilization checks if a node is overutilized 450 | // At least one resource has to be above the high threshold 451 | func isNodeAboveTargetUtilization(usage NodeUsage, threshold map[v1.ResourceName]*resource.Quantity) bool { 452 | for name, nodeValue := range usage.usage { 453 | // usage.highResourceThreshold[name] < nodeValue 454 | if threshold[name].Cmp(*nodeValue) == -1 { 455 | *usage.overUseResources = append(*usage.overUseResources, name) 456 | return true 457 | } 458 | } 459 | return false 460 | } 461 | 462 | // isNodeWithLowUtilization checks if a node is underutilized 463 | // All resources have to be below the low threshold 464 | func isNodeWithLowUtilization(usage NodeUsage, threshold map[v1.ResourceName]*resource.Quantity) bool { 465 | for name, nodeValue := range 
usage.usage { 466 | // usage.lowResourceThreshold[name] < nodeValue 467 | if threshold[name].Cmp(*nodeValue) == -1 { 468 | return false 469 | } 470 | } 471 | 472 | return true 473 | } 474 | 475 | // getResourceNames returns list of resource names in resource thresholds 476 | func getResourceNames(thresholds api.ResourceThresholds) []v1.ResourceName { 477 | resourceNames := make([]v1.ResourceName, 0, len(thresholds)) 478 | for name := range thresholds { 479 | resourceNames = append(resourceNames, name) 480 | } 481 | return resourceNames 482 | } 483 | 484 | func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) { 485 | var nonRemovablePods, removablePods []*v1.Pod 486 | 487 | for _, pod := range pods { 488 | if !filter(pod) { 489 | nonRemovablePods = append(nonRemovablePods, pod) 490 | } else { 491 | removablePods = append(removablePods, pod) 492 | } 493 | } 494 | 495 | return nonRemovablePods, removablePods 496 | } 497 | 498 | func averageNodeBasicresources(nodes []*v1.Node, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, resourceNames []v1.ResourceName) api.ResourceThresholds { 499 | total := api.ResourceThresholds{} 500 | average := api.ResourceThresholds{} 501 | numberOfNodes := len(nodes) 502 | for _, node := range nodes { 503 | pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, nil) 504 | if err != nil { 505 | numberOfNodes-- 506 | continue 507 | } 508 | usage := nodeutil.NodeUtilization(pods, resourceNames) 509 | nodeCapacity := node.Status.Capacity 510 | if len(node.Status.Allocatable) > 0 { 511 | nodeCapacity = node.Status.Allocatable 512 | } 513 | for resource, value := range usage { 514 | nodeCapacityValue := nodeCapacity[resource] 515 | if resource == v1.ResourceCPU { 516 | total[resource] += api.Percentage(value.MilliValue()) / api.Percentage(nodeCapacityValue.MilliValue()) * 100.0 517 | } else { 518 | total[resource] += api.Percentage(value.Value()) / api.Percentage(nodeCapacityValue.Value()) 
* 100.0 519 | } 520 | } 521 | } 522 | for resource, value := range total { 523 | average[resource] = value / api.Percentage(numberOfNodes) 524 | } 525 | return average 526 | } 527 | 528 | func getNodeUtilization(node *v1.Node, pods []*v1.Pod, nodeMetrics *source.NodeMetrics) map[v1.ResourceName]*resource.Quantity { 529 | nodeResource := make(map[v1.ResourceName]*resource.Quantity) 530 | 531 | nodeStatusCapacityCpu := node.Status.Capacity[v1.ResourceCPU] 532 | nodeStatusCpuUsed := resource.MustParse(fmt.Sprintf("%f", float64(nodeStatusCapacityCpu.Value())*nodeMetrics.CPU)) 533 | nodeResource[v1.ResourceCPU] = &nodeStatusCpuUsed 534 | 535 | nodeStatusCapacityMemory := node.Status.Capacity[v1.ResourceMemory] 536 | nodeStatusUsedMemory := resource.MustParse(fmt.Sprintf("%f", float64(nodeStatusCapacityMemory.Value())*nodeMetrics.Memory)) 537 | nodeResource[v1.ResourceMemory] = &nodeStatusUsedMemory 538 | 539 | nodeResource[v1.ResourcePods] = resource.NewQuantity(int64(len(pods)), resource.DecimalSI) 540 | return nodeResource 541 | } 542 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/nodeutilization_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
var (
	// Pod priority values used by pod fixtures in these tests.
	lowPriority  = int32(0)
	highPriority = int32(10000)

	// extendedResource is a sample extended (non-core) resource name.
	extendedResource = v1.ResourceName("example.com/foo")

	// testNode1, testNode2 and testNode3 share identical capacity and
	// allocatable values and differ only in usage, so sorting tests have a
	// fixed expected ordering.

	// testNode1: usage 1730m CPU, 3038982964 bytes memory, 25 pods.
	testNode1 = NodeInfo{
		NodeUsage: NodeUsage{
			node: &v1.Node{
				Status: v1.NodeStatus{
					Capacity: v1.ResourceList{
						v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
						v1.ResourceMemory: *resource.NewQuantity(3977868*1024, resource.BinarySI),
						v1.ResourcePods:   *resource.NewQuantity(29, resource.BinarySI),
					},
					Allocatable: v1.ResourceList{
						v1.ResourceCPU:    *resource.NewMilliQuantity(1930, resource.DecimalSI),
						v1.ResourceMemory: *resource.NewQuantity(3287692*1024, resource.BinarySI),
						v1.ResourcePods:   *resource.NewQuantity(29, resource.BinarySI),
					},
				},
				ObjectMeta: metav1.ObjectMeta{Name: "node1"},
			},
			usage: map[v1.ResourceName]*resource.Quantity{
				v1.ResourceCPU:    resource.NewMilliQuantity(1730, resource.DecimalSI),
				v1.ResourceMemory: resource.NewQuantity(3038982964, resource.BinarySI),
				v1.ResourcePods:   resource.NewQuantity(25, resource.BinarySI),
			},
		},
	}
	// testNode2: usage 1220m CPU, 3038982964 bytes memory, 11 pods
	// (lowest CPU and pod usage of the three fixtures).
	testNode2 = NodeInfo{
		NodeUsage: NodeUsage{
			node: &v1.Node{
				Status: v1.NodeStatus{
					Capacity: v1.ResourceList{
						v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
						v1.ResourceMemory: *resource.NewQuantity(3977868*1024, resource.BinarySI),
						v1.ResourcePods:   *resource.NewQuantity(29, resource.BinarySI),
					},
					Allocatable: v1.ResourceList{
						v1.ResourceCPU:    *resource.NewMilliQuantity(1930, resource.DecimalSI),
						v1.ResourceMemory: *resource.NewQuantity(3287692*1024, resource.BinarySI),
						v1.ResourcePods:   *resource.NewQuantity(29, resource.BinarySI),
					},
				},
				ObjectMeta: metav1.ObjectMeta{Name: "node2"},
			},
			usage: map[v1.ResourceName]*resource.Quantity{
				v1.ResourceCPU:    resource.NewMilliQuantity(1220, resource.DecimalSI),
				v1.ResourceMemory: resource.NewQuantity(3038982964, resource.BinarySI),
				v1.ResourcePods:   resource.NewQuantity(11, resource.BinarySI),
			},
		},
	}
	// testNode3: usage 1530m CPU, 5038982964 bytes memory, 20 pods
	// (highest memory usage of the three fixtures).
	testNode3 = NodeInfo{
		NodeUsage: NodeUsage{
			node: &v1.Node{
				Status: v1.NodeStatus{
					Capacity: v1.ResourceList{
						v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
						v1.ResourceMemory: *resource.NewQuantity(3977868*1024, resource.BinarySI),
						v1.ResourcePods:   *resource.NewQuantity(29, resource.BinarySI),
					},
					Allocatable: v1.ResourceList{
						v1.ResourceCPU:    *resource.NewMilliQuantity(1930, resource.DecimalSI),
						v1.ResourceMemory: *resource.NewQuantity(3287692*1024, resource.BinarySI),
						v1.ResourcePods:   *resource.NewQuantity(29, resource.BinarySI),
					},
				},
				ObjectMeta: metav1.ObjectMeta{Name: "node3"},
			},
			usage: map[v1.ResourceName]*resource.Quantity{
				v1.ResourceCPU:    resource.NewMilliQuantity(1530, resource.DecimalSI),
				v1.ResourceMemory: resource.NewQuantity(5038982964, resource.BinarySI),
				v1.ResourcePods:   resource.NewQuantity(20, resource.BinarySI),
			},
		},
	}
)
v1.ResourcePods: *resource.NewQuantity(29, resource.BinarySI), 119 | }, 120 | }, 121 | }, 122 | usage: map[v1.ResourceName]*resource.Quantity{ 123 | v1.ResourceCPU: resource.NewMilliQuantity(1220, resource.DecimalSI), 124 | v1.ResourceMemory: resource.NewQuantity(3038982964, resource.BinarySI), 125 | v1.ResourcePods: resource.NewQuantity(11, resource.BinarySI), 126 | }, 127 | }) 128 | 129 | expectedUsageInIntPercentage := map[v1.ResourceName]float64{ 130 | v1.ResourceCPU: 63, 131 | v1.ResourceMemory: 90, 132 | v1.ResourcePods: 37, 133 | } 134 | 135 | for resourceName, percentage := range expectedUsageInIntPercentage { 136 | if math.Floor(resourceUsagePercentage[resourceName]) != percentage { 137 | t.Errorf("Incorrect percentange computation, expected %v, got math.Floor(%v) instead", percentage, resourceUsagePercentage[resourceName]) 138 | } 139 | } 140 | 141 | t.Logf("resourceUsagePercentage: %#v\n", resourceUsagePercentage) 142 | } 143 | 144 | func TestSortNodesByUsageDescendingOrder(t *testing.T) { 145 | nodeList := []NodeInfo{testNode1, testNode2, testNode3} 146 | expectedNodeList := []NodeInfo{testNode3, testNode1, testNode2} // testNode3 has the highest usage 147 | sortNodesByUsage(nodeList, false) // ascending=false, sort nodes in descending order 148 | 149 | for i := 0; i < len(expectedNodeList); i++ { 150 | if nodeList[i].NodeUsage.node.Name != expectedNodeList[i].NodeUsage.node.Name { 151 | t.Errorf("Expected %v, got %v", expectedNodeList[i].NodeUsage.node.Name, nodeList[i].NodeUsage.node.Name) 152 | } 153 | } 154 | } 155 | 156 | func TestSortNodesByUsageAscendingOrder(t *testing.T) { 157 | nodeList := []NodeInfo{testNode1, testNode2, testNode3} 158 | expectedNodeList := []NodeInfo{testNode2, testNode1, testNode3} 159 | sortNodesByUsage(nodeList, true) // ascending=true, sort nodes in ascending order 160 | 161 | for i := 0; i < len(expectedNodeList); i++ { 162 | if nodeList[i].NodeUsage.node.Name != expectedNodeList[i].NodeUsage.node.Name { 163 | 
t.Errorf("Expected %v, got %v", expectedNodeList[i].NodeUsage.node.Name, nodeList[i].NodeUsage.node.Name) 164 | } 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package loadaware 18 | 19 | import ( 20 | "k8s.io/apimachinery/pkg/runtime" 21 | ) 22 | 23 | var ( 24 | SchemeBuilder = runtime.NewSchemeBuilder() 25 | localSchemeBuilder = &SchemeBuilder 26 | AddToScheme = localSchemeBuilder.AddToScheme 27 | ) 28 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// LoadAwareUtilizationArgs holds the configuration of the load-aware
// utilization plugin, combining utilization thresholds with the eviction
// filter settings and the metrics source to query.
type LoadAwareUtilizationArgs struct {
	metav1.TypeMeta `json:",inline"`

	// UseDeviationThresholds, when set, relaxes validation so a threshold may
	// exceed its corresponding target threshold (see validation.go).
	UseDeviationThresholds bool `json:"useDeviationThresholds"`
	// Thresholds and TargetThresholds are per-resource percentages; both must
	// configure exactly the same set of resource names.
	Thresholds       api.ResourceThresholds `json:"thresholds"`
	TargetThresholds api.ResourceThresholds `json:"targetThresholds"`
	NumberOfNodes    int                    `json:"numberOfNodes"`

	// Fields mirrored from the default evictor arguments (DefaultEvictorArgs).
	NodeSelector            string                 `json:"nodeSelector"`
	EvictLocalStoragePods   bool                   `json:"evictLocalStoragePods"`
	EvictSystemCriticalPods bool                   `json:"evictSystemCriticalPods"`
	IgnorePvcPods           bool                   `json:"ignorePvcPods"`
	EvictFailedBarePods     bool                   `json:"evictFailedBarePods"`
	LabelSelector           *metav1.LabelSelector  `json:"labelSelector"`
	PriorityThreshold       *api.PriorityThreshold `json:"priorityThreshold"`
	NodeFit                 bool                   `json:"nodeFit"`

	// Naming this one differently since namespaces are still
	// considered while considering resources used by pods
	// but then filtered out before eviction. Only the Exclude list is
	// supported (enforced in ValidateLoadAwareUtilizationArgs).
	EvictableNamespaces *api.Namespaces `json:"evictableNamespaces"`

	// MetricsConfiguration selects the metrics source queried for node load.
	MetricsConfiguration source.Metrics `json:"metrics"`

	Duration string `json:"duration"`
}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package loadaware 18 | 19 | import ( 20 | "fmt" 21 | 22 | "k8s.io/apimachinery/pkg/runtime" 23 | "k8s.io/klog/v2" 24 | "sigs.k8s.io/descheduler/pkg/api" 25 | ) 26 | 27 | func ValidateLoadAwareUtilizationArgs(obj runtime.Object) error { 28 | args, ok := obj.(*LoadAwareUtilizationArgs) 29 | if !ok { 30 | klog.Errorf("obj with type %T could not parse", obj) 31 | } 32 | // only exclude can be set, or not at all 33 | if args.EvictableNamespaces != nil && len(args.EvictableNamespaces.Include) > 0 { 34 | return fmt.Errorf("only Exclude namespaces can be set, inclusion is not supported") 35 | } 36 | err := validateLoadAwareUtilizationThresholds(args.Thresholds, args.TargetThresholds, args.UseDeviationThresholds) 37 | if err != nil { 38 | return err 39 | } 40 | return nil 41 | } 42 | 43 | func validateLoadAwareUtilizationThresholds(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) error { 44 | // validate thresholds and targetThresholds config 45 | if err := validateThresholds(thresholds); err != nil { 46 | return fmt.Errorf("thresholds config is not valid: %v", err) 47 | } 48 | if err := validateThresholds(targetThresholds); err != nil { 49 | return fmt.Errorf("targetThresholds config is not valid: %v", err) 50 | } 51 | 52 | // validate if thresholds and targetThresholds have same resources configured 53 | if len(thresholds) != len(targetThresholds) { 54 | return fmt.Errorf("thresholds and targetThresholds configured different resources") 55 | } 56 | for resourceName, value := range thresholds { 57 | 
if targetValue, ok := targetThresholds[resourceName]; !ok { 58 | return fmt.Errorf("thresholds and targetThresholds configured different resources") 59 | } else if value > targetValue && !useDeviationThresholds { 60 | return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName) 61 | } 62 | } 63 | return nil 64 | } 65 | 66 | // validateThresholds checks if thresholds have valid resource name and resource percentage configured 67 | func validateThresholds(thresholds api.ResourceThresholds) error { 68 | if len(thresholds) == 0 { 69 | return fmt.Errorf("no resource threshold is configured") 70 | } 71 | for name, percent := range thresholds { 72 | if percent < MinResourcePercentage || percent > MaxResourcePercentage { 73 | return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage) 74 | } 75 | } 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /pkg/framework/plugins/loadaware/validation_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
// TestValidateLowNodeUtilizationPluginConfig is a table-driven test for
// validateLoadAwareUtilizationThresholds, covering out-of-range percentages,
// mismatched resource sets (including extended resources), and threshold >
// targetThreshold violations. Deviation mode is exercised with false.
func TestValidateLowNodeUtilizationPluginConfig(t *testing.T) {
	extendedResource := v1.ResourceName("example.com/foo")
	tests := []struct {
		name             string
		thresholds       api.ResourceThresholds
		targetThresholds api.ResourceThresholds
		errInfo          error // expected error; nil means the config is valid
	}{
		{
			// Memory 120 is above MaxResourcePercentage.
			name: "passing invalid thresholds",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 120,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
			},
			errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
				"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
		},
		{
			name: "thresholds and targetThresholds configured different num of resources",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
				v1.ResourcePods:   80,
			},
			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
		},
		{
			name: "thresholds and targetThresholds configured different resources",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:  80,
				v1.ResourcePods: 80,
			},
			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
		},
		{
			name: "thresholds' CPU config value is greater than targetThresholds'",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    90,
				v1.ResourceMemory: 20,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
			},
			errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", v1.ResourceCPU),
		},
		{
			name: "only thresholds configured extended resource",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
				extendedResource:  20,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
			},
			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
		},
		{
			name: "only targetThresholds configured extended resource",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
				extendedResource:  80,
			},
			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
		},
		{
			name: "thresholds and targetThresholds configured different extended resources",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
				extendedResource:  20,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
				"example.com/bar": 80,
			},
			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
		},
		{
			name: "thresholds' extended resource config value is greater than targetThresholds'",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
				extendedResource:  90,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
				extendedResource:  20,
			},
			errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", extendedResource),
		},
		{
			name: "passing valid plugin config",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
			},
			errInfo: nil,
		},
		{
			name: "passing valid plugin config with extended resource",
			thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
				extendedResource:  20,
			},
			targetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    80,
				v1.ResourceMemory: 80,
				extendedResource:  80,
			},
			errInfo: nil,
		},
	}

	for _, testCase := range tests {
		args := &LoadAwareUtilizationArgs{
			Thresholds:       testCase.thresholds,
			TargetThresholds: testCase.targetThresholds,
		}
		// useDeviationThresholds is fixed to false: the > targetThreshold
		// cases above rely on strict comparison.
		validateErr := validateLoadAwareUtilizationThresholds(args.Thresholds, args.TargetThresholds, false)

		// Compare by identity when either side is nil, by message otherwise
		// (fmt.Errorf values are never equal by identity).
		if validateErr == nil || testCase.errInfo == nil {
			if validateErr != testCase.errInfo {
				t.Errorf("expected validity of plugin config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
					testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
			}
		} else if validateErr.Error() != testCase.errInfo.Error() {
			t.Errorf("expected validity of plugin config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
				testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
		}
	}
}
5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | 13 | Copyright 2024 The Volcano Authors. 14 | 15 | Modifications made by Volcano authors: 16 | - [2024]Abstract evictor as an interface 17 | */ 18 | 19 | package profile 20 | 21 | import ( 22 | "context" 23 | "fmt" 24 | "time" 25 | 26 | "sigs.k8s.io/descheduler/metrics" 27 | "sigs.k8s.io/descheduler/pkg/api" 28 | "sigs.k8s.io/descheduler/pkg/descheduler/evictions" 29 | podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" 30 | "sigs.k8s.io/descheduler/pkg/framework/pluginregistry" 31 | frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types" 32 | 33 | v1 "k8s.io/api/core/v1" 34 | "k8s.io/apimachinery/pkg/util/errors" 35 | "k8s.io/apimachinery/pkg/util/sets" 36 | "k8s.io/client-go/informers" 37 | clientset "k8s.io/client-go/kubernetes" 38 | 39 | "k8s.io/klog/v2" 40 | ) 41 | 42 | const DefaultEvictor string = "default" 43 | 44 | // evictorImpl implements the Evictor interface so plugins 45 | // can evict a pod without importing a specific pod evictor 46 | type evictorImpl struct { 47 | podEvictor *evictions.PodEvictor 48 | filter podutil.FilterFunc 49 | preEvictionFilter podutil.FilterFunc 50 | } 51 | 52 | var _ frameworktypes.Evictor = &evictorImpl{} 53 | 54 | // Filter checks if a pod can be evicted 55 | func (ei *evictorImpl) Filter(pod *v1.Pod) bool { 56 | return ei.filter(pod) 57 | } 58 | 59 | // PreEvictionFilter checks if pod can be evicted right before eviction 60 | func (ei *evictorImpl) PreEvictionFilter(pod *v1.Pod) bool { 61 | return ei.preEvictionFilter(pod) 62 | } 63 | 64 | // Evict evicts a pod (no pre-check performed) 65 | func (ei 
// NodeLimitExceeded reports whether the underlying pod evictor's per-node
// eviction limit has been reached for the given node.
func (ei *evictorImpl) NodeLimitExceeded(node *v1.Node) bool {
	return ei.podEvictor.NodeLimitExceeded(node)
}

// handleImpl implements the framework handle which gets passed to plugins
type handleImpl struct {
	clientSet                 clientset.Interface
	getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc
	sharedInformerFactory     informers.SharedInformerFactory
	evictor                   *evictorImpl
}

// Compile-time assertion that handleImpl satisfies the framework Handle.
var _ frameworktypes.Handle = &handleImpl{}

// ClientSet retrieves kube client set
func (hi *handleImpl) ClientSet() clientset.Interface {
	return hi.clientSet
}

// GetPodsAssignedToNodeFunc retrieves GetPodsAssignedToNodeFunc implementation
func (hi *handleImpl) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc {
	return hi.getPodsAssignedToNodeFunc
}

// SharedInformerFactory retrieves shared informer factory
func (hi *handleImpl) SharedInformerFactory() informers.SharedInformerFactory {
	return hi.sharedInformerFactory
}

// Evictor retrieves evictor so plugins can filter and evict pods
func (hi *handleImpl) Evictor() frameworktypes.Evictor {
	return hi.evictor
}

// filterPlugin is a plugin that contributes an eviction Filter.
type filterPlugin interface {
	frameworktypes.Plugin
	Filter(pod *v1.Pod) bool
}

// preEvictionFilterPlugin is a plugin that contributes a PreEvictionFilter,
// applied right before a pod is evicted.
type preEvictionFilterPlugin interface {
	frameworktypes.Plugin
	PreEvictionFilter(pod *v1.Pod) bool
}

// Evictor abstracts the pod evictor used by profileImpl (Volcano
// modification: the evictor is an interface rather than a concrete type).
type Evictor interface {
	NodeEvicted(node *v1.Node) uint
	TotalEvicted() uint
	NodeLimitExceeded(node *v1.Node) bool
	EvictPod(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool
}
// Option for the handleImpl.
type Option func(*handleImplOpts)

// handleImplOpts collects the dependencies used to construct a handleImpl.
type handleImplOpts struct {
	clientSet                 clientset.Interface
	sharedInformerFactory     informers.SharedInformerFactory
	getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc
	podEvictor                *evictions.PodEvictor
}

// WithClientSet sets clientSet for the scheduling frameworkImpl.
func WithClientSet(clientSet clientset.Interface) Option {
	return func(o *handleImplOpts) {
		o.clientSet = clientSet
	}
}

// WithSharedInformerFactory sets the shared informer factory made available
// to plugins through the handle.
func WithSharedInformerFactory(sharedInformerFactory informers.SharedInformerFactory) Option {
	return func(o *handleImplOpts) {
		o.sharedInformerFactory = sharedInformerFactory
	}
}

// WithPodEvictor sets the pod evictor backing the profile's Evictor.
func WithPodEvictor(podEvictor *evictions.PodEvictor) Option {
	return func(o *handleImplOpts) {
		o.podEvictor = podEvictor
	}
}

// WithGetPodsAssignedToNodeFnc sets the node-to-pods lookup function.
func WithGetPodsAssignedToNodeFnc(getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc) Option {
	return func(o *handleImplOpts) {
		o.getPodsAssignedToNodeFunc = getPodsAssignedToNodeFunc
	}
}

// getPluginConfig returns the config for pluginName and its index in
// pluginConfigs, or (nil, 0) when absent.
// NOTE(review): the returned index is only meaningful when the config is
// non-nil — 0 is also a valid index; callers must nil-check first.
func getPluginConfig(pluginName string, pluginConfigs []api.PluginConfig) (*api.PluginConfig, int) {
	for idx, pluginConfig := range pluginConfigs {
		if pluginConfig.Name == pluginName {
			return &pluginConfig, idx
		}
	}
	return nil, 0
}
getPluginConfig(pluginName, config.PluginConfigs) 180 | if pc == nil { 181 | klog.ErrorS(fmt.Errorf("unable to get plugin config"), "skipping plugin", "plugin", pluginName, "profile", config.Name) 182 | return nil, fmt.Errorf("unable to find %q plugin config", pluginName) 183 | } 184 | 185 | registryPlugin, ok := reg[pluginName] 186 | if !ok { 187 | klog.ErrorS(fmt.Errorf("unable to find plugin in the pluginsMap"), "skipping plugin", "plugin", pluginName) 188 | return nil, fmt.Errorf("unable to find %q plugin in the pluginsMap", pluginName) 189 | } 190 | pg, err := registryPlugin.PluginBuilder(pc.Args, handle) 191 | if err != nil { 192 | klog.ErrorS(err, "unable to initialize a plugin", "pluginName", pluginName) 193 | return nil, fmt.Errorf("unable to initialize %q plugin: %v", pluginName, err) 194 | } 195 | return pg, nil 196 | } 197 | 198 | func (p *profileImpl) registryToExtensionPoints(registry pluginregistry.Registry) { 199 | p.deschedule = sets.NewString() 200 | p.balance = sets.NewString() 201 | p.filter = sets.NewString() 202 | p.preEvictionFilter = sets.NewString() 203 | 204 | for plugin, pluginUtilities := range registry { 205 | if _, ok := pluginUtilities.PluginType.(frameworktypes.DeschedulePlugin); ok { 206 | p.deschedule.Insert(plugin) 207 | } 208 | if _, ok := pluginUtilities.PluginType.(frameworktypes.BalancePlugin); ok { 209 | p.balance.Insert(plugin) 210 | } 211 | if _, ok := pluginUtilities.PluginType.(frameworktypes.EvictorPlugin); ok { 212 | p.filter.Insert(plugin) 213 | p.preEvictionFilter.Insert(plugin) 214 | } 215 | } 216 | } 217 | 218 | func NewProfile(config api.DeschedulerProfile, reg pluginregistry.Registry, opts ...Option) (*profileImpl, error) { 219 | hOpts := &handleImplOpts{} 220 | for _, optFnc := range opts { 221 | optFnc(hOpts) 222 | } 223 | 224 | if hOpts.clientSet == nil { 225 | return nil, fmt.Errorf("clientSet missing") 226 | } 227 | 228 | if hOpts.sharedInformerFactory == nil { 229 | return nil, 
fmt.Errorf("sharedInformerFactory missing") 230 | } 231 | 232 | if hOpts.podEvictor == nil { 233 | return nil, fmt.Errorf("podEvictor missing") 234 | } 235 | pi := &profileImpl{ 236 | profileName: config.Name, 237 | podEvictor: hOpts.podEvictor, 238 | deschedulePlugins: []frameworktypes.DeschedulePlugin{}, 239 | balancePlugins: []frameworktypes.BalancePlugin{}, 240 | filterPlugins: []filterPlugin{}, 241 | preEvictionFilterPlugins: []preEvictionFilterPlugin{}, 242 | } 243 | pi.registryToExtensionPoints(reg) 244 | 245 | if !pi.deschedule.HasAll(config.Plugins.Deschedule.Enabled...) { 246 | return nil, fmt.Errorf("profile %q configures deschedule extension point of non-existing plugins: %v", config.Name, sets.NewString(config.Plugins.Deschedule.Enabled...).Difference(pi.deschedule)) 247 | } 248 | if !pi.balance.HasAll(config.Plugins.Balance.Enabled...) { 249 | return nil, fmt.Errorf("profile %q configures balance extension point of non-existing plugins: %v", config.Name, sets.NewString(config.Plugins.Balance.Enabled...).Difference(pi.balance)) 250 | } 251 | if !pi.filter.HasAll(config.Plugins.Filter.Enabled...) { 252 | return nil, fmt.Errorf("profile %q configures filter extension point of non-existing plugins: %v", config.Name, sets.NewString(config.Plugins.Filter.Enabled...).Difference(pi.filter)) 253 | } 254 | if !pi.preEvictionFilter.HasAll(config.Plugins.PreEvictionFilter.Enabled...) 
{ 255 | return nil, fmt.Errorf("profile %q configures preEvictionFilter extension point of non-existing plugins: %v", config.Name, sets.NewString(config.Plugins.PreEvictionFilter.Enabled...).Difference(pi.preEvictionFilter)) 256 | } 257 | 258 | handle := &handleImpl{ 259 | clientSet: hOpts.clientSet, 260 | getPodsAssignedToNodeFunc: hOpts.getPodsAssignedToNodeFunc, 261 | sharedInformerFactory: hOpts.sharedInformerFactory, 262 | evictor: &evictorImpl{ 263 | podEvictor: hOpts.podEvictor, 264 | }, 265 | } 266 | 267 | pluginNames := append(config.Plugins.Deschedule.Enabled, config.Plugins.Balance.Enabled...) 268 | pluginNames = append(pluginNames, config.Plugins.Filter.Enabled...) 269 | pluginNames = append(pluginNames, config.Plugins.PreEvictionFilter.Enabled...) 270 | 271 | plugins := make(map[string]frameworktypes.Plugin) 272 | for _, plugin := range sets.NewString(pluginNames...).List() { 273 | pg, err := buildPlugin(config, plugin, handle, reg) 274 | if err != nil { 275 | return nil, fmt.Errorf("unable to build %v plugin: %v", plugin, err) 276 | } 277 | if pg == nil { 278 | return nil, fmt.Errorf("got empty %v plugin build", plugin) 279 | } 280 | plugins[plugin] = pg 281 | } 282 | 283 | // Later, when a default list of plugins and their extension points is established, 284 | // compute the list of enabled extension points as (DefaultEnabled + Enabled - Disabled) 285 | for _, pluginName := range config.Plugins.Deschedule.Enabled { 286 | pi.deschedulePlugins = append(pi.deschedulePlugins, plugins[pluginName].(frameworktypes.DeschedulePlugin)) 287 | } 288 | 289 | for _, pluginName := range config.Plugins.Balance.Enabled { 290 | pi.balancePlugins = append(pi.balancePlugins, plugins[pluginName].(frameworktypes.BalancePlugin)) 291 | } 292 | 293 | filters := []podutil.FilterFunc{} 294 | for _, pluginName := range config.Plugins.Filter.Enabled { 295 | pi.filterPlugins = append(pi.filterPlugins, plugins[pluginName].(filterPlugin)) 296 | filters = append(filters, 
plugins[pluginName].(filterPlugin).Filter) 297 | } 298 | 299 | preEvictionFilters := []podutil.FilterFunc{} 300 | for _, pluginName := range config.Plugins.PreEvictionFilter.Enabled { 301 | pi.preEvictionFilterPlugins = append(pi.preEvictionFilterPlugins, plugins[pluginName].(preEvictionFilterPlugin)) 302 | preEvictionFilters = append(preEvictionFilters, plugins[pluginName].(preEvictionFilterPlugin).PreEvictionFilter) 303 | } 304 | 305 | handle.evictor.filter = podutil.WrapFilterFuncs(filters...) 306 | handle.evictor.preEvictionFilter = podutil.WrapFilterFuncs(preEvictionFilters...) 307 | 308 | return pi, nil 309 | } 310 | 311 | func (d profileImpl) RunDeschedulePlugins(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status { 312 | errs := []error{} 313 | for _, pl := range d.deschedulePlugins { 314 | evicted := d.podEvictor.TotalEvicted() 315 | // TODO: strategyName should be accessible from within the strategy using a framework 316 | // handle or function which the Evictor has access to. For migration/in-progress framework 317 | // work, we are currently passing this via context. 
To be removed 318 | // (See discussion thread https://github.com/kubernetes-sigs/descheduler/pull/885#discussion_r919962292) 319 | strategyStart := time.Now() 320 | childCtx := context.WithValue(ctx, "strategyName", pl.Name()) 321 | status := pl.Deschedule(childCtx, nodes) 322 | metrics.DeschedulerStrategyDuration.With(map[string]string{"strategy": pl.Name(), "profile": d.profileName}).Observe(time.Since(strategyStart).Seconds()) 323 | 324 | if status != nil && status.Err != nil { 325 | errs = append(errs, fmt.Errorf("plugin %q finished with error: %v", pl.Name(), status.Err)) 326 | } 327 | klog.V(1).InfoS("Total number of pods evicted", "extension point", "Deschedule", "evictedPods", d.podEvictor.TotalEvicted()-evicted) 328 | } 329 | 330 | aggrErr := errors.NewAggregate(errs) 331 | if aggrErr == nil { 332 | return &frameworktypes.Status{} 333 | } 334 | 335 | return &frameworktypes.Status{ 336 | Err: fmt.Errorf("%v", aggrErr.Error()), 337 | } 338 | } 339 | 340 | func (d profileImpl) RunBalancePlugins(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status { 341 | errs := []error{} 342 | for _, pl := range d.balancePlugins { 343 | evicted := d.podEvictor.TotalEvicted() 344 | // TODO: strategyName should be accessible from within the strategy using a framework 345 | // handle or function which the Evictor has access to. For migration/in-progress framework 346 | // work, we are currently passing this via context. 
To be removed 347 | // (See discussion thread https://github.com/kubernetes-sigs/descheduler/pull/885#discussion_r919962292) 348 | strategyStart := time.Now() 349 | childCtx := context.WithValue(ctx, "strategyName", pl.Name()) 350 | status := pl.Balance(childCtx, nodes) 351 | metrics.DeschedulerStrategyDuration.With(map[string]string{"strategy": pl.Name(), "profile": d.profileName}).Observe(time.Since(strategyStart).Seconds()) 352 | 353 | if status != nil && status.Err != nil { 354 | errs = append(errs, fmt.Errorf("plugin %q finished with error: %v", pl.Name(), status.Err)) 355 | } 356 | klog.V(1).InfoS("Total number of pods evicted", "extension point", "Balance", "evictedPods", d.podEvictor.TotalEvicted()-evicted) 357 | } 358 | 359 | aggrErr := errors.NewAggregate(errs) 360 | if aggrErr == nil { 361 | return &frameworktypes.Status{} 362 | } 363 | 364 | return &frameworktypes.Status{ 365 | Err: fmt.Errorf("%v", aggrErr.Error()), 366 | } 367 | } 368 | --------------------------------------------------------------------------------