├── .github ├── copy-pr-bot.yaml ├── dependabot.yml └── workflows │ └── ci.yaml ├── .gitignore ├── .golangci.yaml ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── api └── upgrade │ └── v1alpha1 │ ├── upgrade_spec.go │ └── zz_generated.deepcopy.go ├── cmd └── apply-crds │ └── main.go ├── docker └── Dockerfile.devel ├── docs └── automatic-ofed-upgrade.md ├── go.mod ├── go.sum ├── hack ├── boilerplate.go.txt └── crd │ └── bases │ └── maintenance.nvidia.com_nodemaintenances.yaml ├── images └── driver-upgrade-state-diagram.png └── pkg ├── consts └── consts.go ├── crdutil ├── README.md ├── crdutil.go ├── crdutil_test.go ├── suite_test.go └── test-files │ ├── test-crds.yaml │ └── updated-test-crds.yaml └── upgrade ├── common_manager.go ├── consts.go ├── cordon_manager.go ├── cordon_manager_test.go ├── drain_manager.go ├── drain_manager_test.go ├── mocks ├── CordonManager.go ├── DrainManager.go ├── NodeUpgradeStateProvider.go ├── PodManager.go └── ValidationManager.go ├── node_upgrade_state_provider.go ├── node_upgrade_state_provider_test.go ├── pod_manager.go ├── pod_manager_test.go ├── safe_driver_load_manager.go ├── safe_driver_load_manager_test.go ├── upgrade_inplace.go ├── upgrade_requestor.go ├── upgrade_state.go ├── upgrade_state_test.go ├── upgrade_suit_test.go ├── util.go ├── validation_manager.go └── validation_manager_test.go /.github/copy-pr-bot.yaml: -------------------------------------------------------------------------------- 1 | # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/#configuration 2 | enabled: true 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Please see the documentation for all configuration options: 2 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 3 | 4 | version: 2 5 | updates: 6 | - package-ecosystem: "gomod" 7 | target-branch: main 8 | directory: "/" 9 | schedule: 10 | interval: "daily" 11 | labels: 12 | - dependencies 13 | groups: 14 | k8sio: 15 | patterns: 16 | - k8s.io/* 17 | exclude-patterns: 18 | - k8s.io/klog/* 19 | 20 | - package-ecosystem: "github-actions" 21 | directory: "/" 22 | schedule: 23 | interval: "daily" 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | name: "CI" 16 | on: 17 | push: 18 | branches: 19 | - "pull-request/[0-9]+" 20 | - main 21 | 22 | jobs: 23 | lint: 24 | runs-on: ubuntu-22.04 25 | steps: 26 | - name: Set up Go 27 | uses: actions/setup-go@v5 28 | with: 29 | go-version: 1.24.x 30 | - name: Check out code into the Go module directory 31 | uses: actions/checkout@v4 32 | - name: Lint 33 | run: make lint 34 | test: 35 | runs-on: ubuntu-22.04 36 | steps: 37 | - name: Set up Go 38 | uses: actions/setup-go@v5 39 | with: 40 | go-version: 1.24.x 41 | - name: Check out code into the Go module directory 42 | uses: actions/checkout@v4 43 | - name: Run tests 44 | run: make test 45 | go-check: 46 | runs-on: ubuntu-latest 47 | steps: 48 | - uses: actions/checkout@v4 49 | - name: Set up Go 50 | uses: actions/setup-go@v5 51 | with: 52 | go-version: 1.24.x 53 | - name: Run go checks 54 | run: make go-check 55 | coverage: 56 | runs-on: ubuntu-latest 57 | steps: 58 | - uses: actions/checkout@v4 59 | - name: Set up Go 60 | uses: actions/setup-go@v5 61 | with: 62 | go-version: 1.24.x 63 | - name: Generate coverage report 64 | run: make cov-report 65 | - name: Upload to Coveralls 66 | uses: coverallsapp/github-action@v2 67 | with: 68 | github-token: ${{ secrets.GITHUB_TOKEN }} 69 | path-to-lcov: lcov.info 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | *.test 4 | testbin/ 5 | cover.out 6 | .idea 7 | bin/ 8 | lcov.info 9 | 10 | vendor/ 11 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | modules-download-mode: readonly 4 | tests: false 5 | linters: 6 | default: none 7 | enable: 8 | - asasalint 9 | - asciicheck 10 | - bidichk 11 | - bodyclose 12 | - containedctx 13 | - contextcheck 14 | - decorder 15 | - depguard 16 | - dogsled 17 | - durationcheck 18 | - errcheck 19 | - errchkjson 20 | - errname 21 | - forbidigo 22 | - forcetypeassert 23 | - funlen 24 | - goconst 25 | - gocritic 26 | - gocyclo 27 | - goheader 28 | - gomodguard 29 | - goprintffuncname 30 | - gosec 31 | - govet 32 | - grouper 33 | - importas 34 | - ineffassign 35 | - interfacebloat 36 | - lll 37 | - loggercheck 38 | - maintidx 39 | - makezero 40 | - misspell 41 | - nakedret 42 | - nilerr 43 | - nilnil 44 | - noctx 45 | - nolintlint 46 | - nosprintfhostport 47 | - prealloc 48 | - predeclared 49 | - promlinter 50 | - reassign 51 | - revive 52 | - rowserrcheck 53 | - staticcheck 54 | - thelper 55 | - tparallel 56 | - unconvert 57 | - unparam 58 | - unused 59 | - usestdlibvars 60 | - wastedassign 61 | - whitespace 62 | settings: 63 | depguard: 64 | rules: 65 | main: 66 | list-mode: original 67 | allow: 68 | - $gostd 69 | - github.com/NVIDIA 70 | - github.com/go-logr/logr 71 | - k8s.io 72 | - sigs.k8s.io 73 | dupl: 74 | threshold: 100 75 | funlen: 76 | lines: 120 77 | statements: 58 78 | goconst: 79 | min-len: 2 80 | min-occurrences: 2 81 | gocritic: 82 | disabled-checks: 83 | - appendAssign 84 | gocyclo: 85 | min-complexity: 30 86 | ireturn: 87 | allow: 88 | - anon 89 | - error 90 | - empty 91 | - stdlib 92 | lll: 93 | line-length: 120 94 | misspell: 95 | locale: US 96 | staticcheck: 97 | dot-import-whitelist: 98 | - github.com/onsi/ginkgo 99 | - github.com/onsi/ginkgo/extensions/table 100 | - github.com/onsi/gomega 101 | - github.com/onsi/gomega/gstruct 102 | 
exclusions: 103 | generated: lax 104 | presets: 105 | - comments 106 | - common-false-positives 107 | - legacy 108 | - std-error-handling 109 | paths: 110 | - third_party$ 111 | - builtin$ 112 | - examples$ 113 | formatters: 114 | enable: 115 | - gofmt 116 | - goimports 117 | settings: 118 | goimports: 119 | local-prefixes: 120 | - github.com/NVIDIA/k8s-operator-libs 121 | exclusions: 122 | generated: lax 123 | paths: 124 | - third_party$ 125 | - builtin$ 126 | - examples$ 127 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute to the NVIDIA `operator-libs` Project 2 | 3 | Want to contribute to the NVIDIA `operator-libs` project? Awesome! 4 | We only require you to sign your work as described in the following section. 5 | 6 | ## Sign your work 7 | 8 | The sign-off is a simple signature at the end of the description for the patch. 9 | Your signature certifies that you wrote the patch or otherwise have the right 10 | to pass it on as an open-source patch. 11 | 12 | The rules are pretty simple, and sign-off means that you certify the DCO below 13 | (from [developercertificate.org](http://developercertificate.org/)): 14 | 15 | ``` 16 | Developer Certificate of Origin 17 | Version 1.1 18 | 19 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 20 | 1 Letterman Drive 21 | Suite D4700 22 | San Francisco, CA, 94129 23 | 24 | Everyone is permitted to copy and distribute verbatim copies of this 25 | license document, but changing it is not allowed. 26 | 27 | Developer's Certificate of Origin 1.1 28 | 29 | By making a contribution to this project, I certify that: 30 | 31 | (a) The contribution was created in whole or in part by me and I 32 | have the right to submit it under the open source license 33 | indicated in the file; or 34 | 35 | (b) The contribution is based upon previous work that, to the best 36 | of my knowledge, is covered under an appropriate open source 37 | license and I have the right under that license to submit that 38 | work with modifications, whether created in whole or in part 39 | by me, under the same open source license (unless I am 40 | permitted to submit under a different license), as indicated 41 | in the file; or 42 | 43 | (c) The contribution was provided directly to me by some other 44 | person who certified (a), (b) or (c) and I have not modified 45 | it. 46 | 47 | (d) I understand and agree that this project and the contribution 48 | are public and that a record of the contribution (including all 49 | personal information I submit with it, including my sign-off) is 50 | maintained indefinitely and may be redistributed consistent with 51 | this project or the open source license(s) involved. 52 | ``` 53 | 54 | To sign off, you just add the following line to every git commit message: 55 | 56 | Signed-off-by: Joe Smith <joe.smith@email.com> 57 | 58 | You must use your real name (sorry, no pseudonyms or anonymous contributions). 59 | 60 | If you set your `user.name` and `user.email` using git config, you can sign 61 | your commit automatically with `git commit -s`. 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions.
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | MODULE := github.com/NVIDIA/k8s-operator-libs 16 | 17 | DOCKER ?= docker 18 | 19 | GOLANG_VERSION := 1.24 20 | 21 | ifeq ($(IMAGE),) 22 | REGISTRY ?= nvidia 23 | IMAGE=$(REGISTRY)/operator-libs 24 | endif 25 | IMAGE_TAG ?= $(GOLANG_VERSION) 26 | BUILDIMAGE ?= $(IMAGE):$(IMAGE_TAG)-devel 27 | 28 | # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. 29 | ENVTEST_K8S_VERSION = 1.32.x 30 | 31 | TARGETS := all check lint go-check generate test cov-report controller-gen golangci-lint gcov2lcov 32 | DOCKER_TARGETS := $(patsubst %, docker-%, $(TARGETS)) 33 | .PHONY: $(TARGETS) $(DOCKER_TARGETS) 34 | 35 | GOOS := linux 36 | 37 | # Tools 38 | TOOLSDIR=$(CURDIR)/bin 39 | 40 | GOLANGCILINT ?= $(TOOLSDIR)/golangci-lint 41 | CONTROLLER_GEN ?= $(TOOLSDIR)/controller-gen 42 | GCOV2LCOV ?= $(TOOLSDIR)/gcov2lcov 43 | SETUP_ENVTEST ?= $(TOOLSDIR)/setup-envtest 44 | GOLANGCILINT_VERSION ?= v2.1.6 45 | CONTROLLER_GEN_VERSION ?= v0.16.5 46 | GCOV2LCOV_VERSION ?= v1.1.1 47 | SETUP_ENVTEST_RELEASE ?= release-0.19 48 | 49 | # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) 50 | ifeq (,$(shell go env GOBIN)) 51 | GOBIN=$(shell go env GOPATH)/bin 52 | else 53 | GOBIN=$(shell go env GOBIN) 54 | endif 55 | 56 | # Setting SHELL to bash allows bash commands to be executed by recipes. 57 | # This is a requirement for 'setup-envtest.sh' in the test target. 58 | # Options are set to exit when a recipe line exits non-zero or a piped command fails. 
59 | SHELL = /usr/bin/env bash -o pipefail 60 | .SHELLFLAGS = -ec 61 | 62 | all: generate check test cov-report ## Generate code, run checks and tests 63 | 64 | check: lint go-check ## Run linters and go checks 65 | 66 | lint: golangci-lint ## Lint code 67 | $(GOLANGCILINT) run --timeout 10m 68 | 69 | go-check: ## Run go checks to ensure modules are synced 70 | go mod tidy && git diff --exit-code 71 | 72 | generate: controller-gen ## Generate code 73 | $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./api/..." 74 | go generate $(MODULE)/... 75 | 76 | test: setup-envtest generate; $(info running $(NAME:%=% )tests...) @ ## Run tests 77 | export KUBEBUILDER_ASSETS="$(shell $(SETUP_ENVTEST) use -p path $(ENVTEST_K8S_VERSION))" && \ 78 | go test ./... -coverprofile cover.out 79 | 80 | cov-report: gcov2lcov test ## Build test coverage report in lcov format 81 | $(GCOV2LCOV) -infile cover.out -outfile lcov.info 82 | 83 | controller-gen: ## Download controller-gen locally if necessary 84 | $(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_GEN_VERSION)) 85 | 86 | golangci-lint: ## Download golangci-lint locally if necessary. 87 | $(call go-install-tool,$(GOLANGCILINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint@$(GOLANGCILINT_VERSION)) 88 | 89 | gcov2lcov: ## Download gcov2lcov locally if necessary. 90 | $(call go-install-tool,$(GCOV2LCOV),github.com/jandelgado/gcov2lcov@$(GCOV2LCOV_VERSION)) 91 | 92 | setup-envtest: ## Download setup-envtest locally if necessary 93 | $(call go-install-tool,$(SETUP_ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest@$(SETUP_ENVTEST_RELEASE)) 94 | 95 | # Generate an image for containerized builds 96 | # Note: This image is local only 97 | .PHONY: .build-image .pull-build-image .push-build-image 98 | .build-image: docker/Dockerfile.devel 99 | if [ "$(SKIP_IMAGE_BUILD)" = "" ]; then \ 100 | $(DOCKER) build \ 101 | --progress=plain \ 102 | --build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \ 103 | --tag $(BUILDIMAGE) \ 104 | -f $(^) \ 105 | docker; \ 106 | fi 107 | 108 | .pull-build-image: 109 | $(DOCKER) pull $(BUILDIMAGE) 110 | 111 | .push-build-image: 112 | $(DOCKER) push $(BUILDIMAGE) 113 | 114 | $(DOCKER_TARGETS): docker-%: .build-image ## Run command in docker 115 | @echo "Running 'make $(*)' in docker container $(BUILDIMAGE)" 116 | $(DOCKER) run \ 117 | --rm \ 118 | -e GOCACHE=/tmp/.cache \ 119 | -e GOLANGCI_LINT_CACHE=/tmp/.cache \ 120 | -e PROJECT_DIR=$(PWD) \ 121 | -v $(PWD):$(PWD) \ 122 | -w $(PWD) \ 123 | --user $$(id -u):$$(id -g) \ 124 | $(BUILDIMAGE) \ 125 | make $(*) 126 | 127 | # go-install-tool will 'go install' any package $2 and install it to $1. 128 | define go-install-tool 129 | @[ -f $(1) ] || { \ 130 | echo "Downloading $(2)" ;\ 131 | GOBIN=$(TOOLSDIR) go install $(2) ;\ 132 | } 133 | endef 134 | 135 | .PHONY: help 136 | help: ## Show this message 137 | @grep -E '^[ a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ 138 | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The `operator-libs` project 2 | 3 | This repository holds a collection of Go packages to ease the development of 4 | NVIDIA Operators for GPU/NIC management.
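For orientation, the sketch below shows one way a consumer could use the API types defined under `api/upgrade/v1alpha1` after fetching the module with `go get github.com/NVIDIA/k8s-operator-libs`; the policy values are illustrative only:

```go
package main

import (
	"fmt"

	v1alpha1 "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1"
)

func main() {
	// Build an upgrade policy programmatically; in a real operator these
	// fields are typically populated from a CustomResource spec.
	policy := &v1alpha1.DriverUpgradePolicySpec{
		AutoUpgrade:         true,
		MaxParallelUpgrades: 1,
	}
	fmt.Printf("auto upgrade: %v, max parallel upgrades: %d\n",
		policy.AutoUpgrade, policy.MaxParallelUpgrades)
}
```

The upgrade state machine that consumes such a policy lives in `pkg/upgrade`, and `pkg/crdutil` provides helpers for applying CRDs.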
5 | -------------------------------------------------------------------------------- /api/upgrade/v1alpha1/upgrade_spec.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | "k8s.io/apimachinery/pkg/runtime/schema" 21 | "k8s.io/apimachinery/pkg/util/intstr" 22 | ) 23 | 24 | // DriverUpgradePolicySpec describes policy configuration for automatic upgrades 25 | // +kubebuilder:object:root=true 26 | // +kubebuilder:object:generate=true 27 | type DriverUpgradePolicySpec struct { 28 | // AutoUpgrade is a global switch for the automatic upgrade feature 29 | // if set to false all other options are ignored 30 | // +optional 31 | // +kubebuilder:default:=false 32 | AutoUpgrade bool `json:"autoUpgrade,omitempty"` 33 | // MaxParallelUpgrades indicates how many nodes can be upgraded in parallel 34 | // 0 means no limit, all nodes will be upgraded in parallel 35 | // +optional 36 | // +kubebuilder:default:=1 37 | // +kubebuilder:validation:Minimum:=0 38 | MaxParallelUpgrades int `json:"maxParallelUpgrades,omitempty"` 39 | // MaxUnavailable is the maximum number of nodes with the driver installed that can be unavailable during the upgrade. 40 | // Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%). 41 | // Absolute number is calculated from percentage by rounding up. 42 | // By default, a fixed value of 25% is used.
43 | // +optional 44 | // +kubebuilder:default:="25%" 45 | MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` 46 | PodDeletion *PodDeletionSpec `json:"podDeletion,omitempty"` 47 | WaitForCompletion *WaitForCompletionSpec `json:"waitForCompletion,omitempty"` 48 | DrainSpec *DrainSpec `json:"drain,omitempty"` 49 | } 50 | 51 | // WaitForCompletionSpec describes the configuration for waiting on job completions 52 | type WaitForCompletionSpec struct { 53 | // PodSelector specifies a label selector for the pods to wait for completion 54 | // For more details on label selectors, see: 55 | // https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors 56 | // +optional 57 | PodSelector string `json:"podSelector,omitempty"` 58 | // TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means 59 | // infinite 60 | // +optional 61 | // +kubebuilder:default:=0 62 | // +kubebuilder:validation:Minimum:=0 63 | TimeoutSecond int `json:"timeoutSeconds,omitempty"` 64 | } 65 | 66 | // PodDeletionSpec describes configuration for deletion of pods using special resources during automatic upgrade 67 | type PodDeletionSpec struct { 68 | // Force indicates if force deletion is allowed 69 | // +optional 70 | // +kubebuilder:default:=false 71 | Force bool `json:"force,omitempty"` 72 | // TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means 73 | // infinite 74 | // +optional 75 | // +kubebuilder:default:=300 76 | // +kubebuilder:validation:Minimum:=0 77 | TimeoutSecond int `json:"timeoutSeconds,omitempty"` 78 | // DeleteEmptyDir indicates if the deletion should continue even if there are pods using emptyDir 79 | // (local data that will be deleted when the pod is deleted) 80 | // +optional 81 | // +kubebuilder:default:=false 82 | DeleteEmptyDir bool `json:"deleteEmptyDir,omitempty"` 83 | } 84 | 85 | // DrainSpec describes configuration for node drain during automatic upgrade 86 | type DrainSpec struct { 87 | // Enable indicates if node draining is allowed during upgrade 88 | // +optional 89 | // +kubebuilder:default:=false 90 | Enable bool `json:"enable,omitempty"` 91 | // Force indicates if force draining is allowed 92 | // +optional 93 | // +kubebuilder:default:=false 94 | Force bool `json:"force,omitempty"` 95 | // PodSelector specifies a label selector to filter pods on the node that need to be drained 96 | // For more details on label selectors, see: 97 | // https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors 98 | // +optional 99 | PodSelector string `json:"podSelector,omitempty"` 100 | // TimeoutSecond specifies the length of time in seconds to wait before giving up drain, zero means infinite 101 | // +optional 102 | // +kubebuilder:default:=300 103 | // +kubebuilder:validation:Minimum:=0 104 | TimeoutSecond int `json:"timeoutSeconds,omitempty"` 105 | // DeleteEmptyDir indicates if the drain should continue even if there are pods using emptyDir 106 | // (local data that will be deleted when the node is drained) 107 | // +optional 108 | // +kubebuilder:default:=false 109 | DeleteEmptyDir bool `json:"deleteEmptyDir,omitempty"` 110 | } 111 | 112 | // GetObjectKind returns ObjectKind 113 | func (obj *DriverUpgradePolicySpec) GetObjectKind() schema.ObjectKind { return nil } 114 | -------------------------------------------------------------------------------- /api/upgrade/v1alpha1/zz_generated.deepcopy.go:
-------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | 3 | /* 4 | Copyright 2022 NVIDIA 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | // Code generated by controller-gen. DO NOT EDIT. 20 | 21 | package v1alpha1 22 | 23 | import ( 24 | runtime "k8s.io/apimachinery/pkg/runtime" 25 | "k8s.io/apimachinery/pkg/util/intstr" 26 | ) 27 | 28 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 29 | func (in *DriverUpgradePolicySpec) DeepCopyInto(out *DriverUpgradePolicySpec) { 30 | *out = *in 31 | if in.MaxUnavailable != nil { 32 | in, out := &in.MaxUnavailable, &out.MaxUnavailable 33 | *out = new(intstr.IntOrString) 34 | **out = **in 35 | } 36 | if in.PodDeletion != nil { 37 | in, out := &in.PodDeletion, &out.PodDeletion 38 | *out = new(PodDeletionSpec) 39 | **out = **in 40 | } 41 | if in.WaitForCompletion != nil { 42 | in, out := &in.WaitForCompletion, &out.WaitForCompletion 43 | *out = new(WaitForCompletionSpec) 44 | **out = **in 45 | } 46 | if in.DrainSpec != nil { 47 | in, out := &in.DrainSpec, &out.DrainSpec 48 | *out = new(DrainSpec) 49 | **out = **in 50 | } 51 | } 52 | 53 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverUpgradePolicySpec. 54 | func (in *DriverUpgradePolicySpec) DeepCopy() *DriverUpgradePolicySpec { 55 | if in == nil { 56 | return nil 57 | } 58 | out := new(DriverUpgradePolicySpec) 59 | in.DeepCopyInto(out) 60 | return out 61 | } 62 | 63 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 64 | func (in *DriverUpgradePolicySpec) DeepCopyObject() runtime.Object { 65 | if c := in.DeepCopy(); c != nil { 66 | return c 67 | } 68 | return nil 69 | } 70 | -------------------------------------------------------------------------------- /cmd/apply-crds/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import "github.com/NVIDIA/k8s-operator-libs/pkg/crdutil" 20 | 21 | func main() { 22 | crdutil.EnsureCRDsCmd() 23 | } 24 | -------------------------------------------------------------------------------- /docker/Dockerfile.devel: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ARG GOLANG_VERSION=1.24 15 | FROM golang:${GOLANG_VERSION} 16 | -------------------------------------------------------------------------------- /docs/automatic-ofed-upgrade.md: -------------------------------------------------------------------------------- 1 | # Automatic Driver Upgrade 2 | When a containerized driver is reloaded on the node, all Pods that use a resource (GPU, secondary NIC, etc.) enabled by the driver lose access to those resources in their containers. All Pods that use those resources need to be removed from the node before the driver Pod is reloaded on that node. 3 | 4 | It is possible to do a driver upgrade manually by using an `OnDelete` UpdateStrategy, but this procedure requires many manual actions and is error-prone. 5 | 6 | This document describes the automatic upgrade flow for the containerized driver. 7 | 8 | ### Upgrade NVIDIA drivers automatically 9 | * The following spec defines the UpgradePolicy for the Driver in the CustomResource: 10 | 11 | ``` 12 | apiVersion: nvidia.com/v1 13 | kind: CustomResource 14 | metadata: 15 | name: example-custom-resource 16 | namespace: nvidia-operator 17 | spec: 18 | driver: 19 | upgradePolicy: 20 | # autoUpgrade is a global switch for automatic upgrade feature 21 | # if set to false all other options are ignored 22 | autoUpgrade: true 23 | # maxParallelUpgrades indicates how many nodes can be upgraded in parallel 24 | # 0 means no limit, all nodes will be upgraded in parallel 25 | maxParallelUpgrades: 0 26 | # describes configuration for node drain during automatic upgrade 27 | drain: 28 | # allow node draining during upgrade 29 | enable: true 30 | # allow force draining 31 | force: false 32 | # specify a label selector to filter pods on the node that need to be drained 33 | podSelector: "" 34 | # specify the length of time in seconds to wait before giving up drain, zero means infinite 35 | # if not specified, the default is 300 seconds 36 | timeoutSeconds: 300 37 | # specify if the drain should continue even if there are pods using emptyDir 38 | deleteEmptyDir: false 39 | ``` 40 | 41 | * To track each node's upgrade status separately, run `kubectl describe node | grep nvidia.com/<driver-name>-driver-upgrade-state`. See the [Node upgrade states](#node-upgrade-states) section describing each state. 42 | 43 | ### Safe driver loading 44 | 45 | On Node startup, the containerized driver takes time to compile and load. 46 | During that time, workloads might get scheduled on that Node.
47 | When the driver is eventually loaded, all existing Pods using resources managed by the driver will lose access to them. 48 | Some such Pods might silently fail or hang. 49 | To avoid such a situation, before the containerized driver is loaded, 50 | the Node should get Cordoned and Drained to ensure all workloads are rescheduled. 51 | The Node should be un-cordoned when the driver is ready on it. 52 | 53 | The safe driver loading feature is implemented as a part of the upgrade flow, 54 | meaning safe driver loading is a special scenario of the upgrade procedure, 55 | where we upgrade from the inbox driver (the driver installed on the host) to the containerized driver. 56 | 57 | The default safe load implementation in the library assumes a two-step driver loading procedure. 58 | As a first step, the driver pod runs the [init container](https://github.com/Mellanox/network-operator-init-container), 59 | which sets the "safe driver load annotation" (`nvidia.com/<driver-name>-driver-upgrade.driver-wait-for-safe-load`) 60 | on the node object; the container then blocks until the upgrade library removes the annotation from the node object. 61 | When the init container completes successfully (i.e., when the annotation has been removed from the Node object), 62 | the driver Pod proceeds to the second step and loads the driver. 63 | After that, the upgrade library waits for the driver to become ready and then uncordons the node if required. 64 | 65 | There is no need to enable the safe driver load feature in the upgrade library explicitly. 66 | The feature will automatically kick in if the "safe driver load annotation" is present on the Node object. 67 | 68 | ### Details 69 | #### Node upgrade states 70 | Each node's upgrade status is reflected in its `nvidia.com/<driver-name>-driver-upgrade-state` label. This label can have the following values: 71 | * Unknown (empty): a node has this state when the upgrade flow is disabled or the node hasn't been processed yet 72 | * `upgrade-required` is set when the driver pod on the node is not up-to-date and requires an upgrade, or if the driver is waiting for safe load 73 | * `cordon-required` is set when the node needs to be made unschedulable in preparation for driver upgrade 74 | * `wait-for-jobs-required` is set on the node when we need to wait for jobs to complete, up to the given timeout 75 | * `drain-required` is set when the node needs to be scheduled for draining 76 | * `pod-restart-required` is set when the driver pod on the node is scheduled for restart 77 | or when unblocking of the driver load is required (safe driver load) 78 | * `validation-required` is set when validation of the new driver deployed on the node is required before moving to `uncordon-required` 79 | * `uncordon-required` is set when the driver pod on the node is up-to-date and has "Ready" status 80 | * `upgrade-done` is set when the driver pod is up to date and running on the node, and the node is schedulable 81 | * `node-maintenance-required` is set for requestor-mode upgrades (i.e. `MAINTENANCE_OPERATOR_ENABLED=true`) after the `upgrade-required` state. Essentially, the controller creates a matching NodeMaintenance object for the maintenance operator to perform its node operations. 82 | * `post-maintenance-required` is set when the NodeMaintenance status condition is `Ready`, meaning the maintenance operator has completed 83 | cordoning and draining the related node(s), and the requestor (client) now needs to perform post-maintenance operations (e.g. restarting the driver pod, rebooting the node, etc.)
84 | * `upgrade-failed` is set when there are any failures during the driver upgrade; see the [Troubleshooting](#node-is-in-upgrade-failed-state) section for more details. 85 | 86 | #### State change diagram 87 | 88 | _NOTE: the diagram is outdated_ 89 | 90 | ![State change diagram](images/driver-upgrade-state-diagram.png) 91 | 92 | #### Upgrade modes 93 | ##### in-place 94 | The in-place (legacy) mode covers the full driver upgrade lifecycle, including node operations such as cordon, pod eviction, drain, and uncordon. 95 | It also maintains an internal scheduler that performs the above node operations according 96 | to the `maxParallelUpgrades` setting under `UpgradePolicy`. 97 | 98 | ##### requestor 99 | The new `requestor` upgrade mode uses the [NVIDIA maintenance operator](https://github.com/Mellanox/maintenance-operator) NodeMaintenance Kubernetes API objects to initiate the DOCA driver upgrade process. 100 | Essentially, it relieves the upgrade controller (in-place mode) of performing the following node operations: cordon, wait for pod completion, drain, and uncordon. 101 | To enable requestor mode, set the `MAINTENANCE_OPERATOR_ENABLED=true` environment variable. 102 | The requestor package also exposes controller-runtime predicate functions under `pkg/upgrade/requestor/predicate.go`, which you can embed in your 103 | controller manager watchers: 104 | ``` 105 | ctrl.NewControllerManagedBy(mgr).For(<your-custom-resource>). 106 | ... 107 | Watches(&maintenancev1alpha1.NodeMaintenance{}, createUpdateDeleteEnqueue, 108 | builder.WithPredicates(requestor.NewConditionChangedPredicate(setupLog, 109 | requestorOpts.MaintenanceOPRequestorID))). 110 | ``` 111 | * Make sure that the NVIDIA maintenance-operator pod is running. 112 | 113 | > __Note__: Initially, `k8s-operator-libs` supports both the `requestor` and `inplace` (legacy) modes simultaneously. 114 | > This means that if a node began its upgrade before `requestor` mode was enabled, it will finish that upgrade in `inplace` mode. The new requestor 115 | > mode takes effect only after `requestor` mode is set and the upgrade controller has moved nodes to the `upgrade-required` state.
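In requestor mode, the library creates a NodeMaintenance object per node under upgrade, roughly like the sketch below. The field names follow the CRD shipped under `hack/crd/bases/`; the object name, namespace, node name, and requestor ID are hypothetical values for illustration:

```yaml
apiVersion: maintenance.nvidia.com/v1alpha1
kind: NodeMaintenance
metadata:
  name: driver-upgrade-worker-1   # hypothetical object name
  namespace: nvidia-operator      # hypothetical namespace
spec:
  nodeName: worker-1              # hypothetical node under upgrade
  requestorID: nvidia.com.driver-upgrade  # hypothetical requestor ID (domain-name notation)
  cordon: true
  drainSpec:
    force: false
    podSelector: ""
    timeoutSeconds: 300
    deleteEmptyDir: false
```

The maintenance operator reports progress through the object's `Ready` status condition, which is what moves a node from `node-maintenance-required` to `post-maintenance-required` in the state list above.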
116 | 117 | ### Troubleshooting 118 | #### Node is in `upgrade-failed` state 119 | * Drain the node manually by running `kubectl drain <node-name> --ignore-daemonsets` 120 | * Delete the driver pod on the node manually by running the following command: 121 | 122 | ``` 123 | kubectl delete pod -n `kubectl get -A pods --field-selector spec.nodeName=<node-name> -l <driver-pod-label> --no-headers | awk '{print $1" "$2}'` 124 | ``` 125 | 126 | * Wait for the node to finish upgrading 127 | #### Updated driver pod failed to start / new driver version can't be installed on the node 128 | * Manually delete the pod by running `kubectl delete pod <pod-name> -n <namespace>` 129 | * If after the restart the pod still fails, change the driver version in the CustomResource to the previous or another working version. -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/NVIDIA/k8s-operator-libs 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.23.4 6 | 7 | require ( 8 | github.com/Mellanox/maintenance-operator/api v0.1.1 9 | github.com/go-logr/logr v1.4.2 10 | github.com/onsi/ginkgo/v2 v2.23.0 11 | github.com/onsi/gomega v1.36.2 12 | github.com/stretchr/testify v1.10.0 13 | k8s.io/api v0.32.3 14 | k8s.io/apiextensions-apiserver v0.32.3 15 | k8s.io/apimachinery v0.32.3 16 | k8s.io/client-go v0.32.3 17 | k8s.io/kubectl v0.32.3 18 | sigs.k8s.io/controller-runtime v0.20.4 19 | ) 20 | 21 | require ( 22 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect 23 | github.com/MakeNowJust/heredoc v1.0.0 // indirect 24 | github.com/beorn7/perks v1.0.1 // indirect 25 | github.com/blang/semver/v4 v4.0.0 // indirect 26 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 27 | github.com/chai2010/gettext-go v1.0.2 // indirect 28 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 29 | github.com/emicklei/go-restful/v3 v3.11.0 // indirect 30 | github.com/evanphx/json-patch v5.6.0+incompatible // indirect 31 | github.com/evanphx/json-patch/v5 v5.9.11 // indirect 32 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect 33 | github.com/fsnotify/fsnotify v1.7.0 // indirect 34 | github.com/fxamacker/cbor/v2 v2.7.0 // indirect 35 | github.com/go-errors/errors v1.4.2 // indirect 36 | github.com/go-logr/zapr v1.3.0 // indirect 37 | github.com/go-openapi/jsonpointer v0.21.0 // indirect 38 | github.com/go-openapi/jsonreference v0.20.2 // indirect 39 | github.com/go-openapi/swag v0.23.0 // indirect 40 | github.com/go-task/slim-sprig/v3 v3.0.0 // indirect 41 | github.com/gogo/protobuf v1.3.2 // indirect 42 | github.com/golang/protobuf v1.5.4 // indirect 43 | github.com/google/btree v1.1.3 // indirect 44 | github.com/google/gnostic-models v0.6.8 // indirect 45 | github.com/google/go-cmp v0.6.0 // indirect 46 | github.com/google/gofuzz v1.2.0 // indirect 47 | github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect 48 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect 49 | github.com/google/uuid v1.6.0 // indirect 50 | github.com/gorilla/websocket v1.5.0 // indirect 51 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect 52 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 53 | github.com/josharian/intern v1.0.0 // indirect 54 | github.com/json-iterator/go v1.1.12 // indirect 55 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect 56 | github.com/mailru/easyjson v0.7.7 // indirect 57 |
github.com/mitchellh/go-wordwrap v1.0.1 // indirect 58 | github.com/moby/spdystream v0.5.0 // indirect 59 | github.com/moby/term v0.5.0 // indirect 60 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 61 | github.com/modern-go/reflect2 v1.0.2 // indirect 62 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect 63 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 64 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect 65 | github.com/peterbourgon/diskv v2.0.1+incompatible // indirect 66 | github.com/pkg/errors v0.9.1 // indirect 67 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 68 | github.com/prometheus/client_golang v1.19.1 // indirect 69 | github.com/prometheus/client_model v0.6.1 // indirect 70 | github.com/prometheus/common v0.55.0 // indirect 71 | github.com/prometheus/procfs v0.15.1 // indirect 72 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 73 | github.com/spf13/cobra v1.8.1 // indirect 74 | github.com/spf13/pflag v1.0.5 // indirect 75 | github.com/stretchr/objx v0.5.2 // indirect 76 | github.com/x448/float16 v0.8.4 // indirect 77 | github.com/xlab/treeprint v1.2.0 // indirect 78 | go.uber.org/multierr v1.11.0 // indirect 79 | go.uber.org/zap v1.27.0 // indirect 80 | golang.org/x/net v0.35.0 // indirect 81 | golang.org/x/oauth2 v0.23.0 // indirect 82 | golang.org/x/sync v0.11.0 // indirect 83 | golang.org/x/sys v0.30.0 // indirect 84 | golang.org/x/term v0.29.0 // indirect 85 | golang.org/x/text v0.22.0 // indirect 86 | golang.org/x/time v0.7.0 // indirect 87 | golang.org/x/tools v0.30.0 // indirect 88 | gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect 89 | google.golang.org/protobuf v1.36.1 // indirect 90 | gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect 91 | gopkg.in/inf.v0 v0.9.1 // indirect 92 | gopkg.in/yaml.v3 v3.0.1 // indirect 93 | k8s.io/cli-runtime v0.32.3 // indirect 94 | k8s.io/component-base v0.32.3 // indirect 95 | k8s.io/klog/v2 v2.130.1 // indirect 96 | k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect 97 | k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect 98 | sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect 99 | sigs.k8s.io/kustomize/api v0.18.0 // indirect 100 | sigs.k8s.io/kustomize/kyaml v0.18.1 // indirect 101 | sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect 102 | sigs.k8s.io/yaml v1.4.0 // indirect 103 | ) 104 | -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | -------------------------------------------------------------------------------- /hack/crd/bases/maintenance.nvidia.com_nodemaintenances.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | controller-gen.kubebuilder.io/version: v0.15.0 7 | name: nodemaintenances.maintenance.nvidia.com 8 | spec: 9 | group: maintenance.nvidia.com 10 | names: 11 | kind: NodeMaintenance 12 | listKind: NodeMaintenanceList 13 | plural: nodemaintenances 14 | singular: nodemaintenance 15 | scope: Namespaced 16 | versions: 17 | - additionalPrinterColumns: 18 | - jsonPath: .spec.nodeName 19 | name: Node 20 | type: string 21 | - jsonPath: .spec.requestorID 22 | name: Requestor 23 | type: string 24 | - jsonPath: .status.conditions[?(@.type=='Ready')].status 25 | name: Ready 26 | type: string 27 | - jsonPath: .status.conditions[?(@.type=='Ready')].reason 28 | name: Phase 29 | type: string 30 | - jsonPath: .status.conditions[?(@.type=='Failed')].reason 31 | name: Failed 32 | type: string 33 | name: v1alpha1 34 | schema: 35 | openAPIV3Schema: 36 | description: NodeMaintenance is the Schema for the nodemaintenances API 37 | properties: 38 | apiVersion: 39 | description: |- 40 | APIVersion defines the versioned schema of this representation of an object. 41 | Servers should convert recognized schemas to the latest internal value, and 42 | may reject unrecognized values. 43 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources 44 | type: string 45 | kind: 46 | description: |- 47 | Kind is a string value representing the REST resource this object represents. 48 | Servers may infer this from the endpoint the client submits requests to. 49 | Cannot be updated. 50 | In CamelCase. 51 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds 52 | type: string 53 | metadata: 54 | type: object 55 | spec: 56 | description: NodeMaintenanceSpec defines the desired state of NodeMaintenance 57 | properties: 58 | additionalRequestors: 59 | description: |- 60 | AdditionalRequestors is a set of additional requestor IDs which are using the same NodeMaintenance 61 | request. addition or removal of requiestor IDs to this list MUST be made with update operation (and retry on failure) 62 | which will replace the entire list. 63 | items: 64 | type: string 65 | type: array 66 | x-kubernetes-list-type: set 67 | cordon: 68 | default: true 69 | description: Cordon if set, marks node as unschedulable during maintenance 70 | operation 71 | type: boolean 72 | drainSpec: 73 | description: DrainSpec specifies how a node will be drained. if not 74 | provided, no draining will be performed. 75 | properties: 76 | deleteEmptyDir: 77 | default: false 78 | description: |- 79 | DeleteEmptyDir indicates if should continue even if there are pods using emptyDir 80 | (local data that will be deleted when the node is drained) 81 | type: boolean 82 | force: 83 | default: false 84 | description: Force draining even if there are pods that do not 85 | declare a controller 86 | type: boolean 87 | podEvictionFilters: 88 | description: |- 89 | PodEvictionFilters specifies filters for pods that need to undergo eviction during drain. 90 | if specified. only pods that match PodEvictionFilters will be evicted during drain operation. 91 | if unspecified. all non-daemonset pods will be evicted. 
92 | logical OR is performed between filter entires. logical AND is performed within different filters 93 | in a filter entry. 94 | items: 95 | description: PodEvictionFiterEntry defines filters for Pod evictions 96 | during drain operation 97 | properties: 98 | byResourceNameRegex: 99 | description: ByResourceNameRegex filters pods by the name 100 | of the resources they consume using regex. 101 | type: string 102 | type: object 103 | type: array 104 | podSelector: 105 | description: |- 106 | PodSelector specifies a label selector to filter pods on the node that need to be drained 107 | For more details on label selectors, see: 108 | https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors 109 | type: string 110 | timeoutSeconds: 111 | default: 300 112 | description: TimeoutSecond specifies the length of time in seconds 113 | to wait before giving up drain, zero means infinite 114 | format: int32 115 | minimum: 0 116 | type: integer 117 | type: object 118 | nodeName: 119 | description: |- 120 | NodeName is The name of the node that maintenance operation will be performed on 121 | creation fails if node obj does not exist (webhook) 122 | type: string 123 | x-kubernetes-validations: 124 | - message: Value is immutable 125 | rule: self == oldSelf 126 | requestorID: 127 | description: |- 128 | RequestorID MUST follow domain name notation format (https://tools.ietf.org/html/rfc1035#section-2.3.1) 129 | It MUST be 63 characters or less, beginning and ending with an alphanumeric 130 | character ([a-z0-9A-Z]) with dashes (-), dots (.), and alphanumerics between. 131 | caller SHOULD NOT create multiple objects with same requestorID and nodeName. 132 | This field identifies the requestor of the operation. 133 | maxLength: 63 134 | minLength: 2 135 | pattern: ^([a-z0-9A-Z]([-a-z0-9A-Z]*[a-z0-9A-Z])?(\.[a-z0-9A-Z]([-a-z0-9A-Z]*[a-z0-9A-Z])?)*)$ 136 | type: string 137 | x-kubernetes-validations: 138 | - message: Value is immutable 139 | rule: self == oldSelf 140 | waitForPodCompletion: 141 | description: |- 142 | WaitForPodCompletion specifies pods via selector to wait for completion before performing drain operation 143 | if not provided, will not wait for pods to complete 144 | properties: 145 | podSelector: 146 | description: |- 147 | PodSelector specifies a label selector for the pods to wait for completion 148 | For more details on label selectors, see: 149 | https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors 150 | example: app=my-workloads 151 | type: string 152 | timeoutSeconds: 153 | default: 0 154 | description: |- 155 | TimeoutSecond specifies the length of time in seconds 156 | to wait before giving up on pod termination, zero means infinite 157 | format: int32 158 | minimum: 0 159 | type: integer 160 | type: object 161 | required: 162 | - nodeName 163 | - requestorID 164 | type: object 165 | status: 166 | description: NodeMaintenanceStatus defines the observed state of NodeMaintenance 167 | properties: 168 | conditions: 169 | description: Conditions represents observations of NodeMaintenance 170 | current state 171 | items: 172 | description: "Condition contains details for one aspect of the current 173 | state of this API Resource.\n---\nThis struct is intended for 174 | direct use as an array at the field path .status.conditions. 
For 175 | example,\n\n\n\ttype FooStatus struct{\n\t // Represents the 176 | observations of a foo's current state.\n\t // Known .status.conditions.type 177 | are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // 178 | +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t 179 | \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" 180 | patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t 181 | \ // other fields\n\t}" 182 | properties: 183 | lastTransitionTime: 184 | description: |- 185 | lastTransitionTime is the last time the condition transitioned from one status to another. 186 | This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. 187 | format: date-time 188 | type: string 189 | message: 190 | description: |- 191 | message is a human readable message indicating details about the transition. 192 | This may be an empty string. 193 | maxLength: 32768 194 | type: string 195 | observedGeneration: 196 | description: |- 197 | observedGeneration represents the .metadata.generation that the condition was set based upon. 198 | For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date 199 | with respect to the current state of the instance. 200 | format: int64 201 | minimum: 0 202 | type: integer 203 | reason: 204 | description: |- 205 | reason contains a programmatic identifier indicating the reason for the condition's last transition. 206 | Producers of specific condition types may define expected values and meanings for this field, 207 | and whether the values are considered a guaranteed API. 208 | The value should be a CamelCase string. 209 | This field may not be empty. 210 | maxLength: 1024 211 | minLength: 1 212 | pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ 213 | type: string 214 | status: 215 | description: status of the condition, one of True, False, Unknown. 216 | enum: 217 | - "True" 218 | - "False" 219 | - Unknown 220 | type: string 221 | type: 222 | description: |- 223 | type of condition in CamelCase or in foo.example.com/CamelCase. 224 | --- 225 | Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be 226 | useful (see .node.status.conditions), the ability to deconflict is important. 
227 | The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) 228 | maxLength: 316 229 | pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ 230 | type: string 231 | required: 232 | - lastTransitionTime 233 | - message 234 | - reason 235 | - status 236 | - type 237 | type: object 238 | type: array 239 | x-kubernetes-list-map-keys: 240 | - type 241 | x-kubernetes-list-type: map 242 | drain: 243 | description: Drain represents the drain status of the node 244 | properties: 245 | drainProgress: 246 | description: DrainProgress represents the draining progress as 247 | a percentage 248 | format: int32 249 | minimum: 0 250 | type: integer 251 | evictionPods: 252 | description: EvictionPods is the total number of pods that need 253 | to be evicted at the time NodeMaintenance started draining 254 | format: int32 255 | minimum: 0 256 | type: integer 257 | totalPods: 258 | description: TotalPods is the number of pods on the node at the 259 | time NodeMaintenance started draining 260 | format: int32 261 | minimum: 0 262 | type: integer 263 | waitForEviction: 264 | description: WaitForEviction is the list of namespaced names of pods 265 | that need to be evicted 266 | items: 267 | type: string 268 | type: array 269 | required: 270 | - drainProgress 271 | - evictionPods 272 | - totalPods 273 | type: object 274 | waitForCompletion: 275 | description: WaitForCompletion is the list of namespaced names of pods 276 | that we wait to complete 277 | items: 278 | type: string 279 | type: array 280 | type: object 281 | type: object 282 | served: true 283 | storage: true 284 | subresources: 285 | status: {} 286 | -------------------------------------------------------------------------------- /images/driver-upgrade-state-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/k8s-operator-libs/0f2e999d5b1ec6419fbf40005d0c9de536103191/images/driver-upgrade-state-diagram.png -------------------------------------------------------------------------------- /pkg/consts/consts.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 NVIDIA 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package consts 18 | 19 | /* 20 | This package contains constants that are used throughout the project and do not fall into one particular package 21 | */ 22 | 23 | // Note: if a logger other than zap (the operator-sdk default) is used, these values would probably need to change. 24 | const ( 25 | LogLevelError = iota - 2 26 | LogLevelWarning 27 | LogLevelInfo 28 | LogLevelDebug 29 | ) 30 | -------------------------------------------------------------------------------- /pkg/crdutil/README.md: -------------------------------------------------------------------------------- 1 | # CRD Apply Tool 2 | 3 | This tool helps deploy and manage Custom Resource Definitions (CRDs) in a Kubernetes cluster.
4 | It applies all CRDs found in the specified directories, working around some of Helm's limitations in managing CRDs. 5 | 6 | ## Motivation 7 | 8 | While Helm is commonly used for managing Kubernetes resources, it has certain restrictions with CRDs: 9 | 10 | - CRDs placed in Helm's top-level `crds/` directory are not updated on upgrades or rollbacks. 11 | - Placing CRDs in Helm's `templates/` directory is not entirely safe, as deletions and upgrades of CRDs are not always handled properly. 12 | 13 | This tool offers a more reliable way to apply CRDs, ensuring they are created or updated as needed. 14 | 15 | ## Features 16 | 17 | - **Apply CRDs from multiple directories**: Allows specifying multiple directories containing CRD YAML manifests. 18 | - **Recursive directory search**: Walks through each specified directory to find and apply all YAML files. 19 | - **Safe update mechanism**: Checks if a CRD already exists; if so, it updates it with the latest version. 20 | - **Handles multiple YAML documents**: Supports files containing multiple CRD documents separated by YAML document delimiters. 21 | 22 | ## Usage 23 | 24 | Compile and run the tool by providing the `-crds-dir` flag with paths to the directories containing the CRD YAML files: 25 | 26 | ```bash 27 | go build -o crd-apply-tool 28 | ./crd-apply-tool -crds-dir /path/to/crds1 -crds-dir /path/to/crds2 29 | ``` 30 | 31 | Used as a Helm pre-install hook, it can look like this: 32 | 33 | ```yaml 34 | apiVersion: batch/v1 35 | kind: Job 36 | metadata: 37 | name: upgrade-crd 38 | annotations: 39 | "helm.sh/hook": pre-install,pre-upgrade 40 | "helm.sh/hook-weight": "1" 41 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 42 | spec: 43 | template: 44 | metadata: 45 | name: upgrade-crd 46 | spec: 47 | containers: 48 | - name: upgrade-crd 49 | image: path-to-your/crd-apply-image 50 | imagePullPolicy: IfNotPresent 51 | command: 52 | - /apply-crds 53 | args: 54 | - --crds-dir=/crds/operator 55 | ``` 56 | 57 | > Note: the image must contain all your CRDs, e.g. in the `/crds/operator` directory. 58 | 59 | ## Flags 60 | 61 | - `-crds-dir` (required): Specifies a directory path that contains the CRD manifests in YAML format. This flag can be provided multiple times to apply CRDs from multiple directories. 62 | -------------------------------------------------------------------------------- /pkg/crdutil/crdutil.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License.
15 | */ 16 | 17 | package crdutil 18 | 19 | import ( 20 | "context" 21 | "flag" 22 | "fmt" 23 | "io" 24 | "log" 25 | "os" 26 | "path/filepath" 27 | "strings" 28 | 29 | apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" 30 | "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" 31 | v1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1" 32 | apierrors "k8s.io/apimachinery/pkg/api/errors" 33 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 34 | "k8s.io/apimachinery/pkg/util/wait" 35 | "k8s.io/apimachinery/pkg/util/yaml" 36 | "k8s.io/client-go/util/retry" 37 | ctrl "sigs.k8s.io/controller-runtime" 38 | ) 39 | 40 | type StringList []string 41 | 42 | func (s *StringList) String() string { 43 | return strings.Join(*s, ", ") 44 | } 45 | 46 | func (s *StringList) Set(value string) error { 47 | *s = append(*s, value) 48 | return nil 49 | } 50 | 51 | var ( 52 | crdsDir StringList 53 | ) 54 | 55 | func initFlags() { 56 | flag.Var(&crdsDir, "crds-dir", "Path to the directory containing the CRD manifests") 57 | flag.Parse() 58 | 59 | if len(crdsDir) == 0 { 60 | log.Fatal("CRDs directory is required") 61 | } 62 | 63 | for _, crdDir := range crdsDir { 64 | if _, err := os.Stat(crdDir); os.IsNotExist(err) { 65 | log.Fatalf("CRDs directory %s does not exist", crdDir) 66 | } 67 | } 68 | } 69 | 70 | // EnsureCRDsCmd reads each YAML file in the directory, splits it into documents, and applies each CRD to the cluster. 71 | // The parameter --crds-dir is required and should point to the directory containing the CRD manifests. 72 | func EnsureCRDsCmd() { 73 | ctx := context.Background() 74 | 75 | initFlags() 76 | 77 | config, err := ctrl.GetConfig() 78 | if err != nil { 79 | log.Fatalf("Failed to get Kubernetes config: %v", err) 80 | } 81 | 82 | client, err := clientset.NewForConfig(config) 83 | if err != nil { 84 | log.Fatalf("Failed to create API extensions client: %v", err) 85 | } 86 | 87 | if err := walkCrdsDir(ctx, client.ApiextensionsV1().CustomResourceDefinitions()); err != nil { 88 | log.Fatalf("Failed to apply CRDs: %v", err) 89 | } 90 | } 91 | 92 | // walkCrdsDir walks each CRDs directory and applies every YAML file it finds. 93 | func walkCrdsDir(ctx context.Context, crdClient v1.CustomResourceDefinitionInterface) error { 94 | for _, crdDir := range crdsDir { 95 | // Walk the directory recursively and apply each YAML file. 96 | err := filepath.Walk(crdDir, func(path string, info os.FileInfo, err error) error { 97 | if err != nil { 98 | return err 99 | } 100 | if info.IsDir() || filepath.Ext(path) != ".yaml" { 101 | return nil 102 | } 103 | 104 | log.Printf("Apply CRDs from file: %s", path) 105 | if err := applyCRDsFromFile(ctx, crdClient, path); err != nil { 106 | return fmt.Errorf("apply CRD %s: %w", path, err) 107 | } 108 | return nil 109 | }) 110 | if err != nil { 111 | return fmt.Errorf("walk the path %s: %w", crdDir, err) 112 | } 113 | } 114 | return nil 115 | } 116 | 117 | // applyCRDsFromFile reads a YAML file, splits it into documents, and applies each CRD to the cluster. 118 | func applyCRDsFromFile(ctx context.Context, crdClient v1.CustomResourceDefinitionInterface, filePath string) error { 119 | file, err := os.Open(filePath) 120 | if err != nil { 121 | return fmt.Errorf("open file %q: %w", filePath, err) 122 | } 123 | defer file.Close() 124 | 125 | // Create a decoder that reads multiple YAML documents.
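// As an illustration (a hypothetical input, not a file shipped with this repo),
// a manifest with two CRDs separated by the YAML document delimiter is decoded
// in two successive Decode calls below:
//
//	apiVersion: apiextensions.k8s.io/v1
//	kind: CustomResourceDefinition
//	metadata:
//	  name: foos.example.com
//	...
//	---
//	apiVersion: apiextensions.k8s.io/v1
//	kind: CustomResourceDefinition
//	metadata:
//	  name: bars.example.com
//	...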
126 | decoder := yaml.NewYAMLOrJSONDecoder(file, 4096) 127 | var crdsToApply []*apiextensionsv1.CustomResourceDefinition 128 | for { 129 | crd := &apiextensionsv1.CustomResourceDefinition{} 130 | if err := decoder.Decode(crd); err != nil { 131 | if err == io.EOF { 132 | break 133 | } 134 | return fmt.Errorf("decode YAML: %w", err) 135 | } 136 | if crd.GetObjectKind().GroupVersionKind().Kind != "CustomResourceDefinition" { 137 | log.Printf("Skipping non-CRD object %s", crd.GetName()) 138 | continue 139 | } 140 | crdsToApply = append(crdsToApply, crd) 141 | } 142 | 143 | // Apply each CRD separately. 144 | for _, crd := range crdsToApply { 145 | err := wait.ExponentialBackoffWithContext(ctx, retry.DefaultBackoff, func(context.Context) (bool, error) { 146 | if err := applyCRD(ctx, crdClient, crd); err != nil { 147 | log.Printf("Failed to apply CRD %s: %v", crd.Name, err) 148 | return false, nil 149 | } 150 | return true, nil 151 | }) 152 | if err != nil { 153 | return fmt.Errorf("apply CRD %s: %w", crd.Name, err) 154 | } 155 | } 156 | return nil 157 | } 158 | 159 | // applyCRD creates or updates the CRD. 160 | func applyCRD( 161 | ctx context.Context, 162 | crdClient v1.CustomResourceDefinitionInterface, 163 | crd *apiextensionsv1.CustomResourceDefinition, 164 | ) error { 165 | // Check if CRD already exists in cluster and create if not found. 166 | curCRD, err := crdClient.Get(ctx, crd.Name, metav1.GetOptions{}) 167 | if apierrors.IsNotFound(err) { 168 | log.Printf("Create CRD %s", crd.Name) 169 | _, err = crdClient.Create(ctx, crd, metav1.CreateOptions{}) 170 | if err != nil { 171 | return fmt.Errorf("create CRD %s: %w", crd.Name, err) 172 | } 173 | } else { 174 | log.Printf("Update CRD %s", crd.Name) 175 | // Set resource version to update an existing CRD. 176 | crd.SetResourceVersion(curCRD.GetResourceVersion()) 177 | _, err = crdClient.Update(ctx, crd, metav1.UpdateOptions{}) 178 | if err != nil { 179 | return fmt.Errorf("update CRD %s: %w", crd.Name, err) 180 | } 181 | } 182 | return nil 183 | } 184 | -------------------------------------------------------------------------------- /pkg/crdutil/crdutil_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package crdutil 18 | 19 | import ( 20 | "context" 21 | 22 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 | 24 | . "github.com/onsi/ginkgo/v2" 25 | . 
"github.com/onsi/gomega" 26 | ) 27 | 28 | var _ = Describe("CRD Application", func() { 29 | var ( 30 | ctx context.Context 31 | ) 32 | 33 | BeforeEach(func() { 34 | ctx = context.Background() 35 | }) 36 | 37 | AfterEach(func() { 38 | Expect(testCRDClient.DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{})).NotTo(HaveOccurred()) 39 | }) 40 | 41 | Describe("applyCRDsFromFile", func() { 42 | It("should apply CRDs multiple times from a valid YAML file", func() { 43 | By("applying CRDs") 44 | Expect(applyCRDsFromFile(ctx, testCRDClient, "test-files/test-crds.yaml")).To(Succeed()) 45 | Expect(applyCRDsFromFile(ctx, testCRDClient, "test-files/test-crds.yaml")).To(Succeed()) 46 | Expect(applyCRDsFromFile(ctx, testCRDClient, "test-files/test-crds.yaml")).To(Succeed()) 47 | Expect(applyCRDsFromFile(ctx, testCRDClient, "test-files/test-crds.yaml")).To(Succeed()) 48 | 49 | By("verifying CRDs are applied") 50 | crds, err := testCRDClient.List(ctx, metav1.ListOptions{}) 51 | Expect(err).NotTo(HaveOccurred()) 52 | Expect(crds.Items).To(HaveLen(2)) 53 | }) 54 | 55 | It("should update CRDs", func() { 56 | By("applying CRDs") 57 | Expect(applyCRDsFromFile(ctx, testCRDClient, "test-files/test-crds.yaml")).To(Succeed()) 58 | 59 | By("verifying CRDs do not have spec.foobar") 60 | for _, crdName := range []string{"bars.example.com", "foos.example.com"} { 61 | crd, err := testCRDClient.Get(ctx, crdName, metav1.GetOptions{}) 62 | Expect(err).NotTo(HaveOccurred()) 63 | props := crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties 64 | Expect(props).To(HaveKey("spec")) 65 | Expect(props["spec"].Properties).NotTo(HaveKey("foobar")) 66 | } 67 | 68 | By("updating CRDs") 69 | Expect(applyCRDsFromFile(ctx, testCRDClient, "test-files/updated-test-crds.yaml")).To(Succeed()) 70 | 71 | By("verifying CRDs are updated") 72 | for _, crdName := range []string{"bars.example.com", "foos.example.com"} { 73 | crd, err := testCRDClient.Get(ctx, crdName, metav1.GetOptions{}) 74 | Expect(err).NotTo(HaveOccurred()) 75 | props := crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties 76 | Expect(props["spec"].Properties).To(HaveKey("foobar")) 77 | } 78 | }) 79 | }) 80 | }) 81 | -------------------------------------------------------------------------------- /pkg/crdutil/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package crdutil 18 | 19 | import ( 20 | "testing" 21 | 22 | "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" 23 | v1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1" 24 | "sigs.k8s.io/controller-runtime/pkg/envtest" 25 | logf "sigs.k8s.io/controller-runtime/pkg/log" 26 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 27 | 28 | . "github.com/onsi/ginkgo/v2" 29 | . 
"github.com/onsi/gomega" 30 | ) 31 | 32 | var ( 33 | testCRDClient v1.CustomResourceDefinitionInterface 34 | testEnv *envtest.Environment 35 | ) 36 | 37 | func TestApplyCrds(t *testing.T) { 38 | RegisterFailHandler(Fail) 39 | RunSpecs(t, "ApplyCrds Suite") 40 | } 41 | 42 | var _ = BeforeSuite(func() { 43 | logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) 44 | 45 | By("bootstrapping test environment") 46 | testEnv = &envtest.Environment{} 47 | 48 | cfg, err := testEnv.Start() 49 | Expect(err).NotTo(HaveOccurred()) 50 | Expect(cfg).NotTo(BeNil()) 51 | 52 | // create clientset with scheme 53 | client, err := clientset.NewForConfig(cfg) 54 | Expect(err).NotTo(HaveOccurred()) 55 | Expect(client).NotTo(BeNil()) 56 | testCRDClient = client.ApiextensionsV1().CustomResourceDefinitions() 57 | 58 | go func() { 59 | defer GinkgoRecover() 60 | }() 61 | }) 62 | 63 | var _ = AfterSuite(func() { 64 | By("tearing down the test environment") 65 | err := testEnv.Stop() 66 | Expect(err).NotTo(HaveOccurred()) 67 | }) 68 | -------------------------------------------------------------------------------- /pkg/crdutil/test-files/test-crds.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: foos.example.com 5 | spec: 6 | group: example.com 7 | names: 8 | kind: Foo 9 | listKind: FooList 10 | singular: foo 11 | plural: foos 12 | scope: Namespaced 13 | versions: 14 | - name: v1 15 | served: true 16 | storage: true 17 | schema: 18 | openAPIV3Schema: 19 | type: object 20 | properties: 21 | spec: 22 | type: object 23 | --- 24 | # non CRD yamls should not be handled and skipped 25 | --- 26 | apiVersion: apiextensions.k8s.io/v1 27 | kind: CustomResourceDefinition 28 | metadata: 29 | name: bars.example.com 30 | spec: 31 | group: example.com 32 | names: 33 | kind: Bar 34 | listKind: BarList 35 | singular: bar 36 | plural: bars 37 | scope: Namespaced 38 | versions: 39 | - name: v1 40 | served: true 41 | storage: true 42 | schema: 43 | openAPIV3Schema: 44 | type: object 45 | properties: 46 | spec: 47 | type: object 48 | -------------------------------------------------------------------------------- /pkg/crdutil/test-files/updated-test-crds.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: foos.example.com 5 | spec: 6 | group: example.com 7 | names: 8 | kind: Foo 9 | listKind: FooList 10 | singular: foo 11 | plural: foos 12 | scope: Namespaced 13 | versions: 14 | - name: v1 15 | served: true 16 | storage: true 17 | schema: 18 | openAPIV3Schema: 19 | type: object 20 | properties: 21 | spec: 22 | type: object 23 | properties: 24 | foobar: 25 | type: string 26 | --- 27 | apiVersion: apiextensions.k8s.io/v1 28 | kind: CustomResourceDefinition 29 | metadata: 30 | name: bars.example.com 31 | spec: 32 | group: example.com 33 | names: 34 | kind: Bar 35 | listKind: BarList 36 | singular: bar 37 | plural: bars 38 | scope: Namespaced 39 | versions: 40 | - name: v1 41 | served: true 42 | storage: true 43 | schema: 44 | openAPIV3Schema: 45 | type: object 46 | properties: 47 | spec: 48 | type: object 49 | properties: 50 | foobar: 51 | type: string 52 | -------------------------------------------------------------------------------- /pkg/upgrade/consts.go: -------------------------------------------------------------------------------- 1 | /* 2 | 
Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package upgrade 18 | 19 | const ( 20 | // UpgradeStateLabelKeyFmt is the format of the node label key indicating driver upgrade states 21 | UpgradeStateLabelKeyFmt = "nvidia.com/%s-driver-upgrade-state" 22 | // UpgradeSkipNodeLabelKeyFmt is the format of the node label boolean key indicating to skip driver upgrade 23 | UpgradeSkipNodeLabelKeyFmt = "nvidia.com/%s-driver-upgrade.skip" 24 | // UpgradeWaitForSafeDriverLoadAnnotationKeyFmt is the format of the node annotation key indicating that 25 | // the driver is waiting for safe load, meaning the node should be cordoned and workloads should be removed from the 26 | // node before the driver can continue to load. 27 | UpgradeWaitForSafeDriverLoadAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade.driver-wait-for-safe-load" 28 | // UpgradeInitialStateAnnotationKeyFmt is the format of the node annotation indicating the node was unschedulable at 29 | // the beginning of the upgrade process 30 | UpgradeInitialStateAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade.node-initial-state.unschedulable" 31 | // UpgradeWaitForPodCompletionStartTimeAnnotationKeyFmt is the format of the node annotation indicating start time 32 | // for waiting on pod completions 33 | //nolint: lll 34 | UpgradeWaitForPodCompletionStartTimeAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-wait-for-pod-completion-start-time" 35 | // UpgradeValidationStartTimeAnnotationKeyFmt is the format of the node annotation indicating start time for 36 | // validation-required state 37 | UpgradeValidationStartTimeAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-validation-start-time" 38 | // UpgradeRequestedAnnotationKeyFmt is the format of the node annotation key indicating driver upgrade was requested 39 | // (used for orphaned pods) 40 | // Setting this annotation will trigger setting upgrade state to upgrade-required 41 | UpgradeRequestedAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-requested" 42 | // UpgradeRequestorModeAnnotationKeyFmt is the format of the node annotation key indicating the upgrade is processed in requestor mode 43 | UpgradeRequestorModeAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-requestor-mode" 44 | // UpgradeStateUnknown is the state of a node when the upgrade flow is disabled or the node hasn't been processed yet 45 | UpgradeStateUnknown = "" 46 | // UpgradeStateUpgradeRequired is set when the driver pod on the node is not up-to-date and requires an upgrade 47 | // No actions are performed at this stage 48 | UpgradeStateUpgradeRequired = "upgrade-required" 49 | // UpgradeStateCordonRequired is set when the node needs to be made unschedulable in preparation for driver upgrade 50 | UpgradeStateCordonRequired = "cordon-required" 51 | // UpgradeStateWaitForJobsRequired is set on the node when we need to wait on jobs to complete until a given timeout. 52 | UpgradeStateWaitForJobsRequired = "wait-for-jobs-required" 53 | // UpgradeStatePodDeletionRequired is set when deletion of pods is required for the driver upgrade to proceed.
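// (For orientation, and inferred from the state descriptions in this block and
// images/driver-upgrade-state-diagram.png: the typical in-place flow walks a node
// through upgrade-required -> cordon-required -> wait-for-jobs-required ->
// pod-deletion-required -> drain-required -> pod-restart-required ->
// validation-required -> uncordon-required -> upgrade-done, with upgrade-failed
// reachable on errors.)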
54 | UpgradeStatePodDeletionRequired = "pod-deletion-required" 55 | // UpgradeStateDrainRequired is set when the node is required to be scheduled for drain. After the drain, the state 56 | // is changed to either UpgradeStatePodRestartRequired or UpgradeStateFailed 57 | UpgradeStateDrainRequired = "drain-required" 58 | // UpgradeStateNodeMaintenanceRequired is set when the node is scheduled for node maintenance. 59 | // The node maintenance operations, like cordon, drain, etc., are carried out by an external maintenance 60 | // operator. This state is only ever used / valid when UseMaintenanceOperator is true and 61 | // an external maintenance operator exists. 62 | UpgradeStateNodeMaintenanceRequired = "node-maintenance-required" 63 | // UpgradeStatePostMaintenanceRequired is set after node maintenance is completed by an 64 | // external maintenance operator. This state indicates that the requestor is required to perform 65 | // post-maintenance operations (e.g. restart driver pods). 66 | UpgradeStatePostMaintenanceRequired = "post-maintenance-required" 67 | // UpgradeStatePodRestartRequired is set when the driver pod on the node is scheduled for restart 68 | // or when unblocking of the driver load is required (safe driver load) 69 | UpgradeStatePodRestartRequired = "pod-restart-required" 70 | // UpgradeStateValidationRequired is set when validation of the new driver deployed on the node is 71 | // required before moving to UpgradeStateUncordonRequired. 72 | UpgradeStateValidationRequired = "validation-required" 73 | // UpgradeStateUncordonRequired is set when the driver pod on the node is up-to-date and has "Ready" status 74 | UpgradeStateUncordonRequired = "uncordon-required" 75 | // UpgradeStateDone is set when the driver pod is up-to-date and running on the node and the node is schedulable 76 | UpgradeStateDone = "upgrade-done" 77 | // UpgradeStateFailed is set when there are any failures during the driver upgrade 78 | UpgradeStateFailed = "upgrade-failed" 79 | ) 80 | 81 | const ( 82 | // nodeNameFieldSelectorFmt is the format of a field selector that can be used in metav1.ListOptions to filter by 83 | // node 84 | nodeNameFieldSelectorFmt = "spec.nodeName=%s" 85 | // nullString is the word null as a string, to avoid duplication and linting errors 86 | nullString = "null" 87 | // trueString is the word true as a string, to avoid duplication and linting errors 88 | trueString = "true" 89 | ) 90 | -------------------------------------------------------------------------------- /pkg/upgrade/cordon_manager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License.
12 | */ 13 | 14 | package upgrade 15 | 16 | import ( 17 | "context" 18 | 19 | "github.com/go-logr/logr" 20 | corev1 "k8s.io/api/core/v1" 21 | "k8s.io/client-go/kubernetes" 22 | "k8s.io/kubectl/pkg/drain" 23 | ) 24 | 25 | // CordonManagerImpl implements CordonManager interface and can 26 | // cordon / uncordon k8s nodes 27 | type CordonManagerImpl struct { 28 | k8sInterface kubernetes.Interface 29 | log logr.Logger 30 | } 31 | 32 | // CordonManager provides methods for cordoning / uncordoning nodes 33 | type CordonManager interface { 34 | Cordon(ctx context.Context, node *corev1.Node) error 35 | Uncordon(ctx context.Context, node *corev1.Node) error 36 | } 37 | 38 | // Cordon marks a node as unschedulable 39 | func (m *CordonManagerImpl) Cordon(ctx context.Context, node *corev1.Node) error { 40 | helper := &drain.Helper{Ctx: ctx, Client: m.k8sInterface} 41 | return drain.RunCordonOrUncordon(helper, node, true) 42 | } 43 | 44 | // Uncordon marks a node as schedulable 45 | func (m *CordonManagerImpl) Uncordon(ctx context.Context, node *corev1.Node) error { 46 | helper := &drain.Helper{Ctx: ctx, Client: m.k8sInterface} 47 | return drain.RunCordonOrUncordon(helper, node, false) 48 | } 49 | 50 | // NewCordonManager returns a CordonManagerImpl 51 | func NewCordonManager(k8sInterface kubernetes.Interface, log logr.Logger) *CordonManagerImpl { 52 | return &CordonManagerImpl{ 53 | k8sInterface: k8sInterface, 54 | log: log, 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /pkg/upgrade/cordon_manager_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package upgrade_test 18 | 19 | import ( 20 | . "github.com/onsi/ginkgo/v2" 21 | . "github.com/onsi/gomega" 22 | 23 | upgrade "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" 24 | ) 25 | 26 | var _ = Describe("CordonManager tests", func() { 27 | It("CordonManager should mark a node as schedulable/unschedulable", func() { 28 | node := createNode("test-node") 29 | 30 | cordonManager := upgrade.NewCordonManager(k8sInterface, log) 31 | err := cordonManager.Cordon(testCtx, node) 32 | Expect(err).To(Succeed()) 33 | Expect(node.Spec.Unschedulable).To(BeTrue()) 34 | 35 | err = cordonManager.Uncordon(testCtx, node) 36 | Expect(err).To(Succeed()) 37 | Expect(node.Spec.Unschedulable).To(BeFalse()) 38 | }) 39 | }) 40 | -------------------------------------------------------------------------------- /pkg/upgrade/drain_manager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | 14 | package upgrade 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "os" 20 | "time" 21 | 22 | "github.com/go-logr/logr" 23 | corev1 "k8s.io/api/core/v1" 24 | "k8s.io/client-go/kubernetes" 25 | "k8s.io/client-go/tools/record" 26 | "k8s.io/kubectl/pkg/drain" 27 | 28 | v1alpha1 "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" 29 | "github.com/NVIDIA/k8s-operator-libs/pkg/consts" 30 | ) 31 | 32 | // DrainConfiguration contains the drain specification and the list of nodes to schedule drain on 33 | type DrainConfiguration struct { 34 | Spec *v1alpha1.DrainSpec 35 | Nodes []*corev1.Node 36 | } 37 | 38 | // DrainManagerImpl implements the DrainManager interface and can perform node drains based on a received DrainConfiguration 39 | type DrainManagerImpl struct { 40 | k8sInterface kubernetes.Interface 41 | drainingNodes *StringSet 42 | nodeUpgradeStateProvider NodeUpgradeStateProvider 43 | log logr.Logger 44 | eventRecorder record.EventRecorder 45 | } 46 | 47 | // DrainManager is an interface that allows scheduling node drains based on DrainSpec 48 | type DrainManager interface { 49 | ScheduleNodesDrain(ctx context.Context, drainConfig *DrainConfiguration) error 50 | } 51 | 52 | // ScheduleNodesDrain receives a DrainConfiguration and schedules drain for each node in the list. 53 | // When the node gets scheduled, it's marked as being drained and therefore will not be scheduled for drain twice 54 | // if the initial drain hasn't completed yet. 55 | // During the drain the node is cordoned first, and then pods on the node are evicted. 56 | // If the drain is successful, the node moves to the UpgradeStatePodRestartRequired state, 57 | // otherwise it moves to the UpgradeStateFailed state.
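// A minimal invocation sketch (illustrative; the surrounding variables such as
// drainManager, ctx and node are assumptions, not part of this file):
//
//	cfg := &DrainConfiguration{
//		Spec:  &v1alpha1.DrainSpec{Enable: true, TimeoutSecond: 300},
//		Nodes: []*corev1.Node{node},
//	}
//	if err := drainManager.ScheduleNodesDrain(ctx, cfg); err != nil {
//		// handle scheduling error
//	}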
58 | func (m *DrainManagerImpl) ScheduleNodesDrain(ctx context.Context, drainConfig *DrainConfiguration) error { 59 | m.log.V(consts.LogLevelInfo).Info("Drain Manager, starting Node Drain") 60 | 61 | if len(drainConfig.Nodes) == 0 { 62 | m.log.V(consts.LogLevelInfo).Info("Drain Manager, no nodes scheduled to drain") 63 | return nil 64 | } 65 | 66 | drainSpec := drainConfig.Spec 67 | 68 | if drainSpec == nil { 69 | return fmt.Errorf("drain spec should not be empty") 70 | } 71 | if !drainSpec.Enable { 72 | m.log.V(consts.LogLevelInfo).Info("Drain Manager, drain is disabled") 73 | return nil 74 | } 75 | 76 | drainHelper := &drain.Helper{ 77 | Ctx: ctx, 78 | Client: m.k8sInterface, 79 | Force: drainSpec.Force, 80 | // OFED Drivers Pods are part of a DaemonSet, so, this option needs to be set to true 81 | IgnoreAllDaemonSets: true, 82 | DeleteEmptyDirData: drainSpec.DeleteEmptyDir, 83 | GracePeriodSeconds: -1, 84 | Timeout: time.Duration(drainSpec.TimeoutSecond) * time.Second, 85 | PodSelector: drainSpec.PodSelector, 86 | OnPodDeletionOrEvictionFinished: func(pod *corev1.Pod, usingEviction bool, err error) { 87 | log := m.log.WithValues("using-eviction", usingEviction, "pod", pod.Name, "namespace", pod.Namespace) 88 | if err != nil { 89 | log.V(consts.LogLevelWarning).Info("Drain Pod failed", "error", err) 90 | return 91 | } 92 | log.V(consts.LogLevelInfo).Info("Drain Pod finished") 93 | }, 94 | Out: os.Stdout, 95 | ErrOut: os.Stdout, 96 | } 97 | 98 | for _, node := range drainConfig.Nodes { 99 | // We need to shadow the loop variable or initialize some other one with its value 100 | // to avoid concurrency issues when launching goroutines. 101 | // If a loop variable is used as it is, all/most goroutines, spawned inside this loop, 102 | // will use the 'node' value of the last item in drainConfig.Nodes 103 | node := node 104 | if !m.drainingNodes.Has(node.Name) { 105 | m.log.V(consts.LogLevelInfo).Info("Schedule drain for node", "node", node.Name) 106 | logEvent(m.eventRecorder, node, corev1.EventTypeNormal, GetEventReason(), "Scheduling drain of the node") 107 | 108 | m.drainingNodes.Add(node.Name) 109 | go func() { 110 | defer m.drainingNodes.Remove(node.Name) 111 | err := drain.RunCordonOrUncordon(drainHelper, node, true) 112 | if err != nil { 113 | m.log.V(consts.LogLevelError).Error(err, "Failed to cordon node", "node", node.Name) 114 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, node, UpgradeStateFailed) 115 | logEventf(m.eventRecorder, node, corev1.EventTypeWarning, GetEventReason(), 116 | "Failed to cordon the node, %s", err.Error()) 117 | return 118 | } 119 | m.log.V(consts.LogLevelInfo).Info("Cordoned the node", "node", node.Name) 120 | 121 | err = drain.RunNodeDrain(drainHelper, node.Name) 122 | if err != nil { 123 | m.log.V(consts.LogLevelError).Error(err, "Failed to drain node", "node", node.Name) 124 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, node, UpgradeStateFailed) 125 | logEventf(m.eventRecorder, node, corev1.EventTypeWarning, GetEventReason(), 126 | "Failed to drain the node, %s", err.Error()) 127 | return 128 | } 129 | m.log.V(consts.LogLevelInfo).Info("Drained the node", "node", node.Name) 130 | logEvent(m.eventRecorder, node, corev1.EventTypeNormal, GetEventReason(), "Successfully drained the node") 131 | 132 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, node, UpgradeStatePodRestartRequired) 133 | }() 134 | } else { 135 | m.log.V(consts.LogLevelInfo).Info("Node is already being drained, skipping", "node", node.Name) 136 | } 
137 | } 138 | return nil 139 | } 140 | 141 | // NewDrainManager creates a DrainManager 142 | func NewDrainManager( 143 | k8sInterface kubernetes.Interface, 144 | nodeUpgradeStateProvider NodeUpgradeStateProvider, 145 | log logr.Logger, 146 | eventRecorder record.EventRecorder) *DrainManagerImpl { 147 | mgr := &DrainManagerImpl{ 148 | k8sInterface: k8sInterface, 149 | log: log, 150 | drainingNodes: NewStringSet(), 151 | nodeUpgradeStateProvider: nodeUpgradeStateProvider, 152 | eventRecorder: eventRecorder, 153 | } 154 | 155 | return mgr 156 | } 157 | -------------------------------------------------------------------------------- /pkg/upgrade/drain_manager_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package upgrade_test 18 | 19 | import ( 20 | "context" 21 | "time" 22 | 23 | . "github.com/onsi/ginkgo/v2" 24 | . "github.com/onsi/gomega" 25 | corev1 "k8s.io/api/core/v1" 26 | "k8s.io/apimachinery/pkg/types" 27 | 28 | v1alpha1 "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" 29 | "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" 30 | ) 31 | 32 | var _ = Describe("DrainManager tests", func() { 33 | It("DrainManager should drain nodes", func() { 34 | node := createNode("node") 35 | 36 | drainManager := upgrade.NewDrainManager(k8sInterface, upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder), log, eventRecorder) 37 | drainSpec := &v1alpha1.DrainSpec{ 38 | Enable: true, 39 | Force: false, 40 | PodSelector: "", 41 | TimeoutSecond: 1, 42 | DeleteEmptyDir: true, 43 | } 44 | nodeArray := []*corev1.Node{node} 45 | err := drainManager.ScheduleNodesDrain(testCtx, &upgrade.DrainConfiguration{Nodes: nodeArray, Spec: drainSpec}) 46 | Expect(err).To(Succeed()) 47 | 48 | time.Sleep(time.Second) 49 | 50 | observedNode := &corev1.Node{} 51 | err = k8sClient.Get(testCtx, types.NamespacedName{Name: node.Name}, observedNode) 52 | Expect(err).To(Succeed()) 53 | Expect(observedNode.Spec.Unschedulable).To(BeTrue()) 54 | }) 55 | It("DrainManager should drain all nodes it receives", func() { 56 | testCtx := context.TODO() 57 | 58 | node1 := createNode("node1") 59 | node2 := createNode("node2") 60 | node3 := createNode("node3") 61 | 62 | drainManager := upgrade.NewDrainManager(k8sInterface, upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder), log, eventRecorder) 63 | drainSpec := &v1alpha1.DrainSpec{ 64 | Enable: true, 65 | Force: false, 66 | PodSelector: "", 67 | TimeoutSecond: 1, 68 | DeleteEmptyDir: true, 69 | } 70 | nodeArray := []*corev1.Node{node1, node2, node3} 71 | err := drainManager.ScheduleNodesDrain(testCtx, &upgrade.DrainConfiguration{Nodes: nodeArray, Spec: drainSpec}) 72 | Expect(err).To(Succeed()) 73 | 74 | time.Sleep(time.Second) 75 | 76 | observedNode1 := &corev1.Node{} 77 | err = k8sClient.Get(testCtx, types.NamespacedName{Name: node1.Name}, observedNode1) 78 | 
Expect(err).To(Succeed()) 79 | Expect(observedNode1.Spec.Unschedulable).To(BeTrue()) 80 | 81 | observedNode2 := &corev1.Node{} 82 | err = k8sClient.Get(testCtx, types.NamespacedName{Name: node2.Name}, observedNode2) 83 | Expect(err).To(Succeed()) 84 | Expect(observedNode2.Spec.Unschedulable).To(BeTrue()) 85 | 86 | observedNode3 := &corev1.Node{} 87 | err = k8sClient.Get(testCtx, types.NamespacedName{Name: node3.Name}, observedNode3) 88 | Expect(err).To(Succeed()) 89 | Expect(observedNode3.Spec.Unschedulable).To(BeTrue()) 90 | }) 91 | It("DrainManager should not fail on empty node list", func() { 92 | testCtx := context.TODO() 93 | 94 | drainManager := upgrade.NewDrainManager(k8sInterface, upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder), log, eventRecorder) 95 | drainSpec := &v1alpha1.DrainSpec{ 96 | Enable: true, 97 | Force: false, 98 | PodSelector: "", 99 | TimeoutSecond: 1, 100 | DeleteEmptyDir: true, 101 | } 102 | err := drainManager.ScheduleNodesDrain(testCtx, &upgrade.DrainConfiguration{Nodes: nil, Spec: drainSpec}) 103 | Expect(err).To(Succeed()) 104 | 105 | time.Sleep(time.Second) 106 | }) 107 | It("DrainManager should return error on nil drain spec", func() { 108 | testCtx := context.TODO() 109 | 110 | node := createNode("node") 111 | 112 | drainManager := upgrade.NewDrainManager(k8sInterface, upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder), log, eventRecorder) 113 | 114 | nodeArray := []*corev1.Node{node} 115 | err := drainManager.ScheduleNodesDrain(testCtx, &upgrade.DrainConfiguration{Nodes: nodeArray, Spec: nil}) 116 | Expect(err).ToNot(Succeed()) 117 | 118 | time.Sleep(time.Second) 119 | 120 | observedNode := &corev1.Node{} 121 | err = k8sClient.Get(testCtx, types.NamespacedName{Name: node.Name}, observedNode) 122 | Expect(err).To(Succeed()) 123 | Expect(observedNode.Spec.Unschedulable).To(BeFalse()) 124 | }) 125 | It("DrainManager should skip drain on empty drain spec", func() { 126 | testCtx := context.TODO() 127 | 128 | node := createNode("node") 129 | 130 | drainManager := upgrade.NewDrainManager(k8sInterface, upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder), log, eventRecorder) 131 | 132 | nodeArray := []*corev1.Node{node} 133 | err := drainManager.ScheduleNodesDrain(testCtx, &upgrade.DrainConfiguration{Nodes: nodeArray, Spec: &v1alpha1.DrainSpec{}}) 134 | Expect(err).To(Succeed()) 135 | 136 | time.Sleep(time.Second) 137 | 138 | observedNode := &corev1.Node{} 139 | err = k8sClient.Get(testCtx, types.NamespacedName{Name: node.Name}, observedNode) 140 | Expect(err).To(Succeed()) 141 | Expect(observedNode.Spec.Unschedulable).To(BeFalse()) 142 | }) 143 | It("DrainManager should skip drain if drain is disabled in the spec", func() { 144 | testCtx := context.TODO() 145 | 146 | node := createNode("node") 147 | 148 | drainManager := upgrade.NewDrainManager(k8sInterface, upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder), log, eventRecorder) 149 | 150 | nodeArray := []*corev1.Node{node} 151 | err := drainManager.ScheduleNodesDrain( 152 | testCtx, &upgrade.DrainConfiguration{Nodes: nodeArray, Spec: &v1alpha1.DrainSpec{Enable: false}}) 153 | Expect(err).To(Succeed()) 154 | 155 | time.Sleep(time.Second) 156 | 157 | observedNode := &corev1.Node{} 158 | err = k8sClient.Get(testCtx, types.NamespacedName{Name: node.Name}, observedNode) 159 | Expect(err).To(Succeed()) 160 | Expect(observedNode.Spec.Unschedulable).To(BeFalse()) 161 | }) 162 | }) 163 | 
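The files under `pkg/upgrade/mocks` below are generated with mockery and are meant to be stubbed in consumer tests. As a minimal sketch of how such a mock could be used (assuming a plain `go test` context; the node construction and test name here are illustrative, not part of this repository):

```go
package upgrade_test

import (
	"context"
	"testing"

	"github.com/stretchr/testify/mock"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/mocks"
)

// TestCordonManagerMock stubs Cordon; mockery's NewCordonManager registers
// AssertExpectations via t.Cleanup, so unmet expectations fail the test.
func TestCordonManagerMock(t *testing.T) {
	node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "test-node"}}

	m := mocks.NewCordonManager(t)
	m.On("Cordon", mock.Anything, node).Return(nil)

	if err := m.Cordon(context.TODO(), node); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
}
```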
-------------------------------------------------------------------------------- /pkg/upgrade/mocks/CordonManager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | // Code generated by mockery v2.15.0. DO NOT EDIT. 14 | 15 | package mocks 16 | 17 | import ( 18 | context "context" 19 | 20 | mock "github.com/stretchr/testify/mock" 21 | 22 | v1 "k8s.io/api/core/v1" 23 | ) 24 | 25 | // CordonManager is an autogenerated mock type for the CordonManager type 26 | type CordonManager struct { 27 | mock.Mock 28 | } 29 | 30 | // Cordon provides a mock function with given fields: ctx, node 31 | func (_m *CordonManager) Cordon(ctx context.Context, node *v1.Node) error { 32 | ret := _m.Called(ctx, node) 33 | 34 | var r0 error 35 | if rf, ok := ret.Get(0).(func(context.Context, *v1.Node) error); ok { 36 | r0 = rf(ctx, node) 37 | } else { 38 | r0 = ret.Error(0) 39 | } 40 | 41 | return r0 42 | } 43 | 44 | // Uncordon provides a mock function with given fields: ctx, node 45 | func (_m *CordonManager) Uncordon(ctx context.Context, node *v1.Node) error { 46 | ret := _m.Called(ctx, node) 47 | 48 | var r0 error 49 | if rf, ok := ret.Get(0).(func(context.Context, *v1.Node) error); ok { 50 | r0 = rf(ctx, node) 51 | } else { 52 | r0 = ret.Error(0) 53 | } 54 | 55 | return r0 56 | } 57 | 58 | type mockConstructorTestingTNewCordonManager interface { 59 | mock.TestingT 60 | Cleanup(func()) 61 | } 62 | 63 | // NewCordonManager creates a new instance of CordonManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 64 | func NewCordonManager(t mockConstructorTestingTNewCordonManager) *CordonManager { 65 | mock := &CordonManager{} 66 | mock.Mock.Test(t) 67 | 68 | t.Cleanup(func() { mock.AssertExpectations(t) }) 69 | 70 | return mock 71 | } 72 | -------------------------------------------------------------------------------- /pkg/upgrade/mocks/DrainManager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | // Code generated by mockery v1.0.0. DO NOT EDIT. 
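// Illustrative usage note (not part of the generated code): this mock was
// generated with mockery v1.0.0 and therefore has no New* constructor, so a
// test would construct and verify it manually, e.g.:
//
//	dm := &mocks.DrainManager{}
//	dm.On("ScheduleNodesDrain", mock.Anything, mock.Anything).Return(nil)
//	// ... exercise the code under test ...
//	dm.AssertExpectations(t)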
14 | //nolint 15 | package mocks 16 | 17 | import ( 18 | context "context" 19 | 20 | upgrade "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" 21 | mock "github.com/stretchr/testify/mock" 22 | ) 23 | 24 | // DrainManager is an autogenerated mock type for the DrainManager type 25 | type DrainManager struct { 26 | mock.Mock 27 | } 28 | 29 | // ScheduleNodesDrain provides a mock function with given fields: ctx, drainConfig 30 | func (_m *DrainManager) ScheduleNodesDrain(ctx context.Context, drainConfig *upgrade.DrainConfiguration) error { 31 | ret := _m.Called(ctx, drainConfig) 32 | 33 | var r0 error 34 | if rf, ok := ret.Get(0).(func(context.Context, *upgrade.DrainConfiguration) error); ok { 35 | r0 = rf(ctx, drainConfig) 36 | } else { 37 | r0 = ret.Error(0) 38 | } 39 | 40 | return r0 41 | } 42 | -------------------------------------------------------------------------------- /pkg/upgrade/mocks/NodeUpgradeStateProvider.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Code generated by mockery v2.15.0. DO NOT EDIT. 18 | 19 | package mocks 20 | 21 | import ( 22 | context "context" 23 | 24 | mock "github.com/stretchr/testify/mock" 25 | 26 | v1 "k8s.io/api/core/v1" 27 | ) 28 | 29 | // NodeUpgradeStateProvider is an autogenerated mock type for the NodeUpgradeStateProvider type 30 | type NodeUpgradeStateProvider struct { 31 | mock.Mock 32 | } 33 | 34 | // ChangeNodeUpgradeAnnotation provides a mock function with given fields: ctx, node, key, value 35 | func (_m *NodeUpgradeStateProvider) ChangeNodeUpgradeAnnotation(ctx context.Context, node *v1.Node, key string, value string) error { 36 | ret := _m.Called(ctx, node, key, value) 37 | 38 | var r0 error 39 | if rf, ok := ret.Get(0).(func(context.Context, *v1.Node, string, string) error); ok { 40 | r0 = rf(ctx, node, key, value) 41 | } else { 42 | r0 = ret.Error(0) 43 | } 44 | 45 | return r0 46 | } 47 | 48 | // ChangeNodeUpgradeState provides a mock function with given fields: ctx, node, newNodeState 49 | func (_m *NodeUpgradeStateProvider) ChangeNodeUpgradeState(ctx context.Context, node *v1.Node, newNodeState string) error { 50 | ret := _m.Called(ctx, node, newNodeState) 51 | 52 | var r0 error 53 | if rf, ok := ret.Get(0).(func(context.Context, *v1.Node, string) error); ok { 54 | r0 = rf(ctx, node, newNodeState) 55 | } else { 56 | r0 = ret.Error(0) 57 | } 58 | 59 | return r0 60 | } 61 | 62 | // GetNode provides a mock function with given fields: ctx, nodeName 63 | func (_m *NodeUpgradeStateProvider) GetNode(ctx context.Context, nodeName string) (*v1.Node, error) { 64 | ret := _m.Called(ctx, nodeName) 65 | 66 | var r0 *v1.Node 67 | if rf, ok := ret.Get(0).(func(context.Context, string) *v1.Node); ok { 68 | r0 = rf(ctx, nodeName) 69 | } else { 70 | if ret.Get(0) != nil { 71 | r0 = ret.Get(0).(*v1.Node) 72 | } 73 | } 74 | 75 | var r1 error 76 | if rf, ok := ret.Get(1).(func(context.Context, string) 
error); ok { 77 | r1 = rf(ctx, nodeName) 78 | } else { 79 | r1 = ret.Error(1) 80 | } 81 | 82 | return r0, r1 83 | } 84 | 85 | type mockConstructorTestingTNewNodeUpgradeStateProvider interface { 86 | mock.TestingT 87 | Cleanup(func()) 88 | } 89 | 90 | // NewNodeUpgradeStateProvider creates a new instance of NodeUpgradeStateProvider. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 91 | func NewNodeUpgradeStateProvider(t mockConstructorTestingTNewNodeUpgradeStateProvider) *NodeUpgradeStateProvider { 92 | mock := &NodeUpgradeStateProvider{} 93 | mock.Mock.Test(t) 94 | 95 | t.Cleanup(func() { mock.AssertExpectations(t) }) 96 | 97 | return mock 98 | } 99 | -------------------------------------------------------------------------------- /pkg/upgrade/mocks/PodManager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | 14 | // Code generated by mockery v2.15.0. DO NOT EDIT. 15 | 16 | package mocks 17 | 18 | import ( 19 | context "context" 20 | 21 | corev1 "k8s.io/api/core/v1" 22 | 23 | mock "github.com/stretchr/testify/mock" 24 | 25 | upgrade "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" 26 | 27 | v1 "k8s.io/api/apps/v1" 28 | ) 29 | 30 | // PodManager is an autogenerated mock type for the PodManager type 31 | type PodManager struct { 32 | mock.Mock 33 | } 34 | 35 | // GetDaemonsetControllerRevisionHash provides a mock function with given fields: ctx, daemonset 36 | func (_m *PodManager) GetDaemonsetControllerRevisionHash(ctx context.Context, daemonset *v1.DaemonSet) (string, error) { 37 | ret := _m.Called(ctx, daemonset) 38 | 39 | var r0 string 40 | if rf, ok := ret.Get(0).(func(context.Context, *v1.DaemonSet) string); ok { 41 | r0 = rf(ctx, daemonset) 42 | } else { 43 | r0 = ret.Get(0).(string) 44 | } 45 | 46 | var r1 error 47 | if rf, ok := ret.Get(1).(func(context.Context, *v1.DaemonSet) error); ok { 48 | r1 = rf(ctx, daemonset) 49 | } else { 50 | r1 = ret.Error(1) 51 | } 52 | 53 | return r0, r1 54 | } 55 | 56 | // GetPodControllerRevisionHash provides a mock function with given fields: ctx, pod 57 | func (_m *PodManager) GetPodControllerRevisionHash(pod *corev1.Pod) (string, error) { 58 | ret := _m.Called(pod) 59 | 60 | var r0 string 61 | if rf, ok := ret.Get(0).(func(*corev1.Pod) string); ok { 62 | r0 = rf(pod) 63 | } else { 64 | r0 = ret.Get(0).(string) 65 | } 66 | 67 | var r1 error 68 | if rf, ok := ret.Get(1).(func(*corev1.Pod) error); ok { 69 | r1 = rf(pod) 70 | } else { 71 | r1 = ret.Error(1) 72 | } 73 | 74 | return r0, r1 75 | } 76 | 77 | // GetPodDeletionFilter provides a mock function with given fields: 78 | func (_m *PodManager) GetPodDeletionFilter() upgrade.PodDeletionFilter { 79 | ret := _m.Called() 80 | 81 | var r0 upgrade.PodDeletionFilter 82 | if rf, ok := ret.Get(0).(func() upgrade.PodDeletionFilter); ok { 83 | r0 = rf() 84 | } else { 85 | if ret.Get(0) != nil { 86 | 
r0 = ret.Get(0).(upgrade.PodDeletionFilter) 87 | } 88 | } 89 | 90 | return r0 91 | } 92 | 93 | // ScheduleCheckOnPodCompletion provides a mock function with given fields: ctx, config 94 | func (_m *PodManager) ScheduleCheckOnPodCompletion(ctx context.Context, config *upgrade.PodManagerConfig) error { 95 | ret := _m.Called(ctx, config) 96 | 97 | var r0 error 98 | if rf, ok := ret.Get(0).(func(context.Context, *upgrade.PodManagerConfig) error); ok { 99 | r0 = rf(ctx, config) 100 | } else { 101 | r0 = ret.Error(0) 102 | } 103 | 104 | return r0 105 | } 106 | 107 | // SchedulePodEviction provides a mock function with given fields: ctx, config 108 | func (_m *PodManager) SchedulePodEviction(ctx context.Context, config *upgrade.PodManagerConfig) error { 109 | ret := _m.Called(ctx, config) 110 | 111 | var r0 error 112 | if rf, ok := ret.Get(0).(func(context.Context, *upgrade.PodManagerConfig) error); ok { 113 | r0 = rf(ctx, config) 114 | } else { 115 | r0 = ret.Error(0) 116 | } 117 | 118 | return r0 119 | } 120 | 121 | // SchedulePodsRestart provides a mock function with given fields: ctx, pods 122 | func (_m *PodManager) SchedulePodsRestart(ctx context.Context, pods []*corev1.Pod) error { 123 | ret := _m.Called(ctx, pods) 124 | 125 | var r0 error 126 | if rf, ok := ret.Get(0).(func(context.Context, []*corev1.Pod) error); ok { 127 | r0 = rf(ctx, pods) 128 | } else { 129 | r0 = ret.Error(0) 130 | } 131 | 132 | return r0 133 | } 134 | 135 | type mockConstructorTestingTNewPodManager interface { 136 | mock.TestingT 137 | Cleanup(func()) 138 | } 139 | 140 | // NewPodManager creates a new instance of PodManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 141 | func NewPodManager(t mockConstructorTestingTNewPodManager) *PodManager { 142 | mock := &PodManager{} 143 | mock.Mock.Test(t) 144 | 145 | t.Cleanup(func() { mock.AssertExpectations(t) }) 146 | 147 | return mock 148 | } 149 | -------------------------------------------------------------------------------- /pkg/upgrade/mocks/ValidationManager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Code generated by mockery v2.15.0. DO NOT EDIT. 
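// Illustrative usage note (not part of the generated code): a typical stubbing
// of Validate in a consumer test might look like (ctx and node are assumed to
// exist in the test):
//
//	vm := mocks.NewValidationManager(t)
//	vm.On("Validate", mock.Anything, mock.Anything).Return(true, nil)
//	ok, err := vm.Validate(ctx, node) // ok == true, err == nil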
18 | 19 | package mocks 20 | 21 | import ( 22 | context "context" 23 | 24 | mock "github.com/stretchr/testify/mock" 25 | 26 | v1 "k8s.io/api/core/v1" 27 | ) 28 | 29 | // ValidationManager is an autogenerated mock type for the ValidationManager type 30 | type ValidationManager struct { 31 | mock.Mock 32 | } 33 | 34 | // Validate provides a mock function with given fields: ctx, node 35 | func (_m *ValidationManager) Validate(ctx context.Context, node *v1.Node) (bool, error) { 36 | ret := _m.Called(ctx, node) 37 | 38 | var r0 bool 39 | if rf, ok := ret.Get(0).(func(context.Context, *v1.Node) bool); ok { 40 | r0 = rf(ctx, node) 41 | } else { 42 | r0 = ret.Get(0).(bool) 43 | } 44 | 45 | var r1 error 46 | if rf, ok := ret.Get(1).(func(context.Context, *v1.Node) error); ok { 47 | r1 = rf(ctx, node) 48 | } else { 49 | r1 = ret.Error(1) 50 | } 51 | 52 | return r0, r1 53 | } 54 | 55 | type mockConstructorTestingTNewValidationManager interface { 56 | mock.TestingT 57 | Cleanup(func()) 58 | } 59 | 60 | // NewValidationManager creates a new instance of ValidationManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 61 | func NewValidationManager(t mockConstructorTestingTNewValidationManager) *ValidationManager { 62 | mock := &ValidationManager{} 63 | mock.Mock.Test(t) 64 | 65 | t.Cleanup(func() { mock.AssertExpectations(t) }) 66 | 67 | return mock 68 | } 69 | -------------------------------------------------------------------------------- /pkg/upgrade/node_upgrade_state_provider.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | 14 | package upgrade 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "time" 20 | 21 | "github.com/go-logr/logr" 22 | corev1 "k8s.io/api/core/v1" 23 | "k8s.io/apimachinery/pkg/types" 24 | "k8s.io/apimachinery/pkg/util/wait" 25 | "k8s.io/client-go/tools/record" 26 | "sigs.k8s.io/controller-runtime/pkg/client" 27 | 28 | "github.com/NVIDIA/k8s-operator-libs/pkg/consts" 29 | ) 30 | 31 | // NodeUpgradeStateProvider allows for synchronized operations on node objects and ensures that a node, 32 | // obtained from the provider, always has an up-to-date upgrade state 33 | type NodeUpgradeStateProvider interface { 34 | GetNode(ctx context.Context, nodeName string) (*corev1.Node, error) 35 | ChangeNodeUpgradeState(ctx context.Context, node *corev1.Node, newNodeState string) error 36 | ChangeNodeUpgradeAnnotation(ctx context.Context, node *corev1.Node, key string, value string) error 37 | } 38 | 39 | // NodeUpgradeStateProviderImpl implements the NodeUpgradeStateProvider interface 40 | type NodeUpgradeStateProviderImpl struct { 41 | K8sClient client.Client 42 | Log logr.Logger 43 | nodeMutex KeyedMutex 44 | eventRecorder record.EventRecorder 45 | } 46 | 47 | // NewNodeUpgradeStateProvider creates a NodeUpgradeStateProviderImpl 48 | func NewNodeUpgradeStateProvider(k8sClient client.Client, log logr.Logger, 49 | eventRecorder record.EventRecorder) NodeUpgradeStateProvider { 50 | return &NodeUpgradeStateProviderImpl{ 51 | K8sClient: k8sClient, 52 | Log: log, 53 | nodeMutex: KeyedMutex{}, 54 | eventRecorder: eventRecorder, 55 | } 56 | } 57 | 58 | // GetNode returns the corev1.Node object with the given name 59 | func (p *NodeUpgradeStateProviderImpl) GetNode(ctx context.Context, nodeName string) (*corev1.Node, error) { 60 | defer p.nodeMutex.Lock(nodeName)() 61 | 62 | node := corev1.Node{} 63 | err := p.K8sClient.Get(ctx, types.NamespacedName{Name: nodeName}, &node) 64 | if err != nil { 65 | return nil, err 66 | } 67 | return &node, nil 68 | } 69 | 70 | // ChangeNodeUpgradeState patches a given corev1.Node object and updates its UpgradeStateLabel with a given value 71 | // The function then waits for the operator cache to get updated 72 | func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeState( 73 | ctx context.Context, node *corev1.Node, newNodeState string) error { 74 | p.Log.V(consts.LogLevelInfo).Info("Updating node upgrade state", 75 | "node", node.Name, 76 | "new state", newNodeState) 77 | 78 | defer p.nodeMutex.Lock(node.Name)() 79 | 80 | patchString := []byte(fmt.Sprintf(`{"metadata":{"labels":{%q: %q}}}`, GetUpgradeStateLabelKey(), newNodeState)) 81 | patch := client.RawPatch(types.StrategicMergePatchType, patchString) 82 | err := p.K8sClient.Patch(ctx, node, patch) 83 | if err != nil { 84 | p.Log.V(consts.LogLevelError).Error(err, "Failed to patch node state label on a node object", 85 | "node", node, 86 | "state", newNodeState) 87 | logEventf(p.eventRecorder, node, corev1.EventTypeWarning, GetEventReason(), 88 | "Failed to update node state label to %s, %s", newNodeState, err.Error()) 89 | return err 90 | } 91 | 92 | // Upgrade controller is watching on a set of different resources (ClusterPolicy, NicClusterPolicy, DaemonSet, Pods) 93 | // Because of that, when a new Reconcile event is triggered, the operator cache might not have the latest changes 94 | // For example, the node object might have a different upgrade-state value even though it was just changed here.
95 | // To fix that problem, after the state of the node has successfully been changed, we poll the same node object 96 | // until its state matches the newly changed one. Get request in that case takes objects from the operator cache, 97 | // so we wait until it's synced. 98 | // That way, since only one call to reconcile at a time is allowed for upgrade controller, each new update 99 | // will have the updated node object in the cache. 100 | timeoutCtx, cancel := context.WithTimeout(ctx, time.Second*10) 101 | defer cancel() 102 | //nolint:staticcheck 103 | err = wait.PollImmediateUntil(time.Second, func() (bool, error) { 104 | p.Log.V(consts.LogLevelDebug).Info("Requesting node object to see if operator cache has updated", 105 | "node", node.Name) 106 | err := p.K8sClient.Get(timeoutCtx, types.NamespacedName{Name: node.Name}, node) 107 | if err != nil { 108 | return false, err 109 | } 110 | nodeState := node.Labels[GetUpgradeStateLabelKey()] 111 | if nodeState != newNodeState { 112 | p.Log.V(consts.LogLevelDebug).Info("upgrade state label for node doesn't match the expected", 113 | "node", node.Name, "expected", newNodeState, "actual", nodeState) 114 | return false, nil 115 | } 116 | return true, nil 117 | }, timeoutCtx.Done()) 118 | 119 | if err != nil { 120 | p.Log.V(consts.LogLevelError).Error(err, "Error while waiting on node label update", 121 | "node", node, 122 | "state", newNodeState) 123 | logEventf(p.eventRecorder, node, corev1.EventTypeWarning, GetEventReason(), 124 | "Failed to update node state label to %s, %s", newNodeState, err.Error()) 125 | } else { 126 | p.Log.V(consts.LogLevelInfo).Info("Successfully changed node upgrade state label", 127 | "node", node.Name, 128 | "new state", newNodeState) 129 | logEventf(p.eventRecorder, node, corev1.EventTypeNormal, GetEventReason(), 130 | "Successfully updated node state label to %s", newNodeState) 131 | } 132 | 133 | return err 134 | } 135 | 136 | // ChangeNodeUpgradeAnnotation patches a given corev1.Node object and updates an annotation with a given value 137 | // The function then waits for the operator cache to get updated 138 | func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeAnnotation( 139 | ctx context.Context, node *corev1.Node, key string, value string) error { 140 | p.Log.V(consts.LogLevelInfo).Info("Updating node upgrade annotation", 141 | "node", node.Name, 142 | "annotationKey", key, 143 | "annotationValue", value) 144 | 145 | defer p.nodeMutex.Lock(node.Name)() 146 | 147 | patchString := []byte(fmt.Sprintf(`{"metadata":{"annotations":{%q: %q}}}`, key, value)) 148 | if value == nullString { 149 | patchString = []byte(fmt.Sprintf(`{"metadata":{"annotations":{%q: null}}}`, key)) 150 | } 151 | patch := client.RawPatch(types.MergePatchType, patchString) 152 | err := p.K8sClient.Patch(ctx, node, patch) 153 | if err != nil { 154 | p.Log.V(consts.LogLevelError).Error(err, "Failed to patch node state annotation on a node object", 155 | "node", node, 156 | "annotationKey", key, 157 | "annotationValue", value) 158 | logEventf(p.eventRecorder, node, corev1.EventTypeWarning, GetEventReason(), 159 | "Failed to update node annotation %s=%s: %s", key, value, err.Error()) 160 | return err 161 | } 162 | 163 | // Upgrade controller is watching on a set of different resources (ClusterPolicy, NicClusterPolicy, DaemonSet, Pods) 164 | // Because of that, when a new Reconcile event is triggered, the operator cache might not have the latest changes 165 | // For example, the node object might have a different upgrade-state value even 
though it was just changed here. 166 | // To fix that problem, after the state of the node has successfully been changed, we poll the same node object 167 | // until its state matches the newly changed one. Get request in that case takes objects from the operator cache, 168 | // so we wait until it's synced. 169 | // That way, since only one call to reconcile at a time is allowed for upgrade controller, each new update 170 | // will have the updated node object in the cache. 171 | timeoutCtx, cancel := context.WithTimeout(ctx, time.Second*10) 172 | defer cancel() 173 | //nolint:staticcheck 174 | err = wait.PollImmediateUntil(time.Second, func() (bool, error) { 175 | p.Log.V(consts.LogLevelDebug).Info("Requesting node object to see if operator cache has updated", 176 | "node", node.Name) 177 | err := p.K8sClient.Get(timeoutCtx, types.NamespacedName{Name: node.Name}, node) 178 | if err != nil { 179 | return false, err 180 | } 181 | annotationValue, exists := node.Annotations[key] 182 | if value == nullString { 183 | // annotation key should be removed 184 | if exists { 185 | p.Log.V(consts.LogLevelDebug).Info("upgrade state annotation for node should be removed but it still exists", 186 | "node", node.Name, "annotationKey", key) 187 | return false, nil 188 | } 189 | return true, nil 190 | } 191 | if annotationValue != value { 192 | p.Log.V(consts.LogLevelDebug).Info("upgrade state annotation for node doesn't match the expected", 193 | "node", node.Name, "annotationKey", key, "expected", value, "actual", annotationValue) 194 | return false, nil 195 | } 196 | return true, nil 197 | }, timeoutCtx.Done()) 198 | 199 | if err != nil { 200 | p.Log.V(consts.LogLevelError).Error(err, "Error while waiting on node annotation update", 201 | "node", node, 202 | "annotationKey", key, 203 | "annotationValue", value) 204 | logEventf(p.eventRecorder, node, corev1.EventTypeWarning, GetEventReason(), 205 | "Failed to update node annotation to %s=%s: %s", key, value, err.Error()) 206 | } else { 207 | p.Log.V(consts.LogLevelInfo).Info("Successfully changed node upgrade state annotation", 208 | "node", node.Name, 209 | "annotationKey", key, 210 | "annotationValue", value) 211 | logEventf(p.eventRecorder, node, corev1.EventTypeNormal, GetEventReason(), 212 | "Successfully updated node annotation to %s=%s", key, value) 213 | } 214 | 215 | return err 216 | } 217 | -------------------------------------------------------------------------------- /pkg/upgrade/node_upgrade_state_provider_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package upgrade_test 18 | 19 | import ( 20 | "fmt" 21 | 22 | . "github.com/onsi/ginkgo/v2" 23 | . 
"github.com/onsi/gomega" 24 | corev1 "k8s.io/api/core/v1" 25 | 26 | upgrade "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" 27 | ) 28 | 29 | var _ = Describe("NodeUpgradeStateProvider tests", func() { 30 | var id string 31 | var node *corev1.Node 32 | 33 | BeforeEach(func() { 34 | id = randSeq(5) 35 | node = createNode(fmt.Sprintf("node-%s", id)) 36 | }) 37 | It("NodeUpgradeStateProvider should change node upgrade state and retrieve the latest node object", func() { 38 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 39 | 40 | err := provider.ChangeNodeUpgradeState(testCtx, node, upgrade.UpgradeStateUpgradeRequired) 41 | Expect(err).To(Succeed()) 42 | 43 | node, err = provider.GetNode(testCtx, node.Name) 44 | Expect(err).To(Succeed()) 45 | Expect(node.Labels[upgrade.GetUpgradeStateLabelKey()]).To(Equal(upgrade.UpgradeStateUpgradeRequired)) 46 | }) 47 | It("NodeUpgradeStateProvider should change node upgrade annotation and retrieve the latest node object", func() { 48 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 49 | 50 | key := upgrade.GetUpgradeInitialStateAnnotationKey() 51 | err := provider.ChangeNodeUpgradeAnnotation(testCtx, node, key, "true") 52 | Expect(err).To(Succeed()) 53 | 54 | node, err = provider.GetNode(testCtx, node.Name) 55 | Expect(err).To(Succeed()) 56 | Expect(node.Annotations[key]).To(Equal("true")) 57 | }) 58 | It("NodeUpgradeStateProvider should delete node upgrade annotation and retrieve the latest node object", func() { 59 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 60 | 61 | key := upgrade.GetUpgradeInitialStateAnnotationKey() 62 | err := provider.ChangeNodeUpgradeAnnotation(testCtx, node, key, "null") 63 | Expect(err).To(Succeed()) 64 | 65 | node, err = provider.GetNode(testCtx, node.Name) 66 | Expect(err).To(Succeed()) 67 | _, exist := node.Annotations[key] 68 | Expect(exist).To(Equal(false)) 69 | }) 70 | }) 71 | -------------------------------------------------------------------------------- /pkg/upgrade/pod_manager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package upgrade 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "os" 23 | "sort" 24 | "strconv" 25 | "strings" 26 | "sync" 27 | "time" 28 | 29 | "github.com/go-logr/logr" 30 | appsv1 "k8s.io/api/apps/v1" 31 | corev1 "k8s.io/api/core/v1" 32 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 | "k8s.io/apimachinery/pkg/labels" 34 | "k8s.io/client-go/kubernetes" 35 | "k8s.io/client-go/tools/record" 36 | "k8s.io/kubectl/pkg/drain" 37 | 38 | v1alpha1 "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" 39 | "github.com/NVIDIA/k8s-operator-libs/pkg/consts" 40 | ) 41 | 42 | // PodManagerImpl implements the PodManager interface and checks for pod states 43 | type PodManagerImpl struct { 44 | k8sInterface kubernetes.Interface 45 | nodeUpgradeStateProvider NodeUpgradeStateProvider 46 | podDeletionFilter PodDeletionFilter 47 | nodesInProgress *StringSet 48 | log logr.Logger 49 | eventRecorder record.EventRecorder 50 | } 51 | 52 | // PodManager is an interface that allows waiting on certain pod statuses 53 | type PodManager interface { 54 | ScheduleCheckOnPodCompletion(ctx context.Context, config *PodManagerConfig) error 55 | SchedulePodsRestart(ctx context.Context, pods []*corev1.Pod) error 56 | SchedulePodEviction(ctx context.Context, config *PodManagerConfig) error 57 | GetPodDeletionFilter() PodDeletionFilter 58 | GetPodControllerRevisionHash(pod *corev1.Pod) (string, error) 59 | GetDaemonsetControllerRevisionHash(ctx context.Context, daemonset *appsv1.DaemonSet) (string, error) 60 | } 61 | 62 | // PodManagerConfig represents the selector for pods and the node names to be considered for managing those pods 63 | type PodManagerConfig struct { 64 | Nodes []*corev1.Node 65 | DeletionSpec *v1alpha1.PodDeletionSpec 66 | WaitForCompletionSpec *v1alpha1.WaitForCompletionSpec 67 | DrainEnabled bool 68 | } 69 | 70 | const ( 71 | // PodControllerRevisionHashLabelKey is the label key containing the controller-revision-hash 72 | PodControllerRevisionHashLabelKey = "controller-revision-hash" 73 | ) 74 | 75 | // PodDeletionFilter takes a pod and returns a boolean indicating whether the pod should be deleted 76 | type PodDeletionFilter func(corev1.Pod) bool 77 | 78 | // GetPodDeletionFilter returns the PodDeletionFilter 79 | func (m *PodManagerImpl) GetPodDeletionFilter() PodDeletionFilter { 80 | return m.podDeletionFilter 81 | } 82 | 83 | // GetPodControllerRevisionHash returns the Pod Controller Revision Hash from its labels 84 | func (m *PodManagerImpl) GetPodControllerRevisionHash(pod *corev1.Pod) (string, error) { 85 | if hash, ok := pod.Labels[PodControllerRevisionHashLabelKey]; ok { 86 | return hash, nil 87 | } 88 | return "", fmt.Errorf("controller-revision-hash label not present for pod %s", pod.Name) 89 | } 90 | 91 | // GetDaemonsetControllerRevisionHash returns the latest DaemonSet Controller Revision Hash 92 | func (m *PodManagerImpl) GetDaemonsetControllerRevisionHash(ctx context.Context, 93 | daemonset *appsv1.DaemonSet) (string, error) { 94 | // get all revisions for the daemonset 95 | listOptions := meta_v1.ListOptions{LabelSelector: labels.SelectorFromSet(daemonset.Spec.Selector.MatchLabels).String()} 96 | controllerRevisionList, err := m.k8sInterface.AppsV1().ControllerRevisions(daemonset.Namespace).List(ctx, listOptions) 97 | if err != nil { 98 | return "", fmt.Errorf("error getting controller revision list for daemonset %s: %v", daemonset.Name, err) 99 | } 100 | 101 | var revisions []appsv1.ControllerRevision 102 | for _, controllerRevision := range 
controllerRevisionList.Items { 103 | if strings.HasPrefix(controllerRevision.Name, daemonset.Name) { 104 | revisions = append(revisions, controllerRevision) 105 | } 106 | } 107 | 108 | if len(revisions) == 0 { 109 | return "", fmt.Errorf("no revision found for daemonset %s", daemonset.Name) 110 | } 111 | 112 | // sort the revision list to make sure we obtain latest revision always 113 | sort.Slice(revisions, func(i, j int) bool { return revisions[i].Revision < revisions[j].Revision }) 114 | 115 | currentRevision := revisions[len(revisions)-1] 116 | hash := strings.TrimPrefix(currentRevision.Name, fmt.Sprintf("%s-", daemonset.Name)) 117 | return hash, nil 118 | } 119 | 120 | // SchedulePodEviction receives a config for pod eviction and deletes pods for each node in the list. 121 | // The set of pods to delete is determined by a filter that is provided to the PodManagerImpl during construction. 122 | func (m *PodManagerImpl) SchedulePodEviction(ctx context.Context, config *PodManagerConfig) error { 123 | m.log.V(consts.LogLevelInfo).Info("Starting Pod Deletion") 124 | 125 | if len(config.Nodes) == 0 { 126 | m.log.V(consts.LogLevelInfo).Info("No nodes scheduled for pod deletion") 127 | return nil 128 | } 129 | 130 | podDeletionSpec := config.DeletionSpec 131 | 132 | if podDeletionSpec == nil { 133 | return fmt.Errorf("pod deletion spec should not be empty") 134 | } 135 | 136 | // Create a custom drain filter which will be passed to the drain helper. 137 | // The drain helper will carry out the actual deletion of pods on a node. 138 | customDrainFilter := func(pod corev1.Pod) drain.PodDeleteStatus { 139 | deleteFunc := m.podDeletionFilter(pod) 140 | if !deleteFunc { 141 | return drain.MakePodDeleteStatusSkip() 142 | } 143 | return drain.MakePodDeleteStatusOkay() 144 | } 145 | 146 | drainHelper := drain.Helper{ 147 | Ctx: ctx, 148 | Client: m.k8sInterface, 149 | Out: os.Stdout, 150 | ErrOut: os.Stderr, 151 | GracePeriodSeconds: -1, 152 | IgnoreAllDaemonSets: true, 153 | DeleteEmptyDirData: podDeletionSpec.DeleteEmptyDir, 154 | Force: podDeletionSpec.Force, 155 | Timeout: time.Duration(podDeletionSpec.TimeoutSecond) * time.Second, 156 | AdditionalFilters: []drain.PodFilter{customDrainFilter}, 157 | } 158 | 159 | for _, node := range config.Nodes { 160 | if !m.nodesInProgress.Has(node.Name) { 161 | m.log.V(consts.LogLevelInfo).Info("Deleting pods on node", "node", node.Name) 162 | m.nodesInProgress.Add(node.Name) 163 | 164 | go func(node corev1.Node) { 165 | defer m.nodesInProgress.Remove(node.Name) 166 | 167 | m.log.V(consts.LogLevelInfo).Info("Identifying pods to delete", "node", node.Name) 168 | 169 | // List all pods 170 | podList, err := m.ListPods(ctx, "", node.Name) 171 | if err != nil { 172 | m.log.V(consts.LogLevelError).Error(err, "Failed to list pods", "node", node.Name) 173 | return 174 | } 175 | 176 | // Get number of pods requiring deletion using the podDeletionFilter 177 | numPodsToDelete := 0 178 | for _, pod := range podList.Items { 179 | if m.podDeletionFilter(pod) { 180 | numPodsToDelete++ 181 | } 182 | } 183 | 184 | if numPodsToDelete == 0 { 185 | m.log.V(consts.LogLevelInfo).Info("No pods require deletion", "node", node.Name) 186 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, &node, UpgradeStatePodRestartRequired) 187 | return 188 | } 189 | 190 | m.log.V(consts.LogLevelInfo).Info("Identifying which pods can be deleted", "node", node.Name) 191 | podDeleteList, errs := drainHelper.GetPodsForDeletion(node.Name) 192 | 193 | numPodsCanDelete := 
len(podDeleteList.Pods()) 194 | if numPodsCanDelete != numPodsToDelete { 195 | m.log.V(consts.LogLevelError).Error(nil, "Cannot delete all required pods", "node", node.Name) 196 | for _, err := range errs { 197 | m.log.V(consts.LogLevelError).Error(err, "Error reported by drain helper", "node", node.Name) 198 | } 199 | m.updateNodeToDrainOrFailed(ctx, node, config.DrainEnabled) 200 | return 201 | } 202 | 203 | for _, p := range podDeleteList.Pods() { 204 | m.log.V(consts.LogLevelInfo).Info("Identified pod to delete", "node", node.Name, 205 | "namespace", p.Namespace, "name", p.Name) 206 | } 207 | m.log.V(consts.LogLevelDebug).Info("Warnings when identifying pods to delete", 208 | "warnings", podDeleteList.Warnings(), "node", node.Name) 209 | 210 | err = drainHelper.DeleteOrEvictPods(podDeleteList.Pods()) 211 | if err != nil { 212 | m.log.V(consts.LogLevelError).Error(err, "Failed to delete pods on the node", "node", node.Name) 213 | logEventf(m.eventRecorder, &node, corev1.EventTypeWarning, GetEventReason(), 214 | "Failed to delete workload pods on the node for the driver upgrade, %s", err.Error()) 215 | m.updateNodeToDrainOrFailed(ctx, node, config.DrainEnabled) 216 | return 217 | } 218 | 219 | m.log.V(consts.LogLevelInfo).Info("Deleted pods on the node", "node", node.Name) 220 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, &node, UpgradeStatePodRestartRequired) 221 | logEvent(m.eventRecorder, &node, corev1.EventTypeNormal, GetEventReason(), 222 | "Deleted workload pods on the node for the driver upgrade") 223 | }(*node) 224 | } else { 225 | m.log.V(consts.LogLevelInfo).Info("Node is already getting pods deleted, skipping", "node", node.Name) 226 | } 227 | } 228 | return nil 229 | } 230 | 231 | // SchedulePodsRestart receives a list of pods and schedules their deletion 232 | // TODO, schedule deletion of pods in parallel on all nodes 233 | func (m *PodManagerImpl) SchedulePodsRestart(ctx context.Context, pods []*corev1.Pod) error { 234 | m.log.V(consts.LogLevelInfo).Info("Starting Pod Delete") 235 | if len(pods) == 0 { 236 | m.log.V(consts.LogLevelInfo).Info("No pods scheduled to restart") 237 | return nil 238 | } 239 | for _, pod := range pods { 240 | m.log.V(consts.LogLevelInfo).Info("Deleting pod", "pod", pod.Name) 241 | deleteOptions := meta_v1.DeleteOptions{} 242 | err := m.k8sInterface.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, deleteOptions) 243 | if err != nil { 244 | m.log.V(consts.LogLevelInfo).Error(err, "Failed to delete pod", "pod", pod.Name) 245 | logEventf(m.eventRecorder, pod, corev1.EventTypeWarning, GetEventReason(), 246 | "Failed to restart driver pod %s", err.Error()) 247 | return err 248 | } 249 | } 250 | return nil 251 | } 252 | 253 | // ScheduleCheckOnPodCompletion receives a PodManagerConfig and schedules checks for pod statuses on each node in the 254 | // list. If the checks are successful, the node moves to UpgradeStatePodDeletionRequired state, 255 | // otherwise it stays in its current state.
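// A minimal, hypothetical usage sketch (the node list, selector and timeout below are illustrative; the surrounding operator normally derives this config from its DriverUpgradePolicySpec): // //	cfg := &PodManagerConfig{ //		Nodes: nodesWaitingForJobs, // nodes currently in the wait-for-jobs state //		WaitForCompletionSpec: &v1alpha1.WaitForCompletionSpec{ //			PodSelector:   "app=my-workload", // hypothetical label selector //			TimeoutSecond: 300, //		}, //	} //	_ = podManager.ScheduleCheckOnPodCompletion(ctx, cfg)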
256 | func (m *PodManagerImpl) ScheduleCheckOnPodCompletion(ctx context.Context, config *PodManagerConfig) error { 257 | m.log.V(consts.LogLevelInfo).Info("Pod Manager, starting checks on pod statuses") 258 | var wg sync.WaitGroup 259 | 260 | for _, node := range config.Nodes { 261 | m.log.V(consts.LogLevelInfo).Info("Schedule checks for pod completion", "node", node.Name) 262 | // fetch the pods using the label selector provided 263 | podList, err := m.ListPods(ctx, config.WaitForCompletionSpec.PodSelector, node.Name) 264 | if err != nil { 265 | m.log.V(consts.LogLevelError).Error(err, "Failed to list pods", 266 | "selector", config.WaitForCompletionSpec.PodSelector, "node", node.Name) 267 | return err 268 | } 269 | if len(podList.Items) > 0 { 270 | m.log.V(consts.LogLevelDebug).Error(err, "Found workload pods", 271 | "selector", config.WaitForCompletionSpec.PodSelector, "node", node.Name, "pods", len(podList.Items)) 272 | } 273 | // Increment the WaitGroup counter. 274 | wg.Add(1) 275 | go func(node corev1.Node) { 276 | // Decrement the counter when the goroutine completes. 277 | defer wg.Done() 278 | running := false 279 | for _, pod := range podList.Items { 280 | running = m.IsPodRunningOrPending(pod) 281 | if running { 282 | break 283 | } 284 | } 285 | // if workload pods are running, then check if timeout is specified and exceeded. 286 | // if no timeout is specified, then ignore the state updates and wait for completions. 287 | if running { 288 | m.log.V(consts.LogLevelInfo).Info("Workload pods are still running on the node", "node", node.Name) 289 | // check whether timeout is provided and is exceeded for job completions 290 | if config.WaitForCompletionSpec.TimeoutSecond != 0 { 291 | err = m.HandleTimeoutOnPodCompletions(ctx, &node, int64(config.WaitForCompletionSpec.TimeoutSecond)) 292 | if err != nil { 293 | logEventf(m.eventRecorder, &node, corev1.EventTypeWarning, GetEventReason(), 294 | "Failed to handle timeout for job completions, %s", err.Error()) 295 | return 296 | } 297 | } 298 | return 299 | } 300 | // remove annotation used for tracking start time 301 | annotationKey := GetWaitForPodCompletionStartTimeAnnotationKey() 302 | err = m.nodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, &node, annotationKey, "null") 303 | if err != nil { 304 | logEventf(m.eventRecorder, &node, corev1.EventTypeWarning, GetEventReason(), 305 | "Failed to remove annotation used to track job completions: %s", err.Error()) 306 | return 307 | } 308 | // update node state 309 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, &node, UpgradeStatePodDeletionRequired) 310 | m.log.V(consts.LogLevelInfo).Info("Updated the node state", "node", node.Name, 311 | "state", UpgradeStatePodDeletionRequired) 312 | }(*node) 313 | } 314 | // Wait for all goroutines to complete 315 | wg.Wait() 316 | return nil 317 | } 318 | 319 | // ListPods returns the list of pods in all namespaces with the given selector 320 | func (m *PodManagerImpl) ListPods(ctx context.Context, selector string, nodeName string) (*corev1.PodList, error) { 321 | listOptions := meta_v1.ListOptions{LabelSelector: selector, 322 | FieldSelector: fmt.Sprintf(nodeNameFieldSelectorFmt, nodeName)} 323 | podList, err := m.k8sInterface.CoreV1().Pods("").List(ctx, listOptions) 324 | if err != nil { 325 | return nil, err 326 | } 327 | return podList, nil 328 | } 329 | 330 | // HandleTimeoutOnPodCompletions transitions node based on the timeout for job completions on the node 331 | func (m *PodManagerImpl) HandleTimeoutOnPodCompletions(ctx 
context.Context, node *corev1.Node, 332 | timeoutSeconds int64) error { 333 | annotationKey := GetWaitForPodCompletionStartTimeAnnotationKey() 334 | currentTime := time.Now().Unix() 335 | // check if annotation already exists for tracking start time 336 | if _, present := node.Annotations[annotationKey]; !present { 337 | // add the annotation to track start time 338 | err := m.nodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, node, annotationKey, 339 | strconv.FormatInt(currentTime, 10)) 340 | if err != nil { 341 | m.log.V(consts.LogLevelError).Error(err, "Failed to add annotation to track job completions", 342 | "node", node.Name, "annotation", annotationKey) 343 | return err 344 | } 345 | return nil 346 | } 347 | // check if timeout reached 348 | startTime, err := strconv.ParseInt(node.Annotations[annotationKey], 10, 64) 349 | if err != nil { 350 | m.log.V(consts.LogLevelError).Error(err, "Failed to convert start time to track job completions", 351 | "node", node.Name) 352 | return err 353 | } 354 | if currentTime > startTime+timeoutSeconds { 355 | // timeout exceeded, mark node for pod/job deletions 356 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, node, UpgradeStatePodDeletionRequired) 357 | m.log.V(consts.LogLevelInfo).Info("Timeout exceeded for job completions, updated the node state", 358 | "node", node.Name, "state", UpgradeStatePodDeletionRequired) 359 | // remove annotation used for tracking start time 360 | err = m.nodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, node, annotationKey, "null") 361 | if err != nil { 362 | m.log.V(consts.LogLevelError).Error(err, "Failed to remove annotation used to track job completions", 363 | "node", node.Name, "annotation", annotationKey) 364 | return err 365 | } 366 | } 367 | return nil 368 | } 369 | 370 | // IsPodRunningOrPending returns true when the given pod is currently in Running or Pending state 371 | func (m *PodManagerImpl) IsPodRunningOrPending(pod corev1.Pod) bool { 372 | switch pod.Status.Phase { 373 | case corev1.PodRunning: 374 | m.log.V(consts.LogLevelDebug).Info("Pod status", "pod", pod.Name, "node", pod.Spec.NodeName, 375 | "state", corev1.PodRunning) 376 | return true 377 | case corev1.PodPending: 378 | m.log.V(consts.LogLevelInfo).Info("Pod status", "pod", pod.Name, "node", pod.Spec.NodeName, 379 | "state", corev1.PodPending) 380 | return true 381 | case corev1.PodFailed: 382 | m.log.V(consts.LogLevelInfo).Info("Pod status", "pod", pod.Name, "node", pod.Spec.NodeName, 383 | "state", corev1.PodFailed) 384 | return false 385 | case corev1.PodSucceeded: 386 | m.log.V(consts.LogLevelInfo).Info("Pod status", "pod", pod.Name, "node", pod.Spec.NodeName, 387 | "state", corev1.PodSucceeded) 388 | return false 389 | } 390 | return false 391 | } 392 | 393 | func (m *PodManagerImpl) updateNodeToDrainOrFailed(ctx context.Context, node corev1.Node, drainEnabled bool) { 394 | nextState := UpgradeStateFailed 395 | if drainEnabled { 396 | m.log.V(consts.LogLevelInfo).Info("Pod deletion failed but drain is enabled in spec. Will attempt a node drain", 397 | "node", node.Name) 398 | logEvent(m.eventRecorder, &node, corev1.EventTypeWarning, GetEventReason(), 399 | "Pod deletion failed but drain is enabled in spec. 
Will attempt a node drain") 400 | nextState = UpgradeStateDrainRequired 401 | } 402 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, &node, nextState) 403 | } 404 | 405 | // NewPodManager returns an instance of PodManager implementation 406 | func NewPodManager( 407 | k8sInterface kubernetes.Interface, 408 | nodeUpgradeStateProvider NodeUpgradeStateProvider, 409 | log logr.Logger, 410 | podDeletionFilter PodDeletionFilter, 411 | eventRecorder record.EventRecorder) *PodManagerImpl { 412 | mgr := &PodManagerImpl{ 413 | k8sInterface: k8sInterface, 414 | log: log, 415 | nodeUpgradeStateProvider: nodeUpgradeStateProvider, 416 | podDeletionFilter: podDeletionFilter, 417 | nodesInProgress: NewStringSet(), 418 | eventRecorder: eventRecorder, 419 | } 420 | 421 | return mgr 422 | } 423 | -------------------------------------------------------------------------------- /pkg/upgrade/safe_driver_load_manager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package upgrade 18 | 19 | import ( 20 | "context" 21 | 22 | "github.com/go-logr/logr" 23 | corev1 "k8s.io/api/core/v1" 24 | 25 | "github.com/NVIDIA/k8s-operator-libs/pkg/consts" 26 | ) 27 | 28 | // SafeDriverLoadManagerImpl default implementation of the SafeDriverLoadManager interface 29 | // Support for safe driver loading is implemented as a part of the upgrade flow. 30 | // When UpgradeStateManager detects a node that is waiting for a safe driver load, 31 | // it will unconditionally transfer it to the UpgradeStateUpgradeRequired state and wait for Cordon 32 | // and Drain operations to complete according to the upgrade policy. 33 | // When the Pod is eventually in the UpgradeStatePodRestartRequired state, 34 | // the UpgradeStateManager will unblock the driver loading (by removing the safe driver load annotation) 35 | // instead of restarting the Pod. 36 | // The default implementation of the SafeDriverLoadManager interface assumes that the driver's safe load 37 | // mechanism is implemented as a two-step procedure. 38 | // As a first step, the driver pod should load the init container, 39 | // which will set "safe driver load annotation" (defined in UpgradeWaitForSafeDriverLoadAnnotationKeyFmt) 40 | // on the node object, then the container blocks until another entity removes the annotation from the node object. 41 | // When the init container completes successfully (when the annotation was removed from the Node object), 42 | // the driver Pod will proceed to the second step and do the driver loading. 43 | // After that, the UpgradeStateManager will wait for the driver to become ready and then Uncordon the node if required. 44 | type SafeDriverLoadManagerImpl struct { 45 | nodeUpgradeStateProvider NodeUpgradeStateProvider 46 | log logr.Logger 47 | } 48 | 49 | // IsWaitingForSafeDriverLoad checks if driver Pod on the node is waiting for a safe load. 
50 | // The check is implemented by checking that the "safe driver loading annotation" is set on the Node object 51 | func (s *SafeDriverLoadManagerImpl) IsWaitingForSafeDriverLoad(_ context.Context, node *corev1.Node) (bool, error) { 52 | return node.Annotations[GetUpgradeDriverWaitForSafeLoadAnnotationKey()] != "", nil 53 | } 54 | 55 | // UnblockLoading unblocks driver loading on the node by removing the "safe driver loading annotation" 56 | // from the Node object 57 | func (s *SafeDriverLoadManagerImpl) UnblockLoading(ctx context.Context, node *corev1.Node) error { 58 | annotationKey := GetUpgradeDriverWaitForSafeLoadAnnotationKey() 59 | if node.Annotations[annotationKey] == "" { 60 | return nil 61 | } 62 | // driver on the node is waiting for safe load, unblock loading 63 | err := s.nodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, node, annotationKey, "null") 64 | if err != nil { 65 | s.log.V(consts.LogLevelError).Error( 66 | err, "Failed to change node upgrade annotation for node", "node", 67 | node, "annotation", annotationKey) 68 | return err 69 | } 70 | return nil 71 | } 72 | 73 | // SafeDriverLoadManager interface defines handlers to interact with drivers that are waiting for a safe load 74 | type SafeDriverLoadManager interface { 75 | // IsWaitingForSafeDriverLoad checks if driver Pod on the node is waiting for a safe load 76 | IsWaitingForSafeDriverLoad(ctx context.Context, node *corev1.Node) (bool, error) 77 | // UnblockLoading unblocks driver loading on the node 78 | UnblockLoading(ctx context.Context, node *corev1.Node) error 79 | } 80 | 81 | // NewSafeDriverLoadManager returns an instance of SafeDriverLoadManager implementation 82 | func NewSafeDriverLoadManager( 83 | nodeUpgradeStateProvider NodeUpgradeStateProvider, log logr.Logger) *SafeDriverLoadManagerImpl { 84 | mgr := &SafeDriverLoadManagerImpl{ 85 | log: log, 86 | nodeUpgradeStateProvider: nodeUpgradeStateProvider, 87 | } 88 | return mgr 89 | } 90 | -------------------------------------------------------------------------------- /pkg/upgrade/safe_driver_load_manager_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package upgrade_test 18 | 19 | import ( 20 | "fmt" 21 | 22 | "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" 23 | . "github.com/onsi/ginkgo/v2" 24 | .
"github.com/onsi/gomega" 25 | "k8s.io/apimachinery/pkg/types" 26 | "sigs.k8s.io/controller-runtime/pkg/client" 27 | 28 | corev1 "k8s.io/api/core/v1" 29 | ) 30 | 31 | var _ = Describe("SafeDriverLoadManager", func() { 32 | var ( 33 | node *corev1.Node 34 | id string 35 | mgr upgrade.SafeDriverLoadManager 36 | ) 37 | BeforeEach(func() { 38 | // generate random id for test 39 | id = randSeq(5) 40 | // create k8s objects 41 | node = createNode(fmt.Sprintf("node-%s", id)) 42 | mgr = upgrade.NewSafeDriverLoadManager(upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder), log) 43 | }) 44 | It("IsWaitingForSafeDriverLoad", func() { 45 | annotationKey := upgrade.GetUpgradeDriverWaitForSafeLoadAnnotationKey() 46 | Expect(k8sClient.Patch( 47 | testCtx, node, client.RawPatch(types.StrategicMergePatchType, 48 | []byte(fmt.Sprintf(`{"metadata":{"annotations":{%q: "true"}}}`, 49 | annotationKey))))).NotTo(HaveOccurred()) 50 | Expect(k8sClient.Get(testCtx, types.NamespacedName{Name: node.Name}, node)).NotTo(HaveOccurred()) 51 | Expect(mgr.IsWaitingForSafeDriverLoad(testCtx, node)).To(BeTrue()) 52 | Expect(k8sClient.Patch( 53 | testCtx, node, client.RawPatch(types.StrategicMergePatchType, 54 | []byte(fmt.Sprintf(`{"metadata":{"annotations":{%q: null}}}`, 55 | annotationKey))))).NotTo(HaveOccurred()) 56 | Expect(k8sClient.Get(testCtx, types.NamespacedName{Name: node.Name}, node)).NotTo(HaveOccurred()) 57 | Expect(mgr.IsWaitingForSafeDriverLoad(testCtx, node)).To(BeFalse()) 58 | }) 59 | It("UnblockLoading", func() { 60 | annotationKey := upgrade.GetUpgradeDriverWaitForSafeLoadAnnotationKey() 61 | Expect(k8sClient.Patch( 62 | testCtx, node, client.RawPatch(types.StrategicMergePatchType, 63 | []byte(fmt.Sprintf(`{"metadata":{"annotations":{%q: "true"}}}`, 64 | annotationKey))))).NotTo(HaveOccurred()) 65 | Expect(mgr.UnblockLoading(testCtx, node)).NotTo(HaveOccurred()) 66 | Expect(k8sClient.Get(testCtx, types.NamespacedName{Name: node.Name}, node)).NotTo(HaveOccurred()) 67 | Expect(node.Annotations[annotationKey]).To(BeEmpty()) 68 | // should not fail when called on non blocked node 69 | Expect(mgr.UnblockLoading(testCtx, node)).NotTo(HaveOccurred()) 70 | }) 71 | }) 72 | -------------------------------------------------------------------------------- /pkg/upgrade/upgrade_inplace.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package upgrade 18 | 19 | import ( 20 | "context" 21 | 22 | "k8s.io/apimachinery/pkg/util/intstr" 23 | 24 | "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" 25 | "github.com/NVIDIA/k8s-operator-libs/pkg/consts" 26 | ) 27 | 28 | // InplaceNodeStateManagerImpl contains the concrete implementation for the inplace upgrade mode 29 | type InplaceNodeStateManagerImpl struct { 30 | *CommonUpgradeManagerImpl 31 | } 32 | 33 | // NewInplaceNodeStateManagerImpl creates a new instance of InplaceNodeStateManagerImpl 34 | func NewInplaceNodeStateManagerImpl(commonmanager *CommonUpgradeManagerImpl) (ProcessNodeStateManager, 35 | error) { 36 | manager := &InplaceNodeStateManagerImpl{ 37 | CommonUpgradeManagerImpl: commonmanager, 38 | } 39 | return manager, nil 40 | } 41 | 42 | // ProcessUpgradeRequiredNodes processes UpgradeStateUpgradeRequired nodes and moves them to UpgradeStateCordonRequired 43 | // until the limit on max parallel upgrades is reached. 44 | func (m *InplaceNodeStateManagerImpl) ProcessUpgradeRequiredNodes( 45 | ctx context.Context, currentClusterState *ClusterUpgradeState, 46 | upgradePolicy *v1alpha1.DriverUpgradePolicySpec) error { 47 | var err error 48 | 49 | totalNodes := m.GetTotalManagedNodes(currentClusterState) 50 | upgradesInProgress := m.GetUpgradesInProgress(currentClusterState) 51 | currentUnavailableNodes := m.GetCurrentUnavailableNodes(currentClusterState) 52 | maxUnavailable := totalNodes 53 | 54 | if upgradePolicy.MaxUnavailable != nil { 55 | maxUnavailable, err = intstr.GetScaledValueFromIntOrPercent(upgradePolicy.MaxUnavailable, totalNodes, true) 56 | if err != nil { 57 | m.Log.V(consts.LogLevelError).Error(err, "Failed to compute maxUnavailable from the current total nodes") 58 | return err 59 | } 60 | } 61 | upgradesAvailable := m.GetUpgradesAvailable(currentClusterState, upgradePolicy.MaxParallelUpgrades, 62 | maxUnavailable) 63 | m.Log.V(consts.LogLevelInfo).Info("Upgrades in progress", 64 | "currently in progress", upgradesInProgress, 65 | "max parallel upgrades", upgradePolicy.MaxParallelUpgrades, 66 | "upgrade slots available", upgradesAvailable, 67 | "currently unavailable nodes", currentUnavailableNodes, 68 | "total number of nodes", totalNodes, 69 | "maximum nodes that can be unavailable", maxUnavailable) 70 | 71 | for _, nodeState := range currentClusterState.NodeStates[UpgradeStateUpgradeRequired] { 72 | if m.IsUpgradeRequested(nodeState.Node) { 73 | // Make sure to remove the upgrade-requested annotation 74 | err := m.NodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, nodeState.Node, 75 | GetUpgradeRequestedAnnotationKey(), "null") 76 | if err != nil { 77 | m.Log.V(consts.LogLevelError).Error( 78 | err, "Failed to delete node upgrade-requested annotation") 79 | return err 80 | } 81 | } 82 | if m.SkipNodeUpgrade(nodeState.Node) { 83 | m.Log.V(consts.LogLevelInfo).Info("Node is marked for skipping upgrades", "node", nodeState.Node.Name) 84 | continue 85 | } 86 | 87 | if upgradesAvailable <= 0 { 88 | // when no new node upgrades are available, progress with manually cordoned nodes 89 | if m.IsNodeUnschedulable(nodeState.Node) { 90 | m.Log.V(consts.LogLevelDebug).Info("Node is already cordoned, progressing for driver upgrade", 91 | "node", nodeState.Node.Name) 92 | } else { 93 | m.Log.V(consts.LogLevelDebug).Info("Node upgrade limit reached, pausing further upgrades", 94 | "node", nodeState.Node.Name) 95 | continue 96 | } 97 | } 98 | 99 | err := m.NodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, nodeState.Node, 
UpgradeStateCordonRequired) 100 | if err == nil { 101 | upgradesAvailable-- 102 | m.Log.V(consts.LogLevelInfo).Info("Node waiting for cordon", 103 | "node", nodeState.Node.Name) 104 | } else { 105 | m.Log.V(consts.LogLevelError).Error( 106 | err, "Failed to change node upgrade state", "state", UpgradeStateCordonRequired) 107 | return err 108 | } 109 | } 110 | 111 | return nil 112 | } 113 | 114 | // ProcessNodeMaintenanceRequiredNodes is a no-op that exists to satisfy the ProcessNodeStateManager interface 115 | func (m *InplaceNodeStateManagerImpl) ProcessNodeMaintenanceRequiredNodes(ctx context.Context, 116 | currentClusterState *ClusterUpgradeState) error { 117 | _ = ctx 118 | _ = currentClusterState 119 | return nil 120 | } 121 | 122 | // ProcessUncordonRequiredNodes processes UpgradeStateUncordonRequired nodes, 123 | // uncordons them and moves them to UpgradeStateDone state 124 | func (m *InplaceNodeStateManagerImpl) ProcessUncordonRequiredNodes( 125 | ctx context.Context, currentClusterState *ClusterUpgradeState) error { 126 | m.Log.V(consts.LogLevelInfo).Info("ProcessUncordonRequiredNodes") 127 | 128 | for _, nodeState := range currentClusterState.NodeStates[UpgradeStateUncordonRequired] { 129 | // skip if the node was already uncordoned by the maintenance operator 130 | if nodeState.NodeMaintenance != nil { 131 | continue 132 | } 133 | err := m.CordonManager.Uncordon(ctx, nodeState.Node) 134 | if err != nil { 135 | m.Log.V(consts.LogLevelWarning).Error( 136 | err, "Node uncordon failed", "node", nodeState.Node) 137 | return err 138 | } 139 | err = m.NodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, nodeState.Node, UpgradeStateDone) 140 | if err != nil { 141 | m.Log.V(consts.LogLevelError).Error( 142 | err, "Failed to change node upgrade state", "state", UpgradeStateDone) 143 | return err 144 | } 145 | } 146 | return nil 147 | } 148 | -------------------------------------------------------------------------------- /pkg/upgrade/upgrade_state.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License.
15 | */ 16 | 17 | package upgrade 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | 23 | "github.com/go-logr/logr" 24 | appsv1 "k8s.io/api/apps/v1" 25 | corev1 "k8s.io/api/core/v1" 26 | "k8s.io/client-go/rest" 27 | "k8s.io/client-go/tools/record" 28 | "sigs.k8s.io/controller-runtime/pkg/client" 29 | 30 | "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" 31 | "github.com/NVIDIA/k8s-operator-libs/pkg/consts" 32 | ) 33 | 34 | // ClusterUpgradeStateManager is an interface for performing cluster upgrades of driver containers 35 | type ClusterUpgradeStateManager interface { 36 | CommonUpgradeStateManager 37 | // WithPodDeletionEnabled provides an option to enable the optional 'pod-deletion' 38 | // state and pass a custom PodDeletionFilter to use 39 | WithPodDeletionEnabled(filter PodDeletionFilter) ClusterUpgradeStateManager 40 | // WithValidationEnabled provides an option to enable the optional 'validation' state 41 | // and pass a podSelector to specify which pods are performing the validation 42 | WithValidationEnabled(podSelector string) ClusterUpgradeStateManager 43 | // BuildState builds a point-in-time snapshot of the driver upgrade state in the cluster. 44 | BuildState(ctx context.Context, namespace string, 45 | driverLabels map[string]string) (*ClusterUpgradeState, error) 46 | // ApplyState receives a complete cluster upgrade state and, based on upgrade policy, processes each node's state. 47 | // Based on the current state of the node, it is calculated if the node can be moved to the next state right now 48 | // or whether any actions need to be scheduled for the node to move to the next state. 49 | // The function is stateless and idempotent. If the error was returned before all nodes' states were processed, 50 | // ApplyState would be called again and complete the processing - all the decisions are based on the input data. 51 | ApplyState(ctx context.Context, 52 | currentState *ClusterUpgradeState, upgradePolicy *v1alpha1.DriverUpgradePolicySpec) (err error) 53 | } 54 | 55 | // ClusterUpgradeStateManagerImpl serves as a state machine for the ClusterUpgradeState 56 | // It processes each node and based on its state schedules the required jobs to change their state to the next one 57 | type ClusterUpgradeStateManagerImpl struct { 58 | *CommonUpgradeManagerImpl 59 | inplace ProcessNodeStateManager 60 | requestor ProcessNodeStateManager 61 | opts StateOptions 62 | } 63 | 64 | // NewClusterUpgradeStateManager creates a new instance of ClusterUpgradeStateManagerImpl 65 | func NewClusterUpgradeStateManager( 66 | log logr.Logger, 67 | k8sConfig *rest.Config, 68 | eventRecorder record.EventRecorder, 69 | opts StateOptions) (ClusterUpgradeStateManager, error) { 70 | commonmanager, err := NewCommonUpgradeStateManager(log, k8sConfig, Scheme, eventRecorder) 71 | if err != nil { 72 | return nil, fmt.Errorf("failed to create commonmanager upgrade state manager. %v", err) 73 | } 74 | requestor, err := NewRequestorNodeStateManagerImpl(commonmanager, opts.Requestor) 75 | if err != nil && err != ErrNodeMaintenanceUpgradeDisabled { 76 | return nil, fmt.Errorf("failed to create requestor upgrade state manager. %v", err) 77 | } 78 | 79 | inplace, err := NewInplaceNodeStateManagerImpl(commonmanager) 80 | if err != nil { 81 | return nil, fmt.Errorf("failed to create inplace upgrade state manager. 
%v", err) 82 | } 83 | 84 | manager := &ClusterUpgradeStateManagerImpl{ 85 | CommonUpgradeManagerImpl: commonmanager, 86 | requestor: requestor, 87 | inplace: inplace, 88 | opts: opts, 89 | } 90 | 91 | return manager, nil 92 | } 93 | 94 | type StateOptions struct { 95 | Requestor RequestorOptions 96 | } 97 | 98 | // BuildState builds a point-in-time snapshot of the driver upgrade state in the cluster. 99 | func (m *ClusterUpgradeStateManagerImpl) BuildState(ctx context.Context, namespace string, 100 | driverLabels map[string]string) (*ClusterUpgradeState, error) { 101 | m.Log.V(consts.LogLevelInfo).Info("Building state") 102 | 103 | upgradeState := NewClusterUpgradeState() 104 | 105 | daemonSets, err := m.GetDriverDaemonSets(ctx, namespace, driverLabels) 106 | if err != nil { 107 | m.Log.V(consts.LogLevelError).Error(err, "Failed to get driver DaemonSet list") 108 | return nil, err 109 | } 110 | 111 | m.Log.V(consts.LogLevelDebug).Info("Got driver DaemonSets", "length", len(daemonSets)) 112 | 113 | // Get list of driver pods 114 | podList := &corev1.PodList{} 115 | 116 | err = m.K8sClient.List(ctx, podList, 117 | client.InNamespace(namespace), 118 | client.MatchingLabels(driverLabels), 119 | ) 120 | 121 | if err != nil { 122 | return nil, err 123 | } 124 | 125 | filteredPodList := []corev1.Pod{} 126 | for _, ds := range daemonSets { 127 | dsPods := m.GetPodsOwnedbyDs(ds, podList.Items) 128 | if int(ds.Status.DesiredNumberScheduled) != len(dsPods) { 129 | m.Log.V(consts.LogLevelInfo).Info("Driver DaemonSet has Unscheduled pods", "name", ds.Name) 130 | return nil, fmt.Errorf("driver DaemonSet should not have Unscheduled pods") 131 | } 132 | filteredPodList = append(filteredPodList, dsPods...) 133 | } 134 | 135 | // Collect also orphaned driver pods 136 | filteredPodList = append(filteredPodList, m.GetOrphanedPods(podList.Items)...) 137 | 138 | upgradeStateLabel := GetUpgradeStateLabelKey() 139 | 140 | for i := range filteredPodList { 141 | pod := &filteredPodList[i] 142 | var ownerDaemonSet *appsv1.DaemonSet 143 | if IsOrphanedPod(pod) { 144 | ownerDaemonSet = nil 145 | } else { 146 | ownerDaemonSet = daemonSets[pod.OwnerReferences[0].UID] 147 | } 148 | // Check if pod is already scheduled to a Node 149 | if pod.Spec.NodeName == "" && pod.Status.Phase == corev1.PodPending { 150 | m.Log.V(consts.LogLevelInfo).Info("Driver Pod has no NodeName, skipping", "pod", pod.Name) 151 | continue 152 | } 153 | nodeState, err := m.buildNodeUpgradeState(ctx, pod, ownerDaemonSet) 154 | if err != nil { 155 | m.Log.V(consts.LogLevelError).Error(err, "Failed to build node upgrade state for pod", "pod", pod) 156 | return nil, err 157 | } 158 | nodeStateLabel := nodeState.Node.Labels[upgradeStateLabel] 159 | upgradeState.NodeStates[nodeStateLabel] = append( 160 | upgradeState.NodeStates[nodeStateLabel], nodeState) 161 | } 162 | 163 | return &upgradeState, nil 164 | } 165 | 166 | // ApplyState receives a complete cluster upgrade state and, based on upgrade policy, processes each node's state. 167 | // Based on the current state of the node, it is calculated if the node can be moved to the next state right now 168 | // or whether any actions need to be scheduled for the node to move to the next state. 169 | // The function is stateless and idempotent. If the error was returned before all nodes' states were processed, 170 | // ApplyState would be called again and complete the processing - all the decisions are based on the input data. 
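// A typical reconcile-loop sketch (illustrative; "stateManager", "namespace", "driverLabels" and "policy" are assumed to be supplied by the caller): // //	state, err := stateManager.BuildState(ctx, namespace, driverLabels) //	if err != nil { //		return err //	} //	if err := stateManager.ApplyState(ctx, state, policy); err != nil { //		return err //	}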
171 | func (m *ClusterUpgradeStateManagerImpl) ApplyState(ctx context.Context, 172 | currentState *ClusterUpgradeState, upgradePolicy *v1alpha1.DriverUpgradePolicySpec) (err error) { 173 | m.Log.V(consts.LogLevelInfo).Info("State Manager, got state update") 174 | 175 | if currentState == nil { 176 | return fmt.Errorf("currentState should not be empty") 177 | } 178 | 179 | if upgradePolicy == nil || !upgradePolicy.AutoUpgrade { 180 | m.Log.V(consts.LogLevelInfo).Info("Driver auto upgrade is disabled, skipping") 181 | return nil 182 | } 183 | 184 | m.Log.V(consts.LogLevelInfo).Info("Node states:", 185 | "Unknown", len(currentState.NodeStates[UpgradeStateUnknown]), 186 | UpgradeStateDone, len(currentState.NodeStates[UpgradeStateDone]), 187 | UpgradeStateUpgradeRequired, len(currentState.NodeStates[UpgradeStateUpgradeRequired]), 188 | UpgradeStateCordonRequired, len(currentState.NodeStates[UpgradeStateCordonRequired]), 189 | UpgradeStateWaitForJobsRequired, len(currentState.NodeStates[UpgradeStateWaitForJobsRequired]), 190 | UpgradeStatePodDeletionRequired, len(currentState.NodeStates[UpgradeStatePodDeletionRequired]), 191 | UpgradeStateFailed, len(currentState.NodeStates[UpgradeStateFailed]), 192 | UpgradeStateDrainRequired, len(currentState.NodeStates[UpgradeStateDrainRequired]), 193 | UpgradeStateNodeMaintenanceRequired, len(currentState.NodeStates[UpgradeStateNodeMaintenanceRequired]), 194 | UpgradeStatePostMaintenanceRequired, len(currentState.NodeStates[UpgradeStatePostMaintenanceRequired]), 195 | UpgradeStatePodRestartRequired, len(currentState.NodeStates[UpgradeStatePodRestartRequired]), 196 | UpgradeStateValidationRequired, len(currentState.NodeStates[UpgradeStateValidationRequired]), 197 | UpgradeStateUncordonRequired, len(currentState.NodeStates[UpgradeStateUncordonRequired])) 198 | 199 | // Determine the object to log this event 200 | // m.EventRecorder.Eventf(m.Namespace, v1.EventTypeNormal, GetEventReason(), 201 | // "InProgress: %d, MaxParallelUpgrades: %d, UpgradeSlotsAvailable: %s", upgradesInProgress, 202 | // upgradePolicy.MaxParallelUpgrades, upgradesAvailable) 203 | 204 | // First, check if unknown or ready nodes need to be upgraded 205 | err = m.ProcessDoneOrUnknownNodes(ctx, currentState, UpgradeStateUnknown) 206 | if err != nil { 207 | m.Log.V(consts.LogLevelError).Error(err, "Failed to process nodes", "state", UpgradeStateUnknown) 208 | return err 209 | } 210 | err = m.ProcessDoneOrUnknownNodes(ctx, currentState, UpgradeStateDone) 211 | if err != nil { 212 | m.Log.V(consts.LogLevelError).Error(err, "Failed to process nodes", "state", UpgradeStateDone) 213 | return err 214 | } 215 | // Start upgrade process for upgradesAvailable number of nodes 216 | err = m.ProcessUpgradeRequiredNodesWrapper(ctx, currentState, upgradePolicy) 217 | if err != nil { 218 | m.Log.V(consts.LogLevelError).Error( 219 | err, "Failed to process nodes", "state", UpgradeStateUpgradeRequired) 220 | return err 221 | } 222 | 223 | err = m.ProcessCordonRequiredNodes(ctx, currentState) 224 | if err != nil { 225 | m.Log.V(consts.LogLevelError).Error(err, "Failed to cordon nodes") 226 | return err 227 | } 228 | 229 | err = m.ProcessWaitForJobsRequiredNodes(ctx, currentState, upgradePolicy.WaitForCompletion) 230 | if err != nil { 231 | m.Log.V(consts.LogLevelError).Error(err, "Failed while waiting for required jobs to complete") 232 | return err 233 | } 234 | 235 | drainEnabled := upgradePolicy.DrainSpec != nil && upgradePolicy.DrainSpec.Enable 236 | err = m.ProcessPodDeletionRequiredNodes(ctx, currentState, 
upgradePolicy.PodDeletion, drainEnabled) 237 | if err != nil { 238 | m.Log.V(consts.LogLevelError).Error(err, "Failed to delete pods") 239 | return err 240 | } 241 | 242 | // Schedule nodes for drain 243 | err = m.ProcessDrainNodes(ctx, currentState, upgradePolicy.DrainSpec) 244 | if err != nil { 245 | m.Log.V(consts.LogLevelError).Error(err, "Failed to schedule nodes drain") 246 | return err 247 | } 248 | 249 | // TODO: in future versions we'll remove 'pod-restart-required' and use 'post-maintenance-required' instead 250 | // to indicate general post-maintenance node operations (e.g. restarting driver pods, rebooting the node, etc.) 251 | err = m.ProcessNodeMaintenanceRequiredNodesWrapper(ctx, currentState) 252 | if err != nil { 253 | m.Log.V(consts.LogLevelError).Error(err, "Failed to process post-maintenance operations") 254 | return err 255 | } 256 | 257 | err = m.ProcessPodRestartNodes(ctx, currentState) 258 | if err != nil { 259 | m.Log.V(consts.LogLevelError).Error(err, "Failed to process nodes in 'pod-restart-required' state") 260 | return err 261 | } 262 | 263 | err = m.ProcessUpgradeFailedNodes(ctx, currentState) 264 | if err != nil { 265 | m.Log.V(consts.LogLevelError).Error(err, "Failed to process nodes in 'upgrade-failed' state") 266 | return err 267 | } 268 | err = m.ProcessValidationRequiredNodes(ctx, currentState) 269 | if err != nil { 270 | m.Log.V(consts.LogLevelError).Error(err, "Failed to validate driver upgrade") 271 | return err 272 | } 273 | 274 | err = m.ProcessUncordonRequiredNodesWrapper(ctx, currentState) 275 | if err != nil { 276 | m.Log.V(consts.LogLevelError).Error(err, "Failed to uncordon nodes") 277 | return err 278 | } 279 | m.Log.V(consts.LogLevelInfo).Info("State Manager, finished processing") 280 | return nil 281 | } 282 | 283 | func (m *ClusterUpgradeStateManagerImpl) GetRequestor() ProcessNodeStateManager { 284 | return m.requestor 285 | } 286 | 287 | func (m *ClusterUpgradeStateManagerImpl) ProcessUpgradeRequiredNodesWrapper(ctx context.Context, 288 | currentState *ClusterUpgradeState, upgradePolicy *v1alpha1.DriverUpgradePolicySpec) error { 289 | var err error 290 | // Start upgrade process for upgradesAvailable number of nodes 291 | if m.opts.Requestor.UseMaintenanceOperator { 292 | err = m.requestor.ProcessUpgradeRequiredNodes(ctx, currentState, upgradePolicy) 293 | } else { 294 | err = m.inplace.ProcessUpgradeRequiredNodes(ctx, currentState, upgradePolicy) 295 | } 296 | return err 297 | } 298 | 299 | func (m *ClusterUpgradeStateManagerImpl) ProcessNodeMaintenanceRequiredNodesWrapper(ctx context.Context, 300 | currentState *ClusterUpgradeState) error { 301 | var err error 302 | if m.opts.Requestor.UseMaintenanceOperator { 303 | if err = m.requestor.ProcessNodeMaintenanceRequiredNodes(ctx, currentState); err != nil { 304 | return err 305 | } 306 | } 307 | 308 | return err 309 | } 310 | 311 | func (m *ClusterUpgradeStateManagerImpl) ProcessUncordonRequiredNodesWrapper(ctx context.Context, 312 | currentState *ClusterUpgradeState) error { 313 | // Both the inplace and requestor ProcessUncordonRequiredNodes are called here to handle the case 314 | // where some nodes started, but have not yet completed, the inplace upgrade process 315 | // before requestor upgrade mode was enabled. In that case, although requestor upgrade mode is enabled, 316 | // the inplace flow keeps processing the pending nodes that already began the inplace upgrade.
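// For example: a node still labeled 'uncordon-required' by an inplace upgrade that began before UseMaintenanceOperator was enabled is finished by m.inplace below, while nodes upgraded through the maintenance operator are uncordoned by m.requestor.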
317 | err := m.inplace.ProcessUncordonRequiredNodes(ctx, currentState) 318 | if err != nil { 319 | return err 320 | } 321 | if m.opts.Requestor.UseMaintenanceOperator { 322 | err = m.requestor.ProcessUncordonRequiredNodes(ctx, currentState) 323 | } 324 | return err 325 | } 326 | 327 | // WithPodDeletionEnabled provides an option to enable the optional 'pod-deletion' state and pass a custom 328 | // PodDeletionFilter to use 329 | func (m *ClusterUpgradeStateManagerImpl) WithPodDeletionEnabled(filter PodDeletionFilter) ClusterUpgradeStateManager { 330 | if filter == nil { 331 | m.Log.V(consts.LogLevelWarning).Info("Cannot enable PodDeletion state as PodDeletionFilter is nil") 332 | return m 333 | } 334 | m.PodManager = NewPodManager(m.K8sInterface, m.NodeUpgradeStateProvider, m.Log, filter, m.EventRecorder) 335 | m.podDeletionStateEnabled = true 336 | return m 337 | } 338 | 339 | // WithValidationEnabled provides an option to enable the optional 'validation' state and pass a podSelector to specify 340 | // which pods are performing the validation 341 | func (m *ClusterUpgradeStateManagerImpl) WithValidationEnabled(podSelector string) ClusterUpgradeStateManager { 342 | if podSelector == "" { 343 | m.Log.V(consts.LogLevelWarning).Info("Cannot enable Validation state as podSelector is empty") 344 | return m 345 | } 346 | m.ValidationManager = NewValidationManager(m.K8sInterface, m.Log, m.EventRecorder, m.NodeUpgradeStateProvider, 347 | podSelector) 348 | m.validationStateEnabled = true 349 | return m 350 | } 351 | 352 | // buildNodeUpgradeState creates a mapping between a node, 353 | // the driver pod running on it, and the DaemonSet controlling that pod 354 | func (m *ClusterUpgradeStateManagerImpl) buildNodeUpgradeState( 355 | ctx context.Context, pod *corev1.Pod, ds *appsv1.DaemonSet) (*NodeUpgradeState, error) { 356 | var nm client.Object 357 | node, err := m.NodeUpgradeStateProvider.GetNode(ctx, pod.Spec.NodeName) 358 | if err != nil { 359 | return nil, fmt.Errorf("unable to get node %s: %v", pod.Spec.NodeName, err) 360 | } 361 | 362 | if m.opts.Requestor.UseMaintenanceOperator { 363 | rum, ok := m.requestor.(*RequestorNodeStateManagerImpl) 364 | if !ok { 365 | return nil, fmt.Errorf("failed to cast requestor upgrade manager") 366 | } 367 | nm, err = rum.GetNodeMaintenanceObj(ctx, node.Name) 368 | if err != nil { 369 | return nil, fmt.Errorf("failed to fetch nodeMaintenance obj: %v", err) 370 | } 371 | } 372 | 373 | upgradeStateLabel := GetUpgradeStateLabelKey() 374 | m.Log.V(consts.LogLevelInfo).Info("Node hosting a driver pod", 375 | "node", node.Name, "state", node.Labels[upgradeStateLabel]) 376 | 377 | return &NodeUpgradeState{Node: node, DriverPod: pod, DriverDaemonSet: ds, NodeMaintenance: nm}, nil 378 | } 379 | -------------------------------------------------------------------------------- /pkg/upgrade/upgrade_suit_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package upgrade_test 18 | 19 | import ( 20 | "context" 21 | "math/rand" 22 | "path/filepath" 23 | "testing" 24 | 25 | maintenancev1alpha1 "github.com/Mellanox/maintenance-operator/api/v1alpha1" 26 | "github.com/go-logr/logr" 27 | . "github.com/onsi/ginkgo/v2" 28 | . "github.com/onsi/gomega" 29 | "github.com/stretchr/testify/mock" 30 | appsv1 "k8s.io/api/apps/v1" 31 | corev1 "k8s.io/api/core/v1" 32 | "k8s.io/apimachinery/pkg/api/errors" 33 | "k8s.io/apimachinery/pkg/api/resource" 34 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 35 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 | "k8s.io/apimachinery/pkg/types" 37 | "k8s.io/client-go/kubernetes" 38 | "k8s.io/client-go/rest" 39 | "k8s.io/client-go/tools/record" 40 | ctrl "sigs.k8s.io/controller-runtime" 41 | "sigs.k8s.io/controller-runtime/pkg/client" 42 | "sigs.k8s.io/controller-runtime/pkg/envtest" 43 | logf "sigs.k8s.io/controller-runtime/pkg/log" 44 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 45 | 46 | "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" 47 | "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/mocks" 48 | // +kubebuilder:scaffold:imports 49 | ) 50 | 51 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 52 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 53 | var ( 54 | k8sConfig *rest.Config 55 | k8sClient client.Client 56 | k8sInterface kubernetes.Interface 57 | testEnv *envtest.Environment 58 | log logr.Logger 59 | nodeUpgradeStateProvider mocks.NodeUpgradeStateProvider 60 | drainManager mocks.DrainManager 61 | podManager mocks.PodManager 62 | cordonManager mocks.CordonManager 63 | validationManager mocks.ValidationManager 64 | eventRecorder = record.NewFakeRecorder(100) 65 | createdObjects []client.Object 66 | testCtx context.Context 67 | ) 68 | 69 | func TestAPIs(t *testing.T) { 70 | RegisterFailHandler(Fail) 71 | 72 | RunSpecs(t, "Controller Suite") 73 | } 74 | 75 | var _ = BeforeSuite(func() { 76 | logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) 77 | 78 | // set up context 79 | testCtx = ctrl.SetupSignalHandler() 80 | By("bootstrapping test environment") 81 | testEnv = &envtest.Environment{ 82 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "hack", "crd", "bases")}, 83 | } 84 | 85 | var err error 86 | k8sConfig, err = testEnv.Start() 87 | Expect(err).NotTo(HaveOccurred()) 88 | Expect(k8sConfig).NotTo(BeNil()) 89 | 90 | err = maintenancev1alpha1.AddToScheme(upgrade.Scheme) 91 | Expect(err).NotTo(HaveOccurred()) 92 | 93 | // +kubebuilder:scaffold:scheme 94 | 95 | k8sClient, err = client.New(k8sConfig, client.Options{Scheme: upgrade.Scheme}) 96 | Expect(err).NotTo(HaveOccurred()) 97 | Expect(k8sClient).NotTo(BeNil()) 98 | 99 | k8sInterface, err = kubernetes.NewForConfig(k8sConfig) 100 | Expect(err).NotTo(HaveOccurred()) 101 | Expect(k8sInterface).NotTo(BeNil()) 102 | 103 | log = ctrl.Log.WithName("upgradeSuitTest") 104 | 105 | // set driver name to be managed by the upgrade-manager 106 | upgrade.SetDriverName("gpu") 107 | 108 | nodeUpgradeStateProvider = mocks.NodeUpgradeStateProvider{} 109 | nodeUpgradeStateProvider. 110 | On("ChangeNodeUpgradeState", mock.Anything, mock.Anything, mock.Anything). 111 | Return(func(ctx context.Context, node *corev1.Node, newNodeState string) error { 112 | node.Labels[upgrade.GetUpgradeStateLabelKey()] = newNodeState 113 | return nil 114 | }) 115 | nodeUpgradeStateProvider. 
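// the annotation mock mirrors the real provider's contract: the sentinel value "null" removes the annotation instead of setting it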
116 | On("ChangeNodeUpgradeAnnotation", mock.Anything, mock.Anything, mock.Anything, mock.Anything). 117 | Return(func(ctx context.Context, node *corev1.Node, key string, value string) error { 118 | if value == "null" { 119 | delete(node.Annotations, key) 120 | } else { 121 | node.Annotations[key] = value 122 | } 123 | return nil 124 | }) 125 | nodeUpgradeStateProvider. 126 | On("GetNode", mock.Anything, mock.Anything). 127 | Return( 128 | func(ctx context.Context, nodeName string) *corev1.Node { 129 | return getNode(nodeName) 130 | }, 131 | func(ctx context.Context, nodeName string) error { 132 | return nil 133 | }, 134 | ) 135 | 136 | drainManager = mocks.DrainManager{} 137 | drainManager. 138 | On("ScheduleNodesDrain", mock.Anything, mock.Anything). 139 | Return(nil) 140 | podManager = mocks.PodManager{} 141 | podManager. 142 | On("SchedulePodsRestart", mock.Anything, mock.Anything). 143 | Return(nil) 144 | podManager. 145 | On("ScheduleCheckOnPodCompletion", mock.Anything, mock.Anything). 146 | Return(nil) 147 | podManager. 148 | On("SchedulePodEviction", mock.Anything, mock.Anything). 149 | Return(nil) 150 | podManager. 151 | On("GetPodDeletionFilter"). 152 | Return(nil) 153 | podManager. 154 | On("GetPodControllerRevisionHash", mock.Anything). 155 | Return( 156 | func(pod *corev1.Pod) string { 157 | return pod.Labels[upgrade.PodControllerRevisionHashLabelKey] 158 | }, 159 | func(pod *corev1.Pod) error { 160 | return nil 161 | }, 162 | ) 163 | podManager. 164 | On("GetDaemonsetControllerRevisionHash", mock.Anything, mock.Anything, mock.Anything). 165 | Return("test-hash-12345", nil) 166 | cordonManager = mocks.CordonManager{} 167 | cordonManager. 168 | On("Cordon", mock.Anything, mock.Anything, mock.Anything). 169 | Return(nil) 170 | cordonManager. 171 | On("Uncordon", mock.Anything, mock.Anything, mock.Anything). 172 | Return(nil) 173 | validationManager = mocks.ValidationManager{} 174 | validationManager. 175 | On("Validate", mock.Anything, mock.Anything). 
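// validation is stubbed to always pass here; the real ValidationManager is exercised in validation_manager_test.go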
176 | Return(true, nil) 177 | }) 178 | 179 | var _ = AfterSuite(func() { 180 | By("tearing down the test environment") 181 | err := testEnv.Stop() 182 | Expect(err).NotTo(HaveOccurred()) 183 | }) 184 | 185 | var _ = BeforeEach(func() { 186 | createdObjects = nil 187 | }) 188 | 189 | var _ = AfterEach(func() { 190 | for i := range createdObjects { 191 | r := createdObjects[i] 192 | key := client.ObjectKeyFromObject(r) 193 | err := k8sClient.Delete(context.TODO(), r) 194 | if err != nil && !errors.IsNotFound(err) { 195 | Expect(err).NotTo(HaveOccurred()) 196 | } 197 | // drain events from FakeRecorder 198 | for len(eventRecorder.Events) > 0 { 199 | <-eventRecorder.Events 200 | } 201 | _, isNamespace := r.(*corev1.Namespace) 202 | if !isNamespace { 203 | Eventually(func() error { 204 | return k8sClient.Get(context.TODO(), key, r) 205 | }).Should(HaveOccurred()) 206 | } 207 | } 208 | }) 209 | 210 | type Node struct { 211 | *corev1.Node 212 | } 213 | 214 | func NewNode(name string) Node { 215 | node := &corev1.Node{ 216 | ObjectMeta: metav1.ObjectMeta{ 217 | Name: name, 218 | Labels: map[string]string{"dummy-key": "dummy-value"}, 219 | Annotations: map[string]string{"dummy-key": "dummy-value"}, 220 | }, 221 | } 222 | Expect(node.Labels).NotTo(BeNil()) 223 | return Node{node} 224 | } 225 | 226 | func (n Node) WithUpgradeState(state string) Node { 227 | if n.Labels == nil { 228 | n.Labels = make(map[string]string) 229 | } 230 | n.Labels[upgrade.GetUpgradeStateLabelKey()] = state 231 | return n 232 | } 233 | 234 | func (n Node) WithLabels(l map[string]string) Node { 235 | n.Labels = l 236 | return n 237 | } 238 | 239 | func (n Node) WithAnnotations(a map[string]string) Node { 240 | n.Annotations = a 241 | return n 242 | } 243 | 244 | func (n Node) Unschedulable(b bool) Node { 245 | n.Spec.Unschedulable = b 246 | return n 247 | } 248 | 249 | func (n Node) Create() *corev1.Node { 250 | node := n.Node 251 | err := k8sClient.Create(context.TODO(), node) 252 | Expect(err).NotTo(HaveOccurred()) 253 | createdObjects = append(createdObjects, node) 254 | return node 255 | } 256 | 257 | type NodeMaintenance struct { 258 | *maintenancev1alpha1.NodeMaintenance 259 | } 260 | 261 | func NewNodeMaintenance(name, namespace string) NodeMaintenance { 262 | nm := &maintenancev1alpha1.NodeMaintenance{ 263 | ObjectMeta: metav1.ObjectMeta{ 264 | Name: name, 265 | Namespace: namespace, 266 | }, 267 | Spec: maintenancev1alpha1.NodeMaintenanceSpec{ 268 | NodeName: name, 269 | RequestorID: "dummy-requestor.com", 270 | }, 271 | } 272 | 273 | return NodeMaintenance{nm} 274 | } 275 | 276 | func (m NodeMaintenance) WithConditions(condition v1.Condition) NodeMaintenance { 277 | conditions := []v1.Condition{} 278 | conditions = append(conditions, condition) 279 | status := maintenancev1alpha1.NodeMaintenanceStatus{ 280 | Conditions: conditions, 281 | } 282 | m.Status = status 283 | err := k8sClient.Status().Update(context.TODO(), m) 284 | Expect(err).NotTo(HaveOccurred()) 285 | 286 | return m 287 | } 288 | 289 | func (m NodeMaintenance) Create() *maintenancev1alpha1.NodeMaintenance { 290 | nm := m.NodeMaintenance 291 | err := k8sClient.Create(context.TODO(), nm) 292 | Expect(err).NotTo(HaveOccurred()) 293 | createdObjects = append(createdObjects, nm) 294 | 295 | return nm 296 | } 297 | 298 | type DaemonSet struct { 299 | *appsv1.DaemonSet 300 | 301 | desiredNumberScheduled int32 302 | } 303 | 304 | func NewDaemonSet(name, namespace string, selector map[string]string) DaemonSet { 305 | ds := &appsv1.DaemonSet{ 306 | ObjectMeta: 
metav1.ObjectMeta{ 307 | Name: name, 308 | Namespace: namespace, 309 | }, 310 | Spec: appsv1.DaemonSetSpec{ 311 | Selector: &metav1.LabelSelector{MatchLabels: selector}, 312 | Template: corev1.PodTemplateSpec{ 313 | ObjectMeta: metav1.ObjectMeta{ 314 | Labels: selector, 315 | }, 316 | Spec: corev1.PodSpec{ 317 | // fill in some required fields in the pod spec 318 | Containers: []corev1.Container{ 319 | {Name: "foo", Image: "foo"}, 320 | }, 321 | }, 322 | }, 323 | }, 324 | } 325 | return DaemonSet{ds, 0} 326 | } 327 | 328 | func (d DaemonSet) WithLabels(labels map[string]string) DaemonSet { 329 | d.ObjectMeta.Labels = labels 330 | return d 331 | } 332 | 333 | func (d DaemonSet) WithDesiredNumberScheduled(num int32) DaemonSet { 334 | d.desiredNumberScheduled = num 335 | return d 336 | } 337 | 338 | func (d DaemonSet) Create() *appsv1.DaemonSet { 339 | ds := d.DaemonSet 340 | err := k8sClient.Create(context.TODO(), ds) 341 | Expect(err).NotTo(HaveOccurred()) 342 | 343 | // set desiredNumberScheduled in the DaemonSet status 344 | ds.Status.DesiredNumberScheduled = d.desiredNumberScheduled 345 | err = k8sClient.Status().Update(context.TODO(), ds) 346 | Expect(err).NotTo(HaveOccurred()) 347 | createdObjects = append(createdObjects, ds) 348 | return ds 349 | } 350 | 351 | type Pod struct { 352 | *corev1.Pod 353 | } 354 | 355 | func NewPod(name, namespace, nodeName string) Pod { 356 | gracePeriodSeconds := int64(0) 357 | pod := &corev1.Pod{ 358 | ObjectMeta: metav1.ObjectMeta{ 359 | Name: name, 360 | Namespace: namespace, 361 | }, 362 | Spec: corev1.PodSpec{ 363 | TerminationGracePeriodSeconds: &gracePeriodSeconds, 364 | NodeName: nodeName, 365 | Containers: []corev1.Container{ 366 | { 367 | Name: "test-container", 368 | Image: "test-image", 369 | }, 370 | }, 371 | }, 372 | } 373 | 374 | return Pod{pod} 375 | } 376 | 377 | func (p Pod) WithLabels(labels map[string]string) Pod { 378 | p.ObjectMeta.Labels = labels 379 | return p 380 | } 381 | 382 | func (p Pod) WithEmptyDir() Pod { 383 | p.Spec.Volumes = []corev1.Volume{ 384 | { 385 | Name: "volume", 386 | VolumeSource: corev1.VolumeSource{ 387 | EmptyDir: &corev1.EmptyDirVolumeSource{}, 388 | }, 389 | }, 390 | } 391 | return p 392 | } 393 | 394 | func (p Pod) WithResource(name, quantity string) Pod { 395 | resourceQuantity, err := resource.ParseQuantity(quantity) 396 | Expect(err).NotTo(HaveOccurred()) 397 | p.Spec.Containers[0].Resources = corev1.ResourceRequirements{ 398 | Limits: corev1.ResourceList{ 399 | corev1.ResourceName(name): resourceQuantity, 400 | }, 401 | } 402 | return p 403 | } 404 | 405 | func (p Pod) WithOwnerReference(ownerRef metav1.OwnerReference) Pod { 406 | p.OwnerReferences = append(p.OwnerReferences, ownerRef) 407 | return p 408 | } 409 | 410 | func (p Pod) Create() *corev1.Pod { 411 | pod := p.Pod 412 | err := k8sClient.Create(context.TODO(), pod) 413 | Expect(err).NotTo(HaveOccurred()) 414 | 415 | // set Pod in Running state and mark Container as Ready 416 | pod.Status.Phase = corev1.PodRunning 417 | pod.Status.ContainerStatuses = []corev1.ContainerStatus{{Ready: true}} 418 | err = k8sClient.Status().Update(context.TODO(), pod) 419 | Expect(err).NotTo(HaveOccurred()) 420 | createdObjects = append(createdObjects, pod) 421 | return pod 422 | } 423 | 424 | func createNamespace(name string) *corev1.Namespace { 425 | namespace := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: name}} 426 | err := k8sClient.Create(context.TODO(), namespace) 427 | Expect(err).NotTo(HaveOccurred()) 428 | createdObjects =
append(createdObjects, namespace) 429 | return namespace 430 | } 431 | 432 | func updatePodStatus(pod *corev1.Pod) error { 433 | err := k8sClient.Status().Update(context.TODO(), pod) 434 | Expect(err).NotTo(HaveOccurred()) 435 | return err 436 | } 437 | 438 | func createNode(name string) *corev1.Node { 439 | node := &corev1.Node{} 440 | node.Name = name 441 | err := k8sClient.Create(context.TODO(), node) 442 | Expect(err).NotTo(HaveOccurred()) 443 | createdObjects = append(createdObjects, node) 444 | return node 445 | } 446 | 447 | func getNode(name string) *corev1.Node { 448 | node := &corev1.Node{} 449 | err := k8sClient.Get(context.TODO(), types.NamespacedName{Name: name}, node) 450 | Expect(err).NotTo(HaveOccurred()) 451 | Expect(node).NotTo(BeNil()) 452 | return node 453 | } 454 | 455 | func getNodeUpgradeState(node *corev1.Node) string { 456 | return node.Labels[upgrade.GetUpgradeStateLabelKey()] 457 | } 458 | 459 | func isUnschedulableAnnotationPresent(node *corev1.Node) bool { 460 | _, ok := node.Annotations[upgrade.GetUpgradeInitialStateAnnotationKey()] 461 | return ok 462 | } 463 | 464 | func deleteObj(obj client.Object) { 465 | Expect(k8sClient.Delete(context.TODO(), obj)).To(BeNil()) 466 | } 467 | 468 | func isWaitForCompletionAnnotationPresent(node *corev1.Node) bool { 469 | _, ok := node.Annotations[upgrade.GetWaitForPodCompletionStartTimeAnnotationKey()] 470 | return ok 471 | } 472 | 473 | func isValidationAnnotationPresent(node *corev1.Node) bool { 474 | _, ok := node.Annotations[upgrade.GetValidationStartTimeAnnotationKey()] 475 | return ok 476 | } 477 | 478 | func randSeq(n int) string { 479 | letters := []rune("abcdefghijklmnopqrstuvwxyz") 480 | b := make([]rune, n) 481 | for i := range b { 482 | b[i] = letters[rand.Intn(len(letters))] 483 | } 484 | return string(b) 485 | } 486 | -------------------------------------------------------------------------------- /pkg/upgrade/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package upgrade 18 | 19 | import ( 20 | "fmt" 21 | "strings" 22 | "sync" 23 | 24 | "k8s.io/apimachinery/pkg/runtime" 25 | "k8s.io/client-go/tools/record" 26 | ) 27 | 28 | // StringSet implements a thread-safe set of strings 29 | type StringSet struct { 30 | m map[string]bool 31 | mu sync.RWMutex 32 | } 33 | 34 | // NewStringSet creates a StringSet 35 | func NewStringSet() *StringSet { 36 | return &StringSet{ 37 | m: make(map[string]bool), 38 | mu: sync.RWMutex{}, 39 | } 40 | } 41 | 42 | // Add item to set 43 | func (s *StringSet) Add(item string) { 44 | s.mu.Lock() 45 | defer s.mu.Unlock() 46 | s.m[item] = true 47 | } 48 | 49 | // Remove deletes the specified item from the set 50 | func (s *StringSet) Remove(item string) { 51 | s.mu.Lock() 52 | defer s.mu.Unlock() 53 | delete(s.m, item) 54 | } 55 | 56 | // Has reports whether item exists in the set 57 | func (s *StringSet) Has(item string) bool { 58 | s.mu.RLock() 59 | defer s.mu.RUnlock() 60 | _, ok := s.m[item] 61 | return ok 62 | } 63 | 64 | // Clear removes all items from the set 65 | func (s *StringSet) Clear() { 66 | s.mu.Lock() 67 | defer s.mu.Unlock() 68 | s.m = make(map[string]bool) 69 | } 70 | 71 | // KeyedMutex provides per-key synchronized access 72 | type KeyedMutex struct { 73 | mutexes sync.Map // Zero value is empty and ready for use 74 | } 75 | 76 | // UnlockFunc is a function that releases a lock 77 | type UnlockFunc = func() 78 | 79 | // Lock locks the mutex associated with the given key and returns an unlock function 80 | func (m *KeyedMutex) Lock(key string) UnlockFunc { 81 | value, _ := m.mutexes.LoadOrStore(key, &sync.Mutex{}) 82 | mtx, ok := value.(*sync.Mutex) 83 | if !ok { 84 | panic("stored object is not of the expected type *sync.Mutex") 85 | } 86 | mtx.Lock() 87 | return func() { mtx.Unlock() } 88 | } 89 | 90 | var ( 91 | // DriverName is the name of the driver to be managed by this package 92 | DriverName string 93 | ) 94 | 95 | // SetDriverName sets the name of the driver managed by the upgrade package 96 | func SetDriverName(driver string) { 97 | DriverName = driver 98 | } 99 | 100 | // GetUpgradeStateLabelKey returns state label key used for upgrades 101 | func GetUpgradeStateLabelKey() string { 102 | return fmt.Sprintf(UpgradeStateLabelKeyFmt, DriverName) 103 | } 104 | 105 | // GetUpgradeSkipNodeLabelKey returns node label used to skip upgrades 106 | func GetUpgradeSkipNodeLabelKey() string { 107 | return fmt.Sprintf(UpgradeSkipNodeLabelKeyFmt, DriverName) 108 | } 109 | 110 | // GetUpgradeDriverWaitForSafeLoadAnnotationKey returns the key for annotation used to mark node as waiting for driver 111 | // safe load 112 | func GetUpgradeDriverWaitForSafeLoadAnnotationKey() string { 113 | return fmt.Sprintf(UpgradeWaitForSafeDriverLoadAnnotationKeyFmt, DriverName) 114 | } 115 | 116 | // GetUpgradeRequestedAnnotationKey returns the key for annotation used to mark that a driver upgrade was requested 117 | // externally (orphaned pod) 118 | func GetUpgradeRequestedAnnotationKey() string { 119 | return fmt.Sprintf(UpgradeRequestedAnnotationKeyFmt, DriverName) 120 | } 121 | 122 | // GetUpgradeRequestorModeAnnotationKey returns the key for annotation used to mark node as requestor upgrade mode 123 | // in progress 124 | func GetUpgradeRequestorModeAnnotationKey() string { 125 | return fmt.Sprintf(UpgradeRequestorModeAnnotationKeyFmt, DriverName) 126 | } 127 | 128 | // GetUpgradeInitialStateAnnotationKey returns the key for annotation used to track initial state of the node 129 | func
GetUpgradeInitialStateAnnotationKey() string { 130 | return fmt.Sprintf(UpgradeInitialStateAnnotationKeyFmt, DriverName) 131 | } 132 | 133 | // GetWaitForPodCompletionStartTimeAnnotationKey returns the key for annotation used to track start time for waiting on 134 | // pod/job completions 135 | func GetWaitForPodCompletionStartTimeAnnotationKey() string { 136 | return fmt.Sprintf(UpgradeWaitForPodCompletionStartTimeAnnotationKeyFmt, DriverName) 137 | } 138 | 139 | // GetValidationStartTimeAnnotationKey returns the key for annotation indicating start time for validation-required 140 | // state 141 | func GetValidationStartTimeAnnotationKey() string { 142 | return fmt.Sprintf(UpgradeValidationStartTimeAnnotationKeyFmt, DriverName) 143 | } 144 | 145 | // GetEventReason returns the reason type based on the driver name 146 | func GetEventReason() string { 147 | return fmt.Sprintf("%sDriverUpgrade", strings.ToUpper(DriverName)) 148 | } 149 | 150 | // logEventf logs a formatted event for a given kubernetes object 151 | func logEventf(recorder record.EventRecorder, object runtime.Object, eventType string, reason string, messageFmt string, 152 | args ...interface{}) { 153 | if recorder != nil { 154 | recorder.Eventf(object, eventType, reason, messageFmt, args...) 155 | } 156 | } 157 | 158 | // logEvent logs an event for a given kubernetes object 159 | func logEvent(recorder record.EventRecorder, object runtime.Object, eventType string, reason string, 160 | messageFmt string) { 161 | if recorder != nil { 162 | recorder.Event(object, eventType, reason, messageFmt) 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /pkg/upgrade/validation_manager.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | 14 | package upgrade 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "strconv" 20 | "time" 21 | 22 | "github.com/go-logr/logr" 23 | corev1 "k8s.io/api/core/v1" 24 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 25 | "k8s.io/client-go/kubernetes" 26 | "k8s.io/client-go/tools/record" 27 | 28 | "github.com/NVIDIA/k8s-operator-libs/pkg/consts" 29 | ) 30 | 31 | const ( 32 | validationTimeoutSeconds = 600 33 | ) 34 | 35 | // ValidationManagerImpl implements the ValidationManager interface and waits on a validation pod, 36 | // identified via podSelector, to be Ready. 
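// A pod counts as Ready when its phase is Running and every entry in its ContainerStatuses reports Ready (see isPodReady below).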
37 | type ValidationManagerImpl struct { 38 | k8sInterface kubernetes.Interface 39 | log logr.Logger 40 | eventRecorder record.EventRecorder 41 | nodeUpgradeStateProvider NodeUpgradeStateProvider 42 | 43 | // podSelector selects the pods performing validation on the node after a driver upgrade 44 | podSelector string 45 | } 46 | 47 | // ValidationManager is an interface for validating driver upgrades 48 | type ValidationManager interface { 49 | Validate(ctx context.Context, node *corev1.Node) (bool, error) 50 | } 51 | 52 | // NewValidationManager returns an instance of the ValidationManager implementation 53 | func NewValidationManager( 54 | k8sInterface kubernetes.Interface, 55 | log logr.Logger, 56 | eventRecorder record.EventRecorder, 57 | nodeUpgradeStateProvider NodeUpgradeStateProvider, 58 | podSelector string) *ValidationManagerImpl { 59 | mgr := &ValidationManagerImpl{ 60 | k8sInterface: k8sInterface, 61 | log: log, 62 | eventRecorder: eventRecorder, 63 | nodeUpgradeStateProvider: nodeUpgradeStateProvider, 64 | podSelector: podSelector, 65 | } 66 | 67 | return mgr 68 | } 69 | 70 | // Validate checks whether the validation pod(s), identified via podSelector, are Ready 71 | func (m *ValidationManagerImpl) Validate(ctx context.Context, node *corev1.Node) (bool, error) { 72 | if m.podSelector == "" { 73 | return true, nil 74 | } 75 | 76 | // fetch the pods using the label selector provided 77 | listOptions := metav1.ListOptions{LabelSelector: m.podSelector, 78 | FieldSelector: fmt.Sprintf(nodeNameFieldSelectorFmt, node.Name)} 79 | podList, err := m.k8sInterface.CoreV1().Pods("").List(ctx, listOptions) 80 | if err != nil { 81 | m.log.V(consts.LogLevelError).Error(err, "Failed to list pods", "selector", m.podSelector, "node", node.Name) 82 | return false, err 83 | } 84 | 85 | if len(podList.Items) == 0 { 86 | m.log.V(consts.LogLevelWarning).Info("No validation pods found on the node", "node", node.Name, 87 | "podSelector", m.podSelector) 88 | return false, nil 89 | } 90 | 91 | m.log.V(consts.LogLevelDebug).Info("Found validation pods", "selector", m.podSelector, "node", node.Name, 92 | "pods", len(podList.Items)) 93 | 94 | done := true 95 | for _, pod := range podList.Items { 96 | if !m.isPodReady(pod) { 97 | err = m.handleTimeout(ctx, node, int64(validationTimeoutSeconds)) 98 | if err != nil { 99 | logEventf(m.eventRecorder, node, corev1.EventTypeWarning, GetEventReason(), 100 | "Failed to handle timeout for validation state: %s", err.Error()) 101 | return false, fmt.Errorf("unable to handle timeout for validation state: %v", err) 102 | } 103 | done = false 104 | break 105 | } 106 | // remove annotation used for tracking state time 107 | annotationKey := GetValidationStartTimeAnnotationKey() 108 | err = m.nodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, node, annotationKey, "null") 109 | if err != nil { 110 | m.log.V(consts.LogLevelError).Error(err, "Failed to remove annotation used to track validation completion", 111 | "node", node.Name, "annotation", annotationKey) 112 | return done, err 113 | } 114 | } 115 | return done, nil 116 | } 117 | 118 | func (m *ValidationManagerImpl) isPodReady(pod corev1.Pod) bool { 119 | if pod.Status.Phase != corev1.PodRunning { 120 | m.log.V(consts.LogLevelDebug).Info("Pod not Running", "pod", pod.Name, "podPhase", pod.Status.Phase) 121 | return false 122 | } 123 | if len(pod.Status.ContainerStatuses) == 0 { 124 | m.log.V(consts.LogLevelDebug).Info("No containers running in pod", "pod", pod.Name) 125 | return false 126 | } 127 | 128 | for i := range
pod.Status.ContainerStatuses { 129 | if !pod.Status.ContainerStatuses[i].Ready { 130 | m.log.V(consts.LogLevelDebug).Info("Not all containers ready in pod", "pod", pod.Name) 131 | return false 132 | } 133 | } 134 | 135 | return true 136 | } 137 | 138 | // handleTimeout marks the node as failed if validation has not completed within timeoutSeconds 139 | func (m *ValidationManagerImpl) handleTimeout(ctx context.Context, node *corev1.Node, timeoutSeconds int64) error { 140 | annotationKey := GetValidationStartTimeAnnotationKey() 141 | currentTime := time.Now().Unix() 142 | // check if annotation already exists for tracking start time 143 | if _, present := node.Annotations[annotationKey]; !present { 144 | // add the annotation to track start time 145 | err := m.nodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, node, annotationKey, 146 | strconv.FormatInt(currentTime, 10)) 147 | if err != nil { 148 | m.log.V(consts.LogLevelError).Error(err, "Failed to add annotation to track validation completion", 149 | "node", node.Name, "annotation", annotationKey) 150 | return err 151 | } 152 | return nil 153 | } 154 | // check if timeout reached 155 | startTime, err := strconv.ParseInt(node.Annotations[annotationKey], 10, 64) 156 | if err != nil { 157 | m.log.V(consts.LogLevelError).Error(err, "Failed to convert start time to track validation completion", 158 | "node", node.Name) 159 | return err 160 | } 161 | if currentTime > startTime+timeoutSeconds { 162 | // timeout exceeded, mark node in failed state 163 | _ = m.nodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, node, UpgradeStateFailed) 164 | m.log.V(consts.LogLevelInfo).Info("Timeout exceeded for validation, updated the node state", "node", node.Name, 165 | "state", UpgradeStateFailed) 166 | // remove annotation used for tracking start time 167 | err = m.nodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, node, annotationKey, "null") 168 | if err != nil { 169 | m.log.V(consts.LogLevelError).Error(err, "Failed to remove annotation used to track validation completion", 170 | "node", node.Name, "annotation", annotationKey) 171 | return err 172 | } 173 | } 174 | return nil 175 | } 176 | -------------------------------------------------------------------------------- /pkg/upgrade/validation_manager_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 NVIDIA CORPORATION & AFFILIATES 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package upgrade_test 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "strconv" 23 | "time" 24 | 25 | . "github.com/onsi/ginkgo/v2" 26 | .
"github.com/onsi/gomega" 27 | corev1 "k8s.io/api/core/v1" 28 | 29 | "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" 30 | ) 31 | 32 | var _ = Describe("ValidationManager", func() { 33 | var ctx context.Context 34 | var id string 35 | var node *corev1.Node 36 | var namespace *corev1.Namespace 37 | 38 | BeforeEach(func() { 39 | ctx = context.TODO() 40 | id = randSeq(5) 41 | node = createNode(fmt.Sprintf("node-%s", id)) 42 | namespace = createNamespace(fmt.Sprintf("namespace-%s", id)) 43 | }) 44 | 45 | It("should return no error if podSelector is empty", func() { 46 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 47 | validationManager := upgrade.NewValidationManager(k8sInterface, log, eventRecorder, provider, "") 48 | validationDone, err := validationManager.Validate(ctx, node) 49 | Expect(err).To(Succeed()) 50 | Expect(validationDone).To(Equal(true)) 51 | }) 52 | 53 | It("Validate() should return false when no validation pods are running", func() { 54 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 55 | validationManager := upgrade.NewValidationManager(k8sInterface, log, eventRecorder, provider, "app=validation") 56 | validationDone, err := validationManager.Validate(ctx, node) 57 | Expect(err).To(Succeed()) 58 | Expect(validationDone).To(Equal(false)) 59 | Expect(isValidationAnnotationPresent(node)).To(Equal(false)) 60 | }) 61 | 62 | It("Validate() should return true if validation pod is Running and Ready", func() { 63 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 64 | _ = NewPod("pod", namespace.Name, node.Name). 65 | WithLabels(map[string]string{"app": "validator"}). 66 | Create() 67 | validationManager := upgrade.NewValidationManager(k8sInterface, log, eventRecorder, provider, "app=validator") 68 | validationDone, err := validationManager.Validate(ctx, node) 69 | Expect(err).To(Succeed()) 70 | Expect(validationDone).To(Equal(true)) 71 | Expect(isValidationAnnotationPresent(node)).To(Equal(false)) 72 | }) 73 | 74 | It("Validate() should return false if validation pod is Running but not Ready", func() { 75 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 76 | pod := NewPod("pod", namespace.Name, node.Name). 77 | WithLabels(map[string]string{"app": "validator"}). 78 | Create() 79 | pod.Status.ContainerStatuses[0].Ready = false 80 | _ = updatePodStatus(pod) 81 | 82 | validationManager := upgrade.NewValidationManager(k8sInterface, log, eventRecorder, provider, "app=validator") 83 | validationDone, err := validationManager.Validate(ctx, node) 84 | Expect(err).To(Succeed()) 85 | Expect(validationDone).To(Equal(false)) 86 | Expect(isValidationAnnotationPresent(node)).To(Equal(true)) 87 | }) 88 | 89 | It("Validate() should return false if validation pod is not Running", func() { 90 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 91 | pod := NewPod("pod", namespace.Name, node.Name). 92 | WithLabels(map[string]string{"app": "validator"}). 
93 | Create() 94 | pod.Status.Phase = "Terminating" 95 | _ = updatePodStatus(pod) 96 | 97 | validationManager := upgrade.NewValidationManager(k8sInterface, log, eventRecorder, provider, "app=validator") 98 | validationDone, err := validationManager.Validate(ctx, node) 99 | Expect(err).To(Succeed()) 100 | Expect(validationDone).To(Equal(false)) 101 | Expect(isValidationAnnotationPresent(node)).To(Equal(true)) 102 | }) 103 | 104 | It("Validate() should mark node as UpgradeFailed when validation does not complete before timeout", func() { 105 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 106 | err := provider.ChangeNodeUpgradeState(ctx, node, upgrade.UpgradeStateValidationRequired) 107 | Expect(err).To(Succeed()) 108 | 109 | pod := NewPod("pod", namespace.Name, node.Name). 110 | WithLabels(map[string]string{"app": "validator"}). 111 | Create() 112 | pod.Status.ContainerStatuses[0].Ready = false 113 | _ = updatePodStatus(pod) 114 | 115 | validationManager := upgrade.NewValidationManager(k8sInterface, log, eventRecorder, provider, "app=validator") 116 | validationDone, err := validationManager.Validate(ctx, node) 117 | Expect(err).To(Succeed()) 118 | Expect(validationDone).To(Equal(false)) 119 | 120 | node, err = provider.GetNode(ctx, node.Name) 121 | Expect(err).To(Succeed()) 122 | Expect(node.Labels[upgrade.GetUpgradeStateLabelKey()]).To(Equal(upgrade.UpgradeStateValidationRequired)) 123 | 124 | Expect(isValidationAnnotationPresent(node)).To(Equal(true)) 125 | 126 | startTime := strconv.FormatInt(time.Now().Unix()-605, 10) 127 | provider.ChangeNodeUpgradeAnnotation(ctx, node, upgrade.GetValidationStartTimeAnnotationKey(), startTime) 128 | 129 | validationDone, err = validationManager.Validate(ctx, node) 130 | Expect(err).To(Succeed()) 131 | Expect(validationDone).To(Equal(false)) 132 | 133 | node, err = provider.GetNode(ctx, node.Name) 134 | Expect(err).To(Succeed()) 135 | Expect(node.Labels[upgrade.GetUpgradeStateLabelKey()]).To(Equal(upgrade.UpgradeStateFailed)) 136 | Expect(isValidationAnnotationPresent(node)).To(Equal(false)) 137 | }) 138 | 139 | It("Validate() should remove annotation when validation completes before timeout", func() { 140 | provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder) 141 | err := provider.ChangeNodeUpgradeState(ctx, node, upgrade.UpgradeStateValidationRequired) 142 | Expect(err).To(Succeed()) 143 | 144 | pod := NewPod("pod", namespace.Name, node.Name). 145 | WithLabels(map[string]string{"app": "validator"}). 
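// the Pod helper's Create() marks the pod Running with a Ready container; readiness is toggled below to exercise the annotation lifecycle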
146 | Create() 147 | pod.Status.ContainerStatuses[0].Ready = false 148 | _ = updatePodStatus(pod) 149 | 150 | validationManager := upgrade.NewValidationManager(k8sInterface, log, eventRecorder, provider, "app=validator") 151 | validationDone, err := validationManager.Validate(ctx, node) 152 | Expect(err).To(Succeed()) 153 | Expect(validationDone).To(Equal(false)) 154 | 155 | node, err = provider.GetNode(ctx, node.Name) 156 | Expect(err).To(Succeed()) 157 | Expect(node.Labels[upgrade.GetUpgradeStateLabelKey()]).To(Equal(upgrade.UpgradeStateValidationRequired)) 158 | 159 | Expect(isValidationAnnotationPresent(node)).To(Equal(true)) 160 | 161 | pod.Status.ContainerStatuses[0].Ready = true 162 | _ = updatePodStatus(pod) 163 | 164 | validationDone, err = validationManager.Validate(ctx, node) 165 | Expect(err).To(Succeed()) 166 | Expect(validationDone).To(Equal(true)) 167 | 168 | node, err = provider.GetNode(ctx, node.Name) 169 | Expect(err).To(Succeed()) 170 | Expect(isValidationAnnotationPresent(node)).To(Equal(false)) 171 | }) 172 | }) 173 | --------------------------------------------------------------------------------
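Illustrative usage (a minimal editor's sketch, not part of the repository source): the validation flow above can be driven directly, as the tests do, assuming an initialized client.Client (k8sClient), kubernetes.Interface (k8sInterface), logr.Logger (log), record.EventRecorder (eventRecorder), a context, and a target node:

    // build the provider and a validation manager watching pods labeled app=validator;
    // an empty podSelector would make Validate succeed immediately
    provider := upgrade.NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder)
    validationManager := upgrade.NewValidationManager(k8sInterface, log, eventRecorder, provider, "app=validator")
    done, err := validationManager.Validate(ctx, node)
    // done is true only when every matching pod on the node is Running and Ready;
    // if validation does not complete within validationTimeoutSeconds, the node
    // is moved to the 'upgrade-failed' state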